310 files changed, 7283 insertions, 579 deletions
diff --git a/test/Transforms/ArgumentPromotion/2008-02-01-ReturnAttrs.ll b/test/Transforms/ArgumentPromotion/2008-02-01-ReturnAttrs.ll
index e740b29f92..1226b98a99 100644
--- a/test/Transforms/ArgumentPromotion/2008-02-01-ReturnAttrs.ll
+++ b/test/Transforms/ArgumentPromotion/2008-02-01-ReturnAttrs.ll
@@ -1,15 +1,19 @@
-; RUN: opt < %s -argpromotion -S | grep nounwind | count 2
+; RUN: opt < %s -argpromotion -S | FileCheck %s
 
+; CHECK: define internal i32 @deref(i32 %x.val) #0 {
 define internal i32 @deref(i32* %x) nounwind {
 entry:
-	%tmp2 = load i32* %x, align 4		; <i32> [#uses=1]
-	ret i32 %tmp2
+  %tmp2 = load i32* %x, align 4
+  ret i32 %tmp2
 }
 
 define i32 @f(i32 %x) {
 entry:
-	%x_addr = alloca i32		; <i32*> [#uses=2]
-	store i32 %x, i32* %x_addr, align 4
-	%tmp1 = call i32 @deref( i32* %x_addr ) nounwind 		; <i32> [#uses=1]
-	ret i32 %tmp1
+  %x_addr = alloca i32
+  store i32 %x, i32* %x_addr, align 4
+; CHECK: %tmp1 = call i32 @deref(i32 %x_addr.val) [[NUW:#[0-9]+]]
+  %tmp1 = call i32 @deref( i32* %x_addr ) nounwind
+  ret i32 %tmp1
 }
+
+; CHECK: attributes [[NUW]] = { nounwind }
diff --git a/test/Transforms/ArgumentPromotion/crash.ll b/test/Transforms/ArgumentPromotion/crash.ll
index fed002aa98..f70d8de60e 100644
--- a/test/Transforms/ArgumentPromotion/crash.ll
+++ b/test/Transforms/ArgumentPromotion/crash.ll
@@ -1,5 +1,5 @@
 ; rdar://7879828
-; RUN: opt -inline -argpromotion %s
+; RUN: opt -inline -argpromotion < %s
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-apple-darwin10.0.0"
 
diff --git a/test/Transforms/BBVectorize/X86/pr15289.ll b/test/Transforms/BBVectorize/X86/pr15289.ll
new file mode 100644
index 0000000000..07cc5d8b96
--- /dev/null
+++ b/test/Transforms/BBVectorize/X86/pr15289.ll
@@ -0,0 +1,98 @@
+; RUN: opt < %s -basicaa -bb-vectorize -disable-output
+; This is a bugpoint-reduced test case. It did not always assert, but does reproduce the bug
+; and running under valgrind (or some similar tool) will catch the error.
+
+target datalayout = "e-p:64:64:64-S128-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f16:16:16-f32:32:32-f64:64:64-f128:128:128-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin12.2.0"
+
+%0 = type { [10 x { float, float }], [10 x { float, float }], [10 x { float, float }], [10 x { float, float }], [10 x { float, float }] }
+%1 = type { [10 x [8 x i8]] }
+%2 = type { i64, i64 }
+%3 = type { [10 x i64], i64, i64, i64, i64, i64 }
+%4 = type { i64, i64, i64, i64, i64, i64 }
+%5 = type { [10 x i64] }
+%6 = type { [10 x float], [10 x float], [10 x float], [10 x float] }
+%struct.__st_parameter_dt.1.3.5.7 = type { %struct.__st_parameter_common.0.2.4.6, i64, i64*, i64*, i8*, i8*, i32, i32, i8*, i8*, i32, i32, i8*, [256 x i8], i32*, i64, i8*, i32, i32, i8*, i8*, i32, i32, i8*, i8*, i32, i32, i8*, i8*, i32, [4 x i8] }
+%struct.__st_parameter_common.0.2.4.6 = type { i32, i32, i8*, i32, i32, i8*, i32* }
+
+@cctenso_ = external unnamed_addr global %0, align 32
+@ctenso_ = external unnamed_addr global %1, align 32
+@i_dim_ = external unnamed_addr global %2, align 16
+@itenso1_ = external unnamed_addr global %3, align 32
+@itenso2_ = external unnamed_addr global %4, align 32
+@ltenso_ = external unnamed_addr global %5, align 32
+@rtenso_ = external unnamed_addr global %6, align 32
+@.cst = external unnamed_addr constant [8 x i8], align 8
+@.cst1 = external unnamed_addr constant [3 x i8], align 8
+@.cst2 = external unnamed_addr constant [29 x i8], align 8
+@.cst3 = external unnamed_addr constant [32 x i8], align 64
+
+define void @cart_to_dc2y_(double* noalias nocapture %xx, double* noalias nocapture %yy, double* noalias nocapture %zz, [5 x { double, double }]* noalias nocapture %c2ten) nounwind uwtable {
+entry:
+  %0 = fmul double undef, undef
+  %1 = fmul double undef, undef
+  %2 = fadd double undef, undef
+  %3 = fmul double undef, 0x3FE8B8B76E3E9919
+  %4 = fsub double %0, %1
+  %5 = fsub double -0.000000e+00, undef
+  %6 = fmul double undef, undef
+  %7 = fmul double %4, %6
+  %8 = fmul double undef, 2.000000e+00
+  %9 = fmul double %8, undef
+  %10 = fmul double undef, %9
+  %11 = fmul double %10, undef
+  %12 = fsub double undef, %7
+  %13 = fmul double %3, %12
+  %14 = fmul double %3, undef
+  %15 = getelementptr inbounds [5 x { double, double }]* %c2ten, i64 0, i64 0, i32 0
+  store double %13, double* %15, align 8, !tbaa !0
+  %16 = getelementptr inbounds [5 x { double, double }]* %c2ten, i64 0, i64 0, i32 1
+  %17 = fmul double undef, %8
+  %18 = fmul double %17, undef
+  %19 = fmul double undef, %18
+  %20 = fadd double undef, undef
+  %21 = fmul double %3, %19
+  %22 = fsub double -0.000000e+00, %21
+  %23 = getelementptr inbounds [5 x { double, double }]* %c2ten, i64 0, i64 1, i32 0
+  store double %22, double* %23, align 8, !tbaa !0
+  %24 = getelementptr inbounds [5 x { double, double }]* %c2ten, i64 0, i64 1, i32 1
+  %25 = fmul double undef, 0x3FE42F601A8C6794
+  %26 = fmul double undef, 2.000000e+00
+  %27 = fsub double %26, %0
+  %28 = fmul double %6, undef
+  %29 = fsub double undef, %28
+  %30 = getelementptr inbounds [5 x { double, double }]* %c2ten, i64 0, i64 2, i32 0
+  store double undef, double* %30, align 8, !tbaa !0
+  %31 = getelementptr inbounds [5 x { double, double }]* %c2ten, i64 0, i64 2, i32 1
+  %32 = fmul double undef, %17
+  %33 = fmul double undef, %17
+  %34 = fmul double undef, %32
+  %35 = fmul double undef, %33
+  %36 = fsub double undef, %35
+  %37 = fmul double %3, %34
+  %38 = getelementptr inbounds [5 x { double, double }]* %c2ten, i64 0, i64 3, i32 0
+  store double %37, double* %38, align 8, !tbaa !0
+  %39 = getelementptr inbounds [5 x { double, double }]* %c2ten, i64 0, i64 3, i32 1
+  %40 = fmul double undef, %8
+  %41 = fmul double undef, %40
+  %42 = fmul double undef, %41
+  %43 = fsub double undef, %42
+  %44 = fmul double %3, %43
+  %45 = getelementptr inbounds [5 x { double, double }]* %c2ten, i64 0, i64 4, i32 0
+  store double %13, double* %45, align 8, !tbaa !0
+  %46 = getelementptr inbounds [5 x { double, double }]* %c2ten, i64 0, i64 4, i32 1
+  %47 = fsub double -0.000000e+00, %14
+  store double %47, double* %16, align 8, !tbaa !0
+  store double undef, double* %24, align 8, !tbaa !0
+  store double -0.000000e+00, double* %31, align 8, !tbaa !0
+  store double undef, double* %39, align 8, !tbaa !0
+  store double undef, double* %46, align 8, !tbaa !0
+  ret void
+}
+
+attributes #0 = { nounwind uwtable }
+attributes #1 = { nounwind readnone }
+attributes #2 = { nounwind }
+
+!0 = metadata !{metadata !"alias set 17: real(kind=8)", metadata !1}
+!1 = metadata !{metadata !1}
diff --git a/test/Transforms/BBVectorize/X86/simple-int.ll b/test/Transforms/BBVectorize/X86/simple-int.ll
new file mode 100644
index 0000000000..f5dbe46b14
--- /dev/null
+++ b/test/Transforms/BBVectorize/X86/simple-int.ll
@@ -0,0 +1,79 @@
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s
+
+declare double @llvm.fma.f64(double, double, double)
+declare double @llvm.fmuladd.f64(double, double, double)
+declare double @llvm.cos.f64(double)
+declare double @llvm.powi.f64(double, i32)
+
+; Basic depth-3 chain with fma
+define double @test1(double %A1, double %A2, double %B1, double %B2, double %C1, double %C2) {
+	%X1 = fsub double %A1, %B1
+	%X2 = fsub double %A2, %B2
+	%Y1 = call double @llvm.fma.f64(double %X1, double %A1, double %C1)
+	%Y2 = call double @llvm.fma.f64(double %X2, double %A2, double %C2)
+	%Z1 = fadd double %Y1, %B1
+	%Z2 = fadd double %Y2, %B2
+	%R  = fmul double %Z1, %Z2
+	ret double %R
+; CHECK: @test1
+; CHECK: ret double %R
+}
+
+; Basic depth-3 chain with fmuladd
+define double @test1a(double %A1, double %A2, double %B1, double %B2, double %C1, double %C2) {
+	%X1 = fsub double %A1, %B1
+	%X2 = fsub double %A2, %B2
+	%Y1 = call double @llvm.fmuladd.f64(double %X1, double %A1, double %C1)
+	%Y2 = call double @llvm.fmuladd.f64(double %X2, double %A2, double %C2)
+	%Z1 = fadd double %Y1, %B1
+	%Z2 = fadd double %Y2, %B2
+	%R  = fmul double %Z1, %Z2
+	ret double %R
+; CHECK: @test1a
+; CHECK: ret double %R
+}
+
+; Basic depth-3 chain with cos
+define double @test2(double %A1, double %A2, double %B1, double %B2) {
+	%X1 = fsub double %A1, %B1
+	%X2 = fsub double %A2, %B2
+	%Y1 = call double @llvm.cos.f64(double %X1)
+	%Y2 = call double @llvm.cos.f64(double %X2)
+	%Z1 = fadd double %Y1, %B1
+	%Z2 = fadd double %Y2, %B2
+	%R  = fmul double %Z1, %Z2
+	ret double %R
+; CHECK: @test2
+; CHECK: ret double %R
+}
+
+; Basic depth-3 chain with powi
+define double @test3(double %A1, double %A2, double %B1, double %B2, i32 %P) {
+	%X1 = fsub double %A1, %B1
+	%X2 = fsub double %A2, %B2
+	%Y1 = call double @llvm.powi.f64(double %X1, i32 %P)
+	%Y2 = call double @llvm.powi.f64(double %X2, i32 %P)
+	%Z1 = fadd double %Y1, %B1
+	%Z2 = fadd double %Y2, %B2
+	%R  = fmul double %Z1, %Z2
+	ret double %R
+; CHECK: @test3
+; CHECK: ret double %R
+}
+
+; Basic depth-3 chain with powi (different powers: should not vectorize)
+define double @test4(double %A1, double %A2, double %B1, double %B2, i32 %P) {
+	%X1 = fsub double %A1, %B1
+	%X2 = fsub double %A2, %B2
+        %P2 = add i32 %P, 1
+	%Y1 = call double @llvm.powi.f64(double %X1, i32 %P)
+	%Y2 = call double @llvm.powi.f64(double %X2, i32 %P2)
+	%Z1 = fadd double %Y1, %B1
+	%Z2 = fadd double %Y2, %B2
+	%R  = fmul double %Z1, %Z2
+	ret double %R
+; CHECK: @test4
+; CHECK: ret double %R
+}
+
diff --git a/test/Transforms/BBVectorize/cycle.ll b/test/Transforms/BBVectorize/cycle.ll
index e8e82ce024..bdcb30da88 100644
--- a/test/Transforms/BBVectorize/cycle.ll
+++ b/test/Transforms/BBVectorize/cycle.ll
@@ -1,5 +1,5 @@
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
-; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s
+; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-ignore-target-info -instcombine -gvn -S | FileCheck %s
 
 ; This test checks the non-trivial pairing-induced cycle avoidance. Without this cycle avoidance, the algorithm would otherwise
 ; want to select the pairs:
diff --git a/test/Transforms/BBVectorize/ld1.ll b/test/Transforms/BBVectorize/ld1.ll
index cea225d076..ea5cb5dd93 100644
--- a/test/Transforms/BBVectorize/ld1.ll
+++ b/test/Transforms/BBVectorize/ld1.ll
@@ -1,5 +1,5 @@
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
-; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s
+; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-ignore-target-info -instcombine -gvn -S | FileCheck %s
 
 define double @test1(double* %a, double* %b, double* %c) nounwind uwtable readonly {
 entry:
diff --git a/test/Transforms/BBVectorize/loop1.ll b/test/Transforms/BBVectorize/loop1.ll
index c22ea5852a..e592edb44a 100644
--- a/test/Transforms/BBVectorize/loop1.ll
+++ b/test/Transforms/BBVectorize/loop1.ll
@@ -1,7 +1,7 @@
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
-; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s
-; RUN: opt < %s -basicaa -loop-unroll -unroll-threshold=45 -unroll-allow-partial -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s -check-prefix=CHECK-UNRL
+; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-ignore-target-info -instcombine -gvn -S | FileCheck %s
+; RUN: opt < %s -basicaa -loop-unroll -unroll-threshold=45 -unroll-allow-partial -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-ignore-target-info -instcombine -gvn -S | FileCheck %s -check-prefix=CHECK-UNRL
 ; The second check covers the use of alias analysis (with loop unrolling).
 
 define void @test1(double* noalias %out, double* noalias %in1, double* noalias %in2) nounwind uwtable {
diff --git a/test/Transforms/BBVectorize/req-depth.ll b/test/Transforms/BBVectorize/req-depth.ll
index 8c9cc3c188..e0120059b9 100644
--- a/test/Transforms/BBVectorize/req-depth.ll
+++ b/test/Transforms/BBVectorize/req-depth.ll
@@ -1,6 +1,6 @@
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
-; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth 3 -S | FileCheck %s -check-prefix=CHECK-RD3
-; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth 2 -S | FileCheck %s -check-prefix=CHECK-RD2
+; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth 3 -bb-vectorize-ignore-target-info -S | FileCheck %s -check-prefix=CHECK-RD3
+; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth 2 -bb-vectorize-ignore-target-info -S | FileCheck %s -check-prefix=CHECK-RD2
 
 define double @test1(double %A1, double %A2, double %B1, double %B2) {
 	%X1 = fsub double %A1, %B1
diff --git a/test/Transforms/BBVectorize/search-limit.ll b/test/Transforms/BBVectorize/search-limit.ll
index aeaf98865b..a694e45bc1 100644
--- a/test/Transforms/BBVectorize/search-limit.ll
+++ b/test/Transforms/BBVectorize/search-limit.ll
@@ -1,6 +1,6 @@
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
-; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s
-; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-search-limit=4 -instcombine -gvn -S | FileCheck %s -check-prefix=CHECK-SL4
+; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-ignore-target-info -instcombine -gvn -S | FileCheck %s
+; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-search-limit=4 -bb-vectorize-ignore-target-info -instcombine -gvn -S | FileCheck %s -check-prefix=CHECK-SL4
 
 define double @test1(double %A1, double %A2, double %B1, double %B2) {
 ; CHECK: @test1
diff --git a/test/Transforms/BBVectorize/simple-int.ll b/test/Transforms/BBVectorize/simple-int.ll
index ae1d63bfd8..e4d51526ca 100644
--- a/test/Transforms/BBVectorize/simple-int.ll
+++ b/test/Transforms/BBVectorize/simple-int.ll
@@ -1,7 +1,8 @@
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
-; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s
+; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-ignore-target-info -instcombine -gvn -S | FileCheck %s
 
 declare double @llvm.fma.f64(double, double, double)
+declare double @llvm.fmuladd.f64(double, double, double)
 declare double @llvm.cos.f64(double)
 declare double @llvm.powi.f64(double, i32)
 
@@ -31,6 +32,32 @@ define double @test1(double %A1, double %A2, double %B1, double %B2, double %C1,
 ; CHECK: ret double %R
 }
 
+; Basic depth-3 chain with fmuladd
+define double @test1a(double %A1, double %A2, double %B1, double %B2, double %C1, double %C2) {
+	%X1 = fsub double %A1, %B1
+	%X2 = fsub double %A2, %B2
+	%Y1 = call double @llvm.fmuladd.f64(double %X1, double %A1, double %C1)
+	%Y2 = call double @llvm.fmuladd.f64(double %X2, double %A2, double %C2)
+	%Z1 = fadd double %Y1, %B1
+	%Z2 = fadd double %Y2, %B2
+	%R  = fmul double %Z1, %Z2
+	ret double %R
+; CHECK: @test1a
+; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
+; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
+; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
+; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
+; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
+; CHECK: %Y1.v.i2.1 = insertelement <2 x double> undef, double %C1, i32 0
+; CHECK: %Y1.v.i2.2 = insertelement <2 x double> %Y1.v.i2.1, double %C2, i32 1
+; CHECK: %Y1 = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> %X1, <2 x double> %X1.v.i0.2, <2 x double> %Y1.v.i2.2)
+; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2
+; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
+; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
+; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
+; CHECK: ret double %R
+}
+
 ; Basic depth-3 chain with cos
 define double @test2(double %A1, double %A2, double %B1, double %B2) {
 	%X1 = fsub double %A1, %B1
@@ -97,7 +124,10 @@ define double @test4(double %A1, double %A2, double %B1, double %B2, i32 %P) {
 ; CHECK: ret double %R
 }
 
-; CHECK: declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
-; CHECK: declare <2 x double> @llvm.cos.v2f64(<2 x double>) nounwind readonly
-; CHECK: declare <2 x double> @llvm.powi.v2f64(<2 x double>, i32) nounwind readonly
+; CHECK: declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) #0
+; CHECK: declare <2 x double> @llvm.fmuladd.v2f64(<2 x double>, <2 x double>, <2 x double>) #0
+; CHECK: declare <2 x double> @llvm.cos.v2f64(<2 x double>) #1
+; CHECK: declare <2 x double> @llvm.powi.v2f64(<2 x double>, i32) #1
 
+; CHECK: attributes #0 = { nounwind readnone }
+; CHECK: attributes #1 = { nounwind readonly }
diff --git a/test/Transforms/BBVectorize/simple-ldstr.ll b/test/Transforms/BBVectorize/simple-ldstr.ll
index 7dd77c933f..8e51d297e8 100644
--- a/test/Transforms/BBVectorize/simple-ldstr.ll
+++ b/test/Transforms/BBVectorize/simple-ldstr.ll
@@ -1,6 +1,6 @@
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
-; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s
-; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-aligned-only -instcombine -gvn -S | FileCheck %s -check-prefix=CHECK-AO
+; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-ignore-target-info -instcombine -gvn -S | FileCheck %s
+; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-aligned-only -bb-vectorize-ignore-target-info -instcombine -gvn -S | FileCheck %s -check-prefix=CHECK-AO
 
 ; Simple 3-pair chain with loads and stores
 define void @test1(double* %a, double* %b, double* %c) nounwind uwtable readonly {
diff --git a/test/Transforms/BBVectorize/simple-sel.ll b/test/Transforms/BBVectorize/simple-sel.ll
index 15ecb59702..8caccfd32c 100644
--- a/test/Transforms/BBVectorize/simple-sel.ll
+++ b/test/Transforms/BBVectorize/simple-sel.ll
@@ -1,6 +1,6 @@
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
-; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s
-; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-no-bools -instcombine -gvn -S | FileCheck %s -check-prefix=CHECK-NB
+; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-ignore-target-info -instcombine -gvn -S | FileCheck %s
+; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-no-bools -bb-vectorize-ignore-target-info -instcombine -gvn -S | FileCheck %s -check-prefix=CHECK-NB
 
 ; Basic depth-3 chain with select
 define double @test1(double %A1, double %A2, double %B1, double %B2, i1 %C1, i1 %C2) {
diff --git a/test/Transforms/BBVectorize/simple.ll b/test/Transforms/BBVectorize/simple.ll
index 3527ae75b4..a447908d16 100644
--- a/test/Transforms/BBVectorize/simple.ll
+++ b/test/Transforms/BBVectorize/simple.ll
@@ -1,5 +1,5 @@
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
-; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s
+; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-ignore-target-info -instcombine -gvn -S | FileCheck %s
 
 ; Basic depth-3 chain
 define double @test1(double %A1, double %A2, double %B1, double %B2) {
diff --git a/test/Transforms/BBVectorize/simple3.ll b/test/Transforms/BBVectorize/simple3.ll
index 153be73f83..78bcc9f830 100644
--- a/test/Transforms/BBVectorize/simple3.ll
+++ b/test/Transforms/BBVectorize/simple3.ll
@@ -1,5 +1,5 @@
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
-; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-vector-bits=192 -instcombine -gvn -S | FileCheck %s
+; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-vector-bits=192 -bb-vectorize-ignore-target-info -instcombine -gvn -S | FileCheck %s
 
 ; Basic depth-3 chain
 define double @test1(double %A1, double %A2, double %A3, double %B1, double %B2, double %B3) {
diff --git a/test/Transforms/CodeGenPrepare/basic.ll b/test/Transforms/CodeGenPrepare/basic.ll
index c68e77eb55..d617e43be8 100644
--- a/test/Transforms/CodeGenPrepare/basic.ll
+++ b/test/Transforms/CodeGenPrepare/basic.ll
@@ -1,4 +1,4 @@
-; RUN: opt -codegenprepare %s -S -o - | FileCheck %s
+; RUN: opt -codegenprepare -S < %s | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-apple-darwin10.0.0"
diff --git a/test/Transforms/ConstProp/2007-11-23-cttz.ll b/test/Transforms/ConstProp/2007-11-23-cttz.ll
index 6d34cb17ff..c5ee70c2ff 100644
--- a/test/Transforms/ConstProp/2007-11-23-cttz.ll
+++ b/test/Transforms/ConstProp/2007-11-23-cttz.ll
@@ -3,6 +3,6 @@
 declare i13 @llvm.cttz.i13(i13, i1)
 
 define i13 @test() {
-	%X = call i13 @llvm.cttz.i13(i13 0, i1 true)
+	%X = call i13 @llvm.cttz.i13(i13 0, i1 false)
 	ret i13 %X
 }
diff --git a/test/Transforms/ConstantMerge/2003-10-28-MergeExternalConstants.ll b/test/Transforms/ConstantMerge/2003-10-28-MergeExternalConstants.ll
index ce79e3b296..a415995070 100644
--- a/test/Transforms/ConstantMerge/2003-10-28-MergeExternalConstants.ll
+++ b/test/Transforms/ConstantMerge/2003-10-28-MergeExternalConstants.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -constmerge %s | FileCheck %s
+; RUN: opt -S -constmerge < %s | FileCheck %s
 
 ; CHECK: @foo = constant i32 6
 ; CHECK: @bar = constant i32 6
diff --git a/test/Transforms/ConstantMerge/2011-01-15-EitherOrder.ll b/test/Transforms/ConstantMerge/2011-01-15-EitherOrder.ll
index f561daf667..5aafcfe3d4 100644
--- a/test/Transforms/ConstantMerge/2011-01-15-EitherOrder.ll
+++ b/test/Transforms/ConstantMerge/2011-01-15-EitherOrder.ll
@@ -1,4 +1,4 @@
-; RUN: opt -constmerge %s -S -o - | FileCheck %s
+; RUN: opt -constmerge -S < %s | FileCheck %s
 ; PR8978
 
 declare i32 @zed(%struct.foobar*, %struct.foobar*)
diff --git a/test/Transforms/ConstantMerge/merge-both.ll b/test/Transforms/ConstantMerge/merge-both.ll
index b71eb437db..b00345557c 100644
--- a/test/Transforms/ConstantMerge/merge-both.ll
+++ b/test/Transforms/ConstantMerge/merge-both.ll
@@ -1,4 +1,4 @@
-; RUN: opt -constmerge %s -S -o - | FileCheck %s
+; RUN: opt -constmerge -S < %s | FileCheck %s
 ; Test that in one run var3 is merged into var2 and var1 into var4.
 ; Test that we merge @var5 and @var6 into one with the higher alignment, and
 ; don't merge var7/var8 into var5/var6.
diff --git a/test/Transforms/ConstantMerge/unnamed-addr.ll b/test/Transforms/ConstantMerge/unnamed-addr.ll
index 24100837aa..aff8540f2c 100644
--- a/test/Transforms/ConstantMerge/unnamed-addr.ll
+++ b/test/Transforms/ConstantMerge/unnamed-addr.ll
@@ -1,4 +1,4 @@
-; RUN: opt -constmerge %s -S -o - | FileCheck %s
+; RUN: opt -constmerge -S < %s | FileCheck %s
 ; Test which corresponding x and y are merged and that unnamed_addr
 ; is correctly set.
 
diff --git a/test/Transforms/CorrelatedValuePropagation/basic.ll b/test/Transforms/CorrelatedValuePropagation/basic.ll
index 475cd8d772..39c437ccfa 100644
--- a/test/Transforms/CorrelatedValuePropagation/basic.ll
+++ b/test/Transforms/CorrelatedValuePropagation/basic.ll
@@ -81,6 +81,26 @@ LessThanOrEqualToTwo:
   ret i32 0
 }
 
+declare i32* @f(i32*)
+define void @test5(i32* %x, i32* %y) {
+; CHECK: @test5
+entry:
+  %pre = icmp eq i32* %x, null
+  br i1 %pre, label %return, label %loop
+
+loop:
+  %phi = phi i32* [ %sel, %loop ], [ %x, %entry ]
+; CHECK: %phi = phi i32* [ %f, %loop ], [ %x, %entry ]
+  %f = tail call i32* @f(i32* %phi)
+  %cmp1 = icmp ne i32* %f, %y
+  %sel = select i1 %cmp1, i32* %f, i32* null
+  %cmp2 = icmp eq i32* %sel, null
+  br i1 %cmp2, label %return, label %loop
+
+return:
+  ret void
+}
+
 define i32 @switch1(i32 %s) {
 ; CHECK: @switch1
 entry:
@@ -105,7 +125,7 @@ negative:
   ]
 
 out:
-  %p = phi i32 [ 1, %entry ], [ -1, %negative ], [ -1, %negative ], [ -1, %negative ], [ -1, %negative ]
+  %p = phi i32 [ 1, %entry ], [ -1, %negative ], [ -1, %negative ], [ -1, %negative ], [ -1, %negative ], [ -1, %negative ]
   ret i32 %p
 
 next:
diff --git a/test/Transforms/DeadArgElim/2007-12-20-ParamAttrs.ll b/test/Transforms/DeadArgElim/2007-12-20-ParamAttrs.ll
index 7c6c575ea8..f049265ce4 100644
--- a/test/Transforms/DeadArgElim/2007-12-20-ParamAttrs.ll
+++ b/test/Transforms/DeadArgElim/2007-12-20-ParamAttrs.ll
@@ -1,20 +1,20 @@
-; RUN: opt < %s -deadargelim -S > %t
-; RUN: cat %t | grep nounwind | count 2
-; RUN: cat %t | grep signext | count 2
-; RUN: cat %t | not grep inreg
-; RUN: cat %t | not grep zeroext
-; RUN: cat %t | not grep byval
+; RUN: opt < %s -deadargelim -S | FileCheck %s
 
-	%struct = type { }
+%struct = type { }
 
 @g = global i8 0
 
+; CHECK: define internal void @foo(i8 signext %y) [[NUW:#[0-9]+]]
+
 define internal zeroext i8 @foo(i8* inreg %p, i8 signext %y, ... )  nounwind {
-	store i8 %y, i8* @g
-	ret i8 0
+  store i8 %y, i8* @g
+  ret i8 0
 }
 
 define i32 @bar() {
-	%A = call zeroext i8(i8*, i8, ...)* @foo(i8* inreg null, i8 signext 1, %struct* byval null ) nounwind
-	ret i32 0
+; CHECK: call void @foo(i8 signext 1) [[NUW]]
+  %A = call zeroext i8(i8*, i8, ...)* @foo(i8* inreg null, i8 signext 1, %struct* byval null ) nounwind
+  ret i32 0
 }
+
+; CHECK: attributes [[NUW]] = { nounwind }
diff --git a/test/Transforms/DeadArgElim/2010-04-30-DbgInfo.ll b/test/Transforms/DeadArgElim/2010-04-30-DbgInfo.ll
index 2f820bad84..f5d2588dd0 100644
--- a/test/Transforms/DeadArgElim/2010-04-30-DbgInfo.ll
+++ b/test/Transforms/DeadArgElim/2010-04-30-DbgInfo.ll
@@ -8,14 +8,14 @@ entry:
   call void @llvm.dbg.value(metadata !{i32 %len}, i64 0, metadata !10)
   call void @llvm.dbg.value(metadata !{i32 %hash}, i64 0, metadata !11)
   call void @llvm.dbg.value(metadata !{i32 %flags}, i64 0, metadata !12)
-; CHECK:  call fastcc i8* @add_name_internal(i8* %name, i32 %hash) nounwind, !dbg !13
+; CHECK:  call fastcc i8* @add_name_internal(i8* %name, i32 %hash) [[NUW:#[0-9]+]], !dbg !13
   %0 = call fastcc i8* @add_name_internal(i8* %name, i32 %len, i32 %hash, i8 zeroext 0, i32 %flags) nounwind, !dbg !13 ; <i8*> [#uses=1]
   ret i8* %0, !dbg !13
 }
 
 declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 
-define internal fastcc i8* @add_name_internal(i8* %name, i32 %len, i32 %hash, i8 zeroext %extra, i32 %flags) nounwind noinline ssp {
+define internal fastcc i8* @add_name_internal(i8* %name, i32 %len, i32 %hash, i8 zeroext %extra, i32 %flags) noinline nounwind ssp {
 entry:
   call void @llvm.dbg.value(metadata !{i8* %name}, i64 0, metadata !15)
   call void @llvm.dbg.value(metadata !{i32 %len}, i64 0, metadata !20)
@@ -38,6 +38,11 @@ bb2:                                              ; preds = %bb1, %bb
 
 declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 
+; CHECK: attributes #0 = { nounwind ssp }
+; CHECK: attributes #1 = { nounwind readnone }
+; CHECK: attributes #2 = { noinline nounwind ssp }
+; CHECK: attributes [[NUW]] = { nounwind }
+
 !0 = metadata !{i32 524545, metadata !1, metadata !"name", metadata !2, i32 8, metadata !6} ; [ DW_TAG_arg_variable ]
 !1 = metadata !{i32 524334, i32 0, metadata !2, metadata !"vfs_addname", metadata !"vfs_addname", metadata !"vfs_addname", metadata !2, i32 12, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false} ; [ DW_TAG_subprogram ]
 !2 = metadata !{i32 524329, metadata !"tail.c", metadata !"/Users/echeng/LLVM/radars/r7927803/", metadata !3} ; [ DW_TAG_file_type ]
diff --git a/test/Transforms/DeadArgElim/dbginfo.ll b/test/Transforms/DeadArgElim/dbginfo.ll
index dcbfaaa3d7..59eb4588bd 100644
--- a/test/Transforms/DeadArgElim/dbginfo.ll
+++ b/test/Transforms/DeadArgElim/dbginfo.ll
@@ -1,4 +1,4 @@
-; RUN: opt %s -deadargelim -S | FileCheck %s
+; RUN: opt -deadargelim -S < %s | FileCheck %s
 ; PR14016
 
 ; Check that debug info metadata for subprograms stores pointers to
@@ -37,13 +37,11 @@ entry:
 !llvm.dbg.cu = !{!0}
 
 !0 = metadata !{i32 786449, i32 0, i32 4, metadata !"test.cc", metadata !"/home/samsonov/tmp/clang-di", metadata !"clang version 3.2 (trunk 165305)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ] [/home/samsonov/tmp/clang-di/test.cc] [DW_LANG_C_plus_plus]
-!1 = metadata !{metadata !2}
-!2 = metadata !{i32 0}
-!3 = metadata !{metadata !4}
-!4 = metadata !{metadata !5, metadata !8, metadata !9}
+!1 = metadata !{i32 0}
+!3 = metadata !{metadata !5, metadata !8, metadata !9}
 !5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"run", metadata !"run", metadata !"", metadata !6, i32 8, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @_Z3runv, null, null, metadata !1, i32 8} ; [ DW_TAG_subprogram ] [line 8] [def] [run]
 !6 = metadata !{i32 786473, metadata !"test.cc", metadata !"/home/samsonov/tmp/clang-di", null} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !2, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !1, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !8 = metadata !{i32 786478, i32 0, metadata !6, metadata !"dead_vararg", metadata !"dead_vararg", metadata !"", metadata !6, i32 5, metadata !7, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (...)* @_ZN12_GLOBAL__N_111dead_varargEz, null, null, metadata !1, i32 5} ; [ DW_TAG_subprogram ] [line 5] [local] [def] [dead_vararg]
 
 ; CHECK: metadata !"dead_vararg"{{.*}}void ()* @_ZN12_GLOBAL__N_111dead_varargEz
diff --git a/test/Transforms/DeadArgElim/deadexternal.ll b/test/Transforms/DeadArgElim/deadexternal.ll
index e3fe1bbb54..cca58721e5 100644
--- a/test/Transforms/DeadArgElim/deadexternal.ll
+++ b/test/Transforms/DeadArgElim/deadexternal.ll
@@ -1,4 +1,4 @@
-; RUN: opt -deadargelim -S %s | FileCheck %s
+; RUN: opt -deadargelim -S < %s | FileCheck %s
 
 define void @test(i32) {
   ret void
diff --git a/test/Transforms/DeadArgElim/keepalive.ll b/test/Transforms/DeadArgElim/keepalive.ll
index dc92dc9f17..e41110c96e 100644
--- a/test/Transforms/DeadArgElim/keepalive.ll
+++ b/test/Transforms/DeadArgElim/keepalive.ll
@@ -1,6 +1,4 @@
-; RUN: opt < %s -deadargelim -S > %t
-; RUN: grep "define internal zeroext i32 @test1() nounwind" %t
-; RUN: grep "define internal <{ i32, i32 }> @test2" %t
+; RUN: opt < %s -deadargelim -S | FileCheck %s
 
 %Ty = type <{ i32, i32 }>
 
@@ -9,11 +7,13 @@
 ; the function and then changing too much.
 
 ; This checks if the return value attributes are not removed
+; CHECK: define internal zeroext i32 @test1() #0
 define internal zeroext i32 @test1(i32 %DEADARG1) nounwind {
         ret i32 1
 }
 
 ; This checks if the struct doesn't get non-packed
+; CHECK: define internal <{ i32, i32 }> @test2
 define internal <{ i32, i32 }> @test2(i32 %DEADARG1) {
         ret <{ i32, i32 }> <{ i32 1, i32 2 }>
 }
@@ -28,3 +28,4 @@ define void @caller() {
         ret void
 }
 
+; CHECK: attributes #0 = { nounwind }
diff --git a/test/Transforms/DeadStoreElimination/const-pointers.ll b/test/Transforms/DeadStoreElimination/const-pointers.ll
index 7d57804631..15976f9f10 100644
--- a/test/Transforms/DeadStoreElimination/const-pointers.ll
+++ b/test/Transforms/DeadStoreElimination/const-pointers.ll
@@ -1,4 +1,4 @@
-; RUN: opt %s -basicaa -dse -S | FileCheck %s
+; RUN: opt -basicaa -dse -S < %s | FileCheck %s
 
 %t = type { i32 }
 
diff --git a/test/Transforms/DeadStoreElimination/dominate.ll b/test/Transforms/DeadStoreElimination/dominate.ll
index 284fea4234..38cf1a066d 100644
--- a/test/Transforms/DeadStoreElimination/dominate.ll
+++ b/test/Transforms/DeadStoreElimination/dominate.ll
@@ -1,4 +1,4 @@
-; RUN: opt  %s -dse -disable-output
+; RUN: opt -dse -disable-output < %s
 ; test that we don't crash
 declare void @bar()
 
diff --git a/test/Transforms/DeadStoreElimination/no-targetdata.ll b/test/Transforms/DeadStoreElimination/no-targetdata.ll
index 6c7f940316..4022d76dcb 100644
--- a/test/Transforms/DeadStoreElimination/no-targetdata.ll
+++ b/test/Transforms/DeadStoreElimination/no-targetdata.ll
@@ -1,4 +1,4 @@
-; RUN: opt %s -basicaa -dse -S | FileCheck %s
+; RUN: opt -basicaa -dse -S < %s | FileCheck %s
 
 declare void @test1f()
 
diff --git a/test/Transforms/DeadStoreElimination/pr11390.ll b/test/Transforms/DeadStoreElimination/pr11390.ll
index 2ce6eea365..f63aa1eb8a 100644
--- a/test/Transforms/DeadStoreElimination/pr11390.ll
+++ b/test/Transforms/DeadStoreElimination/pr11390.ll
@@ -1,4 +1,4 @@
-; RUN: opt -basicaa -dse -S -o - %s | FileCheck %s
+; RUN: opt -basicaa -dse -S < %s | FileCheck %s
 ; PR11390
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
diff --git a/test/Transforms/EarlyCSE/commute.ll b/test/Transforms/EarlyCSE/commute.ll
index f84a7dd1aa..8cf04d1765 100644
--- a/test/Transforms/EarlyCSE/commute.ll
+++ b/test/Transforms/EarlyCSE/commute.ll
@@ -19,9 +19,9 @@ define void @test2(float %A, float %B, i1* %PA, i1* %PB) {
   ; CHECK-NEXT: store
   ; CHECK-NEXT: store
   ; CHECK-NEXT: ret
-  %C = fcmp eq float %A, %B
+  %C = fcmp oeq float %A, %B
   store i1 %C, i1* %PA
-  %D = fcmp eq float %B, %A
+  %D = fcmp oeq float %B, %A
   store i1 %D, i1* %PB
   ret void
 }
diff --git a/test/Transforms/FunctionAttrs/2008-09-03-ReadNone.ll b/test/Transforms/FunctionAttrs/2008-09-03-ReadNone.ll
index 946453f586..36a7658734 100644
--- a/test/Transforms/FunctionAttrs/2008-09-03-ReadNone.ll
+++ b/test/Transforms/FunctionAttrs/2008-09-03-ReadNone.ll
@@ -1,18 +1,24 @@
-; RUN: opt < %s -basicaa -functionattrs -S | grep readnone | count 4
+; RUN: opt < %s -basicaa -functionattrs -S | FileCheck %s
 @x = global i32 0
 
+; CHECK: declare i32 @e() #0
 declare i32 @e() readnone
 
+; CHECK: define i32 @f() #0
 define i32 @f() {
 	%tmp = call i32 @e( )		; <i32> [#uses=1]
 	ret i32 %tmp
 }
 
+; CHECK: define i32 @g() #0
 define i32 @g() readonly {
 	ret i32 0
 }
 
+; CHECK: define i32 @h() #0
 define i32 @h() readnone {
 	%tmp = load i32* @x		; <i32> [#uses=1]
 	ret i32 %tmp
 }
+
+; CHECK: attributes #0 = { readnone }
diff --git a/test/Transforms/FunctionAttrs/2008-09-03-ReadOnly.ll b/test/Transforms/FunctionAttrs/2008-09-03-ReadOnly.ll
index 22eca13204..d8256ae8e6 100644
--- a/test/Transforms/FunctionAttrs/2008-09-03-ReadOnly.ll
+++ b/test/Transforms/FunctionAttrs/2008-09-03-ReadOnly.ll
@@ -1,9 +1,13 @@
-; RUN: opt < %s -basicaa -functionattrs -S | grep readonly | count 2
+; RUN: opt < %s -basicaa -functionattrs -S | FileCheck %s
 
+; CHECK: define i32 @f() #0
 define i32 @f() {
 entry:
-	%tmp = call i32 @e( )		; <i32> [#uses=1]
-	ret i32 %tmp
+  %tmp = call i32 @e( )
+  ret i32 %tmp
 }
 
+; CHECK: declare i32 @e() #0
 declare i32 @e() readonly
+
+; CHECK: attributes #0 = { readonly }
diff --git a/test/Transforms/FunctionAttrs/atomic.ll b/test/Transforms/FunctionAttrs/atomic.ll
index 7c2bff7a05..027ee0fd06 100644
--- a/test/Transforms/FunctionAttrs/atomic.ll
+++ b/test/Transforms/FunctionAttrs/atomic.ll
@@ -3,7 +3,7 @@
 ; Atomic load/store to local doesn't affect whether a function is
 ; readnone/readonly.
 define i32 @test1(i32 %x) uwtable ssp {
-; CHECK: define i32 @test1(i32 %x) uwtable readnone ssp {
+; CHECK: define i32 @test1(i32 %x) #0 {
 entry:
   %x.addr = alloca i32, align 4
   store atomic i32 %x, i32* %x.addr seq_cst, align 4
@@ -13,9 +13,11 @@ entry:
 
 ; A function with an Acquire load is not readonly.
 define i32 @test2(i32* %x) uwtable ssp {
-; CHECK: define i32 @test2(i32* nocapture %x) uwtable ssp {
+; CHECK: define i32 @test2(i32* nocapture %x) #1 {
 entry:
   %r = load atomic i32* %x seq_cst, align 4
   ret i32 %r
 }
 
+; CHECK: attributes #0 = { readnone ssp uwtable }
+; CHECK: attributes #1 = { ssp uwtable }
diff --git a/test/Transforms/FunctionAttrs/noreturn.ll b/test/Transforms/FunctionAttrs/noreturn.ll
new file mode 100644
index 0000000000..470ebcb1d3
--- /dev/null
+++ b/test/Transforms/FunctionAttrs/noreturn.ll
@@ -0,0 +1,18 @@
+; RUN: opt < %s -functionattrs -instcombine -S | FileCheck %s
+
+define void @endless_loop() noreturn nounwind readnone ssp uwtable {
+entry:
+  br label %while.body
+
+while.body:
+  br label %while.body
+}
+;CHECK: @main
+;CHECK: endless_loop
+;CHECK: ret
+define i32 @main() noreturn nounwind ssp uwtable {
+entry:
+  tail call void @endless_loop()
+  unreachable
+}
+
diff --git a/test/Transforms/GVN/2011-04-27-phioperands.ll b/test/Transforms/GVN/2011-04-27-phioperands.ll
index 6e5075db7c..42c46500c4 100644
--- a/test/Transforms/GVN/2011-04-27-phioperands.ll
+++ b/test/Transforms/GVN/2011-04-27-phioperands.ll
@@ -1,4 +1,4 @@
-; RUN: opt %s -gvn -disable-output
+; RUN: opt -gvn -disable-output < %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-f128:128:128-n8:16:32:64"
 
diff --git a/test/Transforms/GVN/MemdepMiscompile.ll b/test/Transforms/GVN/MemdepMiscompile.ll
new file mode 100644
index 0000000000..d420169615
--- /dev/null
+++ b/test/Transforms/GVN/MemdepMiscompile.ll
@@ -0,0 +1,54 @@
+; RUN: opt < %s -basicaa -gvn -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-macosx10.7.0"
+
+; rdar://12801584
+; Value of %shouldExit can be changed by RunInMode.
+; Make sure we do not replace load %shouldExit in while.cond.backedge
+; with a phi node where the value from while.body is 0.
+define i32 @test() nounwind ssp {
+entry:
+; CHECK: test()
+; CHECK: while.body:
+; CHECK: call void @RunInMode
+; CHECK: br i1 %tobool, label %while.cond.backedge, label %if.then
+; CHECK: while.cond.backedge:
+; CHECK: load i32* %shouldExit
+; CHECK: br i1 %cmp, label %while.body
+  %shouldExit = alloca i32, align 4
+  %tasksIdle = alloca i32, align 4
+  store i32 0, i32* %shouldExit, align 4
+  store i32 0, i32* %tasksIdle, align 4
+  call void @CTestInitialize(i32* %tasksIdle) nounwind
+  %0 = load i32* %shouldExit, align 4
+  %cmp1 = icmp eq i32 %0, 0
+  br i1 %cmp1, label %while.body.lr.ph, label %while.end
+
+while.body.lr.ph:
+  br label %while.body
+
+while.body:
+  call void @RunInMode(i32 100) nounwind
+  %1 = load i32* %tasksIdle, align 4
+  %tobool = icmp eq i32 %1, 0
+  br i1 %tobool, label %while.cond.backedge, label %if.then
+
+if.then:
+  store i32 0, i32* %tasksIdle, align 4
+  call void @TimerCreate(i32* %shouldExit) nounwind
+  br label %while.cond.backedge
+
+while.cond.backedge:
+  %2 = load i32* %shouldExit, align 4
+  %cmp = icmp eq i32 %2, 0
+  br i1 %cmp, label %while.body, label %while.cond.while.end_crit_edge
+
+while.cond.while.end_crit_edge:
+  br label %while.end
+
+while.end:
+  ret i32 0
+}
+declare void @CTestInitialize(i32*)
+declare void @RunInMode(i32)
+declare void @TimerCreate(i32*)
diff --git a/test/Transforms/GVN/Stats/lit.local.cfg b/test/Transforms/GVN/Stats/lit.local.cfg
new file mode 100644
index 0000000000..89c0cd9d48
--- /dev/null
+++ b/test/Transforms/GVN/Stats/lit.local.cfg
@@ -0,0 +1,4 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+if not config.root.enable_assertions:
+    config.unsupported = True
diff --git a/test/Transforms/GVN/nonescaping-malloc.ll b/test/Transforms/GVN/Stats/nonescaping-malloc.ll
index afcb7fe3bb..afcb7fe3bb 100644
--- a/test/Transforms/GVN/nonescaping-malloc.ll
+++ b/test/Transforms/GVN/Stats/nonescaping-malloc.ll
diff --git a/test/Transforms/GVN/crash-no-aa.ll b/test/Transforms/GVN/crash-no-aa.ll
index c87a9c6576..9ad63a7350 100644
--- a/test/Transforms/GVN/crash-no-aa.ll
+++ b/test/Transforms/GVN/crash-no-aa.ll
@@ -1,4 +1,4 @@
-; RUN: opt -no-aa -gvn -S %s
+; RUN: opt -no-aa -gvn -S < %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-unknown-freebsd8.0"
diff --git a/test/Transforms/GVN/crash.ll b/test/Transforms/GVN/crash.ll
index 4a8c8e4589..9fb612fcae 100644
--- a/test/Transforms/GVN/crash.ll
+++ b/test/Transforms/GVN/crash.ll
@@ -1,4 +1,4 @@
-; RUN: opt -gvn %s -disable-output
+; RUN: opt -gvn -disable-output < %s
 
 ; PR5631
 
diff --git a/test/Transforms/GVN/edge.ll b/test/Transforms/GVN/edge.ll
index 32392f3ab0..3a102b6c35 100644
--- a/test/Transforms/GVN/edge.ll
+++ b/test/Transforms/GVN/edge.ll
@@ -1,4 +1,4 @@
-; RUN: opt %s -gvn -S -o - | FileCheck %s
+; RUN: opt -gvn -S < %s | FileCheck %s
 
 define i32 @f1(i32 %x) {
   ; CHECK: define i32 @f1(
diff --git a/test/Transforms/GVN/fpmath.ll b/test/Transforms/GVN/fpmath.ll
index 8ab285448f..403df5c900 100644
--- a/test/Transforms/GVN/fpmath.ll
+++ b/test/Transforms/GVN/fpmath.ll
@@ -1,4 +1,4 @@
-; RUN: opt %s -gvn -S -o - | FileCheck %s
+; RUN: opt -gvn -S < %s | FileCheck %s
 
 define double @test1(double %x, double %y) {
 ; CHECK: @test1(double %x, double %y)
diff --git a/test/Transforms/GVN/lpre-call-wrap-2.ll b/test/Transforms/GVN/lpre-call-wrap-2.ll
index e39f3ed87d..35e3534a9c 100644
--- a/test/Transforms/GVN/lpre-call-wrap-2.ll
+++ b/test/Transforms/GVN/lpre-call-wrap-2.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -basicaa -gvn -enable-load-pre %s | FileCheck %s
+; RUN: opt -S -basicaa -gvn -enable-load-pre < %s | FileCheck %s
 ;
 ; The partially redundant load in bb1 should be hoisted to "bb".  This comes
 ; from this C code (GCC PR 23455):
diff --git a/test/Transforms/GVN/lpre-call-wrap.ll b/test/Transforms/GVN/lpre-call-wrap.ll
index 40462798b5..0646f3fe0a 100644
--- a/test/Transforms/GVN/lpre-call-wrap.ll
+++ b/test/Transforms/GVN/lpre-call-wrap.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -gvn -enable-load-pre %s | FileCheck %s
+; RUN: opt -S -gvn -enable-load-pre < %s | FileCheck %s
 ;
 ; Make sure the load in bb3.backedge is removed and moved into bb1 after the 
 ; call.  This makes the non-call case faster. 
diff --git a/test/Transforms/GVN/null-aliases-nothing.ll b/test/Transforms/GVN/null-aliases-nothing.ll
index 9e4ae18c71..37bf09d7f3 100644
--- a/test/Transforms/GVN/null-aliases-nothing.ll
+++ b/test/Transforms/GVN/null-aliases-nothing.ll
@@ -1,4 +1,4 @@
-; RUN: opt %s -basicaa -gvn -S | FileCheck %s
+; RUN: opt < %s -basicaa -gvn -S | FileCheck %s
 
 %t = type { i32 }
 declare void @test1f(i8*)
diff --git a/test/Transforms/GVN/pr12979.ll b/test/Transforms/GVN/pr12979.ll
index 669da9127d..0198a56513 100644
--- a/test/Transforms/GVN/pr12979.ll
+++ b/test/Transforms/GVN/pr12979.ll
@@ -1,4 +1,4 @@
-; RUN: opt %s -gvn -S -o - | FileCheck %s
+; RUN: opt -gvn -S < %s | FileCheck %s
 
 define i32 @test1(i32 %x, i32 %y) {
 ; CHECK: @test1(i32 %x, i32 %y)
diff --git a/test/Transforms/GVN/range.ll b/test/Transforms/GVN/range.ll
index 3759c415da..2115fe8566 100644
--- a/test/Transforms/GVN/range.ll
+++ b/test/Transforms/GVN/range.ll
@@ -1,4 +1,4 @@
-; RUN: opt %s -basicaa -gvn -S -o - | FileCheck %s
+; RUN: opt -basicaa -gvn -S < %s | FileCheck %s
 
 define i32 @test1(i32* %p) {
 ; CHECK: @test1(i32* %p)
diff --git a/test/Transforms/GVN/rle.ll b/test/Transforms/GVN/rle.ll
index 72fa819d1c..f470ed88bb 100644
--- a/test/Transforms/GVN/rle.ll
+++ b/test/Transforms/GVN/rle.ll
@@ -254,14 +254,11 @@ Cont:
   %A = load i8* %P3
   ret i8 %A
 
-;; FIXME: This is disabled because this caused a miscompile in the llvm-gcc
-;; bootstrap, see r82411
-;
-; HECK: @coerce_mustalias_nonlocal1
-; HECK: Cont:
-; HECK:   %A = phi i8 [
-; HECK-NOT: load
-; HECK: ret i8 %A
+; CHECK: @coerce_mustalias_nonlocal1
+; CHECK: Cont:
+; CHECK:   %A = phi i8 [
+; CHECK-NOT: load
+; CHECK: ret i8 %A
 }
 
 
diff --git a/test/Transforms/GVN/tbaa.ll b/test/Transforms/GVN/tbaa.ll
index 90661c6250..85fe39a93b 100644
--- a/test/Transforms/GVN/tbaa.ll
+++ b/test/Transforms/GVN/tbaa.ll
@@ -1,4 +1,4 @@
-; RUN: opt %s -basicaa -gvn -S -o - | FileCheck %s
+; RUN: opt -basicaa -gvn -S < %s | FileCheck %s
 
 define i32 @test1(i8* %p, i8* %q) {
 ; CHECK: @test1(i8* %p, i8* %q)
diff --git a/test/Transforms/GlobalOpt/2010-02-25-MallocPromote.ll b/test/Transforms/GlobalOpt/2010-02-25-MallocPromote.ll
index 27352fa290..629d57c884 100644
--- a/test/Transforms/GlobalOpt/2010-02-25-MallocPromote.ll
+++ b/test/Transforms/GlobalOpt/2010-02-25-MallocPromote.ll
@@ -1,5 +1,5 @@
 ; PR6422
-; RUN: opt -globalopt -S %s
+; RUN: opt -globalopt -S < %s
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
 target triple = "x86_64-unknown-linux-gnu"
 
diff --git a/test/Transforms/GlobalOpt/2010-02-26-MallocSROA.ll b/test/Transforms/GlobalOpt/2010-02-26-MallocSROA.ll
index 6f1996a867..ab7721fd97 100644
--- a/test/Transforms/GlobalOpt/2010-02-26-MallocSROA.ll
+++ b/test/Transforms/GlobalOpt/2010-02-26-MallocSROA.ll
@@ -1,4 +1,4 @@
-; RUN: opt -globalopt -S %s
+; RUN: opt -globalopt -S < %s
 ; PR6435
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
 target triple = "x86_64-unknown-linux-gnu"
diff --git a/test/Transforms/GlobalOpt/2009-03-05-dbg.ll b/test/Transforms/GlobalOpt/Stats/2009-03-05-dbg.ll
index 0f3efa09a1..0f3efa09a1 100644
--- a/test/Transforms/GlobalOpt/2009-03-05-dbg.ll
+++ b/test/Transforms/GlobalOpt/Stats/2009-03-05-dbg.ll
diff --git a/test/Transforms/GlobalOpt/Stats/lit.local.cfg b/test/Transforms/GlobalOpt/Stats/lit.local.cfg
new file mode 100644
index 0000000000..89c0cd9d48
--- /dev/null
+++ b/test/Transforms/GlobalOpt/Stats/lit.local.cfg
@@ -0,0 +1,4 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+if not config.root.enable_assertions:
+    config.unsupported = True
diff --git a/test/Transforms/GlobalOpt/crash.ll b/test/Transforms/GlobalOpt/crash.ll
index 366a874f73..80c777ccab 100644
--- a/test/Transforms/GlobalOpt/crash.ll
+++ b/test/Transforms/GlobalOpt/crash.ll
@@ -1,4 +1,4 @@
-; RUN: opt -globalopt -disable-output %s
+; RUN: opt -globalopt -disable-output < %s
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
 target triple = "i386-apple-darwin9.8"
 
diff --git a/test/Transforms/GlobalOpt/ctor-list-opt-constexpr.ll b/test/Transforms/GlobalOpt/ctor-list-opt-constexpr.ll
index e3bc473f52..c907610944 100644
--- a/test/Transforms/GlobalOpt/ctor-list-opt-constexpr.ll
+++ b/test/Transforms/GlobalOpt/ctor-list-opt-constexpr.ll
@@ -1,4 +1,4 @@
-; RUN: opt -globalopt %s -S | FileCheck %s
+; RUN: opt -globalopt -S < %s | FileCheck %s
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-apple-darwin10.0.0"
 
diff --git a/test/Transforms/GlobalOpt/externally-initialized-global-ctr.ll b/test/Transforms/GlobalOpt/externally-initialized-global-ctr.ll
new file mode 100644
index 0000000000..9295c2025a
--- /dev/null
+++ b/test/Transforms/GlobalOpt/externally-initialized-global-ctr.ll
@@ -0,0 +1,35 @@
+; RUN: opt < %s -globalopt -S | FileCheck %s
+; rdar://12580965.
+; ObjC++ test case.
+
+%struct.ButtonInitData = type { i8* }
+
+@_ZL14buttonInitData = internal global [1 x %struct.ButtonInitData] zeroinitializer, align 4
+
+@"\01L_OBJC_METH_VAR_NAME_40" = internal global [7 x i8] c"print:\00", section "__TEXT,__objc_methname,cstring_literals", align 1
+@"\01L_OBJC_SELECTOR_REFERENCES_41" = internal externally_initialized  global i8* getelementptr inbounds ([7 x i8]* @"\01L_OBJC_METH_VAR_NAME_40", i32 0, i32 0), section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip"
+
+@llvm.global_ctors = appending global [1 x { i32, void ()* }] [{ i32, void ()* } { i32 65535, void ()* @_GLOBAL__I_a }]
+@llvm.used = appending global [2 x i8*] [i8* getelementptr inbounds ([7 x i8]* @"\01L_OBJC_METH_VAR_NAME_40", i32 0, i32 0),  i8* bitcast (i8** @"\01L_OBJC_SELECTOR_REFERENCES_41" to i8*)]
+
+define internal void @__cxx_global_var_init() section "__TEXT,__StaticInit,regular,pure_instructions" {
+  %1 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_41", !invariant.load !2009
+  store i8* %1, i8** getelementptr inbounds ([1 x %struct.ButtonInitData]* @_ZL14buttonInitData, i32 0, i32 0, i32 0), align 4
+  ret void
+}
+
+define internal void @_GLOBAL__I_a() section "__TEXT,__StaticInit,regular,pure_instructions" {
+  call void @__cxx_global_var_init()
+  ret void
+}
+
+declare void @test(i8*)
+
+define void @print() {
+; CHECK: %1 = load i8** getelementptr inbounds ([1 x %struct.ButtonInitData]* @_ZL14buttonInitData, i32 0, i32 0, i32 0), align 4
+  %1 = load i8** getelementptr inbounds ([1 x %struct.ButtonInitData]* @_ZL14buttonInitData, i32 0, i32 0, i32 0), align 4
+  call void @test(i8* %1)
+  ret void
+}
+
+!2009 = metadata !{}
diff --git a/test/Transforms/GlobalOpt/integer-bool.ll b/test/Transforms/GlobalOpt/integer-bool.ll
index 5a34a9c4da..51858069ac 100644
--- a/test/Transforms/GlobalOpt/integer-bool.ll
+++ b/test/Transforms/GlobalOpt/integer-bool.ll
@@ -1,23 +1,28 @@
-; RUN: opt < %s -globalopt -instcombine | \
-; RUN:    llvm-dis | grep "ret i1 true"
-
+; RUN: opt < %s -S -globalopt -instcombine | FileCheck %s
 ;; check that global opt turns integers that only hold 0 or 1 into bools.
 
-@G = internal global i32 0              ; <i32*> [#uses=3]
+@G = internal addrspace(1) global i32 0
+; CHECK: @G
+; CHECK: addrspace(1)
+; CHECK: global i1 false
 
 define void @set1() {
-        store i32 0, i32* @G
-        ret void
+  store i32 0, i32 addrspace(1)* @G
+; CHECK: store i1 false
+  ret void
 }
 
 define void @set2() {
-        store i32 1, i32* @G
-        ret void
+  store i32 1, i32 addrspace(1)* @G
+; CHECK: store i1 true
+  ret void
 }
 
 define i1 @get() {
-        %A = load i32* @G               ; <i32> [#uses=1]
-        %C = icmp slt i32 %A, 2         ; <i1> [#uses=1]
-        ret i1 %C
+; CHECK: @get
+  %A = load i32 addrspace(1) * @G
+  %C = icmp slt i32 %A, 2
+  ret i1 %C
+; CHECK: ret i1 true
 }
 
diff --git a/test/Transforms/GlobalOpt/memset-null.ll b/test/Transforms/GlobalOpt/memset-null.ll
index 01534025fa..53ec755113 100644
--- a/test/Transforms/GlobalOpt/memset-null.ll
+++ b/test/Transforms/GlobalOpt/memset-null.ll
@@ -1,4 +1,4 @@
-; RUN: opt -globalopt %s -S -o - | FileCheck %s
+; RUN: opt -globalopt -S < %s | FileCheck %s
 ; PR10047
 
 %0 = type { i32, void ()* }
diff --git a/test/Transforms/GlobalOpt/unnamed-addr.ll b/test/Transforms/GlobalOpt/unnamed-addr.ll
index ee75058731..2ca91e50da 100644
--- a/test/Transforms/GlobalOpt/unnamed-addr.ll
+++ b/test/Transforms/GlobalOpt/unnamed-addr.ll
@@ -1,4 +1,4 @@
-; RUN: opt %s -globalopt -S | FileCheck %s
+; RUN: opt -globalopt -S < %s | FileCheck %s
 
 @a = internal global i32 0, align 4
 @b = internal global i32 0, align 4
diff --git a/test/Transforms/IPConstantProp/user-with-multiple-uses.ll b/test/Transforms/IPConstantProp/user-with-multiple-uses.ll
index 402ea41167..968718084e 100644
--- a/test/Transforms/IPConstantProp/user-with-multiple-uses.ll
+++ b/test/Transforms/IPConstantProp/user-with-multiple-uses.ll
@@ -4,9 +4,9 @@
 ; IPSCCP should propagate the 0 argument, eliminate the switch, and propagate
 ; the result.
 
-; CHECK: define i32 @main() noreturn nounwind {
+; CHECK: define i32 @main() #0 {
 ; CHECK-NEXT: entry:
-; CHECK-NEXT: %call2 = tail call i32 @wwrite(i64 0) nounwind
+; CHECK-NEXT: %call2 = tail call i32 @wwrite(i64 0) [[NUW:#[0-9]+]]
 ; CHECK-NEXT: ret i32 123
 
 define i32 @main() noreturn nounwind {
@@ -28,3 +28,7 @@ sw.default:
 return:
   ret i32 0
 }
+
+; CHECK: attributes #0 = { noreturn nounwind }
+; CHECK: attributes #1 = { nounwind readnone }
+; CHECK: attributes [[NUW]] = { nounwind }
diff --git a/test/Transforms/IndVarSimplify/2003-09-23-NotAtTop.ll b/test/Transforms/IndVarSimplify/2003-09-23-NotAtTop.ll
index 150ae70a82..e3de75e36f 100644
--- a/test/Transforms/IndVarSimplify/2003-09-23-NotAtTop.ll
+++ b/test/Transforms/IndVarSimplify/2003-09-23-NotAtTop.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -indvars %s | FileCheck %s
+; RUN: opt -S -indvars < %s | FileCheck %s
 
 ; The indvar simplification code should ensure that the first PHI in the block 
 ; is the canonical one!
diff --git a/test/Transforms/IndVarSimplify/Stats/lit.local.cfg b/test/Transforms/IndVarSimplify/Stats/lit.local.cfg
new file mode 100644
index 0000000000..89c0cd9d48
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/Stats/lit.local.cfg
@@ -0,0 +1,4 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+if not config.root.enable_assertions:
+    config.unsupported = True
diff --git a/test/Transforms/IndVarSimplify/phi-uses-value-multiple-times.ll b/test/Transforms/IndVarSimplify/Stats/phi-uses-value-multiple-times.ll
index 52c9e5c3ff..52c9e5c3ff 100644
--- a/test/Transforms/IndVarSimplify/phi-uses-value-multiple-times.ll
+++ b/test/Transforms/IndVarSimplify/Stats/phi-uses-value-multiple-times.ll
diff --git a/test/Transforms/IndVarSimplify/crash.ll b/test/Transforms/IndVarSimplify/crash.ll
index 1b702a3b1a..aa6a2ee165 100644
--- a/test/Transforms/IndVarSimplify/crash.ll
+++ b/test/Transforms/IndVarSimplify/crash.ll
@@ -1,4 +1,4 @@
-; RUN: opt -indvars %s -disable-output
+; RUN: opt -indvars -disable-output < %s
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 
 declare i32 @putchar(i8) nounwind
diff --git a/test/Transforms/Inline/2010-05-12-ValueMap.ll b/test/Transforms/Inline/2010-05-12-ValueMap.ll
index f9cc13f499..f452907efd 100644
--- a/test/Transforms/Inline/2010-05-12-ValueMap.ll
+++ b/test/Transforms/Inline/2010-05-12-ValueMap.ll
@@ -1,4 +1,4 @@
-; RUN: opt %s -inline -mergefunc -disable-output
+; RUN: opt -inline -mergefunc -disable-output < %s
 
 ; This tests for a bug where the inliner kept the functions in a ValueMap after
 ; it had completed and a ModulePass started to run. LLVM would crash deleting
diff --git a/test/Transforms/Inline/delete-call.ll b/test/Transforms/Inline/Stats/delete-call.ll
index 7716d6a47b..0afd2ee4c2 100644
--- a/test/Transforms/Inline/delete-call.ll
+++ b/test/Transforms/Inline/Stats/delete-call.ll
@@ -1,5 +1,8 @@
-; RUN: opt %s -S  -inline -functionattrs -stats 2>&1 | grep "Number of call sites deleted, not inlined"
-; RUN: opt %s -S  -inline -stats 2>&1 | grep "Number of functions inlined"
+; RUN: opt -S -inline -stats < %s 2>&1 | FileCheck %s
+; CHECK: Number of functions inlined
+
+; RUN: opt -S -inline -functionattrs -stats < %s 2>&1 | FileCheck -check-prefix=FUNCTIONATTRS %s
+; CHECK-FUNCTIONATTRS: Number of call sites deleted, not inlined
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
 target triple = "i386-apple-darwin9.8"
diff --git a/test/Transforms/Inline/Stats/lit.local.cfg b/test/Transforms/Inline/Stats/lit.local.cfg
new file mode 100644
index 0000000000..89c0cd9d48
--- /dev/null
+++ b/test/Transforms/Inline/Stats/lit.local.cfg
@@ -0,0 +1,4 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+if not config.root.enable_assertions:
+    config.unsupported = True
diff --git a/test/Transforms/Inline/alloca_test.ll b/test/Transforms/Inline/alloca_test.ll
index e5791d5d25..8464259ce1 100644
--- a/test/Transforms/Inline/alloca_test.ll
+++ b/test/Transforms/Inline/alloca_test.ll
@@ -1,7 +1,7 @@
 ; This test ensures that alloca instructions in the entry block for an inlined
 ; function are moved to the top of the function they are inlined into.
 ;
-; RUN: opt -S -inline %s | FileCheck %s
+; RUN: opt -S -inline < %s | FileCheck %s
 
 define i32 @func(i32 %i) {
         %X = alloca i32         ; <i32*> [#uses=1]
diff --git a/test/Transforms/Inline/basictest.ll b/test/Transforms/Inline/basictest.ll
index 609a3d4e15..39e25cb5d6 100644
--- a/test/Transforms/Inline/basictest.ll
+++ b/test/Transforms/Inline/basictest.ll
@@ -45,3 +45,48 @@ define i32 @test2(i1 %cond) {
 ; CHECK-NOT: = alloca
 ; CHECK: ret i32
 }
+
+declare void @barrier() noduplicate
+
+define internal i32 @f() {
+  call void @barrier() noduplicate
+  ret i32 1
+}
+
+define i32 @g() {
+  call void @barrier() noduplicate
+  ret i32 2
+}
+
+define internal i32 @h() {
+  call void @barrier() noduplicate
+  ret i32 3
+}
+
+define i32 @test3() {
+  %b = call i32 @f()
+  ret i32 %b
+}
+
+; The call to @f cannot be inlined as there is another callsite
+; calling @f, and @f contains a noduplicate call.
+;
+; The call to @g cannot be inlined as it has external linkage.
+;
+; The call to @h *can* be inlined.
+
+; CHECK: @test
+define i32 @test() {
+; CHECK: call i32 @f()
+  %a = call i32 @f()
+; CHECK: call i32 @g()
+  %b = call i32 @g()
+; CHECK-NOT: call i32 @h()
+  %c = call i32 @h()
+
+  %d = add i32 %a, %b
+  %e = add i32 %d, %c
+
+  ret i32 %e
+; CHECK: }
+}
diff --git a/test/Transforms/Inline/crash2.ll b/test/Transforms/Inline/crash2.ll
index cb1f44d5cc..be634f6256 100644
--- a/test/Transforms/Inline/crash2.ll
+++ b/test/Transforms/Inline/crash2.ll
@@ -1,4 +1,4 @@
-; RUN: opt  -inline -scalarrepl -max-cg-scc-iterations=1  %s -disable-output
+; RUN: opt  -inline -scalarrepl -max-cg-scc-iterations=1 -disable-output < %s
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-apple-darwin10.3"
 
diff --git a/test/Transforms/Inline/devirtualize-3.ll b/test/Transforms/Inline/devirtualize-3.ll
index c32be4e024..3f019676e4 100644
--- a/test/Transforms/Inline/devirtualize-3.ll
+++ b/test/Transforms/Inline/devirtualize-3.ll
@@ -1,4 +1,4 @@
-; RUN: opt -basicaa -inline -S -scalarrepl -gvn -instcombine %s | FileCheck %s
+; RUN: opt -basicaa -inline -S -scalarrepl -gvn -instcombine < %s | FileCheck %s
 ; PR5009
 
 ; CHECK: define i32 @main() 
diff --git a/test/Transforms/Inline/devirtualize.ll b/test/Transforms/Inline/devirtualize.ll
index 51ea4baa38..d46154ef6a 100644
--- a/test/Transforms/Inline/devirtualize.ll
+++ b/test/Transforms/Inline/devirtualize.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -basicaa -inline -scalarrepl -instcombine -simplifycfg -instcombine -gvn -globaldce %s | FileCheck %s
+; RUN: opt -S -Os < %s | FileCheck %s
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-apple-darwin10.0.0"
 
diff --git a/test/Transforms/Inline/gvn-inline-iteration.ll b/test/Transforms/Inline/gvn-inline-iteration.ll
index e502fd5777..526ed79e7b 100644
--- a/test/Transforms/Inline/gvn-inline-iteration.ll
+++ b/test/Transforms/Inline/gvn-inline-iteration.ll
@@ -1,4 +1,4 @@
-; RUN: opt -basicaa -inline -gvn %s -S -max-cg-scc-iterations=1 | FileCheck %s
+; RUN: opt -basicaa -inline -gvn -S -max-cg-scc-iterations=1 < %s | FileCheck %s
 ; rdar://6295824 and PR6724
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
diff --git a/test/Transforms/Inline/inline-optsize.ll b/test/Transforms/Inline/inline-optsize.ll
index 20d7426abd..3ad573a04e 100644
--- a/test/Transforms/Inline/inline-optsize.ll
+++ b/test/Transforms/Inline/inline-optsize.ll
@@ -1,5 +1,5 @@
-; RUN: opt -S -Oz %s | FileCheck %s -check-prefix=OZ
-; RUN: opt -S -O2 %s | FileCheck %s -check-prefix=O2
+; RUN: opt -S -Oz < %s | FileCheck %s -check-prefix=OZ
+; RUN: opt -S -O2 < %s | FileCheck %s -check-prefix=O2
 
 ; The inline threshold for a function with the optsize attribute is currently
 ; the same as the global inline threshold for -Os. Check that the optsize
diff --git a/test/Transforms/Inline/inline_constprop.ll b/test/Transforms/Inline/inline_constprop.ll
index 0b48a7282f..77bc3784ac 100644
--- a/test/Transforms/Inline/inline_constprop.ll
+++ b/test/Transforms/Inline/inline_constprop.ll
@@ -111,6 +111,82 @@ bb.false:
   ret i32 %sub
 }
 
+declare {i8, i1} @llvm.uadd.with.overflow.i8(i8 %a, i8 %b)
+
+define i8 @caller4(i8 %z) {
+; Check that we can constant fold through intrinsics such as the
+; overflow-detecting arithmetic instrinsics. These are particularly important
+; as they are used heavily in standard library code and generic C++ code where
+; the arguments are oftent constant but complete generality is required.
+;
+; CHECK: @caller4
+; CHECK-NOT: call
+; CHECK: ret i8 -1
+
+entry:
+  %x = call i8 @callee4(i8 254, i8 14, i8 %z)
+  ret i8 %x
+}
+
+define i8 @callee4(i8 %x, i8 %y, i8 %z) {
+  %uadd = call {i8, i1} @llvm.uadd.with.overflow.i8(i8 %x, i8 %y)
+  %o = extractvalue {i8, i1} %uadd, 1
+  br i1 %o, label %bb.true, label %bb.false
+
+bb.true:
+  ret i8 -1
+
+bb.false:
+  ; This block musn't be counted in the inline cost.
+  %z1 = add i8 %z, 1
+  %z2 = add i8 %z1, 1
+  %z3 = add i8 %z2, 1
+  %z4 = add i8 %z3, 1
+  %z5 = add i8 %z4, 1
+  %z6 = add i8 %z5, 1
+  %z7 = add i8 %z6, 1
+  %z8 = add i8 %z7, 1
+  ret i8 %z8
+}
+
+define i64 @caller5(i64 %y) {
+; Check that we can round trip constants through various kinds of casts etc w/o
+; losing track of the constant prop in the inline cost analysis.
+;
+; CHECK: @caller5
+; CHECK-NOT: call
+; CHECK: ret i64 -1
+
+entry:
+  %x = call i64 @callee5(i64 42, i64 %y)
+  ret i64 %x
+}
+
+define i64 @callee5(i64 %x, i64 %y) {
+  %inttoptr = inttoptr i64 %x to i8*
+  %bitcast = bitcast i8* %inttoptr to i32*
+  %ptrtoint = ptrtoint i32* %bitcast to i64
+  %trunc = trunc i64 %ptrtoint to i32
+  %zext = zext i32 %trunc to i64
+  %cmp = icmp eq i64 %zext, 42
+  br i1 %cmp, label %bb.true, label %bb.false
+
+bb.true:
+  ret i64 -1
+
+bb.false:
+  ; This block musn't be counted in the inline cost.
+  %y1 = add i64 %y, 1
+  %y2 = add i64 %y1, 1
+  %y3 = add i64 %y2, 1
+  %y4 = add i64 %y3, 1
+  %y5 = add i64 %y4, 1
+  %y6 = add i64 %y5, 1
+  %y7 = add i64 %y6, 1
+  %y8 = add i64 %y7, 1
+  ret i64 %y8
+}
+
 
 define i32 @PR13412.main() {
 ; This is a somewhat complicated three layer subprogram that was reported to
diff --git a/test/Transforms/Inline/inline_invoke.ll b/test/Transforms/Inline/inline_invoke.ll
index 9f5f670b85..c53bb5aa17 100644
--- a/test/Transforms/Inline/inline_invoke.ll
+++ b/test/Transforms/Inline/inline_invoke.ll
@@ -330,7 +330,7 @@ terminate:
 ; CHECK-NEXT: br label %[[JOIN]]
 ; CHECK:    [[JOIN]]:
 ; CHECK-NEXT: phi { i8*, i32 }
-; CHECK-NEXT: call void @opaque() nounwind
+; CHECK-NEXT: call void @opaque() [[NUW:#[0-9]+]]
 ; CHECK-NEXT: br label %[[FIX:[^\s]+]]
 ; CHECK:    lpad:
 ; CHECK-NEXT: landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0
@@ -340,3 +340,8 @@ terminate:
 ; CHECK-NEXT: [[T1:%.*]] = phi i32 [ 0, %[[JOIN]] ], [ 1, %lpad ]
 ; CHECK-NEXT: call void @use(i32 [[T1]])
 ; CHECK-NEXT: call void @_ZSt9terminatev()
+
+; CHECK: attributes [[NUW]] = { nounwind }
+; CHECK: attributes #1 = { nounwind readnone }
+; CHECK: attributes #2 = { ssp uwtable }
+; CHECK: attributes #3 = { noreturn nounwind }
diff --git a/test/Transforms/Inline/inline_minisize.ll b/test/Transforms/Inline/inline_minisize.ll
new file mode 100644
index 0000000000..3dddbcf330
--- /dev/null
+++ b/test/Transforms/Inline/inline_minisize.ll
@@ -0,0 +1,232 @@
+; RUN: opt -O2 -S < %s | FileCheck %s
+
+@data = common global i32* null, align 8
+
+define i32 @fct1(i32 %a) nounwind uwtable ssp {
+entry:
+  %a.addr = alloca i32, align 4
+  %res = alloca i32, align 4
+  %i = alloca i32, align 4
+  store i32 %a, i32* %a.addr, align 4
+  %tmp = load i32* %a.addr, align 4
+  %idxprom = sext i32 %tmp to i64
+  %tmp1 = load i32** @data, align 8
+  %arrayidx = getelementptr inbounds i32* %tmp1, i64 %idxprom
+  %tmp2 = load i32* %arrayidx, align 4
+  %tmp3 = load i32* %a.addr, align 4
+  %add = add nsw i32 %tmp3, 1
+  %idxprom1 = sext i32 %add to i64
+  %tmp4 = load i32** @data, align 8
+  %arrayidx2 = getelementptr inbounds i32* %tmp4, i64 %idxprom1
+  %tmp5 = load i32* %arrayidx2, align 4
+  %mul = mul nsw i32 %tmp2, %tmp5
+  store i32 %mul, i32* %res, align 4
+  store i32 0, i32* %i, align 4
+  store i32 0, i32* %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %tmp6 = load i32* %i, align 4
+  %tmp7 = load i32* %res, align 4
+  %cmp = icmp slt i32 %tmp6, %tmp7
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %tmp8 = load i32* %i, align 4
+  %idxprom3 = sext i32 %tmp8 to i64
+  %tmp9 = load i32** @data, align 8
+  %arrayidx4 = getelementptr inbounds i32* %tmp9, i64 %idxprom3
+  call void @fct0(i32* %arrayidx4)
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %tmp10 = load i32* %i, align 4
+  %inc = add nsw i32 %tmp10, 1
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  store i32 0, i32* %i, align 4
+  br label %for.cond5
+
+for.cond5:                                        ; preds = %for.inc10, %for.end
+  %tmp11 = load i32* %i, align 4
+  %tmp12 = load i32* %res, align 4
+  %cmp6 = icmp slt i32 %tmp11, %tmp12
+  br i1 %cmp6, label %for.body7, label %for.end12
+
+for.body7:                                        ; preds = %for.cond5
+  %tmp13 = load i32* %i, align 4
+  %idxprom8 = sext i32 %tmp13 to i64
+  %tmp14 = load i32** @data, align 8
+  %arrayidx9 = getelementptr inbounds i32* %tmp14, i64 %idxprom8
+  call void @fct0(i32* %arrayidx9)
+  br label %for.inc10
+
+for.inc10:                                        ; preds = %for.body7
+  %tmp15 = load i32* %i, align 4
+  %inc11 = add nsw i32 %tmp15, 1
+  store i32 %inc11, i32* %i, align 4
+  br label %for.cond5
+
+for.end12:                                        ; preds = %for.cond5
+  store i32 0, i32* %i, align 4
+  br label %for.cond13
+
+for.cond13:                                       ; preds = %for.inc18, %for.end12
+  %tmp16 = load i32* %i, align 4
+  %tmp17 = load i32* %res, align 4
+  %cmp14 = icmp slt i32 %tmp16, %tmp17
+  br i1 %cmp14, label %for.body15, label %for.end20
+
+for.body15:                                       ; preds = %for.cond13
+  %tmp18 = load i32* %i, align 4
+  %idxprom16 = sext i32 %tmp18 to i64
+  %tmp19 = load i32** @data, align 8
+  %arrayidx17 = getelementptr inbounds i32* %tmp19, i64 %idxprom16
+  call void @fct0(i32* %arrayidx17)
+  br label %for.inc18
+
+for.inc18:                                        ; preds = %for.body15
+  %tmp20 = load i32* %i, align 4
+  %inc19 = add nsw i32 %tmp20, 1
+  store i32 %inc19, i32* %i, align 4
+  br label %for.cond13
+
+for.end20:                                        ; preds = %for.cond13
+  %tmp21 = load i32* %res, align 4
+  ret i32 %tmp21
+}
+
+declare void @fct0(i32*)
+
+define i32 @fct2(i32 %a) nounwind uwtable inlinehint ssp {
+entry:
+  %a.addr = alloca i32, align 4
+  %res = alloca i32, align 4
+  %i = alloca i32, align 4
+  store i32 %a, i32* %a.addr, align 4
+  %tmp = load i32* %a.addr, align 4
+  %shl = shl i32 %tmp, 1
+  %idxprom = sext i32 %shl to i64
+  %tmp1 = load i32** @data, align 8
+  %arrayidx = getelementptr inbounds i32* %tmp1, i64 %idxprom
+  %tmp2 = load i32* %arrayidx, align 4
+  %tmp3 = load i32* %a.addr, align 4
+  %shl1 = shl i32 %tmp3, 1
+  %add = add nsw i32 %shl1, 13
+  %idxprom2 = sext i32 %add to i64
+  %tmp4 = load i32** @data, align 8
+  %arrayidx3 = getelementptr inbounds i32* %tmp4, i64 %idxprom2
+  %tmp5 = load i32* %arrayidx3, align 4
+  %mul = mul nsw i32 %tmp2, %tmp5
+  store i32 %mul, i32* %res, align 4
+  store i32 0, i32* %i, align 4
+  store i32 0, i32* %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %tmp6 = load i32* %i, align 4
+  %tmp7 = load i32* %res, align 4
+  %cmp = icmp slt i32 %tmp6, %tmp7
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %tmp8 = load i32* %i, align 4
+  %idxprom4 = sext i32 %tmp8 to i64
+  %tmp9 = load i32** @data, align 8
+  %arrayidx5 = getelementptr inbounds i32* %tmp9, i64 %idxprom4
+  call void @fct0(i32* %arrayidx5)
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %tmp10 = load i32* %i, align 4
+  %inc = add nsw i32 %tmp10, 1
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  store i32 0, i32* %i, align 4
+  br label %for.cond6
+
+for.cond6:                                        ; preds = %for.inc11, %for.end
+  %tmp11 = load i32* %i, align 4
+  %tmp12 = load i32* %res, align 4
+  %cmp7 = icmp slt i32 %tmp11, %tmp12
+  br i1 %cmp7, label %for.body8, label %for.end13
+
+for.body8:                                        ; preds = %for.cond6
+  %tmp13 = load i32* %i, align 4
+  %idxprom9 = sext i32 %tmp13 to i64
+  %tmp14 = load i32** @data, align 8
+  %arrayidx10 = getelementptr inbounds i32* %tmp14, i64 %idxprom9
+  call void @fct0(i32* %arrayidx10)
+  br label %for.inc11
+
+for.inc11:                                        ; preds = %for.body8
+  %tmp15 = load i32* %i, align 4
+  %inc12 = add nsw i32 %tmp15, 1
+  store i32 %inc12, i32* %i, align 4
+  br label %for.cond6
+
+for.end13:                                        ; preds = %for.cond6
+  store i32 0, i32* %i, align 4
+  br label %for.cond14
+
+for.cond14:                                       ; preds = %for.inc19, %for.end13
+  %tmp16 = load i32* %i, align 4
+  %tmp17 = load i32* %res, align 4
+  %cmp15 = icmp slt i32 %tmp16, %tmp17
+  br i1 %cmp15, label %for.body16, label %for.end21
+
+for.body16:                                       ; preds = %for.cond14
+  %tmp18 = load i32* %i, align 4
+  %idxprom17 = sext i32 %tmp18 to i64
+  %tmp19 = load i32** @data, align 8
+  %arrayidx18 = getelementptr inbounds i32* %tmp19, i64 %idxprom17
+  call void @fct0(i32* %arrayidx18)
+  br label %for.inc19
+
+for.inc19:                                        ; preds = %for.body16
+  %tmp20 = load i32* %i, align 4
+  %inc20 = add nsw i32 %tmp20, 1
+  store i32 %inc20, i32* %i, align 4
+  br label %for.cond14
+
+for.end21:                                        ; preds = %for.cond14
+  %tmp21 = load i32* %res, align 4
+  ret i32 %tmp21
+}
+
+define i32 @fct3(i32 %c) nounwind uwtable ssp {
+entry:
+  ;CHECK: @fct3
+  ;CHECK: call i32 @fct1
+  ; The inline keyword gives a sufficient benefits to inline fct2
+  ;CHECK-NOT: call i32 @fct2
+  %c.addr = alloca i32, align 4
+  store i32 %c, i32* %c.addr, align 4
+  %tmp = load i32* %c.addr, align 4
+  %call = call i32 @fct1(i32 %tmp)
+  %tmp1 = load i32* %c.addr, align 4
+  %call1 = call i32 @fct2(i32 %tmp1)
+  %add = add nsw i32 %call, %call1
+  ret i32 %add
+}
+
+define i32 @fct4(i32 %c) minsize nounwind uwtable ssp {
+entry:
+  ;CHECK: @fct4
+  ;CHECK: call i32 @fct1
+  ; With Oz (minsize attribute), the benefit of inlining fct2
+  ; is the same as fct1, thus no inlining for fct2
+  ;CHECK: call i32 @fct2
+  %c.addr = alloca i32, align 4
+  store i32 %c, i32* %c.addr, align 4
+  %tmp = load i32* %c.addr, align 4
+  %call = call i32 @fct1(i32 %tmp)
+  %tmp1 = load i32* %c.addr, align 4
+  %call1 = call i32 @fct2(i32 %tmp1)
+  %add = add nsw i32 %call, %call1
+  ret i32 %add
+}
diff --git a/test/Transforms/Inline/inline_ssp.ll b/test/Transforms/Inline/inline_ssp.ll
new file mode 100644
index 0000000000..a4b43a77ba
--- /dev/null
+++ b/test/Transforms/Inline/inline_ssp.ll
@@ -0,0 +1,160 @@
+; RUN: opt -inline %s -S | FileCheck %s
+; Ensure SSP attributes are propagated correctly when inlining.
+
+@.str = private unnamed_addr constant [11 x i8] c"fun_nossp\0A\00", align 1
+@.str1 = private unnamed_addr constant [9 x i8] c"fun_ssp\0A\00", align 1
+@.str2 = private unnamed_addr constant [15 x i8] c"fun_sspstrong\0A\00", align 1
+@.str3 = private unnamed_addr constant [12 x i8] c"fun_sspreq\0A\00", align 1
+
+; These first four functions (@fun_sspreq, @fun_sspstrong, @fun_ssp, @fun_nossp)
+; are used by the remaining functions to ensure that the SSP attributes are
+; propagated correctly.  The caller should have its SSP attribute set as:
+; strictest(caller-ssp-attr, callee-ssp-attr), where strictness is ordered as:
+;  sspreq > sspstrong > ssp > [no ssp]
+define internal void @fun_sspreq() nounwind sspreq uwtable {
+entry:
+  %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([12 x i8]* @.str3, i32 0, i32 0))
+  ret void
+}
+
+define internal void @fun_sspstrong() nounwind sspstrong uwtable {
+entry:
+  %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([15 x i8]* @.str2, i32 0, i32 0))
+  ret void
+}
+
+define internal void @fun_ssp() nounwind ssp uwtable {
+entry:
+  %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([9 x i8]* @.str1, i32 0, i32 0))
+  ret void
+}
+
+define internal void @fun_nossp() nounwind uwtable {
+entry:
+  %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([11 x i8]* @.str, i32 0, i32 0))
+  ret void
+}
+
+; Tests start below 
+
+define void @inline_req_req() nounwind sspreq uwtable {
+entry:
+; CHECK: @inline_req_req() #0
+  call void @fun_sspreq()
+  ret void
+}
+
+define void @inline_req_strong() nounwind sspstrong uwtable {
+entry:
+; CHECK: @inline_req_strong() #0
+  call void @fun_sspreq()
+  ret void
+}
+
+define void @inline_req_ssp() nounwind ssp uwtable {
+entry:
+; CHECK: @inline_req_ssp() #0
+  call void @fun_sspreq()
+  ret void
+}
+
+define void @inline_req_nossp() nounwind uwtable {
+entry:
+; CHECK: @inline_req_nossp() #0
+  call void @fun_sspreq()
+  ret void
+}
+
+define void @inline_strong_req() nounwind sspreq uwtable {
+entry:
+; CHECK: @inline_strong_req() #0
+  call void @fun_sspstrong()
+  ret void
+}
+
+
+define void @inline_strong_strong() nounwind sspstrong uwtable {
+entry:
+; CHECK: @inline_strong_strong() #1
+  call void @fun_sspstrong()
+  ret void
+}
+
+define void @inline_strong_ssp() nounwind ssp uwtable {
+entry:
+; CHECK: @inline_strong_ssp() #1
+  call void @fun_sspstrong()
+  ret void
+}
+
+define void @inline_strong_nossp() nounwind uwtable {
+entry:
+; CHECK: @inline_strong_nossp() #1
+  call void @fun_sspstrong()
+  ret void
+}
+
+define void @inline_ssp_req() nounwind sspreq uwtable {
+entry:
+; CHECK: @inline_ssp_req() #0
+  call void @fun_ssp()
+  ret void
+}
+
+
+define void @inline_ssp_strong() nounwind sspstrong uwtable {
+entry:
+; CHECK: @inline_ssp_strong() #1
+  call void @fun_ssp()
+  ret void
+}
+
+define void @inline_ssp_ssp() nounwind ssp uwtable {
+entry:
+; CHECK: @inline_ssp_ssp() #2
+  call void @fun_ssp()
+  ret void
+}
+
+define void @inline_ssp_nossp() nounwind uwtable {
+entry:
+; CHECK: @inline_ssp_nossp() #2
+  call void @fun_ssp()
+  ret void
+}
+
+define void @inline_nossp_req() nounwind uwtable sspreq {
+entry:
+; CHECK: @inline_nossp_req() #0
+  call void @fun_nossp()
+  ret void
+}
+
+
+define void @inline_nossp_strong() nounwind sspstrong uwtable {
+entry:
+; CHECK: @inline_nossp_strong() #1
+  call void @fun_nossp()
+  ret void
+}
+
+define void @inline_nossp_ssp() nounwind ssp uwtable {
+entry:
+; CHECK: @inline_nossp_ssp() #2
+  call void @fun_nossp()
+  ret void
+}
+
+define void @inline_nossp_nossp() nounwind uwtable {
+entry:
+; CHECK: @inline_nossp_nossp() #3
+  call void @fun_nossp()
+  ret void
+}
+
+declare i32 @printf(i8*, ...)
+
+; CHECK: attributes #0 = { nounwind sspreq uwtable }
+; CHECK: attributes #1 = { nounwind sspstrong uwtable }
+; CHECK: attributes #2 = { nounwind ssp uwtable }
+; CHECK: attributes #3 = { nounwind uwtable }
diff --git a/test/Transforms/Inline/lifetime-no-datalayout.ll b/test/Transforms/Inline/lifetime-no-datalayout.ll
index 9ad14282f9..f4ffef3850 100644
--- a/test/Transforms/Inline/lifetime-no-datalayout.ll
+++ b/test/Transforms/Inline/lifetime-no-datalayout.ll
@@ -1,4 +1,4 @@
-; RUN: opt -inline %s -S -o - | FileCheck %s
+; RUN: opt -inline -S < %s | FileCheck %s
 
 declare void @use(i8* %a)
 
diff --git a/test/Transforms/Inline/lifetime.ll b/test/Transforms/Inline/lifetime.ll
index fb520498c4..fc73385295 100644
--- a/test/Transforms/Inline/lifetime.ll
+++ b/test/Transforms/Inline/lifetime.ll
@@ -1,4 +1,4 @@
-; RUN: opt -inline %s -S -o - | FileCheck %s
+; RUN: opt -inline -S < %s | FileCheck %s
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 
 declare void @llvm.lifetime.start(i64, i8*)
diff --git a/test/Transforms/Inline/noinline-recursive-fn.ll b/test/Transforms/Inline/noinline-recursive-fn.ll
index 6cde0e27fd..5520093ee4 100644
--- a/test/Transforms/Inline/noinline-recursive-fn.ll
+++ b/test/Transforms/Inline/noinline-recursive-fn.ll
@@ -2,7 +2,7 @@
 ; This effectively is just peeling off the first iteration of a loop, and the
 ; inliner heuristics are not set up for this.
 
-; RUN: opt -inline %s -S | FileCheck %s
+; RUN: opt -inline -S < %s | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-apple-darwin10.3"
diff --git a/test/Transforms/Inline/noinline.ll b/test/Transforms/Inline/noinline.ll
index dc3f6e0030..7667114b68 100644
--- a/test/Transforms/Inline/noinline.ll
+++ b/test/Transforms/Inline/noinline.ll
@@ -1,4 +1,4 @@
-; RUN: opt %s -inline -S | FileCheck %s
+; RUN: opt -inline -S < %s | FileCheck %s
 ; PR6682
 declare void @foo() nounwind
 
diff --git a/test/Transforms/Inline/recursive.ll b/test/Transforms/Inline/recursive.ll
index 5fe8d1639c..fe1c041af9 100644
--- a/test/Transforms/Inline/recursive.ll
+++ b/test/Transforms/Inline/recursive.ll
@@ -1,4 +1,4 @@
-; RUN: opt %s -inline -S | FileCheck %s
+; RUN: opt -inline -S < %s | FileCheck %s
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i386-apple-darwin10.0"
diff --git a/test/Transforms/InstCombine/2008-05-08-StrLenSink.ll b/test/Transforms/InstCombine/2008-05-08-StrLenSink.ll
index 1da28562aa..d266164fd8 100644
--- a/test/Transforms/InstCombine/2008-05-08-StrLenSink.ll
+++ b/test/Transforms/InstCombine/2008-05-08-StrLenSink.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -instcombine %s | FileCheck %s
+; RUN: opt -S -instcombine < %s | FileCheck %s
 ; PR2297
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i386-apple-darwin8"
diff --git a/test/Transforms/InstCombine/2010-03-03-ExtElim.ll b/test/Transforms/InstCombine/2010-03-03-ExtElim.ll
index 2df12d670a..bb3159e1e6 100644
--- a/test/Transforms/InstCombine/2010-03-03-ExtElim.ll
+++ b/test/Transforms/InstCombine/2010-03-03-ExtElim.ll
@@ -1,4 +1,4 @@
-; RUN: opt -instcombine -S %s | FileCheck %s
+; RUN: opt -instcombine -S < %s | FileCheck %s
 ; PR6486
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
diff --git a/test/Transforms/InstCombine/2010-05-30-memcpy-Struct.ll b/test/Transforms/InstCombine/2010-05-30-memcpy-Struct.ll
index b75fa5ad40..09a96749f2 100644
--- a/test/Transforms/InstCombine/2010-05-30-memcpy-Struct.ll
+++ b/test/Transforms/InstCombine/2010-05-30-memcpy-Struct.ll
@@ -1,4 +1,4 @@
-; RUN: opt -instcombine %s -S -o - | FileCheck %s
+; RUN: opt -instcombine -S < %s | FileCheck %s
 ; PR7265
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
diff --git a/test/Transforms/InstCombine/2012-04-23-Neon-Intrinsics.ll b/test/Transforms/InstCombine/2012-04-23-Neon-Intrinsics.ll
index 0907c490bb..2dedd44e2b 100644
--- a/test/Transforms/InstCombine/2012-04-23-Neon-Intrinsics.ll
+++ b/test/Transforms/InstCombine/2012-04-23-Neon-Intrinsics.ll
@@ -50,7 +50,7 @@ entry:
   %b = add <4 x i32> zeroinitializer, %a
   ret <4 x i32> %b
 ; CHECK: entry:
-; CHECK-NEXT: %a = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> <i16 2, i16 2, i16 2, i16 2>, <4 x i16> %x) nounwind
+; CHECK-NEXT: %a = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> <i16 2, i16 2, i16 2, i16 2>, <4 x i16> %x) [[NUW:#[0-9]+]]
 ; CHECK-NEXT: ret <4 x i32> %a
 }
 
@@ -66,3 +66,7 @@ entry:
 
 declare <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
 declare <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
+
+; CHECK: attributes #0 = { nounwind readnone ssp }
+; CHECK: attributes #1 = { nounwind readnone }
+; CHECK: attributes [[NUW]] = { nounwind }
diff --git a/test/Transforms/InstCombine/2012-12-14-simp-vgep.ll b/test/Transforms/InstCombine/2012-12-14-simp-vgep.ll
new file mode 100644
index 0000000000..fc29b095e5
--- /dev/null
+++ b/test/Transforms/InstCombine/2012-12-14-simp-vgep.ll
@@ -0,0 +1,10 @@
+; RUN: opt < %s -instcombine -S
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+define <4 x i32> @foo(<4 x i32*>* %in) {
+  %t17 = load <4 x i32*>* %in, align 8
+  %t18 = icmp eq <4 x i32*> %t17, zeroinitializer
+  %t19 = zext <4 x i1> %t18 to <4 x i32>
+  ret <4 x i32> %t19
+}
diff --git a/test/Transforms/InstCombine/2013-03-05-Combine-BitcastTy-Into-Alloca.ll b/test/Transforms/InstCombine/2013-03-05-Combine-BitcastTy-Into-Alloca.ll
new file mode 100644
index 0000000000..b20c3a07c0
--- /dev/null
+++ b/test/Transforms/InstCombine/2013-03-05-Combine-BitcastTy-Into-Alloca.ll
@@ -0,0 +1,45 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.9.0"
+
+%struct._my_struct = type <{ [12 x i8], [4 x i8] }>
+
+@initval = common global %struct._my_struct zeroinitializer, align 1
+
+; InstCombine will try to change the %struct._my_struct alloca into an
+; allocation of an i96 because of the bitcast to create %2. That's not valid,
+; as the other 32 bits of the structure still feed into the return value
+define { i64, i64 } @function(i32 %x, i32 %y, i32 %z) nounwind {
+; CHECK: @function
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %retval = alloca %struct._my_struct, align 8
+; CHECK-NOT: bitcast i96* %retval to %struct._my_struct*
+entry:
+  %retval = alloca %struct._my_struct, align 8
+  %k.sroa.0.0.copyload = load i96* bitcast (%struct._my_struct* @initval to i96*), align 1
+  %k.sroa.1.12.copyload = load i32* bitcast ([4 x i8]* getelementptr inbounds (%struct._my_struct* @initval, i64 0, i32 1) to i32*), align 1
+  %0 = zext i32 %x to i96
+  %bf.value = shl nuw nsw i96 %0, 6
+  %bf.clear = and i96 %k.sroa.0.0.copyload, -288230376151711744
+  %1 = zext i32 %y to i96
+  %bf.value2 = shl nuw nsw i96 %1, 32
+  %bf.shl3 = and i96 %bf.value2, 288230371856744448
+  %bf.value.masked = and i96 %bf.value, 4294967232
+  %2 = zext i32 %z to i96
+  %bf.value8 = and i96 %2, 63
+  %bf.clear4 = or i96 %bf.shl3, %bf.value.masked
+  %bf.set5 = or i96 %bf.clear4, %bf.value8
+  %bf.set10 = or i96 %bf.set5, %bf.clear
+  %retval.0.cast7 = bitcast %struct._my_struct* %retval to i96*
+  store i96 %bf.set10, i96* %retval.0.cast7, align 8
+  %retval.12.idx8 = getelementptr inbounds %struct._my_struct* %retval, i64 0, i32 1
+  %retval.12.cast9 = bitcast [4 x i8]* %retval.12.idx8 to i32*
+  store i32 %k.sroa.1.12.copyload, i32* %retval.12.cast9, align 4
+  %trunc = trunc i96 %bf.set10 to i64
+  %.fca.0.insert = insertvalue { i64, i64 } undef, i64 %trunc, 0
+  %retval.8.idx12 = getelementptr inbounds %struct._my_struct* %retval, i64 0, i32 0, i64 8
+  %retval.8.cast13 = bitcast i8* %retval.8.idx12 to i64*
+  %retval.8.load14 = load i64* %retval.8.cast13, align 8
+  %.fca.1.insert = insertvalue { i64, i64 } %.fca.0.insert, i64 %retval.8.load14, 1
+  ret { i64, i64 } %.fca.1.insert
+}
diff --git a/test/Transforms/InstCombine/bitcast-vector-fold.ll b/test/Transforms/InstCombine/bitcast-vector-fold.ll
index 8feec22917..8fd7f35b7b 100644
--- a/test/Transforms/InstCombine/bitcast-vector-fold.ll
+++ b/test/Transforms/InstCombine/bitcast-vector-fold.ll
@@ -31,3 +31,8 @@ define <4 x i32> @test6() {
 	%tmp3 = bitcast <2 x double> <double 0.5, double 1.0> to <4 x i32>
 	ret <4 x i32> %tmp3
 }
+
+define i32 @test7() {
+       %tmp3 = bitcast <2 x half> <half 0xH1100, half 0xH0011> to i32
+       ret i32 %tmp3
+}
+\ No newline at end of file
diff --git a/test/Transforms/InstCombine/bitcast.ll b/test/Transforms/InstCombine/bitcast.ll
index 8f6ae7d835..1e6113256b 100644
--- a/test/Transforms/InstCombine/bitcast.ll
+++ b/test/Transforms/InstCombine/bitcast.ll
@@ -11,7 +11,7 @@ define i32 @test1(i64 %a) {
         %t3 = xor <2 x i32> %t1, %t2
         %t4 = extractelement <2 x i32> %t3, i32 0
         ret i32 %t4
-        
+
 ; CHECK: @test1
 ; CHECK: ret i32 0
 }
@@ -30,7 +30,7 @@ define float @test2(<2 x float> %A, <2 x i32> %B) {
 
   %add = fadd float %tmp24, %tmp4
   ret float %add
-  
+
 ; CHECK: @test2
 ; CHECK-NEXT:  %tmp24 = extractelement <2 x float> %A, i32 0
 ; CHECK-NEXT:  bitcast <2 x i32> %B to <2 x float>
@@ -55,7 +55,7 @@ define float @test3(<2 x float> %A, <2 x i64> %B) {
 
   %add = fadd float %tmp24, %tmp4
   ret float %add
-  
+
 ; CHECK: @test3
 ; CHECK-NEXT:  %tmp24 = extractelement <2 x float> %A, i32 1
 ; CHECK-NEXT:  bitcast <2 x i64> %B to <4 x float>
@@ -75,7 +75,7 @@ define <2 x i32> @test4(i32 %A, i32 %B){
   ; CHECK: @test4
   ; CHECK-NEXT: insertelement <2 x i32> undef, i32 %A, i32 0
   ; CHECK-NEXT: insertelement <2 x i32> {{.*}}, i32 %B, i32 1
-  ; CHECK-NEXT: ret <2 x i32> 
+  ; CHECK-NEXT: ret <2 x i32>
 
 }
 
@@ -92,7 +92,7 @@ define <2 x float> @test5(float %A, float %B) {
   ; CHECK: @test5
   ; CHECK-NEXT: insertelement <2 x float> undef, float %A, i32 0
   ; CHECK-NEXT: insertelement <2 x float> {{.*}}, float %B, i32 1
-  ; CHECK-NEXT: ret <2 x float> 
+  ; CHECK-NEXT: ret <2 x float>
 }
 
 define <2 x float> @test6(float %A){
@@ -123,7 +123,7 @@ define i64 @Vec2(i64 %in) {
 }
 
 define i64 @All11(i64 %in) {
-  %out = and i64 %in, xor (i64 bitcast (<2 x float> bitcast (i64 -1 to <2 x float>) to i64), i64 -1) 
+  %out = and i64 %in, xor (i64 bitcast (<2 x float> bitcast (i64 -1 to <2 x float>) to i64), i64 -1)
   ret i64 %out
 ; CHECK: @All11
 ; CHECK: ret i64 0
@@ -131,9 +131,16 @@ define i64 @All11(i64 %in) {
 
 
 define i32 @All111(i32 %in) {
-  %out = and i32 %in, xor (i32 bitcast (<1 x float> bitcast (i32 -1 to <1 x float>) to i32), i32 -1) 
+  %out = and i32 %in, xor (i32 bitcast (<1 x float> bitcast (i32 -1 to <1 x float>) to i32), i32 -1)
   ret i32 %out
 ; CHECK: @All111
 ; CHECK: ret i32 0
 }
 
+define <2 x i16> @BitcastInsert(i32 %a) {
+  %v = insertelement <1 x i32> undef, i32 %a, i32 0
+  %r = bitcast <1 x i32> %v to <2 x i16>
+  ret <2 x i16> %r
+; CHECK: @BitcastInsert
+; CHECK: bitcast i32 %a to <2 x i16>
+}
diff --git a/test/Transforms/InstCombine/cast.ll b/test/Transforms/InstCombine/cast.ll
index b4eb69d436..de738bb7c0 100644
--- a/test/Transforms/InstCombine/cast.ll
+++ b/test/Transforms/InstCombine/cast.ll
@@ -473,14 +473,12 @@ define i64 @test51(i64 %A, i1 %cond) {
   %F = sext i32 %E to i64
   ret i64 %F
 ; CHECK: @test51
-
-; FIXME: disabled, see PR5997
-; HECK-NEXT: %C = and i64 %A, 4294967294
-; HECK-NEXT: %D = or i64 %A, 1
-; HECK-NEXT: %E = select i1 %cond, i64 %C, i64 %D
-; HECK-NEXT: %sext = shl i64 %E, 32
-; HECK-NEXT: %F = ashr i64 %sext, 32
-; HECK-NEXT: ret i64 %F
+; CHECK-NEXT: %C = and i64 %A, 4294967294
+; CHECK-NEXT: %D = or i64 %A, 1
+; CHECK-NEXT: %E = select i1 %cond, i64 %C, i64 %D
+; CHECK-NEXT: %sext = shl i64 %E, 32
+; CHECK-NEXT: %F = ashr exact i64 %sext, 32
+; CHECK-NEXT: ret i64 %F
 }
 
 define i32 @test52(i64 %A) {
diff --git a/test/Transforms/InstCombine/compare-signs.ll b/test/Transforms/InstCombine/compare-signs.ll
index f8e4911061..72db66e3ab 100644
--- a/test/Transforms/InstCombine/compare-signs.ll
+++ b/test/Transforms/InstCombine/compare-signs.ll
@@ -1,4 +1,4 @@
-; RUN: opt %s -instcombine -S | FileCheck %s
+; RUN: opt -instcombine -S < %s | FileCheck %s
 ; PR5438
 
 ; TODO: This should also optimize down.
diff --git a/test/Transforms/InstCombine/constant-expr-datalayout.ll b/test/Transforms/InstCombine/constant-expr-datalayout.ll
new file mode 100644
index 0000000000..9a72c77afd
--- /dev/null
+++ b/test/Transforms/InstCombine/constant-expr-datalayout.ll
@@ -0,0 +1,12 @@
+; RUN: opt -instcombine %s -S -o - | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%test1.struct = type { i32, i32 }
+@test1.aligned_glbl = global %test1.struct zeroinitializer, align 4
+define void @test1(i64 *%ptr) {
+  store i64 and (i64 ptrtoint (i32* getelementptr (%test1.struct* @test1.aligned_glbl, i32 0, i32 1) to i64), i64 3), i64* %ptr
+; CHECK: store i64 0, i64* %ptr
+  ret void
+}
diff --git a/test/Transforms/InstCombine/debuginfo.ll b/test/Transforms/InstCombine/debuginfo.ll
index f6892fc3e1..f27ab278da 100644
--- a/test/Transforms/InstCombine/debuginfo.ll
+++ b/test/Transforms/InstCombine/debuginfo.ll
@@ -28,22 +28,23 @@ entry:
   ret i8* %call, !dbg !21
 }
 
-!llvm.dbg.lv.foobar = !{!0, !7, !9}
-!llvm.dbg.sp = !{!1}
+!25 = metadata !{metadata !0, metadata !7, metadata !9}
+!llvm.dbg.cu = !{!3}
+!24 = metadata !{metadata !1}
 
-!0 = metadata !{i32 590081, metadata !1, metadata !"__dest", metadata !2, i32 16777294, metadata !6, i32 0} ; [ DW_TAG_arg_variable ]
-!1 = metadata !{i32 589870, i32 0, metadata !2, metadata !"foobar", metadata !"foobar", metadata !"", metadata !2, i32 79, metadata !4, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i8* (i8*, i32, i64)* @foobar} ; [ DW_TAG_subprogram ]
-!2 = metadata !{i32 589865, metadata !"string.h", metadata !"Game", metadata !3} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 589841, i32 0, i32 12, metadata !"bits.c", metadata !"Game", metadata !"clang version 3.0 (trunk 127710)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!4 = metadata !{i32 589845, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !5, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!0 = metadata !{i32 786689, metadata !1, metadata !"__dest", metadata !2, i32 16777294, metadata !6, i32 0, null} ; [ DW_TAG_arg_variable ]
+!1 = metadata !{i32 786478, i32 0, metadata !2, metadata !"foobar", metadata !"foobar", metadata !"", metadata !2, i32 79, metadata !4, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i8* (i8*, i32, i64)* @foobar, null, null, metadata !25, i32 79} ; [ DW_TAG_subprogram ]
+!2 = metadata !{i32 786473, metadata !"string.h", metadata !"Game", metadata !3} ; [ DW_TAG_file_type ]
+!3 = metadata !{i32 786449, i32 0, i32 12, metadata !"bits.c", metadata !"Game", metadata !"clang version 3.0 (trunk 127710)", i1 true, i1 true, metadata !"", i32 0, null, null, metadata !24, null, null} ; [ DW_TAG_compile_unit ]
+!4 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !5, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !5 = metadata !{metadata !6}
-!6 = metadata !{i32 589839, metadata !3, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ]
-!7 = metadata !{i32 590081, metadata !1, metadata !"__val", metadata !2, i32 33554510, metadata !8, i32 0} ; [ DW_TAG_arg_variable ]
-!8 = metadata !{i32 589860, metadata !3, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!9 = metadata !{i32 590081, metadata !1, metadata !"__len", metadata !2, i32 50331726, metadata !10, i32 0} ; [ DW_TAG_arg_variable ]
+!6 = metadata !{i32 786447, metadata !3, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ]
+!7 = metadata !{i32 786689, metadata !1, metadata !"__val", metadata !2, i32 33554510, metadata !8, i32 0, null} ; [ DW_TAG_arg_variable ]
+!8 = metadata !{i32 786468, metadata !3, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!9 = metadata !{i32 786689, metadata !1, metadata !"__len", metadata !2, i32 50331726, metadata !10, i32 0, null} ; [ DW_TAG_arg_variable ]
 !10 = metadata !{i32 589846, metadata !3, metadata !"size_t", metadata !2, i32 80, i64 0, i64 0, i64 0, i32 0, metadata !11} ; [ DW_TAG_typedef ]
 !11 = metadata !{i32 589846, metadata !3, metadata !"__darwin_size_t", metadata !2, i32 90, i64 0, i64 0, i64 0, i32 0, metadata !12} ; [ DW_TAG_typedef ]
-!12 = metadata !{i32 589860, metadata !3, metadata !"long unsigned int", null, i32 0, i64 64, i64 64, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
+!12 = metadata !{i32 786468, metadata !3, metadata !"long unsigned int", null, i32 0, i64 64, i64 64, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
 !13 = metadata !{metadata !"any pointer", metadata !14}
 !14 = metadata !{metadata !"omnipotent char", metadata !15}
 !15 = metadata !{metadata !"Simple C/C++ TBAA", null}
@@ -53,5 +54,5 @@ entry:
 !19 = metadata !{metadata !"long", metadata !14}
 !20 = metadata !{i32 78, i32 54, metadata !1, null}
 !21 = metadata !{i32 80, i32 3, metadata !22, null}
-!22 = metadata !{i32 589835, metadata !23, i32 80, i32 3, metadata !2, i32 7} ; [ DW_TAG_lexical_block ]
-!23 = metadata !{i32 589835, metadata !1, i32 79, i32 1, metadata !2, i32 6} ; [ DW_TAG_lexical_block ]
+!22 = metadata !{i32 786443, metadata !23, i32 80, i32 3, metadata !2, i32 7} ; [ DW_TAG_lexical_block ]
+!23 = metadata !{i32 786443, metadata !1, i32 79, i32 1, metadata !2, i32 6} ; [ DW_TAG_lexical_block ]
diff --git a/test/Transforms/InstCombine/devirt.ll b/test/Transforms/InstCombine/devirt.ll
index 6189dc2af4..9c7cf5d697 100644
--- a/test/Transforms/InstCombine/devirt.ll
+++ b/test/Transforms/InstCombine/devirt.ll
@@ -1,4 +1,4 @@
-; RUN: opt -instcombine -S -o - %s | FileCheck %s
+; RUN: opt -instcombine -S < %s | FileCheck %s
 
 ; CHECK-NOT: getelementptr
 ; CHECK-NOT: ptrtoint
diff --git a/test/Transforms/InstCombine/exact.ll b/test/Transforms/InstCombine/exact.ll
index 14741e3c1c..88ca88c3b9 100644
--- a/test/Transforms/InstCombine/exact.ll
+++ b/test/Transforms/InstCombine/exact.ll
@@ -99,9 +99,9 @@ define i1 @ashr_icmp2(i64 %X) nounwind {
 ; PR9998
 ; Make sure we don't transform the ashr here into an sdiv
 ; CHECK: @pr9998
-; CHECK: = and i32 %V, 1
-; CHECK: %Z = icmp ne
-; CHECK: ret i1 %Z
+; CHECK:      [[BIT:%[A-Za-z0-9.]+]] = and i32 %V, 1
+; CHECK-NEXT: [[CMP:%[A-Za-z0-9.]+]] = icmp ne i32 [[BIT]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
 define i1 @pr9998(i32 %V) nounwind {
 entry:
   %W = shl i32 %V, 31
@@ -112,6 +112,7 @@ entry:
 }
 
 
+
 ; CHECK: @udiv_icmp1
 ; CHECK: icmp ne i64 %X, 0
 define i1 @udiv_icmp1(i64 %X) nounwind {
diff --git a/test/Transforms/InstCombine/fast-math.ll b/test/Transforms/InstCombine/fast-math.ll
index b6a15677bb..3e32a2e4dd 100644
--- a/test/Transforms/InstCombine/fast-math.ll
+++ b/test/Transforms/InstCombine/fast-math.ll
@@ -3,19 +3,17 @@
 ; testing-case "float fold(float a) { return 1.2f * a * 2.3f; }"
 ; 1.2f and 2.3f is supposed to be fold.
 define float @fold(float %a) {
-fold:
   %mul = fmul fast float %a, 0x3FF3333340000000
   %mul1 = fmul fast float %mul, 0x4002666660000000
   ret float %mul1
-; CHECK: fold
-; CHECK: fmul float %a, 0x4006147AE0000000
+; CHECK: @fold
+; CHECK: fmul fast float %a, 0x4006147AE0000000
 }
 
 ; Same testing-case as the one used in fold() except that the operators have
 ; fixed FP mode.
 define float @notfold(float %a) {
-notfold:
-; CHECK: notfold
+; CHECK: @notfold
 ; CHECK: %mul = fmul fast float %a, 0x3FF3333340000000
   %mul = fmul fast float %a, 0x3FF3333340000000
   %mul1 = fmul float %mul, 0x4002666660000000
@@ -23,10 +21,332 @@ notfold:
 }
 
 define float @fold2(float %a) {
-fold2:
-; CHECK: fold2
-; CHECK: fmul float %a, 0x4006147AE0000000
+; CHECK: @fold2
+; CHECK: fmul fast float %a, 0x4006147AE0000000
   %mul = fmul float %a, 0x3FF3333340000000
   %mul1 = fmul fast float %mul, 0x4002666660000000
   ret float %mul1
 }
+
+; C * f1 + f1 = (C+1) * f1
+define double @fold3(double %f1) {
+  %t1 = fmul fast double 2.000000e+00, %f1
+  %t2 = fadd fast double %f1, %t1
+  ret double %t2
+; CHECK: @fold3
+; CHECK: fmul fast double %f1, 3.000000e+00
+}
+
+; (C1 - X) + (C2 - Y) => (C1+C2) - (X + Y)
+define float @fold4(float %f1, float %f2) {
+  %sub = fsub float 4.000000e+00, %f1
+  %sub1 = fsub float 5.000000e+00, %f2
+  %add = fadd fast float %sub, %sub1
+  ret float %add
+; CHECK: @fold4
+; CHECK: %1 = fadd fast float %f1, %f2
+; CHECK: fsub fast float 9.000000e+00, %1
+}
+
+; (X + C1) + C2 => X + (C1 + C2)
+define float @fold5(float %f1, float %f2) {
+  %add = fadd float %f1, 4.000000e+00
+  %add1 = fadd fast float %add, 5.000000e+00
+  ret float %add1
+; CHECK: @fold5
+; CHECK: fadd fast float %f1, 9.000000e+00
+}
+
+; (X + X) + X => 3.0 * X
+define float @fold6(float %f1) {
+  %t1 = fadd fast float %f1, %f1
+  %t2 = fadd fast float %f1, %t1
+  ret float %t2
+; CHECK: @fold6
+; CHECK: fmul fast float %f1, 3.000000e+00
+}
+
+; C1 * X + (X + X) = (C1 + 2) * X
+define float @fold7(float %f1) {
+  %t1 = fmul fast float %f1, 5.000000e+00
+  %t2 = fadd fast float %f1, %f1
+  %t3 = fadd fast float %t1, %t2
+  ret float %t3
+; CHECK: @fold7
+; CHECK: fmul fast float %f1, 7.000000e+00
+}
+
+; (X + X) + (X + X) => 4.0 * X
+define float @fold8(float %f1) {
+  %t1 = fadd fast float %f1, %f1
+  %t2 = fadd fast float %f1, %f1
+  %t3 = fadd fast float %t1, %t2
+  ret float %t3
+; CHECK: fold8
+; CHECK: fmul fast float %f1, 4.000000e+00
+}
+
+; X - (X + Y) => 0 - Y
+define float @fold9(float %f1, float %f2) {
+  %t1 = fadd float %f1, %f2
+  %t3 = fsub fast float %f1, %t1
+  ret float %t3
+
+; CHECK: @fold9
+; CHECK: fsub fast float 0.000000e+00, %f2
+}
+
+; Let C3 = C1 + C2. (f1 + C1) + (f2 + C2) => (f1 + f2) + C3 instead of
+; "(f1 + C3) + f2" or "(f2 + C3) + f1". Placing constant-addend at the
+; top of resulting simplified expression tree may potentially reveal some
+; optimization opportunities in the super-expression trees.
+;
+define float @fold10(float %f1, float %f2) {
+  %t1 = fadd fast float 2.000000e+00, %f1
+  %t2 = fsub fast float %f2, 3.000000e+00
+  %t3 = fadd fast float %t1, %t2
+  ret float %t3
+; CHECK: @fold10
+; CHECK: %t3 = fadd fast float %t2, -1.000000e+00
+; CHECK: ret float %t3
+}
+
+; once cause Crash/miscompilation
+define float @fail1(float %f1, float %f2) {
+  %conv3 = fadd fast float %f1, -1.000000e+00
+  %add = fadd fast float %conv3, %conv3
+  %add2 = fadd fast float %add, %conv3
+  ret float %add2
+; CHECK: @fail1
+; CHECK: ret
+}
+
+define double @fail2(double %f1, double %f2) {
+  %t1 = fsub fast double %f1, %f2
+  %t2 = fadd fast double %f1, %f2
+  %t3 = fsub fast double %t1, %t2
+  ret double %t3
+; CHECK: @fail2
+; CHECK: ret
+}
+
+; =========================================================================
+;
+;   Testing-cases about fmul begin
+;
+; =========================================================================
+
+; ((X*C1) + C2) * C3 => (X * (C1*C3)) + (C2*C3) (i.e. distribution)
+define float @fmul_distribute1(float %f1) {
+  %t1 = fmul float %f1, 6.0e+3
+  %t2 = fadd float %t1, 2.0e+3
+  %t3 = fmul fast float %t2, 5.0e+3
+  ret float %t3
+; CHECK: @fmul_distribute1
+; CHECK: %1 = fmul fast float %f1, 3.000000e+07
+; CHECK: %t3 = fadd fast float %1, 1.000000e+07
+}
+
+; (X/C1 + C2) * C3 => X/(C1/C3) + C2*C3
+define double @fmul_distribute2(double %f1, double %f2) {
+  %t1 = fdiv double %f1, 3.0e+0
+  %t2 = fadd double %t1, 5.0e+1
+  ; 0x10000000000000 = DBL_MIN
+  %t3 = fmul fast double %t2, 0x10000000000000
+  ret double %t3
+
+; CHECK: @fmul_distribute2
+; CHECK: %1 = fdiv fast double %f1, 0x7FE8000000000000
+; CHECK: fadd fast double %1, 0x69000000000000
+}
+
+; 5.0e-1 * DBL_MIN yields denormal, so "(f1*3.0 + 5.0e-1) * DBL_MIN" cannot
+; be simplified into f1 * (3.0*DBL_MIN) + (5.0e-1*DBL_MIN)
+define double @fmul_distribute3(double %f1) {
+  %t1 = fdiv double %f1, 3.0e+0
+  %t2 = fadd double %t1, 5.0e-1
+  %t3 = fmul fast double %t2, 0x10000000000000
+  ret double %t3
+
+; CHECK: @fmul_distribute3
+; CHECK: fmul fast double %t2, 0x10000000000000
+}
+
+; ((X*C1) + C2) * C3 => (X * (C1*C3)) + (C2*C3) (i.e. distribution)
+define float @fmul_distribute4(float %f1) {
+  %t1 = fmul float %f1, 6.0e+3
+  %t2 = fsub float 2.0e+3, %t1
+  %t3 = fmul fast float %t2, 5.0e+3
+  ret float %t3
+; CHECK: @fmul_distribute4
+; CHECK: %1 = fmul fast float %f1, 3.000000e+07
+; CHECK: %t3 = fsub fast float 1.000000e+07, %1
+}
+
+; C1/X * C2 => (C1*C2) / X
+define float @fmul2(float %f1) {
+  %t1 = fdiv float 2.0e+3, %f1
+  %t3 = fmul fast float %t1, 6.0e+3
+  ret float %t3
+; CHECK: @fmul2
+; CHECK: fdiv fast float 1.200000e+07, %f1
+}
+
+; X/C1 * C2 => X * (C2/C1) (if C2/C1 is normal Fp)
+define float @fmul3(float %f1, float %f2) {
+  %t1 = fdiv float %f1, 2.0e+3
+  %t3 = fmul fast float %t1, 6.0e+3
+  ret float %t3
+; CHECK: @fmul3
+; CHECK: fmul fast float %f1, 3.000000e+00
+}
+
+; Rule "X/C1 * C2 => X * (C2/C1) is not applicable if C2/C1 is either a special
+; value of a denormal. The 0x3810000000000000 here take value FLT_MIN
+;
+define float @fmul4(float %f1, float %f2) {
+  %t1 = fdiv float %f1, 2.0e+3
+  %t3 = fmul fast float %t1, 0x3810000000000000
+  ret float %t3
+; CHECK: @fmul4
+; CHECK: fmul fast float %t1, 0x3810000000000000
+}
+
+; X / C1 * C2 => X / (C2/C1) if  C1/C2 is either a special value of a denormal,
+;  and C2/C1 is a normal value.
+;
+define float @fmul5(float %f1, float %f2) {
+  %t1 = fdiv float %f1, 3.0e+0
+  %t3 = fmul fast float %t1, 0x3810000000000000
+  ret float %t3
+; CHECK: @fmul5
+; CHECK: fdiv fast float %f1, 0x47E8000000000000
+}
+
+; (X*Y) * X => (X*X) * Y
+define float @fmul6(float %f1, float %f2) {
+  %mul = fmul float %f1, %f2
+  %mul1 = fmul fast float %mul, %f1
+  ret float %mul1
+; CHECK: @fmul6
+; CHECK: fmul fast float %f1, %f1
+}
+
+; "(X*Y) * X => (X*X) * Y" is disabled if "X*Y" has multiple uses
+define float @fmul7(float %f1, float %f2) {
+  %mul = fmul float %f1, %f2
+  %mul1 = fmul fast float %mul, %f1
+  %add = fadd float %mul1, %mul
+  ret float %add
+; CHECK: @fmul7
+; CHECK: fmul fast float %mul, %f1
+}
+
+; =========================================================================
+;
+;   Testing-cases about negation
+;
+; =========================================================================
+define float @fneg1(float %f1, float %f2) {
+  %sub = fsub float -0.000000e+00, %f1
+  %sub1 = fsub nsz float 0.000000e+00, %f2
+  %mul = fmul float %sub, %sub1
+  ret float %mul
+; CHECK: @fneg1
+; CHECK: fmul float %f1, %f2
+}
+
+; =========================================================================
+;
+;   Testing-cases about div
+;
+; =========================================================================
+
+; X/C1 / C2 => X * (1/(C2*C1))
+define float @fdiv1(float %x) {
+  %div = fdiv float %x, 0x3FF3333340000000
+  %div1 = fdiv fast float %div, 0x4002666660000000
+  ret float %div1
+; 0x3FF3333340000000 = 1.2f
+; 0x4002666660000000 = 2.3f
+; 0x3FD7303B60000000 = 0.36231884057971014492
+; CHECK: @fdiv1
+; CHECK: fmul fast float %x, 0x3FD7303B60000000
+}
+
+; X*C1 / C2 => X * (C1/C2)
+define float @fdiv2(float %x) {
+  %mul = fmul float %x, 0x3FF3333340000000
+  %div1 = fdiv fast float %mul, 0x4002666660000000
+  ret float %div1
+
+; 0x3FF3333340000000 = 1.2f
+; 0x4002666660000000 = 2.3f
+; 0x3FE0B21660000000 = 0.52173918485641479492
+; CHECK: @fdiv2
+; CHECK: fmul fast float %x, 0x3FE0B21660000000
+}
+
+; "X/C1 / C2 => X * (1/(C2*C1))" is disabled (for now) is C2/C1 is a denormal
+;
+define float @fdiv3(float %x) {
+  %div = fdiv float %x, 0x47EFFFFFE0000000
+  %div1 = fdiv fast float %div, 0x4002666660000000
+  ret float %div1
+; CHECK: @fdiv3
+; CHECK: fdiv float %x, 0x47EFFFFFE0000000
+}
+
+; "X*C1 / C2 => X * (C1/C2)" is disabled if C1/C2 is a denormal
+define float @fdiv4(float %x) {
+  %mul = fmul float %x, 0x47EFFFFFE0000000
+  %div = fdiv float %mul, 0x3FC99999A0000000
+  ret float %div
+; CHECK: @fdiv4
+; CHECK: fmul float %x, 0x47EFFFFFE0000000
+}
+
+; (X/Y)/Z = > X/(Y*Z)
+define float @fdiv5(float %f1, float %f2, float %f3) {
+  %t1 = fdiv float %f1, %f2
+  %t2 = fdiv fast float %t1, %f3
+  ret float %t2
+; CHECK: @fdiv5
+; CHECK: fmul float %f2, %f3
+}
+
+; Z/(X/Y) = > (Z*Y)/X
+define float @fdiv6(float %f1, float %f2, float %f3) {
+  %t1 = fdiv float %f1, %f2
+  %t2 = fdiv fast float %f3, %t1
+  ret float %t2
+; CHECK: @fdiv6
+; CHECK: fmul float %f3, %f2
+}
+
+; C1/(X*C2) => (C1/C2) / X
+define float @fdiv7(float %x) {
+  %t1 = fmul float %x, 3.0e0
+  %t2 = fdiv fast float 15.0e0, %t1
+  ret float %t2
+; CHECK: @fdiv7
+; CHECK: fdiv fast float 5.000000e+00, %x
+}
+
+; C1/(X/C2) => (C1*C2) / X
+define float @fdiv8(float %x) {
+  %t1 = fdiv float %x, 3.0e0
+  %t2 = fdiv fast float 15.0e0, %t1
+  ret float %t2
+; CHECK: @fdiv8
+; CHECK: fdiv fast float 4.500000e+01, %x
+}
+
+; C1/(C2/X) => (C1/C2) * X
+define float @fdiv9(float %x) {
+  %t1 = fdiv float 3.0e0, %x
+  %t2 = fdiv fast float 15.0e0, %t1
+  ret float %t2
+; CHECK: @fdiv9
+; CHECK: fmul fast float %x, 5.000000e+00
+}
diff --git a/test/Transforms/InstCombine/fmul.ll b/test/Transforms/InstCombine/fmul.ll
new file mode 100644
index 0000000000..3671b4c699
--- /dev/null
+++ b/test/Transforms/InstCombine/fmul.ll
@@ -0,0 +1,72 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+; (-0.0 - X) * C => X * -C
+define float @test1(float %x) {
+  %sub = fsub float -0.000000e+00, %x
+  %mul = fmul float %sub, 2.0e+1
+  ret float %mul
+
+; CHECK: @test1
+; CHECK: fmul float %x, -2.000000e+01
+}
+
+; (0.0 - X) * C => X * -C
+define float @test2(float %x) {
+  %sub = fsub nsz float 0.000000e+00, %x
+  %mul = fmul float %sub, 2.0e+1
+  ret float %mul
+
+; CHECK: @test2
+; CHECK: fmul float %x, -2.000000e+01
+}
+
+; (-0.0 - X) * (-0.0 - Y) => X * Y
+define float @test3(float %x, float %y) {
+  %sub1 = fsub float -0.000000e+00, %x
+  %sub2 = fsub float -0.000000e+00, %y
+  %mul = fmul float %sub1, %sub2
+  ret float %mul
+; CHECK: @test3
+; CHECK: fmul float %x, %y
+}
+
+; (0.0 - X) * (0.0 - Y) => X * Y
+define float @test4(float %x, float %y) {
+  %sub1 = fsub nsz float 0.000000e+00, %x
+  %sub2 = fsub nsz float 0.000000e+00, %y
+  %mul = fmul float %sub1, %sub2
+  ret float %mul
+; CHECK: @test4
+; CHECK: fmul float %x, %y
+}
+
+; (-0.0 - X) * Y => -0.0 - (X * Y)
+define float @test5(float %x, float %y) {
+  %sub1 = fsub float -0.000000e+00, %x
+  %mul = fmul float %sub1, %y
+  ret float %mul
+; CHECK: @test5
+; CHECK: %1 = fmul float %x, %y
+; CHECK: %mul = fsub float -0.000000e+00, %1
+}
+
+; (0.0 - X) * Y => 0.0 - (X * Y)
+define float @test6(float %x, float %y) {
+  %sub1 = fsub nsz float 0.000000e+00, %x
+  %mul = fmul float %sub1, %y
+  ret float %mul
+; CHECK: @test6
+; CHECK: %1 = fmul float %x, %y
+; CHECK: %mul = fsub float -0.000000e+00, %1
+}
+
+; "(-0.0 - X) * Y => -0.0 - (X * Y)" is disabled if expression "-0.0 - X"
+; has multiple uses.
+define float @test7(float %x, float %y) {
+  %sub1 = fsub float -0.000000e+00, %x
+  %mul = fmul float %sub1, %y
+  %mul2 = fmul float %mul, %sub1
+  ret float %mul2
+; CHECK: @test7
+; CHECK: fsub float -0.000000e+00, %x
+}
diff --git a/test/Transforms/InstCombine/fold-phi.ll b/test/Transforms/InstCombine/fold-phi.ll
new file mode 100644
index 0000000000..bd01d58aa5
--- /dev/null
+++ b/test/Transforms/InstCombine/fold-phi.ll
@@ -0,0 +1,39 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; CHECK: no_crash
+define float @no_crash(float %a) nounwind {
+entry:
+  br label %for.body
+
+for.body:
+  %sum.057 = phi float [ 0.000000e+00, %entry ], [ %add5, %bb0 ]
+  %add5 = fadd float %sum.057, %a    ; PR14592
+  br i1 undef, label %bb0, label %end
+
+bb0:
+  br label %for.body
+
+end:
+  ret float %add5
+}
+
+; CHECK: fold_phi
+define float @fold_phi(float %a) nounwind {
+entry:
+  br label %for.body
+
+for.body:
+; CHECK: phi float
+; CHECK-NEXT: br i1 undef
+  %sum.057 = phi float [ 0.000000e+00, %entry ], [ %add5, %bb0 ]
+  %add5 = fadd float %sum.057, 1.0 ;; Should be moved to the latch!
+  br i1 undef, label %bb0, label %end
+
+; CHECK: bb0:
+bb0:
+; CHECK: fadd float
+  br label %for.body
+
+end:
+  ret float %add5
+}
diff --git a/test/Transforms/InstCombine/fpcast.ll b/test/Transforms/InstCombine/fpcast.ll
index bc6aa0a689..09f053289d 100644
--- a/test/Transforms/InstCombine/fpcast.ll
+++ b/test/Transforms/InstCombine/fpcast.ll
@@ -13,3 +13,22 @@ define i8 @test2() {
 ; CHECK: ret i8 -1
 }
 
+; CHECK: test3
+define half @test3(float %a) {
+; CHECK: fptrunc
+; CHECK: llvm.fabs.f16
+  %b = call float @llvm.fabs.f32(float %a)
+  %c = fptrunc float %b to half
+  ret half %c
+}
+
+; CHECK: test4
+define half @test4(float %a) {
+; CHECK: fptrunc
+; CHECK: fsub
+  %b = fsub float -0.0, %a
+  %c = fptrunc float %b to half
+  ret half %c
+}
+
+declare float @llvm.fabs.f32(float) nounwind readonly
diff --git a/test/Transforms/InstCombine/getelementptr.ll b/test/Transforms/InstCombine/getelementptr.ll
index 1c120ecbe9..bb07736ef8 100644
--- a/test/Transforms/InstCombine/getelementptr.ll
+++ b/test/Transforms/InstCombine/getelementptr.ll
@@ -424,7 +424,7 @@ define i32 @test35() nounwind {
              i8* getelementptr (%t1* bitcast (%t0* @s to %t1*), i32 0, i32 1, i32 0)) nounwind
   ret i32 0
 ; CHECK: @test35
-; CHECK: call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([17 x i8]* @"\01LC8", i64 0, i64 0), i8* getelementptr inbounds (%t0* @s, i64 0, i32 1, i64 0)) nounwind
+; CHECK: call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([17 x i8]* @"\01LC8", i64 0, i64 0), i8* getelementptr inbounds (%t0* @s, i64 0, i32 1, i64 0)) [[NUW:#[0-9]+]]
 }
 
 ; Instcombine should constant-fold the GEP so that indices that have
@@ -492,3 +492,21 @@ define void @three_gep_f(%three_gep_t2* %x) {
 
 declare void @three_gep_g(i32*)
 declare void @three_gep_h(%three_gep_t2*)
+
+%struct.ham = type { i32, %struct.zot*, %struct.zot*, %struct.zot* }
+%struct.zot = type { i64, i8 }
+
+define void @test39(%struct.ham* %arg, i8 %arg1) nounwind {
+  %tmp = getelementptr inbounds %struct.ham* %arg, i64 0, i32 2
+  %tmp2 = load %struct.zot** %tmp, align 8
+  %tmp3 = bitcast %struct.zot* %tmp2 to i8*
+  %tmp4 = getelementptr inbounds i8* %tmp3, i64 -8
+  store i8 %arg1, i8* %tmp4, align 8
+  ret void
+
+; CHECK: @test39
+; CHECK: getelementptr inbounds %struct.ham* %arg, i64 0, i32 2
+; CHECK: getelementptr inbounds i8* %tmp3, i64 -8
+}
+
+; CHECK: attributes [[NUW]] = { nounwind }
diff --git a/test/Transforms/InstCombine/icmp.ll b/test/Transforms/InstCombine/icmp.ll
index 8e064a4f2f..331eb3f21a 100644
--- a/test/Transforms/InstCombine/icmp.ll
+++ b/test/Transforms/InstCombine/icmp.ll
@@ -677,3 +677,70 @@ define i1 @test66(i64 %A, i64 %B) {
 ; CHECK-NEXT: ret i1 true
   ret i1 %cmp
 }
+
+; CHECK: @test67
+; CHECK: %and = and i32 %x, 96
+; CHECK: %cmp = icmp ne i32 %and, 0
+define i1 @test67(i32 %x) nounwind uwtable {
+  %and = and i32 %x, 127
+  %cmp = icmp sgt i32 %and, 31
+  ret i1 %cmp
+}
+
+; CHECK: @test68
+; CHECK: %cmp = icmp ugt i32 %and, 30
+define i1 @test68(i32 %x) nounwind uwtable {
+  %and = and i32 %x, 127
+  %cmp = icmp sgt i32 %and, 30
+  ret i1 %cmp
+}
+
+; PR14708
+; CHECK: @test69
+; CHECK: %1 = and i32 %c, -33
+; CHECK: %2 = icmp eq i32 %1, 65
+; CHECK: ret i1 %2
+define i1 @test69(i32 %c) nounwind uwtable {
+  %1 = icmp eq i32 %c, 97
+  %2 = icmp eq i32 %c, 65
+  %3 = or i1 %1, %2
+  ret i1 %3
+}
+
+; CHECK: @icmp_sext16trunc
+; CHECK-NEXT: %1 = trunc i32 %x to i16
+; CHECK-NEXT: %cmp = icmp slt i16 %1, 36
+define i1 @icmp_sext16trunc(i32 %x) {
+  %trunc = trunc i32 %x to i16
+  %sext = sext i16 %trunc to i32
+  %cmp = icmp slt i32 %sext, 36
+  ret i1 %cmp
+}
+
+; CHECK: @icmp_sext8trunc
+; CHECK-NEXT: %1 = trunc i32 %x to i8
+; CHECK-NEXT: %cmp = icmp slt i8 %1, 36
+define i1 @icmp_sext8trunc(i32 %x) {
+  %trunc = trunc i32 %x to i8
+  %sext = sext i8 %trunc to i32
+  %cmp = icmp slt i32 %sext, 36
+  ret i1 %cmp
+}
+
+; CHECK: @icmp_shl16
+; CHECK-NEXT: %1 = trunc i32 %x to i16
+; CHECK-NEXT: %cmp = icmp slt i16 %1, 36
+define i1 @icmp_shl16(i32 %x) {
+  %shl = shl i32 %x, 16
+  %cmp = icmp slt i32 %shl, 2359296
+  ret i1 %cmp
+}
+
+; CHECK: @icmp_shl24
+; CHECK-NEXT: %1 = trunc i32 %x to i8
+; CHECK-NEXT: %cmp = icmp slt i8 %1, 36
+define i1 @icmp_shl24(i32 %x) {
+  %shl = shl i32 %x, 24
+  %cmp = icmp slt i32 %shl, 603979776
+  ret i1 %cmp
+}
diff --git a/test/Transforms/InstCombine/idioms.ll b/test/Transforms/InstCombine/idioms.ll
index 6b3567fc6e..1a211668c3 100644
--- a/test/Transforms/InstCombine/idioms.ll
+++ b/test/Transforms/InstCombine/idioms.ll
@@ -1,4 +1,4 @@
-; RUN: opt -instcombine %s -S | FileCheck %s
+; RUN: opt -instcombine -S < %s | FileCheck %s
 
 ; Check that code corresponding to the following C function is
 ; simplified into a single ASR operation:
diff --git a/test/Transforms/InstCombine/intrinsics.ll b/test/Transforms/InstCombine/intrinsics.ll
index 382e6b3857..f334b3b1e9 100644
--- a/test/Transforms/InstCombine/intrinsics.ll
+++ b/test/Transforms/InstCombine/intrinsics.ll
@@ -1,4 +1,4 @@
-; RUN: opt %s -instcombine -S | FileCheck %s
+; RUN: opt -instcombine -S < %s | FileCheck %s
 
 %overflow.result = type {i8, i1}
 
@@ -220,3 +220,39 @@ define i32 @cttz_simplify1b(i32 %x) nounwind readnone ssp {
 ; CHECK: @cttz_simplify1b
 ; CHECK-NEXT: ret i32 0
 }
+
+define i32 @ctlz_undef(i32 %Value) nounwind {
+  %ctlz = call i32 @llvm.ctlz.i32(i32 0, i1 true)
+  ret i32 %ctlz
+
+; CHECK: @ctlz_undef
+; CHECK-NEXT: ret i32 undef
+}
+
+define i32 @cttz_undef(i32 %Value) nounwind {
+  %cttz = call i32 @llvm.cttz.i32(i32 0, i1 true)
+  ret i32 %cttz
+
+; CHECK: @cttz_undef
+; CHECK-NEXT: ret i32 undef
+}
+
+define i32 @ctlz_select(i32 %Value) nounwind {
+  %tobool = icmp ne i32 %Value, 0
+  %ctlz = call i32 @llvm.ctlz.i32(i32 %Value, i1 true)
+  %s = select i1 %tobool, i32 %ctlz, i32 32
+  ret i32 %s
+
+; CHECK: @ctlz_select
+; CHECK: select i1 %tobool, i32 %ctlz, i32 32
+}
+
+define i32 @cttz_select(i32 %Value) nounwind {
+  %tobool = icmp ne i32 %Value, 0
+  %cttz = call i32 @llvm.cttz.i32(i32 %Value, i1 true)
+  %s = select i1 %tobool, i32 %cttz, i32 32
+  ret i32 %s
+
+; CHECK: @cttz_select
+; CHECK: select i1 %tobool, i32 %cttz, i32 32
+}
diff --git a/test/Transforms/InstCombine/load3.ll b/test/Transforms/InstCombine/load3.ll
index 35398e17db..db74426783 100644
--- a/test/Transforms/InstCombine/load3.ll
+++ b/test/Transforms/InstCombine/load3.ll
@@ -1,6 +1,6 @@
 ; RUN: opt < %s -instcombine -S | FileCheck %s
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
-target triple = "x86_64-apple-darwin10.0.0"
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32-S128"
+target triple = "i386-apple-macosx10.0.0"
 
 ; Instcombine should be able to do trivial CSE of loads.
 
@@ -24,4 +24,23 @@ define float @test2() {
   
 ; CHECK: @test2
 ; CHECK: ret float 0x3806965600000000
-}
-\ No newline at end of file
+}
+
+@rslts32 = global [36 x i32] zeroinitializer, align 4
+
+@expect32 = internal constant [36 x i32][ i32 1, i32 2, i32 0, i32 100, i32 3,
+i32 4, i32 0, i32 -7, i32 4, i32 4, i32 8, i32 8, i32 1, i32 3, i32 8, i32 3,
+i32 4, i32 -2, i32 2, i32 8, i32 83, i32 77, i32 8, i32 17, i32 77, i32 88, i32
+22, i32 33, i32 44, i32 88, i32 77, i32 4, i32 4, i32 7, i32 -7, i32 -8] ,
+align 4
+
+; PR14986
+define void @test3() nounwind {
+; This is a weird way of computing zero.
+  %l = load i32* getelementptr ([36 x i32]* @expect32, i32 29826161, i32 28), align 4
+  store i32 %l, i32* getelementptr ([36 x i32]* @rslts32, i32 29826161, i32 28), align 4
+  ret void
+
+; CHECK: @test3
+; CHECK: store i32 1, i32* getelementptr inbounds ([36 x i32]* @rslts32, i32 0, i32 0)
+}
diff --git a/test/Transforms/InstCombine/logical-select.ll b/test/Transforms/InstCombine/logical-select.ll
index bb59817a4f..f8c0676845 100644
--- a/test/Transforms/InstCombine/logical-select.ll
+++ b/test/Transforms/InstCombine/logical-select.ll
@@ -10,8 +10,8 @@ define i32 @foo(i32 %a, i32 %b, i32 %c, i32 %d) nounwind {
   %j = or i32 %g, %i
   ret i32 %j
 ; CHECK: %e = icmp slt i32 %a, %b
-; CHECK: %j = select i1 %e, i32 %c, i32 %d
-; CHECK: ret i32 %j
+; CHECK-NEXT: [[result:%.*]] = select i1 %e, i32 %c, i32 %d
+; CHECK-NEXT: ret i32 [[result]]
 }
 define i32 @bar(i32 %a, i32 %b, i32 %c, i32 %d) nounwind {
   %e = icmp slt i32 %a, %b
@@ -22,8 +22,8 @@ define i32 @bar(i32 %a, i32 %b, i32 %c, i32 %d) nounwind {
   %j = or i32 %i, %g
   ret i32 %j
 ; CHECK: %e = icmp slt i32 %a, %b
-; CHECK: %j = select i1 %e, i32 %c, i32 %d
-; CHECK: ret i32 %j
+; CHECK-NEXT: [[result:%.*]] = select i1 %e, i32 %c, i32 %d
+; CHECK-NEXT: ret i32 [[result]]
 }
 
 define i32 @goo(i32 %a, i32 %b, i32 %c, i32 %d) nounwind {
@@ -36,8 +36,8 @@ entry:
   %3 = or i32 %1, %2
   ret i32 %3
 ; CHECK: %0 = icmp slt i32 %a, %b
-; CHECK: %1 = select i1 %0, i32 %c, i32 %d
-; CHECK: ret i32 %1
+; CHECK-NEXT: [[result:%.*]] = select i1 %0, i32 %c, i32 %d
+; CHECK-NEXT: ret i32 [[result]]
 }
 define i32 @poo(i32 %a, i32 %b, i32 %c, i32 %d) nounwind {
 entry:
@@ -49,8 +49,8 @@ entry:
   %3 = or i32 %1, %2
   ret i32 %3
 ; CHECK: %0 = icmp slt i32 %a, %b
-; CHECK: %1 = select i1 %0, i32 %c, i32 %d
-; CHECK: ret i32 %1
+; CHECK-NEXT: [[result:%.*]] = select i1 %0, i32 %c, i32 %d
+; CHECK-NEXT: ret i32 [[result]]
 }
 
 define i32 @par(i32 %a, i32 %b, i32 %c, i32 %d) nounwind {
@@ -63,6 +63,6 @@ entry:
   %3 = or i32 %1, %2
   ret i32 %3
 ; CHECK: %0 = icmp slt i32 %a, %b
-; CHECK: %1 = select i1 %0, i32 %c, i32 %d
-; CHECK: ret i32 %1
+; CHECK-NEXT: [[result:%.*]] = select i1 %0, i32 %c, i32 %d
+; CHECK-NEXT: ret i32 [[result]]
 }
diff --git a/test/Transforms/InstCombine/malloc-free-delete.ll b/test/Transforms/InstCombine/malloc-free-delete.ll
index 4e3217dc2d..cd12b29b11 100644
--- a/test/Transforms/InstCombine/malloc-free-delete.ll
+++ b/test/Transforms/InstCombine/malloc-free-delete.ll
@@ -91,3 +91,32 @@ define void @test5(i8* %ptr, i8** %esc) {
   store volatile i8 4, i8* %g
   ret void
 }
+
+;; When a basic block contains only a call to free and this block is accessed
+;; through a test of the argument of free against null, move the call in the
+;; predecessor block.
+;; Using simplifycfg will remove the empty basic block and the branch operation
+;; Then, performing a dead elimination will remove the comparison.
+;; This is what happens with -O1 and upper.
+; CHECK: @test6
+define void @test6(i8* %foo) minsize {
+; CHECK:  %tobool = icmp eq i8* %foo, null
+;; Call to free moved
+; CHECK-NEXT: tail call void @free(i8* %foo)
+; CHECK-NEXT: br i1 %tobool, label %if.end, label %if.then
+; CHECK: if.then:
+;; Block is now empty and may be simplified by simplifycfg
+; CHECK-NEXT:   br label %if.end
+; CHECK: if.end:
+; CHECK-NEXT:  ret void
+entry:
+  %tobool = icmp eq i8* %foo, null
+  br i1 %tobool, label %if.end, label %if.then
+
+if.then:                                          ; preds = %entry
+  tail call void @free(i8* %foo)
+  br label %if.end
+
+if.end:                                           ; preds = %entry, %if.then
+  ret void
+}
diff --git a/test/Transforms/InstCombine/mul.ll b/test/Transforms/InstCombine/mul.ll
index bbc70fe333..16213b8628 100644
--- a/test/Transforms/InstCombine/mul.ll
+++ b/test/Transforms/InstCombine/mul.ll
@@ -138,10 +138,8 @@ define i32 @test16(i32 %b, i1 %c) {
         ; e = b & (a >> 31)
         %e = mul i32 %d, %b             ; <i32> [#uses=1]
         ret i32 %e
-; CHECK: [[TEST16:%.*]] = zext i1 %c to i32
-; CHECK-NEXT: %1 = sub i32 0, [[TEST16]]
-; CHECK-NEXT: %e = and i32 %1, %b
-; CHECK-NEXT: ret i32 %e
+; CHECK: [[TEST16:%.*]] = select i1 %c, i32 %b, i32 0
+; CHECK-NEXT: ret i32 [[TEST16]]
 }
 
 ; X * Y (when Y is 0 or 1) --> x & (0-Y)
diff --git a/test/Transforms/InstCombine/obfuscated_splat.ll b/test/Transforms/InstCombine/obfuscated_splat.ll
index c25dade168..fa9cb423d0 100644
--- a/test/Transforms/InstCombine/obfuscated_splat.ll
+++ b/test/Transforms/InstCombine/obfuscated_splat.ll
@@ -1,4 +1,4 @@
-; RUN: opt -instcombine -S %s | FileCheck %s
+; RUN: opt -instcombine -S < %s | FileCheck %s
 
 define void @test(<4 x float> *%in_ptr, <4 x float> *%out_ptr) {
   %A = load <4 x float>* %in_ptr, align 16
diff --git a/test/Transforms/InstCombine/objsize.ll b/test/Transforms/InstCombine/objsize.ll
index 31a3cb46e4..0ead9d1237 100644
--- a/test/Transforms/InstCombine/objsize.ll
+++ b/test/Transforms/InstCombine/objsize.ll
@@ -256,3 +256,131 @@ xpto:
 return:
   ret i32 7
 }
+
+declare noalias i8* @valloc(i32) nounwind
+
+; CHECK: @test14
+; CHECK: ret i32 6
+define i32 @test14(i32 %a) nounwind {
+  switch i32 %a, label %sw.default [
+    i32 1, label %sw.bb
+    i32 2, label %sw.bb1
+  ]
+
+sw.bb:
+  %call = tail call noalias i8* @malloc(i32 6) nounwind
+  br label %sw.epilog
+
+sw.bb1:
+  %call2 = tail call noalias i8* @calloc(i32 3, i32 2) nounwind
+  br label %sw.epilog
+
+sw.default:
+  %call3 = tail call noalias i8* @valloc(i32 6) nounwind
+  br label %sw.epilog
+
+sw.epilog:
+  %b.0 = phi i8* [ %call3, %sw.default ], [ %call2, %sw.bb1 ], [ %call, %sw.bb ]
+  %1 = tail call i32 @llvm.objectsize.i32(i8* %b.0, i1 false)
+  ret i32 %1
+}
+
+; CHECK: @test15
+; CHECK: llvm.objectsize
+define i32 @test15(i32 %a) nounwind {
+  switch i32 %a, label %sw.default [
+    i32 1, label %sw.bb
+    i32 2, label %sw.bb1
+  ]
+
+sw.bb:
+  %call = tail call noalias i8* @malloc(i32 3) nounwind
+  br label %sw.epilog
+
+sw.bb1:
+  %call2 = tail call noalias i8* @calloc(i32 2, i32 1) nounwind
+  br label %sw.epilog
+
+sw.default:
+  %call3 = tail call noalias i8* @valloc(i32 3) nounwind
+  br label %sw.epilog
+
+sw.epilog:
+  %b.0 = phi i8* [ %call3, %sw.default ], [ %call2, %sw.bb1 ], [ %call, %sw.bb ]
+  %1 = tail call i32 @llvm.objectsize.i32(i8* %b.0, i1 false)
+  ret i32 %1
+}
+
+; CHECK: @test16
+; CHECK: llvm.objectsize
+define i32 @test16(i8* %a, i32 %n) nounwind {
+  %b = alloca [5 x i8], align 1
+  %c = alloca [5 x i8], align 1
+  switch i32 %n, label %sw.default [
+    i32 1, label %sw.bb
+    i32 2, label %sw.bb1
+  ]
+
+sw.bb:
+  %bp = bitcast [5 x i8]* %b to i8*
+  br label %sw.epilog
+
+sw.bb1:
+  %cp = bitcast [5 x i8]* %c to i8*
+  br label %sw.epilog
+
+sw.default:
+  br label %sw.epilog
+
+sw.epilog:
+  %phi = phi i8* [ %a, %sw.default ], [ %cp, %sw.bb1 ], [ %bp, %sw.bb ]
+  %sz = call i32 @llvm.objectsize.i32(i8* %phi, i1 false)
+  ret i32 %sz
+}
+
+; CHECK: @test17
+; CHECK: ret i32 5
+define i32 @test17(i32 %n) nounwind {
+  %b = alloca [5 x i8], align 1
+  %c = alloca [5 x i8], align 1
+  %bp = bitcast [5 x i8]* %b to i8*
+  switch i32 %n, label %sw.default [
+    i32 1, label %sw.bb
+    i32 2, label %sw.bb1
+  ]
+
+sw.bb:
+  br label %sw.epilog
+
+sw.bb1:
+  %cp = bitcast [5 x i8]* %c to i8*
+  br label %sw.epilog
+
+sw.default:
+  br label %sw.epilog
+
+sw.epilog:
+  %phi = phi i8* [ %bp, %sw.default ], [ %cp, %sw.bb1 ], [ %bp, %sw.bb ]
+  %sz = call i32 @llvm.objectsize.i32(i8* %phi, i1 false)
+  ret i32 %sz
+}
+
+@globalalias = alias internal [60 x i8]* @a
+
+; CHECK: @test18
+; CHECK-NEXT: ret i32 60
+define i32 @test18() {
+  %bc = bitcast [60 x i8]* @globalalias to i8*
+  %1 = call i32 @llvm.objectsize.i32(i8* %bc, i1 false)
+  ret i32 %1
+}
+
+@globalalias2 = alias weak [60 x i8]* @a
+
+; CHECK: @test19
+; CHECK: llvm.objectsize
+define i32 @test19() {
+  %bc = bitcast [60 x i8]* @globalalias2 to i8*
+  %1 = call i32 @llvm.objectsize.i32(i8* %bc, i1 false)
+  ret i32 %1
+}
diff --git a/test/Transforms/InstCombine/or.ll b/test/Transforms/InstCombine/or.ll
index c0bb28d15c..bde2a54048 100644
--- a/test/Transforms/InstCombine/or.ll
+++ b/test/Transforms/InstCombine/or.ll
@@ -344,10 +344,9 @@ define <4 x i32> @test32(<4 x i1> %and.i1352, <4 x i32> %vecinit6.i176, <4 x i32
   %and.i = and <4 x i32> %vecinit6.i191, %neg.i   ; <<4 x i32>> [#uses=1]
   %or.i = or <4 x i32> %and.i, %and.i129          ; <<4 x i32>> [#uses=1]
   ret <4 x i32> %or.i
-; Don't turn this into a vector select until codegen matures to handle them
-; better.
+; codegen is mature enough to handle vector selects.
 ; CHECK: @test32
-; CHECK: or <4 x i32> %and.i, %and.i129
+; CHECK: select <4 x i1> %and.i1352, <4 x i32> %vecinit6.i176, <4 x i32> %vecinit6.i191
 }
 
 define i1 @test33(i1 %X, i1 %Y) {
diff --git a/test/Transforms/InstCombine/pow-1.ll b/test/Transforms/InstCombine/pow-1.ll
index c8e5f3806f..8a311f0b74 100644
--- a/test/Transforms/InstCombine/pow-1.ll
+++ b/test/Transforms/InstCombine/pow-1.ll
@@ -30,7 +30,7 @@ define double @test_simplify2(double %x) {
 define float @test_simplify3(float %x) {
 ; CHECK: @test_simplify3
   %retval = call float @powf(float 2.0, float %x)
-; CHECK-NEXT: [[EXP2F:%[a-z0-9]+]] = call float @exp2f(float %x) nounwind readonly
+; CHECK-NEXT: [[EXP2F:%[a-z0-9]+]] = call float @exp2f(float %x) [[NUW_RO:#[0-9]+]]
   ret float %retval
 ; CHECK-NEXT: ret float [[EXP2F]]
 }
@@ -38,7 +38,7 @@ define float @test_simplify3(float %x) {
 define double @test_simplify4(double %x) {
 ; CHECK: @test_simplify4
   %retval = call double @pow(double 2.0, double %x)
-; CHECK-NEXT: [[EXP2:%[a-z0-9]+]] = call double @exp2(double %x) nounwind readonly
+; CHECK-NEXT: [[EXP2:%[a-z0-9]+]] = call double @exp2(double %x) [[NUW_RO]]
   ret double %retval
 ; CHECK-NEXT: ret double [[EXP2]]
 }
@@ -64,8 +64,8 @@ define double @test_simplify6(double %x) {
 define float @test_simplify7(float %x) {
 ; CHECK: @test_simplify7
   %retval = call float @powf(float %x, float 0.5)
-; CHECK-NEXT: [[SQRTF:%[a-z0-9]+]] = call float @sqrtf(float %x) nounwind readonly
-; CHECK-NEXT: [[FABSF:%[a-z0-9]+]] = call float @fabsf(float [[SQRTF]]) nounwind readonly
+; CHECK-NEXT: [[SQRTF:%[a-z0-9]+]] = call float @sqrtf(float %x) [[NUW_RO]]
+; CHECK-NEXT: [[FABSF:%[a-z0-9]+]] = call float @fabsf(float [[SQRTF]]) [[NUW_RO]]
 ; CHECK-NEXT: [[FCMP:%[a-z0-9]+]] = fcmp oeq float %x, 0xFFF0000000000000
 ; CHECK-NEXT: [[SELECT:%[a-z0-9]+]] = select i1 [[FCMP]], float 0x7FF0000000000000, float [[FABSF]]
   ret float %retval
@@ -75,8 +75,8 @@ define float @test_simplify7(float %x) {
 define double @test_simplify8(double %x) {
 ; CHECK: @test_simplify8
   %retval = call double @pow(double %x, double 0.5)
-; CHECK-NEXT: [[SQRT:%[a-z0-9]+]] = call double @sqrt(double %x) nounwind readonly
-; CHECK-NEXT: [[FABS:%[a-z0-9]+]] = call double @fabs(double [[SQRT]]) nounwind readonly
+; CHECK-NEXT: [[SQRT:%[a-z0-9]+]] = call double @sqrt(double %x) [[NUW_RO]]
+; CHECK-NEXT: [[FABS:%[a-z0-9]+]] = call double @fabs(double [[SQRT]]) [[NUW_RO]]
 ; CHECK-NEXT: [[FCMP:%[a-z0-9]+]] = fcmp oeq double %x, 0xFFF0000000000000
 ; CHECK-NEXT: [[SELECT:%[a-z0-9]+]] = select i1 [[FCMP]], double 0x7FF0000000000000, double [[FABS]]
   ret double %retval
@@ -150,3 +150,5 @@ define double @test_simplify16(double %x) {
   ret double %retval
 ; CHECK-NEXT: ret double [[RECIPROCAL]]
 }
+
+; CHECK: attributes [[NUW_RO]] = { nounwind readonly }
diff --git a/test/Transforms/InstCombine/ptr-int-cast.ll b/test/Transforms/InstCombine/ptr-int-cast.ll
index 9524d449dd..7a6ecff9c0 100644
--- a/test/Transforms/InstCombine/ptr-int-cast.ll
+++ b/test/Transforms/InstCombine/ptr-int-cast.ll
@@ -27,3 +27,34 @@ define i64 @f0(i32 %a0) nounwind {
        ret i64 %t1
 }
 
+define <4 x i32> @test4(<4 x i8*> %arg) nounwind {
+; CHECK: @test4
+; CHECK: ptrtoint <4 x i8*> %arg to <4 x i64>
+; CHECK: trunc <4 x i64> %1 to <4 x i32>
+  %p1 = ptrtoint <4 x i8*> %arg to <4 x i32>
+  ret <4 x i32> %p1
+}
+
+define <4 x i128> @test5(<4 x i8*> %arg) nounwind {
+; CHECK: @test5
+; CHECK: ptrtoint <4 x i8*> %arg to <4 x i64>
+; CHECK: zext <4 x i64> %1 to <4 x i128>
+  %p1 = ptrtoint <4 x i8*> %arg to <4 x i128>
+  ret <4 x i128> %p1
+}
+
+define <4 x i8*> @test6(<4 x i32> %arg) nounwind {
+; CHECK: @test6
+; CHECK: zext <4 x i32> %arg to <4 x i64>
+; CHECK: inttoptr <4 x i64> %1 to <4 x i8*>
+  %p1 = inttoptr <4 x i32> %arg to <4 x i8*>
+  ret <4 x i8*> %p1
+}
+
+define <4 x i8*> @test7(<4 x i128> %arg) nounwind {
+; CHECK: @test7
+; CHECK: trunc <4 x i128> %arg to <4 x i64>
+; CHECK: inttoptr <4 x i64> %1 to <4 x i8*>
+  %p1 = inttoptr <4 x i128> %arg to <4 x i8*>
+  ret <4 x i8*> %p1
+}
diff --git a/test/Transforms/InstCombine/sext.ll b/test/Transforms/InstCombine/sext.ll
index f1987973f4..968f37c9c1 100644
--- a/test/Transforms/InstCombine/sext.ll
+++ b/test/Transforms/InstCombine/sext.ll
@@ -184,3 +184,12 @@ define i32 @test16(i16 %x) nounwind {
 ; CHECK-NEXT: %ext = sext i16 %sext to i32
 ; CHECK-NEXT: ret i32 %ext
 }
+
+define i32 @test17(i1 %x) nounwind {
+  %c1 = sext i1 %x to i32
+  %c2 = sub i32 0, %c1
+  ret i32 %c2
+; CHECK: @test17
+; CHECK-NEXT: [[TEST17:%.*]] = zext i1 %x to i32
+; CHECK-NEXT: ret i32 [[TEST17]]
+}
diff --git a/test/Transforms/InstCombine/shift.ll b/test/Transforms/InstCombine/shift.ll
index 32867761a3..41f8aa9ee8 100644
--- a/test/Transforms/InstCombine/shift.ll
+++ b/test/Transforms/InstCombine/shift.ll
@@ -735,3 +735,13 @@ define i32 @test61(i32 %x) {
 ; CHECK: @test61
 ; CHECK: ashr i32 %x, 4
 }
+
+; propagate "exact" trait
+define i32 @test62(i32 %x) {
+  %shr = ashr exact i32 %x, 4
+  %shl = shl i32 %shr, 1
+  %or = or i32 %shl, 1
+  ret i32 %or
+; CHECK: @test62
+; CHECK: ashr exact i32 %x, 3
+}
diff --git a/test/Transforms/InstCombine/sink_instruction.ll b/test/Transforms/InstCombine/sink_instruction.ll
index e521de208f..5c4019a98d 100644
--- a/test/Transforms/InstCombine/sink_instruction.ll
+++ b/test/Transforms/InstCombine/sink_instruction.ll
@@ -1,4 +1,4 @@
-; RUN: opt -instcombine %s -S | FileCheck %s
+; RUN: opt -instcombine -S < %s | FileCheck %s
 
 ;; This tests that the instructions in the entry blocks are sunk into each
 ;; arm of the 'if'.
diff --git a/test/Transforms/InstCombine/sqrt.ll b/test/Transforms/InstCombine/sqrt.ll
index cc78417ebb..440b974851 100644
--- a/test/Transforms/InstCombine/sqrt.ll
+++ b/test/Transforms/InstCombine/sqrt.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -instcombine %s | FileCheck %s
+; RUN: opt -S -instcombine < %s | FileCheck %s
 
 define float @test1(float %x) nounwind readnone ssp {
 entry:
diff --git a/test/Transforms/InstCombine/store.ll b/test/Transforms/InstCombine/store.ll
index 64460d7a6d..164ba76326 100644
--- a/test/Transforms/InstCombine/store.ll
+++ b/test/Transforms/InstCombine/store.ll
@@ -83,3 +83,37 @@ Cont:
 ; CHECK-NEXT:  ret void
 }
 
+
+; PR14753 - merging two stores should preserve the TBAA tag.
+define void @test6(i32 %n, float* %a, i32* %gi) nounwind uwtable ssp {
+entry:
+  store i32 42, i32* %gi, align 4, !tbaa !0
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.body, %entry
+  %storemerge = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %0 = load i32* %gi, align 4, !tbaa !0
+  %cmp = icmp slt i32 %0, %n
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %idxprom = sext i32 %0 to i64
+  %arrayidx = getelementptr inbounds float* %a, i64 %idxprom
+  store float 0.000000e+00, float* %arrayidx, align 4, !tbaa !3
+  %1 = load i32* %gi, align 4, !tbaa !0
+  %inc = add nsw i32 %1, 1
+  store i32 %inc, i32* %gi, align 4, !tbaa !0
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+; CHECK: @test6
+; CHECK: for.cond:
+; CHECK-NEXT: phi i32 [ 42
+; CHECK-NEXT: store i32 %storemerge, i32* %gi, align 4, !tbaa !0
+}
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
+!3 = metadata !{metadata !"float", metadata !1}
diff --git a/test/Transforms/InstCombine/vec_extract_elt.ll b/test/Transforms/InstCombine/vec_extract_elt.ll
index 63e4ee2112..166066a201 100644
--- a/test/Transforms/InstCombine/vec_extract_elt.ll
+++ b/test/Transforms/InstCombine/vec_extract_elt.ll
@@ -7,3 +7,13 @@ define i32 @test(float %f) {
         ret i32 %tmp19
 }
 
+define i64 @test2(i64 %in) {
+  %vec = insertelement <8 x i64> undef, i64 %in, i32 0
+  %splat = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> zeroinitializer
+  %add = add <8 x i64> %splat, <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>
+  %scl1 = extractelement <8 x i64> %add, i32 0
+  %scl2 = extractelement <8 x i64> %add, i32 0
+  %r = add i64 %scl1, %scl2
+  ret i64 %r
+}
+
diff --git a/test/Transforms/InstCombine/vector-casts.ll b/test/Transforms/InstCombine/vector-casts.ll
index 7bbf53c270..2f2990b7b0 100644
--- a/test/Transforms/InstCombine/vector-casts.ll
+++ b/test/Transforms/InstCombine/vector-casts.ll
@@ -64,7 +64,8 @@ entry:
         
 ; CHECK: @test5
 ; CHECK:   sext <4 x i1> %cmp to <4 x i32>	
-; CHECK:   sext <4 x i1> %cmp4 to <4 x i32>	
+; The sext-and pair is canonicalized to a select.
+; CHECK:   select <4 x i1> %cmp4, <4 x i32>	%sext, <4 x i32> zeroinitializer
 }
 
 
diff --git a/test/Transforms/InstCombine/vector_gep1.ll b/test/Transforms/InstCombine/vector_gep1.ll
index f4c75c8009..90ca26212f 100644
--- a/test/Transforms/InstCombine/vector_gep1.ll
+++ b/test/Transforms/InstCombine/vector_gep1.ll
@@ -1,5 +1,5 @@
-; RUN: opt -instcombine %s -disable-output
-; RUN: opt -instsimplify %s -disable-output
+; RUN: opt -instcombine -disable-output < %s
+; RUN: opt -instsimplify -disable-output < %s
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 
diff --git a/test/Transforms/InstCombine/zext-bool-add-sub.ll b/test/Transforms/InstCombine/zext-bool-add-sub.ll
index 78bcedbbc2..b531057550 100644
--- a/test/Transforms/InstCombine/zext-bool-add-sub.ll
+++ b/test/Transforms/InstCombine/zext-bool-add-sub.ll
@@ -4,9 +4,9 @@
 define i32 @a(i1 zeroext %x, i1 zeroext %y) {
 entry:
 ; CHECK: @a
-; CHECK: [[TMP1:%.*]] = zext i1 %y to i32
+; CHECK: [[TMP1:%.*]] = sext i1 %y to i32
 ; CHECK: [[TMP2:%.*]] = select i1 %x, i32 2, i32 1
-; CHECK-NEXT: sub i32 [[TMP2]], [[TMP1]]
+; CHECK-NEXT: add i32 [[TMP2]], [[TMP1]]
   %conv = zext i1 %x to i32
   %conv3 = zext i1 %y to i32
   %conv3.neg = sub i32 0, %conv3
diff --git a/test/Transforms/InstSimplify/call-callconv.ll b/test/Transforms/InstSimplify/call-callconv.ll
new file mode 100644
index 0000000000..e475be781d
--- /dev/null
+++ b/test/Transforms/InstSimplify/call-callconv.ll
@@ -0,0 +1,48 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; Verify that the non-default calling conv doesn't prevent the libcall simplification
+
+@.str = private unnamed_addr constant [4 x i8] c"abc\00", align 1
+
+define arm_aapcscc i32 @_abs(i32 %i) nounwind readnone {
+; CHECK: _abs
+  %call = tail call arm_aapcscc i32 @abs(i32 %i) nounwind readnone
+  ret i32 %call
+; CHECK: %[[ISPOS:.*]] = icmp sgt i32 %i, -1
+; CHECK: %[[NEG:.*]] = sub i32 0, %i
+; CHECK: %[[RET:.*]] = select i1 %[[ISPOS]], i32 %i, i32 %[[NEG]]
+; CHECK: ret i32 %[[RET]]
+}
+
+declare arm_aapcscc i32 @abs(i32) nounwind readnone
+
+define arm_aapcscc i32 @_labs(i32 %i) nounwind readnone {
+; CHECK: _labs
+  %call = tail call arm_aapcscc i32 @labs(i32 %i) nounwind readnone
+  ret i32 %call
+; CHECK: %[[ISPOS:.*]] = icmp sgt i32 %i, -1
+; CHECK: %[[NEG:.*]] = sub i32 0, %i
+; CHECK: %[[RET:.*]] = select i1 %[[ISPOS]], i32 %i, i32 %[[NEG]]
+; CHECK: ret i32 %[[RET]]
+}
+
+declare arm_aapcscc i32 @labs(i32) nounwind readnone
+
+define arm_aapcscc i32 @_strlen1() {
+; CHECK: _strlen1
+  %call = tail call arm_aapcscc i32 @strlen(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0))
+  ret i32 %call
+; CHECK: ret i32 3
+}
+
+declare arm_aapcscc i32 @strlen(i8*)
+
+define arm_aapcscc zeroext i1 @_strlen2(i8* %str) {
+; CHECK: _strlen2
+  %call = tail call arm_aapcscc i32 @strlen(i8* %str)
+  %cmp = icmp ne i32 %call, 0
+  ret i1 %cmp
+
+; CHECK: %[[STRLENFIRST:.*]] = load i8* %str
+; CHECK: %[[CMP:.*]] = icmp ne i8 %[[STRLENFIRST]], 0
+; CHECK: ret i1 %[[CMP]]
+}
diff --git a/test/Transforms/InstSimplify/call.ll b/test/Transforms/InstSimplify/call.ll
new file mode 100644
index 0000000000..cf2f847676
--- /dev/null
+++ b/test/Transforms/InstSimplify/call.ll
@@ -0,0 +1,103 @@
+; RUN: opt < %s -instsimplify -S | FileCheck %s
+
+declare {i8, i1} @llvm.uadd.with.overflow.i8(i8 %a, i8 %b)
+
+define i1 @test_uadd1() {
+; CHECK: @test_uadd1
+  %x = call {i8, i1} @llvm.uadd.with.overflow.i8(i8 254, i8 3)
+  %overflow = extractvalue {i8, i1} %x, 1
+  ret i1 %overflow
+; CHECK-NEXT: ret i1 true
+}
+
+define i8 @test_uadd2() {
+; CHECK: @test_uadd2
+  %x = call {i8, i1} @llvm.uadd.with.overflow.i8(i8 254, i8 44)
+  %result = extractvalue {i8, i1} %x, 0
+  ret i8 %result
+; CHECK-NEXT: ret i8 42
+}
+
+declare i256 @llvm.cttz.i256(i256 %src, i1 %is_zero_undef)
+
+define i256 @test_cttz() {
+; CHECK: @test_cttz
+  %x = call i256 @llvm.cttz.i256(i256 10, i1 false)
+  ret i256 %x
+; CHECK-NEXT: ret i256 1
+}
+
+declare i256 @llvm.ctpop.i256(i256 %src)
+
+define i256 @test_ctpop() {
+; CHECK: @test_ctpop
+  %x = call i256 @llvm.ctpop.i256(i256 10)
+  ret i256 %x
+; CHECK-NEXT: ret i256 2
+}
+
+; Test a non-intrinsic that we know about as a library call.
+declare float @fabs(float %x)
+
+define float @test_fabs_libcall() {
+; CHECK: @test_fabs_libcall
+
+  %x = call float @fabs(float -42.0)
+; This is still a real function call, so instsimplify won't nuke it -- other
+; passes have to do that.
+; CHECK-NEXT: call float @fabs
+
+  ret float %x
+; CHECK-NEXT: ret float 4.2{{0+}}e+01
+}
+
+
+declare float @llvm.fabs.f32(float) nounwind readnone
+declare float @llvm.floor.f32(float) nounwind readnone
+declare float @llvm.ceil.f32(float) nounwind readnone
+declare float @llvm.trunc.f32(float) nounwind readnone
+declare float @llvm.rint.f32(float) nounwind readnone
+declare float @llvm.nearbyint.f32(float) nounwind readnone
+
+; Test idempotent intrinsics
+define float @test_idempotence(float %a) {
+; CHECK: @test_idempotence
+
+; CHECK: fabs
+; CHECK-NOT: fabs
+  %a0 = call float @llvm.fabs.f32(float %a)
+  %a1 = call float @llvm.fabs.f32(float %a0)
+
+; CHECK: floor
+; CHECK-NOT: floor
+  %b0 = call float @llvm.floor.f32(float %a)
+  %b1 = call float @llvm.floor.f32(float %b0)
+
+; CHECK: ceil
+; CHECK-NOT: ceil
+  %c0 = call float @llvm.ceil.f32(float %a)
+  %c1 = call float @llvm.ceil.f32(float %c0)
+
+; CHECK: trunc
+; CHECK-NOT: trunc
+  %d0 = call float @llvm.trunc.f32(float %a)
+  %d1 = call float @llvm.trunc.f32(float %d0)
+
+; CHECK: rint
+; CHECK-NOT: rint
+  %e0 = call float @llvm.rint.f32(float %a)
+  %e1 = call float @llvm.rint.f32(float %e0)
+
+; CHECK: nearbyint
+; CHECK-NOT: nearbyint
+  %f0 = call float @llvm.nearbyint.f32(float %a)
+  %f1 = call float @llvm.nearbyint.f32(float %f0)
+
+  %r0 = fadd float %a1, %b1
+  %r1 = fadd float %r0, %c1
+  %r2 = fadd float %r1, %d1
+  %r3 = fadd float %r2, %e1
+  %r4 = fadd float %r3, %f1
+
+  ret float %r4
+}
diff --git a/test/Transforms/InstSimplify/compare.ll b/test/Transforms/InstSimplify/compare.ll
index 56627b99a4..0ecfb1f8d2 100644
--- a/test/Transforms/InstSimplify/compare.ll
+++ b/test/Transforms/InstSimplify/compare.ll
@@ -647,3 +647,38 @@ unreachableblock:
   %Y = icmp eq i32* %X, null
   ret i1 %Y
 }
+
+; It's not valid to fold a comparison of an argument with an alloca, even though
+; that's tempting. An argument can't *alias* an alloca, however the aliasing rule
+; relies on restrictions against guessing an object's address and dereferencing.
+; There are no restrictions against guessing an object's address and comparing.
+
+define i1 @alloca_argument_compare(i64* %arg) {
+  %alloc = alloca i64
+  %cmp = icmp eq i64* %arg, %alloc
+  ret i1 %cmp
+  ; CHECK: alloca_argument_compare
+  ; CHECK: ret i1 %cmp
+}
+
+; As above, but with the operands reversed.
+
+define i1 @alloca_argument_compare_swapped(i64* %arg) {
+  %alloc = alloca i64
+  %cmp = icmp eq i64* %alloc, %arg
+  ret i1 %cmp
+  ; CHECK: alloca_argument_compare_swapped
+  ; CHECK: ret i1 %cmp
+}
+
+; Don't assume that a noalias argument isn't equal to a global variable's
+; address. This is an example where AliasAnalysis' NoAlias concept is
+; different from actual pointer inequality.
+
+@y = external global i32
+define zeroext i1 @external_compare(i32* noalias %x) {
+  %cmp = icmp eq i32* %x, @y
+  ret i1 %cmp
+  ; CHECK: external_compare
+  ; CHECK: ret i1 %cmp
+}
diff --git a/test/Transforms/InstSimplify/fast-math.ll b/test/Transforms/InstSimplify/fast-math.ll
index e4b3ea306a..154b967397 100644
--- a/test/Transforms/InstSimplify/fast-math.ll
+++ b/test/Transforms/InstSimplify/fast-math.ll
@@ -33,3 +33,75 @@ define float @no_mul_zero_3(float %a) {
 ; CHECK: ret float %b
   ret float %b
 }
+
+; fadd [nnan ninf] X, (fsub [nnan ninf] 0, X) ==> 0
+;   where nnan and ninf have to occur at least once somewhere in this
+;   expression
+; CHECK: fadd_fsub_0
+define float @fadd_fsub_0(float %a) {
+; X + -X ==> 0
+  %t1 = fsub nnan ninf float 0.0, %a
+  %zero1 = fadd nnan ninf float %t1, %a
+
+  %t2 = fsub nnan float 0.0, %a
+  %zero2 = fadd ninf float %t2, %a
+
+  %t3 = fsub nnan ninf float 0.0, %a
+  %zero3 = fadd float %t3, %a
+
+  %t4 = fsub float 0.0, %a
+  %zero4 = fadd nnan ninf float %t4, %a
+
+; Dont fold this
+; CHECK: %nofold = fsub float 0.0
+  %nofold = fsub float 0.0, %a
+; CHECK: %no_zero = fadd nnan float %nofold, %a
+  %no_zero = fadd nnan float %nofold, %a
+
+; Coalesce the folded zeros
+  %zero5 = fadd float %zero1, %zero2
+  %zero6 = fadd float %zero3, %zero4
+  %zero7 = fadd float %zero5, %zero6
+
+; Should get folded
+  %ret = fadd nsz float %no_zero, %zero7
+
+; CHECK: ret float %no_zero
+  ret float %ret
+}
+
+; fsub nnan ninf x, x ==> 0.0
+; CHECK: @fsub_x_x
+define float @fsub_x_x(float %a) {
+; X - X ==> 0
+  %zero1 = fsub nnan ninf float %a, %a
+
+; Dont fold
+; CHECK: %no_zero1 = fsub
+  %no_zero1 = fsub ninf float %a, %a
+; CHECK: %no_zero2 = fsub
+  %no_zero2 = fsub nnan float %a, %a
+; CHECK: %no_zero = fadd
+  %no_zero = fadd float %no_zero1, %no_zero2
+
+; Should get folded
+  %ret = fadd nsz float %no_zero, %zero1
+
+; CHECK: ret float %no_zero
+  ret float %ret
+}
+
+; fadd nsz X, 0 ==> X
+; CHECK: @nofold_fadd_x_0
+define float @nofold_fadd_x_0(float %a) {
+; Dont fold
+; CHECK: %no_zero1 = fadd
+  %no_zero1 = fadd ninf float %a, 0.0
+; CHECK: %no_zero2 = fadd
+  %no_zero2 = fadd nnan float %a, 0.0
+; CHECK: %no_zero = fadd
+  %no_zero = fadd float %no_zero1, %no_zero2
+
+; CHECK: ret float %no_zero
+  ret float %no_zero
+}
diff --git a/test/Transforms/InstSimplify/floating-point-arithmetic.ll b/test/Transforms/InstSimplify/floating-point-arithmetic.ll
new file mode 100644
index 0000000000..f9c364cade
--- /dev/null
+++ b/test/Transforms/InstSimplify/floating-point-arithmetic.ll
@@ -0,0 +1,35 @@
+; RUN: opt < %s -instsimplify -S | FileCheck %s
+
+; fsub 0, (fsub 0, X) ==> X
+; CHECK: @fsub_0_0_x
+define float @fsub_0_0_x(float %a) {
+  %t1 = fsub float -0.0, %a
+  %ret = fsub float -0.0, %t1
+
+; CHECK: ret float %a
+  ret float %ret
+}
+
+; fsub X, 0 ==> X
+; CHECK: @fsub_x_0
+define float @fsub_x_0(float %a) {
+  %ret = fsub float %a, 0.0
+; CHECK ret float %a
+  ret float %ret
+}
+
+; fadd X, -0 ==> X
+; CHECK: @fadd_x_n0
+define float @fadd_x_n0(float %a) {
+  %ret = fadd float %a, -0.0
+; CHECK ret float %a
+  ret float %ret
+}
+
+; fmul X, 1.0 ==> X
+; CHECK: @fmul_X_1
+define double @fmul_X_1(double %a) {
+  %b = fmul double 1.000000e+00, %a                ; <double> [#uses=1]
+  ; CHECK: ret double %a
+  ret double %b
+}
diff --git a/test/Transforms/InstSimplify/past-the-end.ll b/test/Transforms/InstSimplify/past-the-end.ll
new file mode 100644
index 0000000000..075da4a24b
--- /dev/null
+++ b/test/Transforms/InstSimplify/past-the-end.ll
@@ -0,0 +1,77 @@
+; RUN: opt < %s -instsimplify -S | FileCheck %s
+target datalayout = "p:32:32"
+
+; Check some past-the-end subtleties.
+
+@opte_a = global i32 0
+@opte_b = global i32 0
+
+; Comparing base addresses of two distinct globals. Never equal.
+
+define zeroext i1 @no_offsets() {
+  %t = icmp eq i32* @opte_a, @opte_b
+  ret i1 %t
+  ; CHECK: no_offsets(
+  ; CHECK: ret i1 false
+}
+
+; Comparing past-the-end addresses of two distinct globals. Never equal.
+
+define zeroext i1 @both_past_the_end() {
+  %x = getelementptr i32* @opte_a, i32 1
+  %y = getelementptr i32* @opte_b, i32 1
+  %t = icmp eq i32* %x, %y
+  ret i1 %t
+  ; CHECK: both_past_the_end(
+  ; CHECK-NOT: ret i1 true
+  ; TODO: refine this
+}
+
+; Comparing past-the-end addresses of one global to the base address
+; of another. Can't fold this.
+
+define zeroext i1 @just_one_past_the_end() {
+  %x = getelementptr i32* @opte_a, i32 1
+  %t = icmp eq i32* %x, @opte_b
+  ret i1 %t
+  ; CHECK: just_one_past_the_end(
+  ; CHECK: ret i1 icmp eq (i32* getelementptr inbounds (i32* @opte_a, i32 1), i32* @opte_b)
+}
+
+; Comparing base addresses of two distinct allocas. Never equal.
+
+define zeroext i1 @no_alloca_offsets() {
+  %m = alloca i32
+  %n = alloca i32
+  %t = icmp eq i32* %m, %n
+  ret i1 %t
+  ; CHECK: no_alloca_offsets(
+  ; CHECK: ret i1 false
+}
+
+; Comparing past-the-end addresses of two distinct allocas. Never equal.
+
+define zeroext i1 @both_past_the_end_alloca() {
+  %m = alloca i32
+  %n = alloca i32
+  %x = getelementptr i32* %m, i32 1
+  %y = getelementptr i32* %n, i32 1
+  %t = icmp eq i32* %x, %y
+  ret i1 %t
+  ; CHECK: both_past_the_end_alloca(
+  ; CHECK-NOT: ret i1 true
+  ; TODO: refine this
+}
+
+; Comparing past-the-end addresses of one alloca to the base address
+; of another. Can't fold this.
+
+define zeroext i1 @just_one_past_the_end_alloca() {
+  %m = alloca i32
+  %n = alloca i32
+  %x = getelementptr i32* %m, i32 1
+  %t = icmp eq i32* %x, %n
+  ret i1 %t
+  ; CHECK: just_one_past_the_end_alloca(
+  ; CHECK: ret i1 %t
+}
diff --git a/test/Transforms/InstSimplify/ptr_diff.ll b/test/Transforms/InstSimplify/ptr_diff.ll
index 1eb1fd4c09..8b4aa79601 100644
--- a/test/Transforms/InstSimplify/ptr_diff.ll
+++ b/test/Transforms/InstSimplify/ptr_diff.ll
@@ -46,3 +46,33 @@ define i64 @ptrdiff3(i8* %ptr) {
   %diff = sub i64 %last.int, %first.int
   ret i64 %diff
 }
+
+define <4 x i32> @ptrdiff4(<4 x i8*> %arg) nounwind {
+; Handle simple cases of vectors of pointers.
+; CHECK: @ptrdiff4
+; CHECK: ret <4 x i32> zeroinitializer
+  %p1 = ptrtoint <4 x i8*> %arg to <4 x i32>
+  %bc = bitcast <4 x i8*> %arg to <4 x i32*>
+  %p2 = ptrtoint <4 x i32*> %bc to <4 x i32>
+  %sub = sub <4 x i32> %p1, %p2
+  ret <4 x i32> %sub
+}
+
+%struct.ham = type { i32, [2 x [2 x i32]] }
+
+@global = internal global %struct.ham zeroinitializer, align 4
+
+define i32 @ptrdiff5() nounwind {
+bb:
+  %tmp = getelementptr inbounds %struct.ham* @global, i32 0, i32 1
+  %tmp1 = getelementptr inbounds [2 x [2 x i32]]* %tmp, i32 0, i32 0
+  %tmp2 = bitcast [2 x i32]* %tmp1 to i32*
+  %tmp3 = ptrtoint i32* %tmp2 to i32
+  %tmp4 = getelementptr inbounds %struct.ham* @global, i32 0, i32 1
+  %tmp5 = getelementptr inbounds [2 x [2 x i32]]* %tmp4, i32 0, i32 0
+  %tmp6 = ptrtoint [2 x i32]* %tmp5 to i32
+  %tmp7 = sub i32 %tmp3, %tmp6
+  ret i32 %tmp7
+; CHECK: @ptrdiff5
+; CHECK: ret i32 0
+}
diff --git a/test/Transforms/InstSimplify/vector_gep.ll b/test/Transforms/InstSimplify/vector_gep.ll
index f65260e00f..5ac1ddef64 100644
--- a/test/Transforms/InstSimplify/vector_gep.ll
+++ b/test/Transforms/InstSimplify/vector_gep.ll
@@ -1,4 +1,4 @@
-;RUN: opt -instsimplify %s -disable-output
+;RUN: opt -instsimplify -disable-output < %s
 declare void @helper(<2 x i8*>)
 define void @test(<2 x i8*> %a) {
   %A = getelementptr <2 x i8*> %a, <2 x i32> <i32 0, i32 0>
diff --git a/test/Transforms/JumpThreading/basic.ll b/test/Transforms/JumpThreading/basic.ll
index 46271379bd..fe3dc77c9c 100644
--- a/test/Transforms/JumpThreading/basic.ll
+++ b/test/Transforms/JumpThreading/basic.ll
@@ -1,4 +1,4 @@
-; RUN: opt %s -jump-threading -S | FileCheck %s
+; RUN: opt -jump-threading -S < %s | FileCheck %s
 
 declare i32 @f1()
 declare i32 @f2()
@@ -476,3 +476,41 @@ exit1:
 ; CHECK: }
 }
 
+; In this test we check that block duplication is inhibited by the presence
+; of a function with the 'noduplicate' attribute.
+
+declare void @g()
+declare void @j()
+declare void @k()
+
+; CHECK: define void @h(i32 %p) {
+define void @h(i32 %p) {
+  %x = icmp ult i32 %p, 5
+  br i1 %x, label %l1, label %l2
+
+l1:
+  call void @j()
+  br label %l3
+
+l2:
+  call void @k()
+  br label %l3
+
+l3:
+; CHECK: call void @g() [[NOD:#[0-9]+]]
+; CHECK-NOT: call void @g() [[NOD]]
+  call void @g() noduplicate
+  %y = icmp ult i32 %p, 5
+  br i1 %y, label %l4, label %l5
+
+l4:
+  call void @j()
+  ret void
+
+l5:
+  call void @k()
+  ret void
+; CHECK: }
+}
+
+; CHECK: attributes [[NOD]] = { noduplicate }
diff --git a/test/Transforms/JumpThreading/degenerate-phi.ll b/test/Transforms/JumpThreading/degenerate-phi.ll
index 35d9fdec42..2905b43af7 100644
--- a/test/Transforms/JumpThreading/degenerate-phi.ll
+++ b/test/Transforms/JumpThreading/degenerate-phi.ll
@@ -1,4 +1,4 @@
-; RUN: opt -jump-threading -disable-output %s
+; RUN: opt -jump-threading -disable-output < %s
 ; PR9112
 
 ; This is actually a test for value tracking. Jump threading produces
diff --git a/test/Transforms/JumpThreading/or-undef.ll b/test/Transforms/JumpThreading/or-undef.ll
index 6e359925b6..6311b6df43 100644
--- a/test/Transforms/JumpThreading/or-undef.ll
+++ b/test/Transforms/JumpThreading/or-undef.ll
@@ -1,4 +1,4 @@
-; RUN: opt -jump-threading -S %s | FileCheck %s
+; RUN: opt -jump-threading -S < %s | FileCheck %s
 ; rdar://7620633
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
diff --git a/test/Transforms/LICM/2003-12-11-SinkingToPHI.ll b/test/Transforms/LICM/2003-12-11-SinkingToPHI.ll
index fe8d445313..2bf2604162 100644
--- a/test/Transforms/LICM/2003-12-11-SinkingToPHI.ll
+++ b/test/Transforms/LICM/2003-12-11-SinkingToPHI.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -licm | lli %defaultjit
+; RUN: opt < %s -licm | lli -force-interpreter
 
 define i32 @main() {
 entry:
diff --git a/test/Transforms/LICM/2011-07-06-Alignment.ll b/test/Transforms/LICM/2011-07-06-Alignment.ll
index f97b7010bc..569231489f 100644
--- a/test/Transforms/LICM/2011-07-06-Alignment.ll
+++ b/test/Transforms/LICM/2011-07-06-Alignment.ll
@@ -1,4 +1,4 @@
-; RUN: opt -licm -S %s | FileCheck %s
+; RUN: opt -licm -S < %s | FileCheck %s
 
 @A = common global [1024 x float] zeroinitializer, align 4
 
diff --git a/test/Transforms/LICM/hoist-invariant-load.ll b/test/Transforms/LICM/Stats/hoist-invariant-load.ll
index f9fc551df3..f9fc551df3 100644
--- a/test/Transforms/LICM/hoist-invariant-load.ll
+++ b/test/Transforms/LICM/Stats/hoist-invariant-load.ll
diff --git a/test/Transforms/LICM/Stats/lit.local.cfg b/test/Transforms/LICM/Stats/lit.local.cfg
new file mode 100644
index 0000000000..89c0cd9d48
--- /dev/null
+++ b/test/Transforms/LICM/Stats/lit.local.cfg
@@ -0,0 +1,4 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+if not config.root.enable_assertions:
+    config.unsupported = True
diff --git a/test/Transforms/LICM/crash.ll b/test/Transforms/LICM/crash.ll
index de41d008a7..b43477a56d 100644
--- a/test/Transforms/LICM/crash.ll
+++ b/test/Transforms/LICM/crash.ll
@@ -1,4 +1,4 @@
-; RUN: opt -licm %s -disable-output
+; RUN: opt -licm -disable-output < %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-apple-darwin10.0.0"
diff --git a/test/Transforms/LICM/hoisting.ll b/test/Transforms/LICM/hoisting.ll
index 98f93345e3..1ca377eb4a 100644
--- a/test/Transforms/LICM/hoisting.ll
+++ b/test/Transforms/LICM/hoisting.ll
@@ -90,3 +90,29 @@ for.end:                                          ; preds = %for.body
 
 declare void @foo_may_call_exit(i32)
 
+; PR14854
+; CHECK: @test5
+; CHECK: extractvalue
+; CHECK: br label %tailrecurse
+; CHECK: tailrecurse:
+; CHECK: ifend:
+; CHECK: insertvalue
+define { i32*, i32 } @test5(i32 %i, { i32*, i32 } %e) {
+entry:
+  br label %tailrecurse
+
+tailrecurse:                                      ; preds = %then, %entry
+  %i.tr = phi i32 [ %i, %entry ], [ %cmp2, %then ]
+  %out = extractvalue { i32*, i32 } %e, 1
+  %d = insertvalue { i32*, i32 } %e, i32* null, 0
+  %cmp1 = icmp sgt i32 %out, %i.tr
+  br i1 %cmp1, label %then, label %ifend
+
+then:                                             ; preds = %tailrecurse
+  call void @foo()
+  %cmp2 = add i32 %i.tr, 1
+  br label %tailrecurse
+
+ifend:                                            ; preds = %tailrecurse
+  ret { i32*, i32 } %d
+}
diff --git a/test/Transforms/LICM/scalar_promote.ll b/test/Transforms/LICM/scalar_promote.ll
index 05a64d6322..e7eab92aa8 100644
--- a/test/Transforms/LICM/scalar_promote.ll
+++ b/test/Transforms/LICM/scalar_promote.ll
@@ -1,28 +1,28 @@
-; RUN: opt < %s -basicaa -licm -S | FileCheck %s
+; RUN: opt < %s -basicaa -tbaa -licm -S | FileCheck %s
 target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
 
-@X = global i32 7		; <i32*> [#uses=4]
+@X = global i32 7   ; <i32*> [#uses=4]
 
 define void @test1(i32 %i) {
 Entry:
-	br label %Loop
+  br label %Loop
 ; CHECK: @test1
 ; CHECK: Entry:
 ; CHECK-NEXT:   load i32* @X
 ; CHECK-NEXT:   br label %Loop
 
 
-Loop:		; preds = %Loop, %0
-	%j = phi i32 [ 0, %Entry ], [ %Next, %Loop ]		; <i32> [#uses=1]
-	%x = load i32* @X		; <i32> [#uses=1]
-	%x2 = add i32 %x, 1		; <i32> [#uses=1]
-	store i32 %x2, i32* @X
-	%Next = add i32 %j, 1		; <i32> [#uses=2]
-	%cond = icmp eq i32 %Next, 0		; <i1> [#uses=1]
-	br i1 %cond, label %Out, label %Loop
+Loop:   ; preds = %Loop, %0
+  %j = phi i32 [ 0, %Entry ], [ %Next, %Loop ]    ; <i32> [#uses=1]
+  %x = load i32* @X   ; <i32> [#uses=1]
+  %x2 = add i32 %x, 1   ; <i32> [#uses=1]
+  store i32 %x2, i32* @X
+  %Next = add i32 %j, 1   ; <i32> [#uses=2]
+  %cond = icmp eq i32 %Next, 0    ; <i1> [#uses=1]
+  br i1 %cond, label %Out, label %Loop
 
-Out:	
-	ret void
+Out:
+  ret void
 ; CHECK: Out:
 ; CHECK-NEXT:   store i32 %x2, i32* @X
 ; CHECK-NEXT:   ret void
@@ -31,22 +31,22 @@ Out:
 
 define void @test2(i32 %i) {
 Entry:
-	br label %Loop
+  br label %Loop
 ; CHECK: @test2
 ; CHECK: Entry:
 ; CHECK-NEXT:    %.promoted = load i32* getelementptr inbounds (i32* @X, i64 1)
 ; CHECK-NEXT:    br label %Loop
 
-Loop:		; preds = %Loop, %0
-	%X1 = getelementptr i32* @X, i64 1		; <i32*> [#uses=1]
-	%A = load i32* %X1		; <i32> [#uses=1]
-	%V = add i32 %A, 1		; <i32> [#uses=1]
-	%X2 = getelementptr i32* @X, i64 1		; <i32*> [#uses=1]
-	store i32 %V, i32* %X2
-	br i1 false, label %Loop, label %Exit
+Loop:   ; preds = %Loop, %0
+  %X1 = getelementptr i32* @X, i64 1    ; <i32*> [#uses=1]
+  %A = load i32* %X1    ; <i32> [#uses=1]
+  %V = add i32 %A, 1    ; <i32> [#uses=1]
+  %X2 = getelementptr i32* @X, i64 1    ; <i32*> [#uses=1]
+  store i32 %V, i32* %X2
+  br i1 false, label %Loop, label %Exit
 
-Exit:		; preds = %Loop
-	ret void
+Exit:   ; preds = %Loop
+  ret void
 ; CHECK: Exit:
 ; CHECK-NEXT:   store i32 %V, i32* getelementptr inbounds (i32* @X, i64 1)
 ; CHECK-NEXT:   ret void
@@ -56,19 +56,19 @@ Exit:		; preds = %Loop
 
 define void @test3(i32 %i) {
 ; CHECK: @test3
-	br label %Loop
+  br label %Loop
 Loop:
         ; Should not promote this to a register
-	%x = load volatile i32* @X
-	%x2 = add i32 %x, 1	
-	store i32 %x2, i32* @X
-	br i1 true, label %Out, label %Loop
-        
+  %x = load volatile i32* @X
+  %x2 = add i32 %x, 1
+  store i32 %x2, i32* @X
+  br i1 true, label %Out, label %Loop
+
 ; CHECK: Loop:
 ; CHECK-NEXT: load volatile
 
-Out:		; preds = %Loop
-	ret void
+Out:    ; preds = %Loop
+  ret void
 }
 
 ; PR8041
@@ -120,27 +120,27 @@ exit:
 
 define void @test5(i32 %i, i32** noalias %P2) {
 Entry:
-	br label %Loop
+  br label %Loop
 ; CHECK: @test5
 ; CHECK: Entry:
 ; CHECK-NEXT:   load i32* @X
 ; CHECK-NEXT:   br label %Loop
 
 
-Loop:		; preds = %Loop, %0
-	%j = phi i32 [ 0, %Entry ], [ %Next, %Loop ]		; <i32> [#uses=1]
-	%x = load i32* @X		; <i32> [#uses=1]
-	%x2 = add i32 %x, 1		; <i32> [#uses=1]
-	store i32 %x2, i32* @X
-        
+Loop:   ; preds = %Loop, %0
+  %j = phi i32 [ 0, %Entry ], [ %Next, %Loop ]    ; <i32> [#uses=1]
+  %x = load i32* @X   ; <i32> [#uses=1]
+  %x2 = add i32 %x, 1   ; <i32> [#uses=1]
+  store i32 %x2, i32* @X
+
         store volatile i32* @X, i32** %P2
-        
-	%Next = add i32 %j, 1		; <i32> [#uses=2]
-	%cond = icmp eq i32 %Next, 0		; <i1> [#uses=1]
-	br i1 %cond, label %Out, label %Loop
 
-Out:	
-	ret void
+  %Next = add i32 %j, 1   ; <i32> [#uses=2]
+  %cond = icmp eq i32 %Next, 0    ; <i1> [#uses=1]
+  br i1 %cond, label %Out, label %Loop
+
+Out:
+  ret void
 ; CHECK: Out:
 ; CHECK-NEXT:   store i32 %x2, i32* @X
 ; CHECK-NEXT:   ret void
@@ -148,3 +148,40 @@ Out:
 }
 
 
+; PR14753 - Preserve TBAA tags when promoting values in a loop.
+define void @test6(i32 %n, float* nocapture %a, i32* %gi) {
+entry:
+  store i32 0, i32* %gi, align 4, !tbaa !0
+  %cmp1 = icmp slt i32 0, %n
+  br i1 %cmp1, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %storemerge2 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
+  %idxprom = sext i32 %storemerge2 to i64
+  %arrayidx = getelementptr inbounds float* %a, i64 %idxprom
+  store float 0.000000e+00, float* %arrayidx, align 4, !tbaa !3
+  %0 = load i32* %gi, align 4, !tbaa !0
+  %inc = add nsw i32 %0, 1
+  store i32 %inc, i32* %gi, align 4, !tbaa !0
+  %cmp = icmp slt i32 %inc, %n
+  br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge
+
+for.cond.for.end_crit_edge:                       ; preds = %for.body
+  br label %for.end
+
+for.end:                                          ; preds = %for.cond.for.end_crit_edge, %entry
+  ret void
+
+; CHECK: for.body.lr.ph:
+; CHECK-NEXT:  %gi.promoted = load i32* %gi, align 4, !tbaa !0
+; CHECK: for.cond.for.end_crit_edge:
+; CHECK-NEXT:  store i32 %inc, i32* %gi, align 4, !tbaa !0
+}
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
+!3 = metadata !{metadata !"float", metadata !1}
diff --git a/test/Transforms/LoopDeletion/2011-06-21-phioperands.ll b/test/Transforms/LoopDeletion/2011-06-21-phioperands.ll
index 40c6629e6f..cf9d8ce923 100644
--- a/test/Transforms/LoopDeletion/2011-06-21-phioperands.ll
+++ b/test/Transforms/LoopDeletion/2011-06-21-phioperands.ll
@@ -1,4 +1,4 @@
-; RUN: opt %s -loop-deletion -disable-output
+; RUN: opt -loop-deletion -disable-output < %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 
diff --git a/test/Transforms/LoopDeletion/simplify-then-delete.ll b/test/Transforms/LoopDeletion/simplify-then-delete.ll
index 5a21672a59..4278ef16d2 100644
--- a/test/Transforms/LoopDeletion/simplify-then-delete.ll
+++ b/test/Transforms/LoopDeletion/simplify-then-delete.ll
@@ -4,7 +4,7 @@
 ; Indvars and loop deletion should be able to eliminate all looping
 ; in this testcase.
 
-; CHECK:      define i32 @pmat(i32 %m, i32 %n, double* %y) nounwind {
+; CHECK:      define i32 @pmat(i32 %m, i32 %n, double* %y) #0 {
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT:   ret i32 0
 ; CHECK-NEXT: }
@@ -63,3 +63,5 @@ w.e:
 w.e12:
   ret i32 0
 }
+
+; CHECK: attributes #0 = { nounwind }
diff --git a/test/Transforms/LoopIdiom/X86/popcnt.ll b/test/Transforms/LoopIdiom/X86/popcnt.ll
index 2f458fb2f1..25df93d3a0 100644
--- a/test/Transforms/LoopIdiom/X86/popcnt.ll
+++ b/test/Transforms/LoopIdiom/X86/popcnt.ll
@@ -118,3 +118,23 @@ while.end:                                        ; preds = %while.body, %entry
   %c.0.lcssa = phi i32 [ 0, %entry ], [ %inc, %while.body ]
   ret i32 %c.0.lcssa
 }
+
+define i32 @PopCntCrash3(i64 %a, i32 %x) {
+entry:
+  %tobool3 = icmp eq i64 %a, 0
+  %cmp = icmp eq i32 %x, 0
+  br i1 %tobool3, label %while.end, label %while.body
+
+while.body:                                       ; preds = %entry, %while.body
+  %c.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ]
+  %a.addr.04 = phi i64 [ %and, %while.body ], [ %a, %entry ]
+  %inc = add nsw i32 %c.05, 1
+  %sub = add i64 %a.addr.04, -1
+  %and = and i64 %sub, %a.addr.04
+  %tobool = icmp eq i64 %and, 0
+  br i1 %cmp, label %while.end, label %while.body
+
+while.end:                                        ; preds = %while.body, %entry
+  %c.0.lcssa = phi i32 [ 0, %entry ], [ %inc, %while.body ]
+  ret i32 %c.0.lcssa
+}
diff --git a/test/Transforms/LoopRotate/basic.ll b/test/Transforms/LoopRotate/basic.ll
index b7bcb21d56..78878f9fa6 100644
--- a/test/Transforms/LoopRotate/basic.ll
+++ b/test/Transforms/LoopRotate/basic.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -loop-rotate %s | FileCheck %s
+; RUN: opt -S -loop-rotate < %s | FileCheck %s
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-apple-darwin10.0.0"
 
@@ -33,3 +33,29 @@ for.end:                                          ; preds = %for.cond
 
 declare void @g(i32*)
 
+; CHECK: @test2
+define void @test2() nounwind ssp {
+entry:
+  %array = alloca [20 x i32], align 16
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.body, %entry
+  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %cmp = icmp slt i32 %i.0, 100
+; CHECK: call void @f
+; CHECK-NOT: call void @f
+  call void @f() noduplicate 
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %inc = add nsw i32 %i.0, 1
+  call void @h()
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+; CHECK: }
+}
+
+declare void @f() noduplicate
+declare void @h()
diff --git a/test/Transforms/LoopRotate/crash.ll b/test/Transforms/LoopRotate/crash.ll
index 954b834765..fd922cb556 100644
--- a/test/Transforms/LoopRotate/crash.ll
+++ b/test/Transforms/LoopRotate/crash.ll
@@ -1,4 +1,4 @@
-; RUN: opt -loop-rotate %s -disable-output -verify-dom-info -verify-loop-info
+; RUN: opt -loop-rotate -disable-output -verify-dom-info -verify-loop-info < %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-apple-darwin10.0.0"
diff --git a/test/Transforms/LoopRotate/dbgvalue.ll b/test/Transforms/LoopRotate/dbgvalue.ll
index b32ee82d3a..6a8d30820f 100644
--- a/test/Transforms/LoopRotate/dbgvalue.ll
+++ b/test/Transforms/LoopRotate/dbgvalue.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -loop-rotate  %s  | FileCheck %s
+; RUN: opt -S -loop-rotate < %s | FileCheck %s
 
 declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
diff --git a/test/Transforms/LoopRotate/phi-duplicate.ll b/test/Transforms/LoopRotate/phi-duplicate.ll
index 7372830922..8ad2dce71a 100644
--- a/test/Transforms/LoopRotate/phi-duplicate.ll
+++ b/test/Transforms/LoopRotate/phi-duplicate.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S %s -loop-rotate | FileCheck %s
+; RUN: opt -S -loop-rotate < %s | FileCheck %s
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-apple-darwin10.0"
 
diff --git a/test/Transforms/LoopStrengthReduce/2012-07-18-LimitReassociate.ll b/test/Transforms/LoopStrengthReduce/2012-07-18-LimitReassociate.ll
index 3793baccbb..9524be3cee 100644
--- a/test/Transforms/LoopStrengthReduce/2012-07-18-LimitReassociate.ll
+++ b/test/Transforms/LoopStrengthReduce/2012-07-18-LimitReassociate.ll
@@ -1,4 +1,4 @@
-; RUN: opt -loop-reduce -disable-output -debug-only=loop-reduce %s 2> %t
+; RUN: opt -loop-reduce -disable-output -debug-only=loop-reduce < %s 2> %t
 ; RUN: FileCheck %s < %t
 ; REQUIRES: asserts
 ;
@@ -10,15 +10,13 @@
 ; CHECK: After generating reuse formulae:
 ; CHECK: LSR is examining the following uses:
 ; CHECK: LSR Use: Kind=Special
-; CHECK: {{.*reg\(\{\{\{\{\{\{\{\{\{}}
-; CHECK: {{.*reg\(\{\{\{\{\{\{\{\{\{}}
-; CHECK: {{.*reg\(\{\{\{\{\{\{\{\{\{}}
-; CHECK: {{.*reg\(\{\{\{\{\{\{\{\{\{}}
-; CHECK: {{.*reg\(\{\{\{\{\{\{\{\{\{}}
+; CHECK: {{.*reg\(\{.*\{.*\{.*\{.*\{.*\{.*\{.*\{.*\{}}
+; CHECK: {{.*reg\(\{.*\{.*\{.*\{.*\{.*\{.*\{.*\{.*\{}}
+; CHECK: {{.*reg\(\{.*\{.*\{.*\{.*\{.*\{.*\{.*\{.*\{}}
+; CHECK: {{.*reg\(\{.*\{.*\{.*\{.*\{.*\{.*\{.*\{.*\{}}
+; CHECK: {{.*reg\(\{.*\{.*\{.*\{.*\{.*\{.*\{.*\{.*\{}}
 ; CHECK-NOT:reg
 ; CHECK: Filtering for use
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-freebsd9"
 
 %struct.snork = type { %struct.fuga, i32, i32, i32, i32, i32, i32 }
 %struct.fuga = type { %struct.gork, i64 }
diff --git a/test/Transforms/LoopStrengthReduce/2013-01-05-IndBr.ll b/test/Transforms/LoopStrengthReduce/2013-01-05-IndBr.ll
new file mode 100644
index 0000000000..bce234cd40
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/2013-01-05-IndBr.ll
@@ -0,0 +1,44 @@
+; RUN: opt -loop-reduce -S < %s | FileCheck %s
+;
+; Indirect branch in the preheader crashes replaceCongruentIVs.
+; rdar://12910141
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32-S128"
+
+; CHECK: @test
+; CHECK: bb8:
+; CHECK-NEXT: phi i8
+; CHECK-NEXT: phi i8
+; CHECK: ret void
+define void @test() nounwind ssp {
+bb:
+  br label %bb190
+
+bb8:                                              ; preds = %bb190, %bb11
+  %tmp = phi i8 [ %tmp14, %bb11 ], [ 25, %bb190 ]
+  %tmp9 = phi i8 [ %tmp12, %bb11 ], [ 25, %bb190 ]
+  %tmp10 = add i8 %tmp, -5
+  indirectbr i8* undef, [label %bb11, label %bb15]
+
+bb11:                                             ; preds = %bb8
+  %tmp12 = add i8 %tmp9, 1
+  %tmp13 = add i8 %tmp9, -19
+  %tmp14 = add i8 %tmp, 1
+  indirectbr i8* undef, [label %bb8]
+
+bb15:                                             ; preds = %bb8
+  indirectbr i8* undef, [label %bb16]
+
+bb16:                                             ; preds = %bb16, %bb15
+  indirectbr i8* undef, [label %bb37, label %bb190]
+
+
+bb37:                                             ; preds = %bb190
+  indirectbr i8* undef, [label %bb38]
+
+bb38:                                             ; preds = %bb37, %bb5
+  ret void
+
+bb190:                                            ; preds = %bb189, %bb187
+  indirectbr i8* undef, [label %bb37, label %bb8]
+}
diff --git a/test/Transforms/LoopStrengthReduce/2013-01-14-ReuseCast.ll b/test/Transforms/LoopStrengthReduce/2013-01-14-ReuseCast.ll
new file mode 100644
index 0000000000..8fbddf8ae4
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/2013-01-14-ReuseCast.ll
@@ -0,0 +1,84 @@
+; RUN: opt -loop-reduce -S < %s | FileCheck %s
+;
+; LTO of clang, which mistakenly uses no TargetLoweringInfo, causes a
+; miscompile. ReuseOrCreateCast replace ptrtoint operand with undef.
+; Reproducing the miscompile requires no triple, hence no "TTI".
+; rdar://13007381
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+; Verify that nothing uses the "dead" ptrtoint from "undef".
+; CHECK: @VerifyDiagnosticConsumerTest
+; CHECK: bb:
+; CHECK: %0 = ptrtoint i8* undef to i64
+; CHECK-NOT: %0
+; CHECK: .lr.ph
+; CHECK-NOT: %0
+; CHECK: sub i64 %7, %tmp6
+; CHECK-NOT: %0
+; CHECK: ret void
+define void @VerifyDiagnosticConsumerTest() unnamed_addr nounwind uwtable align 2 {
+bb:
+  %tmp3 = call i8* @getCharData() nounwind
+  %tmp4 = call i8* @getCharData() nounwind
+  %tmp5 = ptrtoint i8* %tmp4 to i64
+  %tmp6 = ptrtoint i8* %tmp3 to i64
+  %tmp7 = sub i64 %tmp5, %tmp6
+  br i1 undef, label %bb87, label %.preheader
+
+.preheader:                                       ; preds = %bb10, %bb
+  br i1 undef, label %_ZNK4llvm9StringRef4findEcm.exit42.thread, label %bb10
+
+bb10:                                             ; preds = %.preheader
+  br i1 undef, label %_ZNK4llvm9StringRef4findEcm.exit42, label %.preheader
+
+_ZNK4llvm9StringRef4findEcm.exit42:               ; preds = %bb10
+  br i1 undef, label %_ZNK4llvm9StringRef4findEcm.exit42.thread, label %.lr.ph
+
+_ZNK4llvm9StringRef4findEcm.exit42.thread:        ; preds = %_ZNK4llvm9StringRef4findEcm.exit42, %.preheader
+  unreachable
+
+.lr.ph:                                           ; preds = %_ZNK4llvm9StringRef4findEcm.exit42
+  br label %bb36
+
+_ZNK4llvm9StringRef4findEcm.exit.loopexit:        ; preds = %bb63
+  %tmp21 = icmp eq i64 %i.0.i, -1
+  br i1 %tmp21, label %_ZNK4llvm9StringRef4findEcm.exit._crit_edge, label %bb36
+
+_ZNK4llvm9StringRef4findEcm.exit._crit_edge:      ; preds = %bb61, %_ZNK4llvm9StringRef4findEcm.exit.loopexit
+  unreachable
+
+bb36:                                             ; preds = %_ZNK4llvm9StringRef4findEcm.exit.loopexit, %.lr.ph
+  %loc.063 = phi i64 [ undef, %.lr.ph ], [ %i.0.i, %_ZNK4llvm9StringRef4findEcm.exit.loopexit ]
+  switch i8 undef, label %bb57 [
+    i8 10, label %bb48
+    i8 13, label %bb48
+  ]
+
+bb48:                                             ; preds = %bb36, %bb36
+  br label %bb58
+
+bb57:                                             ; preds = %bb36
+  br label %bb58
+
+bb58:                                             ; preds = %bb57, %bb48
+  %tmp59 = icmp ugt i64 %tmp7, undef
+  %tmp60 = select i1 %tmp59, i64 undef, i64 %tmp7
+  br label %bb61
+
+bb61:                                             ; preds = %bb63, %bb58
+  %i.0.i = phi i64 [ %tmp60, %bb58 ], [ %tmp67, %bb63 ]
+  %tmp62 = icmp eq i64 %i.0.i, %tmp7
+  br i1 %tmp62, label %_ZNK4llvm9StringRef4findEcm.exit._crit_edge, label %bb63
+
+bb63:                                             ; preds = %bb61
+  %tmp64 = getelementptr inbounds i8* %tmp3, i64 %i.0.i
+  %tmp65 = load i8* %tmp64, align 1
+  %tmp67 = add i64 %i.0.i, 1
+  br i1 undef, label %_ZNK4llvm9StringRef4findEcm.exit.loopexit, label %bb61
+
+bb87:                                             ; preds = %bb
+  ret void
+}
+
+declare i8* @getCharData()
diff --git a/test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll b/test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll
index 9189d79e2f..ee3cc4dd78 100644
--- a/test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll
+++ b/test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll
@@ -205,18 +205,18 @@ for.end:                                          ; preds = %for.body
 ; post-increment addressing, no add's or add.w's beyond the three
 ; mentioned. Most importantly, there should be no spills or reloads!
 ;
-; CHECK: testNeon:
-; CHECK: %.lr.ph
-; CHECK-NOT: lsl.w
-; CHECK-NOT: {{ldr|str|adds|add r}}
-; CHECK: add.w r
-; CHECK-NOT: {{ldr|str|adds|add r}}
-; CHECK: add.w r
-; CHECK-NOT: {{ldr|str|adds|add r}}
-; CHECK: add.w r
-; CHECK-NOT: {{ldr|str|adds|add r}}
-; CHECK-NOT: add.w r
-; CHECK: bne
+; A9: testNeon:
+; A9: %.lr.ph
+; A9-NOT: lsl.w
+; A9-NOT: {{ldr|str|adds|add r}}
+; A9: add.w r
+; A9-NOT: {{ldr|str|adds|add r}}
+; A9: add.w r
+; A9-NOT: {{ldr|str|adds|add r}}
+; A9: add.w r
+; A9-NOT: {{ldr|str|adds|add r}}
+; A9-NOT: add.w r
+; A9: bne
 define hidden void @testNeon(i8* %ref_data, i32 %ref_stride, i32 %limit, <16 x i8>* nocapture %data) nounwind optsize {
   %1 = icmp sgt i32 %limit, 0
   br i1 %1, label %.lr.ph, label %45
@@ -290,3 +290,80 @@ define hidden void @testNeon(i8* %ref_data, i32 %ref_stride, i32 %limit, <16 x i
 }
 
 declare <1 x i64> @llvm.arm.neon.vld1.v1i64(i8*, i32) nounwind readonly
+
+; Handle chains in which the same offset is used for both loads and
+; stores to the same array.
+; rdar://11410078.
+;
+; A9: @testReuse
+; A9: %for.body
+; A9: vld1.8 {d{{[0-9]+}}}, [[BASE:[r[0-9]+]]], [[INC:r[0-9]]]
+; A9: vld1.8 {d{{[0-9]+}}}, [[BASE]], [[INC]]
+; A9: vld1.8 {d{{[0-9]+}}}, [[BASE]], [[INC]]
+; A9: vld1.8 {d{{[0-9]+}}}, [[BASE]], [[INC]]
+; A9: vld1.8 {d{{[0-9]+}}}, [[BASE]], [[INC]]
+; A9: vld1.8 {d{{[0-9]+}}}, [[BASE]], [[INC]]
+; A9: vld1.8 {d{{[0-9]+}}}, [[BASE]], [[INC]]
+; A9: vld1.8 {d{{[0-9]+}}}, [[BASE]], {{r[0-9]}}
+; A9: vst1.8 {d{{[0-9]+}}}, [[BASE]], [[INC]]
+; A9: vst1.8 {d{{[0-9]+}}}, [[BASE]], [[INC]]
+; A9: vst1.8 {d{{[0-9]+}}}, [[BASE]], [[INC]]
+; A9: vst1.8 {d{{[0-9]+}}}, [[BASE]], [[INC]]
+; A9: vst1.8 {d{{[0-9]+}}}, [[BASE]], [[INC]]
+; A9: vst1.8 {d{{[0-9]+}}}, [[BASE]]
+; A9: bne
+define void @testReuse(i8* %src, i32 %stride) nounwind ssp {
+entry:
+  %mul = shl nsw i32 %stride, 2
+  %idx.neg = sub i32 0, %mul
+  %mul1 = mul nsw i32 %stride, 3
+  %idx.neg2 = sub i32 0, %mul1
+  %mul5 = shl nsw i32 %stride, 1
+  %idx.neg6 = sub i32 0, %mul5
+  %idx.neg10 = sub i32 0, %stride
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %i.0110 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %src.addr = phi i8* [ %src, %entry ], [ %add.ptr45, %for.body ]
+  %add.ptr = getelementptr inbounds i8* %src.addr, i32 %idx.neg
+  %vld1 = tail call <8 x i8> @llvm.arm.neon.vld1.v8i8(i8* %add.ptr, i32 1)
+  %add.ptr3 = getelementptr inbounds i8* %src.addr, i32 %idx.neg2
+  %vld2 = tail call <8 x i8> @llvm.arm.neon.vld1.v8i8(i8* %add.ptr3, i32 1)
+  %add.ptr7 = getelementptr inbounds i8* %src.addr, i32 %idx.neg6
+  %vld3 = tail call <8 x i8> @llvm.arm.neon.vld1.v8i8(i8* %add.ptr7, i32 1)
+  %add.ptr11 = getelementptr inbounds i8* %src.addr, i32 %idx.neg10
+  %vld4 = tail call <8 x i8> @llvm.arm.neon.vld1.v8i8(i8* %add.ptr11, i32 1)
+  %vld5 = tail call <8 x i8> @llvm.arm.neon.vld1.v8i8(i8* %src.addr, i32 1)
+  %add.ptr17 = getelementptr inbounds i8* %src.addr, i32 %stride
+  %vld6 = tail call <8 x i8> @llvm.arm.neon.vld1.v8i8(i8* %add.ptr17, i32 1)
+  %add.ptr20 = getelementptr inbounds i8* %src.addr, i32 %mul5
+  %vld7 = tail call <8 x i8> @llvm.arm.neon.vld1.v8i8(i8* %add.ptr20, i32 1)
+  %add.ptr23 = getelementptr inbounds i8* %src.addr, i32 %mul1
+  %vld8 = tail call <8 x i8> @llvm.arm.neon.vld1.v8i8(i8* %add.ptr23, i32 1)
+  %vadd1 = tail call <8 x i8> @llvm.arm.neon.vhaddu.v8i8(<8 x i8> %vld1, <8 x i8> %vld2) nounwind
+  %vadd2 = tail call <8 x i8> @llvm.arm.neon.vhaddu.v8i8(<8 x i8> %vld2, <8 x i8> %vld3) nounwind
+  %vadd3 = tail call <8 x i8> @llvm.arm.neon.vhaddu.v8i8(<8 x i8> %vld3, <8 x i8> %vld4) nounwind
+  %vadd4 = tail call <8 x i8> @llvm.arm.neon.vhaddu.v8i8(<8 x i8> %vld4, <8 x i8> %vld5) nounwind
+  %vadd5 = tail call <8 x i8> @llvm.arm.neon.vhaddu.v8i8(<8 x i8> %vld5, <8 x i8> %vld6) nounwind
+  %vadd6 = tail call <8 x i8> @llvm.arm.neon.vhaddu.v8i8(<8 x i8> %vld6, <8 x i8> %vld7) nounwind
+  tail call void @llvm.arm.neon.vst1.v8i8(i8* %add.ptr3, <8 x i8> %vadd1, i32 1)
+  tail call void @llvm.arm.neon.vst1.v8i8(i8* %add.ptr7, <8 x i8> %vadd2, i32 1)
+  tail call void @llvm.arm.neon.vst1.v8i8(i8* %add.ptr11, <8 x i8> %vadd3, i32 1)
+  tail call void @llvm.arm.neon.vst1.v8i8(i8* %src.addr, <8 x i8> %vadd4, i32 1)
+  tail call void @llvm.arm.neon.vst1.v8i8(i8* %add.ptr17, <8 x i8> %vadd5, i32 1)
+  tail call void @llvm.arm.neon.vst1.v8i8(i8* %add.ptr20, <8 x i8> %vadd6, i32 1)
+  %inc = add nsw i32 %i.0110, 1
+  %add.ptr45 = getelementptr inbounds i8* %src.addr, i32 8
+  %exitcond = icmp eq i32 %inc, 4
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+declare <8 x i8> @llvm.arm.neon.vld1.v8i8(i8*, i32) nounwind readonly
+
+declare void @llvm.arm.neon.vst1.v8i8(i8*, <8 x i8>, i32) nounwind
+
+declare <8 x i8> @llvm.arm.neon.vhaddu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
diff --git a/test/Transforms/LoopStrengthReduce/2008-08-14-ShadowIV.ll b/test/Transforms/LoopStrengthReduce/X86/2008-08-14-ShadowIV.ll
index c650d8cf76..9a7f4865c5 100644
--- a/test/Transforms/LoopStrengthReduce/2008-08-14-ShadowIV.ll
+++ b/test/Transforms/LoopStrengthReduce/X86/2008-08-14-ShadowIV.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loop-reduce -S | grep "phi double" | count 1
+; RUN: opt < %s -loop-reduce -S -mtriple=x86_64-unknown-unknown | grep "phi double" | count 1
 
 define void @foobar(i32 %n) nounwind {
 entry:
diff --git a/test/Transforms/LoopStrengthReduce/2011-07-20-DoubleIV.ll b/test/Transforms/LoopStrengthReduce/X86/2011-07-20-DoubleIV.ll
index 5d9ed64ef4..a932b47925 100644
--- a/test/Transforms/LoopStrengthReduce/2011-07-20-DoubleIV.ll
+++ b/test/Transforms/LoopStrengthReduce/X86/2011-07-20-DoubleIV.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loop-reduce -S | FileCheck %s
+; RUN: opt < %s -loop-reduce -S -mtriple=x86_64-unknown-unknown | FileCheck %s
 ;
 ; Test LSR's OptimizeShadowIV. Handle a floating-point IV with a
 ; nonzero initial value.
diff --git a/test/Transforms/LoopStrengthReduce/X86/2011-12-04-loserreg.ll b/test/Transforms/LoopStrengthReduce/X86/2011-12-04-loserreg.ll
index 5108650962..eedfc200f4 100644
--- a/test/Transforms/LoopStrengthReduce/X86/2011-12-04-loserreg.ll
+++ b/test/Transforms/LoopStrengthReduce/X86/2011-12-04-loserreg.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s | FileCheck %s
+; RUN: opt < %s -loop-reduce -S | FileCheck %s
 ;
 ; Test LSR's ability to prune formulae that refer to nonexistant
 ; AddRecs in other loops.
@@ -15,13 +15,10 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 target triple = "x86_64-apple-darwin"
 
 ; CHECK: @test
-; CHECK: # %for.body{{$}}
-; dummyiv copy should be removed
-; CHECK-NOT: movq
-; CHECK: # %for.cond19.preheader
-; dummycnt should be removed
-; CHECK-NOT: incq
-; CHECK: # %for.body23{{$}}
+; CHECK: for.body:
+; CHECK: %lsr.iv
+; CHECK-NOT: %dummyout
+; CHECK: ret
 define i64 @test(i64 %count, float* nocapture %srcrow, i32* nocapture %destrow) nounwind uwtable ssp {
 entry:
   %cmp34 = icmp eq i64 %count, 0
diff --git a/test/Transforms/LoopStrengthReduce/dominate-assert.ll b/test/Transforms/LoopStrengthReduce/dominate-assert.ll
index b87bf620de..ff8cab8313 100644
--- a/test/Transforms/LoopStrengthReduce/dominate-assert.ll
+++ b/test/Transforms/LoopStrengthReduce/dominate-assert.ll
@@ -1,4 +1,4 @@
-; RUN: opt -loop-reduce %s
+; RUN: opt -loop-reduce < %s
 ; we used to crash on this one
 
 declare i8* @_Znwm()
diff --git a/test/Transforms/LoopStrengthReduce/exit_compare_live_range.ll b/test/Transforms/LoopStrengthReduce/exit_compare_live_range.ll
index ad4959be34..498be1a9a1 100644
--- a/test/Transforms/LoopStrengthReduce/exit_compare_live_range.ll
+++ b/test/Transforms/LoopStrengthReduce/exit_compare_live_range.ll
@@ -2,7 +2,7 @@
 ; having overlapping live ranges that result in copies.  We want the setcc 
 ; instruction immediately before the conditional branch.
 ;
-; RUN: opt -S -loop-reduce %s | FileCheck %s
+; RUN: opt -S -loop-reduce < %s | FileCheck %s
 
 define void @foo(float* %D, i32 %E) {
 entry:
diff --git a/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll b/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll
index 96904c66e6..45aeb4e691 100644
--- a/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll
+++ b/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll
@@ -4,18 +4,17 @@
 ; LSR should properly handle the post-inc offset when folding the
 ; non-IV operand of an icmp into the IV.
 
-; CHECK:   %4 = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast
-; CHECK:   %5 = lshr i64 %4, 1
-; CHECK:   %6 = mul i64 %5, 2
+; CHECK:   [[r1:%[a-z0-9]+]] = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast
+; CHECK:   [[r2:%[a-z0-9]+]] = lshr i64 [[r1]], 1
+; CHECK:   [[r3:%[a-z0-9]+]] = mul i64 [[r2]], 2
 ; CHECK:   br label %for.body
 ; CHECK: for.body:
-; CHECK:   %lsr.iv2 = phi i64 [ %lsr.iv.next, %for.body ], [ %6, %for.body.lr.ph ]
+; CHECK:   %lsr.iv2 = phi i64 [ %lsr.iv.next, %for.body ], [ [[r3]], %for.body.lr.ph ]
 ; CHECK:   %lsr.iv.next = add i64 %lsr.iv2, -2
 ; CHECK:   %lsr.iv.next3 = inttoptr i64 %lsr.iv.next to i16*
 ; CHECK:   %cmp27 = icmp eq i16* %lsr.iv.next3, null
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
-target triple = "x86_64-unknown-linux-gnu"
 
 %struct.Vector2 = type { i16*, [64 x i16], i32 }
 
diff --git a/test/Transforms/LoopUnroll/Stats/lit.local.cfg b/test/Transforms/LoopUnroll/Stats/lit.local.cfg
new file mode 100644
index 0000000000..89c0cd9d48
--- /dev/null
+++ b/test/Transforms/LoopUnroll/Stats/lit.local.cfg
@@ -0,0 +1,4 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+if not config.root.enable_assertions:
+    config.unsupported = True
diff --git a/test/Transforms/LoopUnroll/runtime-loop3.ll b/test/Transforms/LoopUnroll/Stats/runtime-loop3.ll
index 55cf22373e..55cf22373e 100644
--- a/test/Transforms/LoopUnroll/runtime-loop3.ll
+++ b/test/Transforms/LoopUnroll/Stats/runtime-loop3.ll
diff --git a/test/Transforms/LoopUnroll/basic.ll b/test/Transforms/LoopUnroll/basic.ll
index eeb3e9a57b..ab5bc568ed 100644
--- a/test/Transforms/LoopUnroll/basic.ll
+++ b/test/Transforms/LoopUnroll/basic.ll
@@ -22,3 +22,26 @@ l1:                                               ; preds = %l1, %entry
 l2:                                               ; preds = %l1
   ret i32 0
 }
+
+; This should not unroll since the call is 'noduplicate'.
+
+; CHECK: @test2
+define i32 @test2(i8** %P) nounwind ssp {
+entry:
+  br label %l1
+
+l1:                                               ; preds = %l1, %entry
+  %x.0 = phi i32 [ 0, %entry ], [ %inc, %l1 ]
+; CHECK: call void @f()
+; CHECK-NOT: call void @f()
+  call void @f() noduplicate
+  %inc = add nsw i32 %x.0, 1
+  %exitcond = icmp eq i32 %inc, 3
+  br i1 %exitcond, label %l2, label %l1
+
+l2:                                               ; preds = %l1
+  ret i32 0
+; CHECK: }
+}
+
+declare void @f()
diff --git a/test/Transforms/LoopUnswitch/2008-11-03-Invariant.ll b/test/Transforms/LoopUnswitch/Stats/2008-11-03-Invariant.ll
index 9d73d31d50..9d73d31d50 100644
--- a/test/Transforms/LoopUnswitch/2008-11-03-Invariant.ll
+++ b/test/Transforms/LoopUnswitch/Stats/2008-11-03-Invariant.ll
diff --git a/test/Transforms/LoopUnswitch/2011-11-18-SimpleSwitch.ll b/test/Transforms/LoopUnswitch/Stats/2011-11-18-SimpleSwitch.ll
index c1fd588106..bde52da87a 100644
--- a/test/Transforms/LoopUnswitch/2011-11-18-SimpleSwitch.ll
+++ b/test/Transforms/LoopUnswitch/Stats/2011-11-18-SimpleSwitch.ll
@@ -1,5 +1,5 @@
 ; RUN: opt -loop-unswitch -disable-output -stats -info-output-file - < %s | FileCheck --check-prefix=STATS %s
-; RUN: opt -S -loop-unswitch -verify-loop-info -verify-dom-info %s | FileCheck %s
+; RUN: opt -S -loop-unswitch -verify-loop-info -verify-dom-info < %s | FileCheck %s
 
 ; STATS: 1 loop-simplify - Number of pre-header or exit blocks inserted
 ; STATS: 2 loop-unswitch - Number of switches unswitched
@@ -19,7 +19,7 @@
 ; CHECK-NEXT:     i32 1, label %inc.us
 
 ; CHECK:      inc.us:                                           ; preds = %loop_begin.us
-; CHECK-NEXT:   call void @incf() noreturn nounwind
+; CHECK-NEXT:   call void @incf() [[NOR_NUW:#[0-9]+]]
 ; CHECK-NEXT:   br label %loop_begin.backedge.us
 
 ; CHECK:      .split:                                           ; preds = %..split_crit_edge
@@ -40,7 +40,7 @@
 ; CHECK-NEXT:   ]
 
 ; CHECK:      dec.us3:                                          ; preds = %loop_begin.us1
-; CHECK-NEXT:   call void @decf() noreturn nounwind
+; CHECK-NEXT:   call void @decf() [[NOR_NUW]]
 ; CHECK-NEXT:   br label %loop_begin.backedge.us5
 
 ; CHECK:      .split.split:                                     ; preds = %.split..split.split_crit_edge
@@ -89,3 +89,6 @@ loop_exit:
 
 declare void @incf() noreturn
 declare void @decf() noreturn
+
+; CHECK: attributes #0 = { noreturn }
+; CHECK: attributes [[NOR_NUW]] = { noreturn nounwind }
diff --git a/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches-Threshold.ll b/test/Transforms/LoopUnswitch/Stats/2011-11-18-TwoSwitches-Threshold.ll
index f3db471199..c3bf5966ad 100644
--- a/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches-Threshold.ll
+++ b/test/Transforms/LoopUnswitch/Stats/2011-11-18-TwoSwitches-Threshold.ll
@@ -1,5 +1,5 @@
 ; RUN: opt -loop-unswitch -loop-unswitch-threshold 13 -disable-output -stats -info-output-file - < %s | FileCheck --check-prefix=STATS %s
-; RUN: opt -S -loop-unswitch -loop-unswitch-threshold 13 -verify-loop-info -verify-dom-info %s | FileCheck %s
+; RUN: opt -S -loop-unswitch -loop-unswitch-threshold 13 -verify-loop-info -verify-dom-info < %s | FileCheck %s
 
 ; STATS: 1 loop-simplify - Number of pre-header or exit blocks inserted
 ; STATS: 1 loop-unswitch - Number of switches unswitched
@@ -25,7 +25,7 @@
 ; CHECK-NEXT:   ]
 
 ; CHECK:      inc.us:                                           ; preds = %second_switch.us, %loop_begin.us
-; CHECK-NEXT:   call void @incf() noreturn nounwind
+; CHECK-NEXT:   call void @incf() [[NOR_NUW:#[0-9]+]]
 ; CHECK-NEXT:   br label %loop_begin.backedge.us
 
 ; CHECK:      .split:                                           ; preds = %..split_crit_edge
@@ -45,7 +45,7 @@
 ; CHECK-NEXT:   ]
 
 ; CHECK:      inc:                                              ; preds = %loop_begin.inc_crit_edge, %second_switch
-; CHECK-NEXT:   call void @incf() noreturn nounwind
+; CHECK-NEXT:   call void @incf() [[NOR_NUW]]
 ; CHECK-NEXT:   br label %loop_begin.backedge
 
 define i32 @test(i32* %var) {
@@ -82,3 +82,6 @@ loop_exit:
 
 declare void @incf() noreturn
 declare void @decf() noreturn
+
+; CHECK: attributes #0 = { noreturn }
+; CHECK: attributes [[NOR_NUW]] = { noreturn nounwind }
diff --git a/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches.ll b/test/Transforms/LoopUnswitch/Stats/2011-11-18-TwoSwitches.ll
index 270899642f..95303337da 100644
--- a/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches.ll
+++ b/test/Transforms/LoopUnswitch/Stats/2011-11-18-TwoSwitches.ll
@@ -1,5 +1,5 @@
 ; RUN: opt -loop-unswitch -loop-unswitch-threshold 1000 -disable-output -stats -info-output-file - < %s | FileCheck --check-prefix=STATS %s
-; RUN: opt -S -loop-unswitch -loop-unswitch-threshold 1000 -verify-loop-info -verify-dom-info %s | FileCheck %s
+; RUN: opt -S -loop-unswitch -loop-unswitch-threshold 1000 -verify-loop-info -verify-dom-info < %s | FileCheck %s
 
 ; STATS: 1 loop-simplify - Number of pre-header or exit blocks inserted
 ; STATS: 3 loop-unswitch - Number of switches unswitched
@@ -30,7 +30,7 @@
 ; CHECK-NEXT:     i32 1, label %inc.us.us
 
 ; CHECK:      inc.us.us:                                        ; preds = %second_switch.us.us, %loop_begin.us.us
-; CHECK-NEXT:   call void @incf() noreturn nounwind
+; CHECK-NEXT:   call void @incf() [[NOR_NUW:#[0-9]+]]
 ; CHECK-NEXT:   br label %loop_begin.backedge.us.us
 
 ; CHECK:      .split.us.split:                                  ; preds = %.split.us..split.us.split_crit_edge
@@ -50,7 +50,7 @@
 ; CHECK-NEXT:   br i1 true, label %us-unreachable8, label %inc.us
 
 ; CHECK:      inc.us:                                           ; preds = %second_switch.us.inc.us_crit_edge, %loop_begin.us
-; CHECK-NEXT:   call void @incf() noreturn nounwind
+; CHECK-NEXT:   call void @incf() [[NOR_NUW]]
 ; CHECK-NEXT:   br label %loop_begin.backedge.us
 
 ; CHECK:      .split:                                           ; preds = %..split_crit_edge
@@ -75,7 +75,7 @@
 ; CHECK-NEXT:   ]
 
 ; CHECK:      inc.us4:                                          ; preds = %loop_begin.inc_crit_edge.us, %second_switch.us3
-; CHECK-NEXT:   call void @incf() noreturn nounwind
+; CHECK-NEXT:   call void @incf() [[NOR_NUW]]
 ; CHECK-NEXT:   br label %loop_begin.backedge.us6
 
 ; CHECK:      loop_begin.inc_crit_edge.us:                      ; preds = %loop_begin.us1
@@ -136,3 +136,6 @@ loop_exit:
 
 declare void @incf() noreturn
 declare void @decf() noreturn
+
+; CHECK: attributes #0 = { noreturn }
+; CHECK: attributes [[NOR_NUW]] = { noreturn nounwind }
diff --git a/test/Transforms/LoopUnswitch/infinite-loop.ll b/test/Transforms/LoopUnswitch/Stats/infinite-loop.ll
index 73391ca8d1..f3fba642f8 100644
--- a/test/Transforms/LoopUnswitch/infinite-loop.ll
+++ b/test/Transforms/LoopUnswitch/Stats/infinite-loop.ll
@@ -21,11 +21,11 @@
 ; CHECK-NEXT: br label %cond.end.us
 
 ; CHECK: abort0.split:
-; CHECK-NEXT: call void @end0() noreturn nounwind
+; CHECK-NEXT: call void @end0() [[NOR_NUW:#[0-9]+]]
 ; CHECK-NEXT: unreachable
 
 ; CHECK: abort1:
-; CHECK-NEXT: call void @end1() noreturn nounwind
+; CHECK-NEXT: call void @end1() [[NOR_NUW]]
 ; CHECK-NEXT: unreachable
 
 ; CHECK: }
@@ -51,3 +51,7 @@ abort1:
 
 declare void @end0() noreturn
 declare void @end1() noreturn
+
+; CHECK: attributes #0 = { nounwind }
+; CHECK: attributes #1 = { noreturn }
+; CHECK: attributes [[NOR_NUW]] = { noreturn nounwind }
diff --git a/test/Transforms/LoopUnswitch/Stats/lit.local.cfg b/test/Transforms/LoopUnswitch/Stats/lit.local.cfg
new file mode 100644
index 0000000000..89c0cd9d48
--- /dev/null
+++ b/test/Transforms/LoopUnswitch/Stats/lit.local.cfg
@@ -0,0 +1,4 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+if not config.root.enable_assertions:
+    config.unsupported = True
diff --git a/test/Transforms/LoopUnswitch/basictest.ll b/test/Transforms/LoopUnswitch/basictest.ll
index 1e6f2cf15e..e98d82b652 100644
--- a/test/Transforms/LoopUnswitch/basictest.ll
+++ b/test/Transforms/LoopUnswitch/basictest.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loop-unswitch -disable-output
+; RUN: opt < %s -loop-unswitch -verify-loop-info -S < %s 2>&1 | FileCheck %s
 
 define i32 @test(i32* %A, i1 %C) {
 entry:
@@ -29,3 +29,40 @@ return:		; preds = %endif, %then
 	ret i32 %tmp.13
 }
 
+; This simple test would normally unswitch, but should be inhibited by the presence of
+; the noduplicate call.
+
+; CHECK: @test2
+define i32 @test2(i32* %var) {
+  %mem = alloca i32
+  store i32 2, i32* %mem
+  %c = load i32* %mem
+
+  br label %loop_begin
+
+loop_begin:
+
+  %var_val = load i32* %var
+
+  switch i32 %c, label %default [
+      i32 1, label %inc
+      i32 2, label %dec
+  ]
+
+inc:
+  call void @incf() noreturn nounwind
+  br label %loop_begin
+dec:
+; CHECK: call void @decf()
+; CHECK-NOT: call void @decf()
+  call void @decf() noreturn nounwind noduplicate
+  br label %loop_begin
+default:
+  br label %loop_exit
+loop_exit:
+  ret i32 0
+; CHECK: }
+}
+
+declare void @incf() noreturn
+declare void @decf() noreturn
diff --git a/test/Transforms/LoopUnswitch/preserve-analyses.ll b/test/Transforms/LoopUnswitch/preserve-analyses.ll
index 668f8ecaf8..f79612bef5 100644
--- a/test/Transforms/LoopUnswitch/preserve-analyses.ll
+++ b/test/Transforms/LoopUnswitch/preserve-analyses.ll
@@ -1,4 +1,4 @@
-; RUN: opt -loop-unswitch -verify-loop-info -verify-dom-info %s -disable-output
+; RUN: opt -loop-unswitch -verify-loop-info -verify-dom-info -disable-output < %s
 
 ; Loop unswitch should be able to unswitch these loops and
 ; preserve LCSSA and LoopSimplify forms.
diff --git a/test/Transforms/LoopVectorize/12-12-11-if-conv.ll b/test/Transforms/LoopVectorize/12-12-11-if-conv.ll
new file mode 100644
index 0000000000..2dd7fe34a7
--- /dev/null
+++ b/test/Transforms/LoopVectorize/12-12-11-if-conv.ll
@@ -0,0 +1,44 @@
+; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -enable-if-conversion -dce -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+;CHECK: @foo
+;CHECK: icmp eq <4 x i32>
+;CHECK: select <4 x i1>
+;CHECK: ret i32
+define i32 @foo(i32 %x, i32 %t, i32* nocapture %A) nounwind uwtable ssp {
+entry:
+  %cmp10 = icmp sgt i32 %x, 0
+  br i1 %cmp10, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %if.end
+  %indvars.iv = phi i64 [ %indvars.iv.next, %if.end ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i32* %A, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4, !tbaa !0
+  %tobool = icmp eq i32 %0, 0
+  br i1 %tobool, label %if.end, label %if.then
+
+if.then:                                          ; preds = %for.body
+  %1 = add nsw i64 %indvars.iv, 45
+  %2 = trunc i64 %indvars.iv to i32
+  %mul = mul nsw i32 %2, %t
+  %3 = trunc i64 %1 to i32
+  %add1 = add nsw i32 %3, %mul
+  br label %if.end
+
+if.end:                                           ; preds = %for.body, %if.then
+  %z.0 = phi i32 [ %add1, %if.then ], [ 9, %for.body ]
+  store i32 %z.0, i32* %arrayidx, align 4, !tbaa !0
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %x
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %if.end, %entry
+  ret i32 undef
+}
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/Transforms/LoopVectorize/2012-10-20-infloop.ll b/test/Transforms/LoopVectorize/2012-10-20-infloop.ll
index 0176c9a189..aa7cc0ee32 100644
--- a/test/Transforms/LoopVectorize/2012-10-20-infloop.ll
+++ b/test/Transforms/LoopVectorize/2012-10-20-infloop.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-width=4 -dce
+; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce
 
 ; Check that we don't fall into an infinite loop.
 define void @test() nounwind {
@@ -25,3 +25,47 @@ for.body:                                         ; preds = %for.body, %entry
 for.end:                                          ; preds = %for.body
  unreachable
 }
+
+;PR14701
+define void @start_model_rare() nounwind uwtable ssp {
+entry:
+  br i1 undef, label %return, label %if.end
+
+if.end:                                           ; preds = %entry
+  br i1 undef, label %cond.false, label %cond.true
+
+cond.true:                                        ; preds = %if.end
+  unreachable
+
+cond.false:                                       ; preds = %if.end
+  br i1 undef, label %cond.false28, label %cond.true20
+
+cond.true20:                                      ; preds = %cond.false
+  unreachable
+
+cond.false28:                                     ; preds = %cond.false
+  br label %for.body40
+
+for.body40:                                       ; preds = %for.inc50, %cond.false28
+  %indvars.iv123 = phi i64 [ 3, %cond.false28 ], [ %indvars.iv.next124, %for.inc50 ]
+  %step.0121 = phi i32 [ 1, %cond.false28 ], [ %step.1, %for.inc50 ]
+  br i1 undef, label %if.then46, label %for.inc50
+
+if.then46:                                        ; preds = %for.body40
+  %inc47 = add nsw i32 %step.0121, 1
+  br label %for.inc50
+
+for.inc50:                                        ; preds = %if.then46, %for.body40
+  %k.1 = phi i32 [ undef, %for.body40 ], [ %inc47, %if.then46 ]
+  %step.1 = phi i32 [ %step.0121, %for.body40 ], [ %inc47, %if.then46 ]
+  %indvars.iv.next124 = add i64 %indvars.iv123, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next124 to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 256
+  br i1 %exitcond, label %for.end52, label %for.body40
+
+for.end52:                                        ; preds = %for.inc50
+  unreachable
+
+return:                                           ; preds = %entry
+  ret void
+}
diff --git a/test/Transforms/LoopVectorize/2012-10-22-isconsec.ll b/test/Transforms/LoopVectorize/2012-10-22-isconsec.ll
index 2516e248bc..405582c408 100644
--- a/test/Transforms/LoopVectorize/2012-10-22-isconsec.ll
+++ b/test/Transforms/LoopVectorize/2012-10-22-isconsec.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -dce -force-vector-width=4 
+; RUN: opt < %s  -loop-vectorize -dce -force-vector-unroll=1 -force-vector-width=4 
 
 ; Check that we don't crash.
 
diff --git a/test/Transforms/LoopVectorize/ARM/arm-unroll.ll b/test/Transforms/LoopVectorize/ARM/arm-unroll.ll
new file mode 100644
index 0000000000..c8d307f5d4
--- /dev/null
+++ b/test/Transforms/LoopVectorize/ARM/arm-unroll.ll
@@ -0,0 +1,32 @@
+; RUN: opt < %s  -loop-vectorize -mtriple=thumbv7-apple-ios3.0.0 -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -mtriple=thumbv7-apple-ios3.0.0 -mcpu=swift -S | FileCheck %s --check-prefix=SWIFT
+
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
+target triple = "thumbv7-apple-ios3.0.0"
+
+;CHECK: @foo
+;CHECK: load <4 x i32>
+;CHECK-NOT: load <4 x i32>
+;CHECK: ret
+;SWIFT: @foo
+;SWIFT: load <4 x i32>
+;SWIFT: load <4 x i32>
+;SWIFT: ret
+define i32 @foo(i32* nocapture %A, i32 %n) nounwind readonly ssp {
+  %1 = icmp sgt i32 %n, 0
+  br i1 %1, label %.lr.ph, label %._crit_edge
+
+.lr.ph:                                           ; preds = %0, %.lr.ph
+  %i.02 = phi i32 [ %5, %.lr.ph ], [ 0, %0 ]
+  %sum.01 = phi i32 [ %4, %.lr.ph ], [ 0, %0 ]
+  %2 = getelementptr inbounds i32* %A, i32 %i.02
+  %3 = load i32* %2, align 4
+  %4 = add nsw i32 %3, %sum.01
+  %5 = add nsw i32 %i.02, 1
+  %exitcond = icmp eq i32 %5, %n
+  br i1 %exitcond, label %._crit_edge, label %.lr.ph
+
+._crit_edge:                                      ; preds = %.lr.ph, %0
+  %sum.0.lcssa = phi i32 [ 0, %0 ], [ %4, %.lr.ph ]
+  ret i32 %sum.0.lcssa
+}
diff --git a/test/Transforms/LoopVectorize/ARM/gcc-examples.ll b/test/Transforms/LoopVectorize/ARM/gcc-examples.ll
new file mode 100644
index 0000000000..6a68e81bca
--- /dev/null
+++ b/test/Transforms/LoopVectorize/ARM/gcc-examples.ll
@@ -0,0 +1,60 @@
+; RUN: opt < %s  -loop-vectorize -mtriple=thumbv7-apple-ios3.0.0 -mcpu=swift -S -dce | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
+target triple = "thumbv7-apple-ios3.0.0"
+
+@b = common global [2048 x i32] zeroinitializer, align 16
+@c = common global [2048 x i32] zeroinitializer, align 16
+@a = common global [2048 x i32] zeroinitializer, align 16
+
+; Select VF = 8;
+;CHECK: @example1
+;CHECK: load <4 x i32>
+;CHECK: add nsw <4 x i32>
+;CHECK: store <4 x i32>
+;CHECK: ret void
+define void @example1() nounwind uwtable ssp {
+  br label %1
+
+; <label>:1                                       ; preds = %1, %0
+  %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
+  %2 = getelementptr inbounds [2048 x i32]* @b, i64 0, i64 %indvars.iv
+  %3 = load i32* %2, align 4
+  %4 = getelementptr inbounds [2048 x i32]* @c, i64 0, i64 %indvars.iv
+  %5 = load i32* %4, align 4
+  %6 = add nsw i32 %5, %3
+  %7 = getelementptr inbounds [2048 x i32]* @a, i64 0, i64 %indvars.iv
+  store i32 %6, i32* %7, align 4
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 256
+  br i1 %exitcond, label %8, label %1
+
+; <label>:8                                       ; preds = %1
+  ret void
+}
+
+;CHECK: @example10b
+;CHECK: load <4 x i16>
+;CHECK: sext <4 x i16>
+;CHECK: store <4 x i32>
+;CHECK: ret void
+define void @example10b(i16* noalias nocapture %sa, i16* noalias nocapture %sb, i16* noalias nocapture %sc, i32* noalias nocapture %ia, i32* noalias nocapture %ib, i32* noalias nocapture %ic) nounwind uwtable ssp {
+  br label %1
+
+; <label>:1                                       ; preds = %1, %0
+  %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
+  %2 = getelementptr inbounds i16* %sb, i64 %indvars.iv
+  %3 = load i16* %2, align 2
+  %4 = sext i16 %3 to i32
+  %5 = getelementptr inbounds i32* %ia, i64 %indvars.iv
+  store i32 %4, i32* %5, align 4
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 1024
+  br i1 %exitcond, label %6, label %1
+
+; <label>:6                                       ; preds = %1
+  ret void
+}
+
diff --git a/test/Transforms/LoopVectorize/ARM/lit.local.cfg b/test/Transforms/LoopVectorize/ARM/lit.local.cfg
new file mode 100644
index 0000000000..cb77b09ef4
--- /dev/null
+++ b/test/Transforms/LoopVectorize/ARM/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+targets = set(config.root.targets_to_build.split())
+if not 'ARM' in targets:
+    config.unsupported = True
+
diff --git a/test/Transforms/LoopVectorize/ARM/mul-cast-vect.ll b/test/Transforms/LoopVectorize/ARM/mul-cast-vect.ll
new file mode 100644
index 0000000000..d2e3de279f
--- /dev/null
+++ b/test/Transforms/LoopVectorize/ARM/mul-cast-vect.ll
@@ -0,0 +1,114 @@
+; RUN: opt < %s  -cost-model -analyze -mtriple=armv7-linux-gnueabihf -mcpu=cortex-a9 | FileCheck --check-prefix=COST %s
+; To see the assembly output: llc -mcpu=cortex-a9 < %s | FileCheck --check-prefix=ASM %s
+; ASM lines below are only for reference, tests on that direction should go to tests/CodeGen/ARM
+
+; ModuleID = 'arm.ll'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
+target triple = "armv7--linux-gnueabihf"
+
+%T216 = type <2 x i16>
+%T232 = type <2 x i32>
+%T264 = type <2 x i64>
+
+%T416 = type <4 x i16>
+%T432 = type <4 x i32>
+%T464 = type <4 x i64>
+
+define void @direct(%T432* %loadaddr, %T432* %loadaddr2, %T432* %storeaddr) {
+; COST: function 'direct':
+  %v0 = load %T432* %loadaddr
+; ASM: vld1.64
+  %v1 = load %T432* %loadaddr2
+; ASM: vld1.64
+  %r3 = mul %T432 %v0, %v1 
+; COST: cost of 2 for instruction: {{.*}} mul <4 x i32>
+; ASM: vmul.i32
+  store %T432 %r3, %T432* %storeaddr
+; ASM: vst1.64
+  ret void
+}
+
+define void @ups1632(%T416* %loadaddr, %T416* %loadaddr2, %T432* %storeaddr) {
+; COST: function 'ups1632':
+  %v0 = load %T416* %loadaddr
+; ASM: vldr
+  %v1 = load %T416* %loadaddr2
+; ASM: vldr
+  %r1 = sext %T416 %v0 to %T432
+  %r2 = sext %T416 %v1 to %T432
+; COST: cost of 0 for instruction: {{.*}} sext <4 x i16> {{.*}} to <4 x i32>
+  %r3 = mul %T432 %r1, %r2 
+; COST: cost of 2 for instruction: {{.*}} mul <4 x i32>
+; ASM: vmull.s16
+  store %T432 %r3, %T432* %storeaddr
+; ASM: vst1.64
+  ret void
+}
+
+define void @upu1632(%T416* %loadaddr, %T416* %loadaddr2, %T432* %storeaddr) {
+; COST: function 'upu1632':
+  %v0 = load %T416* %loadaddr
+; ASM: vldr
+  %v1 = load %T416* %loadaddr2
+; ASM: vldr
+  %r1 = zext %T416 %v0 to %T432
+  %r2 = zext %T416 %v1 to %T432
+; COST: cost of 0 for instruction: {{.*}} zext <4 x i16> {{.*}} to <4 x i32>
+  %r3 = mul %T432 %r1, %r2 
+; COST: cost of 2 for instruction: {{.*}} mul <4 x i32>
+; ASM: vmull.u16
+  store %T432 %r3, %T432* %storeaddr
+; ASM: vst1.64
+  ret void
+}
+
+define void @ups3264(%T232* %loadaddr, %T232* %loadaddr2, %T264* %storeaddr) {
+; COST: function 'ups3264':
+  %v0 = load %T232* %loadaddr
+; ASM: vldr
+  %v1 = load %T232* %loadaddr2
+; ASM: vldr
+  %r3 = mul %T232 %v0, %v1 
+; ASM: vmul.i32
+; COST: cost of 1 for instruction: {{.*}} mul <2 x i32>
+  %st = sext %T232 %r3 to %T264
+; ASM: vmovl.s32
+; COST: cost of 1 for instruction: {{.*}} sext <2 x i32> {{.*}} to <2 x i64>
+  store %T264 %st, %T264* %storeaddr
+; ASM: vst1.64
+  ret void
+}
+
+define void @upu3264(%T232* %loadaddr, %T232* %loadaddr2, %T264* %storeaddr) {
+; COST: function 'upu3264':
+  %v0 = load %T232* %loadaddr
+; ASM: vldr
+  %v1 = load %T232* %loadaddr2
+; ASM: vldr
+  %r3 = mul %T232 %v0, %v1 
+; ASM: vmul.i32
+; COST: cost of 1 for instruction: {{.*}} mul <2 x i32>
+  %st = zext %T232 %r3 to %T264
+; ASM: vmovl.u32
+; COST: cost of 1 for instruction: {{.*}} zext <2 x i32> {{.*}} to <2 x i64>
+  store %T264 %st, %T264* %storeaddr
+; ASM: vst1.64
+  ret void
+}
+
+define void @dn3216(%T432* %loadaddr, %T432* %loadaddr2, %T416* %storeaddr) {
+; COST: function 'dn3216':
+  %v0 = load %T432* %loadaddr
+; ASM: vld1.64
+  %v1 = load %T432* %loadaddr2
+; ASM: vld1.64
+  %r3 = mul %T432 %v0, %v1 
+; ASM: vmul.i32
+; COST: cost of 2 for instruction: {{.*}} mul <4 x i32>
+  %st = trunc %T432 %r3 to %T416
+; ASM: vmovn.i32
+; COST: cost of 1 for instruction: {{.*}} trunc <4 x i32> {{.*}} to <4 x i16>
+  store %T416 %st, %T416* %storeaddr
+; ASM: vstr
+  ret void
+}
diff --git a/test/Transforms/LoopVectorize/ARM/width-detect.ll b/test/Transforms/LoopVectorize/ARM/width-detect.ll
new file mode 100644
index 0000000000..c0795b6a79
--- /dev/null
+++ b/test/Transforms/LoopVectorize/ARM/width-detect.ll
@@ -0,0 +1,52 @@
+; RUN: opt < %s  -loop-vectorize -mtriple=thumbv7-apple-ios3.0.0 -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
+target triple = "thumbv7-apple-ios3.0.0"
+
+;CHECK:foo_F64
+;CHECK: <2 x double>
+;CHECK:ret
+define double @foo_F64(double* nocapture %A, i32 %n) nounwind uwtable readonly ssp {
+  %1 = icmp sgt i32 %n, 0
+  br i1 %1, label %.lr.ph, label %._crit_edge
+
+.lr.ph:                                           ; preds = %0, %.lr.ph
+  %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ]
+  %prod.01 = phi double [ %4, %.lr.ph ], [ 0.000000e+00, %0 ]
+  %2 = getelementptr inbounds double* %A, i64 %indvars.iv
+  %3 = load double* %2, align 8
+  %4 = fmul fast double %prod.01, %3
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %._crit_edge, label %.lr.ph
+
+._crit_edge:                                      ; preds = %.lr.ph, %0
+  %prod.0.lcssa = phi double [ 0.000000e+00, %0 ], [ %4, %.lr.ph ]
+  ret double %prod.0.lcssa
+}
+
+;CHECK:foo_I8
+;CHECK: xor <16 x i8>
+;CHECK:ret
+define signext i8 @foo_I8(i8* nocapture %A, i32 %n) nounwind uwtable readonly ssp {
+  %1 = icmp sgt i32 %n, 0
+  br i1 %1, label %.lr.ph, label %._crit_edge
+
+.lr.ph:                                           ; preds = %0, %.lr.ph
+  %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ]
+  %red.01 = phi i8 [ %4, %.lr.ph ], [ 0, %0 ]
+  %2 = getelementptr inbounds i8* %A, i64 %indvars.iv
+  %3 = load i8* %2, align 1
+  %4 = xor i8 %3, %red.01
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %._crit_edge, label %.lr.ph
+
+._crit_edge:                                      ; preds = %.lr.ph, %0
+  %red.0.lcssa = phi i8 [ 0, %0 ], [ %4, %.lr.ph ]
+  ret i8 %red.0.lcssa
+}
+
+
diff --git a/test/Transforms/LoopVectorize/X86/avx1.ll b/test/Transforms/LoopVectorize/X86/avx1.ll
index a2d176a534..6c0366eae9 100644
--- a/test/Transforms/LoopVectorize/X86/avx1.ll
+++ b/test/Transforms/LoopVectorize/X86/avx1.ll
@@ -27,7 +27,7 @@ define i32 @read_mod_write_single_ptr(float* nocapture %a, i32 %n) nounwind uwta
 
 
 ;CHECK: @read_mod_i64
-;CHECK: load <8 x i64>
+;CHECK: load <2 x i64>
 ;CHECK: ret i32
 define i32 @read_mod_i64(i64* nocapture %a, i32 %n) nounwind uwtable ssp {
   %1 = icmp sgt i32 %n, 0
@@ -37,7 +37,7 @@ define i32 @read_mod_i64(i64* nocapture %a, i32 %n) nounwind uwtable ssp {
   %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ]
   %2 = getelementptr inbounds i64* %a, i64 %indvars.iv
   %3 = load i64* %2, align 4
-  %4 = mul i64 %3, 3
+  %4 = add i64 %3, 3
   store i64 %4, i64* %2, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
diff --git a/test/Transforms/LoopVectorize/X86/conversion-cost.ll b/test/Transforms/LoopVectorize/X86/conversion-cost.ll
index 8f1bb545fa..23d9233544 100644
--- a/test/Transforms/LoopVectorize/X86/conversion-cost.ll
+++ b/test/Transforms/LoopVectorize/X86/conversion-cost.ll
@@ -4,7 +4,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 target triple = "x86_64-apple-macosx10.8.0"
 
 ;CHECK: @conversion_cost1
-;CHECK: store <2 x i8>
+;CHECK: store <32 x i8>
 ;CHECK: ret
 define i32 @conversion_cost1(i32 %n, i8* nocapture %A, float* nocapture %B) nounwind uwtable ssp {
   %1 = icmp sgt i32 %n, 3
diff --git a/test/Transforms/LoopVectorize/X86/cost-model.ll b/test/Transforms/LoopVectorize/X86/cost-model.ll
index 628f9912c8..b7f479acf9 100644
--- a/test/Transforms/LoopVectorize/X86/cost-model.ll
+++ b/test/Transforms/LoopVectorize/X86/cost-model.ll
@@ -8,8 +8,11 @@ target triple = "x86_64-apple-macosx10.8.0"
 @d = common global [2048 x i32] zeroinitializer, align 16
 @a = common global [2048 x i32] zeroinitializer, align 16
 
+; The program below gathers and scatters data. We better not vectorize it.
 ;CHECK: cost_model_1
-;CHECK: <4 x i32>
+;CHECK-NOT: <2 x i32>
+;CHECK-NOT: <4 x i32>
+;CHECK-NOT: <8 x i32>
 ;CHECK: ret void
 define void @cost_model_1() nounwind uwtable noinline ssp {
 entry:
diff --git a/test/Transforms/LoopVectorize/X86/gcc-examples.ll b/test/Transforms/LoopVectorize/X86/gcc-examples.ll
index 574c529834..d2d0eac305 100644
--- a/test/Transforms/LoopVectorize/X86/gcc-examples.ll
+++ b/test/Transforms/LoopVectorize/X86/gcc-examples.ll
@@ -1,4 +1,5 @@
-; RUN: opt < %s  -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7 -dce -instcombine -licm -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7 -dce -instcombine -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7 -force-vector-unroll=0 -dce -instcombine -S | FileCheck %s -check-prefix=UNROLL
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
@@ -9,10 +10,19 @@ target triple = "x86_64-apple-macosx10.8.0"
 
 ; Select VF = 8;
 ;CHECK: @example1
-;CHECK: load <8 x i32>
-;CHECK: add nsw <8 x i32>
-;CHECK: store <8 x i32>
+;CHECK: load <4 x i32>
+;CHECK: add nsw <4 x i32>
+;CHECK: store <4 x i32>
 ;CHECK: ret void
+
+;UNROLL: @example1
+;UNROLL: load <4 x i32>
+;UNROLL: load <4 x i32>
+;UNROLL: add nsw <4 x i32>
+;UNROLL: add nsw <4 x i32>
+;UNROLL: store <4 x i32>
+;UNROLL: store <4 x i32>
+;UNROLL: ret void
 define void @example1() nounwind uwtable ssp {
   br label %1
 
@@ -34,13 +44,18 @@ define void @example1() nounwind uwtable ssp {
   ret void
 }
 
-
-; Select VF=4 because sext <8 x i1> to <8 x i32> is expensive. 
+; Select VF=4 because sext <8 x i1> to <8 x i32> is expensive.
 ;CHECK: @example10b
 ;CHECK: load <4 x i16>
 ;CHECK: sext <4 x i16>
 ;CHECK: store <4 x i32>
 ;CHECK: ret void
+;UNROLL: @example10b
+;UNROLL: load <4 x i16>
+;UNROLL: load <4 x i16>
+;UNROLL: store <4 x i32>
+;UNROLL: store <4 x i32>
+;UNROLL: ret void
 define void @example10b(i16* noalias nocapture %sa, i16* noalias nocapture %sb, i16* noalias nocapture %sc, i32* noalias nocapture %ia, i32* noalias nocapture %ib, i32* noalias nocapture %ic) nounwind uwtable ssp {
   br label %1
 
diff --git a/test/Transforms/LoopVectorize/X86/min-trip-count-switch.ll b/test/Transforms/LoopVectorize/X86/min-trip-count-switch.ll
new file mode 100644
index 0000000000..186fba87d6
--- /dev/null
+++ b/test/Transforms/LoopVectorize/X86/min-trip-count-switch.ll
@@ -0,0 +1,28 @@
+; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -vectorizer-min-trip-count=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; CHECK: <4 x float>
+define void @trivial_loop(float* nocapture %a) nounwind uwtable optsize {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds float* %a, i64 %indvars.iv
+  %0 = load float* %arrayidx, align 4, !tbaa !0
+  %add = fadd float %0, 1.000000e+00
+  store float %add, float* %arrayidx, align 4, !tbaa !0
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 8
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+!0 = metadata !{metadata !"float", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/Transforms/LoopVectorize/X86/no-vector.ll b/test/Transforms/LoopVectorize/X86/no-vector.ll
new file mode 100644
index 0000000000..692eec9895
--- /dev/null
+++ b/test/Transforms/LoopVectorize/X86/no-vector.ll
@@ -0,0 +1,22 @@
+; RUN: opt -S -mtriple=i386-unknown-freebsd -mcpu=i486 -loop-vectorize < %s
+
+define i32 @PR14639(i8* nocapture %s, i32 %len) nounwind {
+entry:
+  %cmp4 = icmp sgt i32 %len, 0
+  br i1 %cmp4, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.06 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+  %r.05 = phi i32 [ %xor, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i8* %s, i32 %i.06
+  %0 = load i8* %arrayidx, align 1
+  %conv = sext i8 %0 to i32
+  %xor = xor i32 %conv, %r.05
+  %inc = add nsw i32 %i.06, 1
+  %exitcond = icmp eq i32 %inc, %len
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  %r.0.lcssa = phi i32 [ 0, %entry ], [ %xor, %for.body ]
+  ret i32 %r.0.lcssa
+}
diff --git a/test/Transforms/LoopVectorize/X86/parallel-loops-after-reg2mem.ll b/test/Transforms/LoopVectorize/X86/parallel-loops-after-reg2mem.ll
new file mode 100644
index 0000000000..452d0df133
--- /dev/null
+++ b/test/Transforms/LoopVectorize/X86/parallel-loops-after-reg2mem.ll
@@ -0,0 +1,52 @@
+; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; The parallel loop has been invalidated by the new memory accesses introduced
+; by reg2mem (Loop::isParallel() starts to return false). Ensure the loop is
+; now non-vectorizable.
+
+;CHECK-NOT: <4 x i32>
+define void @parallel_loop(i32* nocapture %a, i32* nocapture %b) nounwind uwtable {
+entry:
+  %indvars.iv.next.reg2mem = alloca i64
+  %indvars.iv.reg2mem = alloca i64
+  %"reg2mem alloca point" = bitcast i32 0 to i32
+  store i64 0, i64* %indvars.iv.reg2mem
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.for.body_crit_edge, %entry
+  %indvars.iv.reload = load i64* %indvars.iv.reg2mem
+  %arrayidx = getelementptr inbounds i32* %b, i64 %indvars.iv.reload
+  %0 = load i32* %arrayidx, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !3
+  %arrayidx2 = getelementptr inbounds i32* %a, i64 %indvars.iv.reload
+  %1 = load i32* %arrayidx2, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !3
+  %idxprom3 = sext i32 %1 to i64
+  %arrayidx4 = getelementptr inbounds i32* %a, i64 %idxprom3
+  store i32 %0, i32* %arrayidx4, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !3
+  %indvars.iv.next = add i64 %indvars.iv.reload, 1
+  ; A new store without the parallel metadata here:
+  store i64 %indvars.iv.next, i64* %indvars.iv.next.reg2mem
+  %indvars.iv.next.reload1 = load i64* %indvars.iv.next.reg2mem
+  %arrayidx6 = getelementptr inbounds i32* %b, i64 %indvars.iv.next.reload1
+  %2 = load i32* %arrayidx6, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !3
+  store i32 %2, i32* %arrayidx2, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !3
+  %indvars.iv.next.reload = load i64* %indvars.iv.next.reg2mem
+  %lftr.wideiv = trunc i64 %indvars.iv.next.reload to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 512
+  br i1 %exitcond, label %for.end, label %for.body.for.body_crit_edge, !llvm.loop.parallel !3
+
+for.body.for.body_crit_edge:                      ; preds = %for.body
+  %indvars.iv.next.reload2 = load i64* %indvars.iv.next.reg2mem
+  store i64 %indvars.iv.next.reload2, i64* %indvars.iv.reg2mem
+  br label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
+!3 = metadata !{metadata !3}
diff --git a/test/Transforms/LoopVectorize/X86/parallel-loops.ll b/test/Transforms/LoopVectorize/X86/parallel-loops.ll
new file mode 100644
index 0000000000..f648722734
--- /dev/null
+++ b/test/Transforms/LoopVectorize/X86/parallel-loops.ll
@@ -0,0 +1,114 @@
+; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; A tricky loop:
+;
+; void loop(int *a, int *b) {
+;    for (int i = 0; i < 512; ++i) {
+;        a[a[i]] = b[i];
+;        a[i] = b[i+1];
+;    }
+;}
+
+;CHECK: @loop
+;CHECK-NOT: <4 x i32>
+define void @loop(i32* nocapture %a, i32* nocapture %b) nounwind uwtable {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds i32* %b, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4, !tbaa !0
+  %arrayidx2 = getelementptr inbounds i32* %a, i64 %indvars.iv
+  %1 = load i32* %arrayidx2, align 4, !tbaa !0
+  %idxprom3 = sext i32 %1 to i64
+  %arrayidx4 = getelementptr inbounds i32* %a, i64 %idxprom3
+  store i32 %0, i32* %arrayidx4, align 4, !tbaa !0
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %arrayidx6 = getelementptr inbounds i32* %b, i64 %indvars.iv.next
+  %2 = load i32* %arrayidx6, align 4, !tbaa !0
+  store i32 %2, i32* %arrayidx2, align 4, !tbaa !0
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 512
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+; The same loop with parallel loop metadata added to the loop branch
+; and the memory instructions.
+
+;CHECK: @parallel_loop
+;CHECK: <4 x i32>
+define void @parallel_loop(i32* nocapture %a, i32* nocapture %b) nounwind uwtable {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds i32* %b, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !3
+  %arrayidx2 = getelementptr inbounds i32* %a, i64 %indvars.iv
+  %1 = load i32* %arrayidx2, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !3
+  %idxprom3 = sext i32 %1 to i64
+  %arrayidx4 = getelementptr inbounds i32* %a, i64 %idxprom3
+  ; This store might have originated from inlining a function with a parallel
+  ; loop. Refers to a list with the "original loop reference" (!4) also included.
+  store i32 %0, i32* %arrayidx4, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !5
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %arrayidx6 = getelementptr inbounds i32* %b, i64 %indvars.iv.next
+  %2 = load i32* %arrayidx6, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !3
+  store i32 %2, i32* %arrayidx2, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !3
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 512
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop.parallel !3
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+; The same loop with an illegal parallel loop metadata: the memory
+; accesses refer to a different loop's identifier.
+
+;CHECK: @mixed_metadata
+;CHECK-NOT: <4 x i32>
+
+define void @mixed_metadata(i32* nocapture %a, i32* nocapture %b) nounwind uwtable {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds i32* %b, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !6
+  %arrayidx2 = getelementptr inbounds i32* %a, i64 %indvars.iv
+  %1 = load i32* %arrayidx2, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !6
+  %idxprom3 = sext i32 %1 to i64
+  %arrayidx4 = getelementptr inbounds i32* %a, i64 %idxprom3
+  ; This refers to the loop marked with !7 which we are not in at the moment.
+  ; It should prevent detecting as a parallel loop.
+  store i32 %0, i32* %arrayidx4, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !7
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %arrayidx6 = getelementptr inbounds i32* %b, i64 %indvars.iv.next
+  %2 = load i32* %arrayidx6, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !6
+  store i32 %2, i32* %arrayidx2, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !6
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 512
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop.parallel !6
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
+!3 = metadata !{metadata !3}
+!4 = metadata !{metadata !4}
+!5 = metadata !{metadata !3, metadata !4}
+!6 = metadata !{metadata !6}
+!7 = metadata !{metadata !7}
diff --git a/test/Transforms/LoopVectorize/X86/reduction-crash.ll b/test/Transforms/LoopVectorize/X86/reduction-crash.ll
new file mode 100644
index 0000000000..f580846a02
--- /dev/null
+++ b/test/Transforms/LoopVectorize/X86/reduction-crash.ll
@@ -0,0 +1,35 @@
+; RUN: opt -S -loop-vectorize -mcpu=prescott < %s | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128"
+target triple = "i386-apple-darwin"
+
+; PR15344
+define void @test1(float* nocapture %arg, i32 %arg1) nounwind {
+; CHECK: @test1
+; CHECK: preheader
+; CHECK: insertelement <2 x double> zeroinitializer, double %tmp, i32 0
+; CHECK: vector.memcheck
+
+bb:
+  br label %bb2
+
+bb2:                                              ; preds = %bb
+  %tmp = load double* null, align 8
+  br i1 undef, label %bb3, label %bb12
+
+bb3:                                              ; preds = %bb3, %bb2
+  %tmp4 = phi double [ %tmp9, %bb3 ], [ %tmp, %bb2 ]
+  %tmp5 = phi i32 [ %tmp8, %bb3 ], [ 0, %bb2 ]
+  %tmp6 = getelementptr inbounds [16 x double]* undef, i32 0, i32 %tmp5
+  %tmp7 = load double* %tmp6, align 4
+  %tmp8 = add nsw i32 %tmp5, 1
+  %tmp9 = fadd fast double %tmp4, undef
+  %tmp10 = getelementptr inbounds float* %arg, i32 %tmp5
+  store float undef, float* %tmp10, align 4
+  %tmp11 = icmp eq i32 %tmp8, %arg1
+  br i1 %tmp11, label %bb12, label %bb3
+
+bb12:                                             ; preds = %bb3, %bb2
+  %tmp13 = phi double [ %tmp, %bb2 ], [ %tmp9, %bb3 ]
+  ret void
+}
diff --git a/test/Transforms/LoopVectorize/X86/small-size.ll b/test/Transforms/LoopVectorize/X86/small-size.ll
new file mode 100644
index 0000000000..f390b33c03
--- /dev/null
+++ b/test/Transforms/LoopVectorize/X86/small-size.ll
@@ -0,0 +1,170 @@
+; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+@b = common global [2048 x i32] zeroinitializer, align 16
+@c = common global [2048 x i32] zeroinitializer, align 16
+@a = common global [2048 x i32] zeroinitializer, align 16
+@G = common global [32 x [1024 x i32]] zeroinitializer, align 16
+@ub = common global [1024 x i32] zeroinitializer, align 16
+@uc = common global [1024 x i32] zeroinitializer, align 16
+@d = common global [2048 x i32] zeroinitializer, align 16
+@fa = common global [1024 x float] zeroinitializer, align 16
+@fb = common global [1024 x float] zeroinitializer, align 16
+@ic = common global [1024 x i32] zeroinitializer, align 16
+@da = common global [1024 x float] zeroinitializer, align 16
+@db = common global [1024 x float] zeroinitializer, align 16
+@dc = common global [1024 x float] zeroinitializer, align 16
+@dd = common global [1024 x float] zeroinitializer, align 16
+@dj = common global [1024 x i32] zeroinitializer, align 16
+
+; We can optimize this test without a tail.
+;CHECK: @example1
+;CHECK: load <4 x i32>
+;CHECK: add nsw <4 x i32>
+;CHECK: store <4 x i32>
+;CHECK: ret void
+define void @example1() optsize {
+  br label %1
+
+; <label>:1                                       ; preds = %1, %0
+  %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
+  %2 = getelementptr inbounds [2048 x i32]* @b, i64 0, i64 %indvars.iv
+  %3 = load i32* %2, align 4
+  %4 = getelementptr inbounds [2048 x i32]* @c, i64 0, i64 %indvars.iv
+  %5 = load i32* %4, align 4
+  %6 = add nsw i32 %5, %3
+  %7 = getelementptr inbounds [2048 x i32]* @a, i64 0, i64 %indvars.iv
+  store i32 %6, i32* %7, align 4
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 256
+  br i1 %exitcond, label %8, label %1
+
+; <label>:8                                       ; preds = %1
+  ret void
+}
+
+; Can't vectorize in 'optsize' mode because we need a tail.
+;CHECK: @example2
+;CHECK-NOT: store <4 x i32>
+;CHECK: ret void
+define void @example2(i32 %n, i32 %x) optsize {
+  %1 = icmp sgt i32 %n, 0
+  br i1 %1, label %.lr.ph5, label %.preheader
+
+..preheader_crit_edge:                            ; preds = %.lr.ph5
+  %phitmp = sext i32 %n to i64
+  br label %.preheader
+
+.preheader:                                       ; preds = %..preheader_crit_edge, %0
+  %i.0.lcssa = phi i64 [ %phitmp, %..preheader_crit_edge ], [ 0, %0 ]
+  %2 = icmp eq i32 %n, 0
+  br i1 %2, label %._crit_edge, label %.lr.ph
+
+.lr.ph5:                                          ; preds = %0, %.lr.ph5
+  %indvars.iv6 = phi i64 [ %indvars.iv.next7, %.lr.ph5 ], [ 0, %0 ]
+  %3 = getelementptr inbounds [2048 x i32]* @b, i64 0, i64 %indvars.iv6
+  store i32 %x, i32* %3, align 4
+  %indvars.iv.next7 = add i64 %indvars.iv6, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next7 to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %..preheader_crit_edge, label %.lr.ph5
+
+.lr.ph:                                           ; preds = %.preheader, %.lr.ph
+  %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ %i.0.lcssa, %.preheader ]
+  %.02 = phi i32 [ %4, %.lr.ph ], [ %n, %.preheader ]
+  %4 = add nsw i32 %.02, -1
+  %5 = getelementptr inbounds [2048 x i32]* @b, i64 0, i64 %indvars.iv
+  %6 = load i32* %5, align 4
+  %7 = getelementptr inbounds [2048 x i32]* @c, i64 0, i64 %indvars.iv
+  %8 = load i32* %7, align 4
+  %9 = and i32 %8, %6
+  %10 = getelementptr inbounds [2048 x i32]* @a, i64 0, i64 %indvars.iv
+  store i32 %9, i32* %10, align 4
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %11 = icmp eq i32 %4, 0
+  br i1 %11, label %._crit_edge, label %.lr.ph
+
+._crit_edge:                                      ; preds = %.lr.ph, %.preheader
+  ret void
+}
+
+; N is unknown, we need a tail. Can't vectorize.
+;CHECK: @example3
+;CHECK-NOT: <4 x i32>
+;CHECK: ret void
+define void @example3(i32 %n, i32* noalias nocapture %p, i32* noalias nocapture %q) optsize {
+  %1 = icmp eq i32 %n, 0
+  br i1 %1, label %._crit_edge, label %.lr.ph
+
+.lr.ph:                                           ; preds = %0, %.lr.ph
+  %.05 = phi i32 [ %2, %.lr.ph ], [ %n, %0 ]
+  %.014 = phi i32* [ %5, %.lr.ph ], [ %p, %0 ]
+  %.023 = phi i32* [ %3, %.lr.ph ], [ %q, %0 ]
+  %2 = add nsw i32 %.05, -1
+  %3 = getelementptr inbounds i32* %.023, i64 1
+  %4 = load i32* %.023, align 16
+  %5 = getelementptr inbounds i32* %.014, i64 1
+  store i32 %4, i32* %.014, align 16
+  %6 = icmp eq i32 %2, 0
+  br i1 %6, label %._crit_edge, label %.lr.ph
+
+._crit_edge:                                      ; preds = %.lr.ph, %0
+  ret void
+}
+
+
+; We can't vectorize this one because we need a runtime ptr check.
+;CHECK: @example23
+;CHECK-NOT: <4 x i32>
+;CHECK: ret void
+define void @example23(i16* nocapture %src, i32* nocapture %dst) optsize {
+  br label %1
+
+; <label>:1                                       ; preds = %1, %0
+  %.04 = phi i16* [ %src, %0 ], [ %2, %1 ]
+  %.013 = phi i32* [ %dst, %0 ], [ %6, %1 ]
+  %i.02 = phi i32 [ 0, %0 ], [ %7, %1 ]
+  %2 = getelementptr inbounds i16* %.04, i64 1
+  %3 = load i16* %.04, align 2
+  %4 = zext i16 %3 to i32
+  %5 = shl nuw nsw i32 %4, 7
+  %6 = getelementptr inbounds i32* %.013, i64 1
+  store i32 %5, i32* %.013, align 4
+  %7 = add nsw i32 %i.02, 1
+  %exitcond = icmp eq i32 %7, 256
+  br i1 %exitcond, label %8, label %1
+
+; <label>:8                                       ; preds = %1
+  ret void
+}
+
+
+; We CAN vectorize this example because the pointers are marked as noalias.
+;CHECK: @example23b
+;CHECK: <4 x i32>
+;CHECK: ret void
+define void @example23b(i16* noalias nocapture %src, i32* noalias nocapture %dst) optsize {
+  br label %1
+
+; <label>:1                                       ; preds = %1, %0
+  %.04 = phi i16* [ %src, %0 ], [ %2, %1 ]
+  %.013 = phi i32* [ %dst, %0 ], [ %6, %1 ]
+  %i.02 = phi i32 [ 0, %0 ], [ %7, %1 ]
+  %2 = getelementptr inbounds i16* %.04, i64 1
+  %3 = load i16* %.04, align 2
+  %4 = zext i16 %3 to i32
+  %5 = shl nuw nsw i32 %4, 7
+  %6 = getelementptr inbounds i32* %.013, i64 1
+  store i32 %5, i32* %.013, align 4
+  %7 = add nsw i32 %i.02, 1
+  %exitcond = icmp eq i32 %7, 256
+  br i1 %exitcond, label %8, label %1
+
+; <label>:8                                       ; preds = %1
+  ret void
+}
+
+
diff --git a/test/Transforms/LoopVectorize/X86/struct-store.ll b/test/Transforms/LoopVectorize/X86/struct-store.ll
new file mode 100644
index 0000000000..a995e43a5a
--- /dev/null
+++ b/test/Transforms/LoopVectorize/X86/struct-store.ll
@@ -0,0 +1,27 @@
+; RUN: opt < %s -loop-vectorize -mtriple=x86_64-unknown-linux-gnu -S
+
+; Make sure we are not crashing on this one.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@glbl = external global [16 x { i64, i64 }], align 16
+
+declare void @fn()
+
+define void @test() {
+entry:
+  br label %loop
+
+loop:
+  %indvars.iv = phi i64 [ %indvars.iv.next, %loop ], [ 0, %entry ]
+  %tmp = getelementptr inbounds [16 x { i64, i64 }]* @glbl, i64 0, i64 %indvars.iv
+  store { i64, i64 } { i64 ptrtoint (void ()* @fn to i64), i64 0 }, { i64, i64 }* %tmp, align 16
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp ne i32 %lftr.wideiv, 16
+  br i1 %exitcond, label %loop, label %exit
+
+exit:
+  ret void
+}
diff --git a/test/Transforms/LoopVectorize/X86/unroll-small-loops.ll b/test/Transforms/LoopVectorize/X86/unroll-small-loops.ll
new file mode 100644
index 0000000000..ef63a145d0
--- /dev/null
+++ b/test/Transforms/LoopVectorize/X86/unroll-small-loops.ll
@@ -0,0 +1,50 @@
+; RUN: opt < %s  -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx -force-vector-width=4 -force-vector-unroll=0 -dce -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+;CHECK: @foo
+;CHECK: load <4 x i32>
+;CHECK-NOT: load <4 x i32>
+;CHECK: store <4 x i32>
+;CHECK-NOT: store <4 x i32>
+;CHECK: ret
+define i32 @foo(i32* nocapture %A) nounwind uwtable ssp {
+  br label %1
+
+; <label>:1                                       ; preds = %1, %0
+  %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
+  %2 = getelementptr inbounds i32* %A, i64 %indvars.iv
+  %3 = load i32* %2, align 4
+  %4 = add nsw i32 %3, 6
+  store i32 %4, i32* %2, align 4
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 100
+  br i1 %exitcond, label %5, label %1
+
+; <label>:5                                       ; preds = %1
+  ret i32 undef
+}
+
+;CHECK: @bar
+;CHECK: store <4 x i32>
+;CHECK: store <4 x i32>
+;CHECK: ret
+define i32 @bar(i32* nocapture %A, i32 %n) nounwind uwtable ssp {
+  %1 = icmp sgt i32 %n, 0
+  br i1 %1, label %.lr.ph, label %._crit_edge
+
+.lr.ph:                                           ; preds = %0, %.lr.ph
+  %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ]
+  %2 = getelementptr inbounds i32* %A, i64 %indvars.iv
+  %3 = load i32* %2, align 4
+  %4 = add nsw i32 %3, 6
+  store i32 %4, i32* %2, align 4
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %._crit_edge, label %.lr.ph
+
+._crit_edge:                                      ; preds = %.lr.ph, %0
+  ret i32 undef
+}
diff --git a/test/Transforms/LoopVectorize/X86/unroll_selection.ll b/test/Transforms/LoopVectorize/X86/unroll_selection.ll
new file mode 100644
index 0000000000..2d7b663804
--- /dev/null
+++ b/test/Transforms/LoopVectorize/X86/unroll_selection.ll
@@ -0,0 +1,71 @@
+; RUN: opt < %s  -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx -force-vector-width=4 -force-vector-unroll=0 -dce -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+; Don't unroll when we have register pressure.
+;CHECK: reg_pressure
+;CHECK: load <4 x double>
+;CHECK-NOT: load  <4 x double>
+;CHECK: store <4 x double>
+;CHECK-NOT: store <4 x double>
+;CHECK: ret
+define void @reg_pressure(double* nocapture %A, i32 %n) nounwind uwtable ssp {
+  %1 = sext i32 %n to i64
+  br label %2
+
+; <label>:2                                       ; preds = %2, %0
+  %indvars.iv = phi i64 [ %indvars.iv.next, %2 ], [ %1, %0 ]
+  %3 = getelementptr inbounds double* %A, i64 %indvars.iv
+  %4 = load double* %3, align 8
+  %5 = fadd double %4, 3.000000e+00
+  %6 = fmul double %4, 2.000000e+00
+  %7 = fadd double %5, %6
+  %8 = fadd double %7, 2.000000e+00
+  %9 = fmul double %8, 5.000000e-01
+  %10 = fadd double %6, %9
+  %11 = fsub double %10, %5
+  %12 = fadd double %4, %11
+  %13 = fdiv double %8, %12
+  %14 = fmul double %13, %8
+  %15 = fmul double %6, %14
+  %16 = fmul double %5, %15
+  %17 = fadd double %16, -3.000000e+00
+  %18 = fsub double %4, %5
+  %19 = fadd double %6, %18
+  %20 = fadd double %13, %19
+  %21 = fadd double %20, %17
+  %22 = fadd double %21, 3.000000e+00
+  %23 = fmul double %4, %22
+  store double %23, double* %3, align 8
+  %indvars.iv.next = add i64 %indvars.iv, -1
+  %24 = trunc i64 %indvars.iv to i32
+  %25 = icmp eq i32 %24, 0
+  br i1 %25, label %26, label %2
+
+; <label>:26                                      ; preds = %2
+  ret void
+}
+
+; This is a small loop. Unroll it twice. 
+;CHECK: small_loop
+;CHECK: xor
+;CHECK: xor
+;CHECK: ret
+define void @small_loop(i16* nocapture %A, i64 %n) nounwind uwtable ssp {
+  %1 = icmp eq i64 %n, 0
+  br i1 %1, label %._crit_edge, label %.lr.ph
+
+.lr.ph:                                           ; preds = %0, %.lr.ph
+  %i.01 = phi i64 [ %5, %.lr.ph ], [ 0, %0 ]
+  %2 = getelementptr inbounds i16* %A, i64 %i.01
+  %3 = load i16* %2, align 2
+  %4 = xor i16 %3, 3
+  store i16 %4, i16* %2, align 2
+  %5 = add i64 %i.01, 1
+  %exitcond = icmp eq i64 %5, %n
+  br i1 %exitcond, label %._crit_edge, label %.lr.ph
+
+._crit_edge:                                      ; preds = %.lr.ph, %0
+  ret void
+}
diff --git a/test/Transforms/LoopVectorize/X86/vector_ptr_load_store.ll b/test/Transforms/LoopVectorize/X86/vector_ptr_load_store.ll
new file mode 100644
index 0000000000..59bb8d0054
--- /dev/null
+++ b/test/Transforms/LoopVectorize/X86/vector_ptr_load_store.ll
@@ -0,0 +1,150 @@
+; RUN: opt -loop-vectorize -mcpu=corei7-avx -debug -S < %s 2>&1 | FileCheck %s
+; REQUIRES: asserts
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+%0 = type { %0*, %1 }
+%1 = type { i8*, i32 }
+
+@p = global [2048 x [8 x i32*]] zeroinitializer, align 16
+@q = global [2048 x i16] zeroinitializer, align 16
+@r = global [2048 x i16] zeroinitializer, align 16
+
+; Tests for widest type
+; Ensure that we count the pointer store in the first test case. We have a
+; consecutive vector of pointers store, therefore we should count it towards the
+; widest vector count.
+;
+; CHECK: test_consecutive_store
+; CHECK: The Widest type: 64 bits
+define void @test_consecutive_store(%0**, %0**, %0** nocapture) nounwind ssp uwtable align 2 {
+  %4 = load %0** %2, align 8
+  %5 = icmp eq %0** %0, %1
+  br i1 %5, label %12, label %6
+
+; <label>:6                                       ; preds = %3
+  br label %7
+
+; <label>:7                                       ; preds = %7, %6
+  %8 = phi %0** [ %0, %6 ], [ %9, %7 ]
+  store %0* %4, %0** %8, align 8
+  %9 = getelementptr inbounds %0** %8, i64 1
+  %10 = icmp eq %0** %9, %1
+  br i1 %10, label %11, label %7
+
+; <label>:11                                      ; preds = %7
+  br label %12
+
+; <label>:12                                      ; preds = %11, %3
+  ret void
+}
+
+; However, if the store of a set of pointers is not to consecutive memory we do
+; NOT count the store towards the widest vector type.
+; In the test case below we add i16 types to store it in an array of pointer,
+; therefore the widest type should be i16.
+; int* p[2048][8];
+; short q[2048];
+;   for (int y = 0; y < 8; ++y)
+;     for (int i = 0; i < 1024; ++i) {
+;       p[i][y] = (int*) (1 + q[i]);
+;     }
+; CHECK: test_nonconsecutive_store
+; CHECK: The Widest type: 16 bits
+define void @test_nonconsecutive_store() nounwind ssp uwtable {
+  br label %1
+
+; <label>:1                                       ; preds = %14, %0
+  %2 = phi i64 [ 0, %0 ], [ %15, %14 ]
+  br label %3
+
+; <label>:3                                       ; preds = %3, %1
+  %4 = phi i64 [ 0, %1 ], [ %11, %3 ]
+  %5 = getelementptr inbounds [2048 x i16]* @q, i64 0, i64 %4
+  %6 = load i16* %5, align 2
+  %7 = sext i16 %6 to i64
+  %8 = add i64 %7, 1
+  %9 = inttoptr i64 %8 to i32*
+  %10 = getelementptr inbounds [2048 x [8 x i32*]]* @p, i64 0, i64 %4, i64 %2
+  store i32* %9, i32** %10, align 8
+  %11 = add i64 %4, 1
+  %12 = trunc i64 %11 to i32
+  %13 = icmp ne i32 %12, 1024
+  br i1 %13, label %3, label %14
+
+; <label>:14                                      ; preds = %3
+  %15 = add i64 %2, 1
+  %16 = trunc i64 %15 to i32
+  %17 = icmp ne i32 %16, 8
+  br i1 %17, label %1, label %18
+
+; <label>:18                                      ; preds = %14
+  ret void
+}
+
+
+@ia = global [1024 x i32*] zeroinitializer, align 16
+@ib = global [1024 x i32] zeroinitializer, align 16
+@ic = global [1024 x i8] zeroinitializer, align 16
+@p2 = global [2048 x [8 x i32*]] zeroinitializer, align 16
+@q2 = global [2048 x i16] zeroinitializer, align 16
+
+;; Now we check the same rules for loads. We should take consecutive loads of
+;; pointer types into account.
+; CHECK: test_consecutive_ptr_load
+; CHECK: The Widest type: 64 bits
+define i8 @test_consecutive_ptr_load() nounwind readonly ssp uwtable {
+  br label %1
+
+; <label>:1                                       ; preds = %1, %0
+  %2 = phi i64 [ 0, %0 ], [ %10, %1 ]
+  %3 = phi i8 [ 0, %0 ], [ %9, %1 ]
+  %4 = getelementptr inbounds [1024 x i32*]* @ia, i32 0, i64 %2
+  %5 = load i32** %4, align 4
+  %6 = ptrtoint i32* %5 to i64
+  %7 = trunc i64 %6 to i8
+  %8 = add i8 %3, 1
+  %9 = add i8 %7, %8
+  %10 = add i64 %2, 1
+  %11 = icmp ne i64 %10, 1024
+  br i1 %11, label %1, label %12
+
+; <label>:12                                      ; preds = %1
+  %13 = phi i8 [ %9, %1 ]
+  ret i8 %13
+}
+
+;; However, we should not take unconsecutive loads of pointers into account.
+; CHECK: test_nonconsecutive_ptr_load
+; CHECK: The Widest type: 16 bits
+define void @test_nonconsecutive_ptr_load() nounwind ssp uwtable {
+  br label %1
+
+; <label>:1                                       ; preds = %13, %0
+  %2 = phi i64 [ 0, %0 ], [ %14, %13 ]
+  br label %3
+
+; <label>:3                                       ; preds = %3, %1
+  %4 = phi i64 [ 0, %1 ], [ %10, %3 ]
+  %5 = getelementptr inbounds [2048 x [8 x i32*]]* @p2, i64 0, i64 %4, i64 %2
+  %6 = getelementptr inbounds [2048 x i16]* @q2, i64 0, i64 %4
+  %7 = load i32** %5, align 2
+  %8 = ptrtoint i32* %7 to i64
+  %9 = trunc i64 %8 to i16
+  store i16 %9, i16* %6, align 8
+  %10 = add i64 %4, 1
+  %11 = trunc i64 %10 to i32
+  %12 = icmp ne i32 %11, 1024
+  br i1 %12, label %3, label %13
+
+; <label>:13                                      ; preds = %3
+  %14 = add i64 %2, 1
+  %15 = trunc i64 %14 to i32
+  %16 = icmp ne i32 %15, 8
+  br i1 %16, label %1, label %17
+
+; <label>:17                                      ; preds = %13
+  ret void
+}
+
diff --git a/test/Transforms/LoopVectorize/bzip_reverse_loops.ll b/test/Transforms/LoopVectorize/bzip_reverse_loops.ll
new file mode 100644
index 0000000000..431e422c2f
--- /dev/null
+++ b/test/Transforms/LoopVectorize/bzip_reverse_loops.ll
@@ -0,0 +1,71 @@
+; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S -enable-if-conversion | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+;CHECK: fc
+;CHECK: load <4 x i16>
+;CHECK-NEXT: shufflevector <4 x i16>
+;CHECK: select <4 x i1>
+;CHECK: store <4 x i16>
+;CHECK: ret
+define void @fc(i16* nocapture %p, i32 %n, i32 %size) nounwind uwtable ssp {
+entry:
+  br label %do.body
+
+do.body:                                          ; preds = %cond.end, %entry
+  %n.addr.0 = phi i32 [ %n, %entry ], [ %dec, %cond.end ]
+  %p.addr.0 = phi i16* [ %p, %entry ], [ %incdec.ptr, %cond.end ]
+  %incdec.ptr = getelementptr inbounds i16* %p.addr.0, i64 -1
+  %0 = load i16* %incdec.ptr, align 2, !tbaa !0
+  %conv = zext i16 %0 to i32
+  %cmp = icmp ult i32 %conv, %size
+  br i1 %cmp, label %cond.end, label %cond.true
+
+cond.true:                                        ; preds = %do.body
+  %sub = sub i32 %conv, %size
+  %phitmp = trunc i32 %sub to i16
+  br label %cond.end
+
+cond.end:                                         ; preds = %do.body, %cond.true
+  %cond = phi i16 [ %phitmp, %cond.true ], [ 0, %do.body ]
+  store i16 %cond, i16* %incdec.ptr, align 2, !tbaa !0
+  %dec = add i32 %n.addr.0, -1
+  %tobool = icmp eq i32 %dec, 0
+  br i1 %tobool, label %do.end, label %do.body
+
+do.end:                                           ; preds = %cond.end
+  ret void
+}
+
+;CHECK: example1
+;CHECK: load <4 x i32>
+;CHECK-NEXT: shufflevector <4 x i32>
+;CHECK: select <4 x i1>
+;CHECK: store <4 x i32>
+;CHECK: ret
+define void @example1(i32* nocapture %a, i32 %n, i32 %wsize) nounwind uwtable ssp {
+entry:
+  br label %do.body
+
+do.body:                                          ; preds = %do.body, %entry
+  %n.addr.0 = phi i32 [ %n, %entry ], [ %dec, %do.body ]
+  %p.0 = phi i32* [ %a, %entry ], [ %incdec.ptr, %do.body ]
+  %incdec.ptr = getelementptr inbounds i32* %p.0, i64 -1
+  %0 = load i32* %incdec.ptr, align 4, !tbaa !3
+  %cmp = icmp slt i32 %0, %wsize
+  %sub = sub nsw i32 %0, %wsize
+  %cond = select i1 %cmp, i32 0, i32 %sub
+  store i32 %cond, i32* %incdec.ptr, align 4, !tbaa !3
+  %dec = add nsw i32 %n.addr.0, -1
+  %tobool = icmp eq i32 %dec, 0
+  br i1 %tobool, label %do.end, label %do.body
+
+do.end:                                           ; preds = %do.body
+  ret void
+}
+
+!0 = metadata !{metadata !"short", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
+!3 = metadata !{metadata !"int", metadata !1}
diff --git a/test/Transforms/LoopVectorize/calloc.ll b/test/Transforms/LoopVectorize/calloc.ll
new file mode 100644
index 0000000000..08c84eff5d
--- /dev/null
+++ b/test/Transforms/LoopVectorize/calloc.ll
@@ -0,0 +1,53 @@
+; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.9.0"
+
+;CHECK: hexit
+;CHECK: zext <4 x i8>
+;CHECK: ret
+
+define noalias i8* @hexit(i8* nocapture %bytes, i64 %length) nounwind uwtable ssp {
+entry:
+  %shl = shl i64 %length, 1
+  %add28 = or i64 %shl, 1
+  %call = tail call i8* @calloc(i64 1, i64 %add28) nounwind
+  %cmp29 = icmp eq i64 %shl, 0
+  br i1 %cmp29, label %for.end, label %for.body.lr.ph
+
+for.body.lr.ph:                                   ; preds = %entry
+  %0 = shl i64 %length, 1
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %for.body.lr.ph
+  %i.030 = phi i64 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
+  %shr = lshr i64 %i.030, 1
+  %arrayidx = getelementptr inbounds i8* %bytes, i64 %shr
+  %1 = load i8* %arrayidx, align 1, !tbaa !0
+  %conv = zext i8 %1 to i32
+  %and = shl i64 %i.030, 2
+  %neg = and i64 %and, 4
+  %and3 = xor i64 %neg, 4
+  %sh_prom = trunc i64 %and3 to i32
+  %shl4 = shl i32 15, %sh_prom
+  %and5 = and i32 %conv, %shl4
+  %shr11 = lshr i32 %and5, %sh_prom
+  %conv13 = and i32 %shr11, 254
+  %cmp15 = icmp ugt i32 %conv13, 9
+  %cond = select i1 %cmp15, i32 87, i32 48
+  %add17 = add nsw i32 %cond, %shr11
+  %conv18 = trunc i32 %add17 to i8
+  %arrayidx19 = getelementptr inbounds i8* %call, i64 %i.030
+  store i8 %conv18, i8* %arrayidx19, align 1, !tbaa !0
+  %inc = add i64 %i.030, 1
+  %exitcond = icmp eq i64 %inc, %0
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret i8* %call
+}
+
+declare noalias i8* @calloc(i64, i64) nounwind
+
+!0 = metadata !{metadata !"omnipotent char", metadata !1}
+!1 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/Transforms/LoopVectorize/cast-induction.ll b/test/Transforms/LoopVectorize/cast-induction.ll
new file mode 100644
index 0000000000..2aa29ed2c8
--- /dev/null
+++ b/test/Transforms/LoopVectorize/cast-induction.ll
@@ -0,0 +1,30 @@
+; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+
+; rdar://problem/12848162
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+@a = common global [2048 x i32] zeroinitializer, align 16
+
+;CHECK: @example12
+;CHECK: trunc i64
+;CHECK: store <4 x i32>
+;CHECK: ret void
+define void @example12() nounwind uwtable ssp {
+  br label %1
+
+; <label>:1                                       ; preds = %1, %0
+  %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
+  %2 = getelementptr inbounds [2048 x i32]* @a, i64 0, i64 %indvars.iv
+  %3 = trunc i64 %indvars.iv to i32
+  store i32 %3, i32* %2, align 4
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 1024
+  br i1 %exitcond, label %4, label %1
+
+; <label>:4                                       ; preds = %1
+  ret void
+}
+
diff --git a/test/Transforms/LoopVectorize/cpp-new-array.ll b/test/Transforms/LoopVectorize/cpp-new-array.ll
index 26902eba9e..da0fb05fe8 100644
--- a/test/Transforms/LoopVectorize/cpp-new-array.ll
+++ b/test/Transforms/LoopVectorize/cpp-new-array.ll
@@ -1,10 +1,10 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
 
 ;CHECK: @cpp_new_arrays
-;CHECK: insertelement <4 x i32>
+;CHECK: sext i32
 ;CHECK: load <4 x float>
 ;CHECK: fadd <4 x float>
 ;CHECK: ret i32
diff --git a/test/Transforms/LoopVectorize/dbg.value.ll b/test/Transforms/LoopVectorize/dbg.value.ll
new file mode 100644
index 0000000000..a2ea9511bb
--- /dev/null
+++ b/test/Transforms/LoopVectorize/dbg.value.ll
@@ -0,0 +1,70 @@
+; RUN: opt < %s -S -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine | FileCheck %s
+; Make sure we vectorize with debugging turned on.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+@A = global [1024 x i32] zeroinitializer, align 16
+@B = global [1024 x i32] zeroinitializer, align 16
+@C = global [1024 x i32] zeroinitializer, align 16
+
+; CHECK: @test
+define i32 @test() #0 {
+entry:
+  tail call void @llvm.dbg.value(metadata !1, i64 0, metadata !9), !dbg !18
+  br label %for.body, !dbg !18
+
+for.body:
+  ;CHECK: load <4 x i32>
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds [1024 x i32]* @B, i64 0, i64 %indvars.iv, !dbg !19
+  %0 = load i32* %arrayidx, align 4, !dbg !19, !tbaa !21
+  %arrayidx2 = getelementptr inbounds [1024 x i32]* @C, i64 0, i64 %indvars.iv, !dbg !19
+  %1 = load i32* %arrayidx2, align 4, !dbg !19, !tbaa !21
+  %add = add nsw i32 %1, %0, !dbg !19
+  %arrayidx4 = getelementptr inbounds [1024 x i32]* @A, i64 0, i64 %indvars.iv, !dbg !19
+  store i32 %add, i32* %arrayidx4, align 4, !dbg !19, !tbaa !21
+  %indvars.iv.next = add i64 %indvars.iv, 1, !dbg !18
+  tail call void @llvm.dbg.value(metadata !{null}, i64 0, metadata !9), !dbg !18
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32, !dbg !18
+  %exitcond = icmp ne i32 %lftr.wideiv, 1024, !dbg !18
+  br i1 %exitcond, label %for.body, label %for.end, !dbg !18
+
+for.end:
+  ret i32 0, !dbg !24
+}
+
+declare void @llvm.dbg.declare(metadata, metadata) #1
+
+declare void @llvm.dbg.value(metadata, i64, metadata) #1
+
+attributes #0 = { nounwind ssp uwtable "fp-contract-model"="standard" "no-frame-pointer-elim" "no-frame-pointer-elim-non-leaf" "realign-stack" "relocation-model"="pic" "ssp-buffers-size"="8" }
+attributes #1 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 786449, i32 0, i32 4, metadata !"test", metadata !"/path/to/somewhere", metadata !"clang", i1 true, i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !2, metadata !11, metadata !""}
+!1 = metadata !{i32 0}
+!2 = metadata !{metadata !3}
+!3 = metadata !{i32 786478, i32 0, metadata !4, metadata !"test", metadata !"test", metadata !"test", metadata !4, i32 5, metadata !5, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 ()* @test, null, null, metadata !8, i32 5}
+!4 = metadata !{i32 786473, metadata !"test", metadata !"/path/to/somewhere", null}
+!5 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !6, i32 0, i32 0}
+!6 = metadata !{metadata !7}
+!7 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5}
+!8 = metadata !{metadata !9}
+!9 = metadata !{i32 786688, metadata !10, metadata !"i", metadata !4, i32 6, metadata !7, i32 0, i32 0}
+!10 = metadata !{i32 786443, metadata !3, i32 6, i32 0, metadata !4, i32 0}
+!11 = metadata !{metadata !12, metadata !16, metadata !17}
+!12 = metadata !{i32 786484, i32 0, null, metadata !"A", metadata !"A", metadata !"", metadata !4, i32 1, metadata !13, i32 0, i32 1, [1024 x i32]* @A, null}
+!13 = metadata !{i32 786433, null, metadata !"", null, i32 0, i64 32768, i64 32, i32 0, i32 0, metadata !7, metadata !14, i32 0, i32 0}
+!14 = metadata !{metadata !15}
+!15 = metadata !{i32 786465, i64 0, i64 1024}
+!16 = metadata !{i32 786484, i32 0, null, metadata !"B", metadata !"B", metadata !"", metadata !4, i32 2, metadata !13, i32 0, i32 1, [1024 x i32]* @B, null}
+!17 = metadata !{i32 786484, i32 0, null, metadata !"C", metadata !"C", metadata !"", metadata !4, i32 3, metadata !13, i32 0, i32 1, [1024 x i32]* @C, null} 
+!18 = metadata !{i32 6, i32 0, metadata !10, null}
+!19 = metadata !{i32 7, i32 0, metadata !20, null}
+!20 = metadata !{i32 786443, metadata !10, i32 6, i32 0, metadata !4, i32 1}
+!21 = metadata !{metadata !"int", metadata !22}
+!22 = metadata !{metadata !"omnipotent char", metadata !23}
+!23 = metadata !{metadata !"Simple C/C++ TBAA"}
+!24 = metadata !{i32 9, i32 0, metadata !3, null}
diff --git a/test/Transforms/LoopVectorize/flags.ll b/test/Transforms/LoopVectorize/flags.ll
index 2f22a76457..656912e178 100644
--- a/test/Transforms/LoopVectorize/flags.ll
+++ b/test/Transforms/LoopVectorize/flags.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/Transforms/LoopVectorize/float-reduction.ll b/test/Transforms/LoopVectorize/float-reduction.ll
new file mode 100644
index 0000000000..565684cccb
--- /dev/null
+++ b/test/Transforms/LoopVectorize/float-reduction.ll
@@ -0,0 +1,29 @@
+; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+;CHECK: @foo
+;CHECK: fadd <4 x float>
+;CHECK: ret
+define float @foo(float* nocapture %A, i32* nocapture %n) nounwind uwtable readonly ssp {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %sum.04 = phi float [ 0.000000e+00, %entry ], [ %add, %for.body ]
+  %arrayidx = getelementptr inbounds float* %A, i64 %indvars.iv
+  %0 = load float* %arrayidx, align 4, !tbaa !0
+  %add = fadd fast float %sum.04, %0
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 200
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret float %add
+}
+
+!0 = metadata !{metadata !"float", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/Transforms/LoopVectorize/gcc-examples.ll b/test/Transforms/LoopVectorize/gcc-examples.ll
index f1bf6cb6d8..f335557c00 100644
--- a/test/Transforms/LoopVectorize/gcc-examples.ll
+++ b/test/Transforms/LoopVectorize/gcc-examples.ll
@@ -1,4 +1,5 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-width=4 -force-vector-unroll=1 -dce -instcombine -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-width=4 -force-vector-unroll=4 -dce -instcombine -S | FileCheck %s -check-prefix=UNROLL
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
@@ -24,6 +25,20 @@ target triple = "x86_64-apple-macosx10.8.0"
 ;CHECK: add nsw <4 x i32>
 ;CHECK: store <4 x i32>
 ;CHECK: ret void
+;UNROLL: @example1
+;UNROLL: load <4 x i32>
+;UNROLL: load <4 x i32>
+;UNROLL: load <4 x i32>
+;UNROLL: load <4 x i32>
+;UNROLL: add nsw <4 x i32>
+;UNROLL: add nsw <4 x i32>
+;UNROLL: add nsw <4 x i32>
+;UNROLL: add nsw <4 x i32>
+;UNROLL: store <4 x i32>
+;UNROLL: store <4 x i32>
+;UNROLL: store <4 x i32>
+;UNROLL: store <4 x i32>
+;UNROLL: ret void
 define void @example1() nounwind uwtable ssp {
   br label %1
 
@@ -48,6 +63,12 @@ define void @example1() nounwind uwtable ssp {
 ;CHECK: @example2
 ;CHECK: store <4 x i32>
 ;CHECK: ret void
+;UNROLL: @example2
+;UNROLL: store <4 x i32>
+;UNROLL: store <4 x i32>
+;UNROLL: store <4 x i32>
+;UNROLL: store <4 x i32>
+;UNROLL: ret void
 define void @example2(i32 %n, i32 %x) nounwind uwtable ssp {
   %1 = icmp sgt i32 %n, 0
   br i1 %1, label %.lr.ph5, label %.preheader
@@ -92,6 +113,12 @@ define void @example2(i32 %n, i32 %x) nounwind uwtable ssp {
 ;CHECK: @example3
 ;CHECK: <4 x i32>
 ;CHECK: ret void
+;UNROLL: @example3
+;UNROLL: <4 x i32>
+;UNROLL: <4 x i32>
+;UNROLL: <4 x i32>
+;UNROLL: <4 x i32>
+;UNROLL: ret void
 define void @example3(i32 %n, i32* noalias nocapture %p, i32* noalias nocapture %q) nounwind uwtable ssp {
   %1 = icmp eq i32 %n, 0
   br i1 %1, label %._crit_edge, label %.lr.ph
@@ -115,6 +142,12 @@ define void @example3(i32 %n, i32* noalias nocapture %p, i32* noalias nocapture
 ;CHECK: @example4
 ;CHECK: load <4 x i32>
 ;CHECK: ret void
+;UNROLL: @example4
+;UNROLL: load <4 x i32>
+;UNROLL: load <4 x i32>
+;UNROLL: load <4 x i32>
+;UNROLL: load <4 x i32>
+;UNROLL: ret void
 define void @example4(i32 %n, i32* noalias nocapture %p, i32* noalias nocapture %q) nounwind uwtable ssp {
   %1 = add nsw i32 %n, -1
   %2 = icmp eq i32 %n, 0
@@ -175,6 +208,12 @@ define void @example4(i32 %n, i32* noalias nocapture %p, i32* noalias nocapture
 ;CHECK: @example8
 ;CHECK: store <4 x i32>
 ;CHECK: ret void
+;UNROLL: @example8
+;UNROLL: store <4 x i32>
+;UNROLL: store <4 x i32>
+;UNROLL: store <4 x i32>
+;UNROLL: store <4 x i32>
+;UNROLL: ret void
 define void @example8(i32 %x) nounwind uwtable ssp {
   br label %.preheader
 
@@ -329,7 +368,7 @@ define void @example11() nounwind uwtable ssp {
 }
 
 ;CHECK: @example12
-;CHECK: trunc <4 x i64>
+;CHECK: trunc i64
 ;CHECK: store <4 x i32>
 ;CHECK: ret void
 define void @example12() nounwind uwtable ssp {
@@ -537,7 +576,8 @@ define void @example14(i32** nocapture %in, i32** nocapture %coeff, i32* nocaptu
 }
 
 ;CHECK: @example21
-;CHECK: <4 x i32>
+;CHECK: load <4 x i32>
+;CHECK: shufflevector {{.*}} <i32 3, i32 2, i32 1, i32 0>
 ;CHECK: ret i32
 define i32 @example21(i32* nocapture %b, i32 %n) nounwind uwtable readonly ssp {
   %1 = icmp sgt i32 %n, 0
diff --git a/test/Transforms/LoopVectorize/global_alias.ll b/test/Transforms/LoopVectorize/global_alias.ll
new file mode 100644
index 0000000000..121da8ba7e
--- /dev/null
+++ b/test/Transforms/LoopVectorize/global_alias.ll
@@ -0,0 +1,1078 @@
+; RUN: opt < %s -O3 -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
+
+%struct.anon = type { [100 x i32], i32, [100 x i32] }
+%struct.anon.0 = type { [100 x [100 x i32]], i32, [100 x [100 x i32]] }
+
+@Foo = common global %struct.anon zeroinitializer, align 4
+@Bar = common global %struct.anon.0 zeroinitializer, align 4
+
+@PB = external global i32*
+@PA = external global i32*
+
+
+;; === First, the tests that should always vectorize, wither statically or by adding run-time checks ===
+
+
+; /// Different objects, positive induction, constant distance
+; int noAlias01 (int a) {
+;   int i;
+;   for (i=0; i<SIZE; i++)
+;     Foo.A[i] = Foo.B[i] + a;
+;   return Foo.A[a];
+; }
+; CHECK: define i32 @noAlias01
+; CHECK: add nsw <4 x i32>
+; CHECK: ret
+
+define i32 @noAlias01(i32 %a) nounwind {
+entry:
+  %a.addr = alloca i32, align 4
+  %i = alloca i32, align 4
+  store i32 %a, i32* %a.addr, align 4
+  store i32 0, i32* %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32* %i, align 4
+  %cmp = icmp slt i32 %0, 100
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %1 = load i32* %i, align 4
+  %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %1
+  %2 = load i32* %arrayidx, align 4
+  %3 = load i32* %a.addr, align 4
+  %add = add nsw i32 %2, %3
+  %4 = load i32* %i, align 4
+  %arrayidx1 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %4
+  store i32 %add, i32* %arrayidx1, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %5 = load i32* %i, align 4
+  %inc = add nsw i32 %5, 1
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %6 = load i32* %a.addr, align 4
+  %arrayidx2 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
+  %7 = load i32* %arrayidx2, align 4
+  ret i32 %7
+}
+
+; /// Different objects, positive induction with widening slide
+; int noAlias02 (int a) {
+;   int i;
+;   for (i=0; i<SIZE-10; i++)
+;     Foo.A[i] = Foo.B[i+10] + a;
+;   return Foo.A[a];
+; }
+; CHECK: define i32 @noAlias02
+; CHECK: add nsw <4 x i32>
+; CHECK: ret
+
+define i32 @noAlias02(i32 %a) {
+entry:
+  %a.addr = alloca i32, align 4
+  %i = alloca i32, align 4
+  store i32 %a, i32* %a.addr, align 4
+  store i32 0, i32* %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32* %i, align 4
+  %cmp = icmp slt i32 %0, 90
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %1 = load i32* %i, align 4
+  %add = add nsw i32 %1, 10
+  %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %add
+  %2 = load i32* %arrayidx, align 4
+  %3 = load i32* %a.addr, align 4
+  %add1 = add nsw i32 %2, %3
+  %4 = load i32* %i, align 4
+  %arrayidx2 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %4
+  store i32 %add1, i32* %arrayidx2, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %5 = load i32* %i, align 4
+  %inc = add nsw i32 %5, 1
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %6 = load i32* %a.addr, align 4
+  %arrayidx3 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
+  %7 = load i32* %arrayidx3, align 4
+  ret i32 %7
+}
+
+; /// Different objects, positive induction with shortening slide
+; int noAlias03 (int a) {
+;   int i;
+;   for (i=0; i<SIZE; i++)
+;     Foo.A[i+10] = Foo.B[i] + a;
+;   return Foo.A[a];
+; }
+; CHECK: define i32 @noAlias03
+; CHECK: add nsw <4 x i32>
+; CHECK: ret
+
+define i32 @noAlias03(i32 %a) {
+entry:
+  %a.addr = alloca i32, align 4
+  %i = alloca i32, align 4
+  store i32 %a, i32* %a.addr, align 4
+  store i32 0, i32* %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32* %i, align 4
+  %cmp = icmp slt i32 %0, 100
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %1 = load i32* %i, align 4
+  %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %1
+  %2 = load i32* %arrayidx, align 4
+  %3 = load i32* %a.addr, align 4
+  %add = add nsw i32 %2, %3
+  %4 = load i32* %i, align 4
+  %add1 = add nsw i32 %4, 10
+  %arrayidx2 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %add1
+  store i32 %add, i32* %arrayidx2, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %5 = load i32* %i, align 4
+  %inc = add nsw i32 %5, 1
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %6 = load i32* %a.addr, align 4
+  %arrayidx3 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
+  %7 = load i32* %arrayidx3, align 4
+  ret i32 %7
+}
+
+; /// Pointer access, positive stride, run-time check added
+; int noAlias04 (int a) {
+;   int i;
+;   for (i=0; i<SIZE; i++)
+;     *(PA+i) = *(PB+i) + a;
+;   return *(PA+a);
+; }
+; CHECK: define i32 @noAlias04
+; CHECK-NOT: add nsw <4 x i32>
+; CHECK: ret
+;
+; TODO: This test vectorizes (with run-time check) on real targets with -O3)
+; Check why it's not being vectorized even when forcing vectorization
+
+define i32 @noAlias04(i32 %a) #0 {
+entry:
+  %a.addr = alloca i32, align 4
+  %i = alloca i32, align 4
+  store i32 %a, i32* %a.addr, align 4
+  store i32 0, i32* %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32* %i, align 4
+  %cmp = icmp slt i32 %0, 100
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %1 = load i32** @PB, align 4
+  %2 = load i32* %i, align 4
+  %add.ptr = getelementptr inbounds i32* %1, i32 %2
+  %3 = load i32* %add.ptr, align 4
+  %4 = load i32* %a.addr, align 4
+  %add = add nsw i32 %3, %4
+  %5 = load i32** @PA, align 4
+  %6 = load i32* %i, align 4
+  %add.ptr1 = getelementptr inbounds i32* %5, i32 %6
+  store i32 %add, i32* %add.ptr1, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %7 = load i32* %i, align 4
+  %inc = add nsw i32 %7, 1
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %8 = load i32** @PA, align 4
+  %9 = load i32* %a.addr, align 4
+  %add.ptr2 = getelementptr inbounds i32* %8, i32 %9
+  %10 = load i32* %add.ptr2, align 4
+  ret i32 %10
+}
+
+; /// Different objects, positive induction, multi-array
+; int noAlias05 (int a) {
+;   int i, N=10;
+;   for (i=0; i<SIZE; i++)
+;     Bar.A[N][i] = Bar.B[N][i] + a;
+;   return Bar.A[N][a];
+; }
+; CHECK: define i32 @noAlias05
+; CHECK: add nsw <4 x i32>
+; CHECK: ret
+
+define i32 @noAlias05(i32 %a) #0 {
+entry:
+  %a.addr = alloca i32, align 4
+  %i = alloca i32, align 4
+  %N = alloca i32, align 4
+  store i32 %a, i32* %a.addr, align 4
+  store i32 10, i32* %N, align 4
+  store i32 0, i32* %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32* %i, align 4
+  %cmp = icmp slt i32 %0, 100
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %1 = load i32* %i, align 4
+  %2 = load i32* %N, align 4
+  %arrayidx = getelementptr inbounds [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 2), i32 0, i32 %2
+  %arrayidx1 = getelementptr inbounds [100 x i32]* %arrayidx, i32 0, i32 %1
+  %3 = load i32* %arrayidx1, align 4
+  %4 = load i32* %a.addr, align 4
+  %add = add nsw i32 %3, %4
+  %5 = load i32* %i, align 4
+  %6 = load i32* %N, align 4
+  %arrayidx2 = getelementptr inbounds [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %6
+  %arrayidx3 = getelementptr inbounds [100 x i32]* %arrayidx2, i32 0, i32 %5
+  store i32 %add, i32* %arrayidx3, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %7 = load i32* %i, align 4
+  %inc = add nsw i32 %7, 1
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %8 = load i32* %a.addr, align 4
+  %9 = load i32* %N, align 4
+  %arrayidx4 = getelementptr inbounds [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %9
+  %arrayidx5 = getelementptr inbounds [100 x i32]* %arrayidx4, i32 0, i32 %8
+  %10 = load i32* %arrayidx5, align 4
+  ret i32 %10
+}
+
+; /// Same objects, positive induction, multi-array, different sub-elements
+; int noAlias06 (int a) {
+;   int i, N=10;
+;   for (i=0; i<SIZE; i++)
+;     Bar.A[N][i] = Bar.A[N+1][i] + a;
+;   return Bar.A[N][a];
+; }
+; CHECK: define i32 @noAlias06
+; CHECK: add nsw <4 x i32>
+; CHECK: ret
+
+define i32 @noAlias06(i32 %a) #0 {
+entry:
+  %a.addr = alloca i32, align 4
+  %i = alloca i32, align 4
+  %N = alloca i32, align 4
+  store i32 %a, i32* %a.addr, align 4
+  store i32 10, i32* %N, align 4
+  store i32 0, i32* %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32* %i, align 4
+  %cmp = icmp slt i32 %0, 100
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %1 = load i32* %i, align 4
+  %2 = load i32* %N, align 4
+  %add = add nsw i32 %2, 1
+  %arrayidx = getelementptr inbounds [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %add
+  %arrayidx1 = getelementptr inbounds [100 x i32]* %arrayidx, i32 0, i32 %1
+  %3 = load i32* %arrayidx1, align 4
+  %4 = load i32* %a.addr, align 4
+  %add2 = add nsw i32 %3, %4
+  %5 = load i32* %i, align 4
+  %6 = load i32* %N, align 4
+  %arrayidx3 = getelementptr inbounds [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %6
+  %arrayidx4 = getelementptr inbounds [100 x i32]* %arrayidx3, i32 0, i32 %5
+  store i32 %add2, i32* %arrayidx4, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %7 = load i32* %i, align 4
+  %inc = add nsw i32 %7, 1
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %8 = load i32* %a.addr, align 4
+  %9 = load i32* %N, align 4
+  %arrayidx5 = getelementptr inbounds [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %9
+  %arrayidx6 = getelementptr inbounds [100 x i32]* %arrayidx5, i32 0, i32 %8
+  %10 = load i32* %arrayidx6, align 4
+  ret i32 %10
+}
+
+; /// Different objects, negative induction, constant distance
+; int noAlias07 (int a) {
+;   int i;
+;   for (i=0; i<SIZE; i++)
+;     Foo.A[SIZE-i-1] = Foo.B[SIZE-i-1] + a;
+;   return Foo.A[a];
+; }
+; CHECK: define i32 @noAlias07
+; CHECK: sub nsw <4 x i32>
+; CHECK: ret
+
+define i32 @noAlias07(i32 %a) #0 {
+entry:
+  %a.addr = alloca i32, align 4
+  %i = alloca i32, align 4
+  store i32 %a, i32* %a.addr, align 4
+  store i32 0, i32* %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32* %i, align 4
+  %cmp = icmp slt i32 %0, 100
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %1 = load i32* %i, align 4
+  %sub = sub nsw i32 100, %1
+  %sub1 = sub nsw i32 %sub, 1
+  %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %sub1
+  %2 = load i32* %arrayidx, align 4
+  %3 = load i32* %a.addr, align 4
+  %add = add nsw i32 %2, %3
+  %4 = load i32* %i, align 4
+  %sub2 = sub nsw i32 100, %4
+  %sub3 = sub nsw i32 %sub2, 1
+  %arrayidx4 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %sub3
+  store i32 %add, i32* %arrayidx4, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %5 = load i32* %i, align 4
+  %inc = add nsw i32 %5, 1
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %6 = load i32* %a.addr, align 4
+  %arrayidx5 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
+  %7 = load i32* %arrayidx5, align 4
+  ret i32 %7
+}
+
+; /// Different objects, negative induction, shortening slide
+; int noAlias08 (int a) {
+;   int i;
+;   for (i=0; i<SIZE-10; i++)
+;     Foo.A[SIZE-i-1] = Foo.B[SIZE-i-10] + a;
+;   return Foo.A[a];
+; }
+; CHECK: define i32 @noAlias08
+; CHECK: sub nsw <4 x i32>
+; CHECK: ret
+
+define i32 @noAlias08(i32 %a) #0 {
+entry:
+  %a.addr = alloca i32, align 4
+  %i = alloca i32, align 4
+  store i32 %a, i32* %a.addr, align 4
+  store i32 0, i32* %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32* %i, align 4
+  %cmp = icmp slt i32 %0, 90
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %1 = load i32* %i, align 4
+  %sub = sub nsw i32 100, %1
+  %sub1 = sub nsw i32 %sub, 10
+  %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %sub1
+  %2 = load i32* %arrayidx, align 4
+  %3 = load i32* %a.addr, align 4
+  %add = add nsw i32 %2, %3
+  %4 = load i32* %i, align 4
+  %sub2 = sub nsw i32 100, %4
+  %sub3 = sub nsw i32 %sub2, 1
+  %arrayidx4 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %sub3
+  store i32 %add, i32* %arrayidx4, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %5 = load i32* %i, align 4
+  %inc = add nsw i32 %5, 1
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %6 = load i32* %a.addr, align 4
+  %arrayidx5 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
+  %7 = load i32* %arrayidx5, align 4
+  ret i32 %7
+}
+
+; /// Different objects, negative induction, widening slide
+; int noAlias09 (int a) {
+;   int i;
+;   for (i=0; i<SIZE; i++)
+;     Foo.A[SIZE-i-10] = Foo.B[SIZE-i-1] + a;
+;   return Foo.A[a];
+; }
+; CHECK: define i32 @noAlias09
+; CHECK: sub nsw <4 x i32>
+; CHECK: ret
+
+define i32 @noAlias09(i32 %a) #0 {
+entry:
+  %a.addr = alloca i32, align 4
+  %i = alloca i32, align 4
+  store i32 %a, i32* %a.addr, align 4
+  store i32 0, i32* %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32* %i, align 4
+  %cmp = icmp slt i32 %0, 100
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %1 = load i32* %i, align 4
+  %sub = sub nsw i32 100, %1
+  %sub1 = sub nsw i32 %sub, 1
+  %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %sub1
+  %2 = load i32* %arrayidx, align 4
+  %3 = load i32* %a.addr, align 4
+  %add = add nsw i32 %2, %3
+  %4 = load i32* %i, align 4
+  %sub2 = sub nsw i32 100, %4
+  %sub3 = sub nsw i32 %sub2, 10
+  %arrayidx4 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %sub3
+  store i32 %add, i32* %arrayidx4, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %5 = load i32* %i, align 4
+  %inc = add nsw i32 %5, 1
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %6 = load i32* %a.addr, align 4
+  %arrayidx5 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
+  %7 = load i32* %arrayidx5, align 4
+  ret i32 %7
+}
+
+; /// Pointer access, negative stride, run-time check added
+; int noAlias10 (int a) {
+;   int i;
+;   for (i=0; i<SIZE; i++)
+;     *(PA+SIZE-i-1) = *(PB+SIZE-i-1) + a;
+;   return *(PA+a);
+; }
+; CHECK: define i32 @noAlias10
+; CHECK-NOT: sub nsw <4 x i32>
+; CHECK: ret
+;
+; TODO: This test vectorizes (with run-time check) on real targets with -O3)
+; Check why it's not being vectorized even when forcing vectorization
+
+define i32 @noAlias10(i32 %a) #0 {
+entry:
+  %a.addr = alloca i32, align 4
+  %i = alloca i32, align 4
+  store i32 %a, i32* %a.addr, align 4
+  store i32 0, i32* %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32* %i, align 4
+  %cmp = icmp slt i32 %0, 100
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %1 = load i32** @PB, align 4
+  %add.ptr = getelementptr inbounds i32* %1, i32 100
+  %2 = load i32* %i, align 4
+  %idx.neg = sub i32 0, %2
+  %add.ptr1 = getelementptr inbounds i32* %add.ptr, i32 %idx.neg
+  %add.ptr2 = getelementptr inbounds i32* %add.ptr1, i32 -1
+  %3 = load i32* %add.ptr2, align 4
+  %4 = load i32* %a.addr, align 4
+  %add = add nsw i32 %3, %4
+  %5 = load i32** @PA, align 4
+  %add.ptr3 = getelementptr inbounds i32* %5, i32 100
+  %6 = load i32* %i, align 4
+  %idx.neg4 = sub i32 0, %6
+  %add.ptr5 = getelementptr inbounds i32* %add.ptr3, i32 %idx.neg4
+  %add.ptr6 = getelementptr inbounds i32* %add.ptr5, i32 -1
+  store i32 %add, i32* %add.ptr6, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %7 = load i32* %i, align 4
+  %inc = add nsw i32 %7, 1
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %8 = load i32** @PA, align 4
+  %9 = load i32* %a.addr, align 4
+  %add.ptr7 = getelementptr inbounds i32* %8, i32 %9
+  %10 = load i32* %add.ptr7, align 4
+  ret i32 %10
+}
+
+; /// Different objects, negative induction, multi-array
+; int noAlias11 (int a) {
+;   int i, N=10;
+;   for (i=0; i<SIZE; i++)
+;     Bar.A[N][SIZE-i-1] = Bar.B[N][SIZE-i-1] + a;
+;   return Bar.A[N][a];
+; }
+; CHECK: define i32 @noAlias11
+; CHECK: sub nsw <4 x i32>
+; CHECK: ret
+
+define i32 @noAlias11(i32 %a) #0 {
+entry:
+  %a.addr = alloca i32, align 4
+  %i = alloca i32, align 4
+  %N = alloca i32, align 4
+  store i32 %a, i32* %a.addr, align 4
+  store i32 10, i32* %N, align 4
+  store i32 0, i32* %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32* %i, align 4
+  %cmp = icmp slt i32 %0, 100
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %1 = load i32* %i, align 4
+  %sub = sub nsw i32 100, %1
+  %sub1 = sub nsw i32 %sub, 1
+  %2 = load i32* %N, align 4
+  %arrayidx = getelementptr inbounds [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 2), i32 0, i32 %2
+  %arrayidx2 = getelementptr inbounds [100 x i32]* %arrayidx, i32 0, i32 %sub1
+  %3 = load i32* %arrayidx2, align 4
+  %4 = load i32* %a.addr, align 4
+  %add = add nsw i32 %3, %4
+  %5 = load i32* %i, align 4
+  %sub3 = sub nsw i32 100, %5
+  %sub4 = sub nsw i32 %sub3, 1
+  %6 = load i32* %N, align 4
+  %arrayidx5 = getelementptr inbounds [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %6
+  %arrayidx6 = getelementptr inbounds [100 x i32]* %arrayidx5, i32 0, i32 %sub4
+  store i32 %add, i32* %arrayidx6, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %7 = load i32* %i, align 4
+  %inc = add nsw i32 %7, 1
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %8 = load i32* %a.addr, align 4
+  %9 = load i32* %N, align 4
+  %arrayidx7 = getelementptr inbounds [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %9
+  %arrayidx8 = getelementptr inbounds [100 x i32]* %arrayidx7, i32 0, i32 %8
+  %10 = load i32* %arrayidx8, align 4
+  ret i32 %10
+}
+
+; /// Same objects, negative induction, multi-array, different sub-elements
+; int noAlias12 (int a) {
+;   int i, N=10;
+;   for (i=0; i<SIZE; i++)
+;     Bar.A[N][SIZE-i-1] = Bar.A[N+1][SIZE-i-1] + a;
+;   return Bar.A[N][a];
+; }
+; CHECK: define i32 @noAlias12
+; CHECK: sub nsw <4 x i32>
+; CHECK: ret
+
+define i32 @noAlias12(i32 %a) #0 {
+entry:
+  %a.addr = alloca i32, align 4
+  %i = alloca i32, align 4
+  %N = alloca i32, align 4
+  store i32 %a, i32* %a.addr, align 4
+  store i32 10, i32* %N, align 4
+  store i32 0, i32* %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32* %i, align 4
+  %cmp = icmp slt i32 %0, 100
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %1 = load i32* %i, align 4
+  %sub = sub nsw i32 100, %1
+  %sub1 = sub nsw i32 %sub, 1
+  %2 = load i32* %N, align 4
+  %add = add nsw i32 %2, 1
+  %arrayidx = getelementptr inbounds [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %add
+  %arrayidx2 = getelementptr inbounds [100 x i32]* %arrayidx, i32 0, i32 %sub1
+  %3 = load i32* %arrayidx2, align 4
+  %4 = load i32* %a.addr, align 4
+  %add3 = add nsw i32 %3, %4
+  %5 = load i32* %i, align 4
+  %sub4 = sub nsw i32 100, %5
+  %sub5 = sub nsw i32 %sub4, 1
+  %6 = load i32* %N, align 4
+  %arrayidx6 = getelementptr inbounds [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %6
+  %arrayidx7 = getelementptr inbounds [100 x i32]* %arrayidx6, i32 0, i32 %sub5
+  store i32 %add3, i32* %arrayidx7, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %7 = load i32* %i, align 4
+  %inc = add nsw i32 %7, 1
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %8 = load i32* %a.addr, align 4
+  %9 = load i32* %N, align 4
+  %arrayidx8 = getelementptr inbounds [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %9
+  %arrayidx9 = getelementptr inbounds [100 x i32]* %arrayidx8, i32 0, i32 %8
+  %10 = load i32* %arrayidx9, align 4
+  ret i32 %10
+}
+
+; /// Same objects, positive induction, constant distance, just enough for vector size
+; int noAlias13 (int a) {
+;   int i;
+;   for (i=0; i<SIZE; i++)
+;     Foo.A[i] = Foo.A[i+4] + a;
+;   return Foo.A[a];
+; }
+; CHECK: define i32 @noAlias13
+; CHECK: add nsw <4 x i32>
+; CHECK: ret
+
+define i32 @noAlias13(i32 %a) #0 {
+entry:
+  %a.addr = alloca i32, align 4
+  %i = alloca i32, align 4
+  store i32 %a, i32* %a.addr, align 4
+  store i32 0, i32* %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32* %i, align 4
+  %cmp = icmp slt i32 %0, 100
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %1 = load i32* %i, align 4
+  %add = add nsw i32 %1, 4
+  %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %add
+  %2 = load i32* %arrayidx, align 4
+  %3 = load i32* %a.addr, align 4
+  %add1 = add nsw i32 %2, %3
+  %4 = load i32* %i, align 4
+  %arrayidx2 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %4
+  store i32 %add1, i32* %arrayidx2, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %5 = load i32* %i, align 4
+  %inc = add nsw i32 %5, 1
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %6 = load i32* %a.addr, align 4
+  %arrayidx3 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
+  %7 = load i32* %arrayidx3, align 4
+  ret i32 %7
+}
+
+; /// Same objects, negative induction, constant distance, just enough for vector size
+; int noAlias14 (int a) {
+;   int i;
+;   for (i=0; i<SIZE; i++)
+;     Foo.A[SIZE-i-1] = Foo.A[SIZE-i-5] + a;
+;   return Foo.A[a];
+; }
+; CHECK: define i32 @noAlias14
+; CHECK: sub nsw <4 x i32>
+; CHECK: ret
+
+define i32 @noAlias14(i32 %a) #0 {
+entry:
+  %a.addr = alloca i32, align 4
+  %i = alloca i32, align 4
+  store i32 %a, i32* %a.addr, align 4
+  store i32 0, i32* %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32* %i, align 4
+  %cmp = icmp slt i32 %0, 100
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %1 = load i32* %i, align 4
+  %sub = sub nsw i32 100, %1
+  %sub1 = sub nsw i32 %sub, 5
+  %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %sub1
+  %2 = load i32* %arrayidx, align 4
+  %3 = load i32* %a.addr, align 4
+  %add = add nsw i32 %2, %3
+  %4 = load i32* %i, align 4
+  %sub2 = sub nsw i32 100, %4
+  %sub3 = sub nsw i32 %sub2, 1
+  %arrayidx4 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %sub3
+  store i32 %add, i32* %arrayidx4, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %5 = load i32* %i, align 4
+  %inc = add nsw i32 %5, 1
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %6 = load i32* %a.addr, align 4
+  %arrayidx5 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
+  %7 = load i32* %arrayidx5, align 4
+  ret i32 %7
+}
+
+
+;; === Now, the tests that we could vectorize with induction changes or run-time checks ===
+
+
+; /// Different objects, swapped induction, alias at the end
+; int mayAlias01 (int a) {
+;   int i;
+;   for (i=0; i<SIZE; i++)
+;     Foo.A[i] = Foo.B[SIZE-i-1] + a;
+;   return Foo.A[a];
+; }
+; CHECK: define i32 @mayAlias01
+; CHECK-NOT: add nsw <4 x i32>
+; CHECK: ret
+
+define i32 @mayAlias01(i32 %a) nounwind {
+entry:
+  %a.addr = alloca i32, align 4
+  %i = alloca i32, align 4
+  store i32 %a, i32* %a.addr, align 4
+  store i32 0, i32* %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32* %i, align 4
+  %cmp = icmp slt i32 %0, 100
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %1 = load i32* %i, align 4
+  %sub = sub nsw i32 100, %1
+  %sub1 = sub nsw i32 %sub, 1
+  %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %sub1
+  %2 = load i32* %arrayidx, align 4
+  %3 = load i32* %a.addr, align 4
+  %add = add nsw i32 %2, %3
+  %4 = load i32* %i, align 4
+  %arrayidx2 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %4
+  store i32 %add, i32* %arrayidx2, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %5 = load i32* %i, align 4
+  %inc = add nsw i32 %5, 1
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %6 = load i32* %a.addr, align 4
+  %arrayidx3 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
+  %7 = load i32* %arrayidx3, align 4
+  ret i32 %7
+}
+
+; /// Different objects, swapped induction, alias at the beginning
+; int mayAlias02 (int a) {
+;   int i;
+;   for (i=0; i<SIZE; i++)
+;     Foo.A[SIZE-i-1] = Foo.B[i] + a;
+;   return Foo.A[a];
+; }
+; CHECK: define i32 @mayAlias02
+; CHECK-NOT: add nsw <4 x i32>
+; CHECK: ret
+
+define i32 @mayAlias02(i32 %a) nounwind {
+entry:
+  %a.addr = alloca i32, align 4
+  %i = alloca i32, align 4
+  store i32 %a, i32* %a.addr, align 4
+  store i32 0, i32* %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32* %i, align 4
+  %cmp = icmp slt i32 %0, 100
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %1 = load i32* %i, align 4
+  %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %1
+  %2 = load i32* %arrayidx, align 4
+  %3 = load i32* %a.addr, align 4
+  %add = add nsw i32 %2, %3
+  %4 = load i32* %i, align 4
+  %sub = sub nsw i32 100, %4
+  %sub1 = sub nsw i32 %sub, 1
+  %arrayidx2 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %sub1
+  store i32 %add, i32* %arrayidx2, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %5 = load i32* %i, align 4
+  %inc = add nsw i32 %5, 1
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %6 = load i32* %a.addr, align 4
+  %arrayidx3 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
+  %7 = load i32* %arrayidx3, align 4
+  ret i32 %7
+}
+
+; /// Pointer access, run-time check added
+; int mayAlias03 (int a) {
+;   int i;
+;   for (i=0; i<SIZE; i++)
+;     *(PA+i) = *(PB+SIZE-i-1) + a;
+;   return *(PA+a);
+; }
+; CHECK: define i32 @mayAlias03
+; CHECK-NOT: add nsw <4 x i32>
+; CHECK: ret
+
+define i32 @mayAlias03(i32 %a) nounwind {
+entry:
+  %a.addr = alloca i32, align 4
+  %i = alloca i32, align 4
+  store i32 %a, i32* %a.addr, align 4
+  store i32 0, i32* %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32* %i, align 4
+  %cmp = icmp slt i32 %0, 100
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %1 = load i32** @PB, align 4
+  %add.ptr = getelementptr inbounds i32* %1, i32 100
+  %2 = load i32* %i, align 4
+  %idx.neg = sub i32 0, %2
+  %add.ptr1 = getelementptr inbounds i32* %add.ptr, i32 %idx.neg
+  %add.ptr2 = getelementptr inbounds i32* %add.ptr1, i32 -1
+  %3 = load i32* %add.ptr2, align 4
+  %4 = load i32* %a.addr, align 4
+  %add = add nsw i32 %3, %4
+  %5 = load i32** @PA, align 4
+  %6 = load i32* %i, align 4
+  %add.ptr3 = getelementptr inbounds i32* %5, i32 %6
+  store i32 %add, i32* %add.ptr3, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %7 = load i32* %i, align 4
+  %inc = add nsw i32 %7, 1
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %8 = load i32** @PA, align 4
+  %9 = load i32* %a.addr, align 4
+  %add.ptr4 = getelementptr inbounds i32* %8, i32 %9
+  %10 = load i32* %add.ptr4, align 4
+  ret i32 %10
+}
+
+
+;; === Finally, the tests that should only vectorize with care (or if we ignore undefined behaviour at all) ===
+
+
+; int mustAlias01 (int a) {
+;   int i;
+;   for (i=0; i<SIZE; i++)
+;     Foo.A[i+10] = Foo.B[SIZE-i-1] + a;
+;   return Foo.A[a];
+; }
+; CHECK: define i32 @mustAlias01
+; CHECK-NOT: add nsw <4 x i32>
+; CHECK: ret
+
+define i32 @mustAlias01(i32 %a) nounwind {
+entry:
+  %a.addr = alloca i32, align 4
+  %i = alloca i32, align 4
+  store i32 %a, i32* %a.addr, align 4
+  store i32 0, i32* %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32* %i, align 4
+  %cmp = icmp slt i32 %0, 100
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %1 = load i32* %i, align 4
+  %sub = sub nsw i32 100, %1
+  %sub1 = sub nsw i32 %sub, 1
+  %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %sub1
+  %2 = load i32* %arrayidx, align 4
+  %3 = load i32* %a.addr, align 4
+  %add = add nsw i32 %2, %3
+  %4 = load i32* %i, align 4
+  %add2 = add nsw i32 %4, 10
+  %arrayidx3 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %add2
+  store i32 %add, i32* %arrayidx3, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %5 = load i32* %i, align 4
+  %inc = add nsw i32 %5, 1
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %6 = load i32* %a.addr, align 4
+  %arrayidx4 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
+  %7 = load i32* %arrayidx4, align 4
+  ret i32 %7
+}
+
+; int mustAlias02 (int a) {
+;   int i;
+;   for (i=0; i<SIZE; i++)
+;     Foo.A[i] = Foo.B[SIZE-i-10] + a;
+;   return Foo.A[a];
+; }
+; CHECK: define i32 @mustAlias02
+; CHECK-NOT: add nsw <4 x i32>
+; CHECK: ret
+
+define i32 @mustAlias02(i32 %a) nounwind {
+entry:
+  %a.addr = alloca i32, align 4
+  %i = alloca i32, align 4
+  store i32 %a, i32* %a.addr, align 4
+  store i32 0, i32* %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32* %i, align 4
+  %cmp = icmp slt i32 %0, 100
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %1 = load i32* %i, align 4
+  %sub = sub nsw i32 100, %1
+  %sub1 = sub nsw i32 %sub, 10
+  %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %sub1
+  %2 = load i32* %arrayidx, align 4
+  %3 = load i32* %a.addr, align 4
+  %add = add nsw i32 %2, %3
+  %4 = load i32* %i, align 4
+  %arrayidx2 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %4
+  store i32 %add, i32* %arrayidx2, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %5 = load i32* %i, align 4
+  %inc = add nsw i32 %5, 1
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %6 = load i32* %a.addr, align 4
+  %arrayidx3 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
+  %7 = load i32* %arrayidx3, align 4
+  ret i32 %7
+}
+
+; int mustAlias03 (int a) {
+;   int i;
+;   for (i=0; i<SIZE; i++)
+;     Foo.A[i+10] = Foo.B[SIZE-i-10] + a;
+;   return Foo.A[a];
+; }
+; CHECK: define i32 @mustAlias03
+; CHECK-NOT: add nsw <4 x i32>
+; CHECK: ret
+
+define i32 @mustAlias03(i32 %a) nounwind {
+entry:
+  %a.addr = alloca i32, align 4
+  %i = alloca i32, align 4
+  store i32 %a, i32* %a.addr, align 4
+  store i32 0, i32* %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32* %i, align 4
+  %cmp = icmp slt i32 %0, 100
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %1 = load i32* %i, align 4
+  %sub = sub nsw i32 100, %1
+  %sub1 = sub nsw i32 %sub, 10
+  %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %sub1
+  %2 = load i32* %arrayidx, align 4
+  %3 = load i32* %a.addr, align 4
+  %add = add nsw i32 %2, %3
+  %4 = load i32* %i, align 4
+  %add2 = add nsw i32 %4, 10
+  %arrayidx3 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %add2
+  store i32 %add, i32* %arrayidx3, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %5 = load i32* %i, align 4
+  %inc = add nsw i32 %5, 1
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %6 = load i32* %a.addr, align 4
+  %arrayidx4 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
+  %7 = load i32* %arrayidx4, align 4
+  ret i32 %7
+}
diff --git a/test/Transforms/LoopVectorize/i8-induction.ll b/test/Transforms/LoopVectorize/i8-induction.ll
new file mode 100644
index 0000000000..7759b7085a
--- /dev/null
+++ b/test/Transforms/LoopVectorize/i8-induction.ll
@@ -0,0 +1,35 @@
+; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+@a = common global i8 0, align 1
+@b = common global i8 0, align 1
+
+define void @f() nounwind uwtable ssp {
+scalar.ph:
+  store i8 0, i8* inttoptr (i64 1 to i8*), align 1, !tbaa !0
+  %0 = load i8* @a, align 1, !tbaa !0
+  br label %for.body
+
+for.body:
+  %mul16 = phi i8 [ 0, %scalar.ph ], [ %mul, %for.body ]              ; <------- i8 induction var.
+  %c.015 = phi i8 [ undef, %scalar.ph ], [ %conv8, %for.body ]
+  %conv2 = sext i8 %c.015 to i32
+  %tobool = icmp ne i8 %c.015, 0
+  %.sink = select i1 %tobool, i8 %c.015, i8 %0
+  %mul = mul i8 %mul16, %.sink
+  %add = add nsw i32 %conv2, 1
+  %conv8 = trunc i32 %add to i8
+  %sext = shl i32 %add, 24
+  %phitmp14 = icmp slt i32 %sext, 268435456
+  br i1 %phitmp14, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body
+  store i8 %mul, i8* @b, align 1, !tbaa !0
+  ret void
+}
+
+!0 = metadata !{metadata !"omnipotent char", metadata !1}
+!1 = metadata !{metadata !"Simple C/C++ TBAA"}
+
diff --git a/test/Transforms/LoopVectorize/if-conv-crash.ll b/test/Transforms/LoopVectorize/if-conv-crash.ll
new file mode 100644
index 0000000000..3283456aa3
--- /dev/null
+++ b/test/Transforms/LoopVectorize/if-conv-crash.ll
@@ -0,0 +1,39 @@
+; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -enable-if-conversion
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+define fastcc void @DD_dump() nounwind uwtable ssp {
+entry:
+  br i1 undef, label %lor.lhs.false, label %if.end25
+
+lor.lhs.false:                                    ; preds = %entry
+  br i1 undef, label %if.end21, label %if.else
+
+if.else:                                          ; preds = %lor.lhs.false
+  br i1 undef, label %num_q.exit, label %while.body.i.preheader
+
+while.body.i.preheader:                           ; preds = %if.else
+  br label %while.body.i
+
+while.body.i:                                     ; preds = %if.end.i, %while.body.i.preheader
+  switch i8 undef, label %if.end.i [
+    i8 39, label %if.then.i
+    i8 92, label %if.then.i
+  ]
+
+if.then.i:                                        ; preds = %while.body.i, %while.body.i
+  br label %if.end.i
+
+if.end.i:                                         ; preds = %if.then.i, %while.body.i
+  br i1 undef, label %num_q.exit, label %while.body.i
+
+num_q.exit:                                       ; preds = %if.end.i, %if.else
+  unreachable
+
+if.end21:                                         ; preds = %lor.lhs.false
+  unreachable
+
+if.end25:                                         ; preds = %entry
+  ret void
+}
diff --git a/test/Transforms/LoopVectorize/if-conversion-reduction.ll b/test/Transforms/LoopVectorize/if-conversion-reduction.ll
index bacf9c00d0..3a2d82e15d 100644
--- a/test/Transforms/LoopVectorize/if-conversion-reduction.ll
+++ b/test/Transforms/LoopVectorize/if-conversion-reduction.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-width=4 -enable-if-conversion -dce -instcombine -licm -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -enable-if-conversion -dce -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.9.0"
diff --git a/test/Transforms/LoopVectorize/if-conversion.ll b/test/Transforms/LoopVectorize/if-conversion.ll
index b4701b9655..6e7c03a556 100644
--- a/test/Transforms/LoopVectorize/if-conversion.ll
+++ b/test/Transforms/LoopVectorize/if-conversion.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-width=4 -enable-if-conversion -dce -instcombine -licm -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -enable-if-conversion -dce -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.9.0"
diff --git a/test/Transforms/LoopVectorize/increment.ll b/test/Transforms/LoopVectorize/increment.ll
index 71ea7689fc..3fa6b19ca9 100644
--- a/test/Transforms/LoopVectorize/increment.ll
+++ b/test/Transforms/LoopVectorize/increment.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/Transforms/LoopVectorize/induction_plus.ll b/test/Transforms/LoopVectorize/induction_plus.ll
index b31bceb50d..96595cdc16 100644
--- a/test/Transforms/LoopVectorize/induction_plus.ll
+++ b/test/Transforms/LoopVectorize/induction_plus.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loop-vectorize -force-vector-width=4 -instcombine -S | FileCheck %s
+; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
@@ -6,8 +6,7 @@ target triple = "x86_64-apple-macosx10.8.0"
 @array = common global [1024 x i32] zeroinitializer, align 16
 
 ;CHECK: @array_at_plus_one
-;CHECK: add <4 x i64>
-;CHECK: trunc <4 x i64>
+;CHECK: trunc i64
 ;CHECK: add i64 %index, 12
 ;CHECK: ret i32
 define i32 @array_at_plus_one(i32 %n) nounwind uwtable ssp {
diff --git a/test/Transforms/LoopVectorize/intrinsic.ll b/test/Transforms/LoopVectorize/intrinsic.ll
index 54e3c69fe1..e79d78de67 100644
--- a/test/Transforms/LoopVectorize/intrinsic.ll
+++ b/test/Transforms/LoopVectorize/intrinsic.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
@@ -788,6 +788,66 @@ for.end:                                          ; preds = %for.body, %entry
 
 declare double @llvm.fma.f64(double, double, double) nounwind readnone
 
+;CHECK: @fmuladd_f32
+;CHECK: llvm.fmuladd.v4f32
+;CHECK: ret void
+define void @fmuladd_f32(i32 %n, float* noalias %y, float* noalias %x, float* noalias %z, float* noalias %w) nounwind uwtable {
+entry:
+  %cmp12 = icmp sgt i32 %n, 0
+  br i1 %cmp12, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv
+  %0 = load float* %arrayidx, align 4, !tbaa !0
+  %arrayidx2 = getelementptr inbounds float* %w, i64 %indvars.iv
+  %1 = load float* %arrayidx2, align 4, !tbaa !0
+  %arrayidx4 = getelementptr inbounds float* %z, i64 %indvars.iv
+  %2 = load float* %arrayidx4, align 4, !tbaa !0
+  %3 = tail call float @llvm.fmuladd.f32(float %0, float %2, float %1)
+  %arrayidx6 = getelementptr inbounds float* %x, i64 %indvars.iv
+  store float %3, float* %arrayidx6, align 4, !tbaa !0
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+declare float @llvm.fmuladd.f32(float, float, float) nounwind readnone
+
+;CHECK: @fmuladd_f64
+;CHECK: llvm.fmuladd.v4f64
+;CHECK: ret void
+define void @fmuladd_f64(i32 %n, double* noalias %y, double* noalias %x, double* noalias %z, double* noalias %w) nounwind uwtable {
+entry:
+  %cmp12 = icmp sgt i32 %n, 0
+  br i1 %cmp12, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
+  %0 = load double* %arrayidx, align 8, !tbaa !3
+  %arrayidx2 = getelementptr inbounds double* %w, i64 %indvars.iv
+  %1 = load double* %arrayidx2, align 8, !tbaa !3
+  %arrayidx4 = getelementptr inbounds double* %z, i64 %indvars.iv
+  %2 = load double* %arrayidx4, align 8, !tbaa !3
+  %3 = tail call double @llvm.fmuladd.f64(double %0, double %2, double %1)
+  %arrayidx6 = getelementptr inbounds double* %x, i64 %indvars.iv
+  store double %3, double* %arrayidx6, align 8, !tbaa !3
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+declare double @llvm.fmuladd.f64(double, double, double) nounwind readnone
+
 ;CHECK: @pow_f32
 ;CHECK: llvm.pow.v4f32
 ;CHECK: ret void
@@ -842,6 +902,30 @@ for.end:                                          ; preds = %for.body, %entry
   ret void
 }
 
+; CHECK: fabs_libm
+; CHECK:  call <4 x float> @llvm.fabs.v4f32
+; CHECK: ret void
+define void @fabs_libm(float* nocapture %x) nounwind {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds float* %x, i64 %indvars.iv
+  %0 = load float* %arrayidx, align 4
+  %call = tail call float @fabsf(float %0) nounwind readnone
+  store float %call, float* %arrayidx, align 4
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 1024
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+declare float @fabsf(float) nounwind readnone
+
 declare double @llvm.pow.f64(double, double) nounwind readnone
 
 !0 = metadata !{metadata !"float", metadata !1}
diff --git a/test/Transforms/LoopVectorize/lcssa-crash.ll b/test/Transforms/LoopVectorize/lcssa-crash.ll
new file mode 100644
index 0000000000..06b3b08aa0
--- /dev/null
+++ b/test/Transforms/LoopVectorize/lcssa-crash.ll
@@ -0,0 +1,29 @@
+; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%type1 = type { %type2 }
+%type2 = type { [0 x i8*], i8**, i32, i32, i32 }
+
+define void @test() nounwind uwtable align 2 {
+  br label %for.body.lr.ph.i.i.i
+
+for.body.lr.ph.i.i.i:
+  br label %for.body.i.i.i
+
+for.body.i.i.i:
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc.i.i.i ], [ 0, %for.body.lr.ph.i.i.i ]
+  br label %for.inc.i.i.i
+
+for.inc.i.i.i:
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp ne i32 %lftr.wideiv, undef
+  br i1 %exitcond, label %for.body.i.i.i, label %for.end.i.i.i
+
+for.end.i.i.i:
+  %lcssa = phi %type1* [ undef, %for.inc.i.i.i ]
+  unreachable
+}
+
diff --git a/test/Transforms/LoopVectorize/no_int_induction.ll b/test/Transforms/LoopVectorize/no_int_induction.ll
index 516fd1de07..45aa8c7cd9 100644
--- a/test/Transforms/LoopVectorize/no_int_induction.ll
+++ b/test/Transforms/LoopVectorize/no_int_induction.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
 
 ; int __attribute__((noinline)) sum_array(int *A, int n) {
 ;  return std::accumulate(A, A + n, 0);
diff --git a/test/Transforms/LoopVectorize/nofloat.ll b/test/Transforms/LoopVectorize/nofloat.ll
new file mode 100644
index 0000000000..de23bf02b6
--- /dev/null
+++ b/test/Transforms/LoopVectorize/nofloat.ll
@@ -0,0 +1,29 @@
+; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+
+; Make sure that we don't vectorize functions with 'noimplicitfloat' attributes.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+@a = common global [2048 x i32] zeroinitializer, align 16
+
+;CHECK: @example12
+;CHECK-NOT: store <4 x i32>
+;CHECK: ret void
+define void @example12() noimplicitfloat { ;           <--------- "noimplicitfloat" attribute here!
+  br label %1
+
+; <label>:1                                       ; preds = %1, %0
+  %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
+  %2 = getelementptr inbounds [2048 x i32]* @a, i64 0, i64 %indvars.iv
+  %3 = trunc i64 %indvars.iv to i32
+  store i32 %3, i32* %2, align 4
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 1024
+  br i1 %exitcond, label %4, label %1
+
+; <label>:4                                       ; preds = %1
+  ret void
+}
+
diff --git a/test/Transforms/LoopVectorize/non-const-n.ll b/test/Transforms/LoopVectorize/non-const-n.ll
index 1a6c15ed96..8262a18f18 100644
--- a/test/Transforms/LoopVectorize/non-const-n.ll
+++ b/test/Transforms/LoopVectorize/non-const-n.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/Transforms/LoopVectorize/nsw-crash.ll b/test/Transforms/LoopVectorize/nsw-crash.ll
new file mode 100644
index 0000000000..e5fad14d0d
--- /dev/null
+++ b/test/Transforms/LoopVectorize/nsw-crash.ll
@@ -0,0 +1,25 @@
+; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4
+
+target datalayout =
+"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.7.0"
+
+define void @test() {
+entry:
+  br i1 undef, label %while.end, label %while.body.lr.ph
+
+while.body.lr.ph:
+  br label %while.body
+
+while.body:
+  %it.sroa.0.091 = phi i32* [ undef, %while.body.lr.ph ], [ %incdec.ptr.i, %while.body ]
+  %incdec.ptr.i = getelementptr inbounds i32* %it.sroa.0.091, i64 1
+  %inc32 = add i32 undef, 1                                        ; <------------- Make sure we don't set NSW flags to the undef.
+  %cmp.i11 = icmp eq i32* %incdec.ptr.i, undef
+  br i1 %cmp.i11, label %while.end, label %while.body
+
+while.end:
+  ret void
+}
+
+
diff --git a/test/Transforms/LoopVectorize/phi-hang.ll b/test/Transforms/LoopVectorize/phi-hang.ll
new file mode 100644
index 0000000000..b80d45995d
--- /dev/null
+++ b/test/Transforms/LoopVectorize/phi-hang.ll
@@ -0,0 +1,29 @@
+; RUN: opt -S -loop-vectorize < %s
+
+; PR15384
+define void @test1(i32 %arg) {
+bb:
+  br label %bb1
+
+bb1:                                              ; preds = %bb5, %bb
+  %tmp = phi i32 [ 1, %bb ], [ %tmp7, %bb5 ]
+  %tmp2 = phi i32 [ %arg, %bb ], [ %tmp9, %bb5 ]
+  br i1 true, label %bb5, label %bb3
+
+bb3:                                              ; preds = %bb1
+  br label %bb4
+
+bb4:                                              ; preds = %bb3
+  br label %bb5
+
+bb5:                                              ; preds = %bb4, %bb1
+  %tmp6 = phi i32 [ 0, %bb4 ], [ %tmp, %bb1 ]
+  %tmp7 = phi i32 [ 0, %bb4 ], [ %tmp6, %bb1 ]
+  %tmp8 = phi i32 [ 0, %bb4 ], [ %tmp, %bb1 ]
+  %tmp9 = add nsw i32 %tmp2, 1
+  %tmp10 = icmp eq i32 %tmp9, 0
+  br i1 %tmp10, label %bb11, label %bb1
+
+bb11:                                             ; preds = %bb5
+  ret void
+}
diff --git a/test/Transforms/LoopVectorize/ptr_loops.ll b/test/Transforms/LoopVectorize/ptr_loops.ll
new file mode 100644
index 0000000000..25599f8f4c
--- /dev/null
+++ b/test/Transforms/LoopVectorize/ptr_loops.ll
@@ -0,0 +1,74 @@
+; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S -enable-if-conversion | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+@A = global [36 x i32] [i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35], align 16
+@B = global [36 x i32] [i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35], align 16
+
+;CHECK:_Z5test1v
+;CHECK: load <4 x i32>
+;CHECK: shufflevector <4 x i32>
+;CHECK: store <4 x i32>
+;CHECK: ret
+define i32 @_Z5test1v() nounwind uwtable ssp {
+  br label %1
+
+; <label>:1                                       ; preds = %0, %1
+  %p.02 = phi i32* [ getelementptr inbounds ([36 x i32]* @A, i64 0, i64 18), %0 ], [ %4, %1 ]
+  %b.01 = phi i32* [ getelementptr inbounds ([36 x i32]* @B, i64 0, i64 0), %0 ], [ %5, %1 ]
+  %2 = load i32* %b.01, align 4
+  %3 = shl nsw i32 %2, 1
+  store i32 %3, i32* %p.02, align 4
+  %4 = getelementptr inbounds i32* %p.02, i64 -1
+  %5 = getelementptr inbounds i32* %b.01, i64 1
+  %6 = icmp eq i32* %4, getelementptr ([36 x i32]* @A, i64 128102389400760775, i64 3)
+  br i1 %6, label %7, label %1
+
+; <label>:7                                       ; preds = %1
+  ret i32 0
+}
+
+;CHECK:_Z5test2v
+;CHECK: load <4 x i32>
+;CHECK: shufflevector <4 x i32>
+;CHECK: store <4 x i32>
+;CHECK: ret
+define i32 @_Z5test2v() nounwind uwtable ssp {
+  br label %1
+
+; <label>:1                                       ; preds = %0, %1
+  %p.02 = phi i32* [ getelementptr inbounds ([36 x i32]* @A, i64 0, i64 25), %0 ], [ %3, %1 ]
+  %b.01 = phi i32* [ getelementptr inbounds ([36 x i32]* @B, i64 0, i64 2), %0 ], [ %4, %1 ]
+  %2 = load i32* %b.01, align 4
+  store i32 %2, i32* %p.02, align 4
+  %3 = getelementptr inbounds i32* %p.02, i64 -1
+  %4 = getelementptr inbounds i32* %b.01, i64 1
+  %5 = icmp eq i32* %4, getelementptr inbounds ([36 x i32]* @A, i64 0, i64 18)
+  br i1 %5, label %6, label %1
+
+; <label>:6                                       ; preds = %1
+  ret i32 0
+}
+
+;CHECK:_Z5test3v
+;CHECK: load <4 x i32>
+;CHECK: shufflevector <4 x i32>
+;CHECK: store <4 x i32>
+;CHECK: ret
+define i32 @_Z5test3v() nounwind uwtable ssp {
+  br label %1
+
+; <label>:1                                       ; preds = %0, %1
+  %p.02 = phi i32* [ getelementptr inbounds ([36 x i32]* @A, i64 0, i64 29), %0 ], [ %3, %1 ]
+  %b.01 = phi i32* [ getelementptr inbounds ([36 x i32]* @B, i64 0, i64 5), %0 ], [ %4, %1 ]
+  %2 = load i32* %b.01, align 4
+  store i32 %2, i32* %p.02, align 4
+  %3 = getelementptr inbounds i32* %p.02, i64 -1
+  %4 = getelementptr inbounds i32* %b.01, i64 1
+  %5 = icmp eq i32* %3, getelementptr ([36 x i32]* @A, i64 128102389400760775, i64 3)
+  br i1 %5, label %6, label %1
+
+; <label>:6                                       ; preds = %1
+  ret i32 0
+}
diff --git a/test/Transforms/LoopVectorize/read-only.ll b/test/Transforms/LoopVectorize/read-only.ll
index b4d1bac132..bfaa6d452b 100644
--- a/test/Transforms/LoopVectorize/read-only.ll
+++ b/test/Transforms/LoopVectorize/read-only.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/Transforms/LoopVectorize/reduction.ll b/test/Transforms/LoopVectorize/reduction.ll
index c1848b35fc..08b7b27e42 100644
--- a/test/Transforms/LoopVectorize/reduction.ll
+++ b/test/Transforms/LoopVectorize/reduction.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
@@ -7,6 +7,11 @@ target triple = "x86_64-apple-macosx10.8.0"
 ;CHECK: phi <4 x i32>
 ;CHECK: load <4 x i32>
 ;CHECK: add <4 x i32>
+;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+;CHECK: add <4 x i32>
+;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+;CHECK: add <4 x i32>
+;CHECK: extractelement <4 x i32> %{{.*}}, i32 0
 ;CHECK: ret i32
 define i32 @reduction_sum(i32 %n, i32* noalias nocapture %A, i32* noalias nocapture %B) nounwind uwtable readonly noinline ssp {
   %1 = icmp sgt i32 %n, 0
@@ -37,6 +42,11 @@ define i32 @reduction_sum(i32 %n, i32* noalias nocapture %A, i32* noalias nocapt
 ;CHECK: phi <4 x i32>
 ;CHECK: load <4 x i32>
 ;CHECK: mul <4 x i32>
+;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+;CHECK: mul <4 x i32>
+;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+;CHECK: mul <4 x i32>
+;CHECK: extractelement <4 x i32> %{{.*}}, i32 0
 ;CHECK: ret i32
 define i32 @reduction_prod(i32 %n, i32* noalias nocapture %A, i32* noalias nocapture %B) nounwind uwtable readonly noinline ssp {
   %1 = icmp sgt i32 %n, 0
@@ -67,6 +77,11 @@ define i32 @reduction_prod(i32 %n, i32* noalias nocapture %A, i32* noalias nocap
 ;CHECK: phi <4 x i32>
 ;CHECK: load <4 x i32>
 ;CHECK: mul nsw <4 x i32>
+;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+;CHECK: add <4 x i32>
+;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+;CHECK: add <4 x i32>
+;CHECK: extractelement <4 x i32> %{{.*}}, i32 0
 ;CHECK: ret i32
 define i32 @reduction_mix(i32 %n, i32* noalias nocapture %A, i32* noalias nocapture %B) nounwind uwtable readonly noinline ssp {
   %1 = icmp sgt i32 %n, 0
@@ -95,6 +110,11 @@ define i32 @reduction_mix(i32 %n, i32* noalias nocapture %A, i32* noalias nocapt
 
 ;CHECK: @reduction_mul
 ;CHECK: mul <4 x i32>
+;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+;CHECK: mul <4 x i32>
+;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+;CHECK: mul <4 x i32>
+;CHECK: extractelement <4 x i32> %{{.*}}, i32 0
 ;CHECK: ret i32
 define i32 @reduction_mul(i32 %n, i32* noalias nocapture %A, i32* noalias nocapture %B) nounwind uwtable readonly noinline ssp {
   %1 = icmp sgt i32 %n, 0
@@ -124,6 +144,11 @@ define i32 @reduction_mul(i32 %n, i32* noalias nocapture %A, i32* noalias nocapt
 ;CHECK: @start_at_non_zero
 ;CHECK: phi <4 x i32>
 ;CHECK: <i32 120, i32 0, i32 0, i32 0>
+;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+;CHECK: add <4 x i32>
+;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+;CHECK: add <4 x i32>
+;CHECK: extractelement <4 x i32> %{{.*}}, i32 0
 ;CHECK: ret i32
 define i32 @start_at_non_zero(i32* nocapture %in, i32* nocapture %coeff, i32* nocapture %out, i32 %n) nounwind uwtable readonly ssp {
 entry:
@@ -152,6 +177,11 @@ for.end:                                          ; preds = %for.body, %entry
 ;CHECK: @reduction_and
 ;CHECK: and <4 x i32>
 ;CHECK: <i32 -1, i32 -1, i32 -1, i32 -1>
+;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+;CHECK: and <4 x i32>
+;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+;CHECK: and <4 x i32>
+;CHECK: extractelement <4 x i32> %{{.*}}, i32 0
 ;CHECK: ret i32
 define i32 @reduction_and(i32 %n, i32* nocapture %A, i32* nocapture %B) nounwind uwtable readonly {
 entry:
@@ -179,6 +209,11 @@ for.end:                                          ; preds = %for.body, %entry
 
 ;CHECK: @reduction_or
 ;CHECK: or <4 x i32>
+;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+;CHECK: or <4 x i32>
+;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+;CHECK: or <4 x i32>
+;CHECK: extractelement <4 x i32> %{{.*}}, i32 0
 ;CHECK: ret i32
 define i32 @reduction_or(i32 %n, i32* nocapture %A, i32* nocapture %B) nounwind uwtable readonly {
 entry:
@@ -206,6 +241,11 @@ for.end:                                          ; preds = %for.body, %entry
 
 ;CHECK: @reduction_xor
 ;CHECK: xor <4 x i32>
+;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+;CHECK: xor <4 x i32>
+;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+;CHECK: xor <4 x i32>
+;CHECK: extractelement <4 x i32> %{{.*}}, i32 0
 ;CHECK: ret i32
 define i32 @reduction_xor(i32 %n, i32* nocapture %A, i32* nocapture %B) nounwind uwtable readonly {
 entry:
@@ -230,3 +270,56 @@ for.end:                                          ; preds = %for.body, %entry
   %result.0.lcssa = phi i32 [ 0, %entry ], [ %xor, %for.body ]
   ret i32 %result.0.lcssa
 }
+
+; In this code the subtracted variable is on the RHS and this is not an induction variable.
+;CHECK: @reduction_sub_rhs
+;CHECK-NOT: phi <4 x i32>
+;CHECK-NOT: sub nsw <4 x i32>
+;CHECK: ret i32
+define i32 @reduction_sub_rhs(i32 %n, i32* noalias nocapture %A) nounwind uwtable readonly {
+entry:
+  %cmp4 = icmp sgt i32 %n, 0
+  br i1 %cmp4, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %x.05 = phi i32 [ %sub, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i32* %A, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %sub = sub nsw i32 %0, %x.05
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  %x.0.lcssa = phi i32 [ 0, %entry ], [ %sub, %for.body ]
+  ret i32 %x.0.lcssa
+}
+
+
+; In this test the reduction variable is on the LHS and we can vectorize it.
+;CHECK: @reduction_sub_lhs
+;CHECK: phi <4 x i32>
+;CHECK: sub nsw <4 x i32>
+;CHECK: ret i32
+define i32 @reduction_sub_lhs(i32 %n, i32* noalias nocapture %A) nounwind uwtable readonly {
+entry:
+  %cmp4 = icmp sgt i32 %n, 0
+  br i1 %cmp4, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %x.05 = phi i32 [ %sub, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i32* %A, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %sub = sub nsw i32 %x.05, %0
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  %x.0.lcssa = phi i32 [ 0, %entry ], [ %sub, %for.body ]
+  ret i32 %x.0.lcssa
+}
diff --git a/test/Transforms/LoopVectorize/runtime-check.ll b/test/Transforms/LoopVectorize/runtime-check.ll
index 23933cf7c7..86098a6e7d 100644
--- a/test/Transforms/LoopVectorize/runtime-check.ll
+++ b/test/Transforms/LoopVectorize/runtime-check.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.9.0"
@@ -9,6 +9,10 @@ target triple = "x86_64-apple-macosx10.9.0"
 ;     a[i] = b[i] * 3;
 ; }
 
+;CHECK: for.body.preheader:
+;CHECK: br i1 %cmp.zero, label %middle.block, label %vector.memcheck
+;CHECK: vector.memcheck:
+;CHECK: br i1 %found.conflict, label %middle.block, label %vector.ph
 ;CHECK: load <4 x float>
 define i32 @foo(float* nocapture %a, float* nocapture %b, i32 %n) nounwind uwtable ssp {
 entry:
diff --git a/test/Transforms/LoopVectorize/same-base-access.ll b/test/Transforms/LoopVectorize/same-base-access.ll
new file mode 100644
index 0000000000..1573893645
--- /dev/null
+++ b/test/Transforms/LoopVectorize/same-base-access.ll
@@ -0,0 +1,110 @@
+; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S -enable-if-conversion | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.9.0"
+
+; This is kernel11 from "LivermoreLoops". We can't vectorize it because we
+; access both x[k] and x[k-1].
+;
+; void kernel11(double *x, double *y, int n) {
+;   for ( int k=1 ; k<n ; k++ )
+;     x[k] = x[k-1] + y[k];
+; }
+
+; CHECK: @kernel11
+; CHECK-NOT: <4 x double>
+; CHECK: ret
+define i32 @kernel11(double* %x, double* %y, i32 %n) nounwind uwtable ssp {
+  %1 = alloca double*, align 8
+  %2 = alloca double*, align 8
+  %3 = alloca i32, align 4
+  %k = alloca i32, align 4
+  store double* %x, double** %1, align 8
+  store double* %y, double** %2, align 8
+  store i32 %n, i32* %3, align 4
+  store i32 1, i32* %k, align 4
+  br label %4
+
+; <label>:4                                       ; preds = %25, %0
+  %5 = load i32* %k, align 4
+  %6 = load i32* %3, align 4
+  %7 = icmp slt i32 %5, %6
+  br i1 %7, label %8, label %28
+
+; <label>:8                                       ; preds = %4
+  %9 = load i32* %k, align 4
+  %10 = sub nsw i32 %9, 1
+  %11 = sext i32 %10 to i64
+  %12 = load double** %1, align 8
+  %13 = getelementptr inbounds double* %12, i64 %11
+  %14 = load double* %13, align 8
+  %15 = load i32* %k, align 4
+  %16 = sext i32 %15 to i64
+  %17 = load double** %2, align 8
+  %18 = getelementptr inbounds double* %17, i64 %16
+  %19 = load double* %18, align 8
+  %20 = fadd double %14, %19
+  %21 = load i32* %k, align 4
+  %22 = sext i32 %21 to i64
+  %23 = load double** %1, align 8
+  %24 = getelementptr inbounds double* %23, i64 %22
+  store double %20, double* %24, align 8
+  br label %25
+
+; <label>:25                                      ; preds = %8
+  %26 = load i32* %k, align 4
+  %27 = add nsw i32 %26, 1
+  store i32 %27, i32* %k, align 4
+  br label %4
+
+; <label>:28                                      ; preds = %4
+  ret i32 0
+}
+
+
+
+; We don't vectorize this function because A[i*7] is scalarized, and the
+; different scalars can in theory wrap around and overwrite other scalar
+; elements. At the moment we only allow read/write access to arrays
+; that are consecutive.
+; 
+; void foo(int *a) {
+;   for (int i=0; i<256; ++i) {
+;     int x = a[i*7];
+;     if (x>3)
+;       x = x*x+x*4;
+;     a[i*7] = x+3;
+;   }
+; }
+
+; CHECK: @func2
+; CHECK-NOT: <4 x i32>
+; CHECK: ret
+define i32 @func2(i32* nocapture %a) nounwind uwtable ssp {
+  br label %1
+
+; <label>:1                                       ; preds = %7, %0
+  %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %7 ]
+  %2 = mul nsw i64 %indvars.iv, 7
+  %3 = getelementptr inbounds i32* %a, i64 %2
+  %4 = load i32* %3, align 4
+  %5 = icmp sgt i32 %4, 3
+  br i1 %5, label %6, label %7
+
+; <label>:6                                       ; preds = %1
+  %tmp = add i32 %4, 4
+  %tmp1 = mul i32 %tmp, %4
+  br label %7
+
+; <label>:7                                       ; preds = %6, %1
+  %x.0 = phi i32 [ %tmp1, %6 ], [ %4, %1 ]
+  %8 = add nsw i32 %x.0, 3
+  store i32 %8, i32* %3, align 4
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 256
+  br i1 %exitcond, label %9, label %1
+
+; <label>:9                                       ; preds = %7
+  ret i32 0
+}
diff --git a/test/Transforms/LoopVectorize/scalar-select.ll b/test/Transforms/LoopVectorize/scalar-select.ll
index e537bde31b..7a14d247c9 100644
--- a/test/Transforms/LoopVectorize/scalar-select.ll
+++ b/test/Transforms/LoopVectorize/scalar-select.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/Transforms/LoopVectorize/simple-unroll.ll b/test/Transforms/LoopVectorize/simple-unroll.ll
new file mode 100644
index 0000000000..7e2dd5fc0f
--- /dev/null
+++ b/test/Transforms/LoopVectorize/simple-unroll.ll
@@ -0,0 +1,39 @@
+; RUN: opt < %s  -loop-vectorize -force-vector-width=4 -force-vector-unroll=2 -dce -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+@a = common global [2048 x i32] zeroinitializer, align 16
+
+; This is the loop.
+;  for (i=0; i<n; i++){
+;    a[i] += i;
+;  }
+;CHECK: @inc
+;CHECK: load <4 x i32>
+;CHECK: load <4 x i32>
+;CHECK: add nsw <4 x i32>
+;CHECK: add nsw <4 x i32>
+;CHECK: store <4 x i32>
+;CHECK: store <4 x i32>
+;CHECK: ret void
+define void @inc(i32 %n) nounwind uwtable noinline ssp {
+  %1 = icmp sgt i32 %n, 0
+  br i1 %1, label %.lr.ph, label %._crit_edge
+
+.lr.ph:                                           ; preds = %0, %.lr.ph
+  %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ]
+  %2 = getelementptr inbounds [2048 x i32]* @a, i64 0, i64 %indvars.iv
+  %3 = load i32* %2, align 4
+  %4 = trunc i64 %indvars.iv to i32
+  %5 = add nsw i32 %3, %4
+  store i32 %5, i32* %2, align 4
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %._crit_edge, label %.lr.ph
+
+._crit_edge:                                      ; preds = %.lr.ph, %0
+  ret void
+}
+
diff --git a/test/Transforms/LoopVectorize/small-loop.ll b/test/Transforms/LoopVectorize/small-loop.ll
index 4a6e4b231d..fa83dba3d3 100644
--- a/test/Transforms/LoopVectorize/small-loop.ll
+++ b/test/Transforms/LoopVectorize/small-loop.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/Transforms/LoopVectorize/start-non-zero.ll b/test/Transforms/LoopVectorize/start-non-zero.ll
index 5aa3bc034d..998001c318 100644
--- a/test/Transforms/LoopVectorize/start-non-zero.ll
+++ b/test/Transforms/LoopVectorize/start-non-zero.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loop-vectorize -force-vector-width=4 -instcombine -S | FileCheck %s
+; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/Transforms/LoopVectorize/struct_access.ll b/test/Transforms/LoopVectorize/struct_access.ll
new file mode 100644
index 0000000000..de65d0d148
--- /dev/null
+++ b/test/Transforms/LoopVectorize/struct_access.ll
@@ -0,0 +1,50 @@
+; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.9.0"
+
+%struct.coordinate = type { i32, i32 }
+
+; Make sure that we don't generate a wide load when accessing the struct.
+; struct coordinate {
+;  int x;
+;  int y;
+; };
+;
+;
+; int foo(struct coordinate *A, int n) {
+;
+;   int sum = 0;
+;   for (int i = 0; i < n; ++i)
+;     sum += A[i].x;
+;
+;   return sum;
+; }
+
+;CHECK: @foo
+;CHECK-NOT: load <4 x i32>
+;CHECK: ret
+define i32 @foo(%struct.coordinate* nocapture %A, i32 %n) nounwind uwtable readonly ssp {
+entry:
+  %cmp4 = icmp sgt i32 %n, 0
+  br i1 %cmp4, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %sum.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
+  %x = getelementptr inbounds %struct.coordinate* %A, i64 %indvars.iv, i32 0
+  %0 = load i32* %x, align 4, !tbaa !0
+  %add = add nsw i32 %0, %sum.05
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  %sum.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  ret i32 %sum.0.lcssa
+}
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/Transforms/LoopVectorize/vectorize-once.ll b/test/Transforms/LoopVectorize/vectorize-once.ll
new file mode 100644
index 0000000000..ac1694802a
--- /dev/null
+++ b/test/Transforms/LoopVectorize/vectorize-once.ll
@@ -0,0 +1,75 @@
+; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S -simplifycfg | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+;
+; We want to make sure that we are vectorizeing the scalar loop only once
+; even if the pass manager runs the vectorizer multiple times due to inlining.
+
+
+; This test checks that we add metadata to vectorized loops
+; CHECK: _Z4foo1Pii
+; CHECK: <4 x i32>
+; CHECK: llvm.vectorizer.already_vectorized
+; CHECK: ret
+
+; This test comes from the loop:
+;
+;int foo (int *A, int n) {
+;  return std::accumulate(A, A + n, 0);
+;}
+define i32 @_Z4foo1Pii(i32* %A, i32 %n) #0 {
+entry:
+  %idx.ext = sext i32 %n to i64
+  %add.ptr = getelementptr inbounds i32* %A, i64 %idx.ext
+  %cmp3.i = icmp eq i32 %n, 0
+  br i1 %cmp3.i, label %_ZSt10accumulateIPiiET0_T_S2_S1_.exit, label %for.body.i
+
+for.body.i:                                       ; preds = %entry, %for.body.i
+  %__init.addr.05.i = phi i32 [ %add.i, %for.body.i ], [ 0, %entry ]
+  %__first.addr.04.i = phi i32* [ %incdec.ptr.i, %for.body.i ], [ %A, %entry ]
+  %0 = load i32* %__first.addr.04.i, align 4, !tbaa !0
+  %add.i = add nsw i32 %0, %__init.addr.05.i
+  %incdec.ptr.i = getelementptr inbounds i32* %__first.addr.04.i, i64 1
+  %cmp.i = icmp eq i32* %incdec.ptr.i, %add.ptr
+  br i1 %cmp.i, label %_ZSt10accumulateIPiiET0_T_S2_S1_.exit, label %for.body.i
+
+_ZSt10accumulateIPiiET0_T_S2_S1_.exit:            ; preds = %for.body.i, %entry
+  %__init.addr.0.lcssa.i = phi i32 [ 0, %entry ], [ %add.i, %for.body.i ]
+  ret i32 %__init.addr.0.lcssa.i
+}
+
+; This test checks that we don't vectorize loops that are marked with the "already vectorized" metadata.
+; CHECK: _Z4foo2Pii
+; CHECK-NOT: <4 x i32>
+; CHECK: llvm.vectorizer.already_vectorized
+; CHECK: ret
+define i32 @_Z4foo2Pii(i32* %A, i32 %n) #0 {
+entry:
+  %idx.ext = sext i32 %n to i64
+  %add.ptr = getelementptr inbounds i32* %A, i64 %idx.ext
+  %cmp3.i = icmp eq i32 %n, 0
+  br i1 %cmp3.i, label %_ZSt10accumulateIPiiET0_T_S2_S1_.exit, label %for.body.i
+
+for.body.i:                                       ; preds = %entry, %for.body.i
+  %__init.addr.05.i = phi i32 [ %add.i, %for.body.i ], [ 0, %entry ]
+  %__first.addr.04.i = phi i32* [ %incdec.ptr.i, %for.body.i ], [ %A, %entry ]
+  %0 = load i32* %__first.addr.04.i, align 4, !tbaa !0
+  %add.i = add nsw i32 %0, %__init.addr.05.i
+  %incdec.ptr.i = getelementptr inbounds i32* %__first.addr.04.i, i64 1
+  %cmp.i = icmp eq i32* %incdec.ptr.i, %add.ptr
+  br i1 %cmp.i, label %_ZSt10accumulateIPiiET0_T_S2_S1_.exit, label %for.body.i, !llvm.vectorizer.already_vectorized !3
+
+_ZSt10accumulateIPiiET0_T_S2_S1_.exit:            ; preds = %for.body.i, %entry
+  %__init.addr.0.lcssa.i = phi i32 [ 0, %entry ], [ %add.i, %for.body.i ]
+  ret i32 %__init.addr.0.lcssa.i
+}
+
+attributes #0 = { nounwind readonly ssp uwtable "fp-contract-model"="standard" "no-frame-pointer-elim" "no-frame-pointer-elim-non-leaf" "realign-stack" "relocation-model"="pic" "ssp-buffers-size"="8" }
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
+!3 = metadata !{}
+
diff --git a/test/Transforms/LoopVectorize/write-only.ll b/test/Transforms/LoopVectorize/write-only.ll
index eb02760413..54cbe8df46 100644
--- a/test/Transforms/LoopVectorize/write-only.ll
+++ b/test/Transforms/LoopVectorize/write-only.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/Transforms/Mem2Reg/ConvertDebugInfo.ll b/test/Transforms/Mem2Reg/ConvertDebugInfo.ll
index 2f1ccb493d..d70814fcb6 100644
--- a/test/Transforms/Mem2Reg/ConvertDebugInfo.ll
+++ b/test/Transforms/Mem2Reg/ConvertDebugInfo.ll
@@ -30,16 +30,16 @@ return:                                           ; preds = %entry
 
 declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 
-!0 = metadata !{i32 524545, metadata !1, metadata !"i", metadata !2, i32 2, metadata !7} ; [ DW_TAG_arg_variable ]
-!1 = metadata !{i32 524334, i32 0, metadata !2, metadata !"testfunc", metadata !"testfunc", metadata !"testfunc", metadata !2, i32 2, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false} ; [ DW_TAG_subprogram ]
-!2 = metadata !{i32 524329, metadata !"testfunc.c", metadata !"/tmp", metadata !3} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 524305, i32 0, i32 1, metadata !"testfunc.c", metadata !"/tmp", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!4 = metadata !{i32 524309, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!0 = metadata !{i32 786689, metadata !1, metadata !"i", metadata !2, i32 2, metadata !7, i32 0, null} ; [ DW_TAG_arg_variable ]
+!1 = metadata !{i32 786478, i32 0, metadata !2, metadata !"testfunc", metadata !"testfunc", metadata !"testfunc", metadata !2, i32 2, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, double (i32, double)* @testfunc, null, null, null, i32 2} ; [ DW_TAG_subprogram ]
+!2 = metadata !{i32 786473, metadata !"testfunc.c", metadata !"/tmp", metadata !3} ; [ DW_TAG_file_type ]
+!3 = metadata !{i32 786449, i32 0, i32 1, metadata !"testfunc.c", metadata !"/tmp", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!4 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ]
 !5 = metadata !{metadata !6, metadata !7, metadata !6}
-!6 = metadata !{i32 524324, metadata !2, metadata !"double", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
-!7 = metadata !{i32 524324, metadata !2, metadata !"int", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 786468, metadata !2, metadata !"double", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
+!7 = metadata !{i32 786468, metadata !2, metadata !"int", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
 !8 = metadata !{i32 2, i32 0, metadata !1, null}
-!9 = metadata !{i32 524545, metadata !1, metadata !"j", metadata !2, i32 2, metadata !6} ; [ DW_TAG_arg_variable ]
+!9 = metadata !{i32 786689, metadata !1, metadata !"j", metadata !2, i32 2, metadata !6, i32 0, null} ; [ DW_TAG_arg_variable ]
 !10 = metadata !{i32 3, i32 0, metadata !11, null}
-!11 = metadata !{i32 524299, metadata !1, i32 2, i32 0} ; [ DW_TAG_lexical_block ]
+!11 = metadata !{i32 786443, metadata !1, i32 2, i32 0} ; [ DW_TAG_lexical_block ]
 
diff --git a/test/Transforms/Mem2Reg/ConvertDebugInfo2.ll b/test/Transforms/Mem2Reg/ConvertDebugInfo2.ll
index 4cb621f61c..d1a4d8414a 100644
--- a/test/Transforms/Mem2Reg/ConvertDebugInfo2.ll
+++ b/test/Transforms/Mem2Reg/ConvertDebugInfo2.ll
@@ -30,23 +30,23 @@ return:                                           ; preds = %entry
   ret void, !dbg !19
 }
 
-!0 = metadata !{i32 524545, metadata !1, metadata !"a", metadata !2, i32 8, metadata !6} ; [ DW_TAG_arg_variable ]
-!1 = metadata !{i32 524334, i32 0, metadata !2, metadata !"baz", metadata !"baz", metadata !"baz", metadata !2, i32 8, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false} ; [ DW_TAG_subprogram ]
-!2 = metadata !{i32 524329, metadata !"bar.c", metadata !"/tmp/", metadata !3} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 524305, i32 0, i32 1, metadata !"bar.c", metadata !"/tmp/", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!4 = metadata !{i32 524309, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!0 = metadata !{i32 786689, metadata !1, metadata !"a", metadata !2, i32 8, metadata !6, i32 0, null} ; [ DW_TAG_arg_variable ]
+!1 = metadata !{i32 786478, i32 0, metadata !2, metadata !"baz", metadata !"baz", metadata !"baz", metadata !2, i32 8, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, void (i32)* @baz, null, null, null, i32 8} ; [ DW_TAG_subprogram ]
+!2 = metadata !{i32 786473, metadata !"bar.c", metadata !"/tmp/", metadata !3} ; [ DW_TAG_file_type ]
+!3 = metadata !{i32 786449, i32 0, i32 1, metadata !"bar.c", metadata !"/tmp/", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!4 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ]
 !5 = metadata !{null, metadata !6}
-!6 = metadata !{i32 524324, metadata !2, metadata !"int", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 786468, metadata !2, metadata !"int", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
 !7 = metadata !{i32 8, i32 0, metadata !1, null}
 !8 = metadata !{i32 9, i32 0, metadata !1, null}
-!9 = metadata !{i32 524545, metadata !10, metadata !"x", metadata !2, i32 4, metadata !6} ; [ DW_TAG_arg_variable ]
-!10 = metadata !{i32 524334, i32 0, metadata !2, metadata !"bar", metadata !"bar", metadata !"bar", metadata !2, i32 4, metadata !11, i1 true, i1 true, i32 0, i32 0, null, i1 false, i1 false} ; [ DW_TAG_subprogram ]
-!11 = metadata !{i32 524309, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!9 = metadata !{i32 786689, metadata !10, metadata !"x", metadata !2, i32 4, metadata !6, i32 0, null} ; [ DW_TAG_arg_variable ]
+!10 = metadata !{i32 786478, i32 0, metadata !2, metadata !"bar", metadata !"bar", metadata !"bar", metadata !2, i32 4, metadata !11, i1 true, i1 true, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 4} ; [ DW_TAG_subprogram ]
+!11 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null} ; [ DW_TAG_subroutine_type ]
 !12 = metadata !{null, metadata !6, metadata !13, metadata !14}
-!13 = metadata !{i32 524324, metadata !2, metadata !"long int", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!14 = metadata !{i32 524303, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ]
+!13 = metadata !{i32 786468, metadata !2, metadata !"long int", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!14 = metadata !{i32 786447, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ]
 !15 = metadata !{i32 4, i32 0, metadata !10, metadata !8}
-!16 = metadata !{i32 524545, metadata !10, metadata !"y", metadata !2, i32 4, metadata !13} ; [ DW_TAG_arg_variable ]
-!17 = metadata !{i32 524545, metadata !10, metadata !"z", metadata !2, i32 4, metadata !14} ; [ DW_TAG_arg_variable ]
+!16 = metadata !{i32 786689, metadata !10, metadata !"y", metadata !2, i32 4, metadata !13, i32 0, null} ; [ DW_TAG_arg_variable ]
+!17 = metadata !{i32 786689, metadata !10, metadata !"z", metadata !2, i32 4, metadata !14, i32 0, null} ; [ DW_TAG_arg_variable ]
 !18 = metadata !{i32 5, i32 0, metadata !10, metadata !8}
 !19 = metadata !{i32 10, i32 0, metadata !1, null}
diff --git a/test/Transforms/MemCpyOpt/memcpy.ll b/test/Transforms/MemCpyOpt/memcpy.ll
index 3fa16288c2..582a57b5d3 100644
--- a/test/Transforms/MemCpyOpt/memcpy.ll
+++ b/test/Transforms/MemCpyOpt/memcpy.ll
@@ -70,20 +70,20 @@ define void @test4(i8 *%P) {
   %A = alloca %1
   %a = bitcast %1* %A to i8*
   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %P, i64 8, i32 4, i1 false)
-  call void @test4a(i8* byval align 1 %a)
+  call void @test4a(i8* align 1 byval %a)
   ret void
 ; CHECK: @test4
 ; CHECK-NEXT: call void @test4a(
 }
 
-declare void @test4a(i8* byval align 1)
+declare void @test4a(i8* align 1 byval)
 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
 
 %struct.S = type { i128, [4 x i8]}
 
 @sS = external global %struct.S, align 16
 
-declare void @test5a(%struct.S* byval align 16) nounwind ssp
+declare void @test5a(%struct.S* align 16 byval) nounwind ssp
 
 
 ; rdar://8713376 - This memcpy can't be eliminated.
@@ -94,7 +94,7 @@ entry:
   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp, i8* bitcast (%struct.S* @sS to i8*), i64 32, i32 16, i1 false)
   %a = getelementptr %struct.S* %y, i64 0, i32 1, i64 0
   store i8 4, i8* %a
-  call void @test5a(%struct.S* byval align 16 %y)
+  call void @test5a(%struct.S* align 16 byval %y)
   ret i32 0
   ; CHECK: @test5(
   ; CHECK: store i8 4
@@ -114,19 +114,19 @@ define void @test6(i8 *%P) {
 ; isn't itself 8 byte aligned.
 %struct.p = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
 
-define i32 @test7(%struct.p* nocapture byval align 8 %q) nounwind ssp {
+define i32 @test7(%struct.p* nocapture align 8 byval %q) nounwind ssp {
 entry:
   %agg.tmp = alloca %struct.p, align 4
   %tmp = bitcast %struct.p* %agg.tmp to i8*
   %tmp1 = bitcast %struct.p* %q to i8*
   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp, i8* %tmp1, i64 48, i32 4, i1 false)
-  %call = call i32 @g(%struct.p* byval align 8 %agg.tmp) nounwind
+  %call = call i32 @g(%struct.p* align 8 byval %agg.tmp) nounwind
   ret i32 %call
 ; CHECK: @test7
-; CHECK: call i32 @g(%struct.p* byval align 8 %q) nounwind
+; CHECK: call i32 @g(%struct.p* byval align 8 %q) [[NUW:#[0-9]+]]
 }
 
-declare i32 @g(%struct.p* byval align 8)
+declare i32 @g(%struct.p* align 8 byval)
 
 declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
 
@@ -152,7 +152,7 @@ declare noalias i8* @malloc(i32)
 ; rdar://11341081
 %struct.big = type { [50 x i32] }
 
-define void @test9() nounwind uwtable ssp {
+define void @test9() nounwind ssp uwtable {
 entry:
 ; CHECK: test9
 ; CHECK: f1
@@ -170,3 +170,7 @@ entry:
 
 declare void @f1(%struct.big* sret)
 declare void @f2(%struct.big*)
+
+; CHECK: attributes [[NUW]] = { nounwind }
+; CHECK: attributes #1 = { nounwind ssp }
+; CHECK: attributes #2 = { nounwind ssp uwtable }
diff --git a/test/Transforms/MergeFunc/2011-02-08-RemoveEqual.ll b/test/Transforms/MergeFunc/2011-02-08-RemoveEqual.ll
index e3e52b401a..19cd6a5171 100644
--- a/test/Transforms/MergeFunc/2011-02-08-RemoveEqual.ll
+++ b/test/Transforms/MergeFunc/2011-02-08-RemoveEqual.ll
@@ -1,4 +1,4 @@
-; RUN: opt -mergefunc %s -disable-output
+; RUN: opt -mergefunc -disable-output < %s
 ; This used to crash.
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
diff --git a/test/Transforms/MergeFunc/2013-01-10-MergeFuncAssert.ll b/test/Transforms/MergeFunc/2013-01-10-MergeFuncAssert.ll
new file mode 100644
index 0000000000..3f6a5ba157
--- /dev/null
+++ b/test/Transforms/MergeFunc/2013-01-10-MergeFuncAssert.ll
@@ -0,0 +1,36 @@
+; RUN: opt -mergefunc -disable-output < %s
+; This used to trigger a ConstantExpr::getBitCast assertion.
+
+define void @t1() unnamed_addr uwtable ssp align 2 {
+entry:
+  switch i32 undef, label %sw.bb12 [
+    i32 127, label %sw.bb
+    i32 126, label %sw.bb4
+  ]
+
+sw.bb:                                            ; preds = %entry
+  unreachable
+
+sw.bb4:                                           ; preds = %entry
+  unreachable
+
+sw.bb12:                                          ; preds = %entry
+  ret void
+}
+
+define void @t2() unnamed_addr uwtable ssp align 2 {
+entry:
+  switch i32 undef, label %sw.bb8 [
+    i32 4, label %sw.bb
+    i32 3, label %sw.bb4
+  ]
+
+sw.bb:                                            ; preds = %entry
+  unreachable
+
+sw.bb4:                                           ; preds = %entry
+  ret void
+
+sw.bb8:                                           ; preds = %entry
+  unreachable
+}
diff --git a/test/Transforms/MergeFunc/Stats/lit.local.cfg b/test/Transforms/MergeFunc/Stats/lit.local.cfg
new file mode 100644
index 0000000000..89c0cd9d48
--- /dev/null
+++ b/test/Transforms/MergeFunc/Stats/lit.local.cfg
@@ -0,0 +1,4 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+if not config.root.enable_assertions:
+    config.unsupported = True
diff --git a/test/Transforms/MergeFunc/phi-speculation1.ll b/test/Transforms/MergeFunc/Stats/phi-speculation1.ll
index fd0baffb31..fd0baffb31 100644
--- a/test/Transforms/MergeFunc/phi-speculation1.ll
+++ b/test/Transforms/MergeFunc/Stats/phi-speculation1.ll
diff --git a/test/Transforms/MergeFunc/phi-speculation2.ll b/test/Transforms/MergeFunc/Stats/phi-speculation2.ll
index eec8b5c5a9..eec8b5c5a9 100644
--- a/test/Transforms/MergeFunc/phi-speculation2.ll
+++ b/test/Transforms/MergeFunc/Stats/phi-speculation2.ll
diff --git a/test/Transforms/MergeFunc/vector.ll b/test/Transforms/MergeFunc/Stats/vector.ll
index 4af079f8cd..4af079f8cd 100644
--- a/test/Transforms/MergeFunc/vector.ll
+++ b/test/Transforms/MergeFunc/Stats/vector.ll
diff --git a/test/Transforms/MergeFunc/vectors-and-arrays.ll b/test/Transforms/MergeFunc/Stats/vectors-and-arrays.ll
index dc64a0858b..dc64a0858b 100644
--- a/test/Transforms/MergeFunc/vectors-and-arrays.ll
+++ b/test/Transforms/MergeFunc/Stats/vectors-and-arrays.ll
diff --git a/test/Transforms/MetaRenamer/metarenamer.ll b/test/Transforms/MetaRenamer/metarenamer.ll
index ad41bcf50f..4020e10450 100644
--- a/test/Transforms/MetaRenamer/metarenamer.ll
+++ b/test/Transforms/MetaRenamer/metarenamer.ll
@@ -1,4 +1,4 @@
-; RUN: opt %s -metarenamer -S | FileCheck %s
+; RUN: opt -metarenamer -S < %s | FileCheck %s
 
 ; CHECK: target triple {{.*}}
 ; CHECK-NOT: {{^x*}}xxx{{^x*}}
diff --git a/test/Transforms/ObjCARC/apelim.ll b/test/Transforms/ObjCARC/apelim.ll
index 8c7b5b1e65..4541b3f2fd 100644
--- a/test/Transforms/ObjCARC/apelim.ll
+++ b/test/Transforms/ObjCARC/apelim.ll
@@ -38,8 +38,8 @@ entry:
 }
 
 ; CHECK: define internal void @_GLOBAL__I_y()
-; CHECK: %0 = call i8* @objc_autoreleasePoolPush() nounwind
-; CHECK: call void @objc_autoreleasePoolPop(i8* %0) nounwind
+; CHECK: %0 = call i8* @objc_autoreleasePoolPush() [[NUW:#[0-9]+]]
+; CHECK: call void @objc_autoreleasePoolPop(i8* %0) [[NUW]]
 ; CHECK: }
 define internal void @_GLOBAL__I_y() {
 entry:
@@ -51,3 +51,5 @@ entry:
 
 declare i8* @objc_autoreleasePoolPush()
 declare void @objc_autoreleasePoolPop(i8*)
+
+; CHECK: attributes #0 = { nounwind }
diff --git a/test/Transforms/ObjCARC/basic.ll b/test/Transforms/ObjCARC/basic.ll
index 7b64b1be7c..4c24ebf865 100644
--- a/test/Transforms/ObjCARC/basic.ll
+++ b/test/Transforms/ObjCARC/basic.ll
@@ -92,10 +92,10 @@ alt_return:
 
 ; CHECK: define void @test1b(
 ; CHECK: entry:
-; CHECK:   tail call i8* @objc_retain(i8* %x) nounwind
+; CHECK:   tail call i8* @objc_retain(i8* %x) [[NUW:#[0-9]+]]
 ; CHECK-NOT: @objc_
 ; CHECK: if.end5:
-; CHECK:   tail call void @objc_release(i8* %x) nounwind, !clang.imprecise_release !0
+; CHECK:   tail call void @objc_release(i8* %x) [[NUW]], !clang.imprecise_release !0
 ; CHECK-NOT: @objc_
 ; CHECK: }
 define void @test1b(i8* %x, i1 %p, i1 %q) {
@@ -404,8 +404,8 @@ entry:
 ; a stack argument.
 
 ; CHECK: define void @test11(
-; CHECK: tail call i8* @objc_retain(i8* %x) nounwind
-; CHECK: tail call i8* @objc_autorelease(i8* %0) nounwind
+; CHECK: tail call i8* @objc_retain(i8* %x) [[NUW]]
+; CHECK: call i8* @objc_autorelease(i8* %0) [[NUW]]
 ; CHECK: }
 define void @test11(i8* %x) nounwind {
 entry:
@@ -431,8 +431,8 @@ entry:
 ; Same as test11 but the value is returned. Do an RV optimization.
 
 ; CHECK: define i8* @test11b(
-; CHECK: tail call i8* @objc_retain(i8* %x) nounwind
-; CHECK: tail call i8* @objc_autoreleaseReturnValue(i8* %0) nounwind
+; CHECK: tail call i8* @objc_retain(i8* %x) [[NUW]]
+; CHECK: tail call i8* @objc_autoreleaseReturnValue(i8* %0) [[NUW]]
 ; CHECK: }
 define i8* @test11b(i8* %x) nounwind {
 entry:
@@ -462,10 +462,10 @@ entry:
 ; Trivial retain,autorelease pair. Don't delete!
 
 ; CHECK: define void @test13(
-; CHECK: tail call i8* @objc_retain(i8* %x) nounwind
-; CHECK: tail call i8* @objc_retain(i8* %x) nounwind
+; CHECK: tail call i8* @objc_retain(i8* %x) [[NUW]]
+; CHECK: tail call i8* @objc_retain(i8* %x) [[NUW]]
 ; CHECK: @use_pointer(i8* %x)
-; CHECK: tail call i8* @objc_autorelease(i8* %x) nounwind
+; CHECK: call i8* @objc_autorelease(i8* %x) [[NUW]]
 ; CHECK: }
 define void @test13(i8* %x, i64 %n) {
 entry:
@@ -716,7 +716,7 @@ entry:
 ; Bitcast insertion
 
 ; CHECK: define void @test20(
-; CHECK: %tmp1 = tail call i8* @objc_retain(i8* %tmp) nounwind
+; CHECK: %tmp1 = tail call i8* @objc_retain(i8* %tmp) [[NUW]]
 ; CHECK-NEXT: invoke
 define void @test20(double* %self) {
 if.then12:
@@ -980,7 +980,7 @@ done:
 ; CHECK: call i8* @objc_retain(
 ; CHECK: call void @callee()
 ; CHECK: store
-; CHECK: call void @objc_release(i8* %p) nounwind, !clang.imprecise_release
+; CHECK: call void @objc_release(i8* %p) [[NUW]], !clang.imprecise_release
 ; CHECK: done:
 ; CHECK-NOT: @objc_
 ; CHECK: }
@@ -1450,9 +1450,9 @@ define void @test45(i8** %pp, i8** %qq) {
 ; Don't delete retain and autorelease here.
 
 ; CHECK: define void @test46(
-; CHECK: tail call i8* @objc_retain(i8* %p) nounwind
+; CHECK: tail call i8* @objc_retain(i8* %p) [[NUW]]
 ; CHECK: true:
-; CHECK: tail call i8* @objc_autorelease(i8* %p) nounwind
+; CHECK: call i8* @objc_autorelease(i8* %p) [[NUW]]
 define void @test46(i8* %p, i1 %a) {
 entry:
   call i8* @objc_retain(i8* %p)
@@ -1565,7 +1565,7 @@ define void @test53(void ()** %zz, i8** %pp) {
 
 ; CHECK: define void @test54(
 ; CHECK: call i8* @returner()
-; CHECK-NEXT: call void @objc_release(i8* %t) nounwind, !clang.imprecise_release !0
+; CHECK-NEXT: call void @objc_release(i8* %t) [[NUW]], !clang.imprecise_release !0
 ; CHECK-NEXT: ret void
 define void @test54() {
   %t = call i8* @returner()
@@ -1595,10 +1595,10 @@ entry:
 ; CHECK: define void @test56(
 ; CHECK-NOT: @objc
 ; CHECK: if.then:
-; CHECK-NEXT: %0 = tail call i8* @objc_retain(i8* %x) nounwind
+; CHECK-NEXT: %0 = tail call i8* @objc_retain(i8* %x) [[NUW]]
 ; CHECK-NEXT: tail call void @use_pointer(i8* %x)
 ; CHECK-NEXT: tail call void @use_pointer(i8* %x)
-; CHECK-NEXT: tail call void @objc_release(i8* %x) nounwind, !clang.imprecise_release !0
+; CHECK-NEXT: tail call void @objc_release(i8* %x) [[NUW]], !clang.imprecise_release !0
 ; CHECK-NEXT: br label %if.end
 ; CHECK-NOT: @objc
 ; CHECK: }
@@ -1630,10 +1630,10 @@ if.end:                                           ; preds = %entry, %if.then
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT:   call void @use_pointer(i8* %x)
 ; CHECK-NEXT:   call void @use_pointer(i8* %x)
-; CHECK-NEXT:   %0 = tail call i8* @objc_retain(i8* %x) nounwind
+; CHECK-NEXT:   %0 = tail call i8* @objc_retain(i8* %x) [[NUW]]
 ; CHECK-NEXT:   call void @use_pointer(i8* %x)
 ; CHECK-NEXT:   call void @use_pointer(i8* %x)
-; CHECK-NEXT:   call void @objc_release(i8* %x) nounwind
+; CHECK-NEXT:   call void @objc_release(i8* %x) [[NUW]]
 ; CHECK-NEXT:   ret void
 ; CHECK-NEXT: }
 define void @test57(i8* %x) nounwind {
@@ -1673,10 +1673,10 @@ entry:
 
 ; CHECK:      define void @test59(
 ; CHECK-NEXT: entry:
-; CHECK-NEXT:   %0 = tail call i8* @objc_retain(i8* %x) nounwind
+; CHECK-NEXT:   %0 = tail call i8* @objc_retain(i8* %x) [[NUW]]
 ; CHECK-NEXT:   call void @use_pointer(i8* %x)
 ; CHECK-NEXT:   call void @use_pointer(i8* %x)
-; CHECK-NEXT:   call void @objc_release(i8* %x) nounwind
+; CHECK-NEXT:   call void @objc_release(i8* %x) [[NUW]]
 ; CHECK-NEXT:   ret void
 ; CHECK-NEXT: }
 define void @test59(i8* %x) nounwind {
@@ -1875,8 +1875,8 @@ return:                                           ; preds = %if.then, %entry
 ; rdar://11931823
 
 ; CHECK: define void @test66(
-; CHECK:   %tmp7 = tail call i8* @objc_retain(i8* %cond) nounwind
-; CHECK:   tail call void @objc_release(i8* %cond) nounwind
+; CHECK:   %tmp7 = tail call i8* @objc_retain(i8* %cond) [[NUW]]
+; CHECK:   tail call void @objc_release(i8* %cond) [[NUW]]
 ; CHECK: }
 define void @test66(i8* %tmp5, i8* %bar, i1 %tobool, i1 %tobool1, i8* %call) {
 entry:
@@ -2224,3 +2224,6 @@ end:                                              ; preds = %if.end125, %if.end1
 !0 = metadata !{}
 
 declare i32 @__gxx_personality_v0(...)
+
+; CHECK: attributes #0 = { nounwind readnone }
+; CHECK: attributes [[NUW]] = { nounwind }
diff --git a/test/Transforms/ObjCARC/cfg-hazards.ll b/test/Transforms/ObjCARC/cfg-hazards.ll
index 15194237c4..899298b596 100644
--- a/test/Transforms/ObjCARC/cfg-hazards.ll
+++ b/test/Transforms/ObjCARC/cfg-hazards.ll
@@ -86,9 +86,9 @@ for.end:                                          ; preds = %for.body
 
 ; Delete nested retain+release pairs around loops.
 
-;      CHECK: define void @test3(i8* %a) nounwind {
+;      CHECK: define void @test3(i8* %a) #0 {
 ; CHECK-NEXT: entry:
-; CHECK-NEXT:   tail call i8* @objc_retain(i8* %a) nounwind
+; CHECK-NEXT:   tail call i8* @objc_retain(i8* %a) [[NUW:#[0-9]+]]
 ; CHECK-NEXT:   br label %loop
 ;  CHECK-NOT:   @objc_
 ;      CHECK: exit:
@@ -112,9 +112,9 @@ exit:
   ret void
 }
 
-;      CHECK: define void @test4(i8* %a) nounwind {
+;      CHECK: define void @test4(i8* %a) #0 {
 ; CHECK-NEXT: entry:
-; CHECK-NEXT:   tail call i8* @objc_retain(i8* %a) nounwind
+; CHECK-NEXT:   tail call i8* @objc_retain(i8* %a) [[NUW]]
 ; CHECK-NEXT:   br label %loop
 ;  CHECK-NOT:   @objc_
 ;      CHECK: exit:
@@ -142,9 +142,9 @@ exit:
   ret void
 }
 
-;      CHECK: define void @test5(i8* %a) nounwind {
+;      CHECK: define void @test5(i8* %a) #0 {
 ; CHECK-NEXT: entry:
-; CHECK-NEXT:   tail call i8* @objc_retain(i8* %a) nounwind
+; CHECK-NEXT:   tail call i8* @objc_retain(i8* %a) [[NUW]]
 ; CHECK-NEXT:   call void @callee()
 ; CHECK-NEXT:   br label %loop
 ;  CHECK-NOT:   @objc_
@@ -176,9 +176,9 @@ exit:
   ret void
 }
 
-;      CHECK: define void @test6(i8* %a) nounwind {
+;      CHECK: define void @test6(i8* %a) #0 {
 ; CHECK-NEXT: entry:
-; CHECK-NEXT:   tail call i8* @objc_retain(i8* %a) nounwind
+; CHECK-NEXT:   tail call i8* @objc_retain(i8* %a) [[NUW]]
 ; CHECK-NEXT:   br label %loop
 ;  CHECK-NOT:   @objc_
 ;      CHECK: exit:
@@ -209,9 +209,9 @@ exit:
   ret void
 }
 
-;      CHECK: define void @test7(i8* %a) nounwind {
+;      CHECK: define void @test7(i8* %a) #0 {
 ; CHECK-NEXT: entry:
-; CHECK-NEXT:   tail call i8* @objc_retain(i8* %a) nounwind
+; CHECK-NEXT:   tail call i8* @objc_retain(i8* %a) [[NUW]]
 ; CHECK-NEXT:   call void @callee()
 ; CHECK-NEXT:   br label %loop
 ;  CHECK-NOT:   @objc_
@@ -242,9 +242,9 @@ exit:
   ret void
 }
 
-;      CHECK: define void @test8(i8* %a) nounwind {
+;      CHECK: define void @test8(i8* %a) #0 {
 ; CHECK-NEXT: entry:
-; CHECK-NEXT:   tail call i8* @objc_retain(i8* %a) nounwind
+; CHECK-NEXT:   tail call i8* @objc_retain(i8* %a) [[NUW]]
 ; CHECK-NEXT:   br label %loop
 ;  CHECK-NOT:   @objc_
 ;      CHECK: exit:
@@ -274,7 +274,7 @@ exit:
   ret void
 }
 
-;      CHECK: define void @test9(i8* %a) nounwind {
+;      CHECK: define void @test9(i8* %a) #0 {
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT:   br label %loop
 ;  CHECK-NOT:   @objc_
@@ -303,7 +303,7 @@ exit:
   ret void
 }
 
-;      CHECK: define void @test10(i8* %a) nounwind {
+;      CHECK: define void @test10(i8* %a) #0 {
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT:   br label %loop
 ;  CHECK-NOT:   @objc_
@@ -332,7 +332,7 @@ exit:
   ret void
 }
 
-;      CHECK: define void @test11(i8* %a) nounwind {
+;      CHECK: define void @test11(i8* %a) #0 {
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT:   br label %loop
 ;  CHECK-NOT:   @objc_
@@ -362,15 +362,15 @@ exit:
 
 ; Don't delete anything if they're not balanced.
 
-;      CHECK: define void @test12(i8* %a) nounwind {
+;      CHECK: define void @test12(i8* %a) #0 {
 ; CHECK-NEXT: entry:
-; CHECK-NEXT:   %outer = tail call i8* @objc_retain(i8* %a) nounwind
-; CHECK-NEXT:   %inner = tail call i8* @objc_retain(i8* %a) nounwind
+; CHECK-NEXT:   %outer = tail call i8* @objc_retain(i8* %a) [[NUW]]
+; CHECK-NEXT:   %inner = tail call i8* @objc_retain(i8* %a) [[NUW]]
 ; CHECK-NEXT:   br label %loop
 ;  CHECK-NOT:   @objc_
 ;      CHECK: exit:
-; CHECK-NEXT: call void @objc_release(i8* %a) nounwind
-; CHECK-NEXT: call void @objc_release(i8* %a) nounwind, !clang.imprecise_release !0
+; CHECK-NEXT: call void @objc_release(i8* %a) [[NUW]]
+; CHECK-NEXT: call void @objc_release(i8* %a) [[NUW]], !clang.imprecise_release !0
 ; CHECK-NEXT:   ret void
 ; CHECK-NEXT: }
 define void @test12(i8* %a) nounwind {
@@ -394,4 +394,6 @@ exit:
   ret void
 }
 
+; CHECK: attributes [[NUW]] = { nounwind }
+
 !0 = metadata !{}
diff --git a/test/Transforms/ObjCARC/contract-marker.ll b/test/Transforms/ObjCARC/contract-marker.ll
index 01d978a0e2..01fd1e7143 100644
--- a/test/Transforms/ObjCARC/contract-marker.ll
+++ b/test/Transforms/ObjCARC/contract-marker.ll
@@ -3,7 +3,7 @@
 ; CHECK:      %call = tail call i32* @qux()
 ; CHECK-NEXT: %tcall = bitcast i32* %call to i8*
 ; CHECK-NEXT: call void asm sideeffect "mov\09r7, r7\09\09@ marker for objc_retainAutoreleaseReturnValue", ""()
-; CHECK-NEXT: %0 = tail call i8* @objc_retainAutoreleasedReturnValue(i8* %tcall) nounwind
+; CHECK-NEXT: %0 = tail call i8* @objc_retainAutoreleasedReturnValue(i8* %tcall) [[NUW:#[0-9]+]]
 
 define void @foo() {
 entry:
@@ -21,3 +21,5 @@ declare void @bar(i8*)
 !clang.arc.retainAutoreleasedReturnValueMarker = !{!0}
 
 !0 = metadata !{metadata !"mov\09r7, r7\09\09@ marker for objc_retainAutoreleaseReturnValue"}
+
+; CHECK: attributes [[NUW]] = { nounwind }
diff --git a/test/Transforms/ObjCARC/contract-storestrong.ll b/test/Transforms/ObjCARC/contract-storestrong.ll
index 2922f816d5..6999237300 100644
--- a/test/Transforms/ObjCARC/contract-storestrong.ll
+++ b/test/Transforms/ObjCARC/contract-storestrong.ll
@@ -10,7 +10,7 @@ declare void @use_pointer(i8*)
 
 ; CHECK: define void @test0(
 ; CHECK: entry:
-; CHECK-NEXT: tail call void @objc_storeStrong(i8** @x, i8* %p) nounwind
+; CHECK-NEXT: tail call void @objc_storeStrong(i8** @x, i8* %p) [[NUW:#[0-9]+]]
 ; CHECK-NEXT: ret void
 define void @test0(i8* %p) {
 entry:
@@ -25,10 +25,10 @@ entry:
 
 ;      CHECK: define void @test1(i8* %p) {
 ; CHECK-NEXT: entry:
-; CHECK-NEXT:   %0 = tail call i8* @objc_retain(i8* %p) nounwind
+; CHECK-NEXT:   %0 = tail call i8* @objc_retain(i8* %p) [[NUW]]
 ; CHECK-NEXT:   %tmp = load volatile i8** @x, align 8
 ; CHECK-NEXT:   store i8* %0, i8** @x, align 8
-; CHECK-NEXT:   tail call void @objc_release(i8* %tmp) nounwind
+; CHECK-NEXT:   tail call void @objc_release(i8* %tmp) [[NUW]]
 ; CHECK-NEXT:   ret void
 ; CHECK-NEXT: }
 define void @test1(i8* %p) {
@@ -44,10 +44,10 @@ entry:
 
 ;      CHECK: define void @test2(i8* %p) {
 ; CHECK-NEXT: entry:
-; CHECK-NEXT:   %0 = tail call i8* @objc_retain(i8* %p) nounwind
+; CHECK-NEXT:   %0 = tail call i8* @objc_retain(i8* %p) [[NUW]]
 ; CHECK-NEXT:   %tmp = load i8** @x, align 8
 ; CHECK-NEXT:   store volatile i8* %0, i8** @x, align 8
-; CHECK-NEXT:   tail call void @objc_release(i8* %tmp) nounwind
+; CHECK-NEXT:   tail call void @objc_release(i8* %tmp) [[NUW]]
 ; CHECK-NEXT:   ret void
 ; CHECK-NEXT: }
 define void @test2(i8* %p) {
@@ -64,11 +64,11 @@ entry:
 
 ; CHECK:      define void @test3(i8* %newValue) {
 ; CHECK-NEXT: entry:
-; CHECK-NEXT:   %x0 = tail call i8* @objc_retain(i8* %newValue) nounwind
+; CHECK-NEXT:   %x0 = tail call i8* @objc_retain(i8* %newValue) [[NUW]]
 ; CHECK-NEXT:   %x1 = load i8** @x, align 8
 ; CHECK-NEXT:   store i8* %x0, i8** @x, align 8
 ; CHECK-NEXT:   tail call void @use_pointer(i8* %x1), !clang.arc.no_objc_arc_exceptions !0
-; CHECK-NEXT:   tail call void @objc_release(i8* %x1) nounwind, !clang.imprecise_release !0
+; CHECK-NEXT:   tail call void @objc_release(i8* %x1) [[NUW]], !clang.imprecise_release !0
 ; CHECK-NEXT:   ret void
 ; CHECK-NEXT: }
 define void @test3(i8* %newValue) {
@@ -85,11 +85,11 @@ entry:
 
 ; CHECK:      define i1 @test4(i8* %newValue, i8* %foo) {
 ; CHECK-NEXT: entry:
-; CHECK-NEXT:   %x0 = tail call i8* @objc_retain(i8* %newValue) nounwind
+; CHECK-NEXT:   %x0 = tail call i8* @objc_retain(i8* %newValue) [[NUW]]
 ; CHECK-NEXT:   %x1 = load i8** @x, align 8
 ; CHECK-NEXT:   store i8* %x0, i8** @x, align 8
 ; CHECK-NEXT:   %t = icmp eq i8* %x1, %foo
-; CHECK-NEXT:   tail call void @objc_release(i8* %x1) nounwind, !clang.imprecise_release !0
+; CHECK-NEXT:   tail call void @objc_release(i8* %x1) [[NUW]], !clang.imprecise_release !0
 ; CHECK-NEXT:   ret i1 %t
 ; CHECK-NEXT: }
 define i1 @test4(i8* %newValue, i8* %foo) {
@@ -106,7 +106,7 @@ entry:
 
 ; CHECK: define i1 @test5(i8* %newValue, i8* %foo) {
 ; CHECK: %t = icmp eq i8* %x1, %foo
-; CHECK: tail call void @objc_storeStrong(i8** @x, i8* %newValue) nounwind
+; CHECK: tail call void @objc_storeStrong(i8** @x, i8* %newValue) [[NUW]]
 define i1 @test5(i8* %newValue, i8* %foo) {
 entry:
   %x0 = tail call i8* @objc_retain(i8* %newValue) nounwind
@@ -121,7 +121,7 @@ entry:
 
 ; CHECK: define i1 @test6(i8* %newValue, i8* %foo) {
 ; CHECK: %t = icmp eq i8* %x1, %foo
-; CHECK: tail call void @objc_storeStrong(i8** @x, i8* %newValue) nounwind
+; CHECK: tail call void @objc_storeStrong(i8** @x, i8* %newValue) [[NUW]]
 define i1 @test6(i8* %newValue, i8* %foo) {
 entry:
   %x0 = tail call i8* @objc_retain(i8* %newValue) nounwind
@@ -136,9 +136,9 @@ entry:
 
 ;      CHECK: define void @test7(
 ; CHECK-NEXT: entry:
-; CHECK-NEXT:   %0 = tail call i8* @objc_retain(i8* %p) nounwind
+; CHECK-NEXT:   %0 = tail call i8* @objc_retain(i8* %p) [[NUW]]
 ; CHECK-NEXT:   %tmp = load i8** @x, align 8
-; CHECK-NEXT:   tail call void @objc_release(i8* %tmp) nounwind
+; CHECK-NEXT:   tail call void @objc_release(i8* %tmp) [[NUW]]
 ; CHECK-NEXT:   ret void
 ; CHECK-NEXT: }
 define void @test7(i8* %p) {
@@ -155,7 +155,7 @@ entry:
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT:   %tmp = load i8** @x, align 8
 ; CHECK-NEXT:   store i8* %p, i8** @x, align 8
-; CHECK-NEXT:   tail call void @objc_release(i8* %tmp) nounwind
+; CHECK-NEXT:   tail call void @objc_release(i8* %tmp) [[NUW]]
 ; CHECK-NEXT:   ret void
 ; CHECK-NEXT: }
 define void @test8(i8* %p) {
@@ -167,3 +167,5 @@ entry:
 }
 
 !0 = metadata !{}
+
+; CHECK: attributes [[NUW]] = { nounwind }
diff --git a/test/Transforms/ObjCARC/contract-testcases.ll b/test/Transforms/ObjCARC/contract-testcases.ll
index 1510ed00e6..85b03be275 100644
--- a/test/Transforms/ObjCARC/contract-testcases.ll
+++ b/test/Transforms/ObjCARC/contract-testcases.ll
@@ -69,7 +69,7 @@ bb7:                                              ; preds = %bb6, %bb6, %bb5
 ; CHECK: define void @_Z6doTestP8NSString() {
 ; CHECK: invoke.cont:                                      ; preds = %entry
 ; CHECK-NEXT: call void asm sideeffect "mov\09r7, r7\09\09@ marker for objc_retainAutoreleaseReturnValue", ""()
-; CHECK-NEXT: %tmp = tail call i8* @objc_retainAutoreleasedReturnValue(i8* %call) nounwind
+; CHECK-NEXT: %tmp = tail call i8* @objc_retainAutoreleasedReturnValue(i8* %call) [[NUW:#[0-9]+]]
 define void @_Z6doTestP8NSString() {
 entry:
   %call = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* ()*)()
@@ -88,3 +88,6 @@ lpad:                                             ; preds = %entry
 !clang.arc.retainAutoreleasedReturnValueMarker = !{!0}
 
 !0 = metadata !{metadata !"mov\09r7, r7\09\09@ marker for objc_retainAutoreleaseReturnValue"}
+
+; CHECK: attributes #0 = { optsize }
+; CHECK: attributes [[NUW]] = { nounwind }
diff --git a/test/Transforms/ObjCARC/contract.ll b/test/Transforms/ObjCARC/contract.ll
index c48f8a534f..b6fba59603 100644
--- a/test/Transforms/ObjCARC/contract.ll
+++ b/test/Transforms/ObjCARC/contract.ll
@@ -34,12 +34,12 @@ entry:
 ; Merge objc_retain and objc_autorelease into objc_retainAutorelease.
 
 ; CHECK: define void @test2(
-; CHECK: tail call i8* @objc_retainAutorelease(i8* %x) nounwind
+; CHECK: tail call i8* @objc_retainAutorelease(i8* %x) [[NUW:#[0-9]+]]
 ; CHECK: }
 define void @test2(i8* %x) nounwind {
 entry:
   %0 = tail call i8* @objc_retain(i8* %x) nounwind
-  tail call i8* @objc_autorelease(i8* %0) nounwind
+  call i8* @objc_autorelease(i8* %0) nounwind
   call void @use_pointer(i8* %x)
   ret void
 }
@@ -47,7 +47,7 @@ entry:
 ; Same as test2 but the value is returned. Do an RV optimization.
 
 ; CHECK: define i8* @test2b(
-; CHECK: tail call i8* @objc_retainAutoreleaseReturnValue(i8* %x) nounwind
+; CHECK: tail call i8* @objc_retainAutoreleaseReturnValue(i8* %x) [[NUW]]
 ; CHECK: }
 define i8* @test2b(i8* %x) nounwind {
 entry:
@@ -59,14 +59,14 @@ entry:
 ; Merge a retain,autorelease pair around a call.
 
 ; CHECK: define void @test3(
-; CHECK: tail call i8* @objc_retainAutorelease(i8* %x) nounwind
+; CHECK: tail call i8* @objc_retainAutorelease(i8* %x) [[NUW]]
 ; CHECK: @use_pointer(i8* %0)
 ; CHECK: }
 define void @test3(i8* %x, i64 %n) {
 entry:
   tail call i8* @objc_retain(i8* %x) nounwind
   call void @use_pointer(i8* %x)
-  tail call i8* @objc_autorelease(i8* %x) nounwind
+  call i8* @objc_autorelease(i8* %x) nounwind
   ret void
 }
 
@@ -75,7 +75,7 @@ entry:
 
 ; CHECK: define void @test4(
 ; CHECK-NEXT: entry:
-; CHECK-NEXT: @objc_retainAutorelease(i8* %x) nounwind
+; CHECK-NEXT: @objc_retainAutorelease(i8* %x) [[NUW]]
 ; CHECK-NEXT: @use_pointer
 ; CHECK-NEXT: @objc_release
 ; CHECK-NEXT: ret void
@@ -84,7 +84,7 @@ define void @test4(i8* %x, i64 %n) {
 entry:
   tail call i8* @objc_retain(i8* %x) nounwind
   call void @use_pointer(i8* %x)
-  tail call i8* @objc_autorelease(i8* %x) nounwind
+  call i8* @objc_autorelease(i8* %x) nounwind
   tail call void @objc_release(i8* %x) nounwind
   ret void
 }
@@ -92,9 +92,9 @@ entry:
 ; Don't merge retain and autorelease if they're not control-equivalent.
 
 ; CHECK: define void @test5(
-; CHECK: tail call i8* @objc_retain(i8* %p) nounwind
+; CHECK: tail call i8* @objc_retain(i8* %p) [[NUW]]
 ; CHECK: true:
-; CHECK: tail call i8* @objc_autorelease(i8* %0) nounwind
+; CHECK: call i8* @objc_autorelease(i8* %0) [[NUW]]
 ; CHECK: }
 define void @test5(i8* %p, i1 %a) {
 entry:
@@ -102,7 +102,7 @@ entry:
   br i1 %a, label %true, label %false
 
 true:
-  tail call i8* @objc_autorelease(i8* %p) nounwind
+  call i8* @objc_autorelease(i8* %p) nounwind
   call void @use_pointer(i8* %p)
   ret void
 
@@ -119,8 +119,8 @@ false:
 ; Those entrypoints don't exist yet though.
 
 ; CHECK: define i8* @test6(
-; CHECK: call i8* @objc_retainAutoreleasedReturnValue(i8* %p) nounwind
-; CHECK: %t = tail call i8* @objc_autoreleaseReturnValue(i8* %1) nounwind
+; CHECK: call i8* @objc_retainAutoreleasedReturnValue(i8* %p) [[NUW]]
+; CHECK: %t = tail call i8* @objc_autoreleaseReturnValue(i8* %1) [[NUW]]
 ; CHECK: }
 define i8* @test6() {
   %p = call i8* @returner()
@@ -161,3 +161,5 @@ return:                                           ; preds = %if.then, %entry
   %retval = phi i8* [ %c, %if.then ], [ null, %entry ]
   ret i8* %retval
 }
+
+; CHECK: attributes [[NUW]] = { nounwind }
diff --git a/test/Transforms/ObjCARC/dont-infinite-loop-during-block-escape-analysis.ll b/test/Transforms/ObjCARC/dont-infinite-loop-during-block-escape-analysis.ll
new file mode 100644
index 0000000000..bdee2be94f
--- /dev/null
+++ b/test/Transforms/ObjCARC/dont-infinite-loop-during-block-escape-analysis.ll
@@ -0,0 +1,87 @@
+; RUN: opt -S -objc-arc < %s
+; bugzilla://14551
+; rdar://12851911
+
+; Make sure that we do not hang clang during escape analysis.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-darwin"
+
+%struct.__block_descriptor = type { i64, i64 }
+%struct.__block_byref_foo = type { i8*, %struct.__block_byref_foo*, i32, i32, i32 }
+
+@_NSConcreteGlobalBlock = external global i8*
+@.str = private unnamed_addr constant [6 x i8] c"v8@?0\00", align 1
+@__block_descriptor_tmp = internal constant { i64, i64, i8*, i8* } { i64 0, i64 32, i8* getelementptr inbounds ([6 x i8]* @.str, i32 0, i32 0), i8* null }
+@__block_literal_global = internal constant { i8**, i32, i32, i8*, %struct.__block_descriptor* } { i8** @_NSConcreteGlobalBlock, i32 1342177280, i32 0, i8* bitcast (void (i8*)* @__hang_clang_block_invoke to i8*), %struct.__block_descriptor* bitcast ({ i64, i64, i8*, i8* }* @__block_descriptor_tmp to %struct.__block_descriptor*) }, align 8
+
+define void @hang_clang() uwtable optsize ssp {
+entry:
+  %foo = alloca %struct.__block_byref_foo, align 8
+  %byref.isa = getelementptr inbounds %struct.__block_byref_foo* %foo, i64 0, i32 0
+  store i8* null, i8** %byref.isa, align 8
+  %byref.forwarding = getelementptr inbounds %struct.__block_byref_foo* %foo, i64 0, i32 1
+  store %struct.__block_byref_foo* %foo, %struct.__block_byref_foo** %byref.forwarding, align 8
+  %byref.flags = getelementptr inbounds %struct.__block_byref_foo* %foo, i64 0, i32 2
+  store i32 536870912, i32* %byref.flags, align 8
+  %byref.size = getelementptr inbounds %struct.__block_byref_foo* %foo, i64 0, i32 3
+  store i32 32, i32* %byref.size, align 4
+  %foo1 = getelementptr inbounds %struct.__block_byref_foo* %foo, i64 0, i32 4
+  store i32 0, i32* %foo1, align 8, !tbaa !4
+  br label %for.body
+
+for.body:                                         ; preds = %for.inc.for.body_crit_edge, %entry
+  %0 = phi i1 [ true, %entry ], [ %phitmp, %for.inc.for.body_crit_edge ]
+  %i.06 = phi i32 [ 1, %entry ], [ %phitmp8, %for.inc.for.body_crit_edge ]
+  %block.05 = phi void (...)* [ null, %entry ], [ %block.1, %for.inc.for.body_crit_edge ]
+  br i1 %0, label %for.inc, label %if.then
+
+if.then:                                          ; preds = %for.body
+  %1 = call i8* @objc_retainBlock(i8* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor* }* @__block_literal_global to i8*)) nounwind, !clang.arc.copy_on_escape !7
+  %2 = bitcast i8* %1 to void (...)*
+  %3 = bitcast void (...)* %block.05 to i8*
+  call void @objc_release(i8* %3) nounwind, !clang.imprecise_release !7
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body, %if.then
+  %block.1 = phi void (...)* [ %2, %if.then ], [ %block.05, %for.body ]
+  %exitcond = icmp eq i32 %i.06, 10
+  br i1 %exitcond, label %for.end, label %for.inc.for.body_crit_edge
+
+for.inc.for.body_crit_edge:                       ; preds = %for.inc
+  %.pre = load %struct.__block_byref_foo** %byref.forwarding, align 8
+  %foo2.phi.trans.insert = getelementptr inbounds %struct.__block_byref_foo* %.pre, i64 0, i32 4
+  %.pre7 = load i32* %foo2.phi.trans.insert, align 4, !tbaa !4
+  %phitmp = icmp eq i32 %.pre7, 0
+  %phitmp8 = add i32 %i.06, 1
+  br label %for.body
+
+for.end:                                          ; preds = %for.inc
+  %4 = bitcast %struct.__block_byref_foo* %foo to i8*
+  call void @_Block_object_dispose(i8* %4, i32 8)
+  %5 = bitcast void (...)* %block.1 to i8*
+  call void @objc_release(i8* %5) nounwind, !clang.imprecise_release !7
+  ret void
+}
+
+define internal void @__hang_clang_block_invoke(i8* nocapture %.block_descriptor) nounwind uwtable readnone optsize ssp {
+entry:
+  ret void
+}
+
+declare i8* @objc_retainBlock(i8*)
+
+declare void @objc_release(i8*) nonlazybind
+
+declare void @_Block_object_dispose(i8*, i32)
+
+!llvm.module.flags = !{!0, !1, !2, !3}
+
+!0 = metadata !{i32 1, metadata !"Objective-C Version", i32 2}
+!1 = metadata !{i32 1, metadata !"Objective-C Image Info Version", i32 0}
+!2 = metadata !{i32 1, metadata !"Objective-C Image Info Section", metadata !"__DATA, __objc_imageinfo, regular, no_dead_strip"}
+!3 = metadata !{i32 4, metadata !"Objective-C Garbage Collection", i32 0}
+!4 = metadata !{metadata !"int", metadata !5}
+!5 = metadata !{metadata !"omnipotent char", metadata !6}
+!6 = metadata !{metadata !"Simple C/C++ TBAA"}
+!7 = metadata !{}
diff --git a/test/Transforms/ObjCARC/ensure-that-exception-unwind-path-is-visited.ll b/test/Transforms/ObjCARC/ensure-that-exception-unwind-path-is-visited.ll
new file mode 100644
index 0000000000..05257d1d5c
--- /dev/null
+++ b/test/Transforms/ObjCARC/ensure-that-exception-unwind-path-is-visited.ll
@@ -0,0 +1,174 @@
+; RUN: opt -objc-arc -S < %s | FileCheck %s
+; rdar://11744105
+; bugzilla://14584
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.9.0"
+
+%0 = type opaque
+%struct._class_t = type { %struct._class_t*, %struct._class_t*, %struct._objc_cache*, i8* (i8*, i8*)**, %struct._class_ro_t* }
+%struct._objc_cache = type opaque
+%struct._class_ro_t = type { i32, i32, i32, i8*, i8*, %struct.__method_list_t*, %struct._objc_protocol_list*, %struct._ivar_list_t*, i8*, %struct._prop_list_t* }
+%struct.__method_list_t = type { i32, i32, [0 x %struct._objc_method] }
+%struct._objc_method = type { i8*, i8*, i8* }
+%struct._objc_protocol_list = type { i64, [0 x %struct._protocol_t*] }
+%struct._protocol_t = type { i8*, i8*, %struct._objc_protocol_list*, %struct.__method_list_t*, %struct.__method_list_t*, %struct.__method_list_t*, %struct.__method_list_t*, %struct._prop_list_t*, i32, i32, i8** }
+%struct._prop_list_t = type { i32, i32, [0 x %struct._prop_t] }
+%struct._prop_t = type { i8*, i8* }
+%struct._ivar_list_t = type { i32, i32, [0 x %struct._ivar_t] }
+%struct._ivar_t = type { i64*, i8*, i8*, i32, i32 }
+%struct.NSConstantString = type { i32*, i32, i8*, i64 }
+
+@"OBJC_CLASS_$_NSObject" = external global %struct._class_t
+@"\01L_OBJC_CLASSLIST_REFERENCES_$_" = internal global %struct._class_t* @"OBJC_CLASS_$_NSObject", section "__DATA, __objc_classrefs, regular, no_dead_strip", align 8
+@"\01L_OBJC_METH_VAR_NAME_" = internal global [4 x i8] c"new\00", section "__TEXT,__objc_methname,cstring_literals", align 1
+@"\01L_OBJC_SELECTOR_REFERENCES_" = internal global i8* getelementptr inbounds ([4 x i8]* @"\01L_OBJC_METH_VAR_NAME_", i64 0, i64 0), section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip"
+@__CFConstantStringClassReference = external global [0 x i32]
+@.str = linker_private unnamed_addr constant [11 x i8] c"Failed: %@\00", align 1
+@_unnamed_cfstring_ = private constant %struct.NSConstantString { i32* getelementptr inbounds ([0 x i32]* @__CFConstantStringClassReference, i32 0, i32 0), i32 1992, i8* getelementptr inbounds ([11 x i8]* @.str, i32 0, i32 0), i64 10 }, section "__DATA,__cfstring"
+@"OBJC_CLASS_$_NSException" = external global %struct._class_t
+@"\01L_OBJC_CLASSLIST_REFERENCES_$_1" = internal global %struct._class_t* @"OBJC_CLASS_$_NSException", section "__DATA, __objc_classrefs, regular, no_dead_strip", align 8
+@.str2 = linker_private unnamed_addr constant [4 x i8] c"Foo\00", align 1
+@_unnamed_cfstring_3 = private constant %struct.NSConstantString { i32* getelementptr inbounds ([0 x i32]* @__CFConstantStringClassReference, i32 0, i32 0), i32 1992, i8* getelementptr inbounds ([4 x i8]* @.str2, i32 0, i32 0), i64 3 }, section "__DATA,__cfstring"
+@"\01L_OBJC_METH_VAR_NAME_4" = internal global [14 x i8] c"raise:format:\00", section "__TEXT,__objc_methname,cstring_literals", align 1
+@"\01L_OBJC_SELECTOR_REFERENCES_5" = internal global i8* getelementptr inbounds ([14 x i8]* @"\01L_OBJC_METH_VAR_NAME_4", i64 0, i64 0), section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip"
+@llvm.used = appending global [6 x i8*] [i8* bitcast (%struct._class_t** @"\01L_OBJC_CLASSLIST_REFERENCES_$_" to i8*), i8* getelementptr inbounds ([4 x i8]* @"\01L_OBJC_METH_VAR_NAME_", i32 0, i32 0), i8* bitcast (i8** @"\01L_OBJC_SELECTOR_REFERENCES_" to i8*), i8* bitcast (%struct._class_t** @"\01L_OBJC_CLASSLIST_REFERENCES_$_1" to i8*), i8* getelementptr inbounds ([14 x i8]* @"\01L_OBJC_METH_VAR_NAME_4", i32 0, i32 0), i8* bitcast (i8** @"\01L_OBJC_SELECTOR_REFERENCES_5" to i8*)], section "llvm.metadata"
+
+define i32 @main() uwtable ssp {
+entry:
+  %tmp = load %struct._class_t** @"\01L_OBJC_CLASSLIST_REFERENCES_$_", align 8, !dbg !37
+  %tmp1 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8, !dbg !37, !invariant.load !38
+  %tmp2 = bitcast %struct._class_t* %tmp to i8*, !dbg !37
+; CHECK: call i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*)*)(i8* %tmp2, i8* %tmp1)
+  %call = call i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*)*)(i8* %tmp2, i8* %tmp1), !dbg !37, !clang.arc.no_objc_arc_exceptions !38
+  call void @llvm.dbg.value(metadata !{i8* %call}, i64 0, metadata !12), !dbg !37
+; CHECK: call i8* @objc_retain(i8* %call) [[NUW:#[0-9]+]]
+  %tmp3 = call i8* @objc_retain(i8* %call) nounwind, !dbg !39
+  call void @llvm.dbg.value(metadata !{i8* %call}, i64 0, metadata !25), !dbg !39
+  invoke fastcc void @ThrowFunc(i8* %call)
+          to label %eh.cont unwind label %lpad, !dbg !40, !clang.arc.no_objc_arc_exceptions !38
+
+eh.cont:                                          ; preds = %entry
+; CHECK: call void @objc_release(i8* %call)
+  call void @objc_release(i8* %call) nounwind, !dbg !42, !clang.imprecise_release !38
+  br label %if.end, !dbg !43
+
+lpad:                                             ; preds = %entry
+  %tmp4 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__objc_personality_v0 to i8*)
+          catch i8* null, !dbg !40
+  %tmp5 = extractvalue { i8*, i32 } %tmp4, 0, !dbg !40
+  %exn.adjusted = call i8* @objc_begin_catch(i8* %tmp5) nounwind, !dbg !44
+  call void @llvm.dbg.value(metadata !45, i64 0, metadata !21), !dbg !46
+  call void @objc_end_catch(), !dbg !49, !clang.arc.no_objc_arc_exceptions !38
+; CHECK: call void @objc_release(i8* %call)
+  call void @objc_release(i8* %call) nounwind, !dbg !42, !clang.imprecise_release !38
+  call void (i8*, ...)* @NSLog(i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring_ to i8*), i8* %call), !dbg !50, !clang.arc.no_objc_arc_exceptions !38
+  br label %if.end, !dbg !52
+
+if.end:                                           ; preds = %lpad, %eh.cont
+  call void (i8*, ...)* @NSLog(i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring_ to i8*), i8* %call), !dbg !53, !clang.arc.no_objc_arc_exceptions !38
+; CHECK: call void @objc_release(i8* %call)
+  call void @objc_release(i8* %call) nounwind, !dbg !54, !clang.imprecise_release !38
+  ret i32 0, !dbg !54
+}
+
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
+declare i8* @objc_msgSend(i8*, i8*, ...) nonlazybind
+
+declare i8* @objc_retain(i8*) nonlazybind
+
+declare i8* @objc_begin_catch(i8*)
+
+declare void @objc_end_catch()
+
+declare void @objc_exception_rethrow()
+
+define internal fastcc void @ThrowFunc(i8* %obj) uwtable noinline ssp {
+entry:
+  %tmp = call i8* @objc_retain(i8* %obj) nounwind
+  call void @llvm.dbg.value(metadata !{i8* %obj}, i64 0, metadata !32), !dbg !55
+  %tmp1 = load %struct._class_t** @"\01L_OBJC_CLASSLIST_REFERENCES_$_1", align 8, !dbg !56
+  %tmp2 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_5", align 8, !dbg !56, !invariant.load !38
+  %tmp3 = bitcast %struct._class_t* %tmp1 to i8*, !dbg !56
+  call void (i8*, i8*, %0*, %0*, ...)* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*, %0*, %0*, ...)*)(i8* %tmp3, i8* %tmp2, %0* bitcast (%struct.NSConstantString* @_unnamed_cfstring_3 to %0*), %0* bitcast (%struct.NSConstantString* @_unnamed_cfstring_3 to %0*)), !dbg !56, !clang.arc.no_objc_arc_exceptions !38
+  call void @objc_release(i8* %obj) nounwind, !dbg !58, !clang.imprecise_release !38
+  ret void, !dbg !58
+}
+
+declare i32 @__objc_personality_v0(...)
+
+declare void @objc_release(i8*) nonlazybind
+
+declare void @NSLog(i8*, ...)
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+
+; CHECK: attributes #0 = { ssp uwtable }
+; CHECK: attributes #1 = { nounwind readnone }
+; CHECK: attributes #2 = { nonlazybind }
+; CHECK: attributes #3 = { noinline ssp uwtable }
+; CHECK: attributes [[NUW]] = { nounwind }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!33, !34, !35, !36}
+
+!0 = metadata !{i32 786449, i32 0, i32 16, metadata !"test.m", metadata !"/Volumes/Files/gottesmmcab/Radar/12906997", metadata !"clang version 3.3 ", i1 true, i1 true, metadata !"", i32 2, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ] [/Volumes/Files/gottesmmcab/Radar/12906997/test.m] [DW_LANG_ObjC]
+!1 = metadata !{metadata !2}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{metadata !5, metadata !27}
+!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"main", metadata !"main", metadata !"", metadata !6, i32 9, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 true, i32 ()* @main, null, null, metadata !10, i32 10} ; [ DW_TAG_subprogram ] [line 9] [def] [scope 10] [main]
+!6 = metadata !{i32 786473, metadata !"test.m", metadata !"/Volumes/Files/gottesmmcab/Radar/12906997", null} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = metadata !{metadata !9}
+!9 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!10 = metadata !{metadata !11}
+!11 = metadata !{metadata !12, metadata !21, metadata !25}
+!12 = metadata !{i32 786688, metadata !13, metadata !"obj", metadata !6, i32 11, metadata !14, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [obj] [line 11]
+!13 = metadata !{i32 786443, metadata !5, i32 10, i32 0, metadata !6, i32 0} ; [ DW_TAG_lexical_block ] [/Volumes/Files/gottesmmcab/Radar/12906997/test.m]
+!14 = metadata !{i32 786454, null, metadata !"id", metadata !6, i32 11, i64 0, i64 0, i64 0, i32 0, metadata !15} ; [ DW_TAG_typedef ] [id] [line 11, size 0, align 0, offset 0] [from ]
+!15 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !16} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from objc_object]
+!16 = metadata !{i32 786451, null, metadata !"objc_object", metadata !6, i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !17, i32 0, i32 0, i32 0} ; [ DW_TAG_structure_type ] [objc_object] [line 0, size 0, align 0, offset 0] [from ]
+!17 = metadata !{metadata !18}
+!18 = metadata !{i32 786445, metadata !16, metadata !"isa", metadata !6, i32 0, i64 64, i64 0, i64 0, i32 0, metadata !19} ; [ DW_TAG_member ] [isa] [line 0, size 64, align 0, offset 0] [from ]
+!19 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 0, i64 0, i32 0, metadata !20} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 0, offset 0] [from objc_class]
+!20 = metadata !{i32 786451, null, metadata !"objc_class", metadata !6, i32 0, i64 0, i64 0, i32 0, i32 4, null, null, i32 0} ; [ DW_TAG_structure_type ] [objc_class] [line 0, size 0, align 0, offset 0] [fwd] [from ]
+!21 = metadata !{i32 786688, metadata !22, metadata !"ok", metadata !6, i32 13, metadata !23, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [ok] [line 13]
+!22 = metadata !{i32 786443, metadata !13, i32 12, i32 0, metadata !6, i32 1} ; [ DW_TAG_lexical_block ] [/Volumes/Files/gottesmmcab/Radar/12906997/test.m]
+!23 = metadata !{i32 786454, null, metadata !"BOOL", metadata !6, i32 62, i64 0, i64 0, i64 0, i32 0, metadata !24} ; [ DW_TAG_typedef ] [BOOL] [line 62, size 0, align 0, offset 0] [from signed char]
+!24 = metadata !{i32 786468, null, metadata !"signed char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] [signed char] [line 0, size 8, align 8, offset 0, enc DW_ATE_signed_char]
+!25 = metadata !{i32 786688, metadata !26, metadata !"obj2", metadata !6, i32 15, metadata !14, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [obj2] [line 15]
+!26 = metadata !{i32 786443, metadata !22, i32 14, i32 0, metadata !6, i32 2} ; [ DW_TAG_lexical_block ] [/Volumes/Files/gottesmmcab/Radar/12906997/test.m]
+!27 = metadata !{i32 786478, i32 0, metadata !6, metadata !"ThrowFunc", metadata !"ThrowFunc", metadata !"", metadata !6, i32 4, metadata !28, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (i8*)* @ThrowFunc, null, null, metadata !30, i32 5} ; [ DW_TAG_subprogram ] [line 4] [local] [def] [scope 5] [ThrowFunc]
+!28 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !29, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!29 = metadata !{null, metadata !14}
+!30 = metadata !{metadata !31}
+!31 = metadata !{metadata !32}
+!32 = metadata !{i32 786689, metadata !27, metadata !"obj", metadata !6, i32 16777220, metadata !14, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [obj] [line 4]
+!33 = metadata !{i32 1, metadata !"Objective-C Version", i32 2}
+!34 = metadata !{i32 1, metadata !"Objective-C Image Info Version", i32 0}
+!35 = metadata !{i32 1, metadata !"Objective-C Image Info Section", metadata !"__DATA, __objc_imageinfo, regular, no_dead_strip"}
+!36 = metadata !{i32 4, metadata !"Objective-C Garbage Collection", i32 0}
+!37 = metadata !{i32 11, i32 0, metadata !13, null}
+!38 = metadata !{}
+!39 = metadata !{i32 15, i32 0, metadata !26, null}
+!40 = metadata !{i32 17, i32 0, metadata !41, null}
+!41 = metadata !{i32 786443, metadata !26, i32 16, i32 0, metadata !6, i32 3} ; [ DW_TAG_lexical_block ] [/Volumes/Files/gottesmmcab/Radar/12906997/test.m]
+!42 = metadata !{i32 22, i32 0, metadata !26, null}
+!43 = metadata !{i32 23, i32 0, metadata !22, null}
+!44 = metadata !{i32 19, i32 0, metadata !41, null}
+!45 = metadata !{i8 0}
+!46 = metadata !{i32 20, i32 0, metadata !47, null}
+!47 = metadata !{i32 786443, metadata !48, i32 19, i32 0, metadata !6, i32 5} ; [ DW_TAG_lexical_block ] [/Volumes/Files/gottesmmcab/Radar/12906997/test.m]
+!48 = metadata !{i32 786443, metadata !26, i32 19, i32 0, metadata !6, i32 4} ; [ DW_TAG_lexical_block ] [/Volumes/Files/gottesmmcab/Radar/12906997/test.m]
+!49 = metadata !{i32 21, i32 0, metadata !47, null}
+!50 = metadata !{i32 24, i32 0, metadata !51, null}
+!51 = metadata !{i32 786443, metadata !22, i32 23, i32 0, metadata !6, i32 6} ; [ DW_TAG_lexical_block ] [/Volumes/Files/gottesmmcab/Radar/12906997/test.m]
+!52 = metadata !{i32 25, i32 0, metadata !51, null}
+!53 = metadata !{i32 27, i32 0, metadata !13, null}
+!54 = metadata !{i32 28, i32 0, metadata !13, null}
+!55 = metadata !{i32 4, i32 0, metadata !27, null}
+!56 = metadata !{i32 6, i32 0, metadata !57, null}
+!57 = metadata !{i32 786443, metadata !27, i32 5, i32 0, metadata !6, i32 7} ; [ DW_TAG_lexical_block ] [/Volumes/Files/gottesmmcab/Radar/12906997/test.m]
+!58 = metadata !{i32 7, i32 0, metadata !57, null}
diff --git a/test/Transforms/ObjCARC/escape.ll b/test/Transforms/ObjCARC/escape.ll
index 3f694cf1d5..8f252a0d34 100644
--- a/test/Transforms/ObjCARC/escape.ll
+++ b/test/Transforms/ObjCARC/escape.ll
@@ -10,8 +10,8 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 ; with the objc_storeWeak call.
 
 ; CHECK: define void @test0(
-; CHECK: %tmp7 = call i8* @objc_retainBlock(i8* %tmp6) nounwind, !clang.arc.copy_on_escape !0
-; CHECK: call void @objc_release(i8* %tmp7) nounwind, !clang.imprecise_release !0
+; CHECK: %tmp7 = call i8* @objc_retainBlock(i8* %tmp6) [[NUW:#[0-9]+]], !clang.arc.copy_on_escape !0
+; CHECK: call void @objc_release(i8* %tmp7) [[NUW]], !clang.imprecise_release !0
 ; CHECK: }
 define void @test0() nounwind {
 entry:
@@ -129,3 +129,6 @@ declare i8* @not_really_objc_storeWeak(i8**, i8*)
 declare void @objc_release(i8*)
 
 !0 = metadata !{}
+
+; CHECK: attributes [[NUW]] = { nounwind }
+; CHECK: attributes #1 = { nounwind ssp }
diff --git a/test/Transforms/ObjCARC/gvn.ll b/test/Transforms/ObjCARC/gvn.ll
index 6917b02e03..3648866de0 100644
--- a/test/Transforms/ObjCARC/gvn.ll
+++ b/test/Transforms/ObjCARC/gvn.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -basicaa -objc-arc -gvn < %s | FileCheck %s
+; RUN: opt -S -basicaa -objc-arc-aa -gvn < %s | FileCheck %s
 
 @x = common global i8* null, align 8
 
diff --git a/test/Transforms/ObjCARC/invoke.ll b/test/Transforms/ObjCARC/invoke.ll
index 1a58e34940..f528b4ac35 100644
--- a/test/Transforms/ObjCARC/invoke.ll
+++ b/test/Transforms/ObjCARC/invoke.ll
@@ -12,10 +12,10 @@ declare i8* @returner()
 
 ; CHECK: define void @test0(
 ; CHECK: invoke.cont:
-; CHECK:   call void @objc_release(i8* %zipFile) nounwind, !clang.imprecise_release !0
+; CHECK:   call void @objc_release(i8* %zipFile) [[NUW:#[0-9]+]], !clang.imprecise_release !0
 ; CHECK:   ret void
 ; CHECK: lpad:
-; CHECK:   call void @objc_release(i8* %zipFile) nounwind, !clang.imprecise_release !0
+; CHECK:   call void @objc_release(i8* %zipFile) [[NUW]], !clang.imprecise_release !0
 ; CHECK:   ret void
 define void @test0(i8* %zipFile) {
 entry:
@@ -39,11 +39,11 @@ lpad:                                             ; preds = %entry
 
 ; CHECK: define void @test1(
 ; CHECK: invoke.cont:
-; CHECK:   call void @objc_release(i8* %zipFile) nounwind, !clang.imprecise_release !0
+; CHECK:   call void @objc_release(i8* %zipFile) [[NUW]], !clang.imprecise_release !0
 ; CHECK:   call void @callee()
 ; CHECK:   br label %done
 ; CHECK: lpad:
-; CHECK:   call void @objc_release(i8* %zipFile) nounwind, !clang.imprecise_release !0
+; CHECK:   call void @objc_release(i8* %zipFile) [[NUW]], !clang.imprecise_release !0
 ; CHECK:   call void @callee()
 ; CHECK:   br label %done
 ; CHECK: done:
@@ -108,7 +108,7 @@ finally.rethrow:                                  ; preds = %invoke.cont, %entry
 
 ; CHECK: define void @test3(
 ; CHECK: if.end:
-; CHECK-NEXT: call void @objc_release(i8* %p) nounwind
+; CHECK-NEXT: call void @objc_release(i8* %p) [[NUW]]
 ; CHECK-NEXT: ret void
 define void @test3(i8* %p, i1 %b) {
 entry:
@@ -140,10 +140,10 @@ if.end:
 ; CHECK: lpad:
 ; CHECK-NEXT: %r = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__objc_personality_v0 to i8*)
 ; CHECK-NEXT: cleanup
-; CHECK-NEXT: call void @objc_release(i8* %p) nounwind
+; CHECK-NEXT: call void @objc_release(i8* %p) [[NUW]]
 ; CHECK-NEXT: ret void
 ; CHECK: if.end:
-; CHECK-NEXT: call void @objc_release(i8* %p) nounwind
+; CHECK-NEXT: call void @objc_release(i8* %p) [[NUW]]
 ; CHECK-NEXT: ret void
 define void @test4(i8* %p, i1 %b) {
 entry:
@@ -215,4 +215,6 @@ if.end:
 declare i32 @__gxx_personality_v0(...)
 declare i32 @__objc_personality_v0(...)
 
+; CHECK: attributes [[NUW]] = { nounwind }
+
 !0 = metadata !{}
diff --git a/test/Transforms/ObjCARC/move-and-form-retain-autorelease.ll b/test/Transforms/ObjCARC/move-and-form-retain-autorelease.ll
index 170d0a99c9..5d058257c6 100644
--- a/test/Transforms/ObjCARC/move-and-form-retain-autorelease.ll
+++ b/test/Transforms/ObjCARC/move-and-form-retain-autorelease.ll
@@ -4,7 +4,7 @@
 ; and various scary looking things and fold it into an objc_retainAutorelease.
 
 ; CHECK: bb57:
-; CHECK: tail call i8* @objc_retainAutorelease(i8* %tmp71x) nounwind
+; CHECK: tail call i8* @objc_retainAutorelease(i8* %tmp71x) [[NUW:#[0-9]+]]
 ; CHECK: bb99:
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
@@ -212,10 +212,12 @@ bb99:                                             ; preds = %bb57
   br label %bb104
 
 bb104:                                            ; preds = %bb99, %bb57
-  %tmp105 = tail call i8* @objc_autorelease(i8* %tmp72) nounwind
+  %tmp105 = call i8* @objc_autorelease(i8* %tmp72) nounwind
   %tmp106 = bitcast i8* %tmp105 to %14*
   tail call void @objc_release(i8* %tmp85) nounwind
   %tmp107 = bitcast %18* %tmp47 to i8*
   tail call void @objc_release(i8* %tmp107) nounwind
   ret %14* %tmp106
 }
+
+; CHECK: attributes [[NUW]] = { nounwind }
diff --git a/test/Transforms/ObjCARC/nested.ll b/test/Transforms/ObjCARC/nested.ll
index 32be03ec6a..ca9c58bcb3 100644
--- a/test/Transforms/ObjCARC/nested.ll
+++ b/test/Transforms/ObjCARC/nested.ll
@@ -770,9 +770,9 @@ forcoll.empty:
 @__block_d_tmp5 = external hidden constant { i64, i64, i8*, i8*, i8*, i8* }
 
 ; CHECK: define void @test11(
-; CHECK: tail call i8* @objc_retain(i8* %call) nounwind
-; CHECK: tail call i8* @objc_retain(i8* %call) nounwind
-; CHECK: call void @objc_release(i8* %call) nounwind, !clang.imprecise_release !0
+; CHECK: tail call i8* @objc_retain(i8* %call) [[NUW:#[0-9]+]]
+; CHECK: tail call i8* @objc_retain(i8* %call) [[NUW]]
+; CHECK: call void @objc_release(i8* %call) [[NUW]], !clang.imprecise_release !0
 ; CHECK: }
 define void @test11() {
 entry:
@@ -820,3 +820,6 @@ entry:
   call void @objc_release(i8* %call) nounwind, !clang.imprecise_release !0
   ret void
 }
+
+; CHECK: attributes [[NUW]] = { nounwind }
+; CHECK: attributes #1 = { nonlazybind }
diff --git a/test/Transforms/ObjCARC/pr12270.ll b/test/Transforms/ObjCARC/pr12270.ll
index 1faae5f687..bdff0d7b4d 100644
--- a/test/Transforms/ObjCARC/pr12270.ll
+++ b/test/Transforms/ObjCARC/pr12270.ll
@@ -1,4 +1,4 @@
-; RUN: opt -disable-output -objc-arc-contract %s
+; RUN: opt -disable-output -objc-arc-contract < %s
 ; test that we don't crash on unreachable code
 %2 = type opaque
 
diff --git a/test/Transforms/ObjCARC/retain-block-alloca.ll b/test/Transforms/ObjCARC/retain-block-alloca.ll
index 01f208704c..f40be238ba 100644
--- a/test/Transforms/ObjCARC/retain-block-alloca.ll
+++ b/test/Transforms/ObjCARC/retain-block-alloca.ll
@@ -9,7 +9,7 @@
 @"\01L_OBJC_SELECTOR_REFERENCES_" = external hidden global i8*, section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip"
 
 ; CHECK: define void @test(
-; CHECK: %3 = call i8* @objc_retainBlock(i8* %2) nounwind
+; CHECK: %3 = call i8* @objc_retainBlock(i8* %2) [[NUW:#[0-9]+]]
 ; CHECK: @objc_msgSend
 ; CHECK-NEXT: @objc_release(i8* %3)
 define void @test(%0* %array) uwtable {
@@ -87,4 +87,8 @@ declare i8* @objc_msgSend(i8*, i8*, ...) nonlazybind
 
 declare void @objc_release(i8*)
 
+; CHECK: attributes #0 = { uwtable }
+; CHECK: attributes #1 = { nonlazybind }
+; CHECK: attributes [[NUW]] = { nounwind }
+
 !0 = metadata !{}
diff --git a/test/Transforms/ObjCARC/retain-block-side-effects.ll b/test/Transforms/ObjCARC/retain-block-side-effects.ll
index e84d48f869..7fa73cbfef 100644
--- a/test/Transforms/ObjCARC/retain-block-side-effects.ll
+++ b/test/Transforms/ObjCARC/retain-block-side-effects.ll
@@ -4,7 +4,7 @@
 ; objc_retainBlock stores into %repeater so the load from after the
 ; call isn't forwardable from the store before the call.
 
-; CHECK: %tmp16 = call i8* @objc_retainBlock(i8* %tmp15) nounwind
+; CHECK: %tmp16 = call i8* @objc_retainBlock(i8* %tmp15) [[NUW:#[0-9]+]]
 ; CHECK: %tmp17 = bitcast i8* %tmp16 to void ()*
 ; CHECK: %tmp18 = load %struct.__block_byref_repeater** %byref.forwarding, align 8
 ; CHECK: %repeater12 = getelementptr inbounds %struct.__block_byref_repeater* %tmp18, i64 0, i32 6
@@ -37,3 +37,6 @@ entry:
 }
 
 declare i8* @objc_retainBlock(i8*)
+
+; CHECK: attributes #0 = { noreturn }
+; CHECK: attributes [[NUW]] = { nounwind }
diff --git a/test/Transforms/ObjCARC/retain-block.ll b/test/Transforms/ObjCARC/retain-block.ll
index b3b62d3000..ee57049d8a 100644
--- a/test/Transforms/ObjCARC/retain-block.ll
+++ b/test/Transforms/ObjCARC/retain-block.ll
@@ -28,8 +28,8 @@ entry:
 ; optimization possible.
 
 ; CHECK: define void @test0_no_metadata(i8* %tmp) {
-; CHECK: %tmp2 = tail call i8* @objc_retainBlock(i8* %tmp) nounwind
-; CHECK: tail call void @objc_release(i8* %tmp2) nounwind, !clang.imprecise_release !0
+; CHECK: %tmp2 = tail call i8* @objc_retainBlock(i8* %tmp) [[NUW:#[0-9]+]]
+; CHECK: tail call void @objc_release(i8* %tmp2) [[NUW]], !clang.imprecise_release !0
 ; CHECK: }
 define void @test0_no_metadata(i8* %tmp) {
 entry:
@@ -43,8 +43,8 @@ entry:
 ; optimization possible.
 
 ; CHECK: define void @test0_escape(i8* %tmp, i8** %z) {
-; CHECK: %tmp2 = tail call i8* @objc_retainBlock(i8* %tmp) nounwind, !clang.arc.copy_on_escape !0
-; CHECK: tail call void @objc_release(i8* %tmp2) nounwind, !clang.imprecise_release !0
+; CHECK: %tmp2 = tail call i8* @objc_retainBlock(i8* %tmp) [[NUW]], !clang.arc.copy_on_escape !0
+; CHECK: tail call void @objc_release(i8* %tmp2) [[NUW]], !clang.imprecise_release !0
 ; CHECK: }
 define void @test0_escape(i8* %tmp, i8** %z) {
 entry:
@@ -58,8 +58,8 @@ entry:
 ; Same as test0_escape, but there's no intervening call.
 
 ; CHECK: define void @test0_just_escape(i8* %tmp, i8** %z) {
-; CHECK: %tmp2 = tail call i8* @objc_retainBlock(i8* %tmp) nounwind, !clang.arc.copy_on_escape !0
-; CHECK: tail call void @objc_release(i8* %tmp2) nounwind, !clang.imprecise_release !0
+; CHECK: %tmp2 = tail call i8* @objc_retainBlock(i8* %tmp) [[NUW]], !clang.arc.copy_on_escape !0
+; CHECK: tail call void @objc_release(i8* %tmp2) [[NUW]], !clang.imprecise_release !0
 ; CHECK: }
 define void @test0_just_escape(i8* %tmp, i8** %z) {
 entry:
@@ -73,9 +73,9 @@ entry:
 
 ; CHECK: define void @test1(i8* %tmp) {
 ; CHECK-NOT: @objc
-; CHECK: tail call i8* @objc_retain(i8* %tmp) nounwind
+; CHECK: tail call i8* @objc_retain(i8* %tmp) [[NUW]]
 ; CHECK-NOT: @objc
-; CHECK: tail call void @objc_release(i8* %tmp) nounwind, !clang.imprecise_release !0
+; CHECK: tail call void @objc_release(i8* %tmp) [[NUW]], !clang.imprecise_release !0
 ; CHECK-NOT: @objc
 ; CHECK: }
 define void @test1(i8* %tmp) {
@@ -95,10 +95,10 @@ entry:
 
 ; CHECK: define void @test1_no_metadata(i8* %tmp) {
 ; CHECK-NEXT: entry:
-; CHECK-NEXT: tail call i8* @objc_retainBlock(i8* %tmp) nounwind
+; CHECK-NEXT: tail call i8* @objc_retainBlock(i8* %tmp) [[NUW]]
 ; CHECK-NEXT: @use_pointer(i8* %tmp2)
 ; CHECK-NEXT: @use_pointer(i8* %tmp2)
-; CHECK-NEXT: tail call void @objc_release(i8* %tmp) nounwind, !clang.imprecise_release !0
+; CHECK-NEXT: tail call void @objc_release(i8* %tmp) [[NUW]], !clang.imprecise_release !0
 ; CHECK-NOT: @objc
 ; CHECK: }
 define void @test1_no_metadata(i8* %tmp) {
@@ -118,11 +118,11 @@ entry:
 
 ; CHECK: define void @test1_escape(i8* %tmp, i8** %z) {
 ; CHECK-NEXT: entry:
-; CHECK-NEXT: %tmp2 = tail call i8* @objc_retainBlock(i8* %tmp) nounwind, !clang.arc.copy_on_escape !0
+; CHECK-NEXT: %tmp2 = tail call i8* @objc_retainBlock(i8* %tmp) [[NUW]], !clang.arc.copy_on_escape !0
 ; CHECK-NEXT: store i8* %tmp2, i8** %z
 ; CHECK-NEXT: @use_pointer(i8* %tmp2)
 ; CHECK-NEXT: @use_pointer(i8* %tmp2)
-; CHECK-NEXT: tail call void @objc_release(i8* %tmp) nounwind, !clang.imprecise_release !0
+; CHECK-NEXT: tail call void @objc_release(i8* %tmp) [[NUW]], !clang.imprecise_release !0
 ; CHECK-NOT: @objc
 ; CHECK: }
 define void @test1_escape(i8* %tmp, i8** %z) {
@@ -136,3 +136,5 @@ entry:
   tail call void @objc_release(i8* %tmp) nounwind, !clang.imprecise_release !0
   ret void
 }
+
+; CHECK: attributes [[NUW]] = { nounwind }
diff --git a/test/Transforms/ObjCARC/retain-not-declared.ll b/test/Transforms/ObjCARC/retain-not-declared.ll
index f876e51592..e834179bb7 100644
--- a/test/Transforms/ObjCARC/retain-not-declared.ll
+++ b/test/Transforms/ObjCARC/retain-not-declared.ll
@@ -13,7 +13,7 @@ declare void @objc_release(i8*)
 
 ; CHECK:      define i8* @test0(i8* %p) {
 ; CHECK-NEXT: entry:
-; CHECK-NEXT:   %0 = tail call i8* @objc_retainAutoreleaseReturnValue(i8* %p) nounwind
+; CHECK-NEXT:   %0 = tail call i8* @objc_retainAutoreleaseReturnValue(i8* %p) [[NUW:#[0-9]+]]
 ; CHECK-NEXT:   ret i8* %0
 ; CHECK-NEXT: }
 
@@ -65,3 +65,5 @@ lpad100:                                          ; preds = %invoke.cont93
 declare i32 @__gxx_personality_v0(...)
 
 !0 = metadata !{}
+
+; CHECK: attributes [[NUW]] = { nounwind }
diff --git a/test/Transforms/ObjCARC/rle-s2l.ll b/test/Transforms/ObjCARC/rle-s2l.ll
index 8f8d5c0d38..2865c94dc8 100644
--- a/test/Transforms/ObjCARC/rle-s2l.ll
+++ b/test/Transforms/ObjCARC/rle-s2l.ll
@@ -57,7 +57,7 @@ define void @test2(i8** %p) {
 
 ; CHECK:      define void @test3(i8** %p) {
 ; CHECK-NEXT:   %x = call i8* @objc_loadWeak(i8** %p)
-; CHECK-NEXT:   call void @use_pointer(i8* %x) readonly
+; CHECK-NEXT:   call void @use_pointer(i8* %x) [[RO:#[0-9]+]]
 ; CHECK-NEXT:   %1 = tail call i8* @objc_retain(i8* %x)
 ; CHECK-NEXT:   call void @use_pointer(i8* %x)
 ; CHECK-NEXT:   ret void
@@ -74,7 +74,7 @@ define void @test3(i8** %p) {
 
 ; CHECK:      define void @test4(i8** %p) {
 ; CHECK-NEXT:   %x = call i8* @objc_loadWeak(i8** %p)
-; CHECK-NEXT:   call void @use_pointer(i8* %x) readonly
+; CHECK-NEXT:   call void @use_pointer(i8* %x) [[RO]]
 ; CHECK-NEXT:   call void @callee()
 ; CHECK-NEXT:   %y = call i8* @objc_loadWeak(i8** %p)
 ; CHECK-NEXT:   call void @use_pointer(i8* %y)
@@ -133,3 +133,6 @@ define void @test7(i8** %p, i8* %n, i8** %q, i8* %m) {
   call void @use_pointer(i8* %y)
   ret void
 }
+
+; CHECK: attributes #0 = { nounwind }
+; CHECK: attributes [[RO]] = { readonly }
diff --git a/test/Transforms/ObjCARC/rv.ll b/test/Transforms/ObjCARC/rv.ll
index 9353a19f71..a2fef96749 100644
--- a/test/Transforms/ObjCARC/rv.ll
+++ b/test/Transforms/ObjCARC/rv.ll
@@ -29,7 +29,7 @@ declare i8* @returner()
 ; CHECK:      define void @test0(
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT:   %x = call i8* @returner
-; CHECK-NEXT:   %0 = tail call i8* @objc_retainAutoreleasedReturnValue(i8* %x) nounwind
+; CHECK-NEXT:   %0 = tail call i8* @objc_retainAutoreleasedReturnValue(i8* %x) [[NUW:#[0-9]+]]
 ; CHECK: t:
 ; CHECK-NOT: @objc_
 ; CHECK: return:
@@ -150,7 +150,7 @@ define void @test8() {
 ; Don't apply the RV optimization to autorelease if there's no retain.
 
 ; CHECK: define i8* @test9(i8* %p)
-; CHECK: tail call i8* @objc_autorelease(i8* %p)
+; CHECK: call i8* @objc_autorelease(i8* %p)
 define i8* @test9(i8* %p) {
   call i8* @objc_autorelease(i8* %p)
   ret i8* %p
@@ -159,8 +159,8 @@ define i8* @test9(i8* %p) {
 ; Apply the RV optimization.
 
 ; CHECK: define i8* @test10(i8* %p)
-; CHECK: tail call i8* @objc_retain(i8* %p) nounwind
-; CHECK: tail call i8* @objc_autoreleaseReturnValue(i8* %p) nounwind
+; CHECK: tail call i8* @objc_retain(i8* %p) [[NUW]]
+; CHECK: tail call i8* @objc_autoreleaseReturnValue(i8* %p) [[NUW]]
 ; CHECK-NEXT: ret i8* %p
 define i8* @test10(i8* %p) {
   %1 = call i8* @objc_retain(i8* %p)
@@ -174,7 +174,7 @@ define i8* @test10(i8* %p) {
 ; CHECK: define i8* @test11(i8* %p)
 ; CHECK: tail call i8* @objc_retain(i8* %p)
 ; CHECK-NEXT: call void @use_pointer(i8* %p)
-; CHECK: tail call i8* @objc_autorelease(i8* %p)
+; CHECK: call i8* @objc_autorelease(i8* %p)
 ; CHECK-NEXT: ret i8* %p
 define i8* @test11(i8* %p) {
   %1 = call i8* @objc_retain(i8* %p)
@@ -201,7 +201,7 @@ define i8* @test12(i8* %p) {
 
 ; CHECK: define i8* @test13(
 ; CHECK: tail call i8* @objc_retainAutoreleasedReturnValue(i8* %p)
-; CHECK: tail call i8* @objc_autorelease(i8* %p)
+; CHECK: call i8* @objc_autorelease(i8* %p)
 ; CHECK: ret i8* %p
 define i8* @test13() {
   %p = call i8* @returner()
@@ -215,7 +215,7 @@ define i8* @test13() {
 ; argument is not a return value.
 
 ; CHECK: define void @test14(
-; CHECK-NEXT: tail call i8* @objc_retain(i8* %p) nounwind
+; CHECK-NEXT: tail call i8* @objc_retain(i8* %p) [[NUW]]
 ; CHECK-NEXT: ret void
 define void @test14(i8* %p) {
   call i8* @objc_retainAutoreleasedReturnValue(i8* %p)
@@ -227,7 +227,7 @@ define void @test14(i8* %p) {
 
 ; CHECK: define void @test15(
 ; CHECK-NEXT: %y = call i8* @returner()
-; CHECK-NEXT: tail call i8* @objc_retainAutoreleasedReturnValue(i8* %y) nounwind
+; CHECK-NEXT: tail call i8* @objc_retainAutoreleasedReturnValue(i8* %y) [[NUW]]
 ; CHECK-NEXT: ret void
 define void @test15() {
   %y = call i8* @returner()
@@ -240,7 +240,7 @@ define void @test15() {
 
 ; CHECK: define void @test16(
 ; CHECK-NEXT: %y = call i8* @returner()
-; CHECK-NEXT: tail call i8* @objc_retainAutoreleasedReturnValue(i8* %y) nounwind
+; CHECK-NEXT: tail call i8* @objc_retainAutoreleasedReturnValue(i8* %y) [[NUW]]
 ; CHECK-NEXT: ret void
 define void @test16() {
   %y = call i8* @returner()
@@ -252,7 +252,7 @@ define void @test16() {
 ; argument is not a return value.
 
 ; CHECK: define void @test17(
-; CHECK-NEXT: tail call i8* @objc_retain(i8* %y) nounwind
+; CHECK-NEXT: tail call i8* @objc_retain(i8* %y) [[NUW]]
 ; CHECK-NEXT: ret void
 define void @test17(i8* %y) {
   call i8* @objc_retain(i8* %y)
@@ -265,7 +265,7 @@ define void @test17(i8* %y) {
 ; CHECK: define void @test18(
 ; CHECK-NEXT: %y = call i8* @returner()
 ; CHECK-NEXT: call void @callee()
-; CHECK-NEXT: tail call i8* @objc_retain(i8* %y) nounwind
+; CHECK-NEXT: tail call i8* @objc_retain(i8* %y) [[NUW]]
 ; CHECK-NEXT: ret void
 define void @test18() {
   %y = call i8* @returner()
@@ -323,7 +323,7 @@ define i8* @test22(i8* %p) {
 ; Convert autoreleaseRV to autorelease.
 
 ; CHECK: define void @test23(
-; CHECK: tail call i8* @objc_autorelease(i8* %p) nounwind
+; CHECK: call i8* @objc_autorelease(i8* %p) [[NUW]]
 define void @test23(i8* %p) {
   store i8 0, i8* %p
   call i8* @objc_autoreleaseReturnValue(i8* %p)
@@ -340,3 +340,5 @@ define {}* @test24(i8* %p) {
   %s = bitcast i8* %p to {}*
   ret {}* %s
 }
+
+; CHECK: attributes [[NUW]] = { nounwind }
diff --git a/test/Transforms/ObjCARC/split-backedge.ll b/test/Transforms/ObjCARC/split-backedge.ll
index 08e2dce1f5..5ac278a45d 100644
--- a/test/Transforms/ObjCARC/split-backedge.ll
+++ b/test/Transforms/ObjCARC/split-backedge.ll
@@ -4,12 +4,12 @@
 ; rdar://11256239
 
 ; CHECK: define void @test0
-; CHECK: call i8* @objc_retain(i8* %call) nounwind
-; CHECK: call i8* @objc_retain(i8* %call) nounwind
-; CHECK: call i8* @objc_retain(i8* %cond) nounwind
-; CHECK: call void @objc_release(i8* %call) nounwind
-; CHECK: call void @objc_release(i8* %call) nounwind
-; CHECK: call void @objc_release(i8* %cond) nounwind
+; CHECK: call i8* @objc_retain(i8* %call) [[NUW:#[0-9]+]]
+; CHECK: call i8* @objc_retain(i8* %call) [[NUW]]
+; CHECK: call i8* @objc_retain(i8* %cond) [[NUW]]
+; CHECK: call void @objc_release(i8* %call) [[NUW]]
+; CHECK: call void @objc_release(i8* %call) [[NUW]]
+; CHECK: call void @objc_release(i8* %cond) [[NUW]]
 define void @test0() {
 entry:
   br label %while.body
@@ -46,3 +46,5 @@ declare i8* @objc_retain(i8*)
 declare void @use_pointer(i8*)
 
 !0 = metadata !{}
+
+; CHECK: attributes [[NUW]] = { nounwind }
diff --git a/test/Transforms/ObjCARC/tail-call-invariant-enforcement.ll b/test/Transforms/ObjCARC/tail-call-invariant-enforcement.ll
new file mode 100644
index 0000000000..74ac97c7b3
--- /dev/null
+++ b/test/Transforms/ObjCARC/tail-call-invariant-enforcement.ll
@@ -0,0 +1,84 @@
+; RUN: opt -objc-arc -S < %s | FileCheck %s
+
+declare i8* @objc_release(i8* %x)
+declare i8* @objc_retain(i8* %x)
+declare i8* @objc_autorelease(i8* %x)
+declare i8* @objc_autoreleaseReturnValue(i8* %x)
+declare i8* @objc_retainAutoreleasedReturnValue(i8* %x)
+
+; Never tail call objc_autorelease.
+define i8* @test0(i8* %x) {
+entry:
+  ; CHECK: %tmp0 = call i8* @objc_autorelease(i8* %x)
+  %tmp0 = call i8* @objc_autorelease(i8* %x)
+  ; CHECK: %tmp1 = call i8* @objc_autorelease(i8* %x)
+  %tmp1 = tail call i8* @objc_autorelease(i8* %x)
+
+  ret i8* %x
+}
+
+; Always tail call autoreleaseReturnValue.
+define i8* @test1(i8* %x) {
+entry:
+  ; CHECK: %tmp0 = tail call i8* @objc_autoreleaseReturnValue(i8* %x)
+  %tmp0 = call i8* @objc_autoreleaseReturnValue(i8* %x)
+  ; CHECK: %tmp1 = tail call i8* @objc_autoreleaseReturnValue(i8* %x)
+  %tmp1 = tail call i8* @objc_autoreleaseReturnValue(i8* %x)
+  ret i8* %x
+}
+
+; Always tail call objc_retain.
+define i8* @test2(i8* %x) {
+entry:
+  ; CHECK: %tmp0 = tail call i8* @objc_retain(i8* %x)
+  %tmp0 = call i8* @objc_retain(i8* %x)
+  ; CHECK: %tmp1 = tail call i8* @objc_retain(i8* %x)
+  %tmp1 = tail call i8* @objc_retain(i8* %x)
+  ret i8* %x
+}
+
+define i8* @tmp(i8* %x) {
+  ret i8* %x
+}
+
+; Always tail call objc_retainAutoreleasedReturnValue.
+define i8* @test3(i8* %x) {
+entry:
+  %y = call i8* @tmp(i8* %x)
+  ; CHECK: %tmp0 = tail call i8* @objc_retainAutoreleasedReturnValue(i8* %y)
+  %tmp0 = call i8* @objc_retainAutoreleasedReturnValue(i8* %y)
+  %z = call i8* @tmp(i8* %x)
+  ; CHECK: %tmp1 = tail call i8* @objc_retainAutoreleasedReturnValue(i8* %z)
+  %tmp1 = tail call i8* @objc_retainAutoreleasedReturnValue(i8* %z)
+  ret i8* %x
+}
+
+; By itself, we should never change whether or not objc_release is tail called.
+define i8* @test4(i8* %x) {
+entry:
+  ; CHECK: %tmp0 = call i8* @objc_release(i8* %x)
+  %tmp0 = call i8* @objc_release(i8* %x)
+  ; CHECK: %tmp1 = tail call i8* @objc_release(i8* %x)
+  %tmp1 = tail call i8* @objc_release(i8* %x)
+  ret i8* %x
+}
+
+; If we convert a tail called @objc_autoreleaseReturnValue to an
+; @objc_autorelease, ensure that the tail call is removed.
+define i8* @test5(i8* %x) {
+entry:
+  ; CHECK: %tmp0 = call i8* @objc_autorelease(i8* %x)
+  %tmp0 = tail call i8* @objc_autoreleaseReturnValue(i8* %x)
+  ret i8* %tmp0
+}
+
+; If we convert a called @objc_autorelease to an @objc_autoreleaseReturnValue,
+; ensure that the tail call is added.
+define i8* @test6(i8* %x) {
+entry:
+  ; CHECK: %tmp0 = tail call i8* @objc_retain(i8* %x)
+  %tmp0 = tail call i8* @objc_retain(i8* %x)
+  ; CHECK: %tmp1 = tail call i8* @objc_autoreleaseReturnValue(i8* %x)
+  %tmp1 = call i8* @objc_autorelease(i8* %x)
+  ret i8* %x
+}
diff --git a/test/Transforms/ObjCARC/weak-copies.ll b/test/Transforms/ObjCARC/weak-copies.ll
index e1a94bb474..5dab4e049e 100644
--- a/test/Transforms/ObjCARC/weak-copies.ll
+++ b/test/Transforms/ObjCARC/weak-copies.ll
@@ -19,7 +19,7 @@ target triple = "x86_64-apple-darwin11.0.0"
 ; CHECK:      define void @foo() {
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT:   %call = call i8* @bar()
-; CHECK-NEXT:   call void @use(i8* %call) nounwind
+; CHECK-NEXT:   call void @use(i8* %call) [[NUW:#[0-9]+]]
 ; CHECK-NEXT:   ret void
 ; CHECK-NEXT: }
 define void @foo() {
@@ -39,7 +39,7 @@ entry:
 
 ; Eliminate unnecessary weak pointer copies in a block initialization.
 
-; CHECK:      define void @qux(i8* %me) nounwind {
+; CHECK:      define void @qux(i8* %me) #0 {
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT:   %block = alloca %1, align 8
 ; CHECK-NOT:    alloca
@@ -84,4 +84,6 @@ declare i8* @objc_loadWeak(i8**)
 declare void @use(i8*) nounwind
 declare void @objc_destroyWeak(i8**)
 
+; CHECK: attributes [[NUW]] = { nounwind }
+
 !0 = metadata !{}
diff --git a/test/Transforms/PhaseOrdering/2010-03-22-empty-baseclass.ll b/test/Transforms/PhaseOrdering/2010-03-22-empty-baseclass.ll
index 8859da8de1..53d98e02ec 100644
--- a/test/Transforms/PhaseOrdering/2010-03-22-empty-baseclass.ll
+++ b/test/Transforms/PhaseOrdering/2010-03-22-empty-baseclass.ll
@@ -1,4 +1,4 @@
-; RUN: opt -O2 %s -S -o - | FileCheck %s
+; RUN: opt -O2 -S < %s | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-apple-darwin11.1"
diff --git a/test/Transforms/PhaseOrdering/PR6627.ll b/test/Transforms/PhaseOrdering/PR6627.ll
index ef9947f103..58b762a7af 100644
--- a/test/Transforms/PhaseOrdering/PR6627.ll
+++ b/test/Transforms/PhaseOrdering/PR6627.ll
@@ -1,4 +1,4 @@
-; RUN: opt -O3 -S %s | FileCheck %s
+; RUN: opt -O3 -S < %s | FileCheck %s
 ; XFAIL: *
 
 declare i32 @doo(...)
diff --git a/test/Transforms/PhaseOrdering/basic.ll b/test/Transforms/PhaseOrdering/basic.ll
index 88ebca0a9c..8fbe8c58f4 100644
--- a/test/Transforms/PhaseOrdering/basic.ll
+++ b/test/Transforms/PhaseOrdering/basic.ll
@@ -1,4 +1,4 @@
-; RUN: opt -O3 -S %s | FileCheck %s
+; RUN: opt -O3 -S < %s | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-apple-macosx10.6.7"
diff --git a/test/Transforms/PhaseOrdering/gdce.ll b/test/Transforms/PhaseOrdering/gdce.ll
index 273e47e97c..95f06757a7 100644
--- a/test/Transforms/PhaseOrdering/gdce.ll
+++ b/test/Transforms/PhaseOrdering/gdce.ll
@@ -1,4 +1,4 @@
-; RUN: opt -O2 -S %s | FileCheck %s
+; RUN: opt -O2 -S < %s | FileCheck %s
 
 ; Run global DCE to eliminate unused ctor and dtor.
 ; rdar://9142819
diff --git a/test/Transforms/PhaseOrdering/scev.ll b/test/Transforms/PhaseOrdering/scev.ll
index c731280822..39adb6b73d 100644
--- a/test/Transforms/PhaseOrdering/scev.ll
+++ b/test/Transforms/PhaseOrdering/scev.ll
@@ -1,4 +1,4 @@
-; RUN: opt -O3 -S -analyze -scalar-evolution %s | FileCheck %s
+; RUN: opt -O3 -S -analyze -scalar-evolution < %s | FileCheck %s
 ;
 ; This file contains phase ordering tests for scalar evolution.
 ; Test that the standard passes don't obfuscate the IR so scalar evolution can't
diff --git a/test/Transforms/Reassociate/crash.ll b/test/Transforms/Reassociate/crash.ll
index e29b5dc9c0..770f97371d 100644
--- a/test/Transforms/Reassociate/crash.ll
+++ b/test/Transforms/Reassociate/crash.ll
@@ -1,4 +1,4 @@
-; RUN: opt -reassociate -disable-output %s
+; RUN: opt -reassociate -disable-output < %s
 
 
 ; rdar://7507855
diff --git a/test/Transforms/Reg2Mem/crash.ll b/test/Transforms/Reg2Mem/crash.ll
new file mode 100644
index 0000000000..02fed94b85
--- /dev/null
+++ b/test/Transforms/Reg2Mem/crash.ll
@@ -0,0 +1,88 @@
+; RUN: opt -reg2mem -disable-output < %s
+; PR14782
+
+declare void @f1()
+
+declare i32 @__gxx_personality_sj0(...)
+
+declare void @f2()
+
+declare void @f3()
+
+declare void @f4_()
+
+declare void @_Z12xxxdtsP10xxxpq()
+
+define hidden void @_ZN12xxxyzIi9xxxwLi29ELi0EE4f3NewES0_i() ssp align 2 {
+bb:
+  invoke void @f4_()
+          to label %bb1 unwind label %.thread
+
+.thread:                                          ; preds = %bb
+  %tmp = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  br label %bb13
+
+bb1:                                              ; preds = %bb
+  invoke void @f1()
+          to label %.noexc unwind label %bb10
+
+.noexc:                                           ; preds = %bb1
+  invoke void @f4_()
+          to label %bb6 unwind label %bb2
+
+bb2:                                              ; preds = %.noexc
+  %tmp3 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  invoke void @f3()
+          to label %.body unwind label %bb4
+
+bb4:                                              ; preds = %bb2
+  %tmp5 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          catch i8* null
+  unreachable
+
+bb6:                                              ; preds = %.noexc
+  invoke void @_Z12xxxdtsP10xxxpq()
+          to label %_ZN6xxxdIN12xxxyzIi9xxxwLi29ELi0EE4fr1jS3_.exit unwind label %bb10
+
+_ZN6xxxdIN12xxxyzIi9xxxwLi29ELi0EE4fr1jS3_.exit:  ; preds = %bb6
+  invoke void @f2()
+          to label %bb7 unwind label %bb8
+
+bb7:                                              ; preds = %_ZN6xxxdIN12xxxyzIi9xxxwLi29ELi0EE4fr1jS3_.exit
+  ret void
+
+bb8:                                              ; preds = %_ZN6xxxdIN12xxxyzIi9xxxwLi29ELi0EE4fr1jS3_.exit
+  %tmp9 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  br label %_ZN10xxxpqdlev.exit
+
+bb10:                                             ; preds = %bb6, %bb1
+  %.1 = phi i1 [ true, %bb1 ], [ false, %bb6 ]
+  %tmp11 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  br label %.body
+
+.body:                                            ; preds = %bb10, %bb2
+  %.1.lpad-body = phi i1 [ %.1, %bb10 ], [ true, %bb2 ]
+  invoke void @f2()
+          to label %bb12 unwind label %bb14
+
+bb12:                                             ; preds = %.body
+  br i1 %.1.lpad-body, label %bb13, label %_ZN10xxxpqdlev.exit
+
+bb13:                                             ; preds = %bb12, %.thread
+  invoke void @xxx_MemFree()
+          to label %_ZN10xxxpqdlev.exit unwind label %bb14
+
+_ZN10xxxpqdlev.exit:                              ; preds = %bb13, %bb12, %bb8
+  resume { i8*, i32 } undef
+
+bb14:                                             ; preds = %bb13, %.body
+  %tmp15 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          catch i8* null
+  unreachable
+}
+
+declare void @xxx_MemFree()
diff --git a/test/Transforms/Reg2Mem/lit.local.cfg b/test/Transforms/Reg2Mem/lit.local.cfg
new file mode 100644
index 0000000000..19eebc0ac7
--- /dev/null
+++ b/test/Transforms/Reg2Mem/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/SCCP/crash.ll b/test/Transforms/SCCP/crash.ll
index 2f6da1d726..88528902d7 100644
--- a/test/Transforms/SCCP/crash.ll
+++ b/test/Transforms/SCCP/crash.ll
@@ -1,4 +1,4 @@
-; RUN: opt %s -sccp -S
+; RUN: opt -sccp -S < %s
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
 target triple = "x86_64-apple-darwin10.0"
 
diff --git a/test/Transforms/SCCP/ipsccp-addr-taken.ll b/test/Transforms/SCCP/ipsccp-addr-taken.ll
index c6572fa5d1..b49da97ab2 100644
--- a/test/Transforms/SCCP/ipsccp-addr-taken.ll
+++ b/test/Transforms/SCCP/ipsccp-addr-taken.ll
@@ -1,4 +1,4 @@
-; RUN: opt %s -ipsccp -S | FileCheck %s
+; RUN: opt -ipsccp -S < %s | FileCheck %s
 ; PR7876
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-apple-darwin10.0.0"
diff --git a/test/Transforms/SCCP/retvalue-undef.ll b/test/Transforms/SCCP/retvalue-undef.ll
index 389561f8a1..5a4ba113b7 100644
--- a/test/Transforms/SCCP/retvalue-undef.ll
+++ b/test/Transforms/SCCP/retvalue-undef.ll
@@ -1,4 +1,4 @@
-; RUN: opt -ipsccp -S %s | FileCheck %s
+; RUN: opt -ipsccp -S < %s | FileCheck %s
 ; PR6414
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
 target triple = "x86_64-unknown-linux-gnu"
diff --git a/test/Transforms/SCCP/undef-resolve.ll b/test/Transforms/SCCP/undef-resolve.ll
index a3dddb799a..a1a600c960 100644
--- a/test/Transforms/SCCP/undef-resolve.ll
+++ b/test/Transforms/SCCP/undef-resolve.ll
@@ -1,4 +1,4 @@
-; RUN: opt %s -sccp -S | FileCheck %s
+; RUN: opt -sccp -S < %s | FileCheck %s
 
 
 ; PR6940
diff --git a/test/Transforms/SROA/basictest.ll b/test/Transforms/SROA/basictest.ll
index 9fe926ee2c..efc01acd59 100644
--- a/test/Transforms/SROA/basictest.ll
+++ b/test/Transforms/SROA/basictest.ll
@@ -575,8 +575,8 @@ entry:
   store i8 0, i8* %a2ptr
   %aiptr = bitcast [3 x i8]* %a to i24*
   %ai = load i24* %aiptr
-; CHCEK-NOT: store
-; CHCEK-NOT: load
+; CHECK-NOT: store
+; CHECK-NOT: load
 ; CHECK:      %[[ext2:.*]] = zext i8 0 to i24
 ; CHECK-NEXT: %[[shift2:.*]] = shl i24 %[[ext2]], 16
 ; CHECK-NEXT: %[[mask2:.*]] = and i24 undef, 65535
@@ -597,8 +597,8 @@ entry:
   %b1 = load i8* %b1ptr
   %b2ptr = getelementptr [3 x i8]* %b, i64 0, i32 2
   %b2 = load i8* %b2ptr
-; CHCEK-NOT: store
-; CHCEK-NOT: load
+; CHECK-NOT: store
+; CHECK-NOT: load
 ; CHECK:      %[[trunc0:.*]] = trunc i24 %[[insert0]] to i8
 ; CHECK-NEXT: %[[shift1:.*]] = lshr i24 %[[insert0]], 8
 ; CHECK-NEXT: %[[trunc1:.*]] = trunc i24 %[[shift1]] to i8
@@ -1176,3 +1176,50 @@ entry:
   %baz = load i1* %a.i1, align 1
   ret void
 }
+
+define <3 x i8> @PR14572.1(i32 %x) {
+; Ensure that a split integer store which is wider than the type size of the
+; alloca (relying on the alloc size padding) doesn't trigger an assert.
+; CHECK: @PR14572.1
+
+entry:
+  %a = alloca <3 x i8>, align 4
+; CHECK-NOT: alloca
+
+  %cast = bitcast <3 x i8>* %a to i32*
+  store i32 %x, i32* %cast, align 1
+  %y = load <3 x i8>* %a, align 4
+  ret <3 x i8> %y
+; CHECK: ret <3 x i8>
+}
+
+define i32 @PR14572.2(<3 x i8> %x) {
+; Ensure that a split integer load which is wider than the type size of the
+; alloca (relying on the alloc size padding) doesn't trigger an assert.
+; CHECK: @PR14572.2
+
+entry:
+  %a = alloca <3 x i8>, align 4
+; CHECK-NOT: alloca
+
+  store <3 x i8> %x, <3 x i8>* %a, align 1
+  %cast = bitcast <3 x i8>* %a to i32*
+  %y = load i32* %cast, align 4
+  ret i32 %y
+; CHECK: ret i32
+}
+
+define i32 @PR14601(i32 %x) {
+; Don't try to form a promotable integer alloca when there is a variable length
+; memory intrinsic.
+; CHECK: @PR14601
+
+entry:
+  %a = alloca i32
+; CHECK: alloca
+
+  %a.i8 = bitcast i32* %a to i8*
+  call void @llvm.memset.p0i8.i32(i8* %a.i8, i8 0, i32 %x, i32 1, i1 false)
+  %v = load i32* %a
+  ret i32 %v
+}
diff --git a/test/Transforms/SROA/big-endian.ll b/test/Transforms/SROA/big-endian.ll
index 1ac6d25d63..64a0cc7439 100644
--- a/test/Transforms/SROA/big-endian.ll
+++ b/test/Transforms/SROA/big-endian.ll
@@ -24,8 +24,8 @@ entry:
   store i8 0, i8* %a2ptr
   %aiptr = bitcast [3 x i8]* %a to i24*
   %ai = load i24* %aiptr
-; CHCEK-NOT: store
-; CHCEK-NOT: load
+; CHECK-NOT: store
+; CHECK-NOT: load
 ; CHECK:      %[[ext2:.*]] = zext i8 0 to i24
 ; CHECK-NEXT: %[[mask2:.*]] = and i24 undef, -256
 ; CHECK-NEXT: %[[insert2:.*]] = or i24 %[[mask2]], %[[ext2]]
@@ -46,8 +46,8 @@ entry:
   %b1 = load i8* %b1ptr
   %b2ptr = getelementptr [3 x i8]* %b, i64 0, i32 2
   %b2 = load i8* %b2ptr
-; CHCEK-NOT: store
-; CHCEK-NOT: load
+; CHECK-NOT: store
+; CHECK-NOT: load
 ; CHECK:      %[[shift0:.*]] = lshr i24 %[[insert0]], 16
 ; CHECK-NEXT: %[[trunc0:.*]] = trunc i24 %[[shift0]] to i8
 ; CHECK-NEXT: %[[shift1:.*]] = lshr i24 %[[insert0]], 8
@@ -77,8 +77,8 @@ entry:
   %a2ptr = getelementptr [7 x i8]* %a, i64 0, i32 2
   %a3ptr = getelementptr [7 x i8]* %a, i64 0, i32 3
 
-; CHCEK-NOT: store
-; CHCEK-NOT: load
+; CHECK-NOT: store
+; CHECK-NOT: load
 
   %a0i16ptr = bitcast i8* %a0ptr to i16*
   store i16 1, i16* %a0i16ptr
@@ -98,8 +98,8 @@ entry:
 ; CHECK-NEXT: %[[mask3:.*]] = and i56 undef, -1099511627776
 ; CHECK-NEXT: %[[insert3:.*]] = or i56 %[[mask3]], %[[ext3]]
 
-; CHCEK-NOT: store
-; CHCEK-NOT: load
+; CHECK-NOT: store
+; CHECK-NOT: load
 
   %aiptr = bitcast [7 x i8]* %a to i56*
   %ai = load i56* %aiptr
diff --git a/test/Transforms/SROA/vector-promotion.ll b/test/Transforms/SROA/vector-promotion.ll
index bb34e3f084..02f6d040cc 100644
--- a/test/Transforms/SROA/vector-promotion.ll
+++ b/test/Transforms/SROA/vector-promotion.ll
@@ -279,6 +279,89 @@ entry:
 ; CHECK-NEXT: ret <4 x i32> %[[ret]]
 }
 
+declare void @llvm.memset.p0i32.i32(i32* nocapture, i32, i32, i32, i1) nounwind
+
+define <4 x float> @test_subvec_memset() {
+; CHECK: @test_subvec_memset
+entry:
+  %a = alloca <4 x float>
+; CHECK-NOT: alloca
+
+  %a.gep0 = getelementptr <4 x float>* %a, i32 0, i32 0
+  %a.cast0 = bitcast float* %a.gep0 to i8*
+  call void @llvm.memset.p0i8.i32(i8* %a.cast0, i8 0, i32 8, i32 0, i1 false)
+; CHECK-NOT: store
+; CHECK:      %[[insert1:.*]] = shufflevector <4 x float> <float 0.000000e+00, float 0.000000e+00, float undef, float undef>, <4 x float> undef, <4 x i32> <i32 0, i32 1, {{.*}}>
+
+  %a.gep1 = getelementptr <4 x float>* %a, i32 0, i32 1
+  %a.cast1 = bitcast float* %a.gep1 to i8*
+  call void @llvm.memset.p0i8.i32(i8* %a.cast1, i8 1, i32 8, i32 0, i1 false)
+; CHECK-NEXT: %[[insert2:.*]] = shufflevector <4 x float> <float undef, float 0x3820202020000000, float 0x3820202020000000, float undef>, <4 x float> %[[insert1]], <4 x i32> <i32 4, i32 1, i32 2, {{.*}}>
+
+  %a.gep2 = getelementptr <4 x float>* %a, i32 0, i32 2
+  %a.cast2 = bitcast float* %a.gep2 to i8*
+  call void @llvm.memset.p0i8.i32(i8* %a.cast2, i8 3, i32 8, i32 0, i1 false)
+; CHECK-NEXT: %[[insert3:.*]] = shufflevector <4 x float> <float undef, float undef, float 0x3860606060000000, float 0x3860606060000000>, <4 x float> %[[insert2]], <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+
+  %a.gep3 = getelementptr <4 x float>* %a, i32 0, i32 3
+  %a.cast3 = bitcast float* %a.gep3 to i8*
+  call void @llvm.memset.p0i8.i32(i8* %a.cast3, i8 7, i32 4, i32 0, i1 false)
+; CHECK-NEXT: %[[insert4:.*]] = insertelement <4 x float> %[[insert3]], float 0x38E0E0E0E0000000, i32 3
+
+  %ret = load <4 x float>* %a
+
+  ret <4 x float> %ret
+; CHECK-NEXT: ret <4 x float> %[[insert4]]
+}
+
+define <4 x float> @test_subvec_memcpy(i8* %x, i8* %y, i8* %z, i8* %f, i8* %out) {
+; CHECK: @test_subvec_memcpy
+entry:
+  %a = alloca <4 x float>
+; CHECK-NOT: alloca
+
+  %a.gep0 = getelementptr <4 x float>* %a, i32 0, i32 0
+  %a.cast0 = bitcast float* %a.gep0 to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.cast0, i8* %x, i32 8, i32 0, i1 false)
+; CHECK:      %[[xptr:.*]] = bitcast i8* %x to <2 x float>*
+; CHECK-NEXT: %[[x:.*]] = load <2 x float>* %[[xptr]]
+; CHECK-NEXT: %[[expand_x:.*]] = shufflevector <2 x float> %[[x]], <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+; CHECK-NEXT: %[[insert_x:.*]] = shufflevector <4 x float> %[[expand_x]], <4 x float> undef, <4 x i32> <i32 0, i32 1, {{.*}}>
+
+  %a.gep1 = getelementptr <4 x float>* %a, i32 0, i32 1
+  %a.cast1 = bitcast float* %a.gep1 to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.cast1, i8* %y, i32 8, i32 0, i1 false)
+; CHECK-NEXT: %[[yptr:.*]] = bitcast i8* %y to <2 x float>*
+; CHECK-NEXT: %[[y:.*]] = load <2 x float>* %[[yptr]]
+; CHECK-NEXT: %[[expand_y:.*]] = shufflevector <2 x float> %[[y]], <2 x float> undef, <4 x i32> <i32 undef, i32 0, i32 1, i32 undef>
+; CHECK-NEXT: %[[insert_y:.*]] = shufflevector <4 x float> %[[expand_y]], <4 x float> %[[insert_x]], <4 x i32> <i32 4, i32 1, i32 2, {{.*}}>
+
+  %a.gep2 = getelementptr <4 x float>* %a, i32 0, i32 2
+  %a.cast2 = bitcast float* %a.gep2 to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.cast2, i8* %z, i32 8, i32 0, i1 false)
+; CHECK-NEXT: %[[zptr:.*]] = bitcast i8* %z to <2 x float>*
+; CHECK-NEXT: %[[z:.*]] = load <2 x float>* %[[zptr]]
+; CHECK-NEXT: %[[expand_z:.*]] = shufflevector <2 x float> %[[z]], <2 x float> undef, <4 x i32> <i32 undef, i32 undef, i32 0, i32 1>
+; CHECK-NEXT: %[[insert_z:.*]] = shufflevector <4 x float> %[[expand_z]], <4 x float> %[[insert_y]], <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+
+  %a.gep3 = getelementptr <4 x float>* %a, i32 0, i32 3
+  %a.cast3 = bitcast float* %a.gep3 to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.cast3, i8* %f, i32 4, i32 0, i1 false)
+; CHECK-NEXT: %[[fptr:.*]] = bitcast i8* %f to float*
+; CHECK-NEXT: %[[f:.*]] = load float* %[[fptr]]
+; CHECK-NEXT: %[[insert_f:.*]] = insertelement <4 x float> %[[insert_z]], float %[[f]], i32 3
+
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %out, i8* %a.cast2, i32 8, i32 0, i1 false)
+; CHECK-NEXT: %[[outptr:.*]] = bitcast i8* %out to <2 x float>*
+; CHECK-NEXT: %[[extract_out:.*]] = shufflevector <4 x float> %[[insert_f]], <4 x float> undef, <2 x i32> <i32 2, i32 3>
+; CHECK-NEXT: store <2 x float> %[[extract_out]], <2 x float>* %[[outptr]]
+
+  %ret = load <4 x float>* %a
+
+  ret <4 x float> %ret
+; CHECK-NEXT: ret <4 x float> %[[insert_f]]
+}
+
 define i32 @PR14212() {
 ; CHECK: @PR14212
 ; This caused a crash when "splitting" the load of the i32 in order to promote
diff --git a/test/Transforms/SROA/vectors-of-pointers.ll b/test/Transforms/SROA/vectors-of-pointers.ll
new file mode 100644
index 0000000000..7e995b9e44
--- /dev/null
+++ b/test/Transforms/SROA/vectors-of-pointers.ll
@@ -0,0 +1,25 @@
+; RUN: opt < %s -sroa
+
+; Make sure we don't crash on this one.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+define void @foo() {
+entry:
+  %Args.i = alloca <2 x i32*>, align 16
+  br i1 undef, label %bb0.exit158, label %if.then.i.i.i.i.i138
+
+if.then.i.i.i.i.i138:
+  unreachable
+
+bb0.exit158:
+  br i1 undef, label %bb0.exit257, label %if.then.i.i.i.i.i237
+
+if.then.i.i.i.i.i237:
+  unreachable
+
+bb0.exit257:
+  %0 = load <2 x i32*>* %Args.i, align 16
+  unreachable
+}
diff --git a/test/Transforms/ScalarRepl/2003-09-12-IncorrectPromote.ll b/test/Transforms/ScalarRepl/2003-09-12-IncorrectPromote.ll
index 0b5e4152c4..3f28cb187f 100644
--- a/test/Transforms/ScalarRepl/2003-09-12-IncorrectPromote.ll
+++ b/test/Transforms/ScalarRepl/2003-09-12-IncorrectPromote.ll
@@ -1,7 +1,6 @@
 ; Scalar replacement was incorrectly promoting this alloca!!
 ;
-; RUN: opt < %s -scalarrepl -S | \
-; RUN:   sed "s/;.*//g" | grep "\["
+; RUN: opt < %s -scalarrepl -S | FileCheck %s
 
 define i8* @test() {
 	%A = alloca [30 x i8]		; <[30 x i8]*> [#uses=1]
@@ -10,4 +9,4 @@ define i8* @test() {
 	store i8 0, i8* %B
 	ret i8* %C
 }
-
+; CHECK: alloca [
diff --git a/test/Transforms/ScalarRepl/crash.ll b/test/Transforms/ScalarRepl/crash.ll
index 58c5a3a052..8c60dceb8b 100644
--- a/test/Transforms/ScalarRepl/crash.ll
+++ b/test/Transforms/ScalarRepl/crash.ll
@@ -1,5 +1,5 @@
-; RUN: opt -scalarrepl %s -disable-output
-; RUN: opt -scalarrepl-ssa %s -disable-output
+; RUN: opt -scalarrepl -disable-output < %s
+; RUN: opt -scalarrepl-ssa -disable-output < %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-apple-darwin10.0.0"
diff --git a/test/Transforms/ScalarRepl/debuginfo-preserved.ll b/test/Transforms/ScalarRepl/debuginfo-preserved.ll
index c149134535..8a59c2ec2a 100644
--- a/test/Transforms/ScalarRepl/debuginfo-preserved.ll
+++ b/test/Transforms/ScalarRepl/debuginfo-preserved.ll
@@ -40,20 +40,20 @@ entry:
 declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!0}
-!llvm.dbg.sp = !{!1}
+!17 = metadata !{metadata !1}
 
-!0 = metadata !{i32 589841, i32 0, i32 12, metadata !"/d/j/debug-test.c", metadata !"/Volumes/Data/b", metadata !"clang version 3.0 (trunk 131941)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{i32 589870, i32 0, metadata !2, metadata !"f", metadata !"f", metadata !"", metadata !2, i32 1, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, i32 (i32, i32)* @f, null, null} ; [ DW_TAG_subprogram ]
-!2 = metadata !{i32 589865, metadata !"/d/j/debug-test.c", metadata !"/Volumes/Data/b", metadata !0} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 589845, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!0 = metadata !{i32 786449, i32 0, i32 12, metadata !"/d/j/debug-test.c", metadata !"/Volumes/Data/b", metadata !"clang version 3.0 (trunk 131941)", i1 true, i1 false, metadata !"", i32 0, null, null, metadata !17, null, null} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{i32 786478, i32 0, metadata !2, metadata !"f", metadata !"f", metadata !"", metadata !2, i32 1, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, i32 (i32, i32)* @f, null, null, null, i32 1} ; [ DW_TAG_subprogram ]
+!2 = metadata !{i32 786473, metadata !"/d/j/debug-test.c", metadata !"/Volumes/Data/b", metadata !0} ; [ DW_TAG_file_type ]
+!3 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !4 = metadata !{metadata !5}
-!5 = metadata !{i32 589860, metadata !0, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 590081, metadata !1, metadata !"a", metadata !2, i32 16777217, metadata !5, i32 0} ; [ DW_TAG_arg_variable ]
+!5 = metadata !{i32 786468, metadata !0, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 786689, metadata !1, metadata !"a", metadata !2, i32 16777217, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ]
 !7 = metadata !{i32 1, i32 11, metadata !1, null}
-!8 = metadata !{i32 590081, metadata !1, metadata !"b", metadata !2, i32 33554433, metadata !5, i32 0} ; [ DW_TAG_arg_variable ]
+!8 = metadata !{i32 786689, metadata !1, metadata !"b", metadata !2, i32 33554433, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ]
 !9 = metadata !{i32 1, i32 18, metadata !1, null}
-!10 = metadata !{i32 590080, metadata !11, metadata !"c", metadata !2, i32 2, metadata !5, i32 0} ; [ DW_TAG_auto_variable ]
-!11 = metadata !{i32 589835, metadata !1, i32 1, i32 21, metadata !2, i32 0} ; [ DW_TAG_lexical_block ]
+!10 = metadata !{i32 786688, metadata !11, metadata !"c", metadata !2, i32 2, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ]
+!11 = metadata !{i32 786443, metadata !1, i32 1, i32 21, metadata !2, i32 0} ; [ DW_TAG_lexical_block ]
 !12 = metadata !{i32 2, i32 9, metadata !11, null}
 !13 = metadata !{i32 2, i32 14, metadata !11, null}
 !14 = metadata !{i32 3, i32 5, metadata !11, null}
diff --git a/test/Transforms/ScalarRepl/memcpy-align.ll b/test/Transforms/ScalarRepl/memcpy-align.ll
index a7af208f4f..6046e1295d 100644
--- a/test/Transforms/ScalarRepl/memcpy-align.ll
+++ b/test/Transforms/ScalarRepl/memcpy-align.ll
@@ -1,4 +1,4 @@
-; RUN: opt %s -scalarrepl -S | FileCheck %s
+; RUN: opt -scalarrepl -S < %s | FileCheck %s
 ; PR6832
 target datalayout =
 "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32"
diff --git a/test/Transforms/ScalarRepl/phi-cycle.ll b/test/Transforms/ScalarRepl/phi-cycle.ll
index cb5101c2dd..05d9382cec 100644
--- a/test/Transforms/ScalarRepl/phi-cycle.ll
+++ b/test/Transforms/ScalarRepl/phi-cycle.ll
@@ -67,7 +67,7 @@ while.cond.backedge.i:                            ; preds = %if.end.i, %while.bo
 
 ; CHECK: func.exit:
 ; CHECK-NOT: load
-; CHECK: %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([6 x i8]* @.str, i64 0, i64 0), i32 %tmp) nounwind
+; CHECK: %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([6 x i8]* @.str, i64 0, i64 0), i32 %tmp) [[NUW:#[0-9]+]]
 func.exit:                                        ; preds = %while.body.i.func.exit_crit_edge, %while.cond.i.func.exit_crit_edge
   %tmp3 = load i32* %x.i, align 4
   %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([6 x i8]* @.str, i64 0, i64 0), i32 %tmp3) nounwind
@@ -75,3 +75,6 @@ func.exit:                                        ; preds = %while.body.i.func.e
 }
 
 declare i32 @printf(i8* nocapture, ...) nounwind
+
+; CHECK: attributes #0 = { nounwind uwtable }
+; CHECK: attributes [[NUW]] = { nounwind }
diff --git a/test/Transforms/ScalarRepl/phi-select.ll b/test/Transforms/ScalarRepl/phi-select.ll
index ffe0b1dd5f..5c21c3bd9f 100644
--- a/test/Transforms/ScalarRepl/phi-select.ll
+++ b/test/Transforms/ScalarRepl/phi-select.ll
@@ -1,4 +1,4 @@
-; RUN: opt %s -scalarrepl -S | FileCheck %s
+; RUN: opt -scalarrepl -S < %s | FileCheck %s
 ; Test promotion of allocas that have phis and select users.
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-apple-darwin10.2"
diff --git a/test/Transforms/ScalarRepl/volatile.ll b/test/Transforms/ScalarRepl/volatile.ll
index 056526cbd9..d506cdfbd8 100644
--- a/test/Transforms/ScalarRepl/volatile.ll
+++ b/test/Transforms/ScalarRepl/volatile.ll
@@ -1,12 +1,13 @@
-; RUN: opt < %s -scalarrepl -S | grep "load volatile"
-; RUN: opt < %s -scalarrepl -S | grep "store volatile"
+; RUN: opt < %s -scalarrepl -S | FileCheck %s
 
 define i32 @voltest(i32 %T) {
 	%A = alloca {i32, i32}
 	%B = getelementptr {i32,i32}* %A, i32 0, i32 0
 	store volatile i32 %T, i32* %B
+; CHECK: store volatile
 
 	%C = getelementptr {i32,i32}* %A, i32 0, i32 1
 	%X = load volatile i32* %C
+; CHECK: load volatile
 	ret i32 %X
 }
diff --git a/test/Transforms/SimplifyCFG/2010-03-30-InvokeCrash.ll b/test/Transforms/SimplifyCFG/2010-03-30-InvokeCrash.ll
index 7bffa1a8e0..333336de76 100644
--- a/test/Transforms/SimplifyCFG/2010-03-30-InvokeCrash.ll
+++ b/test/Transforms/SimplifyCFG/2010-03-30-InvokeCrash.ll
@@ -1,4 +1,4 @@
-; RUN: opt %s -simplifycfg -disable-output
+; RUN: opt -simplifycfg -disable-output < %s
 ; END.
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-unknown-linux-gnu"
diff --git a/test/Transforms/SimplifyCFG/2002-05-05-EmptyBlockMerge.ll b/test/Transforms/SimplifyCFG/EmptyBlockMerge.ll
index feffb4e4c8..aba08dc073 100644
--- a/test/Transforms/SimplifyCFG/2002-05-05-EmptyBlockMerge.ll
+++ b/test/Transforms/SimplifyCFG/EmptyBlockMerge.ll
@@ -1,8 +1,6 @@
 ; Basic block #2 should not be merged into BB #3!
 ;
-; RUN: opt < %s -simplifycfg -S | \
-; RUN:   grep "br label"
-;
+; RUN: opt < %s -simplifycfg -S | FileCheck %s
 
 declare void @foo()
 
@@ -13,6 +11,7 @@ bb0:
 	br i1 %cond218, label %bb3, label %bb2
 bb2:		; preds = %bb0
 	call void @foo( )
+; CHECK: br label %bb3
 	br label %bb3
 bb3:		; preds = %bb2, %bb0
 	%reg117 = phi i32 [ 110, %bb2 ], [ %reg108, %bb0 ]		; <i32> [#uses=1]
diff --git a/test/Transforms/SimplifyCFG/2002-06-24-PHINode.ll b/test/Transforms/SimplifyCFG/PHINode.ll
index 88f32bc082..25a242a559 100644
--- a/test/Transforms/SimplifyCFG/2002-06-24-PHINode.ll
+++ b/test/Transforms/SimplifyCFG/PHINode.ll
@@ -1,10 +1,11 @@
 ; -simplifycfg is not folding blocks if there is a PHI node involved.  This 
 ; should be fixed eventually
 
-; RUN: opt < %s -simplifycfg -S | not grep br
+; RUN: opt < %s -simplifycfg -S | FileCheck %s
 
 define i32 @main(i32 %argc) {
 ; <label>:0
+; CHECK-NOT: br label %InlinedFunctionReturnNode
 	br label %InlinedFunctionReturnNode
 InlinedFunctionReturnNode:		; preds = %0
 	%X = phi i32 [ 7, %0 ]		; <i32> [#uses=1]
diff --git a/test/Transforms/SimplifyCFG/PR9946.ll b/test/Transforms/SimplifyCFG/PR9946.ll
index 4a61b84605..c355a8f5cc 100644
--- a/test/Transforms/SimplifyCFG/PR9946.ll
+++ b/test/Transforms/SimplifyCFG/PR9946.ll
@@ -1,4 +1,4 @@
-; RUN: opt  %s -simplifycfg -disable-output
+; RUN: opt -simplifycfg -disable-output < %s
 
 @foo = external constant i32
 
diff --git a/test/Transforms/SimplifyCFG/SpeculativeExec.ll b/test/Transforms/SimplifyCFG/SpeculativeExec.ll
index a61867fe89..dd2e5d1c3a 100644
--- a/test/Transforms/SimplifyCFG/SpeculativeExec.ll
+++ b/test/Transforms/SimplifyCFG/SpeculativeExec.ll
@@ -44,3 +44,44 @@ join:
   ret i8 %c
 }
 
+define i8* @test4(i1* %dummy, i8* %a, i8* %b) {
+; Test that we don't speculate an arbitrarily large number of unfolded constant
+; expressions.
+; CHECK: @test4
+
+entry:
+  %cond1 = load volatile i1* %dummy
+  br i1 %cond1, label %if, label %end
+
+if:
+  %cond2 = load volatile i1* %dummy
+  br i1 %cond2, label %then, label %end
+
+then:
+  br label %end
+
+end:
+  %x1 = phi i8* [ %a, %entry ], [ %b, %if ], [ inttoptr (i64 1 to i8*), %then ]
+  %x2 = phi i8* [ %a, %entry ], [ %b, %if ], [ inttoptr (i64 2 to i8*), %then ]
+  %x3 = phi i8* [ %a, %entry ], [ %b, %if ], [ inttoptr (i64 3 to i8*), %then ]
+  %x4 = phi i8* [ %a, %entry ], [ %b, %if ], [ inttoptr (i64 4 to i8*), %then ]
+  %x5 = phi i8* [ %a, %entry ], [ %b, %if ], [ inttoptr (i64 5 to i8*), %then ]
+  %x6 = phi i8* [ %a, %entry ], [ %b, %if ], [ inttoptr (i64 6 to i8*), %then ]
+  %x7 = phi i8* [ %a, %entry ], [ %b, %if ], [ inttoptr (i64 7 to i8*), %then ]
+  %x8 = phi i8* [ %a, %entry ], [ %b, %if ], [ inttoptr (i64 8 to i8*), %then ]
+  %x9 = phi i8* [ %a, %entry ], [ %b, %if ], [ inttoptr (i64 9 to i8*), %then ]
+  %x10 = phi i8* [ %a, %entry ], [ %b, %if ], [ inttoptr (i64 10 to i8*), %then ]
+; CHECK-NOT: select
+; CHECK: phi i8*
+; CHECK: phi i8*
+; CHECK: phi i8*
+; CHECK: phi i8*
+; CHECK: phi i8*
+; CHECK: phi i8*
+; CHECK: phi i8*
+; CHECK: phi i8*
+; CHECK: phi i8*
+; CHECK: phi i8*
+
+  ret i8* %x10
+}
diff --git a/test/Transforms/SimplifyCFG/branch-fold-dbg.ll b/test/Transforms/SimplifyCFG/branch-fold-dbg.ll
index 0897c95a67..0526883fe8 100644
--- a/test/Transforms/SimplifyCFG/branch-fold-dbg.ll
+++ b/test/Transforms/SimplifyCFG/branch-fold-dbg.ll
@@ -1,4 +1,4 @@
-; RUN: opt -simplifycfg -S %s | FileCheck %s
+; RUN: opt -simplifycfg -S < %s | FileCheck %s
 
 %0 = type { i32*, i32* }
 
diff --git a/test/Transforms/SimplifyCFG/select-gep.ll b/test/Transforms/SimplifyCFG/select-gep.ll
index 7654d0271a..3e2a6237b2 100644
--- a/test/Transforms/SimplifyCFG/select-gep.ll
+++ b/test/Transforms/SimplifyCFG/select-gep.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -simplifycfg %s | FileCheck %s
+; RUN: opt -S -simplifycfg < %s | FileCheck %s
 
 define i8* @test1(i8* %x, i64 %y) nounwind {
 entry:
diff --git a/test/Transforms/SimplifyCFG/switch-on-const-select.ll b/test/Transforms/SimplifyCFG/switch-on-const-select.ll
index 673a62bf03..9cd709ff8e 100644
--- a/test/Transforms/SimplifyCFG/switch-on-const-select.ll
+++ b/test/Transforms/SimplifyCFG/switch-on-const-select.ll
@@ -35,7 +35,7 @@ define i32 @bar(i64 %x, i64 %y) nounwind {
 ; CHECK: @bar
 entry:
 ; CHECK-NEXT: entry:
-; CHECK-NEXT: tail call void @bees.a() nounwind
+; CHECK-NEXT: tail call void @bees.a() [[NUW:#[0-9]+]]
 ; CHECK-NEXT: ret i32 0
     %lt = icmp slt i64 %x, %y
     %qux = select i1 %lt, i32 0, i32 2
@@ -61,7 +61,7 @@ define void @bazz(i64 %x, i64 %y) nounwind {
 ; CHECK: @bazz
 entry:
 ; CHECK-NEXT: entry:
-; CHECK-NEXT: tail call void @bees.b() nounwind
+; CHECK-NEXT: tail call void @bees.b() [[NUW]]
 ; CHECK-NEXT: ret void
     %lt = icmp slt i64 %x, %y
     %qux = select i1 %lt, i32 10, i32 12
@@ -86,7 +86,7 @@ define void @quux(i64 %x, i64 %y) nounwind {
 ; CHECK: @quux
 entry:
 ; CHECK-NEXT: entry:
-; CHECK-NEXT: tail call void @bees.a() nounwind
+; CHECK-NEXT: tail call void @bees.a() [[NUW]]
 ; CHECK-NEXT: ret void
     %lt = icmp slt i64 %x, %y
     %qux = select i1 %lt, i32 0, i32 0
@@ -136,3 +136,6 @@ bees:
 declare void @llvm.trap() nounwind noreturn
 declare void @bees.a() nounwind
 declare void @bees.b() nounwind
+
+; CHECK: attributes [[NUW]] = { nounwind }
+; CHECK: attributes #1 = { noreturn nounwind }
diff --git a/test/Transforms/SimplifyCFG/trivial-throw.ll b/test/Transforms/SimplifyCFG/trivial-throw.ll
new file mode 100644
index 0000000000..ca2b5693e6
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/trivial-throw.ll
@@ -0,0 +1,77 @@
+; RUN: opt -simplifycfg -S < %s | FileCheck %s
+; <rdar://problem/13360379>
+
+@_ZTVN10__cxxabiv117__class_type_infoE = external global i8*
+@_ZTS13TestException = linkonce_odr constant [16 x i8] c"13TestException\00"
+@_ZTI13TestException = linkonce_odr unnamed_addr constant { i8*, i8* } { i8* bitcast (i8** getelementptr inbounds (i8** @_ZTVN10__cxxabiv117__class_type_infoE, i64 2) to i8*), i8* getelementptr inbounds ([16 x i8]* @_ZTS13TestException, i32 0, i32 0) }
+
+define void @throw(i32 %n) #0 {
+entry:
+  %exception = call i8* @__cxa_allocate_exception(i64 1) #4
+  call void @__cxa_throw(i8* %exception, i8* bitcast ({ i8*, i8* }* @_ZTI13TestException to i8*), i8* null) #2
+  unreachable
+}
+
+define void @func() #0 {
+entry:
+; CHECK: func()
+; CHECK: invoke void @throw
+; CHECK-NOT: call void @throw
+  invoke void @throw(i32 42) #0
+          to label %exit unwind label %lpad
+
+lpad:
+  %tmp0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          cleanup
+  resume { i8*, i32 } %tmp0
+
+exit:
+  invoke void @abort() #2
+          to label %invoke.cont unwind label %lpad1
+
+invoke.cont:
+  unreachable
+
+lpad1:
+  %tmp1 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          catch i8* bitcast ({ i8*, i8* }* @_ZTI13TestException to i8*)
+  %tmp2 = extractvalue { i8*, i32 } %tmp1, 1
+  %tmp3 = call i32 @llvm.eh.typeid.for(i8* bitcast ({ i8*, i8* }* @_ZTI13TestException to i8*)) #4
+  %matches = icmp eq i32 %tmp2, %tmp3
+  br i1 %matches, label %catch, label %eh.resume
+
+catch:
+  ret void
+
+eh.resume:
+  resume { i8*, i32 } %tmp1
+}
+
+define linkonce_odr hidden void @__clang_call_terminate(i8*) #1 {
+  %2 = call i8* @__cxa_begin_catch(i8* %0) #4
+  call void @_ZSt9terminatev() #5
+  unreachable
+}
+
+declare void @abort() #2
+
+declare i32 @llvm.eh.typeid.for(i8*) #3
+
+declare void @__cxa_end_catch()
+
+declare i8* @__cxa_allocate_exception(i64)
+
+declare i32 @__gxx_personality_v0(...)
+
+declare void @__cxa_throw(i8*, i8*, i8*)
+
+declare i8* @__cxa_begin_catch(i8*)
+
+declare void @_ZSt9terminatev()
+
+attributes #0 = { ssp uwtable }
+attributes #1 = { noinline noreturn nounwind }
+attributes #2 = { noreturn }
+attributes #3 = { nounwind readnone }
+attributes #4 = { nounwind }
+attributes #5 = { noreturn nounwind }
diff --git a/test/Transforms/SimplifyCFG/volatile-phioper.ll b/test/Transforms/SimplifyCFG/volatile-phioper.ll
new file mode 100644
index 0000000000..164898897e
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/volatile-phioper.ll
@@ -0,0 +1,48 @@
+; RUN: opt < %s -simplifycfg -S | FileCheck %s
+;
+; rdar:13349374
+;
+; SimplifyCFG should not eliminate blocks with volatile stores.
+; Essentially, volatile needs to be backdoor that tells the optimizer
+; it can no longer use language standard as an excuse. The compiler
+; needs to expose the volatile access to the platform.
+;
+; CHECK: @test
+; CHECK: entry:
+; CHECK: @Trace
+; CHECK: while.body:
+; CHECK: store volatile
+; CHECK: end:
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+define void @test(i8** nocapture %PeiServices) #0 {
+entry:
+  %call = tail call i32 (...)* @Trace() #2
+  %tobool = icmp eq i32 %call, 0
+  br i1 %tobool, label %while.body, label %if.then
+
+if.then:                                          ; preds = %entry
+  %call1 = tail call i32 (...)* @Trace() #2
+  br label %while.body
+
+while.body:                                       ; preds = %entry, %if.then, %while.body
+  %Addr.017 = phi i8* [ %incdec.ptr, %while.body ], [ null, %if.then ], [ null, %entry ]
+  %x.016 = phi i8 [ %inc, %while.body ], [ 0, %if.then ], [ 0, %entry ]
+  %inc = add i8 %x.016, 1
+  %incdec.ptr = getelementptr inbounds i8* %Addr.017, i64 1
+  store volatile i8 %x.016, i8* %Addr.017, align 1
+  %0 = ptrtoint i8* %incdec.ptr to i64
+  %1 = trunc i64 %0 to i32
+  %cmp = icmp ult i32 %1, 4096
+  br i1 %cmp, label %while.body, label %end
+
+end:
+  ret void
+}
+declare i32 @Trace(...) #1
+
+attributes #0 = { nounwind ssp uwtable "fp-contract-model"="standard" "no-frame-pointer-elim" "no-frame-pointer-elim-non-leaf" "realign-stack" "relocation-model"="pic" "ssp-buffers-size"="8" }
+attributes #1 = { "fp-contract-model"="standard" "no-frame-pointer-elim" "no-frame-pointer-elim-non-leaf" "realign-stack" "relocation-model"="pic" "ssp-buffers-size"="8" }
+attributes #2 = { nounwind }
+
+!0 = metadata !{i32 1039}
diff --git a/test/Transforms/SimplifyLibCalls/2009-01-04-Annotate.ll b/test/Transforms/SimplifyLibCalls/2009-01-04-Annotate.ll
index 73eb05b05e..16791e20f4 100644
--- a/test/Transforms/SimplifyLibCalls/2009-01-04-Annotate.ll
+++ b/test/Transforms/SimplifyLibCalls/2009-01-04-Annotate.ll
@@ -1,12 +1,21 @@
-; RUN: opt < %s -simplify-libcalls -S > %t
-; RUN: grep noalias %t | count 2
-; RUN: grep nocapture %t | count 3
-; RUN: grep nounwind %t | count 3
-; RUN: grep readonly %t | count 1
+; RUN: opt < %s -simplify-libcalls -S | FileCheck %s
 
+; CHECK: declare noalias i8* @fopen(i8* nocapture, i8* nocapture) #0
 declare i8* @fopen(i8*, i8*)
+
+; CHECK: declare i8 @strlen(i8* nocapture) #1
 declare i8 @strlen(i8*)
+
+; CHECK: declare noalias i32* @realloc(i32* nocapture, i32) #0
 declare i32* @realloc(i32*, i32)
 
 ; Test deliberately wrong declaration
 declare i32 @strcpy(...)
+
+; CHECK-NOT: strcpy{{.*}}noalias
+; CHECK-NOT: strcpy{{.*}}nocapture
+; CHECK-NOT: strcpy{{.*}}nounwind
+; CHECK-NOT: strcpy{{.*}}readonly
+
+; CHECK: attributes #0 = { nounwind }
+; CHECK: attributes #1 = { nounwind readonly }
diff --git a/test/Transforms/SimplifyLibCalls/float-shrink-compare.ll b/test/Transforms/SimplifyLibCalls/float-shrink-compare.ll
index aecb887beb..ad54c3e38f 100644
--- a/test/Transforms/SimplifyLibCalls/float-shrink-compare.ll
+++ b/test/Transforms/SimplifyLibCalls/float-shrink-compare.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -simplify-libcalls -instcombine %s | FileCheck %s
+; RUN: opt -S -simplify-libcalls -instcombine < %s | FileCheck %s
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
 
diff --git a/test/Transforms/StripSymbols/2010-08-25-crash.ll b/test/Transforms/StripSymbols/2010-08-25-crash.ll
index 3965c37822..7de5a02805 100644
--- a/test/Transforms/StripSymbols/2010-08-25-crash.ll
+++ b/test/Transforms/StripSymbols/2010-08-25-crash.ll
@@ -1,4 +1,4 @@
-; RUN: opt -strip-dead-debug-info -disable-output %s
+; RUN: opt -strip-dead-debug-info -disable-output < %s
 define i32 @foo() nounwind ssp {
 entry:
   ret i32 0, !dbg !8
diff --git a/test/Transforms/StripSymbols/block-address.ll b/test/Transforms/StripSymbols/block-address.ll
index d22c6b1b15..113d4d94fa 100644
--- a/test/Transforms/StripSymbols/block-address.ll
+++ b/test/Transforms/StripSymbols/block-address.ll
@@ -1,4 +1,4 @@
-; RUN: opt %s -strip -S | FileCheck %s
+; RUN: opt -strip -S < %s | FileCheck %s
 ; PR10286
 
 @main_addrs = constant [2 x i8*] [i8* blockaddress(@f, %FOO), i8* blockaddress(@f, %BAR)]
diff --git a/test/Transforms/TailCallElim/ackermann.ll b/test/Transforms/TailCallElim/Stats/ackermann.ll
index 5b5dbcc225..5b5dbcc225 100644
--- a/test/Transforms/TailCallElim/ackermann.ll
+++ b/test/Transforms/TailCallElim/Stats/ackermann.ll
diff --git a/test/Transforms/TailCallElim/dup_tail.ll b/test/Transforms/TailCallElim/Stats/dup_tail.ll
index 42ac2f9dc4..3b87ed3ca6 100644
--- a/test/Transforms/TailCallElim/dup_tail.ll
+++ b/test/Transforms/TailCallElim/Stats/dup_tail.ll
@@ -1,5 +1,7 @@
 ; Duplicate the return into if.end to enable TCE.
-; RUN: opt %s -tailcallelim -stats -disable-output 2>&1 | grep "Number of return duplicated"
+; RUN: opt -tailcallelim -stats -disable-output < %s 2>&1 | FileCheck %s
+
+; CHECK: Number of return duplicated
 
 define i32 @fib(i32 %n) nounwind ssp {
 entry:
diff --git a/test/Transforms/TailCallElim/Stats/lit.local.cfg b/test/Transforms/TailCallElim/Stats/lit.local.cfg
new file mode 100644
index 0000000000..89c0cd9d48
--- /dev/null
+++ b/test/Transforms/TailCallElim/Stats/lit.local.cfg
@@ -0,0 +1,4 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+if not config.root.enable_assertions:
+    config.unsupported = True
diff --git a/test/Transforms/TailCallElim/dont-tce-tail-marked-call.ll b/test/Transforms/TailCallElim/dont-tce-tail-marked-call.ll
index e4f8b483c3..97e67b2642 100644
--- a/test/Transforms/TailCallElim/dont-tce-tail-marked-call.ll
+++ b/test/Transforms/TailCallElim/dont-tce-tail-marked-call.ll
@@ -1,5 +1,4 @@
-; RUN: opt < %s -tailcallelim -S | \
-; RUN:    grep "call i32 @foo"
+; RUN: opt < %s -tailcallelim -S | FileCheck %s
 
 declare void @bar(i32*)
 
@@ -7,6 +6,7 @@ define i32 @foo(i32 %N) {
 	%A = alloca i32, i32 %N		; <i32*> [#uses=2]
 	store i32 17, i32* %A
 	call void @bar( i32* %A )
+; CHECK: tail call i32 @foo
 	%X = tail call i32 @foo( i32 %N )		; <i32> [#uses=1]
 	ret i32 %X
 }
diff --git a/test/Transforms/TailCallElim/intervening-inst.ll b/test/Transforms/TailCallElim/intervening-inst.ll
index 0c40bd5dc5..10dffbd694 100644
--- a/test/Transforms/TailCallElim/intervening-inst.ll
+++ b/test/Transforms/TailCallElim/intervening-inst.ll
@@ -1,5 +1,5 @@
 ; This function contains intervening instructions which should be moved out of the way
-; RUN: opt < %s -tailcallelim -S | not grep call
+; RUN: opt < %s -tailcallelim -S | FileCheck %s
 
 define i32 @Test(i32 %X) {
 entry:
@@ -10,6 +10,7 @@ then.0:		; preds = %entry
 	ret i32 %tmp.4
 endif.0:		; preds = %entry
 	%tmp.10 = add i32 %X, -1		; <i32> [#uses=1]
+; CHECK-NOT: call
 	%tmp.8 = call i32 @Test( i32 %tmp.10 )		; <i32> [#uses=1]
 	%DUMMY = add i32 %X, 1		; <i32> [#uses=0]
 	ret i32 %tmp.8
diff --git a/test/Transforms/TailCallElim/move_alloca_for_tail_call.ll b/test/Transforms/TailCallElim/move_alloca_for_tail_call.ll
index a556ddb6eb..741f5848bc 100644
--- a/test/Transforms/TailCallElim/move_alloca_for_tail_call.ll
+++ b/test/Transforms/TailCallElim/move_alloca_for_tail_call.ll
@@ -1,4 +1,4 @@
-; RUN: opt -tailcallelim %s -S | FileCheck %s
+; RUN: opt -tailcallelim -S < %s | FileCheck %s
 ; PR615
 
 declare void @bar(i32*)
diff --git a/test/Transforms/TailCallElim/nocapture.ll b/test/Transforms/TailCallElim/nocapture.ll
index 87cb9dd427..e49d87cc4b 100644
--- a/test/Transforms/TailCallElim/nocapture.ll
+++ b/test/Transforms/TailCallElim/nocapture.ll
@@ -1,4 +1,4 @@
-; RUN: opt %s -tailcallelim -S | FileCheck %s
+; RUN: opt -tailcallelim -S < %s | FileCheck %s
 ; XFAIL: *
 
 declare void @use(i8* nocapture, i8* nocapture)
diff --git a/test/Transforms/TailCallElim/reorder_load.ll b/test/Transforms/TailCallElim/reorder_load.ll
index 7f5c36e4a2..53c65dab10 100644
--- a/test/Transforms/TailCallElim/reorder_load.ll
+++ b/test/Transforms/TailCallElim/reorder_load.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -tailcallelim -S | not grep call
+; RUN: opt < %s -tailcallelim -S | FileCheck %s
 ; PR4323
 
 ; Several cases where tail call elimination should move the load above the call,
@@ -21,6 +21,7 @@ if:		; preds = %entry
 
 else:		; preds = %entry
 	%tmp7 = add i32 %start_arg, 1		; <i32> [#uses=1]
+; CHECK-NOT: call
 	%tmp8 = call fastcc i32 @raise_load_1(i32* %a_arg, i32 %a_len_arg, i32 %tmp7)		; <i32> [#uses=1]
 	%tmp9 = load i32* %a_arg		; <i32> [#uses=1]
 	%tmp10 = add i32 %tmp9, %tmp8		; <i32> [#uses=1]
@@ -47,6 +48,7 @@ unwind:		; preds = %else
 
 recurse:		; preds = %else
 	%tmp7 = add i32 %start_arg, 1		; <i32> [#uses=1]
+; CHECK-NOT: call
 	%tmp8 = call fastcc i32 @raise_load_2(i32* %a_arg, i32 %a_len_arg, i32 %tmp7)		; <i32> [#uses=1]
 	%tmp9 = load i32* @global		; <i32> [#uses=1]
 	%tmp10 = add i32 %tmp9, %tmp8		; <i32> [#uses=1]
@@ -66,6 +68,7 @@ if:		; preds = %entry
 
 else:		; preds = %entry
 	%tmp7 = add i32 %start_arg, 1		; <i32> [#uses=1]
+; CHECK-NOT: call
 	%tmp8 = call fastcc i32 @raise_load_3(i32* %a_arg, i32 %a_len_arg, i32 %tmp7)		; <i32> [#uses=1]
 	%tmp9 = load i32* @extern_weak_global		; <i32> [#uses=1]
 	%tmp10 = add i32 %tmp9, %tmp8		; <i32> [#uses=1]
@@ -94,6 +97,7 @@ unwind:		; preds = %else
 recurse:		; preds = %else
 	%tmp7 = add i32 %start_arg, 1		; <i32> [#uses=1]
 	%first = load i32* %a_arg		; <i32> [#uses=1]
+; CHECK-NOT: call
 	%tmp8 = call fastcc i32 @raise_load_4(i32* %a_arg, i32 %first, i32 %tmp7)		; <i32> [#uses=1]
 	%second = load i32* %a_arg		; <i32> [#uses=1]
 	%tmp10 = add i32 %second, %tmp8		; <i32> [#uses=1]
diff --git a/test/Transforms/TailCallElim/return_constant.ll b/test/Transforms/TailCallElim/return_constant.ll
index 48e5641bb5..e99e57e145 100644
--- a/test/Transforms/TailCallElim/return_constant.ll
+++ b/test/Transforms/TailCallElim/return_constant.ll
@@ -1,7 +1,7 @@
 ; Though this case seems to be fairly unlikely to occur in the wild, someone
 ; plunked it into the demo script, so maybe they care about it.
 ;
-; RUN: opt < %s -tailcallelim -S | not grep call
+; RUN: opt < %s -tailcallelim -S | FileCheck %s
 
 define i32 @aaa(i32 %c) {
 entry:
@@ -9,6 +9,7 @@ entry:
 	br i1 %tmp.1, label %return, label %else
 else:		; preds = %entry
 	%tmp.5 = add i32 %c, -1		; <i32> [#uses=1]
+; CHECK-NOT: call
 	%tmp.3 = call i32 @aaa( i32 %tmp.5 )		; <i32> [#uses=0]
 	ret i32 0
 return:		; preds = %entry
diff --git a/test/Transforms/TailCallElim/trivial_codegen_tailcall.ll b/test/Transforms/TailCallElim/trivial_codegen_tailcall.ll
index 3d01d17099..7049e4d588 100644
--- a/test/Transforms/TailCallElim/trivial_codegen_tailcall.ll
+++ b/test/Transforms/TailCallElim/trivial_codegen_tailcall.ll
@@ -1,11 +1,11 @@
-; RUN: opt < %s -tailcallelim -S | \
-; RUN:    grep "tail call void @foo"
+; RUN: opt < %s -tailcallelim -S | FileCheck %s
 
 
 declare void @foo()
 
 define void @bar() {
-	call void @foo( )
+; CHECK: tail call void @foo()
+	call void @foo()
 	ret void
 }
 
diff --git a/test/Transforms/TailDup/2008-06-11-AvoidDupLoopHeader.ll b/test/Transforms/TailDup/Stats/2008-06-11-AvoidDupLoopHeader.ll
index 7853d7ba06..7853d7ba06 100644
--- a/test/Transforms/TailDup/2008-06-11-AvoidDupLoopHeader.ll
+++ b/test/Transforms/TailDup/Stats/2008-06-11-AvoidDupLoopHeader.ll
diff --git a/test/Transforms/TailDup/Stats/lit.local.cfg b/test/Transforms/TailDup/Stats/lit.local.cfg
new file mode 100644
index 0000000000..89c0cd9d48
--- /dev/null
+++ b/test/Transforms/TailDup/Stats/lit.local.cfg
@@ -0,0 +1,4 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+if not config.root.enable_assertions:
+    config.unsupported = True