aboutsummaryrefslogtreecommitdiff
path: root/test/CodeGen/PowerPC
diff options
context:
space:
mode:
authorAlexander Kornienko <alexfh@google.com>2013-04-03 14:07:16 +0000
committerAlexander Kornienko <alexfh@google.com>2013-04-03 14:07:16 +0000
commite133bc868944822bf8961f825d3aa63d6fa48fb7 (patch)
treeebbd4a8040181471467a9737d90d94dc6b58b316 /test/CodeGen/PowerPC
parent647735c781c5b37061ee03d6e9e6c7dda92218e2 (diff)
parent080e3c523e87ec68ca1ea5db4cd49816028dd8bd (diff)
Updating branches/google/stable to r178511stable
git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/google/stable@178655 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test/CodeGen/PowerPC')
-rw-r--r--test/CodeGen/PowerPC/2007-09-04-AltivecDST.ll2
-rw-r--r--test/CodeGen/PowerPC/2009-08-17-inline-asm-addr-mode-breakage.ll4
-rw-r--r--test/CodeGen/PowerPC/2010-02-12-saveCR.ll39
-rw-r--r--test/CodeGen/PowerPC/2010-05-03-retaddr1.ll4
-rw-r--r--test/CodeGen/PowerPC/2010-12-18-PPCStackRefs.ll2
-rw-r--r--test/CodeGen/PowerPC/DbgValueOtherTargets.test2
-rw-r--r--test/CodeGen/PowerPC/LargeAbsoluteAddr.ll6
-rw-r--r--test/CodeGen/PowerPC/Stats/lit.local.cfg8
-rw-r--r--test/CodeGen/PowerPC/allocate-r0.ll18
-rw-r--r--test/CodeGen/PowerPC/asym-regclass-copy.ll56
-rw-r--r--test/CodeGen/PowerPC/atomic-1.ll6
-rw-r--r--test/CodeGen/PowerPC/bswap-load-store.ll35
-rw-r--r--test/CodeGen/PowerPC/cr-spills.ll409
-rw-r--r--test/CodeGen/PowerPC/ctr-cleanup.ll25
-rw-r--r--test/CodeGen/PowerPC/cttz.ll4
-rw-r--r--test/CodeGen/PowerPC/dbg.ll4
-rw-r--r--test/CodeGen/PowerPC/float-to-int.ll93
-rw-r--r--test/CodeGen/PowerPC/frame-size.ll32
-rw-r--r--test/CodeGen/PowerPC/frameaddr.ll47
-rw-r--r--test/CodeGen/PowerPC/i32-to-float.ll82
-rw-r--r--test/CodeGen/PowerPC/i64-to-float.ll52
-rw-r--r--test/CodeGen/PowerPC/i64_fp_round.ll4
-rw-r--r--test/CodeGen/PowerPC/iabs.ll (renamed from test/CodeGen/PowerPC/Stats/iabs.ll)1
-rw-r--r--test/CodeGen/PowerPC/jaggedstructs.ll10
-rw-r--r--test/CodeGen/PowerPC/lbzux.ll2
-rw-r--r--test/CodeGen/PowerPC/lit.local.cfg2
-rw-r--r--test/CodeGen/PowerPC/negctr.ll83
-rw-r--r--test/CodeGen/PowerPC/popcnt.ll40
-rw-r--r--test/CodeGen/PowerPC/r31.ll15
-rw-r--r--test/CodeGen/PowerPC/rlwimi3.ll (renamed from test/CodeGen/PowerPC/Stats/rlwimi3.ll)1
-rw-r--r--test/CodeGen/PowerPC/rounding-ops.ll145
-rw-r--r--test/CodeGen/PowerPC/sjlj.ll112
-rw-r--r--test/CodeGen/PowerPC/stfiwx-2.ll9
-rw-r--r--test/CodeGen/PowerPC/store-update.ll170
-rw-r--r--test/CodeGen/PowerPC/structsinmem.ll22
-rw-r--r--test/CodeGen/PowerPC/structsinregs.ll42
-rw-r--r--test/CodeGen/PowerPC/stwu8.ll2
-rw-r--r--test/CodeGen/PowerPC/tls-gd.ll2
-rw-r--r--test/CodeGen/PowerPC/tls-ld-2.ll4
-rw-r--r--test/CodeGen/PowerPC/tls-ld.ll4
-rw-r--r--test/CodeGen/PowerPC/tls.ll2
-rw-r--r--test/CodeGen/PowerPC/unal4-std.ll27
-rw-r--r--test/CodeGen/PowerPC/unaligned.ll73
-rw-r--r--test/CodeGen/PowerPC/varargs.ll21
-rw-r--r--test/CodeGen/PowerPC/vec_rounding.ll36
-rw-r--r--test/CodeGen/PowerPC/vrsave-spill.ll19
-rw-r--r--test/CodeGen/PowerPC/vrspill.ll4
47 files changed, 1646 insertions, 136 deletions
diff --git a/test/CodeGen/PowerPC/2007-09-04-AltivecDST.ll b/test/CodeGen/PowerPC/2007-09-04-AltivecDST.ll
index 82ef2b82cb..b6feb5abbc 100644
--- a/test/CodeGen/PowerPC/2007-09-04-AltivecDST.ll
+++ b/test/CodeGen/PowerPC/2007-09-04-AltivecDST.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=ppc64 | grep dst | count 4
+; RUN: llc < %s -march=ppc64 -mattr=+altivec | grep dst | count 4
define hidden void @_Z4borkPc(i8* %image) {
entry:
diff --git a/test/CodeGen/PowerPC/2009-08-17-inline-asm-addr-mode-breakage.ll b/test/CodeGen/PowerPC/2009-08-17-inline-asm-addr-mode-breakage.ll
index 84aa40c4b5..91253daae3 100644
--- a/test/CodeGen/PowerPC/2009-08-17-inline-asm-addr-mode-breakage.ll
+++ b/test/CodeGen/PowerPC/2009-08-17-inline-asm-addr-mode-breakage.ll
@@ -10,8 +10,8 @@ target triple = "powerpc-apple-darwin10.0"
define void @foo(i32 %y) nounwind ssp {
entry:
; CHECK: foo
-; CHECK: add r3
-; CHECK: 0(r3)
+; CHECK: add r2
+; CHECK: 0(r2)
%y_addr = alloca i32 ; <i32*> [#uses=2]
%"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
store i32 %y, i32* %y_addr
diff --git a/test/CodeGen/PowerPC/2010-02-12-saveCR.ll b/test/CodeGen/PowerPC/2010-02-12-saveCR.ll
index 974a99a52c..097611a761 100644
--- a/test/CodeGen/PowerPC/2010-02-12-saveCR.ll
+++ b/test/CodeGen/PowerPC/2010-02-12-saveCR.ll
@@ -2,21 +2,21 @@
; ModuleID = 'hh.c'
target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128-n32"
target triple = "powerpc-apple-darwin9.6"
-; This formerly used R0 for both the stack address and CR.
define void @foo() nounwind {
entry:
-;CHECK: mfcr r2
-;CHECK: lis r3, 1
-;CHECK: rlwinm r2, r2, 8, 0, 31
-;CHECK: ori r3, r3, 34524
-;CHECK: stwx r2, r1, r3
-; Make sure that the register scavenger returns the same temporary register.
-;CHECK: mfcr r2
-;CHECK: lis r3, 1
-;CHECK: rlwinm r2, r2, 12, 0, 31
-;CHECK: ori r3, r3, 34520
-;CHECK: stwx r2, r1, r3
+; Note that part of what is being checked here is proper register reuse.
+; CHECK: mfcr [[T1:r[0-9]+]] ; cr2
+; CHECK: lis [[T2:r[0-9]+]], 1
+; CHECK: addi r3, r1, 72
+; CHECK: rlwinm [[T1]], [[T1]], 8, 0, 31
+; CHECK: ori [[T2]], [[T2]], 34540
+; CHECK: stwx [[T1]], r1, [[T2]]
+; CHECK: lis [[T3:r[0-9]+]], 1
+; CHECK: mfcr [[T4:r[0-9]+]] ; cr3
+; CHECK: ori [[T3]], [[T3]], 34536
+; CHECK: rlwinm [[T4]], [[T4]], 12, 0, 31
+; CHECK: stwx [[T4]], r1, [[T3]]
%x = alloca [100000 x i8] ; <[100000 x i8]*> [#uses=1]
%"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
%x1 = bitcast [100000 x i8]* %x to i8* ; <i8*> [#uses=1]
@@ -25,11 +25,16 @@ entry:
br label %return
return: ; preds = %entry
-;CHECK: lis r3, 1
-;CHECK: ori r3, r3, 34524
-;CHECK: lwzx r2, r1, r3
-;CHECK: rlwinm r2, r2, 24, 0, 31
-;CHECK: mtcrf 32, r2
+; CHECK: lis [[T1:r[0-9]+]], 1
+; CHECK: ori [[T1]], [[T1]], 34536
+; CHECK: lwzx [[T1]], r1, [[T1]]
+; CHECK: rlwinm [[T1]], [[T1]], 20, 0, 31
+; CHECK: mtcrf 16, [[T1]]
+; CHECK: lis [[T1]], 1
+; CHECK: ori [[T1]], [[T1]], 34540
+; CHECK: lwzx [[T1]], r1, [[T1]]
+; CHECK: rlwinm [[T1]], [[T1]], 24, 0, 31
+; CHECK: mtcrf 32, [[T1]]
ret void
}
diff --git a/test/CodeGen/PowerPC/2010-05-03-retaddr1.ll b/test/CodeGen/PowerPC/2010-05-03-retaddr1.ll
index 72ae9d6c73..0dbc2d0180 100644
--- a/test/CodeGen/PowerPC/2010-05-03-retaddr1.ll
+++ b/test/CodeGen/PowerPC/2010-05-03-retaddr1.ll
@@ -18,8 +18,8 @@ entry:
; CHECK: _g:
; CHECK: mflr r0
; CHECK: stw r0, 8(r1)
-; CHECK: lwz r3, 0(r1)
-; CHECK: lwz r3, 8(r3)
+; CHECK: lwz r2, 0(r1)
+; CHECK: lwz r3, 8(r2)
%0 = tail call i8* @llvm.returnaddress(i32 1) ; <i8*> [#uses=1]
ret i8* %0
}
diff --git a/test/CodeGen/PowerPC/2010-12-18-PPCStackRefs.ll b/test/CodeGen/PowerPC/2010-12-18-PPCStackRefs.ll
index bf3d577a36..d1a3c9f46b 100644
--- a/test/CodeGen/PowerPC/2010-12-18-PPCStackRefs.ll
+++ b/test/CodeGen/PowerPC/2010-12-18-PPCStackRefs.ll
@@ -7,7 +7,7 @@ define i32 @main() nounwind {
entry:
; Make sure we're generating references using the red zone
; CHECK: main:
-; CHECK: stw r3, -12(r1)
+; CHECK: stw r2, -12(r1)
%retval = alloca i32
%0 = alloca i32
%"alloca point" = bitcast i32 0 to i32
diff --git a/test/CodeGen/PowerPC/DbgValueOtherTargets.test b/test/CodeGen/PowerPC/DbgValueOtherTargets.test
index b1b338776b..9702934f7e 100644
--- a/test/CodeGen/PowerPC/DbgValueOtherTargets.test
+++ b/test/CodeGen/PowerPC/DbgValueOtherTargets.test
@@ -1 +1 @@
-; RUN: llc -O0 -march=ppc32 -asm-verbose < %S/../Inputs/DbgValueOtherTargets.ll | FileCheck %S/../Inputs/DbgValueOtherTargets.ll
+RUN: llc -O0 -march=ppc32 -asm-verbose < %S/../Inputs/DbgValueOtherTargets.ll | FileCheck %S/../Inputs/DbgValueOtherTargets.ll
diff --git a/test/CodeGen/PowerPC/LargeAbsoluteAddr.ll b/test/CodeGen/PowerPC/LargeAbsoluteAddr.ll
index 6f985c819f..e8765deab0 100644
--- a/test/CodeGen/PowerPC/LargeAbsoluteAddr.ll
+++ b/test/CodeGen/PowerPC/LargeAbsoluteAddr.ll
@@ -1,9 +1,9 @@
; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin | \
-; RUN: grep "stw r4, 32751"
+; RUN: grep "stw r3, 32751"
; RUN: llc < %s -march=ppc64 -mtriple=powerpc-apple-darwin | \
-; RUN: grep "stw r4, 32751"
+; RUN: grep "stw r3, 32751"
; RUN: llc < %s -march=ppc64 -mtriple=powerpc-apple-darwin | \
-; RUN: grep "std r4, 9024"
+; RUN: grep "std r3, 9024"
define void @test() nounwind {
store i32 0, i32* inttoptr (i64 48725999 to i32*)
diff --git a/test/CodeGen/PowerPC/Stats/lit.local.cfg b/test/CodeGen/PowerPC/Stats/lit.local.cfg
deleted file mode 100644
index 2608e139e9..0000000000
--- a/test/CodeGen/PowerPC/Stats/lit.local.cfg
+++ /dev/null
@@ -1,8 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
-
-targets = set(config.root.targets_to_build.split())
-if not 'PowerPC' in targets:
- config.unsupported = True
-
-if not config.root.enable_assertions:
- config.unsupported = True
diff --git a/test/CodeGen/PowerPC/allocate-r0.ll b/test/CodeGen/PowerPC/allocate-r0.ll
new file mode 100644
index 0000000000..1cf4cec076
--- /dev/null
+++ b/test/CodeGen/PowerPC/allocate-r0.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=g5 | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define i64 @foo(i64 %a) nounwind {
+entry:
+ call void asm sideeffect "", "~{r3},~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11},~{r12}"() nounwind
+ br label %return
+
+; CHECK: @foo
+; Because r0 is allocatable, we can use it to hold r3 without spilling.
+; CHECK: mr 0, 3
+; CHECK: mr 3, 0
+
+return: ; preds = %entry
+ ret i64 %a
+}
+
diff --git a/test/CodeGen/PowerPC/asym-regclass-copy.ll b/test/CodeGen/PowerPC/asym-regclass-copy.ll
new file mode 100644
index 0000000000..d04a6c98ee
--- /dev/null
+++ b/test/CodeGen/PowerPC/asym-regclass-copy.ll
@@ -0,0 +1,56 @@
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; This tests that the GPRC/GPRC_NOR0 intersection subclass relationship with
+; GPRC is handled correctly. When it was not, this test would assert.
+
+@gen_random.last = external unnamed_addr global i64, align 8
+@.str = external unnamed_addr constant [4 x i8], align 1
+
+declare double @gen_random(double) #0
+
+declare void @benchmark_heapsort(i32 signext, double* nocapture) #0
+
+define signext i32 @main(i32 signext %argc, i8** nocapture %argv) #0 {
+entry:
+ br i1 undef, label %cond.true, label %cond.end
+
+cond.true: ; preds = %entry
+ br label %cond.end
+
+cond.end: ; preds = %cond.true, %entry
+ %cond = phi i32 [ 0, %cond.true ], [ 8000000, %entry ]
+ %add = add i32 %cond, 1
+ %conv = sext i32 %add to i64
+ %mul = shl nsw i64 %conv, 3
+ %call1 = tail call noalias i8* @malloc(i64 %mul) #1
+ br i1 undef, label %for.end, label %for.body.lr.ph
+
+for.body.lr.ph: ; preds = %cond.end
+ br label %for.body
+
+for.body: ; preds = %for.body, %for.body.lr.ph
+ %indvars.iv = phi i64 [ 1, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %add
+ br i1 %exitcond, label %for.cond.for.end_crit_edge, label %for.body
+
+for.cond.for.end_crit_edge: ; preds = %for.body
+ br label %for.end
+
+for.end: ; preds = %for.cond.for.end_crit_edge, %cond.end
+ ret i32 0
+}
+
+declare noalias i8* @malloc(i64) #0
+
+declare signext i32 @printf(i8* nocapture, ...) #0
+
+declare void @free(i8* nocapture) #0
+
+declare i64 @strtol(i8*, i8** nocapture, i32 signext) #0
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind }
diff --git a/test/CodeGen/PowerPC/atomic-1.ll b/test/CodeGen/PowerPC/atomic-1.ll
index cbfa4094fb..838db20ddd 100644
--- a/test/CodeGen/PowerPC/atomic-1.ll
+++ b/test/CodeGen/PowerPC/atomic-1.ll
@@ -1,10 +1,10 @@
-; RUN: llc < %s -march=ppc32 | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc-apple-darwin -march=ppc32 | FileCheck %s
define i32 @exchange_and_add(i32* %mem, i32 %val) nounwind {
; CHECK: exchange_and_add:
-; CHECK: lwarx
+; CHECK: lwarx {{r[0-9]+}}, 0, {{r[0-9]+}}
%tmp = atomicrmw add i32* %mem, i32 %val monotonic
-; CHECK: stwcx.
+; CHECK: stwcx. {{r[0-9]+}}, 0, {{r[0-9]+}}
ret i32 %tmp
}
diff --git a/test/CodeGen/PowerPC/bswap-load-store.ll b/test/CodeGen/PowerPC/bswap-load-store.ll
index 4f6bfc7299..53bbc52167 100644
--- a/test/CodeGen/PowerPC/bswap-load-store.ll
+++ b/test/CodeGen/PowerPC/bswap-load-store.ll
@@ -1,5 +1,7 @@
-; RUN: llc < %s -march=ppc32 | FileCheck %s -check-prefix=X32
-; RUN: llc < %s -march=ppc64 | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -march=ppc32 -mcpu=ppc32 | FileCheck %s -check-prefix=X32
+; RUN: llc < %s -march=ppc64 -mcpu=ppc64 | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -march=ppc64 -mcpu=pwr7 | FileCheck %s -check-prefix=PWR7
+; RUN: llc < %s -march=ppc32 -mcpu=pwr7 | FileCheck %s -check-prefix=X32
define void @STWBRX(i32 %i, i8* %ptr, i32 %off) {
@@ -34,18 +36,47 @@ define i16 @LHBRX(i8* %ptr, i32 %off) {
ret i16 %tmp6
}
+define void @STDBRX(i64 %i, i8* %ptr, i64 %off) {
+ %tmp1 = getelementptr i8* %ptr, i64 %off ; <i8*> [#uses=1]
+ %tmp1.upgrd.1 = bitcast i8* %tmp1 to i64* ; <i64*> [#uses=1]
+ %tmp13 = tail call i64 @llvm.bswap.i64( i64 %i ) ; <i64> [#uses=1]
+ store i64 %tmp13, i64* %tmp1.upgrd.1
+ ret void
+}
+
+define i64 @LDBRX(i8* %ptr, i64 %off) {
+ %tmp1 = getelementptr i8* %ptr, i64 %off ; <i8*> [#uses=1]
+ %tmp1.upgrd.2 = bitcast i8* %tmp1 to i64* ; <i64*> [#uses=1]
+ %tmp = load i64* %tmp1.upgrd.2 ; <i64> [#uses=1]
+ %tmp14 = tail call i64 @llvm.bswap.i64( i64 %tmp ) ; <i64> [#uses=1]
+ ret i64 %tmp14
+}
+
declare i32 @llvm.bswap.i32(i32)
declare i16 @llvm.bswap.i16(i16)
+declare i64 @llvm.bswap.i64(i64)
+
; X32: stwbrx
; X32: lwbrx
; X32: sthbrx
; X32: lhbrx
+; X32-NOT: ldbrx
+; X32-NOT: stdbrx
; X64: stwbrx
; X64: lwbrx
; X64: sthbrx
; X64: lhbrx
+; X64-NOT: ldbrx
+; X64-NOT: stdbrx
+
+; PWR7: stwbrx
+; PWR7: lwbrx
+; PWR7: sthbrx
+; PWR7: lhbrx
+; PWR7: stdbrx
+; PWR7: ldbrx
diff --git a/test/CodeGen/PowerPC/cr-spills.ll b/test/CodeGen/PowerPC/cr-spills.ll
new file mode 100644
index 0000000000..d6df7a2376
--- /dev/null
+++ b/test/CodeGen/PowerPC/cr-spills.ll
@@ -0,0 +1,409 @@
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; This test case triggers several functions related to cr spilling, both in
+; frame lowering and to handle cr register pressure. When the register kill
+; flags were not being set correctly, this would cause the register scavenger to
+; assert.
+
+@SetupFastFullPelSearch.orig_pels = external unnamed_addr global [768 x i16], align 2
+@weight_luma = external global i32
+@offset_luma = external global i32
+@wp_luma_round = external global i32, align 4
+@luma_log_weight_denom = external global i32, align 4
+
+define void @SetupFastFullPelSearch() #0 {
+entry:
+ %mul10 = mul nsw i32 undef, undef
+ br i1 undef, label %land.end, label %land.lhs.true
+
+land.lhs.true: ; preds = %entry
+ switch i32 0, label %land.end [
+ i32 0, label %land.rhs
+ i32 3, label %land.rhs
+ ]
+
+land.rhs: ; preds = %land.lhs.true, %land.lhs.true
+ %tobool21 = icmp ne i32 undef, 0
+ br label %land.end
+
+land.end: ; preds = %land.rhs, %land.lhs.true, %entry
+ %0 = phi i1 [ %tobool21, %land.rhs ], [ false, %land.lhs.true ], [ false, %entry ]
+ %cond = load i32** undef, align 8
+ br i1 undef, label %if.then95, label %for.body.lr.ph
+
+if.then95: ; preds = %land.end
+ %cmp.i4.i1427 = icmp slt i32 undef, undef
+ br label %for.body.lr.ph
+
+for.body.lr.ph: ; preds = %if.then95, %land.end
+ br label %for.body
+
+for.body: ; preds = %for.body, %for.body.lr.ph
+ br i1 undef, label %for.body, label %for.body252
+
+for.body252: ; preds = %for.inc997, %for.body
+ %shl263 = add i32 undef, 80
+ br i1 %0, label %for.cond286.preheader, label %for.cond713.preheader
+
+for.cond286.preheader: ; preds = %for.body252
+ br label %for.cond290.preheader
+
+for.cond290.preheader: ; preds = %for.end520, %for.cond286.preheader
+ %srcptr.31595 = phi i16* [ getelementptr inbounds ([768 x i16]* @SetupFastFullPelSearch.orig_pels, i64 0, i64 0), %for.cond286.preheader ], [ null, %for.end520 ]
+ %1 = load i32* undef, align 4, !tbaa !0
+ %2 = load i32* @weight_luma, align 4, !tbaa !0
+ %3 = load i32* @wp_luma_round, align 4, !tbaa !0
+ %4 = load i32* @luma_log_weight_denom, align 4, !tbaa !0
+ %5 = load i32* @offset_luma, align 4, !tbaa !0
+ %incdec.ptr502.sum = add i64 undef, 16
+ br label %for.body293
+
+for.body293: ; preds = %for.body293, %for.cond290.preheader
+ %srcptr.41591 = phi i16* [ %srcptr.31595, %for.cond290.preheader ], [ undef, %for.body293 ]
+ %refptr.11590 = phi i16* [ undef, %for.cond290.preheader ], [ %add.ptr517, %for.body293 ]
+ %LineSadBlk0.01588 = phi i32 [ 0, %for.cond290.preheader ], [ %add346, %for.body293 ]
+ %LineSadBlk1.01587 = phi i32 [ 0, %for.cond290.preheader ], [ %add402, %for.body293 ]
+ %LineSadBlk3.01586 = phi i32 [ 0, %for.cond290.preheader ], [ %add514, %for.body293 ]
+ %LineSadBlk2.01585 = phi i32 [ 0, %for.cond290.preheader ], [ %add458, %for.body293 ]
+ %6 = load i16* %refptr.11590, align 2, !tbaa !3
+ %conv294 = zext i16 %6 to i32
+ %mul295 = mul nsw i32 %conv294, %2
+ %add296 = add nsw i32 %mul295, %3
+ %shr = ashr i32 %add296, %4
+ %add297 = add nsw i32 %shr, %5
+ %cmp.i.i1513 = icmp sgt i32 %add297, 0
+ %cond.i.i1514 = select i1 %cmp.i.i1513, i32 %add297, i32 0
+ %cmp.i4.i1515 = icmp slt i32 %cond.i.i1514, %1
+ %cond.i5.i1516 = select i1 %cmp.i4.i1515, i32 %cond.i.i1514, i32 %1
+ %7 = load i16* %srcptr.41591, align 2, !tbaa !3
+ %conv300 = zext i16 %7 to i32
+ %sub301 = sub nsw i32 %cond.i5.i1516, %conv300
+ %idxprom302 = sext i32 %sub301 to i64
+ %arrayidx303 = getelementptr inbounds i32* %cond, i64 %idxprom302
+ %8 = load i32* %arrayidx303, align 4, !tbaa !0
+ %add304 = add nsw i32 %8, %LineSadBlk0.01588
+ %9 = load i32* undef, align 4, !tbaa !0
+ %add318 = add nsw i32 %add304, %9
+ %10 = load i16* undef, align 2, !tbaa !3
+ %conv321 = zext i16 %10 to i32
+ %mul322 = mul nsw i32 %conv321, %2
+ %add323 = add nsw i32 %mul322, %3
+ %shr324 = ashr i32 %add323, %4
+ %add325 = add nsw i32 %shr324, %5
+ %cmp.i.i1505 = icmp sgt i32 %add325, 0
+ %cond.i.i1506 = select i1 %cmp.i.i1505, i32 %add325, i32 0
+ %cmp.i4.i1507 = icmp slt i32 %cond.i.i1506, %1
+ %cond.i5.i1508 = select i1 %cmp.i4.i1507, i32 %cond.i.i1506, i32 %1
+ %sub329 = sub nsw i32 %cond.i5.i1508, 0
+ %idxprom330 = sext i32 %sub329 to i64
+ %arrayidx331 = getelementptr inbounds i32* %cond, i64 %idxprom330
+ %11 = load i32* %arrayidx331, align 4, !tbaa !0
+ %add332 = add nsw i32 %add318, %11
+ %cmp.i.i1501 = icmp sgt i32 undef, 0
+ %cond.i.i1502 = select i1 %cmp.i.i1501, i32 undef, i32 0
+ %cmp.i4.i1503 = icmp slt i32 %cond.i.i1502, %1
+ %cond.i5.i1504 = select i1 %cmp.i4.i1503, i32 %cond.i.i1502, i32 %1
+ %incdec.ptr341 = getelementptr inbounds i16* %srcptr.41591, i64 4
+ %12 = load i16* null, align 2, !tbaa !3
+ %conv342 = zext i16 %12 to i32
+ %sub343 = sub nsw i32 %cond.i5.i1504, %conv342
+ %idxprom344 = sext i32 %sub343 to i64
+ %arrayidx345 = getelementptr inbounds i32* %cond, i64 %idxprom344
+ %13 = load i32* %arrayidx345, align 4, !tbaa !0
+ %add346 = add nsw i32 %add332, %13
+ %incdec.ptr348 = getelementptr inbounds i16* %refptr.11590, i64 5
+ %14 = load i16* null, align 2, !tbaa !3
+ %conv349 = zext i16 %14 to i32
+ %mul350 = mul nsw i32 %conv349, %2
+ %add351 = add nsw i32 %mul350, %3
+ %shr352 = ashr i32 %add351, %4
+ %add353 = add nsw i32 %shr352, %5
+ %cmp.i.i1497 = icmp sgt i32 %add353, 0
+ %cond.i.i1498 = select i1 %cmp.i.i1497, i32 %add353, i32 0
+ %cmp.i4.i1499 = icmp slt i32 %cond.i.i1498, %1
+ %cond.i5.i1500 = select i1 %cmp.i4.i1499, i32 %cond.i.i1498, i32 %1
+ %incdec.ptr355 = getelementptr inbounds i16* %srcptr.41591, i64 5
+ %15 = load i16* %incdec.ptr341, align 2, !tbaa !3
+ %conv356 = zext i16 %15 to i32
+ %sub357 = sub nsw i32 %cond.i5.i1500, %conv356
+ %idxprom358 = sext i32 %sub357 to i64
+ %arrayidx359 = getelementptr inbounds i32* %cond, i64 %idxprom358
+ %16 = load i32* %arrayidx359, align 4, !tbaa !0
+ %add360 = add nsw i32 %16, %LineSadBlk1.01587
+ %incdec.ptr362 = getelementptr inbounds i16* %refptr.11590, i64 6
+ %17 = load i16* %incdec.ptr348, align 2, !tbaa !3
+ %conv363 = zext i16 %17 to i32
+ %mul364 = mul nsw i32 %conv363, %2
+ %add365 = add nsw i32 %mul364, %3
+ %shr366 = ashr i32 %add365, %4
+ %add367 = add nsw i32 %shr366, %5
+ %cmp.i.i1493 = icmp sgt i32 %add367, 0
+ %cond.i.i1494 = select i1 %cmp.i.i1493, i32 %add367, i32 0
+ %cmp.i4.i1495 = icmp slt i32 %cond.i.i1494, %1
+ %cond.i5.i1496 = select i1 %cmp.i4.i1495, i32 %cond.i.i1494, i32 %1
+ %incdec.ptr369 = getelementptr inbounds i16* %srcptr.41591, i64 6
+ %18 = load i16* %incdec.ptr355, align 2, !tbaa !3
+ %conv370 = zext i16 %18 to i32
+ %sub371 = sub nsw i32 %cond.i5.i1496, %conv370
+ %idxprom372 = sext i32 %sub371 to i64
+ %arrayidx373 = getelementptr inbounds i32* %cond, i64 %idxprom372
+ %19 = load i32* %arrayidx373, align 4, !tbaa !0
+ %add374 = add nsw i32 %add360, %19
+ %incdec.ptr376 = getelementptr inbounds i16* %refptr.11590, i64 7
+ %20 = load i16* %incdec.ptr362, align 2, !tbaa !3
+ %conv377 = zext i16 %20 to i32
+ %mul378 = mul nsw i32 %conv377, %2
+ %add379 = add nsw i32 %mul378, %3
+ %shr380 = ashr i32 %add379, %4
+ %add381 = add nsw i32 %shr380, %5
+ %cmp.i.i1489 = icmp sgt i32 %add381, 0
+ %cond.i.i1490 = select i1 %cmp.i.i1489, i32 %add381, i32 0
+ %cmp.i4.i1491 = icmp slt i32 %cond.i.i1490, %1
+ %cond.i5.i1492 = select i1 %cmp.i4.i1491, i32 %cond.i.i1490, i32 %1
+ %incdec.ptr383 = getelementptr inbounds i16* %srcptr.41591, i64 7
+ %21 = load i16* %incdec.ptr369, align 2, !tbaa !3
+ %conv384 = zext i16 %21 to i32
+ %sub385 = sub nsw i32 %cond.i5.i1492, %conv384
+ %idxprom386 = sext i32 %sub385 to i64
+ %arrayidx387 = getelementptr inbounds i32* %cond, i64 %idxprom386
+ %22 = load i32* %arrayidx387, align 4, !tbaa !0
+ %add388 = add nsw i32 %add374, %22
+ %23 = load i16* %incdec.ptr376, align 2, !tbaa !3
+ %conv391 = zext i16 %23 to i32
+ %mul392 = mul nsw i32 %conv391, %2
+ %add395 = add nsw i32 0, %5
+ %cmp.i.i1485 = icmp sgt i32 %add395, 0
+ %cond.i.i1486 = select i1 %cmp.i.i1485, i32 %add395, i32 0
+ %cmp.i4.i1487 = icmp slt i32 %cond.i.i1486, %1
+ %cond.i5.i1488 = select i1 %cmp.i4.i1487, i32 %cond.i.i1486, i32 %1
+ %incdec.ptr397 = getelementptr inbounds i16* %srcptr.41591, i64 8
+ %24 = load i16* %incdec.ptr383, align 2, !tbaa !3
+ %conv398 = zext i16 %24 to i32
+ %sub399 = sub nsw i32 %cond.i5.i1488, %conv398
+ %idxprom400 = sext i32 %sub399 to i64
+ %arrayidx401 = getelementptr inbounds i32* %cond, i64 %idxprom400
+ %25 = load i32* %arrayidx401, align 4, !tbaa !0
+ %add402 = add nsw i32 %add388, %25
+ %incdec.ptr404 = getelementptr inbounds i16* %refptr.11590, i64 9
+ %cmp.i4.i1483 = icmp slt i32 undef, %1
+ %cond.i5.i1484 = select i1 %cmp.i4.i1483, i32 undef, i32 %1
+ %26 = load i16* %incdec.ptr397, align 2, !tbaa !3
+ %conv412 = zext i16 %26 to i32
+ %sub413 = sub nsw i32 %cond.i5.i1484, %conv412
+ %idxprom414 = sext i32 %sub413 to i64
+ %arrayidx415 = getelementptr inbounds i32* %cond, i64 %idxprom414
+ %27 = load i32* %arrayidx415, align 4, !tbaa !0
+ %add416 = add nsw i32 %27, %LineSadBlk2.01585
+ %incdec.ptr418 = getelementptr inbounds i16* %refptr.11590, i64 10
+ %28 = load i16* %incdec.ptr404, align 2, !tbaa !3
+ %conv419 = zext i16 %28 to i32
+ %mul420 = mul nsw i32 %conv419, %2
+ %add421 = add nsw i32 %mul420, %3
+ %shr422 = ashr i32 %add421, %4
+ %add423 = add nsw i32 %shr422, %5
+ %cmp.i.i1477 = icmp sgt i32 %add423, 0
+ %cond.i.i1478 = select i1 %cmp.i.i1477, i32 %add423, i32 0
+ %cmp.i4.i1479 = icmp slt i32 %cond.i.i1478, %1
+ %cond.i5.i1480 = select i1 %cmp.i4.i1479, i32 %cond.i.i1478, i32 %1
+ %incdec.ptr425 = getelementptr inbounds i16* %srcptr.41591, i64 10
+ %sub427 = sub nsw i32 %cond.i5.i1480, 0
+ %idxprom428 = sext i32 %sub427 to i64
+ %arrayidx429 = getelementptr inbounds i32* %cond, i64 %idxprom428
+ %29 = load i32* %arrayidx429, align 4, !tbaa !0
+ %add430 = add nsw i32 %add416, %29
+ %incdec.ptr432 = getelementptr inbounds i16* %refptr.11590, i64 11
+ %30 = load i16* %incdec.ptr418, align 2, !tbaa !3
+ %conv433 = zext i16 %30 to i32
+ %mul434 = mul nsw i32 %conv433, %2
+ %add435 = add nsw i32 %mul434, %3
+ %shr436 = ashr i32 %add435, %4
+ %add437 = add nsw i32 %shr436, %5
+ %cmp.i.i1473 = icmp sgt i32 %add437, 0
+ %cond.i.i1474 = select i1 %cmp.i.i1473, i32 %add437, i32 0
+ %cmp.i4.i1475 = icmp slt i32 %cond.i.i1474, %1
+ %cond.i5.i1476 = select i1 %cmp.i4.i1475, i32 %cond.i.i1474, i32 %1
+ %31 = load i16* %incdec.ptr425, align 2, !tbaa !3
+ %conv440 = zext i16 %31 to i32
+ %sub441 = sub nsw i32 %cond.i5.i1476, %conv440
+ %idxprom442 = sext i32 %sub441 to i64
+ %arrayidx443 = getelementptr inbounds i32* %cond, i64 %idxprom442
+ %32 = load i32* %arrayidx443, align 4, !tbaa !0
+ %add444 = add nsw i32 %add430, %32
+ %incdec.ptr446 = getelementptr inbounds i16* %refptr.11590, i64 12
+ %33 = load i16* %incdec.ptr432, align 2, !tbaa !3
+ %conv447 = zext i16 %33 to i32
+ %mul448 = mul nsw i32 %conv447, %2
+ %add449 = add nsw i32 %mul448, %3
+ %shr450 = ashr i32 %add449, %4
+ %add451 = add nsw i32 %shr450, %5
+ %cmp.i.i1469 = icmp sgt i32 %add451, 0
+ %cond.i.i1470 = select i1 %cmp.i.i1469, i32 %add451, i32 0
+ %cmp.i4.i1471 = icmp slt i32 %cond.i.i1470, %1
+ %cond.i5.i1472 = select i1 %cmp.i4.i1471, i32 %cond.i.i1470, i32 %1
+ %incdec.ptr453 = getelementptr inbounds i16* %srcptr.41591, i64 12
+ %34 = load i16* undef, align 2, !tbaa !3
+ %conv454 = zext i16 %34 to i32
+ %sub455 = sub nsw i32 %cond.i5.i1472, %conv454
+ %idxprom456 = sext i32 %sub455 to i64
+ %arrayidx457 = getelementptr inbounds i32* %cond, i64 %idxprom456
+ %35 = load i32* %arrayidx457, align 4, !tbaa !0
+ %add458 = add nsw i32 %add444, %35
+ %incdec.ptr460 = getelementptr inbounds i16* %refptr.11590, i64 13
+ %36 = load i16* %incdec.ptr446, align 2, !tbaa !3
+ %conv461 = zext i16 %36 to i32
+ %mul462 = mul nsw i32 %conv461, %2
+ %add463 = add nsw i32 %mul462, %3
+ %shr464 = ashr i32 %add463, %4
+ %add465 = add nsw i32 %shr464, %5
+ %cmp.i.i1465 = icmp sgt i32 %add465, 0
+ %cond.i.i1466 = select i1 %cmp.i.i1465, i32 %add465, i32 0
+ %cmp.i4.i1467 = icmp slt i32 %cond.i.i1466, %1
+ %cond.i5.i1468 = select i1 %cmp.i4.i1467, i32 %cond.i.i1466, i32 %1
+ %incdec.ptr467 = getelementptr inbounds i16* %srcptr.41591, i64 13
+ %37 = load i16* %incdec.ptr453, align 2, !tbaa !3
+ %conv468 = zext i16 %37 to i32
+ %sub469 = sub nsw i32 %cond.i5.i1468, %conv468
+ %idxprom470 = sext i32 %sub469 to i64
+ %arrayidx471 = getelementptr inbounds i32* %cond, i64 %idxprom470
+ %38 = load i32* %arrayidx471, align 4, !tbaa !0
+ %add472 = add nsw i32 %38, %LineSadBlk3.01586
+ %incdec.ptr474 = getelementptr inbounds i16* %refptr.11590, i64 14
+ %add477 = add nsw i32 0, %3
+ %shr478 = ashr i32 %add477, %4
+ %add479 = add nsw i32 %shr478, %5
+ %cmp.i.i1461 = icmp sgt i32 %add479, 0
+ %cond.i.i1462 = select i1 %cmp.i.i1461, i32 %add479, i32 0
+ %cmp.i4.i1463 = icmp slt i32 %cond.i.i1462, %1
+ %cond.i5.i1464 = select i1 %cmp.i4.i1463, i32 %cond.i.i1462, i32 %1
+ %incdec.ptr481 = getelementptr inbounds i16* %srcptr.41591, i64 14
+ %39 = load i16* %incdec.ptr467, align 2, !tbaa !3
+ %conv482 = zext i16 %39 to i32
+ %sub483 = sub nsw i32 %cond.i5.i1464, %conv482
+ %idxprom484 = sext i32 %sub483 to i64
+ %arrayidx485 = getelementptr inbounds i32* %cond, i64 %idxprom484
+ %40 = load i32* %arrayidx485, align 4, !tbaa !0
+ %add486 = add nsw i32 %add472, %40
+ %incdec.ptr488 = getelementptr inbounds i16* %refptr.11590, i64 15
+ %41 = load i16* %incdec.ptr474, align 2, !tbaa !3
+ %conv489 = zext i16 %41 to i32
+ %mul490 = mul nsw i32 %conv489, %2
+ %add491 = add nsw i32 %mul490, %3
+ %shr492 = ashr i32 %add491, %4
+ %add493 = add nsw i32 %shr492, %5
+ %cmp.i.i1457 = icmp sgt i32 %add493, 0
+ %cond.i.i1458 = select i1 %cmp.i.i1457, i32 %add493, i32 0
+ %cmp.i4.i1459 = icmp slt i32 %cond.i.i1458, %1
+ %cond.i5.i1460 = select i1 %cmp.i4.i1459, i32 %cond.i.i1458, i32 %1
+ %incdec.ptr495 = getelementptr inbounds i16* %srcptr.41591, i64 15
+ %42 = load i16* %incdec.ptr481, align 2, !tbaa !3
+ %conv496 = zext i16 %42 to i32
+ %sub497 = sub nsw i32 %cond.i5.i1460, %conv496
+ %idxprom498 = sext i32 %sub497 to i64
+ %arrayidx499 = getelementptr inbounds i32* %cond, i64 %idxprom498
+ %43 = load i32* %arrayidx499, align 4, !tbaa !0
+ %add500 = add nsw i32 %add486, %43
+ %44 = load i16* %incdec.ptr488, align 2, !tbaa !3
+ %conv503 = zext i16 %44 to i32
+ %mul504 = mul nsw i32 %conv503, %2
+ %add505 = add nsw i32 %mul504, %3
+ %shr506 = ashr i32 %add505, %4
+ %add507 = add nsw i32 %shr506, %5
+ %cmp.i.i1453 = icmp sgt i32 %add507, 0
+ %cond.i.i1454 = select i1 %cmp.i.i1453, i32 %add507, i32 0
+ %cmp.i4.i1455 = icmp slt i32 %cond.i.i1454, %1
+ %cond.i5.i1456 = select i1 %cmp.i4.i1455, i32 %cond.i.i1454, i32 %1
+ %45 = load i16* %incdec.ptr495, align 2, !tbaa !3
+ %conv510 = zext i16 %45 to i32
+ %sub511 = sub nsw i32 %cond.i5.i1456, %conv510
+ %idxprom512 = sext i32 %sub511 to i64
+ %arrayidx513 = getelementptr inbounds i32* %cond, i64 %idxprom512
+ %46 = load i32* %arrayidx513, align 4, !tbaa !0
+ %add514 = add nsw i32 %add500, %46
+ %add.ptr517 = getelementptr inbounds i16* %refptr.11590, i64 %incdec.ptr502.sum
+ %exitcond1692 = icmp eq i32 undef, 4
+ br i1 %exitcond1692, label %for.end520, label %for.body293
+
+for.end520: ; preds = %for.body293
+ store i32 %add346, i32* undef, align 4, !tbaa !0
+ store i32 %add402, i32* undef, align 4, !tbaa !0
+ store i32 %add458, i32* undef, align 4, !tbaa !0
+ store i32 %add514, i32* null, align 4, !tbaa !0
+ br i1 undef, label %for.end543, label %for.cond290.preheader
+
+for.end543: ; preds = %for.end520
+ br i1 undef, label %for.inc997, label %for.body549
+
+for.body549: ; preds = %for.inc701, %for.end543
+ %call554 = call i16* null(i16**** null, i32 signext undef, i32 signext %shl263) #1
+ br label %for.cond559.preheader
+
+for.cond559.preheader: ; preds = %for.cond559.preheader, %for.body549
+ br i1 undef, label %for.inc701, label %for.cond559.preheader
+
+for.inc701: ; preds = %for.cond559.preheader
+ br i1 undef, label %for.inc997, label %for.body549
+
+for.cond713.preheader: ; preds = %for.end850, %for.body252
+ br label %for.body716
+
+for.body716: ; preds = %for.body716, %for.cond713.preheader
+ br i1 undef, label %for.end850, label %for.body716
+
+for.end850: ; preds = %for.body716
+ br i1 undef, label %for.end873, label %for.cond713.preheader
+
+for.end873: ; preds = %for.end850
+ br i1 undef, label %for.inc997, label %for.body879
+
+for.body879: ; preds = %for.inc992, %for.end873
+ br label %for.cond889.preheader
+
+for.cond889.preheader: ; preds = %for.end964, %for.body879
+ br i1 undef, label %for.cond894.preheader.lr.ph, label %for.end964
+
+for.cond894.preheader.lr.ph: ; preds = %for.cond889.preheader
+ br label %for.body898.lr.ph.us
+
+for.end957.us: ; preds = %for.body946.us
+ br i1 undef, label %for.body898.lr.ph.us, label %for.end964
+
+for.body946.us: ; preds = %for.body930.us, %for.body946.us
+ br i1 false, label %for.body946.us, label %for.end957.us
+
+for.body930.us: ; preds = %for.body914.us, %for.body930.us
+ br i1 undef, label %for.body930.us, label %for.body946.us
+
+for.body914.us: ; preds = %for.body898.us, %for.body914.us
+ br i1 undef, label %for.body914.us, label %for.body930.us
+
+for.body898.us: ; preds = %for.body898.lr.ph.us, %for.body898.us
+ br i1 undef, label %for.body898.us, label %for.body914.us
+
+for.body898.lr.ph.us: ; preds = %for.end957.us, %for.cond894.preheader.lr.ph
+ br label %for.body898.us
+
+for.end964: ; preds = %for.end957.us, %for.cond889.preheader
+ %inc990 = add nsw i32 undef, 1
+ br i1 false, label %for.inc992, label %for.cond889.preheader
+
+for.inc992: ; preds = %for.end964
+ br i1 false, label %for.inc997, label %for.body879
+
+for.inc997: ; preds = %for.inc992, %for.end873, %for.inc701, %for.end543
+ %cmp250 = icmp slt i32 undef, %mul10
+ br i1 %cmp250, label %for.body252, label %for.end999
+
+for.end999: ; preds = %for.inc997
+ ret void
+}
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind }
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
+!3 = metadata !{metadata !"short", metadata !1}
diff --git a/test/CodeGen/PowerPC/ctr-cleanup.ll b/test/CodeGen/PowerPC/ctr-cleanup.ll
new file mode 100644
index 0000000000..04e4ffb0d4
--- /dev/null
+++ b/test/CodeGen/PowerPC/ctr-cleanup.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -mcpu=a2 | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define void @main() #0 {
+entry:
+ br i1 undef, label %for.end, label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 1, %entry ]
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, 5
+ br i1 %exitcond, label %for.end, label %for.body
+
+; CHECK: @main
+; CHECK: li {{[0-9]+}}, 4
+; CHECK-NOT: li {{[0-9]+}}, 4
+; CHECK: bdnz
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/CodeGen/PowerPC/cttz.ll b/test/CodeGen/PowerPC/cttz.ll
index 1d365d47a8..3757fa3e2f 100644
--- a/test/CodeGen/PowerPC/cttz.ll
+++ b/test/CodeGen/PowerPC/cttz.ll
@@ -1,10 +1,12 @@
; Make sure this testcase does not use ctpop
-; RUN: llc < %s -march=ppc32 | grep -i cntlzw
+; RUN: llc < %s -march=ppc32 -mcpu=g5 | FileCheck %s
declare i32 @llvm.cttz.i32(i32, i1)
define i32 @bar(i32 %x) {
entry:
+; CHECK: @bar
+; CHECK: cntlzw
%tmp.1 = call i32 @llvm.cttz.i32( i32 %x, i1 true ) ; <i32> [#uses=1]
ret i32 %tmp.1
}
diff --git a/test/CodeGen/PowerPC/dbg.ll b/test/CodeGen/PowerPC/dbg.ll
index 8d87cf793d..21e36618c5 100644
--- a/test/CodeGen/PowerPC/dbg.ll
+++ b/test/CodeGen/PowerPC/dbg.ll
@@ -16,10 +16,10 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
!llvm.dbg.cu = !{!0}
-!0 = metadata !{i32 720913, i32 0, i32 12, metadata !"dbg.c", metadata !"/src", metadata !"clang version 3.1", i1 true, i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ]
+!0 = metadata !{i32 720913, i32 12, metadata !6, metadata !"clang version 3.1", i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
!1 = metadata !{i32 0}
!3 = metadata !{metadata !5}
-!5 = metadata !{i32 720942, i32 0, metadata !6, metadata !"main", metadata !"main", metadata !"", metadata !6, i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32, i8**)* @main, null, null, metadata !13} ; [ DW_TAG_subprogram ]
+!5 = metadata !{i32 720942, metadata !6, metadata !"main", metadata !"main", metadata !"", metadata !6, i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32, i8**)* @main, null, null, metadata !13} ; [ DW_TAG_subprogram ]
!6 = metadata !{i32 720937, metadata !"dbg.c", metadata !"/src", null} ; [ DW_TAG_file_type ]
!7 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
!8 = metadata !{metadata !9, metadata !9, metadata !10}
diff --git a/test/CodeGen/PowerPC/float-to-int.ll b/test/CodeGen/PowerPC/float-to-int.ll
new file mode 100644
index 0000000000..39cd4f929f
--- /dev/null
+++ b/test/CodeGen/PowerPC/float-to-int.ll
@@ -0,0 +1,93 @@
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2 | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=g5
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define i64 @foo(float %a) nounwind {
+ %x = fptosi float %a to i64
+ ret i64 %x
+
+; CHECK: @foo
+; CHECK: fctidz [[REG:[0-9]+]], 1
+; CHECK: stfd [[REG]],
+; CHECK: ld 3,
+; CHECK: blr
+}
+
+define i64 @foo2(double %a) nounwind {
+ %x = fptosi double %a to i64
+ ret i64 %x
+
+; CHECK: @foo2
+; CHECK: fctidz [[REG:[0-9]+]], 1
+; CHECK: stfd [[REG]],
+; CHECK: ld 3,
+; CHECK: blr
+}
+
+define i64 @foo3(float %a) nounwind {
+ %x = fptoui float %a to i64
+ ret i64 %x
+
+; CHECK: @foo3
+; CHECK: fctiduz [[REG:[0-9]+]], 1
+; CHECK: stfd [[REG]],
+; CHECK: ld 3,
+; CHECK: blr
+}
+
+define i64 @foo4(double %a) nounwind {
+ %x = fptoui double %a to i64
+ ret i64 %x
+
+; CHECK: @foo4
+; CHECK: fctiduz [[REG:[0-9]+]], 1
+; CHECK: stfd [[REG]],
+; CHECK: ld 3,
+; CHECK: blr
+}
+
+define i32 @goo(float %a) nounwind {
+ %x = fptosi float %a to i32
+ ret i32 %x
+
+; CHECK: @goo
+; CHECK: fctiwz [[REG:[0-9]+]], 1
+; CHECK: stfiwx [[REG]],
+; CHECK: lwz 3,
+; CHECK: blr
+}
+
+define i32 @goo2(double %a) nounwind {
+ %x = fptosi double %a to i32
+ ret i32 %x
+
+; CHECK: @goo2
+; CHECK: fctiwz [[REG:[0-9]+]], 1
+; CHECK: stfiwx [[REG]],
+; CHECK: lwz 3,
+; CHECK: blr
+}
+
+define i32 @goo3(float %a) nounwind {
+ %x = fptoui float %a to i32
+ ret i32 %x
+
+; CHECK: @goo3
+; CHECK: fctiwuz [[REG:[0-9]+]], 1
+; CHECK: stfiwx [[REG]],
+; CHECK: lwz 3,
+; CHECK: blr
+}
+
+define i32 @goo4(double %a) nounwind {
+ %x = fptoui double %a to i32
+ ret i32 %x
+
+; CHECK: @goo4
+; CHECK: fctiwuz [[REG:[0-9]+]], 1
+; CHECK: stfiwx [[REG]],
+; CHECK: lwz 3,
+; CHECK: blr
+}
+
diff --git a/test/CodeGen/PowerPC/frame-size.ll b/test/CodeGen/PowerPC/frame-size.ll
new file mode 100644
index 0000000000..0e569a4602
--- /dev/null
+++ b/test/CodeGen/PowerPC/frame-size.ll
@@ -0,0 +1,32 @@
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2 | FileCheck %s
+target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128-n32"
+
+define i64 @foo() nounwind {
+entry:
+ %x = alloca [32568 x i8]
+ %"alloca point" = bitcast i32 0 to i32
+ %x1 = bitcast [32568 x i8]* %x to i8*
+
+; Check that the RS spill slot has been allocated (because the estimate
+; will fail the small-frame-size check and the function has spills).
+; CHECK: @foo
+; CHECK: stdu 1, -32768(1)
+
+ %s1 = call i64 @bar(i8* %x1) nounwind
+ %s2 = call i64 @bar(i8* %x1) nounwind
+ %s3 = call i64 @bar(i8* %x1) nounwind
+ %s4 = call i64 @bar(i8* %x1) nounwind
+ %s5 = call i64 @bar(i8* %x1) nounwind
+ %s6 = call i64 @bar(i8* %x1) nounwind
+ %s7 = call i64 @bar(i8* %x1) nounwind
+ %s8 = call i64 @bar(i8* %x1) nounwind
+ %r = call i64 @can(i64 %s1, i64 %s2, i64 %s3, i64 %s4, i64 %s5, i64 %s6, i64 %s7, i64 %s8) nounwind
+ br label %return
+
+return:
+ ret i64 %r
+}
+
+declare i64 @bar(i8*)
+declare i64 @can(i64, i64, i64, i64, i64, i64, i64, i64)
+
diff --git a/test/CodeGen/PowerPC/frameaddr.ll b/test/CodeGen/PowerPC/frameaddr.ll
new file mode 100644
index 0000000000..eabd4a68aa
--- /dev/null
+++ b/test/CodeGen/PowerPC/frameaddr.ll
@@ -0,0 +1,47 @@
+; RUN: llc < %s -mcpu=pwr7 | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+declare void @llvm.eh.sjlj.longjmp(i8*) #1
+
+define i8* @main() #0 {
+entry:
+ %0 = call i8* @llvm.frameaddress(i32 0)
+ ret i8* %0
+
+; CHECK: @main
+; CHECK: mr 3, 1
+}
+
+define i8* @foo() #3 { ; naked
+entry:
+ %0 = call i8* @llvm.frameaddress(i32 0)
+ ret i8* %0
+
+; CHECK: @foo
+; CHECK: mr 3, 1
+}
+
+define i8* @bar() #0 {
+entry:
+ %x = alloca [100000 x i8] ; <[100000 x i8]*> [#uses=1]
+ %x1 = bitcast [100000 x i8]* %x to i8* ; <i8*> [#uses=1]
+ call void @use(i8* %x1) nounwind
+ %0 = call i8* @llvm.frameaddress(i32 0)
+ ret i8* %0
+
+; Note that if we start eliminating non-leaf frame pointers by default, this
+; will need to be updated.
+; CHECK: @bar
+; CHECK: mr 3, 31
+}
+
+declare void @use(i8*)
+
+declare i8* @llvm.frameaddress(i32) #2
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { noreturn nounwind }
+attributes #2 = { nounwind readnone }
+attributes #3 = { nounwind naked "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
diff --git a/test/CodeGen/PowerPC/i32-to-float.ll b/test/CodeGen/PowerPC/i32-to-float.ll
new file mode 100644
index 0000000000..2707d0352d
--- /dev/null
+++ b/test/CodeGen/PowerPC/i32-to-float.ll
@@ -0,0 +1,82 @@
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=g5 | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr6 | FileCheck -check-prefix=CHECK-PWR6 %s
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2 | FileCheck -check-prefix=CHECK-A2 %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define float @foo(i32 %a) nounwind {
+entry:
+ %x = sitofp i32 %a to float
+ ret float %x
+
+; CHECK: @foo
+; CHECK: extsw [[REG:[0-9]+]], 3
+; CHECK: std [[REG]],
+; CHECK: lfd [[REG2:[0-9]+]],
+; CHECK: fcfid [[REG3:[0-9]+]], [[REG2]]
+; CHECK: frsp 1, [[REG3]]
+; CHECK: blr
+
+; CHECK-PWR6: @foo
+; CHECK-PWR6: stw 3,
+; CHECK-PWR6: lfiwax [[REG:[0-9]+]],
+; CHECK-PWR6: fcfid [[REG2:[0-9]+]], [[REG]]
+; CHECK-PWR6: frsp 1, [[REG2]]
+; CHECK-PWR6: blr
+
+; CHECK-A2: @foo
+; CHECK-A2: stw 3,
+; CHECK-A2: lfiwax [[REG:[0-9]+]],
+; CHECK-A2: fcfids 1, [[REG]]
+; CHECK-A2: blr
+}
+
+define double @goo(i32 %a) nounwind {
+entry:
+ %x = sitofp i32 %a to double
+ ret double %x
+
+; CHECK: @goo
+; CHECK: extsw [[REG:[0-9]+]], 3
+; CHECK: std [[REG]],
+; CHECK: lfd [[REG2:[0-9]+]],
+; CHECK: fcfid 1, [[REG2]]
+; CHECK: blr
+
+; CHECK-PWR6: @goo
+; CHECK-PWR6: stw 3,
+; CHECK-PWR6: lfiwax [[REG:[0-9]+]],
+; CHECK-PWR6: fcfid 1, [[REG]]
+; CHECK-PWR6: blr
+
+; CHECK-A2: @goo
+; CHECK-A2: stw 3,
+; CHECK-A2: lfiwax [[REG:[0-9]+]],
+; CHECK-A2: fcfid 1, [[REG]]
+; CHECK-A2: blr
+}
+
+define float @foou(i32 %a) nounwind {
+entry:
+ %x = uitofp i32 %a to float
+ ret float %x
+
+; CHECK-A2: @foou
+; CHECK-A2: stw 3,
+; CHECK-A2: lfiwzx [[REG:[0-9]+]],
+; CHECK-A2: fcfidus 1, [[REG]]
+; CHECK-A2: blr
+}
+
+define double @goou(i32 %a) nounwind {
+entry:
+ %x = uitofp i32 %a to double
+ ret double %x
+
+; CHECK-A2: @goou
+; CHECK-A2: stw 3,
+; CHECK-A2: lfiwzx [[REG:[0-9]+]],
+; CHECK-A2: fcfidu 1, [[REG]]
+; CHECK-A2: blr
+}
+
diff --git a/test/CodeGen/PowerPC/i64-to-float.ll b/test/CodeGen/PowerPC/i64-to-float.ll
new file mode 100644
index 0000000000..b81d109e7f
--- /dev/null
+++ b/test/CodeGen/PowerPC/i64-to-float.ll
@@ -0,0 +1,52 @@
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2 | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define float @foo(i64 %a) nounwind {
+entry:
+ %x = sitofp i64 %a to float
+ ret float %x
+
+; CHECK: @foo
+; CHECK: std 3,
+; CHECK: lfd [[REG:[0-9]+]],
+; CHECK: fcfids 1, [[REG]]
+; CHECK: blr
+}
+
+define double @goo(i64 %a) nounwind {
+entry:
+ %x = sitofp i64 %a to double
+ ret double %x
+
+; CHECK: @goo
+; CHECK: std 3,
+; CHECK: lfd [[REG:[0-9]+]],
+; CHECK: fcfid 1, [[REG]]
+; CHECK: blr
+}
+
+define float @foou(i64 %a) nounwind {
+entry:
+ %x = uitofp i64 %a to float
+ ret float %x
+
+; CHECK: @foou
+; CHECK: std 3,
+; CHECK: lfd [[REG:[0-9]+]],
+; CHECK: fcfidus 1, [[REG]]
+; CHECK: blr
+}
+
+define double @goou(i64 %a) nounwind {
+entry:
+ %x = uitofp i64 %a to double
+ ret double %x
+
+; CHECK: @goou
+; CHECK: std 3,
+; CHECK: lfd [[REG:[0-9]+]],
+; CHECK: fcfidu 1, [[REG]]
+; CHECK: blr
+}
+
diff --git a/test/CodeGen/PowerPC/i64_fp_round.ll b/test/CodeGen/PowerPC/i64_fp_round.ll
index 5ae1be8953..d2a3239ab8 100644
--- a/test/CodeGen/PowerPC/i64_fp_round.ll
+++ b/test/CodeGen/PowerPC/i64_fp_round.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mcpu=pwr7 < %s | FileCheck %s
+; RUN: llc -mcpu=pwr7 -mattr=-fpcvt < %s | FileCheck %s
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
target triple = "powerpc64-unknown-linux-gnu"
@@ -22,6 +22,6 @@ entry:
; Also check that with -enable-unsafe-fp-math we do not get that extra
; code sequence. Simply verify that there is no "isel" present.
-; RUN: llc -mcpu=pwr7 -enable-unsafe-fp-math < %s | FileCheck %s -check-prefix=UNSAFE
+; RUN: llc -mcpu=pwr7 -mattr=-fpcvt -enable-unsafe-fp-math < %s | FileCheck %s -check-prefix=UNSAFE
; CHECK-UNSAFE-NOT: isel
diff --git a/test/CodeGen/PowerPC/Stats/iabs.ll b/test/CodeGen/PowerPC/iabs.ll
index 7d089bbd65..f683238de2 100644
--- a/test/CodeGen/PowerPC/Stats/iabs.ll
+++ b/test/CodeGen/PowerPC/iabs.ll
@@ -1,3 +1,4 @@
+; REQUIRES: asserts
; RUN: llc < %s -march=ppc32 -stats 2>&1 | \
; RUN: grep "4 .*Number of machine instrs printed"
diff --git a/test/CodeGen/PowerPC/jaggedstructs.ll b/test/CodeGen/PowerPC/jaggedstructs.ll
index 62aa7cf929..a10c5ddb36 100644
--- a/test/CodeGen/PowerPC/jaggedstructs.ll
+++ b/test/CodeGen/PowerPC/jaggedstructs.ll
@@ -23,22 +23,22 @@ entry:
; CHECK: std 4, 200(1)
; CHECK: std 3, 192(1)
; CHECK: lbz {{[0-9]+}}, 199(1)
-; CHECK: stb {{[0-9]+}}, 55(1)
; CHECK: lhz {{[0-9]+}}, 197(1)
+; CHECK: stb {{[0-9]+}}, 55(1)
; CHECK: sth {{[0-9]+}}, 53(1)
; CHECK: lbz {{[0-9]+}}, 207(1)
-; CHECK: stb {{[0-9]+}}, 63(1)
; CHECK: lwz {{[0-9]+}}, 203(1)
+; CHECK: stb {{[0-9]+}}, 63(1)
; CHECK: stw {{[0-9]+}}, 59(1)
; CHECK: lhz {{[0-9]+}}, 214(1)
-; CHECK: sth {{[0-9]+}}, 70(1)
; CHECK: lwz {{[0-9]+}}, 210(1)
+; CHECK: sth {{[0-9]+}}, 70(1)
; CHECK: stw {{[0-9]+}}, 66(1)
; CHECK: lbz {{[0-9]+}}, 223(1)
-; CHECK: stb {{[0-9]+}}, 79(1)
; CHECK: lhz {{[0-9]+}}, 221(1)
-; CHECK: sth {{[0-9]+}}, 77(1)
; CHECK: lwz {{[0-9]+}}, 217(1)
+; CHECK: stb {{[0-9]+}}, 79(1)
+; CHECK: sth {{[0-9]+}}, 77(1)
; CHECK: stw {{[0-9]+}}, 73(1)
; CHECK: ld 6, 72(1)
; CHECK: ld 5, 64(1)
diff --git a/test/CodeGen/PowerPC/lbzux.ll b/test/CodeGen/PowerPC/lbzux.ll
index 12f1d1f130..98951306fd 100644
--- a/test/CodeGen/PowerPC/lbzux.ll
+++ b/test/CodeGen/PowerPC/lbzux.ll
@@ -1,6 +1,6 @@
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
target triple = "powerpc64-unknown-linux-gnu"
-; RUN: llc < %s | FileCheck %s
+; RUN: llc -disable-ppc-unaligned < %s | FileCheck %s
define fastcc void @allocateSpace(i1 %cond1, i1 %cond2) nounwind {
entry:
diff --git a/test/CodeGen/PowerPC/lit.local.cfg b/test/CodeGen/PowerPC/lit.local.cfg
index 4019eca0bb..aaa31d93d5 100644
--- a/test/CodeGen/PowerPC/lit.local.cfg
+++ b/test/CodeGen/PowerPC/lit.local.cfg
@@ -1,4 +1,4 @@
-config.suffixes = ['.ll', '.c', '.cpp']
+config.suffixes = ['.ll', '.c', '.cpp', '.test']
targets = set(config.root.targets_to_build.split())
if not 'PowerPC' in targets:
diff --git a/test/CodeGen/PowerPC/negctr.ll b/test/CodeGen/PowerPC/negctr.ll
new file mode 100644
index 0000000000..2f6995c65d
--- /dev/null
+++ b/test/CodeGen/PowerPC/negctr.ll
@@ -0,0 +1,83 @@
+; RUN: llc < %s -mcpu=a2 | FileCheck %s
+; RUN: llc < %s -mcpu=a2 -disable-lsr | FileCheck -check-prefix=NOLSR %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define void @main() #0 {
+entry:
+ br i1 undef, label %for.end, label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 1, %entry ]
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, 0
+ br i1 %exitcond, label %for.end, label %for.body
+
+; FIXME: We currently can't form the 32-bit unsigned trip count necessary here!
+; CHECK: @main
+; CHECK-NOT: bdnz
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+define void @main1() #0 {
+entry:
+ br i1 undef, label %for.end, label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 1, %entry ]
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, 0
+ br i1 %exitcond, label %for.end, label %for.body
+
+; CHECK: @main1
+; CHECK: li [[REG:[0-9]+]], -1
+; CHECK: mtctr [[REG]]
+; CHECK: bdnz
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+define void @main2() #0 {
+entry:
+ br i1 undef, label %for.end, label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 1, %entry ]
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, -100000
+ br i1 %exitcond, label %for.end, label %for.body
+
+; CHECK: @main2
+; CHECK: lis [[REG:[0-9]+]], -2
+; CHECK: ori [[REG2:[0-9]+]], [[REG]], 31071
+; CHECK: mtctr [[REG2]]
+; CHECK: bdnz
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+define void @main3() #0 {
+entry:
+ br i1 undef, label %for.end, label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 127984, %entry ]
+ %indvars.iv.next = add i64 %indvars.iv, -16
+ %exitcond = icmp eq i64 %indvars.iv.next, -16
+ br i1 %exitcond, label %for.end, label %for.body
+
+; NOLSR: @main3
+; NOLSR: li [[REG:[0-9]+]], 8000
+; NOLSR: mtctr [[REG]]
+; NOLSR: bdnz
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/CodeGen/PowerPC/popcnt.ll b/test/CodeGen/PowerPC/popcnt.ll
new file mode 100644
index 0000000000..b304d72aed
--- /dev/null
+++ b/test/CodeGen/PowerPC/popcnt.ll
@@ -0,0 +1,40 @@
+; RUN: llc -march=ppc64 -mattr=+popcntd < %s | FileCheck %s
+
+define i8 @cnt8(i8 %x) nounwind readnone {
+ %cnt = tail call i8 @llvm.ctpop.i8(i8 %x)
+ ret i8 %cnt
+; CHECK: @cnt8
+; CHECK: rlwinm
+; CHECK: popcntw
+; CHECK: blr
+}
+
+define i16 @cnt16(i16 %x) nounwind readnone {
+ %cnt = tail call i16 @llvm.ctpop.i16(i16 %x)
+ ret i16 %cnt
+; CHECK: @cnt16
+; CHECK: rlwinm
+; CHECK: popcntw
+; CHECK: blr
+}
+
+define i32 @cnt32(i32 %x) nounwind readnone {
+ %cnt = tail call i32 @llvm.ctpop.i32(i32 %x)
+ ret i32 %cnt
+; CHECK: @cnt32
+; CHECK: popcntw
+; CHECK: blr
+}
+
+define i64 @cnt64(i64 %x) nounwind readnone {
+ %cnt = tail call i64 @llvm.ctpop.i64(i64 %x)
+ ret i64 %cnt
+; CHECK: @cnt64
+; CHECK: popcntd
+; CHECK: blr
+}
+
+declare i8 @llvm.ctpop.i8(i8) nounwind readnone
+declare i16 @llvm.ctpop.i16(i16) nounwind readnone
+declare i32 @llvm.ctpop.i32(i32) nounwind readnone
+declare i64 @llvm.ctpop.i64(i64) nounwind readnone
diff --git a/test/CodeGen/PowerPC/r31.ll b/test/CodeGen/PowerPC/r31.ll
new file mode 100644
index 0000000000..7ce12f600b
--- /dev/null
+++ b/test/CodeGen/PowerPC/r31.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=g4 | FileCheck %s
+target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128-n32"
+
+define i64 @foo(i64 %a) nounwind {
+entry:
+ call void asm sideeffect "", "~{r0},~{r3},~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11},~{r12},~{r14},~{r15},~{r16},~{r17},~{r18},~{r19},~{r20},~{r21},~{r22},~{r23},~{r24},~{r25},~{r26},~{r27},~{r28},~{r29},~{r30}"() nounwind
+ br label %return
+
+; CHECK: @foo
+; CHECK: mr 31, 3
+
+return: ; preds = %entry
+ ret i64 %a
+}
+
diff --git a/test/CodeGen/PowerPC/Stats/rlwimi3.ll b/test/CodeGen/PowerPC/rlwimi3.ll
index 7efdbe9634..31b6d4aa03 100644
--- a/test/CodeGen/PowerPC/Stats/rlwimi3.ll
+++ b/test/CodeGen/PowerPC/rlwimi3.ll
@@ -1,3 +1,4 @@
+; REQUIRES: asserts
; RUN: llc < %s -march=ppc32 -stats 2>&1 | \
; RUN: grep "Number of machine instrs printed" | grep 12
diff --git a/test/CodeGen/PowerPC/rounding-ops.ll b/test/CodeGen/PowerPC/rounding-ops.ll
new file mode 100644
index 0000000000..b210a6bda8
--- /dev/null
+++ b/test/CodeGen/PowerPC/rounding-ops.ll
@@ -0,0 +1,145 @@
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -enable-unsafe-fp-math | FileCheck -check-prefix=CHECK-FM %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define float @test1(float %x) nounwind {
+ %call = tail call float @floorf(float %x) nounwind readnone
+ ret float %call
+
+; CHECK: test1:
+; CHECK: frim 1, 1
+
+; CHECK-FM: test1:
+; CHECK-FM: frim 1, 1
+}
+
+declare float @floorf(float) nounwind readnone
+
+define double @test2(double %x) nounwind {
+ %call = tail call double @floor(double %x) nounwind readnone
+ ret double %call
+
+; CHECK: test2:
+; CHECK: frim 1, 1
+
+; CHECK-FM: test2:
+; CHECK-FM: frim 1, 1
+}
+
+declare double @floor(double) nounwind readnone
+
+define float @test3(float %x) nounwind {
+ %call = tail call float @nearbyintf(float %x) nounwind readnone
+ ret float %call
+
+; CHECK: test3:
+; CHECK-NOT: frin
+
+; CHECK-FM: test3:
+; CHECK-FM: frin 1, 1
+}
+
+declare float @nearbyintf(float) nounwind readnone
+
+define double @test4(double %x) nounwind {
+ %call = tail call double @nearbyint(double %x) nounwind readnone
+ ret double %call
+
+; CHECK: test4:
+; CHECK-NOT: frin
+
+; CHECK-FM: test4:
+; CHECK-FM: frin 1, 1
+}
+
+declare double @nearbyint(double) nounwind readnone
+
+define float @test5(float %x) nounwind {
+ %call = tail call float @ceilf(float %x) nounwind readnone
+ ret float %call
+
+; CHECK: test5:
+; CHECK: frip 1, 1
+
+; CHECK-FM: test5:
+; CHECK-FM: frip 1, 1
+}
+
+declare float @ceilf(float) nounwind readnone
+
+define double @test6(double %x) nounwind {
+ %call = tail call double @ceil(double %x) nounwind readnone
+ ret double %call
+
+; CHECK: test6:
+; CHECK: frip 1, 1
+
+; CHECK-FM: test6:
+; CHECK-FM: frip 1, 1
+}
+
+declare double @ceil(double) nounwind readnone
+
+define float @test9(float %x) nounwind {
+ %call = tail call float @truncf(float %x) nounwind readnone
+ ret float %call
+
+; CHECK: test9:
+; CHECK: friz 1, 1
+
+; CHECK-FM: test9:
+; CHECK-FM: friz 1, 1
+}
+
+declare float @truncf(float) nounwind readnone
+
+define double @test10(double %x) nounwind {
+ %call = tail call double @trunc(double %x) nounwind readnone
+ ret double %call
+
+; CHECK: test10:
+; CHECK: friz 1, 1
+
+; CHECK-FM: test10:
+; CHECK-FM: friz 1, 1
+}
+
+declare double @trunc(double) nounwind readnone
+
+define float @test11(float %x) nounwind {
+ %call = tail call float @rintf(float %x) nounwind readnone
+ ret float %call
+
+; CHECK: test11:
+; CHECK-NOT: frin
+
+; CHECK-FM: test11:
+; CHECK-FM: frin [[R2:[0-9]+]], [[R1:[0-9]+]]
+; CHECK-FM: fcmpu [[CR:[0-9]+]], [[R2]], [[R1]]
+; CHECK-FM: beq [[CR]], .LBB[[BB:[0-9]+]]_2
+; CHECK-FM: mtfsb1 6
+; CHECK-FM: .LBB[[BB]]_2:
+; CHECK-FM: blr
+}
+
+declare float @rintf(float) nounwind readnone
+
+define double @test12(double %x) nounwind {
+ %call = tail call double @rint(double %x) nounwind readnone
+ ret double %call
+
+; CHECK: test12:
+; CHECK-NOT: frin
+
+; CHECK-FM: test12:
+; CHECK-FM: frin [[R2:[0-9]+]], [[R1:[0-9]+]]
+; CHECK-FM: fcmpu [[CR:[0-9]+]], [[R2]], [[R1]]
+; CHECK-FM: beq [[CR]], .LBB[[BB:[0-9]+]]_2
+; CHECK-FM: mtfsb1 6
+; CHECK-FM: .LBB[[BB]]_2:
+; CHECK-FM: blr
+}
+
+declare double @rint(double) nounwind readnone
+
diff --git a/test/CodeGen/PowerPC/sjlj.ll b/test/CodeGen/PowerPC/sjlj.ll
new file mode 100644
index 0000000000..7ea35dafc3
--- /dev/null
+++ b/test/CodeGen/PowerPC/sjlj.ll
@@ -0,0 +1,112 @@
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2 | FileCheck -check-prefix=CHECK-NOAV %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+%struct.__jmp_buf_tag = type { [64 x i64], i32, %struct.__sigset_t, [8 x i8] }
+%struct.__sigset_t = type { [16 x i64] }
+
+@env_sigill = internal global [1 x %struct.__jmp_buf_tag] zeroinitializer, align 16
+
+define void @foo() #0 {
+entry:
+ call void @llvm.eh.sjlj.longjmp(i8* bitcast ([1 x %struct.__jmp_buf_tag]* @env_sigill to i8*))
+ unreachable
+
+; CHECK: @foo
+; CHECK: addis [[REG:[0-9]+]], 2, env_sigill@toc@ha
+; CHECK: addi [[REG]], [[REG]], env_sigill@toc@l
+; CHECK: ld 31, 0([[REG]])
+; CHECK: ld [[REG2:[0-9]+]], 8([[REG]])
+; CHECK: ld 1, 16([[REG]])
+; CHECK: mtctr [[REG2]]
+; CHECK: ld 2, 24([[REG]])
+; CHECK: bctr
+
+return: ; No predecessors!
+ ret void
+}
+
+declare void @llvm.eh.sjlj.longjmp(i8*) #1
+
+define signext i32 @main() #0 {
+entry:
+ %retval = alloca i32, align 4
+ store i32 0, i32* %retval
+ %0 = call i8* @llvm.frameaddress(i32 0)
+ store i8* %0, i8** bitcast ([1 x %struct.__jmp_buf_tag]* @env_sigill to i8**)
+ %1 = call i8* @llvm.stacksave()
+ store i8* %1, i8** getelementptr (i8** bitcast ([1 x %struct.__jmp_buf_tag]* @env_sigill to i8**), i32 2)
+ %2 = call i32 @llvm.eh.sjlj.setjmp(i8* bitcast ([1 x %struct.__jmp_buf_tag]* @env_sigill to i8*))
+ %tobool = icmp ne i32 %2, 0
+ br i1 %tobool, label %if.then, label %if.else
+
+if.then: ; preds = %entry
+ store i32 1, i32* %retval
+ br label %return
+
+if.else: ; preds = %entry
+ call void @foo()
+ br label %if.end
+
+if.end: ; preds = %if.else
+ store i32 0, i32* %retval
+ br label %return
+
+return: ; preds = %if.end, %if.then
+ %3 = load i32* %retval
+ ret i32 %3
+
+; FIXME: We should be saving VRSAVE on Darwin, but we're not!
+
+; CHECK: @main
+; CHECK: std
+; Make sure that we're not saving VRSAVE on non-Darwin:
+; CHECK-NOT: mfspr
+; CHECK: stfd
+; CHECK: stvx
+
+; CHECK: addis [[REG:[0-9]+]], 2, env_sigill@toc@ha
+; CHECK: std 31, env_sigill@toc@l([[REG]])
+; CHECK: addi [[REG]], [[REG]], env_sigill@toc@l
+; CHECK: std [[REG]], [[OFF:[0-9]+]](31) # 8-byte Folded Spill
+; CHECK: std 1, 16([[REG]])
+; CHECK: std 2, 24([[REG]])
+; CHECK: bcl 20, 31, .LBB1_1
+; CHECK: li 3, 1
+; CHECK: #EH_SjLj_Setup .LBB1_1
+; CHECK: b .LBB1_2
+
+; CHECK: .LBB1_1:
+; CHECK: mflr [[REGL:[0-9]+]]
+; CHECK: ld [[REG2:[0-9]+]], [[OFF]](31) # 8-byte Folded Reload
+; CHECK: std [[REGL]], 8([[REG2]])
+; CHECK: li 3, 0
+
+; CHECK: .LBB1_2:
+
+; CHECK: lfd
+; CHECK: lvx
+; CHECK: ld
+; CHECK: blr
+
+; CHECK-NOAV: @main
+; CHECK-NOAV-NOT: stvx
+; CHECK-NOAV: bcl
+; CHECK-NOAV: mflr
+; CHECK-NOAV: bl foo
+; CHECK-NOAV-NOT: lvx
+; CHECK-NOAV: blr
+}
+
+declare i8* @llvm.frameaddress(i32) #2
+
+declare i8* @llvm.stacksave() #3
+
+declare i32 @llvm.eh.sjlj.setjmp(i8*) #3
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { noreturn nounwind }
+attributes #2 = { nounwind readnone }
+attributes #3 = { nounwind }
+
diff --git a/test/CodeGen/PowerPC/stfiwx-2.ll b/test/CodeGen/PowerPC/stfiwx-2.ll
index c49b25cc23..7786fc17ea 100644
--- a/test/CodeGen/PowerPC/stfiwx-2.ll
+++ b/test/CodeGen/PowerPC/stfiwx-2.ll
@@ -1,11 +1,14 @@
-; This cannot be a stfiwx
-; RUN: llc < %s -march=ppc32 -mcpu=g5 | grep stb
-; RUN: llc < %s -march=ppc32 -mcpu=g5 | not grep stfiwx
+; RUN: llc < %s -mtriple=powerpc-unknown-linux-gnu -mcpu=g5 | FileCheck %s
define void @test(float %F, i8* %P) {
%I = fptosi float %F to i32
%X = trunc i32 %I to i8
store i8 %X, i8* %P
ret void
+; CHECK: fctiwz 0, 1
+; CHECK: stfiwx 0, 0, 4
+; CHECK: lwz 4, 12(1)
+; CHECK: stb 4, 0(3)
+; CHECK: blr
}
diff --git a/test/CodeGen/PowerPC/store-update.ll b/test/CodeGen/PowerPC/store-update.ll
new file mode 100644
index 0000000000..538ed24fbc
--- /dev/null
+++ b/test/CodeGen/PowerPC/store-update.ll
@@ -0,0 +1,170 @@
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define i8* @stbu(i8* %base, i8 zeroext %val) nounwind {
+entry:
+ %arrayidx = getelementptr inbounds i8* %base, i64 16
+ store i8 %val, i8* %arrayidx, align 1
+ ret i8* %arrayidx
+}
+; CHECK: @stbu
+; CHECK: %entry
+; CHECK-NEXT: stbu
+; CHECK-NEXT: blr
+
+define i8* @stbux(i8* %base, i8 zeroext %val, i64 %offset) nounwind {
+entry:
+ %arrayidx = getelementptr inbounds i8* %base, i64 %offset
+ store i8 %val, i8* %arrayidx, align 1
+ ret i8* %arrayidx
+}
+; CHECK: @stbux
+; CHECK: %entry
+; CHECK-NEXT: stbux
+; CHECK-NEXT: blr
+
+define i16* @sthu(i16* %base, i16 zeroext %val) nounwind {
+entry:
+ %arrayidx = getelementptr inbounds i16* %base, i64 16
+ store i16 %val, i16* %arrayidx, align 2
+ ret i16* %arrayidx
+}
+; CHECK: @sthu
+; CHECK: %entry
+; CHECK-NEXT: sthu
+; CHECK-NEXT: blr
+
+define i16* @sthux(i16* %base, i16 zeroext %val, i64 %offset) nounwind {
+entry:
+ %arrayidx = getelementptr inbounds i16* %base, i64 %offset
+ store i16 %val, i16* %arrayidx, align 2
+ ret i16* %arrayidx
+}
+; CHECK: @sthux
+; CHECK: %entry
+; CHECK-NEXT: sldi
+; CHECK-NEXT: sthux
+; CHECK-NEXT: blr
+
+define i32* @stwu(i32* %base, i32 zeroext %val) nounwind {
+entry:
+ %arrayidx = getelementptr inbounds i32* %base, i64 16
+ store i32 %val, i32* %arrayidx, align 4
+ ret i32* %arrayidx
+}
+; CHECK: @stwu
+; CHECK: %entry
+; CHECK-NEXT: stwu
+; CHECK-NEXT: blr
+
+define i32* @stwux(i32* %base, i32 zeroext %val, i64 %offset) nounwind {
+entry:
+ %arrayidx = getelementptr inbounds i32* %base, i64 %offset
+ store i32 %val, i32* %arrayidx, align 4
+ ret i32* %arrayidx
+}
+; CHECK: @stwux
+; CHECK: %entry
+; CHECK-NEXT: sldi
+; CHECK-NEXT: stwux
+; CHECK-NEXT: blr
+
+define i8* @stbu8(i8* %base, i64 %val) nounwind {
+entry:
+ %conv = trunc i64 %val to i8
+ %arrayidx = getelementptr inbounds i8* %base, i64 16
+ store i8 %conv, i8* %arrayidx, align 1
+ ret i8* %arrayidx
+}
+; CHECK: @stbu
+; CHECK: %entry
+; CHECK-NEXT: stbu
+; CHECK-NEXT: blr
+
+define i8* @stbux8(i8* %base, i64 %val, i64 %offset) nounwind {
+entry:
+ %conv = trunc i64 %val to i8
+ %arrayidx = getelementptr inbounds i8* %base, i64 %offset
+ store i8 %conv, i8* %arrayidx, align 1
+ ret i8* %arrayidx
+}
+; CHECK: @stbux
+; CHECK: %entry
+; CHECK-NEXT: stbux
+; CHECK-NEXT: blr
+
+define i16* @sthu8(i16* %base, i64 %val) nounwind {
+entry:
+ %conv = trunc i64 %val to i16
+ %arrayidx = getelementptr inbounds i16* %base, i64 16
+ store i16 %conv, i16* %arrayidx, align 2
+ ret i16* %arrayidx
+}
+; CHECK: @sthu
+; CHECK: %entry
+; CHECK-NEXT: sthu
+; CHECK-NEXT: blr
+
+define i16* @sthux8(i16* %base, i64 %val, i64 %offset) nounwind {
+entry:
+ %conv = trunc i64 %val to i16
+ %arrayidx = getelementptr inbounds i16* %base, i64 %offset
+ store i16 %conv, i16* %arrayidx, align 2
+ ret i16* %arrayidx
+}
+; CHECK: @sthux
+; CHECK: %entry
+; CHECK-NEXT: sldi
+; CHECK-NEXT: sthux
+; CHECK-NEXT: blr
+
+define i32* @stwu8(i32* %base, i64 %val) nounwind {
+entry:
+ %conv = trunc i64 %val to i32
+ %arrayidx = getelementptr inbounds i32* %base, i64 16
+ store i32 %conv, i32* %arrayidx, align 4
+ ret i32* %arrayidx
+}
+; CHECK: @stwu
+; CHECK: %entry
+; CHECK-NEXT: stwu
+; CHECK-NEXT: blr
+
+define i32* @stwux8(i32* %base, i64 %val, i64 %offset) nounwind {
+entry:
+ %conv = trunc i64 %val to i32
+ %arrayidx = getelementptr inbounds i32* %base, i64 %offset
+ store i32 %conv, i32* %arrayidx, align 4
+ ret i32* %arrayidx
+}
+; CHECK: @stwux
+; CHECK: %entry
+; CHECK-NEXT: sldi
+; CHECK-NEXT: stwux
+; CHECK-NEXT: blr
+
+define i64* @stdu(i64* %base, i64 %val) nounwind {
+entry:
+ %arrayidx = getelementptr inbounds i64* %base, i64 16
+ store i64 %val, i64* %arrayidx, align 8
+ ret i64* %arrayidx
+}
+; CHECK: @stdu
+; CHECK: %entry
+; CHECK-NEXT: stdu
+; CHECK-NEXT: blr
+
+define i64* @stdux(i64* %base, i64 %val, i64 %offset) nounwind {
+entry:
+ %arrayidx = getelementptr inbounds i64* %base, i64 %offset
+ store i64 %val, i64* %arrayidx, align 8
+ ret i64* %arrayidx
+}
+; CHECK: @stdux
+; CHECK: %entry
+; CHECK-NEXT: sldi
+; CHECK-NEXT: stdux
+; CHECK-NEXT: blr
+
diff --git a/test/CodeGen/PowerPC/structsinmem.ll b/test/CodeGen/PowerPC/structsinmem.ll
index 884d3a89d1..2a17e740ea 100644
--- a/test/CodeGen/PowerPC/structsinmem.ll
+++ b/test/CodeGen/PowerPC/structsinmem.ll
@@ -1,9 +1,5 @@
; RUN: llc -mcpu=pwr7 -O0 -disable-fp-elim < %s | FileCheck %s
-; FIXME: The code generation for packed structs is very poor because the
-; PowerPC target wrongly rejects all unaligned loads. This test case will
-; need to be revised when that is fixed.
-
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
target triple = "powerpc64-unknown-linux-gnu"
@@ -118,8 +114,8 @@ entry:
ret i32 %add13
; CHECK: lha {{[0-9]+}}, 126(1)
-; CHECK: lbz {{[0-9]+}}, 119(1)
; CHECK: lha {{[0-9]+}}, 132(1)
+; CHECK: lbz {{[0-9]+}}, 119(1)
; CHECK: lwz {{[0-9]+}}, 140(1)
; CHECK: lwz {{[0-9]+}}, 144(1)
; CHECK: lwz {{[0-9]+}}, 152(1)
@@ -209,19 +205,11 @@ entry:
%add13 = add nsw i32 %add11, %6
ret i32 %add13
-; CHECK: lbz {{[0-9]+}}, 149(1)
-; CHECK: lbz {{[0-9]+}}, 150(1)
-; CHECK: lbz {{[0-9]+}}, 147(1)
-; CHECK: lbz {{[0-9]+}}, 148(1)
-; CHECK: lbz {{[0-9]+}}, 133(1)
-; CHECK: lbz {{[0-9]+}}, 134(1)
; CHECK: lha {{[0-9]+}}, 126(1)
+; CHECK: lha {{[0-9]+}}, 133(1)
; CHECK: lbz {{[0-9]+}}, 119(1)
; CHECK: lwz {{[0-9]+}}, 140(1)
-; CHECK: lhz {{[0-9]+}}, 154(1)
-; CHECK: lhz {{[0-9]+}}, 156(1)
-; CHECK: lbz {{[0-9]+}}, 163(1)
-; CHECK: lbz {{[0-9]+}}, 164(1)
-; CHECK: lbz {{[0-9]+}}, 161(1)
-; CHECK: lbz {{[0-9]+}}, 162(1)
+; CHECK: lwz {{[0-9]+}}, 147(1)
+; CHECK: lwz {{[0-9]+}}, 154(1)
+; CHECK: lwz {{[0-9]+}}, 161(1)
}
diff --git a/test/CodeGen/PowerPC/structsinregs.ll b/test/CodeGen/PowerPC/structsinregs.ll
index ef706af95d..54de6060d0 100644
--- a/test/CodeGen/PowerPC/structsinregs.ll
+++ b/test/CodeGen/PowerPC/structsinregs.ll
@@ -1,9 +1,5 @@
; RUN: llc -mcpu=pwr7 -O0 -disable-fp-elim < %s | FileCheck %s
-; FIXME: The code generation for packed structs is very poor because the
-; PowerPC target wrongly rejects all unaligned loads. This test case will
-; need to be revised when that is fixed.
-
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
target triple = "powerpc64-unknown-linux-gnu"
@@ -63,13 +59,13 @@ entry:
%call = call i32 @callee1(%struct.s1* byval %p1, %struct.s2* byval %p2, %struct.s3* byval %p3, %struct.s4* byval %p4, %struct.s5* byval %p5, %struct.s6* byval %p6, %struct.s7* byval %p7)
ret i32 %call
-; CHECK: ld 9, 128(31)
-; CHECK: ld 8, 136(31)
-; CHECK: ld 7, 144(31)
-; CHECK: lwz 6, 152(31)
-; CHECK: lwz 5, 160(31)
-; CHECK: lhz 4, 168(31)
-; CHECK: lbz 3, 176(31)
+; CHECK: ld 9, 112(31)
+; CHECK: ld 8, 120(31)
+; CHECK: ld 7, 128(31)
+; CHECK: lwz 6, 136(31)
+; CHECK: lwz 5, 144(31)
+; CHECK: lhz 4, 152(31)
+; CHECK: lbz 3, 160(31)
}
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
@@ -109,8 +105,8 @@ entry:
; CHECK: sth 4, 62(1)
; CHECK: stb 3, 55(1)
; CHECK: lha {{[0-9]+}}, 62(1)
-; CHECK: lbz {{[0-9]+}}, 55(1)
; CHECK: lha {{[0-9]+}}, 68(1)
+; CHECK: lbz {{[0-9]+}}, 55(1)
; CHECK: lwz {{[0-9]+}}, 76(1)
; CHECK: lwz {{[0-9]+}}, 80(1)
; CHECK: lwz {{[0-9]+}}, 88(1)
@@ -155,10 +151,10 @@ entry:
; CHECK: ld 9, 96(1)
; CHECK: ld 8, 88(1)
; CHECK: ld 7, 80(1)
-; CHECK: lwz 6, 152(31)
+; CHECK: lwz 6, 136(31)
; CHECK: ld 5, 64(1)
-; CHECK: lhz 4, 168(31)
-; CHECK: lbz 3, 176(31)
+; CHECK: lhz 4, 152(31)
+; CHECK: lbz 3, 160(31)
}
define internal i32 @callee2(%struct.t1* byval %v1, %struct.t2* byval %v2, %struct.t3* byval %v3, %struct.t4* byval %v4, %struct.t5* byval %v5, %struct.t6* byval %v6, %struct.t7* byval %v7) nounwind {
@@ -195,19 +191,11 @@ entry:
; CHECK: std 5, 64(1)
; CHECK: sth 4, 62(1)
; CHECK: stb 3, 55(1)
-; CHECK: lbz {{[0-9]+}}, 85(1)
-; CHECK: lbz {{[0-9]+}}, 86(1)
-; CHECK: lbz {{[0-9]+}}, 83(1)
-; CHECK: lbz {{[0-9]+}}, 84(1)
-; CHECK: lbz {{[0-9]+}}, 69(1)
-; CHECK: lbz {{[0-9]+}}, 70(1)
; CHECK: lha {{[0-9]+}}, 62(1)
+; CHECK: lha {{[0-9]+}}, 69(1)
; CHECK: lbz {{[0-9]+}}, 55(1)
; CHECK: lwz {{[0-9]+}}, 76(1)
-; CHECK: lhz {{[0-9]+}}, 90(1)
-; CHECK: lhz {{[0-9]+}}, 92(1)
-; CHECK: lbz {{[0-9]+}}, 99(1)
-; CHECK: lbz {{[0-9]+}}, 100(1)
-; CHECK: lbz {{[0-9]+}}, 97(1)
-; CHECK: lbz {{[0-9]+}}, 98(1)
+; CHECK: lwz {{[0-9]+}}, 83(1)
+; CHECK: lwz {{[0-9]+}}, 90(1)
+; CHECK: lwz {{[0-9]+}}, 97(1)
}
diff --git a/test/CodeGen/PowerPC/stwu8.ll b/test/CodeGen/PowerPC/stwu8.ll
index 897bfc6d6c..e0bd043454 100644
--- a/test/CodeGen/PowerPC/stwu8.ll
+++ b/test/CodeGen/PowerPC/stwu8.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s | FileCheck %s
+; RUN: llc -disable-ppc-unaligned < %s | FileCheck %s
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
target triple = "powerpc64-unknown-linux-gnu"
diff --git a/test/CodeGen/PowerPC/tls-gd.ll b/test/CodeGen/PowerPC/tls-gd.ll
index fb8dfaf04a..5f0ef9a050 100644
--- a/test/CodeGen/PowerPC/tls-gd.ll
+++ b/test/CodeGen/PowerPC/tls-gd.ll
@@ -18,6 +18,6 @@ entry:
; CHECK: addis [[REG:[0-9]+]], 2, a@got@tlsgd@ha
; CHECK-NEXT: addi 3, [[REG]], a@got@tlsgd@l
-; CHECK-NEXT: bl __tls_get_addr(a@tlsgd)
+; CHECK: bl __tls_get_addr(a@tlsgd)
; CHECK-NEXT: nop
diff --git a/test/CodeGen/PowerPC/tls-ld-2.ll b/test/CodeGen/PowerPC/tls-ld-2.ll
index 4954afeb24..4399b330ea 100644
--- a/test/CodeGen/PowerPC/tls-ld-2.ll
+++ b/test/CodeGen/PowerPC/tls-ld-2.ll
@@ -18,7 +18,7 @@ entry:
; CHECK: addis [[REG:[0-9]+]], 2, a@got@tlsld@ha
; CHECK-NEXT: addi 3, [[REG]], a@got@tlsld@l
-; CHECK-NEXT: bl __tls_get_addr(a@tlsld)
+; CHECK: bl __tls_get_addr(a@tlsld)
; CHECK-NEXT: nop
-; CHECK-NEXT: addis [[REG2:[0-9]+]], 3, a@dtprel@ha
+; CHECK: addis [[REG2:[0-9]+]], 3, a@dtprel@ha
; CHECK-NEXT: lwa {{[0-9]+}}, a@dtprel@l([[REG2]])
diff --git a/test/CodeGen/PowerPC/tls-ld.ll b/test/CodeGen/PowerPC/tls-ld.ll
index 1ebc6129e2..db02a56f6a 100644
--- a/test/CodeGen/PowerPC/tls-ld.ll
+++ b/test/CodeGen/PowerPC/tls-ld.ll
@@ -18,7 +18,7 @@ entry:
; CHECK: addis [[REG:[0-9]+]], 2, a@got@tlsld@ha
; CHECK-NEXT: addi 3, [[REG]], a@got@tlsld@l
-; CHECK-NEXT: bl __tls_get_addr(a@tlsld)
+; CHECK: bl __tls_get_addr(a@tlsld)
; CHECK-NEXT: nop
-; CHECK-NEXT: addis [[REG2:[0-9]+]], 3, a@dtprel@ha
+; CHECK: addis [[REG2:[0-9]+]], 3, a@dtprel@ha
; CHECK-NEXT: addi {{[0-9]+}}, [[REG2]], a@dtprel@l
diff --git a/test/CodeGen/PowerPC/tls.ll b/test/CodeGen/PowerPC/tls.ll
index 151b4b7dda..2daa60ab37 100644
--- a/test/CodeGen/PowerPC/tls.ll
+++ b/test/CodeGen/PowerPC/tls.ll
@@ -12,7 +12,7 @@ entry:
;OPT0: addis [[REG1:[0-9]+]], 13, a@tprel@ha
;OPT0-NEXT: li [[REG2:[0-9]+]], 42
;OPT0-NEXT: addi [[REG1]], [[REG1]], a@tprel@l
-;OPT0-NEXT: stw [[REG2]], 0([[REG1]])
+;OPT0: stw [[REG2]], 0([[REG1]])
;OPT1: addis [[REG1:[0-9]+]], 13, a@tprel@ha
;OPT1-NEXT: li [[REG2:[0-9]+]], 42
;OPT1-NEXT: stw [[REG2]], a@tprel@l([[REG1]])
diff --git a/test/CodeGen/PowerPC/unal4-std.ll b/test/CodeGen/PowerPC/unal4-std.ll
new file mode 100644
index 0000000000..169bd787c0
--- /dev/null
+++ b/test/CodeGen/PowerPC/unal4-std.ll
@@ -0,0 +1,27 @@
+; RUN: llc < %s -mcpu=pwr7 | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define fastcc void @copy_to_conceal() #0 {
+entry:
+ br i1 undef, label %if.then, label %if.end210
+
+if.then: ; preds = %entry
+ br label %vector.body.i
+
+vector.body.i: ; preds = %vector.body.i, %if.then
+ %index.i = phi i64 [ 0, %vector.body.i ], [ 0, %if.then ]
+ store <8 x i16> zeroinitializer, <8 x i16>* undef, align 2
+ br label %vector.body.i
+
+if.end210: ; preds = %entry
+ ret void
+
+; This will generate two align-1 i64 stores. Make sure that they are
+; indexed stores and not in r+i form (which require the offset to be
+; a multiple of 4).
+; CHECK: @copy_to_conceal
+; CHECK: stdx {{[0-9]+}}, 0,
+}
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/CodeGen/PowerPC/unaligned.ll b/test/CodeGen/PowerPC/unaligned.ll
new file mode 100644
index 0000000000..d05080338f
--- /dev/null
+++ b/test/CodeGen/PowerPC/unaligned.ll
@@ -0,0 +1,73 @@
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s
+target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128-n32"
+
+define void @foo1(i16* %p, i16* %r) nounwind {
+entry:
+ %v = load i16* %p, align 1
+ store i16 %v, i16* %r, align 1
+ ret void
+
+; CHECK: @foo1
+; CHECK: lhz
+; CHECK: sth
+}
+
+define void @foo2(i32* %p, i32* %r) nounwind {
+entry:
+ %v = load i32* %p, align 1
+ store i32 %v, i32* %r, align 1
+ ret void
+
+; CHECK: @foo2
+; CHECK: lwz
+; CHECK: stw
+}
+
+define void @foo3(i64* %p, i64* %r) nounwind {
+entry:
+ %v = load i64* %p, align 1
+ store i64 %v, i64* %r, align 1
+ ret void
+
+; CHECK: @foo3
+; CHECK: ld
+; CHECK: std
+}
+
+define void @foo4(float* %p, float* %r) nounwind {
+entry:
+ %v = load float* %p, align 1
+ store float %v, float* %r, align 1
+ ret void
+
+; CHECK: @foo4
+; CHECK: lfs
+; CHECK: stfs
+}
+
+define void @foo5(double* %p, double* %r) nounwind {
+entry:
+ %v = load double* %p, align 1
+ store double %v, double* %r, align 1
+ ret void
+
+; CHECK: @foo5
+; CHECK: lfd
+; CHECK: stfd
+}
+
+define void @foo6(<4 x float>* %p, <4 x float>* %r) nounwind {
+entry:
+ %v = load <4 x float>* %p, align 1
+ store <4 x float> %v, <4 x float>* %r, align 1
+ ret void
+
+; These loads and stores are legalized into aligned loads and stores
+; using aligned stack slots.
+; CHECK: @foo6
+; CHECK: ld
+; CHECK: ld
+; CHECK: std
+; CHECK: std
+}
+
diff --git a/test/CodeGen/PowerPC/varargs.ll b/test/CodeGen/PowerPC/varargs.ll
index 1769be957a..90f0480d6a 100644
--- a/test/CodeGen/PowerPC/varargs.ll
+++ b/test/CodeGen/PowerPC/varargs.ll
@@ -8,15 +8,16 @@ define i8* @test1(i8** %foo) nounwind {
}
; P32: test1:
-; P32: lwz r4, 0(r3)
-; P32: addi r5, r4, 4
-; P32: stw r5, 0(r3)
-; P32: lwz r3, 0(r4)
-; P32: blr
+; P32: lwz r2, 0(r3)
+; P32: addi r4, r2, 4
+; P32: stw r4, 0(r3)
+; P32: lwz r3, 0(r2)
+; P32: blr
; P64: test1:
-; P64: ld r4, 0(r3)
-; P64: addi r5, r4, 8
-; P64: std r5, 0(r3)
-; P64: ld r3, 0(r4)
-; P64: blr
+; P64: ld r2, 0(r3)
+; P64: addi r4, r2, 8
+; P64: std r4, 0(r3)
+; P64: ld r3, 0(r2)
+; P64: blr
+
diff --git a/test/CodeGen/PowerPC/vec_rounding.ll b/test/CodeGen/PowerPC/vec_rounding.ll
index f41faa0339..7c55638620 100644
--- a/test/CodeGen/PowerPC/vec_rounding.ll
+++ b/test/CodeGen/PowerPC/vec_rounding.ll
@@ -13,8 +13,8 @@ define <2 x double> @floor_v2f64(<2 x double> %p)
ret <2 x double> %t
}
; CHECK: floor_v2f64:
-; CHECK: bl floor
-; CHECK: bl floor
+; CHECK: frim
+; CHECK: frim
declare <4 x double> @llvm.floor.v4f64(<4 x double> %p)
define <4 x double> @floor_v4f64(<4 x double> %p)
@@ -23,10 +23,10 @@ define <4 x double> @floor_v4f64(<4 x double> %p)
ret <4 x double> %t
}
; CHECK: floor_v4f64:
-; CHECK: bl floor
-; CHECK: bl floor
-; CHECK: bl floor
-; CHECK: bl floor
+; CHECK: frim
+; CHECK: frim
+; CHECK: frim
+; CHECK: frim
declare <2 x double> @llvm.ceil.v2f64(<2 x double> %p)
define <2 x double> @ceil_v2f64(<2 x double> %p)
@@ -35,8 +35,8 @@ define <2 x double> @ceil_v2f64(<2 x double> %p)
ret <2 x double> %t
}
; CHECK: ceil_v2f64:
-; CHECK: bl ceil
-; CHECK: bl ceil
+; CHECK: frip
+; CHECK: frip
declare <4 x double> @llvm.ceil.v4f64(<4 x double> %p)
define <4 x double> @ceil_v4f64(<4 x double> %p)
@@ -45,10 +45,10 @@ define <4 x double> @ceil_v4f64(<4 x double> %p)
ret <4 x double> %t
}
; CHECK: ceil_v4f64:
-; CHECK: bl ceil
-; CHECK: bl ceil
-; CHECK: bl ceil
-; CHECK: bl ceil
+; CHECK: frip
+; CHECK: frip
+; CHECK: frip
+; CHECK: frip
declare <2 x double> @llvm.trunc.v2f64(<2 x double> %p)
define <2 x double> @trunc_v2f64(<2 x double> %p)
@@ -57,8 +57,8 @@ define <2 x double> @trunc_v2f64(<2 x double> %p)
ret <2 x double> %t
}
; CHECK: trunc_v2f64:
-; CHECK: bl trunc
-; CHECK: bl trunc
+; CHECK: friz
+; CHECK: friz
declare <4 x double> @llvm.trunc.v4f64(<4 x double> %p)
define <4 x double> @trunc_v4f64(<4 x double> %p)
@@ -67,10 +67,10 @@ define <4 x double> @trunc_v4f64(<4 x double> %p)
ret <4 x double> %t
}
; CHECK: trunc_v4f64:
-; CHECK: bl trunc
-; CHECK: bl trunc
-; CHECK: bl trunc
-; CHECK: bl trunc
+; CHECK: friz
+; CHECK: friz
+; CHECK: friz
+; CHECK: friz
declare <2 x double> @llvm.nearbyint.v2f64(<2 x double> %p)
define <2 x double> @nearbyint_v2f64(<2 x double> %p)
diff --git a/test/CodeGen/PowerPC/vrsave-spill.ll b/test/CodeGen/PowerPC/vrsave-spill.ll
new file mode 100644
index 0000000000..c73206d8fc
--- /dev/null
+++ b/test/CodeGen/PowerPC/vrsave-spill.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -mtriple=powerpc64-apple-darwin -mcpu=g5 | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-apple-darwin"
+
+define <4 x float> @foo(<4 x float> %a, <4 x float> %b) nounwind {
+entry:
+ %c = fadd <4 x float> %a, %b
+ %d = fmul <4 x float> %c, %a
+ call void asm sideeffect "", "~{VRsave}"() nounwind
+ br label %return
+
+; CHECK: @foo
+; CHECK: mfspr r{{[0-9]+}}, 256
+; CHECK: mtspr 256, r{{[0-9]+}}
+
+return: ; preds = %entry
+ ret <4 x float> %d
+}
+
diff --git a/test/CodeGen/PowerPC/vrspill.ll b/test/CodeGen/PowerPC/vrspill.ll
index 7641017c43..9fb3d03477 100644
--- a/test/CodeGen/PowerPC/vrspill.ll
+++ b/test/CodeGen/PowerPC/vrspill.ll
@@ -13,7 +13,7 @@ entry:
ret void
}
-; CHECK: stvx 2, 0, 0
-; CHECK: lvx 2, 0, 0
+; CHECK: stvx 2, 1,
+; CHECK: lvx 2, 1,
declare void @foo(i32*)