47 files changed, 1646 insertions, 136 deletions
diff --git a/test/CodeGen/PowerPC/2007-09-04-AltivecDST.ll b/test/CodeGen/PowerPC/2007-09-04-AltivecDST.ll
index 82ef2b82cb..b6feb5abbc 100644
--- a/test/CodeGen/PowerPC/2007-09-04-AltivecDST.ll
+++ b/test/CodeGen/PowerPC/2007-09-04-AltivecDST.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=ppc64 | grep dst | count 4
+; RUN: llc < %s -march=ppc64 -mattr=+altivec | grep dst | count 4
 
 define hidden void @_Z4borkPc(i8* %image) {
 entry:
diff --git a/test/CodeGen/PowerPC/2009-08-17-inline-asm-addr-mode-breakage.ll b/test/CodeGen/PowerPC/2009-08-17-inline-asm-addr-mode-breakage.ll
index 84aa40c4b5..91253daae3 100644
--- a/test/CodeGen/PowerPC/2009-08-17-inline-asm-addr-mode-breakage.ll
+++ b/test/CodeGen/PowerPC/2009-08-17-inline-asm-addr-mode-breakage.ll
@@ -10,8 +10,8 @@ target triple = "powerpc-apple-darwin10.0"
 define void @foo(i32 %y) nounwind ssp {
 entry:
 ; CHECK: foo
-; CHECK: add r3
-; CHECK: 0(r3)
+; CHECK: add r2
+; CHECK: 0(r2)
   %y_addr = alloca i32                            ; <i32*> [#uses=2]
   %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
   store i32 %y, i32* %y_addr
diff --git a/test/CodeGen/PowerPC/2010-02-12-saveCR.ll b/test/CodeGen/PowerPC/2010-02-12-saveCR.ll
index 974a99a52c..097611a761 100644
--- a/test/CodeGen/PowerPC/2010-02-12-saveCR.ll
+++ b/test/CodeGen/PowerPC/2010-02-12-saveCR.ll
@@ -2,21 +2,21 @@
 ; ModuleID = 'hh.c'
 target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128-n32"
 target triple = "powerpc-apple-darwin9.6"
-; This formerly used R0 for both the stack address and CR.
 
 define void @foo() nounwind {
 entry:
-;CHECK:  mfcr r2
-;CHECK:  lis r3, 1
-;CHECK:  rlwinm r2, r2, 8, 0, 31
-;CHECK:  ori r3, r3, 34524
-;CHECK:  stwx r2, r1, r3
-; Make sure that the register scavenger returns the same temporary register.
-;CHECK:  mfcr r2
-;CHECK:  lis r3, 1
-;CHECK:  rlwinm r2, r2, 12, 0, 31
-;CHECK:  ori r3, r3, 34520
-;CHECK:  stwx r2, r1, r3
+; Note that part of what is being checked here is proper register reuse.
+; CHECK: mfcr [[T1:r[0-9]+]]                         ; cr2
+; CHECK: lis [[T2:r[0-9]+]], 1
+; CHECK: addi r3, r1, 72
+; CHECK: rlwinm [[T1]], [[T1]], 8, 0, 31
+; CHECK: ori [[T2]], [[T2]], 34540
+; CHECK: stwx [[T1]], r1, [[T2]]
+; CHECK: lis [[T3:r[0-9]+]], 1
+; CHECK: mfcr [[T4:r[0-9]+]]                         ; cr3
+; CHECK: ori [[T3]], [[T3]], 34536
+; CHECK: rlwinm [[T4]], [[T4]], 12, 0, 31
+; CHECK: stwx [[T4]], r1, [[T3]]
   %x = alloca [100000 x i8]                       ; <[100000 x i8]*> [#uses=1]
   %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
   %x1 = bitcast [100000 x i8]* %x to i8*          ; <i8*> [#uses=1]
@@ -25,11 +25,16 @@ entry:
   br label %return
 
 return:                                           ; preds = %entry
-;CHECK:  lis r3, 1
-;CHECK:  ori r3, r3, 34524
-;CHECK:  lwzx r2, r1, r3
-;CHECK:  rlwinm r2, r2, 24, 0, 31
-;CHECK:  mtcrf 32, r2
+; CHECK: lis [[T1:r[0-9]+]], 1
+; CHECK: ori [[T1]], [[T1]], 34536
+; CHECK: lwzx [[T1]], r1, [[T1]]
+; CHECK: rlwinm [[T1]], [[T1]], 20, 0, 31
+; CHECK: mtcrf 16, [[T1]]
+; CHECK: lis [[T1]], 1
+; CHECK: ori [[T1]], [[T1]], 34540
+; CHECK: lwzx [[T1]], r1, [[T1]]
+; CHECK: rlwinm [[T1]], [[T1]], 24, 0, 31
+; CHECK: mtcrf 32, [[T1]]
   ret void
 }
 
diff --git a/test/CodeGen/PowerPC/2010-05-03-retaddr1.ll b/test/CodeGen/PowerPC/2010-05-03-retaddr1.ll
index 72ae9d6c73..0dbc2d0180 100644
--- a/test/CodeGen/PowerPC/2010-05-03-retaddr1.ll
+++ b/test/CodeGen/PowerPC/2010-05-03-retaddr1.ll
@@ -18,8 +18,8 @@ entry:
 ; CHECK: _g:
 ; CHECK:  mflr r0
 ; CHECK:  stw r0, 8(r1)
-; CHECK:  lwz r3, 0(r1)
-; CHECK:  lwz r3, 8(r3)
+; CHECK:  lwz r2, 0(r1)
+; CHECK:  lwz r3, 8(r2)
   %0 = tail call i8* @llvm.returnaddress(i32 1)   ; <i8*> [#uses=1]
   ret i8* %0
 }
diff --git a/test/CodeGen/PowerPC/2010-12-18-PPCStackRefs.ll b/test/CodeGen/PowerPC/2010-12-18-PPCStackRefs.ll
index bf3d577a36..d1a3c9f46b 100644
--- a/test/CodeGen/PowerPC/2010-12-18-PPCStackRefs.ll
+++ b/test/CodeGen/PowerPC/2010-12-18-PPCStackRefs.ll
@@ -7,7 +7,7 @@ define i32 @main() nounwind {
 entry:
 ; Make sure we're generating references using the red zone
 ; CHECK: main:
-; CHECK: stw r3, -12(r1)
+; CHECK: stw r2, -12(r1)
   %retval = alloca i32
   %0 = alloca i32
   %"alloca point" = bitcast i32 0 to i32
diff --git a/test/CodeGen/PowerPC/DbgValueOtherTargets.test b/test/CodeGen/PowerPC/DbgValueOtherTargets.test
index b1b338776b..9702934f7e 100644
--- a/test/CodeGen/PowerPC/DbgValueOtherTargets.test
+++ b/test/CodeGen/PowerPC/DbgValueOtherTargets.test
@@ -1 +1 @@
-; RUN: llc -O0 -march=ppc32 -asm-verbose < %S/../Inputs/DbgValueOtherTargets.ll | FileCheck %S/../Inputs/DbgValueOtherTargets.ll
+RUN: llc -O0 -march=ppc32 -asm-verbose < %S/../Inputs/DbgValueOtherTargets.ll | FileCheck %S/../Inputs/DbgValueOtherTargets.ll
diff --git a/test/CodeGen/PowerPC/LargeAbsoluteAddr.ll b/test/CodeGen/PowerPC/LargeAbsoluteAddr.ll
index 6f985c819f..e8765deab0 100644
--- a/test/CodeGen/PowerPC/LargeAbsoluteAddr.ll
+++ b/test/CodeGen/PowerPC/LargeAbsoluteAddr.ll
@@ -1,9 +1,9 @@
 ; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin | \
-; RUN:   grep "stw r4, 32751"
+; RUN:   grep "stw r3, 32751"
 ; RUN: llc < %s -march=ppc64 -mtriple=powerpc-apple-darwin | \
-; RUN:   grep "stw r4, 32751"
+; RUN:   grep "stw r3, 32751"
 ; RUN: llc < %s -march=ppc64 -mtriple=powerpc-apple-darwin | \
-; RUN:   grep "std r4, 9024"
+; RUN:   grep "std r3, 9024"
 
 define void @test() nounwind {
 	store i32 0, i32* inttoptr (i64 48725999 to i32*)
diff --git a/test/CodeGen/PowerPC/Stats/lit.local.cfg b/test/CodeGen/PowerPC/Stats/lit.local.cfg
deleted file mode 100644
index 2608e139e9..0000000000
--- a/test/CodeGen/PowerPC/Stats/lit.local.cfg
+++ /dev/null
@@ -1,8 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
-
-targets = set(config.root.targets_to_build.split())
-if not 'PowerPC' in targets:
-    config.unsupported = True
-
-if not config.root.enable_assertions:
-    config.unsupported = True
diff --git a/test/CodeGen/PowerPC/allocate-r0.ll b/test/CodeGen/PowerPC/allocate-r0.ll
new file mode 100644
index 0000000000..1cf4cec076
--- /dev/null
+++ b/test/CodeGen/PowerPC/allocate-r0.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=g5 | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define i64 @foo(i64 %a) nounwind { ; returns its argument unchanged after the register-clobbering asm below
+entry:
+  call void asm sideeffect "", "~{r3},~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11},~{r12}"() nounwind ; empty asm body; its only effect is to mark r3-r12 as clobbered
+  br label %return
+
+; CHECK: @foo
+; Because r0 is allocatable, we can use it to hold r3 without spilling.
+; CHECK: mr 0, 3
+; CHECK: mr 3, 0
+
+return:                                           ; preds = %entry
+  ret i64 %a ; %a is live across the asm, so it has to be kept somewhere outside r3-r12
+}
+
diff --git a/test/CodeGen/PowerPC/asym-regclass-copy.ll b/test/CodeGen/PowerPC/asym-regclass-copy.ll
new file mode 100644
index 0000000000..d04a6c98ee
--- /dev/null
+++ b/test/CodeGen/PowerPC/asym-regclass-copy.ll
@@ -0,0 +1,56 @@
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; This tests that the GPRC/GPRC_NOR0 intersection subclass relationship with
+; GPRC is handled correctly. When it was not, this test would assert.
+
+@gen_random.last = external unnamed_addr global i64, align 8
+@.str = external unnamed_addr constant [4 x i8], align 1
+
+declare double @gen_random(double) #0
+
+declare void @benchmark_heapsort(i32 signext, double* nocapture) #0
+
+define signext i32 @main(i32 signext %argc, i8** nocapture %argv) #0 { ; reduced reproducer: %argc and %argv are never used in the body
+entry:
+  br i1 undef, label %cond.true, label %cond.end ; branch condition is undef
+
+cond.true:                                        ; preds = %entry
+  br label %cond.end
+
+cond.end:                                         ; preds = %cond.true, %entry
+  %cond = phi i32 [ 0, %cond.true ], [ 8000000, %entry ]
+  %add = add i32 %cond, 1 ; 32-bit loop bound, compared against the truncated counter below
+  %conv = sext i32 %add to i64
+  %mul = shl nsw i64 %conv, 3 ; %conv * 8
+  %call1 = tail call noalias i8* @malloc(i64 %mul) #1 ; the returned pointer is never used
+  br i1 undef, label %for.end, label %for.body.lr.ph ; branch condition is undef
+
+for.body.lr.ph:                                   ; preds = %cond.end
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %for.body.lr.ph
+  %indvars.iv = phi i64 [ 1, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
+  %indvars.iv.next = add i64 %indvars.iv, 1 ; 64-bit counter, starts at 1 and steps by 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32 ; narrow the counter to 32 bits for the exit compare
+  %exitcond = icmp eq i32 %lftr.wideiv, %add ; leave the loop once the narrowed counter equals %add
+  br i1 %exitcond, label %for.cond.for.end_crit_edge, label %for.body
+
+for.cond.for.end_crit_edge:                       ; preds = %for.body
+  br label %for.end
+
+for.end:                                          ; preds = %for.cond.for.end_crit_edge, %cond.end
+  ret i32 0 ; every path returns 0
+}
+
+declare noalias i8* @malloc(i64) #0
+
+declare signext i32 @printf(i8* nocapture, ...) #0
+
+declare void @free(i8* nocapture) #0
+
+declare i64 @strtol(i8*, i8** nocapture, i32 signext) #0
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind }
diff --git a/test/CodeGen/PowerPC/atomic-1.ll b/test/CodeGen/PowerPC/atomic-1.ll
index cbfa4094fb..838db20ddd 100644
--- a/test/CodeGen/PowerPC/atomic-1.ll
+++ b/test/CodeGen/PowerPC/atomic-1.ll
@@ -1,10 +1,10 @@
-; RUN: llc < %s -march=ppc32 |  FileCheck %s
+; RUN: llc < %s -mtriple=powerpc-apple-darwin -march=ppc32 |  FileCheck %s
 
 define i32 @exchange_and_add(i32* %mem, i32 %val) nounwind {
 ; CHECK: exchange_and_add:
-; CHECK: lwarx
+; CHECK: lwarx {{r[0-9]+}}, 0, {{r[0-9]+}}
   %tmp = atomicrmw add i32* %mem, i32 %val monotonic
-; CHECK: stwcx.
+; CHECK: stwcx. {{r[0-9]+}}, 0, {{r[0-9]+}}
   ret i32 %tmp
 }
 
diff --git a/test/CodeGen/PowerPC/bswap-load-store.ll b/test/CodeGen/PowerPC/bswap-load-store.ll
index 4f6bfc7299..53bbc52167 100644
--- a/test/CodeGen/PowerPC/bswap-load-store.ll
+++ b/test/CodeGen/PowerPC/bswap-load-store.ll
@@ -1,5 +1,7 @@
-; RUN: llc < %s -march=ppc32 | FileCheck %s -check-prefix=X32
-; RUN: llc < %s -march=ppc64 | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -march=ppc32 -mcpu=ppc32 | FileCheck %s -check-prefix=X32
+; RUN: llc < %s -march=ppc64 -mcpu=ppc64 | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -march=ppc64 -mcpu=pwr7 | FileCheck %s -check-prefix=PWR7
+; RUN: llc < %s -march=ppc32 -mcpu=pwr7 | FileCheck %s -check-prefix=X32
 
 
 define void @STWBRX(i32 %i, i8* %ptr, i32 %off) {
@@ -34,18 +36,47 @@ define i16 @LHBRX(i8* %ptr, i32 %off) {
         ret i16 %tmp6
 }
 
+define void @STDBRX(i64 %i, i8* %ptr, i64 %off) { ; stores the byte-swapped %i as an i64 at %ptr + %off
+        %tmp1 = getelementptr i8* %ptr, i64 %off                ; <i8*> [#uses=1]
+        %tmp1.upgrd.1 = bitcast i8* %tmp1 to i64*               ; <i64*> [#uses=1]
+        %tmp13 = tail call i64 @llvm.bswap.i64( i64 %i )                ; <i64> [#uses=1]
+        store i64 %tmp13, i64* %tmp1.upgrd.1
+        ret void
+}
+
+define i64 @LDBRX(i8* %ptr, i64 %off) { ; loads an i64 from %ptr + %off and returns it byte-swapped
+        %tmp1 = getelementptr i8* %ptr, i64 %off                ; <i8*> [#uses=1]
+        %tmp1.upgrd.2 = bitcast i8* %tmp1 to i64*               ; <i64*> [#uses=1]
+        %tmp = load i64* %tmp1.upgrd.2          ; <i64> [#uses=1]
+        %tmp14 = tail call i64 @llvm.bswap.i64( i64 %tmp )              ; <i64> [#uses=1]
+        ret i64 %tmp14
+}
+
 declare i32 @llvm.bswap.i32(i32)
 
 declare i16 @llvm.bswap.i16(i16)
 
+declare i64 @llvm.bswap.i64(i64)
+
 
 ; X32: stwbrx
 ; X32: lwbrx
 ; X32: sthbrx
 ; X32: lhbrx
+; X32-NOT: ldbrx
+; X32-NOT: stdbrx
 
 ; X64: stwbrx
 ; X64: lwbrx
 ; X64: sthbrx
 ; X64: lhbrx
+; X64-NOT: ldbrx
+; X64-NOT: stdbrx
+
+; PWR7: stwbrx
+; PWR7: lwbrx
+; PWR7: sthbrx
+; PWR7: lhbrx
+; PWR7: stdbrx
+; PWR7: ldbrx
 
diff --git a/test/CodeGen/PowerPC/cr-spills.ll b/test/CodeGen/PowerPC/cr-spills.ll
new file mode 100644
index 0000000000..d6df7a2376
--- /dev/null
+++ b/test/CodeGen/PowerPC/cr-spills.ll
@@ -0,0 +1,409 @@
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; This test case exercises several functions related to CR spilling, both in
+; frame lowering and in handling CR register pressure. When the register kill
+; flags were not being set correctly, this would cause the register scavenger to
+; assert.
+
+@SetupFastFullPelSearch.orig_pels = external unnamed_addr global [768 x i16], align 2
+@weight_luma = external global i32
+@offset_luma = external global i32
+@wp_luma_round = external global i32, align 4
+@luma_log_weight_denom = external global i32, align 4
+
+define void @SetupFastFullPelSearch() #0 {
+entry:
+  %mul10 = mul nsw i32 undef, undef
+  br i1 undef, label %land.end, label %land.lhs.true
+
+land.lhs.true:                                    ; preds = %entry
+  switch i32 0, label %land.end [
+    i32 0, label %land.rhs
+    i32 3, label %land.rhs
+  ]
+
+land.rhs:                                         ; preds = %land.lhs.true, %land.lhs.true
+  %tobool21 = icmp ne i32 undef, 0
+  br label %land.end
+
+land.end:                                         ; preds = %land.rhs, %land.lhs.true, %entry
+  %0 = phi i1 [ %tobool21, %land.rhs ], [ false, %land.lhs.true ], [ false, %entry ]
+  %cond = load i32** undef, align 8
+  br i1 undef, label %if.then95, label %for.body.lr.ph
+
+if.then95:                                        ; preds = %land.end
+  %cmp.i4.i1427 = icmp slt i32 undef, undef
+  br label %for.body.lr.ph
+
+for.body.lr.ph:                                   ; preds = %if.then95, %land.end
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %for.body.lr.ph
+  br i1 undef, label %for.body, label %for.body252
+
+for.body252:                                      ; preds = %for.inc997, %for.body
+  %shl263 = add i32 undef, 80
+  br i1 %0, label %for.cond286.preheader, label %for.cond713.preheader
+
+for.cond286.preheader:                            ; preds = %for.body252
+  br label %for.cond290.preheader
+
+for.cond290.preheader:                            ; preds = %for.end520, %for.cond286.preheader
+  %srcptr.31595 = phi i16* [ getelementptr inbounds ([768 x i16]* @SetupFastFullPelSearch.orig_pels, i64 0, i64 0), %for.cond286.preheader ], [ null, %for.end520 ]
+  %1 = load i32* undef, align 4, !tbaa !0
+  %2 = load i32* @weight_luma, align 4, !tbaa !0
+  %3 = load i32* @wp_luma_round, align 4, !tbaa !0
+  %4 = load i32* @luma_log_weight_denom, align 4, !tbaa !0
+  %5 = load i32* @offset_luma, align 4, !tbaa !0
+  %incdec.ptr502.sum = add i64 undef, 16
+  br label %for.body293
+
+for.body293:                                      ; preds = %for.body293, %for.cond290.preheader
+  %srcptr.41591 = phi i16* [ %srcptr.31595, %for.cond290.preheader ], [ undef, %for.body293 ]
+  %refptr.11590 = phi i16* [ undef, %for.cond290.preheader ], [ %add.ptr517, %for.body293 ]
+  %LineSadBlk0.01588 = phi i32 [ 0, %for.cond290.preheader ], [ %add346, %for.body293 ]
+  %LineSadBlk1.01587 = phi i32 [ 0, %for.cond290.preheader ], [ %add402, %for.body293 ]
+  %LineSadBlk3.01586 = phi i32 [ 0, %for.cond290.preheader ], [ %add514, %for.body293 ]
+  %LineSadBlk2.01585 = phi i32 [ 0, %for.cond290.preheader ], [ %add458, %for.body293 ]
+  %6 = load i16* %refptr.11590, align 2, !tbaa !3
+  %conv294 = zext i16 %6 to i32
+  %mul295 = mul nsw i32 %conv294, %2
+  %add296 = add nsw i32 %mul295, %3
+  %shr = ashr i32 %add296, %4
+  %add297 = add nsw i32 %shr, %5
+  %cmp.i.i1513 = icmp sgt i32 %add297, 0
+  %cond.i.i1514 = select i1 %cmp.i.i1513, i32 %add297, i32 0
+  %cmp.i4.i1515 = icmp slt i32 %cond.i.i1514, %1
+  %cond.i5.i1516 = select i1 %cmp.i4.i1515, i32 %cond.i.i1514, i32 %1
+  %7 = load i16* %srcptr.41591, align 2, !tbaa !3
+  %conv300 = zext i16 %7 to i32
+  %sub301 = sub nsw i32 %cond.i5.i1516, %conv300
+  %idxprom302 = sext i32 %sub301 to i64
+  %arrayidx303 = getelementptr inbounds i32* %cond, i64 %idxprom302
+  %8 = load i32* %arrayidx303, align 4, !tbaa !0
+  %add304 = add nsw i32 %8, %LineSadBlk0.01588
+  %9 = load i32* undef, align 4, !tbaa !0
+  %add318 = add nsw i32 %add304, %9
+  %10 = load i16* undef, align 2, !tbaa !3
+  %conv321 = zext i16 %10 to i32
+  %mul322 = mul nsw i32 %conv321, %2
+  %add323 = add nsw i32 %mul322, %3
+  %shr324 = ashr i32 %add323, %4
+  %add325 = add nsw i32 %shr324, %5
+  %cmp.i.i1505 = icmp sgt i32 %add325, 0
+  %cond.i.i1506 = select i1 %cmp.i.i1505, i32 %add325, i32 0
+  %cmp.i4.i1507 = icmp slt i32 %cond.i.i1506, %1
+  %cond.i5.i1508 = select i1 %cmp.i4.i1507, i32 %cond.i.i1506, i32 %1
+  %sub329 = sub nsw i32 %cond.i5.i1508, 0
+  %idxprom330 = sext i32 %sub329 to i64
+  %arrayidx331 = getelementptr inbounds i32* %cond, i64 %idxprom330
+  %11 = load i32* %arrayidx331, align 4, !tbaa !0
+  %add332 = add nsw i32 %add318, %11
+  %cmp.i.i1501 = icmp sgt i32 undef, 0
+  %cond.i.i1502 = select i1 %cmp.i.i1501, i32 undef, i32 0
+  %cmp.i4.i1503 = icmp slt i32 %cond.i.i1502, %1
+  %cond.i5.i1504 = select i1 %cmp.i4.i1503, i32 %cond.i.i1502, i32 %1
+  %incdec.ptr341 = getelementptr inbounds i16* %srcptr.41591, i64 4
+  %12 = load i16* null, align 2, !tbaa !3
+  %conv342 = zext i16 %12 to i32
+  %sub343 = sub nsw i32 %cond.i5.i1504, %conv342
+  %idxprom344 = sext i32 %sub343 to i64
+  %arrayidx345 = getelementptr inbounds i32* %cond, i64 %idxprom344
+  %13 = load i32* %arrayidx345, align 4, !tbaa !0
+  %add346 = add nsw i32 %add332, %13
+  %incdec.ptr348 = getelementptr inbounds i16* %refptr.11590, i64 5
+  %14 = load i16* null, align 2, !tbaa !3
+  %conv349 = zext i16 %14 to i32
+  %mul350 = mul nsw i32 %conv349, %2
+  %add351 = add nsw i32 %mul350, %3
+  %shr352 = ashr i32 %add351, %4
+  %add353 = add nsw i32 %shr352, %5
+  %cmp.i.i1497 = icmp sgt i32 %add353, 0
+  %cond.i.i1498 = select i1 %cmp.i.i1497, i32 %add353, i32 0
+  %cmp.i4.i1499 = icmp slt i32 %cond.i.i1498, %1
+  %cond.i5.i1500 = select i1 %cmp.i4.i1499, i32 %cond.i.i1498, i32 %1
+  %incdec.ptr355 = getelementptr inbounds i16* %srcptr.41591, i64 5
+  %15 = load i16* %incdec.ptr341, align 2, !tbaa !3
+  %conv356 = zext i16 %15 to i32
+  %sub357 = sub nsw i32 %cond.i5.i1500, %conv356
+  %idxprom358 = sext i32 %sub357 to i64
+  %arrayidx359 = getelementptr inbounds i32* %cond, i64 %idxprom358
+  %16 = load i32* %arrayidx359, align 4, !tbaa !0
+  %add360 = add nsw i32 %16, %LineSadBlk1.01587
+  %incdec.ptr362 = getelementptr inbounds i16* %refptr.11590, i64 6
+  %17 = load i16* %incdec.ptr348, align 2, !tbaa !3
+  %conv363 = zext i16 %17 to i32
+  %mul364 = mul nsw i32 %conv363, %2
+  %add365 = add nsw i32 %mul364, %3
+  %shr366 = ashr i32 %add365, %4
+  %add367 = add nsw i32 %shr366, %5
+  %cmp.i.i1493 = icmp sgt i32 %add367, 0
+  %cond.i.i1494 = select i1 %cmp.i.i1493, i32 %add367, i32 0
+  %cmp.i4.i1495 = icmp slt i32 %cond.i.i1494, %1
+  %cond.i5.i1496 = select i1 %cmp.i4.i1495, i32 %cond.i.i1494, i32 %1
+  %incdec.ptr369 = getelementptr inbounds i16* %srcptr.41591, i64 6
+  %18 = load i16* %incdec.ptr355, align 2, !tbaa !3
+  %conv370 = zext i16 %18 to i32
+  %sub371 = sub nsw i32 %cond.i5.i1496, %conv370
+  %idxprom372 = sext i32 %sub371 to i64
+  %arrayidx373 = getelementptr inbounds i32* %cond, i64 %idxprom372
+  %19 = load i32* %arrayidx373, align 4, !tbaa !0
+  %add374 = add nsw i32 %add360, %19
+  %incdec.ptr376 = getelementptr inbounds i16* %refptr.11590, i64 7
+  %20 = load i16* %incdec.ptr362, align 2, !tbaa !3
+  %conv377 = zext i16 %20 to i32
+  %mul378 = mul nsw i32 %conv377, %2
+  %add379 = add nsw i32 %mul378, %3
+  %shr380 = ashr i32 %add379, %4
+  %add381 = add nsw i32 %shr380, %5
+  %cmp.i.i1489 = icmp sgt i32 %add381, 0
+  %cond.i.i1490 = select i1 %cmp.i.i1489, i32 %add381, i32 0
+  %cmp.i4.i1491 = icmp slt i32 %cond.i.i1490, %1
+  %cond.i5.i1492 = select i1 %cmp.i4.i1491, i32 %cond.i.i1490, i32 %1
+  %incdec.ptr383 = getelementptr inbounds i16* %srcptr.41591, i64 7
+  %21 = load i16* %incdec.ptr369, align 2, !tbaa !3
+  %conv384 = zext i16 %21 to i32
+  %sub385 = sub nsw i32 %cond.i5.i1492, %conv384
+  %idxprom386 = sext i32 %sub385 to i64
+  %arrayidx387 = getelementptr inbounds i32* %cond, i64 %idxprom386
+  %22 = load i32* %arrayidx387, align 4, !tbaa !0
+  %add388 = add nsw i32 %add374, %22
+  %23 = load i16* %incdec.ptr376, align 2, !tbaa !3
+  %conv391 = zext i16 %23 to i32
+  %mul392 = mul nsw i32 %conv391, %2
+  %add395 = add nsw i32 0, %5
+  %cmp.i.i1485 = icmp sgt i32 %add395, 0
+  %cond.i.i1486 = select i1 %cmp.i.i1485, i32 %add395, i32 0
+  %cmp.i4.i1487 = icmp slt i32 %cond.i.i1486, %1
+  %cond.i5.i1488 = select i1 %cmp.i4.i1487, i32 %cond.i.i1486, i32 %1
+  %incdec.ptr397 = getelementptr inbounds i16* %srcptr.41591, i64 8
+  %24 = load i16* %incdec.ptr383, align 2, !tbaa !3
+  %conv398 = zext i16 %24 to i32
+  %sub399 = sub nsw i32 %cond.i5.i1488, %conv398
+  %idxprom400 = sext i32 %sub399 to i64
+  %arrayidx401 = getelementptr inbounds i32* %cond, i64 %idxprom400
+  %25 = load i32* %arrayidx401, align 4, !tbaa !0
+  %add402 = add nsw i32 %add388, %25
+  %incdec.ptr404 = getelementptr inbounds i16* %refptr.11590, i64 9
+  %cmp.i4.i1483 = icmp slt i32 undef, %1
+  %cond.i5.i1484 = select i1 %cmp.i4.i1483, i32 undef, i32 %1
+  %26 = load i16* %incdec.ptr397, align 2, !tbaa !3
+  %conv412 = zext i16 %26 to i32
+  %sub413 = sub nsw i32 %cond.i5.i1484, %conv412
+  %idxprom414 = sext i32 %sub413 to i64
+  %arrayidx415 = getelementptr inbounds i32* %cond, i64 %idxprom414
+  %27 = load i32* %arrayidx415, align 4, !tbaa !0
+  %add416 = add nsw i32 %27, %LineSadBlk2.01585
+  %incdec.ptr418 = getelementptr inbounds i16* %refptr.11590, i64 10
+  %28 = load i16* %incdec.ptr404, align 2, !tbaa !3
+  %conv419 = zext i16 %28 to i32
+  %mul420 = mul nsw i32 %conv419, %2
+  %add421 = add nsw i32 %mul420, %3
+  %shr422 = ashr i32 %add421, %4
+  %add423 = add nsw i32 %shr422, %5
+  %cmp.i.i1477 = icmp sgt i32 %add423, 0
+  %cond.i.i1478 = select i1 %cmp.i.i1477, i32 %add423, i32 0
+  %cmp.i4.i1479 = icmp slt i32 %cond.i.i1478, %1
+  %cond.i5.i1480 = select i1 %cmp.i4.i1479, i32 %cond.i.i1478, i32 %1
+  %incdec.ptr425 = getelementptr inbounds i16* %srcptr.41591, i64 10
+  %sub427 = sub nsw i32 %cond.i5.i1480, 0
+  %idxprom428 = sext i32 %sub427 to i64
+  %arrayidx429 = getelementptr inbounds i32* %cond, i64 %idxprom428
+  %29 = load i32* %arrayidx429, align 4, !tbaa !0
+  %add430 = add nsw i32 %add416, %29
+  %incdec.ptr432 = getelementptr inbounds i16* %refptr.11590, i64 11
+  %30 = load i16* %incdec.ptr418, align 2, !tbaa !3
+  %conv433 = zext i16 %30 to i32
+  %mul434 = mul nsw i32 %conv433, %2
+  %add435 = add nsw i32 %mul434, %3
+  %shr436 = ashr i32 %add435, %4
+  %add437 = add nsw i32 %shr436, %5
+  %cmp.i.i1473 = icmp sgt i32 %add437, 0
+  %cond.i.i1474 = select i1 %cmp.i.i1473, i32 %add437, i32 0
+  %cmp.i4.i1475 = icmp slt i32 %cond.i.i1474, %1
+  %cond.i5.i1476 = select i1 %cmp.i4.i1475, i32 %cond.i.i1474, i32 %1
+  %31 = load i16* %incdec.ptr425, align 2, !tbaa !3
+  %conv440 = zext i16 %31 to i32
+  %sub441 = sub nsw i32 %cond.i5.i1476, %conv440
+  %idxprom442 = sext i32 %sub441 to i64
+  %arrayidx443 = getelementptr inbounds i32* %cond, i64 %idxprom442
+  %32 = load i32* %arrayidx443, align 4, !tbaa !0
+  %add444 = add nsw i32 %add430, %32
+  %incdec.ptr446 = getelementptr inbounds i16* %refptr.11590, i64 12
+  %33 = load i16* %incdec.ptr432, align 2, !tbaa !3
+  %conv447 = zext i16 %33 to i32
+  %mul448 = mul nsw i32 %conv447, %2
+  %add449 = add nsw i32 %mul448, %3
+  %s