about summary refs log tree commit diff
diff options
context:
space:
mode:
author Jakob Stoklund Olesen <stoklund@2pi.dk> 2011-03-31 22:14:03 +0000
committer Jakob Stoklund Olesen <stoklund@2pi.dk> 2011-03-31 22:14:03 +0000
commit ca6fd009ad7905f39ce66f16164c652337e14c61 (patch)
tree 45131b54326b48f1db4f75766e4b639ddb94cad1
parent 7ebdc37d2bbfc62ff7957b346bb3a91eb6044d5f (diff)
Fix ARM tests to be register allocator independent.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@128680 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- test/CodeGen/ARM/2009-10-27-double-align.ll 4
-rw-r--r-- test/CodeGen/ARM/arguments.ll 2
-rw-r--r-- test/CodeGen/ARM/arm-and-tst-peephole.ll 6
-rw-r--r-- test/CodeGen/ARM/arm-returnaddr.ll 9
-rw-r--r-- test/CodeGen/ARM/fnmscs.ll 17
-rw-r--r-- test/CodeGen/ARM/indirectbr.ll 8
-rw-r--r-- test/CodeGen/ARM/ldrd.ll 12
-rw-r--r-- test/CodeGen/ARM/memcpy-inline.ll 14
-rw-r--r-- test/CodeGen/ARM/peephole-bitcast.ll 5
-rw-r--r-- test/CodeGen/ARM/reg_sequence.ll 22
-rw-r--r-- test/CodeGen/ARM/str_pre-2.ll 5
-rw-r--r-- test/CodeGen/ARM/thumb1-varalloc.ll 6
-rw-r--r-- test/CodeGen/ARM/vcgt.ll 7
-rw-r--r-- test/CodeGen/ARM/vfp.ll 5
-rw-r--r-- test/CodeGen/ARM/vld1.ll 7
-rw-r--r-- test/CodeGen/ARM/vld3.ll 7
-rw-r--r-- test/CodeGen/ARM/vldlane.ll 3
17 files changed, 83 insertions, 56 deletions
diff --git a/test/CodeGen/ARM/2009-10-27-double-align.ll b/test/CodeGen/ARM/2009-10-27-double-align.ll
index c31b116c55..10ca722f9e 100644
--- a/test/CodeGen/ARM/2009-10-27-double-align.ll
+++ b/test/CodeGen/ARM/2009-10-27-double-align.ll
@@ -1,4 +1,6 @@
-; RUN: llc < %s -mtriple=arm-linux-gnueabi | FileCheck %s
+; RUN: llc < %s -mtriple=arm-linux-gnueabi -regalloc=linearscan | FileCheck %s
+
+; This test depends on linear scan's reserved register coalescing.
@.str = private constant [1 x i8] zeroinitializer, align 1
diff --git a/test/CodeGen/ARM/arguments.ll b/test/CodeGen/ARM/arguments.ll
index c7fcb9755d..a8b42e63b7 100644
--- a/test/CodeGen/ARM/arguments.ll
+++ b/test/CodeGen/ARM/arguments.ll
@@ -14,7 +14,7 @@ define i32 @f1(i32 %a, i64 %b) {
define i32 @f2() nounwind optsize {
; ELF: f2:
; ELF: mov [[REGISTER:(r[0-9]+)]], #128
-; ELF: str [[REGISTER]], [sp]
+; ELF: str [[REGISTER]], [
; DARWIN: f2:
; DARWIN: mov r3, #128
entry:
diff --git a/test/CodeGen/ARM/arm-and-tst-peephole.ll b/test/CodeGen/ARM/arm-and-tst-peephole.ll
index 50c638b739..444dce7bf6 100644
--- a/test/CodeGen/ARM/arm-and-tst-peephole.ll
+++ b/test/CodeGen/ARM/arm-and-tst-peephole.ll
@@ -26,9 +26,9 @@ tailrecurse: ; preds = %sw.bb, %entry
; ARM: ands r12, r12, #3
; ARM-NEXT: beq
-; THUMB: movs r5, #3
-; THUMB-NEXT: ands r5, r4
-; THUMB-NEXT: cmp r5, #0
+; THUMB: movs r[[R0:[0-9]+]], #3
+; THUMB-NEXT: ands r[[R0]], r
+; THUMB-NEXT: cmp r[[R0]], #0
; THUMB-NEXT: beq
; T2: ands r12, r12, #3
diff --git a/test/CodeGen/ARM/arm-returnaddr.ll b/test/CodeGen/ARM/arm-returnaddr.ll
index 382a183346..ec2ebc9aae 100644
--- a/test/CodeGen/ARM/arm-returnaddr.ll
+++ b/test/CodeGen/ARM/arm-returnaddr.ll
@@ -1,8 +1,11 @@
-; RUN: llc < %s -mtriple=arm-apple-darwin | FileCheck %s
-; RUN: llc < %s -mtriple=thumbv6-apple-darwin | FileCheck %s
+; RUN: llc < %s -mtriple=arm-apple-darwin -regalloc=linearscan | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv6-apple-darwin -regalloc=linearscan | FileCheck %s
; rdar://8015977
; rdar://8020118
+; This test needs the reserved register r7 to be coalesced into the ldr.
+; So far, only linear scan can do that.
+
define i8* @rt0(i32 %x) nounwind readnone {
entry:
; CHECK: rt0:
@@ -16,7 +19,7 @@ define i8* @rt2() nounwind readnone {
entry:
; CHECK: rt2:
; CHECK: {r7, lr}
-; CHECK: ldr r0, [r7]
+; CHECK: ldr r[[R0:[0-9]+]], [r7]
; CHECK: ldr r0, [r0]
; CHECK: ldr r0, [r0, #4]
%0 = tail call i8* @llvm.returnaddress(i32 2)
diff --git a/test/CodeGen/ARM/fnmscs.ll b/test/CodeGen/ARM/fnmscs.ll
index 76c806761f..9facf20fee 100644
--- a/test/CodeGen/ARM/fnmscs.ll
+++ b/test/CodeGen/ARM/fnmscs.ll
@@ -1,6 +1,7 @@
; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2
; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NEON
; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=A8
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 -regalloc=basic | FileCheck %s -check-prefix=A8
define float @t1(float %acc, float %a, float %b) nounwind {
entry:
@@ -11,8 +12,8 @@ entry:
; NEON: vnmla.f32
; A8: t1:
-; A8: vnmul.f32 s0, s{{[01]}}, s{{[01]}}
-; A8: vsub.f32 d0, d0, d1
+; A8: vnmul.f32 s{{[0-9]}}, s{{[0-9]}}, s{{[0-9]}}
+; A8: vsub.f32 d{{[0-9]}}, d{{[0-9]}}, d{{[0-9]}}
%0 = fmul float %a, %b
%1 = fsub float -0.0, %0
%2 = fsub float %1, %acc
@@ -28,8 +29,8 @@ entry:
; NEON: vnmla.f32
; A8: t2:
-; A8: vnmul.f32 s0, s{{[01]}}, s{{[01]}}
-; A8: vsub.f32 d0, d0, d1
+; A8: vnmul.f32 s{{[0123]}}, s{{[0123]}}, s{{[0123]}}
+; A8: vsub.f32 d{{[0-9]}}, d{{[0-9]}}, d{{[0-9]}}
%0 = fmul float %a, %b
%1 = fmul float -1.0, %0
%2 = fsub float %1, %acc
@@ -45,8 +46,8 @@ entry:
; NEON: vnmla.f64
; A8: t3:
-; A8: vnmul.f64 d16, d1{{[67]}}, d1{{[67]}}
-; A8: vsub.f64 d16, d16, d17
+; A8: vnmul.f64 d1{{[67]}}, d1{{[67]}}, d1{{[67]}}
+; A8: vsub.f64 d1{{[67]}}, d1{{[67]}}, d1{{[67]}}
%0 = fmul double %a, %b
%1 = fsub double -0.0, %0
%2 = fsub double %1, %acc
@@ -62,8 +63,8 @@ entry:
; NEON: vnmla.f64
; A8: t4:
-; A8: vnmul.f64 d16, d1{{[67]}}, d1{{[67]}}
-; A8: vsub.f64 d16, d16, d17
+; A8: vnmul.f64 d1{{[67]}}, d1{{[67]}}, d1{{[67]}}
+; A8: vsub.f64 d1{{[67]}}, d1{{[67]}}, d1{{[67]}}
%0 = fmul double %a, %b
%1 = fmul double -1.0, %0
%2 = fsub double %1, %acc
diff --git a/test/CodeGen/ARM/indirectbr.ll b/test/CodeGen/ARM/indirectbr.ll
index 0aac9d16ec..19dad3adfe 100644
--- a/test/CodeGen/ARM/indirectbr.ll
+++ b/test/CodeGen/ARM/indirectbr.ll
@@ -14,15 +14,15 @@ entry:
%1 = icmp eq i8* %0, null ; <i1> [#uses=1]
; indirect branch gets duplicated here
; ARM: bx
-; THUMB: mov pc, r1
-; THUMB2: mov pc, r2
+; THUMB: mov pc,
+; THUMB2: mov pc,
br i1 %1, label %bb3, label %bb2
bb2: ; preds = %entry, %bb3
%gotovar.4.0 = phi i8* [ %gotovar.4.0.pre, %bb3 ], [ %0, %entry ] ; <i8*> [#uses=1]
; ARM: bx
-; THUMB: mov pc, r1
-; THUMB2: mov pc, r2
+; THUMB: mov pc,
+; THUMB2: mov pc,
indirectbr i8* %gotovar.4.0, [label %L5, label %L4, label %L3, label %L2, label %L1]
bb3: ; preds = %entry
diff --git a/test/CodeGen/ARM/ldrd.ll b/test/CodeGen/ARM/ldrd.ll
index 895562a1d3..187006a3be 100644
--- a/test/CodeGen/ARM/ldrd.ll
+++ b/test/CodeGen/ARM/ldrd.ll
@@ -1,19 +1,21 @@
-; RUN: llc < %s -mtriple=armv6-apple-darwin | FileCheck %s -check-prefix=V6
+; RUN: llc < %s -mtriple=armv6-apple-darwin -regalloc=linearscan | FileCheck %s -check-prefix=V6
; RUN: llc < %s -mtriple=armv5-apple-darwin | FileCheck %s -check-prefix=V5
; RUN: llc < %s -mtriple=armv6-eabi | FileCheck %s -check-prefix=EABI
; rdar://r6949835
+; Magic ARM pair hints work best with linearscan.
+
@b = external global i64*
define i64 @t(i64 %a) nounwind readonly {
entry:
;V6: ldrd r2, [r2]
-;V5: ldr r3, [r2]
-;V5: ldr r2, [r2, #4]
+;V5: ldr r{{[0-9]+}}, [r2]
+;V5: ldr r{{[0-9]+}}, [r2, #4]
-;EABI: ldr r3, [r2]
-;EABI: ldr r2, [r2, #4]
+;EABI: ldr r{{[0-9]+}}, [r2]
+;EABI: ldr r{{[0-9]+}}, [r2, #4]
%0 = load i64** @b, align 4
%1 = load i64* %0, align 4
diff --git a/test/CodeGen/ARM/memcpy-inline.ll b/test/CodeGen/ARM/memcpy-inline.ll
index ed20c32dc0..e8a2a3b7d5 100644
--- a/test/CodeGen/ARM/memcpy-inline.ll
+++ b/test/CodeGen/ARM/memcpy-inline.ll
@@ -1,9 +1,13 @@
-; RUN: llc < %s -mtriple=arm-apple-darwin | grep ldmia
-; RUN: llc < %s -mtriple=arm-apple-darwin | grep stmia
-; RUN: llc < %s -mtriple=arm-apple-darwin | grep ldrb
-; RUN: llc < %s -mtriple=arm-apple-darwin | grep ldrh
+; RUN: llc < %s -mtriple=arm-apple-darwin -regalloc=linearscan -disable-post-ra | FileCheck %s
+; RUN: llc < %s -mtriple=arm-apple-darwin -regalloc=basic -disable-post-ra | FileCheck %s
- %struct.x = type { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8 }
+; The ARM magic hinting works best with linear scan.
+; CHECK: ldmia
+; CHECK: stmia
+; CHECK: ldrh
+; CHECK: ldrb
+
+%struct.x = type { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8 }
@src = external global %struct.x
@dst = external global %struct.x
diff --git a/test/CodeGen/ARM/peephole-bitcast.ll b/test/CodeGen/ARM/peephole-bitcast.ll
index 8d95d75b1d..e670a5be3b 100644
--- a/test/CodeGen/ARM/peephole-bitcast.ll
+++ b/test/CodeGen/ARM/peephole-bitcast.ll
@@ -1,8 +1,11 @@
-; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 -regalloc=linearscan | FileCheck %s
; vmov s0, r0 + vmov r0, s0 should have been optimized away.
; rdar://9104514
+; Peephole leaves a dead vmovsr instruction behind, and depends on linear scan
+; to remove it.
+
define void @t(float %x) nounwind ssp {
entry:
; CHECK: t:
diff --git a/test/CodeGen/ARM/reg_sequence.ll b/test/CodeGen/ARM/reg_sequence.ll
index 53214fd4c3..d350937c68 100644
--- a/test/CodeGen/ARM/reg_sequence.ll
+++ b/test/CodeGen/ARM/reg_sequence.ll
@@ -1,4 +1,5 @@
; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 -regalloc=basic | FileCheck %s
; Implementing vld / vst as REG_SEQUENCE eliminates the extra vmov's.
%struct.int16x8_t = type { <8 x i16> }
@@ -123,9 +124,9 @@ return1:
return2:
; CHECK: %return2
; CHECK: vadd.i32
-; CHECK: vmov q9, q11
+; CHECK: vmov {{q[0-9]+}}, {{q[0-9]+}}
; CHECK-NOT: vmov
-; CHECK: vst2.32 {d16, d17, d18, d19}
+; CHECK: vst2.32 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}
%tmp100 = extractvalue %struct.__neon_int32x4x2_t %tmp2, 0 ; <<4 x i32>> [#uses=1]
%tmp101 = extractvalue %struct.__neon_int32x4x2_t %tmp5, 1 ; <<4 x i32>> [#uses=1]
%tmp102 = add <4 x i32> %tmp100, %tmp101 ; <<4 x i32>> [#uses=1]
@@ -137,9 +138,10 @@ return2:
define <8 x i16> @t5(i16* %A, <8 x i16>* %B) nounwind {
; CHECK: t5:
; CHECK: vldmia
-; CHECK: vmov q9, q8
+; How can FileCheck match Q and D registers? We need a lisp interpreter.
+; CHECK: vmov {{q[0-9]+}}, {{q[0-9]+}}
; CHECK-NOT: vmov
-; CHECK: vld2.16 {d16[1], d18[1]}, [r0]
+; CHECK: vld2.16 {d{{[0-9]+}}[1], d{{[0-9]+}}[1]}, [r0]
; CHECK-NOT: vmov
; CHECK: vadd.i16
%tmp0 = bitcast i16* %A to i8* ; <i8*> [#uses=1]
@@ -154,8 +156,8 @@ define <8 x i16> @t5(i16* %A, <8 x i16>* %B) nounwind {
define <8 x i8> @t6(i8* %A, <8 x i8>* %B) nounwind {
; CHECK: t6:
; CHECK: vldr.64
-; CHECK: vmov d17, d16
-; CHECK-NEXT: vld2.8 {d16[1], d17[1]}
+; CHECK: vmov d[[D0:[0-9]+]], d[[D1:[0-9]+]]
+; CHECK-NEXT: vld2.8 {d[[D1]][1], d[[D0]][1]}
%tmp1 = load <8 x i8>* %B ; <<8 x i8>> [#uses=2]
%tmp2 = call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 1) ; <%struct.__neon_int8x8x2_t> [#uses=2]
%tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 0 ; <<8 x i8>> [#uses=1]
@@ -169,10 +171,10 @@ entry:
; CHECK: t7:
; CHECK: vld2.32
; CHECK: vst2.32
-; CHECK: vld1.32 {d16, d17},
-; CHECK: vmov q9, q8
+; CHECK: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}},
+; CHECK: vmov q[[Q0:[0-9]+]], q[[Q1:[0-9]+]]
; CHECK-NOT: vmov
-; CHECK: vuzp.32 q8, q9
+; CHECK: vuzp.32 q[[Q1]], q[[Q0]]
; CHECK: vst1.32
%0 = bitcast i32* %iptr to i8* ; <i8*> [#uses=2]
%1 = tail call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i8* %0, i32 1) ; <%struct.__neon_int32x4x2_t> [#uses=2]
@@ -271,7 +273,7 @@ define arm_aapcs_vfpcc i32 @t10() nounwind {
entry:
; CHECK: t10:
; CHECK: vmul.f32 q8, q8, d0[0]
-; CHECK: vmov.i32 q9, #0x3F000000
+; CHECK: vmov.i32 q[[Q0:[0-9]+]], #0x3F000000
; CHECK: vadd.f32 q8, q8, q8
%0 = shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1]
%1 = insertelement <4 x float> %0, float undef, i32 1 ; <<4 x float>> [#uses=1]
diff --git a/test/CodeGen/ARM/str_pre-2.ll b/test/CodeGen/ARM/str_pre-2.ll
index 465c7e676c..b24f75a6e2 100644
--- a/test/CodeGen/ARM/str_pre-2.ll
+++ b/test/CodeGen/ARM/str_pre-2.ll
@@ -1,4 +1,7 @@
-; RUN: llc < %s -mtriple=armv6-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=armv6-linux-gnu -regalloc=linearscan | FileCheck %s
+; RUN: llc < %s -mtriple=armv6-linux-gnu -regalloc=basic | FileCheck %s
+
+; The greedy register allocator uses a single CSR here, invalidating the test.
@b = external global i64*
diff --git a/test/CodeGen/ARM/thumb1-varalloc.ll b/test/CodeGen/ARM/thumb1-varalloc.ll
index 25093fee22..aa88ae0c1a 100644
--- a/test/CodeGen/ARM/thumb1-varalloc.ll
+++ b/test/CodeGen/ARM/thumb1-varalloc.ll
@@ -1,4 +1,5 @@
; RUN: llc < %s -mtriple=thumbv6-apple-darwin | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv6-apple-darwin -regalloc=basic | FileCheck %s
; rdar://8819685
@__bar = external hidden global i8*
@@ -12,12 +13,13 @@ entry:
%0 = load i8** @__bar, align 4
%1 = icmp eq i8* %0, null
br i1 %1, label %bb1, label %bb3
+; CHECK: bne
bb1:
store i32 1026, i32* %size, align 4
%2 = alloca [1026 x i8], align 1
-; CHECK: mov r0, sp
-; CHECK: adds r4, r0, r4
+; CHECK: mov [[R0:r[0-9]+]], sp
+; CHECK: adds {{r[0-9]+}}, [[R0]], {{r[0-9]+}}
%3 = getelementptr inbounds [1026 x i8]* %2, i32 0, i32 0
%4 = call i32 @_called_func(i8* %3, i32* %size) nounwind
%5 = icmp eq i32 %4, 0
diff --git a/test/CodeGen/ARM/vcgt.ll b/test/CodeGen/ARM/vcgt.ll
index c3c4cb3563..2243bac91f 100644
--- a/test/CodeGen/ARM/vcgt.ll
+++ b/test/CodeGen/ARM/vcgt.ll
@@ -1,4 +1,5 @@
; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; RUN: llc < %s -march=arm -mattr=+neon -regalloc=basic | FileCheck %s
define <8 x i8> @vcgts8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK: vcgts8:
@@ -161,9 +162,9 @@ define <4 x i32> @vacgtQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
; rdar://7923010
define <4 x i32> @vcgt_zext(<4 x float>* %A, <4 x float>* %B) nounwind {
;CHECK: vcgt_zext:
-;CHECK: vmov.i32 q10, #0x1
-;CHECK: vcgt.f32 q8
-;CHECK: vand q8, q8, q10
+;CHECK: vmov.i32 [[Q0:q[0-9]+]], #0x1
+;CHECK: vcgt.f32 [[Q1:q[0-9]+]]
+;CHECK: vand [[Q2:q[0-9]+]], [[Q1]], [[Q0]]
%tmp1 = load <4 x float>* %A
%tmp2 = load <4 x float>* %B
%tmp3 = fcmp ogt <4 x float> %tmp1, %tmp2
diff --git a/test/CodeGen/ARM/vfp.ll b/test/CodeGen/ARM/vfp.ll
index 44a44afe9a..390457fc21 100644
--- a/test/CodeGen/ARM/vfp.ll
+++ b/test/CodeGen/ARM/vfp.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s
+; RUN: llc < %s -march=arm -mattr=+vfp2 -disable-post-ra | FileCheck %s
+; RUN: llc < %s -march=arm -mattr=+vfp2 -disable-post-ra -regalloc=basic | FileCheck %s
define void @test(float* %P, double* %D) {
%A = load float* %P ; <float> [#uses=1]
@@ -39,10 +40,10 @@ define void @test_add(float* %P, double* %D) {
define void @test_ext_round(float* %P, double* %D) {
;CHECK: test_ext_round:
%a = load float* %P ; <float> [#uses=1]
+;CHECK: vcvt.f32.f64
;CHECK: vcvt.f64.f32
%b = fpext float %a to double ; <double> [#uses=1]
%A = load double* %D ; <double> [#uses=1]
-;CHECK: vcvt.f32.f64
%B = fptrunc double %A to float ; <float> [#uses=1]
store double %b, double* %D
store float %B, float* %P
diff --git a/test/CodeGen/ARM/vld1.ll b/test/CodeGen/ARM/vld1.ll
index c886125a2f..02e543cccd 100644
--- a/test/CodeGen/ARM/vld1.ll
+++ b/test/CodeGen/ARM/vld1.ll
@@ -1,4 +1,5 @@
; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; RUN: llc < %s -march=arm -mattr=+neon -regalloc=basic | FileCheck %s
define <8 x i8> @vld1i8(i8* %A) nounwind {
;CHECK: vld1i8:
@@ -19,7 +20,7 @@ define <4 x i16> @vld1i16(i16* %A) nounwind {
;Check for a post-increment updating load.
define <4 x i16> @vld1i16_update(i16** %ptr) nounwind {
;CHECK: vld1i16_update:
-;CHECK: vld1.16 {d16}, [r1]!
+;CHECK: vld1.16 {d16}, [{{r[0-9]+}}]!
%A = load i16** %ptr
%tmp0 = bitcast i16* %A to i8*
%tmp1 = call <4 x i16> @llvm.arm.neon.vld1.v4i16(i8* %tmp0, i32 1)
@@ -39,7 +40,7 @@ define <2 x i32> @vld1i32(i32* %A) nounwind {
;Check for a post-increment updating load with register increment.
define <2 x i32> @vld1i32_update(i32** %ptr, i32 %inc) nounwind {
;CHECK: vld1i32_update:
-;CHECK: vld1.32 {d16}, [r2], r1
+;CHECK: vld1.32 {d16}, [{{r[0-9]+}}], {{r[0-9]+}}
%A = load i32** %ptr
%tmp0 = bitcast i32* %A to i8*
%tmp1 = call <2 x i32> @llvm.arm.neon.vld1.v2i32(i8* %tmp0, i32 1)
@@ -75,7 +76,7 @@ define <16 x i8> @vld1Qi8(i8* %A) nounwind {
;Check for a post-increment updating load.
define <16 x i8> @vld1Qi8_update(i8** %ptr) nounwind {
;CHECK: vld1Qi8_update:
-;CHECK: vld1.8 {d16, d17}, [r1, :64]!
+;CHECK: vld1.8 {d16, d17}, [{{r[0-9]+}}, :64]!
%A = load i8** %ptr
%tmp1 = call <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* %A, i32 8)
%tmp2 = getelementptr i8* %A, i32 16
diff --git a/test/CodeGen/ARM/vld3.ll b/test/CodeGen/ARM/vld3.ll
index dde530f6df..b495319830 100644
--- a/test/CodeGen/ARM/vld3.ll
+++ b/test/CodeGen/ARM/vld3.ll
@@ -1,4 +1,5 @@
; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; RUN: llc < %s -march=arm -mattr=+neon -regalloc=basic | FileCheck %s
%struct.__neon_int8x8x3_t = type { <8 x i8>, <8 x i8>, <8 x i8> }
%struct.__neon_int16x4x3_t = type { <4 x i16>, <4 x i16>, <4 x i16> }
@@ -36,7 +37,7 @@ define <4 x i16> @vld3i16(i16* %A) nounwind {
;Check for a post-increment updating load with register increment.
define <4 x i16> @vld3i16_update(i16** %ptr, i32 %inc) nounwind {
;CHECK: vld3i16_update:
-;CHECK: vld3.16 {d16, d17, d18}, [r2], r1
+;CHECK: vld3.16 {d16, d17, d18}, [{{r[0-9]+}}], {{r[0-9]+}}
%A = load i16** %ptr
%tmp0 = bitcast i16* %A to i8*
%tmp1 = call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3.v4i16(i8* %tmp0, i32 1)
@@ -121,8 +122,8 @@ define <4 x i32> @vld3Qi32(i32* %A) nounwind {
;Check for a post-increment updating load.
define <4 x i32> @vld3Qi32_update(i32** %ptr) nounwind {
;CHECK: vld3Qi32_update:
-;CHECK: vld3.32 {d16, d18, d20}, [r1]!
-;CHECK: vld3.32 {d17, d19, d21}, [r1]!
+;CHECK: vld3.32 {d16, d18, d20}, [r[[R:[0-9]+]]]!
+;CHECK: vld3.32 {d17, d19, d21}, [r[[R]]]!
%A = load i32** %ptr
%tmp0 = bitcast i32* %A to i8*
%tmp1 = call %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3.v4i32(i8* %tmp0, i32 1)
diff --git a/test/CodeGen/ARM/vldlane.ll b/test/CodeGen/ARM/vldlane.ll
index 770ed071ac..68dd503bcc 100644
--- a/test/CodeGen/ARM/vldlane.ll
+++ b/test/CodeGen/ARM/vldlane.ll
@@ -1,4 +1,5 @@
; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; RUN: llc < %s -march=arm -mattr=+neon -regalloc=basic | FileCheck %s
define <8 x i8> @vld1lanei8(i8* %A, <8 x i8>* %B) nounwind {
;CHECK: vld1lanei8:
@@ -279,7 +280,7 @@ define <8 x i16> @vld3laneQi16(i16* %A, <8 x i16>* %B) nounwind {
;Check for a post-increment updating load with register increment.
define <8 x i16> @vld3laneQi16_update(i16** %ptr, <8 x i16>* %B, i32 %inc) nounwind {
;CHECK: vld3laneQi16_update:
-;CHECK: vld3.16 {d16[1], d18[1], d20[1]}, [r2], r1
+;CHECK: vld3.16 {d16[1], d18[1], d20[1]}, [{{r[0-9]+}}], {{r[0-9]+}}
%A = load i16** %ptr
%tmp0 = bitcast i16* %A to i8*
%tmp1 = load <8 x i16>* %B