diff options
author | Jim Grosbach <grosbach@apple.com> | 2011-11-14 23:03:21 +0000 |
---|---|---|
committer | Jim Grosbach <grosbach@apple.com> | 2011-11-14 23:03:21 +0000 |
commit | ffc658b056b7cc0b3f6a2626694b6a4216ed728d (patch) | |
tree | ebe680a03750316ed3c572177cd1306a4c0fd173 | |
parent | 88990248d3bfb2f265fcf27f8a032ac0eb14d09f (diff) |
ARM VLDR/VSTR instructions don't need a size suffix.
Canonicalize on the non-suffixed form, but continue to accept assembly that
has any correctly sized type suffix.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144583 91177308-0d34-0410-b5e6-96231b3b80d8
24 files changed, 105 insertions, 112 deletions
diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td index 6d5b6a4615..841ca741fd 100644 --- a/lib/Target/ARM/ARMInstrFormats.td +++ b/lib/Target/ARM/ARMInstrFormats.td @@ -1996,6 +1996,7 @@ class NEONFPPat<dag pattern, dag result> : Pat<pattern, result> { class VFPDataTypeInstAlias<string opc, string dt, string asm, dag Result> : InstAlias<!strconcat(opc, dt, asm), Result>; multiclass VFPDT32InstAlias<string opc, string asm, dag Result> { + def _32 : VFPDataTypeInstAlias<opc, ".32", asm, Result>; def I32 : VFPDataTypeInstAlias<opc, ".i32", asm, Result>; def S32 : VFPDataTypeInstAlias<opc, ".s32", asm, Result>; def U32 : VFPDataTypeInstAlias<opc, ".u32", asm, Result>; @@ -2003,6 +2004,7 @@ multiclass VFPDT32InstAlias<string opc, string asm, dag Result> { def F : VFPDataTypeInstAlias<opc, ".f", asm, Result>; } multiclass VFPDT64InstAlias<string opc, string asm, dag Result> { + def _64 : VFPDataTypeInstAlias<opc, ".64", asm, Result>; def I64 : VFPDataTypeInstAlias<opc, ".i64", asm, Result>; def S64 : VFPDataTypeInstAlias<opc, ".s64", asm, Result>; def U64 : VFPDataTypeInstAlias<opc, ".u64", asm, Result>; diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index 06cb79a50b..488c508cc6 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -69,11 +69,11 @@ def vfp_f64imm : Operand<f64>, let canFoldAsLoad = 1, isReMaterializable = 1 in { def VLDRD : ADI5<0b1101, 0b01, (outs DPR:$Dd), (ins addrmode5:$addr), - IIC_fpLoad64, "vldr", ".64\t$Dd, $addr", + IIC_fpLoad64, "vldr", "\t$Dd, $addr", [(set DPR:$Dd, (f64 (load addrmode5:$addr)))]>; def VLDRS : ASI5<0b1101, 0b01, (outs SPR:$Sd), (ins addrmode5:$addr), - IIC_fpLoad32, "vldr", ".32\t$Sd, $addr", + IIC_fpLoad32, "vldr", "\t$Sd, $addr", [(set SPR:$Sd, (load addrmode5:$addr))]> { // Some single precision VFP instructions may be executed on both NEON and VFP // pipelines. 
@@ -83,11 +83,11 @@ def VLDRS : ASI5<0b1101, 0b01, (outs SPR:$Sd), (ins addrmode5:$addr), } // End of 'let canFoldAsLoad = 1, isReMaterializable = 1 in' def VSTRD : ADI5<0b1101, 0b00, (outs), (ins DPR:$Dd, addrmode5:$addr), - IIC_fpStore64, "vstr", ".64\t$Dd, $addr", + IIC_fpStore64, "vstr", "\t$Dd, $addr", [(store (f64 DPR:$Dd), addrmode5:$addr)]>; def VSTRS : ASI5<0b1101, 0b00, (outs), (ins SPR:$Sd, addrmode5:$addr), - IIC_fpStore32, "vstr", ".32\t$Sd, $addr", + IIC_fpStore32, "vstr", "\t$Sd, $addr", [(store SPR:$Sd, addrmode5:$addr)]> { // Some single precision VFP instructions may be executed on both NEON and VFP // pipelines. @@ -1163,21 +1163,12 @@ def FCONSTS : VFPAI<(outs SPR:$Sd), (ins vfp_f32imm:$imm), def : VFP2InstAlias<"fmstat${p}", (FMSTAT pred:$p)>; -// The size suffix is optional for VLDR/VSTR -def : VFP2InstAlias<"vldr$p $Dd, $addr", - (VLDRD DPR:$Dd, addrmode5:$addr, pred:$p)>; -def : VFP2InstAlias<"vldr$p $Sd, $addr", - (VLDRS SPR:$Sd, addrmode5:$addr, pred:$p)>; -def : VFP2InstAlias<"vstr$p $Dd, $addr", - (VSTRD DPR:$Dd, addrmode5:$addr, pred:$p)>; -def : VFP2InstAlias<"vstr$p $Sd, $addr", - (VSTRS SPR:$Sd, addrmode5:$addr, pred:$p)>; -// The suffix can also by typed. -defm : VFPDT32InstAlias<"vldr$p", "$Sd, $addr", +// VLDR/VSTR accept an optional type suffix. 
+defm : VFPDT32InstAlias<"vldr${p}", "$Sd, $addr", (VLDRS SPR:$Sd, addrmode5:$addr, pred:$p)>; -defm : VFPDT32InstAlias<"vstr$p", "$Sd, $addr", +defm : VFPDT32InstAlias<"vstr${p}", "$Sd, $addr", (VSTRS SPR:$Sd, addrmode5:$addr, pred:$p)>; -defm : VFPDT64InstAlias<"vldr$p", "$Dd, $addr", +defm : VFPDT64InstAlias<"vldr${p}", "$Dd, $addr", (VLDRD DPR:$Dd, addrmode5:$addr, pred:$p)>; -defm : VFPDT64InstAlias<"vstr$p", "$Dd, $addr", +defm : VFPDT64InstAlias<"vstr${p}", "$Dd, $addr", (VSTRD DPR:$Dd, addrmode5:$addr, pred:$p)>; diff --git a/test/CodeGen/ARM/2009-09-09-fpcmp-ole.ll b/test/CodeGen/ARM/2009-09-09-fpcmp-ole.ll index 0a157c96b3..426bd17590 100644 --- a/test/CodeGen/ARM/2009-09-09-fpcmp-ole.ll +++ b/test/CodeGen/ARM/2009-09-09-fpcmp-ole.ll @@ -9,7 +9,7 @@ define void @test(double* %x, double* %y) nounwind { br i1 %4, label %bb1, label %bb2 bb1: -;CHECK: vstrhi.64 +;CHECK: vstrhi store double %1, double* %y br label %bb2 diff --git a/test/CodeGen/ARM/2009-09-24-spill-align.ll b/test/CodeGen/ARM/2009-09-24-spill-align.ll index 8bfd02697b..eb9c2d0f7f 100644 --- a/test/CodeGen/ARM/2009-09-24-spill-align.ll +++ b/test/CodeGen/ARM/2009-09-24-spill-align.ll @@ -6,7 +6,7 @@ entry: %arg0_poly16x4_t = alloca <4 x i16> ; <<4 x i16>*> [#uses=1] %out_poly16_t = alloca i16 ; <i16*> [#uses=1] %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] -; CHECK: vldr.64 +; CHECK: vldr %0 = load <4 x i16>* %arg0_poly16x4_t, align 8 ; <<4 x i16>> [#uses=1] %1 = extractelement <4 x i16> %0, i32 1 ; <i16> [#uses=1] store i16 %1, i16* %out_poly16_t, align 2 diff --git a/test/CodeGen/ARM/2010-05-21-BuildVector.ll b/test/CodeGen/ARM/2010-05-21-BuildVector.ll index cd1c9c8c04..a400b7b288 100644 --- a/test/CodeGen/ARM/2010-05-21-BuildVector.ll +++ b/test/CodeGen/ARM/2010-05-21-BuildVector.ll @@ -10,28 +10,28 @@ entry: %4 = ashr i32 %3, 30 %.sum = add i32 %4, 4 %5 = getelementptr inbounds float* %table, i32 %.sum -;CHECK: vldr.32 s +;CHECK: vldr s %6 = load float* %5, align 4 %tmp11 = 
insertelement <4 x float> undef, float %6, i32 0 %7 = shl i32 %packedValue, 18 %8 = ashr i32 %7, 30 %.sum12 = add i32 %8, 4 %9 = getelementptr inbounds float* %table, i32 %.sum12 -;CHECK: vldr.32 s +;CHECK: vldr s %10 = load float* %9, align 4 %tmp9 = insertelement <4 x float> %tmp11, float %10, i32 1 %11 = shl i32 %packedValue, 20 %12 = ashr i32 %11, 30 %.sum13 = add i32 %12, 4 %13 = getelementptr inbounds float* %table, i32 %.sum13 -;CHECK: vldr.32 s +;CHECK: vldr s %14 = load float* %13, align 4 %tmp7 = insertelement <4 x float> %tmp9, float %14, i32 2 %15 = shl i32 %packedValue, 22 %16 = ashr i32 %15, 30 %.sum14 = add i32 %16, 4 %17 = getelementptr inbounds float* %table, i32 %.sum14 -;CHECK: vldr.32 s +;CHECK: vldr s %18 = load float* %17, align 4 %tmp5 = insertelement <4 x float> %tmp7, float %18, i32 3 %19 = fmul <4 x float> %tmp5, %2 diff --git a/test/CodeGen/ARM/2011-10-26-memset-with-neon.ll b/test/CodeGen/ARM/2011-10-26-memset-with-neon.ll index 3c9216cde7..42b1491481 100644 --- a/test/CodeGen/ARM/2011-10-26-memset-with-neon.ll +++ b/test/CodeGen/ARM/2011-10-26-memset-with-neon.ll @@ -1,7 +1,7 @@ ; RUN: llc -march=arm -mcpu=cortex-a8 < %s | FileCheck %s ; Should trigger a NEON store. 
-; CHECK: vstr.64 +; CHECK: vstr define void @f_0_12(i8* nocapture %c) nounwind optsize { entry: call void @llvm.memset.p0i8.i64(i8* %c, i8 0, i64 12, i32 8, i1 false) diff --git a/test/CodeGen/ARM/fast-isel-cmp-imm.ll b/test/CodeGen/ARM/fast-isel-cmp-imm.ll index b1bf63f816..33c60081a3 100644 --- a/test/CodeGen/ARM/fast-isel-cmp-imm.ll +++ b/test/CodeGen/ARM/fast-isel-cmp-imm.ll @@ -26,9 +26,9 @@ entry: ; ARM: t1b ; THUMB: t1b %cmp = fcmp oeq float %a, -0.000000e+00 -; ARM: vldr.32 +; ARM: vldr ; ARM: vcmpe.f32 s{{[0-9]+}}, s{{[0-9]+}} -; THUMB: vldr.32 +; THUMB: vldr ; THUMB: vcmpe.f32 s{{[0-9]+}}, s{{[0-9]+}} br i1 %cmp, label %if.then, label %if.end @@ -63,9 +63,9 @@ entry: ; ARM: t2b ; THUMB: t2b %cmp = fcmp oeq double %a, -0.000000e+00 -; ARM: vldr.64 +; ARM: vldr ; ARM: vcmpe.f64 d{{[0-9]+}}, d{{[0-9]+}} -; THUMB: vldr.64 +; THUMB: vldr ; THUMB: vcmpe.f64 d{{[0-9]+}}, d{{[0-9]+}} br i1 %cmp, label %if.then, label %if.end diff --git a/test/CodeGen/ARM/fp.ll b/test/CodeGen/ARM/fp.ll index ac023d19ec..93601cf9d6 100644 --- a/test/CodeGen/ARM/fp.ll +++ b/test/CodeGen/ARM/fp.ll @@ -42,7 +42,7 @@ entry: define double @h(double* %v) { ;CHECK: h: -;CHECK: vldr.64 +;CHECK: vldr ;CHECK-NEXT: vmov entry: %tmp = load double* %v ; <double> [#uses=1] diff --git a/test/CodeGen/ARM/fpcmp-opt.ll b/test/CodeGen/ARM/fpcmp-opt.ll index 7c0dd0e12a..ad03202f8e 100644 --- a/test/CodeGen/ARM/fpcmp-opt.ll +++ b/test/CodeGen/ARM/fpcmp-opt.ll @@ -14,8 +14,8 @@ entry: ; FINITE: beq ; NAN: t1: -; NAN: vldr.32 s0, -; NAN: vldr.32 s1, +; NAN: vldr s0, +; NAN: vldr s1, ; NAN: vcmpe.f32 s1, s0 ; NAN: vmrs apsr_nzcv, fpscr ; NAN: beq diff --git a/test/CodeGen/ARM/fpmem.ll b/test/CodeGen/ARM/fpmem.ll index 38339334b4..8faa57896a 100644 --- a/test/CodeGen/ARM/fpmem.ll +++ b/test/CodeGen/ARM/fpmem.ll @@ -8,7 +8,7 @@ define float @f1(float %a) { define float @f2(float* %v, float %u) { ; CHECK: f2: -; CHECK: vldr.32{{.*}}[ +; CHECK: vldr{{.*}}[ %tmp = load float* %v ; <float> [#uses=1] %tmp1 = 
fadd float %tmp, %u ; <float> [#uses=1] ret float %tmp1 @@ -16,7 +16,7 @@ define float @f2(float* %v, float %u) { define float @f2offset(float* %v, float %u) { ; CHECK: f2offset: -; CHECK: vldr.32{{.*}}, #4] +; CHECK: vldr{{.*}}, #4] %addr = getelementptr float* %v, i32 1 %tmp = load float* %addr %tmp1 = fadd float %tmp, %u @@ -25,7 +25,7 @@ define float @f2offset(float* %v, float %u) { define float @f2noffset(float* %v, float %u) { ; CHECK: f2noffset: -; CHECK: vldr.32{{.*}}, #-4] +; CHECK: vldr{{.*}}, #-4] %addr = getelementptr float* %v, i32 -1 %tmp = load float* %addr %tmp1 = fadd float %tmp, %u @@ -34,7 +34,7 @@ define float @f2noffset(float* %v, float %u) { define void @f3(float %a, float %b, float* %v) { ; CHECK: f3: -; CHECK: vstr.32{{.*}}[ +; CHECK: vstr{{.*}}[ %tmp = fadd float %a, %b ; <float> [#uses=1] store float %tmp, float* %v ret void diff --git a/test/CodeGen/ARM/lsr-on-unrolled-loops.ll b/test/CodeGen/ARM/lsr-on-unrolled-loops.ll index 47379016cf..ea5ae8f518 100644 --- a/test/CodeGen/ARM/lsr-on-unrolled-loops.ll +++ b/test/CodeGen/ARM/lsr-on-unrolled-loops.ll @@ -4,9 +4,9 @@ ; constant offset addressing, so that each of the following stores ; uses the same register. -; CHECK: vstr.32 s{{.*}}, [{{(r[0-9]+)|(lr)}}, #32] -; CHECK: vstr.32 s{{.*}}, [{{(r[0-9]+)|(lr)}}, #64] -; CHECK: vstr.32 s{{.*}}, [{{(r[0-9]+)|(lr)}}, #96] +; CHECK: vstr s{{.*}}, [{{(r[0-9]+)|(lr)}}, #32] +; CHECK: vstr s{{.*}}, [{{(r[0-9]+)|(lr)}}, #64] +; CHECK: vstr s{{.*}}, [{{(r[0-9]+)|(lr)}}, #96] ; We can also save a register in the outer loop, but that requires ; performing LSR on the outer loop. 
diff --git a/test/CodeGen/ARM/neon_ld1.ll b/test/CodeGen/ARM/neon_ld1.ll index c78872a4bc..c218ff523e 100644 --- a/test/CodeGen/ARM/neon_ld1.ll +++ b/test/CodeGen/ARM/neon_ld1.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=arm -mattr=+neon | grep vldr.64 | count 4 -; RUN: llc < %s -march=arm -mattr=+neon | grep vstr.64 +; RUN: llc < %s -march=arm -mattr=+neon | grep vldr | count 4 +; RUN: llc < %s -march=arm -mattr=+neon | grep vstr ; RUN: llc < %s -march=arm -mattr=+neon | grep vmov define void @t1(<2 x i32>* %r, <4 x i16>* %a, <4 x i16>* %b) nounwind { diff --git a/test/CodeGen/ARM/reg_sequence.ll b/test/CodeGen/ARM/reg_sequence.ll index 091a003ef9..78b4e7ea84 100644 --- a/test/CodeGen/ARM/reg_sequence.ll +++ b/test/CodeGen/ARM/reg_sequence.ll @@ -155,7 +155,7 @@ define <8 x i16> @t5(i16* %A, <8 x i16>* %B) nounwind { define <8 x i8> @t6(i8* %A, <8 x i8>* %B) nounwind { ; CHECK: t6: -; CHECK: vldr.64 +; CHECK: vldr ; CHECK: vorr d[[D0:[0-9]+]], d[[D1:[0-9]+]] ; CHECK-NEXT: vld2.8 {d[[D1]][1], d[[D0]][1]} %tmp1 = load <8 x i8>* %B ; <<8 x i8>> [#uses=2] @@ -240,7 +240,7 @@ bb14: ; preds = %bb6 ; PR7157 define arm_aapcs_vfpcc float @t9(%0* nocapture, %3* nocapture) nounwind { ; CHECK: t9: -; CHECK: vldr.64 +; CHECK: vldr ; CHECK-NOT: vmov d{{.*}}, d16 ; CHECK: vmov.i32 d17 ; CHECK-NEXT: vstmia r0, {d16, d17} diff --git a/test/CodeGen/ARM/subreg-remat.ll b/test/CodeGen/ARM/subreg-remat.ll index 993d7ec750..03ae12c6de 100644 --- a/test/CodeGen/ARM/subreg-remat.ll +++ b/test/CodeGen/ARM/subreg-remat.ll @@ -12,13 +12,13 @@ target triple = "thumbv7-apple-ios" ; ; CHECK: f1 ; CHECK: vmov s1, r0 -; CHECK: vldr.32 s0, LCPI +; CHECK: vldr s0, LCPI ; The vector must be spilled: -; CHECK: vstr.64 d0, +; CHECK: vstr d0, ; CHECK: asm clobber d0 ; And reloaded after the asm: -; CHECK: vldr.64 [[D16:d[0-9]+]], -; CHECK: vstr.64 [[D16]], [r1] +; CHECK: vldr [[D16:d[0-9]+]], +; CHECK: vstr [[D16]], [r1] define void @f1(float %x, <2 x float>* %p) { %v1 = insertelement <2 x float> undef, 
float %x, i32 1 %v2 = insertelement <2 x float> %v1, float 0x400921FB60000000, i32 0 @@ -37,13 +37,13 @@ define void @f1(float %x, <2 x float>* %p) { ; virtual register. It doesn't read the old value. ; ; CHECK: f2 -; CHECK: vldr.32 s0, LCPI +; CHECK: vldr s0, LCPI ; The vector must not be spilled: -; CHECK-NOT: vstr.64 +; CHECK-NOT: vstr ; CHECK: asm clobber d0 ; But instead rematerialize after the asm: -; CHECK: vldr.32 [[S0:s[0-9]+]], LCPI -; CHECK: vstr.64 [[D0:d[0-9]+]], [r0] +; CHECK: vldr [[S0:s[0-9]+]], LCPI +; CHECK: vstr [[D0:d[0-9]+]], [r0] define void @f2(<2 x float>* %p) { %v2 = insertelement <2 x float> undef, float 0x400921FB60000000, i32 0 %y = call double asm sideeffect "asm clobber $0", "=w,0,~{d1},~{d2},~{d3},~{d4},~{d5},~{d6},~{d7},~{d8},~{d9},~{d10},~{d11},~{d12},~{d13},~{d14},~{d15},~{d16},~{d17},~{d18},~{d19},~{d20},~{d21},~{d22},~{d23},~{d24},~{d25},~{d26},~{d27},~{d28},~{d29},~{d30},~{d31}"(<2 x float> %v2) nounwind diff --git a/test/CodeGen/ARM/vbsl-constant.ll b/test/CodeGen/ARM/vbsl-constant.ll index 14e668efb1..f157dbdb97 100644 --- a/test/CodeGen/ARM/vbsl-constant.ll +++ b/test/CodeGen/ARM/vbsl-constant.ll @@ -2,8 +2,8 @@ define <8 x i8> @v_bsli8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind { ;CHECK: v_bsli8: -;CHECK: vldr.64 -;CHECK: vldr.64 +;CHECK: vldr +;CHECK: vldr ;CHECK: vbsl %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B @@ -16,8 +16,8 @@ define <8 x i8> @v_bsli8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind { define <4 x i16> @v_bsli16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind { ;CHECK: v_bsli16: -;CHECK: vldr.64 -;CHECK: vldr.64 +;CHECK: vldr +;CHECK: vldr ;CHECK: vbsl %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B @@ -30,8 +30,8 @@ define <4 x i16> @v_bsli16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind define <2 x i32> @v_bsli32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind { ;CHECK: v_bsli32: -;CHECK: vldr.64 -;CHECK: vldr.64 +;CHECK: vldr +;CHECK: vldr ;CHECK: vbsl %tmp1 
= load <2 x i32>* %A %tmp2 = load <2 x i32>* %B @@ -44,9 +44,9 @@ define <2 x i32> @v_bsli32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind define <1 x i64> @v_bsli64(<1 x i64>* %A, <1 x i64>* %B, <1 x i64>* %C) nounwind { ;CHECK: v_bsli64: -;CHECK: vldr.64 -;CHECK: vldr.64 -;CHECK: vldr.64 +;CHECK: vldr +;CHECK: vldr +;CHECK: vldr ;CHECK: vbsl %tmp1 = load <1 x i64>* %A %tmp2 = load <1 x i64>* %B diff --git a/test/CodeGen/ARM/vdup.ll b/test/CodeGen/ARM/vdup.ll index e99fac1f1e..05332e4d8c 100644 --- a/test/CodeGen/ARM/vdup.ll +++ b/test/CodeGen/ARM/vdup.ll @@ -254,7 +254,7 @@ entry: ;CHECK: redundantVdup: ;CHECK: vmov.i8 ;CHECK-NOT: vdup.8 -;CHECK: vstr.64 +;CHECK: vstr define void @redundantVdup(<8 x i8>* %ptr) nounwind { %1 = insertelement <8 x i8> undef, i8 -128, i32 0 %2 = shufflevector <8 x i8> %1, <8 x i8> undef, <8 x i32> zeroinitializer diff --git a/test/CodeGen/ARM/vector-DAGCombine.ll b/test/CodeGen/ARM/vector-DAGCombine.ll index 1a97982eb0..a38a0feae0 100644 --- a/test/CodeGen/ARM/vector-DAGCombine.ll +++ b/test/CodeGen/ARM/vector-DAGCombine.ll @@ -80,7 +80,7 @@ declare void @llvm.arm.neon.vst1.v8i8(i8*, <8 x i8>, i32) nounwind ; so they are not split up into i32 values. Radar 8755338. 
define void @i64_buildvector(i64* %ptr, <2 x i64>* %vp) nounwind { ; CHECK: i64_buildvector -; CHECK: vldr.64 +; CHECK: vldr %t0 = load i64* %ptr, align 4 %t1 = insertelement <2 x i64> undef, i64 %t0, i32 0 store <2 x i64> %t1, <2 x i64>* %vp @@ -89,7 +89,7 @@ define void @i64_buildvector(i64* %ptr, <2 x i64>* %vp) nounwind { define void @i64_insertelement(i64* %ptr, <2 x i64>* %vp) nounwind { ; CHECK: i64_insertelement -; CHECK: vldr.64 +; CHECK: vldr %t0 = load i64* %ptr, align 4 %vec = load <2 x i64>* %vp %t1 = insertelement <2 x i64> %vec, i64 %t0, i32 0 @@ -99,7 +99,7 @@ define void @i64_insertelement(i64* %ptr, <2 x i64>* %vp) nounwind { define void @i64_extractelement(i64* %ptr, <2 x i64>* %vp) nounwind { ; CHECK: i64_extractelement -; CHECK: vstr.64 +; CHECK: vstr %vec = load <2 x i64>* %vp %t1 = extractelement <2 x i64> %vec, i32 0 store i64 %t1, i64* %ptr diff --git a/test/CodeGen/ARM/vext.ll b/test/CodeGen/ARM/vext.ll index 65b5913e40..e224bdfe25 100644 --- a/test/CodeGen/ARM/vext.ll +++ b/test/CodeGen/ARM/vext.ll @@ -138,7 +138,7 @@ define <8 x i16> @test_illegal(<8 x i16>* %A, <8 x i16>* %B) nounwind { ; Make sure this doesn't crash define arm_aapcscc void @test_elem_mismatch(<2 x i64>* nocapture %src, <4 x i16>* nocapture %dest) nounwind { ; CHECK: test_elem_mismatch: -; CHECK: vstr.64 +; CHECK: vstr %tmp0 = load <2 x i64>* %src, align 16 %tmp1 = bitcast <2 x i64> %tmp0 to <4 x i32> %tmp2 = extractelement <4 x i32> %tmp1, i32 0 diff --git a/test/CodeGen/ARM/widen-vmovs.ll b/test/CodeGen/ARM/widen-vmovs.ll index 8fd99ba7af..1f5113eeb3 100644 --- a/test/CodeGen/ARM/widen-vmovs.ll +++ b/test/CodeGen/ARM/widen-vmovs.ll @@ -3,7 +3,7 @@ target triple = "thumbv7-apple-ios" ; The 0.0 constant is loaded from the constant pool and kept in a register. ; CHECK: %entry -; CHECK: vldr.32 s +; CHECK: vldr s ; The float loop variable is initialized with a vmovs from the constant register. 
; The vmovs is first widened to a vmovd, and then converted to a vorr because of the v2f32 vadd.f32. ; CHECK: vorr [[DL:d[0-9]+]], [[DN:d[0-9]+]] diff --git a/test/CodeGen/Thumb2/2009-12-01-LoopIVUsers.ll b/test/CodeGen/Thumb2/2009-12-01-LoopIVUsers.ll index 034a28f003..524e5a6b7b 100644 --- a/test/CodeGen/Thumb2/2009-12-01-LoopIVUsers.ll +++ b/test/CodeGen/Thumb2/2009-12-01-LoopIVUsers.ll @@ -5,7 +5,7 @@ define void @fred(i32 %three_by_three, i8* %in, double %dt1, i32 %x_size, i32 %y entry: ; -- The loop following the load should only use a single add-literation ; instruction. -; CHECK: ldr.64 +; CHECK: vldr ; CHECK: adds r{{[0-9]+.*}}#1 ; CHECK-NOT: adds ; CHECK: subsections_via_symbols diff --git a/test/CodeGen/Thumb2/2010-06-14-NEONCoalescer.ll b/test/CodeGen/Thumb2/2010-06-14-NEONCoalescer.ll index 01fb0a581a..06762bad85 100644 --- a/test/CodeGen/Thumb2/2010-06-14-NEONCoalescer.ll +++ b/test/CodeGen/Thumb2/2010-06-14-NEONCoalescer.ll @@ -23,7 +23,7 @@ entry: %4 = insertelement <2 x double> %2, double %V.0.ph, i32 1 ; <<2 x double>> [#uses=2] ; Constant pool load followed by add. ; Then clobber the loaded register, not the sum. 
-; CHECK: vldr.64 [[LDR:d.*]], +; CHECK: vldr [[LDR:d.*]], ; CHECK: LPC0_0: ; CHECK: vadd.f64 [[ADD:d.*]], [[LDR]], [[LDR]] ; CHECK-NOT: vmov.f64 [[ADD]] diff --git a/test/MC/ARM/simple-fp-encoding.s b/test/MC/ARM/simple-fp-encoding.s index cb76215f7f..5c81c7881c 100644 --- a/test/MC/ARM/simple-fp-encoding.s +++ b/test/MC/ARM/simple-fp-encoding.s @@ -172,52 +172,52 @@ @ CHECK: vmov r0, r1, d16 @ encoding: [0x30,0x0b,0x51,0xec] vmov r0, r1, d16 -@ CHECK: vldr.64 d17, [r0] @ encoding: [0x00,0x1b,0xd0,0xed] -@ CHECK: vldr.32 s0, [lr] @ encoding: [0x00,0x0a,0x9e,0xed] -@ CHECK: vldr.64 d0, [lr] @ encoding: [0x00,0x0b,0x9e,0xed] +@ CHECK: vldr d17, [r0] @ encoding: [0x00,0x1b,0xd0,0xed] +@ CHECK: vldr s0, [lr] @ encoding: [0x00,0x0a,0x9e,0xed] +@ CHECK: vldr d0, [lr] @ encoding: [0x00,0x0b,0x9e,0xed] vldr.64 d17, [r0] - vldr s0, [lr] - vldr d0, [lr] + vldr.i32 s0, [lr] + vldr.d d0, [lr] -@ CHECK: vldr.64 d1, [r2, #32] @ encoding: [0x08,0x1b,0x92,0xed] -@ CHECK: vldr.64 d1, [r2, #-32] @ encoding: [0x08,0x1b,0x12,0xed] +@ CHECK: vldr d1, [r2, #32] @ encoding: [0x08,0x1b,0x92,0xed] +@ CHECK: vldr d1, [r2, #-32] @ encoding: [0x08,0x1b,0x12,0xed] vldr.64 d1, [r2, #32] - vldr.64 d1, [r2, #-32] + vldr.f64 d1, [r2, #-32] -@ CHECK: vldr.64 d2, [r3] @ encoding: [0x00,0x2b,0x93,0xed] +@ CHECK: vldr d2, [r3] @ encoding: [0x00,0x2b,0x93,0xed] vldr.64 d2, [r3] -@ CHECK: vldr.64 d3, [pc] @ encoding: [0x00,0x3b,0x9f,0xed] -@ CHECK: vldr.64 d3, [pc] @ encoding: [0x00,0x3b,0x9f,0xed] -@ CHECK: vldr.64 d3, [pc, #-0] @ encoding: [0x00,0x3b,0x1f,0xed] +@ CHECK: vldr d3, [pc] @ encoding: [0x00,0x3b,0x9f,0xed] +@ CHECK: vldr d3, [pc] @ encoding: [0x00,0x3b,0x9f,0xed] +@ CHECK: vldr d3, [pc, #-0] @ encoding: [0x00,0x3b,0x1f,0xed] vldr.64 d3, [pc] vldr.64 d3, [pc,#0] vldr.64 d3, [pc,#-0] -@ CHECK: vldr.32 s13, [r0] @ encoding: [0x00,0x6a,0xd0,0xed] +@ CHECK: vldr s13, [r0] @ encoding: [0x00,0x6a,0xd0,0xed] vldr.32 s13, [r0] -@ CHECK: vldr.32 s1, [r2, #32] @ encoding: [0x08,0x0a,0xd2,0xed] -@ 
CHECK: vldr.32 s1, [r2, #-32] @ encoding: [0x08,0x0a,0x52,0xed] +@ CHECK: vldr s1, [r2, #32] @ encoding: [0x08,0x0a,0xd2,0xed] +@ CHECK: vldr s1, [r2, #-32] @ encoding: [0x08,0x0a,0x52,0xed] vldr.32 s1, [r2, #32] vldr.32 s1, [r2, #-32] -@ CHECK: vldr.32 s2, [r3] @ encoding: [0x00,0x1a,0x93,0xed] +@ CHECK: vldr s2, [r3] @ encoding: [0x00,0x1a,0x93,0xed] vldr.32 s2, [r3] -@ CHECK: vldr.32 s5, [pc] @ encoding: [0x00,0x2a,0xdf,0xed] -@ CHECK: vldr.32 s5, [pc] @ encoding: [0x00,0x2a,0xdf,0xed] -@ CHECK: vldr.32 s5, [pc, #-0] @ encoding: [0x00,0x2a,0x5f,0xed] +@ CHECK: vldr s5, [pc] @ encoding: [0x00,0x2a,0xdf,0xed] +@ CHECK: vldr s5, [pc] @ encoding: [0x00,0x2a,0xdf,0xed] +@ CHECK: vldr s5, [pc, #-0] @ encoding: [0x00,0x2a,0x5f,0xed] vldr.32 s5, [pc] vldr.32 s5, [pc,#0] vldr.32 s5, [pc,#-0] -@ CHECK: vstr.64 d4, [r1] @ encoding: [0x00,0x4b,0x81,0xed] -@ CHECK: vstr.64 d4, [r1, #24] @ encoding: [0x06,0x4b,0x81,0xed] -@ CHECK: vstr.64 d4, [r1, #-24] @ encoding: [0x06,0x4b,0x01,0xed] -@ CHECK: vstr.32 s0, [lr] @ encoding: [0x00,0x0a,0x8e,0xed] -@ CHECK: vstr.64 d0, [lr] @ encoding: [0x00,0x0b,0x8e,0xed] +@ CHECK: vstr d4, [r1] @ encoding: [0x00,0x4b,0x81,0xed] +@ CHECK: vstr d4, [r1, #24] @ encoding: [0x06,0x4b,0x81,0xed] +@ CHECK: vstr d4, [r1, #-24] @ encoding: [0x06,0x4b,0x01,0xed] +@ CHECK: vstr s0, [lr] @ encoding: [0x00,0x0a,0x8e,0xed] +@ CHECK: vstr d0, [lr] @ encoding: [0x00,0x0b,0x8e,0xed] vstr.64 d4, [r1] vstr.64 d4, [r1, #24] @@ -225,9 +225,9 @@ vstr s0, [lr] vstr d0, [lr] -@ CHECK: vstr.32 s4, [r1] @ encoding: [0x00,0x2a,0x81,0xed] -@ CHECK: vstr.32 s4, [r1, #24] @ encoding: [0x06,0x2a,0x81,0xed] -@ CHECK: vstr.32 s4, [r1, #-24] @ encoding: [0x06,0x2a,0x01,0xed] +@ CHECK: vstr s4, [r1] @ encoding: [0x00,0x2a,0x81,0xed] +@ CHECK: vstr s4, [r1, #24] @ encoding: [0x06,0x2a,0x81,0xed] +@ CHECK: vstr s4, [r1, #-24] @ encoding: [0x06,0x2a,0x01,0xed] vstr.32 s4, [r1] vstr.32 s4, [r1, #24] vstr.32 s4, [r1, #-24] diff --git a/test/MC/Disassembler/ARM/arm-tests.txt 
b/test/MC/Disassembler/ARM/arm-tests.txt index a1431357fb..264a78a83e 100644 --- a/test/MC/Disassembler/ARM/arm-tests.txt +++ b/test/MC/Disassembler/ARM/arm-tests.txt @@ -215,7 +215,7 @@ # CHECK: vldmdb r2!, {s7, s8, s9, s10, s11} 0x05 0x3a 0x72 0xed -# CHECK: vldr.32 s23, [r2, #660] +# CHECK: vldr s23, [r2, #660] 0xa5 0xba 0xd2 0xed # CHECK: strtvc r5, [r3], r0, lsr #20 diff --git a/test/MC/Disassembler/ARM/fp-encoding.txt b/test/MC/Disassembler/ARM/fp-encoding.txt index f3e026138a..9095b84ce1 100644 --- a/test/MC/Disassembler/ARM/fp-encoding.txt +++ b/test/MC/Disassembler/ARM/fp-encoding.txt @@ -152,46 +152,46 @@ # CHECK: vmov r0, r1, d16 0x00 0x1b 0xd0 0xed -# CHECK: vldr.64 d17, [r0] +# CHECK: vldr d17, [r0] 0x08 0x1b 0x92 0xed 0x08 0x1b 0x12 0xed -# CHECK: vldr.64 d1, [r2, #32] -# CHECK: vldr.64 d1, [r2, #-32] +# CHECK: vldr d1, [r2, #32] +# CHECK: vldr d1, [r2, #-32] 0x00 0x2b 0x93 0xed -# CHECK: vldr.64 d2, [r3] +# CHECK: vldr d2, [r3] 0x00 0x3b 0x9f 0xed -# CHECK: vldr.64 d3, [pc] +# CHECK: vldr d3, [pc] 0x00 0x6a 0xd0 0xed -# CHECK: vldr.32 s13, [r0] +# CHECK: vldr s13, [r0] 0x08 0x0a 0xd2 0xed 0x08 0x0a 0x52 0xed -# CHECK: vldr.32 s1, [r2, #32] -# CHECK: vldr.32 s1, [r2, #-32] +# CHECK: vldr s1, [r2, #32] +# CHECK: vldr s1, [r2, #-32] 0x00 0x1a 0x93 0xed -# CHECK: vldr.32 s2, [r3] +# CHECK: vldr s2, [r3] 0x00 0x2a 0xdf 0xed -# CHECK: vldr.32 s5, [pc] +# CHECK: vldr s5, [pc] 0x00 0x4b 0x81 0xed 0x06 0x4b 0x81 0xed 0x06 0x4b 0x01 0xed -# CHECK: vstr.64 d4, [r1] -# CHECK: vstr.64 d4, [r1, #24] -# CHECK: vstr.64 d4, [r1, #-24] +# CHECK: vstr d4, [r1] +# CHECK: vstr d4, [r1, #24] +# CHECK: vstr d4, [r1, #-24] 0x00 0x2a 0x81 0xed 0x06 0x2a 0x81 0xed 0x06 0x2a 0x01 0xed -# CHECK: vstr.32 s4, [r1] -# CHECK: vstr.32 s4, [r1, #24] -# CHECK: vstr.32 s4, [r1, #-24] +# CHECK: vstr s4, [r1] +# CHECK: vstr s4, [r1, #24] +# CHECK: vstr s4, [r1, #-24] 0x0c 0x2b 0x91 0xec 0x06 0x1a 0x91 0xec |