author     Evan Cheng <evan.cheng@apple.com>    2010-11-12 20:32:20 +0000
committer  Evan Cheng <evan.cheng@apple.com>    2010-11-12 20:32:20 +0000
commit     529916ca4ab83ec472a2d7039a05007c4d40553a
tree       635b3718aef92dde8f1b5a2fabeb3d6b38bbcff8
parent     b39e6488eef7a5ff8b4e6acf0d42d096f5be853b
Add some missing isel predicates on def : Pat patterns to avoid generating VFP vmla / vmls (they cause stalls). Disabling them in isel is probably not the right solution; I'll look into a proper solution next.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@118922 91177308-0d34-0410-b5e6-96231b3b80d8
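For illustration, this is the kind of IR the new predicates gate (a minimal sketch distilled from the updated fmacs.ll test in this commit; the A8 lines are FileCheck patterns asserting that a slow-VMLx subtarget now gets a separate multiply and add instead of vmla):

; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=A8
define float @t1(float %acc, float %a, float %b) {
entry:
; A8: vmul.f32
; A8: vadd.f32
  %0 = fmul float %a, %b
  %1 = fadd float %acc, %0
  ret float %1
}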
-rw-r--r--  lib/Target/ARM/ARM.td                      |  3
-rw-r--r--  lib/Target/ARM/ARMInstrFormats.td          | 27
-rw-r--r--  lib/Target/ARM/ARMInstrVFP.td              | 88
-rw-r--r--  test/CodeGen/ARM/fmacs.ll                  | 55
-rw-r--r--  test/CodeGen/ARM/fmscs.ll                  | 39
-rw-r--r--  test/CodeGen/ARM/fnmacs.ll                 | 31
-rw-r--r--  test/CodeGen/ARM/fnmscs.ll                 | 64
-rw-r--r--  test/CodeGen/ARM/lsr-on-unrolled-loops.ll  | 18
8 files changed, 208 insertions, 117 deletions
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td
index eef152f274..a99dbfbd64 100644
--- a/lib/Target/ARM/ARM.td
+++ b/lib/Target/ARM/ARM.td
@@ -168,7 +168,8 @@ def : Processor<"cortex-a8", CortexA8Itineraries,
                 [ArchV7A, ProcA8,
                  FeatureHasSlowVMLx, FeatureT2XtPk]>;
 def : Processor<"cortex-a9", CortexA9Itineraries,
-                [ArchV7A, ProcA9, FeatureT2XtPk]>;
+                [ArchV7A, ProcA9,
+                 FeatureHasSlowVMLx, FeatureT2XtPk]>;
 
 // V7M Processors.
 def : ProcNoItin<"cortex-m3", [ArchV7M]>;
diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td
index 2c61be20c2..c3175989d1 100644
--- a/lib/Target/ARM/ARMInstrFormats.td
+++ b/lib/Target/ARM/ARMInstrFormats.td
@@ -1577,33 +1577,6 @@ class ADbI<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops,
   let Inst{4} = op4;
 }
 
-// Double precision, binary, VML[AS] (for additional predicate)
-class ADbI_vmlX<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops,
-                dag iops, InstrItinClass itin, string opc, string asm,
-                list<dag> pattern>
-  : VFPAI<oops, iops, VFPBinaryFrm, itin, opc, asm, pattern> {
-  // Instruction operands.
-  bits<5> Dd;
-  bits<5> Dn;
-  bits<5> Dm;
-
-  // Encode instruction operands.
-  let Inst{19-16} = Dn{3-0};
-  let Inst{7} = Dn{4};
-  let Inst{15-12} = Dd{3-0};
-  let Inst{22} = Dd{4};
-  let Inst{3-0} = Dm{3-0};
-  let Inst{5} = Dm{4};
-
-  let Inst{27-23} = opcod1;
-  let Inst{21-20} = opcod2;
-  let Inst{11-9} = 0b101;
-  let Inst{8} = 1;          // Double precision
-  let Inst{6} = op6;
-  let Inst{4} = op4;
-  list<Predicate> Predicates = [HasVFP2, UseVMLx];
-}
-
 // Single precision, unary
 class ASuI<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4,
            bit opcod5, dag oops, dag iops, InstrItinClass itin, string opc,
diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td
index e0fb5808a8..feb3e539e6 100644
--- a/lib/Target/ARM/ARMInstrVFP.td
+++ b/lib/Target/ARM/ARMInstrVFP.td
@@ -738,80 +738,96 @@ def VULTOD : AVConv1XI<0b11101, 0b11, 0b1011, 0b1011, 1,
 // FP FMA Operations.
 //
 
-def VMLAD : ADbI_vmlX<0b11100, 0b00, 0, 0,
-                      (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
-                      IIC_fpMAC64, "vmla", ".f64\t$Dd, $Dn, $Dm",
-                      [(set DPR:$Dd, (fadd (fmul DPR:$Dn, DPR:$Dm),
-                                           (f64 DPR:$Ddin)))]>,
-              RegConstraint<"$Ddin = $Dd">;
+def VMLAD : ADbI<0b11100, 0b00, 0, 0,
+                 (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
+                 IIC_fpMAC64, "vmla", ".f64\t$Dd, $Dn, $Dm",
+                 [(set DPR:$Dd, (fadd (fmul DPR:$Dn, DPR:$Dm),
+                                      (f64 DPR:$Ddin)))]>,
+              RegConstraint<"$Ddin = $Dd">,
+              Requires<[HasVFP2,UseVMLx]>;
 
 def VMLAS : ASbIn<0b11100, 0b00, 0, 0,
                   (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
                   IIC_fpMAC32, "vmla", ".f32\t$Sd, $Sn, $Sm",
                   [(set SPR:$Sd, (fadd (fmul SPR:$Sn, SPR:$Sm),
                                        SPR:$Sdin))]>,
-              RegConstraint<"$Sdin = $Sd">;
+              RegConstraint<"$Sdin = $Sd">,
+              Requires<[HasVFP2,DontUseNEONForFP,UseVMLx]>;
 
 def : Pat<(fadd DPR:$dstin, (fmul DPR:$a, (f64 DPR:$b))),
-          (VMLAD DPR:$dstin, DPR:$a, DPR:$b)>, Requires<[DontUseNEONForFP]>;
+          (VMLAD DPR:$dstin, DPR:$a, DPR:$b)>,
+          Requires<[HasVFP2,UseVMLx]>;
 def : Pat<(fadd SPR:$dstin, (fmul SPR:$a, SPR:$b)),
-          (VMLAS SPR:$dstin, SPR:$a, SPR:$b)>, Requires<[DontUseNEONForFP]>;
+          (VMLAS SPR:$dstin, SPR:$a, SPR:$b)>,
+          Requires<[HasVFP2,DontUseNEONForFP, UseVMLx]>;
 
-def VMLSD : ADbI_vmlX<0b11100, 0b00, 1, 0,
-                      (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
-                      IIC_fpMAC64, "vmls", ".f64\t$Dd, $Dn, $Dm",
-                      [(set DPR:$Dd, (fadd (fneg (fmul DPR:$Dn,DPR:$Dm)),
-                                           (f64 DPR:$Ddin)))]>,
-              RegConstraint<"$Ddin = $Dd">;
+def VMLSD : ADbI<0b11100, 0b00, 1, 0,
+                 (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
+                 IIC_fpMAC64, "vmls", ".f64\t$Dd, $Dn, $Dm",
+                 [(set DPR:$Dd, (fadd (fneg (fmul DPR:$Dn,DPR:$Dm)),
+                                      (f64 DPR:$Ddin)))]>,
+              RegConstraint<"$Ddin = $Dd">,
+              Requires<[HasVFP2,UseVMLx]>;
 
 def VMLSS : ASbIn<0b11100, 0b00, 1, 0,
                   (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
                   IIC_fpMAC32, "vmls", ".f32\t$Sd, $Sn, $Sm",
                   [(set SPR:$Sd, (fadd (fneg (fmul SPR:$Sn, SPR:$Sm)),
                                        SPR:$Sdin))]>,
-              RegConstraint<"$Sdin = $Sd">;
+              RegConstraint<"$Sdin = $Sd">,
+              Requires<[HasVFP2,DontUseNEONForFP,UseVMLx]>;
 
 def : Pat<(fsub DPR:$dstin, (fmul DPR:$a, (f64 DPR:$b))),
-          (VMLSD DPR:$dstin, DPR:$a, DPR:$b)>, Requires<[DontUseNEONForFP]>;
+          (VMLSD DPR:$dstin, DPR:$a, DPR:$b)>,
+          Requires<[HasVFP2,UseVMLx]>;
 def : Pat<(fsub SPR:$dstin, (fmul SPR:$a, SPR:$b)),
-          (VMLSS SPR:$dstin, SPR:$a, SPR:$b)>, Requires<[DontUseNEONForFP]>;
+          (VMLSS SPR:$dstin, SPR:$a, SPR:$b)>,
+          Requires<[HasVFP2,DontUseNEONForFP,UseVMLx]>;
 
-def VNMLAD : ADbI_vmlX<0b11100, 0b01, 1, 0,
-                       (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
-                       IIC_fpMAC64, "vnmla", ".f64\t$Dd, $Dn, $Dm",
-                       [(set DPR:$Dd,(fsub (fneg (fmul DPR:$Dn,DPR:$Dm)),
-                                           (f64 DPR:$Ddin)))]>,
-               RegConstraint<"$Ddin = $Dd">;
+def VNMLAD : ADbI<0b11100, 0b01, 1, 0,
+                  (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
+                  IIC_fpMAC64, "vnmla", ".f64\t$Dd, $Dn, $Dm",
+                  [(set DPR:$Dd,(fsub (fneg (fmul DPR:$Dn,DPR:$Dm)),
+                                      (f64 DPR:$Ddin)))]>,
+               RegConstraint<"$Ddin = $Dd">,
+               Requires<[HasVFP2,UseVMLx]>;
 
 def VNMLAS : ASbI<0b11100, 0b01, 1, 0,
                   (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
                   IIC_fpMAC32, "vnmla", ".f32\t$Sd, $Sn, $Sm",
                   [(set SPR:$Sd, (fsub (fneg (fmul SPR:$Sn, SPR:$Sm)),
                                        SPR:$Sdin))]>,
-              RegConstraint<"$Sdin = $Sd">;
+              RegConstraint<"$Sdin = $Sd">,
+              Requires<[HasVFP2,DontUseNEONForFP,UseVMLx]>;
 
 def : Pat<(fsub (fneg (fmul DPR:$a, (f64 DPR:$b))), DPR:$dstin),
-          (VNMLAD DPR:$dstin, DPR:$a, DPR:$b)>, Requires<[DontUseNEONForFP]>;
+          (VNMLAD DPR:$dstin, DPR:$a, DPR:$b)>,
+          Requires<[HasVFP2,UseVMLx]>;
 def : Pat<(fsub (fneg (fmul SPR:$a, SPR:$b)), SPR:$dstin),
-          (VNMLAS SPR:$dstin, SPR:$a, SPR:$b)>, Requires<[DontUseNEONForFP]>;
+          (VNMLAS SPR:$dstin, SPR:$a, SPR:$b)>,
+          Requires<[HasVFP2,DontUseNEONForFP,UseVMLx]>;
 
-def VNMLSD : ADbI_vmlX<0b11100, 0b01, 0, 0,
-                       (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
-                       IIC_fpMAC64, "vnmls", ".f64\t$Dd, $Dn, $Dm",
-                       [(set DPR:$Dd, (fsub (fmul DPR:$Dn, DPR:$Dm),
-                                            (f64 DPR:$Ddin)))]>,
-               RegConstraint<"$Ddin = $Dd">;
+def VNMLSD : ADbI<0b11100, 0b01, 0, 0,
+                  (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
+                  IIC_fpMAC64, "vnmls", ".f64\t$Dd, $Dn, $Dm",
+                  [(set DPR:$Dd, (fsub (fmul DPR:$Dn, DPR:$Dm),
+                                       (f64 DPR:$Ddin)))]>,
+               RegConstraint<"$Ddin = $Dd">,
+               Requires<[HasVFP2,UseVMLx]>;
 
 def VNMLSS : ASbI<0b11100, 0b01, 0, 0,
                   (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
                   IIC_fpMAC32, "vnmls", ".f32\t$Sd, $Sn, $Sm",
                   [(set SPR:$Sd, (fsub (fmul SPR:$Sn, SPR:$Sm),
                                        SPR:$Sdin))]>,
-              RegConstraint<"$Sdin = $Sd">;
+              RegConstraint<"$Sdin = $Sd">,
+              Requires<[HasVFP2,DontUseNEONForFP,UseVMLx]>;
 
 def : Pat<(fsub (fmul DPR:$a, (f64 DPR:$b)), DPR:$dstin),
-          (VNMLSD DPR:$dstin, DPR:$a, DPR:$b)>, Requires<[DontUseNEONForFP]>;
+          (VNMLSD DPR:$dstin, DPR:$a, DPR:$b)>,
+          Requires<[HasVFP2,UseVMLx]>;
 def : Pat<(fsub (fmul SPR:$a, SPR:$b), SPR:$dstin),
-          (VNMLSS SPR:$dstin, SPR:$a, SPR:$b)>, Requires<[DontUseNEONForFP]>;
+          (VNMLSS SPR:$dstin, SPR:$a, SPR:$b)>,
+          Requires<[HasVFP2,DontUseNEONForFP,UseVMLx]>;
 
 //===----------------------------------------------------------------------===//
diff --git a/test/CodeGen/ARM/fmacs.ll b/test/CodeGen/ARM/fmacs.ll
index f8b47b5bac..fb83ef626a 100644
--- a/test/CodeGen/ARM/fmacs.ll
+++ b/test/CodeGen/ARM/fmacs.ll
@@ -1,24 +1,51 @@
 ; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NFP0
-; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8
-; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s -check-prefix=CORTEXA9
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NEON
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=A8
 
-define float @test(float %acc, float %a, float %b) {
+define float @t1(float %acc, float %a, float %b) {
 entry:
+; VFP2: t1:
+; VFP2: vmla.f32
+
+; NEON: t1:
+; NEON: vmla.f32
+
+; A8: t1:
+; A8: vmul.f32
+; A8: vadd.f32
   %0 = fmul float %a, %b
   %1 = fadd float %acc, %0
   ret float %1
 }
 
-; VFP2: test:
-; VFP2: vmla.f32 s2, s1, s0
+define double @t2(double %acc, double %a, double %b) {
+entry:
+; VFP2: t2:
+; VFP2: vmla.f64
+
+; NEON: t2:
+; NEON: vmla.f64
 
-; NFP1: test:
-; NFP1: vmul.f32 d0, d1, d0
-; NFP0: test:
-; NFP0: vmla.f32 s2, s1, s0
+; A8: t2:
+; A8: vmul.f64
+; A8: vadd.f64
+  %0 = fmul double %a, %b
+  %1 = fadd double %acc, %0
+  ret double %1
+}
 
-; CORTEXA8: test:
-; CORTEXA8: vmul.f32 d0, d1, d0
-; CORTEXA9: test:
-; CORTEXA9: vmla.f32 s2, s1, s0
+define float @t3(float %acc, float %a, float %b) {
+entry:
+; VFP2: t3:
+; VFP2: vmla.f32
+
+; NEON: t3:
+; NEON: vmla.f32
+
+; A8: t3:
+; A8: vmul.f32
+; A8: vadd.f32
+  %0 = fmul float %a, %b
+  %1 = fadd float %0, %acc
+  ret float %1
+}
diff --git a/test/CodeGen/ARM/fmscs.ll b/test/CodeGen/ARM/fmscs.ll
index 7a70543dee..a182833a7a 100644
--- a/test/CodeGen/ARM/fmscs.ll
+++ b/test/CodeGen/ARM/fmscs.ll
@@ -1,24 +1,35 @@
 ; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NFP0
-; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8
-; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s -check-prefix=CORTEXA9
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NEON
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=A8
 
-define float @test(float %acc, float %a, float %b) {
+define float @t1(float %acc, float %a, float %b) {
 entry:
+; VFP2: t1:
+; VFP2: vnmls.f32
+
+; NEON: t1:
+; NEON: vnmls.f32
+
+; A8: t1:
+; A8: vmul.f32
+; A8: vsub.f32
   %0 = fmul float %a, %b
   %1 = fsub float %0, %acc
   ret float %1
 }
 
-; VFP2: test:
-; VFP2: vnmls.f32 s2, s1, s0
+define double @t2(double %acc, double %a, double %b) {
+entry:
+; VFP2: t2:
+; VFP2: vnmls.f64
 
-; NFP1: test:
-; NFP1: vnmls.f32 s2, s1, s0
-; NFP0: test:
-; NFP0: vnmls.f32 s2, s1, s0
+; NEON: t2:
+; NEON: vnmls.f64
 
-; CORTEXA8: test:
-; CORTEXA8: vnmls.f32 s2, s1, s0
-; CORTEXA9: test:
-; CORTEXA9: vnmls.f32 s2, s1, s0
+; A8: t2:
+; A8: vmul.f64
+; A8: vsub.f64
+  %0 = fmul double %a, %b
+  %1 = fsub double %0, %acc
+  ret double %1
+}
diff --git a/test/CodeGen/ARM/fnmacs.ll b/test/CodeGen/ARM/fnmacs.ll
index 1d1d06a70e..1763d46e06 100644
--- a/test/CodeGen/ARM/fnmacs.ll
+++ b/test/CodeGen/ARM/fnmacs.ll
@@ -1,20 +1,35 @@
 ; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2
 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NEON
-; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=NEONFP
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=A8
 
-define float @test(float %acc, float %a, float %b) {
+define float @t1(float %acc, float %a, float %b) {
 entry:
+; VFP2: t1:
 ; VFP2: vmls.f32
-; NEON: vmls.f32
 
-; NEONFP-NOT: vmls
-; NEONFP-NOT: vmov.f32
-; NEONFP: vmul.f32
-; NEONFP: vsub.f32
-; NEONFP: vmov
+; NEON: t1:
+; NEON: vmls.f32
 
+; A8: t1:
+; A8: vmul.f32
+; A8: vsub.f32
   %0 = fmul float %a, %b
   %1 = fsub float %acc, %0
   ret float %1
 }
 
+define double @t2(double %acc, double %a, double %b) {
+entry:
+; VFP2: t2:
+; VFP2: vmls.f64
+
+; NEON: t2:
+; NEON: vmls.f64
+
+; A8: t2:
+; A8: vmul.f64
+; A8: vsub.f64
+  %0 = fmul double %a, %b
+  %1 = fsub double %acc, %0
+  ret double %1
+}
diff --git a/test/CodeGen/ARM/fnmscs.ll b/test/CodeGen/ARM/fnmscs.ll
index 0b47edd5f1..5d832537c0 100644
--- a/test/CodeGen/ARM/fnmscs.ll
+++ b/test/CodeGen/ARM/fnmscs.ll
@@ -1,23 +1,71 @@
-; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
-; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s
-; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s
+; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NEON
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=A8
 
-define float @test1(float %acc, float %a, float %b) nounwind {
-; CHECK: vnmla.f32 s{{.*}}, s{{.*}}, s{{.*}}
+define float @t1(float %acc, float %a, float %b) nounwind {
 entry:
+; VFP2: t1:
+; VFP2: vnmla.f32
+
+; NEON: t1:
+; NEON: vnmla.f32
+
+; A8: t1:
+; A8: vnmul.f32 s0, s1, s0
+; A8: vsub.f32 d0, d0, d1
   %0 = fmul float %a, %b
   %1 = fsub float -0.0, %0
   %2 = fsub float %1, %acc
   ret float %2
 }
 
-define float @test2(float %acc, float %a, float %b) nounwind {
-; CHECK: vnmla.f32 s{{.*}}, s{{.*}}, s{{.*}}
+define float @t2(float %acc, float %a, float %b) nounwind {
 entry:
+; VFP2: t2:
+; VFP2: vnmla.f32
+
+; NEON: t2:
+; NEON: vnmla.f32
+
+; A8: t2:
+; A8: vnmul.f32 s0, s1, s0
+; A8: vsub.f32 d0, d0, d1
   %0 = fmul float %a, %b
   %1 = fmul float -1.0, %0
   %2 = fsub float %1, %acc
   ret float %2
 }
 
+define double @t3(double %acc, double %a, double %b) nounwind {
+entry:
+; VFP2: t3:
+; VFP2: vnmla.f64
+
+; NEON: t3:
+; NEON: vnmla.f64
+
+; A8: t3:
+; A8: vnmul.f64 d16, d16, d17
+; A8: vsub.f64 d16, d16, d17
+  %0 = fmul double %a, %b
+  %1 = fsub double -0.0, %0
+  %2 = fsub double %1, %acc
+  ret double %2
+}
+
+define double @t4(double %acc, double %a, double %b) nounwind {
+entry:
+; VFP2: t4:
+; VFP2: vnmla.f64
+
+; NEON: t4:
+; NEON: vnmla.f64
+
+; A8: t4:
+; A8: vnmul.f64 d16, d16, d17
+; A8: vsub.f64 d16, d16, d17
+  %0 = fmul double %a, %b
+  %1 = fmul double -1.0, %0
+  %2 = fsub double %1, %acc
+  ret double %2
+}
diff --git a/test/CodeGen/ARM/lsr-on-unrolled-loops.ll b/test/CodeGen/ARM/lsr-on-unrolled-loops.ll
index 52e40b234b..9882690da2 100644
--- a/test/CodeGen/ARM/lsr-on-unrolled-loops.ll
+++ b/test/CodeGen/ARM/lsr-on-unrolled-loops.ll
@@ -4,14 +4,14 @@
 ; constant offset addressing, so that each of the following stores
 ; uses the same register.
 
-; CHECK: vstr.32 s{{.*}}, [lr, #-128]
-; CHECK: vstr.32 s{{.*}}, [lr, #-96]
-; CHECK: vstr.32 s{{.*}}, [lr, #-64]
-; CHECK: vstr.32 s{{.*}}, [lr, #-32]
-; CHECK: vstr.32 s{{.*}}, [lr]
-; CHECK: vstr.32 s{{.*}}, [lr, #32]
-; CHECK: vstr.32 s{{.*}}, [lr, #64]
-; CHECK: vstr.32 s{{.*}}, [lr, #96]
+; CHECK: vstr.32 s{{.*}}, [{{(r[0-9]+)|(lr)}}, #-128]
+; CHECK: vstr.32 s{{.*}}, [{{(r[0-9]+)|(lr)}}, #-96]
+; CHECK: vstr.32 s{{.*}}, [{{(r[0-9]+)|(lr)}}, #-64]
+; CHECK: vstr.32 s{{.*}}, [{{(r[0-9]+)|(lr)}}, #-32]
+; CHECK: vstr.32 s{{.*}}, [{{(r[0-9]+)|(lr)}}]
+; CHECK: vstr.32 s{{.*}}, [{{(r[0-9]+)|(lr)}}, #32]
+; CHECK: vstr.32 s{{.*}}, [{{(r[0-9]+)|(lr)}}, #64]
+; CHECK: vstr.32 s{{.*}}, [{{(r[0-9]+)|(lr)}}, #96]
 
 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32"
 
@@ -627,7 +627,7 @@ bb24: ; preds = %bb23
 ; in a register.
 
 ; CHECK: @ %bb24
-; CHECK: subs{{.*}} [[REGISTER:(r[0-9]+)|(lr)]], #1
+; CHECK: subs{{.*}} {{(r[0-9]+)|(lr)}}, #1
 ; CHECK: bne.w
   %92 = icmp eq i32 %tmp81, %indvar78 ; <i1> [#uses=1]
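A quick way to observe the new behavior (a sketch, assuming an llc built from this revision): feed the updated fmacs.ll test through llc for cortex-a9. Because cortex-a9 now carries FeatureHasSlowVMLx (see the ARM.td hunk above), the output should show the same vmul.f32 / vadd.f32 sequence the A8 check lines expect, rather than vmla.f32:

llc < test/CodeGen/ARM/fmacs.ll -march=arm -mcpu=cortex-a9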