aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorOwen Anderson <resistor@mac.com>2010-11-02 01:24:55 +0000
committerOwen Anderson <resistor@mac.com>2010-11-02 01:24:55 +0000
commitcf667be17b479fe276fd606b8fd72ccfa3065bb8 (patch)
tree6ecd9c507048d5a1856cb671d8aa2216c41919bb
parent4053e63a4b2f54446e29ef5bdad64e43b3bf52d6 (diff)
Add correct NEON encodings for vld2, vld3, and vld4 basic variants.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@117997 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Target/ARM/ARMCodeEmitter.cpp3
-rw-r--r--lib/Target/ARM/ARMInstrInfo.td1
-rw-r--r--lib/Target/ARM/ARMInstrNEON.td160
-rw-r--r--lib/Target/ARM/ARMMCCodeEmitter.cpp9
-rw-r--r--test/MC/ARM/neon-vld-encoding.s54
5 files changed, 157 insertions, 70 deletions
diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp
index 3fa9a9c724..429b1d3357 100644
--- a/lib/Target/ARM/ARMCodeEmitter.cpp
+++ b/lib/Target/ARM/ARMCodeEmitter.cpp
@@ -102,6 +102,7 @@ namespace {
unsigned getMachineSoImmOpValue(unsigned SoImm);
unsigned getAddrMode6RegisterOperand(const MachineInstr &MI);
+ unsigned getAddrMode6OffsetOperand(const MachineInstr &MI);
unsigned getAddrModeSBit(const MachineInstr &MI,
const TargetInstrDesc &TID) const;
@@ -175,6 +176,8 @@ namespace {
const { return 0; }
unsigned getAddrMode6RegisterOperand(const MachineInstr &MI, unsigned Op)
const { return 0; }
+ unsigned getAddrMode6OffsetOperand(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
unsigned getBitfieldInvertedMaskOpValue(const MachineInstr &MI,
unsigned Op) const { return 0; }
unsigned getAddrModeImm12OpValue(const MachineInstr &MI, unsigned Op)
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index 649c83bbd1..94718cb53d 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -478,6 +478,7 @@ def addrmode6 : Operand<i32>,
def am6offset : Operand<i32> {
let PrintMethod = "printAddrMode6OffsetOperand";
let MIOperandInfo = (ops GPR);
+ string EncoderMethod = "getAddrMode6OffsetOperand";
}
// addrmodepc := pc + reg
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index 48ded47bbf..82ade478ea 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -284,22 +284,28 @@ def VLD1d64QPseudo_UPD : VLDQQWBPseudo<IIC_VLD1x4u>;
// VLD2 : Vector Load (multiple 2-element structures)
class VLD2D<bits<4> op11_8, bits<4> op7_4, string Dt>
- : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2),
- (ins addrmode6:$addr), IIC_VLD2,
- "vld2", Dt, "\\{$dst1, $dst2\\}, $addr", "", []>;
+ : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2),
+ (ins addrmode6:$Rn), IIC_VLD2,
+ "vld2", Dt, "\\{$Vd, $dst2\\}, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{5-4} = Rn{5-4};
+}
class VLD2Q<bits<4> op7_4, string Dt>
: NLdSt<0, 0b10, 0b0011, op7_4,
- (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4),
- (ins addrmode6:$addr), IIC_VLD2x2,
- "vld2", Dt, "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr", "", []>;
+ (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
+ (ins addrmode6:$Rn), IIC_VLD2x2,
+ "vld2", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{5-4} = Rn{5-4};
+}
-def VLD2d8 : VLD2D<0b1000, 0b0000, "8">;
-def VLD2d16 : VLD2D<0b1000, 0b0100, "16">;
-def VLD2d32 : VLD2D<0b1000, 0b1000, "32">;
+def VLD2d8 : VLD2D<0b1000, {0,0,?,?}, "8">;
+def VLD2d16 : VLD2D<0b1000, {0,1,?,?}, "16">;
+def VLD2d32 : VLD2D<0b1000, {1,0,?,?}, "32">;
-def VLD2q8 : VLD2Q<0b0000, "8">;
-def VLD2q16 : VLD2Q<0b0100, "16">;
-def VLD2q32 : VLD2Q<0b1000, "32">;
+def VLD2q8 : VLD2Q<{0,0,?,?}, "8">;
+def VLD2q16 : VLD2Q<{0,1,?,?}, "16">;
+def VLD2q32 : VLD2Q<{1,0,?,?}, "32">;
def VLD2d8Pseudo : VLDQPseudo<IIC_VLD2>;
def VLD2d16Pseudo : VLDQPseudo<IIC_VLD2>;
@@ -311,24 +317,28 @@ def VLD2q32Pseudo : VLDQQPseudo<IIC_VLD2x2>;
// ...with address register writeback:
class VLD2DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
- : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2, GPR:$wb),
- (ins addrmode6:$addr, am6offset:$offset), IIC_VLD2u,
- "vld2", Dt, "\\{$dst1, $dst2\\}, $addr$offset",
- "$addr.addr = $wb", []>;
+ : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD2u,
+ "vld2", Dt, "\\{$Vd, $dst2\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+}
class VLD2QWB<bits<4> op7_4, string Dt>
: NLdSt<0, 0b10, 0b0011, op7_4,
- (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
- (ins addrmode6:$addr, am6offset:$offset), IIC_VLD2x2u,
- "vld2", Dt, "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr$offset",
- "$addr.addr = $wb", []>;
+ (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD2x2u,
+ "vld2", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+}
-def VLD2d8_UPD : VLD2DWB<0b1000, 0b0000, "8">;
-def VLD2d16_UPD : VLD2DWB<0b1000, 0b0100, "16">;
-def VLD2d32_UPD : VLD2DWB<0b1000, 0b1000, "32">;
+def VLD2d8_UPD : VLD2DWB<0b1000, {0,0,?,?}, "8">;
+def VLD2d16_UPD : VLD2DWB<0b1000, {0,1,?,?}, "16">;
+def VLD2d32_UPD : VLD2DWB<0b1000, {1,0,?,?}, "32">;
-def VLD2q8_UPD : VLD2QWB<0b0000, "8">;
-def VLD2q16_UPD : VLD2QWB<0b0100, "16">;
-def VLD2q32_UPD : VLD2QWB<0b1000, "32">;
+def VLD2q8_UPD : VLD2QWB<{0,0,?,?}, "8">;
+def VLD2q16_UPD : VLD2QWB<{0,1,?,?}, "16">;
+def VLD2q32_UPD : VLD2QWB<{1,0,?,?}, "32">;
def VLD2d8Pseudo_UPD : VLDQWBPseudo<IIC_VLD2u>;
def VLD2d16Pseudo_UPD : VLDQWBPseudo<IIC_VLD2u>;
@@ -339,22 +349,25 @@ def VLD2q16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD2x2u>;
def VLD2q32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD2x2u>;
// ...with double-spaced registers (for disassembly only):
-def VLD2b8 : VLD2D<0b1001, 0b0000, "8">;
-def VLD2b16 : VLD2D<0b1001, 0b0100, "16">;
-def VLD2b32 : VLD2D<0b1001, 0b1000, "32">;
-def VLD2b8_UPD : VLD2DWB<0b1001, 0b0000, "8">;
-def VLD2b16_UPD : VLD2DWB<0b1001, 0b0100, "16">;
-def VLD2b32_UPD : VLD2DWB<0b1001, 0b1000, "32">;
+def VLD2b8 : VLD2D<0b1001, {0,0,?,?}, "8">;
+def VLD2b16 : VLD2D<0b1001, {0,1,?,?}, "16">;
+def VLD2b32 : VLD2D<0b1001, {1,0,?,?}, "32">;
+def VLD2b8_UPD : VLD2DWB<0b1001, {0,0,?,?}, "8">;
+def VLD2b16_UPD : VLD2DWB<0b1001, {0,1,?,?}, "16">;
+def VLD2b32_UPD : VLD2DWB<0b1001, {1,0,?,?}, "32">;
// VLD3 : Vector Load (multiple 3-element structures)
class VLD3D<bits<4> op11_8, bits<4> op7_4, string Dt>
- : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3),
- (ins addrmode6:$addr), IIC_VLD3,
- "vld3", Dt, "\\{$dst1, $dst2, $dst3\\}, $addr", "", []>;
+ : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
+ (ins addrmode6:$Rn), IIC_VLD3,
+ "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{4} = Rn{4};
+}
-def VLD3d8 : VLD3D<0b0100, 0b0000, "8">;
-def VLD3d16 : VLD3D<0b0100, 0b0100, "16">;
-def VLD3d32 : VLD3D<0b0100, 0b1000, "32">;
+def VLD3d8 : VLD3D<0b0100, {0,0,0,?}, "8">;
+def VLD3d16 : VLD3D<0b0100, {0,1,0,?}, "16">;
+def VLD3d32 : VLD3D<0b0100, {1,0,0,?}, "32">;
def VLD3d8Pseudo : VLDQQPseudo<IIC_VLD3>;
def VLD3d16Pseudo : VLDQQPseudo<IIC_VLD3>;
@@ -363,26 +376,28 @@ def VLD3d32Pseudo : VLDQQPseudo<IIC_VLD3>;
// ...with address register writeback:
class VLD3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdSt<0, 0b10, op11_8, op7_4,
- (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, GPR:$wb),
- (ins addrmode6:$addr, am6offset:$offset), IIC_VLD3u,
- "vld3", Dt, "\\{$dst1, $dst2, $dst3\\}, $addr$offset",
- "$addr.addr = $wb", []>;
+ (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD3u,
+ "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+}
-def VLD3d8_UPD : VLD3DWB<0b0100, 0b0000, "8">;
-def VLD3d16_UPD : VLD3DWB<0b0100, 0b0100, "16">;
-def VLD3d32_UPD : VLD3DWB<0b0100, 0b1000, "32">;
+def VLD3d8_UPD : VLD3DWB<0b0100, {0,0,0,?}, "8">;
+def VLD3d16_UPD : VLD3DWB<0b0100, {0,1,0,?}, "16">;
+def VLD3d32_UPD : VLD3DWB<0b0100, {1,0,0,?}, "32">;
def VLD3d8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>;
def VLD3d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>;
def VLD3d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>;
// ...with double-spaced registers (non-updating versions for disassembly only):
-def VLD3q8 : VLD3D<0b0101, 0b0000, "8">;
-def VLD3q16 : VLD3D<0b0101, 0b0100, "16">;
-def VLD3q32 : VLD3D<0b0101, 0b1000, "32">;
-def VLD3q8_UPD : VLD3DWB<0b0101, 0b0000, "8">;
-def VLD3q16_UPD : VLD3DWB<0b0101, 0b0100, "16">;
-def VLD3q32_UPD : VLD3DWB<0b0101, 0b1000, "32">;
+def VLD3q8 : VLD3D<0b0101, {0,0,0,?}, "8">;
+def VLD3q16 : VLD3D<0b0101, {0,1,0,?}, "16">;
+def VLD3q32 : VLD3D<0b0101, {1,0,0,?}, "32">;
+def VLD3q8_UPD : VLD3DWB<0b0101, {0,0,0,?}, "8">;
+def VLD3q16_UPD : VLD3DWB<0b0101, {0,1,0,?}, "16">;
+def VLD3q32_UPD : VLD3DWB<0b0101, {1,0,0,?}, "32">;
def VLD3q8Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
def VLD3q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
@@ -396,13 +411,16 @@ def VLD3q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
// VLD4 : Vector Load (multiple 4-element structures)
class VLD4D<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdSt<0, 0b10, op11_8, op7_4,
- (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4),
- (ins addrmode6:$addr), IIC_VLD4,
- "vld4", Dt, "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr", "", []>;
+ (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
+ (ins addrmode6:$Rn), IIC_VLD4,
+ "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{5-4} = Rn{5-4};
+}
-def VLD4d8 : VLD4D<0b0000, 0b0000, "8">;
-def VLD4d16 : VLD4D<0b0000, 0b0100, "16">;
-def VLD4d32 : VLD4D<0b0000, 0b1000, "32">;
+def VLD4d8 : VLD4D<0b0000, {0,0,?,?}, "8">;
+def VLD4d16 : VLD4D<0b0000, {0,1,?,?}, "16">;
+def VLD4d32 : VLD4D<0b0000, {1,0,?,?}, "32">;
def VLD4d8Pseudo : VLDQQPseudo<IIC_VLD4>;
def VLD4d16Pseudo : VLDQQPseudo<IIC_VLD4>;
@@ -411,26 +429,28 @@ def VLD4d32Pseudo : VLDQQPseudo<IIC_VLD4>;
// ...with address register writeback:
class VLD4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdSt<0, 0b10, op11_8, op7_4,
- (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
- (ins addrmode6:$addr, am6offset:$offset), IIC_VLD4,
- "vld4", Dt, "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr$offset",
- "$addr.addr = $wb", []>;
+ (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD4,
+ "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+}
-def VLD4d8_UPD : VLD4DWB<0b0000, 0b0000, "8">;
-def VLD4d16_UPD : VLD4DWB<0b0000, 0b0100, "16">;
-def VLD4d32_UPD : VLD4DWB<0b0000, 0b1000, "32">;
+def VLD4d8_UPD : VLD4DWB<0b0000, {0,0,?,?}, "8">;
+def VLD4d16_UPD : VLD4DWB<0b0000, {0,1,?,?}, "16">;
+def VLD4d32_UPD : VLD4DWB<0b0000, {1,0,?,?}, "32">;
def VLD4d8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4>;
def VLD4d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4>;
def VLD4d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4>;
// ...with double-spaced registers (non-updating versions for disassembly only):
-def VLD4q8 : VLD4D<0b0001, 0b0000, "8">;
-def VLD4q16 : VLD4D<0b0001, 0b0100, "16">;
-def VLD4q32 : VLD4D<0b0001, 0b1000, "32">;
-def VLD4q8_UPD : VLD4DWB<0b0001, 0b0000, "8">;
-def VLD4q16_UPD : VLD4DWB<0b0001, 0b0100, "16">;
-def VLD4q32_UPD : VLD4DWB<0b0001, 0b1000, "32">;
+def VLD4q8 : VLD4D<0b0001, {0,0,?,?}, "8">;
+def VLD4q16 : VLD4D<0b0001, {0,1,?,?}, "16">;
+def VLD4q32 : VLD4D<0b0001, {1,0,?,?}, "32">;
+def VLD4q8_UPD : VLD4DWB<0b0001, {0,0,?,?}, "8">;
+def VLD4q16_UPD : VLD4DWB<0b0001, {0,1,?,?}, "16">;
+def VLD4q32_UPD : VLD4DWB<0b0001, {1,0,?,?}, "32">;
def VLD4q8Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4>;
def VLD4q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4>;
diff --git a/lib/Target/ARM/ARMMCCodeEmitter.cpp b/lib/Target/ARM/ARMMCCodeEmitter.cpp
index 6f32ce7645..fcf10ff745 100644
--- a/lib/Target/ARM/ARMMCCodeEmitter.cpp
+++ b/lib/Target/ARM/ARMMCCodeEmitter.cpp
@@ -100,6 +100,7 @@ public:
unsigned getRegisterListOpValue(const MCInst &MI, unsigned Op) const;
unsigned getAddrMode6RegisterOperand(const MCInst &MI, unsigned Op) const;
+ unsigned getAddrMode6OffsetOperand(const MCInst &MI, unsigned Op) const;
unsigned getNumFixupKinds() const {
assert(0 && "ARMMCCodeEmitter::getNumFixupKinds() not yet implemented.");
@@ -312,6 +313,14 @@ unsigned ARMMCCodeEmitter::getAddrMode6RegisterOperand(const MCInst &MI,
return RegNo | (Align << 4);
}
+unsigned ARMMCCodeEmitter::getAddrMode6OffsetOperand(const MCInst &MI,
+ unsigned Op) const {
+ const MCOperand &regno = MI.getOperand(Op);
+ if (regno.getReg() == 0) return 0x0D;
+ return regno.getReg();
+}
+
+
void ARMMCCodeEmitter::
EncodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups) const {
diff --git a/test/MC/ARM/neon-vld-encoding.s b/test/MC/ARM/neon-vld-encoding.s
index 93919c8239..eb8f80e367 100644
--- a/test/MC/ARM/neon-vld-encoding.s
+++ b/test/MC/ARM/neon-vld-encoding.s
@@ -17,3 +17,57 @@
vld1.32 {d16, d17}, [r0]
@ CHECK: vld1.64 {d16, d17}, [r0] @ encoding: [0xcf,0x0a,0x60,0xf4]
vld1.64 {d16, d17}, [r0]
+
+@ CHECK: vld2.8 {d16, d17}, [r0, :64] @ encoding: [0x1f,0x08,0x60,0xf4]
+ vld2.8 {d16, d17}, [r0, :64]
+@ CHECK: vld2.16 {d16, d17}, [r0, :128] @ encoding: [0x6f,0x08,0x60,0xf4]
+ vld2.16 {d16, d17}, [r0, :128]
+@ CHECK: vld2.32 {d16, d17}, [r0] @ encoding: [0x8f,0x08,0x60,0xf4]
+ vld2.32 {d16, d17}, [r0]
+@ CHECK: vld2.8 {d16, d17, d18, d19}, [r0, :64] @ encoding: [0x1f,0x03,0x60,0xf4]
+ vld2.8 {d16, d17, d18, d19}, [r0, :64]
+@ CHECK: vld2.16 {d16, d17, d18, d19}, [r0, :128] @ encoding: [0x6f,0x03,0x60,0xf4]
+ vld2.16 {d16, d17, d18, d19}, [r0, :128]
+@ CHECK: vld2.32 {d16, d17, d18, d19}, [r0, :256] @ encoding: [0xbf,0x03,0x60,0xf4]
+ vld2.32 {d16, d17, d18, d19}, [r0, :256]
+
+@ CHECK: vld3.8 {d16, d17, d18}, [r0, :64] @ encoding: [0x1f,0x04,0x60,0xf4]
+ vld3.8 {d16, d17, d18}, [r0, :64]
+@ CHECK: vld3.16 {d16, d17, d18}, [r0] @ encoding: [0x4f,0x04,0x60,0xf4]
+ vld3.16 {d16, d17, d18}, [r0]
+@ CHECK: vld3.32 {d16, d17, d18}, [r0] @ encoding: [0x8f,0x04,0x60,0xf4]
+ vld3.32 {d16, d17, d18}, [r0]
+@ CHECK: vld3.8 {d16, d18, d20}, [r0, :64]! @ encoding: [0x1d,0x05,0x60,0xf4]
+ vld3.8 {d16, d18, d20}, [r0, :64]!
+@ CHECK: vld3.8 {d17, d19, d21}, [r0, :64]! @ encoding: [0x1d,0x15,0x60,0xf4]
+ vld3.8 {d17, d19, d21}, [r0, :64]!
+@ CHECK: vld3.16 {d16, d18, d20}, [r0]! @ encoding: [0x4d,0x05,0x60,0xf4]
+ vld3.16 {d16, d18, d20}, [r0]!
+@ CHECK: vld3.16 {d17, d19, d21}, [r0]! @ encoding: [0x4d,0x15,0x60,0xf4]
+ vld3.16 {d17, d19, d21}, [r0]!
+@ CHECK: vld3.32 {d16, d18, d20}, [r0]! @ encoding: [0x8d,0x05,0x60,0xf4]
+ vld3.32 {d16, d18, d20}, [r0]!
+@ CHECK: vld3.32 {d17, d19, d21}, [r0]! @ encoding: [0x8d,0x15,0x60,0xf4]
+ vld3.32 {d17, d19, d21}, [r0]!
+
+@ CHECK: vld4.8 {d16, d17, d18, d19}, [r0, :64] @ encoding: [0x1f,0x00,0x60,0xf4]
+ vld4.8 {d16, d17, d18, d19}, [r0, :64]
+@ CHECK: vld4.16 {d16, d17, d18, d19}, [r0, :128] @ encoding: [0x6f,0x00,0x60,0xf4]
+ vld4.16 {d16, d17, d18, d19}, [r0, :128]
+@ CHECK: vld4.32 {d16, d17, d18, d19}, [r0, :256] @ encoding: [0xbf,0x00,0x60,0xf4]
+ vld4.32 {d16, d17, d18, d19}, [r0, :256]
+@ CHECK: vld4.8 {d16, d18, d20, d22}, [r0, :256]! @ encoding: [0x3d,0x01,0x60,0xf4]
+ vld4.8 {d16, d18, d20, d22}, [r0, :256]!
+@ CHECK: vld4.8 {d17, d19, d21, d23}, [r0, :256]! @ encoding: [0x3d,0x11,0x60,0xf4]
+ vld4.8 {d17, d19, d21, d23}, [r0, :256]!
+@ CHECK: vld4.16 {d16, d18, d20, d22}, [r0]! @ encoding: [0x4d,0x01,0x60,0xf4]
+ vld4.16 {d16, d18, d20, d22}, [r0]!
+@ CHECK: vld4.16 {d17, d19, d21, d23}, [r0]! @ encoding: [0x4d,0x11,0x60,0xf4]
+ vld4.16 {d17, d19, d21, d23}, [r0]!
+@ CHECK: vld4.32 {d16, d18, d20, d22}, [r0]! @ encoding: [0x8d,0x01,0x60,0xf4]
+ vld4.32 {d16, d18, d20, d22}, [r0]!
+@ CHECK: vld4.32 {d17, d19, d21, d23}, [r0]! @ encoding: [0x8d,0x11,0x60,0xf4]
+ vld4.32 {d17, d19, d21, d23}, [r0]!
+
+
+ \ No newline at end of file