diff options
author | Jim Grosbach <grosbach@apple.com> | 2012-01-24 18:37:25 +0000 |
---|---|---|
committer | Jim Grosbach <grosbach@apple.com> | 2012-01-24 18:37:25 +0000 |
commit | e983a134e7e40e214f590c3d8ba565bb85f39628 (patch) | |
tree | b51f8782b4420bb6be22db8779b96280640af59e | |
parent | 10c6fdcd111d447de79613495219af8de3a2fb7e (diff) |
NEON VLD4(one lane) assembly parsing and encoding.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@148832 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/Target/ARM/ARMInstrNEON.td | 100 | ||||
-rw-r--r-- | lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 145 | ||||
-rw-r--r-- | test/MC/ARM/neon-vld-encoding.s | 44 |
3 files changed, 278 insertions, 11 deletions
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 14d480dd61..702de8bd99 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -306,6 +306,57 @@ def VecListThreeQWordIndexed : Operand<i32> { let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); } +// Register list of four D registers with byte lane subscripting. +def VecListFourDByteIndexAsmOperand : AsmOperandClass { + let Name = "VecListFourDByteIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListFourDByteIndexed : Operand<i32> { + let ParserMatchClass = VecListFourDByteIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} +// Register list of four D registers with half-word lane subscripting. +def VecListFourDHWordIndexAsmOperand : AsmOperandClass { + let Name = "VecListFourDHWordIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListFourDHWordIndexed : Operand<i32> { + let ParserMatchClass = VecListFourDHWordIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} +// Register list of four D registers with word lane subscripting. +def VecListFourDWordIndexAsmOperand : AsmOperandClass { + let Name = "VecListFourDWordIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListFourDWordIndexed : Operand<i32> { + let ParserMatchClass = VecListFourDWordIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} +// Register list of four Q registers with half-word lane subscripting. +def VecListFourQHWordIndexAsmOperand : AsmOperandClass { + let Name = "VecListFourQHWordIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListFourQHWordIndexed : Operand<i32> { + let ParserMatchClass = VecListFourQHWordIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} +// Register list of four Q registers with word lane subscripting. 
+def VecListFourQWordIndexAsmOperand : AsmOperandClass { + let Name = "VecListFourQWordIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListFourQWordIndexed : Operand<i32> { + let ParserMatchClass = VecListFourQWordIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} + //===----------------------------------------------------------------------===// // NEON-specific DAG Nodes. @@ -6203,6 +6254,55 @@ def VST3qWB_register_Asm_32 : (ins VecListThreeQ:$list, addrmode6:$addr, rGPR:$Rm, pred:$p)>; +// VLD4 single-lane pseudo-instructions. These need special handling for +// the lane index that an InstAlias can't handle, so we use these instead. +def VLD4LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr", + (ins VecListFourDByteIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD4LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr", + (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD4LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr", + (ins VecListFourDWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD4LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr", + (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD4LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr", + (ins VecListFourQWordIndexed:$list, addrmode6:$addr, pred:$p)>; + +def VLD4LNdWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!", + (ins VecListFourDByteIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD4LNdWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!", + (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD4LNdWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!", + (ins VecListFourDWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD4LNqWB_fixed_Asm_16 : + 
NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!", + (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD4LNqWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!", + (ins VecListFourQWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD4LNdWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm", + (ins VecListFourDByteIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD4LNdWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm", + (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD4LNdWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm", + (ins VecListFourDWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD4LNqWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm", + (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD4LNqWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm", + (ins VecListFourQWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; + // VLD4 multiple structure pseudo-instructions. 
These need special handling for diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index fb03a45251..9bc4e60863 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -1203,6 +1203,31 @@ public: return VectorList.Count == 3 && VectorList.LaneIndex <= 1; } + bool isVecListFourDByteIndexed() const { + if (!isSingleSpacedVectorIndexed()) return false; + return VectorList.Count == 4 && VectorList.LaneIndex <= 7; + } + + bool isVecListFourDHWordIndexed() const { + if (!isSingleSpacedVectorIndexed()) return false; + return VectorList.Count == 4 && VectorList.LaneIndex <= 3; + } + + bool isVecListFourQWordIndexed() const { + if (!isDoubleSpacedVectorIndexed()) return false; + return VectorList.Count == 4 && VectorList.LaneIndex <= 1; + } + + bool isVecListFourQHWordIndexed() const { + if (!isDoubleSpacedVectorIndexed()) return false; + return VectorList.Count == 4 && VectorList.LaneIndex <= 3; + } + + bool isVecListFourDWordIndexed() const { + if (!isSingleSpacedVectorIndexed()) return false; + return VectorList.Count == 4 && VectorList.LaneIndex <= 1; + } + bool isVectorIndex8() const { if (Kind != k_VectorIndex) return false; return VectorIndex.Val < 8; @@ -5338,6 +5363,23 @@ static unsigned getRealVLDOpcode(unsigned Opc, unsigned &Spacing) { case ARM::VLD3qAsm_16: Spacing = 2; return ARM::VLD3q16; case ARM::VLD3qAsm_32: Spacing = 2; return ARM::VLD3q32; + // VLD4LN (single lane): 'd' forms are single-spaced, 'q' forms double-spaced. 
+ case ARM::VLD4LNdWB_fixed_Asm_8: Spacing = 1; return ARM::VLD4LNd8_UPD; + case ARM::VLD4LNdWB_fixed_Asm_16: Spacing = 1; return ARM::VLD4LNd16_UPD; + case ARM::VLD4LNdWB_fixed_Asm_32: Spacing = 1; return ARM::VLD4LNd32_UPD; + case ARM::VLD4LNqWB_fixed_Asm_16: Spacing = 2; return ARM::VLD4LNq16_UPD; + case ARM::VLD4LNqWB_fixed_Asm_32: Spacing = 2; return ARM::VLD4LNq32_UPD; + case ARM::VLD4LNdWB_register_Asm_8: Spacing = 1; return ARM::VLD4LNd8_UPD; + case ARM::VLD4LNdWB_register_Asm_16: Spacing = 1; return 
ARM::VLD4LNd16_UPD; + case ARM::VLD4LNdWB_register_Asm_32: Spacing = 1; return ARM::VLD4LNd32_UPD; + case ARM::VLD4LNqWB_register_Asm_16: Spacing = 2; return ARM::VLD4LNq16_UPD; + case ARM::VLD4LNqWB_register_Asm_32: Spacing = 2; return ARM::VLD4LNq32_UPD; + case ARM::VLD4LNdAsm_8: Spacing = 1; return ARM::VLD4LNd8; + case ARM::VLD4LNdAsm_16: Spacing = 1; return ARM::VLD4LNd16; + case ARM::VLD4LNdAsm_32: Spacing = 1; return ARM::VLD4LNd32; + case ARM::VLD4LNqAsm_16: Spacing = 2; return ARM::VLD4LNq16; + case ARM::VLD4LNqAsm_32: Spacing = 2; return ARM::VLD4LNq32; + // VLD4 case ARM::VLD4dWB_fixed_Asm_8: Spacing = 1; return ARM::VLD4d8_UPD; case ARM::VLD4dWB_fixed_Asm_16: Spacing = 1; return ARM::VLD4d16_UPD; @@ -5665,6 +5707,41 @@ processInstruction(MCInst &Inst, return true; } + case ARM::VLD4LNdWB_register_Asm_8: + case ARM::VLD4LNdWB_register_Asm_16: + case ARM::VLD4LNdWB_register_Asm_32: + case ARM::VLD4LNqWB_register_Asm_16: + case ARM::VLD4LNqWB_register_Asm_32: { + MCInst TmpInst; + // Shuffle the operands around so the lane index operand is in the + // right place. 
+ unsigned Spacing; + TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 3)); + TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb + TmpInst.addOperand(Inst.getOperand(2)); // Rn + TmpInst.addOperand(Inst.getOperand(3)); // alignment + TmpInst.addOperand(Inst.getOperand(4)); // Rm + TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd) + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 3)); + TmpInst.addOperand(Inst.getOperand(1)); // lane + TmpInst.addOperand(Inst.getOperand(5)); // CondCode + TmpInst.addOperand(Inst.getOperand(6)); + Inst = TmpInst; + return true; + } + case ARM::VLD1LNdWB_fixed_Asm_8: case ARM::VLD1LNdWB_fixed_Asm_16: case ARM::VLD1LNdWB_fixed_Asm_32: { @@ -5744,6 +5821,41 @@ processInstruction(MCInst &Inst, return true; } + case ARM::VLD4LNdWB_fixed_Asm_8: + case ARM::VLD4LNdWB_fixed_Asm_16: + case ARM::VLD4LNdWB_fixed_Asm_32: + case ARM::VLD4LNqWB_fixed_Asm_16: + case ARM::VLD4LNqWB_fixed_Asm_32: { + MCInst TmpInst; + // Shuffle the operands around so the lane index operand is in the + // right place. 
+ unsigned Spacing; + TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 3)); + TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb + TmpInst.addOperand(Inst.getOperand(2)); // Rn + TmpInst.addOperand(Inst.getOperand(3)); // alignment + TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm + TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd) + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 3)); + TmpInst.addOperand(Inst.getOperand(1)); // lane + TmpInst.addOperand(Inst.getOperand(4)); // CondCode + TmpInst.addOperand(Inst.getOperand(5)); + Inst = TmpInst; + return true; + } + case ARM::VLD1LNdAsm_8: case ARM::VLD1LNdAsm_16: case ARM::VLD1LNdAsm_32: { @@ -5817,6 +5929,39 @@ processInstruction(MCInst &Inst, return true; } + case ARM::VLD4LNdAsm_8: + case ARM::VLD4LNdAsm_16: + case ARM::VLD4LNdAsm_32: + case ARM::VLD4LNqAsm_16: + case ARM::VLD4LNqAsm_32: { + MCInst TmpInst; + // Shuffle the operands around so the lane index operand is in the + // right place. 
+ unsigned Spacing; + TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 3)); + TmpInst.addOperand(Inst.getOperand(2)); // Rn + TmpInst.addOperand(Inst.getOperand(3)); // alignment + TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd) + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 3)); + TmpInst.addOperand(Inst.getOperand(1)); // lane + TmpInst.addOperand(Inst.getOperand(4)); // CondCode + TmpInst.addOperand(Inst.getOperand(5)); + Inst = TmpInst; + return true; + } + // VLD3 multiple 3-element structure instructions. case ARM::VLD3dAsm_8: case ARM::VLD3dAsm_16: diff --git a/test/MC/ARM/neon-vld-encoding.s b/test/MC/ARM/neon-vld-encoding.s index 1dc2fdc14c..ba9b218b0d 100644 --- a/test/MC/ARM/neon-vld-encoding.s +++ b/test/MC/ARM/neon-vld-encoding.s @@ -336,17 +336,39 @@ @ CHECK: vld3.32 {d5[0], d7[0], d9[0]}, [r4]! 
@ encoding: [0x4d,0x5a,0xa4,0xf4] -@ vld4.8 {d16[1], d17[1], d18[1], d19[1]}, [r0, :32] -@ vld4.16 {d16[1], d17[1], d18[1], d19[1]}, [r0] -@ vld4.32 {d16[1], d17[1], d18[1], d19[1]}, [r0, :128] -@ vld4.16 {d16[1], d18[1], d20[1], d22[1]}, [r0, :64] -@ vld4.32 {d17[0], d19[0], d21[0], d23[0]}, [r0] - -@ FIXME: vld4.8 {d16[1], d17[1], d18[1], d19[1]}, [r0, :32] @ encoding: [0x3f,0x03,0xe0,0xf4] -@ FIXME: vld4.16 {d16[1], d17[1], d18[1], d19[1]}, [r0] @ encoding: [0x4f,0x07,0xe0,0xf4] -@ FIXME: vld4.32 {d16[1], d17[1], d18[1], d19[1]}, [r0, :128] @ encoding: [0xaf,0x0b,0xe0,0xf4] -@ FIXME: vld4.16 {d16[1], d18[1], d20[1], d22[1]}, [r0, :64] @ encoding: [0x7f,0x07,0xe0,0xf4] -@ FIXME: vld4.32 {d17[0], d19[0], d21[0], d23[0]}, [r0] @ encoding: [0x4f,0x1b,0xe0,0xf4] + vld4.8 {d16[1], d17[1], d18[1], d19[1]}, [r1] + vld4.16 {d16[1], d17[1], d18[1], d19[1]}, [r2] + vld4.32 {d16[1], d17[1], d18[1], d19[1]}, [r3] + vld4.16 {d17[1], d19[1], d21[1], d23[1]}, [r7] + vld4.32 {d16[1], d18[1], d20[1], d22[1]}, [r8] + + vld4.s8 {d16[1], d17[1], d18[1], d19[1]}, [r1, :32]! + vld4.s16 {d16[1], d17[1], d18[1], d19[1]}, [r2, :64]! + vld4.s32 {d16[1], d17[1], d18[1], d19[1]}, [r3, :128]! + vld4.u16 {d17[1], d19[1], d21[1], d23[1]}, [r7]! + vld4.u32 {d16[1], d18[1], d20[1], d22[1]}, [r8]! 
+ + vld4.p8 {d16[1], d17[1], d18[1], d19[1]}, [r1, :32], r8 + vld4.p16 {d16[1], d17[1], d18[1], d19[1]}, [r2], r7 + vld4.f32 {d16[1], d17[1], d18[1], d19[1]}, [r3, :64], r5 + vld4.i16 {d16[1], d18[1], d20[1], d22[1]}, [r6], r3 + vld4.i32 {d17[1], d19[1], d21[1], d23[1]}, [r9], r4 + +@ CHECK: vld4.8 {d16[1], d17[1], d18[1], d19[1]}, [r1] @ encoding: [0x2f,0x03,0xe1,0xf4] +@ CHECK: vld4.16 {d16[1], d17[1], d18[1], d19[1]}, [r2] @ encoding: [0x4f,0x07,0xe2,0xf4] +@ CHECK: vld4.32 {d16[1], d17[1], d18[1], d19[1]}, [r3] @ encoding: [0x8f,0x0b,0xe3,0xf4] +@ CHECK: vld4.16 {d17[1], d19[1], d21[1], d23[1]}, [r7] @ encoding: [0x6f,0x17,0xe7,0xf4] +@ CHECK: vld4.32 {d16[1], d18[1], d20[1], d22[1]}, [r8] @ encoding: [0xcf,0x0b,0xe8,0xf4] +@ CHECK: vld4.8 {d16[1], d17[1], d18[1], d19[1]}, [r1, :32]! @ encoding: [0x3d,0x03,0xe1,0xf4] +@ CHECK: vld4.16 {d16[1], d17[1], d18[1], d19[1]}, [r2, :64]! @ encoding: [0x5d,0x07,0xe2,0xf4] +@ CHECK: vld4.32 {d16[1], d17[1], d18[1], d19[1]}, [r3, :128]! @ encoding: [0xad,0x0b,0xe3,0xf4] +@ CHECK: vld4.16 {d17[1], d19[1], d21[1], d23[1]}, [r7]! @ encoding: [0x6d,0x17,0xe7,0xf4] +@ CHECK: vld4.32 {d16[1], d18[1], d20[1], d22[1]}, [r8]! @ encoding: [0xcd,0x0b,0xe8,0xf4] +@ CHECK: vld4.8 {d16[1], d17[1], d18[1], d19[1]}, [r1, :32], r8 @ encoding: [0x38,0x03,0xe1,0xf4] +@ CHECK: vld4.16 {d16[1], d17[1], d18[1], d19[1]}, [r2], r7 @ encoding: [0x47,0x07,0xe2,0xf4] +@ CHECK: vld4.32 {d16[1], d17[1], d18[1], d19[1]}, [r3, :64], r5 @ encoding: [0x95,0x0b,0xe3,0xf4] +@ CHECK: vld4.16 {d16[1], d18[1], d20[1], d22[1]}, [r6], r3 @ encoding: [0x63,0x07,0xe6,0xf4] +@ CHECK: vld4.32 {d17[1], d19[1], d21[1], d23[1]}, [r9], r4 @ encoding: [0xc4,0x1b,0xe9,0xf4] @ Handle 'Q' registers in register lists as if the sub-reg D regs were |