diff options
author | Bob Wilson <bob.wilson@apple.com> | 2010-02-17 22:23:11 +0000 |
---|---|---|
committer | Bob Wilson <bob.wilson@apple.com> | 2010-02-17 22:23:11 +0000 |
commit | 3c0f96e05472693ff9a59366726e4a3da5e05471 (patch) | |
tree | d0e8f07490317a513497a5f504f0b21ba425db9f /lib | |
parent | fb566795c6feccc2a931236fcf30e3b068933d7f (diff) |
More cleanup for NEON:
* Use the "S" abbreviation for scalar single FP registers in class and pattern
names, instead of keeping the "D" (for "double") abbreviation and tacking on
an "s" elsewhere in the name.
* Move the scalar single FP register classes and patterns to be more
consistent with other definitions in the file.
* Rename "VNEGf32d" definition to "VNEGfd" for consistency.
* Delete the N2VDIntsPat pattern; N2VSPat is good enough.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@96521 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib')
-rw-r--r-- | lib/Target/ARM/ARMInstrNEON.td | 191 |
1 files changed, 90 insertions, 101 deletions
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 064243f318..6d7f242127 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -566,35 +566,34 @@ def SubReg_i32_lane : SDNodeXForm<imm, [{ // Instruction Classes //===----------------------------------------------------------------------===// -// Basic 2-register operations, both double- and quad-register. +// Basic 2-register operations: single-, double- and quad-register. +class N2VS<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, + bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr, + string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode> + : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, + (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src), + IIC_VUNAD, OpcodeStr, Dt, "$dst, $src", "", []>; class N2VD<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, - bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,string Dt, - ValueType ResTy, ValueType OpTy, SDNode OpNode> + bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr, + string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode> : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$dst), (ins DPR:$src), IIC_VUNAD, OpcodeStr, Dt, "$dst, $src", "", [(set DPR:$dst, (ResTy (OpNode (OpTy DPR:$src))))]>; class N2VQ<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, - bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,string Dt, - ValueType ResTy, ValueType OpTy, SDNode OpNode> + bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr, + string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode> : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$dst), (ins QPR:$src), IIC_VUNAQ, OpcodeStr, Dt, "$dst, $src", "", [(set QPR:$dst, (ResTy (OpNode (OpTy QPR:$src))))]>; -// Basic 2-register operations, scalar single-precision. 
-class N2VDs<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, - bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr, - string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode> +// Basic 2-register intrinsics: single-, double- and quad-register. +class N2VSInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, + bits<2> op17_16, bits<5> op11_7, bit op4, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, - (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src), - IIC_VUNAD, OpcodeStr, Dt, "$dst, $src", "", []>; - -class N2VDsPat<SDNode OpNode, ValueType ResTy, ValueType OpTy, NeonI Inst> - : NEONFPPat<(ResTy (OpNode SPR:$a)), - (EXTRACT_SUBREG (Inst (INSERT_SUBREG (OpTy (IMPLICIT_DEF)), - SPR:$a, arm_ssubreg_0)), - arm_ssubreg_0)>; - -// Basic 2-register intrinsics, both double- and quad-register. + (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src), itin, + OpcodeStr, Dt, "$dst, $src", "", []>; class N2VDInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, bits<5> op11_7, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, @@ -610,21 +609,6 @@ class N2VQInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, (ins QPR:$src), itin, OpcodeStr, Dt, "$dst, $src", "", [(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src))))]>; -// Basic 2-register intrinsics, scalar single-precision -class N2VDInts<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, - bits<2> op17_16, bits<5> op11_7, bit op4, - InstrItinClass itin, string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, Intrinsic IntOp> - : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, - (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src), itin, - OpcodeStr, Dt, "$dst, $src", "", []>; - -class N2VDIntsPat<SDNode OpNode, NeonI Inst> - : NEONFPPat<(f32 (OpNode SPR:$a)), - (EXTRACT_SUBREG (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), - SPR:$a, arm_ssubreg_0)), - arm_ssubreg_0)>; - // Narrow 2-register 
intrinsics. class N2VNInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, bits<5> op11_7, bit op6, bit op4, @@ -655,7 +639,16 @@ class N2VQShuffle<bits<2> op19_18, bits<5> op11_7, (ins QPR:$src1, QPR:$src2), itin, OpcodeStr, Dt, "$dst1, $dst2", "$src1 = $dst1, $src2 = $dst2", []>; -// Basic 3-register operations, both double- and quad-register. +// Basic 3-register operations: single-, double- and quad-register. +class N3VS<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, + string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, + SDNode OpNode, bit Commutable> + : N3V<op24, op23, op21_20, op11_8, 0, op4, + (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src1, DPR_VFP2:$src2), IIC_VBIND, + OpcodeStr, Dt, "$dst, $src1, $src2", "", []> { + let isCommutable = Commutable; +} + class N3VD<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable> @@ -740,23 +733,6 @@ class N3VQSL16<bits<2> op21_20, bits<4> op11_8, string OpcodeStr, string Dt, let isCommutable = 0; } -// Basic 3-register operations, scalar single-precision -class N3VDs<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, - string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, - SDNode OpNode, bit Commutable> - : N3V<op24, op23, op21_20, op11_8, 0, op4, - (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src1, DPR_VFP2:$src2), IIC_VBIND, - OpcodeStr, Dt, "$dst, $src1, $src2", "", []> { - let isCommutable = Commutable; -} -class N3VDsPat<SDNode OpNode, NeonI Inst> - : NEONFPPat<(f32 (OpNode SPR:$a, SPR:$b)), - (EXTRACT_SUBREG (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), - SPR:$a, arm_ssubreg_0), - (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), - SPR:$b, arm_ssubreg_0)), - arm_ssubreg_0)>; - // Basic 3-register intrinsics, both double- and quad-register. 
class N3VDInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, @@ -824,7 +800,15 @@ class N3VQIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, let isCommutable = 0; } -// Multiply-Add/Sub operations, both double- and quad-register. +// Multiply-Add/Sub operations: single-, double- and quad-register. +class N3VSMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType Ty, SDNode MulOp, SDNode OpNode> + : N3V<op24, op23, op21_20, op11_8, 0, op4, + (outs DPR_VFP2:$dst), + (ins DPR_VFP2:$src1, DPR_VFP2:$src2, DPR_VFP2:$src3), itin, + OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst", []>; + class N3VDMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty, SDNode MulOp, SDNode OpNode> @@ -892,25 +876,6 @@ class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, (ResTy (NEONvduplane (OpTy DPR_8:$src3), imm:$lane)))))))]>; -// Multiply-Add/Sub operations, scalar single-precision -class N3VDMulOps<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, - InstrItinClass itin, string OpcodeStr, string Dt, - ValueType Ty, SDNode MulOp, SDNode OpNode> - : N3V<op24, op23, op21_20, op11_8, 0, op4, - (outs DPR_VFP2:$dst), - (ins DPR_VFP2:$src1, DPR_VFP2:$src2, DPR_VFP2:$src3), itin, - OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst", []>; - -class N3VDMulOpsPat<SDNode MulNode, SDNode OpNode, NeonI Inst> - : NEONFPPat<(f32 (OpNode SPR:$acc, (f32 (MulNode SPR:$a, SPR:$b)))), - (EXTRACT_SUBREG (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), - SPR:$acc, arm_ssubreg_0), - (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), - SPR:$a, arm_ssubreg_0), - (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), - SPR:$b, arm_ssubreg_0)), - arm_ssubreg_0)>; - // Neon 3-argument intrinsics, both double- and quad-register. 
// The destination register is also used as the first source operand register. class N3VDInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, @@ -2409,7 +2374,7 @@ def VNEGs16q : VNEGQ<0b01, "vneg", "s16", v8i16>; def VNEGs32q : VNEGQ<0b10, "vneg", "s32", v4i32>; // VNEG : Vector Negate (floating-point) -def VNEGf32d : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0, +def VNEGfd : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0, (outs DPR:$dst), (ins DPR:$src), IIC_VUNAD, "vneg", "f32", "$dst, $src", "", [(set DPR:$dst, (v2f32 (fneg DPR:$src)))]>; @@ -2995,71 +2960,95 @@ def VTBX4 // NEON instructions for single-precision FP math //===----------------------------------------------------------------------===// +class N2VSPat<SDNode OpNode, ValueType ResTy, ValueType OpTy, NeonI Inst> + : NEONFPPat<(ResTy (OpNode SPR:$a)), + (EXTRACT_SUBREG (Inst (INSERT_SUBREG (OpTy (IMPLICIT_DEF)), + SPR:$a, arm_ssubreg_0)), + arm_ssubreg_0)>; + +class N3VSPat<SDNode OpNode, NeonI Inst> + : NEONFPPat<(f32 (OpNode SPR:$a, SPR:$b)), + (EXTRACT_SUBREG (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), + SPR:$a, arm_ssubreg_0), + (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), + SPR:$b, arm_ssubreg_0)), + arm_ssubreg_0)>; + +class N3VSMulOpPat<SDNode MulNode, SDNode OpNode, NeonI Inst> + : NEONFPPat<(f32 (OpNode SPR:$acc, (f32 (MulNode SPR:$a, SPR:$b)))), + (EXTRACT_SUBREG (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), + SPR:$acc, arm_ssubreg_0), + (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), + SPR:$a, arm_ssubreg_0), + (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), + SPR:$b, arm_ssubreg_0)), + arm_ssubreg_0)>; + // These need separate instructions because they must use DPR_VFP2 register // class which have SPR sub-registers. 
// Vector Add Operations used for single-precision FP let neverHasSideEffects = 1 in -def VADDfd_sfp : N3VDs<0,0,0b00,0b1101,0, "vadd", "f32", v2f32, v2f32, fadd, 1>; -def : N3VDsPat<fadd, VADDfd_sfp>; +def VADDfd_sfp : N3VS<0,0,0b00,0b1101,0, "vadd", "f32", v2f32, v2f32, fadd, 1>; +def : N3VSPat<fadd, VADDfd_sfp>; // Vector Sub Operations used for single-precision FP let neverHasSideEffects = 1 in -def VSUBfd_sfp : N3VDs<0,0,0b10,0b1101,0, "vsub", "f32", v2f32, v2f32, fsub, 0>; -def : N3VDsPat<fsub, VSUBfd_sfp>; +def VSUBfd_sfp : N3VS<0,0,0b10,0b1101,0, "vsub", "f32", v2f32, v2f32, fsub, 0>; +def : N3VSPat<fsub, VSUBfd_sfp>; // Vector Multiply Operations used for single-precision FP let neverHasSideEffects = 1 in -def VMULfd_sfp : N3VDs<1,0,0b00,0b1101,1, "vmul", "f32", v2f32, v2f32, fmul, 1>; -def : N3VDsPat<fmul, VMULfd_sfp>; +def VMULfd_sfp : N3VS<1,0,0b00,0b1101,1, "vmul", "f32", v2f32, v2f32, fmul, 1>; +def : N3VSPat<fmul, VMULfd_sfp>; // Vector Multiply-Accumulate/Subtract used for single-precision FP // vml[as].f32 can cause 4-8 cycle stalls in following ASIMD instructions, so // we want to avoid them for now. e.g., alternating vmla/vadd instructions. 
//let neverHasSideEffects = 1 in -//def VMLAfd_sfp : N3VDMulOps<0,0,0b00,0b1101,1, IIC_VMACD, "vmla", "f32", +//def VMLAfd_sfp : N3VSMulOp<0,0,0b00,0b1101,1, IIC_VMACD, "vmla", "f32", // v2f32, fmul, fadd>; -//def : N3VDMulOpsPat<fmul, fadd, VMLAfd_sfp>; +//def : N3VSMulOpPat<fmul, fadd, VMLAfd_sfp>; //let neverHasSideEffects = 1 in -//def VMLSfd_sfp : N3VDMulOps<0,0,0b10,0b1101,1, IIC_VMACD, "vmls", "f32", +//def VMLSfd_sfp : N3VSMulOp<0,0,0b10,0b1101,1, IIC_VMACD, "vmls", "f32", // v2f32, fmul, fsub>; -//def : N3VDMulOpsPat<fmul, fsub, VMLSfd_sfp>; +//def : N3VSMulOpPat<fmul, fsub, VMLSfd_sfp>; // Vector Absolute used for single-precision FP let neverHasSideEffects = 1 in -def VABSfd_sfp : N2VDInts<0b11, 0b11, 0b10, 0b01, 0b01110, 0, IIC_VUNAD, +def VABSfd_sfp : N2VSInt<0b11, 0b11, 0b10, 0b01, 0b01110, 0, IIC_VUNAD, "vabs", "f32", v2f32, v2f32, int_arm_neon_vabs>; -def : N2VDIntsPat<fabs, VABSfd_sfp>; +def : N2VSPat<fabs, f32, v2f32, VABSfd_sfp>; // Vector Negate used for single-precision FP let neverHasSideEffects = 1 in -def VNEGf32d_sfp : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0, - (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src), IIC_VUNAD, - "vneg", "f32", "$dst, $src", "", []>; -def : N2VDIntsPat<fneg, VNEGf32d_sfp>; +def VNEGfd_sfp : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0, + (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src), IIC_VUNAD, + "vneg", "f32", "$dst, $src", "", []>; +def : N2VSPat<fneg, f32, v2f32, VNEGfd_sfp>; // Vector Convert between single-precision FP and integer let neverHasSideEffects = 1 in -def VCVTf2sd_sfp : N2VDs<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32", - v2i32, v2f32, fp_to_sint>; -def : N2VDsPat<arm_ftosi, f32, v2f32, VCVTf2sd_sfp>; +def VCVTf2sd_sfp : N2VS<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32", + v2i32, v2f32, fp_to_sint>; +def : N2VSPat<arm_ftosi, f32, v2f32, VCVTf2sd_sfp>; let neverHasSideEffects = 1 in -def VCVTf2ud_sfp : N2VDs<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32", - v2i32, v2f32, fp_to_uint>; 
-def : N2VDsPat<arm_ftoui, f32, v2f32, VCVTf2ud_sfp>; +def VCVTf2ud_sfp : N2VS<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32", + v2i32, v2f32, fp_to_uint>; +def : N2VSPat<arm_ftoui, f32, v2f32, VCVTf2ud_sfp>; let neverHasSideEffects = 1 in -def VCVTs2fd_sfp : N2VDs<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32", - v2f32, v2i32, sint_to_fp>; -def : N2VDsPat<arm_sitof, f32, v2i32, VCVTs2fd_sfp>; +def VCVTs2fd_sfp : N2VS<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32", + v2f32, v2i32, sint_to_fp>; +def : N2VSPat<arm_sitof, f32, v2i32, VCVTs2fd_sfp>; let neverHasSideEffects = 1 in -def VCVTu2fd_sfp : N2VDs<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32", - v2f32, v2i32, uint_to_fp>; -def : N2VDsPat<arm_uitof, f32, v2i32, VCVTu2fd_sfp>; +def VCVTu2fd_sfp : N2VS<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32", + v2f32, v2i32, uint_to_fp>; +def : N2VSPat<arm_uitof, f32, v2i32, VCVTu2fd_sfp>; //===----------------------------------------------------------------------===// // Non-Instruction Patterns |