author     Preston Gurd <preston.gurd@intel.com>  2012-02-27 23:35:03 +0000
committer  Preston Gurd <preston.gurd@intel.com>  2012-02-27 23:35:03 +0000
commit     e879cbae7cf80f4653860d69e50f667d45401fd9
tree       f0809662b996a9f8c10982436ef0e94faa292bea
parent     79ac37b39197cc312c8ddc1865b6b498ed307cbf
This patch adds instruction latencies for the SSE instructions
to the instruction scheduler for the Intel Atom.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@151590 91177308-0d34-0410-b5e6-96231b3b80d8
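The X86Schedule.td and X86ScheduleAtom.td hunks are not reproduced on this page, so as a rough sketch of the mechanism: X86Schedule.td declares a named InstrItinClass for each operand form, the X86InstrSSE.td changes below attach those classes to the instruction definitions, and X86ScheduleAtom.td maps each class onto Atom's two issue ports with a cycle count. The itinerary class names in the sketch come from the patch; the stage counts and port assignments are illustrative only, not the values from this commit.

    // X86Schedule.td side: declare one itinerary class per operand form.
    def IIC_SSE_ALU_F32S_RR : InstrItinClass;
    def IIC_SSE_ALU_F32S_RM : InstrItinClass;

    // X86ScheduleAtom.td side: bind each class to functional units and a
    // latency so the in-order Atom scheduler can cost each instruction form.
    // (Illustrative stage counts, not the commit's actual numbers.)
    def AtomItineraries : ProcessorItineraries<
      [ Port0, Port1 ], [], [
      InstrItinData<IIC_SSE_ALU_F32S_RR, [InstrStage<5, [Port1]>]>,
      InstrItinData<IIC_SSE_ALU_F32S_RM, [InstrStage<5, [Port0, Port1]>]>
    ]>;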
-rw-r--r--   lib/Target/X86/X86InstrSSE.td     | 2138
-rw-r--r--   lib/Target/X86/X86Schedule.td     |  136
-rw-r--r--   lib/Target/X86/X86ScheduleAtom.td |  151
3 files changed, 1697 insertions(+), 728 deletions(-)
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 56e997c1fb..c6d1d192cf 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -13,6 +13,126 @@
 //
 //===----------------------------------------------------------------------===//
 
+class OpndItins<InstrItinClass arg_rr, InstrItinClass arg_rm> {
+  InstrItinClass rr = arg_rr;
+  InstrItinClass rm = arg_rm;
+}
+
+class SizeItins<OpndItins arg_s, OpndItins arg_d> {
+  OpndItins s = arg_s;
+  OpndItins d = arg_d;
+}
+
+
+class ShiftOpndItins<InstrItinClass arg_rr, InstrItinClass arg_rm,
+  InstrItinClass arg_ri> {
+  InstrItinClass rr = arg_rr;
+  InstrItinClass rm = arg_rm;
+  InstrItinClass ri = arg_ri;
+}
+
+
+// scalar
+def SSE_ALU_F32S : OpndItins<
+  IIC_SSE_ALU_F32S_RR, IIC_SSE_ALU_F32S_RM
+>;
+
+def SSE_ALU_F64S : OpndItins<
+  IIC_SSE_ALU_F64S_RR, IIC_SSE_ALU_F64S_RM
+>;
+
+def SSE_ALU_ITINS_S : SizeItins<
+  SSE_ALU_F32S, SSE_ALU_F64S
+>;
+
+def SSE_MUL_F32S : OpndItins<
+  IIC_SSE_MUL_F32S_RR, IIC_SSE_MUL_F64S_RM
+>;
+
+def SSE_MUL_F64S : OpndItins<
+  IIC_SSE_MUL_F64S_RR, IIC_SSE_MUL_F64S_RM
+>;
+
+def SSE_MUL_ITINS_S : SizeItins<
+  SSE_MUL_F32S, SSE_MUL_F64S
+>;
+
+def SSE_DIV_F32S : OpndItins<
+  IIC_SSE_DIV_F32S_RR, IIC_SSE_DIV_F64S_RM
+>;
+
+def SSE_DIV_F64S : OpndItins<
+  IIC_SSE_DIV_F64S_RR, IIC_SSE_DIV_F64S_RM
+>;
+
+def SSE_DIV_ITINS_S : SizeItins<
+  SSE_DIV_F32S, SSE_DIV_F64S
+>;
+
+// parallel
+def SSE_ALU_F32P : OpndItins<
+  IIC_SSE_ALU_F32P_RR, IIC_SSE_ALU_F32P_RM
+>;
+
+def SSE_ALU_F64P : OpndItins<
+  IIC_SSE_ALU_F64P_RR, IIC_SSE_ALU_F64P_RM
+>;
+
+def SSE_ALU_ITINS_P : SizeItins<
+  SSE_ALU_F32P, SSE_ALU_F64P
+>;
+
+def SSE_MUL_F32P : OpndItins<
+  IIC_SSE_MUL_F32P_RR, IIC_SSE_MUL_F64P_RM
+>;
+
+def SSE_MUL_F64P : OpndItins<
+  IIC_SSE_MUL_F64P_RR, IIC_SSE_MUL_F64P_RM
+>;
+
+def SSE_MUL_ITINS_P : SizeItins<
+  SSE_MUL_F32P, SSE_MUL_F64P
+>;
+
+def SSE_DIV_F32P : OpndItins<
+  IIC_SSE_DIV_F32P_RR, IIC_SSE_DIV_F64P_RM
+>;
+
+def SSE_DIV_F64P : OpndItins<
+  IIC_SSE_DIV_F64P_RR, IIC_SSE_DIV_F64P_RM
+>;
+
+def SSE_DIV_ITINS_P : SizeItins<
+  SSE_DIV_F32P, SSE_DIV_F64P
+>;
+
+def SSE_BIT_ITINS_P : OpndItins<
+  IIC_SSE_BIT_P_RR, IIC_SSE_BIT_P_RM
+>;
+
+def SSE_INTALU_ITINS_P : OpndItins<
+  IIC_SSE_INTALU_P_RR, IIC_SSE_INTALU_P_RM
+>;
+
+def SSE_INTALUQ_ITINS_P : OpndItins<
+  IIC_SSE_INTALUQ_P_RR, IIC_SSE_INTALUQ_P_RM
+>;
+
+def SSE_INTMUL_ITINS_P : OpndItins<
+  IIC_SSE_INTMUL_P_RR, IIC_SSE_INTMUL_P_RM
+>;
+
+def SSE_INTSHIFT_ITINS_P : ShiftOpndItins<
+  IIC_SSE_INTSH_P_RR, IIC_SSE_INTSH_P_RM, IIC_SSE_INTSH_P_RI
+>;
+
+def SSE_MOVA_ITINS : OpndItins<
+  IIC_SSE_MOVA_P_RR, IIC_SSE_MOVA_P_RM
+>;
+
+def SSE_MOVU_ITINS : OpndItins<
+  IIC_SSE_MOVU_P_RR, IIC_SSE_MOVU_P_RM
+>;
 
 //===----------------------------------------------------------------------===//
 // SSE 1 & 2 Instructions Classes
@@ -21,25 +141,27 @@
 /// sse12_fp_scalar - SSE 1 & 2 scalar instructions class
 multiclass sse12_fp_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            RegisterClass RC, X86MemOperand x86memop,
+                           OpndItins itins,
                            bit Is2Addr = 1> {
   let isCommutable = 1 in {
     def rr : SI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
        !if(Is2Addr,
            !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
            !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
-       [(set RC:$dst, (OpNode RC:$src1, RC:$src2))]>;
+       [(set RC:$dst, (OpNode RC:$src1, RC:$src2))], itins.rr>;
   }
   def rm : SI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
      !if(Is2Addr,
          !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
         !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
-     [(set RC:$dst, (OpNode RC:$src1, (load addr:$src2)))]>;
+     [(set RC:$dst, (OpNode RC:$src1, (load addr:$src2)))], itins.rm>;
 }
 
 /// sse12_fp_scalar_int - SSE 1 & 2 scalar instructions intrinsics class
 multiclass sse12_fp_scalar_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
                                string asm, string SSEVer, string FPSizeStr,
                                Operand memopr, ComplexPattern mem_cpat,
+                               OpndItins itins,
                                bit Is2Addr = 1> {
   def rr_Int : SI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
        !if(Is2Addr,
@@ -47,34 +169,34 @@ multiclass sse12_fp_scalar_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
            !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
        [(set RC:$dst, (!cast<Intrinsic>(
                  !strconcat("int_x86_sse", SSEVer, "_", OpcodeStr, FPSizeStr))
-             RC:$src1, RC:$src2))]>;
+             RC:$src1, RC:$src2))], itins.rr>;
   def rm_Int : SI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, memopr:$src2),
        !if(Is2Addr,
            !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
            !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
        [(set RC:$dst, (!cast<Intrinsic>(!strconcat("int_x86_sse",
                                           SSEVer, "_", OpcodeStr, FPSizeStr))
-             RC:$src1, mem_cpat:$src2))]>;
+             RC:$src1, mem_cpat:$src2))], itins.rm>;
 }
 
 /// sse12_fp_packed - SSE 1 & 2 packed instructions class
 multiclass sse12_fp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            RegisterClass RC, ValueType vt,
                            X86MemOperand x86memop, PatFrag mem_frag,
-                           Domain d, bit Is2Addr = 1> {
+                           Domain d, OpndItins itins, bit Is2Addr = 1> {
   let isCommutable = 1 in
     def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
        !if(Is2Addr,
            !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
-       [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], IIC_DEFAULT, d>;
+       [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], itins.rr, d>;
   let mayLoad = 1 in
     def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
        !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
        [(set RC:$dst, (OpNode RC:$src1, (mem_frag addr:$src2)))],
-       IIC_DEFAULT, d>;
+       itins.rm, d>;
 }
 
 /// sse12_fp_packed_logical_rm - SSE 1 & 2 packed instructions class
@@ -100,7 +222,7 @@ multiclass sse12_fp_packed_logical_rm<bits<8> opc, RegisterClass RC, Domain d,
 multiclass sse12_fp_packed_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
                                string asm, string SSEVer, string FPSizeStr,
                                X86MemOperand x86memop, PatFrag mem_frag,
-                               Domain d, bit Is2Addr = 1> {
+                               Domain d, OpndItins itins, bit Is2Addr = 1> {
   def rr_Int : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
        !if(Is2Addr,
            !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
@@ -348,14 +470,16 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
 class sse12_move_rr<RegisterClass RC, SDNode OpNode, ValueType vt, string asm> :
       SI<0x10, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, RC:$src2), asm,
       [(set VR128:$dst, (vt (OpNode VR128:$src1,
-                             (scalar_to_vector RC:$src2))))]>;
+                             (scalar_to_vector RC:$src2))))],
+      IIC_SSE_MOV_S_RR>;
 
 // Loading from memory automatically zeroing upper bits.
 class sse12_move_rm<RegisterClass RC, X86MemOperand x86memop, PatFrag mem_pat,
                     string OpcodeStr> :
       SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
         !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
-        [(set RC:$dst, (mem_pat addr:$src))]>;
+        [(set RC:$dst, (mem_pat addr:$src))],
+        IIC_SSE_MOV_S_RM>;
 
 // AVX
 def VMOVSSrr : sse12_move_rr<FR32, X86Movss, v4f32,
@@ -369,11 +493,13 @@ def VMOVSDrr : sse12_move_rr<FR64, X86Movsd, v2f64,
 let isCodeGenOnly = 1 in {
   def VMOVSSrr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst),
                         (ins VR128:$src1, FR32:$src2),
-                        "movss\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
+                        "movss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
+                        IIC_SSE_MOV_S_RR>,
                         XS, VEX_4V, VEX_LIG;
   def VMOVSDrr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst),
                         (ins VR128:$src1, FR64:$src2),
-                        "movsd\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
+                        "movsd\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
+                        IIC_SSE_MOV_S_RR>,
                         XD, VEX_4V, VEX_LIG;
 }
 
@@ -387,10 +513,12 @@ let canFoldAsLoad = 1, isReMaterializable = 1 in {
 
 def VMOVSSmr : SI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, FR32:$src),
                   "movss\t{$src, $dst|$dst, $src}",
-                  [(store FR32:$src, addr:$dst)]>, XS, VEX, VEX_LIG;
+                  [(store FR32:$src, addr:$dst)], IIC_SSE_MOV_S_MR>,
+                  XS, VEX, VEX_LIG;
 def VMOVSDmr : SI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src),
                   "movsd\t{$src, $dst|$dst, $src}",
-                  [(store FR64:$src, addr:$dst)]>, XD, VEX, VEX_LIG;
+                  [(store FR64:$src, addr:$dst)], IIC_SSE_MOV_S_MR>,
+                  XD, VEX, VEX_LIG;
 
 // SSE1 & 2
 let Constraints = "$src1 = $dst" in {
@@ -403,10 +531,12 @@ let Constraints = "$src1 = $dst" in {
   let isCodeGenOnly = 1 in {
     def MOVSSrr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst),
                          (ins VR128:$src1, FR32:$src2),
-                         "movss\t{$src2, $dst|$dst, $src2}", []>, XS;
+                         "movss\t{$src2, $dst|$dst, $src2}", [],
+                         IIC_SSE_MOV_S_RR>, XS;
     def MOVSDrr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst),
                          (ins VR128:$src1, FR64:$src2),
-                         "movsd\t{$src2, $dst|$dst, $src2}", []>, XD;
+                         "movsd\t{$src2, $dst|$dst, $src2}", [],
+                         IIC_SSE_MOV_S_RR>, XD;
   }
 }
 
@@ -419,10 +549,10 @@ let canFoldAsLoad = 1, isReMaterializable = 1 in {
 
 def MOVSSmr : SSI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, FR32:$src),
                   "movss\t{$src, $dst|$dst, $src}",
-                  [(store FR32:$src, addr:$dst)]>;
+                  [(store FR32:$src, addr:$dst)], IIC_SSE_MOV_S_MR>;
 def MOVSDmr : SDI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src),
                   "movsd\t{$src, $dst|$dst, $src}",
-                  [(store FR64:$src, addr:$dst)]>;
+                  [(store FR64:$src, addr:$dst)], IIC_SSE_MOV_S_MR>;
 
 // Patterns
 let Predicates = [HasAVX] in {
@@ -684,93 +814,122 @@ let Predicates = [HasSSE2] in {
 multiclass sse12_mov_packed<bits<8> opc, RegisterClass RC,
                             X86MemOperand x86memop, PatFrag ld_frag,
                             string asm, Domain d,
+                            OpndItins itins,
                             bit IsReMaterializable = 1> {
 let neverHasSideEffects = 1 in
   def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
-             !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [], IIC_DEFAULT, d>;
+             !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [], itins.rr, d>;
 let canFoldAsLoad = 1, isReMaterializable = IsReMaterializable in
   def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
              !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
-             [(set RC:$dst, (ld_frag addr:$src))], IIC_DEFAULT, d>;
+             [(set RC:$dst, (ld_frag addr:$src))], itins.rm, d>;
 }
 
 defm VMOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32,
-                              "movaps", SSEPackedSingle>, TB, VEX;
+                              "movaps", SSEPackedSingle, SSE_MOVA_ITINS>,
+                              TB, VEX;
 defm VMOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64,
-                              "movapd", SSEPackedDouble>, TB, OpSize, VEX;
+                              "movapd", SSEPackedDouble, SSE_MOVA_ITINS>,
+                              TB, OpSize, VEX;
defm VMOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32,
-                              "movups", SSEPackedSingle>, TB, VEX;
+                              "movups", SSEPackedSingle, SSE_MOVU_ITINS>,
+                              TB, VEX;
 defm VMOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64,
-                              "movupd", SSEPackedDouble, 0>, TB, OpSize, VEX;
+                              "movupd", SSEPackedDouble, SSE_MOVU_ITINS, 0>,
+                              TB, OpSize, VEX;
 defm VMOVAPSY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv8f32,
-                              "movaps", SSEPackedSingle>, TB, VEX;
+                              "movaps", SSEPackedSingle, SSE_MOVA_ITINS>,
+                              TB, VEX;
 defm VMOVAPDY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv4f64,
-                              "movapd", SSEPackedDouble>, TB, OpSize, VEX;
+                              "movapd", SSEPackedDouble, SSE_MOVA_ITINS>,
+                              TB, OpSize, VEX;
 defm VMOVUPSY : sse12_mov_packed<0x10, VR256, f256mem, loadv8f32,
-                              "movups", SSEPackedSingle>, TB, VEX;
+                              "movups", SSEPackedSingle, SSE_MOVU_ITINS>,
+                              TB, VEX;
 defm VMOVUPDY : sse12_mov_packed<0x10, VR256, f256mem, loadv4f64,
-                              "movupd", SSEPackedDouble, 0>, TB, OpSize, VEX;
+                              "movupd", SSEPackedDouble, SSE_MOVU_ITINS, 0>,
+                              TB, OpSize, VEX;
 defm MOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32,
-                              "movaps", SSEPackedSingle>, TB;
+                              "movaps", SSEPackedSingle, SSE_MOVA_ITINS>,
+                              TB;
 defm MOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64,
-                              "movapd", SSEPackedDouble>, TB, OpSize;
+                              "movapd", SSEPackedDouble, SSE_MOVA_ITINS>,
+                              TB, OpSize;
 defm MOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32,
-                              "movups", SSEPackedSingle>, TB;
+                              "movups", SSEPackedSingle, SSE_MOVU_ITINS>,
+                              TB;
 defm MOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64,
-                              "movupd", SSEPackedDouble, 0>, TB, OpSize;
+                              "movupd", SSEPackedDouble, SSE_MOVU_ITINS, 0>,
+                              TB, OpSize;
 
 def VMOVAPSmr : VPSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                    "movaps\t{$src, $dst|$dst, $src}",
-                   [(alignedstore (v4f32 VR128:$src), addr:$dst)]>, VEX;
+                   [(alignedstore (v4f32 VR128:$src), addr:$dst)],
+                   IIC_SSE_MOVA_P_MR>, VEX;
 def VMOVAPDmr : VPDI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                    "movapd\t{$src, $dst|$dst, $src}",
-                   [(alignedstore (v2f64 VR128:$src), addr:$dst)]>, VEX;
+                   [(alignedstore (v2f64 VR128:$src), addr:$dst)],
+                   IIC_SSE_MOVA_P_MR>, VEX;
 def VMOVUPSmr : VPSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                    "movups\t{$src, $dst|$dst, $src}",
-                   [(store (v4f32 VR128:$src), addr:$dst)]>, VEX;
+                   [(store (v4f32 VR128:$src), addr:$dst)],
+                   IIC_SSE_MOVU_P_MR>, VEX;
 def VMOVUPDmr : VPDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                    "movupd\t{$src, $dst|$dst, $src}",
-                   [(store (v2f64 VR128:$src), addr:$dst)]>, VEX;
+                   [(store (v2f64 VR128:$src), addr:$dst)],
+                   IIC_SSE_MOVU_P_MR>, VEX;
 def VMOVAPSYmr : VPSI<0x29, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
                    "movaps\t{$src, $dst|$dst, $src}",
-                   [(alignedstore256 (v8f32 VR256:$src), addr:$dst)]>, VEX;
+                   [(alignedstore256 (v8f32 VR256:$src), addr:$dst)],
+                   IIC_SSE_MOVA_P_MR>, VEX;
 def VMOVAPDYmr : VPDI<0x29, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
                    "movapd\t{$src, $dst|$dst, $src}",
-                   [(alignedstore256 (v4f64 VR256:$src), addr:$dst)]>, VEX;
+                   [(alignedstore256 (v4f64 VR256:$src), addr:$dst)],
+                   IIC_SSE_MOVA_P_MR>, VEX;
 def VMOVUPSYmr : VPSI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
                    "movups\t{$src, $dst|$dst, $src}",
-                   [(store (v8f32 VR256:$src), addr:$dst)]>, VEX;
+                   [(store (v8f32 VR256:$src), addr:$dst)],
+                   IIC_SSE_MOVU_P_MR>, VEX;
 def VMOVUPDYmr : VPDI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
                    "movupd\t{$src, $dst|$dst, $src}",
-                   [(store (v4f64 VR256:$src), addr:$dst)]>, VEX;
+                   [(store (v4f64 VR256:$src), addr:$dst)],
+                   IIC_SSE_MOVU_P_MR>, VEX;
 
 // For disassembler
 let isCodeGenOnly = 1 in {
   def VMOVAPSrr_REV : VPSI<0x29, MRMDestReg, (outs VR128:$dst),
                           (ins VR128:$src),
-                          "movaps\t{$src, $dst|$dst, $src}", []>, VEX;
+                          "movaps\t{$src, $dst|$dst, $src}", [],
+                          IIC_SSE_MOVA_P_RR>, VEX;
   def VMOVAPDrr_REV : VPDI<0x29, MRMDestReg, (outs VR128:$dst),
                           (ins VR128:$src),
-                          "movapd\t{$src, $dst|$dst, $src}", []>, VEX;
+                          "movapd\t{$src, $dst|$dst, $src}", [],
+                          IIC_SSE_MOVA_P_RR>, VEX;
   def VMOVUPSrr_REV : VPSI<0x11, MRMDestReg, (outs VR128:$dst),
                           (ins VR128:$src),
-                          "movups\t{$src, $dst|$dst, $src}", []>, VEX;
+                          "movups\t{$src, $dst|$dst, $src}", [],
+                          IIC_SSE_MOVU_P_RR>, VEX;
   def VMOVUPDrr_REV : VPDI<0x11, MRMDestReg, (outs VR128:$dst),
                           (ins VR128:$src),
-                          "movupd\t{$src, $dst|$dst, $src}", []>, VEX;
+                          "movupd\t{$src, $dst|$dst, $src}", [],
+                          IIC_SSE_MOVU_P_RR>, VEX;
   def VMOVAPSYrr_REV : VPSI<0x29, MRMDestReg, (outs VR256:$dst),
                             (ins VR256:$src),
-                            "movaps\t{$src, $dst|$dst, $src}", []>, VEX;
+                            "movaps\t{$src, $dst|$dst, $src}", [],
+                            IIC_SSE_MOVA_P_RR>, VEX;
   def VMOVAPDYrr_REV : VPDI<0x29, MRMDestReg, (outs VR256:$dst),
                             (ins VR256:$src),
-                            "movapd\t{$src, $dst|$dst, $src}", []>, VEX;
+                            "movapd\t{$src, $dst|$dst, $src}", [],
+                            IIC_SSE_MOVA_P_RR>, VEX;
   def VMOVUPSYrr_REV : VPSI<0x11, MRMDestReg, (outs VR256:$dst),
                             (ins VR256:$src),
-                            "movups\t{$src, $dst|$dst, $src}", []>, VEX;
+                            "movups\t{$src, $dst|$dst, $src}", [],
+                            IIC_SSE_MOVU_P_RR>, VEX;
   def VMOVUPDYrr_REV : VPDI<0x11, MRMDestReg, (outs VR256:$dst),
                             (ins VR256:$src),
-                            "movupd\t{$src, $dst|$dst, $src}", []>, VEX;
+                            "movupd\t{$src, $dst|$dst, $src}", [],
+                            IIC_SSE_MOVU_P_RR>, VEX;
 }
 
 let Predicates = [HasAVX] in {
@@ -796,27 +955,35 @@ def : Pat<(int_x86_avx_storeu_pd_256 addr:$dst, VR256:$src),
 
 def MOVAPSmr : PSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                    "movaps\t{$src, $dst|$dst, $src}",
-                   [(alignedstore (v4f32 VR128:$src), addr:$dst)]>;
+                   [(alignedstore (v4f32 VR128:$src), addr:$dst)],
+                   IIC_SSE_MOVA_P_MR>;
 def MOVAPDmr : PDI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                    "movapd\t{$src, $dst|$dst, $src}",
-                   [(alignedstore (v2f64 VR128:$src), addr:$dst)]>;
+                   [(alignedstore (v2f64 VR128:$src), addr:$dst)],
+                   IIC_SSE_MOVA_P_MR>;
 def MOVUPSmr : PSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                    "movups\t{$src, $dst|$dst, $src}",
-                   [(store (v4f32 VR128:$src), addr:$dst)]>;
+                   [(store (v4f32 VR128:$src), addr:$dst)],
+                   IIC_SSE_MOVU_P_MR>;
 def MOVUPDmr : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                    "movupd\t{$src, $dst|$dst, $src}",
-                   [(store (v2f64 VR128:$src), addr:$dst)]>;
+                   [(store (v2f64 VR128:$src), addr:$dst)],
+                   IIC_SSE_MOVU_P_MR>;
 
 // For disassembler
 let isCodeGenOnly = 1 in {
   def MOVAPSrr_REV : PSI<0x29, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
-                         "movaps\t{$src, $dst|$dst, $src}", []>;
+                         "movaps\t{$src, $dst|$dst, $src}", [],
+                         IIC_SSE_MOVA_P_RR>;
   def MOVAPDrr_REV : PDI<0x29, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
-                         "movapd\t{$src, $dst|$dst, $src}", []>;
+                         "movapd\t{$src, $dst|$dst, $src}", [],
+                         IIC_SSE_MOVA_P_RR>;
   def MOVUPSrr_REV : PSI<0x11, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
-                         "movups\t{$src, $dst|$dst, $src}", []>;
+                         "movups\t{$src, $dst|$dst, $src}", [],
+                         IIC_SSE_MOVU_P_RR>;
   def MOVUPDrr_REV : PDI<0x11, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
-                         "movupd\t{$src, $dst|$dst, $src}", []>;
+                         "movupd\t{$src, $dst|$dst, $src}", [],
+                         IIC_SSE_MOVU_P_RR>;
 }
 
 let Predicates = [HasAVX] in {
@@ -912,13 +1079,17 @@ let Predicates = [HasSSE1] in {
 // bits are disregarded. FIXME: Set encoding to pseudo!
 let neverHasSideEffects = 1 in {
 def FsVMOVAPSrr : VPSI<0x28, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
-                       "movaps\t{$src, $dst|$dst, $src}", []>, VEX;
+                       "movaps\t{$src, $dst|$dst, $src}", [],
+                       IIC_SSE_MOVA_P_RR>, VEX;
 def FsVMOVAPDrr : VPDI<0x28, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
-                       "movapd\t{$src, $dst|$dst, $src}", []>, VEX;
+                       "movapd\t{$src, $dst|$dst, $src}", [],
+                       IIC_SSE_MOVA_P_RR>, VEX;
 def FsMOVAPSrr : PSI<0x28, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
-                     "movaps\t{$src, $dst|$dst, $src}", []>;
+                     "movaps\t{$src, $dst|$dst, $src}", [],
+                     IIC_SSE_MOVA_P_RR>;
 def FsMOVAPDrr : PDI<0x28, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
-                     "movapd\t{$src, $dst|$dst, $src}", []>;
+                     "movapd\t{$src, $dst|$dst, $src}", [],
+                     IIC_SSE_MOVA_P_RR>;
 }
 
 // Alias instruction to load FR32 or FR64 from f128mem using movaps. Upper
@@ -927,17 +1098,21 @@ let canFoldAsLoad = 1, isReMaterializable = 1 in {
   let isCodeGenOnly = 1 in {
    def FsVMOVAPSrm : VPSI<0x28, MRMSrcMem, (outs FR32:$dst), (ins f128mem:$src),
                           "movaps\t{$src, $dst|$dst, $src}",
-                          [(set FR32:$dst, (alignedloadfsf32 addr:$src))]>, VEX;
+                          [(set FR32:$dst, (alignedloadfsf32 addr:$src))],
+                          IIC_SSE_MOVA_P_RM>, VEX;
    def FsVMOVAPDrm : VPDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src),
                           "movapd\t{$src, $dst|$dst, $src}",
-                          [(set FR64:$dst, (alignedloadfsf64 addr:$src))]>, VEX;
+                          [(set FR64:$dst, (alignedloadfsf64 addr:$src))],
+                          IIC_SSE_MOVA_P_RM>, VEX;
   }
   def FsMOVAPSrm : PSI<0x28, MRMSrcMem, (outs FR32:$dst), (ins f128mem:$src),
                        "movaps\t{$src, $dst|$dst, $src}",
-                       [(set FR32:$dst, (alignedloadfsf32 addr:$src))]>;
+                       [(set FR32:$dst, (alignedloadfsf32 addr:$src))],
+                       IIC_SSE_MOVA_P_RM>;
   def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src),
                        "movapd\t{$src, $dst|$dst, $src}",
-                       [(set FR64:$dst, (alignedloadfsf64 addr:$src))]>;
+                       [(set FR64:$dst, (alignedloadfsf64 addr:$src))],
+                       IIC_SSE_MOVA_P_RM>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -946,48 +1121,54 @@ def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src),
 
 multiclass sse12_mov_hilo_packed<bits<8>opc, RegisterClass RC,
                                  SDNode psnode, SDNode pdnode, string base_opc,
-                                 string asm_opr> {
+                                 string asm_opr, InstrItinClass itin> {
   def PSrm : PI<opc, MRMSrcMem,
          (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
         !strconcat(base_opc, "s", asm_opr),
     [(set RC:$dst,
       (psnode RC:$src1,
              (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))))],
-             IIC_DEFAULT, SSEPackedSingle>, TB;
+             itin, SSEPackedSingle>, TB;
 
   def PDrm : PI<opc, MRMSrcMem,
          (outs RC:$dst), (ins RC:$src1, f64mem:$src2),
         !strconcat(base_opc, "d", asm_opr),
     [(set RC:$dst, (v2f64 (pdnode RC:$src1,
                            (scalar_to_vector (loadf64 addr:$src2)))))],
-             IIC_DEFAULT, SSEPackedDouble>, TB, OpSize;
+             itin, SSEPackedDouble>, TB, OpSize;
 }
 
 let AddedComplexity = 20 in {
   defm VMOVL : sse12_mov_hilo_packed<0x12, VR128, X86Movlps, X86Movlpd, "movlp",
-                     "\t{$src2, $src1, $dst|$dst, $src1, $src2}">, VEX_4V;
+                     "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+                     IIC_SSE_MOV_LH>, VEX_4V;
 }
 let Constraints = "$src1 = $dst", AddedComplexity = 20 in {
   defm MOVL : sse12_mov_hilo_packed<0x12, VR128, X86Movlps, X86Movlpd, "movlp",
-                                   "\t{$src2, $dst|$dst, $src2}">;
+                                   "\t{$src2, $dst|$dst, $src2}",
+                                   IIC_SSE_MOV_LH>;
 }
 
 def VMOVLPSmr : VPSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                    "movlps\t{$src, $dst|$dst, $src}",
                    [(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)),
-                                 (iPTR 0))), addr:$dst)]>, VEX;
+                                 (iPTR 0))), addr:$dst)],
+                                 IIC_SSE_MOV_LH>, VEX;
 def VMOVLPDmr : VPDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                    "movlpd\t{$src, $dst|$dst, $src}",
                    [(store (f64 (vector_extract (v2f64 VR128:$src),
-                                 (iPTR 0))), addr:$dst)]>, VEX;
+                                 (iPTR 0))), addr:$dst)],
+                                 IIC_SSE_MOV_LH>, VEX;
 def MOVLPSmr : PSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                    "movlps\t{$src, $dst|$dst, $src}",
                    [(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)),
-                                 (iPTR 0))), addr:$dst)]>;
+                                 (iPTR 0))), addr:$dst)],
+                                 IIC_SSE_MOV_LH>;
 def MOVLPDmr : PDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                    "movlpd\t{$src, $dst|$dst, $src}",
                    [(store (f64 (vector_extract (v2f64 VR128:$src),
-                                 (iPTR 0))), addr:$dst)]>;
+                                 (iPTR 0))), addr:$dst)],
+                                 IIC_SSE_MOV_LH>;
 
 let Predicates = [HasAVX] in {
   // Shuffle with VMOVLPS
@@ -1064,11 +1245,13 @@ let Predicates = [HasSSE2] in {
 
 let AddedComplexity = 20 in {
   defm VMOVH : sse12_mov_hilo_packed<0x16, VR128, X86Movlhps, X86Movlhpd, "movhp",
-                     "\t{$src2, $src1, $dst|$dst, $src1, $src2}">, VEX_4V;
+                     "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+                     IIC_SSE_MOV_LH>, VEX_4V;
 }
 let Constraints = "$src1 = $dst", AddedComplexity = 20 in {
   defm MOVH : sse12_mov_hilo_packed<0x16, VR128, X86Movlhps, X86Movlhpd, "movhp",
-                                   "\t{$src2, $dst|$dst, $src2}">;
+                                   "\t{$src2, $dst|$dst, $src2}",
+                                   IIC_SSE_MOV_LH>;
 }
 
 // v2f64 extract element 1 is always custom lowered to unpack high to low
@@ -1078,23 +1261,23 @@ def VMOVHPSmr : VPSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                    [(store (f64 (vector_extract
                                  (X86Unpckh (bc_v2f64 (v4f32 VR128:$src)),
                                             (bc_v2f64 (v4f32 VR128:$src))),
-                                 (iPTR 0))), addr:$dst)]>, VEX;
+                                 (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>, VEX;
 def VMOVHPDmr : VPDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                    "movhpd\t{$src, $dst|$dst, $src}",
                    [(store (f64 (vector_extract
                                  (v2f64 (X86Unpckh VR128:$src, VR128:$src)),
-                                 (iPTR 0))), addr:$dst)]>, VEX;
+                                 (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>, VEX;
 def MOVHPSmr : PSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                    "movhps\t{$src, $dst|$dst, $src}",
                    [(store (f64 (vector_extract
                                  (X86Unpckh (bc_v2f64 (v4f32 VR128:$src)),
                                             (bc_v2f64 (v4f32 VR128:$src))),
-                                 (iPTR 0))), addr:$dst)]>;
+                                 (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>;
 def MOVHPDmr : PDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                    "movhpd\t{$src, $dst|$dst, $src}",
                    [(store (f64 (vector_extract
                                  (v2f64 (X86Unpckh VR128:$src, VR128:$src)),
-                                 (iPTR 0))), addr:$dst)]>;
+                                 (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>;
 
 let Predicates = [HasAVX] in {
   // VMOVHPS patterns
@@ -1143,13 +1326,15 @@ let AddedComplexity = 20 in {
                                        (ins VR128:$src1, VR128:$src2),
                       "movlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       [(set VR128:$dst,
-                        (v4f32 (X86Movlhps VR128:$src1, VR128:$src2)))]>,
+                        (v4f32 (X86Movlhps VR128:$src1, VR128:$src2)))],
+                        IIC_SSE_MOV_LH>,
                       VEX_4V;
   def VMOVHLPSrr : VPSI<0x12, MRMSrcReg, (outs VR128:$dst),
                                        (ins VR128:$src1, VR128:$src2),
                       "movhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       [(set VR128:$dst,
-                        (v4f32 (X86Movhlps VR128:$src1, VR128:$src2)))]>,
+                        (v4f32 (X86Movhlps VR128:$src1, VR128:$src2)))],
+                        IIC_SSE_MOV_LH>,
                       VEX_4V;
 }
 let Constraints = "$src1 = $dst", AddedComplexity = 20 in {
@@ -1157,12 +1342,14 @@ let Constraints = "$src1 = $dst", AddedComplexity = 20 in {
                                        (ins VR128:$src1, VR128:$src2),
                       "movlhps\t{$src2, $dst|$dst, $src2}",
                       [(set VR128:$dst,
-                        (v4f32 (X86Movlhps VR128:$src1, VR128:$src2)))]>;
+                        (v4f32 (X86Movlhps VR128:$src1, VR128:$src2)))],
+                        IIC_SSE_MOV_LH>;
   def MOVHLPSrr : PSI<0x12, MRMSrcReg, (outs VR128:$dst),
                                        (ins VR128:$src1, VR128:$src2),
                       "movhlps\t{$src2, $dst|$dst, $src2}",
                       [(set VR128:$dst,
-                        (v4f32 (X86Movhlps VR128:$src1, VR128:$src2)))]>;
+                        (v4f32 (X86Movhlps VR128:$src1, VR128:$src2)))],
+                        IIC_SSE_MOV_LH>;
 }
 
 let Predicates = [HasAVX] in {
@@ -1193,24 +1380,50 @@ let Predicates = [HasSSE1] in {
 // SSE 1 & 2 - Conversion Instructions
 //===----------------------------------------------------------------------===//
 
+def SSE_CVT_PD : OpndItins<
+  IIC_SSE_CVT_PD_RR, IIC_SSE_CVT_PD_RM
+>;
+
+def SSE_CVT_PS : OpndItins<
+  IIC_SSE_CVT_PS_RR, IIC_SSE_CVT_PS_RM
+>;
+
+def SSE_CVT_Scalar : OpndItins<
+  IIC_SSE_CVT_Scalar_RR, IIC_SSE_CVT_Scalar_RM
+>;
+
+def SSE_CVT_SS2SI_32 : OpndItins<
+  IIC_SSE_CVT_SS2SI32_RR, IIC_SSE_CVT_SS2SI32_RM
+>;
+
+def SSE_CVT_SS2SI_64 : OpndItins<
+  IIC_SSE_CVT_SS2SI64_RR, IIC_SSE_CVT_SS2SI64_RM
+>;
+
+def SSE_CVT_SD2SI : OpndItins<
+  IIC_SSE_CVT_SD2SI_RR, IIC_SSE_CVT_SD2SI_RM
+>;
+
 multiclass sse12_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
                      SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag,
-                     string asm> {
+                     string asm, OpndItins itins> {
   def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
-              [(set DstRC:$dst, (OpNode SrcRC:$src))]>;
+              [(set DstRC:$dst, (OpNode SrcRC:$src))],
+              itins.rr>;
   def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
-              [(set DstRC:$dst, (OpNode (ld_frag addr:$src)))]>;
+              [(set DstRC:$dst, (OpNode (ld_frag addr:$src)))],
+              itins.rm>;
 }
 
 multiclass sse12_cvt_p<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
                        SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag,
-                       string asm, Domain d> {
+                       string asm, Domain d, OpndItins itins> {
   def rr : PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
              [(set DstRC:$dst, (OpNode SrcRC:$src))],
-             IIC_DEFAULT, d>;
+             itins.rr, d>;
   def rm : PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
              [(set DstRC:$dst, (OpNode (ld_frag addr:$src)))],
-             IIC_DEFAULT, d>;
+             itins.rm, d>;
 }
 
 multiclass sse12_vcvt_avx<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
@@ -1226,32 +1439,36 @@ let neverHasSideEffects = 1 in {
 }
 
 defm VCVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32,
-                      "cvttss2si\t{$src, $dst|$dst, $src}">, XS, VEX,
-                      VEX_LIG;
+                      "cvttss2si\t{$src, $dst|$dst, $src}",
+                      SSE_CVT_SS2SI_32>,
+                      XS, VEX, VEX_LIG;
 defm VCVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32,
-                      "cvttss2si\t{$src, $dst|$dst, $src}">, XS, VEX,
-                      VEX_W, VEX_LIG;
+                      "cvttss2si\t{$src, $dst|$dst, $src}",
+                      SSE_CVT_SS2SI_64>,
+                      XS, VEX, VEX_W, VEX_LIG;
 defm VCVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64,
-                      "cvttsd2si\t{$src, $dst|$dst, $src}">, XD, VEX,
-                      VEX_LIG;
+                      "cvttsd2si\t{$src, $dst|$dst, $src}",
+                      SSE_CVT_SD2SI>,
+                      XD, VEX, VEX_LIG;
 defm VCVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, fp_to_sint, f64mem, loadf64,
-                      "cvttsd2si\t{$src, $dst|$dst, $src}">, XD,
-                      VEX, VEX_W, VEX_LIG;
+                      "cvttsd2si\t{$src, $dst|$dst, $src}",
+                      SSE_CVT_SD2SI>,
+                      XD, VEX, VEX_W, VEX_LIG;
 
 // The assembler can recognize rr 64-bit instructions by seeing a rxx
 // register, but the same isn't true when only using memory operands,
 // provide other assembly "l" and "q" forms to address this explicitly
 // where appropriate to do so.
-defm VCVTSI2SS   : sse12_vcvt_avx<0x2A, GR32, FR32, i32mem, "cvtsi2ss">, XS,
-                  VEX_4V, VEX_LIG;
-defm VCVTSI2SS64 : sse12_vcvt_avx<0x2A, GR64, FR32, i64mem, "cvtsi2ss{q}">, XS,
-                  VEX_4V, VEX_W, VEX_LIG;
-defm VCVTSI2SD   : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd">, XD,
-                  VEX_4V, VEX_LIG;
-defm VCVTSI2SDL  : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd{l}">, XD,
-                  VEX_4V, VEX_LIG;
-defm VCVTSI2SD64 : sse12_vcvt_avx<0x2A, GR64, FR64, i64mem, "cvtsi2sd{q}">, XD,
-                  VEX_4V, VEX_W, VEX_LIG;
+defm VCVTSI2SS   : sse12_vcvt_avx<0x2A, GR32, FR32, i32mem, "cvtsi2ss">,
+                  XS, VEX_4V, VEX_LIG;
+defm VCVTSI2SS64 : sse12_vcvt_avx<0x2A, GR64, FR32, i64mem, "cvtsi2ss{q}">,
+                  XS, VEX_4V, VEX_W, VEX_LIG;
+defm VCVTSI2SD   : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd">,
+                  XD, VEX_4V, VEX_LIG;
+defm VCVTSI2SDL  : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd{l}">,
+                  XD, VEX_4V, VEX_LIG;
+defm VCVTSI2SD64 : sse12_vcvt_avx<0x2A, GR64, FR64, i64mem, "cvtsi2sd{q}">,
+                  XD, VEX_4V, VEX_W, VEX_LIG;
 
 let Predicates = [HasAVX], AddedComplexity = 1 in {
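To see how the new OpndItins bundles are consumed, here is a hypothetical instantiation of the sse12_fp_scalar multiclass from the first hunk; the real ADDSS definitions go through further wrapper multiclasses that are not shown in these hunks.

    // Hypothetical use of the patched multiclass: the expansion picks
    // itins.rr for the register-register def and itins.rm for the
    // register-memory def, so one defm line attaches both itinerary
    // classes and a per-CPU table can give the two forms different
    // latencies.
    defm ADDSS : sse12_fp_scalar<0x58, "addss", fadd, FR32, f32mem,
                                 SSE_ALU_F32S>, XS;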