-rw-r--r--  lib/Target/X86/X86InstrFormats.td     |  74
-rw-r--r--  lib/Target/X86/X86InstrInfo.td        |   6
-rw-r--r--  lib/Target/X86/X86InstrMMX.td         |  65
-rw-r--r--  lib/Target/X86/X86InstrSSE.td         | 134
-rw-r--r--  lib/Target/X86/X86Subtarget.h         |   1
-rw-r--r--  test/CodeGen/X86/fast-isel-x86-64.ll  |   8
-rw-r--r--  test/CodeGen/X86/widen_load-1.ll      |  13
7 files changed, 174 insertions(+), 127 deletions(-)
diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td
index 81b4f812af..55ad2ecb8a 100644
--- a/lib/Target/X86/X86InstrFormats.td
+++ b/lib/Target/X86/X86InstrFormats.td
@@ -287,12 +287,14 @@ class Iseg32 <bits<8> o, Format f, dag outs, dag ins, string asm,
   let CodeSize = 3;
 }
 
+def __xs : XS;
+
 // SI - SSE 1 & 2 scalar instructions
 class SI<bits<8> o, Format F, dag outs, dag ins, string asm,
          list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
       : I<o, F, outs, ins, asm, pattern, itin> {
   let Predicates = !if(hasVEXPrefix /* VEX */, [HasAVX],
-            !if(!eq(Prefix, 12 /* XS */), [HasSSE1], [HasSSE2]));
+            !if(!eq(Prefix, __xs.Prefix), [UseSSE1], [UseSSE2]));
 
   // AVX instructions have a 'v' prefix in the mnemonic
   let AsmString = !if(hasVEXPrefix, !strconcat("v", asm), asm);
@@ -303,7 +305,7 @@ class SIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
            list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
       : Ii8<o, F, outs, ins, asm, pattern, itin> {
   let Predicates = !if(hasVEXPrefix /* VEX */, [HasAVX],
-            !if(!eq(Prefix, 12 /* XS */), [HasSSE1], [HasSSE2]));
+            !if(!eq(Prefix, __xs.Prefix), [UseSSE1], [UseSSE2]));
 
   // AVX instructions have a 'v' prefix in the mnemonic
   let AsmString = !if(hasVEXPrefix, !strconcat("v", asm), asm);
@@ -314,18 +316,25 @@ class PI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern,
          InstrItinClass itin, Domain d>
       : I<o, F, outs, ins, asm, pattern, itin, d> {
   let Predicates = !if(hasVEXPrefix /* VEX */, [HasAVX],
-        !if(hasOpSizePrefix /* OpSize */, [HasSSE2], [HasSSE1]));
+        !if(hasOpSizePrefix /* OpSize */, [UseSSE2], [UseSSE1]));
 
   // AVX instructions have a 'v' prefix in the mnemonic
   let AsmString = !if(hasVEXPrefix, !strconcat("v", asm), asm);
 }
 
+// MMXPI - SSE 1 & 2 packed instructions with MMX operands
+class MMXPI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern,
+            InstrItinClass itin, Domain d>
+      : I<o, F, outs, ins, asm, pattern, itin, d> {
+  let Predicates = !if(hasOpSizePrefix /* OpSize */, [HasSSE2], [HasSSE1]);
+}
+
 // PIi8 - SSE 1 & 2 packed instructions with immediate
 class PIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
            list<dag> pattern, InstrItinClass itin, Domain d>
       : Ii8<o, F, outs, ins, asm, pattern, itin, d> {
   let Predicates = !if(hasVEX_4VPrefix /* VEX */, [HasAVX],
-        !if(hasOpSizePrefix /* OpSize */, [HasSSE2], [HasSSE1]));
+        !if(hasOpSizePrefix /* OpSize */, [UseSSE2], [UseSSE1]));
 
   // AVX instructions have a 'v' prefix in the mnemonic
   let AsmString = !if(hasVEX_4VPrefix, !strconcat("v", asm), asm);
@@ -341,18 +350,18 @@ class PIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
 
 class SSI<bits<8> o, Format F, dag outs, dag ins, string asm,
           list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
-      : I<o, F, outs, ins, asm, pattern, itin>, XS, Requires<[HasSSE1]>;
+      : I<o, F, outs, ins, asm, pattern, itin>, XS, Requires<[UseSSE1]>;
 class SSIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
             list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
-      : Ii8<o, F, outs, ins, asm, pattern, itin>, XS, Requires<[HasSSE1]>;
+      : Ii8<o, F, outs, ins, asm, pattern, itin>, XS, Requires<[UseSSE1]>;
 class PSI<bits<8> o, Format F, dag outs, dag ins, string asm,
           list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
       : I<o, F, outs, ins, asm, pattern, itin, SSEPackedSingle>, TB,
-        Requires<[HasSSE1]>;
+        Requires<[UseSSE1]>;
 class PSIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
             list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
       : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedSingle>, TB,
-        Requires<[HasSSE1]>;
+        Requires<[UseSSE1]>;
 class VSSI<bits<8> o, Format F, dag outs, dag ins, string asm,
            list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
       : I<o, F, outs, ins, !strconcat("v", asm), pattern, itin>, XS,
@@ -372,27 +381,31 @@ class VPSI<bits<8> o, Format F, dag outs, dag ins, string asm,
 //   PDIi8  - SSE2 instructions with ImmT == Imm8 and TB and OpSize prefixes.
 //   VSDI   - SSE2 instructions with XD prefix in AVX form.
 //   VPDI   - SSE2 instructions with TB and OpSize prefixes in AVX form.
+//   MMXSDIi8  - SSE2 instructions with ImmT == Imm8 and XD prefix as well as
+//               MMX operands.
+//   MMXSSDIi8 - SSE2 instructions with ImmT == Imm8 and XS prefix as well as
+//               MMX operands.
 
 class SDI<bits<8> o, Format F, dag outs, dag ins, string asm,
           list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
-      : I<o, F, outs, ins, asm, pattern, itin>, XD, Requires<[HasSSE2]>;
+      : I<o, F, outs, ins, asm, pattern, itin>, XD, Requires<[UseSSE2]>;
 class SDIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
             list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
-      : Ii8<o, F, outs, ins, asm, pattern, itin>, XD, Requires<[HasSSE2]>;
+      : Ii8<o, F, outs, ins, asm, pattern, itin>, XD, Requires<[UseSSE2]>;
 class S2SI<bits<8> o, Format F, dag outs, dag ins, string asm,
            list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
-      : I<o, F, outs, ins, asm, pattern, itin>, XS, Requires<[HasSSE2]>;
+      : I<o, F, outs, ins, asm, pattern, itin>, XS, Requires<[UseSSE2]>;
 class S2SIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
              list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
-      : Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[HasSSE2]>;
+      : Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[UseSSE2]>;
 class PDI<bits<8> o, Format F, dag outs, dag ins, string asm,
           list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
       : I<o, F, outs, ins, asm, pattern, itin, SSEPackedDouble>, TB, OpSize,
-        Requires<[HasSSE2]>;
+        Requires<[UseSSE2]>;
 class PDIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
             list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
       : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedDouble>, TB, OpSize,
-        Requires<[HasSSE2]>;
+        Requires<[UseSSE2]>;
 class VSDI<bits<8> o, Format F, dag outs, dag ins, string asm,
            list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
       : I<o, F, outs, ins, !strconcat("v", asm), pattern, itin>, XD,
@@ -405,6 +418,12 @@ class VPDI<bits<8> o, Format F, dag outs, dag ins, string asm,
            list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
       : I<o, F, outs, ins, !strconcat("v", asm), pattern, itin, SSEPackedDouble>,
         TB, OpSize, Requires<[HasAVX]>;
+class MMXSDIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+               list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+      : Ii8<o, F, outs, ins, asm, pattern, itin>, XD, Requires<[HasSSE2]>;
+class MMXS2SIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+                list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+      : Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[HasSSE2]>;
 
 // SSE3 Instruction Templates:
 //
@@ -415,21 +434,23 @@ class VPDI<bits<8> o, Format F, dag outs, dag ins, string asm,
 class S3SI<bits<8> o, Format F, dag outs, dag ins, string asm,
            list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
       : I<o, F, outs, ins, asm, pattern, itin, SSEPackedSingle>, XS,
-        Requires<[HasSSE3]>;
+        Requires<[UseSSE3]>;
 class S3DI<bits<8> o, Format F, dag outs, dag ins, string asm,
            list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
       : I<o, F, outs, ins, asm, pattern, itin, SSEPackedDouble>, XD,
-        Requires<[HasSSE3]>;
+        Requires<[UseSSE3]>;
 class S3I<bits<8> o, Format F, dag outs, dag ins, string asm,
           list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
       : I<o, F, outs, ins, asm, pattern, itin, SSEPackedDouble>, TB, OpSize,
-        Requires<[HasSSE3]>;
+        Requires<[UseSSE3]>;
 
 
 // SSSE3 Instruction Templates:
 //
 //   SS38I - SSSE3 instructions with T8 prefix.
 //   SS3AI - SSSE3 instructions with TA prefix.
+//   MMXSS38I - SSSE3 instructions with T8 prefix and MMX operands.
+//   MMXSS3AI - SSSE3 instructions with TA prefix and MMX operands.
 //
 // Note: SSSE3 instructions have 64-bit and 128-bit versions. The 64-bit version
 // uses the MMX registers. The 64-bit versions are grouped with the MMX
@@ -438,10 +459,18 @@ class S3I<bits<8> o, Format F, dag outs, dag ins, string asm,
 class SS38I<bits<8> o, Format F, dag outs, dag ins, string asm,
             list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
       : I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, T8,
-        Requires<[HasSSSE3]>;
+        Requires<[UseSSSE3]>;
 class SS3AI<bits<8> o, Format F, dag outs, dag ins, string asm,
             list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
       : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA,
+        Requires<[UseSSSE3]>;
+class MMXSS38I<bits<8> o, Format F, dag outs, dag ins, string asm,
+               list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+      : I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, T8,
+        Requires<[HasSSSE3]>;
+class MMXSS3AI<bits<8> o, Format F, dag outs, dag ins, string asm,
+               list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+      : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA,
         Requires<[HasSSSE3]>;
 
 // SSE4.1 Instruction Templates:
@@ -452,11 +481,11 @@ class SS3AI<bits<8> o, Format F, dag outs, dag ins, string asm,
 class SS48I<bits<8> o, Format F, dag outs, dag ins, string asm,
             list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
       : I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, T8,
-        Requires<[HasSSE41]>;
+        Requires<[UseSSE41]>;
 class SS4AIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
               list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
       : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA,
-        Requires<[HasSSE41]>;
+        Requires<[UseSSE41]>;
 
 // SSE4.2 Instruction Templates:
 //
@@ -464,9 +493,10 @@ class SS4AIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
 class SS428I<bits<8> o, Format F, dag outs, dag ins, string asm,
              list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
       : I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, T8,
-        Requires<[HasSSE42]>;
+        Requires<[UseSSE42]>;
 
 // SS42FI - SSE 4.2 instructions with T8XD prefix.
+// NOTE: 'HasSSE42' is used as SS42FI is only used for CRC32 insns.
 class SS42FI<bits<8> o, Format F, dag outs, dag ins, string asm,
              list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
       : I<o, F, outs, ins, asm, pattern, itin>, T8XD, Requires<[HasSSE42]>;
@@ -475,7 +505,7 @@ class SS42FI<bits<8> o, Format F, dag outs, dag ins, string asm,
 class SS42AI<bits<8> o, Format F, dag outs, dag ins, string asm,
              list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
       : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA,
-        Requires<[HasSSE42]>;
+        Requires<[UseSSE42]>;
 
 // AVX Instruction Templates:
 //   Instructions introduced in AVX (no SSE equivalent forms)
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index c8a588b036..304676d635 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -552,11 +552,17 @@ def HasMMX       : Predicate<"Subtarget->hasMMX()">;
 def Has3DNow     : Predicate<"Subtarget->has3DNow()">;
 def Has3DNowA    : Predicate<"Subtarget->has3DNowA()">;
 def HasSSE1      : Predicate<"Subtarget->hasSSE1()">;
+def UseSSE1      : Predicate<"Subtarget->hasSSE1() && Subtarget->hasNoAVX()">;
 def HasSSE2      : Predicate<"Subtarget->hasSSE2()">;
+def UseSSE2      : Predicate<"Subtarget->hasSSE2() && Subtarget->hasNoAVX()">;
 def HasSSE3      : Predicate<"Subtarget->hasSSE3()">;
+def UseSSE3      : Predicate<"Subtarget->hasSSE3() && Subtarget->hasNoAVX()">;
 def HasSSSE3     : Predicate<"Subtarget->hasSSSE3()">;
+def UseSSSE3     : Predicate<"Subtarget->hasSSSE3() && Subtarget->hasNoAVX()">;
 def HasSSE41     : Predicate<"Subtarget->hasSSE41()">;
+def UseSSE41     : Predicate<"Subtarget->hasSSE41() && Subtarget->hasNoAVX()">;
 def HasSSE42     : Predicate<"Subtarget->hasSSE42()">;
+def UseSSE42     : Predicate<"Subtarget->hasSSE42() && Subtarget->hasNoAVX()">;
 def HasSSE4A     : Predicate<"Subtarget->hasSSE4A()">;
 def HasAVX       : Predicate<"Subtarget->hasAVX()">;
 def HasAVX2      : Predicate<"Subtarget->hasAVX2()">;
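The Use* predicates defined above are the heart of the patch: each one reads "the SSE level is implemented and AVX is not", via the X86Subtarget::hasNoAVX() accessor (presumably the one-line X86Subtarget.h change in the diffstat). Guarding a legacy-encoded pattern with UseSSE1 instead of HasSSE1 makes it unavailable on AVX targets, so its VEX-encoded twin is the only candidate and no AddedComplexity tie-breaking between the two encodings is needed. A minimal sketch of the resulting division of labor, modeled on the storeu patterns that appear later in this diff (illustrative layout, not a literal excerpt):

    // Legacy encoding: selectable only when SSE1 is on and AVX is off.
    let Predicates = [UseSSE1] in
    def : Pat<(int_x86_sse_storeu_ps addr:$dst, VR128:$src),
              (MOVUPSmr addr:$dst, VR128:$src)>;
    // VEX encoding: selectable exactly when AVX is on.
    let Predicates = [HasAVX] in
    def : Pat<(int_x86_sse_storeu_ps addr:$dst, VR128:$src),
              (VMOVUPSmr addr:$dst, VR128:$src)>;

For any given subtarget at most one of the two predicates holds, which is what lets the SSE patterns below flip wholesale from Has* to Use*.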
diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td
index c8f40bbb49..bd5485840d 100644
--- a/lib/Target/X86/X86InstrMMX.td
+++ b/lib/Target/X86/X86InstrMMX.td
@@ -118,11 +118,11 @@ let Constraints = "$src1 = $dst" in {
 /// Unary MMX instructions requiring SSSE3.
 multiclass SS3I_unop_rm_int_mm<bits<8> opc, string OpcodeStr,
                                Intrinsic IntId64, OpndItins itins> {
-  def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src),
+  def rr64 : MMXSS38I<opc, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src),
          !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
          [(set VR64:$dst, (IntId64 VR64:$src))], itins.rr>;
 
-  def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src),
+  def rm64 : MMXSS38I<opc, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src),
          !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
          [(set VR64:$dst,
            (IntId64 (bitconvert (memopmmx addr:$src))))],
@@ -134,11 +134,11 @@ let ImmT = NoImm, Constraints = "$src1 = $dst" in {
 multiclass SS3I_binop_rm_int_mm<bits<8> opc, string OpcodeStr,
                                 Intrinsic IntId64, OpndItins itins> {
   let isCommutable = 0 in
-  def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst),
+  def rr64 : MMXSS38I<opc, MRMSrcReg, (outs VR64:$dst),
        (ins VR64:$src1, VR64:$src2),
        !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
        [(set VR64:$dst, (IntId64 VR64:$src1, VR64:$src2))], itins.rr>;
-  def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst),
+  def rm64 : MMXSS38I<opc, MRMSrcMem, (outs VR64:$dst),
        (ins VR64:$src1, i64mem:$src2),
        !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
        [(set VR64:$dst,
@@ -149,11 +149,11 @@ multiclass SS3I_binop_rm_int_mm<bits<8> opc, string OpcodeStr,
 
 /// PALIGN MMX instructions (require SSSE3).
 multiclass ssse3_palign_mm<string asm, Intrinsic IntId> {
-  def R64irr  : SS3AI<0x0F, MRMSrcReg, (outs VR64:$dst),
+  def R64irr  : MMXSS3AI<0x0F, MRMSrcReg, (outs VR64:$dst),
       (ins VR64:$src1, VR64:$src2, i8imm:$src3),
       !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
       [(set VR64:$dst, (IntId VR64:$src1, VR64:$src2, (i8 imm:$src3)))]>;
-  def R64irm  : SS3AI<0x0F, MRMSrcMem, (outs VR64:$dst),
+  def R64irm  : MMXSS3AI<0x0F, MRMSrcMem, (outs VR64:$dst),
       (ins VR64:$src1, i64mem:$src2, i8imm:$src3),
       !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
      [(set VR64:$dst, (IntId VR64:$src1,
@@ -163,12 +163,10 @@ multiclass ssse3_palign_mm<string asm, Intrinsic IntId> {
 multiclass sse12_cvt_pint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
                          Intrinsic Int, X86MemOperand x86memop, PatFrag ld_frag,
                          string asm, OpndItins itins, Domain d> {
-  def irr : PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
-               [(set DstRC:$dst, (Int SrcRC:$src))],
-               itins.rr, d>;
-  def irm : PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
-               [(set DstRC:$dst, (Int (ld_frag addr:$src)))],
-               itins.rm, d>;
+  def irr : MMXPI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
+                  [(set DstRC:$dst, (Int SrcRC:$src))], itins.rr, d>;
+  def irm : MMXPI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
+                  [(set DstRC:$dst, (Int (ld_frag addr:$src)))], itins.rm, d>;
 }
 
 multiclass sse12_cvt_pint_3addr<bits<8> opc, RegisterClass SrcRC,
@@ -243,29 +241,30 @@ def MMX_MOVQ64mr : MMXI<0x7F, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src),
                         [(store (x86mmx VR64:$src), addr:$dst)],
                         IIC_MMX_MOVQ_RM>;
 
-def MMX_MOVDQ2Qrr : SDIi8<0xD6, MRMSrcReg, (outs VR64:$dst),
-                          (ins VR128:$src), "movdq2q\t{$src, $dst|$dst, $src}",
-                          [(set VR64:$dst,
-                            (x86mmx (bitconvert
-                            (i64 (vector_extract (v2i64 VR128:$src),
-                                  (iPTR 0))))))],
-                          IIC_MMX_MOVQ_RR>;
-
-def MMX_MOVQ2DQrr : S2SIi8<0xD6, MRMSrcReg, (outs VR128:$dst),
-                           (ins VR64:$src), "movq2dq\t{$src, $dst|$dst, $src}",
-                           [(set VR128:$dst,
-                             (v2i64 (scalar_to_vector
-                                     (i64 (bitconvert (x86mmx VR64:$src))))))],
-                           IIC_MMX_MOVQ_RR>;
+def MMX_MOVDQ2Qrr : MMXSDIi8<0xD6, MRMSrcReg, (outs VR64:$dst),
+                             (ins VR128:$src), "movdq2q\t{$src, $dst|$dst, $src}",
+                             [(set VR64:$dst,
+                               (x86mmx (bitconvert
+                               (i64 (vector_extract (v2i64 VR128:$src),
+                                     (iPTR 0))))))],
+                             IIC_MMX_MOVQ_RR>;
+
+def MMX_MOVQ2DQrr : MMXS2SIi8<0xD6, MRMSrcReg, (outs VR128:$dst),
+                              (ins VR64:$src), "movq2dq\t{$src, $dst|$dst, $src}",
+                              [(set VR128:$dst,
+                                (v2i64
+                                  (scalar_to_vector
+                                    (i64 (bitconvert (x86mmx VR64:$src))))))],
+                              IIC_MMX_MOVQ_RR>;
 
 let neverHasSideEffects = 1 in
-def MMX_MOVQ2FR64rr: S2SIi8<0xD6, MRMSrcReg, (outs FR64:$dst),
-                            (ins VR64:$src), "movq2dq\t{$src, $dst|$dst, $src}", [],
-                            IIC_MMX_MOVQ_RR>;
+def MMX_MOVQ2FR64rr: MMXS2SIi8<0xD6, MRMSrcReg, (outs FR64:$dst),
+                               (ins VR64:$src), "movq2dq\t{$src, $dst|$dst, $src}",
+                               [], IIC_MMX_MOVQ_RR>;
 
-def MMX_MOVFR642Qrr: SDIi8<0xD6, MRMSrcReg, (outs VR64:$dst),
-                           (ins FR64:$src), "movdq2q\t{$src, $dst|$dst, $src}", [],
-                           IIC_MMX_MOVQ_RR>;
+def MMX_MOVFR642Qrr: MMXSDIi8<0xD6, MRMSrcReg, (outs VR64:$dst),
+                              (ins FR64:$src), "movdq2q\t{$src, $dst|$dst, $src}",
+                              [], IIC_MMX_MOVQ_RR>;
 
 def MMX_MOVNTQmr  : MMXI<0xE7, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src),
                          "movntq\t{$src, $dst|$dst, $src}",
@@ -577,6 +576,7 @@ def MMX_MASKMOVQ64: MMXI64<0xF7, MRMSrcReg, (outs), (ins VR64:$src, VR64:$mask),
                            IIC_MMX_MASKMOV>;
 
 // 64-bit bit convert.
+let Predicates = [HasSSE2] in {
 def : Pat<(x86mmx (bitconvert (i64 GR64:$src))),
           (MMX_MOVD64to64rr GR64:$src)>;
 def : Pat<(i64 (bitconvert (x86mmx VR64:$src))),
@@ -585,5 +585,6 @@ def : Pat<(f64 (bitconvert (x86mmx VR64:$src))),
           (MMX_MOVQ2FR64rr VR64:$src)>;
 def : Pat<(x86mmx (bitconvert (f64 FR64:$src))),
           (MMX_MOVFR642Qrr FR64:$src)>;
+}
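The MMX changes cut the other way: VR64 instructions have no VEX forms, so the new MMXSS38I/MMXSS3AI/MMXPI/MMXSDIi8/MMXS2SIi8 templates deliberately keep the unqualified Has* predicates. Had the 64-bit defs stayed on the shared SS38I/SS3AI templates, they would have inherited UseSSSE3 and become unselectable under -mattr=+avx. A sketch of the contrast (pattern shapes and instruction names are illustrative, echoing the pabs family rather than copied from the source):

    // 128-bit form: retired under AVX in favour of the VEX-encoded vpabsb.
    let Predicates = [UseSSSE3] in
    def : Pat<(int_x86_ssse3_pabs_b_128 VR128:$src), (PABSBrr128 VR128:$src)>;
    // 64-bit MMX form: must stay live under AVX, since it has no VEX twin.
    let Predicates = [HasSSSE3] in
    def : Pat<(int_x86_ssse3_pabs_b VR64:$src), (MMX_PABSBrr64 VR64:$src)>;

The same reasoning explains the new HasSSE2 guard on the 64-bit bitconvert patterns at the end of the file: they produce MMX_MOVD64to64rr and friends, which remain the only way to move bits between GR64/FR64 and VR64 even on AVX machines.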
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index c834b90ffe..b46da95bbe 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -362,7 +362,7 @@ let Predicates = [HasAVX] in {
   def : Pat<(v16i16 (bitconvert (v32i8 VR256:$src))), (v16i16 VR256:$src)>;
 }
 
-// Alias instructions that map fld0 to pxor for sse.
+// Alias instructions that map fld0 to xorps for sse or vxorps for avx.
 // This is expanded by ExpandPostRAPseudos.
 let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
     isPseudo = 1 in {
@@ -693,7 +693,7 @@ let Predicates = [HasAVX] in {
             (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
 }
 
-let Predicates = [HasSSE1] in {
+let Predicates = [UseSSE1] in {
   let AddedComplexity = 15 in {
   // Move scalar to XMM zero-extended, zeroing a VR128 then do a
   // MOVSS to the lower bits.
@@ -727,7 +727,7 @@ let Predicates = [HasSSE1] in {
             (MOVSSrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR32))>;
 }
 
-let Predicates = [HasSSE2] in {
+let Predicates = [UseSSE2] in {
   let AddedComplexity = 15 in {
   // Move scalar to XMM zero-extended, zeroing a VR128 then do a
   // MOVSD to the lower bits.
@@ -964,10 +964,10 @@ let Predicates = [HasAVX] in {
             (VMOVUPDmr addr:$dst, VR128:$src)>;
 }
 
-let Predicates = [HasSSE1] in
+let Predicates = [UseSSE1] in
   def : Pat<(int_x86_sse_storeu_ps addr:$dst, VR128:$src),
             (MOVUPSmr addr:$dst, VR128:$src)>;
 
-let Predicates = [HasSSE2] in
+let Predicates = [UseSSE2] in
   def : Pat<(int_x86_sse2_storeu_pd addr:$dst, VR128:$src),
             (MOVUPDmr addr:$dst, VR128:$src)>;
 
@@ -1022,7 +1022,7 @@ let Predicates = [HasAVX] in {
 // Use movaps / movups for SSE integer load / store (one byte shorter).
 // The instructions selected below are then converted to MOVDQA/MOVDQU
 // during the SSE domain pass.
-let Predicates = [HasSSE1] in {
+let Predicates = [UseSSE1] in {
   def : Pat<(alignedloadv2i64 addr:$src),
             (MOVAPSrm addr:$src)>;
   def : Pat<(loadv2i64 addr:$src),
@@ -1169,7 +1169,7 @@ let Predicates = [HasAVX] in {
             (VMOVLPDmr addr:$src1, VR128:$src2)>;
 }
 
-let Predicates = [HasSSE1] in {
+let Predicates = [UseSSE1] in {
   // (store (vector_shuffle (load addr), v2, <4, 5, 2, 3>), addr) using MOVLPS
   def : Pat<(store (i64 (vector_extract (bc_v2i64 (v4f32 VR128:$src2)),
                                  (iPTR 0))), addr:$src1),
@@ -1194,7 +1194,7 @@ let Predicates = [HasSSE1] in {
             (MOVLPSmr addr:$src1, VR128:$src2)>;
 }
 
-let Predicates = [HasSSE2] in {
+let Predicates = [UseSSE2] in {
   // Shuffle with MOVLPD
   def : Pat<(v2f64 (X86Movlpd VR128:$src1, (load addr:$src2))),
             (MOVLPDrm VR128:$src1, addr:$src2)>;
@@ -1268,7 +1268,7 @@ let Predicates = [HasAVX] in {
             (VMOVHPDrm VR128:$src1, addr:$src2)>;
 }
 
-let Predicates = [HasSSE1] in {
+let Predicates = [UseSSE1] in {
   // MOVHPS patterns
   def : Pat<(X86Movlhps VR128:$src1,
                  (bc_v4f32 (v2i64 (scalar_to_vector (loadi64 addr:$src2))))),
@@ -1278,7 +1278,7 @@ let Predicates = [HasSSE1] in {
             (MOVHPSrm VR128:$src1, addr:$src2)>;
 }
 
-let Predicates = [HasSSE2] in {
+let Predicates = [UseSSE2] in {
   // FIXME: Instead of X86Unpckl, there should be a X86Movlhpd here, the problem
   // is during lowering, where it's not possible to recognize the load fold
   // cause it has two uses through a bitcast. One use disappears at isel time
@@ -1335,7 +1335,7 @@ let Predicates = [HasAVX] in {
             (VMOVHLPSrr VR128:$src1, VR128:$src2)>;
 }
 
-let Predicates = [HasSSE1] in {
+let Predicates = [UseSSE1] in {
   // MOVLHPS patterns
   def : Pat<(v4i32 (X86Movlhps VR128:$src1, VR128:$src2)),
             (MOVLHPSrr VR128:$src1, VR128:$src2)>;
@@ -1622,7 +1622,7 @@ defm VCVTDQ2PSY : sse12_cvt_p<0x5B, VR256, VR256, i256mem,
 defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, i128mem,
                             "cvtdq2ps\t{$src, $dst|$dst, $src}",
                             SSEPackedSingle, SSE_CVT_PS>,
-                            TB, Requires<[HasSSE2]>;
+                            TB, Requires<[UseSSE2]>;
 
 /// SSE 2 Only
 
@@ -1652,7 +1652,7 @@ def CVTSD2SSrm  : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src),
                     [(set FR32:$dst, (fround (loadf64 addr:$src)))],
                     IIC_SSE_CVT_Scalar_RM>,
                     XD,
-                  Requires<[HasSSE2, OptForSize]>;
+                  Requires<[UseSSE2, OptForSize]>;
 
 def Int_VCVTSD2SSrr: I<0x5A, MRMSrcReg,
                        (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
@@ -1673,13 +1673,13 @@ def Int_CVTSD2SSrr: I<0x5A, MRMSrcReg,
                       "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       [(set VR128:$dst,
                             (int_x86_sse2_cvtsd2ss VR128:$src1, VR128:$src2))],
-                      IIC_SSE_CVT_Scalar_RR>, XD, Requires<[HasSSE2]>;
+                      IIC_SSE_CVT_Scalar_RR>, XD, Requires<[UseSSE2]>;
 def Int_CVTSD2SSrm: I<0x5A, MRMSrcReg,
                       (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2),
                       "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       [(set VR128:$dst,
                             (int_x86_sse2_cvtsd2ss VR128:$src1, sse_load_f64:$src2))],
-                      IIC_SSE_CVT_Scalar_RM>, XD, Requires<[HasSSE2]>;
+                      IIC_SSE_CVT_Scalar_RM>, XD, Requires<[UseSSE2]>;
 }
 
 // Convert scalar single to scalar double
@@ -1716,12 +1716,12 @@ def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src),
                    "cvtss2sd\t{$src, $dst|$dst, $src}",
                    [(set FR64:$dst, (fextend FR32:$src))],
                    IIC_SSE_CVT_Scalar_RR>, XS,
-                   Requires<[HasSSE2]>;
+                   Requires<[UseSSE2]>;
 def CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src),
                    "cvtss2sd\t{$src, $dst|$dst, $src}",
                    [(set FR64:$dst, (extloadf32 addr:$src))],
                    IIC_SSE_CVT_Scalar_RM>, XS,
-                   Requires<[HasSSE2, OptForSize]>;
+                   Requires<[UseSSE2, OptForSize]>;
 
 // extload f32 -> f64.  This matches load+fextend because we have a hack in
 // the isel (PreprocessForFPConvert) that can introduce loads after dag
@@ -1729,9 +1729,9 @@ def CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src),
 // Since these loads aren't folded into the fextend, we have to match it
 // explicitly here.
 def : Pat<(fextend (loadf32 addr:$src)),
-          (CVTSS2SDrm addr:$src)>, Requires<[HasSSE2]>;
+          (CVTSS2SDrm addr:$src)>, Requires<[UseSSE2]>;
 def : Pat<(extloadf32 addr:$src),
-          (CVTSS2SDrr (MOVSSrm addr:$src))>, Requires<[HasSSE2, OptForSpeed]>;
+          (CVTSS2SDrr (MOVSSrm addr:$src))>, Requires<[UseSSE2, OptForSpeed]>;
 
 def Int_VCVTSS2SDrr: I<0x5A, MRMSrcReg,
                        (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
@@ -1751,13 +1751,13 @@ def Int_CVTSS2SDrr: I<0x5A, MRMSrcReg,
                       "cvtss2sd\t{$src2, $dst|$dst, $src2}",
                       [(set VR128:$dst,
                             (int_x86_sse2_cvtss2sd VR128:$src1, VR128:$src2))],
-                      IIC_SSE_CVT_Scalar_RR>, XS, Requires<[HasSSE2]>;
+                      IIC_SSE_CVT_Scalar_RR>, XS, Requires<[UseSSE2]>;
 def Int_CVTSS2SDrm: I<0x5A, MRMSrcMem,
                       (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2),
                       "cvtss2sd\t{$src2, $dst|$dst, $src2}",
                       [(set VR128:$dst,
                             (int_x86_sse2_cvtss2sd VR128:$src1, sse_load_f32:$src2))],
-                      IIC_SSE_CVT_Scalar_RM>, XS, Requires<[HasSSE2]>;
+                      IIC_SSE_CVT_Scalar_RM>, XS, Requires<[UseSSE2]>;
 }
 
 // Convert packed single/double fp to doubleword
@@ -1893,7 +1893,7 @@ let Predicates = [HasAVX] in {
             (VCVTTPS2DQYrm addr:$src)>;
 }
 
-let Predicates = [HasSSE2] in {
+let Predicates = [UseSSE2] in {
   def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))),
             (CVTDQ2PSrr VR128:$src)>;
   def : Pat<(v4f32 (sint_to_fp (bc_v4i32 (memopv2i64 addr:$src)))),
@@ -1983,7 +1983,7 @@ def VCVTPS2PDYrm : I<0x5A, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src),
                      IIC_SSE_CVT_PD_RM>, TB, VEX;
 }
 
-let Predicates = [HasSSE2] in {
+let Predicates = [UseSSE2] in {
 def CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                    "cvtps2pd\t{$src, $dst|$dst, $src}",
                    [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))],
@@ -2098,7 +2098,7 @@ let Predicates = [HasAVX] in {
             (VCVTPS2PDYrm addr:$src)>;
 }
 
-let Predicates = [HasSSE2] in {
+let Predicates = [UseSSE2] in {
   // Match fextend for 128 conversions
   def : Pat<(v2f64 (X86vfpext (v4f32 VR128:$src))),
             (CVTPS2PDrr VR128:$src)>;
@@ -2325,14 +2325,14 @@ def : Pat<(v4i64 (X86cmpp (v4f64 VR256:$src1), (memop addr:$src2), imm:$cc)),
           (VCMPPDYrmi VR256:$src1, addr:$src2, imm:$cc)>;
 }
 
-let Predicates = [HasSSE1] in {
+let Predicates = [UseSSE1] in {
 def : Pat<(v4i32 (X86cmpp (v4f32 VR128:$src1), VR128:$src2, imm:$cc)),
           (CMPPSrri (v4f32 VR128:$src1), (v4f32 VR128:$src2), imm:$cc)>;
 def : Pat<(v4i32 (X86cmpp (v4f32 VR128:$src1), (memop addr:$src2), imm:$cc)),
           (CMPPSrmi (v4f32 VR128:$src1), addr:$src2, imm:$cc)>;
 }
 
-let Predicates = [HasSSE2] in {
+let Predicates = [UseSSE2] in {
 def : Pat<(v2i64 (X86cmpp (v2f64 VR128:$src1), VR128:$src2, imm:$cc)),
           (CMPPDrri VR128:$src1, VR128:$src2, imm:$cc)>;
 def : Pat<(v2i64 (X86cmpp (v2f64 VR128:$src1), (memop addr:$src2), imm:$cc)),
@@ -2409,7 +2409,7 @@ let Predicates = [HasAVX] in {
             (VSHUFPDYrmi VR256:$src1, addr:$src2, imm:$imm)>;
 }
 
-let Predicates = [HasSSE1] in {
+let Predicates = [UseSSE1] in {
   def : Pat<(v4i32 (X86Shufp VR128:$src1,
                     (bc_v4i32 (memopv2i64 addr:$src2)), (i8 imm:$imm))),
             (SHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>;
@@ -2417,7 +2417,7 @@ let Predicates = [HasSSE1] in {
             (SHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>;
 }
 
-let Predicates = [HasSSE2] in {
+let Predicates = [UseSSE2] in {
   // Generic SHUFPD patterns
   def : Pat<(v2i64 (X86Shufp VR128:$src1,
                     (memopv2i64 addr:$src2), (i8 imm:$imm))),
@@ -2518,7 +2518,7 @@ let Predicates = [HasAVX], AddedComplexity = 1 in {
             (VUNPCKLPDrr VR128:$src, VR128:$src)>;
 }
 
-let Predicates = [HasSSE2] in {
+let Predicates = [UseSSE2] in {
   // FIXME: Instead of X86Movddup, there should be a X86Unpckl here, the
   // problem is during lowering, where it's not possible to recognize the load
   // fold cause it has two uses through a bitcast. One use disappears at isel
@@ -2587,16 +2587,16 @@ defm MOVMSKPD : sse12_extr_sign_mask<VR128, int_x86_sse2_movmsk_pd, "movmskpd",
 
 def : Pat<(i32 (X86fgetsign FR32:$src)),
           (MOVMSKPSrr32 (COPY_TO_REGCLASS FR32:$src, VR128))>,
-      Requires<[HasSSE1]>;
+      Requires<[UseSSE1]>;
 def : Pat<(i64 (X86fgetsign FR32:$src)),
           (MOVMSKPSrr64 (COPY_TO_REGCLASS FR32:$src, VR128))>,
-      Requires<[HasSSE1]>;
+      Requires<[UseSSE1]>;
 def : Pat<(i32 (X86fgetsign FR64:$src)),
          (MOVMSKPDrr32 (COPY_TO_REGCLASS FR64:$src, VR128))>,
-      Requires<[HasSSE2]>;
+      Requires<[UseSSE2]>;
 def : Pat<(i64 (X86fgetsign FR64:$src)),
          (MOVMSKPDrr64 (COPY_TO_REGCLASS FR64:$src, VR128))>,
-      Requires<[HasSSE2]>;
+      Requires<[UseSSE2]>;
 
 //===---------------------------------------------------------------------===//
 // SSE2 - Packed Integer Logical Instructions
@@ -2981,7 +2981,7 @@ multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr,
   def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src),
                 !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
                 [(set FR32:$dst, (OpNode (load addr:$src)))], itins.rm>, XS,
-            Requires<[HasSSE1, OptForSize]>;
+            Requires<[UseSSE1, OptForSize]>;
   def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                     !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
                     [(set VR128:$dst, (F32Int VR128:$src))], itins.rr>;
@@ -3066,7 +3066,7 @@ multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr,
   def SDm : I<opc, MRMSrcMem, (outs FR64:$dst), (ins f64mem:$src),
                 !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
                 [(set FR64:$dst, (OpNode (load addr:$src)))], itins.rm>, XD,
-            Requires<[HasSSE2, OptForSize]>;
+            Requires<[UseSSE2, OptForSize]>;
   def SDr_Int : SDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                     !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
                     [(set VR128:$dst, (F64Int VR128:$src))], itins.rr>;
@@ -3326,7 +3326,7 @@ def MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                     IIC_SSE_MOVNT>;
 
 def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst),
-          (MOVNTDQmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
+          (MOVNTDQmr addr:$dst, VR128:$src)>, Requires<[UseSSE2]>;
 
 // There is no AVX form for instructions below this point
 def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
@@ -3486,7 +3486,7 @@ def MOVDQArr : PDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
 
 def MOVDQUrr :   I<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                    "movdqu\t{$src, $dst|$dst, $src}",
-                   [], IIC_SSE_MOVU_P_RR>, XS, Requires<[HasSSE2]>;
+                   [], IIC_SSE_MOVU_P_RR>, XS, Requires<[UseSSE2]>;
 
 // For Disassembler
 let isCodeGenOnly = 1 in {
@@ -3496,7 +3496,7 @@ def MOVDQArr_REV : PDI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
 
 def MOVDQUrr_REV :   I<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
                        "movdqu\t{$src, $dst|$dst, $src}",
-                       [], IIC_SSE_MOVU_P_RR>, XS, Requires<[HasSSE2]>;
+                       [], IIC_SSE_MOVU_P_RR>, XS, Requires<[UseSSE2]>;
 }
 
 let canFoldAsLoad = 1, mayLoad = 1 in {
@@ -3508,7 +3508,7 @@ def MOVDQUrm :   I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
                    "movdqu\t{$src, $dst|$dst, $src}",
                    [/*(set VR128:$dst, (loadv2i64 addr:$src))*/],
                    IIC_SSE_MOVU_P_RM>,
-                   XS, Requires<[HasSSE2]>;
+                   XS, Requires<[UseSSE2]>;
 }
 
 let mayStore = 1 in {
@@ -3520,7 +3520,7 @@ def MOVDQUmr :   I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
                    "movdqu\t{$src, $dst|$dst, $src}",
                    [/*(store (v2i64 VR128:$src), addr:$dst)*/],
                    IIC_SSE_MOVU_P_MR>,
-                   XS, Requires<[HasSSE2]>;
+                   XS, Requires<[UseSSE2]>;
 }
 
 // Intrinsic forms of MOVDQU load and store
@@ -3534,7 +3534,7 @@ def MOVDQUmr_Int :   I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
                        "movdqu\t{$src, $dst|$dst, $src}",
                        [(int_x86_sse2_storeu_dq addr:$dst, VR128:$src)],
                        IIC_SSE_MOVU_P_MR>,
-                       XS, Requires<[HasSSE2]>;
+                       XS, Requires<[UseSSE2]>;
 }
 
 } // ExeDomain = SSEPackedInt
@@ -4032,7 +4032,7 @@ let Predicates = [HasAVX2] in {
             (VPSRLDQYri VR256:$src1, (BYTE_imm imm:$src2))>;
 }
 
-let Predicates = [HasSSE2] in {
+let Predicates = [UseSSE2] in {
   def : Pat<(int_x86_sse2_psll_dq VR128:$src1, imm:$src2),
             (PSLLDQri VR128:$src1, (BYTE_imm imm:$src2))>;
   def : Pat<(int_x86_sse2_psrl_dq VR128:$src1, imm:$src2),
@@ -4214,7 +4214,7 @@ let Predicates = [HasAVX2] in {
  defm VPSHUFLW : sse2_pshuffle_y<"vpshuflw", v16i16, X86PShuflw>, XD, VEX;
 }
 
-let Predicates = [HasSSE2] in {
+let Predicates = [UseSSE2] in {
  let AddedComplexity = 5 in
   defm PSHUFD : sse2_pshuffle<"pshufd", v4i32, X86PShufd>, TB, OpSize;
 
@@ -4377,7 +4377,7 @@ let Predicates = [HasAVX] in {
 }
 
 let Constraints = "$src1 = $dst" in
-  defm PINSRW : sse2_pinsrw, TB, OpSize, Requires<[HasSSE2]>;
+  defm PINSRW : sse2_pinsrw, TB, OpSize, Requires<[UseSSE2]>;
 
 } // ExeDomain = SSEPackedInt
 
@@ -4661,7 +4661,7 @@ let Predicates = [HasAVX] in {
                    (SUBREG_TO_REG (i64 0), (VMOVZQI2PQIrr GR64:$src), sub_xmm)>;
 }
 
-let Predicates = [HasSSE2], AddedComplexity = 20 in {
+let Predicates = [UseSSE2], AddedComplexity = 20 in {
   def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))),
             (MOVZDI2PDIrm addr:$src)>;
   def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
@@ -4701,7 +4701,7 @@ def MOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
                     [(set VR128:$dst,
                       (v2i64 (scalar_to_vector (loadi64 addr:$src))))],
                     IIC_SSE_MOVDQ>, XS,
-                    Requires<[HasSSE2]>; // SSE2 instruction with XS Prefix
+                    Requires<[UseSSE2]>; // SSE2 instruction with XS Prefix
 
 //===---------------------------------------------------------------------===//
 // Move Packed Quadword Int to Quadword Int
@@ -4744,7 +4744,7 @@ def MOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
                       (v2i64 (X86vzmovl (v2i64 (scalar_to_vector
                                                 (loadi64 addr:$src))))))],
                     IIC_SSE_MOVDQ>,
-                    XS, Requires<[HasSSE2]
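Every hunk in this file applies the same recipe: templates and patterns that have a VEX twin move from Has* to Use*, while anything without one (CRC32, per the SS42FI note, and the MMX forms) keeps Has*. A hypothetical instantiation of the updated SSI template shows what each flip amounts to after expansion (the def below is illustrative and not part of the patch; addss itself is defined through multiclasses in the real file):

    // SSI now implies Requires<[UseSSE1]>, so with -mattr=+avx this record
    // is dead and only its 'v'-prefixed VEX twin can be selected.
    def MYADDSSrr : SSI<0x58, MRMSrcReg, (outs FR32:$dst),
                        (ins FR32:$src1, FR32:$src2),
                        "addss\t{$src2, $dst|$dst, $src2}",
                        [(set FR32:$dst, (fadd FR32:$src1, FR32:$src2))]>;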