diff options
-rw-r--r-- | include/llvm/IntrinsicsX86.td | 165 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrSSE.td | 517 | ||||
-rw-r--r-- | test/CodeGen/X86/avx2-intrinsics-x86.ll | 384 |
3 files changed, 920 insertions, 146 deletions
diff --git a/include/llvm/IntrinsicsX86.td b/include/llvm/IntrinsicsX86.td index 912307b939..0f3c01d57c 100644 --- a/include/llvm/IntrinsicsX86.td +++ b/include/llvm/IntrinsicsX86.td @@ -1361,6 +1361,171 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". } //===----------------------------------------------------------------------===// +// AVX2 + +// Integer arithmetic ops. +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". + def int_x86_avx2_padds_b : GCCBuiltin<"__builtin_ia32_paddsb256">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, + llvm_v32i8_ty], [IntrNoMem, Commutative]>; + def int_x86_avx2_padds_w : GCCBuiltin<"__builtin_ia32_paddsw256">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, + llvm_v16i16_ty], [IntrNoMem, Commutative]>; + def int_x86_avx2_paddus_b : GCCBuiltin<"__builtin_ia32_paddusb256">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, + llvm_v32i8_ty], [IntrNoMem, Commutative]>; + def int_x86_avx2_paddus_w : GCCBuiltin<"__builtin_ia32_paddusw256">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, + llvm_v16i16_ty], [IntrNoMem, Commutative]>; + def int_x86_avx2_psubs_b : GCCBuiltin<"__builtin_ia32_psubsb256">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, + llvm_v32i8_ty], [IntrNoMem]>; + def int_x86_avx2_psubs_w : GCCBuiltin<"__builtin_ia32_psubsw256">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, + llvm_v16i16_ty], [IntrNoMem]>; + def int_x86_avx2_psubus_b : GCCBuiltin<"__builtin_ia32_psubusb256">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, + llvm_v32i8_ty], [IntrNoMem]>; + def int_x86_avx2_psubus_w : GCCBuiltin<"__builtin_ia32_psubusw256">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, + llvm_v16i16_ty], [IntrNoMem]>; + def int_x86_avx2_pmulhu_w : GCCBuiltin<"__builtin_ia32_pmulhuw256">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, + llvm_v16i16_ty], [IntrNoMem, Commutative]>; + def int_x86_avx2_pmulh_w : GCCBuiltin<"__builtin_ia32_pmulhw256">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, + llvm_v16i16_ty], [IntrNoMem, Commutative]>; + def int_x86_avx2_pmulu_dq : GCCBuiltin<"__builtin_ia32_pmuludq256">, + Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, + llvm_v8i32_ty], [IntrNoMem, Commutative]>; + def int_x86_avx2_pmadd_wd : GCCBuiltin<"__builtin_ia32_pmaddwd256">, + Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, + llvm_v16i16_ty], [IntrNoMem, Commutative]>; + def int_x86_avx2_pavg_b : GCCBuiltin<"__builtin_ia32_pavgb256">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, + llvm_v32i8_ty], [IntrNoMem, Commutative]>; + def int_x86_avx2_pavg_w : GCCBuiltin<"__builtin_ia32_pavgw256">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, + llvm_v16i16_ty], [IntrNoMem, Commutative]>; + def int_x86_avx2_pmaxu_b : GCCBuiltin<"__builtin_ia32_pmaxub256">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, + llvm_v32i8_ty], [IntrNoMem, Commutative]>; + def int_x86_avx2_pmaxs_w : GCCBuiltin<"__builtin_ia32_pmaxsw256">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, + llvm_v16i16_ty], [IntrNoMem, Commutative]>; + def int_x86_avx2_pminu_b : GCCBuiltin<"__builtin_ia32_pminub256">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, + llvm_v32i8_ty], [IntrNoMem, Commutative]>; + def int_x86_avx2_pmins_w : GCCBuiltin<"__builtin_ia32_pminsw256">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, + llvm_v16i16_ty], [IntrNoMem, Commutative]>; + def int_x86_avx2_psad_bw : GCCBuiltin<"__builtin_ia32_psadbw256">, + Intrinsic<[llvm_v4i64_ty], [llvm_v32i8_ty, + llvm_v32i8_ty], [IntrNoMem, Commutative]>; +} + +// Integer shift ops. +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". + def int_x86_avx2_psll_w : GCCBuiltin<"__builtin_ia32_psllw256">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, + llvm_v8i16_ty], [IntrNoMem]>; + def int_x86_avx2_psll_d : GCCBuiltin<"__builtin_ia32_pslld256">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, + llvm_v4i32_ty], [IntrNoMem]>; + def int_x86_avx2_psll_q : GCCBuiltin<"__builtin_ia32_psllq256">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, + llvm_v2i64_ty], [IntrNoMem]>; + def int_x86_avx2_psrl_w : GCCBuiltin<"__builtin_ia32_psrlw256">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, + llvm_v8i16_ty], [IntrNoMem]>; + def int_x86_avx2_psrl_d : GCCBuiltin<"__builtin_ia32_psrld256">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, + llvm_v4i32_ty], [IntrNoMem]>; + def int_x86_avx2_psrl_q : GCCBuiltin<"__builtin_ia32_psrlq256">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, + llvm_v2i64_ty], [IntrNoMem]>; + def int_x86_avx2_psra_w : GCCBuiltin<"__builtin_ia32_psraw256">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, + llvm_v8i16_ty], [IntrNoMem]>; + def int_x86_avx2_psra_d : GCCBuiltin<"__builtin_ia32_psrad256">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, + llvm_v4i32_ty], [IntrNoMem]>; + + def int_x86_avx2_pslli_w : GCCBuiltin<"__builtin_ia32_psllwi256">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, + llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx2_pslli_d : GCCBuiltin<"__builtin_ia32_pslldi256">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, + llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx2_pslli_q : GCCBuiltin<"__builtin_ia32_psllqi256">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, + llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx2_psrli_w : GCCBuiltin<"__builtin_ia32_psrlwi256">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, + llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx2_psrli_d : GCCBuiltin<"__builtin_ia32_psrldi256">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, + llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx2_psrli_q : GCCBuiltin<"__builtin_ia32_psrlqi256">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, + llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx2_psrai_w : GCCBuiltin<"__builtin_ia32_psrawi256">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, + llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx2_psrai_d : GCCBuiltin<"__builtin_ia32_psradi256">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, + llvm_i32_ty], [IntrNoMem]>; + + def int_x86_avx2_psll_dq : GCCBuiltin<"__builtin_ia32_pslldqi256">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, + llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx2_psrl_dq : GCCBuiltin<"__builtin_ia32_psrldqi256">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, + llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx2_psll_dq_bs : GCCBuiltin<"__builtin_ia32_pslldqi256_byteshift">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, + llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx2_psrl_dq_bs : GCCBuiltin<"__builtin_ia32_psrldqi256_byteshift">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, + llvm_i32_ty], [IntrNoMem]>; +} + +// Integer comparison ops +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". + def int_x86_avx2_pcmpeq_b : GCCBuiltin<"__builtin_ia32_pcmpeqb256">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], + [IntrNoMem, Commutative]>; + def int_x86_avx2_pcmpeq_w : GCCBuiltin<"__builtin_ia32_pcmpeqw256">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], + [IntrNoMem, Commutative]>; + def int_x86_avx2_pcmpeq_d : GCCBuiltin<"__builtin_ia32_pcmpeqd256">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], + [IntrNoMem, Commutative]>; + def int_x86_avx2_pcmpgt_b : GCCBuiltin<"__builtin_ia32_pcmpgtb256">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, + llvm_v32i8_ty], [IntrNoMem]>; + def int_x86_avx2_pcmpgt_w : GCCBuiltin<"__builtin_ia32_pcmpgtw256">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, + llvm_v16i16_ty], [IntrNoMem]>; + def int_x86_avx2_pcmpgt_d : GCCBuiltin<"__builtin_ia32_pcmpgtd256">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, + llvm_v8i32_ty], [IntrNoMem]>; +} + +// Pack ops. +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". + def int_x86_avx2_packsswb : GCCBuiltin<"__builtin_ia32_packsswb256">, + Intrinsic<[llvm_v32i8_ty], [llvm_v16i16_ty, + llvm_v16i16_ty], [IntrNoMem]>; + def int_x86_avx2_packssdw : GCCBuiltin<"__builtin_ia32_packssdw256">, + Intrinsic<[llvm_v16i16_ty], [llvm_v8i32_ty, + llvm_v8i32_ty], [IntrNoMem]>; + def int_x86_avx2_packuswb : GCCBuiltin<"__builtin_ia32_packuswb256">, + Intrinsic<[llvm_v32i8_ty], [llvm_v16i16_ty, + llvm_v16i16_ty], [IntrNoMem]>; +} + +//===----------------------------------------------------------------------===// // MMX // Empty MMX state op. diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index d3ced23450..b5eea45780 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -3343,64 +3343,68 @@ let Predicates = [HasAVX] in { let ExeDomain = SSEPackedInt in { // SSE integer instructions multiclass PDI_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId, - bit IsCommutable = 0, bit Is2Addr = 1> { + RegisterClass RC, PatFrag memop_frag, + X86MemOperand x86memop, bit IsCommutable = 0, + bit Is2Addr = 1> { let isCommutable = IsCommutable in - def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2), + def rr : PDI<opc, MRMSrcReg, (outs RC:$dst), + (ins RC:$src1, RC:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2))]>; - def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, i128mem:$src2), + [(set RC:$dst, (IntId RC:$src1, RC:$src2))]>; + def rm : PDI<opc, MRMSrcMem, (outs RC:$dst), + (ins RC:$src1, x86memop:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set VR128:$dst, (IntId VR128:$src1, - (bitconvert (memopv2i64 addr:$src2))))]>; + [(set RC:$dst, (IntId RC:$src1, (bitconvert (memop_frag addr:$src2))))]>; } multiclass PDI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm, string OpcodeStr, Intrinsic IntId, - Intrinsic IntId2, bit Is2Addr = 1> { - def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2), + Intrinsic IntId2, RegisterClass RC, + bit Is2Addr = 1> { + // src2 is always 128-bit + def rr : PDI<opc, MRMSrcReg, (outs RC:$dst), + (ins RC:$src1, VR128:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2))]>; - def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, i128mem:$src2), + [(set RC:$dst, (IntId RC:$src1, VR128:$src2))]>; + def rm : PDI<opc, MRMSrcMem, (outs RC:$dst), + (ins RC:$src1, i128mem:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set VR128:$dst, (IntId VR128:$src1, - (bitconvert (memopv2i64 addr:$src2))))]>; - def ri : PDIi8<opc2, ImmForm, (outs VR128:$dst), - (ins VR128:$src1, i32i8imm:$src2), + [(set RC:$dst, (IntId RC:$src1, (bitconvert (memopv2i64 addr:$src2))))]>; + def ri : PDIi8<opc2, ImmForm, (outs RC:$dst), + (ins RC:$src1, i32i8imm:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set VR128:$dst, (IntId2 VR128:$src1, (i32 imm:$src2)))]>; + [(set RC:$dst, (IntId2 RC:$src1, (i32 imm:$src2)))]>; } /// PDI_binop_rm - Simple SSE2 binary operator. multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, - ValueType OpVT, bit IsCommutable = 0, bit Is2Addr = 1> { + ValueType OpVT, RegisterClass RC, PatFrag memop_frag, + X86MemOperand x86memop, bit IsCommutable = 0, + bit Is2Addr = 1> { let isCommutable = IsCommutable in - def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2), + def rr : PDI<opc, MRMSrcReg, (outs RC:$dst), + (ins RC:$src1, RC:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set VR128:$dst, (OpVT (OpNode VR128:$src1, VR128:$src2)))]>; - def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, i128mem:$src2), + [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))]>; + def rm : PDI<opc, MRMSrcMem, (outs RC:$dst), + (ins RC:$src1, x86memop:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set VR128:$dst, (OpVT (OpNode VR128:$src1, - (bitconvert (memopv2i64 addr:$src2)))))]>; + [(set RC:$dst, (OpVT (OpNode RC:$src1, + (bitconvert (memop_frag addr:$src2)))))]>; } /// PDI_binop_rm_v2i64 - Simple SSE2 binary operator whose type is v2i64. @@ -3425,93 +3429,203 @@ multiclass PDI_binop_rm_v2i64<bits<8> opc, string OpcodeStr, SDNode OpNode, [(set VR128:$dst, (OpNode VR128:$src1, (memopv2i64 addr:$src2)))]>; } +/// PDI_binop_rm_v4i64 - Simple AVX2 binary operator whose type is v4i64. +/// +/// FIXME: we could eliminate this and use PDI_binop_rm instead if tblgen knew +/// to collapse (bitconvert VT to VT) into its operand. +/// +multiclass PDI_binop_rm_v4i64<bits<8> opc, string OpcodeStr, SDNode OpNode, + bit IsCommutable = 0> { + let isCommutable = IsCommutable in + def rr : PDI<opc, MRMSrcReg, (outs VR256:$dst), + (ins VR256:$src1, VR256:$src2), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(set VR256:$dst, (v4i64 (OpNode VR256:$src1, VR256:$src2)))]>; + def rm : PDI<opc, MRMSrcMem, (outs VR256:$dst), + (ins VR256:$src1, i256mem:$src2), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(set VR256:$dst, (OpNode VR256:$src1, (memopv4i64 addr:$src2)))]>; +} + } // ExeDomain = SSEPackedInt // 128-bit Integer Arithmetic let Predicates = [HasAVX] in { -defm VPADDB : PDI_binop_rm<0xFC, "vpaddb", add, v16i8, 1, 0 /*3addr*/>, VEX_4V; -defm VPADDW : PDI_binop_rm<0xFD, "vpaddw", add, v8i16, 1, 0>, VEX_4V; -defm VPADDD : PDI_binop_rm<0xFE, "vpaddd", add, v4i32, 1, 0>, VEX_4V; +defm VPADDB : PDI_binop_rm<0xFC, "vpaddb", add, v16i8, VR128, memopv2i64, + i128mem, 1, 0 /*3addr*/>, VEX_4V; +defm VPADDW : PDI_binop_rm<0xFD, "vpaddw", add, v8i16, VR128, memopv2i64, + i128mem, 1, 0>, VEX_4V; +defm VPADDD : PDI_binop_rm<0xFE, "vpaddd", add, v4i32, VR128, memopv2i64, + i128mem, 1, 0>, VEX_4V; defm VPADDQ : PDI_binop_rm_v2i64<0xD4, "vpaddq", add, 1, 0>, VEX_4V; -defm VPMULLW : PDI_binop_rm<0xD5, "vpmullw", mul, v8i16, 1, 0>, VEX_4V; -defm VPSUBB : PDI_binop_rm<0xF8, "vpsubb", sub, v16i8, 0, 0>, VEX_4V; -defm VPSUBW : PDI_binop_rm<0xF9, "vpsubw", sub, v8i16, 0, 0>, VEX_4V; -defm VPSUBD : PDI_binop_rm<0xFA, "vpsubd", sub, v4i32, 0, 0>, VEX_4V; +defm VPMULLW : PDI_binop_rm<0xD5, "vpmullw", mul, v8i16, VR128, memopv2i64, + i128mem, 1, 0>, VEX_4V; +defm VPSUBB : PDI_binop_rm<0xF8, "vpsubb", sub, v16i8, VR128, memopv2i64, + i128mem, 0, 0>, VEX_4V; +defm VPSUBW : PDI_binop_rm<0xF9, "vpsubw", sub, v8i16, VR128, memopv2i64, + i128mem, 0, 0>, VEX_4V; +defm VPSUBD : PDI_binop_rm<0xFA, "vpsubd", sub, v4i32, VR128, memopv2i64, + i128mem, 0, 0>, VEX_4V; defm VPSUBQ : PDI_binop_rm_v2i64<0xFB, "vpsubq", sub, 0, 0>, VEX_4V; // Intrinsic forms -defm VPSUBSB : PDI_binop_rm_int<0xE8, "vpsubsb" , int_x86_sse2_psubs_b, 0, 0>, - VEX_4V; -defm VPSUBSW : PDI_binop_rm_int<0xE9, "vpsubsw" , int_x86_sse2_psubs_w, 0, 0>, - VEX_4V; -defm VPSUBUSB : PDI_binop_rm_int<0xD8, "vpsubusb", int_x86_sse2_psubus_b, 0, 0>, - VEX_4V; -defm VPSUBUSW : PDI_binop_rm_int<0xD9, "vpsubusw", int_x86_sse2_psubus_w, 0, 0>, - VEX_4V; -defm VPADDSB : PDI_binop_rm_int<0xEC, "vpaddsb" , int_x86_sse2_padds_b, 1, 0>, - VEX_4V; -defm VPADDSW : PDI_binop_rm_int<0xED, "vpaddsw" , int_x86_sse2_padds_w, 1, 0>, - VEX_4V; -defm VPADDUSB : PDI_binop_rm_int<0xDC, "vpaddusb", int_x86_sse2_paddus_b, 1, 0>, - VEX_4V; -defm VPADDUSW : PDI_binop_rm_int<0xDD, "vpaddusw", int_x86_sse2_paddus_w, 1, 0>, - VEX_4V; -defm VPMULHUW : PDI_binop_rm_int<0xE4, "vpmulhuw", int_x86_sse2_pmulhu_w, 1, 0>, - VEX_4V; -defm VPMULHW : PDI_binop_rm_int<0xE5, "vpmulhw" , int_x86_sse2_pmulh_w, 1, 0>, - VEX_4V; -defm VPMULUDQ : PDI_binop_rm_int<0xF4, "vpmuludq", int_x86_sse2_pmulu_dq, 1, 0>, - VEX_4V; -defm VPMADDWD : PDI_binop_rm_int<0xF5, "vpmaddwd", int_x86_sse2_pmadd_wd, 1, 0>, - VEX_4V; -defm VPAVGB : PDI_binop_rm_int<0xE0, "vpavgb", int_x86_sse2_pavg_b, 1, 0>, - VEX_4V; -defm VPAVGW : PDI_binop_rm_int<0xE3, "vpavgw", int_x86_sse2_pavg_w, 1, 0>, - VEX_4V; -defm VPMINUB : PDI_binop_rm_int<0xDA, "vpminub", int_x86_sse2_pminu_b, 1, 0>, - VEX_4V; -defm VPMINSW : PDI_binop_rm_int<0xEA, "vpminsw", int_x86_sse2_pmins_w, 1, 0>, - VEX_4V; -defm VPMAXUB : PDI_binop_rm_int<0xDE, "vpmaxub", int_x86_sse2_pmaxu_b, 1, 0>, - VEX_4V; -defm VPMAXSW : PDI_binop_rm_int<0xEE, "vpmaxsw", int_x86_sse2_pmaxs_w, 1, 0>, - VEX_4V; -defm VPSADBW : PDI_binop_rm_int<0xF6, "vpsadbw", int_x86_sse2_psad_bw, 1, 0>, - VEX_4V; +defm VPSUBSB : PDI_binop_rm_int<0xE8, "vpsubsb" , int_x86_sse2_psubs_b, + VR128, memopv2i64, i128mem, 0, 0>, VEX_4V; +defm VPSUBSW : PDI_binop_rm_int<0xE9, "vpsubsw" , int_x86_sse2_psubs_w, + VR128, memopv2i64, i128mem, 0, 0>, VEX_4V; +defm VPSUBUSB : PDI_binop_rm_int<0xD8, "vpsubusb", int_x86_sse2_psubus_b, + VR128, memopv2i64, i128mem, 0, 0>, VEX_4V; +defm VPSUBUSW : PDI_binop_rm_int<0xD9, "vpsubusw", int_x86_sse2_psubus_w, + VR128, memopv2i64, i128mem, 0, 0>, VEX_4V; +defm VPADDSB : PDI_binop_rm_int<0xEC, "vpaddsb" , int_x86_sse2_padds_b, + VR128, memopv2i64, i128mem, 1, 0>, VEX_4V; +defm VPADDSW : PDI_binop_rm_int<0xED, "vpaddsw" , int_x86_sse2_padds_w, + VR128, memopv2i64, i128mem, 1, 0>, VEX_4V; +defm VPADDUSB : PDI_binop_rm_int<0xDC, "vpaddusb", int_x86_sse2_paddus_b, + VR128, memopv2i64, i128mem, 1, 0>, VEX_4V; +defm VPADDUSW : PDI_binop_rm_int<0xDD, "vpaddusw", int_x86_sse2_paddus_w, + VR128, memopv2i64, i128mem, 1, 0>, VEX_4V; +defm VPMULHUW : PDI_binop_rm_int<0xE4, "vpmulhuw", int_x86_sse2_pmulhu_w, + VR128, memopv2i64, i128mem, 1, 0>, VEX_4V; +defm VPMULHW : PDI_binop_rm_int<0xE5, "vpmulhw" , int_x86_sse2_pmulh_w, + VR128, memopv2i64, i128mem, 1, 0>, VEX_4V; +defm VPMULUDQ : PDI_binop_rm_int<0xF4, "vpmuludq", int_x86_sse2_pmulu_dq, + VR128, memopv2i64, i128mem, 1, 0>, VEX_4V; +defm VPMADDWD : PDI_binop_rm_int<0xF5, "vpmaddwd", int_x86_sse2_pmadd_wd, + VR128, memopv2i64, i128mem, 1, 0>, VEX_4V; +defm VPAVGB : PDI_binop_rm_int<0xE0, "vpavgb", int_x86_sse2_pavg_b, + VR128, memopv2i64, i128mem, 1, 0>, VEX_4V; +defm VPAVGW : PDI_binop_rm_int<0xE3, "vpavgw", int_x86_sse2_pavg_w, + VR128, memopv2i64, i128mem, 1, 0>, VEX_4V; +defm VPMINUB : PDI_binop_rm_int<0xDA, "vpminub", int_x86_sse2_pminu_b, + VR128, memopv2i64, i128mem, 1, 0>, VEX_4V; +defm VPMINSW : PDI_binop_rm_int<0xEA, "vpminsw", int_x86_sse2_pmins_w, + VR128, memopv2i64, i128mem, 1, 0>, VEX_4V; +defm VPMAXUB : PDI_binop_rm_int<0xDE, "vpmaxub", int_x86_sse2_pmaxu_b, + VR128, memopv2i64, i128mem, 1, 0>, VEX_4V; +defm VPMAXSW : PDI_binop_rm_int<0xEE, "vpmaxsw", int_x86_sse2_pmaxs_w, + VR128, memopv2i64, i128mem, 1, 0>, VEX_4V; +defm VPSADBW : PDI_binop_rm_int<0xF6, "vpsadbw", int_x86_sse2_psad_bw, + VR128, memopv2i64, i128mem, 1, 0>, VEX_4V; +} + +let Predicates = [HasAVX2] in { +defm VPADDBY : PDI_binop_rm<0xFC, "vpaddb", add, v32i8, VR256, memopv4i64, + i256mem, 1, 0>, VEX_4V; +defm VPADDWY : PDI_binop_rm<0xFD, "vpaddw", add, v16i16, VR256, memopv4i64, + i256mem, 1, 0>, VEX_4V; +defm VPADDDY : PDI_binop_rm<0xFE, "vpaddd", add, v8i32, VR256, memopv4i64, + i256mem, 1, 0>, VEX_4V; +defm VPADDQY : PDI_binop_rm_v4i64<0xD4, "vpaddq", add, 1>, VEX_4V; +defm VPMULLWY : PDI_binop_rm<0xD5, "vpmullw", mul, v16i16, VR256, memopv4i64, + i256mem, 1, 0>, VEX_4V; +defm VPSUBBY : PDI_binop_rm<0xF8, "vpsubb", sub, v32i8, VR256, memopv4i64, + i256mem, 0, 0>, VEX_4V; +defm VPSUBWY : PDI_binop_rm<0xF9, "vpsubw", sub, v16i16,VR256, memopv4i64, + i256mem, 0, 0>, VEX_4V; +defm VPSUBDY : PDI_binop_rm<0xFA, "vpsubd", sub, v8i32, VR256, memopv4i64, + i256mem, 0, 0>, VEX_4V; +defm VPSUBQY : PDI_binop_rm_v4i64<0xFB, "vpsubq", sub, 0>, VEX_4V; + +// Intrinsic forms +defm VPSUBSBY : PDI_binop_rm_int<0xE8, "vpsubsb" , int_x86_avx2_psubs_b, + VR256, memopv4i64, i256mem, 0, 0>, VEX_4V; +defm VPSUBSWY : PDI_binop_rm_int<0xE9, "vpsubsw" , int_x86_avx2_psubs_w, + VR256, memopv4i64, i256mem, 0, 0>, VEX_4V; +defm VPSUBUSBY : PDI_binop_rm_int<0xD8, "vpsubusb", int_x86_avx2_psubus_b, + VR256, memopv4i64, i256mem, 0, 0>, VEX_4V; +defm VPSUBUSWY : PDI_binop_rm_int<0xD9, "vpsubusw", int_x86_avx2_psubus_w, + VR256, memopv4i64, i256mem, 0, 0>, VEX_4V; +defm VPADDSBY : PDI_binop_rm_int<0xEC, "vpaddsb" , int_x86_avx2_padds_b, + VR256, memopv4i64, i256mem, 1, 0>, VEX_4V; +defm VPADDSWY : PDI_binop_rm_int<0xED, "vpaddsw" , int_x86_avx2_padds_w, + VR256, memopv4i64, i256mem, 1, 0>, VEX_4V; +defm VPADDUSBY : PDI_binop_rm_int<0xDC, "vpaddusb", int_x86_avx2_paddus_b, + VR256, memopv4i64, i256mem, 1, 0>, VEX_4V; +defm VPADDUSWY : PDI_binop_rm_int<0xDD, "vpaddusw", int_x86_avx2_paddus_w, + VR256, memopv4i64, i256mem, 1, 0>, VEX_4V; +defm VPMULHUWY : PDI_binop_rm_int<0xE4, "vpmulhuw", int_x86_avx2_pmulhu_w, + VR256, memopv4i64, i256mem, 1, 0>, VEX_4V; +defm VPMULHWY : PDI_binop_rm_int<0xE5, "vpmulhw" , int_x86_avx2_pmulh_w, + VR256, memopv4i64, i256mem, 1, 0>, VEX_4V; +defm VPMULUDQY : PDI_binop_rm_int<0xF4, "vpmuludq", int_x86_avx2_pmulu_dq, + VR256, memopv4i64, i256mem, 1, 0>, VEX_4V; +defm VPMADDWDY : PDI_binop_rm_int<0xF5, "vpmaddwd", int_x86_avx2_pmadd_wd, + VR256, memopv4i64, i256mem, 1, 0>, VEX_4V; +defm VPAVGBY : PDI_binop_rm_int<0xE0, "vpavgb", int_x86_avx2_pavg_b, + VR256, memopv4i64, i256mem, 1, 0>, VEX_4V; +defm VPAVGWY : PDI_binop_rm_int<0xE3, "vpavgw", int_x86_avx2_pavg_w, + VR256, memopv4i64, i256mem, 1, 0>, VEX_4V; +defm VPMINUBY : PDI_binop_rm_int<0xDA, "vpminub", int_x86_avx2_pminu_b, + VR256, memopv4i64, i256mem, 1, 0>, VEX_4V; +defm VPMINSWY : PDI_binop_rm_int<0xEA, "vpminsw", int_x86_avx2_pmins_w, + VR256, memopv4i64, i256mem, 1, 0>, VEX_4V; +defm VPMAXUBY : PDI_binop_rm_int<0xDE, "vpmaxub", int_x86_avx2_pmaxu_b, + VR256, memopv4i64, i256mem, 1, 0>, VEX_4V; +defm VPMAXSWY : PDI_binop_rm_int<0xEE, "vpmaxsw", int_x86_avx2_pmaxs_w, + VR256, memopv4i64, i256mem, 1, 0>, VEX_4V; +defm VPSADBWY : PDI_binop_rm_int<0xF6, "vpsadbw", int_x86_avx2_psad_bw, + VR256, memopv4i64, i256mem, 1, 0>, VEX_4V; } let Constraints = "$src1 = $dst" in { -defm PADDB : PDI_binop_rm<0xFC, "paddb", add, v16i8, 1>; -defm PADDW : PDI_binop_rm<0xFD, "paddw", add, v8i16, 1>; -defm PADDD : PDI_binop_rm<0xFE, "paddd", add, v4i32, 1>; +defm PADDB : PDI_binop_rm<0xFC, "paddb", add, v16i8, VR128, memopv2i64, + i128mem, 1>; +defm PADDW : PDI_binop_rm<0xFD, "paddw", add, v8i16, VR128, memopv2i64, + i128mem, 1>; +defm PADDD : PDI_binop_rm<0xFE, "paddd", add, v4i32, VR128, memopv2i64, + i128mem, 1>; defm PADDQ : PDI_binop_rm_v2i64<0xD4, "paddq", add, 1>; -defm PMULLW : PDI_binop_rm<0xD5, "pmullw", mul, v8i16, 1>; -defm PSUBB : PDI_binop_rm<0xF8, "psubb", sub, v16i8>; -defm PSUBW : PDI_binop_rm<0xF9, "psubw", sub, v8i16>; -defm PSUBD : PDI_binop_rm<0xFA, "psubd", sub, v4i32>; +defm PMULLW : PDI_binop_rm<0xD5, "pmullw", mul, v8i16, VR128, memopv2i64, + i128mem, 1>; +defm PSUBB : PDI_binop_rm<0xF8, "psubb", sub, v16i8, VR128, memopv2i64, + i128mem>; +defm PSUBW : PDI_binop_rm<0xF9, "psubw", sub, v8i16, VR128, memopv2i64, + i128mem>; +defm PSUBD : PDI_binop_rm<0xFA, "psubd", sub, v4i32, VR128, memopv2i64, + i128mem>; defm PSUBQ : PDI_binop_rm_v2i64<0xFB, "psubq", sub>; // Intrinsic forms -defm PSUBSB : PDI_binop_rm_int<0xE8, "psubsb" , int_x86_sse2_psubs_b>; -defm PSUBSW : PDI_binop_rm_int<0xE9, "psubsw" , int_x86_sse2_psubs_w>; -defm PSUBUSB : PDI_binop_rm_int<0xD8, "psubusb", int_x86_sse2_psubus_b>; -defm PSUBUSW : PDI_binop_rm_int<0xD9, "psubusw", int_x86_sse2_psubus_w>; -defm PADDSB : PDI_binop_rm_int<0xEC, "paddsb" , int_x86_sse2_padds_b, 1>; -defm PADDSW : PDI_binop_rm_int<0xED, "paddsw" , int_x86_sse2_padds_w, 1>; -defm PADDUSB : PDI_binop_rm_int<0xDC, "paddusb", int_x86_sse2_paddus_b, 1>; -defm PADDUSW : PDI_binop_rm_int<0xDD, "paddusw", int_x86_sse2_paddus_w, 1>; -defm PMULHUW : PDI_binop_rm_int<0xE4, "pmulhuw", int_x86_sse2_pmulhu_w, 1>; -defm PMULHW : PDI_binop_rm_int<0xE5, "pmulhw" , int_x86_sse2_pmulh_w, 1>; -defm PMULUDQ : PDI_binop_rm_int<0xF4, "pmuludq", int_x86_sse2_pmulu_dq, 1>; -defm PMADDWD : PDI_binop_rm_int<0xF5, "pmaddwd", int_x86_sse2_pmadd_wd, 1>; -defm PAVGB : PDI_binop_rm_int<0xE0, "pavgb", int_x86_sse2_pavg_b, 1>; -defm PAVGW : PDI_binop_rm_int<0xE3, "pavgw", int_x86_sse2_pavg_w, 1>; -defm PMINUB : PDI_binop_rm_int<0xDA, "pminub", int_x86_sse2_pminu_b, 1>; -defm PMINSW : PDI_binop_rm_int<0xEA, "pminsw", int_x86_sse2_pmins_w, 1>; -defm PMAXUB : PDI_binop_rm_int<0xDE, "pmaxub", int_x86_sse2_pmaxu_b, 1>; -defm PMAXSW : PDI_binop_rm_int<0xEE, "pmaxsw", int_x86_sse2_pmaxs_w, 1>; -defm PSADBW : PDI_binop_rm_int<0xF6, "psadbw", int_x86_sse2_psad_bw, 1>; +defm PSUBSB : PDI_binop_rm_int<0xE8, "psubsb" , int_x86_sse2_psubs_b, + VR128, memopv2i64, i128mem>; +defm PSUBSW : PDI_binop_rm_int<0xE9, "psubsw" , int_x86_sse2_psubs_w, + VR128, memopv2i64, i128mem>; +defm PSUBUSB : PDI_binop_rm_int<0xD8, "psubusb", int_x86_sse2_psubus_b, + VR128, memopv2i64, i128mem>; +defm PSUBUSW : PDI_binop_rm_int<0xD9, "psubusw", int_x86_sse2_psubus_w, + VR128, memopv2i64, i128mem>; +defm PADDSB : PDI_binop_rm_int<0xEC, "paddsb" , int_x86_sse2_padds_b, + VR128, memopv2i64, i128mem, 1>; +defm PADDSW : PDI_binop_rm_int<0xED, "paddsw" , int_x86_sse2_padds_w, + VR128, memopv2i64, i128mem, 1>; +defm PADDUSB : PDI_binop_rm_int<0xDC, "paddusb", int_x86_sse2_paddus_b, + VR128, memopv2i64, i128mem, 1>; +defm PADDUSW : PDI_binop_rm_int<0xDD, "paddusw", int_x86_sse2_paddus_w, + VR128, memopv2i64, i128mem, 1>; +defm PMULHUW : PDI_binop_rm_int<0xE4, "pmulhuw", int_x86_sse2_pmulhu_w, + VR128, memopv2i64, i128mem, 1>; +defm PMULHW : PDI_binop_rm_int<0xE5, "pmulhw" , int_x86_sse2_pmulh_w, + VR128, memopv2i64, i128mem, 1>; +defm PMULUDQ : PDI_binop_rm_int<0xF4, "pmuludq", int_x86_sse2_pmulu_dq, + VR128, memopv2i64, i128mem, 1>; +defm PMADDWD : PDI_binop_rm_int<0xF5, "pmaddwd", int_x86_sse2_pmadd_wd, + VR128, memopv2i64, i128mem, 1>; +defm PAVGB : PDI_binop_rm_int<0xE0, "pavgb", int_x86_sse2_pavg_b, + VR128, memopv2i64, i128mem, 1>; +defm PAVGW : PDI_binop_rm_int<0xE3, "pavgw", int_x86_sse2_pavg_w, + VR128, memopv2i64, i128mem, 1>; +defm PMINUB : PDI_binop_rm_int<0xDA, "pminub", int_x86_sse2_pminu_b, + VR128, memopv2i64, i128mem, 1>; +defm PMINSW : PDI_binop_rm_int<0xEA, "pminsw", int_x86_sse2_pmins_w, + VR128, memopv2i64, i128mem, 1>; +defm PMAXUB : PDI_binop_rm_int<0xDE, "pmaxub", int_x86_sse2_pmaxu_b, + VR128, memopv2i64, i128mem, 1>; +defm PMAXSW : PDI_binop_rm_int<0xEE, "pmaxsw", int_x86_sse2_pmaxs_w, + VR128, memopv2i64, i128mem, 1>; +defm PSADBW : PDI_binop_rm_int<0xF6, "psadbw", int_x86_sse2_psad_bw, + VR128, memopv2i64, i128mem, 1>; } // Constraints = "$src1 = $dst" @@ -3521,31 +3635,31 @@ defm PSADBW : PDI_binop_rm_int<0xF6, "psadbw", int_x86_sse2_psad_bw, 1>; let Predicates = [HasAVX] in { defm VPSLLW : PDI_binop_rmi_int<0xF1, 0x71, MRM6r, "vpsllw", - int_x86_sse2_psll_w, int_x86_sse2_pslli_w, 0>, - VEX_4V; + int_x86_sse2_psll_w, int_x86_sse2_pslli_w, + VR128, 0>, VEX_4V; defm VPSLLD : PDI_binop_rmi_int<0xF2, 0x72, MRM6r, "vpslld", - int_x86_sse2_psll_d, int_x86_sse2_pslli_d, 0>, - VEX_4V; + int_x86_sse2_psll_d, int_x86_sse2_pslli_d, + VR128, 0>, VEX_4V; defm VPSLLQ : PDI_binop_rmi_int<0xF3, 0x73, MRM6r, "vpsllq", - int_x86_sse2_psll_q, int_x86_sse2_pslli_q, 0>, - VEX_4V; + int_x86_sse2_psll_q, int_x86_sse2_pslli_q, + VR128, 0>, VEX_4V; defm VPSRLW : PDI_binop_rmi_int<0xD1, 0x71, MRM2r, "vpsrlw", - int_x86_sse2_psrl_w, int_x86_sse2_psrli_w, 0>, - VEX_4V; + int_x86_sse2_psrl_w, int_x86_sse2_psrli_w, + VR128, 0>, VEX_4V; defm VPSRLD : PDI_binop_rmi_int<0xD2, 0x72, MRM2r, "vpsrld", - int_x86_sse2_psrl_d, int_x86_sse2_psrli_d, 0>, - VEX_4V; + int_x86_sse2_psrl_d, int_x86_sse2_psrli_d, + VR128, 0>, VEX_4V; defm VPSRLQ : PDI_binop_rmi_int<0xD3, 0x73, MRM2r, "vpsrlq", - int_x86_sse2_psrl_q, int_x86_sse2_psrli_q, 0>, - VEX_4V; + int_x86_sse2_psrl_q, int_x86_sse2_psrli_q, + VR128, 0>, VEX_4V; defm VPSRAW : PDI_binop_rmi_int<0xE1, 0x71, MRM4r, "vpsraw", - int_x86_sse2_psra_w, int_x86_sse2_psrai_w, 0>, - VEX_4V; + int_x86_sse2_psra_w, int_x86_sse2_psrai_w, + VR128, 0>, VEX_4V; defm VPSRAD : PDI_binop_rmi_int<0xE2, 0x72, MRM4r, "vpsrad", - int_x86_sse2_psra_d, int_x86_sse2_psrai_d, 0>, - VEX_4V; + int_x86_sse2_psra_d, int_x86_sse2_psrai_d, + VR128, 0>, VEX_4V; defm VPAND : PDI_binop_rm_v2i64<0xDB, "vpand", and, 1, 0>, VEX_4V; defm VPOR : PDI_binop_rm_v2i64<0xEB, "vpor" , or, 1, 0>, VEX_4V; @@ -3578,25 +3692,92 @@ let ExeDomain = SSEPackedInt in { } } +let Predicates = [HasAVX2] in { +defm VPSLLWY : PDI_binop_rmi_int<0xF1, 0x71, MRM6r, "vpsllw", + int_x86_avx2_psll_w, int_x86_avx2_pslli_w, + VR256, 0>, VEX_4V; +defm VPSLLDY : PDI_binop_rmi_int<0xF2, 0x72, MRM6r, "vpslld", + int_x86_avx2_psll_d, int_x86_avx2_pslli_d, + VR256, 0>, VEX_4V; +defm VPSLLQY : PDI_binop_rmi_int<0xF3, 0x73, MRM6r, "vpsllq", + int_x86_avx2_psll_q, int_x86_avx2_pslli_q, + VR256, 0>, VEX_4V; + +defm VPSRLWY : PDI_binop_rmi_int<0xD1, 0x71, MRM2r, "vpsrlw", + int_x86_avx2_psrl_w, int_x86_avx2_psrli_w, + VR256, 0>, VEX_4V; +defm VPSRLDY : PDI_binop_rmi_int<0xD2, 0x72, MRM2r, "vpsrld", + int_x86_avx2_psrl_d, int_x86_avx2_psrli_d, + VR256, 0>, VEX_4V; +defm VPSRLQY : PDI_binop_rmi_int<0xD3, 0x73, MRM2r, "vpsrlq", + int_x86_avx2_psrl_q, int_x86_avx2_psrli_q, + VR256, 0>, VEX_4V; + +defm VPSRAWY : PDI_binop_rmi_int<0xE1, 0x71, MRM4r, "vpsraw", + int_x86_avx2_psra_w, int_x86_avx2_psrai_w, + VR256, 0>, VEX_4V; +defm VPSRADY : PDI_binop_rmi_int<0xE2, 0x72, MRM4r, "vpsrad", + int_x86_avx2_psra_d, int_x86_avx2_psrai_d, + VR256, 0>, VEX_4V; + +defm VPANDY : PDI_binop_rm_v4i64<0xDB, "vpand", and, 1>, VEX_4V; +defm VPORY : PDI_binop_rm_v4i64<0xEB, "vpor" , or, 1>, VEX_4V; +defm VPXORY : PDI_binop_rm_v4i64<0xEF, "vpxor", xor, 1>, VEX_4V; + +let ExeDomain = SSEPackedInt in { + let neverHasSideEffects = 1 in { + // 128-bit logical shifts. + def VPSLLDQYri : PDIi8<0x73, MRM7r, + (outs VR256:$dst), (ins VR256:$src1, i32i8imm:$src2), + "vpslldq\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, + VEX_4V; + def VPSRLDQYri : PDIi8<0x73, MRM3r, + (outs VR256:$dst), (ins VR256:$src1, i32i8imm:$src2), + "vpsrldq\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, + VEX_4V; + // PSRADQYri doesn't exist in SSE[1-3]. + } + def VPANDNYrr : PDI<0xDF, MRMSrcReg, + (outs VR256:$dst), (ins VR256:$src1, VR256:$src2), + "vpandn\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set VR256:$dst, + (v4i64 (X86andnp VR256:$src1, VR256:$src2)))]>,VEX_4V; + + def VPANDNYrm : PDI<0xDF, MRMSrcMem, + (outs VR256:$dst), (ins VR256:$src1, i256mem:$src2), + "vpandn\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set VR256:$dst, (X86andnp VR256:$src1, + (memopv4i64 addr:$src2)))]>, VEX_4V; +} +} + let Constraints = "$src1 = $dst" in { defm PSLLW : PDI_binop_rmi_int<0xF1, 0x71, MRM6r, "psllw", - int_x86_sse2_psll_w, int_x86_sse2_pslli_w>; + int_x86_sse2_psll_w, int_x86_sse2_pslli_w, + VR128>; defm PSLLD : PDI_binop_rmi_int<0xF2, 0x72, MRM6r, "pslld", - int_x86_sse2_psll_d, int_x86_sse2_pslli_d>; + int_x86_sse2_psll_d, int_x86_sse2_pslli_d, + VR128>; defm PSLLQ : PDI_binop_rmi_int<0xF3, 0x73, MRM6r, "psllq", - int_x86_sse2_psll_q, int_x86_sse2_pslli_q>; + int_x86_sse2_psll_q, int_x86_sse2_pslli_q, + VR128>; defm PSRLW : PDI_binop_rmi_int<0xD1, 0x71, MRM2r, "psrlw", - int_x86_sse2_psrl_w, int_x86_sse2_psrli_w>; + int_x86_sse2_psrl_w, int_x86_sse2_psrli_w, + VR128>; defm PSRLD : PDI_binop_rmi_int<0xD2, 0x72, MRM2r, "psrld", - int_x86_sse2_psrl_d, int_x86_sse2_psrli_d>; + int_x86_sse2_psrl_d, int_x86_sse2_psrli_d, + VR128>; defm PSRLQ : PDI_binop_rmi_int<0xD3, 0x73, MRM2r, "psrlq", - int_x86_sse2_psrl_q, int_x86_sse2_psrli_q>; + int_x86_sse2_psrl_q, int_x86_sse2_psrli_q, + VR128>; defm PSRAW : PDI_binop_rmi_int<0xE1, 0x71, MRM4r, "psraw", - int_x86_sse2_psra_w, int_x86_sse2_psrai_w>; + int_x86_sse2_psra_w, int_x86_sse2_psrai_w, + VR128>; defm PSRAD : PDI_binop_rmi_int<0xE2, 0x72, MRM4r, "psrad", - int_x86_sse2_psra_d, int_x86_sse2_psrai_d>; + int_x86_sse2_psra_d, int_x86_sse2_psrai_d, + VR128>; defm PAND : PDI_binop_rm_v2i64<0xDB, "pand", and, 1>; defm POR : PDI_binop_rm_v2i64<0xEB, "por" , or, 1>; @@ -3642,6 +3823,17 @@ let Predicates = [HasAVX] in { (v2i64 (VPSRLDQri VR128:$src, (BYTE_imm imm:$amt)))>; } +let Predicates = [HasAVX2] in { + def : Pat<(int_x86_avx2_psll_dq VR256:$src1, imm:$src2), + (v4i64 (VPSLLDQYri VR256:$src1, (BYTE_imm imm:$src2)))>; + def : Pat<(int_x86_avx2_psrl_dq VR256:$src1, imm:$src2), + (v4i64 (VPSRLDQYri VR256:$src1, (BYTE_imm imm:$src2 |