aboutsummaryrefslogtreecommitdiff
path: root/lib/Target/X86/X86InstrSSE.td
diff options
context:
space:
mode:
authorAlexander Kornienko <alexfh@google.com>2013-03-14 10:51:38 +0000
committerAlexander Kornienko <alexfh@google.com>2013-03-14 10:51:38 +0000
commit647735c781c5b37061ee03d6e9e6c7dda92218e2 (patch)
tree5a5e56606d41060263048b5a5586b3d2380898ba /lib/Target/X86/X86InstrSSE.td
parent6aed25d93d1cfcde5809a73ffa7dc1b0d6396f66 (diff)
parentf635ef401786c84df32090251a8cf45981ecca33 (diff)
Updating branches/google/stable to r176857
git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/google/stable@177040 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target/X86/X86InstrSSE.td')
-rw-r--r--lib/Target/X86/X86InstrSSE.td1743
1 files changed, 800 insertions, 943 deletions
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 1912a936ce..0979752757 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -203,9 +203,8 @@ multiclass sse12_fp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
multiclass sse12_fp_packed_logical_rm<bits<8> opc, RegisterClass RC, Domain d,
string OpcodeStr, X86MemOperand x86memop,
list<dag> pat_rr, list<dag> pat_rm,
- bit Is2Addr = 1,
- bit rr_hasSideEffects = 0> {
- let isCommutable = 1, neverHasSideEffects = rr_hasSideEffects in
+ bit Is2Addr = 1> {
+ let isCommutable = 1, hasSideEffects = 0 in
def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
@@ -218,27 +217,6 @@ multiclass sse12_fp_packed_logical_rm<bits<8> opc, RegisterClass RC, Domain d,
pat_rm, IIC_DEFAULT, d>;
}
-/// sse12_fp_packed_int - SSE 1 & 2 packed instructions intrinsics class
-multiclass sse12_fp_packed_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
- string asm, string SSEVer, string FPSizeStr,
- X86MemOperand x86memop, PatFrag mem_frag,
- Domain d, OpndItins itins, bit Is2Addr = 1> {
- def rr_Int : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
- !if(Is2Addr,
- !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
- !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (!cast<Intrinsic>(
- !strconcat("int_x86_", SSEVer, "_", OpcodeStr, FPSizeStr))
- RC:$src1, RC:$src2))], IIC_DEFAULT, d>;
- def rm_Int : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1,x86memop:$src2),
- !if(Is2Addr,
- !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
- !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (!cast<Intrinsic>(
- !strconcat("int_x86_", SSEVer, "_", OpcodeStr, FPSizeStr))
- RC:$src1, (mem_frag addr:$src2)))], IIC_DEFAULT, d>;
-}
-
//===----------------------------------------------------------------------===//
// Non-instruction patterns
//===----------------------------------------------------------------------===//
@@ -458,93 +436,69 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
// in terms of a copy, and just mentioned, we don't use movss/movsd for copies.
//===----------------------------------------------------------------------===//
-class sse12_move_rr<RegisterClass RC, SDNode OpNode, ValueType vt, string asm> :
- SI<0x10, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, RC:$src2), asm,
- [(set VR128:$dst, (vt (OpNode VR128:$src1,
- (scalar_to_vector RC:$src2))))],
- IIC_SSE_MOV_S_RR>;
+multiclass sse12_move_rr<RegisterClass RC, SDNode OpNode, ValueType vt,
+ X86MemOperand x86memop, string base_opc,
+ string asm_opr> {
+ def rr : SI<0x10, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, RC:$src2),
+ !strconcat(base_opc, asm_opr),
+ [(set VR128:$dst, (vt (OpNode VR128:$src1,
+ (scalar_to_vector RC:$src2))))],
+ IIC_SSE_MOV_S_RR>;
-// Loading from memory automatically zeroing upper bits.
-class sse12_move_rm<RegisterClass RC, X86MemOperand x86memop,
- PatFrag mem_pat, string OpcodeStr> :
- SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set RC:$dst, (mem_pat addr:$src))],
- IIC_SSE_MOV_S_RM>;
-
-// AVX
-def VMOVSSrr : sse12_move_rr<FR32, X86Movss, v4f32,
- "movss\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XS, VEX_4V,
- VEX_LIG;
-def VMOVSDrr : sse12_move_rr<FR64, X86Movsd, v2f64,
- "movsd\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XD, VEX_4V,
- VEX_LIG;
-
-// For the disassembler
-let isCodeGenOnly = 1 in {
- def VMOVSSrr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst),
- (ins VR128:$src1, FR32:$src2),
- "movss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
- IIC_SSE_MOV_S_RR>,
- XS, VEX_4V, VEX_LIG;
- def VMOVSDrr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst),
- (ins VR128:$src1, FR64:$src2),
- "movsd\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
- IIC_SSE_MOV_S_RR>,
- XD, VEX_4V, VEX_LIG;
+ // For the disassembler
+ let isCodeGenOnly = 1, hasSideEffects = 0 in
+ def rr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst),
+ (ins VR128:$src1, RC:$src2),
+ !strconcat(base_opc, asm_opr),
+ [], IIC_SSE_MOV_S_RR>;
}
-let canFoldAsLoad = 1, isReMaterializable = 1 in {
- def VMOVSSrm : sse12_move_rm<FR32, f32mem, loadf32, "movss">, XS, VEX,
- VEX_LIG;
- let AddedComplexity = 20 in
- def VMOVSDrm : sse12_move_rm<FR64, f64mem, loadf64, "movsd">, XD, VEX,
- VEX_LIG;
-}
+multiclass sse12_move<RegisterClass RC, SDNode OpNode, ValueType vt,
+ X86MemOperand x86memop, string OpcodeStr> {
+ // AVX
+ defm V#NAME : sse12_move_rr<RC, OpNode, vt, x86memop, OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}">,
+ VEX_4V, VEX_LIG;
-def VMOVSSmr : SI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, FR32:$src),
- "movss\t{$src, $dst|$dst, $src}",
- [(store FR32:$src, addr:$dst)], IIC_SSE_MOV_S_MR>,
- XS, VEX, VEX_LIG;
-def VMOVSDmr : SI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src),
- "movsd\t{$src, $dst|$dst, $src}",
- [(store FR64:$src, addr:$dst)], IIC_SSE_MOV_S_MR>,
- XD, VEX, VEX_LIG;
+ def V#NAME#mr : SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR>,
+ VEX, VEX_LIG;
+ // SSE1 & 2
+ let Constraints = "$src1 = $dst" in {
+ defm NAME : sse12_move_rr<RC, OpNode, vt, x86memop, OpcodeStr,
+ "\t{$src2, $dst|$dst, $src2}">;
+ }
-// SSE1 & 2
-let Constraints = "$src1 = $dst" in {
- def MOVSSrr : sse12_move_rr<FR32, X86Movss, v4f32,
- "movss\t{$src2, $dst|$dst, $src2}">, XS;
- def MOVSDrr : sse12_move_rr<FR64, X86Movsd, v2f64,
- "movsd\t{$src2, $dst|$dst, $src2}">, XD;
+ def NAME#mr : SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR>;
+}
- // For the disassembler
- let isCodeGenOnly = 1 in {
- def MOVSSrr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst),
- (ins VR128:$src1, FR32:$src2),
- "movss\t{$src2, $dst|$dst, $src2}", [],
- IIC_SSE_MOV_S_RR>, XS;
- def MOVSDrr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst),
- (ins VR128:$src1, FR64:$src2),
- "movsd\t{$src2, $dst|$dst, $src2}", [],
- IIC_SSE_MOV_S_RR>, XD;
- }
+// Loading from memory automatically zeroing upper bits.
+multiclass sse12_move_rm<RegisterClass RC, X86MemOperand x86memop,
+ PatFrag mem_pat, string OpcodeStr> {
+ def V#NAME#rm : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set RC:$dst, (mem_pat addr:$src))],
+ IIC_SSE_MOV_S_RM>, VEX, VEX_LIG;
+ def NAME#rm : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set RC:$dst, (mem_pat addr:$src))],
+ IIC_SSE_MOV_S_RM>;
}
+defm MOVSS : sse12_move<FR32, X86Movss, v4f32, f32mem, "movss">, XS;
+defm MOVSD : sse12_move<FR64, X86Movsd, v2f64, f64mem, "movsd">, XD;
+
let canFoldAsLoad = 1, isReMaterializable = 1 in {
- def MOVSSrm : sse12_move_rm<FR32, f32mem, loadf32, "movss">, XS;
+ defm MOVSS : sse12_move_rm<FR32, f32mem, loadf32, "movss">, XS;
let AddedComplexity = 20 in
- def MOVSDrm : sse12_move_rm<FR64, f64mem, loadf64, "movsd">, XD;
+ defm MOVSD : sse12_move_rm<FR64, f64mem, loadf64, "movsd">, XD;
}
-def MOVSSmr : SSI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, FR32:$src),
- "movss\t{$src, $dst|$dst, $src}",
- [(store FR32:$src, addr:$dst)], IIC_SSE_MOV_S_MR>;
-def MOVSDmr : SDI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src),
- "movsd\t{$src, $dst|$dst, $src}",
- [(store FR64:$src, addr:$dst)], IIC_SSE_MOV_S_MR>;
-
// Patterns
let Predicates = [HasAVX] in {
let AddedComplexity = 15 in {
@@ -870,7 +824,7 @@ def VMOVUPDYmr : VPDI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
IIC_SSE_MOVU_P_MR>, VEX, VEX_L;
// For disassembler
-let isCodeGenOnly = 1 in {
+let isCodeGenOnly = 1, hasSideEffects = 0 in {
def VMOVAPSrr_REV : VPSI<0x29, MRMDestReg, (outs VR128:$dst),
(ins VR128:$src),
"movaps\t{$src, $dst|$dst, $src}", [],
@@ -944,7 +898,7 @@ def MOVUPDmr : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
IIC_SSE_MOVU_P_MR>;
// For disassembler
-let isCodeGenOnly = 1 in {
+let isCodeGenOnly = 1, hasSideEffects = 0 in {
def MOVAPSrr_REV : PSI<0x29, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
"movaps\t{$src, $dst|$dst, $src}", [],
IIC_SSE_MOVA_P_RR>;
@@ -1132,34 +1086,41 @@ def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src),
// SSE 1 & 2 - Move Low packed FP Instructions
//===----------------------------------------------------------------------===//
-multiclass sse12_mov_hilo_packed<bits<8>opc, RegisterClass RC,
- SDNode psnode, SDNode pdnode, string base_opc,
- string asm_opr, InstrItinClass itin> {
+multiclass sse12_mov_hilo_packed_base<bits<8>opc, SDNode psnode, SDNode pdnode,
+ string base_opc, string asm_opr,
+ InstrItinClass itin> {
def PSrm : PI<opc, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
!strconcat(base_opc, "s", asm_opr),
- [(set RC:$dst,
- (psnode RC:$src1,
+ [(set VR128:$dst,
+ (psnode VR128:$src1,
(bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))))],
itin, SSEPackedSingle>, TB;
def PDrm : PI<opc, MRMSrcMem,
- (outs RC:$dst), (ins RC:$src1, f64mem:$src2),
+ (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
!strconcat(base_opc, "d", asm_opr),
- [(set RC:$dst, (v2f64 (pdnode RC:$src1,
+ [(set VR128:$dst, (v2f64 (pdnode VR128:$src1,
(scalar_to_vector (loadf64 addr:$src2)))))],
itin, SSEPackedDouble>, TB, OpSize;
+
}
-let AddedComplexity = 20 in {
- defm VMOVL : sse12_mov_hilo_packed<0x12, VR128, X86Movlps, X86Movlpd, "movlp",
- "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- IIC_SSE_MOV_LH>, VEX_4V;
+multiclass sse12_mov_hilo_packed<bits<8>opc, SDNode psnode, SDNode pdnode,
+ string base_opc, InstrItinClass itin> {
+ defm V#NAME : sse12_mov_hilo_packed_base<opc, psnode, pdnode, base_opc,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ itin>, VEX_4V;
+
+let Constraints = "$src1 = $dst" in
+ defm NAME : sse12_mov_hilo_packed_base<opc, psnode, pdnode, base_opc,
+ "\t{$src2, $dst|$dst, $src2}",
+ itin>;
}
-let Constraints = "$src1 = $dst", AddedComplexity = 20 in {
- defm MOVL : sse12_mov_hilo_packed<0x12, VR128, X86Movlps, X86Movlpd, "movlp",
- "\t{$src2, $dst|$dst, $src2}",
- IIC_SSE_MOV_LH>;
+
+let AddedComplexity = 20 in {
+ defm MOVL : sse12_mov_hilo_packed<0x12, X86Movlps, X86Movlpd, "movlp",
+ IIC_SSE_MOV_LH>;
}
def VMOVLPSmr : VPSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
@@ -1257,14 +1218,8 @@ let Predicates = [UseSSE2] in {
//===----------------------------------------------------------------------===//
let AddedComplexity = 20 in {
- defm VMOVH : sse12_mov_hilo_packed<0x16, VR128, X86Movlhps, X86Movlhpd, "movhp",
- "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- IIC_SSE_MOV_LH>, VEX_4V;
-}
-let Constraints = "$src1 = $dst", AddedComplexity = 20 in {
- defm MOVH : sse12_mov_hilo_packed<0x16, VR128, X86Movlhps, X86Movlhpd, "movhp",
- "\t{$src2, $dst|$dst, $src2}",
- IIC_SSE_MOV_LH>;
+ defm MOVH : sse12_mov_hilo_packed<0x16, X86Movlhps, X86Movlhpd, "movhp",
+ IIC_SSE_MOV_LH>;
}
// v2f64 extract element 1 is always custom lowered to unpack high to low
@@ -1457,7 +1412,7 @@ defm VCVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32,
SSE_CVT_SS2SI_32>,
XS, VEX, VEX_LIG;
defm VCVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32,
- "cvttss2si{q}\t{$src, $dst|$dst, $src}",
+ "cvttss2si\t{$src, $dst|$dst, $src}",
SSE_CVT_SS2SI_64>,
XS, VEX, VEX_W, VEX_LIG;
defm VCVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64,
@@ -1465,26 +1420,43 @@ defm VCVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64,
SSE_CVT_SD2SI>,
XD, VEX, VEX_LIG;
defm VCVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, fp_to_sint, f64mem, loadf64,
- "cvttsd2si{q}\t{$src, $dst|$dst, $src}",
+ "cvttsd2si\t{$src, $dst|$dst, $src}",
SSE_CVT_SD2SI>,
XD, VEX, VEX_W, VEX_LIG;
+def : InstAlias<"vcvttss2si{l}\t{$src, $dst|$dst, $src}",
+ (VCVTTSS2SIrr GR32:$dst, FR32:$src), 0>;
+def : InstAlias<"vcvttss2si{l}\t{$src, $dst|$dst, $src}",
+ (VCVTTSS2SIrm GR32:$dst, f32mem:$src), 0>;
+def : InstAlias<"vcvttsd2si{l}\t{$src, $dst|$dst, $src}",
+ (VCVTTSD2SIrr GR32:$dst, FR64:$src), 0>;
+def : InstAlias<"vcvttsd2si{l}\t{$src, $dst|$dst, $src}",
+ (VCVTTSD2SIrm GR32:$dst, f64mem:$src), 0>;
+def : InstAlias<"vcvttss2si{q}\t{$src, $dst|$dst, $src}",
+ (VCVTTSS2SI64rr GR64:$dst, FR32:$src), 0>;
+def : InstAlias<"vcvttss2si{q}\t{$src, $dst|$dst, $src}",
+ (VCVTTSS2SI64rm GR64:$dst, f32mem:$src), 0>;
+def : InstAlias<"vcvttsd2si{q}\t{$src, $dst|$dst, $src}",
+ (VCVTTSD2SI64rr GR64:$dst, FR64:$src), 0>;
+def : InstAlias<"vcvttsd2si{q}\t{$src, $dst|$dst, $src}",
+ (VCVTTSD2SI64rm GR64:$dst, f64mem:$src), 0>;
+
// The assembler can recognize rr 64-bit instructions by seeing a rxx
// register, but the same isn't true when only using memory operands,
// provide other assembly "l" and "q" forms to address this explicitly
// where appropriate to do so.
-defm VCVTSI2SS : sse12_vcvt_avx<0x2A, GR32, FR32, i32mem, "cvtsi2ss">,
+defm VCVTSI2SS : sse12_vcvt_avx<0x2A, GR32, FR32, i32mem, "cvtsi2ss{l}">,
XS, VEX_4V, VEX_LIG;
defm VCVTSI2SS64 : sse12_vcvt_avx<0x2A, GR64, FR32, i64mem, "cvtsi2ss{q}">,
XS, VEX_4V, VEX_W, VEX_LIG;
-defm VCVTSI2SD : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd">,
+defm VCVTSI2SD : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd{l}">,
XD, VEX_4V, VEX_LIG;
defm VCVTSI2SD64 : sse12_vcvt_avx<0x2A, GR64, FR64, i64mem, "cvtsi2sd{q}">,
XD, VEX_4V, VEX_W, VEX_LIG;
-def : InstAlias<"vcvtsi2sd{l}\t{$src, $src1, $dst|$dst, $src1, $src}",
- (VCVTSI2SDrr FR64:$dst, FR64:$src1, GR32:$src)>;
-def : InstAlias<"vcvtsi2sd{l}\t{$src, $src1, $dst|$dst, $src1, $src}",
+def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
+ (VCVTSI2SSrm FR64:$dst, FR64:$src1, i32mem:$src)>;
+def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
(VCVTSI2SDrm FR64:$dst, FR64:$src1, i32mem:$src)>;
let Predicates = [HasAVX] in {
@@ -1511,27 +1483,49 @@ defm CVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32,
"cvttss2si\t{$src, $dst|$dst, $src}",
SSE_CVT_SS2SI_32>, XS;
defm CVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32,
- "cvttss2si{q}\t{$src, $dst|$dst, $src}",
+ "cvttss2si\t{$src, $dst|$dst, $src}",
SSE_CVT_SS2SI_64>, XS, REX_W;
defm CVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64,
"cvttsd2si\t{$src, $dst|$dst, $src}",
SSE_CVT_SD2SI>, XD;
defm CVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, fp_to_sint, f64mem, loadf64,
- "cvttsd2si{q}\t{$src, $dst|$dst, $src}",
+ "cvttsd2si\t{$src, $dst|$dst, $src}",
SSE_CVT_SD2SI>, XD, REX_W;
defm CVTSI2SS : sse12_cvt_s<0x2A, GR32, FR32, sint_to_fp, i32mem, loadi32,
- "cvtsi2ss\t{$src, $dst|$dst, $src}",
+ "cvtsi2ss{l}\t{$src, $dst|$dst, $src}",
SSE_CVT_Scalar>, XS;
defm CVTSI2SS64 : sse12_cvt_s<0x2A, GR64, FR32, sint_to_fp, i64mem, loadi64,
"cvtsi2ss{q}\t{$src, $dst|$dst, $src}",
SSE_CVT_Scalar>, XS, REX_W;
defm CVTSI2SD : sse12_cvt_s<0x2A, GR32, FR64, sint_to_fp, i32mem, loadi32,
- "cvtsi2sd\t{$src, $dst|$dst, $src}",
+ "cvtsi2sd{l}\t{$src, $dst|$dst, $src}",
SSE_CVT_Scalar>, XD;
defm CVTSI2SD64 : sse12_cvt_s<0x2A, GR64, FR64, sint_to_fp, i64mem, loadi64,
"cvtsi2sd{q}\t{$src, $dst|$dst, $src}",
SSE_CVT_Scalar>, XD, REX_W;
+def : InstAlias<"cvttss2si{l}\t{$src, $dst|$dst, $src}",
+ (CVTTSS2SIrr GR32:$dst, FR32:$src), 0>;
+def : InstAlias<"cvttss2si{l}\t{$src, $dst|$dst, $src}",
+ (CVTTSS2SIrm GR32:$dst, f32mem:$src), 0>;
+def : InstAlias<"cvttsd2si{l}\t{$src, $dst|$dst, $src}",
+ (CVTTSD2SIrr GR32:$dst, FR64:$src), 0>;
+def : InstAlias<"cvttsd2si{l}\t{$src, $dst|$dst, $src}",
+ (CVTTSD2SIrm GR32:$dst, f64mem:$src), 0>;
+def : InstAlias<"cvttss2si{q}\t{$src, $dst|$dst, $src}",
+ (CVTTSS2SI64rr GR64:$dst, FR32:$src), 0>;
+def : InstAlias<"cvttss2si{q}\t{$src, $dst|$dst, $src}",
+ (CVTTSS2SI64rm GR64:$dst, f32mem:$src), 0>;
+def : InstAlias<"cvttsd2si{q}\t{$src, $dst|$dst, $src}",
+ (CVTTSD2SI64rr GR64:$dst, FR64:$src), 0>;
+def : InstAlias<"cvttsd2si{q}\t{$src, $dst|$dst, $src}",
+ (CVTTSD2SI64rm GR64:$dst, f64mem:$src), 0>;
+
+def : InstAlias<"cvtsi2ss\t{$src, $dst|$dst, $src}",
+ (CVTSI2SSrm FR64:$dst, i32mem:$src)>;
+def : InstAlias<"cvtsi2sd\t{$src, $dst|$dst, $src}",
+ (CVTSI2SDrm FR64:$dst, i32mem:$src)>;
+
// Conversion Instructions Intrinsics - Match intrinsics which expect MM
// and/or XMM operand(s).
@@ -1566,27 +1560,27 @@ multiclass sse12_cvt_sint_3addr<bits<8> opc, RegisterClass SrcRC,
}
defm VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32,
- int_x86_sse2_cvtsd2si, sdmem, sse_load_f64, "cvtsd2si{l}",
+ int_x86_sse2_cvtsd2si, sdmem, sse_load_f64, "cvtsd2si",
SSE_CVT_SD2SI>, XD, VEX, VEX_LIG;
defm VCVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64,
- int_x86_sse2_cvtsd2si64, sdmem, sse_load_f64, "cvtsd2si{q}",
+ int_x86_sse2_cvtsd2si64, sdmem, sse_load_f64, "cvtsd2si",
SSE_CVT_SD2SI>, XD, VEX, VEX_W, VEX_LIG;
defm CVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si,
- sdmem, sse_load_f64, "cvtsd2si{l}", SSE_CVT_SD2SI>, XD;
+ sdmem, sse_load_f64, "cvtsd2si", SSE_CVT_SD2SI>, XD;
defm CVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse2_cvtsd2si64,
- sdmem, sse_load_f64, "cvtsd2si{q}", SSE_CVT_SD2SI>, XD, REX_W;
+ sdmem, sse_load_f64, "cvtsd2si", SSE_CVT_SD2SI>, XD, REX_W;
defm Int_VCVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
- int_x86_sse_cvtsi2ss, i32mem, loadi32, "cvtsi2ss",
+ int_x86_sse_cvtsi2ss, i32mem, loadi32, "cvtsi2ss{l}",
SSE_CVT_Scalar, 0>, XS, VEX_4V;
defm Int_VCVTSI2SS64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
int_x86_sse_cvtsi642ss, i64mem, loadi64, "cvtsi2ss{q}",
SSE_CVT_Scalar, 0>, XS, VEX_4V,
VEX_W;
defm Int_VCVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
- int_x86_sse2_cvtsi2sd, i32mem, loadi32, "cvtsi2sd",
+ int_x86_sse2_cvtsi2sd, i32mem, loadi32, "cvtsi2sd{l}",
SSE_CVT_Scalar, 0>, XD, VEX_4V;
defm Int_VCVTSI2SD64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
int_x86_sse2_cvtsi642sd, i64mem, loadi64, "cvtsi2sd{q}",
@@ -1596,13 +1590,13 @@ defm Int_VCVTSI2SD64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
let Constraints = "$src1 = $dst" in {
defm Int_CVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
int_x86_sse_cvtsi2ss, i32mem, loadi32,
- "cvtsi2ss", SSE_CVT_Scalar>, XS;
+ "cvtsi2ss{l}", SSE_CVT_Scalar>, XS;
defm Int_CVTSI2SS64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
int_x86_sse_cvtsi642ss, i64mem, loadi64,
"cvtsi2ss{q}", SSE_CVT_Scalar>, XS, REX_W;
defm Int_CVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
int_x86_sse2_cvtsi2sd, i32mem, loadi32,
- "cvtsi2sd", SSE_CVT_Scalar>, XD;
+ "cvtsi2sd{l}", SSE_CVT_Scalar>, XD;
defm Int_CVTSI2SD64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
int_x86_sse2_cvtsi642sd, i64mem, loadi64,
"cvtsi2sd{q}", SSE_CVT_Scalar>, XD, REX_W;
@@ -1616,40 +1610,40 @@ defm Int_VCVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si,
SSE_CVT_SS2SI_32>, XS, VEX;
defm Int_VCVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
int_x86_sse_cvttss2si64, ssmem, sse_load_f32,
- "cvttss2si{q}", SSE_CVT_SS2SI_64>,
+ "cvttss2si", SSE_CVT_SS2SI_64>,
XS, VEX, VEX_W;
defm Int_VCVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse2_cvttsd2si,
sdmem, sse_load_f64, "cvttsd2si",
SSE_CVT_SD2SI>, XD, VEX;
defm Int_VCVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
int_x86_sse2_cvttsd2si64, sdmem, sse_load_f64,
- "cvttsd2si{q}", SSE_CVT_SD2SI>,
+ "cvttsd2si", SSE_CVT_SD2SI>,
XD, VEX, VEX_W;
defm Int_CVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si,
ssmem, sse_load_f32, "cvttss2si",
SSE_CVT_SS2SI_32>, XS;
defm Int_CVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
int_x86_sse_cvttss2si64, ssmem, sse_load_f32,
- "cvttss2si{q}", SSE_CVT_SS2SI_64>, XS, REX_W;
+ "cvttss2si", SSE_CVT_SS2SI_64>, XS, REX_W;
defm Int_CVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse2_cvttsd2si,
sdmem, sse_load_f64, "cvttsd2si",
SSE_CVT_SD2SI>, XD;
defm Int_CVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
int_x86_sse2_cvttsd2si64, sdmem, sse_load_f64,
- "cvttsd2si{q}", SSE_CVT_SD2SI>, XD, REX_W;
+ "cvttsd2si", SSE_CVT_SD2SI>, XD, REX_W;
defm VCVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si,
- ssmem, sse_load_f32, "cvtss2si{l}",
+ ssmem, sse_load_f32, "cvtss2si",
SSE_CVT_SS2SI_32>, XS, VEX, VEX_LIG;
defm VCVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse_cvtss2si64,
- ssmem, sse_load_f32, "cvtss2si{q}",
+ ssmem, sse_load_f32, "cvtss2si",
SSE_CVT_SS2SI_64>, XS, VEX, VEX_W, VEX_LIG;
defm CVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si,
- ssmem, sse_load_f32, "cvtss2si{l}",
+ ssmem, sse_load_f32, "cvtss2si",
SSE_CVT_SS2SI_32>, XS;
defm CVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse_cvtss2si64,
- ssmem, sse_load_f32, "cvtss2si{q}",
+ ssmem, sse_load_f32, "cvtss2si",
SSE_CVT_SS2SI_64>, XS, REX_W;
defm VCVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, i128mem,
@@ -1666,6 +1660,40 @@ defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, i128mem,
SSEPackedSingle, SSE_CVT_PS>,
TB, Requires<[UseSSE2]>;
+def : InstAlias<"vcvtss2si{l}\t{$src, $dst|$dst, $src}",
+ (VCVTSS2SIrr GR32:$dst, VR128:$src), 0>;
+def : InstAlias<"vcvtss2si{l}\t{$src, $dst|$dst, $src}",
+ (VCVTSS2SIrm GR32:$dst, ssmem:$src), 0>;
+def : InstAlias<"vcvtsd2si{l}\t{$src, $dst|$dst, $src}",
+ (VCVTSD2SIrr GR32:$dst, VR128:$src), 0>;
+def : InstAlias<"vcvtsd2si{l}\t{$src, $dst|$dst, $src}",
+ (VCVTSD2SIrm GR32:$dst, sdmem:$src), 0>;
+def : InstAlias<"vcvtss2si{q}\t{$src, $dst|$dst, $src}",
+ (VCVTSS2SI64rr GR64:$dst, VR128:$src), 0>;
+def : InstAlias<"vcvtss2si{q}\t{$src, $dst|$dst, $src}",
+ (VCVTSS2SI64rm GR64:$dst, ssmem:$src), 0>;
+def : InstAlias<"vcvtsd2si{q}\t{$src, $dst|$dst, $src}",
+ (VCVTSD2SI64rr GR64:$dst, VR128:$src), 0>;
+def : InstAlias<"vcvtsd2si{q}\t{$src, $dst|$dst, $src}",
+ (VCVTSD2SI64rm GR64:$dst, sdmem:$src), 0>;
+
+def : InstAlias<"cvtss2si{l}\t{$src, $dst|$dst, $src}",
+ (CVTSS2SIrr GR32:$dst, VR128:$src), 0>;
+def : InstAlias<"cvtss2si{l}\t{$src, $dst|$dst, $src}",
+ (CVTSS2SIrm GR32:$dst, ssmem:$src), 0>;
+def : InstAlias<"cvtsd2si{l}\t{$src, $dst|$dst, $src}",
+ (CVTSD2SIrr GR32:$dst, VR128:$src), 0>;
+def : InstAlias<"cvtsd2si{l}\t{$src, $dst|$dst, $src}",
+ (CVTSD2SIrm GR32:$dst, sdmem:$src), 0>;
+def : InstAlias<"cvtss2si{q}\t{$src, $dst|$dst, $src}",
+ (CVTSS2SI64rr GR64:$dst, VR128:$src), 0>;
+def : InstAlias<"cvtss2si{q}\t{$src, $dst|$dst, $src}",
+ (CVTSS2SI64rm GR64:$dst, ssmem:$src), 0>;
+def : InstAlias<"cvtsd2si{q}\t{$src, $dst|$dst, $src}",
+ (CVTSD2SI64rr GR64:$dst, VR128:$src), 0>;
+def : InstAlias<"cvtsd2si{q}\t{$src, $dst|$dst, $src}",
+ (CVTSD2SI64rm GR64:$dst, sdmem:$src)>;
+
/// SSE 2 Only
// Convert scalar double to scalar single
@@ -2657,10 +2685,8 @@ let ExeDomain = SSEPackedInt in { // SSE integer instructions
/// PDI_binop_rm - Simple SSE2 binary operator.
multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
- X86MemOperand x86memop,
- OpndItins itins,
- bit IsCommutable = 0,
- bit Is2Addr = 1> {
+ X86MemOperand x86memop, OpndItins itins,
+ bit IsCommutable, bit Is2Addr> {
let isCommutable = IsCommutable in
def rr : PDI<opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2),
@@ -2679,40 +2705,30 @@ multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
}
} // ExeDomain = SSEPackedInt
-// These are ordered here for pattern ordering requirements with the fp versions
+multiclass PDI_binop_all<bits<8> opc, string OpcodeStr, SDNode Opcode,
+ ValueType OpVT128, ValueType OpVT256,
+ OpndItins itins, bit IsCommutable = 0> {
+let Predicates = [HasAVX] in
+ defm V#NAME : PDI_binop_rm<opc, !strconcat("v", OpcodeStr), Opcode, OpVT128,
+ VR128, memopv2i64, i128mem, itins, IsCommutable, 0>, VEX_4V;
-let Predicates = [HasAVX] in {
-defm VPAND : PDI_binop_rm<0xDB, "vpand", and, v2i64, VR128, memopv2i64,
- i128mem, SSE_BIT_ITINS_P, 1, 0>, VEX_4V;
-defm VPOR : PDI_binop_rm<0xEB, "vpor" , or, v2i64, VR128, memopv2i64,
- i128mem, SSE_BIT_ITINS_P, 1, 0>, VEX_4V;
-defm VPXOR : PDI_binop_rm<0xEF, "vpxor", xor, v2i64, VR128, memopv2i64,
- i128mem, SSE_BIT_ITINS_P, 1, 0>, VEX_4V;
-defm VPANDN : PDI_binop_rm<0xDF, "vpandn", X86andnp, v2i64, VR128, memopv2i64,
- i128mem, SSE_BIT_ITINS_P, 0, 0>, VEX_4V;
+let Constraints = "$src1 = $dst" in
+ defm NAME : PDI_binop_rm<opc, OpcodeStr, Opcode, OpVT128, VR128,
+ memopv2i64, i128mem, itins, IsCommutable, 1>;
+
+let Predicates = [HasAVX2] in
+ defm V#NAME#Y : PDI_binop_rm<opc, !strconcat("v", OpcodeStr), Opcode,
+ OpVT256, VR256, memopv4i64, i256mem, itins,
+ IsCommutable, 0>, VEX_4V, VEX_L;
}
-let Constraints = "$src1 = $dst" in {
-defm PAND : PDI_binop_rm<0xDB, "pand", and, v2i64, VR128, memopv2i64,
- i128mem, SSE_BIT_ITINS_P, 1>;
-defm POR : PDI_binop_rm<0xEB, "por" , or, v2i64, VR128, memopv2i64,
- i128mem, SSE_BIT_ITINS_P, 1>;
-defm PXOR : PDI_binop_rm<0xEF, "pxor", xor, v2i64, VR128, memopv2i64,
- i128mem, SSE_BIT_ITINS_P, 1>;
-defm PANDN : PDI_binop_rm<0xDF, "pandn", X86andnp, v2i64, VR128, memopv2i64,
- i128mem, SSE_BIT_ITINS_P, 0>;
-} // Constraints = "$src1 = $dst"
+// These are ordered here for pattern ordering requirements with the fp versions
-let Predicates = [HasAVX2] in {
-defm VPANDY : PDI_binop_rm<0xDB, "vpand", and, v4i64, VR256, memopv4i64,
- i256mem, SSE_BIT_ITINS_P, 1, 0>, VEX_4V, VEX_L;
-defm VPORY : PDI_binop_rm<0xEB, "vpor", or, v4i64, VR256, memopv4i64,
- i256mem, SSE_BIT_ITINS_P, 1, 0>, VEX_4V, VEX_L;
-defm VPXORY : PDI_binop_rm<0xEF, "vpxor", xor, v4i64, VR256, memopv4i64,
- i256mem, SSE_BIT_ITINS_P, 1, 0>, VEX_4V, VEX_L;
-defm VPANDNY : PDI_binop_rm<0xDF, "vpandn", X86andnp, v4i64, VR256, memopv4i64,
- i256mem, SSE_BIT_ITINS_P, 0, 0>, VEX_4V, VEX_L;
-}
+defm PAND : PDI_binop_all<0xDB, "pand", and, v2i64, v4i64, SSE_BIT_ITINS_P, 1>;
+defm POR : PDI_binop_all<0xEB, "por", or, v2i64, v4i64, SSE_BIT_ITINS_P, 1>;
+defm PXOR : PDI_binop_all<0xEF, "pxor", xor, v2i64, v4i64, SSE_BIT_ITINS_P, 1>;
+defm PANDN : PDI_binop_all<0xDF, "pandn", X86andnp, v2i64, v4i64,
+ SSE_BIT_ITINS_P, 0>;
//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Logical Instructions
@@ -2757,6 +2773,20 @@ let neverHasSideEffects = 1, Pattern = []<dag>, isCommutable = 0 in
///
multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr,
SDNode OpNode> {
+ defm V#NAME#PSY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedSingle,
+ !strconcat(OpcodeStr, "ps"), f256mem,
+ [(set VR256:$dst, (v4i64 (OpNode VR256:$src1, VR256:$src2)))],
+ [(set VR256:$dst, (OpNode (bc_v4i64 (v8f32 VR256:$src1)),
+ (memopv4i64 addr:$src2)))], 0>, TB, VEX_4V, VEX_L;
+
+ defm V#NAME#PDY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedDouble,
+ !strconcat(OpcodeStr, "pd"), f256mem,
+ [(set VR256:$dst, (OpNode (bc_v4i64 (v4f64 VR256:$src1)),
+ (bc_v4i64 (v4f64 VR256:$src2))))],
+ [(set VR256:$dst, (OpNode (bc_v4i64 (v4f64 VR256:$src1)),
+ (memopv4i64 addr:$src2)))], 0>,
+ TB, OpSize, VEX_4V, VEX_L;
+
// In AVX no need to add a pattern for 128-bit logical rr ps, because they
// are all promoted to v2i64, and the patterns are covered by the int
// version. This is needed in SSE only, because v2i64 isn't supported on
@@ -2764,7 +2794,7 @@ multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr,
defm V#NAME#PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle,
!strconcat(OpcodeStr, "ps"), f128mem, [],
[(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)),
- (memopv2i64 addr:$src2)))], 0, 1>, TB, VEX_4V;
+ (memopv2i64 addr:$src2)))], 0>, TB, VEX_4V;
defm V#NAME#PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble,
!strconcat(OpcodeStr, "pd"), f128mem,
@@ -2773,6 +2803,7 @@ multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr,
[(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
(memopv2i64 addr:$src2)))], 0>,
TB, OpSize, VEX_4V;
+
let Constraints = "$src1 = $dst" in {
defm PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle,
!strconcat(OpcodeStr, "ps"), f128mem,
@@ -2789,31 +2820,6 @@ multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr,
}
}
-/// sse12_fp_packed_logical_y - AVX 256-bit SSE 1 & 2 logical ops forms
-///
-multiclass sse12_fp_packed_logical_y<bits<8> opc, string OpcodeStr,
- SDNode OpNode> {
- defm PSY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedSingle,
- !strconcat(OpcodeStr, "ps"), f256mem,
- [(set VR256:$dst, (v4i64 (OpNode VR256:$src1, VR256:$src2)))],
- [(set VR256:$dst, (OpNode (bc_v4i64 (v8f32 VR256:$src1)),
- (memopv4i64 addr:$src2)))], 0>, TB, VEX_4V, VEX_L;
-
- defm PDY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedDouble,
- !strconcat(OpcodeStr, "pd"), f256mem,
- [(set VR256:$dst, (OpNode (bc_v4i64 (v4f64 VR256:$src1)),
- (bc_v4i64 (v4f64 VR256:$src2))))],
- [(set VR256:$dst, (OpNode (bc_v4i64 (v4f64 VR256:$src1)),
- (memopv4i64 addr:$src2)))], 0>,
- TB, OpSize, VEX_4V, VEX_L;
-}
-
-// AVX 256-bit packed logical ops forms
-defm VAND : sse12_fp_packed_logical_y<0x54, "and", and>;
-defm VOR : sse12_fp_packed_logical_y<0x56, "or", or>;
-defm VXOR : sse12_fp_packed_logical_y<0x57, "xor", xor>;
-defm VANDN : sse12_fp_packed_logical_y<0x55, "andn", X86andnp>;
-
defm AND : sse12_fp_packed_logical<0x54, "and", and>;
defm OR : sse12_fp_packed_logical<0x56, "or", or>;
defm XOR : sse12_fp_packed_logical<0x57, "xor", xor>;
@@ -2848,26 +2854,32 @@ multiclass basic_sse12_fp_binop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
itins.d, Is2Addr>, XD;
}
-multiclass basic_sse12_fp_binop_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
- SizeItins itins,
- bit Is2Addr = 1> {
+multiclass basic_sse12_fp_binop_p<bits<8> opc, string OpcodeStr,
+ SDNode OpNode, SizeItins itins> {
+let Predicates = [HasAVX] in {
+ defm V#NAME#PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode,
+ VR128, v4f32, f128mem, memopv4f32,
+ SSEPackedSingle, itins.s, 0>, TB, VEX_4V;
+ defm V#NAME#PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode,
+ VR128, v2f64, f128mem, memopv2f64,
+ SSEPackedDouble, itins.d, 0>, TB, OpSize, VEX_4V;
+
+ defm V#NAME#PSY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"),
+ OpNode, VR256, v8f32, f256mem, memopv8f32,
+ SSEPackedSingle, itins.s, 0>, TB, VEX_4V, VEX_L;
+ defm V#NAME#PDY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"),
+ OpNode, VR256, v4f64, f256mem, memopv4f64,
+ SSEPackedDouble, itins.d, 0>, TB, OpSize, VEX_4V, VEX_L;
+}
+
+let Constraints = "$src1 = $dst" in {
defm PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, VR128,
- v4f32, f128mem, memopv4f32, SSEPackedSingle, itins.s, Is2Addr>,
- TB;
+ v4f32, f128mem, memopv4f32, SSEPackedSingle,
+ itins.s, 1>, TB;
defm PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, VR128,
- v2f64, f128mem, memopv2f64, SSEPackedDouble, itins.d, Is2Addr>,
- TB, OpSize;
+ v2f64, f128mem, memopv2f64, SSEPackedDouble,
+ itins.d, 1>, TB, OpSize;
}
-
-multiclass basic_sse12_fp_binop_p_y<bits<8> opc, string OpcodeStr,
- SDNode OpNode,
- SizeItins itins> {
- defm PSY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, VR256,
- v8f32, f256mem, memopv8f32, SSEPackedSingle, itins.s, 0>,
- TB, VEX_L;
- defm PDY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, VR256,
- v4f64, f256mem, memopv4f64, SSEPackedDouble, itins.d, 0>,
- TB, OpSize, VEX_L;
}
multiclass basic_sse12_fp_binop_s_int<bits<8> opc, string OpcodeStr,
@@ -2881,116 +2893,69 @@ multiclass basic_sse12_fp_binop_s_int<bits<8> opc, string OpcodeStr,
itins.d, Is2Addr>, XD;
}
-multiclass basic_sse12_fp_binop_p_int<bits<8> opc, string OpcodeStr,
- SizeItins itins,
- bit Is2Addr = 1> {
- defm PS : sse12_fp_packed_int<opc, OpcodeStr, VR128,
- !strconcat(OpcodeStr, "ps"), "sse", "_ps", f128mem, memopv4f32,
- SSEPackedSingle, itins.s, Is2Addr>,
- TB;
-
- defm PD : sse12_fp_packed_int<opc, OpcodeStr, VR128,
- !strconcat(OpcodeStr, "pd"), "sse2", "_pd", f128mem, memopv2f64,
- SSEPackedDouble, itins.d, Is2Addr>,
- TB, OpSize;
+// Binary Arithmetic instructions
+defm ADD : basic_sse12_fp_binop_p<0x58, "add", fadd, SSE_ALU_ITINS_P>;
+defm MUL : basic_sse12_fp_binop_p<0x59, "mul", fmul, SSE_MUL_ITINS_P>;
+let isCommutable = 0 in {
+ defm SUB : basic_sse12_fp_binop_p<0x5C, "sub", fsub, SSE_ALU_ITINS_P>;
+ defm DIV : basic_sse12_fp_binop_p<0x5E, "div", fdiv, SSE_DIV_ITINS_P>;
+ defm MAX : basic_sse12_fp_binop_p<0x5F, "max", X86fmax, SSE_ALU_ITINS_P>;
+ defm MIN : basic_sse12_fp_binop_p<0x5D, "min", X86fmin, SSE_ALU_ITINS_P>;
}
-multiclass basic_sse12_fp_binop_p_y_int<bits<8> opc, string OpcodeStr,
- SizeItins itins> {
- defm PSY : sse12_fp_packed_int<opc, OpcodeStr, VR256,
- !strconcat(OpcodeStr, "ps"), "avx", "_ps_256", f256mem, memopv8f32,
- SSEPackedSingle, itins.s, 0>, TB, VEX_L;
-
- defm PDY : sse12_fp_packed_int<opc, OpcodeStr, VR256,
- !strconcat(OpcodeStr, "pd"), "avx", "_pd_256", f256mem, memopv4f64,
- SSEPackedDouble, itins.d, 0>, TB, OpSize, VEX_L;
+let isCodeGenOnly = 1 in {
+ defm MAXC: basic_sse12_fp_binop_p<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_P>;
+ defm MINC: basic_sse12_fp_binop_p<0x5D, "min", X86fminc, SSE_ALU_ITINS_P>;
}
-// Binary Arithmetic instructions
defm VADD : basic_sse12_fp_binop_s<0x58, "add", fadd, SSE_ALU_ITINS_S, 0>,
basic_sse12_fp_binop_s_int<0x58, "add", SSE_ALU_ITINS_S, 0>,
VEX_4V, VEX_LIG;
-defm VADD : basic_sse12_fp_binop_p<0x58, "add", fadd, SSE_ALU_ITINS_P, 0>,
- basic_sse12_fp_binop_p_y<0x58, "add", fadd, SSE_ALU_ITINS_P>,
- VEX_4V;
defm VMUL : basic_sse12_fp_binop_s<0x59, "mul", fmul, SSE_MUL_ITINS_S, 0>,
basic_sse12_fp_binop_s_int<0x59, "mul", SSE_MUL_ITINS_S, 0>,
VEX_4V, VEX_LIG;
-defm VMUL : basic_sse12_fp_binop_p<0x59, "mul", fmul, SSE_MUL_ITINS_P, 0>,
- basic_sse12_fp_binop_p_y<0x59, "mul", fmul, SSE_MUL_ITINS_P>,
- VEX_4V;
let isCommutable = 0 in {
defm VSUB : basic_sse12_fp_binop_s<0x5C, "sub", fsub, SSE_ALU_ITINS_S, 0>,
basic_sse12_fp_binop_s_int<0x5C, "sub", SSE_ALU_ITINS_S, 0>,
VEX_4V, VEX_LIG;
- defm VSUB : basic_sse12_fp_binop_p<0x5C, "sub", fsub, SSE_ALU_ITINS_P, 0>,
- basic_sse12_fp_binop_p_y<0x5C, "sub", fsub, SSE_ALU_ITINS_P>,
- VEX_4V;
defm VDIV : basic_sse12_fp_binop_s<0x5E, "div", fdiv, SSE_DIV_ITINS_S, 0>,
basic_sse12_fp_binop_s_int<0x5E, "div", SSE_DIV_ITINS_S, 0>,
VEX_4V, VEX_LIG;
- defm VDIV : basic_sse12_fp_binop_p<0x5E, "div", fdiv, SSE_ALU_ITINS_P, 0>,
- basic_sse12_fp_binop_p_y<0x5E, "div", fdiv, SSE_DIV_ITINS_P>,
- VEX_4V;
defm VMAX : basic_sse12_fp_binop_s<0x5F, "max", X86fmax, SSE_ALU_ITINS_S, 0>,
basic_sse12_fp_binop_s_int<0x5F, "max", SSE_ALU_ITINS_S, 0>,
VEX_4V, VEX_LIG;
- defm VMAX : basic_sse12_fp_binop_p<0x5F, "max", X86fmax, SSE_ALU_ITINS_P, 0>,
- basic_sse12_fp_binop_p_int<0x5F, "max", SSE_ALU_ITINS_P, 0>,
- basic_sse12_fp_binop_p_y<0x5F, "max", X86fmax, SSE_ALU_ITINS_P>,
- basic_sse12_fp_binop_p_y_int<0x5F, "max", SSE_ALU_ITINS_P>,
- VEX_4V;
defm VMIN : basic_sse12_fp_binop_s<0x5D, "min", X86fmin, SSE_ALU_ITINS_S, 0>,
basic_sse12_fp_binop_s_int<0x5D, "min", SSE_ALU_ITINS_S, 0>,
VEX_4V, VEX_LIG;
- defm VMIN : basic_sse12_fp_binop_p<0x5D, "min", X86fmin, SSE_ALU_ITINS_P, 0>,
- basic_sse12_fp_binop_p_int<0x5D, "min", SSE_ALU_ITINS_P, 0>,
- basic_sse12_fp_binop_p_y_int<0x5D, "min", SSE_ALU_ITINS_P>,
- basic_sse12_fp_binop_p_y<0x5D, "min", X86fmin, SSE_ALU_ITINS_P>,
- VEX_4V;
}
let Constraints = "$src1 = $dst" in {
defm ADD : basic_sse12_fp_binop_s<0x58, "add", fadd, SSE_ALU_ITINS_S>,
- basic_sse12_fp_binop_p<0x58, "add", fadd, SSE_ALU_ITINS_P>,
basic_sse12_fp_binop_s_int<0x58, "add", SSE_ALU_ITINS_S>;
defm MUL : basic_sse12_fp_binop_s<0x59, "mul", fmul, SSE_MUL_ITINS_S>,
- basic_sse12_fp_binop_p<0x59, "mul", fmul, SSE_MUL_ITINS_P>,
basic_sse12_fp_binop_s_int<0x59, "mul", SSE_MUL_ITINS_S>;
let isCommutable = 0 in {
defm SUB : basic_sse12_fp_binop_s<0x5C, "sub", fsub, SSE_ALU_ITINS_S>,
- basic_sse12_fp_binop_p<0x5C, "sub", fsub, SSE_ALU_ITINS_P>,
basic_sse12_fp_binop_s_int<0x5C, "sub", SSE_ALU_ITINS_S>;
defm DIV : basic_sse12_fp_binop_s<0x5E, "div", fdiv, SSE_DIV_ITINS_S>,
- basic_sse12_fp_binop_p<0x5E, "div", fdiv, SSE_DIV_ITINS_P>,
basic_sse12_fp_binop_s_int<0x5E, "div", SSE_DIV_ITINS_S>;
defm MAX : basic_sse12_fp_binop_s<0x5F, "max", X86fmax, SSE_ALU_ITINS_S>,
- basic_sse12_fp_binop_p<0x5F, "max", X86fmax, SSE_ALU_ITINS_P>,
- basic_sse12_fp_binop_s_int<0x5F, "max", SSE_ALU_ITINS_S>,
- basic_sse12_fp_binop_p_int<0x5F, "max", SSE_ALU_ITINS_P>;
+ basic_sse12_fp_binop_s_int<0x5F, "max", SSE_ALU_ITINS_S>;
defm MIN : basic_sse12_fp_binop_s<0x5D, "min", X86fmin, SSE_ALU_ITINS_S>,
- basic_sse12_fp_binop_p<0x5D, "min", X86fmin, SSE_ALU_ITINS_P>,
- basic_sse12_fp_binop_s_int<0x5D, "min", SSE_ALU_ITINS_S>,
- basic_sse12_fp_binop_p_int<0x5D, "min", SSE_ALU_ITINS_P>;
+ basic_sse12_fp_binop_s_int<0x5D, "min", SSE_ALU_ITINS_S>;
}
}
let isCodeGenOnly = 1 in {
defm VMAXC: basic_sse12_fp_binop_s<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_S, 0>,
VEX_4V, VEX_LIG;
- defm VMAXC: basic_sse12_fp_binop_p<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_P, 0>,
- basic_sse12_fp_binop_p_y<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_P>, VEX_4V;
defm VMINC: basic_sse12_fp_binop_s<0x5D, "min", X86fminc, SSE_ALU_ITINS_S, 0>,
VEX_4V, VEX_LIG;
- defm VMINC: basic_sse12_fp_binop_p<0x5D, "min", X86fminc, SSE_ALU_ITINS_P, 0>,
- basic_sse12_fp_binop_p_y<0x5D, "min", X86fminc, SSE_ALU_ITINS_P>, VEX_4V;
let Constraints = "$src1 = $dst" in {
- defm MAXC: basic_sse12_fp_binop_s<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_S>,
- basic_sse12_fp_binop_p<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_P>;
- defm MINC: basic_sse12_fp_binop_s<0x5D, "min", X86fminc, SSE_ALU_ITINS_S>,
- basic_sse12_fp_binop_p<0x5D, "min", X86fminc, SSE_ALU_ITINS_P>;
+ defm MAXC: basic_sse12_fp_binop_s<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_S>;
+ defm MINC: basic_sse12_fp_binop_s<0x5D, "min", X86fminc, SSE_ALU_ITINS_S>;
}
}
@@ -3021,6 +2986,26 @@ def SSE_RCPS : OpndItins<
/// sse1_fp_unop_s - SSE1 unops in scalar form.
multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr,
SDNode OpNode, Intrinsic F32Int, OpndItins itins> {
+let Predicates = [HasAVX], hasSideEffects = 0 in {
+ def V#NAME#SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst),
+ (ins FR32:$src1, FR32:$src2),
+ !strconcat("v", OpcodeStr,
+ "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, VEX_4V, VEX_LIG;
+ let mayLoad = 1 in {
+ def V#NAME#SSm : SSI<opc, MRMSrcMem, (outs FR32:$dst),
+ (ins FR32:$src1,f32mem:$src2),
+ !strconcat("v", OpcodeStr,
+ "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, VEX_4V, VEX_LIG;
+ def V#NAME#SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, ssmem:$src2),
+ !strconcat("v", OpcodeStr,
+ "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, VEX_4V, VEX_LIG;
+ }
+}
+
def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
!strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
[(set FR32:$dst, (OpNode FR32:$src))]>;
@@ -3040,49 +3025,115 @@ multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr,
[(set VR128:$dst, (F32Int sse_load_f32:$src))], itins.rm>;
}
-/// sse1_fp_unop_s_avx - AVX SSE1 unops in scalar form.
-multiclass sse1_fp_unop_s_avx<bits<8> opc, string OpcodeStr> {
- def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src1, FR32:$src2),
- !strconcat(OpcodeStr,
- "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
+/// sse1_fp_unop_s_rw - SSE1 unops where vector form has a read-write operand.
+multiclass sse1_fp_unop_rw<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ OpndItins itins> {
+let Predicates = [HasAVX], hasSideEffects = 0 in {
+ def V#NAME#SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst),
+ (ins FR32:$src1, FR32:$src2),
+ !strconcat("v", OpcodeStr,
+ "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, VEX_4V, VEX_LIG;
let mayLoad = 1 in {
- def SSm : SSI<opc, MRMSrcMem, (outs FR32:$dst), (ins FR32:$src1,f32mem:$src2),
- !strconcat(OpcodeStr,
- "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
- def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, ssmem:$src2),
- !strconcat(OpcodeStr,
- "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
+ def V#NAME#SSm : SSI<opc, MRMSrcMem, (outs FR32:$dst),
+ (ins FR32:$src1,f32mem:$src2),
+ !strconcat("v", OpcodeStr,
+ "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, VEX_4V, VEX_LIG;
+ def V#NAME#SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, ssmem:$src2),
+ !strconcat("v", OpcodeStr,
+ "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, VEX_4V, VEX_LIG;
+ }
+}
+
+ def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
+ !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
+ [(set FR32:$dst, (OpNode FR32:$src))]>;
+ // For scalar unary operations, fold a load into the operation
+ // only in OptForSize mode. It eliminates an instruction, but it also
+ // eliminates a whole-register clobber (the load), so it introduces a
+ // partial register update condition.
+ def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src),
+ !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
+ [(set FR32:$dst, (OpNode (load addr:$src)))], itins.rm>, XS,
+ Requires<[UseSSE1, OptForSize]>;
+ let Constraints = "$src1 = $dst" in {
+ def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
+ [], itins.rr>;
+ let mayLoad = 1, hasSideEffects = 0 in
+ def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, ssmem:$src2),
+ !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
+ [], itins.rm>;
}
}
/// sse1_fp_unop_p - SSE1 unops in packed form.
multiclass sse1_fp_unop_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
OpndItins itins> {
+let Predicates = [HasAVX] in {
+ def V#NAME#PSr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat("v", OpcodeStr,
+ "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))],
+ itins.rr>, VEX;
+ def V#NAME#PSm : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ !strconcat("v", OpcodeStr,
+ "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (OpNode (memopv4f32 addr:$src)))],
+ itins.rm>, VEX;
+ def V#NAME#PSYr : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+ !strconcat("v", OpcodeStr,
+ "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (v8f32 (OpNode VR256:$src)))],
+ itins.rr>, VEX, VEX_L;
+ def V#NAME#PSYm : PSI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
+ !strconcat("v", OpcodeStr,
+ "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (OpNode (memopv8f32 addr:$src)))],
+ itins.rm>, VEX, VEX_L;
+}
+
def PSr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))], itins.rr>;
+ !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))], itins.rr>;
def PSm : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
!strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (OpNode (memopv4f32 addr:$src)))], itins.rm>;
}
-/// sse1_fp_unop_p_y - AVX 256-bit SSE1 unops in packed form.
-multiclass sse1_fp_unop_p_y<bits<8> opc, string OpcodeStr, SDNode OpNode,
- OpndItins itins> {
- def PSYr : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
- !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
- [(set VR256:$dst, (v8f32 (OpNode VR256:$src)))],
- itins.rr>, VEX_L;
- def PSYm : PSI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
- !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
- [(set VR256:$dst, (OpNode (memopv8f32 addr:$src)))],
- itins.rm>, VEX_L;
-}
-
/// sse1_fp_unop_p_int - SSE1 intrinsics unops in packed forms.
multiclass sse1_fp_unop_p_int<bits<8> opc, string OpcodeStr,
- Intrinsic V4F32Int, OpndItins itins> {
+ Intrinsic V4F32Int, Intrinsic V8F32Int,
+ OpndItins itins> {
+let Predicates = [HasAVX] in {
+ def V#NAME#PSr_Int : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat("v", OpcodeStr,
+ "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (V4F32Int VR128:$src))],
+ itins.rr>, VEX;
+ def V#NAME#PSm_Int : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ !strconcat("v", OpcodeStr,
+ "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (V4F32Int (memopv4f32 addr:$src)))],
+ itins.rm>, VEX;
+ def V#NAME#PSYr_Int : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+ !strconcat("v", OpcodeStr,
+ "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (V8F32Int VR256:$src))],
+ itins.rr>, VEX, VEX_L;
+ def V#NAME#PSYm_Int : PSI<opc, MRMSrcMem, (outs VR256:$dst),
+ (ins f256mem:$src),
+ !strconcat("v", OpcodeStr,
+ "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (V8F32Int (memopv8f32 addr:$src)))],
+ itins.rm>, VEX, VEX_L;
+}
+
def PSr_Int : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (V4F32Int VR128:$src))],
@@ -3093,22 +3144,29 @@ multiclass sse1_fp_unop_p_int<bits<8> opc, string OpcodeStr,
itins.rm>;
}
-/// sse1_fp_unop_p_y_int - AVX 256-bit intrinsics unops in packed forms.
-multiclass sse1_fp_unop_p_y_int<bits<8> opc, string OpcodeStr,
- Intrinsic V4F32Int, OpndItins itins> {
- def PSYr_Int : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
- !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
- [(set VR256:$dst, (V4F32Int VR256:$src))],
- itins.rr>, VEX_L;
- def PSYm_Int : PSI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
- !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
- [(set VR256:$dst, (V4F32Int (memopv8f32 addr:$src)))],
- itins.rm>, VEX_L;
-}
-
/// sse2_fp_unop_s - SSE2 unops in scalar form.
multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr,
SDNode OpNode, Intrinsic F64Int, OpndItins itins> {
+let Predicates = [HasAVX], hasSideEffects = 0 in {
+ def V#NAME#SDr : SDI<opc, MRMSrcReg, (outs FR64:$dst),
+ (ins FR64:$src1, FR64:$src2),
+ !strconcat("v", OpcodeStr,
+ "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, VEX_4V, VEX_LIG;
+ let mayLoad = 1 in {
+ def V#NAME#SDm : SDI<opc, MRMSrcMem, (outs FR64:$dst),
+ (ins FR64:$src1,f64mem:$src2),
+ !strconcat("v", OpcodeStr,
+ "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, VEX_4V, VEX_LIG;
+ def V#NAME#SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, sdmem:$src2),
+ !strconcat("v", OpcodeStr,
+ "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, VEX_4V, VEX_LIG;
+ }
+}
+
def SDr : SDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
!strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
[(set FR64:$dst, (OpNode FR64:$src))], itins.rr>;
@@ -3125,26 +3183,32 @@ multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr,
[(set VR128:$dst, (F64Int sse_load_f64:$src))], itins.rm>;
}
-/// sse2_fp_unop_s_avx - AVX SSE2 unops in scalar form.
-let hasSideEffects = 0 in
-multiclass sse2_fp_unop_s_avx<bits<8> opc, string OpcodeStr> {
- def SDr : SDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src1, FR64:$src2),
- !strconcat(OpcodeStr,
- "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
- let mayLoad = 1 in {
- def SDm : SDI<opc, MRMSrcMem, (outs FR64:$dst), (ins FR64:$src1,f64mem:$src2),
- !strconcat(OpcodeStr,
- "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
- def SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, sdmem:$src2),
- !strconcat(OpcodeStr,
- "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
- }
-}
-
/// sse2_fp_unop_p - SSE2 unops in vector forms.
multiclass sse2_fp_unop_p<bits<8> opc, string OpcodeStr,
SDNode OpNode, OpndItins itins> {
+let Predicates = [HasAVX] in {
+ def V#NAME#PDr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat("v", OpcodeStr,
+ "pd\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))],
+ itins.rr>, VEX;
+ def V#NAME#PDm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ !strconcat("v", OpcodeStr,
+ "pd\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (OpNode (memopv2f64 addr:$src)))],
+ itins.rm>, VEX;
+ def V#NAME#PDYr : PDI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+ !strconcat("v", OpcodeStr,
+ "pd\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (v4f64 (OpNode VR256:$src)))],
+ itins.rr>, VEX, VEX_L;
+ def V#NAME#PDYm : PDI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
+ !strconcat("v", OpcodeStr,
+ "pd\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (OpNode (memopv4f64 addr:$src)))],
+ itins.rm>, VEX, VEX_L;
+}
+
def PDr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (v2f64 (OpNode VR128:$src)))], itins.rr>;
@@ -3153,82 +3217,24 @@ multiclass sse2_fp_unop_p<bits<8> opc, string OpcodeStr,
[(set VR128:$dst, (OpNode (memopv2f64 addr:$src)))], itins.rm>;
}
-/// sse2_fp_unop_p_y - AVX SSE2 256-bit unops in vector forms.
-multiclass sse2_fp_unop_p_y<bits<8> opc, string OpcodeStr, SDNode OpNode,
- OpndItins itins> {
- def PDYr : PDI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
- !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
- [(set VR256:$dst, (v4f64 (OpNode VR256:$src)))],
- itins.rr>, VEX_L;
- def PDYm : PDI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
- !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
- [(set VR256:$dst, (OpNode (memopv4f64 addr:$src)))],
- itins.rm>, VEX_L;
-}
-
-/// sse2_fp_unop_p_int - SSE2 intrinsic unops in vector forms.
-multiclass sse2_fp_unop_p_int<bits<8> opc, string OpcodeStr,
- Intrinsic V2F64Int, OpndItins itins> {
- def PDr_Int : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (V2F64Int VR128:$src))],
- itins.rr>;
- def PDm_Int : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (V2F64Int (memopv2f64 addr:$src)))],
- itins.rm>;
-}
-
-/// sse2_fp_unop_p_y_int - AVX 256-bit intrinsic unops in vector forms.
-multiclass sse2_fp_unop_p_y_int<bits<8> opc, string OpcodeStr,
- Intrinsic V2F64Int, OpndItins itins> {
- def PDYr_Int : PDI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
- !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
- [(set VR256:$dst, (V2F64Int VR256:$src))],
- itins.rr>, VEX_L;
- def PDYm_Int : PDI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
- !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
- [(set VR256:$dst, (V2F64Int (memopv4f64 addr:$src)))],
- itins.rm>, VEX_L;
-}
+// Square root.
+defm SQRT : sse1_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse_sqrt_ss,
+ SSE_SQRTS>,
+ sse1_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTP>,
+ sse2_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse2_sqrt_sd,
+ SSE_SQRTS>,
+ sse2_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTP>;
-let Predicates = [HasAVX] in {
- // Square root.
- defm VSQRT : sse1_fp_unop_s_avx<0x51, "vsqrt">,
- sse2_fp_unop_s_avx<0x51, "vsqrt">, VEX_4V, VEX_LIG;
-
- defm VSQRT : sse1_fp_unop_p<0x51, "vsqrt", fsqrt, SSE_SQRTP>,
- sse2_fp_unop_p<0x51, "vsqrt", fsqrt, SSE_SQRTP>,
- sse1_fp_unop_p_y<0x51, "vsqrt", fsqrt, SSE_SQRTP>,
- sse2_fp_unop_p_y<0x51, "vsqrt", fsqrt, SSE_SQRTP>,
- sse1_fp_unop_p_int<0x51, "vsqrt", int_x86_sse_sqrt_ps,
- SSE_SQRTP>,
- sse2_fp_unop_p_int<0x51, "vsqrt", int_x86_sse2_sqrt_pd,
- SSE_SQRTP>,
- sse1_fp_unop_p_y_int<0x51, "vsqrt", int_x86_avx_sqrt_ps_256,
- SSE_SQRTP>,
- sse2_fp_unop_p_y_int<0x51, "vsqrt", int_x86_avx_sqrt_pd_256,
- SSE_SQRTP>,
- VEX;
-
- // Reciprocal approximations. Note that these typically require refinement
- // in order to obtain suitable precision.
- defm VRSQRT : sse1_fp_unop_s_avx<0x52, "vrsqrt">, VEX_4V, VEX_LIG;
- defm VRSQRT : sse1_fp_unop_p<0x52, "vrsqrt", X86frsqrt, SSE_SQRTP>,
- sse1_fp_unop_p_y<0x52, "vrsqrt", X86frsqrt, SSE_SQRTP>,
- sse1_fp_unop_p_y_int<0x52, "vrsqrt", int_x86_avx_rsqrt_ps_256,
- SSE_SQRTP>,
- sse1_fp_unop_p_int<0x52, "vrsqrt", int_x86_sse_rsqrt_ps,
- SSE_SQRTP>, VEX;
-
- defm VRCP : sse1_fp_unop_s_avx<0x53, "vrcp">, VEX_4V, VEX_LIG;
- defm VRCP : sse1_fp_unop_p<0x53, "vrcp", X86frcp, SSE_RCPP>,
- sse1_fp_unop_p_y<0x53, "vrcp", X86frcp, SSE_RCPP>,
- sse1_fp_unop_p_y_int<0x53, "vrcp", int_x86_avx_rcp_ps_256,
- SSE_RCPP>,
- sse1_fp_unop_p_int<0x53, "vrcp", int_x86_sse_rcp_ps,
- SSE_RCPP>, VEX;
-}
+// Reciprocal approximations. Note that these typically require refinement
+// in order to obtain suitable precision.
+defm RSQRT : sse1_fp_unop_rw<0x52, "rsqrt", X86frsqrt, SSE_SQRTS>,
+ sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt, SSE_SQRTP>,
+ sse1_fp_unop_p_int<0x52, "rsqrt", int_x86_sse_rsqrt_ps,
+ int_x86_avx_rsqrt_ps_256, SSE_SQRTP>;
+defm RCP : sse1_fp_unop_rw<0x53, "rcp", X86frcp, SSE_RCPS>,
+ sse1_fp_unop_p<0x53, "rcp", X86frcp, SSE_RCPP>,
+ sse1_fp_unop_p_int<0x53, "rcp", int_x86_sse_rcp_ps,
+ int_x86_avx_rcp_ps_256, SSE_RCPP>;
def : Pat<(f32 (fsqrt FR32:$src)),
(VSQRTSSr (f32 (IMPLICIT_DEF)), FR32:$src)>, Requires<[HasAVX]>;
@@ -3283,59 +3289,11 @@ let Predicates = [HasAVX] in {
(VRCPSSm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>;
}
-// Square root.
-defm SQRT : sse1_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse_sqrt_ss,
- SSE_SQRTS>,
- sse1_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTS>,
- sse1_fp_unop_p_int<0x51, "sqrt", int_x86_sse_sqrt_ps, SSE_SQRTS>,
- sse2_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse2_sqrt_sd,
- SSE_SQRTS>,
- sse2_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTS>,
- sse2_fp_unop_p_int<0x51, "sqrt", int_x86_sse2_sqrt_pd, SSE_SQRTS>;
-
-/// sse1_fp_unop_s_rw - SSE1 unops where vector form has a read-write operand.
-multiclass sse1_fp_unop_rw<bits<8> opc, string OpcodeStr, SDNode OpNode,
- Intrinsic F32Int, OpndItins itins> {
- def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
- !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
- [(set FR32:$dst, (OpNode FR32:$src))]>;
- // For scalar unary operations, fold a load into the operation
- // only in OptForSize mode. It eliminates an instruction, but it also
- // eliminates a whole-register clobber (the load), so it introduces a
- // partial register update condition.
- def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src),
- !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
- [(set FR32:$dst, (OpNode (load addr:$src)))], itins.rm>, XS,
- Requires<[UseSSE1, OptForSize]>;
- let Constraints = "$src1 = $dst" in {
- def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
- !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
- [], itins.rr>;
- def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, ssmem:$src2),
- !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
- [], itins.rm>;
- }
-}
-
// Reciprocal approximations. Note that these typically require refinement
// in order to obtain suitable precision.
-defm RSQRT : sse1_fp_unop_rw<0x52, "rsqrt", X86frsqrt, int_x86_sse_rsqrt_ss,
- SSE_SQRTS>,
- sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt, SSE_SQRTS>,
- sse1_fp_unop_p_int<0x52, "rsqrt", int_x86_sse_rsqrt_ps,
- SSE_SQRTS>;
let Predicates = [UseSSE1] in {
def : Pat<(int_x86_sse_rsqrt_ss VR128:$src),
(RSQRTSSr_Int VR128:$src, VR128:$src)>;
-}
-
-defm RCP : sse1_fp_unop_rw<0x53, "rcp", X86frcp, int_x86_sse_rcp_ss,
- SSE_RCPS>,
- sse1_fp_unop_p<0x53, "rcp", X86frcp, SSE_RCPS>,
- sse1_fp_unop_p_int<0x53, "rcp", int_x86_sse_rcp_ps, SSE_RCPS>;
-let Predicates = [UseSSE1] in {
def : Pat<(int_x86_sse_rcp_ss VR128:$src),
(RCPSSr_Int VR128:$src, VR128:$src)>;
}
@@ -3508,7 +3466,7 @@ def VMOVDQUYrr : VSSI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
}
// For Disassembler
-let isCodeGenOnly = 1 in {
+let isCodeGenOnly = 1, hasSideEffects = 0 in {
def VMOVDQArr_REV : VPDI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
"movdqa\t{$src, $dst|$dst, $src}", [],
IIC_SSE_MOVA_P_RR>,
@@ -3571,7 +3529,7 @@ def MOVDQUrr : I<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
[], IIC_SSE_MOVU_P_RR>, XS, Requires<[UseSSE2]>;
// For Disassembler
-let isCodeGenOnly = 1 in {
+let isCodeGenOnly = 1, hasSideEffects = 0 in {
def MOVDQArr_REV : PDI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
"movdqa\t{$src, $dst|$dst, $src}", [],
IIC_SSE_MOVA_P_RR>;
@@ -3650,6 +3608,24 @@ multiclass PDI_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId,
itins.rm>;
}
+multiclass PDI_binop_all_int<bits<8> opc, string OpcodeStr, Intrinsic IntId128,
+ Intrinsic IntId256, OpndItins itins,
+ bit IsCommutable = 0> {
+let Predicates = [HasAVX] in
+ defm V#NAME : PDI_binop_rm_int<opc, !strconcat("v", OpcodeStr), IntId128,
+ VR128, memopv2i64, i128mem, itins,
+ IsCommutable, 0>, VEX_4V;
+
+let Constraints = "$src1 = $dst" in
+ defm NAME : PDI_binop_rm_int<opc, OpcodeStr, IntId128, VR128, memopv2i64,
+ i128mem, itins, IsCommutable, 1>;
+
+let Predicates = [HasAVX2] in
+ defm V#NAME#Y : PDI_binop_rm_int<opc, !strconcat("v", OpcodeStr), IntId256,
+ VR256, memopv4i64, i256mem, itins,
+ IsCommutable, 0>, VEX_4V, VEX_L;
+}
+
multiclass PDI_binop_rmi<bits<8> opc, bits<8> opc2, Format ImmForm,
string OpcodeStr, SDNode OpNode,
SDNode OpNode2, RegisterClass RC,
@@ -3679,7 +3655,7 @@ multiclass PDI_binop_rmi<bits<8> opc, bits<8> opc2, Format ImmForm,
[(set RC:$dst, (DstVT (OpNode2 RC:$src1, (i32 imm:$src2))))], itins.ri>;
}
-/// PDI_binop_rm - Simple SSE2 binary operator with different src and dst types
+/// PDI_binop_rm2 - Simple SSE2 binary operator with different src and dst types
multiclass PDI_binop_rm2<bits<8> opc, string OpcodeStr, SDNode OpNode,
ValueType DstVT, ValueType SrcVT, RegisterClass RC,
PatFrag memop_frag, X86MemOperand x86memop,
@@ -3702,249 +3678,75 @@ multiclass PDI_binop_rm2<bits<8> opc, string OpcodeStr, SDNode OpNode,
}
} // ExeDomain = SSEPackedInt
-// 128-bit Integer Arithmetic
+defm PADDB : PDI_binop_all<0xFC, "paddb", add, v16i8, v32i8,
+ SSE_INTALU_ITINS_P, 1>;
+defm PADDW : PDI_binop_all<0xFD, "paddw", add, v8i16, v16i16,
+ SSE_INTALU_ITINS_P, 1>;
+defm PADDD : PDI_binop_all<0xFE, "paddd", add, v4i32, v8i32,
+ SSE_INTALU_ITINS_P, 1>;
+defm PADDQ : PDI_binop_all<0xD4, "paddq", add, v2i64, v4i64,
+ SSE_INTALUQ_ITINS_P, 1>;
+defm PMULLW : PDI_binop_all<0xD5, "pmullw", mul, v8i16, v16i16,
+ SSE_INTMUL_ITINS_P, 1>;
+defm PSUBB : PDI_binop_all<0xF8, "psubb", sub, v16i8, v32i8,
+ SSE_INTALU_ITINS_P, 0>;
+defm PSUBW : PDI_binop_all<0xF9, "psubw", sub, v8i16, v16i16,
+ SSE_INTALU_ITINS_P, 0>;
+defm PSUBD : PDI_binop_all<0xFA, "psubd", sub, v4i32, v8i32,
+ SSE_INTALU_ITINS_P, 0>;
+defm PSUBQ : PDI_binop_all<0xFB, "psubq", sub, v2i64, v4i64,
+ SSE_INTALUQ_ITINS_P, 0>;
+defm PSUBUSB : PDI_binop_all<0xD8, "psubusb", X86subus, v16i8, v32i8,
+ SSE_INTALU_ITINS_P, 0>;
+defm PSUBUSW : PDI_binop_all<0xD9, "psubusw", X86subus, v8i16, v16i16,
+ SSE_INTALU_ITINS_P, 0>;
+defm PMINUB : PDI_binop_all<0xDA, "pminub", X86umin, v16i8, v32i8,
+ SSE_INTALU_ITINS_P, 1>;
+defm PMINSW : PDI_binop_all<0xEA, "pminsw", X86smin, v8i16, v16i16,
+ SSE_INTALU_ITINS_P, 1>;
+defm PMAXUB : PDI_binop_all<0xDE, "pmaxub", X86umax, v16i8, v32i8,
+ SSE_INTALU_ITINS_P, 1>;
+defm PMAXSW : PDI_binop_all<0xEE, "pmaxsw", X86smax, v8i16, v16i16,
+ SSE_INTALU_ITINS_P, 1>;
-let Predicates = [HasAVX] in {
-defm VPADDB : PDI_binop_rm<0xFC, "vpaddb", add, v16i8, VR128, memopv2i64,
- i128mem, SSE_INTALU_ITINS_P, 1, 0 /*3addr*/>,
- VEX_4V;
-defm VPADDW : PDI_binop_rm<0xFD, "vpaddw", add, v8i16, VR128, memopv2i64,
- i128mem, SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
-defm VPADDD : PDI_binop_rm<0xFE, "vpaddd", add, v4i32, VR128, memopv2i64,
- i128mem, SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
-defm VPADDQ : PDI_binop_rm<0xD4, "vpaddq", add, v2i64, VR128, memopv2i64,
- i128mem, SSE_INTALUQ_ITINS_P, 1, 0>, VEX_4V;
-defm VPMULLW : PDI_binop_rm<0xD5, "vpmullw", mul, v8i16, VR128, memopv2i64,
- i128mem, SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V;
-defm VPSUBB : PDI_binop_rm<0xF8, "vpsubb", sub, v16i8, VR128, memopv2i64,
- i128mem, SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
-defm VPSUBW : PDI_binop_rm<0xF9, "vpsubw", sub, v8i16, VR128, memopv2i64,
- i128mem, SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
-defm VPSUBD : PDI_binop_rm<0xFA, "vpsubd", sub, v4i32, VR128, memopv2i64,
- i128mem, SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
-defm VPSUBQ : PDI_binop_rm<0xFB, "vpsubq", sub, v2i64, VR128, memopv2i64,
- i128mem, SSE_INTALUQ_ITINS_P, 0, 0>, VEX_4V;
+// Intrinsic forms
+defm PSUBSB : PDI_binop_all_int<0xE8, "psubsb", int_x86_sse2_psubs_b,
+ int_x86_avx2_psubs_b, SSE_INTALU_ITINS_P, 0>;
+defm PSUBSW : PDI_binop_all_int<0xE9, "psubsw" , int_x86_sse2_psubs_w,
+ int_x86_avx2_psubs_w, SSE_INTALU_ITINS_P, 0>;
+defm PADDSB : PDI_binop_all_int<0xEC, "paddsb" , int_x86_sse2_padds_b,
+ int_x86_avx2_padds_b, SSE_INTALU_ITINS_P, 1>;
+defm PADDSW : PDI_binop_all_int<0xED, "paddsw" , int_x86_sse2_padds_w,
+ int_x86_avx2_padds_w, SSE_INTALU_ITINS_P, 1>;
+defm PADDUSB : PDI_binop_all_int<0xDC, "paddusb", int_x86_sse2_paddus_b,
+ int_x86_avx2_paddus_b, SSE_INTALU_ITINS_P, 1>;
+defm PADDUSW : PDI_binop_all_int<0xDD, "paddusw", int_x86_sse2_paddus_w,
+ int_x86_avx2_paddus_w, SSE_INTALU_ITINS_P, 1>;
+defm PMULHUW : PDI_binop_all_int<0xE4, "pmulhuw", int_x86_sse2_pmulhu_w,
+ int_x86_avx2_pmulhu_w, SSE_INTMUL_ITINS_P, 1>;
+defm PMULHW : PDI_binop_all_int<0xE5, "pmulhw" , int_x86_sse2_pmulh_w,
+ int_x86_avx2_pmulh_w, SSE_INTMUL_ITINS_P, 1>;
+defm PMADDWD : PDI_binop_all_int<0xF5, "pmaddwd", int_x86_sse2_pmadd_wd,
+ int_x86_avx2_pmadd_wd, SSE_PMADD, 1>;
+defm PAVGB : PDI_binop_all_int<0xE0, "pavgb", int_x86_sse2_pavg_b,
+ int_x86_avx2_pavg_b, SSE_INTALU_ITINS_P, 1>;
+defm PAVGW : PDI_binop_all_int<0xE3, "pavgw", int_x86_sse2_pavg_w,
+ int_x86_avx2_pavg_w, SSE_INTALU_ITINS_P, 1>;
+defm PSADBW : PDI_binop_all_int<0xF6, "psadbw", int_x86_sse2_psad_bw,
+ int_x86_avx2_psad_bw, SSE_INTALU_ITINS_P, 1>;
+
+let Predicates = [HasAVX] in
defm VPMULUDQ : PDI_binop_rm2<0xF4, "vpmuludq", X86pmuludq, v2i64, v4i32, VR128,
memopv2i64, i128mem, SSE_INTMUL_ITINS_P, 1, 0>,
VEX_4V;
-
-// Intrinsic forms
-defm VPSUBSB : PDI_binop_rm_int<0xE8, "vpsubsb" , int_x86_sse2_psubs_b,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
-defm VPSUBSW : PDI_binop_rm_int<0xE9, "vpsubsw" , int_x86_sse2_psubs_w,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
-defm VPSUBUSB : PDI_binop_rm_int<0xD8, "vpsubusb", int_x86_sse2_psubus_b,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
-defm VPSUBUSW : PDI_binop_rm_int<0xD9, "vpsubusw", int_x86_sse2_psubus_w,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
-defm VPADDSB : PDI_binop_rm_int<0xEC, "vpaddsb" , int_x86_sse2_padds_b,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
-defm VPADDSW : PDI_binop_rm_int<0xED, "vpaddsw" , int_x86_sse2_padds_w,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
-defm VPADDUSB : PDI_binop_rm_int<0xDC, "vpaddusb", int_x86_sse2_paddus_b,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
-defm VPADDUSW : PDI_binop_rm_int<0xDD, "vpaddusw", int_x86_sse2_paddus_w,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
-defm VPMULHUW : PDI_binop_rm_int<0xE4, "vpmulhuw", int_x86_sse2_pmulhu_w,
- VR128, memopv2i64, i128mem,
- SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V;
-defm VPMULHW : PDI_binop_rm_int<0xE5, "vpmulhw" , int_x86_sse2_pmulh_w,
- VR128, memopv2i64, i128mem,
- SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V;
-defm VPMADDWD : PDI_binop_rm_int<0xF5, "vpmaddwd", int_x86_sse2_pmadd_wd,
- VR128, memopv2i64, i128mem,
- SSE_PMADD, 1, 0>, VEX_4V;
-defm VPAVGB : PDI_binop_rm_int<0xE0, "vpavgb", int_x86_sse2_pavg_b,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
-defm VPAVGW : PDI_binop_rm_int<0xE3, "vpavgw", int_x86_sse2_pavg_w,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
-defm VPMINUB : PDI_binop_rm_int<0xDA, "vpminub", int_x86_sse2_pminu_b,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
-defm VPMINSW : PDI_binop_rm_int<0xEA, "vpminsw", int_x86_sse2_pmins_w,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
-defm VPMAXUB : PDI_binop_rm_int<0xDE, "vpmaxub", int_x86_sse2_pmaxu_b,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
-defm VPMAXSW : PDI_binop_rm_int<0xEE, "vpmaxsw", int_x86_sse2_pmaxs_w,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
-defm VPSADBW : PDI_binop_rm_int<0xF6, "vpsadbw", int_x86_sse2_psad_bw,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
-}
-
-let Predicates = [HasAVX2] in {
-defm VPADDBY : PDI_binop_rm<0xFC, "vpaddb", add, v32i8, VR256, memopv4i64,
- i256mem, SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
-defm VPADDWY : PDI_binop_rm<0xFD, "vpaddw", add, v16i16, VR256, memopv4i64,
- i256mem, SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
-defm VPADDDY : PDI_binop_rm<0xFE, "vpaddd", add, v8i32, VR256, memopv4i64,
- i256mem, SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
-defm VPADDQY : PDI_binop_rm<0xD4, "vpaddq", add, v4i64, VR256, memopv4i64,
- i256mem, SSE_INTALUQ_ITINS_P, 1, 0>, VEX_4V, VEX_L;
-defm VPMULLWY : PDI_binop_rm<0xD5, "vpmullw", mul, v16i16, VR256, memopv4i64,
- i256mem, SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V, VEX_L;
-defm VPSUBBY : PDI_binop_rm<0xF8, "vpsubb", sub, v32i8, VR256, memopv4i64,
- i256mem, SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L;
-defm VPSUBWY : PDI_binop_rm<0xF9, "vpsubw", sub, v16i16,VR256, memopv4i64,
- i256mem, SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L;
-defm VPSUBDY : PDI_binop_rm<0xFA, "vpsubd", sub, v8i32, VR256, memopv4i64,
- i256mem, SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L;
-defm VPSUBQY : PDI_binop_rm<0xFB, "vpsubq", sub, v4i64, VR256, memopv4i64,
- i256mem, SSE_INTALUQ_ITINS_P, 0, 0>, VEX_4V, VEX_L;
+let Predicates = [HasAVX2] in
defm VPMULUDQY : PDI_binop_rm2<0xF4, "vpmuludq", X86pmuludq, v4i64, v8i32,
VR256, memopv4i64, i256mem,
SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V, VEX_L;
-
-// Intrinsic forms
-defm VPSUBSBY : PDI_binop_rm_int<0xE8, "vpsubsb" , int_x86_avx2_psubs_b,
- VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L;
-defm VPSUBSWY : PDI_binop_rm_int<0xE9, "vpsubsw" , int_x86_avx2_psubs_w,
- VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L;
-defm VPSUBUSBY : PDI_binop_rm_int<0xD8, "vpsubusb", int_x86_avx2_psubus_b,
- VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L;
-defm VPSUBUSWY : PDI_binop_rm_int<0xD9, "vpsubusw", int_x86_avx2_psubus_w,
- VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L;
-defm VPADDSBY : PDI_binop_rm_int<0xEC, "vpaddsb" , int_x86_avx2_padds_b,
- VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
-defm VPADDSWY : PDI_binop_rm_int<0xED, "vpaddsw" , int_x86_avx2_padds_w,
- VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
-defm VPADDUSBY : PDI_binop_rm_int<0xDC, "vpaddusb", int_x86_avx2_paddus_b,
- VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
-defm VPADDUSWY : PDI_binop_rm_int<0xDD, "vpaddusw", int_x86_avx2_paddus_w,
- VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
-defm VPMULHUWY : PDI_binop_rm_int<0xE4, "vpmulhuw", int_x86_avx2_pmulhu_w,
- VR256, memopv4i64, i256mem,
- SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V, VEX_L;
-defm VPMULHWY : PDI_binop_rm_int<0xE5, "vpmulhw" , int_x86_avx2_pmulh_w,
- VR256, memopv4i64, i256mem,
- SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V, VEX_L;
-defm VPMADDWDY : PDI_binop_rm_int<0xF5, "vpmaddwd", int_x86_avx2_pmadd_wd,
- VR256, memopv4i64, i256mem,
- SSE_PMADD, 1, 0>, VEX_4V, VEX_L;
-defm VPAVGBY : PDI_binop_rm_int<0xE0, "vpavgb", int_x86_avx2_pavg_b,
- VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
-defm VPAVGWY : PDI_binop_rm_int<0xE3, "vpavgw", int_x86_avx2_pavg_w,
- VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
-defm VPMINUBY : PDI_binop_rm_int<0xDA, "vpminub", int_x86_avx2_pminu_b,
- VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
-defm VPMINSWY : PDI_binop_rm_int<0xEA, "vpminsw", int_x86_avx2_pmins_w,
- VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
-defm VPMAXUBY : PDI_binop_rm_int<0xDE, "vpmaxub", int_x86_avx2_pmaxu_b,
- VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
-defm VPMAXSWY : PDI_binop_rm_int<0xEE, "vpmaxsw", int_x86_avx2_pmaxs_w,
- VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
-defm VPSADBWY : PDI_binop_rm_int<0xF6, "vpsadbw", int_x86_avx2_psad_bw,
- VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
-}
-
-let Constraints = "$src1 = $dst" in {
-defm PADDB : PDI_binop_rm<0xFC, "paddb", add, v16i8, VR128, memopv2i64,
- i128mem, SSE_INTALU_ITINS_P, 1>;
-defm PADDW : PDI_binop_rm<0xFD, "paddw", add, v8i16, VR128, memopv2i64,
- i128mem, SSE_INTALU_ITINS_P, 1>;
-defm PADDD : PDI_binop_rm<0xFE, "paddd", add, v4i32, VR128, memopv2i64,
- i128mem, SSE_INTALU_ITINS_P, 1>;
-defm PADDQ : PDI_binop_rm<0xD4, "paddq", add, v2i64, VR128, memopv2i64,
- i128mem, SSE_INTALUQ_ITINS_P, 1>;
-defm PMULLW : PDI_binop_rm<0xD5, "pmullw", mul, v8i16, VR128, memopv2i64,
- i128mem, SSE_INTMUL_ITINS_P, 1>;
-defm PSUBB : PDI_binop_rm<0xF8, "psubb", sub, v16i8, VR128, memopv2i64,
- i128mem, SSE_INTALU_ITINS_P>;
-defm PSUBW : PDI_binop_rm<0xF9, "psubw", sub, v8i16, VR128, memopv2i64,
- i128mem, SSE_INTALU_ITINS_P>;
-defm PSUBD : PDI_binop_rm<0xFA, "psubd", sub, v4i32, VR128, memopv2i64,
- i128mem, SSE_INTALU_ITINS_P>;
-defm PSUBQ : PDI_binop_rm<0xFB, "psubq", sub, v2i64, VR128, memopv2i64,
- i128mem, SSE_INTALUQ_ITINS_P>;
+let Constraints = "$src1 = $dst" in
defm PMULUDQ : PDI_binop_rm2<0xF4, "pmuludq", X86pmuludq, v2i64, v4i32, VR128,
memopv2i64, i128mem, SSE_INTMUL_ITINS_P, 1>;
-// Intrinsic forms
-defm PSUBSB : PDI_binop_rm_int<0xE8, "psubsb" , int_x86_sse2_psubs_b,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P>;
-defm PSUBSW : PDI_binop_rm_int<0xE9, "psubsw" , int_x86_sse2_psubs_w,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P>;
-defm PSUBUSB : PDI_binop_rm_int<0xD8, "psubusb", int_x86_sse2_psubus_b,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P>;
-defm PSUBUSW : PDI_binop_rm_int<0xD9, "psubusw", int_x86_sse2_psubus_w,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P>;
-defm PADDSB : PDI_binop_rm_int<0xEC, "paddsb" , int_x86_sse2_padds_b,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1>;
-defm PADDSW : PDI_binop_rm_int<0xED, "paddsw" , int_x86_sse2_padds_w,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1>;
-defm PADDUSB : PDI_binop_rm_int<0xDC, "paddusb", int_x86_sse2_paddus_b,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1>;
-defm PADDUSW : PDI_binop_rm_int<0xDD, "paddusw", int_x86_sse2_paddus_w,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1>;
-defm PMULHUW : PDI_binop_rm_int<0xE4, "pmulhuw", int_x86_sse2_pmulhu_w,
- VR128, memopv2i64, i128mem,
- SSE_INTMUL_ITINS_P, 1>;
-defm PMULHW : PDI_binop_rm_int<0xE5, "pmulhw" , int_x86_sse2_pmulh_w,
- VR128, memopv2i64, i128mem,
- SSE_INTMUL_ITINS_P, 1>;
-defm PMADDWD : PDI_binop_rm_int<0xF5, "pmaddwd", int_x86_sse2_pmadd_wd,
- VR128, memopv2i64, i128mem,
- SSE_PMADD, 1>;
-defm PAVGB : PDI_binop_rm_int<0xE0, "pavgb", int_x86_sse2_pavg_b,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1>;
-defm PAVGW : PDI_binop_rm_int<0xE3, "pavgw", int_x86_sse2_pavg_w,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1>;
-defm PMINUB : PDI_binop_rm_int<0xDA, "pminub", int_x86_sse2_pminu_b,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1>;
-defm PMINSW : PDI_binop_rm_int<0xEA, "pminsw", int_x86_sse2_pmins_w,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1>;
-defm PMAXUB : PDI_binop_rm_int<0xDE, "pmaxub", int_x86_sse2_pmaxu_b,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1>;
-defm PMAXSW : PDI_binop_rm_int<0xEE, "pmaxsw", int_x86_sse2_pmaxs_w,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1>;
-defm PSADBW : PDI_binop_rm_int<0xF6, "psadbw", int_x86_sse2_psad_bw,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1>;
-
-} // Constraints = "$src1 = $dst"
-
//===---------------------------------------------------------------------===//
// SSE2 - Packed Integer Logical Instructions
//===---------------------------------------------------------------------===//
@@ -4126,186 +3928,106 @@ let Predicates = [UseSSE2] in {
// SSE2 - Packed Integer Comparison Instructions
//===---------------------------------------------------------------------===//
-let Predicates = [HasAVX] in {
- defm VPCMPEQB : PDI_binop_rm<0x74, "vpcmpeqb", X86pcmpeq, v16i8,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
- defm VPCMPEQW : PDI_binop_rm<0x75, "vpcmpeqw", X86pcmpeq, v8i16,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
- defm VPCMPEQD : PDI_binop_rm<0x76, "vpcmpeqd", X86pcmpeq, v4i32,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
- defm VPCMPGTB : PDI_binop_rm<0x64, "vpcmpgtb", X86pcmpgt, v16i8,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
- defm VPCMPGTW : PDI_binop_rm<0x65, "vpcmpgtw", X86pcmpgt, v8i16,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
- defm VPCMPGTD : PDI_binop_rm<0x66, "vpcmpgtd", X86pcmpgt, v4i32,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
-}
-
-let Predicates = [HasAVX2] in {
- defm VPCMPEQBY : PDI_binop_rm<0x74, "vpcmpeqb", X86pcmpeq, v32i8,
- VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
- defm VPCMPEQWY : PDI_binop_rm<0x75, "vpcmpeqw", X86pcmpeq, v16i16,
- VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
- defm VPCMPEQDY : PDI_binop_rm<0x76, "vpcmpeqd", X86pcmpeq, v8i32,
- VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
- defm VPCMPGTBY : PDI_binop_rm<0x64, "vpcmpgtb", X86pcmpgt, v32i8,
- VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L;
- defm VPCMPGTWY : PDI_binop_rm<0x65, "vpcmpgtw", X86pcmpgt, v16i16,
- VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L;
- defm VPCMPGTDY : PDI_binop_rm<0x66, "vpcmpgtd", X86pcmpgt, v8i32,
- VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L;
-}
-
-let Constraints = "$src1 = $dst" in {
- defm PCMPEQB : PDI_binop_rm<0x74, "pcmpeqb", X86pcmpeq, v16i8,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1>;
- defm PCMPEQW : PDI_binop_rm<0x75, "pcmpeqw", X86pcmpeq, v8i16,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1>;
- defm PCMPEQD : PDI_binop_rm<0x76, "pcmpeqd", X86pcmpeq, v4i32,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1>;
- defm PCMPGTB : PDI_binop_rm<0x64, "pcmpgtb", X86pcmpgt, v16i8,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P>;
- defm PCMPGTW : PDI_binop_rm<0x65, "pcmpgtw", X86pcmpgt, v8i16,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P>;
- defm PCMPGTD : PDI_binop_rm<0x66, "pcmpgtd", X86pcmpgt, v4i32,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P>;
-} // Constraints = "$src1 = $dst"
+defm PCMPEQB : PDI_binop_all<0x74, "pcmpeqb", X86pcmpeq, v16i8, v32i8,
+ SSE_INTALU_ITINS_P, 1>;
+defm PCMPEQW : PDI_binop_all<0x75, "pcmpeqw", X86pcmpeq, v8i16, v16i16,
+ SSE_INTALU_ITINS_P, 1>;
+defm PCMPEQD : PDI_binop_all<0x76, "pcmpeqd", X86pcmpeq, v4i32, v8i32,
+ SSE_INTALU_ITINS_P, 1>;
+defm PCMPGTB : PDI_binop_all<0x64, "pcmpgtb", X86pcmpgt, v16i8, v32i8,
+ SSE_INTALU_ITINS_P, 0>;
+defm PCMPGTW : PDI_binop_all<0x65, "pcmpgtw", X86pcmpgt, v8i16, v16i16,
+ SSE_INTALU_ITINS_P, 0>;
+defm PCMPGTD : PDI_binop_all<0x66, "pcmpgtd", X86pcmpgt, v4i32, v8i32,
+ SSE_INTALU_ITINS_P, 0>;
//===---------------------------------------------------------------------===//
// SSE2 - Packed Integer Pack Instructions
//===---------------------------------------------------------------------===//
-let Predicates = [HasAVX] in {
-defm VPACKSSWB : PDI_binop_rm_int<0x63, "vpacksswb", int_x86_sse2_packsswb_128,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
-defm VPACKSSDW : PDI_binop_rm_int<0x6B, "vpackssdw", int_x86_sse2_packssdw_128,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
-defm VPACKUSWB : PDI_binop_rm_int<0x67, "vpackuswb", int_x86_sse2_packuswb_128,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
-}
-
-let Predicates = [HasAVX2] in {
-defm VPACKSSWBY : PDI_binop_rm_int<0x63, "vpacksswb", int_x86_avx2_packsswb,
- VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L;
-defm VPACKSSDWY : PDI_binop_rm_int<0x6B, "vpackssdw", int_x86_avx2_packssdw,
- VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L;
-defm VPACKUSWBY : PDI_binop_rm_int<0x67, "vpackuswb", int_x86_avx2_packuswb,
- VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L;
-}
-
-let Constraints = "$src1 = $dst" in {
-defm PACKSSWB : PDI_binop_rm_int<0x63, "packsswb", int_x86_sse2_packsswb_128,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P>;
-defm PACKSSDW : PDI_binop_rm_int<0x6B, "packssdw", int_x86_sse2_packssdw_128,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P>;
-defm PACKUSWB : PDI_binop_rm_int<0x67, "packuswb", int_x86_sse2_packuswb_128,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P>;
-} // Constraints = "$src1 = $dst"
+defm PACKSSWB : PDI_binop_all_int<0x63, "packsswb", int_x86_sse2_packsswb_128,
+ int_x86_avx2_packsswb, SSE_INTALU_ITINS_P, 0>;
+defm PACKSSDW : PDI_binop_all_int<0x6B, "packssdw", int_x86_sse2_packssdw_128,
+ int_x86_avx2_packssdw, SSE_INTALU_ITINS_P, 0>;
+defm PACKUSWB : PDI_binop_all_int<0x67, "packuswb", int_x86_sse2_packuswb_128,
+ int_x86_avx2_packuswb, SSE_INTALU_ITINS_P, 0>;
//===---------------------------------------------------------------------===//
// SSE2 - Packed Integer Shuffle Instructions
//===---------------------------------------------------------------------===//
let ExeDomain = SSEPackedInt in {
-multiclass sse2_pshuffle<string OpcodeStr, ValueType vt, SDNode OpNode> {
-def ri : Ii8<0x70, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, i8imm:$src2),
- !strconcat(OpcodeStr,
- "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VR128:$dst, (vt (OpNode VR128:$src1, (i8 imm:$src2))))],
- IIC_SSE_PSHUF>;
-def mi : Ii8<0x70, MRMSrcMem,
- (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2),
- !strconcat(OpcodeStr,
- "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VR128:$dst,
- (vt (OpNode (bitconvert (memopv2i64 addr:$src1)),
- (i8 imm:$src2))))],
- IIC_SSE_PSHUF>;
-}
-
-multiclass sse2_pshuffle_y<string OpcodeStr, ValueType vt, SDNode OpNode> {
-def Yri : Ii8<0x70, MRMSrcReg,
- (outs VR256:$dst), (ins VR256:$src1, i8imm:$src2),
- !strconcat(OpcodeStr,
- "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VR256:$dst, (vt (OpNode VR256:$src1, (i8 imm:$src2))))]>;
-def Ymi : Ii8<0x70, MRMSrcMem,
- (outs VR256:$dst), (ins i256mem:$src1, i8imm:$src2),
- !strconcat(OpcodeStr,
- "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VR256:$dst,
- (vt (OpNode (bitconvert (memopv4i64 addr:$src1)),
- (i8 imm:$src2))))]>;
-}
-} // ExeDomain = SSEPackedInt
-
+multiclass sse2_pshuffle<string OpcodeStr, ValueType vt128, ValueType vt256,
+ SDNode OpNode> {
let Predicates = [HasAVX] in {
- let AddedComplexity = 5 in
- defm VPSHUFD : sse2_pshuffle<"vpshufd", v4i32, X86PShufd>, TB, OpSize, VEX;
-
- // SSE2 with ImmT == Imm8 and XS prefix.
- defm VPSHUFHW : sse2_pshuffle<"vpshufhw", v8i16, X86PShufhw>, XS, VEX;
-
- // SSE2 with ImmT == Imm8 and XD prefix.
- defm VPSHUFLW : sse2_pshuffle<"vpshuflw", v8i16, X86PShuflw>, XD, VEX;
-
- def : Pat<(v4f32 (X86PShufd (memopv4f32 addr:$src1), (i8 imm:$imm))),
- (VPSHUFDmi addr:$src1, imm:$imm)>;
- def : Pat<(v4f32 (X86PShufd VR128:$src1, (i8 imm:$imm))),
- (VPSHUFDri VR128:$src1, imm:$imm)>;
+ def V#NAME#ri : Ii8<0x70, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, i8imm:$src2),
+ !strconcat("v", OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128:$dst,
+ (vt128 (OpNode VR128:$src1, (i8 imm:$src2))))],
+ IIC_SSE_PSHUF>, VEX;
+ def V#NAME#mi : Ii8<0x70, MRMSrcMem, (outs VR128:$dst),
+ (ins i128mem:$src1, i8imm:$src2),
+ !strconcat("v", OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128:$dst,
+ (vt128 (OpNode (bitconvert (memopv2i64 addr:$src1)),
+ (i8 imm:$src2))))], IIC_SSE_PSHUF>, VEX;
}
let Predicates = [HasAVX2] in {
- defm VPSHUFD : sse2_pshuffle_y<"vpshufd", v8i32, X86PShufd>,
- TB, OpSize, VEX,VEX_L;
- defm VPSHUFHW : sse2_pshuffle_y<"vpshufhw", v16i16, X86PShufhw>,
- XS, VEX, VEX_L;
- defm VPSHUFLW : sse2_pshuffle_y<"vpshuflw", v16i16, X86PShuflw>,
- XD, VEX, VEX_L;
+ def V#NAME#Yri : Ii8<0x70, MRMSrcReg, (outs VR256:$dst),
+ (ins VR256:$src1, i8imm:$src2),
+ !strconcat("v", OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR256:$dst,
+ (vt256 (OpNode VR256:$src1, (i8 imm:$src2))))],
+ IIC_SSE_PSHUF>, VEX, VEX_L;
+ def V#NAME#Ymi : Ii8<0x70, MRMSrcMem, (outs VR256:$dst),
+ (ins i256mem:$src1, i8imm:$src2),
+ !strconcat("v", OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR256:$dst,
+ (vt256 (OpNode (bitconvert (memopv4i64 addr:$src1)),
+ (i8 imm:$src2))))], IIC_SSE_PSHUF>, VEX, VEX_L;
}
let Predicates = [UseSSE2] in {
- let AddedComplexity = 5 in
- defm PSHUFD : sse2_pshuffle<"pshufd", v4i32, X86PShufd>, TB, OpSize;
+ def ri : Ii8<0x70, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128:$dst,
+ (vt128 (OpNode VR128:$src1, (i8 imm:$src2))))],
+ IIC_SSE_PSHUF>;
+ def mi : Ii8<0x70, MRMSrcMem,
+ (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128:$dst,
+ (vt128 (OpNode (bitconvert (memopv2i64 addr:$src1)),
+ (i8 imm:$src2))))], IIC_SSE_PSHUF>;
+}
+}
+} // ExeDomain = SSEPackedInt
- // SSE2 with ImmT == Imm8 and XS prefix.
- defm PSHUFHW : sse2_pshuffle<"pshufhw", v8i16, X86PShufhw>, XS;
+defm PSHUFD : sse2_pshuffle<"pshufd", v4i32, v8i32, X86PShufd>, TB, OpSize;
+defm PSHUFHW : sse2_pshuffle<"pshufhw", v8i16, v16i16, X86PShufhw>, XS;
+defm PSHUFLW : sse2_pshuffle<"pshuflw", v8i16, v16i16, X86PShuflw>, XD;
- // SSE2 with ImmT == Imm8 and XD prefix.
- defm PSHUFLW : sse2_pshuffle<"pshuflw", v8i16, X86PShuflw>, XD;
+let Predicates = [HasAVX] in {
+ def : Pat<(v4f32 (X86PShufd (memopv4f32 addr:$src1), (i8 imm:$imm))),
+ (VPSHUFDmi addr:$src1, imm:$imm)>;
+ def : Pat<(v4f32 (X86PShufd VR128:$src1, (i8 imm:$imm))),
+ (VPSHUFDri VR128:$src1, imm:$imm)>;
+}
- def : Pat<(v4f32 (X86PShufd (memopv4f32 addr:$src1), (i8 imm:$imm))),
- (PSHUFDmi addr:$src1, imm:$imm)>;
- def : Pat<(v4f32 (X86PShufd VR128:$src1, (i8 imm:$imm))),
- (PSHUFDri VR128:$src1, imm:$imm)>;
+let Predicates = [UseSSE2] in {
+ def : Pat<(v4f32 (X86PShufd (memopv4f32 addr:$src1), (i8 imm:$imm))),
+ (PSHUFDmi addr:$src1, imm:$imm)>;
+ def : Pat<(v4f32 (X86PShufd VR128:$src1, (i8 imm:$imm))),
+ (PSHUFDri VR128:$src1, imm:$imm)>;
}
//===---------------------------------------------------------------------===//
@@ -5445,7 +5167,7 @@ defm PMULHRSW : SS3I_binop_rm_int<0x0B, "pmulhrsw",
// SSSE3 - Packed Align Instruction Patterns
//===---------------------------------------------------------------------===//
-multiclass ssse3_palign<string asm, bit Is2Addr = 1> {
+multiclass ssse3_palignr<string asm, bit Is2Addr = 1> {
let neverHasSideEffects = 1 in {
def R128rr : SS3AI<0x0F, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, i8imm:$src3),
@@ -5465,7 +5187,7 @@ multiclass ssse3_palign<string asm, bit Is2Addr = 1> {
}
}
-multiclass ssse3_palign_y<string asm, bit Is2Addr = 1> {
+multiclass ssse3_palignr_y<string asm, bit Is2Addr = 1> {
let neverHasSideEffects = 1 in {
def R256rr : SS3AI<0x0F, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2, i8imm:$src3),
@@ -5482,42 +5204,42 @@ multiclass ssse3_palign_y<string asm, bit Is2Addr = 1> {
}
let Predicates = [HasAVX] in
- defm VPALIGN : ssse3_palign<"vpalignr", 0>, VEX_4V;
+ defm VPALIGN : ssse3_palignr<"vpalignr", 0>, VEX_4V;
let Predicates = [HasAVX2] in
- defm VPALIGN : ssse3_palign_y<"vpalignr", 0>, VEX_4V, VEX_L;
+ defm VPALIGN : ssse3_palignr_y<"vpalignr", 0>, VEX_4V, VEX_L;
let Constraints = "$src1 = $dst", Predicates = [UseSSSE3] in
- defm PALIGN : ssse3_palign<"palignr">;
+ defm PALIGN : ssse3_palignr<"palignr">;
let Predicates = [HasAVX2] in {
-def : Pat<(v8i32 (X86PAlign VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+def : Pat<(v8i32 (X86PAlignr VR256:$src1, VR256:$src2, (i8 imm:$imm))),
(VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>;
-def : Pat<(v8f32 (X86PAlign VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+def : Pat<(v8f32 (X86PAlignr VR256:$src1, VR256:$src2, (i8 imm:$imm))),
(VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>;
-def : Pat<(v16i16 (X86PAlign VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+def : Pat<(v16i16 (X86PAlignr VR256:$src1, VR256:$src2, (i8 imm:$imm))),
(VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>;
-def : Pat<(v32i8 (X86PAlign VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+def : Pat<(v32i8 (X86PAlignr VR256:$src1, VR256:$src2, (i8 imm:$imm))),
(VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>;
}
let Predicates = [HasAVX] in {
-def : Pat<(v4i32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+def : Pat<(v4i32 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
(VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
-def : Pat<(v4f32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+def : Pat<(v4f32 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
(VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
-def : Pat<(v8i16 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+def : Pat<(v8i16 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
(VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
-def : Pat<(v16i8 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+def : Pat<(v16i8 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
(VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
}
let Predicates = [UseSSSE3] in {
-def : Pat<(v4i32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+def : Pat<(v4i32 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
(PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
-def : Pat<(v4f32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+def : Pat<(v4f32 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
(PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
-def : Pat<(v8i16 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+def : Pat<(v8i16 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
(PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
-def : Pat<(v16i8 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+def : Pat<(v16i8 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
(PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
}
@@ -5844,6 +5566,55 @@ defm VPMOVZXBQ : SS41I_binop_rm_int4_y<0x32, "vpmovzxbq",
defm PMOVSXBQ : SS41I_binop_rm_int2<0x22, "pmovsxbq", int_x86_sse41_pmovsxbq>;
defm PMOVZXBQ : SS41I_binop_rm_int2<0x32, "pmovzxbq", int_x86_sse41_pmovzxbq>;
+let Predicates = [HasAVX2] in {
+ def : Pat<(v16i16 (X86vsext (v16i8 VR128:$src))), (VPMOVSXBWYrr VR128:$src)>;
+ def : Pat<(v8i32 (X86vsext (v16i8 VR128:$src))), (VPMOVSXBDYrr VR128:$src)>;
+ def : Pat<(v4i64 (X86vsext (v16i8 VR128:$src))), (VPMOVSXBQYrr VR128:$src)>;
+
+ def : Pat<(v8i32 (X86vsext (v8i16 VR128:$src))), (VPMOVSXWDYrr VR128:$src)>;
+ def : Pat<(v4i64 (X86vsext (v8i16 VR128:$src))), (VPMOVSXWQYrr VR128:$src)>;
+
+ def : Pat<(v4i64 (X86vsext (v4i32 VR128:$src))), (VPMOVSXDQYrr VR128:$src)>;
+
+ def : Pat<(v16i16 (X86vsext (v32i8 VR256:$src))),
+ (VPMOVSXBWYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
+ def : Pat<(v8i32 (X86vsext (v32i8 VR256:$src))),
+ (VPMOVSXBDYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
+ def : Pat<(v4i64 (X86vsext (v32i8 VR256:$src))),
+ (VPMOVSXBQYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
+
+ def : Pat<(v8i32 (X86vsext (v16i16 VR256:$src))),
+ (VPMOVSXWDYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
+ def : Pat<(v4i64 (X86vsext (v16i16 VR256:$src))),
+ (VPMOVSXWQYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
+
+ def : Pat<(v4i64 (X86vsext (v8i32 VR256:$src))),
+ (VPMOVSXDQYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
+
+ def : Pat<(v8i32 (X86vsmovl (v8i16 (bitconvert (v2i64 (load addr:$src)))))),
+ (VPMOVSXWDYrm addr:$src)>;
+ def : Pat<(v4i64 (X86vsmovl (v4i32 (bitconvert (v2i64 (load addr:$src)))))),
+ (VPMOVSXDQYrm addr:$src)>;
+
+ def : Pat<(v8i32 (X86vsext (v16i8 (bitconvert (v2i64
+ (scalar_to_vector (loadi64 addr:$src))))))),
+ (VPMOVSXBDYrm addr:$src)>;
+ def : Pat<(v8i32 (X86vsext (v16i8 (bitconvert (v2f64
+ (scalar_to_vector (loadf64 addr:$src))))))),
+ (VPMOVSXBDYrm addr:$src)>;
+
+ def : Pat<(v4i64 (X86vsext (v8i16 (bitconvert (v2i64
+ (scalar_to_vector (loadi64 addr:$src))))))),
+ (VPMOVSXWQYrm addr:$src)>;
+ def : Pat<(v4i64 (X86vsext (v8i16 (bitconvert (v2f64
+ (scalar_to_vector (loadf64 addr:$src))))))),
+ (VPMOVSXWQYrm addr:$src)>;
+
+ def : Pat<(v4i64 (X86vsext (v16i8 (bitconvert (v4i32
+ (scalar_to_vector (loadi32 addr:$src))))))),
+ (VPMOVSXBQYrm addr:$src)>;
+}
+
let Predicates = [HasAVX] in {
// Common patterns involving scalar load
def : Pat<(int_x86_sse41_pmovsxbq
@@ -5858,6 +5629,15 @@ let Predicates = [HasAVX] in {
}
let Predicates = [UseSSE41] in {
+ def : Pat<(v8i16 (X86vsext (v16i8 VR128:$src))), (PMOVSXBWrr VR128:$src)>;
+ def : Pat<(v4i32 (X86vsext (v16i8 VR128:$src))), (PMOVSXBDrr VR128:$src)>;
+ def : Pat<(v2i64 (X86vsext (v16i8 VR128:$src))), (PMOVSXBQrr VR128:$src)>;
+
+ def : Pat<(v4i32 (X86vsext (v8i16 VR128:$src))), (PMOVSXWDrr VR128:$src)>;
+ def : Pat<(v2i64 (X86vsext (v8i16 VR128:$src))), (PMOVSXWQrr VR128:$src)>;
+
+ def : Pat<(v2i64 (X86vsext (v4i32 VR128:$src))), (PMOVSXDQrr VR128:$src)>;
+
// Common patterns involving scalar load
def : Pat<(int_x86_sse41_pmovsxbq
(bitconvert (v4i32 (X86vzmovl
@@ -5868,6 +5648,34 @@ let Predicates = [UseSSE41] in {
(bitconvert (v4i32 (X86vzmovl
(v4i32 (scalar_to_vector (loadi32 addr:$src))))))),
(PMOVZXBQrm addr:$src)>;
+
+ def : Pat<(v4i32 (X86vsext (v8i16 (bitconvert (v2i64
+ (scalar_to_vector (loadi64 addr:$src))))))),
+ (PMOVSXWDrm addr:$src)>;
+ def : Pat<(v4i32 (X86vsext (v8i16 (bitconvert (v2f64
+ (scalar_to_vector (loadf64 addr:$src))))))),
+ (PMOVSXWDrm addr:$src)>;
+ def : Pat<(v4i32 (X86vsext (v16i8 (bitconvert (v4i32
+ (scalar_to_vector (loadi32 addr:$src))))))),
+ (PMOVSXBDrm addr:$src)>;
+ def : Pat<(v2i64 (X86vsext (v8i16 (bitconvert (v4i32
+ (scalar_to_vector (loadi32 addr:$src))))))),
+ (PMOVSXWQrm addr:$src)>;
+ def : Pat<(v2i64 (X86vsext (v16i8 (bitconvert (v4i32
+ (scalar_to_vector (extloadi32i16 addr:$src))))))),
+ (PMOVSXBQrm addr:$src)>;
+ def : Pat<(v2i64 (X86vsext (v4i32 (bitconvert (v2i64
+ (scalar_to_vector (loadi64 addr:$src))))))),
+ (PMOVSXDQrm addr:$src)>;
+ def : Pat<(v2i64 (X86vsext (v4i32 (bitconvert (v2f64
+ (scalar_to_vector (loadf64 addr:$src))))))),
+ (PMOVSXDQrm addr:$src)>;
+ def : Pat<(v8i16 (X86vsext (v16i8 (bitconvert (v2i64
+ (scalar_to_vector (loadi64 addr:$src))))))),
+ (PMOVSXBWrm addr:$src)>;
+ def : Pat<(v8i16 (X86vsext (v16i8 (bitconvert (v2f64
+ (scalar_to_vector (loadf64 addr:$src))))))),
+ (PMOVSXBWrm addr:$src)>;
}
let Predicates = [HasAVX2] in {
@@ -5928,6 +5736,44 @@ let Predicates = [HasAVX] in {
(VPMOVZXDQrm addr:$src)>;
def : Pat<(v2i64 (X86vzext (v4i32 (bitconvert (v2i64 (X86vzload addr:$src)))))),
(VPMOVZXDQrm addr:$src)>;
+
+ def : Pat<(v8i16 (X86vsext (v16i8 VR128:$src))), (VPMOVSXBWrr VR128:$src)>;
+ def : Pat<(v4i32 (X86vsext (v16i8 VR128:$src))), (VPMOVSXBDrr VR128:$src)>;
+ def : Pat<(v2i64 (X86vsext (v16i8 VR128:$src))), (VPMOVSXBQrr VR128:$src)>;
+
+ def : Pat<(v4i32 (X86vsext (v8i16 VR128:$src))), (VPMOVSXWDrr VR128:$src)>;
+ def : Pat<(v2i64 (X86vsext (v8i16 VR128:$src))), (VPMOVSXWQrr VR128:$src)>;
+
+ def : Pat<(v2i64 (X86vsext (v4i32 VR128:$src))), (VPMOVSXDQrr VR128:$src)>;
+
+ def : Pat<(v4i32 (X86vsext (v8i16 (bitconvert (v2i64
+ (scalar_to_vector (loadi64 addr:$src))))))),
+ (VPMOVSXWDrm addr:$src)>;
+ def : Pat<(v2i64 (X86vsext (v4i32 (bitconvert (v2i64
+ (scalar_to_vector (loadi64 addr:$src))))))),
+ (VPMOVSXDQrm addr:$src)>;
+ def : Pat<(v4i32 (X86vsext (v8i16 (bitconvert (v2f64
+ (scalar_to_vector (loadf64 addr:$src))))))),
+ (VPMOVSXWDrm addr:$src)>;
+ def : Pat<(v2i64 (X86vsext (v4i32 (bitconvert (v2f64
+ (scalar_to_vector (loadf64 addr:$src))))))),
+ (VPMOVSXDQrm addr:$src)>;
+ def : Pat<(v8i16 (X86vsext (v16i8 (bitconvert (v2i64
+ (scalar_to_vector (loadi64 addr:$src))))))),
+ (VPMOVSXBWrm addr:$src)>;
+ def : Pat<(v8i16 (X86vsext (v16i8 (bitconvert (v2f64
+ (scalar_to_vector (loadf64 addr:$src))))))),
+ (VPMOVSXBWrm addr:$src)>;
+
+ def : Pat<(v4i32 (X86vsext (v16i8 (bitconvert (v4i32
+ (scalar_to_vector (loadi32 addr:$src))))))),
+ (VPMOVSXBDrm addr:$src)>;
+ def : Pat<(v2i64 (X86vsext (v8i16 (bitconvert (v4i32
+ (scalar_to_vector (loadi32 addr:$src))))))),
+ (VPMOVSXWQrm addr:$src)>;
+ def : Pat<(v2i64 (X86vsext (v16i8 (bitconvert (v4i32
+ (scalar_to_vector (extloadi32i16 addr:$src))))))),
+ (VPMOVSXBQrm addr:$src)>;
}
let Predicates = [UseSSE41] in {
@@ -6267,6 +6113,7 @@ multiclass sse41_fp_binop_rm<bits<8> opcss, bits<8> opcsd,
Intrinsic F64Int, bit Is2Addr = 1> {
let ExeDomain = GenericDomain in {
// Operation, reg.
+ let hasSideEffects = 0 in
def SSr : SS4AIi8<opcss, MRMSrcReg,
(outs FR32:$dst), (ins FR32:$src1, FR32:$src2, i32i8imm:$src3),
!if(Is2Addr,
@@ -6300,6 +6147,7 @@ let ExeDomain = GenericDomain in {
OpSize;
// Operation, reg.
+ let hasSideEffects = 0 in
def SDr : SS4AIi8<opcsd, MRMSrcReg,
(outs FR64:$dst), (ins FR64:$src1, FR64:$src2, i32i8imm:$src3),
!if(Is2Addr,
@@ -6621,67 +6469,6 @@ multiclass SS41I_binop_rm_int_y<bits<8> opc, string OpcodeStr,
(bitconvert (memopv4i64 addr:$src2))))]>, OpSize;
}
-let Predicates = [HasAVX] in {
- let isCommutable = 0 in
- defm VPACKUSDW : SS41I_binop_rm_int<0x2B, "vpackusdw", int_x86_sse41_packusdw,
- 0>, VEX_4V;
- defm VPMINSB : SS41I_binop_rm_int<0x38, "vpminsb", int_x86_sse41_pminsb,
- 0>, VEX_4V;
- defm VPMINSD : SS41I_binop_rm_int<0x39, "vpminsd", int_x86_sse41_pminsd,
- 0>, VEX_4V;
- defm VPMINUD : SS41I_binop_rm_int<0x3B, "vpminud", int_x86_sse41_pminud,
- 0>, VEX_4V;
- defm VPMINUW : SS41I_binop_rm_int<0x3A, "vpminuw", int_x86_sse41_pminuw,
- 0>, VEX_4V;
- defm VPMAXSB : SS41I_binop_rm_int<0x3C, "vpmaxsb", int_x86_sse41_pmaxsb,
- 0>, VEX_4V;
- defm VPMAXSD : SS41I_binop_rm_int<0x3D, "vpmaxsd", int_x86_sse41_pmaxsd,
- 0>, VEX_4V;
- defm VPMAXUD : SS41I_binop_rm_int<0x3F, "vpmaxud", int_x86_sse41_pmaxud,
- 0>, VEX_4V;
- defm VPMAXUW : SS41I_binop_rm_int<0x3E, "vpmaxuw", int_x86_sse41_pmaxuw,
- 0>, VEX_4V;
- defm VPMULDQ : SS41I_binop_rm_int<0x28, "vpmuldq", int_x86_sse41_pmuldq,
- 0>, VEX_4V;
-}
-
-let Predicates = [HasAVX2] in {
- let isCommutable = 0 in
- defm VPACKUSDW : SS41I_binop_rm_int_y<0x2B, "vpackusdw",
- int_x86_avx2_packusdw>, VEX_4V, VEX_L;
- defm VPMINSB : SS41I_binop_rm_int_y<0x38, "vpminsb",
- int_x86_avx2_pmins_b>, VEX_4V, VEX_L;
- defm VPMINSD : SS41I_binop_rm_int_y<0x39, "vpminsd",
- int_x86_avx2_pmins_d>, VEX_4V, VEX_L;
- defm VPMINUD : SS41I_binop_rm_int_y<0x3B, "vpminud",
- int_x86_avx2_pminu_d>, VEX_4V, VEX_L;
- defm VPMINUW : SS41I_binop_rm_int_y<0x3A, "vpminuw",
- int_x86_avx2_pminu_w>, VEX_4V, VEX_L;
- defm VPMAXSB : SS41I_binop_rm_int_y<0x3C, "vpmaxsb",
- int_x86_avx2_pmaxs_b>, VEX_4V, VEX_L;
- defm VPMAXSD : SS41I_binop_rm_int_y<0x3D, "vpmaxsd",
- int_x86_avx2_pmaxs_d>, VEX_4V, VEX_L;
- defm VPMAXUD : SS41I_binop_rm_int_y<0x3F, "vpmaxud",
- int_x86_avx2_pmaxu_d>, VEX_4V, VEX_L;
- defm VPMAXUW : SS41I_binop_rm_int_y<0x3E, "vpmaxuw",
- int_x86_avx2_pmaxu_w>, VEX_4V, VEX_L;
- defm VPMULDQ : SS41I_binop_rm_int_y<0x28, "vpmuldq",
- int_x86_avx2_pmul_dq>, VEX_4V, VEX_L;
-}
-
-let Constraints = "$src1 = $dst" in {
- let isCommutable = 0 in
- defm PACKUSDW : SS41I_binop_rm_int<0x2B, "packusdw", int_x86_sse41_packusdw>;
- defm PMINSB : SS41I_binop_rm_int<0x38, "pminsb", int_x86_sse41_pminsb>;
- defm PMINSD : SS41I_binop_rm_int<0x39, "pminsd", int_x86_sse41_pminsd>;
- defm PMINUD : SS41I_binop_rm_int<0x3B, "pminud", int_x86_sse41_pminud>;
- defm PMINUW : SS41I_binop_rm_int<0x3A, "pminuw", int_x86_sse41_pminuw>;
- defm PMAXSB : SS41I_binop_rm_int<0x3C, "pmaxsb", int_x86_sse41_pmaxsb>;
- defm PMAXSD : SS41I_binop_rm_int<0x3D, "pmaxsd", int_x86_sse41_pmaxsd>;
- defm PMAXUD : SS41I_binop_rm_int<0x3F, "pmaxud", int_x86_sse41_pmaxud>;
- defm PMAXUW : SS41I_binop_rm_int<0x3E, "pmaxuw", int_x86_sse41_pmaxuw>;
- defm PMULDQ : SS41I_binop_rm_int<0x28, "pmuldq", int_x86_sse41_pmuldq>;
-}
/// SS48I_binop_rm - Simple SSE41 binary operator.
multiclass SS48I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
@@ -6705,6 +6492,76 @@ multiclass SS48I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
}
let Predicates = [HasAVX] in {
+ let isCommutable = 0 in
+ defm VPACKUSDW : SS41I_binop_rm_int<0x2B, "vpackusdw", int_x86_sse41_packusdw,
+ 0>, VEX_4V;
+ defm VPMINSB : SS48I_binop_rm<0x38, "vpminsb", X86smin, v16i8, VR128,
+ memopv2i64, i128mem, 0>, VEX_4V;
+ defm VPMINSD : SS48I_binop_rm<0x39, "vpminsd", X86smin, v4i32, VR128,
+ memopv2i64, i128mem, 0>, VEX_4V;
+ defm VPMINUD : SS48I_binop_rm<0x3B, "vpminud", X86umin, v4i32, VR128,
+ memopv2i64, i128mem, 0>, VEX_4V;
+ defm VPMINUW : SS48I_binop_rm<0x3A, "vpminuw", X86umin, v8i16, VR128,
+ memopv2i64, i128mem, 0>, VEX_4V;
+ defm VPMAXSB : SS48I_binop_rm<0x3C, "vpmaxsb", X86smax, v16i8, VR128,
+ memopv2i64, i128mem, 0>, VEX_4V;
+ defm VPMAXSD : SS48I_binop_rm<0x3D, "vpmaxsd", X86smax, v4i32, VR128,
+ memopv2i64, i128mem, 0>, VEX_4V;
+ defm VPMAXUD : SS48I_binop_rm<0x3F, "vpmaxud", X86umax, v4i32, VR128,
+ memopv2i64, i128mem, 0>, VEX_4V;
+ defm VPMAXUW : SS48I_binop_rm<0x3E, "vpmaxuw", X86umax, v8i16, VR128,
+ memopv2i64, i128mem, 0>, VEX_4V;
+ defm VPMULDQ : SS41I_binop_rm_int<0x28, "vpmuldq", int_x86_sse41_pmuldq,
+ 0>, VEX_4V;
+}
+
+let Predicates = [HasAVX2] in {
+ let isCommutable = 0 in
+ defm VPACKUSDW : SS41I_binop_rm_int_y<0x2B, "vpackusdw",
+ int_x86_avx2_packusdw>, VEX_4V, VEX_L;
+ defm VPMINSBY : SS48I_binop_rm<0x38, "vpminsb", X86smin, v32i8, VR256,
+ memopv4i64, i256mem, 0>, VEX_4V, VEX_L;
+ defm VPMINSDY : SS48I_binop_rm<0x39, "vpminsd", X86smin, v8i32, VR256,
+ memopv4i64, i256mem, 0>, VEX_4V, VEX_L;
+ defm VPMINUDY : SS48I_binop_rm<0x3B, "vpminud", X86umin, v8i32, VR256,
+ memopv4i64, i256mem, 0>, VEX_4V, VEX_L;
+ defm VPMINUWY : SS48I_binop_rm<0x3A, "vpminuw", X86umin, v16i16, VR256,
+ memopv4i64, i256mem, 0>, VEX_4V, VEX_L;
+ defm VPMAXSBY : SS48I_binop_rm<0x3C, "vpmaxsb", X86smax, v32i8, VR256,
+ memopv4i64, i256mem, 0>, VEX_4V, VEX_L;
+ defm VPMAXSDY : SS48I_binop_rm<0x3D, "vpmaxsd", X86smax, v8i32, VR256,
+ memopv4i64, i256mem, 0>, VEX_4V, VEX_L;
+ defm VPMAXUDY : SS48I_binop_rm<0x3F, "vpmaxud", X86umax, v8i32, VR256,
+ memopv4i64, i256mem, 0>, VEX_4V, VEX_L;
+ defm VPMAXUWY : SS48I_binop_rm<0x3E, "vpmaxuw", X86umax, v16i16, VR256,
+ memopv4i64, i256mem, 0>, VEX_4V, VEX_L;
+ defm VPMULDQ : SS41I_binop_rm_int_y<0x28, "vpmuldq",
+ int_x86_avx2_pmul_dq>, VEX_4V, VEX_L;
+}
+
+let Constraints = "$src1 = $dst" in {
+ let isCommutable = 0 in
+ defm PACKUSDW : SS41I_binop_rm_int<0x2B, "packusdw", int_x86_sse41_packusdw>;
+ defm PMINSB : SS48I_binop_rm<0x38, "pminsb", X86smin, v16i8, VR128,
+ memopv2i64, i128mem>;
+ defm PMINSD : SS48I_binop_rm<0x39, "pminsd", X86smin, v4i32, VR128,
+ memopv2i64, i128mem>;
+ defm PMINUD : SS48I_binop_rm<0x3B, "pminud", X86umin, v4i32, VR128,
+ memopv2i64, i128mem>;
+ defm PMINUW : SS48I_binop_rm<0x3A, "pminuw", X86umin, v8i16, VR128,
+ memopv2i64, i128mem>;
+ defm PMAXSB : SS48I_binop_rm<0x3C, "pmaxsb", X86smax, v16i8, VR128,
+ memopv2i64, i128mem>;
+ defm PMAXSD : SS48I_binop_rm<0x3D, "pmaxsd", X86smax, v4i32, VR128,
+ memopv2i64, i128mem>;
+ defm PMAXUD : SS48I_binop_rm<0x3F, "pmaxud", X86umax, v4i32, VR128,
+ memopv2i64, i128mem>;
+ defm PMAXUW : SS48I_binop_rm<0x3E, "pmaxuw", X86umax, v8i16, VR128,
+ memopv2i64, i128mem>;
+ defm PMULDQ : SS41I_binop_rm_int<0x28, "pmuldq", int_x86_sse41_pmuldq>;
+}
+
+let Predicates = [HasAVX] in {
defm VPMULLD : SS48I_binop_rm<0x40, "vpmulld", mul, v4i32, VR128,
memopv2i64, i128mem, 0>, VEX_4V;
defm VPCMPEQQ : SS48I_binop_rm<0x29, "vpcmpeqq", X86pcmpeq, v2i64, VR128,