aboutsummaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
Diffstat (limited to 'lib')
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp16
-rw-r--r--lib/Target/X86/X86ISelLowering.h2
-rw-r--r--lib/Target/X86/X86InstrFragmentsSIMD.td2
-rw-r--r--lib/Target/X86/X86InstrSSE.td94
4 files changed, 30 insertions, 84 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 4f7bf15a79..a5bfe1ac8f 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -2852,6 +2852,7 @@ static bool isTargetShuffle(unsigned Opcode) {
case X86ISD::PUNPCKLDQ:
case X86ISD::PUNPCKLQDQ:
case X86ISD::VPUNPCKLWDY:
+ case X86ISD::VPUNPCKLBWY:
case X86ISD::VPUNPCKLDQY:
case X86ISD::VPUNPCKLQDQY:
case X86ISD::UNPCKHPS:
@@ -2863,6 +2864,7 @@ static bool isTargetShuffle(unsigned Opcode) {
case X86ISD::PUNPCKHDQ:
case X86ISD::PUNPCKHQDQ:
case X86ISD::VPUNPCKHWDY:
+ case X86ISD::VPUNPCKHBWY:
case X86ISD::VPUNPCKHDQY:
case X86ISD::VPUNPCKHQDQY:
case X86ISD::VPERMILPS:
@@ -2939,6 +2941,7 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
case X86ISD::PUNPCKLDQ:
case X86ISD::PUNPCKLQDQ:
case X86ISD::VPUNPCKLWDY:
+ case X86ISD::VPUNPCKLBWY:
case X86ISD::VPUNPCKLDQY:
case X86ISD::VPUNPCKLQDQY:
case X86ISD::UNPCKHPS:
@@ -2950,6 +2953,7 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
case X86ISD::PUNPCKHDQ:
case X86ISD::PUNPCKHQDQ:
case X86ISD::VPUNPCKHWDY:
+ case X86ISD::VPUNPCKHBWY:
case X86ISD::VPUNPCKHDQY:
case X86ISD::VPUNPCKHQDQY:
return DAG.getNode(Opc, dl, VT, V1, V2);
@@ -3569,7 +3573,7 @@ static bool isUNPCKLMask(const SmallVectorImpl<int> &Mask, EVT VT,
"Unsupported vector type for unpckh");
if (VT.getSizeInBits() == 256 && NumElts != 4 && NumElts != 8 &&
- (!HasAVX2 || NumElts != 16))
+ (!HasAVX2 || (NumElts != 16 && NumElts != 32)))
return false;
// Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
@@ -3619,7 +3623,7 @@ static bool isUNPCKHMask(const SmallVectorImpl<int> &Mask, EVT VT,
"Unsupported vector type for unpckh");
if (VT.getSizeInBits() == 256 && NumElts != 4 && NumElts != 8 &&
- (!HasAVX2 || NumElts != 16))
+ (!HasAVX2 || (NumElts != 16 && NumElts != 32)))
return false;
// Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
@@ -4639,6 +4643,7 @@ static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG,
case X86ISD::PUNPCKHWD:
case X86ISD::PUNPCKHDQ:
case X86ISD::PUNPCKHQDQ:
+ case X86ISD::VPUNPCKHBWY:
case X86ISD::VPUNPCKHWDY:
case X86ISD::VPUNPCKHDQY:
case X86ISD::VPUNPCKHQDQY:
@@ -4654,6 +4659,7 @@ static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG,
case X86ISD::PUNPCKLWD:
case X86ISD::PUNPCKLDQ:
case X86ISD::PUNPCKLQDQ:
+ case X86ISD::VPUNPCKLBWY:
case X86ISD::VPUNPCKLWDY:
case X86ISD::VPUNPCKLDQY:
case X86ISD::VPUNPCKLQDQY:
@@ -6595,6 +6601,7 @@ static inline unsigned getUNPCKLOpcode(EVT VT, bool HasAVX2) {
case MVT::v16i8: return X86ISD::PUNPCKLBW;
case MVT::v8i16: return X86ISD::PUNPCKLWD;
case MVT::v16i16: return X86ISD::VPUNPCKLWDY;
+ case MVT::v32i8: return X86ISD::VPUNPCKLBWY;
default:
llvm_unreachable("Unknown type for unpckl");
}
@@ -6618,6 +6625,7 @@ static inline unsigned getUNPCKHOpcode(EVT VT, bool HasAVX2) {
case MVT::v16i8: return X86ISD::PUNPCKHBW;
case MVT::v8i16: return X86ISD::PUNPCKHWD;
case MVT::v16i16: return X86ISD::VPUNPCKHWDY;
+ case MVT::v32i8: return X86ISD::VPUNPCKHBWY;
default:
llvm_unreachable("Unknown type for unpckh");
}
@@ -11270,6 +11278,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::PUNPCKLWD: return "X86ISD::PUNPCKLWD";
case X86ISD::PUNPCKLDQ: return "X86ISD::PUNPCKLDQ";
case X86ISD::PUNPCKLQDQ: return "X86ISD::PUNPCKLQDQ";
+ case X86ISD::VPUNPCKLBWY: return "X86ISD::VPUNPCKLBWY";
case X86ISD::VPUNPCKLWDY: return "X86ISD::VPUNPCKLWDY";
case X86ISD::VPUNPCKLDQY: return "X86ISD::VPUNPCKLDQY";
case X86ISD::VPUNPCKLQDQY: return "X86ISD::VPUNPCKLQDQY";
@@ -11277,6 +11286,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::PUNPCKHWD: return "X86ISD::PUNPCKHWD";
case X86ISD::PUNPCKHDQ: return "X86ISD::PUNPCKHDQ";
case X86ISD::PUNPCKHQDQ: return "X86ISD::PUNPCKHQDQ";
+ case X86ISD::VPUNPCKHBWY: return "X86ISD::VPUNPCKHBWY";
case X86ISD::VPUNPCKHWDY: return "X86ISD::VPUNPCKHWDY";
case X86ISD::VPUNPCKHDQY: return "X86ISD::VPUNPCKHDQY";
case X86ISD::VPUNPCKHQDQY: return "X86ISD::VPUNPCKHQDQY";
@@ -14867,6 +14877,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case X86ISD::PUNPCKHWD:
case X86ISD::PUNPCKHDQ:
case X86ISD::PUNPCKHQDQ:
+ case X86ISD::VPUNPCKHBWY:
case X86ISD::VPUNPCKHWDY:
case X86ISD::VPUNPCKHDQY:
case X86ISD::VPUNPCKHQDQY:
@@ -14878,6 +14889,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case X86ISD::PUNPCKLWD:
case X86ISD::PUNPCKLDQ:
case X86ISD::PUNPCKLQDQ:
+ case X86ISD::VPUNPCKLBWY:
case X86ISD::VPUNPCKLWDY:
case X86ISD::VPUNPCKLDQY:
case X86ISD::VPUNPCKLQDQY:
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 7bb4da6581..36cb1526fd 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -285,6 +285,7 @@ namespace llvm {
PUNPCKLWD,
PUNPCKLDQ,
PUNPCKLQDQ,
+ VPUNPCKLBWY,
VPUNPCKLWDY,
VPUNPCKLDQY,
VPUNPCKLQDQY,
@@ -292,6 +293,7 @@ namespace llvm {
PUNPCKHWD,
PUNPCKHDQ,
PUNPCKHQDQ,
+ VPUNPCKHBWY,
VPUNPCKHWDY,
VPUNPCKHDQY,
VPUNPCKHQDQY,
diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td
index c4d311f8d4..7e8bc04463 100644
--- a/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -144,6 +144,7 @@ def X86Punpcklbw : SDNode<"X86ISD::PUNPCKLBW", SDTShuff2Op>;
def X86Punpcklwd : SDNode<"X86ISD::PUNPCKLWD", SDTShuff2Op>;
def X86Punpckldq : SDNode<"X86ISD::PUNPCKLDQ", SDTShuff2Op>;
def X86Punpcklqdq : SDNode<"X86ISD::PUNPCKLQDQ", SDTShuff2Op>;
+def X86Punpcklbwy : SDNode<"X86ISD::VPUNPCKLBWY", SDTShuff2Op>;
def X86Punpcklwdy : SDNode<"X86ISD::VPUNPCKLWDY", SDTShuff2Op>;
def X86Punpckldqy : SDNode<"X86ISD::VPUNPCKLDQY", SDTShuff2Op>;
def X86Punpcklqdqy : SDNode<"X86ISD::VPUNPCKLQDQY", SDTShuff2Op>;
@@ -152,6 +153,7 @@ def X86Punpckhbw : SDNode<"X86ISD::PUNPCKHBW", SDTShuff2Op>;
def X86Punpckhwd : SDNode<"X86ISD::PUNPCKHWD", SDTShuff2Op>;
def X86Punpckhdq : SDNode<"X86ISD::PUNPCKHDQ", SDTShuff2Op>;
def X86Punpckhqdq : SDNode<"X86ISD::PUNPCKHQDQ", SDTShuff2Op>;
+def X86Punpckhbwy : SDNode<"X86ISD::VPUNPCKHBWY", SDTShuff2Op>;
def X86Punpckhwdy : SDNode<"X86ISD::VPUNPCKHWDY", SDTShuff2Op>;
def X86Punpckhdqy : SDNode<"X86ISD::VPUNPCKHDQY", SDTShuff2Op>;
def X86Punpckhqdqy : SDNode<"X86ISD::VPUNPCKHQDQY", SDTShuff2Op>;
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index cfb8c850c8..94bd8251ef 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -4204,19 +4204,8 @@ let Predicates = [HasAVX] in {
bc_v8i16, 0>, VEX_4V;
defm VPUNPCKLDQ : sse2_unpack<0x62, "vpunpckldq", v4i32, X86Punpckldq,
bc_v4i32, 0>, VEX_4V;
-
- /// FIXME: we could eliminate this and use sse2_unpack instead if tblgen
- /// knew to collapse (bitconvert VT to VT) into its operand.
- def VPUNPCKLQDQrr : PDI<0x6C, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- "vpunpcklqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR128:$dst, (v2i64 (X86Punpcklqdq VR128:$src1,
- VR128:$src2)))]>, VEX_4V;
- def VPUNPCKLQDQrm : PDI<0x6C, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
- "vpunpcklqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR128:$dst, (v2i64 (X86Punpcklqdq VR128:$src1,
- (memopv2i64 addr:$src2))))]>, VEX_4V;
+ defm VPUNPCKLQDQ : sse2_unpack<0x6C, "vpunpcklqdq", v2i64, X86Punpcklqdq,
+ bc_v2i64, 0>, VEX_4V;
defm VPUNPCKHBW : sse2_unpack<0x68, "vpunpckhbw", v16i8, X86Punpckhbw,
bc_v16i8, 0>, VEX_4V;
@@ -4224,99 +4213,40 @@ let Predicates = [HasAVX] in {
bc_v8i16, 0>, VEX_4V;
defm VPUNPCKHDQ : sse2_unpack<0x6A, "vpunpckhdq", v4i32, X86Punpckhdq,
bc_v4i32, 0>, VEX_4V;
-
- /// FIXME: we could eliminate this and use sse2_unpack instead if tblgen
- /// knew to collapse (bitconvert VT to VT) into its operand.
- def VPUNPCKHQDQrr : PDI<0x6D, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- "vpunpckhqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR128:$dst, (v2i64 (X86Punpckhqdq VR128:$src1,
- VR128:$src2)))]>, VEX_4V;
- def VPUNPCKHQDQrm : PDI<0x6D, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
- "vpunpckhqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR128:$dst, (v2i64 (X86Punpckhqdq VR128:$src1,
- (memopv2i64 addr:$src2))))]>, VEX_4V;
+ defm VPUNPCKHQDQ : sse2_unpack<0x6D, "vpunpckhqdq", v2i64, X86Punpckhqdq,
+ bc_v2i64, 0>, VEX_4V;
}
let Predicates = [HasAVX2] in {
- defm VPUNPCKLBW : sse2_unpack_y<0x60, "vpunpcklbw", v32i8, X86Punpcklbw,
+ defm VPUNPCKLBW : sse2_unpack_y<0x60, "vpunpcklbw", v32i8, X86Punpcklbwy,
bc_v32i8>, VEX_4V;
defm VPUNPCKLWD : sse2_unpack_y<0x61, "vpunpcklwd", v16i16, X86Punpcklwdy,
bc_v16i16>, VEX_4V;
defm VPUNPCKLDQ : sse2_unpack_y<0x62, "vpunpckldq", v8i32, X86Punpckldqy,
bc_v8i32>, VEX_4V;
+ defm VPUNPCKLQDQ : sse2_unpack_y<0x6C, "vpunpcklqdq", v4i64, X86Punpcklqdqy,
+ bc_v4i64>, VEX_4V;
- /// FIXME: we could eliminate this and use sse2_unpack_y instead if tblgen
- /// knew to collapse (bitconvert VT to VT) into its operand.
- def VPUNPCKLQDQYrr : PDI<0x6C, MRMSrcReg,
- (outs VR256:$dst), (ins VR256:$src1, VR256:$src2),
- "vpunpcklqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR256:$dst, (v4i64 (X86Punpcklqdqy VR256:$src1,
- VR256:$src2)))]>, VEX_4V;
- def VPUNPCKLQDQYrm : PDI<0x6C, MRMSrcMem,
- (outs VR256:$dst), (ins VR256:$src1, i256mem:$src2),
- "vpunpcklqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR256:$dst, (v4i64 (X86Punpcklqdqy VR256:$src1,
- (memopv4i64 addr:$src2))))]>, VEX_4V;
-
- defm VPUNPCKHBW : sse2_unpack_y<0x68, "vpunpckhbw", v32i8, X86Punpckhbw,
+ defm VPUNPCKHBW : sse2_unpack_y<0x68, "vpunpckhbw", v32i8, X86Punpckhbwy,
bc_v32i8>, VEX_4V;
defm VPUNPCKHWD : sse2_unpack_y<0x69, "vpunpckhwd", v16i16, X86Punpckhwdy,
bc_v16i16>, VEX_4V;
defm VPUNPCKHDQ : sse2_unpack_y<0x6A, "vpunpckhdq", v8i32, X86Punpckhdqy,
bc_v8i32>, VEX_4V;
-
- /// FIXME: we could eliminate this and use sse2_unpack_y instead if tblgen
- /// knew to collapse (bitconvert VT to VT) into its operand.
- def VPUNPCKHQDQYrr : PDI<0x6D, MRMSrcReg,
- (outs VR256:$dst), (ins VR256:$src1, VR256:$src2),
- "vpunpckhqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR256:$dst, (v4i64 (X86Punpckhqdqy VR256:$src1,
- VR256:$src2)))]>, VEX_4V;
- def VPUNPCKHQDQYrm : PDI<0x6D, MRMSrcMem,
- (outs VR256:$dst), (ins VR256:$src1, i256mem:$src2),
- "vpunpckhqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR256:$dst, (v4i64 (X86Punpckhqdqy VR256:$src1,
- (memopv4i64 addr:$src2))))]>, VEX_4V;
+ defm VPUNPCKHQDQ : sse2_unpack_y<0x6D, "vpunpckhqdq", v4i64, X86Punpckhqdqy,
+ bc_v4i64>, VEX_4V;
}
let Constraints = "$src1 = $dst" in {
defm PUNPCKLBW : sse2_unpack<0x60, "punpcklbw", v16i8, X86Punpcklbw, bc_v16i8>;
defm PUNPCKLWD : sse2_unpack<0x61, "punpcklwd", v8i16, X86Punpcklwd, bc_v8i16>;
defm PUNPCKLDQ : sse2_unpack<0x62, "punpckldq", v4i32, X86Punpckldq, bc_v4i32>;
-
- /// FIXME: we could eliminate this and use sse2_unpack instead if tblgen
- /// knew to collapse (bitconvert VT to VT) into its operand.
- def PUNPCKLQDQrr : PDI<0x6C, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- "punpcklqdq\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (v2i64 (X86Punpcklqdq VR128:$src1, VR128:$src2)))]>;
- def PUNPCKLQDQrm : PDI<0x6C, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
- "punpcklqdq\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (v2i64 (X86Punpcklqdq VR128:$src1,
- (memopv2i64 addr:$src2))))]>;
+ defm PUNPCKLQDQ : sse2_unpack<0x6C, "punpcklqdq", v2i64, X86Punpcklqdq, bc_v2i64>;
defm PUNPCKHBW : sse2_unpack<0x68, "punpckhbw", v16i8, X86Punpckhbw, bc_v16i8>;
defm PUNPCKHWD : sse2_unpack<0x69, "punpckhwd", v8i16, X86Punpckhwd, bc_v8i16>;
defm PUNPCKHDQ : sse2_unpack<0x6A, "punpckhdq", v4i32, X86Punpckhdq, bc_v4i32>;
-
- /// FIXME: we could eliminate this and use sse2_unpack instead if tblgen
- /// knew to collapse (bitconvert VT to VT) into its operand.
- def PUNPCKHQDQrr : PDI<0x6D, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- "punpckhqdq\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (v2i64 (X86Punpckhqdq VR128:$src1, VR128:$src2)))]>;
- def PUNPCKHQDQrm : PDI<0x6D, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
- "punpckhqdq\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (v2i64 (X86Punpckhqdq VR128:$src1,
- (memopv2i64 addr:$src2))))]>;
+ defm PUNPCKHQDQ : sse2_unpack<0x6D, "punpckhqdq", v2i64, X86Punpckhqdq, bc_v2i64>;
}
} // ExeDomain = SSEPackedInt