aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEvan Cheng <evan.cheng@apple.com>2006-03-30 19:54:57 +0000
committerEvan Cheng <evan.cheng@apple.com>2006-03-30 19:54:57 +0000
commit7d9061e3009af718544fde64ea88343fa9cd2ada (patch)
treefed6deec0c22a82a0e66ede31f0e62c76ec4dbc4
parent99e746feba53456314ccaadaec3a0581e840d83d (diff)
Make sure all possible shuffles are matched.
Use pshufd, pshufhw, and pshuflw to shuffle v4f32 if shufps doesn't match. Use shufps to shuffle v4i32 if pshufd, pshufhw, and pshuflw don't match. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@27259 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp53
-rw-r--r--lib/Target/X86/X86InstrSSE.td66
2 files changed, 89 insertions, 30 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 072e1990e7..7f2954c67b 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -1486,23 +1486,17 @@ bool X86::isSHUFPMask(SDNode *N) {
if (NumElems != 4) return false;
// Each half must refer to only one of the vector.
- SDOperand Elt = N->getOperand(0);
- assert(isa<ConstantSDNode>(Elt) && "Invalid VECTOR_SHUFFLE mask!");
- for (unsigned i = 1; i < NumElems / 2; ++i) {
+ for (unsigned i = 0; i < 2; ++i) {
assert(isa<ConstantSDNode>(N->getOperand(i)) &&
"Invalid VECTOR_SHUFFLE mask!");
- if (cast<ConstantSDNode>(N->getOperand(i))->getValue() !=
- cast<ConstantSDNode>(Elt)->getValue())
- return false;
+ unsigned Val = cast<ConstantSDNode>(N->getOperand(i))->getValue();
+ if (Val >= 4) return false;
}
- Elt = N->getOperand(NumElems / 2);
- assert(isa<ConstantSDNode>(Elt) && "Invalid VECTOR_SHUFFLE mask!");
- for (unsigned i = NumElems / 2 + 1; i < NumElems; ++i) {
+ for (unsigned i = 2; i < 4; ++i) {
assert(isa<ConstantSDNode>(N->getOperand(i)) &&
"Invalid VECTOR_SHUFFLE mask!");
- if (cast<ConstantSDNode>(N->getOperand(i))->getValue() !=
- cast<ConstantSDNode>(Elt)->getValue())
- return false;
+ unsigned Val = cast<ConstantSDNode>(N->getOperand(i))->getValue();
+ if (Val < 4) return false;
}
return true;
@@ -2489,11 +2483,7 @@ SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
unsigned NumElems = PermMask.getNumOperands();
// Splat && PSHUFD's 2nd vector must be undef.
- if (X86::isSplatMask(PermMask.Val) ||
- ((MVT::isInteger(VT) &&
- (X86::isPSHUFDMask(PermMask.Val) ||
- X86::isPSHUFHWMask(PermMask.Val) ||
- X86::isPSHUFLWMask(PermMask.Val))))) {
+ if (X86::isSplatMask(PermMask.Val)) {
if (V2.getOpcode() != ISD::UNDEF)
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask);
@@ -2505,9 +2495,34 @@ SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
// Leave the VECTOR_SHUFFLE alone. It matches {P}UNPCKL*.
return SDOperand();
- if (NumElems == 2 ||
- X86::isSHUFPMask(PermMask.Val)) {
+ if (NumElems == 2)
return NormalizeVectorShuffle(V1, V2, PermMask, VT, DAG);
+
+ // If VT is integer, try PSHUF* first, then SHUFP*.
+ if (MVT::isInteger(VT)) {
+ if (X86::isPSHUFDMask(PermMask.Val) ||
+ X86::isPSHUFHWMask(PermMask.Val) ||
+ X86::isPSHUFLWMask(PermMask.Val)) {
+ if (V2.getOpcode() != ISD::UNDEF)
+ return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
+ DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask);
+ return SDOperand();
+ }
+
+ if (X86::isSHUFPMask(PermMask.Val))
+ return NormalizeVectorShuffle(V1, V2, PermMask, VT, DAG);
+ } else {
+ // Floating point cases in the other order.
+ if (X86::isSHUFPMask(PermMask.Val))
+ return NormalizeVectorShuffle(V1, V2, PermMask, VT, DAG);
+ if (X86::isPSHUFDMask(PermMask.Val) ||
+ X86::isPSHUFHWMask(PermMask.Val) ||
+ X86::isPSHUFLWMask(PermMask.Val)) {
+ if (V2.getOpcode() != ISD::UNDEF)
+ return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
+ DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask);
+ return SDOperand();
+ }
}
assert(0 && "Unexpected VECTOR_SHUFFLE to lower");
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 661df4b3fe..2245a2218f 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -106,14 +106,32 @@ def PSHUFLW_shuffle_mask : PatLeaf<(build_vector), [{
return X86::isPSHUFLWMask(N);
}], SHUFFLE_get_pshuflw_imm>;
+// Only use PSHUF* for v4f32 if SHUFP does not match.
+def PSHUFD_fp_shuffle_mask : PatLeaf<(build_vector), [{
+ return !X86::isSHUFPMask(N) &&
+ X86::isPSHUFDMask(N);
+}], SHUFFLE_get_shuf_imm>;
+
+def PSHUFHW_fp_shuffle_mask : PatLeaf<(build_vector), [{
+ return !X86::isSHUFPMask(N) &&
+ X86::isPSHUFHWMask(N);
+}], SHUFFLE_get_pshufhw_imm>;
+
+def PSHUFLW_fp_shuffle_mask : PatLeaf<(build_vector), [{
+ return !X86::isSHUFPMask(N) &&
+ X86::isPSHUFLWMask(N);
+}], SHUFFLE_get_pshuflw_imm>;
+
def SHUFP_shuffle_mask : PatLeaf<(build_vector), [{
return X86::isSHUFPMask(N);
}], SHUFFLE_get_shuf_imm>;
-// Only use SHUFP for v4i32 if no other options are available.
-// FIXME: add tblgen hook to reduce the complexity of pattern.
-def SHUFP_v4i32_shuffle_mask : PatLeaf<(build_vector), [{
- return !X86::isUNPCKHMask(N) && !X86::isPSHUFDMask(N) && X86::isSHUFPMask(N);
+// Only use SHUFP for v4i32 if PSHUF* do not match.
+def SHUFP_int_shuffle_mask : PatLeaf<(build_vector), [{
+ return !X86::isPSHUFDMask(N) &&
+ !X86::isPSHUFHWMask(N) &&
+ !X86::isPSHUFLWMask(N) &&
+ X86::isSHUFPMask(N);
}], SHUFFLE_get_shuf_imm>;
//===----------------------------------------------------------------------===//
@@ -1278,14 +1296,14 @@ def PSHUFHWrm : Ii8<0x70, MRMDestMem,
// SSE2 with ImmT == Imm8 and XD prefix.
def PSHUFLWrr : Ii8<0x70, MRMDestReg,
(ops VR128:$dst, VR128:$src1, i32i8imm:$src2),
- "pshufLw {$src2, $src1, $dst|$dst, $src1, $src2}",
+ "pshuflw {$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst, (v8i16 (vector_shuffle
VR128:$src1, (undef),
PSHUFLW_shuffle_mask:$src2)))]>,
XD, Requires<[HasSSE2]>;
def PSHUFLWrm : Ii8<0x70, MRMDestMem,
(ops VR128:$dst, i128mem:$src1, i32i8imm:$src2),
- "pshufLw {$src2, $src1, $dst|$dst, $src1, $src2}",
+ "pshuflw {$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst, (v8i16 (vector_shuffle
(bc_v8i16 (loadv2i64 addr:$src1)), (undef),
PSHUFLW_shuffle_mask:$src2)))]>,
@@ -1593,15 +1611,41 @@ def : Pat<(vector_shuffle (v4f32 VR128:$src), (undef), SSE_splat_mask:$sm),
(v4f32 (SHUFPSrr VR128:$src, VR128:$src, SSE_splat_mask:$sm))>,
Requires<[HasSSE1]>;
-// Shuffle v4i32 if others do not match
+// Shuffle v4i32 with SHUFP* if others do not match.
def : Pat<(vector_shuffle (v4i32 VR128:$src1), (v4i32 VR128:$src2),
- SHUFP_shuffle_mask:$sm),
+ SHUFP_int_shuffle_mask:$sm),
(v4i32 (SHUFPSrr VR128:$src1, VR128:$src2,
- SHUFP_v4i32_shuffle_mask:$sm))>, Requires<[HasSSE2]>;
+ SHUFP_int_shuffle_mask:$sm))>, Requires<[HasSSE2]>;
def : Pat<(vector_shuffle (v4i32 VR128:$src1), (load addr:$src2),
- SHUFP_shuffle_mask:$sm),
+ SHUFP_int_shuffle_mask:$sm),
(v4i32 (SHUFPSrm VR128:$src1, addr:$src2,
- SHUFP_v4i32_shuffle_mask:$sm))>, Requires<[HasSSE2]>;
+ SHUFP_int_shuffle_mask:$sm))>, Requires<[HasSSE2]>;
+
+// Shuffle v4f32 with PSHUF* if others do not match.
+def : Pat<(vector_shuffle (v4f32 VR128:$src1), (undef),
+ PSHUFD_fp_shuffle_mask:$sm),
+ (v4f32 (PSHUFDrr VR128:$src1, PSHUFD_fp_shuffle_mask:$sm))>,
+ Requires<[HasSSE2]>;
+def : Pat<(vector_shuffle (loadv4f32 addr:$src1), (undef),
+ PSHUFD_fp_shuffle_mask:$sm),
+ (v4f32 (PSHUFDrm addr:$src1, PSHUFD_fp_shuffle_mask:$sm))>,
+ Requires<[HasSSE2]>;
+def : Pat<(vector_shuffle (v4f32 VR128:$src1), (undef),
+ PSHUFHW_fp_shuffle_mask:$sm),
+ (v4f32 (PSHUFHWrr VR128:$src1, PSHUFHW_fp_shuffle_mask:$sm))>,
+ Requires<[HasSSE2]>;
+def : Pat<(vector_shuffle (loadv4f32 addr:$src1), (undef),
+ PSHUFHW_fp_shuffle_mask:$sm),
+ (v4f32 (PSHUFHWrm addr:$src1, PSHUFHW_fp_shuffle_mask:$sm))>,
+ Requires<[HasSSE2]>;
+def : Pat<(vector_shuffle (v4f32 VR128:$src1), (undef),
+ PSHUFLW_fp_shuffle_mask:$sm),
+ (v4f32 (PSHUFLWrr VR128:$src1, PSHUFLW_fp_shuffle_mask:$sm))>,
+ Requires<[HasSSE2]>;
+def : Pat<(vector_shuffle (loadv4f32 addr:$src1), (undef),
+ PSHUFLW_fp_shuffle_mask:$sm),
+ (v4f32 (PSHUFLWrm addr:$src1, PSHUFLW_fp_shuffle_mask:$sm))>,
+ Requires<[HasSSE2]>;
// Logical ops
def : Pat<(and (bc_v4i32 (v4f32 VR128:$src1)), (loadv4i32 addr:$src2)),