aboutsummaryrefslogtreecommitdiff
path: root/lib/Target/X86
diff options
context:
space:
mode:
author Rafael Espindola <rafael.espindola@gmail.com> 2009-04-24 12:40:33 +0000
committer Rafael Espindola <rafael.espindola@gmail.com> 2009-04-24 12:40:33 +0000
commit 15684b29552393553524171bff1913e750f390f8 (patch)
tree 2d43d8f19d7fc59d2c61b282b789a704c96b16b0 /lib/Target/X86
parent f6b9f260ede8c65b0de53dc9dd3ba42c1a286c13 (diff)
Revert 69952. Causes testsuite failures on linux x86-64.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@69967 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target/X86')
-rw-r--r-- lib/Target/X86/X86ISelLowering.cpp 1651
-rw-r--r-- lib/Target/X86/X86ISelLowering.h 52
-rw-r--r-- lib/Target/X86/X86InstrInfo.td 1
-rw-r--r-- lib/Target/X86/X86InstrMMX.td 114
-rw-r--r-- lib/Target/X86/X86InstrSSE.td 544
5 files changed, 1406 insertions, 956 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 02366020d7..d51435cf87 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -45,8 +45,7 @@ static cl::opt<bool>
DisableMMX("disable-mmx", cl::Hidden, cl::desc("Disable use of MMX"));
// Forward declarations.
-static SDValue getMOVL(SelectionDAG &DAG, DebugLoc dl, MVT VT, SDValue V1,
- SDValue V2);
+static SDValue getMOVLMask(unsigned NumElems, SelectionDAG &DAG, DebugLoc dl);
X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
: TargetLowering(TM) {
@@ -1668,7 +1667,9 @@ SDValue X86TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
// Special case: passing MMX values in XMM registers.
Arg = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i64, Arg);
Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg);
- Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg);
+ Arg = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, MVT::v2i64,
+ DAG.getUNDEF(MVT::v2i64), Arg,
+ getMOVLMask(2, DAG, dl));
break;
}
}
@@ -2137,156 +2138,186 @@ static bool hasFPCMov(unsigned X86CC) {
}
}
-/// isUndefOrInRange - Return true if Val is undef or if its value falls within
-/// the specified range (L, H].
-static bool isUndefOrInRange(int Val, int Low, int Hi) {
- return (Val < 0) || (Val >= Low && Val < Hi);
+/// isUndefOrInRange - Op is either an undef node or a ConstantSDNode. Return
+/// true if Op is undef or if its value falls within the range [Low, Hi).
+static bool isUndefOrInRange(SDValue Op, unsigned Low, unsigned Hi) {
+ if (Op.getOpcode() == ISD::UNDEF)
+ return true;
+
+ unsigned Val = cast<ConstantSDNode>(Op)->getZExtValue();
+ return (Val >= Low && Val < Hi);
}
-/// isUndefOrEqual - Val is either less than zero (undef) or equal to the
-/// specified value.
-static bool isUndefOrEqual(int Val, int CmpVal) {
- if (Val < 0 || Val == CmpVal)
+/// isUndefOrEqual - Op is either an undef node or a ConstantSDNode. Return
+/// true if Op is undef or if its value is equal to the specified value.
+static bool isUndefOrEqual(SDValue Op, unsigned Val) {
+ if (Op.getOpcode() == ISD::UNDEF)
return true;
- return false;
+ return cast<ConstantSDNode>(Op)->getZExtValue() == Val;
}
-/// isPSHUFDMask - Return true if the node specifies a shuffle of elements that
-/// is suitable for input to PSHUFD or PSHUFW. That is, it doesn't reference
-/// the second operand.
-static bool isPSHUFDMask(const int *Mask, MVT VT) {
- if (VT == MVT::v4f32 || VT == MVT::v4i32 || VT == MVT::v4i16)
- return (Mask[0] < 4 && Mask[1] < 4 && Mask[2] < 4 && Mask[3] < 4);
- if (VT == MVT::v2f64 || VT == MVT::v2i64)
- return (Mask[0] < 2 && Mask[1] < 2);
- return false;
-}
+/// isPSHUFDMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to PSHUFD.
+bool X86::isPSHUFDMask(SDNode *N) {
+ assert(N->getOpcode() == ISD::BUILD_VECTOR);
-bool X86::isPSHUFDMask(ShuffleVectorSDNode *N) {
- return ::isPSHUFDMask(N->getMask(), N->getValueType(0));
+ if (N->getNumOperands() != 2 && N->getNumOperands() != 4)
+ return false;
+
+ // Check if the value doesn't reference the second vector.
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ SDValue Arg = N->getOperand(i);
+ if (Arg.getOpcode() == ISD::UNDEF) continue;
+ assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
+ if (cast<ConstantSDNode>(Arg)->getZExtValue() >= e)
+ return false;
+ }
+
+ return true;
}
-/// isPSHUFHWMask - Return true if the node specifies a shuffle of elements that
-/// is suitable for input to PSHUFHW.
-static bool isPSHUFHWMask(const int *Mask, MVT VT) {
- if (VT != MVT::v8i16)
+/// isPSHUFHWMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to PSHUFHW.
+bool X86::isPSHUFHWMask(SDNode *N) {
+ assert(N->getOpcode() == ISD::BUILD_VECTOR);
+
+ if (N->getNumOperands() != 8)
return false;
-
- // Lower quadword copied in order or undef.
- for (int i = 0; i != 4; ++i)
- if (Mask[i] >= 0 && Mask[i] != i)
+
+ // Lower quadword copied in order.
+ for (unsigned i = 0; i != 4; ++i) {
+ SDValue Arg = N->getOperand(i);
+ if (Arg.getOpcode() == ISD::UNDEF) continue;
+ assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
+ if (cast<ConstantSDNode>(Arg)->getZExtValue() != i)
return false;
-
+ }
+
// Upper quadword shuffled.
- for (int i = 4; i != 8; ++i)
- if (Mask[i] >= 0 && (Mask[i] < 4 || Mask[i] > 7))
+ for (unsigned i = 4; i != 8; ++i) {
+ SDValue Arg = N->getOperand(i);
+ if (Arg.getOpcode() == ISD::UNDEF) continue;
+ assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
+ unsigned Val = cast<ConstantSDNode>(Arg)->getZExtValue();
+ if (Val < 4 || Val > 7)
return false;
-
+ }
+
return true;
}
-bool X86::isPSHUFHWMask(ShuffleVectorSDNode *N) {
- return ::isPSHUFHWMask(N->getMask(), N->getValueType(0));
-}
+/// isPSHUFLWMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to PSHUFLW.
+bool X86::isPSHUFLWMask(SDNode *N) {
+ assert(N->getOpcode() == ISD::BUILD_VECTOR);
-/// isPSHUFLWMask - Return true if the node specifies a shuffle of elements that
-/// is suitable for input to PSHUFLW.
-static bool isPSHUFLWMask(const int *Mask, MVT VT) {
- if (VT != MVT::v8i16)
+ if (N->getNumOperands() != 8)
return false;
-
+
// Upper quadword copied in order.
- for (int i = 4; i != 8; ++i)
- if (Mask[i] >= 0 && Mask[i] != i)
+ for (unsigned i = 4; i != 8; ++i)
+ if (!isUndefOrEqual(N->getOperand(i), i))
return false;
-
+
// Lower quadword shuffled.
- for (int i = 0; i != 4; ++i)
- if (Mask[i] >= 4)
+ for (unsigned i = 0; i != 4; ++i)
+ if (!isUndefOrInRange(N->getOperand(i), 0, 4))
return false;
-
- return true;
-}
-bool X86::isPSHUFLWMask(ShuffleVectorSDNode *N) {
- return ::isPSHUFLWMask(N->getMask(), N->getValueType(0));
+ return true;
}
/// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to SHUFP*.
-static bool isSHUFPMask(const int *Mask, MVT VT) {
- int NumElems = VT.getVectorNumElements();
- if (NumElems != 2 && NumElems != 4)
- return false;
-
- int Half = NumElems / 2;
- for (int i = 0; i < Half; ++i)
- if (!isUndefOrInRange(Mask[i], 0, NumElems))
+template<class SDOperand>
+static bool isSHUFPMask(SDOperand *Elems, unsigned NumElems) {
+ if (NumElems != 2 && NumElems != 4) return false;
+
+ unsigned Half = NumElems / 2;
+ for (unsigned i = 0; i < Half; ++i)
+ if (!isUndefOrInRange(Elems[i], 0, NumElems))
return false;
- for (int i = Half; i < NumElems; ++i)
- if (!isUndefOrInRange(Mask[i], NumElems, NumElems*2))
+ for (unsigned i = Half; i < NumElems; ++i)
+ if (!isUndefOrInRange(Elems[i], NumElems, NumElems*2))
return false;
-
+
return true;
}
-bool X86::isSHUFPMask(ShuffleVectorSDNode *N) {
- return ::isSHUFPMask(N->getMask(), N->getValueType(0));
+bool X86::isSHUFPMask(SDNode *N) {
+ assert(N->getOpcode() == ISD::BUILD_VECTOR);
+ return ::isSHUFPMask(N->op_begin(), N->getNumOperands());
}
/// isCommutedSHUFP - Returns true if the shuffle mask is exactly
/// the reverse of what x86 shuffles want. x86 shuffles requires the lower
/// half elements to come from vector 1 (which would equal the dest.) and
/// the upper half to come from vector 2.
-static bool isCommutedSHUFPMask(const int *Mask, MVT VT) {
- int NumElems = VT.getVectorNumElements();
-
- if (NumElems != 2 && NumElems != 4)
- return false;
-
- int Half = NumElems / 2;
- for (int i = 0; i < Half; ++i)
- if (!isUndefOrInRange(Mask[i], NumElems, NumElems*2))
+template<class SDOperand>
+static bool isCommutedSHUFP(SDOperand *Ops, unsigned NumOps) {
+ if (NumOps != 2 && NumOps != 4) return false;
+
+ unsigned Half = NumOps / 2;
+ for (unsigned i = 0; i < Half; ++i)
+ if (!isUndefOrInRange(Ops[i], NumOps, NumOps*2))
return false;
- for (int i = Half; i < NumElems; ++i)
- if (!isUndefOrInRange(Mask[i], 0, NumElems))
+ for (unsigned i = Half; i < NumOps; ++i)
+ if (!isUndefOrInRange(Ops[i], 0, NumOps))
return false;
return true;
}
-static bool isCommutedSHUFP(ShuffleVectorSDNode *N) {
- return isCommutedSHUFPMask(N->getMask(), N->getValueType(0));
+static bool isCommutedSHUFP(SDNode *N) {
+ assert(N->getOpcode() == ISD::BUILD_VECTOR);
+ return isCommutedSHUFP(N->op_begin(), N->getNumOperands());
}
/// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVHLPS.
-bool X86::isMOVHLPSMask(ShuffleVectorSDNode *N) {
- if (N->getValueType(0).getVectorNumElements() != 4)
+bool X86::isMOVHLPSMask(SDNode *N) {
+ assert(N->getOpcode() == ISD::BUILD_VECTOR);
+
+ if (N->getNumOperands() != 4)
return false;
// Expect bit0 == 6, bit1 == 7, bit2 == 2, bit3 == 3
- const int *Mask = N->getMask();
- return isUndefOrEqual(Mask[0], 6) &&
- isUndefOrEqual(Mask[1], 7) &&
- isUndefOrEqual(Mask[2], 2) &&
- isUndefOrEqual(Mask[3], 3);
+ return isUndefOrEqual(N->getOperand(0), 6) &&
+ isUndefOrEqual(N->getOperand(1), 7) &&
+ isUndefOrEqual(N->getOperand(2), 2) &&
+ isUndefOrEqual(N->getOperand(3), 3);
+}
+
+/// isMOVHLPS_v_undef_Mask - Special case of isMOVHLPSMask for canonical form
+/// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef,
+/// <2, 3, 2, 3>
+bool X86::isMOVHLPS_v_undef_Mask(SDNode *N) {
+ assert(N->getOpcode() == ISD::BUILD_VECTOR);
+
+ if (N->getNumOperands() != 4)
+ return false;
+
+ // Expect bit0 == 2, bit1 == 3, bit2 == 2, bit3 == 3
+ return isUndefOrEqual(N->getOperand(0), 2) &&
+ isUndefOrEqual(N->getOperand(1), 3) &&
+ isUndefOrEqual(N->getOperand(2), 2) &&
+ isUndefOrEqual(N->getOperand(3), 3);
}
/// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVLP{S|D}.
-bool X86::isMOVLPMask(ShuffleVectorSDNode *N) {
- unsigned NumElems = N->getValueType(0).getVectorNumElements();
+bool X86::isMOVLPMask(SDNode *N) {
+ assert(N->getOpcode() == ISD::BUILD_VECTOR);
+ unsigned NumElems = N->getNumOperands();
if (NumElems != 2 && NumElems != 4)
return false;
- const int *Mask = N->getMask();
for (unsigned i = 0; i < NumElems/2; ++i)
- if (!isUndefOrEqual(Mask[i], i + NumElems))
+ if (!isUndefOrEqual(N->getOperand(i), i + NumElems))
return false;
for (unsigned i = NumElems/2; i < NumElems; ++i)
- if (!isUndefOrEqual(Mask[i], i))
+ if (!isUndefOrEqual(N->getOperand(i), i))
return false;
return true;
@@ -2295,49 +2326,37 @@ bool X86::isMOVLPMask(ShuffleVectorSDNode *N) {
/// isMOVHPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVHP{S|D}
/// and MOVLHPS.
-bool X86::isMOVHPMask(ShuffleVectorSDNode *N) {
- unsigned NumElems = N->getValueType(0).getVectorNumElements();
+bool X86::isMOVHPMask(SDNode *N) {
+ assert(N->getOpcode() == ISD::BUILD_VECTOR);
+ unsigned NumElems = N->getNumOperands();
if (NumElems != 2 && NumElems != 4)
return false;
- const int *Mask = N->getMask();
for (unsigned i = 0; i < NumElems/2; ++i)
- if (!isUndefOrEqual(Mask[i], i))
+ if (!isUndefOrEqual(N->getOperand(i), i))
return false;
- for (unsigned i = 0; i < NumElems/2; ++i)
- if (!isUndefOrEqual(Mask[i + NumElems/2], i + NumElems))
+ for (unsigned i = 0; i < NumElems/2; ++i) {
+ SDValue Arg = N->getOperand(i + NumElems/2);
+ if (!isUndefOrEqual(Arg, i + NumElems))
return false;
+ }
return true;
}
-/// isMOVHLPS_v_undef_Mask - Special case of isMOVHLPSMask for canonical form
-/// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef,
-/// <2, 3, 2, 3>
-bool X86::isMOVHLPS_v_undef_Mask(ShuffleVectorSDNode *N) {
- unsigned NumElems = N->getValueType(0).getVectorNumElements();
-
- if (NumElems != 4)
- return false;
-
- // Expect bit0 == 2, bit1 == 3, bit2 == 2, bit3 == 3
- const int *Mask = N->getMask();
- return isUndefOrEqual(Mask[0], 2) && isUndefOrEqual(Mask[1], 3) &&
- isUndefOrEqual(Mask[2], 2) && isUndefOrEqual(Mask[3], 3);
-}
-
/// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to UNPCKL.
-static bool isUNPCKLMask(const int *Mask, MVT VT, bool V2IsSplat = false) {
- int NumElts = VT.getVectorNumElements();
+template<class SDOperand>
+bool static isUNPCKLMask(SDOperand *Elts, unsigned NumElts,
+ bool V2IsSplat = false) {
if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
return false;
-
- for (int i = 0, j = 0; i != NumElts; i += 2, ++j) {
- int BitI = Mask[i];
- int BitI1 = Mask[i+1];
+
+ for (unsigned i = 0, j = 0; i != NumElts; i += 2, ++j) {
+ SDValue BitI = Elts[i];
+ SDValue BitI1 = Elts[i+1];
if (!isUndefOrEqual(BitI, j))
return false;
if (V2IsSplat) {
@@ -2348,23 +2367,26 @@ static bool isUNPCKLMask(const int *Mask, MVT VT, bool V2IsSplat = false) {
return false;
}
}
+
return true;
}
-bool X86::isUNPCKLMask(ShuffleVectorSDNode *N, bool V2IsSplat) {
- return ::isUNPCKLMask(N->getMask(), N->getValueType(0), V2IsSplat);
+bool X86::isUNPCKLMask(SDNode *N, bool V2IsSplat) {
+ assert(N->getOpcode() == ISD::BUILD_VECTOR);
+ return ::isUNPCKLMask(N->op_begin(), N->getNumOperands(), V2IsSplat);
}
/// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to UNPCKH.
-static bool isUNPCKHMask(const int *Mask, MVT VT, bool V2IsSplat = false) {
- int NumElts = VT.getVectorNumElements();
+template<class SDOperand>
+bool static isUNPCKHMask(SDOperand *Elts, unsigned NumElts,
+ bool V2IsSplat = false) {
if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
return false;
-
- for (int i = 0, j = 0; i != NumElts; i += 2, ++j) {
- int BitI = Mask[i];
- int BitI1 = Mask[i+1];
+
+ for (unsigned i = 0, j = 0; i != NumElts; i += 2, ++j) {
+ SDValue BitI = Elts[i];
+ SDValue BitI1 = Elts[i+1];
if (!isUndefOrEqual(BitI, j + NumElts/2))
return false;
if (V2IsSplat) {
@@ -2375,166 +2397,270 @@ static bool isUNPCKHMask(const int *Mask, MVT VT, bool V2IsSplat = false) {
return false;
}
}
+
return true;
}
-bool X86::isUNPCKHMask(ShuffleVectorSDNode *N, bool V2IsSplat) {
- return ::isUNPCKHMask(N->getMask(), N->getValueType(0), V2IsSplat);
+bool X86::isUNPCKHMask(SDNode *N, bool V2IsSplat) {
+ assert(N->getOpcode() == ISD::BUILD_VECTOR);
+ return ::isUNPCKHMask(N->op_begin(), N->getNumOperands(), V2IsSplat);
}
/// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form
/// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef,
/// <0, 0, 1, 1>
-static bool isUNPCKL_v_undef_Mask(const int *Mask, MVT VT) {
- int NumElems = VT.getVectorNumElements();
+bool X86::isUNPCKL_v_undef_Mask(SDNode *N) {
+ assert(N->getOpcode() == ISD::BUILD_VECTOR);
+
+ unsigned NumElems = N->getNumOperands();
if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
return false;
-
- for (int i = 0, j = 0; i != NumElems; i += 2, ++j) {
- int BitI = Mask[i];
- int BitI1 = Mask[i+1];
+
+ for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) {
+ SDValue BitI = N->getOperand(i);
+ SDValue BitI1 = N->getOperand(i+1);
+
if (!isUndefOrEqual(BitI, j))
return false;
if (!isUndefOrEqual(BitI1, j))
return false;
}
- return true;
-}
-bool X86::isUNPCKL_v_undef_Mask(ShuffleVectorSDNode *N) {
- return ::isUNPCKL_v_undef_Mask(N->getMask(), N->getValueType(0));
+ return true;
}
/// isUNPCKH_v_undef_Mask - Special case of isUNPCKHMask for canonical form
/// of vector_shuffle v, v, <2, 6, 3, 7>, i.e. vector_shuffle v, undef,
/// <2, 2, 3, 3>
-static bool isUNPCKH_v_undef_Mask(const int *Mask, MVT VT) {
- int NumElems = VT.getVectorNumElements();
+bool X86::isUNPCKH_v_undef_Mask(SDNode *N) {
+ assert(N->getOpcode() == ISD::BUILD_VECTOR);
+
+ unsigned NumElems = N->getNumOperands();
if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
return false;
-
- for (int i = 0, j = NumElems / 2; i != NumElems; i += 2, ++j) {
- int BitI = Mask[i];
- int BitI1 = Mask[i+1];
+
+ for (unsigned i = 0, j = NumElems / 2; i != NumElems; i += 2, ++j) {
+ SDValue BitI = N->getOperand(i);
+ SDValue BitI1 = N->getOperand(i + 1);
+
if (!isUndefOrEqual(BitI, j))
return false;
if (!isUndefOrEqual(BitI1, j))
return false;
}
- return true;
-}
-bool X86::isUNPCKH_v_undef_Mask(ShuffleVectorSDNode *N) {
- return ::isUNPCKH_v_undef_Mask(N->getMask(), N->getValueType(0));
+ return true;
}
/// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVSS,
/// MOVSD, and MOVD, i.e. setting the lowest element.
-static bool isMOVLMask(const int *Mask, MVT VT) {
- int NumElts = VT.getVectorNumElements();
+template<class SDOperand>
+static bool isMOVLMask(SDOperand *Elts, unsigned NumElts) {
if (NumElts != 2 && NumElts != 4)
return false;
-
- if (!isUndefOrEqual(Mask[0], NumElts))
+
+ if (!isUndefOrEqual(Elts[0], NumElts))
return false;
-
- for (int i = 1; i < NumElts; ++i)
- if (!isUndefOrEqual(Mask[i], i))
+
+ for (unsigned i = 1; i < NumElts; ++i) {
+ if (!isUndefOrEqual(Elts[i], i))
return false;
-
+ }
+
return true;
}
-bool X86::isMOVLMask(ShuffleVectorSDNode *N) {
- return ::isMOVLMask(N->getMask(), N->getValueType(0));
+bool X86::isMOVLMask(SDNode *N) {
+ assert(N->getOpcode() == ISD::BUILD_VECTOR);
+ return ::isMOVLMask(N->op_begin(), N->getNumOperands());
}
/// isCommutedMOVL - Returns true if the shuffle mask is except the reverse
/// of what x86 movss want. X86 movs requires the lowest element to be lowest
/// element of vector 2 and the other elements to come from vector 1 in order.
-static bool isCommutedMOVLMask(const int *Mask, MVT VT, bool V2IsSplat = false,
- bool V2IsUndef = false) {
- int NumOps = VT.getVectorNumElements();
+template<class SDOperand>
+static bool isCommutedMOVL(SDOperand *Ops, unsigned NumOps,
+ bool V2IsSplat = false,
+ bool V2IsUndef = false) {
if (NumOps != 2 && NumOps != 4 && NumOps != 8 && NumOps != 16)
return false;
-
- if (!isUndefOrEqual(Mask[0], 0))
+
+ if (!isUndefOrEqual(Ops[0], 0))
return false;
-
- for (int i = 1; i < NumOps; ++i)
- if (!(isUndefOrEqual(Mask[i], i+NumOps) ||
- (V2IsUndef && isUndefOrInRange(Mask[i], NumOps, NumOps*2)) ||
- (V2IsSplat && isUndefOrEqual(Mask[i], NumOps))))
+
+ for (unsigned i = 1; i < NumOps; ++i) {
+ SDValue Arg = Ops[i];
+ if (!(isUndefOrEqual(Arg, i+NumOps) ||
+ (V2IsUndef && isUndefOrInRange(Arg, NumOps, NumOps*2)) ||
+ (V2IsSplat && isUndefOrEqual(Arg, NumOps))))
return false;
-
+ }
+
return true;
}
-static bool isCommutedMOVL(ShuffleVectorSDNode *N, bool V2IsSplat = false,
+static bool isCommutedMOVL(SDNode *N, bool V2IsSplat = false,
bool V2IsUndef = false) {
- return isCommutedMOVLMask(N->getMask(), N->getValueType(0), V2IsSplat,
- V2IsUndef);
+ assert(N->getOpcode() == ISD::BUILD_VECTOR);
+ return isCommutedMOVL(N->op_begin(), N->getNumOperands(),
+ V2IsSplat, V2IsUndef);
}
/// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVSHDUP.
-bool X86::isMOVSHDUPMask(ShuffleVectorSDNode *N) {
- if (N->getValueType(0).getVectorNumElements() != 4)
+bool X86::isMOVSHDUPMask(SDNode *N) {
+ assert(N->getOpcode() == ISD::BUILD_VECTOR);
+
+ if (N->getNumOperands() != 4)
return false;
// Expect 1, 1, 3, 3
- const int *Mask = N->getMask();
- for (unsigned i = 0; i < 2; ++i)
- if (Mask[i] >=0 && Mask[i] != 1)
- return false;
+ for (unsigned i = 0; i < 2; ++i) {
+ SDValue Arg = N->getOperand(i);
+ if (Arg.getOpcode() == ISD::UNDEF) continue;
+ assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
+ unsigned Val = cast<ConstantSDNode>(Arg)->getZExtValue();
+ if (Val != 1) return false;
+ }
bool HasHi = false;
for (unsigned i = 2; i < 4; ++i) {
- if (Mask[i] >= 0 && Mask[i] != 3)
- return false;
- if (Mask[i] == 3)
- HasHi = true;
+ SDValue Arg = N->getOperand(i);
+ if (Arg.getOpcode() == ISD::UNDEF) continue;
+ assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
+ unsigned Val = cast<ConstantSDNode>(Arg)->getZExtValue();
+ if (Val != 3) return false;
+ HasHi = true;
}
+
// Don't use movshdup if it can be done with a shufps.
- // FIXME: verify that matching u, u, 3, 3 is what we want.
return HasHi;
}
/// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVSLDUP.
-bool X86::isMOVSLDUPMask(ShuffleVectorSDNode *N) {
- if (N->getValueType(0).getVectorNumElements() != 4)
+bool X86::isMOVSLDUPMask(SDNode *N) {
+ assert(N->getOpcode() == ISD::BUILD_VECTOR);
+
+ if (N->getNumOperands() != 4)
return false;
// Expect 0, 0, 2, 2
- const int *Mask = N->getMask();
- for (unsigned i = 0; i < 2; ++i)
- if (Mask[i] > 0)
- return false;
+ for (unsigned i = 0; i < 2; ++i) {
+ SDValue Arg = N->getOperand(i);
+ if (Arg.getOpcode() == ISD::UNDEF) continue;
+ assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
+ unsigned Val = cast<ConstantSDNode>(Arg)->getZExtValue();
+ if (Val != 0) return false;
+ }
bool HasHi = false;
for (unsigned i = 2; i < 4; ++i) {
- if (Mask[i] >= 0 && Mask[i] != 2)
- return false;
- if (Mask[i] == 2)
- HasHi = true;
+ SDValue Arg = N->getOperand(i);
+ if (Arg.getOpcode() == ISD::UNDEF) continue;
+ assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
+ unsigned Val = cast<ConstantSDNode>(Arg)->getZExtValue();
+ if (Val != 2) return false;
+ HasHi = true;
}
- // Don't use movsldup if it can be done with a shufps.
+
+ // Don't use movsldup if it can be done with a shufps.
return HasHi;
}
+/// isIdentityMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies an identity operation on the LHS or RHS.
+static bool isIdentityMask(SDNode *N, bool RHS = false) {
+ unsigned NumElems = N->getNumOperands();
+ for (unsigned i = 0; i < NumElems; ++i)
+ if (!isUndefOrEqual(N->getOperand(i), i + (RHS ? NumElems : 0)))
+ return false;
+ return true;
+}
+
+/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies
+/// a splat of a single element.
+static bool isSplatMask(SDNode *N) {
+ assert(N->getOpcode() == ISD::BUILD_VECTOR);
+
+ // This is a splat operation if each element of the permute is the same, and
+ // if the value doesn't reference the second vector.
+ unsigned NumElems = N->getNumOperands();
+ SDValue ElementBase;
+ unsigned i = 0;
+ for (; i != NumElems; ++i) {
+ SDValue Elt = N->getOperand(i);
+ if (isa<ConstantSDNode>(Elt)) {
+ ElementBase = Elt;
+ break;
+ }
+ }
+
+ if (!ElementBase.getNode())
+ return false;
+
+ for (; i != NumElems; ++i) {
+ SDValue Arg = N->getOperand(i);
+ if (Arg.getOpcode() == ISD::UNDEF) continue;
+ assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
+ if (Arg != ElementBase) return false;
+ }
+
+ // Make sure it is a splat of the first vector operand.
+ return cast<ConstantSDNode>(ElementBase)->getZExtValue() < NumElems;
+}
+
+/// getSplatMaskEltNo - Given a splat mask, return the index to the element
+/// we want to splat.
+static SDValue getSplatMaskEltNo(SDNode *N) {
+ assert(isSplatMask(N) && "Not a splat mask");
+ unsigned NumElems = N->getNumOperands();
+ SDValue ElementBase;
+ unsigned i = 0;
+ for (; i != NumElems; ++i) {
+ SDValue Elt = N->getOperand(i);
+ if (isa<ConstantSDNode>(Elt))
+ return Elt;
+ }
+ assert(0 && " No splat value found!");
+ return SDValue();
+}
+
+
+/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies
+/// a splat of a single element and it's a 2 or 4 element mask.
+bool X86::isSplatMask(SDNode *N) {
+ assert(N->getOpcode() == ISD::BUILD_VECTOR);
+
+ // We can only splat 64-bit, and 32-bit quantities with a single instruction.
+ if (N->getNumOperands() != 4 && N->getNumOperands() != 2)
+ return false;
+ return ::isSplatMask(N);
+}
+
+/// isSplatLoMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a splat of element zero.
+bool X86::isSplatLoMask(SDNode *N) {
+ assert(N->getOpcode() == ISD::BUILD_VECTOR);
+
+ for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i)
+ if (!isUndefOrEqual(N->getOperand(i), 0))
+ return false;
+ return true;
+}
+
/// isMOVDDUPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVDDUP.
-bool X86::isMOVDDUPMask(ShuffleVectorSDNode *N) {
- int e = N->getValueType(0).getVectorNumElements() / 2;
- const int *Mask = N->getMask();
-
- for (int i = 0; i < e; ++i)
- if (!isUndefOrEqual(Mask[i], i))
+bool X86::isMOVDDUPMask(SDNode *N) {
+ assert(N->getOpcode() == ISD::BUILD_VECTOR);
+
+ unsigned e = N->getNumOperands() / 2;
+ for (unsigned i = 0; i < e; ++i)
+ if (!isUndefOrEqual(N->getOperand(i), i))
return false;
- for (int i = 0; i < e; ++i)
- if (!isUndefOrEqual(Mask[e+i], i))
+ for (unsigned i = 0; i < e; ++i)
+ if (!isUndefOrEqual(N->getOperand(e+i), i))
return false;
return true;
}
@@ -2543,20 +2669,20 @@ bool X86::isMOVDDUPMask(ShuffleVectorSDNode *N) {
/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP*
/// instructions.
unsigned X86::getShuffleSHUFImmediate(SDNode *N) {
- ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
- int NumOperands = SVOp->getValueType(0).getVectorNumElements();
- const int *MaskP = SVOp->getMask();
-
+ unsigned NumOperands = N->getNumOperands();
unsigned Shift = (NumOperands == 4) ? 2 : 1;
unsigned Mask = 0;
- for (int i = 0; i < NumOperands; ++i) {
- int Val = MaskP[NumOperands-i-1];
- if (Val < 0) Val = 0;
+ for (unsigned i = 0; i < NumOperands; ++i) {
+ unsigned Val = 0;
+ SDValue Arg = N->getOperand(NumOperands-i-1);
+ if (Arg.getOpcode() != ISD::UNDEF)
+ Val = cast<ConstantSDNode>(Arg)->getZExtValue();
if (Val >= NumOperands) Val -= NumOperands;
Mask |= Val;
if (i != NumOperands - 1)
Mask <<= Shift;
}
+
return Mask;
}
@@ -2564,16 +2690,19 @@ unsigned X86::getShuffleSHUFImmediate(SDNode *N) {
/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFHW
/// instructions.
unsigned X86::getShufflePSHUFHWImmediate(SDNode *N) {
- const int *MaskP = cast<ShuffleVectorSDNode>(N)->getMask();
unsigned Mask = 0;
// 8 nodes, but we only care about the last 4.
for (unsigned i = 7; i >= 4; --i) {
- int Val = MaskP[i];
- if (Val >= 0)
+ unsigned Val = 0;
+ SDValue Arg = N->getOperand(i);
+ if (Arg.getOpcode() != ISD::UNDEF) {
+ Val = cast<ConstantSDNode>(Arg)->getZExtValue();
Mask |= (Val - 4);
+ }
if (i != 4)
Mask <<= 2;
}
+
return Mask;
}
@@ -2581,71 +2710,90 @@ unsigned X86::getShufflePSHUFHWImmediate(SDNode *N) {
/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFLW
/// instructions.
unsigned X86::getShufflePSHUFLWImmediate(SDNode *N) {
- const int *MaskP = cast<ShuffleVectorSDNode>(N)->getMask();
unsigned Mask = 0;
// 8 nodes, but we only care about the first 4.
for (int i = 3; i >= 0; --i) {
- int Val = MaskP[i];
- if (Val >= 0)
- Mask |= Val;
+ unsigned Val = 0;
+ SDValue Arg = N->getOperand(i);
+ if (Arg.getOpcode() != ISD::UNDEF)
+ Val = cast<ConstantSDNode>(Arg)->getZExtValue();
+ Mask |= Val;
if (i != 0)
Mask <<= 2;
}
+
return Mask;
}
-/// CommuteVectorShuffle - Swap vector_shuffle operands as well as values in
-/// their permute mask.
-static SDValue CommuteVectorShuffle(ShuffleVectorSDNode *SVOp,
- SelectionDAG &DAG) {
- MVT VT = SVOp->getValueType(0);
- int NumElems = VT.getVectorNumElements();
- const int *Mask = SVOp->getMask();
- SmallVector<int, 8> MaskVec;
-
- for (int i = 0; i != NumElems; ++i) {
- int idx = Mask[i];
- if (idx < 0)
- MaskVec.push_back(idx);
- else if (idx < NumElems)
- MaskVec.push_back(idx + NumElems);
+/// CommuteVectorShuffle - Swap vector_shuffle operands as well as
+/// values in their permute mask.
+static SDValue CommuteVectorShuffle(SDValue Op, SDValue &V1,
+ SDValue &V2, SDValue &Mask,
+ SelectionDAG &DAG) {
+ MVT VT = Op.getValueType();
+ MVT MaskVT = Mask.getValueType();
+ MVT EltVT = MaskVT.getVectorElementType();
+ unsigned NumElems = Mask.getNumOperands();
+ SmallVector<SDValue, 8> MaskVec;
+ DebugLoc dl = Op.getDebugLoc();
+
+ for (unsigned i = 0; i != NumElems; ++i) {
+ SDValue Arg = Mask.getOperand(i);
+ if (Arg.getOpcode() == ISD::UNDEF) {
+ MaskVec.push_back(DAG.getUNDEF(EltVT));
+ continue;
+ }
+ assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
+ unsigned Val = cast<ConstantSDNode>(Arg)->getZExtValue();
+ if (Val < NumElems)
+ MaskVec.push_back(DAG.getConstant(Val + NumElems, EltVT));
else
- MaskVec.push_back(idx - NumElems);
+ MaskVec.push_back(DAG.getConstant(Val - NumElems, EltVT));
}
- return DAG.getVectorShuffle(VT, SVOp->getDebugLoc(), SVOp->getOperand(1),
- SVOp->getOperand(0), &MaskVec[0]);
+
+ std::swap(V1, V2);
+ Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, &MaskVec[0], NumElems);
+ return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V1, V2, Mask);
}
/// CommuteVectorShuffleMask - Change values in a shuffle permute mask assuming
/// the two vector operands have swapped position.
-static void CommuteVectorShuffleMask(SmallVectorImpl<int> &Mask, MVT VT) {
- int NumElems = VT.getVectorNumElements();
- for (int i = 0; i != NumElems; ++i) {
- int idx = Mask[i];
- if (idx < 0)
+static
+SDValue CommuteVectorShuffleMask(SDValue Mask, SelectionDAG &DAG, DebugLoc dl) {
+ MVT MaskVT = Mask.getValueType();
+ MVT EltVT = MaskVT.getVectorElementType();
+ unsigned NumElems = Mask.getNumOperands();
+ SmallVector<SDValue, 8> MaskVec;
+ for (unsigned i = 0; i != NumElems; ++i) {
+ SDValue Arg = Mask.getOperand(i);
+ if (Arg.getOpcode() == ISD::UNDEF) {
+ MaskVec.push_back(DAG.getUNDEF(EltVT));
continue;
- else if (idx < NumElems)
- Mask[i] = idx + NumElems;
+ }
+ assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
+ unsigned Val = cast<ConstantSDNode>(Arg)->getZExtValue();
+ if (Val < NumElems)
+ MaskVec.push_back(DAG.getConstant(Val + NumElems, EltVT));
else
- Mask[i] = idx - NumElems;
+ MaskVec.push_back(DAG.getConstant(Val - NumElems, EltVT));
}
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, &MaskVec[0], NumElems);
}
+
/// ShouldXformToMOVHLPS - Return true if the node should be transformed to
/// match movhlps. The lower half elements should come from upper half of
/// V1 (and in order), and the upper half elements should come from the upper
/// half of V2 (and in order).
-static bool ShouldXformToMOVHLPS(ShuffleVectorSDNode *Op) {
- int NumElems = Op->getValueType(0).getVectorNumElements();
- const int *Mask = Op->getMask();
-
+static bool ShouldXformToMOVHLPS(SDNode *Mask) {
+ unsigned NumElems = Mask->getNumOperands();
if (NumElems != 4)
return false;
for (unsigned i = 0, e = 2; i != e; ++i)
- if (!isUndefOrEqual(Mask[i], i+2))
+ if (!isUndefOrEqual(Mask->getOperand(i), i+2))
return false;
for (unsigned i = 2; i != 4; ++i)
- if (!isUndefOrEqual(Mask[i], i+4))
+ if (!isUndefOrEqual(Mask->getOperand(i), i+4))
return false;
return true;
}
@@ -2669,8 +2817,7 @@ static bool isScalarLoadToVector(SDNode *N, LoadSDNode **LD = NULL) {
/// V1 (and in order), and the upper half elements should come from the upper
/// half of V2 (and in order). And since V1 will become the source of the
/// MOVLP, it must be either a vector load or a scalar load to vector.
-static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2,
- ShuffleVectorSDNode *Op) {
+static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2, SDNode *Mask) {
if (!ISD::isNON_EXTLoad(V1) && !isScalarLoadToVector(V1))
return false;
// If V2 is a vector load, don't do this transformation. We will try to use
@@ -2678,16 +2825,14 @@ static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2,
if (ISD::isNON_EXTLoad(V2))
return false;
- int NumElems = Op->getValueType(0).getVectorNumElements();
- const int *Mask = Op->getMask();
-
+ unsigned NumElems = Mask->getNumOperands();
if (NumElems != 2 && NumElems != 4)
return false;
- for (int i = 0, e = NumElems/2; i != e; ++i)
- if (!isUndefOrEqual(Mask[i], i))
+ for (unsigned i = 0, e = NumElems/2; i != e; ++i)
+ if (!isUndefOrEqual(Mask->getOperand(i), i))
return false;
- for (int i = NumElems/2; i != NumElems; ++i)
- if (!isUndefOrEqual(Mask[i], i+NumElems))
+ for (unsigned i = NumElems/2; i != NumElems; ++i)
+ if (!isUndefOrEqual(Mask->getOperand(i), i+NumElems))
return false;
return true;
}
@@ -2705,6 +2850,29 @@ static bool isSplatVector(SDNode *N) {
return true;
}
+/// isUndefShuffle - Returns true if N is a VECTOR_SHUFFLE that can be resolved
+/// to an undef.
+static bool isUndefShuffle(SDNode *N) {
+ if (N->getOpcode() != ISD::VECTOR_SHUFFLE)
+ return false;
+
+ SDValue V1 = N->getOperand(0);
+ SDValue V2 = N->getOperand(1);
+ SDValue Mask = N->getOperand(2);
+ unsigned NumElems = Mask.getNumOperands();
+ for (unsigned i = 0; i != NumElems; ++i) {
+ SDValue Arg = Mask.getOperand(i);
+ if (Arg.getOpcode() != ISD::UNDEF) {
+ unsigned Val = cast<ConstantSDNode>(Arg)->getZExtValue();
+ if (Val < NumElems && V1.getOpcode() != ISD::UNDEF)
+ return false;
+ else if (Val >= NumElems && V2.getOpcode() != ISD::UNDEF)
+ return false;
+ }
+ }
+ return true;
+}
+
/// isZeroNode - Returns true if Elt is a constant zero or a floating point
/// constant +0.0.
static inline bool isZeroNode(SDValue Elt) {
@@ -2715,26 +2883,34 @@ static inline bool isZeroNode(SDValue Elt) {
}
/// isZeroShuffle - Returns true if N is a VECTOR_SHUFFLE that can be resolved
-/// to an zero vector.
-/// FIXME: move to dag combiner?
-static bool isZeroShuffle(ShuffleVectorSDNode *N) {
+/// to a zero vector.
+static bool isZeroShuffle(SDNode *N) {
+ if (N->getOpcode() != ISD::VECTOR_SHUFFLE)
+ return false;
+
SDValue V1 = N->getOperand(0);
SDValue V2 = N->getOperand(1);
- const int *Mask = N->getMask();
- int NumElems = N->getValueType(0).getVectorNumElements();
- for (int i = 0; i != NumElems; ++i) {
- int Idx = Mask[i];
- if (Idx >= NumElems) {
- unsigned Opc = V2.getOpcode();
- if (Opc == ISD::UNDEF || ISD::isBuildVectorAllZeros(V2.getNode()))
+ SDValue Mask = N->getOperand(2);
+ unsigned NumElems = Mask.getNumOperands();
+ for (unsigned i = 0; i != NumElems; ++i) {
+ SDValue Arg = Mask.getOperand(i);
+ if (Arg.getOpcode() == ISD::UNDEF)
+ continue;
+
+ unsigned Idx = cast<ConstantSDNode>(Arg)->getZExtValue();
+ if (Idx < NumElems) {
+ unsigned Opc = V1.getNode()->getOpcode();
+ if (Opc == ISD::UNDEF || ISD::isBuildVectorAllZeros(V1.getNode()))
continue;
- if (Opc != ISD::BUILD_VECTOR || !isZeroNode(V2.getOperand(Idx-NumElems)))
+ if (Opc != ISD::BUILD_VECTOR ||
+ !isZeroNode(V1.getNode()->getOperand(Idx)))
return false;
- } else if (Idx >= 0) {
- unsigned Opc = V1.getOpcode();
- if (Opc == ISD::UNDEF || ISD::isBuildVectorAllZeros(V1.getNode()))
+ } else if (Idx >= NumElems) {
+ unsigned Opc = V2.getNode()->getOpcode();
+ if (Opc == ISD::UNDEF || ISD::isBuildVectorAllZeros(V2.getNode()))
continue;
- if (Opc != ISD::BUILD_VECTOR || !isZeroNode(V1.getOperand(Idx)))
+ if (Opc != ISD::BUILD_VECTOR ||
+ !isZeroNode(V2.getNode()->getOperand(Idx - NumElems)))
return false;
}
}
@@ -2782,94 +2958,127 @@ static SDValue getOnesVector(MVT VT, SelectionDAG &DAG, DebugLoc dl) {
/// NormalizeMask - V2 is a splat, modify the mask (if needed) so all elements
/// that point to V2 points to its first element.
-static SDValue NormalizeMask(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
- MVT VT = SVOp->getValueType(0);
- int NumElems = VT.getVectorNumElements();
- const int *Mask = SVOp->getMask();
-
+static SDValue NormalizeMask(SDValue Mask, SelectionDAG &DAG) {
+ assert(Mask.getOpcode() == ISD::BUILD_VECTOR);
+
bool Changed = false;
- SmallVector<int, 8> MaskVec;
-
- for (int i = 0; i != NumElems; ++i) {
- int idx = Mask[i];
- if (idx > NumElems) {
- idx = NumElems;
- Changed = true;
+ SmallVector<SDValue, 8> MaskVec;
+ unsigned NumElems = Mask.getNumOperands();
+ for (unsigned i = 0; i != NumElems; ++i) {
+ SDValue Arg = Mask.getOperand(i);
+ if (Arg.getOpcode() != ISD::UNDEF) {
+ unsigned Val = cast<ConstantSDNode>(Arg)->getZExtValue();
+ if (Val > NumElems) {
+ Arg = DAG.getConstant(NumElems, Arg.getValueType());
+ Changed = true;
+ }
}
- MaskVec.push_back(idx);
+ MaskVec.push_back(Arg);
}
+
if (Changed)
- return DAG.getVectorShuffle(VT, SVOp->getDebugLoc(), SVOp->getOperand(0),
- SVOp->getOperand(1), &MaskVec[0]);
- return SDValue(SVOp, 0);
+ Mask = DAG.getNode(ISD::BUILD_VECTOR, Mask.getDebugLoc(),
+ Mask.getValueType(),
+ &MaskVec[0], MaskVec.size());
+ return Mask;
}
/// getMOVLMask - Returns a vector_shuffle mask for an movs{s|d}, movd
/// operation of specified width.
-static SDValue getMOVL(SelectionDAG &DAG, DebugLoc dl, MVT VT, SDValue V1,
- SDValue V2) {
- unsigned NumElems = VT.getVectorNumElements();
- SmallVector<int, 8> Mask;
- Mask.push_back(NumElems);
- for (unsigned i = 1; i != NumElems; ++i)
- Mask.push_back(i);
- return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]);
-}
+static SDValue getMOVLMask(unsigned NumElems, SelectionDAG &DAG, DebugLoc dl) {
+ MVT MaskVT = MVT::getIntVectorWithNumElements(NumElems);
+ MVT BaseVT = MaskVT.getVectorElementType();
-/// getUnpackl - Returns a vector_shuffle node for an unpackl operation.
-static SDValue getUnpackl(SelectionDAG &DAG, DebugLoc dl, MVT VT, SDValue V1,
- SDValue V2) {
- unsigned NumElems = VT.getVectorNumElements();
- SmallVector<int, 8> Mask;
+ SmallVector<SDValue, 8> MaskVec;
+ MaskVec.push_back(DAG.getConstant(NumElems, BaseVT));
+ for (unsigned i = 1; i != NumElems; ++i)
+ MaskVec.push_back(DAG.getConstant(i, BaseVT));
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT,
+ &MaskVec[0], MaskVec.size());
+}
+
+/// getUnpacklMask - Returns a vector_shuffle mask for an unpackl operation
+/// of specified width.
+static SDValue getUnpacklMask(unsigned NumElems, SelectionDAG &DAG,
+ DebugLoc dl) {
+ MVT MaskVT = MVT::getIntVectorWithNumElements(NumElems);
+ MVT BaseVT = MaskVT.getVectorElementType();
+ SmallVector<SDValue, 8> MaskVec;
for (unsigned i = 0, e = NumElems/2; i != e; ++i) {
- Mask.push_back(i);
- Mask.push_back(i + NumElems);
+ MaskVec.push_back(DAG.getConstant(i, BaseVT));
+ MaskVec.push_back(DAG.getConstant(i + NumElems, BaseVT));
}
- return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]);
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT,
+ &MaskVec[0], MaskVec.size());
}
-/// getUnpackhMask - Returns a vector_shuffle node for an unpackh operation.
-static SDValue getUnpackh(SelectionDAG &DAG, DebugLoc dl, MVT VT, SDValue V1,
- SDValue V2) {
- unsigned NumElems = VT.getVectorNumElements();
+/// getUnpackhMask - Returns a vector_shuffle mask for an unpackh operation
+/// of specified width.
+static SDValue getUnpackhMask(unsigned NumElems, SelectionDAG &DAG,
+ DebugLoc dl) {
+ MVT MaskVT = MVT::getIntVectorWithNumElements(NumElems);
+ MVT BaseVT = MaskVT.getVectorElementType();
unsigned Half = NumElems/2;
- SmallVector<int, 8> Mask;
+ SmallVector<SDValue, 8> MaskVec;
for (unsigned i = 0; i != Half; ++i) {
- Mask.push_back(i + Half);
- Mask.push_back(i + NumElems + Half);
- }
- return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]);
+ MaskVec.push_back(DAG.getConstant(i + Half, BaseVT));
+ MaskVec.push_back(DAG.getConstant(i + NumElems + Half, BaseVT));
+ }
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT,
+ &MaskVec[0], MaskVec.size());
+}
+
+/// getSwapEltZeroMask - Returns a vector_shuffle mask for a shuffle that swaps
+/// element #0 of a vector with the specified index, leaving the rest of the
+/// elements in place.
+static SDValue getSwapEltZeroMask(unsigned NumElems, unsigned DestElt,
+ SelectionDAG &DAG, DebugLoc dl) {
+ MVT MaskVT = MVT::getIntVectorWithNumElements(NumElems);
+ MVT BaseVT = MaskVT.getVectorElementType();
+ SmallVector<SDValue, 8> MaskVec;
+ // Element #0 of the result gets the elt we are replacing.
+ MaskVec.push_back(DAG.getConstant(DestElt, BaseVT));
+ for (unsigned i = 1; i != NumElems; ++i)
+ MaskVec.push_back(DAG.getConstant(i == DestElt ? 0 : i, BaseVT));
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT,
+ &MaskVec[0], MaskVec.size());
}
/// PromoteSplat - Promote a splat of v4f32, v8i16 or v16i8 to v4i32.
-static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG,
- bool HasSSE2) {
- if (SV->getValueType(0).getVectorNumElements() <= 4)
- return SDValue(SV, 0);
-
- MVT PVT = MVT::v4f32;
- MVT VT = SV->getValueType(0);
- DebugLoc dl = SV->getDebugLoc();
- SDValue V1 = SV->getOperand(0);
- int NumElems = VT.getVectorNumElements();
- int EltNo = SV->getSplatIndex();
-
- // unpack elements to the correct location
- while (NumElems > 4) {
- if (EltNo < NumElems/2) {
- V1 = getUnpackl(DAG, dl, VT, V1, V1);
- } else {
- V1 = getUnpackh(DAG, dl, VT, V1, V1);
- EltNo -= NumElems/2;
+static SDValue PromoteSplat(SDValue Op, SelectionDAG &DAG, bool HasSSE2) {
+ MVT PVT = HasSSE2 ? MVT::v4i32 : MVT::v4f32;
+ MVT VT = Op.getValueType();
+ if (PVT == VT)
+ return Op;
+ SDValue V1 = Op.getOperand(0);
+ SDValue Mask = Op.getOperand(2);
+ unsigned MaskNumElems = Mask.getNumOperands();
+ unsigned NumElems = MaskNumElems;
+ DebugLoc dl = Op.getDebugLoc();
+ // Special handling of v4f32 -> v4i32.
+ if (VT != MVT::v4f32) {
+ // Find which element we want to splat.
+ SDNode* EltNoNode = getSplatMaskEltNo(Mask.getNode()).getNode();
+ unsigned EltNo = cast<ConstantSDNode>(EltNoNode)->getZExtValue();
+ // unpack elements to the correct location
+ while (NumElems > 4) {
+ if (EltNo < NumElems/2) {
+ Mask = getUnpacklMask(MaskNumElems, DAG, dl);
+ } else {
+ Mask = getUnpackhMask(MaskNumElems, DAG, dl);
+ EltNo -= NumElems/2;
+ }
+ V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V1, V1, Mask);
+ NumElems >>= 1;
}
- NumElems >>= 1;
+ SDValue Cst = DAG.getConstant(EltNo, MVT::i32);
+ Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
}
-
- // Perform the splat.
- int SplatMask[4] = { EltNo, EltNo, EltNo, EltNo };
+
V1 = DAG.getNode(ISD::BIT_CONVERT, dl, PVT, V1);
- V1 = DAG.getVectorShuffle(PVT, dl, V1, DAG.getUNDEF(PVT), &SplatMask[0]);
- return DAG.getNode(ISD::BIT_CONVERT, dl, VT, V1);
+ SDValue Shuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, PVT, V1,
+ DAG.getUNDEF(PVT), Mask);
+ return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Shuffle);
}
/// isVectorLoad - Returns true if the node is a vector load, a scalar
@@ -2886,28 +3095,32 @@ static bool isVectorLoad(SDValue Op) {
/// CanonicalizeMovddup - Canonicalize movddup shuffle to v2f64.
///
-static SDValue CanonicalizeMovddup(ShuffleVectorSDNode *SV, SelectionDAG &DAG,
- bool HasSSE3) {
+static SDValue CanonicalizeMovddup(SDValue Op, SDValue V1, SDValue Mask,
+ SelectionDAG &DAG, bool HasSSE3) {
// If we have sse3 and shuffle has more than one use or input is a load, then
// use movddup. Otherwise, use movlhps.
- SDValue V1 = SV->getOperand(0);
-
- bool UseMovddup = HasSSE3 && (!SV->hasOneUse() || isVectorLoad(V1));
+ bool UseMovddup = HasSSE3 && (!Op.hasOneUse() || isVectorLoad(V1));
MVT PVT = UseMovddup ? MVT::v2f64 : MVT::v4f32;
- MVT VT = SV->getValueType(0);
+ MVT VT = Op.getValueType();
if (VT == PVT)
- return SDValue(SV, 0);
-
- DebugLoc dl = SV->getDebugLoc();
- V1 = DAG.getNode(ISD::BIT_CONVERT, dl, PVT, V1);
- if (PVT.getVectorNumElements() == 2) {
- int Mask[2] = { 0, 0 };
- V1 = DAG.getVectorShuffle(PVT, dl, V1, DAG.getUNDEF(PVT), Mask);
+ return Op;
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned NumElems = PVT.getVectorNumElements();
+ if (NumElems == 2) {
+ SDValue Cst = DAG.getTargetConstant(0, MVT::i32);
+ Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i32, Cst, Cst);
} else {
- int Mask[4] = { 0, 1, 0, 1 };
- V1 = DAG.getVectorShuffle(PVT, dl, V1, DAG.getUNDEF(PVT), Mask);
+ assert(NumElems == 4);
+ SDValue Cst0 = DAG.getTargetConstant(0, MVT::i32);
+ SDValue Cst1 = DAG.getTargetConstant(1, MVT::i32);
+ Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ Cst0, Cst1, Cst0, Cst1);
}
- return DAG.getNode(ISD::BIT_CONVERT, dl, VT, V1);
+
+ V1 = DAG.getNode(ISD::BIT_CONVERT, dl, PVT, V1);
+ SDValue Shuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, PVT, V1,
+ DAG.getUNDEF(PVT), Mask);
+ return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Shuffle);
}
/// getShuffleVectorZeroOrUndef - Return a vector_shuffle of the specified
@@ -2917,31 +3130,39 @@ static SDValue CanonicalizeMovddup(ShuffleVectorSDNode *SV, SelectionDAG &DAG,
static SDValue getShuffleVectorZeroOrUndef(SDValue V2, unsigned Idx,
bool isZero, bool HasSSE2,
SelectionDAG &DAG) {
+ DebugLoc dl = V2.getDebugLoc();
MVT VT = V2.getValueType();
SDValue V1 = isZero
- ? getZeroVector(VT, HasSSE2, DAG, V2.getDebugLoc()) : DAG.getUNDEF(VT);
- unsigned NumElems = VT.getVectorNumElements();
- SmallVector<int, 16> MaskVec;
+ ? getZeroVector(VT, HasSSE2, DAG, dl) : DAG.getUNDEF(VT);
+ unsigned NumElems = V2.getValueType().getVectorNumElements();
+ MVT MaskVT = MVT::getIntVectorWithNumElements(NumElems);
+ MVT EVT = MaskVT.getVectorElementType();
+ SmallVector<SDValue, 16> MaskVec;
for (unsigned i = 0; i != NumElems; ++i)
- // If this is the insertion idx, put the low elt of V2 here.
- MaskVec.push_back(i == Idx ? NumElems : i);
- return DAG.getVectorShuffle(VT, V2.getDebugLoc(), V1, V2, &MaskVec[0]);
+ if (i == Idx) // If this is the insertion idx, put the low elt of V2 here.
+ MaskVec.push_back(DAG.getConstant(NumElems, EVT));
+ else
+ MaskVec.push_back(DAG.getConstant(i, EVT));
+ SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT,
+ &MaskVec[0], MaskVec.size());
+ return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V1, V2, Mask);
}
/// getNumOfConsecutiveZeros - Return the number of elements in a result of
/// a shuffle that is zero.
static
-unsigned getNumOfConsecutiveZeros(ShuffleVectorSDNode *SVOp, const int *Mask,
- int NumElems, bool Low, SelectionDAG &DAG) {
+unsigned getNumOfConsecutiveZeros(SDValue Op, SDValue Mask,
+ unsigned NumElems, bool Low,
+ SelectionDAG &DAG) {
unsigned NumZeros = 0;
- for (int i = 0; i < NumElems; ++i) {
+ for (unsigned i = 0; i < NumElems; ++i) {
unsigned Index = Low ? i : NumElems-i-1;
- int Idx = Mask[Index];
- if (Idx < 0) {
+ SDValue Idx = Mask.getOperand(Index);
+ if (Idx.getOpcode() == ISD::UNDEF) {
++NumZeros;
continue;
}
- SDValue Elt = DAG.getShuffleScalarElt(SVOp, Index);
+ SDValue Elt = DAG.getShuffleScalarElt(Op.getNode(), Index);
if (Elt.getNode() && isZeroNode(Elt))
++NumZeros;
else
@@ -2952,40 +3173,40 @@ unsigned getNumOfConsecutiveZeros(ShuffleVectorSDNode *SVOp, const int *Mask,
/// isVectorShift - Returns true if the shuffle can be implemented as a
/// logical left or right shift of a vector.
-/// FIXME: split into pslldqi, psrldqi, palignr variants.
-static bool isVectorShift(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG,
+static bool isVectorShift(SDValue Op, SDValue Mask, SelectionDAG &DAG,
bool &isLeft, SDValue &ShVal, unsigned &ShAmt) {
- const int *Mask = SVOp->getMask();
- int NumElems = SVOp->getValueType(0).getVectorNumElements();
+ unsigned NumElems = Mask.getNumOperands();
isLeft = true;
- unsigned NumZeros = getNumOfConsecutiveZeros(SVOp, Mask, NumElems, true, DAG);
+ unsigned NumZeros= getNumOfConsecutiveZeros(Op, Mask, NumElems, true, DAG);
if (!NumZeros) {
isLeft = false;
- NumZeros = getNumOfConsecutiveZeros(SVOp, Mask, NumElems, false, DAG);
+ NumZeros = getNumOfConsecutiveZeros(Op, Mask, NumElems, false, DAG);
if (!NumZeros)
return false;
}
+
bool SeenV1 = false;
bool SeenV2 = false;
- for (int i = NumZeros; i < NumElems; ++i) {
- int Val = isLeft ? (i - NumZeros) : i;
- int Idx = Mask[isLeft ? i : (i - NumZeros)];
- if (Idx < 0)
+ for (unsigned i = NumZeros; i < NumElems; ++i) {
+ unsigned Val = isLeft ? (i - NumZeros) : i;
+ SDValue Idx = Mask.getOperand(isLeft ? i : (i - NumZeros));
+ if (Idx.getOpcode() == ISD::UNDEF)
continue;
- if (Idx < NumElems)
+ unsigned Index = cast<ConstantSDNode>(Idx)->getZExtValue();
+ if (Index < NumElems)
SeenV1 = true;
else {
- Idx -= NumElems;
+ Index -= NumElems;
SeenV2 = true;
}
- if (Idx != Val)
+ if (Index != Val)
return false;
}
if (SeenV1 && SeenV2)
return false;
- ShVal = SeenV1 ? SVOp->getOperand(0) : SVOp->getOperand(1);
+ ShVal = SeenV1 ? Op.getOperand(0) : Op.getOperand(1);
ShAmt = NumZeros;
return true;
}
@@ -3070,8 +3291,8 @@ static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros,
/// getVShift - Return a vector logical shift node.
///
static SDValue getVShift(bool isLeft, MVT VT, SDValue SrcOp,
- unsigned NumBits, SelectionDAG &DAG,
- const TargetLowering &TLI, DebugLoc dl) {
+ unsigned NumBits, SelectionDAG &DAG,
+ const TargetLowering &TLI, DebugLoc dl) {
bool isMMX = VT.getSizeInBits() == 64;
MVT ShVT = isMMX ? MVT::v1i64 : MVT::v2i64;
unsigned Opc = isLeft ? X86ISD::VSHL : X86ISD::VSRL;
@@ -3156,13 +3377,11 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
// Now we have our 32-bit value zero extended in the low element of
// a vector. If Idx != 0, swizzle it into place.
if (Idx != 0) {
- SmallVector<int, 4> Mask;
- Mask.push_back(Idx);
- for (unsigned i = 1; i != VecElts; ++i)
- Mask.push_back(i);
- Item = DAG.getVectorShuffle(VecVT, dl, Item,
- DAG.getUNDEF(Item.getValueType()),
- &Mask[0]);
+ SDValue Ops[] = {
+ Item, DAG.getUNDEF(Item.getValueType()),
+ getSwapEltZeroMask(VecElts, Idx, DAG, dl)
+ };
+ Item = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VecVT, Ops, 3);
}
return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Item);
}
@@ -3206,10 +3425,15 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
// Turn it into a shuffle of zero and zero-extended scalar to vector.
Item = getShuffleVectorZeroOrUndef(Item, 0, NumZero > 0,
Subtarget->hasSSE2(), DAG);
- SmallVector<int, 8> MaskVec;
+ MVT MaskVT = MVT::getIntVectorWithNumElements(NumElems);
+ MVT MaskEVT = MaskVT.getVectorElementType();
+ SmallVector<SDValue, 8> MaskVec;
for (unsigned i = 0; i < NumElems; i++)
- MaskVec.push_back(i == Idx ? 0 : 1);
- return DAG.getVectorShuffle(VT, dl, Item, DAG.getUNDEF(VT), &MaskVec[0]);
+ MaskVec.push_back(DAG.getConstant((i == Idx) ? 0 : 1, MaskEVT));
+ SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT,
+ &MaskVec[0], MaskVec.size());
+ return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, Item,
+ DAG.getUNDEF(VT), Mask);
}
}
@@ -3267,48 +3491,54 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
V[i] = V[i*2]; // Must be a zero vector.
break;
case 1:
- V[i] = getMOVL(DAG, dl, VT, V[i*2+1], V[i*2]);
+ V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V[i*2+1], V[i*2],
+ getMOVLMask(NumElems, DAG, dl));
break;
case 2:
- V[i] = getMOVL(DAG, dl, VT, V[i*2], V[i*2+1]);
+ V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V[i*2], V[i*2+1],
+ getMOVLMask(NumElems, DAG, dl));
break;
case 3:
- V[i] = getUnpackl(DAG, dl, VT, V[i*2], V[i*2+1]);
+ V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V[i*2], V[i*2+1],
+ getUnpacklMask(NumElems, DAG, dl));
break;
}
}
- SmallVector<int, 8> MaskVec;
+ MVT MaskVT = MVT::getIntVectorWithNumElements(NumElems);
+ MVT EVT = MaskVT.getVectorElementType();
+ SmallVector<SDValue, 8> MaskVec;
bool Reverse = (NonZeros & 0x3) == 2;
for (unsigned i = 0; i < 2; ++i)
- MaskVec.push_back(Reverse ? 1-i : i);
+ if (Reverse)
+ MaskVec.push_back(DAG.getConstant(1-i, EVT));
+ else
+ MaskVec.push_back(DAG.getConstant(i, EVT));
Reverse = ((NonZeros & (0x3 << 2)) >> 2) == 2;
for (unsigned i = 0; i < 2; ++i)
- MaskVec.push_back(Reverse ? 1-i+NumElems : i+NumElems);
- return DAG.getVectorShuffle(VT, dl, V[0], V[1], &MaskVec[0]);
+ if (Reverse)
+ MaskVec.push_back(DAG.getConstant(1-i+NumElems, EVT));
+ else
+ MaskVec.push_back(DAG.getConstant(i+NumElems, EVT));
+ SDValue ShufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT,
+ &MaskVec[0], MaskVec.size());
+ return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V[0], V[1], ShufMask);
}
if (Values.size() > 2) {
- // If we have SSE 4.1, Expand into a number of inserts.
- if (getSubtarget()->hasSSE41()) {
- V[0] = DAG.getUNDEF(VT);
- for (unsigned i = 0; i < NumElems; ++i)
- if (Op.getOperand(i).getOpcode() != ISD::UNDEF)
- V[0] = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, V[0],
- Op.getOperand(i), DAG.getIntPtrConstant(i));
- return V[0];
- }
// Expand into a number of unpckl*.
// e.g. for v4f32
// Step 1: unpcklps 0, 2 ==> X: <?, ?, 2, 0>
// : unpcklps 1, 3 ==> Y: <?, ?, 3, 1>
// Step 2: unpcklps X, Y ==> <3, 2, 1, 0>
+ SDValue UnpckMask = getUnpacklMask(NumElems, DAG, dl);
for (unsigned i = 0; i < NumElems; ++i)
V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(i));
NumElems >>= 1;
while (NumElems != 0) {
for (unsigned i = 0; i < NumElems; ++i)
- V[i] = getUnpackl(DAG, dl, VT, V[i], V[i + NumElems]);
+ V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V[i], V[i + NumElems],
+ UnpckMask);
NumElems >>= 1;
}
return V[0];
@@ -3323,12 +3553,11 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
// 3. [ssse3] 2 x pshufb + 1 x por
// 4. [all] mov + pshuflw + pshufhw + N x (pextrw + pinsrw)
static
-SDValue LowerVECTOR_SHUFFLEv8i16(ShuffleVectorSDNode *SVOp,
- SelectionDAG &DAG, X86TargetLowering &TLI) {
- SDValue V1 = SVOp->getOperand(0);
- SDValue V2 = SVOp->getOperand(1);
- DebugLoc dl = SVOp->getDebugLoc();
- const int *Mask = SVOp->getMask();
+SDValue LowerVECTOR_SHUFFLEv8i16(SDValue V1, SDValue V2,
+ SDValue PermMask, SelectionDAG &DAG,
+ X86TargetLowering &TLI, DebugLoc dl) {
+ SmallVector<SDValue, 8> MaskElts(PermMask.getNode()->op_begin(),
+ PermMask.getNode()->op_end());
SmallVector<int, 8> MaskVals;
// Determine if more than 1 of the words in each of the low and high quadwords
@@ -3339,7 +3568,9 @@ SDValue LowerVECTOR_SHUFFLEv8i16(ShuffleVectorSDNode *SVOp,
BitVector InputQuads(4);
for (unsigned i = 0; i < 8; ++i) {
SmallVectorImpl<unsigned> &Quad = i < 4 ? LoQuad : HiQuad;
- int EltIdx = Mask[i];
+ SDValue Elt = MaskElts[i];
+ int EltIdx = Elt.getOpcode() == ISD::UNDEF ? -1 :
+ cast<ConstantSDNode>(Elt)->getZExtValue();
MaskVals.push_back(EltIdx);
if (EltIdx < 0) {
++Quad[0];
@@ -3392,12 +3623,14 @@ SDValue LowerVECTOR_SHUFFLEv8i16(ShuffleVectorSDNode *SVOp,
// words from all 4 input quadwords.
SDValue NewV;
if (BestLoQuad >= 0 || BestHiQuad >= 0) {
- SmallVector<int, 8> MaskV;
- MaskV.push_back(BestLoQuad < 0 ? 0 : BestLoQuad);
- MaskV.push_back(BestHiQuad < 0 ? 1 : BestHiQuad);
- NewV = DAG.getVectorShuffle(MVT::v2i64, dl,
- DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2i64, V1),
- DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2i64, V2), &MaskV[0]);
+ SmallVector<SDValue,8> MaskV;
+ MaskV.push_back(DAG.getConstant(BestLoQuad < 0 ? 0 : BestLoQuad, MVT::i64));
+ MaskV.push_back(DAG.getConstant(BestHiQuad < 0 ? 1 : BestHiQuad, MVT::i64));
+ SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, &MaskV[0], 2);
+
+ NewV = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, MVT::v2i64,
+ DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2i64, V1),
+ DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2i64, V2), Mask);
NewV = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v8i16, NewV);
// Rewrite the MaskVals and assign NewV to V1 if NewV now contains all the
@@ -3435,8 +3668,15 @@ SDValue LowerVECTOR_SHUFFLEv8i16(ShuffleVectorSDNode *SVOp,
// If we've eliminated the use of V2, and the new mask is a pshuflw or
// pshufhw, that's as cheap as it gets. Return the new shuffle.
if ((pshufhw && InOrder[0]) || (pshuflw && InOrder[1])) {
- return DAG.getVectorShuffle(MVT::v8i16, dl, NewV,
- DAG.getUNDEF(MVT::v8i16), &MaskVals[0]);
+ MaskV.clear();
+ for (unsigned i = 0; i != 8; ++i)
+ MaskV.push_back((MaskVals[i] < 0) ? DAG.getUNDEF(MVT::i16)
+ : DAG.getConstant(MaskVals[i],
+ MVT::i16));
+ return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, MVT::v8i16, NewV,
+ DAG.getUNDEF(MVT::v8i16),
+ DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i16,
+ &MaskV[0], 8));
}
}
@@ -3493,45 +3733,49 @@ SDValue LowerVECTOR_SHUFFLEv8i16(ShuffleVectorSDNode *SVOp,
// and update MaskVals with new element order.
BitVector InOrder(8);
if (BestLoQuad >= 0) {
- SmallVector<int, 8> MaskV;
+ SmallVector<SDValue, 8> MaskV;
for (int i = 0; i != 4; ++i) {
int idx = MaskVals[i];
if (idx < 0) {
- MaskV.push_back(-1);
+ MaskV.push_back(DAG.getUNDEF(MVT::i16));
InOrder.set(i);
} else if ((idx / 4) == BestLoQuad) {
- MaskV.push_back(idx & 3);
+ MaskV.push_back(DAG.getConstant(idx & 3, MVT::i16));
InOrder.set(i);
} else {
- MaskV.push_back(-1);
+ MaskV.push_back(DAG.getUNDEF(MVT::i16));
}
}
for (unsigned i = 4; i != 8; ++i)
- MaskV.push_back(i);
- NewV = DAG.getVectorShuffle(MVT::v8i16, dl, NewV, DAG.getUNDEF(MVT::v8i16),
- &MaskV[0]);
+ MaskV.push_back(DAG.getConstant(i, MVT::i16));
+ NewV = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, MVT::v8i16, NewV,
+ DAG.getUNDEF(MVT::v8i16),
+ DAG.getNode(ISD::BUILD_VECTOR, dl,
+ MVT::v8i16, &MaskV[0], 8));
}
// If BestHi >= 0, generate a pshufhw to put the high elements in order,
// and update MaskVals with the new element order.
if (BestHiQuad >= 0) {
- SmallVector<int, 8> MaskV;
+ SmallVector<SDValue, 8> MaskV;
for (unsigned i = 0; i != 4; ++i)
- MaskV.push_back(i);
+ MaskV.push_back(DAG.getConstant(i, MVT::i16));
for (unsigned i = 4; i != 8; ++i) {
int idx = MaskVals[i];
if (idx < 0) {
- MaskV.push_back(-1);
+ MaskV.push_back(DAG.getUNDEF(MVT::i16));
InOrder.set(i);
} else if ((idx / 4) == BestHiQuad) {
- MaskV.push_back((idx & 3) + 4);
+ MaskV.push_back(DAG.getConstant((idx & 3) + 4, MVT::i16));
InOrder.set(i);
} else {
- MaskV.push_back(-1);
+ MaskV.push_back(DAG.getUNDEF(MVT::i16));
}
}
- NewV = DAG.getVectorShuffle(MVT::v8i16, dl, NewV, DAG.getUNDEF(MVT::v8i16),
- &MaskV[0]);
+ NewV = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, MVT::v8i16, NewV,
+ DAG.getUNDEF(MVT::v8i16),
+ DAG.getNode(ISD::BUILD_VECTOR, dl,
+ MVT::v8i16, &MaskV[0], 8));
}
// In case BestHi & BestLo were both -1, which means each quadword has a word
@@ -3567,12 +3811,11 @@ SDValue LowerVECTOR_SHUFFLEv8i16(ShuffleVectorSDNode *SVOp,
// 2. [ssse3] 2 x pshufb + 1 x por
// 3. [all] v8i16 shuffle + N x pextrw + rotate + pinsrw
static
-SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp,
- SelectionDAG &DAG, X86TargetLowering &TLI) {
- SDValue V1 = SVOp->getOperand(0);
- SDValue V2 = SVOp->getOperand(1);
- DebugLoc dl = SVOp->getDebugLoc();
- const int *Mask = SVOp->getMask();
+SDValue LowerVECTOR_SHUFFLEv16i8(SDValue V1, SDValue V2,
+ SDValue PermMask, SelectionDAG &DAG,
+ X86TargetLowering &TLI, DebugLoc dl) {
+ SmallVector<SDValue, 16> MaskElts(PermMask.getNode()->op_begin(),
+ PermMask.getNode()->op_end());
SmallVector<int, 16> MaskVals;
// If we have SSSE3, case 1 is generated when all result bytes come from
@@ -3582,7 +3825,9 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp,
bool V1Only = true;
bool V2Only = true;
for (unsigned i = 0; i < 16; ++i) {
- int EltIdx = Mask[i];
+ SDValue Elt = MaskElts[i];
+ int EltIdx = Elt.getOpcode() == ISD::UNDEF ? -1 :
+ cast<ConstantSDNode>(Elt)->getZExtValue();
MaskVals.push_back(EltIdx);
if (EltIdx < 0)
continue;
@@ -3713,14 +3958,11 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp,
/// the right sequence. e.g.
/// vector_shuffle <>, <>, < 3, 4, | 10, 11, | 0, 1, | 14, 15>
static
-SDValue RewriteAsNarrowerShuffle(ShuffleVectorSDNode *SVOp,
- SelectionDAG &DAG,
- TargetLowering &TLI, DebugLoc dl) {
- MVT VT = SVOp->getValueType(0);
- SDValue V1 = SVOp->getOperand(0);
- SDValue V2 = SVOp->getOperand(1);
- const int *PermMask = SVOp->getMask();
- unsigned NumElems = VT.getVectorNumElements();
+SDValue RewriteAsNarrowerShuffle(SDValue V1, SDValue V2,
+ MVT VT,
+ SDValue PermMask, SelectionDAG &DAG,
+ TargetLowering &TLI, DebugLoc dl) {
+ unsigned NumElems = PermMask.getNumOperands();
unsigned NewWidth = (NumElems == 4) ? 2 : 4;
MVT MaskVT = MVT::getIntVectorWithNumElements(NewWidth);
MVT MaskEltVT = MaskVT.getVectorElementType();
@@ -3739,35 +3981,38 @@ SDValue RewriteAsNarrowerShuffle(ShuffleVectorSDNode *SVOp,
else
NewVT = MVT::v2f64;
}
- int Scale = NumElems / NewWidth;
- SmallVector<int, 8> MaskVec;
+ unsigned Scale = NumElems / NewWidth;
+ SmallVector<SDValue, 8> MaskVec;
for (unsigned i = 0; i < NumElems; i += Scale) {
- int StartIdx = -1;
- for (int j = 0; j < Scale; ++j) {
- int EltIdx = PermMask[i+j];
- if (EltIdx < 0)
+ unsigned StartIdx = ~0U;
+ for (unsigned j = 0; j < Scale; ++j) {
+ SDValue Elt = PermMask.getOperand(i+j);
+ if (Elt.getOpcode() == ISD::UNDEF)
continue;
- if (StartIdx == -1)
+ unsigned EltIdx = cast<ConstantSDNode>(Elt)->getZExtValue();
+ if (StartIdx == ~0U)
StartIdx = EltIdx - (EltIdx % Scale);
if (EltIdx != StartIdx + j)
return SDValue();
}
- if (StartIdx == -1)
- MaskVec.push_back(-1);
+ if (StartIdx == ~0U)
+ MaskVec.push_back(DAG.getUNDEF(MaskEltVT));
else
- MaskVec.push_back(StartIdx / Scale);
+ MaskVec.push_back(DAG.getConstant(StartIdx / Scale, MaskEltVT));
}
V1 = DAG.getNode(ISD::BIT_CONVERT, dl, NewVT, V1);
V2 = DAG.getNode(ISD::BIT_CONVERT, dl, NewVT, V2);
- return DAG.getVectorShuffle(NewVT, dl, V1, V2, &MaskVec[0]);
+ return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, NewVT, V1, V2,
+ DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT,
+ &MaskVec[0], MaskVec.size()));
}
/// getVZextMovL - Return a zero-extending vector move low node.
///
static SDValue getVZextMovL(MVT VT, MVT OpVT,
- SDValue SrcOp, SelectionDAG &DAG,
- const X86Subtarget *Subtarget, DebugLoc dl) {
+ SDValue SrcOp, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget, DebugLoc dl) {
if (VT == MVT::v2f64 || VT == MVT::v4f32) {
LoadSDNode *LD = NULL;
if (!isScalarLoadToVector(SrcOp.getNode(), &LD))
@@ -3801,37 +4046,31 @@ static SDValue getVZextMovL(MVT VT, MVT OpVT,
/// LowerVECTOR_SHUFFLE_4wide - Handle all 4 wide cases with a number of
/// shuffles.
static SDValue
-LowerVECTOR_SHUFFLE_4wide(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
- SDValue V1 = SVOp->getOperand(0);
- SDValue V2 = SVOp->getOperand(1);
- DebugLoc dl = SVOp->getDebugLoc();
- MVT VT = SVOp->getValueType(0);
- const int *PermMaskPtr = SVOp->getMask();
-
+LowerVECTOR_SHUFFLE_4wide(SDValue V1, SDValue V2,
+ SDValue PermMask, MVT VT, SelectionDAG &DAG,
+ DebugLoc dl) {
+ MVT MaskVT = PermMask.getValueType();
+ MVT MaskEVT = MaskVT.getVectorElementType();
SmallVector<std::pair<int, int>, 8> Locs;
Locs.resize(4);
- SmallVector<int, 8> Mask1(4U, -1);
- SmallVector<int, 8> PermMask;
-
- for (unsigned i = 0; i != 8; ++i)
- PermMask.push_back(PermMaskPtr[i]);
-
+ SmallVector<SDValue, 8> Mask1(4, DAG.getUNDEF(MaskEVT));
unsigned NumHi = 0;
unsigned NumLo = 0;
for (unsigned i = 0; i != 4; ++i) {
- int Idx = PermMask[i];
- if (Idx < 0) {
+ SDValue Elt = PermMask.getOperand(i);
+ if (Elt.getOpcode() == ISD::UNDEF) {
Locs[i] = std::make_pair(-1, -1);
} else {
- assert(Idx < 8 && "Invalid VECTOR_SHUFFLE index!");
- if (Idx < 4) {
+ unsigned Val = cast<ConstantSDNode>(Elt)->getZExtValue();
+ assert(Val < 8 && "Invalid VECTOR_SHUFFLE index!");
+ if (Val < 4) {
Locs[i] = std::make_pair(0, NumLo);
- Mask1[NumLo] = Idx;
+ Mask1[NumLo] = Elt;
NumLo++;
} else {
Locs[i] = std::make_pair(1, NumHi);
if (2+NumHi < 4)
- Mask1[2+NumHi] = Idx;
+ Mask1[2+NumHi] = Elt;
NumHi++;
}
}
@@ -3842,21 +4081,24 @@ LowerVECTOR_SHUFFLE_4wide(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
// implemented with two shuffles. First shuffle gather the elements.
// The second shuffle, which takes the first shuffle as both of its
// vector operands, put the elements into the right order.
- V1 = DAG.getVectorShuffle(VT, dl, V1, V2, &Mask1[0]);
+ V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V1, V2,
+ DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT,
+ &Mask1[0], Mask1.size()));
- SmallVector<int, 8> Mask2(4U, -1);
-
+ SmallVector<SDValue, 8> Mask2(4, DAG.getUNDEF(MaskEVT));
for (unsigned i = 0; i != 4; ++i) {
if (Locs[i].first == -1)
continue;
else {
unsigned Idx = (i < 2) ? 0 : 4;
Idx += Locs[i].first * 2 + Locs[i].second;
- Mask2[i] = Idx;
+ Mask2[i] = DAG.getConstant(Idx, MaskEVT);
}
}
- return DAG.getVectorShuffle(VT, dl, V1, V1, &Mask2[0]);
+ return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V1, V1,
+ DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT,
+ &Mask2[0], Mask2.size()));
} else if (NumLo == 3 || NumHi == 3) {
// Otherwise, we must have three elements from one vector, call it X, and
// one element from the other, call it Y. First, use a shufps to build an
@@ -3867,51 +4109,60 @@ LowerVECTOR_SHUFFLE_4wide(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
// from X.
if (NumHi == 3) {
// Normalize it so the 3 elements come from V1.
- CommuteVectorShuffleMask(PermMask, VT);
+ PermMask = CommuteVectorShuffleMask(PermMask, DAG, dl);
std::swap(V1, V2);
}
// Find the element from V2.
unsigned HiIndex;
for (HiIndex = 0; HiIndex < 3; ++HiIndex) {
- int Val = PermMask[HiIndex];
- if (Val < 0)
+ SDValue Elt = PermMask.getOperand(HiIndex);
+ if (Elt.getOpcode() == ISD::UNDEF)
continue;
+ unsigned Val = cast<ConstantSDNode>(Elt)->getZExtValue();
if (Val >= 4)
break;
}
- Mask1[0] = PermMask[HiIndex];
- Mask1[1] = -1;
- Mask1[2] = PermMask[HiIndex^1];
- Mask1[3] = -1;
- V2 = DAG.getVectorShuffle(VT, dl, V1, V2, &Mask1[0]);
+ Mask1[0] = PermMask.getOperand(HiIndex);
+ Mask1[1] = DAG.getUNDEF(MaskEVT);
+ Mask1[2] = PermMask.getOperand(HiIndex^1);
+ Mask1[3] = DAG.getUNDEF(MaskEVT);
+ V2 = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V1, V2,
+ DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, &Mask1[0], 4));
if (HiIndex >= 2) {
- Mask1[0] = PermMask[0];
- Mask1[1] = PermMask[1];
- Mask1[2] = HiIndex & 1 ? 6 : 4;
- Mask1[3] = HiIndex & 1 ? 4 : 6;
- return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask1[0]);
+ Mask1[0] = PermMask.getOperand(0);
+ Mask1[1] = PermMask.getOperand(1);
+ Mask1[2] = DAG.getConstant(HiIndex & 1 ? 6 : 4, MaskEVT);
+ Mask1[3] = DAG.getConstant(HiIndex & 1 ? 4 : 6, MaskEVT);
+ return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V1, V2,
+ DAG.getNode(ISD::BUILD_VECTOR, dl,
+ MaskVT, &Mask1[0], 4));
} else {
- Mask1[0] = HiIndex & 1 ? 2 : 0;
- Mask1[1] = HiIndex & 1 ? 0 : 2;
- Mask1[2] = PermMask[2];
- Mask1[3] = PermMask[3];
- if (Mask1[2] >= 0)
- Mask1[2] += 4;
- if (Mask1[3] >= 0)
- Mask1[3] += 4;
- return DAG.getVectorShuffle(VT, dl, V2, V1, &Mask1[0]);
+ Mask1[0] = DAG.getConstant(HiIndex & 1 ? 2 : 0, MaskEVT);
+ Mask1[1] = DAG.getConstant(HiIndex & 1 ? 0 : 2, MaskEVT);
+ Mask1[2] = PermMask.getOperand(2);
+ Mask1[3] = PermMask.getOperand(3);
+ if (Mask1[2].getOpcode() != ISD::UNDEF)
+ Mask1[2] =
+ DAG.getConstant(cast<ConstantSDNode>(Mask1[2])->getZExtValue()+4,
+ MaskEVT);
+ if (Mask1[3].getOpcode() != ISD::UNDEF)
+ Mask1[3] =
+ DAG.getConstant(cast<ConstantSDNode>(Mask1[3])->getZExtValue()+4,
+ MaskEVT);
+ return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V2, V1,
+ DAG.getNode(ISD::BUILD_VECTOR, dl,
+ MaskVT, &Mask1[0], 4));
}
}
// Break it into (shuffle shuffle_hi, shuffle_lo).
Locs.clear();
- SmallVector<int,8> LoMask(4U, -1);
- SmallVector<int,8> HiMask(4U, -1);
-
- SmallVector<int,8> *MaskPtr = &LoMask;
+ SmallVector<SDValue,8> LoMask(4, DAG.getUNDEF(MaskEVT));
+ SmallVector<SDValue,8> HiMask(4, DAG.getUNDEF(MaskEVT));
+ SmallVector<SDValue,8> *MaskPtr = &LoMask;
unsigned MaskIdx = 0;
unsigned LoIdx = 0;
unsigned HiIdx = 2;
@@ -3922,68 +4173,84 @@ LowerVECTOR_SHUFFLE_4wide(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
LoIdx = 0;
HiIdx = 2;
}
- int Idx = PermMask[i];
- if (Idx < 0) {
+ SDValue Elt = PermMask.getOperand(i);
+ if (Elt.getOpcode() == ISD::UNDEF) {
Locs[i] = std::make_pair(-1, -1);
- } else if (Idx < 4) {
+ } else if (cast<ConstantSDNode>(Elt)->getZExtValue() < 4) {
Locs[i] = std::make_pair(MaskIdx, LoIdx);
- (*MaskPtr)[LoIdx] = Idx;
+ (*MaskPtr)[LoIdx] = Elt;
LoIdx++;
} else {
Locs[i] = std::make_pair(MaskIdx, HiIdx);
- (*MaskPtr)[HiIdx] = Idx;
+ (*MaskPtr)[HiIdx] = Elt;
HiIdx++;
}
}
- SDValue LoShuffle = DAG.getVectorShuffle(VT, dl, V1, V2, &LoMask[0]);
- SDValue HiShuffle = DAG.getVectorShuffle(VT, dl, V1, V2, &HiMask[0]);
- SmallVector<int, 8> MaskOps;
+ SDValue LoShuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V1, V2,
+ DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT,
+ &LoMask[0], LoMask.size()));
+ SDValue HiShuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V1, V2,
+ DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT,
+ &HiMask[0], HiMask.size()));
+ SmallVector<SDValue, 8> MaskOps;
for (unsigned i = 0; i != 4; ++i) {
if (Locs[i].first == -1) {
- MaskOps.push_back(-1);
+ MaskOps.push_back(DAG.getUNDEF(MaskEVT));
} else {
unsigned Idx = Locs[i].first * 4 + Locs[i].second;
- MaskOps.push_back(Idx);
+ MaskOps.push_back(DAG.getConstant(Idx, MaskEVT));
}
}
- return DAG.getVectorShuffle(VT, dl, LoShuffle, HiShuffle, &MaskOps[0]);
+ return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, LoShuffle, HiShuffle,
+ DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT,
+ &MaskOps[0], MaskOps.size()));
}
SDValue
X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
- ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
+ SDValue PermMask = Op.getOperand(2);
MVT VT = Op.getValueType();
DebugLoc dl = Op.getDebugLoc();
- const int *PermMask = cast<ShuffleVectorSDNode>(Op.getNode())->getMask();
- unsigned NumElems = VT.getVectorNumElements();
+ unsigned NumElems = PermMask.getNumOperands();
bool isMMX = VT.getSizeInBits() == 64;
bool V1IsUndef = V1.getOpcode() == ISD::UNDEF;
bool V2IsUndef = V2.getOpcode() == ISD::UNDEF;
bool V1IsSplat = false;
bool V2IsSplat = false;
- if (isZeroShuffle(SVOp))
+ // FIXME: Check for legal shuffle and return?
+
+ if (isUndefShuffle(Op.getNode()))
+ return DAG.getUNDEF(VT);
+
+ if (isZeroShuffle(Op.getNode()))
return getZeroVector(VT, Subtarget->hasSSE2(), DAG, dl);
+ if (isIdentityMask(PermMask.getNode()))
+ return V1;
+ else if (isIdentityMask(PermMask.getNode(), true))
+ return V2;
+
// Canonicalize movddup shuffles.
- if (V2IsUndef && Subtarget->hasSSE2() && VT.getSizeInBits() == 128 &&
- X86::isMOVDDUPMask(SVOp))
- return CanonicalizeMovddup(SVOp, DAG, Subtarget->hasSSE3());
+ if (V2IsUndef && Subtarget->hasSSE2() &&
+ VT.getSizeInBits() == 128 &&
+ X86::isMOVDDUPMask(PermMask.getNode()))
+ return CanonicalizeMovddup(Op, V1, PermMask, DAG, Subtarget->hasSSE3());
- // Promote splats to v4f32.
- if (SVOp->isSplat()) {
- if (isMMX || NumElems < 4)
- return Op;
- return PromoteSplat(SVOp, DAG, Subtarget->hasSSE2());
+ if (isSplatMask(PermMask.getNode())) {
+ if (isMMX || NumElems < 4) return Op;
+ // Promote it to a v4{if}32 splat.
+ return PromoteSplat(Op, DAG, Subtarget->hasSSE2());
}
// If the shuffle can be profitably rewritten as a narrower shuffle, then
// do it!
if (VT == MVT::v8i16 || VT == MVT::v16i8) {
- SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, *this, dl);
+ SDValue NewOp= RewriteAsNarrowerShuffle(V1, V2, VT, PermMask, DAG,
+ *this, dl);
if (NewOp.getNode())
return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
LowerVECTOR_SHUFFLE(NewOp, DAG));
@@ -3991,29 +4258,32 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
// FIXME: Figure out a cleaner way to do this.
// Try to make use of movq to zero out the top part.
if (ISD::isBuildVectorAllZeros(V2.getNode())) {
- SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, *this, dl);
+ SDValue NewOp = RewriteAsNarrowerShuffle(V1, V2, VT, PermMask,
+ DAG, *this, dl);
if (NewOp.getNode()) {
- if (isCommutedMOVL(cast<ShuffleVectorSDNode>(NewOp), true, false))
- return getVZextMovL(VT, NewOp.getValueType(), NewOp.getOperand(0),
- DAG, Subtarget, dl);
+ SDValue NewV1 = NewOp.getOperand(0);
+ SDValue NewV2 = NewOp.getOperand(1);
+ SDValue NewMask = NewOp.getOperand(2);
+ if (isCommutedMOVL(NewMask.getNode(), true, false)) {
+ NewOp = CommuteVectorShuffle(NewOp, NewV1, NewV2, NewMask, DAG);
+ return getVZextMovL(VT, NewOp.getValueType(), NewV2, DAG, Subtarget,
+ dl);
+ }
}
} else if (ISD::isBuildVectorAllZeros(V1.getNode())) {
- SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, *this, dl);
- if (NewOp.getNode() && X86::isMOVLMask(cast<ShuffleVectorSDNode>(NewOp)))
+ SDValue NewOp= RewriteAsNarrowerShuffle(V1, V2, VT, PermMask,
+ DAG, *this, dl);
+ if (NewOp.getNode() && X86::isMOVLMask(NewOp.getOperand(2).getNode()))
return getVZextMovL(VT, NewOp.getValueType(), NewOp.getOperand(1),
- DAG, Subtarget, dl);
+ DAG, Subtarget, dl);
}
}
-
- if (X86::isPSHUFDMask(SVOp))
- return Op;
-
+
// Check if this can be converted into a logical shift.
bool isLeft = false;
unsigned ShAmt = 0;
SDValue ShVal;
- bool isShift = getSubtarget()->hasSSE2() &&
- isVectorShift(SVOp, DAG, isLeft, ShVal, ShAmt);
+ bool isShift = isVectorShift(Op, PermMask, DAG, isLeft, ShVal, ShAmt);
if (isShift && ShVal.hasOneUse()) {
// If the shifted value has multiple uses, it may be cheaper to use
// v_set0 + movlhps or movhlps, etc.
@@ -4021,8 +4291,8 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
ShAmt *= EVT.getSizeInBits();
return getVShift(isLeft, VT, ShVal, ShAmt, DAG, *this, dl);
}
-
- if (X86::isMOVLMask(SVOp)) {
+
+ if (X86::isMOVLMask(PermMask.getNode())) {
if (V1IsUndef)
return V2;
if (ISD::isBuildVectorAllZeros(V1.getNode()))
@@ -4030,18 +4300,17 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
if (!isMMX)
return Op;
}
-
- // FIXME: fold these into legal mask.
- if (!isMMX && (X86::isMOVSHDUPMask(SVOp) ||
- X86::isMOVSLDUPMask(SVOp) ||
- X86::isMOVHLPSMask(SVOp) ||
- X86::isMOVHPMask(SVOp) ||
- X86::isMOVLPMask(SVOp)))
+
+ if (!isMMX && (X86::isMOVSHDUPMask(PermMask.getNode()) ||
+ X86::isMOVSLDUPMask(PermMask.getNode()) ||
+ X86::isMOVHLPSMask(PermMask.getNode()) ||
+ X86::isMOVHPMask(PermMask.getNode()) ||
+ X86::isMOVLPMask(PermMask.getNode())))
return Op;
- if (ShouldXformToMOVHLPS(SVOp) ||
- ShouldXformToMOVLP(V1.getNode(), V2.getNode(), SVOp))
- return CommuteVectorShuffle(SVOp, DAG);
+ if (ShouldXformToMOVHLPS(PermMask.getNode()) ||
+ ShouldXformToMOVLP(V1.getNode(), V2.getNode(), PermMask.getNode()))
+ return CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
if (isShift) {
// No better options. Use a vshl / vsrl.
@@ -4049,7 +4318,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
ShAmt *= EVT.getSizeInBits();
return getVShift(isLeft, VT, ShVal, ShAmt, DAG, *this, dl);
}
-
+
bool Commuted = false;
// FIXME: This should also accept a bitcast of a splat? Be careful, not
// 1,1,1,1 -> v8i16 though.
@@ -4058,84 +4327,115 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
// Canonicalize the splat or undef, if present, to be on the RHS.
if ((V1IsSplat || V1IsUndef) && !(V2IsSplat || V2IsUndef)) {
- Op = CommuteVectorShuffle(SVOp, DAG);
- SVOp = cast<ShuffleVectorSDNode>(Op);
- V1 = SVOp->getOperand(0);
- V2 = SVOp->getOperand(1);
+ Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
std::swap(V1IsSplat, V2IsSplat);
std::swap(V1IsUndef, V2IsUndef);
Commuted = true;
}
- if (isCommutedMOVL(SVOp, V2IsSplat, V2IsUndef)) {
- // Shuffling low element of v1 into undef, just return v1.
- if (V2IsUndef)
- return V1;
- // If V2 is a splat, the mask may be malformed such as <4,3,3,3>, which
- // the instruction selector will not match, so get a canonical MOVL with
- // swapped operands to undo the commute.
- return getMOVL(DAG, dl, VT, V2, V1);
+ // FIXME: Figure out a cleaner way to do this.
+ if (isCommutedMOVL(PermMask.getNode(), V2IsSplat, V2IsUndef)) {
+ if (V2IsUndef) return V1;
+ Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
+ if (V2IsSplat) {
+ // V2 is a splat, so the mask may be malformed. That is, it may point
+ // to any V2 element. The instruction selector won't like this. Get
+ // a corrected mask and commute to form a proper MOVS{S|D}.
+ SDValue NewMask = getMOVLMask(NumElems, DAG, dl);
+ if (NewMask.getNode() != PermMask.getNode())
+ Op = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V1, V2, NewMask);
+ }
+ return Op;
}
- if (X86::isUNPCKL_v_undef_Mask(SVOp) ||
- X86::isUNPCKH_v_undef_Mask(SVOp) ||
- X86::isUNPCKLMask(SVOp) ||
- X86::isUNPCKHMask(SVOp))
+ if (X86::isUNPCKL_v_undef_Mask(PermMask.getNode()) ||
+ X86::isUNPCKH_v_undef_Mask(PermMask.getNode()) ||
+ X86::isUNPCKLMask(PermMask.getNode()) ||
+ X86::isUNPCKHMask(PermMask.getNode()))
return Op;
if (V2IsSplat) {
// Normalize mask so all entries that point to V2 point to its first
// element then try to match unpck{h|l} again. If match, return a
// new vector_shuffle with the corrected mask.
- SDValue NewMask = NormalizeMask(SVOp, DAG);
- ShuffleVectorSDNode *NSVOp = cast<ShuffleVectorSDNode>(NewMask);
- if (NSVOp != SVOp) {
- if (X86::isUNPCKLMask(NSVOp, true)) {
- return NewMask;
- } else if (X86::isUNPCKHMask(NSVOp, true)) {
- return NewMask;
+ SDValue NewMask = NormalizeMask(PermMask, DAG);
+ if (NewMask.getNode() != PermMask.getNode()) {
+ if (X86::isUNPCKLMask(NewMask.getNode(), true)) {
+ SDValue NewMask = getUnpacklMask(NumElems, DAG, dl);
+ return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V1, V2, NewMask);
+ } else if (X86::isUNPCKHMask(NewMask.getNode(), true)) {
+ SDValue NewMask = getUnpackhMask(NumElems, DAG, dl);
+ return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V1, V2, NewMask);
}
}
}
+ // Normalize the node to match x86 shuffle ops if needed
+ if (V2.getOpcode() != ISD::UNDEF && isCommutedSHUFP(PermMask.getNode()))
+ Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
+
if (Commuted) {
// Commute it back and try unpck* again.
- // FIXME: this seems wrong.
- SDValue NewOp = CommuteVectorShuffle(SVOp, DAG);
- ShuffleVectorSDNode *NewSVOp = cast<ShuffleVectorSDNode>(NewOp);
- if (X86::isUNPCKL_v_undef_Mask(NewSVOp) ||
- X86::isUNPCKH_v_undef_Mask(NewSVOp) ||
- X86::isUNPCKLMask(NewSVOp) ||
- X86::isUNPCKHMask(NewSVOp))
- return NewOp;
+ Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
+ if (X86::isUNPCKL_v_undef_Mask(PermMask.getNode()) ||
+ X86::isUNPCKH_v_undef_Mask(PermMask.getNode()) ||
+ X86::isUNPCKLMask(PermMask.getNode()) ||
+ X86::isUNPCKHMask(PermMask.getNode()))
+ return Op;
}
// FIXME: for mmx, bitcast v2i32 to v4i16 for shuffle.
+ // Try PSHUF* first, then SHUFP*.
+ // MMX doesn't have PSHUFD but it does have PSHUFW. While it's theoretically
+ // possible to shuffle a v2i32 using PSHUFW, that's not yet implemented.
+ if (isMMX && NumElems == 4 && X86::isPSHUFDMask(PermMask.getNode())) {
+ if (V2.getOpcode() != ISD::UNDEF)
+ return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V1,
+ DAG.getUNDEF(VT), PermMask);
+ return Op;
+ }
- // Normalize the node to match x86 shuffle ops if needed
- if (!isMMX && V2.getOpcode() != ISD::UNDEF && isCommutedSHUFP(SVOp))
- return CommuteVectorShuffle(SVOp, DAG);
+ if (!isMMX) {
+ if (Subtarget->hasSSE2() &&
+ (X86::isPSHUFDMask(PermMask.getNode()) ||
+ X86::isPSHUFHWMask(PermMask.getNode()) ||
+ X86::isPSHUFLWMask(PermMask.getNode()))) {
+ MVT RVT = VT;
+ if (VT == MVT::v4f32) {
+ RVT = MVT::v4i32;
+ Op = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, RVT,
+ DAG.getNode(ISD::BIT_CONVERT, dl, RVT, V1),
+ DAG.getUNDEF(RVT), PermMask);
+ } else if (V2.getOpcode() != ISD::UNDEF)
+ Op = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, RVT, V1,
+ DAG.getUNDEF(RVT), PermMask);
+ if (RVT != VT)
+ Op = DAG.getNode(ISD::BIT_CONVERT, dl, VT, Op);
+ return Op;
+ }
+
+ // Binary or unary shufps.
+ if (X86::isSHUFPMask(PermMask.getNode()) ||
+ (V2.getOpcode() == ISD::UNDEF && X86::isPSHUFDMask(PermMask.getNode())))
+ return Op;
+ }
- // Check for legal shuffle and return?
- if (isShuffleMaskLegal(PermMask, VT))
- return Op;
-
// Handle v8i16 specifically since SSE can do byte extraction and insertion.
if (VT == MVT::v8i16) {
- SDValue NewOp = LowerVECTOR_SHUFFLEv8i16(SVOp, DAG, *this);
+ SDValue NewOp = LowerVECTOR_SHUFFLEv8i16(V1, V2, PermMask, DAG, *this, dl);
if (NewOp.getNode())
return NewOp;
}
if (VT == MVT::v16i8) {
- SDValue NewOp = LowerVECTOR_SHUFFLEv16i8(SVOp, DAG, *this);
+ SDValue NewOp = LowerVECTOR_SHUFFLEv16i8(V1, V2, PermMask, DAG, *this, dl);
if (NewOp.getNode())
return NewOp;
}
// Handle all 4 wide cases with a number of shuffles except for MMX.
if (NumElems == 4 && !isMMX)
- return LowerVECTOR_SHUFFLE_4wide(SVOp, DAG);
+ return LowerVECTOR_SHUFFLE_4wide(V1, V2, PermMask, VT, DAG, dl);
return SDValue();
}
@@ -4229,12 +4529,22 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
if (Idx == 0)
return Op;
-
// SHUFPS the element to the lowest double word, then movss.
- int Mask[4] = { Idx, -1, -1, -1 };
- MVT VVT = Op.getOperand(0).getValueType();
- SDValue Vec = DAG.getVectorShuffle(VVT, dl, Op.getOperand(0),
- DAG.getUNDEF(VVT), Mask);
+ MVT MaskVT = MVT::getIntVectorWithNumElements(4);
+ SmallVector<SDValue, 8> IdxVec;
+ IdxVec.
+ push_back(DAG.getConstant(Idx, MaskVT.getVectorElementType()));
+ IdxVec.
+ push_back(DAG.getUNDEF(MaskVT.getVectorElementType()));
+ IdxVec.
+ push_back(DAG.getUNDEF(MaskVT.getVectorElementType()));
+ IdxVec.
+ push_back(DAG.getUNDEF(MaskVT.getVectorElementType()));
+ SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT,
+ &IdxVec[0], IdxVec.size());
+ SDValue Vec = Op.getOperand(0);
+ Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, Vec.getValueType(),
+ Vec, DAG.getUNDEF(Vec.getValueType()), Mask);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Vec,
DAG.getIntPtrConstant(0));
} else if (VT.getSizeInBits() == 64) {
@@ -4248,10 +4558,17 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
// UNPCKHPD the element to the lowest double word, then movsd.
// Note if the lower 64 bits of the result of the UNPCKHPD is then stored
// to a f64mem, the whole operation is folded into a single MOVHPDmr.
- int Mask[2] = { 1, -1 };
- MVT VVT = Op.getOperand(0).getValueType();
- SDValue Vec = DAG.getVectorShuffle(VVT, dl, Op.getOperand(0),
- DAG.getUNDEF(VVT), Mask);
+ MVT MaskVT = MVT::getIntVectorWithNumElements(2);
+ SmallVector<SDValue, 8> IdxVec;
+ IdxVec.push_back(DAG.getConstant(1, MaskVT.getVectorElementType()));
+ IdxVec.
+ push_back(DAG.getUNDEF(MaskVT.getVectorElementType()));
+ SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT,
+ &IdxVec[0], IdxVec.size());
+ SDValue Vec = Op.getOperand(0);
+ Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, Vec.getValueType(),
+ Vec, DAG.getUNDEF(Vec.getValueType()),
+ Mask);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Vec,
DAG.getIntPtrConstant(0));
}
@@ -4758,6 +5075,19 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) {
Constant *C1 = ConstantVector::get(CV1);
SDValue CPIdx1 = DAG.getConstantPool(C1, getPointerTy(), 16);
+ SmallVector<SDValue, 4> MaskVec;
+ MaskVec.push_back(DAG.getConstant(0, MVT::i32));
+ MaskVec.push_back(DAG.getConstant(4, MVT::i32));
+ MaskVec.push_back(DAG.getConstant(1, MVT::i32));
+ MaskVec.push_back(DAG.getConstant(5, MVT::i32));
+ SDValue UnpcklMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ &MaskVec[0], MaskVec.size());
+ SmallVector<SDValue, 4> MaskVec2;
+ MaskVec2.push_back(DAG.getConstant(1, MVT::i32));
+ MaskVec2.push_back(DAG.getConstant(0, MVT::i32));
+ SDValue ShufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i32,
+ &MaskVec2[0], MaskVec2.size());
+
SDValue XR1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32,
DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
Op.getOperand(0),
@@ -4766,11 +5096,13 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) {
DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
Op.getOperand(0),
DAG.getIntPtrConstant(0)));
- SDValue Unpck1 = getUnpackl(DAG, dl, MVT::v4i32, XR1, XR2);
+ SDValue Unpck1 = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, MVT::v4i32,
+ XR1, XR2, UnpcklMask);
SDValue CLod0 = DAG.getLoad(MVT::v4i32, dl, DAG.getEntryNode(), CPIdx0,
PseudoSourceValue::getConstantPool(), 0,
false, 16);
- SDValue Unpck2 = getUnpackl(DAG, dl, MVT::v4i32, Unpck1, CLod0);
+ SDValue Unpck2 = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, MVT::v4i32,
+ Unpck1, CLod0, UnpcklMask);
SDValue XR2F = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, Unpck2);
SDValue CLod1 = DAG.getLoad(MVT::v2f64, dl, CLod0.getValue(1), CPIdx1,
PseudoSourceValue::getConstantPool(), 0,
@@ -4778,9 +5110,8 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) {
SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, XR2F, CLod1);
// Add the halves; easiest way is to swap them into another reg first.
- int ShufMask[2] = { 1, -1 };
- SDValue Shuf = DAG.getVectorShuffle(MVT::v2f64, dl, Sub,
- DAG.getUNDEF(MVT::v2f64), ShufMask);
+ SDValue Shuf = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, MVT::v2f64,
+ Sub, Sub, ShufMask);
SDValue Add = DAG.getNode(ISD::FADD, dl, MVT::v2f64, Shuf, Sub);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Add,
DAG.getIntPtrConstant(0));
@@ -6932,36 +7263,34 @@ bool X86TargetLowering::isZExtFree(MVT VT1, MVT VT2) const {
/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
/// are assumed to be legal.
bool
-X86TargetLowering::isShuffleMaskLegal(const int *Mask, MVT VT) const {
+X86TargetLowering::isShuffleMaskLegal(SDValue Mask, MVT VT) const {
// Only do shuffles on 128-bit vector types for now.
- if (VT.getSizeInBits() == 64)
- return false;
-
- // FIXME: pshufb, blends, palignr, shifts.
- return (VT.getVectorNumElements() == 2 ||
- ShuffleVectorSDNode::isSplatMask(Mask, VT) ||
- isMOVLMask(Mask, VT) ||
- isSHUFPMask(Mask, VT) ||
- isPSHUFDMask(Mask, VT) ||
- isPSHUFHWMask(Mask, VT) ||
- isPSHUFLWMask(Mask, VT) ||
- isUNPCKLMask(Mask, VT) ||
- isUNPCKHMask(Mask, VT) ||
- isUNPCKL_v_undef_Mask(Mask, VT) ||
- isUNPCKH_v_undef_Mask(Mask, VT));
+ // FIXME: pshufb, blends
+ if (VT.getSizeInBits() == 64) return false;
+ return (Mask.getNode()->getNumOperands() <= 4 ||
+ isIdentityMask(Mask.getNode()) ||
+ isIdentityMask(Mask.getNode(), true) ||
+ isSplatMask(Mask.getNode()) ||
+ X86::isPSHUFHWMask(Mask.getNode()) ||
+ X86::isPSHUFLWMask(Mask.getNode()) ||
+ X86::isUNPCKLMask(Mask.getNode()) ||
+ X86::isUNPCKHMask(Mask.getNode()) ||
+ X86::isUNPCKL_v_undef_Mask(Mask.getNode()) ||
+ X86::isUNPCKH_v_undef_Mask(Mask.getNode()));
}
bool
-X86TargetLowering::isVectorClearMaskLegal(const int *Mask, MVT VT) const {
- unsigned NumElts = VT.getVectorNumElements();
- // FIXME: This collection of masks seems suspect.
- if (NumElts == 2)
- return true;
- if (NumElts == 4 && VT.getSizeInBits() == 128) {
- return (isMOVLMask(Mask, VT) ||
- isCommutedMOVLMask(Mask, VT, true) ||
- isSHUFPMask(Mask, VT) ||
- isCommutedSHUFPMask(Mask, VT));
+X86TargetLowering::isVectorClearMaskLegal(const std::vector<SDValue> &BVOps,
+ MVT EVT, SelectionDAG &DAG) const {
+ unsigned NumElts = BVOps.size();
+ // Only do shuffles on 128-bit vector types for now.
+ if (EVT.getSizeInBits() * NumElts == 64) return false;
+ if (NumElts == 2) return true;
+ if (NumElts == 4) {
+ return (isMOVLMask(&BVOps[0], 4) ||
+ isCommutedMOVL(&BVOps[0], 4, true) ||
+ isSHUFPMask(&BVOps[0], 4) ||
+ isCommutedSHUFP(&BVOps[0], 4));
}
return false;
}
@@ -7696,14 +8025,15 @@ static bool isBaseAlignmentOfN(unsigned N, SDNode *Base,
return false;
}
-static bool EltsFromConsecutiveLoads(SDNode *N, const int *PermMask,
+static bool EltsFromConsecutiveLoads(SDNode *N, SDValue PermMask,
unsigned NumElems, MVT EVT,
SDNode *&Base,
SelectionDAG &DAG, MachineFrameInfo *MFI,
const TargetLowering &TLI) {
Base = NULL;
for (unsigned i = 0; i < NumElems; ++i) {
- if (PermMask[i] < 0) {
+ SDValue Idx = PermMask.getOperand(i);
+ if (Idx.getOpcode() == ISD::UNDEF) {
if (!Base)
return false;
continue;
@@ -7736,12 +8066,12 @@ static bool EltsFromConsecutiveLoads(SDNode *N, const int *PermMask,
/// shuffle to be an appropriate build vector so it can take advantage of
// performBuildVectorCombine.
static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
- const TargetLowering &TLI) {
+ const TargetLowering &TLI) {
DebugLoc dl = N->getDebugLoc();
MVT VT = N->getValueType(0);
MVT EVT = VT.getVectorElementType();
- const int *PermMask = cast<ShuffleVectorSDNode>(N)->getMask();
- unsigned NumElems = VT.getVectorNumElements();
+ SDValue PermMask = N->getOperand(2);
+ unsigned NumElems = PermMask.getNumOperands();
// For x86-32 machines, if we see an insert and then a shuffle in a v2i64
// where the upper half is 0, it is advantageous to rewrite it as a build
@@ -7750,10 +8080,9 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
SDValue In[2];
In[0] = N->getOperand(0);
In[1] = N->getOperand(1);
- unsigned Idx0 = PermMask[0];
- unsigned Idx1 = PermMask[1];
- // FIXME: can we take advantage of undef index?
- if (PermMask[0] >= 0 && PermMask[1] >= 0 &&
+ unsigned Idx0 =cast<ConstantSDNode>(PermMask.getOperand(0))->getZExtValue();
+ unsigned Idx1 =cast<ConstantSDNode>(PermMask.getOperand(1))->getZExtValue();
+ if (In[0].getValueType().getVectorNumElements() == NumElems &&
In[Idx0/2].getOpcode() == ISD::INSERT_VECTOR_ELT &&
In[Idx1/2].getOpcode() == ISD::BUILD_VECTOR) {
ConstantSDNode* InsertVecIdx =
@@ -8217,9 +8546,9 @@ static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG,
}
}
} else if (ShAmtOp.getOpcode() == ISD::VECTOR_SHUFFLE &&
- cast<ShuffleVectorSDNode>(ShAmtOp)->isSplat()) {
- BaseShAmt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, ShAmtOp,
- DAG.getIntPtrConstant(0));
+ isSplatMask(ShAmtOp.getOperand(2).getNode())) {
+ BaseShAmt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, ShAmtOp,
+ DAG.getIntPtrConstant(0));
} else
return SDValue();
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index d91951c62e..050b86979c 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -230,8 +230,7 @@ namespace llvm {
// VSHL, VSRL - Vector logical left / right shift.
VSHL, VSRL,
-
- // CMPPD, CMPPS - Vector double/float comparison.
+
// CMPPD, CMPPS - Vector double/float comparison.
CMPPD, CMPPS,
@@ -252,72 +251,80 @@ namespace llvm {
namespace X86 {
/// isPSHUFDMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to PSHUFD.
- bool isPSHUFDMask(ShuffleVectorSDNode *N);
+ bool isPSHUFDMask(SDNode *N);
/// isPSHUFHWMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to PSHUFHW.
- bool isPSHUFHWMask(ShuffleVectorSDNode *N);
+ bool isPSHUFHWMask(SDNode *N);
/// isPSHUFLWMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to PSHUFLW.
- bool isPSHUFLWMask(ShuffleVectorSDNode *N);
+ bool isPSHUFLWMask(SDNode *N);
/// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to SHUFP*.
- bool isSHUFPMask(ShuffleVectorSDNode *N);
+ bool isSHUFPMask(SDNode *N);
/// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVHLPS.
- bool isMOVHLPSMask(ShuffleVectorSDNode *N);
+ bool isMOVHLPSMask(SDNode *N);
/// isMOVHLPS_v_undef_Mask - Special case of isMOVHLPSMask for canonical form
/// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef,
/// <2, 3, 2, 3>
- bool isMOVHLPS_v_undef_Mask(ShuffleVectorSDNode *N);
+ bool isMOVHLPS_v_undef_Mask(SDNode *N);
/// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand
- /// specifies a shuffle of elements that is suitable for MOVLP{S|D}.
- bool isMOVLPMask(ShuffleVectorSDNode *N);
+ /// specifies a shuffle of elements that is suitable for input to MOVLP{S|D}.
+ bool isMOVLPMask(SDNode *N);
/// isMOVHPMask - Return true if the specified VECTOR_SHUFFLE operand
- /// specifies a shuffle of elements that is suitable for MOVHP{S|D}.
+ /// specifies a shuffle of elements that is suitable for input to MOVHP{S|D}
/// as well as MOVLHPS.
- bool isMOVHPMask(ShuffleVectorSDNode *N);
+ bool isMOVHPMask(SDNode *N);
/// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to UNPCKL.
- bool isUNPCKLMask(ShuffleVectorSDNode *N, bool V2IsSplat = false);
+ bool isUNPCKLMask(SDNode *N, bool V2IsSplat = false);
/// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to UNPCKH.
- bool isUNPCKHMask(ShuffleVectorSDNode *N, bool V2IsSplat = false);
+ bool isUNPCKHMask(SDNode *N, bool V2IsSplat = false);
/// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form
/// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef,
/// <0, 0, 1, 1>
- bool isUNPCKL_v_undef_Mask(ShuffleVectorSDNode *N);
+ bool isUNPCKL_v_undef_Mask(SDNode *N);
/// isUNPCKH_v_undef_Mask - Special case of isUNPCKHMask for canonical form
/// of vector_shuffle v, v, <2, 6, 3, 7>, i.e. vector_shuffle v, undef,
/// <2, 2, 3, 3>
- bool isUNPCKH_v_undef_Mask(ShuffleVectorSDNode *N);
+ bool isUNPCKH_v_undef_Mask(SDNode *N);
/// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVSS,
/// MOVSD, and MOVD, i.e. setting the lowest element.
- bool isMOVLMask(ShuffleVectorSDNode *N);
+ bool isMOVLMask(SDNode *N);
/// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVSHDUP.
- bool isMOVSHDUPMask(ShuffleVectorSDNode *N);
+ bool isMOVSHDUPMask(SDNode *N);
/// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVSLDUP.
- bool isMOVSLDUPMask(ShuffleVectorSDNode *N);
+ bool isMOVSLDUPMask(SDNode *N);
+
+ /// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand
+ /// specifies a splat of a single element.
+ bool isSplatMask(SDNode *N);
+
+ /// isSplatLoMask - Return true if the specified VECTOR_SHUFFLE operand
+ /// specifies a splat of zero element.
+ bool isSplatLoMask(SDNode *N);
/// isMOVDDUPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVDDUP.
- bool isMOVDDUPMask(ShuffleVectorSDNode *N);
+ bool isMOVDDUPMask(SDNode *N);
/// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle
/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP*
@@ -470,13 +477,14 @@ namespace llvm {
/// support *some* VECTOR_SHUFFLE operations, those with specific masks.
/// By default, if a target supports the VECTOR_SHUFFLE node, all mask
/// values are assumed to be legal.
- virtual bool isShuffleMaskLegal(const int *Mask, MVT VT) const;
+ virtual bool isShuffleMaskLegal(SDValue Mask, MVT VT) const;
/// isVectorClearMaskLegal - Similar to isShuffleMaskLegal. Targets can
/// use this to indicate if there is a suitable
/// VECTOR_SHUFFLE that can be used to replace a VAND with a constant
/// pool entry.
- virtual bool isVectorClearMaskLegal(const int *Mask, MVT VT) const;
+ virtual bool isVectorClearMaskLegal(const std::vector<SDValue> &BVOps,
+ MVT EVT, SelectionDAG &DAG) const;
/// ShouldShrinkFPConstant - If true, then instruction selection should
/// seek to shrink the FP constant of the specified type to a smaller type
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 511d42abc1..462433bb27 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -3801,7 +3801,6 @@ def : Pat<(parallel (store (i32 (X86dec_flag (loadi32 addr:$dst))), addr:$dst),
(implicit EFLAGS)),
(DEC32m addr:$dst)>, Requires<[In32BitMode]>;
-
//===----------------------------------------------------------------------===//
// Floating Point Stack Support
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td
index 338b9e294b..71f2cb164d 100644
--- a/lib/Target/X86/X86InstrMMX.td
+++ b/lib/Target/X86/X86InstrMMX.td
@@ -30,37 +30,33 @@ def bc_v1i64 : PatFrag<(ops node:$in), (v1i64 (bitconvert node:$in))>;
// MMX_SHUFFLE_get_shuf_imm xform function: convert vector_shuffle mask to
// PSHUFW imm.
-def MMX_SHUFFLE_get_shuf_imm : SDNodeXForm<vector_shuffle, [{
+def MMX_SHUFFLE_get_shuf_imm : SDNodeXForm<build_vector, [{
return getI8Imm(X86::getShuffleSHUFImmediate(N));
}]>;
// Patterns for: vector_shuffle v1, v2, <2, 6, 3, 7, ...>
-def mmx_unpckh : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- return X86::isUNPCKHMask(cast<ShuffleVectorSDNode>(N));
+def MMX_UNPCKH_shuffle_mask : PatLeaf<(build_vector), [{
+ return X86::isUNPCKHMask(N);
}]>;
// Patterns for: vector_shuffle v1, v2, <0, 4, 1, 5, ...>
-def mmx_unpckl : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- return X86::isUNPCKLMask(cast<ShuffleVectorSDNode>(N));
+def MMX_UNPCKL_shuffle_mask : PatLeaf<(build_vector), [{
+ return X86::isUNPCKLMask(N);
}]>;
// Patterns for: vector_shuffle v1, <undef>, <2, 2, 3, 3, ...>
-def mmx_unpckh_undef : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- return X86::isUNPCKH_v_undef_Mask(cast<ShuffleVectorSDNode>(N));
+def MMX_UNPCKH_v_undef_shuffle_mask : PatLeaf<(build_vector), [{
+ return X86::isUNPCKH_v_undef_Mask(N);
}]>;
// Patterns for: vector_shuffle v1, <undef>, <0, 0, 1, 1, ...>
-def mmx_unpckl_undef : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- return X86::isUNPCKL_v_undef_Mask(cast<ShuffleVectorSDNode>(N));
+def MMX_UNPCKL_v_undef_shuffle_mask : PatLeaf<(build_vector), [{
+ return X86::isUNPCKL_v_undef_Mask(N);
}]>;
-def mmx_pshufw : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- return X86::isPSHUFDMask(cast<ShuffleVectorSDNode>(N));
+// Patterns for shuffling.
+def MMX_PSHUFW_shuffle_mask : PatLeaf<(build_vector), [{
+ return X86::isPSHUFDMask(N);
}], MMX_SHUFFLE_get_shuf_imm>;
//===----------------------------------------------------------------------===//
@@ -189,8 +185,9 @@ def MMX_MOVDQ2Qrr : SDIi8<0xD6, MRMDestMem, (outs VR64:$dst), (ins VR128:$src),
def MMX_MOVQ2DQrr : SSDIi8<0xD6, MRMDestMem, (outs VR128:$dst), (ins VR64:$src),
"movq2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (movl immAllZerosV,
- (v2i64 (scalar_to_vector (i64 (bitconvert VR64:$src))))))]>;
+ (v2i64 (vector_shuffle immAllZerosV,
+ (v2i64 (scalar_to_vector (i64 (bitconvert VR64:$src)))),
+ MOVL_shuffle_mask)))]>;
let neverHasSideEffects = 1 in
def MMX_MOVQ2FR64rr: SSDIi8<0xD6, MRMDestMem, (outs FR64:$dst), (ins VR64:$src),
@@ -322,74 +319,86 @@ let isTwoAddress = 1 in {
(outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
"punpckhbw\t{$src2, $dst|$dst, $src2}",
[(set VR64:$dst,
- (v8i8 (mmx_unpckh VR64:$src1, VR64:$src2)))]>;
+ (v8i8 (vector_shuffle VR64:$src1, VR64:$src2,
+ MMX_UNPCKH_shuffle_mask)))]>;
def MMX_PUNPCKHBWrm : MMXI<0x68, MRMSrcMem,
(outs VR64:$dst), (ins VR64:$src1, i64mem:$src2),
"punpckhbw\t{$src2, $dst|$dst, $src2}",
[(set VR64:$dst,
- (v8i8 (mmx_unpckh VR64:$src1,
- (bc_v8i8 (load_mmx addr:$src2)))))]>;
+ (v8i8 (vector_shuffle VR64:$src1,
+ (bc_v8i8 (load_mmx addr:$src2)),
+ MMX_UNPCKH_shuffle_mask)))]>;
def MMX_PUNPCKHWDrr : MMXI<0x69, MRMSrcReg,
(outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
"punpckhwd\t{$src2, $dst|$dst, $src2}",
[(set VR64:$dst,
- (v4i16 (mmx_unpckh VR64:$src1, VR64:$src2)))]>;
+ (v4i16 (vector_shuffle VR64:$src1, VR64:$src2,
+ MMX_UNPCKH_shuffle_mask)))]>;
def MMX_PUNPCKHWDrm : MMXI<0x69, MRMSrcMem,
(outs VR64:$dst), (ins VR64:$src1, i64mem:$src2),
"punpckhwd\t{$src2, $dst|$dst, $src2}",
[(set VR64:$dst,
- (v4i16 (mmx_unpckh VR64:$src1,
- (bc_v4i16 (load_mmx addr:$src2)))))]>;
+ (v4i16 (vector_shuffle VR64:$src1,
+ (bc_v4i16 (load_mmx addr:$src2)),
+ MMX_UNPCKH_shuffle_mask)))]>;
def MMX_PUNPCKHDQrr : MMXI<0x6A, MRMSrcReg,
(outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
"punpckhdq\t{$src2, $dst|$dst, $src2}",
[(set VR64:$dst,
- (v2i32 (mmx_unpckh VR64:$src1, VR64:$src2)))]>;
+ (v2i32 (vector_shuffle VR64:$src1, VR64:$src2,
+ MMX_UNPCKH_shuffle_mask)))]>;
def MMX_PUNPCKHDQrm : MMXI<0x6A, MRMSrcMem,
(outs VR64:$dst), (ins VR64:$src1, i64mem:$src2),
"punpckhdq\t{$src2, $dst|$dst, $src2}",
[(set VR64:$dst,
- (v2i32 (mmx_unpckh VR64:$src1,
- (bc_v2i32 (load_mmx addr:$src2)))))]>;
+ (v2i32 (vector_shuffle VR64:$src1,
+ (bc_v2i32 (load_mmx addr:$src2)),
+ MMX_UNPCKH_shuffle_mask)))]>;
// Unpack Low Packed Data Instructions
def MMX_PUNPCKLBWrr : MMXI<0x60, MRMSrcReg,
(outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
"punpcklbw\t{$src2, $dst|$dst, $src2}",
[(set VR64:$dst,
- (v8i8 (mmx_unpckl VR64:$src1, VR64:$src2)))]>;
+ (v8i8 (vector_shuffle VR64:$src1, VR64:$src2,
+ MMX_UNPCKL_shuffle_mask)))]>;
def MMX_PUNPCKLBWrm : MMXI<0x60, MRMSrcMem,
(outs VR64:$dst), (ins VR64:$src1, i64mem:$src2),
"punpcklbw\t{$src2, $dst|$dst, $src2}",
[(set VR64:$dst,
- (v8i8 (mmx_unpckl VR64:$src1,
- (bc_v8i8 (load_mmx addr:$src2)))))]>;
+ (v8i8 (vector_shuffle VR64:$src1,
+ (bc_v8i8 (load_mmx addr:$src2)),
+ MMX_UNPCKL_shuffle_mask)))]>;
def MMX_PUNPCKLWDrr : MMXI<0x61, MRMSrcReg,
(outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
"punpcklwd\t{$src2, $dst|$dst, $src2}",
[(set VR64:$dst,
- (v4i16 (mmx_unpckl VR64:$src1, VR64:$src2)))]>;
+ (v4i16 (vector_shuffle VR64:$src1, VR64:$src2,
+ MMX_UNPCKL_shuffle_mask)))]>;
def MMX_PUNPCKLWDrm : MMXI<0x61, MRMSrcMem,
(outs VR64:$dst), (ins VR64:$src1, i64mem:$src2),
"punpcklwd\t{$src2, $dst|$dst, $src2}",
[(set VR64:$dst,
- (v4i16 (mmx_unpckl VR64:$src1,
- (bc_v4i16 (load_mmx addr:$src2)))))]>;
+ (v4i16 (vector_shuffle VR64:$src1,
+ (bc_v4i16 (load_mmx addr:$src2)),
+ MMX_UNPCKL_shuffle_mask)))]>;
def MMX_PUNPCKLDQrr : MMXI<0x62, MRMSrcReg,
(outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
"punpckldq\t{$src2, $dst|$dst, $src2}",
[(set VR64:$dst,
- (v2i32 (mmx_unpckl VR64:$src1, VR64:$src2)))]>;
+ (v2i32 (vector_shuffle VR64:$src1, VR64:$src2,
+ MMX_UNPCKL_shuffle_mask)))]>;
def MMX_PUNPCKLDQrm : MMXI<0x62, MRMSrcMem,
(outs VR64:$dst), (ins VR64:$src1, i64mem:$src2),
"punpckldq\t{$src2, $dst|$dst, $src2}",
[(set VR64:$dst,
- (v2i32 (mmx_unpckl VR64:$src1,
- (bc_v2i32 (load_mmx addr:$src2)))))]>;
+ (v2i32 (vector_shuffle VR64:$src1,
+ (bc_v2i32 (load_mmx addr:$src2)),
+ MMX_UNPCKL_shuffle_mask)))]>;
}
// -- Pack Instructions
@@ -402,13 +411,17 @@ def MMX_PSHUFWri : MMXIi8<0x70, MRMSrcReg,
(outs VR64:$dst), (ins VR64:$src1, i8imm:$src2),
"pshufw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR64:$dst,
- (v4i16 (mmx_pshufw:$src2 VR64:$src1, (undef))))]>;
+ (v4i16 (vector_shuffle
+ VR64:$src1, (undef),
+ MMX_PSHUFW_shuffle_mask:$src2)))]>;
def MMX_PSHUFWmi : MMXIi8<0x70, MRMSrcMem,
(outs VR64:$dst), (ins i64mem:$src1, i8imm:$src2),
"pshufw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR64:$dst,
- (mmx_pshufw:$src2 (bc_v4i16 (load_mmx addr:$src1)),
- (undef)))]>;
+ (v4i16 (vector_shuffle
+ (bc_v4i16 (load_mmx addr:$src1)),
+ (undef),
+ MMX_PSHUFW_shuffle_mask:$src2)))]>;
// -- Conversion Instructions
let neverHasSideEffects = 1 in {
@@ -614,27 +627,34 @@ def : Pat<(bc_v4i16 (v2i32 (scalar_to_vector GR32:$src))),
// Patterns to perform canonical versions of vector shuffling.
let AddedComplexity = 10 in {
- def : Pat<(v8i8 (mmx_unpckl_undef VR64:$src, (undef))),
+ def : Pat<(v8i8 (vector_shuffle VR64:$src, (undef),
+ MMX_UNPCKL_v_undef_shuffle_mask)),
(MMX_PUNPCKLBWrr VR64:$src, VR64:$src)>;
- def : Pat<(v4i16 (mmx_unpckl_undef VR64:$src, (undef))),
+ def : Pat<(v4i16 (vector_shuffle VR64:$src, (undef),
+ MMX_UNPCKL_v_undef_shuffle_mask)),
(MMX_PUNPCKLWDrr VR64:$src, VR64:$src)>;
- def : Pat<(v2i32 (mmx_unpckl_undef VR64:$src, (undef))),
+ def : Pat<(v2i32 (vector_shuffle VR64:$src, (undef),
+ MMX_UNPCKL_v_undef_shuffle_mask)),
(MMX_PUNPCKLDQrr VR64:$src, VR64:$src)>;
}
let AddedComplexity = 10 in {
- def : Pat<(v8i8 (mmx_unpckh_undef VR64:$src, (undef))),
+ def : Pat<(v8i8 (vector_shuffle VR64:$src, (undef),
+ MMX_UNPCKH_v_undef_shuffle_mask)),
(MMX_PUNPCKHBWrr VR64:$src, VR64:$src)>;
- def : Pat<(v4i16 (mmx_unpckh_undef VR64:$src, (undef))),
+ def : Pat<(v4i16 (vector_shuffle VR64:$src, (undef),
+ MMX_UNPCKH_v_undef_shuffle_mask)),
(MMX_PUNPCKHWDrr VR64:$src, VR64:$src)>;
- def : Pat<(v2i32 (mmx_unpckh_undef VR64:$src, (undef))),
+ def : Pat<(v2i32 (vector_shuffle VR64:$src, (undef),
+ MMX_UNPCKH_v_undef_shuffle_mask)),
(MMX_PUNPCKHDQrr VR64:$src, VR64:$src)>;
}
// Patterns to perform vector shuffling with a zeroed out vector.
let AddedComplexity = 20 in {
- def : Pat<(bc_v2i32 (mmx_unpckl immAllZerosV,
- (v2i32 (scalar_to_vector (load_mmx addr:$src))))),
+ def : Pat<(bc_v2i32 (vector_shuffle immAllZerosV,
+ (v2i32 (scalar_to_vector (load_mmx addr:$src))),
+ MMX_UNPCKL_shuffle_mask)),
(MMX_PUNPCKLDQrm VR64:$src, VR64:$src)>;
}
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index a10f4433e8..3ce35bd625 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -175,108 +175,103 @@ def PSxLDQ_imm : SDNodeXForm<imm, [{
// SHUFFLE_get_shuf_imm xform function: convert vector_shuffle mask to PSHUF*,
// SHUFP* etc. imm.
-def SHUFFLE_get_shuf_imm : SDNodeXForm<vector_shuffle, [{
+def SHUFFLE_get_shuf_imm : SDNodeXForm<build_vector, [{
return getI8Imm(X86::getShuffleSHUFImmediate(N));
}]>;
// SHUFFLE_get_pshufhw_imm xform function: convert vector_shuffle mask to
// PSHUFHW imm.
-def SHUFFLE_get_pshufhw_imm : SDNodeXForm<vector_shuffle, [{
+def SHUFFLE_get_pshufhw_imm : SDNodeXForm<build_vector, [{
return getI8Imm(X86::getShufflePSHUFHWImmediate(N));
}]>;
// SHUFFLE_get_pshuflw_imm xform function: convert vector_shuffle mask to
// PSHUFLW imm.
-def SHUFFLE_get_pshuflw_imm : SDNodeXForm<vector_shuffle, [{
+def SHUFFLE_get_pshuflw_imm : SDNodeXForm<build_vector, [{
return getI8Imm(X86::getShufflePSHUFLWImmediate(N));
}]>;
-def splat_lo : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
- return SVOp->isSplat() && SVOp->getSplatIndex() == 0;
-}]>;
+def SSE_splat_mask : PatLeaf<(build_vector), [{
+ return X86::isSplatMask(N);
+}], SHUFFLE_get_shuf_imm>;
-def movddup : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- return X86::isMOVDDUPMask(cast<ShuffleVectorSDNode>(N));
+def SSE_splat_lo_mask : PatLeaf<(build_vector), [{
+ return X86::isSplatLoMask(N);
}]>;
-def movhlps : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- return X86::isMOVHLPSMask(cast<ShuffleVectorSDNode>(N));
+def MOVDDUP_shuffle_mask : PatLeaf<(build_vector), [{
+ return X86::isMOVDDUPMask(N);
}]>;
-def movhlps_undef : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- return X86::isMOVHLPS_v_undef_Mask(cast<ShuffleVectorSDNode>(N));
+def MOVHLPS_shuffle_mask : PatLeaf<(build_vector), [{
+ return X86::isMOVHLPSMask(N);
}]>;
-def movhp : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- return X86::isMOVHPMask(cast<ShuffleVectorSDNode>(N));
+def MOVHLPS_v_undef_shuffle_mask : PatLeaf<(build_vector), [{
+ return X86::isMOVHLPS_v_undef_Mask(N);
}]>;
-def movlp : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- return X86::isMOVLPMask(cast<ShuffleVectorSDNode>(N));
+def MOVHP_shuffle_mask : PatLeaf<(build_vector), [{
+ return X86::isMOVHPMask(N);
}]>;
-def movl : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- return X86::isMOVLMask(cast<ShuffleVectorSDNode>(N));
+def MOVLP_shuffle_mask : PatLeaf<(build_vector), [{
+ return X86::isMOVLPMask(N);
}]>;
-def movshdup : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- return X86::isMOVSHDUPMask(cast<ShuffleVectorSDNode>(N));
+def MOVL_shuffle_mask : PatLeaf<(build_vector), [{
+ return X86::isMOVLMask(N);
}]>;
-def movsldup : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- return X86::isMOVSLDUPMask(cast<ShuffleVectorSDNode>(N));
+def MOVSHDUP_shuffle_mask : PatLeaf<(build_vector), [{
+ return X86::isMOVSHDUPMask(N);
}]>;
-def unpckl : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- return X86::isUNPCKLMask(cast<ShuffleVectorSDNode>(N));
+def MOVSLDUP_shuffle_mask : PatLeaf<(build_vector), [{
+ return X86::isMOVSLDUPMask(N);
}]>;
-def unpckh : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- return X86::isUNPCKHMask(cast<ShuffleVectorSDNode>(N));
+def UNPCKL_shuffle_mask : PatLeaf<(build_vector), [{
+ return X86::isUNPCKLMask(N);
}]>;
-def unpckl_undef : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- return X86::isUNPCKL_v_undef_Mask(cast<ShuffleVectorSDNode>(N));
+def UNPCKH_shuffle_mask : PatLeaf<(build_vector), [{
+ return X86::isUNPCKHMask(N);
}]>;
-def unpckh_undef : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- return X86::isUNPCKH_v_undef_Mask(cast<ShuffleVectorSDNode>(N));
+def UNPCKL_v_undef_shuffle_mask : PatLeaf<(build_vector), [{
+ return X86::isUNPCKL_v_undef_Mask(N);
}]>;
-def pshufd : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- return X86::isPSHUFDMask(cast<ShuffleVectorSDNode>(N));
-}], SHUFFLE_get_shuf_imm>;
+def UNPCKH_v_undef_shuffle_mask : PatLeaf<(build_vector), [{
+ return X86::isUNPCKH_v_undef_Mask(N);
+}]>;
-def shufp : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- return X86::isSHUFPMask(cast<ShuffleVectorSDNode>(N));
+def PSHUFD_shuffle_mask : PatLeaf<(build_vector), [{
+ return X86::isPSHUFDMask(N);
}], SHUFFLE_get_shuf_imm>;
-def pshufhw : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- return X86::isPSHUFHWMask(cast<ShuffleVectorSDNode>(N));
+def PSHUFHW_shuffle_mask : PatLeaf<(build_vector), [{
+ return X86::isPSHUFHWMask(N);
}], SHUFFLE_get_pshufhw_imm>;
-def pshuflw : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- return X86::isPSHUFLWMask(cast<ShuffleVectorSDNode>(N));
+def PSHUFLW_shuffle_mask : PatLeaf<(build_vector), [{
+ return X86::isPSHUFLWMask(N);
}], SHUFFLE_get_pshuflw_imm>;
+def SHUFP_unary_shuffle_mask : PatLeaf<(build_vector), [{
+ return X86::isPSHUFDMask(N);
+}], SHUFFLE_get_shuf_imm>;
+
+def SHUFP_shuffle_mask : PatLeaf<(build_vector), [{
+ return X86::isSHUFPMask(N);
+}], SHUFFLE_get_shuf_imm>;
+
+def PSHUFD_binary_shuffle_mask : PatLeaf<(build_vector), [{
+ return X86::isSHUFPMask(N);
+}], SHUFFLE_get_shuf_imm>;
+
+
//===----------------------------------------------------------------------===//
// SSE scalar FP Instructions
//===----------------------------------------------------------------------===//
@@ -709,14 +704,16 @@ let Constraints = "$src1 = $dst" in {
(outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
"movlps\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
- (movlp VR128:$src1,
- (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))))]>;
+ (v4f32 (vector_shuffle VR128:$src1,
+ (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2)))),
+ MOVLP_shuffle_mask)))]>;
def MOVHPSrm : PSI<0x16, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
"movhps\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
- (movhp VR128:$src1,
- (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))))]>;
+ (v4f32 (vector_shuffle VR128:$src1,
+ (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2)))),
+ MOVHP_shuffle_mask)))]>;
} // AddedComplexity
} // Constraints = "$src1 = $dst"
@@ -731,25 +728,29 @@ def MOVLPSmr : PSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
def MOVHPSmr : PSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movhps\t{$src, $dst|$dst, $src}",
[(store (f64 (vector_extract
- (unpckh (bc_v2f64 (v4f32 VR128:$src)),
- (undef)), (iPTR 0))), addr:$dst)]>;
+ (v2f64 (vector_shuffle
+ (bc_v2f64 (v4f32 VR128:$src)), (undef),
+ UNPCKH_shuffle_mask)), (iPTR 0))),
+ addr:$dst)]>;
let Constraints = "$src1 = $dst" in {
let AddedComplexity = 20 in {
def MOVLHPSrr : PSI<0x16, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"movlhps\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
- (v4f32 (movhp VR128:$src1, VR128:$src2)))]>;
+ (v4f32 (vector_shuffle VR128:$src1, VR128:$src2,
+ MOVHP_shuffle_mask)))]>;
def MOVHLPSrr : PSI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"movhlps\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
- (v4f32 (movhlps VR128:$src1, VR128:$src2)))]>;
+ (v4f32 (vector_shuffle VR128:$src1, VR128:$src2,
+ MOVHLPS_shuffle_mask)))]>;
} // AddedComplexity
} // Constraints = "$src1 = $dst"
let AddedComplexity = 20 in
-def : Pat<(v4f32 (movddup VR128:$src, (undef))),
+def : Pat<(v4f32 (vector_shuffle VR128:$src, (undef), MOVDDUP_shuffle_mask)),
(MOVLHPSrr VR128:$src, VR128:$src)>, Requires<[HasSSE1]>;
@@ -907,41 +908,51 @@ let Constraints = "$src1 = $dst" in {
let isConvertibleToThreeAddress = 1 in // Convert to pshufd
def SHUFPSrri : PSIi8<0xC6, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1,
- VR128:$src2, i8imm:$src3),
+ VR128:$src2, i32i8imm:$src3),
"shufps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(set VR128:$dst,
- (v4f32 (shufp:$src3 VR128:$src1, VR128:$src2)))]>;
+ (v4f32 (vector_shuffle
+ VR128:$src1, VR128:$src2,
+ SHUFP_shuffle_mask:$src3)))]>;
def SHUFPSrmi : PSIi8<0xC6, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1,
- f128mem:$src2, i8imm:$src3),
+ f128mem:$src2, i32i8imm:$src3),
"shufps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(set VR128:$dst,
- (v4f32 (shufp:$src3
- VR128:$src1, (memopv4f32 addr:$src2))))]>;
+ (v4f32 (vector_shuffle
+ VR128:$src1, (memopv4f32 addr:$src2),
+ SHUFP_shuffle_mask:$src3)))]>;
let AddedComplexity = 10 in {
def UNPCKHPSrr : PSI<0x15, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"unpckhps\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
- (v4f32 (unpckh VR128:$src1, VR128:$src2)))]>;
+ (v4f32 (vector_shuffle
+ VR128:$src1, VR128:$src2,
+ UNPCKH_shuffle_mask)))]>;
def UNPCKHPSrm : PSI<0x15, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
"unpckhps\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
- (v4f32 (unpckh VR128:$src1,
- (memopv4f32 addr:$src2))))]>;
+ (v4f32 (vector_shuffle
+ VR128:$src1, (memopv4f32 addr:$src2),
+ UNPCKH_shuffle_mask)))]>;
def UNPCKLPSrr : PSI<0x14, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"unpcklps\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
- (v4f32 (unpckl VR128:$src1, VR128:$src2)))]>;
+ (v4f32 (vector_shuffle
+ VR128:$src1, VR128:$src2,
+ UNPCKL_shuffle_mask)))]>;
def UNPCKLPSrm : PSI<0x14, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
"unpcklps\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
- (unpckl VR128:$src1, (memopv4f32 addr:$src2)))]>;
+ (v4f32 (vector_shuffle
+ VR128:$src1, (memopv4f32 addr:$src2),
+ UNPCKL_shuffle_mask)))]>;
} // AddedComplexity
} // Constraints = "$src1 = $dst"
@@ -1033,7 +1044,8 @@ let neverHasSideEffects = 1 in
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"movss\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
- (v4f32 (movl VR128:$src1, VR128:$src2)))]>;
+ (v4f32 (vector_shuffle VR128:$src1, VR128:$src2,
+ MOVL_shuffle_mask)))]>;
}
// Move to lower bits of a VR128 and zeroing upper bits.
@@ -1439,14 +1451,16 @@ let Constraints = "$src1 = $dst" in {
(outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
"movlpd\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
- (v2f64 (movlp VR128:$src1,
- (scalar_to_vector (loadf64 addr:$src2)))))]>;
+ (v2f64 (vector_shuffle VR128:$src1,
+ (scalar_to_vector (loadf64 addr:$src2)),
+ MOVLP_shuffle_mask)))]>;
def MOVHPDrm : PDI<0x16, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
"movhpd\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
- (v2f64 (movhp VR128:$src1,
- (scalar_to_vector (loadf64 addr:$src2)))))]>;
+ (v2f64 (vector_shuffle VR128:$src1,
+ (scalar_to_vector (loadf64 addr:$src2)),
+ MOVHP_shuffle_mask)))]>;
} // AddedComplexity
} // Constraints = "$src1 = $dst"
@@ -1460,8 +1474,9 @@ def MOVLPDmr : PDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
def MOVHPDmr : PDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movhpd\t{$src, $dst|$dst, $src}",
[(store (f64 (vector_extract
- (v2f64 (unpckh VR128:$src, (undef))),
- (iPTR 0))), addr:$dst)]>;
+ (v2f64 (vector_shuffle VR128:$src, (undef),
+ UNPCKH_shuffle_mask)), (iPTR 0))),
+ addr:$dst)]>;
// SSE2 instructions without OpSize prefix
def Int_CVTDQ2PSrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
@@ -1729,39 +1744,48 @@ let Constraints = "$src1 = $dst" in {
def SHUFPDrri : PDIi8<0xC6, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i8imm:$src3),
"shufpd\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- [(set VR128:$dst,
- (v2f64 (shufp:$src3 VR128:$src1, VR128:$src2)))]>;
+ [(set VR128:$dst, (v2f64 (vector_shuffle
+ VR128:$src1, VR128:$src2,
+ SHUFP_shuffle_mask:$src3)))]>;
def SHUFPDrmi : PDIi8<0xC6, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1,
f128mem:$src2, i8imm:$src3),
"shufpd\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(set VR128:$dst,
- (v2f64 (shufp:$src3
- VR128:$src1, (memopv2f64 addr:$src2))))]>;
+ (v2f64 (vector_shuffle
+ VR128:$src1, (memopv2f64 addr:$src2),
+ SHUFP_shuffle_mask:$src3)))]>;
let AddedComplexity = 10 in {
def UNPCKHPDrr : PDI<0x15, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"unpckhpd\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
- (v2f64 (unpckh VR128:$src1, VR128:$src2)))]>;
+ (v2f64 (vector_shuffle
+ VR128:$src1, VR128:$src2,
+ UNPCKH_shuffle_mask)))]>;
def UNPCKHPDrm : PDI<0x15, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
"unpckhpd\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
- (v2f64 (unpckh VR128:$src1,
- (memopv2f64 addr:$src2))))]>;
+ (v2f64 (vector_shuffle
+ VR128:$src1, (memopv2f64 addr:$src2),
+ UNPCKH_shuffle_mask)))]>;
def UNPCKLPDrr : PDI<0x14, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"unpcklpd\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
- (v2f64 (unpckl VR128:$src1, VR128:$src2)))]>;
+ (v2f64 (vector_shuffle
+ VR128:$src1, VR128:$src2,
+ UNPCKL_shuffle_mask)))]>;
def UNPCKLPDrm : PDI<0x14, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
"unpcklpd\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
- (unpckl VR128:$src1, (memopv2f64 addr:$src2)))]>;
+ (v2f64 (vector_shuffle
+ VR128:$src1, (memopv2f64 addr:$src2),
+ UNPCKL_shuffle_mask)))]>;
} // AddedComplexity
} // Constraints = "$src1 = $dst"
@@ -2019,43 +2043,49 @@ defm PACKUSWB : PDI_binop_rm_int<0x67, "packuswb", int_x86_sse2_packuswb_128>;
def PSHUFDri : PDIi8<0x70, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, i8imm:$src2),
"pshufd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR128:$dst, (v4i32 (pshufd:$src2
- VR128:$src1, (undef))))]>;
+ [(set VR128:$dst, (v4i32 (vector_shuffle
+ VR128:$src1, (undef),
+ PSHUFD_shuffle_mask:$src2)))]>;
def PSHUFDmi : PDIi8<0x70, MRMSrcMem,
(outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2),
"pshufd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR128:$dst, (v4i32 (pshufd:$src2
+ [(set VR128:$dst, (v4i32 (vector_shuffle
(bc_v4i32(memopv2i64 addr:$src1)),
- (undef))))]>;
+ (undef),
+ PSHUFD_shuffle_mask:$src2)))]>;
// SSE2 with ImmT == Imm8 and XS prefix.
def PSHUFHWri : Ii8<0x70, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, i8imm:$src2),
"pshufhw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR128:$dst, (v8i16 (pshufhw:$src2 VR128:$src1,
- (undef))))]>,
+ [(set VR128:$dst, (v8i16 (vector_shuffle
+ VR128:$src1, (undef),
+ PSHUFHW_shuffle_mask:$src2)))]>,
XS, Requires<[HasSSE2]>;
def PSHUFHWmi : Ii8<0x70, MRMSrcMem,
(outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2),
"pshufhw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR128:$dst, (v8i16 (pshufhw:$src2
- (bc_v8i16 (memopv2i64 addr:$src1)),
- (undef))))]>,
+ [(set VR128:$dst, (v8i16 (vector_shuffle
+ (bc_v8i16 (memopv2i64 addr:$src1)),
+ (undef),
+ PSHUFHW_shuffle_mask:$src2)))]>,
XS, Requires<[HasSSE2]>;
// SSE2 with ImmT == Imm8 and XD prefix.
def PSHUFLWri : Ii8<0x70, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, i8imm:$src2),
+ (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
"pshuflw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR128:$dst, (v8i16 (pshuflw:$src2 VR128:$src1,
- (undef))))]>,
+ [(set VR128:$dst, (v8i16 (vector_shuffle
+ VR128:$src1, (undef),
+ PSHUFLW_shuffle_mask:$src2)))]>,
XD, Requires<[HasSSE2]>;
def PSHUFLWmi : Ii8<0x70, MRMSrcMem,
- (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2),
+ (outs VR128:$dst), (ins i128mem:$src1, i32i8imm:$src2),
"pshuflw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR128:$dst, (v8i16 (pshuflw:$src2
- (bc_v8i16 (memopv2i64 addr:$src1)),
- (undef))))]>,
+ [(set VR128:$dst, (v8i16 (vector_shuffle
+ (bc_v8i16 (memopv2i64 addr:$src1)),
+ (undef),
+ PSHUFLW_shuffle_mask:$src2)))]>,
XD, Requires<[HasSSE2]>;
@@ -2064,91 +2094,107 @@ let Constraints = "$src1 = $dst" in {
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"punpcklbw\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
- (v16i8 (unpckl VR128:$src1, VR128:$src2)))]>;
+ (v16i8 (vector_shuffle VR128:$src1, VR128:$src2,
+ UNPCKL_shuffle_mask)))]>;
def PUNPCKLBWrm : PDI<0x60, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
"punpcklbw\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
- (unpckl VR128:$src1,
- (bc_v16i8 (memopv2i64 addr:$src2))))]>;
+ (v16i8 (vector_shuffle VR128:$src1,
+ (bc_v16i8 (memopv2i64 addr:$src2)),
+ UNPCKL_shuffle_mask)))]>;
def PUNPCKLWDrr : PDI<0x61, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"punpcklwd\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
- (v8i16 (unpckl VR128:$src1, VR128:$src2)))]>;
+ (v8i16 (vector_shuffle VR128:$src1, VR128:$src2,
+ UNPCKL_shuffle_mask)))]>;
def PUNPCKLWDrm : PDI<0x61, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
"punpcklwd\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
- (unpckl VR128:$src1,
- (bc_v8i16 (memopv2i64 addr:$src2))))]>;
+ (v8i16 (vector_shuffle VR128:$src1,
+ (bc_v8i16 (memopv2i64 addr:$src2)),
+ UNPCKL_shuffle_mask)))]>;
def PUNPCKLDQrr : PDI<0x62, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"punpckldq\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
- (v4i32 (unpckl VR128:$src1, VR128:$src2)))]>;
+ (v4i32 (vector_shuffle VR128:$src1, VR128:$src2,
+ UNPCKL_shuffle_mask)))]>;
def PUNPCKLDQrm : PDI<0x62, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
"punpckldq\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
- (unpckl VR128:$src1,
- (bc_v4i32 (memopv2i64 addr:$src2))))]>;
+ (v4i32 (vector_shuffle VR128:$src1,
+ (bc_v4i32 (memopv2i64 addr:$src2)),
+ UNPCKL_shuffle_mask)))]>;
def PUNPCKLQDQrr : PDI<0x6C, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"punpcklqdq\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
- (v2i64 (unpckl VR128:$src1, VR128:$src2)))]>;
+ (v2i64 (vector_shuffle VR128:$src1, VR128:$src2,
+ UNPCKL_shuffle_mask)))]>;
def PUNPCKLQDQrm : PDI<0x6C, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
"punpcklqdq\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
- (v2i64 (unpckl VR128:$src1,
- (memopv2i64 addr:$src2))))]>;
+ (v2i64 (vector_shuffle VR128:$src1,
+ (memopv2i64 addr:$src2),
+ UNPCKL_shuffle_mask)))]>;
def PUNPCKHBWrr : PDI<0x68, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"punpckhbw\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
- (v16i8 (unpckh VR128:$src1, VR128:$src2)))]>;
+ (v16i8 (vector_shuffle VR128:$src1, VR128:$src2,
+ UNPCKH_shuffle_mask)))]>;
def PUNPCKHBWrm : PDI<0x68, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
"punpckhbw\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (unpckh VR128:$src1,
- (bc_v16i8 (memopv2i64 addr:$src2))))]>;
+ [(set VR128:$dst,
+ (v16i8 (vector_shuffle VR128:$src1,
+ (bc_v16i8 (memopv2i64 addr:$src2)),
+ UNPCKH_shuffle_mask)))]>;
def PUNPCKHWDrr : PDI<0x69, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"punpckhwd\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
- (v8i16 (unpckh VR128:$src1, VR128:$src2)))]>;
+ (v8i16 (vector_shuffle VR128:$src1, VR128:$src2,
+ UNPCKH_shuffle_mask)))]>;
def PUNPCKHWDrm : PDI<0x69, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
"punpckhwd\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
- (unpckh VR128:$src1,
- (bc_v8i16 (memopv2i64 addr:$src2))))]>;
+ (v8i16 (vector_shuffle VR128:$src1,
+ (bc_v8i16 (memopv2i64 addr:$src2)),
+ UNPCKH_shuffle_mask)))]>;
def PUNPCKHDQrr : PDI<0x6A, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"punpckhdq\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
- (v4i32 (unpckh VR128:$src1, VR128:$src2)))]>;
+ (v4i32 (vector_shuffle VR128:$src1, VR128:$src2,
+ UNPCKH_shuffle_mask)))]>;
def PUNPCKHDQrm : PDI<0x6A, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
"punpckhdq\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
- (unpckh VR128:$src1,
- (bc_v4i32 (memopv2i64 addr:$src2))))]>;
+ (v4i32 (vector_shuffle VR128:$src1,
+ (bc_v4i32 (memopv2i64 addr:$src2)),
+ UNPCKH_shuffle_mask)))]>;
def PUNPCKHQDQrr : PDI<0x6D, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"punpckhqdq\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
- (v2i64 (unpckh VR128:$src1, VR128:$src2)))]>;
+ (v2i64 (vector_shuffle VR128:$src1, VR128:$src2,
+ UNPCKH_shuffle_mask)))]>;
def PUNPCKHQDQrm : PDI<0x6D, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
"punpckhqdq\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
- (v2i64 (unpckh VR128:$src1,
- (memopv2i64 addr:$src2))))]>;
+ (v2i64 (vector_shuffle VR128:$src1,
+ (memopv2i64 addr:$src2),
+ UNPCKH_shuffle_mask)))]>;
}
// Extract / Insert
@@ -2311,7 +2357,8 @@ let Constraints = "$src1 = $dst" in {
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"movsd\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
- (v2f64 (movl VR128:$src1, VR128:$src2)))]>;
+ (v2f64 (vector_shuffle VR128:$src1, VR128:$src2,
+ MOVL_shuffle_mask)))]>;
}
// Store / copy lower 64-bits of a XMM register.
@@ -2402,35 +2449,44 @@ def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4i32 addr:$src)))),
// Move Instructions
def MOVSHDUPrr : S3SI<0x16, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"movshdup\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (v4f32 (movshdup
- VR128:$src, (undef))))]>;
+ [(set VR128:$dst, (v4f32 (vector_shuffle
+ VR128:$src, (undef),
+ MOVSHDUP_shuffle_mask)))]>;
def MOVSHDUPrm : S3SI<0x16, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"movshdup\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (movshdup
- (memopv4f32 addr:$src), (undef)))]>;
+ [(set VR128:$dst, (v4f32 (vector_shuffle
+ (memopv4f32 addr:$src), (undef),
+ MOVSHDUP_shuffle_mask)))]>;
def MOVSLDUPrr : S3SI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"movsldup\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (v4f32 (movsldup
- VR128:$src, (undef))))]>;
+ [(set VR128:$dst, (v4f32 (vector_shuffle
+ VR128:$src, (undef),
+ MOVSLDUP_shuffle_mask)))]>;
def MOVSLDUPrm : S3SI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"movsldup\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (movsldup
- (memopv4f32 addr:$src), (undef)))]>;
+ [(set VR128:$dst, (v4f32 (vector_shuffle
+ (memopv4f32 addr:$src), (undef),
+ MOVSLDUP_shuffle_mask)))]>;
def MOVDDUPrr : S3DI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"movddup\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst,(v2f64 (movddup VR128:$src, (undef))))]>;
+ [(set VR128:$dst,
+ (v2f64 (vector_shuffle VR128:$src, (undef),
+ MOVDDUP_shuffle_mask)))]>;
def MOVDDUPrm : S3DI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
"movddup\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (v2f64 (movddup (scalar_to_vector (loadf64 addr:$src)),
- (undef))))]>;
+ (v2f64 (vector_shuffle
+ (scalar_to_vector (loadf64 addr:$src)),
+ (undef), MOVDDUP_shuffle_mask)))]>;
-def : Pat<(movddup (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src)))),
- (undef)),
+def : Pat<(vector_shuffle
+ (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src)))),
+ (undef), MOVDDUP_shuffle_mask),
(MOVDDUPrm addr:$src)>, Requires<[HasSSE3]>;
-def : Pat<(movddup (memopv2f64 addr:$src), (undef)),
+def : Pat<(vector_shuffle
+ (memopv2f64 addr:$src), (undef), MOVDDUP_shuffle_mask),
(MOVDDUPrm addr:$src)>, Requires<[HasSSE3]>;
@@ -2499,18 +2555,22 @@ def MWAIT : I<0xC9, RawFrm, (outs), (ins), "mwait",
// vector_shuffle v1, <undef> <1, 1, 3, 3>
let AddedComplexity = 15 in
-def : Pat<(v4i32 (movshdup VR128:$src, (undef))),
+def : Pat<(v4i32 (vector_shuffle VR128:$src, (undef),
+ MOVSHDUP_shuffle_mask)),
(MOVSHDUPrr VR128:$src)>, Requires<[HasSSE3]>;
let AddedComplexity = 20 in
-def : Pat<(v4i32 (movshdup (bc_v4i32 (memopv2i64 addr:$src)), (undef))),
+def : Pat<(v4i32 (vector_shuffle (bc_v4i32 (memopv2i64 addr:$src)), (undef),
+ MOVSHDUP_shuffle_mask)),
(MOVSHDUPrm addr:$src)>, Requires<[HasSSE3]>;
// vector_shuffle v1, <undef> <0, 0, 2, 2>
let AddedComplexity = 15 in
- def : Pat<(v4i32 (movsldup VR128:$src, (undef))),
+ def : Pat<(v4i32 (vector_shuffle VR128:$src, (undef),
+ MOVSLDUP_shuffle_mask)),
(MOVSLDUPrr VR128:$src)>, Requires<[HasSSE3]>;
let AddedComplexity = 20 in
- def : Pat<(v4i32 (movsldup (bc_v4i32 (memopv2i64 addr:$src)), (undef))),
+ def : Pat<(v4i32 (vector_shuffle (bc_v4i32 (memopv2i64 addr:$src)), (undef),
+ MOVSLDUP_shuffle_mask)),
(MOVSLDUPrm addr:$src)>, Requires<[HasSSE3]>;
//===----------------------------------------------------------------------===//
@@ -2851,173 +2911,207 @@ def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
// Splat v2f64 / v2i64
let AddedComplexity = 10 in {
-def : Pat<(splat_lo (v2f64 VR128:$src), (undef)),
+def : Pat<(vector_shuffle (v2f64 VR128:$src), (undef), SSE_splat_lo_mask:$sm),
(UNPCKLPDrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
-def : Pat<(unpckh (v2f64 VR128:$src), (undef)),
+def : Pat<(vector_shuffle (v2f64 VR128:$src), (undef), UNPCKH_shuffle_mask:$sm),
(UNPCKHPDrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
-def : Pat<(splat_lo (v2i64 VR128:$src), (undef)),
+def : Pat<(vector_shuffle (v2i64 VR128:$src), (undef), SSE_splat_lo_mask:$sm),
(PUNPCKLQDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
-def : Pat<(unpckh (v2i64 VR128:$src), (undef)),
+def : Pat<(vector_shuffle (v2i64 VR128:$src), (undef), UNPCKH_shuffle_mask:$sm),
(PUNPCKHQDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
}
// Special unary SHUFPSrri case.
-def : Pat<(v4f32 (pshufd:$src3 VR128:$src1, (undef))),
- (SHUFPSrri VR128:$src1, VR128:$src1,
- (SHUFFLE_get_shuf_imm VR128:$src3))>,
+def : Pat<(v4f32 (vector_shuffle VR128:$src1, (undef),
+ SHUFP_unary_shuffle_mask:$sm)),
+ (SHUFPSrri VR128:$src1, VR128:$src1, SHUFP_unary_shuffle_mask:$sm)>,
Requires<[HasSSE1]>;
-let AddedComplexity = 5 in
-def : Pat<(v4f32 (pshufd:$src2 VR128:$src1, (undef))),
- (PSHUFDri VR128:$src1, (SHUFFLE_get_shuf_imm VR128:$src2))>,
- Requires<[HasSSE2]>;
-// Special unary SHUFPDrri case.
-def : Pat<(v2i64 (pshufd:$src3 VR128:$src1, (undef))),
- (SHUFPDrri VR128:$src1, VR128:$src1,
- (SHUFFLE_get_shuf_imm VR128:$src3))>,
- Requires<[HasSSE2]>;
// Special unary SHUFPDrri case.
-def : Pat<(v2f64 (pshufd:$src3 VR128:$src1, (undef))),
- (SHUFPDrri VR128:$src1, VR128:$src1,
- (SHUFFLE_get_shuf_imm VR128:$src3))>,
+def : Pat<(v2f64 (vector_shuffle VR128:$src1, (undef),
+ SHUFP_unary_shuffle_mask:$sm)),
+ (SHUFPDrri VR128:$src1, VR128:$src1, SHUFP_unary_shuffle_mask:$sm)>,
Requires<[HasSSE2]>;
// Unary v4f32 shuffle with PSHUF* in order to fold a load.
-def : Pat<(pshufd:$src2 (bc_v4i32 (memopv4f32 addr:$src1)), (undef)),
- (PSHUFDmi addr:$src1, (SHUFFLE_get_shuf_imm VR128:$src2))>,
+def : Pat<(vector_shuffle (bc_v4i32 (memopv4f32 addr:$src1)), (undef),
+ SHUFP_unary_shuffle_mask:$sm),
+ (PSHUFDmi addr:$src1, SHUFP_unary_shuffle_mask:$sm)>,
Requires<[HasSSE2]>;
// Special binary v4i32 shuffle cases with SHUFPS.
-def : Pat<(v4i32 (shufp:$src3 VR128:$src1, (v4i32 VR128:$src2))),
- (SHUFPSrri VR128:$src1, VR128:$src2,
- (SHUFFLE_get_shuf_imm VR128:$src3))>,
+def : Pat<(v4i32 (vector_shuffle VR128:$src1, (v4i32 VR128:$src2),
+ PSHUFD_binary_shuffle_mask:$sm)),
+ (SHUFPSrri VR128:$src1, VR128:$src2, PSHUFD_binary_shuffle_mask:$sm)>,
Requires<[HasSSE2]>;
-def : Pat<(v4i32 (shufp:$src3 VR128:$src1, (bc_v4i32 (memopv2i64 addr:$src2)))),
- (SHUFPSrmi VR128:$src1, addr:$src2,
- (SHUFFLE_get_shuf_imm VR128:$src3))>,
+def : Pat<(v4i32 (vector_shuffle VR128:$src1,
+ (bc_v4i32 (memopv2i64 addr:$src2)), PSHUFD_binary_shuffle_mask:$sm)),
+ (SHUFPSrmi VR128:$src1, addr:$src2, PSHUFD_binary_shuffle_mask:$sm)>,
Requires<[HasSSE2]>;
// Special binary v2i64 shuffle cases using SHUFPDrri.
-def : Pat<(v2i64 (shufp:$src3 VR128:$src1, VR128:$src2)),
- (SHUFPDrri VR128:$src1, VR128:$src2,
- (SHUFFLE_get_shuf_imm VR128:$src3))>,
+def : Pat<(v2i64 (vector_shuffle VR128:$src1, VR128:$src2,
+ SHUFP_shuffle_mask:$sm)),
+ (SHUFPDrri VR128:$src1, VR128:$src2, SHUFP_shuffle_mask:$sm)>,
Requires<[HasSSE2]>;
+// Special unary SHUFPDrri case.
+def : Pat<(v2i64 (vector_shuffle VR128:$src1, (undef),
+ SHUFP_unary_shuffle_mask:$sm)),
+ (SHUFPDrri VR128:$src1, VR128:$src1, SHUFP_unary_shuffle_mask:$sm)>,
+ Requires<[HasSSE2]>;
// vector_shuffle v1, <undef>, <0, 0, 1, 1, ...>
let AddedComplexity = 15 in {
-def : Pat<(v4i32 (unpckl_undef:$src2 VR128:$src, (undef))),
- (PSHUFDri VR128:$src, (SHUFFLE_get_shuf_imm VR128:$src2))>,
+def : Pat<(v4i32 (vector_shuffle VR128:$src, (undef),
+ UNPCKL_v_undef_shuffle_mask:$sm)),
+ (PSHUFDri VR128:$src, PSHUFD_shuffle_mask:$sm)>,
Requires<[OptForSpeed, HasSSE2]>;
-def : Pat<(v4f32 (unpckl_undef:$src2 VR128:$src, (undef))),
- (PSHUFDri VR128:$src, (SHUFFLE_get_shuf_imm VR128:$src2))>,
+def : Pat<(v4f32 (vector_shuffle VR128:$src, (undef),
+ UNPCKL_v_undef_shuffle_mask:$sm)),
+ (PSHUFDri VR128:$src, PSHUFD_shuffle_mask:$sm)>,
Requires<[OptForSpeed, HasSSE2]>;
}
let AddedComplexity = 10 in {
-def : Pat<(v4f32 (unpckl_undef VR128:$src, (undef))),
+def : Pat<(v4f32 (vector_shuffle VR128:$src, (undef),
+ UNPCKL_v_undef_shuffle_mask)),
(UNPCKLPSrr VR128:$src, VR128:$src)>, Requires<[HasSSE1]>;
-def : Pat<(v16i8 (unpckl_undef VR128:$src, (undef))),
+def : Pat<(v16i8 (vector_shuffle VR128:$src, (undef),
+ UNPCKL_v_undef_shuffle_mask)),
(PUNPCKLBWrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
-def : Pat<(v8i16 (unpckl_undef VR128:$src, (undef))),
+def : Pat<(v8i16 (vector_shuffle VR128:$src, (undef),
+ UNPCKL_v_undef_shuffle_mask)),
(PUNPCKLWDrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
-def : Pat<(v4i32 (unpckl_undef VR128:$src, (undef))),
+def : Pat<(v4i32 (vector_shuffle VR128:$src, (undef),
+ UNPCKL_v_undef_shuffle_mask)),
(PUNPCKLDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
}
// vector_shuffle v1, <undef>, <2, 2, 3, 3, ...>
let AddedComplexity = 15 in {
-def : Pat<(v4i32 (unpckh_undef:$src2 VR128:$src, (undef))),
- (PSHUFDri VR128:$src, (SHUFFLE_get_shuf_imm VR128:$src2))>,
+def : Pat<(v4i32 (vector_shuffle VR128:$src, (undef),
+ UNPCKH_v_undef_shuffle_mask:$sm)),
+ (PSHUFDri VR128:$src, PSHUFD_shuffle_mask:$sm)>,
Requires<[OptForSpeed, HasSSE2]>;
-def : Pat<(v4f32 (unpckh_undef:$src2 VR128:$src, (undef))),
- (PSHUFDri VR128:$src, (SHUFFLE_get_shuf_imm VR128:$src2))>,
+def : Pat<(v4f32 (vector_shuffle VR128:$src, (undef),
+ UNPCKH_v_undef_shuffle_mask:$sm)),
+ (PSHUFDri VR128:$src, PSHUFD_shuffle_mask:$sm)>,
Requires<[OptForSpeed, HasSSE2]>;
}
let AddedComplexity = 10 in {
-def : Pat<(v4f32 (unpckh_undef VR128:$src, (undef))),
+def : Pat<(v4f32 (vector_shuffle VR128:$src, (undef),
+ UNPCKH_v_undef_shuffle_mask)),
(UNPCKHPSrr VR128:$src, VR128:$src)>, Requires<[HasSSE1]>;
-def : Pat<(v16i8 (unpckh_undef VR128:$src, (undef))),
+def : Pat<(v16i8 (vector_shuffle VR128:$src, (undef),
+ UNPCKH_v_undef_shuffle_mask)),
(PUNPCKHBWrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
-def : Pat<(v8i16 (unpckh_undef VR128:$src, (undef))),
+def : Pat<(v8i16 (vector_shuffle VR128:$src, (undef),
+ UNPCKH_v_undef_shuffle_mask)),
(PUNPCKHWDrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
-def : Pat<(v4i32 (unpckh_undef VR128:$src, (undef))),
+def : Pat<(v4i32 (vector_shuffle VR128:$src, (undef),
+ UNPCKH_v_undef_shuffle_mask)),
(PUNPCKHDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
}
let AddedComplexity = 20 in {
// vector_shuffle v1, v2 <0, 1, 4, 5> using MOVLHPS
-def : Pat<(v4i32 (movhp VR128:$src1, VR128:$src2)),
+def : Pat<(v4i32 (vector_shuffle VR128:$src1, VR128:$src2,
+ MOVHP_shuffle_mask)),
(MOVLHPSrr VR128:$src1, VR128:$src2)>;
// vector_shuffle v1, v2 <6, 7, 2, 3> using MOVHLPS
-def : Pat<(v4i32 (movhlps VR128:$src1, VR128:$src2)),
+def : Pat<(v4i32 (vector_shuffle VR128:$src1, VR128:$src2,
+ MOVHLPS_shuffle_mask)),
(MOVHLPSrr VR128:$src1, VR128:$src2)>;
// vector_shuffle v1, undef <2, ?, ?, ?> using MOVHLPS
-def : Pat<(v4f32 (movhlps_undef VR128:$src1, (undef))),
+def : Pat<(v4f32 (vector_shuffle VR128:$src1, (undef),
+ MOVHLPS_v_undef_shuffle_mask)),
(MOVHLPSrr VR128:$src1, VR128:$src1)>;
-def : Pat<(v4i32 (movhlps_undef VR128:$src1, (undef))),
+def : Pat<(v4i32 (vector_shuffle VR128:$src1, (undef),
+ MOVHLPS_v_undef_shuffle_mask)),
(MOVHLPSrr VR128:$src1, VR128:$src1)>;
}
let AddedComplexity = 20 in {
// vector_shuffle v1, (load v2) <4, 5, 2, 3> using MOVLPS
// vector_shuffle v1, (load v2) <0, 1, 4, 5> using MOVHPS
-def : Pat<(v4f32 (movlp VR128:$src1, (load addr:$src2))),
+def : Pat<(v4f32 (vector_shuffle VR128:$src1, (load addr:$src2),
+ MOVLP_shuffle_mask)),
(MOVLPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE1]>;
-def : Pat<(v2f64 (movlp VR128:$src1, (load addr:$src2))),
+def : Pat<(v2f64 (vector_shuffle VR128:$src1, (load addr:$src2),
+ MOVLP_shuffle_mask)),
(MOVLPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
-def : Pat<(v4f32 (movhp VR128:$src1, (load addr:$src2))),
+def : Pat<(v4f32 (vector_shuffle VR128:$src1, (load addr:$src2),
+ MOVHP_shuffle_mask)),
(MOVHPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE1]>;
-def : Pat<(v2f64 (movhp VR128:$src1, (load addr:$src2))),
+def : Pat<(v2f64 (vector_shuffle VR128:$src1, (load addr:$src2),
+ MOVHP_shuffle_mask)),
(MOVHPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
-def : Pat<(v4i32 (movlp VR128:$src1, (load addr:$src2))),
+def : Pat<(v4i32 (vector_shuffle VR128:$src1, (load addr:$src2),
+ MOVLP_shuffle_mask)),
(MOVLPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
-def : Pat<(v2i64 (movlp VR128:$src1, (load addr:$src2))),
+def : Pat<(v2i64 (vector_shuffle VR128:$src1, (load addr:$src2),
+ MOVLP_shuffle_mask)),
(MOVLPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
-def : Pat<(v4i32 (movhp VR128:$src1, (load addr:$src2))),
+def : Pat<(v4i32 (vector_shuffle VR128:$src1, (load addr:$src2),
+ MOVHP_shuffle_mask)),
(MOVHPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE1]>;
-def : Pat<(v2i64 (movhp VR128:$src1, (load addr:$src2))),
+def : Pat<(v2i64 (vector_shuffle VR128:$src1, (load addr:$src2),
+ MOVHP_shuffle_mask)),
(MOVHPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
}
// (store (vector_shuffle (load addr), v2, <4, 5, 2, 3>), addr) using MOVLPS
// (store (vector_shuffle (load addr), v2, <0, 1, 4, 5>), addr) using MOVHPS
-def : Pat<(store (v4f32 (movlp (load addr:$src1), VR128:$src2)), addr:$src1),
+def : Pat<(store (v4f32 (vector_shuffle (load addr:$src1), VR128:$src2,
+ MOVLP_shuffle_mask)), addr:$src1),
(MOVLPSmr addr:$src1, VR128:$src2)>, Requires<[HasSSE1]>;
-def : Pat<(store (v2f64 (movlp (load addr:$src1), VR128:$src2)), addr:$src1),
+def : Pat<(store (v2f64 (vector_shuffle (load addr:$src1), VR128:$src2,
+ MOVLP_shuffle_mask)), addr:$src1),
(MOVLPDmr addr:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
-def : Pat<(store (v4f32 (movhp (load addr:$src1), VR128:$src2)), addr:$src1),
+def : Pat<(store (v4f32 (vector_shuffle (load addr:$src1), VR128:$src2,
+ MOVHP_shuffle_mask)), addr:$src1),
(MOVHPSmr addr:$src1, VR128:$src2)>, Requires<[HasSSE1]>;
-def : Pat<(store (v2f64 (movhp (load addr:$src1), VR128:$src2)), addr:$src1),
+def : Pat<(store (v2f64 (vector_shuffle (load addr:$src1), VR128:$src2,
+ MOVHP_shuffle_mask)), addr:$src1),
(MOVHPDmr addr:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
-def : Pat<(store (v4i32 (movlp (bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2)),
- addr:$src1),
+def : Pat<(store (v4i32 (vector_shuffle
+ (bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2,
+ MOVLP_shuffle_mask)), addr:$src1),
(MOVLPSmr addr:$src1, VR128:$src2)>, Requires<[HasSSE1]>;
-def : Pat<(store (v2i64 (movlp (load addr:$src1), VR128:$src2)), addr:$src1),
+def : Pat<(store (v2i64 (vector_shuffle (load addr:$src1), VR128:$src2,
+ MOVLP_shuffle_mask)), addr:$src1),
(MOVLPDmr addr:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
-def : Pat<(store (v4i32 (movhp (bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2)),
- addr:$src1),
+def : Pat<(store (v4i32 (vector_shuffle
+ (bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2,
+ MOVHP_shuffle_mask)), addr:$src1),
(MOVHPSmr addr:$src1, VR128:$src2)>, Requires<[HasSSE1]>;
-def : Pat<(store (v2i64 (movhp (load addr:$src1), VR128:$src2)), addr:$src1),
+def : Pat<(store (v2i64 (vector_shuffle (load addr:$src1), VR128:$src2,
+ MOVHP_shuffle_mask)), addr:$src1),
(MOVHPDmr addr:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
let AddedComplexity = 15 in {
// Setting the lowest element in the vector.
-def : Pat<(v4i32 (movl VR128:$src1, VR128:$src2)),
+def : Pat<(v4i32 (vector_shuffle VR128:$src1, VR128:$src2,
+ MOVL_shuffle_mask)),
(MOVLPSrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
-def : Pat<(v2i64 (movl VR128:$src1, VR128:$src2)),
+def : Pat<(v2i64 (vector_shuffle VR128:$src1, VR128:$src2,
+ MOVL_shuffle_mask)),
(MOVLPDrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
// vector_shuffle v1, v2 <4, 5, 2, 3> using MOVLPDrr (movsd)
-def : Pat<(v4f32 (movlp VR128:$src1, VR128:$src2)),
+def : Pat<(v4f32 (vector_shuffle VR128:$src1, VR128:$src2,
+ MOVLP_shuffle_mask)),
(MOVLPDrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
-def : Pat<(v4i32 (movlp VR128:$src1, VR128:$src2)),
+def : Pat<(v4i32 (vector_shuffle VR128:$src1, VR128:$src2,
+ MOVLP_shuffle_mask)),
(MOVLPDrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
}
// Set lowest element and zero upper elements.
let AddedComplexity = 15 in
-def : Pat<(v2f64 (movl immAllZerosV_bc, VR128:$src)),
+def : Pat<(v2f64 (vector_shuffle immAllZerosV_bc, VR128:$src,
+ MOVL_shuffle_mask)),
(MOVZPQILo2PQIrr VR128:$src)>, Requires<[HasSSE2]>;
def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))),
(MOVZPQILo2PQIrr VR128:$src)>, Requires<[HasSSE2]>;