diff options
author | Rafael Espindola <rafael.espindola@gmail.com> | 2009-04-24 12:40:33 +0000 |
---|---|---|
committer | Rafael Espindola <rafael.espindola@gmail.com> | 2009-04-24 12:40:33 +0000 |
commit | 15684b29552393553524171bff1913e750f390f8 (patch) | |
tree | 2d43d8f19d7fc59d2c61b282b789a704c96b16b0 /lib/Target/X86 | |
parent | f6b9f260ede8c65b0de53dc9dd3ba42c1a286c13 (diff) |
Revert 69952. Causes testsuite failures on linux x86-64.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@69967 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target/X86')
-rw-r--r-- | lib/Target/X86/X86ISelLowering.cpp | 1651 | ||||
-rw-r--r-- | lib/Target/X86/X86ISelLowering.h | 52 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrInfo.td | 1 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrMMX.td | 114 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrSSE.td | 544 |
5 files changed, 1406 insertions, 956 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 02366020d7..d51435cf87 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -45,8 +45,7 @@ static cl::opt<bool> DisableMMX("disable-mmx", cl::Hidden, cl::desc("Disable use of MMX")); // Forward declarations. -static SDValue getMOVL(SelectionDAG &DAG, DebugLoc dl, MVT VT, SDValue V1, - SDValue V2); +static SDValue getMOVLMask(unsigned NumElems, SelectionDAG &DAG, DebugLoc dl); X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) : TargetLowering(TM) { @@ -1668,7 +1667,9 @@ SDValue X86TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) { // Special case: passing MMX values in XMM registers. Arg = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i64, Arg); Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg); - Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg); + Arg = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, MVT::v2i64, + DAG.getUNDEF(MVT::v2i64), Arg, + getMOVLMask(2, DAG, dl)); break; } } @@ -2137,156 +2138,186 @@ static bool hasFPCMov(unsigned X86CC) { } } -/// isUndefOrInRange - Return true if Val is undef or if its value falls within -/// the specified range (L, H]. -static bool isUndefOrInRange(int Val, int Low, int Hi) { - return (Val < 0) || (Val >= Low && Val < Hi); +/// isUndefOrInRange - Op is either an undef node or a ConstantSDNode. Return +/// true if Op is undef or if its value falls within the specified range (L, H]. +static bool isUndefOrInRange(SDValue Op, unsigned Low, unsigned Hi) { + if (Op.getOpcode() == ISD::UNDEF) + return true; + + unsigned Val = cast<ConstantSDNode>(Op)->getZExtValue(); + return (Val >= Low && Val < Hi); } -/// isUndefOrEqual - Val is either less than zero (undef) or equal to the -/// specified value. -static bool isUndefOrEqual(int Val, int CmpVal) { - if (Val < 0 || Val == CmpVal) +/// isUndefOrEqual - Op is either an undef node or a ConstantSDNode. Return +/// true if Op is undef or if its value equal to the specified value. +static bool isUndefOrEqual(SDValue Op, unsigned Val) { + if (Op.getOpcode() == ISD::UNDEF) return true; - return false; + return cast<ConstantSDNode>(Op)->getZExtValue() == Val; } -/// isPSHUFDMask - Return true if the node specifies a shuffle of elements that -/// is suitable for input to PSHUFD or PSHUFW. That is, it doesn't reference -/// the second operand. -static bool isPSHUFDMask(const int *Mask, MVT VT) { - if (VT == MVT::v4f32 || VT == MVT::v4i32 || VT == MVT::v4i16) - return (Mask[0] < 4 && Mask[1] < 4 && Mask[2] < 4 && Mask[3] < 4); - if (VT == MVT::v2f64 || VT == MVT::v2i64) - return (Mask[0] < 2 && Mask[1] < 2); - return false; -} +/// isPSHUFDMask - Return true if the specified VECTOR_SHUFFLE operand +/// specifies a shuffle of elements that is suitable for input to PSHUFD. +bool X86::isPSHUFDMask(SDNode *N) { + assert(N->getOpcode() == ISD::BUILD_VECTOR); -bool X86::isPSHUFDMask(ShuffleVectorSDNode *N) { - return ::isPSHUFDMask(N->getMask(), N->getValueType(0)); + if (N->getNumOperands() != 2 && N->getNumOperands() != 4) + return false; + + // Check if the value doesn't reference the second vector. + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + SDValue Arg = N->getOperand(i); + if (Arg.getOpcode() == ISD::UNDEF) continue; + assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); + if (cast<ConstantSDNode>(Arg)->getZExtValue() >= e) + return false; + } + + return true; } -/// isPSHUFHWMask - Return true if the node specifies a shuffle of elements that -/// is suitable for input to PSHUFHW. -static bool isPSHUFHWMask(const int *Mask, MVT VT) { - if (VT != MVT::v8i16) +/// isPSHUFHWMask - Return true if the specified VECTOR_SHUFFLE operand +/// specifies a shuffle of elements that is suitable for input to PSHUFHW. +bool X86::isPSHUFHWMask(SDNode *N) { + assert(N->getOpcode() == ISD::BUILD_VECTOR); + + if (N->getNumOperands() != 8) return false; - - // Lower quadword copied in order or undef. - for (int i = 0; i != 4; ++i) - if (Mask[i] >= 0 && Mask[i] != i) + + // Lower quadword copied in order. + for (unsigned i = 0; i != 4; ++i) { + SDValue Arg = N->getOperand(i); + if (Arg.getOpcode() == ISD::UNDEF) continue; + assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); + if (cast<ConstantSDNode>(Arg)->getZExtValue() != i) return false; - + } + // Upper quadword shuffled. - for (int i = 4; i != 8; ++i) - if (Mask[i] >= 0 && (Mask[i] < 4 || Mask[i] > 7)) + for (unsigned i = 4; i != 8; ++i) { + SDValue Arg = N->getOperand(i); + if (Arg.getOpcode() == ISD::UNDEF) continue; + assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); + unsigned Val = cast<ConstantSDNode>(Arg)->getZExtValue(); + if (Val < 4 || Val > 7) return false; - + } + return true; } -bool X86::isPSHUFHWMask(ShuffleVectorSDNode *N) { - return ::isPSHUFHWMask(N->getMask(), N->getValueType(0)); -} +/// isPSHUFLWMask - Return true if the specified VECTOR_SHUFFLE operand +/// specifies a shuffle of elements that is suitable for input to PSHUFLW. +bool X86::isPSHUFLWMask(SDNode *N) { + assert(N->getOpcode() == ISD::BUILD_VECTOR); -/// isPSHUFLWMask - Return true if the node specifies a shuffle of elements that -/// is suitable for input to PSHUFLW. -static bool isPSHUFLWMask(const int *Mask, MVT VT) { - if (VT != MVT::v8i16) + if (N->getNumOperands() != 8) return false; - + // Upper quadword copied in order. - for (int i = 4; i != 8; ++i) - if (Mask[i] >= 0 && Mask[i] != i) + for (unsigned i = 4; i != 8; ++i) + if (!isUndefOrEqual(N->getOperand(i), i)) return false; - + // Lower quadword shuffled. - for (int i = 0; i != 4; ++i) - if (Mask[i] >= 4) + for (unsigned i = 0; i != 4; ++i) + if (!isUndefOrInRange(N->getOperand(i), 0, 4)) return false; - - return true; -} -bool X86::isPSHUFLWMask(ShuffleVectorSDNode *N) { - return ::isPSHUFLWMask(N->getMask(), N->getValueType(0)); + return true; } /// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to SHUFP*. -static bool isSHUFPMask(const int *Mask, MVT VT) { - int NumElems = VT.getVectorNumElements(); - if (NumElems != 2 && NumElems != 4) - return false; - - int Half = NumElems / 2; - for (int i = 0; i < Half; ++i) - if (!isUndefOrInRange(Mask[i], 0, NumElems)) +template<class SDOperand> +static bool isSHUFPMask(SDOperand *Elems, unsigned NumElems) { + if (NumElems != 2 && NumElems != 4) return false; + + unsigned Half = NumElems / 2; + for (unsigned i = 0; i < Half; ++i) + if (!isUndefOrInRange(Elems[i], 0, NumElems)) return false; - for (int i = Half; i < NumElems; ++i) - if (!isUndefOrInRange(Mask[i], NumElems, NumElems*2)) + for (unsigned i = Half; i < NumElems; ++i) + if (!isUndefOrInRange(Elems[i], NumElems, NumElems*2)) return false; - + return true; } -bool X86::isSHUFPMask(ShuffleVectorSDNode *N) { - return ::isSHUFPMask(N->getMask(), N->getValueType(0)); +bool X86::isSHUFPMask(SDNode *N) { + assert(N->getOpcode() == ISD::BUILD_VECTOR); + return ::isSHUFPMask(N->op_begin(), N->getNumOperands()); } /// isCommutedSHUFP - Returns true if the shuffle mask is exactly /// the reverse of what x86 shuffles want. x86 shuffles requires the lower /// half elements to come from vector 1 (which would equal the dest.) and /// the upper half to come from vector 2. -static bool isCommutedSHUFPMask(const int *Mask, MVT VT) { - int NumElems = VT.getVectorNumElements(); - - if (NumElems != 2 && NumElems != 4) - return false; - - int Half = NumElems / 2; - for (int i = 0; i < Half; ++i) - if (!isUndefOrInRange(Mask[i], NumElems, NumElems*2)) +template<class SDOperand> +static bool isCommutedSHUFP(SDOperand *Ops, unsigned NumOps) { + if (NumOps != 2 && NumOps != 4) return false; + + unsigned Half = NumOps / 2; + for (unsigned i = 0; i < Half; ++i) + if (!isUndefOrInRange(Ops[i], NumOps, NumOps*2)) return false; - for (int i = Half; i < NumElems; ++i) - if (!isUndefOrInRange(Mask[i], 0, NumElems)) + for (unsigned i = Half; i < NumOps; ++i) + if (!isUndefOrInRange(Ops[i], 0, NumOps)) return false; return true; } -static bool isCommutedSHUFP(ShuffleVectorSDNode *N) { - return isCommutedSHUFPMask(N->getMask(), N->getValueType(0)); +static bool isCommutedSHUFP(SDNode *N) { + assert(N->getOpcode() == ISD::BUILD_VECTOR); + return isCommutedSHUFP(N->op_begin(), N->getNumOperands()); } /// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to MOVHLPS. -bool X86::isMOVHLPSMask(ShuffleVectorSDNode *N) { - if (N->getValueType(0).getVectorNumElements() != 4) +bool X86::isMOVHLPSMask(SDNode *N) { + assert(N->getOpcode() == ISD::BUILD_VECTOR); + + if (N->getNumOperands() != 4) return false; // Expect bit0 == 6, bit1 == 7, bit2 == 2, bit3 == 3 - const int *Mask = N->getMask(); - return isUndefOrEqual(Mask[0], 6) && - isUndefOrEqual(Mask[1], 7) && - isUndefOrEqual(Mask[2], 2) && - isUndefOrEqual(Mask[3], 3); + return isUndefOrEqual(N->getOperand(0), 6) && + isUndefOrEqual(N->getOperand(1), 7) && + isUndefOrEqual(N->getOperand(2), 2) && + isUndefOrEqual(N->getOperand(3), 3); +} + +/// isMOVHLPS_v_undef_Mask - Special case of isMOVHLPSMask for canonical form +/// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef, +/// <2, 3, 2, 3> +bool X86::isMOVHLPS_v_undef_Mask(SDNode *N) { + assert(N->getOpcode() == ISD::BUILD_VECTOR); + + if (N->getNumOperands() != 4) + return false; + + // Expect bit0 == 2, bit1 == 3, bit2 == 2, bit3 == 3 + return isUndefOrEqual(N->getOperand(0), 2) && + isUndefOrEqual(N->getOperand(1), 3) && + isUndefOrEqual(N->getOperand(2), 2) && + isUndefOrEqual(N->getOperand(3), 3); } /// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to MOVLP{S|D}. -bool X86::isMOVLPMask(ShuffleVectorSDNode *N) { - unsigned NumElems = N->getValueType(0).getVectorNumElements(); +bool X86::isMOVLPMask(SDNode *N) { + assert(N->getOpcode() == ISD::BUILD_VECTOR); + unsigned NumElems = N->getNumOperands(); if (NumElems != 2 && NumElems != 4) return false; - const int *Mask = N->getMask(); for (unsigned i = 0; i < NumElems/2; ++i) - if (!isUndefOrEqual(Mask[i], i + NumElems)) + if (!isUndefOrEqual(N->getOperand(i), i + NumElems)) return false; for (unsigned i = NumElems/2; i < NumElems; ++i) - if (!isUndefOrEqual(Mask[i], i)) + if (!isUndefOrEqual(N->getOperand(i), i)) return false; return true; @@ -2295,49 +2326,37 @@ bool X86::isMOVLPMask(ShuffleVectorSDNode *N) { /// isMOVHPMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to MOVHP{S|D} /// and MOVLHPS. -bool X86::isMOVHPMask(ShuffleVectorSDNode *N) { - unsigned NumElems = N->getValueType(0).getVectorNumElements(); +bool X86::isMOVHPMask(SDNode *N) { + assert(N->getOpcode() == ISD::BUILD_VECTOR); + unsigned NumElems = N->getNumOperands(); if (NumElems != 2 && NumElems != 4) return false; - const int *Mask = N->getMask(); for (unsigned i = 0; i < NumElems/2; ++i) - if (!isUndefOrEqual(Mask[i], i)) + if (!isUndefOrEqual(N->getOperand(i), i)) return false; - for (unsigned i = 0; i < NumElems/2; ++i) - if (!isUndefOrEqual(Mask[i + NumElems/2], i + NumElems)) + for (unsigned i = 0; i < NumElems/2; ++i) { + SDValue Arg = N->getOperand(i + NumElems/2); + if (!isUndefOrEqual(Arg, i + NumElems)) return false; + } return true; } -/// isMOVHLPS_v_undef_Mask - Special case of isMOVHLPSMask for canonical form -/// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef, -/// <2, 3, 2, 3> -bool X86::isMOVHLPS_v_undef_Mask(ShuffleVectorSDNode *N) { - unsigned NumElems = N->getValueType(0).getVectorNumElements(); - - if (NumElems != 4) - return false; - - // Expect bit0 == 2, bit1 == 3, bit2 == 2, bit3 == 3 - const int *Mask = N->getMask(); - return isUndefOrEqual(Mask[0], 2) && isUndefOrEqual(Mask[1], 3) && - isUndefOrEqual(Mask[2], 2) && isUndefOrEqual(Mask[3], 3); -} - /// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to UNPCKL. -static bool isUNPCKLMask(const int *Mask, MVT VT, bool V2IsSplat = false) { - int NumElts = VT.getVectorNumElements(); +template<class SDOperand> +bool static isUNPCKLMask(SDOperand *Elts, unsigned NumElts, + bool V2IsSplat = false) { if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16) return false; - - for (int i = 0, j = 0; i != NumElts; i += 2, ++j) { - int BitI = Mask[i]; - int BitI1 = Mask[i+1]; + + for (unsigned i = 0, j = 0; i != NumElts; i += 2, ++j) { + SDValue BitI = Elts[i]; + SDValue BitI1 = Elts[i+1]; if (!isUndefOrEqual(BitI, j)) return false; if (V2IsSplat) { @@ -2348,23 +2367,26 @@ static bool isUNPCKLMask(const int *Mask, MVT VT, bool V2IsSplat = false) { return false; } } + return true; } -bool X86::isUNPCKLMask(ShuffleVectorSDNode *N, bool V2IsSplat) { - return ::isUNPCKLMask(N->getMask(), N->getValueType(0), V2IsSplat); +bool X86::isUNPCKLMask(SDNode *N, bool V2IsSplat) { + assert(N->getOpcode() == ISD::BUILD_VECTOR); + return ::isUNPCKLMask(N->op_begin(), N->getNumOperands(), V2IsSplat); } /// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to UNPCKH. -static bool isUNPCKHMask(const int *Mask, MVT VT, bool V2IsSplat = false) { - int NumElts = VT.getVectorNumElements(); +template<class SDOperand> +bool static isUNPCKHMask(SDOperand *Elts, unsigned NumElts, + bool V2IsSplat = false) { if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16) return false; - - for (int i = 0, j = 0; i != NumElts; i += 2, ++j) { - int BitI = Mask[i]; - int BitI1 = Mask[i+1]; + + for (unsigned i = 0, j = 0; i != NumElts; i += 2, ++j) { + SDValue BitI = Elts[i]; + SDValue BitI1 = Elts[i+1]; if (!isUndefOrEqual(BitI, j + NumElts/2)) return false; if (V2IsSplat) { @@ -2375,166 +2397,270 @@ static bool isUNPCKHMask(const int *Mask, MVT VT, bool V2IsSplat = false) { return false; } } + return true; } -bool X86::isUNPCKHMask(ShuffleVectorSDNode *N, bool V2IsSplat) { - return ::isUNPCKHMask(N->getMask(), N->getValueType(0), V2IsSplat); +bool X86::isUNPCKHMask(SDNode *N, bool V2IsSplat) { + assert(N->getOpcode() == ISD::BUILD_VECTOR); + return ::isUNPCKHMask(N->op_begin(), N->getNumOperands(), V2IsSplat); } /// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form /// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef, /// <0, 0, 1, 1> -static bool isUNPCKL_v_undef_Mask(const int *Mask, MVT VT) { - int NumElems = VT.getVectorNumElements(); +bool X86::isUNPCKL_v_undef_Mask(SDNode *N) { + assert(N->getOpcode() == ISD::BUILD_VECTOR); + + unsigned NumElems = N->getNumOperands(); if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16) return false; - - for (int i = 0, j = 0; i != NumElems; i += 2, ++j) { - int BitI = Mask[i]; - int BitI1 = Mask[i+1]; + + for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) { + SDValue BitI = N->getOperand(i); + SDValue BitI1 = N->getOperand(i+1); + if (!isUndefOrEqual(BitI, j)) return false; if (!isUndefOrEqual(BitI1, j)) return false; } - return true; -} -bool X86::isUNPCKL_v_undef_Mask(ShuffleVectorSDNode *N) { - return ::isUNPCKL_v_undef_Mask(N->getMask(), N->getValueType(0)); + return true; } /// isUNPCKH_v_undef_Mask - Special case of isUNPCKHMask for canonical form /// of vector_shuffle v, v, <2, 6, 3, 7>, i.e. vector_shuffle v, undef, /// <2, 2, 3, 3> -static bool isUNPCKH_v_undef_Mask(const int *Mask, MVT VT) { - int NumElems = VT.getVectorNumElements(); +bool X86::isUNPCKH_v_undef_Mask(SDNode *N) { + assert(N->getOpcode() == ISD::BUILD_VECTOR); + + unsigned NumElems = N->getNumOperands(); if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16) return false; - - for (int i = 0, j = NumElems / 2; i != NumElems; i += 2, ++j) { - int BitI = Mask[i]; - int BitI1 = Mask[i+1]; + + for (unsigned i = 0, j = NumElems / 2; i != NumElems; i += 2, ++j) { + SDValue BitI = N->getOperand(i); + SDValue BitI1 = N->getOperand(i + 1); + if (!isUndefOrEqual(BitI, j)) return false; if (!isUndefOrEqual(BitI1, j)) return false; } - return true; -} -bool X86::isUNPCKH_v_undef_Mask(ShuffleVectorSDNode *N) { - return ::isUNPCKH_v_undef_Mask(N->getMask(), N->getValueType(0)); + return true; } /// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to MOVSS, /// MOVSD, and MOVD, i.e. setting the lowest element. -static bool isMOVLMask(const int *Mask, MVT VT) { - int NumElts = VT.getVectorNumElements(); +template<class SDOperand> +static bool isMOVLMask(SDOperand *Elts, unsigned NumElts) { if (NumElts != 2 && NumElts != 4) return false; - - if (!isUndefOrEqual(Mask[0], NumElts)) + + if (!isUndefOrEqual(Elts[0], NumElts)) return false; - - for (int i = 1; i < NumElts; ++i) - if (!isUndefOrEqual(Mask[i], i)) + + for (unsigned i = 1; i < NumElts; ++i) { + if (!isUndefOrEqual(Elts[i], i)) return false; - + } + return true; } -bool X86::isMOVLMask(ShuffleVectorSDNode *N) { - return ::isMOVLMask(N->getMask(), N->getValueType(0)); +bool X86::isMOVLMask(SDNode *N) { + assert(N->getOpcode() == ISD::BUILD_VECTOR); + return ::isMOVLMask(N->op_begin(), N->getNumOperands()); } /// isCommutedMOVL - Returns true if the shuffle mask is except the reverse /// of what x86 movss want. X86 movs requires the lowest element to be lowest /// element of vector 2 and the other elements to come from vector 1 in order. -static bool isCommutedMOVLMask(const int *Mask, MVT VT, bool V2IsSplat = false, - bool V2IsUndef = false) { - int NumOps = VT.getVectorNumElements(); +template<class SDOperand> +static bool isCommutedMOVL(SDOperand *Ops, unsigned NumOps, + bool V2IsSplat = false, + bool V2IsUndef = false) { if (NumOps != 2 && NumOps != 4 && NumOps != 8 && NumOps != 16) return false; - - if (!isUndefOrEqual(Mask[0], 0)) + + if (!isUndefOrEqual(Ops[0], 0)) return false; - - for (int i = 1; i < NumOps; ++i) - if (!(isUndefOrEqual(Mask[i], i+NumOps) || - (V2IsUndef && isUndefOrInRange(Mask[i], NumOps, NumOps*2)) || - (V2IsSplat && isUndefOrEqual(Mask[i], NumOps)))) + + for (unsigned i = 1; i < NumOps; ++i) { + SDValue Arg = Ops[i]; + if (!(isUndefOrEqual(Arg, i+NumOps) || + (V2IsUndef && isUndefOrInRange(Arg, NumOps, NumOps*2)) || + (V2IsSplat && isUndefOrEqual(Arg, NumOps)))) return false; - + } + return true; } -static bool isCommutedMOVL(ShuffleVectorSDNode *N, bool V2IsSplat = false, +static bool isCommutedMOVL(SDNode *N, bool V2IsSplat = false, bool V2IsUndef = false) { - return isCommutedMOVLMask(N->getMask(), N->getValueType(0), V2IsSplat, - V2IsUndef); + assert(N->getOpcode() == ISD::BUILD_VECTOR); + return isCommutedMOVL(N->op_begin(), N->getNumOperands(), + V2IsSplat, V2IsUndef); } /// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to MOVSHDUP. -bool X86::isMOVSHDUPMask(ShuffleVectorSDNode *N) { - if (N->getValueType(0).getVectorNumElements() != 4) +bool X86::isMOVSHDUPMask(SDNode *N) { + assert(N->getOpcode() == ISD::BUILD_VECTOR); + + if (N->getNumOperands() != 4) return false; // Expect 1, 1, 3, 3 - const int *Mask = N->getMask(); - for (unsigned i = 0; i < 2; ++i) - if (Mask[i] >=0 && Mask[i] != 1) - return false; + for (unsigned i = 0; i < 2; ++i) { + SDValue Arg = N->getOperand(i); + if (Arg.getOpcode() == ISD::UNDEF) continue; + assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); + unsigned Val = cast<ConstantSDNode>(Arg)->getZExtValue(); + if (Val != 1) return false; + } bool HasHi = false; for (unsigned i = 2; i < 4; ++i) { - if (Mask[i] >= 0 && Mask[i] != 3) - return false; - if (Mask[i] == 3) - HasHi = true; + SDValue Arg = N->getOperand(i); + if (Arg.getOpcode() == ISD::UNDEF) continue; + assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); + unsigned Val = cast<ConstantSDNode>(Arg)->getZExtValue(); + if (Val != 3) return false; + HasHi = true; } + // Don't use movshdup if it can be done with a shufps. - // FIXME: verify that matching u, u, 3, 3 is what we want. return HasHi; } /// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to MOVSLDUP. -bool X86::isMOVSLDUPMask(ShuffleVectorSDNode *N) { - if (N->getValueType(0).getVectorNumElements() != 4) +bool X86::isMOVSLDUPMask(SDNode *N) { + assert(N->getOpcode() == ISD::BUILD_VECTOR); + + if (N->getNumOperands() != 4) return false; // Expect 0, 0, 2, 2 - const int *Mask = N->getMask(); - for (unsigned i = 0; i < 2; ++i) - if (Mask[i] > 0) - return false; + for (unsigned i = 0; i < 2; ++i) { + SDValue Arg = N->getOperand(i); + if (Arg.getOpcode() == ISD::UNDEF) continue; + assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); + unsigned Val = cast<ConstantSDNode>(Arg)->getZExtValue(); + if (Val != 0) return false; + } bool HasHi = false; for (unsigned i = 2; i < 4; ++i) { - if (Mask[i] >= 0 && Mask[i] != 2) - return false; - if (Mask[i] == 2) - HasHi = true; + SDValue Arg = N->getOperand(i); + if (Arg.getOpcode() == ISD::UNDEF) continue; + assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); + unsigned Val = cast<ConstantSDNode>(Arg)->getZExtValue(); + if (Val != 2) return false; + HasHi = true; } - // Don't use movsldup if it can be done with a shufps. + + // Don't use movshdup if it can be done with a shufps. return HasHi; } +/// isIdentityMask - Return true if the specified VECTOR_SHUFFLE operand +/// specifies a identity operation on the LHS or RHS. +static bool isIdentityMask(SDNode *N, bool RHS = false) { + unsigned NumElems = N->getNumOperands(); + for (unsigned i = 0; i < NumElems; ++i) + if (!isUndefOrEqual(N->getOperand(i), i + (RHS ? NumElems : 0))) + return false; + return true; +} + +/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies +/// a splat of a single element. +static bool isSplatMask(SDNode *N) { + assert(N->getOpcode() == ISD::BUILD_VECTOR); + + // This is a splat operation if each element of the permute is the same, and + // if the value doesn't reference the second vector. + unsigned NumElems = N->getNumOperands(); + SDValue ElementBase; + unsigned i = 0; + for (; i != NumElems; ++i) { + SDValue Elt = N->getOperand(i); + if (isa<ConstantSDNode>(Elt)) { + ElementBase = Elt; + break; + } + } + + if (!ElementBase.getNode()) + return false; + + for (; i != NumElems; ++i) { + SDValue Arg = N->getOperand(i); + if (Arg.getOpcode() == ISD::UNDEF) continue; + assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); + if (Arg != ElementBase) return false; + } + + // Make sure it is a splat of the first vector operand. + return cast<ConstantSDNode>(ElementBase)->getZExtValue() < NumElems; +} + +/// getSplatMaskEltNo - Given a splat mask, return the index to the element +/// we want to splat. +static SDValue getSplatMaskEltNo(SDNode *N) { + assert(isSplatMask(N) && "Not a splat mask"); + unsigned NumElems = N->getNumOperands(); + SDValue ElementBase; + unsigned i = 0; + for (; i != NumElems; ++i) { + SDValue Elt = N->getOperand(i); + if (isa<ConstantSDNode>(Elt)) + return Elt; + } + assert(0 && " No splat value found!"); + return SDValue(); +} + + +/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies +/// a splat of a single element and it's a 2 or 4 element mask. +bool X86::isSplatMask(SDNode *N) { + assert(N->getOpcode() == ISD::BUILD_VECTOR); + + // We can only splat 64-bit, and 32-bit quantities with a single instruction. + if (N->getNumOperands() != 4 && N->getNumOperands() != 2) + return false; + return ::isSplatMask(N); +} + +/// isSplatLoMask - Return true if the specified VECTOR_SHUFFLE operand +/// specifies a splat of zero element. +bool X86::isSplatLoMask(SDNode *N) { + assert(N->getOpcode() == ISD::BUILD_VECTOR); + + for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i) + if (!isUndefOrEqual(N->getOperand(i), 0)) + return false; + return true; +} + /// isMOVDDUPMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to MOVDDUP. -bool X86::isMOVDDUPMask(ShuffleVectorSDNode *N) { - int e = N->getValueType(0).getVectorNumElements() / 2; - const int *Mask = N->getMask(); - - for (int i = 0; i < e; ++i) - if (!isUndefOrEqual(Mask[i], i)) +bool X86::isMOVDDUPMask(SDNode *N) { + assert(N->getOpcode() == ISD::BUILD_VECTOR); + + unsigned e = N->getNumOperands() / 2; + for (unsigned i = 0; i < e; ++i) + if (!isUndefOrEqual(N->getOperand(i), i)) return false; - for (int i = 0; i < e; ++i) - if (!isUndefOrEqual(Mask[e+i], i)) + for (unsigned i = 0; i < e; ++i) + if (!isUndefOrEqual(N->getOperand(e+i), i)) return false; return true; } @@ -2543,20 +2669,20 @@ bool X86::isMOVDDUPMask(ShuffleVectorSDNode *N) { /// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP* /// instructions. unsigned X86::getShuffleSHUFImmediate(SDNode *N) { - ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); - int NumOperands = SVOp->getValueType(0).getVectorNumElements(); - const int *MaskP = SVOp->getMask(); - + unsigned NumOperands = N->getNumOperands(); unsigned Shift = (NumOperands == 4) ? 2 : 1; unsigned Mask = 0; - for (int i = 0; i < NumOperands; ++i) { - int Val = MaskP[NumOperands-i-1]; - if (Val < 0) Val = 0; + for (unsigned i = 0; i < NumOperands; ++i) { + unsigned Val = 0; + SDValue Arg = N->getOperand(NumOperands-i-1); + if (Arg.getOpcode() != ISD::UNDEF) + Val = cast<ConstantSDNode>(Arg)->getZExtValue(); if (Val >= NumOperands) Val -= NumOperands; Mask |= Val; if (i != NumOperands - 1) Mask <<= Shift; } + return Mask; } @@ -2564,16 +2690,19 @@ unsigned X86::getShuffleSHUFImmediate(SDNode *N) { /// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFHW /// instructions. unsigned X86::getShufflePSHUFHWImmediate(SDNode *N) { - const int *MaskP = cast<ShuffleVectorSDNode>(N)->getMask(); unsigned Mask = 0; // 8 nodes, but we only care about the last 4. for (unsigned i = 7; i >= 4; --i) { - int Val = MaskP[i]; - if (Val >= 0) + unsigned Val = 0; + SDValue Arg = N->getOperand(i); + if (Arg.getOpcode() != ISD::UNDEF) { + Val = cast<ConstantSDNode>(Arg)->getZExtValue(); Mask |= (Val - 4); + } if (i != 4) Mask <<= 2; } + return Mask; } @@ -2581,71 +2710,90 @@ unsigned X86::getShufflePSHUFHWImmediate(SDNode *N) { /// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFLW /// instructions. unsigned X86::getShufflePSHUFLWImmediate(SDNode *N) { - const int *MaskP = cast<ShuffleVectorSDNode>(N)->getMask(); unsigned Mask = 0; // 8 nodes, but we only care about the first 4. for (int i = 3; i >= 0; --i) { - int Val = MaskP[i]; - if (Val >= 0) - Mask |= Val; + unsigned Val = 0; + SDValue Arg = N->getOperand(i); + if (Arg.getOpcode() != ISD::UNDEF) + Val = cast<ConstantSDNode>(Arg)->getZExtValue(); + Mask |= Val; if (i != 0) Mask <<= 2; } + return Mask; } -/// CommuteVectorShuffle - Swap vector_shuffle operands as well as values in -/// their permute mask. -static SDValue CommuteVectorShuffle(ShuffleVectorSDNode *SVOp, - SelectionDAG &DAG) { - MVT VT = SVOp->getValueType(0); - int NumElems = VT.getVectorNumElements(); - const int *Mask = SVOp->getMask(); - SmallVector<int, 8> MaskVec; - - for (int i = 0; i != NumElems; ++i) { - int idx = Mask[i]; - if (idx < 0) - MaskVec.push_back(idx); - else if (idx < NumElems) - MaskVec.push_back(idx + NumElems); +/// CommuteVectorShuffle - Swap vector_shuffle operands as well as +/// values in ther permute mask. +static SDValue CommuteVectorShuffle(SDValue Op, SDValue &V1, + SDValue &V2, SDValue &Mask, + SelectionDAG &DAG) { + MVT VT = Op.getValueType(); + MVT MaskVT = Mask.getValueType(); + MVT EltVT = MaskVT.getVectorElementType(); + unsigned NumElems = Mask.getNumOperands(); + SmallVector<SDValue, 8> MaskVec; + DebugLoc dl = Op.getDebugLoc(); + + for (unsigned i = 0; i != NumElems; ++i) { + SDValue Arg = Mask.getOperand(i); + if (Arg.getOpcode() == ISD::UNDEF) { + MaskVec.push_back(DAG.getUNDEF(EltVT)); + continue; + } + assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); + unsigned Val = cast<ConstantSDNode>(Arg)->getZExtValue(); + if (Val < NumElems) + MaskVec.push_back(DAG.getConstant(Val + NumElems, EltVT)); else - MaskVec.push_back(idx - NumElems); + MaskVec.push_back(DAG.getConstant(Val - NumElems, EltVT)); } - return DAG.getVectorShuffle(VT, SVOp->getDebugLoc(), SVOp->getOperand(1), - SVOp->getOperand(0), &MaskVec[0]); + + std::swap(V1, V2); + Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, &MaskVec[0], NumElems); + return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V1, V2, Mask); } /// CommuteVectorShuffleMask - Change values in a shuffle permute mask assuming /// the two vector operands have swapped position. -static void CommuteVectorShuffleMask(SmallVectorImpl<int> &Mask, MVT VT) { - int NumElems = VT.getVectorNumElements(); - for (int i = 0; i != NumElems; ++i) { - int idx = Mask[i]; - if (idx < 0) +static +SDValue CommuteVectorShuffleMask(SDValue Mask, SelectionDAG &DAG, DebugLoc dl) { + MVT MaskVT = Mask.getValueType(); + MVT EltVT = MaskVT.getVectorElementType(); + unsigned NumElems = Mask.getNumOperands(); + SmallVector<SDValue, 8> MaskVec; + for (unsigned i = 0; i != NumElems; ++i) { + SDValue Arg = Mask.getOperand(i); + if (Arg.getOpcode() == ISD::UNDEF) { + MaskVec.push_back(DAG.getUNDEF(EltVT)); continue; - else if (idx < NumElems) - Mask[i] = idx + NumElems; + } + assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); + unsigned Val = cast<ConstantSDNode>(Arg)->getZExtValue(); + if (Val < NumElems) + MaskVec.push_back(DAG.getConstant(Val + NumElems, EltVT)); else - Mask[i] = idx - NumElems; + MaskVec.push_back(DAG.getConstant(Val - NumElems, EltVT)); } + return DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, &MaskVec[0], NumElems); } + /// ShouldXformToMOVHLPS - Return true if the node should be transformed to /// match movhlps. The lower half elements should come from upper half of /// V1 (and in order), and the upper half elements should come from the upper /// half of V2 (and in order). -static bool ShouldXformToMOVHLPS(ShuffleVectorSDNode *Op) { - int NumElems = Op->getValueType(0).getVectorNumElements(); - const int *Mask = Op->getMask(); - +static bool ShouldXformToMOVHLPS(SDNode *Mask) { + unsigned NumElems = Mask->getNumOperands(); if (NumElems != 4) return false; for (unsigned i = 0, e = 2; i != e; ++i) - if (!isUndefOrEqual(Mask[i], i+2)) + if (!isUndefOrEqual(Mask->getOperand(i), i+2)) return false; for (unsigned i = 2; i != 4; ++i) - if (!isUndefOrEqual(Mask[i], i+4)) + if (!isUndefOrEqual(Mask->getOperand(i), i+4)) return false; return true; } @@ -2669,8 +2817,7 @@ static bool isScalarLoadToVector(SDNode *N, LoadSDNode **LD = NULL) { /// V1 (and in order), and the upper half elements should come from the upper /// half of V2 (and in order). And since V1 will become the source of the /// MOVLP, it must be either a vector load or a scalar load to vector. -static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2, - ShuffleVectorSDNode *Op) { +static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2, SDNode *Mask) { if (!ISD::isNON_EXTLoad(V1) && !isScalarLoadToVector(V1)) return false; // Is V2 is a vector load, don't do this transformation. We will try to use @@ -2678,16 +2825,14 @@ static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2, if (ISD::isNON_EXTLoad(V2)) return false; - int NumElems = Op->getValueType(0).getVectorNumElements(); - const int *Mask = Op->getMask(); - + unsigned NumElems = Mask->getNumOperands(); if (NumElems != 2 && NumElems != 4) return false; - for (int i = 0, e = NumElems/2; i != e; ++i) - if (!isUndefOrEqual(Mask[i], i)) + for (unsigned i = 0, e = NumElems/2; i != e; ++i) + if (!isUndefOrEqual(Mask->getOperand(i), i)) return false; - for (int i = NumElems/2; i != NumElems; ++i) - if (!isUndefOrEqual(Mask[i], i+NumElems)) + for (unsigned i = NumElems/2; i != NumElems; ++i) + if (!isUndefOrEqual(Mask->getOperand(i), i+NumElems)) return false; return true; } @@ -2705,6 +2850,29 @@ static bool isSplatVector(SDNode *N) { return true; } +/// isUndefShuffle - Returns true if N is a VECTOR_SHUFFLE that can be resolved +/// to an undef. +static bool isUndefShuffle(SDNode *N) { + if (N->getOpcode() != ISD::VECTOR_SHUFFLE) + return false; + + SDValue V1 = N->getOperand(0); + SDValue V2 = N->getOperand(1); + SDValue Mask = N->getOperand(2); + unsigned NumElems = Mask.getNumOperands(); + for (unsigned i = 0; i != NumElems; ++i) { + SDValue Arg = Mask.getOperand(i); + if (Arg.getOpcode() != ISD::UNDEF) { + unsigned Val = cast<ConstantSDNode>(Arg)->getZExtValue(); + if (Val < NumElems && V1.getOpcode() != ISD::UNDEF) + return false; + else if (Val >= NumElems && V2.getOpcode() != ISD::UNDEF) + return false; + } + } + return true; +} + /// isZeroNode - Returns true if Elt is a constant zero or a floating point /// constant +0.0. static inline bool isZeroNode(SDValue Elt) { @@ -2715,26 +2883,34 @@ static inline bool isZeroNode(SDValue Elt) { } /// isZeroShuffle - Returns true if N is a VECTOR_SHUFFLE that can be resolved -/// to an zero vector. -/// FIXME: move to dag combiner? -static bool isZeroShuffle(ShuffleVectorSDNode *N) { +/// to an zero vector. +static bool isZeroShuffle(SDNode *N) { + if (N->getOpcode() != ISD::VECTOR_SHUFFLE) + return false; + SDValue V1 = N->getOperand(0); SDValue V2 = N->getOperand(1); - const int *Mask = N->getMask(); - int NumElems = N->getValueType(0).getVectorNumElements(); - for (int i = 0; i != NumElems; ++i) { - int Idx = Mask[i]; - if (Idx >= NumElems) { - unsigned Opc = V2.getOpcode(); - if (Opc == ISD::UNDEF || ISD::isBuildVectorAllZeros(V2.getNode())) + SDValue Mask = N->getOperand(2); + unsigned NumElems = Mask.getNumOperands(); + for (unsigned i = 0; i != NumElems; ++i) { + SDValue Arg = Mask.getOperand(i); + if (Arg.getOpcode() == ISD::UNDEF) + continue; + + unsigned Idx = cast<ConstantSDNode>(Arg)->getZExtValue(); + if (Idx < NumElems) { + unsigned Opc = V1.getNode()->getOpcode(); + if (Opc == ISD::UNDEF || ISD::isBuildVectorAllZeros(V1.getNode())) continue; - if (Opc != ISD::BUILD_VECTOR || !isZeroNode(V2.getOperand(Idx-NumElems))) + if (Opc != ISD::BUILD_VECTOR || + !isZeroNode(V1.getNode()->getOperand(Idx))) return false; - } else if (Idx >= 0) { - unsigned Opc = V1.getOpcode(); - if (Opc == ISD::UNDEF || ISD::isBuildVectorAllZeros(V1.getNode())) + } else if (Idx >= NumElems) { + unsigned Opc = V2.getNode()->getOpcode(); + if (Opc == ISD::UNDEF || ISD::isBuildVectorAllZeros(V2.getNode())) continue; - if (Opc != ISD::BUILD_VECTOR || !isZeroNode(V1.getOperand(Idx))) + if (Opc != ISD::BUILD_VECTOR || + !isZeroNode(V2.getNode()->getOperand(Idx - NumElems))) return false; } } @@ -2782,94 +2958,127 @@ static SDValue getOnesVector(MVT VT, SelectionDAG &DAG, DebugLoc dl) { /// NormalizeMask - V2 is a splat, modify the mask (if needed) so all elements /// that point to V2 points to its first element. -static SDValue NormalizeMask(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) { - MVT VT = SVOp->getValueType(0); - int NumElems = VT.getVectorNumElements(); - const int *Mask = SVOp->getMask(); - +static SDValue NormalizeMask(SDValue Mask, SelectionDAG &DAG) { + assert(Mask.getOpcode() == ISD::BUILD_VECTOR); + bool Changed = false; - SmallVector<int, 8> MaskVec; - - for (int i = 0; i != NumElems; ++i) { - int idx = Mask[i]; - if (idx > NumElems) { - idx = NumElems; - Changed = true; + SmallVector<SDValue, 8> MaskVec; + unsigned NumElems = Mask.getNumOperands(); + for (unsigned i = 0; i != NumElems; ++i) { + SDValue Arg = Mask.getOperand(i); + if (Arg.getOpcode() != ISD::UNDEF) { + unsigned Val = cast<ConstantSDNode>(Arg)->getZExtValue(); + if (Val > NumElems) { + Arg = DAG.getConstant(NumElems, Arg.getValueType()); + Changed = true; + } } - MaskVec.push_back(idx); + MaskVec.push_back(Arg); } + if (Changed) - return DAG.getVectorShuffle(VT, SVOp->getDebugLoc(), SVOp->getOperand(0), - SVOp->getOperand(1), &MaskVec[0]); - return SDValue(SVOp, 0); + Mask = DAG.getNode(ISD::BUILD_VECTOR, Mask.getDebugLoc(), + Mask.getValueType(), + &MaskVec[0], MaskVec.size()); + return Mask; } /// getMOVLMask - Returns a vector_shuffle mask for an movs{s|d}, movd /// operation of specified width. -static SDValue getMOVL(SelectionDAG &DAG, DebugLoc dl, MVT VT, SDValue V1, - SDValue V2) { - unsigned NumElems = VT.getVectorNumElements(); - SmallVector<int, 8> Mask; - Mask.push_back(NumElems); - for (unsigned i = 1; i != NumElems; ++i) - Mask.push_back(i); - return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]); -} +static SDValue getMOVLMask(unsigned NumElems, SelectionDAG &DAG, DebugLoc dl) { + MVT MaskVT = MVT::getIntVectorWithNumElements(NumElems); + MVT BaseVT = MaskVT.getVectorElementType(); -/// getUnpackl - Returns a vector_shuffle node for an unpackl operation. -static SDValue getUnpackl(SelectionDAG &DAG, DebugLoc dl, MVT VT, SDValue V1, - SDValue V2) { - unsigned NumElems = VT.getVectorNumElements(); - SmallVector<int, 8> Mask; + SmallVector<SDValue, 8> MaskVec; + MaskVec.push_back(DAG.getConstant(NumElems, BaseVT)); + for (unsigned i = 1; i != NumElems; ++i) + MaskVec.push_back(DAG.getConstant(i, BaseVT)); + return DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, + &MaskVec[0], MaskVec.size()); +} + +/// getUnpacklMask - Returns a vector_shuffle mask for an unpackl operation +/// of specified width. +static SDValue getUnpacklMask(unsigned NumElems, SelectionDAG &DAG, + DebugLoc dl) { + MVT MaskVT = MVT::getIntVectorWithNumElements(NumElems); + MVT BaseVT = MaskVT.getVectorElementType(); + SmallVector<SDValue, 8> MaskVec; for (unsigned i = 0, e = NumElems/2; i != e; ++i) { - Mask.push_back(i); - Mask.push_back(i + NumElems); + MaskVec.push_back(DAG.getConstant(i, BaseVT)); + MaskVec.push_back(DAG.getConstant(i + NumElems, BaseVT)); } - return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]); + return DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, + &MaskVec[0], MaskVec.size()); } -/// getUnpackhMask - Returns a vector_shuffle node for an unpackh operation. -static SDValue getUnpackh(SelectionDAG &DAG, DebugLoc dl, MVT VT, SDValue V1, - SDValue V2) { - unsigned NumElems = VT.getVectorNumElements(); +/// getUnpackhMask - Returns a vector_shuffle mask for an unpackh operation +/// of specified width. +static SDValue getUnpackhMask(unsigned NumElems, SelectionDAG &DAG, + DebugLoc dl) { + MVT MaskVT = MVT::getIntVectorWithNumElements(NumElems); + MVT BaseVT = MaskVT.getVectorElementType(); unsigned Half = NumElems/2; - SmallVector<int, 8> Mask; + SmallVector<SDValue, 8> MaskVec; for (unsigned i = 0; i != Half; ++i) { - Mask.push_back(i + Half); - Mask.push_back(i + NumElems + Half); - } - return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]); + MaskVec.push_back(DAG.getConstant(i + Half, BaseVT)); + MaskVec.push_back(DAG.getConstant(i + NumElems + Half, BaseVT)); + } + return DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, + &MaskVec[0], MaskVec.size()); +} + +/// getSwapEltZeroMask - Returns a vector_shuffle mask for a shuffle that swaps +/// element #0 of a vector with the specified index, leaving the rest of the +/// elements in place. +static SDValue getSwapEltZeroMask(unsigned NumElems, unsigned DestElt, + SelectionDAG &DAG, DebugLoc dl) { + MVT MaskVT = MVT::getIntVectorWithNumElements(NumElems); + MVT BaseVT = MaskVT.getVectorElementType(); + SmallVector<SDValue, 8> MaskVec; + // Element #0 of the result gets the elt we are replacing. + MaskVec.push_back(DAG.getConstant(DestElt, BaseVT)); + for (unsigned i = 1; i != NumElems; ++i) + MaskVec.push_back(DAG.getConstant(i == DestElt ? 0 : i, BaseVT)); + return DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, + &MaskVec[0], MaskVec.size()); } /// PromoteSplat - Promote a splat of v4f32, v8i16 or v16i8 to v4i32. -static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG, - bool HasSSE2) { - if (SV->getValueType(0).getVectorNumElements() <= 4) - return SDValue(SV, 0); - - MVT PVT = MVT::v4f32; - MVT VT = SV->getValueType(0); - DebugLoc dl = SV->getDebugLoc(); - SDValue V1 = SV->getOperand(0); - int NumElems = VT.getVectorNumElements(); - int EltNo = SV->getSplatIndex(); - - // unpack elements to the correct location - while (NumElems > 4) { - if (EltNo < NumElems/2) { - V1 = getUnpackl(DAG, dl, VT, V1, V1); - } else { - V1 = getUnpackh(DAG, dl, VT, V1, V1); - EltNo -= NumElems/2; +static SDValue PromoteSplat(SDValue Op, SelectionDAG &DAG, bool HasSSE2) { + MVT PVT = HasSSE2 ? MVT::v4i32 : MVT::v4f32; + MVT VT = Op.getValueType(); + if (PVT == VT) + return Op; + SDValue V1 = Op.getOperand(0); + SDValue Mask = Op.getOperand(2); + unsigned MaskNumElems = Mask.getNumOperands(); + unsigned NumElems = MaskNumElems; + DebugLoc dl = Op.getDebugLoc(); + // Special handling of v4f32 -> v4i32. + if (VT != MVT::v4f32) { + // Find which element we want to splat. + SDNode* EltNoNode = getSplatMaskEltNo(Mask.getNode()).getNode(); + unsigned EltNo = cast<ConstantSDNode>(EltNoNode)->getZExtValue(); + // unpack elements to the correct location + while (NumElems > 4) { + if (EltNo < NumElems/2) { + Mask = getUnpacklMask(MaskNumElems, DAG, dl); + } else { + Mask = getUnpackhMask(MaskNumElems, DAG, dl); + EltNo -= NumElems/2; + } + V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V1, V1, Mask); + NumElems >>= 1; } - NumElems >>= 1; + SDValue Cst = DAG.getConstant(EltNo, MVT::i32); + Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst); } - - // Perform the splat. - int SplatMask[4] = { EltNo, EltNo, EltNo, EltNo }; + V1 = DAG.getNode(ISD::BIT_CONVERT, dl, PVT, V1); - V1 = DAG.getVectorShuffle(PVT, dl, V1, DAG.getUNDEF(PVT), &SplatMask[0]); - return DAG.getNode(ISD::BIT_CONVERT, dl, VT, V1); + SDValue Shuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, PVT, V1, + DAG.getUNDEF(PVT), Mask); + return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Shuffle); } /// isVectorLoad - Returns true if the node is a vector load, a scalar @@ -2886,28 +3095,32 @@ static bool isVectorLoad(SDValue Op) { /// CanonicalizeMovddup - Cannonicalize movddup shuffle to v2f64. /// -static SDValue CanonicalizeMovddup(ShuffleVectorSDNode *SV, SelectionDAG &DAG, - bool HasSSE3) { +static SDValue CanonicalizeMovddup(SDValue Op, SDValue V1, SDValue Mask, + SelectionDAG &DAG, bool HasSSE3) { // If we have sse3 and shuffle has more than one use or input is a load, then // use movddup. Otherwise, use movlhps. - SDValue V1 = SV->getOperand(0); - - bool UseMovddup = HasSSE3 && (!SV->hasOneUse() || isVectorLoad(V1)); + bool UseMovddup = HasSSE3 && (!Op.hasOneUse() || isVectorLoad(V1)); MVT PVT = UseMovddup ? MVT::v2f64 : MVT::v4f32; - MVT VT = SV->getValueType(0); + MVT VT = Op.getValueType(); if (VT == PVT) - return SDValue(SV, 0); - - DebugLoc dl = SV->getDebugLoc(); - V1 = DAG.getNode(ISD::BIT_CONVERT, dl, PVT, V1); - if (PVT.getVectorNumElements() == 2) { - int Mask[2] = { 0, 0 }; - V1 = DAG.getVectorShuffle(PVT, dl, V1, DAG.getUNDEF(PVT), Mask); + return Op; + DebugLoc dl = Op.getDebugLoc(); + unsigned NumElems = PVT.getVectorNumElements(); + if (NumElems == 2) { + SDValue Cst = DAG.getTargetConstant(0, MVT::i32); + Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i32, Cst, Cst); } else { - int Mask[4] = { 0, 1, 0, 1 }; - V1 = DAG.getVectorShuffle(PVT, dl, V1, DAG.getUNDEF(PVT), Mask); + assert(NumElems == 4); + SDValue Cst0 = DAG.getTargetConstant(0, MVT::i32); + SDValue Cst1 = DAG.getTargetConstant(1, MVT::i32); + Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, + Cst0, Cst1, Cst0, Cst1); } - return DAG.getNode(ISD::BIT_CONVERT, dl, VT, V1); + + V1 = DAG.getNode(ISD::BIT_CONVERT, dl, PVT, V1); + SDValue Shuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, PVT, V1, + DAG.getUNDEF(PVT), Mask); + return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Shuffle); } /// getShuffleVectorZeroOrUndef - Return a vector_shuffle of the specified @@ -2917,31 +3130,39 @@ static SDValue CanonicalizeMovddup(ShuffleVectorSDNode *SV, SelectionDAG &DAG, static SDValue getShuffleVectorZeroOrUndef(SDValue V2, unsigned Idx, bool isZero, bool HasSSE2, SelectionDAG &DAG) { + DebugLoc dl = V2.getDebugLoc(); MVT VT = V2.getValueType(); SDValue V1 = isZero - ? getZeroVector(VT, HasSSE2, DAG, V2.getDebugLoc()) : DAG.getUNDEF(VT); - unsigned NumElems = VT.getVectorNumElements(); - SmallVector<int, 16> MaskVec; + ? getZeroVector(VT, HasSSE2, DAG, dl) : DAG.getUNDEF(VT); + unsigned NumElems = V2.getValueType().getVectorNumElements(); + MVT MaskVT = MVT::getIntVectorWithNumElements(NumElems); + MVT EVT = MaskVT.getVectorElementType(); + SmallVector<SDValue, 16> MaskVec; for (unsigned i = 0; i != NumElems; ++i) - // If this is the insertion idx, put the low elt of V2 here. - MaskVec.push_back(i == Idx ? NumElems : i); - return DAG.getVectorShuffle(VT, V2.getDebugLoc(), V1, V2, &MaskVec[0]); + if (i == Idx) // If this is the insertion idx, put the low elt of V2 here. + MaskVec.push_back(DAG.getConstant(NumElems, EVT)); + else + MaskVec.push_back(DAG.getConstant(i, EVT)); + SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, + &MaskVec[0], MaskVec.size()); + return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V1, V2, Mask); } /// getNumOfConsecutiveZeros - Return the number of elements in a result of /// a shuffle that is zero. static -unsigned getNumOfConsecutiveZeros(ShuffleVectorSDNode *SVOp, const int *Mask, - int NumElems, bool Low, SelectionDAG &DAG) { +unsigned getNumOfConsecutiveZeros(SDValue Op, SDValue Mask, + unsigned NumElems, bool Low, + SelectionDAG &DAG) { unsigned NumZeros = 0; - for (int i = 0; i < NumElems; ++i) { + for (unsigned i = 0; i < NumElems; ++i) { unsigned Index = Low ? i : NumElems-i-1; - int Idx = Mask[Index]; - if (Idx < 0) { + SDValue Idx = Mask.getOperand(Index); + if (Idx.getOpcode() == ISD::UNDEF) { ++NumZeros; continue; } - SDValue Elt = DAG.getShuffleScalarElt(SVOp, Index); + SDValue Elt = DAG.getShuffleScalarElt(Op.getNode(), Index); if (Elt.getNode() && isZeroNode(Elt)) ++NumZeros; else @@ -2952,40 +3173,40 @@ unsigned getNumOfConsecutiveZeros(ShuffleVectorSDNode *SVOp, const int *Mask, /// isVectorShift - Returns true if the shuffle can be implemented as a /// logical left or right shift of a vector. -/// FIXME: split into pslldqi, psrldqi, palignr variants. -static bool isVectorShift(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG, +static bool isVectorShift(SDValue Op, SDValue Mask, SelectionDAG &DAG, bool &isLeft, SDValue &ShVal, unsigned &ShAmt) { - const int *Mask = SVOp->getMask(); - int NumElems = SVOp->getValueType(0).getVectorNumElements(); + unsigned NumElems = Mask.getNumOperands(); isLeft = true; - unsigned NumZeros = getNumOfConsecutiveZeros(SVOp, Mask, NumElems, true, DAG); + unsigned NumZeros= getNumOfConsecutiveZeros(Op, Mask, NumElems, true, DAG); if (!NumZeros) { isLeft = false; - NumZeros = getNumOfConsecutiveZeros(SVOp, Mask, NumElems, false, DAG); + NumZeros = getNumOfConsecutiveZeros(Op, Mask, NumElems, false, DAG); if (!NumZeros) return false; } + bool SeenV1 = false; bool SeenV2 = false; - for (int i = NumZeros; i < NumElems; ++i) { - int Val = isLeft ? (i - NumZeros) : i; - int Idx = Mask[isLeft ? i : (i - NumZeros)]; - if (Idx < 0) + for (unsigned i = NumZeros; i < NumElems; ++i) { + unsigned Val = isLeft ? (i - NumZeros) : i; + SDValue Idx = Mask.getOperand(isLeft ? i : (i - NumZeros)); + if (Idx.getOpcode() == ISD::UNDEF) continue; - if (Idx < NumElems) + unsigned Index = cast<ConstantSDNode>(Idx)->getZExtValue(); + if (Index < NumElems) SeenV1 = true; else { - Idx -= NumElems; + Index -= NumElems; SeenV2 = true; } - if (Idx != Val) + if (Index != Val) return false; } if (SeenV1 && SeenV2) return false; - ShVal = SeenV1 ? SVOp->getOperand(0) : SVOp->getOperand(1); + ShVal = SeenV1 ? Op.getOperand(0) : Op.getOperand(1); ShAmt = NumZeros; return true; } @@ -3070,8 +3291,8 @@ static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros, /// getVShift - Return a vector logical shift node. /// static SDValue getVShift(bool isLeft, MVT VT, SDValue SrcOp, - unsigned NumBits, SelectionDAG &DAG, - const TargetLowering &TLI, DebugLoc dl) { + unsigned NumBits, SelectionDAG &DAG, + const TargetLowering &TLI, DebugLoc dl) { bool isMMX = VT.getSizeInBits() == 64; MVT ShVT = isMMX ? MVT::v1i64 : MVT::v2i64; unsigned Opc = isLeft ? X86ISD::VSHL : X86ISD::VSRL; @@ -3156,13 +3377,11 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { // Now we have our 32-bit value zero extended in the low element of // a vector. If Idx != 0, swizzle it into place. if (Idx != 0) { - SmallVector<int, 4> Mask; - Mask.push_back(Idx); - for (unsigned i = 1; i != VecElts; ++i) - Mask.push_back(i); - Item = DAG.getVectorShuffle(VecVT, dl, Item, - DAG.getUNDEF(Item.getValueType()), - &Mask[0]); + SDValue Ops[] = { + Item, DAG.getUNDEF(Item.getValueType()), + getSwapEltZeroMask(VecElts, Idx, DAG, dl) + }; + Item = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VecVT, Ops, 3); } return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Item); } @@ -3206,10 +3425,15 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { // Turn it into a shuffle of zero and zero-extended scalar to vector. Item = getShuffleVectorZeroOrUndef(Item, 0, NumZero > 0, Subtarget->hasSSE2(), DAG); - SmallVector<int, 8> MaskVec; + MVT MaskVT = MVT::getIntVectorWithNumElements(NumElems); + MVT MaskEVT = MaskVT.getVectorElementType(); + SmallVector<SDValue, 8> MaskVec; for (unsigned i = 0; i < NumElems; i++) - MaskVec.push_back(i == Idx ? 0 : 1); - return DAG.getVectorShuffle(VT, dl, Item, DAG.getUNDEF(VT), &MaskVec[0]); + MaskVec.push_back(DAG.getConstant((i == Idx) ? 0 : 1, MaskEVT)); + SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, + &MaskVec[0], MaskVec.size()); + return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, Item, + DAG.getUNDEF(VT), Mask); } } @@ -3267,48 +3491,54 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { V[i] = V[i*2]; // Must be a zero vector. break; case 1: - V[i] = getMOVL(DAG, dl, VT, V[i*2+1], V[i*2]); + V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V[i*2+1], V[i*2], + getMOVLMask(NumElems, DAG, dl)); break; case 2: - V[i] = getMOVL(DAG, dl, VT, V[i*2], V[i*2+1]); + V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V[i*2], V[i*2+1], + getMOVLMask(NumElems, DAG, dl)); break; case 3: - V[i] = getUnpackl(DAG, dl, VT, V[i*2], V[i*2+1]); + V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V[i*2], V[i*2+1], + getUnpacklMask(NumElems, DAG, dl)); break; } } - SmallVector<int, 8> MaskVec; + MVT MaskVT = MVT::getIntVectorWithNumElements(NumElems); + MVT EVT = MaskVT.getVectorElementType(); + SmallVector<SDValue, 8> MaskVec; bool Reverse = (NonZeros & 0x3) == 2; for (unsigned i = 0; i < 2; ++i) - MaskVec.push_back(Reverse ? 1-i : i); + if (Reverse) + MaskVec.push_back(DAG.getConstant(1-i, EVT)); + else + MaskVec.push_back(DAG.getConstant(i, EVT)); Reverse = ((NonZeros & (0x3 << 2)) >> 2) == 2; for (unsigned i = 0; i < 2; ++i) - MaskVec.push_back(Reverse ? 1-i+NumElems : i+NumElems); - return DAG.getVectorShuffle(VT, dl, V[0], V[1], &MaskVec[0]); + if (Reverse) + MaskVec.push_back(DAG.getConstant(1-i+NumElems, EVT)); + else + MaskVec.push_back(DAG.getConstant(i+NumElems, EVT)); + SDValue ShufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, + &MaskVec[0], MaskVec.size()); + return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V[0], V[1], ShufMask); } if (Values.size() > 2) { - // If we have SSE 4.1, Expand into a number of inserts. - if (getSubtarget()->hasSSE41()) { - V[0] = DAG.getUNDEF(VT); - for (unsigned i = 0; i < NumElems; ++i) - if (Op.getOperand(i).getOpcode() != ISD::UNDEF) - V[0] = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, V[0], - Op.getOperand(i), DAG.getIntPtrConstant(i)); - return V[0]; - } // Expand into a number of unpckl*. // e.g. for v4f32 // Step 1: unpcklps 0, 2 ==> X: <?, ?, 2, 0> // : unpcklps 1, 3 ==> Y: <?, ?, 3, 1> // Step 2: unpcklps X, Y ==> <3, 2, 1, 0> + SDValue UnpckMask = getUnpacklMask(NumElems, DAG, dl); for (unsigned i = 0; i < NumElems; ++i) V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(i)); NumElems >>= 1; while (NumElems != 0) { for (unsigned i = 0; i < NumElems; ++i) - V[i] = getUnpackl(DAG, dl, VT, V[i], V[i + NumElems]); + V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V[i], V[i + NumElems], + UnpckMask); NumElems >>= 1; } return V[0]; @@ -3323,12 +3553,11 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { // 3. [ssse3] 2 x pshufb + 1 x por // 4. [all] mov + pshuflw + pshufhw + N x (pextrw + pinsrw) static -SDValue LowerVECTOR_SHUFFLEv8i16(ShuffleVectorSDNode *SVOp, - SelectionDAG &DAG, X86TargetLowering &TLI) { - SDValue V1 = SVOp->getOperand(0); - SDValue V2 = SVOp->getOperand(1); - DebugLoc dl = SVOp->getDebugLoc(); - const int *Mask = SVOp->getMask(); +SDValue LowerVECTOR_SHUFFLEv8i16(SDValue V1, SDValue V2, + SDValue PermMask, SelectionDAG &DAG, + X86TargetLowering &TLI, DebugLoc dl) { + SmallVector<SDValue, 8> MaskElts(PermMask.getNode()->op_begin(), + PermMask.getNode()->op_end()); SmallVector<int, 8> MaskVals; // Determine if more than 1 of the words in each of the low and high quadwords @@ -3339,7 +3568,9 @@ SDValue LowerVECTOR_SHUFFLEv8i16(ShuffleVectorSDNode *SVOp, BitVector InputQuads(4); for (unsigned i = 0; i < 8; ++i) { SmallVectorImpl<unsigned> &Quad = i < 4 ? LoQuad : HiQuad; - int EltIdx = Mask[i]; + SDValue Elt = MaskElts[i]; + int EltIdx = Elt.getOpcode() == ISD::UNDEF ? -1 : + cast<ConstantSDNode>(Elt)->getZExtValue(); MaskVals.push_back(EltIdx); if (EltIdx < 0) { ++Quad[0]; @@ -3392,12 +3623,14 @@ SDValue LowerVECTOR_SHUFFLEv8i16(ShuffleVectorSDNode *SVOp, // words from all 4 input quadwords. SDValue NewV; if (BestLoQuad >= 0 || BestHiQuad >= 0) { - SmallVector<int, 8> MaskV; - MaskV.push_back(BestLoQuad < 0 ? 0 : BestLoQuad); - MaskV.push_back(BestHiQuad < 0 ? 1 : BestHiQuad); - NewV = DAG.getVectorShuffle(MVT::v2i64, dl, - DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2i64, V1), - DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2i64, V2), &MaskV[0]); + SmallVector<SDValue,8> MaskV; + MaskV.push_back(DAG.getConstant(BestLoQuad < 0 ? 0 : BestLoQuad, MVT::i64)); + MaskV.push_back(DAG.getConstant(BestHiQuad < 0 ? 1 : BestHiQuad, MVT::i64)); + SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, &MaskV[0], 2); + + NewV = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, MVT::v2i64, + DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2i64, V1), + DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2i64, V2), Mask); NewV = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v8i16, NewV); // Rewrite the MaskVals and assign NewV to V1 if NewV now contains all the @@ -3435,8 +3668,15 @@ SDValue LowerVECTOR_SHUFFLEv8i16(ShuffleVectorSDNode *SVOp, // If we've eliminated the use of V2, and the new mask is a pshuflw or // pshufhw, that's as cheap as it gets. Return the new shuffle. if ((pshufhw && InOrder[0]) || (pshuflw && InOrder[1])) { - return DAG.getVectorShuffle(MVT::v8i16, dl, NewV, - DAG.getUNDEF(MVT::v8i16), &MaskVals[0]); + MaskV.clear(); + for (unsigned i = 0; i != 8; ++i) + MaskV.push_back((MaskVals[i] < 0) ? DAG.getUNDEF(MVT::i16) + : DAG.getConstant(MaskVals[i], + MVT::i16)); + return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, MVT::v8i16, NewV, + DAG.getUNDEF(MVT::v8i16), + DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i16, + &MaskV[0], 8)); } } @@ -3493,45 +3733,49 @@ SDValue LowerVECTOR_SHUFFLEv8i16(ShuffleVectorSDNode *SVOp, // and update MaskVals with new element order. BitVector InOrder(8); if (BestLoQuad >= 0) { - SmallVector<int, 8> MaskV; + SmallVector<SDValue, 8> MaskV; for (int i = 0; i != 4; ++i) { int idx = MaskVals[i]; if (idx < 0) { - MaskV.push_back(-1); + MaskV.push_back(DAG.getUNDEF(MVT::i16)); InOrder.set(i); } else if ((idx / 4) == BestLoQuad) { - MaskV.push_back(idx & 3); + MaskV.push_back(DAG.getConstant(idx & 3, MVT::i16)); InOrder.set(i); } else { - MaskV.push_back(-1); + MaskV.push_back(DAG.getUNDEF(MVT::i16)); } } for (unsigned i = 4; i != 8; ++i) - MaskV.push_back(i); - NewV = DAG.getVectorShuffle(MVT::v8i16, dl, NewV, DAG.getUNDEF(MVT::v8i16), - &MaskV[0]); + MaskV.push_back(DAG.getConstant(i, MVT::i16)); + NewV = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, MVT::v8i16, NewV, + DAG.getUNDEF(MVT::v8i16), + DAG.getNode(ISD::BUILD_VECTOR, dl, + MVT::v8i16, &MaskV[0], 8)); } // If BestHi >= 0, generate a pshufhw to put the high elements in order, // and update MaskVals with the new element order. if (BestHiQuad >= 0) { - SmallVector<int, 8> MaskV; + SmallVector<SDValue, 8> MaskV; for (unsigned i = 0; i != 4; ++i) - MaskV.push_back(i); + MaskV.push_back(DAG.getConstant(i, MVT::i16)); for (unsigned i = 4; i != 8; ++i) { int idx = MaskVals[i]; if (idx < 0) { - MaskV.push_back(-1); + MaskV.push_back(DAG.getUNDEF(MVT::i16)); InOrder.set(i); } else if ((idx / 4) == BestHiQuad) { - MaskV.push_back((idx & 3) + 4); + MaskV.push_back(DAG.getConstant((idx & 3) + 4, MVT::i16)); InOrder.set(i); } else { - MaskV.push_back(-1); + MaskV.push_back(DAG.getUNDEF(MVT::i16)); } } - NewV = DAG.getVectorShuffle(MVT::v8i16, dl, NewV, DAG.getUNDEF(MVT::v8i16), - &MaskV[0]); + NewV = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, MVT::v8i16, NewV, + DAG.getUNDEF(MVT::v8i16), + DAG.getNode(ISD::BUILD_VECTOR, dl, + MVT::v8i16, &MaskV[0], 8)); } // In case BestHi & BestLo were both -1, which means each quadword has a word @@ -3567,12 +3811,11 @@ SDValue LowerVECTOR_SHUFFLEv8i16(ShuffleVectorSDNode *SVOp, // 2. [ssse3] 2 x pshufb + 1 x por // 3. [all] v8i16 shuffle + N x pextrw + rotate + pinsrw static -SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp, - SelectionDAG &DAG, X86TargetLowering &TLI) { - SDValue V1 = SVOp->getOperand(0); - SDValue V2 = SVOp->getOperand(1); - DebugLoc dl = SVOp->getDebugLoc(); - const int *Mask = SVOp->getMask(); +SDValue LowerVECTOR_SHUFFLEv16i8(SDValue V1, SDValue V2, + SDValue PermMask, SelectionDAG &DAG, + X86TargetLowering &TLI, DebugLoc dl) { + SmallVector<SDValue, 16> MaskElts(PermMask.getNode()->op_begin(), + PermMask.getNode()->op_end()); SmallVector<int, 16> MaskVals; // If we have SSSE3, case 1 is generated when all result bytes come from @@ -3582,7 +3825,9 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp, bool V1Only = true; bool V2Only = true; for (unsigned i = 0; i < 16; ++i) { - int EltIdx = Mask[i]; + SDValue Elt = MaskElts[i]; + int EltIdx = Elt.getOpcode() == ISD::UNDEF ? -1 : + cast<ConstantSDNode>(Elt)->getZExtValue(); MaskVals.push_back(EltIdx); if (EltIdx < 0) continue; @@ -3713,14 +3958,11 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp, /// the right sequence. e.g. /// vector_shuffle <>, <>, < 3, 4, | 10, 11, | 0, 1, | 14, 15> static -SDValue RewriteAsNarrowerShuffle(ShuffleVectorSDNode *SVOp, - SelectionDAG &DAG, - TargetLowering &TLI, DebugLoc dl) { - MVT VT = SVOp->getValueType(0); - SDValue V1 = SVOp->getOperand(0); - SDValue V2 = SVOp->getOperand(1); - const int *PermMask = SVOp->getMask(); - unsigned NumElems = VT.getVectorNumElements(); +SDValue RewriteAsNarrowerShuffle(SDValue V1, SDValue V2, + MVT VT, + SDValue PermMask, SelectionDAG &DAG, + TargetLowering &TLI, DebugLoc dl) { + unsigned NumElems = PermMask.getNumOperands(); unsigned NewWidth = (NumElems == 4) ? 2 : 4; MVT MaskVT = MVT::getIntVectorWithNumElements(NewWidth); MVT MaskEltVT = MaskVT.getVectorElementType(); @@ -3739,35 +3981,38 @@ SDValue RewriteAsNarrowerShuffle(ShuffleVectorSDNode *SVOp, else NewVT = MVT::v2f64; } - int Scale = NumElems / NewWidth; - SmallVector<int, 8> MaskVec; + unsigned Scale = NumElems / NewWidth; + SmallVector<SDValue, 8> MaskVec; for (unsigned i = 0; i < NumElems; i += Scale) { - int StartIdx = -1; - for (int j = 0; j < Scale; ++j) { - int EltIdx = PermMask[i+j]; - if (EltIdx < 0) + unsigned StartIdx = ~0U; + for (unsigned j = 0; j < Scale; ++j) { + SDValue Elt = PermMask.getOperand(i+j); + if (Elt.getOpcode() == ISD::UNDEF) continue; - if (StartIdx == -1) + unsigned EltIdx = cast<ConstantSDNode>(Elt)->getZExtValue(); + if (StartIdx == ~0U) StartIdx = EltIdx - (EltIdx % Scale); if (EltIdx != StartIdx + j) return SDValue(); } - if (StartIdx == -1) - MaskVec.push_back(-1); + if (StartIdx == ~0U) + MaskVec.push_back(DAG.getUNDEF(MaskEltVT)); else - MaskVec.push_back(StartIdx / Scale); + MaskVec.push_back(DAG.getConstant(StartIdx / Scale, MaskEltVT)); } V1 = DAG.getNode(ISD::BIT_CONVERT, dl, NewVT, V1); V2 = DAG.getNode(ISD::BIT_CONVERT, dl, NewVT, V2); - return DAG.getVectorShuffle(NewVT, dl, V1, V2, &MaskVec[0]); + return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, NewVT, V1, V2, + DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, + &MaskVec[0], MaskVec.size())); } /// getVZextMovL - Return a zero-extending vector move low node. /// static SDValue getVZextMovL(MVT VT, MVT OpVT, - SDValue SrcOp, SelectionDAG &DAG, - const X86Subtarget *Subtarget, DebugLoc dl) { + SDValue SrcOp, SelectionDAG &DAG, + const X86Subtarget *Subtarget, DebugLoc dl) { if (VT == MVT::v2f64 || VT == MVT::v4f32) { LoadSDNode *LD = NULL; if (!isScalarLoadToVector(SrcOp.getNode(), &LD)) @@ -3801,37 +4046,31 @@ static SDValue getVZextMovL(MVT VT, MVT OpVT, /// LowerVECTOR_SHUFFLE_4wide - Handle all 4 wide cases with a number of /// shuffles. static SDValue -LowerVECTOR_SHUFFLE_4wide(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) { - SDValue V1 = SVOp->getOperand(0); - SDValue V2 = SVOp->getOperand(1); - DebugLoc dl = SVOp->getDebugLoc(); - MVT VT = SVOp->getValueType(0); - const int *PermMaskPtr = SVOp->getMask(); - +LowerVECTOR_SHUFFLE_4wide(SDValue V1, SDValue V2, + SDValue PermMask, MVT VT, SelectionDAG &DAG, + DebugLoc dl) { + MVT MaskVT = PermMask.getValueType(); + MVT MaskEVT = MaskVT.getVectorElementType(); SmallVector<std::pair<int, int>, 8> Locs; Locs.resize(4); - SmallVector<int, 8> Mask1(4U, -1); - SmallVector<int, 8> PermMask; - - for (unsigned i = 0; i != 8; ++i) - PermMask.push_back(PermMaskPtr[i]); - + SmallVector<SDValue, 8> Mask1(4, DAG.getUNDEF(MaskEVT)); unsigned NumHi = 0; unsigned NumLo = 0; for (unsigned i = 0; i != 4; ++i) { - int Idx = PermMask[i]; - if (Idx < 0) { + SDValue Elt = PermMask.getOperand(i); + if (Elt.getOpcode() == ISD::UNDEF) { Locs[i] = std::make_pair(-1, -1); } else { - assert(Idx < 8 && "Invalid VECTOR_SHUFFLE index!"); - if (Idx < 4) { + unsigned Val = cast<ConstantSDNode>(Elt)->getZExtValue(); + assert(Val < 8 && "Invalid VECTOR_SHUFFLE index!"); + if (Val < 4) { Locs[i] = std::make_pair(0, NumLo); - Mask1[NumLo] = Idx; + Mask1[NumLo] = Elt; NumLo++; } else { Locs[i] = std::make_pair(1, NumHi); if (2+NumHi < 4) - Mask1[2+NumHi] = Idx; + Mask1[2+NumHi] = Elt; NumHi++; } } @@ -3842,21 +4081,24 @@ LowerVECTOR_SHUFFLE_4wide(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) { // implemented with two shuffles. First shuffle gather the elements. // The second shuffle, which takes the first shuffle as both of its // vector operands, put the elements into the right order. - V1 = DAG.getVectorShuffle(VT, dl, V1, V2, &Mask1[0]); + V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V1, V2, + DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, + &Mask1[0], Mask1.size())); - SmallVector<int, 8> Mask2(4U, -1); - + SmallVector<SDValue, 8> Mask2(4, DAG.getUNDEF(MaskEVT)); for (unsigned i = 0; i != 4; ++i) { if (Locs[i].first == -1) continue; else { unsigned Idx = (i < 2) ? 0 : 4; Idx += Locs[i].first * 2 + Locs[i].second; - Mask2[i] = Idx; + Mask2[i] = DAG.getConstant(Idx, MaskEVT); } } - return DAG.getVectorShuffle(VT, dl, V1, V1, &Mask2[0]); + return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V1, V1, + DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, + &Mask2[0], Mask2.size())); } else if (NumLo == 3 || NumHi == 3) { // Otherwise, we must have three elements from one vector, call it X, and // one element from the other, call it Y. First, use a shufps to build an @@ -3867,51 +4109,60 @@ LowerVECTOR_SHUFFLE_4wide(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) { // from X. if (NumHi == 3) { // Normalize it so the 3 elements come from V1. - CommuteVectorShuffleMask(PermMask, VT); + PermMask = CommuteVectorShuffleMask(PermMask, DAG, dl); std::swap(V1, V2); } // Find the element from V2. unsigned HiIndex; for (HiIndex = 0; HiIndex < 3; ++HiIndex) { - int Val = PermMask[HiIndex]; - if (Val < 0) + SDValue Elt = PermMask.getOperand(HiIndex); + if (Elt.getOpcode() == ISD::UNDEF) continue; + unsigned Val = cast<ConstantSDNode>(Elt)->getZExtValue(); if (Val >= 4) break; } - Mask1[0] = PermMask[HiIndex]; - Mask1[1] = -1; - Mask1[2] = PermMask[HiIndex^1]; - Mask1[3] = -1; - V2 = DAG.getVectorShuffle(VT, dl, V1, V2, &Mask1[0]); + Mask1[0] = PermMask.getOperand(HiIndex); + Mask1[1] = DAG.getUNDEF(MaskEVT); + Mask1[2] = PermMask.getOperand(HiIndex^1); + Mask1[3] = DAG.getUNDEF(MaskEVT); + V2 = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V1, V2, + DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, &Mask1[0], 4)); if (HiIndex >= 2) { - Mask1[0] = PermMask[0]; - Mask1[1] = PermMask[1]; - Mask1[2] = HiIndex & 1 ? 6 : 4; - Mask1[3] = HiIndex & 1 ? 4 : 6; - return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask1[0]); + Mask1[0] = PermMask.getOperand(0); + Mask1[1] = PermMask.getOperand(1); + Mask1[2] = DAG.getConstant(HiIndex & 1 ? 6 : 4, MaskEVT); + Mask1[3] = DAG.getConstant(HiIndex & 1 ? 4 : 6, MaskEVT); + return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V1, V2, + DAG.getNode(ISD::BUILD_VECTOR, dl, + MaskVT, &Mask1[0], 4)); } else { - Mask1[0] = HiIndex & 1 ? 2 : 0; - Mask1[1] = HiIndex & 1 ? 0 : 2; - Mask1[2] = PermMask[2]; - Mask1[3] = PermMask[3]; - if (Mask1[2] >= 0) - Mask1[2] += 4; - if (Mask1[3] >= 0) - Mask1[3] += 4; - return DAG.getVectorShuffle(VT, dl, V2, V1, &Mask1[0]); + Mask1[0] = DAG.getConstant(HiIndex & 1 ? 2 : 0, MaskEVT); + Mask1[1] = DAG.getConstant(HiIndex & 1 ? 0 : 2, MaskEVT); + Mask1[2] = PermMask.getOperand(2); + Mask1[3] = PermMask.getOperand(3); + if (Mask1[2].getOpcode() != ISD::UNDEF) + Mask1[2] = + DAG.getConstant(cast<ConstantSDNode>(Mask1[2])->getZExtValue()+4, + MaskEVT); + if (Mask1[3].getOpcode() != ISD::UNDEF) + Mask1[3] = + DAG.getConstant(cast<ConstantSDNode>(Mask1[3])->getZExtValue()+4, + MaskEVT); + return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V2, V1, + DAG.getNode(ISD::BUILD_VECTOR, dl, + MaskVT, &Mask1[0], 4)); } } // Break it into (shuffle shuffle_hi, shuffle_lo). Locs.clear(); - SmallVector<int,8> LoMask(4U, -1); - SmallVector<int,8> HiMask(4U, -1); - - SmallVector<int,8> *MaskPtr = &LoMask; + SmallVector<SDValue,8> LoMask(4, DAG.getUNDEF(MaskEVT)); + SmallVector<SDValue,8> HiMask(4, DAG.getUNDEF(MaskEVT)); + SmallVector<SDValue,8> *MaskPtr = &LoMask; unsigned MaskIdx = 0; unsigned LoIdx = 0; unsigned HiIdx = 2; @@ -3922,68 +4173,84 @@ LowerVECTOR_SHUFFLE_4wide(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) { LoIdx = 0; HiIdx = 2; } - int Idx = PermMask[i]; - if (Idx < 0) { + SDValue Elt = PermMask.getOperand(i); + if (Elt.getOpcode() == ISD::UNDEF) { Locs[i] = std::make_pair(-1, -1); - } else if (Idx < 4) { + } else if (cast<ConstantSDNode>(Elt)->getZExtValue() < 4) { Locs[i] = std::make_pair(MaskIdx, LoIdx); - (*MaskPtr)[LoIdx] = Idx; + (*MaskPtr)[LoIdx] = Elt; LoIdx++; } else { Locs[i] = std::make_pair(MaskIdx, HiIdx); - (*MaskPtr)[HiIdx] = Idx; + (*MaskPtr)[HiIdx] = Elt; HiIdx++; } } - SDValue LoShuffle = DAG.getVectorShuffle(VT, dl, V1, V2, &LoMask[0]); - SDValue HiShuffle = DAG.getVectorShuffle(VT, dl, V1, V2, &HiMask[0]); - SmallVector<int, 8> MaskOps; + SDValue LoShuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V1, V2, + DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, + &LoMask[0], LoMask.size())); + SDValue HiShuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V1, V2, + DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, + &HiMask[0], HiMask.size())); + SmallVector<SDValue, 8> MaskOps; for (unsigned i = 0; i != 4; ++i) { if (Locs[i].first == -1) { - MaskOps.push_back(-1); + MaskOps.push_back(DAG.getUNDEF(MaskEVT)); } else { unsigned Idx = Locs[i].first * 4 + Locs[i].second; - MaskOps.push_back(Idx); + MaskOps.push_back(DAG.getConstant(Idx, MaskEVT)); } } - return DAG.getVectorShuffle(VT, dl, LoShuffle, HiShuffle, &MaskOps[0]); + return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, LoShuffle, HiShuffle, + DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, + &MaskOps[0], MaskOps.size())); } SDValue X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { - ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op); SDValue V1 = Op.getOperand(0); SDValue V2 = Op.getOperand(1); + SDValue PermMask = Op.getOperand(2); MVT VT = Op.getValueType(); DebugLoc dl = Op.getDebugLoc(); - const int *PermMask = cast<ShuffleVectorSDNode>(Op.getNode())->getMask(); - unsigned NumElems = VT.getVectorNumElements(); + unsigned NumElems = PermMask.getNumOperands(); bool isMMX = VT.getSizeInBits() == 64; bool V1IsUndef = V1.getOpcode() == ISD::UNDEF; bool V2IsUndef = V2.getOpcode() == ISD::UNDEF; bool V1IsSplat = false; bool V2IsSplat = false; - if (isZeroShuffle(SVOp)) + // FIXME: Check for legal shuffle and return? + + if (isUndefShuffle(Op.getNode())) + return DAG.getUNDEF(VT); + + if (isZeroShuffle(Op.getNode())) return getZeroVector(VT, Subtarget->hasSSE2(), DAG, dl); + if (isIdentityMask(PermMask.getNode())) + return V1; + else if (isIdentityMask(PermMask.getNode(), true)) + return V2; + // Canonicalize movddup shuffles. - if (V2IsUndef && Subtarget->hasSSE2() && VT.getSizeInBits() == 128 && - X86::isMOVDDUPMask(SVOp)) - return CanonicalizeMovddup(SVOp, DAG, Subtarget->hasSSE3()); + if (V2IsUndef && Subtarget->hasSSE2() && + VT.getSizeInBits() == 128 && + X86::isMOVDDUPMask(PermMask.getNode())) + return CanonicalizeMovddup(Op, V1, PermMask, DAG, Subtarget->hasSSE3()); - // Promote splats to v4f32. - if (SVOp->isSplat()) { - if (isMMX || NumElems < 4) - return Op; - return PromoteSplat(SVOp, DAG, Subtarget->hasSSE2()); + if (isSplatMask(PermMask.getNode())) { + if (isMMX || NumElems < 4) return Op; + // Promote it to a v4{if}32 splat. + return PromoteSplat(Op, DAG, Subtarget->hasSSE2()); } // If the shuffle can be profitably rewritten as a narrower shuffle, then // do it! if (VT == MVT::v8i16 || VT == MVT::v16i8) { - SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, *this, dl); + SDValue NewOp= RewriteAsNarrowerShuffle(V1, V2, VT, PermMask, DAG, + *this, dl); if (NewOp.getNode()) return DAG.getNode(ISD::BIT_CONVERT, dl, VT, LowerVECTOR_SHUFFLE(NewOp, DAG)); @@ -3991,29 +4258,32 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { // FIXME: Figure out a cleaner way to do this. // Try to make use of movq to zero out the top part. if (ISD::isBuildVectorAllZeros(V2.getNode())) { - SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, *this, dl); + SDValue NewOp = RewriteAsNarrowerShuffle(V1, V2, VT, PermMask, + DAG, *this, dl); if (NewOp.getNode()) { - if (isCommutedMOVL(cast<ShuffleVectorSDNode>(NewOp), true, false)) - return getVZextMovL(VT, NewOp.getValueType(), NewOp.getOperand(0), - DAG, Subtarget, dl); + SDValue NewV1 = NewOp.getOperand(0); + SDValue NewV2 = NewOp.getOperand(1); + SDValue NewMask = NewOp.getOperand(2); + if (isCommutedMOVL(NewMask.getNode(), true, false)) { + NewOp = CommuteVectorShuffle(NewOp, NewV1, NewV2, NewMask, DAG); + return getVZextMovL(VT, NewOp.getValueType(), NewV2, DAG, Subtarget, + dl); + } } } else if (ISD::isBuildVectorAllZeros(V1.getNode())) { - SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, *this, dl); - if (NewOp.getNode() && X86::isMOVLMask(cast<ShuffleVectorSDNode>(NewOp))) + SDValue NewOp= RewriteAsNarrowerShuffle(V1, V2, VT, PermMask, + DAG, *this, dl); + if (NewOp.getNode() && X86::isMOVLMask(NewOp.getOperand(2).getNode())) return getVZextMovL(VT, NewOp.getValueType(), NewOp.getOperand(1), - DAG, Subtarget, dl); + DAG, Subtarget, dl); } } - - if (X86::isPSHUFDMask(SVOp)) - return Op; - + // Check if this can be converted into a logical shift. bool isLeft = false; unsigned ShAmt = 0; SDValue ShVal; - bool isShift = getSubtarget()->hasSSE2() && - isVectorShift(SVOp, DAG, isLeft, ShVal, ShAmt); + bool isShift = isVectorShift(Op, PermMask, DAG, isLeft, ShVal, ShAmt); if (isShift && ShVal.hasOneUse()) { // If the shifted value has multiple uses, it may be cheaper to use // v_set0 + movlhps or movhlps, etc. @@ -4021,8 +4291,8 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { ShAmt *= EVT.getSizeInBits(); return getVShift(isLeft, VT, ShVal, ShAmt, DAG, *this, dl); } - - if (X86::isMOVLMask(SVOp)) { + + if (X86::isMOVLMask(PermMask.getNode())) { if (V1IsUndef) return V2; if (ISD::isBuildVectorAllZeros(V1.getNode())) @@ -4030,18 +4300,17 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { if (!isMMX) return Op; } - - // FIXME: fold these into legal mask. - if (!isMMX && (X86::isMOVSHDUPMask(SVOp) || - X86::isMOVSLDUPMask(SVOp) || - X86::isMOVHLPSMask(SVOp) || - X86::isMOVHPMask(SVOp) || - X86::isMOVLPMask(SVOp))) + + if (!isMMX && (X86::isMOVSHDUPMask(PermMask.getNode()) || + X86::isMOVSLDUPMask(PermMask.getNode()) || + X86::isMOVHLPSMask(PermMask.getNode()) || + X86::isMOVHPMask(PermMask.getNode()) || + X86::isMOVLPMask(PermMask.getNode()))) return Op; - if (ShouldXformToMOVHLPS(SVOp) || - ShouldXformToMOVLP(V1.getNode(), V2.getNode(), SVOp)) - return CommuteVectorShuffle(SVOp, DAG); + if (ShouldXformToMOVHLPS(PermMask.getNode()) || + ShouldXformToMOVLP(V1.getNode(), V2.getNode(), PermMask.getNode())) + return CommuteVectorShuffle(Op, V1, V2, PermMask, DAG); if (isShift) { // No better options. Use a vshl / vsrl. @@ -4049,7 +4318,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { ShAmt *= EVT.getSizeInBits(); return getVShift(isLeft, VT, ShVal, ShAmt, DAG, *this, dl); } - + bool Commuted = false; // FIXME: This should also accept a bitcast of a splat? Be careful, not // 1,1,1,1 -> v8i16 though. @@ -4058,84 +4327,115 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { // Canonicalize the splat or undef, if present, to be on the RHS. if ((V1IsSplat || V1IsUndef) && !(V2IsSplat || V2IsUndef)) { - Op = CommuteVectorShuffle(SVOp, DAG); - SVOp = cast<ShuffleVectorSDNode>(Op); - V1 = SVOp->getOperand(0); - V2 = SVOp->getOperand(1); + Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG); std::swap(V1IsSplat, V2IsSplat); std::swap(V1IsUndef, V2IsUndef); Commuted = true; } - if (isCommutedMOVL(SVOp, V2IsSplat, V2IsUndef)) { - // Shuffling low element of v1 into undef, just return v1. - if (V2IsUndef) - return V1; - // If V2 is a splat, the mask may be malformed such as <4,3,3,3>, which - // the instruction selector will not match, so get a canonical MOVL with - // swapped operands to undo the commute. - return getMOVL(DAG, dl, VT, V2, V1); + // FIXME: Figure out a cleaner way to do this. + if (isCommutedMOVL(PermMask.getNode(), V2IsSplat, V2IsUndef)) { + if (V2IsUndef) return V1; + Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG); + if (V2IsSplat) { + // V2 is a splat, so the mask may be malformed. That is, it may point + // to any V2 element. The instruction selectior won't like this. Get + // a corrected mask and commute to form a proper MOVS{S|D}. + SDValue NewMask = getMOVLMask(NumElems, DAG, dl); + if (NewMask.getNode() != PermMask.getNode()) + Op = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V1, V2, NewMask); + } + return Op; } - if (X86::isUNPCKL_v_undef_Mask(SVOp) || - X86::isUNPCKH_v_undef_Mask(SVOp) || - X86::isUNPCKLMask(SVOp) || - X86::isUNPCKHMask(SVOp)) + if (X86::isUNPCKL_v_undef_Mask(PermMask.getNode()) || + X86::isUNPCKH_v_undef_Mask(PermMask.getNode()) || + X86::isUNPCKLMask(PermMask.getNode()) || + X86::isUNPCKHMask(PermMask.getNode())) return Op; if (V2IsSplat) { // Normalize mask so all entries that point to V2 points to its first // element then try to match unpck{h|l} again. If match, return a // new vector_shuffle with the corrected mask. - SDValue NewMask = NormalizeMask(SVOp, DAG); - ShuffleVectorSDNode *NSVOp = cast<ShuffleVectorSDNode>(NewMask); - if (NSVOp != SVOp) { - if (X86::isUNPCKLMask(NSVOp, true)) { - return NewMask; - } else if (X86::isUNPCKHMask(NSVOp, true)) { - return NewMask; + SDValue NewMask = NormalizeMask(PermMask, DAG); + if (NewMask.getNode() != PermMask.getNode()) { + if (X86::isUNPCKLMask(NewMask.getNode(), true)) { + SDValue NewMask = getUnpacklMask(NumElems, DAG, dl); + return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V1, V2, NewMask); + } else if (X86::isUNPCKHMask(NewMask.getNode(), true)) { + SDValue NewMask = getUnpackhMask(NumElems, DAG, dl); + return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V1, V2, NewMask); } } } + // Normalize the node to match x86 shuffle ops if needed + if (V2.getOpcode() != ISD::UNDEF && isCommutedSHUFP(PermMask.getNode())) + Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG); + if (Commuted) { // Commute is back and try unpck* again. - // FIXME: this seems wrong. - SDValue NewOp = CommuteVectorShuffle(SVOp, DAG); - ShuffleVectorSDNode *NewSVOp = cast<ShuffleVectorSDNode>(NewOp); - if (X86::isUNPCKL_v_undef_Mask(NewSVOp) || - X86::isUNPCKH_v_undef_Mask(NewSVOp) || - X86::isUNPCKLMask(NewSVOp) || - X86::isUNPCKHMask(NewSVOp)) - return NewOp; + Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG); + if (X86::isUNPCKL_v_undef_Mask(PermMask.getNode()) || + X86::isUNPCKH_v_undef_Mask(PermMask.getNode()) || + X86::isUNPCKLMask(PermMask.getNode()) || + X86::isUNPCKHMask(PermMask.getNode())) + return Op; } // FIXME: for mmx, bitcast v2i32 to v4i16 for shuffle. + // Try PSHUF* first, then SHUFP*. + // MMX doesn't have PSHUFD but it does have PSHUFW. While it's theoretically + // possible to shuffle a v2i32 using PSHUFW, that's not yet implemented. + if (isMMX && NumElems == 4 && X86::isPSHUFDMask(PermMask.getNode())) { + if (V2.getOpcode() != ISD::UNDEF) + return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V1, + DAG.getUNDEF(VT), PermMask); + return Op; + } - // Normalize the node to match x86 shuffle ops if needed - if (!isMMX && V2.getOpcode() != ISD::UNDEF && isCommutedSHUFP(SVOp)) - return CommuteVectorShuffle(SVOp, DAG); + if (!isMMX) { + if (Subtarget->hasSSE2() && + (X86::isPSHUFDMask(PermMask.getNode()) || + X86::isPSHUFHWMask(PermMask.getNode()) || + X86::isPSHUFLWMask(PermMask.getNode()))) { + MVT RVT = VT; + if (VT == MVT::v4f32) { + RVT = MVT::v4i32; + Op = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, RVT, + DAG.getNode(ISD::BIT_CONVERT, dl, RVT, V1), + DAG.getUNDEF(RVT), PermMask); + } else if (V2.getOpcode() != ISD::UNDEF) + Op = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, RVT, V1, + DAG.getUNDEF(RVT), PermMask); + if (RVT != VT) + Op = DAG.getNode(ISD::BIT_CONVERT, dl, VT, Op); + return Op; + } + + // Binary or unary shufps. + if (X86::isSHUFPMask(PermMask.getNode()) || + (V2.getOpcode() == ISD::UNDEF && X86::isPSHUFDMask(PermMask.getNode()))) + return Op; + } - // Check for legal shuffle and return? - if (isShuffleMaskLegal(PermMask, VT)) - return Op; - // Handle v8i16 specifically since SSE can do byte extraction and insertion. if (VT == MVT::v8i16) { - SDValue NewOp = LowerVECTOR_SHUFFLEv8i16(SVOp, DAG, *this); + SDValue NewOp = LowerVECTOR_SHUFFLEv8i16(V1, V2, PermMask, DAG, *this, dl); if (NewOp.getNode()) return NewOp; } if (VT == MVT::v16i8) { - SDValue NewOp = LowerVECTOR_SHUFFLEv16i8(SVOp, DAG, *this); + SDValue NewOp = LowerVECTOR_SHUFFLEv16i8(V1, V2, PermMask, DAG, *this, dl); if (NewOp.getNode()) return NewOp; } // Handle all 4 wide cases with a number of shuffles except for MMX. if (NumElems == 4 && !isMMX) - return LowerVECTOR_SHUFFLE_4wide(SVOp, DAG); + return LowerVECTOR_SHUFFLE_4wide(V1, V2, PermMask, VT, DAG, dl); return SDValue(); } @@ -4229,12 +4529,22 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); if (Idx == 0) return Op; - // SHUFPS the element to the lowest double word, then movss. - int Mask[4] = { Idx, -1, -1, -1 }; - MVT VVT = Op.getOperand(0).getValueType(); - SDValue Vec = DAG.getVectorShuffle(VVT, dl, Op.getOperand(0), - DAG.getUNDEF(VVT), Mask); + MVT MaskVT = MVT::getIntVectorWithNumElements(4); + SmallVector<SDValue, 8> IdxVec; + IdxVec. + push_back(DAG.getConstant(Idx, MaskVT.getVectorElementType())); + IdxVec. + push_back(DAG.getUNDEF(MaskVT.getVectorElementType())); + IdxVec. + push_back(DAG.getUNDEF(MaskVT.getVectorElementType())); + IdxVec. + push_back(DAG.getUNDEF(MaskVT.getVectorElementType())); + SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, + &IdxVec[0], IdxVec.size()); + SDValue Vec = Op.getOperand(0); + Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, Vec.getValueType(), + Vec, DAG.getUNDEF(Vec.getValueType()), Mask); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Vec, DAG.getIntPtrConstant(0)); } else if (VT.getSizeInBits() == 64) { @@ -4248,10 +4558,17 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { // UNPCKHPD the element to the lowest double word, then movsd. // Note if the lower 64 bits of the result of the UNPCKHPD is then stored // to a f64mem, the whole operation is folded into a single MOVHPDmr. - int Mask[2] = { 1, -1 }; - MVT VVT = Op.getOperand(0).getValueType(); - SDValue Vec = DAG.getVectorShuffle(VVT, dl, Op.getOperand(0), - DAG.getUNDEF(VVT), Mask); + MVT MaskVT = MVT::getIntVectorWithNumElements(2); + SmallVector<SDValue, 8> IdxVec; + IdxVec.push_back(DAG.getConstant(1, MaskVT.getVectorElementType())); + IdxVec. + push_back(DAG.getUNDEF(MaskVT.getVectorElementType())); + SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, + &IdxVec[0], IdxVec.size()); + SDValue Vec = Op.getOperand(0); + Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, Vec.getValueType(), + Vec, DAG.getUNDEF(Vec.getValueType()), + Mask); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Vec, DAG.getIntPtrConstant(0)); } @@ -4758,6 +5075,19 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) { Constant *C1 = ConstantVector::get(CV1); SDValue CPIdx1 = DAG.getConstantPool(C1, getPointerTy(), 16); + SmallVector<SDValue, 4> MaskVec; + MaskVec.push_back(DAG.getConstant(0, MVT::i32)); + MaskVec.push_back(DAG.getConstant(4, MVT::i32)); + MaskVec.push_back(DAG.getConstant(1, MVT::i32)); + MaskVec.push_back(DAG.getConstant(5, MVT::i32)); + SDValue UnpcklMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, + &MaskVec[0], MaskVec.size()); + SmallVector<SDValue, 4> MaskVec2; + MaskVec2.push_back(DAG.getConstant(1, MVT::i32)); + MaskVec2.push_back(DAG.getConstant(0, MVT::i32)); + SDValue ShufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i32, + &MaskVec2[0], MaskVec2.size()); + SDValue XR1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op.getOperand(0), @@ -4766,11 +5096,13 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) { DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op.getOperand(0), DAG.getIntPtrConstant(0))); - SDValue Unpck1 = getUnpackl(DAG, dl, MVT::v4i32, XR1, XR2); + SDValue Unpck1 = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, MVT::v4i32, + XR1, XR2, UnpcklMask); SDValue CLod0 = DAG.getLoad(MVT::v4i32, dl, DAG.getEntryNode(), CPIdx0, PseudoSourceValue::getConstantPool(), 0, false, 16); - SDValue Unpck2 = getUnpackl(DAG, dl, MVT::v4i32, Unpck1, CLod0); + SDValue Unpck2 = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, MVT::v4i32, + Unpck1, CLod0, UnpcklMask); SDValue XR2F = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, Unpck2); SDValue CLod1 = DAG.getLoad(MVT::v2f64, dl, CLod0.getValue(1), CPIdx1, PseudoSourceValue::getConstantPool(), 0, @@ -4778,9 +5110,8 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) { SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, XR2F, CLod1); // Add the halves; easiest way is to swap them into another reg first. - int ShufMask[2] = { 1, -1 }; - SDValue Shuf = DAG.getVectorShuffle(MVT::v2f64, dl, Sub, - DAG.getUNDEF(MVT::v2f64), ShufMask); + SDValue Shuf = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, MVT::v2f64, + Sub, Sub, ShufMask); SDValue Add = DAG.getNode(ISD::FADD, dl, MVT::v2f64, Shuf, Sub); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Add, DAG.getIntPtrConstant(0)); @@ -6932,36 +7263,34 @@ bool X86TargetLowering::isZExtFree(MVT VT1, MVT VT2) const { /// By default, if a target supports the VECTOR_SHUFFLE node, all mask values /// are assumed to be legal. bool -X86TargetLowering::isShuffleMaskLegal(const int *Mask, MVT VT) const { +X86TargetLowering::isShuffleMaskLegal(SDValue Mask, MVT VT) const { // Only do shuffles on 128-bit vector types for now. - if (VT.getSizeInBits() == 64) - return false; - - // FIXME: pshufb, blends, palignr, shifts. - return (VT.getVectorNumElements() == 2 || - ShuffleVectorSDNode::isSplatMask(Mask, VT) || - isMOVLMask(Mask, VT) || - isSHUFPMask(Mask, VT) || - isPSHUFDMask(Mask, VT) || - isPSHUFHWMask(Mask, VT) || - isPSHUFLWMask(Mask, VT) || - isUNPCKLMask(Mask, VT) || - isUNPCKHMask(Mask, VT) || - isUNPCKL_v_undef_Mask(Mask, VT) || - isUNPCKH_v_undef_Mask(Mask, VT)); + // FIXME: pshufb, blends + if (VT.getSizeInBits() == 64) return false; + return (Mask.getNode()->getNumOperands() <= 4 || + isIdentityMask(Mask.getNode()) || + isIdentityMask(Mask.getNode(), true) || + isSplatMask(Mask.getNode()) || + X86::isPSHUFHWMask(Mask.getNode()) || + X86::isPSHUFLWMask(Mask.getNode()) || + X86::isUNPCKLMask(Mask.getNode()) || + X86::isUNPCKHMask(Mask.getNode()) || + X86::isUNPCKL_v_undef_Mask(Mask.getNode()) || + X86::isUNPCKH_v_undef_Mask(Mask.getNode())); } bool -X86TargetLowering::isVectorClearMaskLegal(const int *Mask, MVT VT) const { - unsigned NumElts = VT.getVectorNumElements(); - // FIXME: This collection of masks seems suspect. - if (NumElts == 2) - return true; - if (NumElts == 4 && VT.getSizeInBits() == 128) { - return (isMOVLMask(Mask, VT) || - isCommutedMOVLMask(Mask, VT, true) || - isSHUFPMask(Mask, VT) || - isCommutedSHUFPMask(Mask, VT)); +X86TargetLowering::isVectorClearMaskLegal(const std::vector<SDValue> &BVOps, + MVT EVT, SelectionDAG &DAG) const { + unsigned NumElts = BVOps.size(); + // Only do shuffles on 128-bit vector types for now. + if (EVT.getSizeInBits() * NumElts == 64) return false; + if (NumElts == 2) return true; + if (NumElts == 4) { + return (isMOVLMask(&BVOps[0], 4) || + isCommutedMOVL(&BVOps[0], 4, true) || + isSHUFPMask(&BVOps[0], 4) || + isCommutedSHUFP(&BVOps[0], 4)); } return false; } @@ -7696,14 +8025,15 @@ static bool isBaseAlignmentOfN(unsigned N, SDNode *Base, return false; } -static bool EltsFromConsecutiveLoads(SDNode *N, const int *PermMask, +static bool EltsFromConsecutiveLoads(SDNode *N, SDValue PermMask, unsigned NumElems, MVT EVT, SDNode *&Base, SelectionDAG &DAG, MachineFrameInfo *MFI, const TargetLowering &TLI) { Base = NULL; for (unsigned i = 0; i < NumElems; ++i) { - if (PermMask[i] < 0) { + SDValue Idx = PermMask.getOperand(i); + if (Idx.getOpcode() == ISD::UNDEF) { if (!Base) return false; continue; @@ -7736,12 +8066,12 @@ static bool EltsFromConsecutiveLoads(SDNode *N, const int *PermMask, /// shuffle to be an appropriate build vector so it can take advantage of // performBuildVectorCombine. static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG, - const TargetLowering &TLI) { + const TargetLowering &TLI) { DebugLoc dl = N->getDebugLoc(); MVT VT = N->getValueType(0); MVT EVT = VT.getVectorElementType(); - const int *PermMask = cast<ShuffleVectorSDNode>(N)->getMask(); - unsigned NumElems = VT.getVectorNumElements(); + SDValue PermMask = N->getOperand(2); + unsigned NumElems = PermMask.getNumOperands(); // For x86-32 machines, if we see an insert and then a shuffle in a v2i64 // where the upper half is 0, it is advantageous to rewrite it as a build @@ -7750,10 +8080,9 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG, SDValue In[2]; In[0] = N->getOperand(0); In[1] = N->getOperand(1); - unsigned Idx0 = PermMask[0]; - unsigned Idx1 = PermMask[1]; - // FIXME: can we take advantage of undef index? - if (PermMask[0] >= 0 && PermMask[1] >= 0 && + unsigned Idx0 =cast<ConstantSDNode>(PermMask.getOperand(0))->getZExtValue(); + unsigned Idx1 =cast<ConstantSDNode>(PermMask.getOperand(1))->getZExtValue(); + if (In[0].getValueType().getVectorNumElements() == NumElems && In[Idx0/2].getOpcode() == ISD::INSERT_VECTOR_ELT && In[Idx1/2].getOpcode() == ISD::BUILD_VECTOR) { ConstantSDNode* InsertVecIdx = @@ -8217,9 +8546,9 @@ static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG, } } } else if (ShAmtOp.getOpcode() == ISD::VECTOR_SHUFFLE && - cast<ShuffleVectorSDNode>(ShAmtOp)->isSplat()) { - BaseShAmt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, ShAmtOp, - DAG.getIntPtrConstant(0)); + isSplatMask(ShAmtOp.getOperand(2).getNode())) { + BaseShAmt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, ShAmtOp, + DAG.getIntPtrConstant(0)); } else return SDValue(); diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index d91951c62e..050b86979c 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -230,8 +230,7 @@ namespace llvm { // VSHL, VSRL - Vector logical left / right shift. VSHL, VSRL, - - // CMPPD, CMPPS - Vector double/float comparison. + // CMPPD, CMPPS - Vector double/float comparison. CMPPD, CMPPS, @@ -252,72 +251,80 @@ namespace llvm { namespace X86 { /// isPSHUFDMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to PSHUFD. - bool isPSHUFDMask(ShuffleVectorSDNode *N); + bool isPSHUFDMask(SDNode *N); /// isPSHUFHWMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to PSHUFD. - bool isPSHUFHWMask(ShuffleVectorSDNode *N); + bool isPSHUFHWMask(SDNode *N); /// isPSHUFLWMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to PSHUFD. - bool isPSHUFLWMask(ShuffleVectorSDNode *N); + bool isPSHUFLWMask(SDNode *N); /// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to SHUFP*. - bool isSHUFPMask(ShuffleVectorSDNode *N); + bool isSHUFPMask(SDNode *N); /// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to MOVHLPS. - bool isMOVHLPSMask(ShuffleVectorSDNode *N); + bool isMOVHLPSMask(SDNode *N); /// isMOVHLPS_v_undef_Mask - Special case of isMOVHLPSMask for canonical form /// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef, /// <2, 3, 2, 3> - bool isMOVHLPS_v_undef_Mask(ShuffleVectorSDNode *N); + bool isMOVHLPS_v_undef_Mask(SDNode *N); /// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand - /// specifies a shuffle of elements that is suitable for MOVLP{S|D}. - bool isMOVLPMask(ShuffleVectorSDNode *N); + /// specifies a shuffle of elements that is suitable for input to MOVLP{S|D}. + bool isMOVLPMask(SDNode *N); /// isMOVHPMask - Return true if the specified VECTOR_SHUFFLE operand - /// specifies a shuffle of elements that is suitable for MOVHP{S|D}. + /// specifies a shuffle of elements that is suitable for input to MOVHP{S|D} /// as well as MOVLHPS. - bool isMOVHPMask(ShuffleVectorSDNode *N); + bool isMOVHPMask(SDNode *N); /// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to UNPCKL. - bool isUNPCKLMask(ShuffleVectorSDNode *N, bool V2IsSplat = false); + bool isUNPCKLMask(SDNode *N, bool V2IsSplat = false); /// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to UNPCKH. - bool isUNPCKHMask(ShuffleVectorSDNode *N, bool V2IsSplat = false); + bool isUNPCKHMask(SDNode *N, bool V2IsSplat = false); /// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form /// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef, /// <0, 0, 1, 1> - bool isUNPCKL_v_undef_Mask(ShuffleVectorSDNode *N); + bool isUNPCKL_v_undef_Mask(SDNode *N); /// isUNPCKH_v_undef_Mask - Special case of isUNPCKHMask for canonical form /// of vector_shuffle v, v, <2, 6, 3, 7>, i.e. vector_shuffle v, undef, /// <2, 2, 3, 3> - bool isUNPCKH_v_undef_Mask(ShuffleVectorSDNode *N); + bool isUNPCKH_v_undef_Mask(SDNode *N); /// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to MOVSS, /// MOVSD, and MOVD, i.e. setting the lowest element. - bool isMOVLMask(ShuffleVectorSDNode *N); + bool isMOVLMask(SDNode *N); /// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to MOVSHDUP. - bool isMOVSHDUPMask(ShuffleVectorSDNode *N); + bool isMOVSHDUPMask(SDNode *N); /// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to MOVSLDUP. - bool isMOVSLDUPMask(ShuffleVectorSDNode *N); + bool isMOVSLDUPMask(SDNode *N); + + /// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand + /// specifies a splat of a single element. + bool isSplatMask(SDNode *N); + + /// isSplatLoMask - Return true if the specified VECTOR_SHUFFLE operand + /// specifies a splat of zero element. + bool isSplatLoMask(SDNode *N); /// isMOVDDUPMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to MOVDDUP. - bool isMOVDDUPMask(ShuffleVectorSDNode *N); + bool isMOVDDUPMask(SDNode *N); /// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle /// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP* @@ -470,13 +477,14 @@ namespace llvm { /// support *some* VECTOR_SHUFFLE operations, those with specific masks. /// By default, if a target supports the VECTOR_SHUFFLE node, all mask /// values are assumed to be legal. - virtual bool isShuffleMaskLegal(const int *Mask, MVT VT) const; + virtual bool isShuffleMaskLegal(SDValue Mask, MVT VT) const; /// isVectorClearMaskLegal - Similar to isShuffleMaskLegal. This is /// used by Targets can use this to indicate if there is a suitable /// VECTOR_SHUFFLE that can be used to replace a VAND with a constant /// pool entry. - virtual bool isVectorClearMaskLegal(const int *Mask, MVT VT) const; + virtual bool isVectorClearMaskLegal(const std::vector<SDValue> &BVOps, + MVT EVT, SelectionDAG &DAG) const; /// ShouldShrinkFPConstant - If true, then instruction selection should /// seek to shrink the FP constant of the specified type to a smaller type diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 511d42abc1..462433bb27 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -3801,7 +3801,6 @@ def : Pat<(parallel (store (i32 (X86dec_flag (loadi32 addr:$dst))), addr:$dst), (implicit EFLAGS)), (DEC32m addr:$dst)>, Requires<[In32BitMode]>; - //===----------------------------------------------------------------------===// // Floating Point Stack Support //===----------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td index 338b9e294b..71f2cb164d 100644 --- a/lib/Target/X86/X86InstrMMX.td +++ b/lib/Target/X86/X86InstrMMX.td @@ -30,37 +30,33 @@ def bc_v1i64 : PatFrag<(ops node:$in), (v1i64 (bitconvert node:$in))>; // MMX_SHUFFLE_get_shuf_imm xform function: convert vector_shuffle mask to // PSHUFW imm. -def MMX_SHUFFLE_get_shuf_imm : SDNodeXForm<vector_shuffle, [{ +def MMX_SHUFFLE_get_shuf_imm : SDNodeXForm<build_vector, [{ return getI8Imm(X86::getShuffleSHUFImmediate(N)); }]>; // Patterns for: vector_shuffle v1, v2, <2, 6, 3, 7, ...> -def mmx_unpckh : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isUNPCKHMask(cast<ShuffleVectorSDNode>(N)); +def MMX_UNPCKH_shuffle_mask : PatLeaf<(build_vector), [{ + return X86::isUNPCKHMask(N); }]>; // Patterns for: vector_shuffle v1, v2, <0, 4, 2, 5, ...> -def mmx_unpckl : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isUNPCKLMask(cast<ShuffleVectorSDNode>(N)); +def MMX_UNPCKL_shuffle_mask : PatLeaf<(build_vector), [{ + return X86::isUNPCKLMask(N); }]>; // Patterns for: vector_shuffle v1, <undef>, <0, 0, 1, 1, ...> -def mmx_unpckh_undef : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isUNPCKH_v_undef_Mask(cast<ShuffleVectorSDNode>(N)); +def MMX_UNPCKH_v_undef_shuffle_mask : PatLeaf<(build_vector), [{ + return X86::isUNPCKH_v_undef_Mask(N); }]>; // Patterns for: vector_shuffle v1, <undef>, <2, 2, 3, 3, ...> -def mmx_unpckl_undef : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isUNPCKL_v_undef_Mask(cast<ShuffleVectorSDNode>(N)); +def MMX_UNPCKL_v_undef_shuffle_mask : PatLeaf<(build_vector), [{ + return X86::isUNPCKL_v_undef_Mask(N); }]>; -def mmx_pshufw : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isPSHUFDMask(cast<ShuffleVectorSDNode>(N)); +// Patterns for shuffling. +def MMX_PSHUFW_shuffle_mask : PatLeaf<(build_vector), [{ + return X86::isPSHUFDMask(N); }], MMX_SHUFFLE_get_shuf_imm>; //===----------------------------------------------------------------------===// @@ -189,8 +185,9 @@ def MMX_MOVDQ2Qrr : SDIi8<0xD6, MRMDestMem, (outs VR64:$dst), (ins VR128:$src), def MMX_MOVQ2DQrr : SSDIi8<0xD6, MRMDestMem, (outs VR128:$dst), (ins VR64:$src), "movq2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, - (movl immAllZerosV, - (v2i64 (scalar_to_vector (i64 (bitconvert VR64:$src))))))]>; + (v2i64 (vector_shuffle immAllZerosV, + (v2i64 (scalar_to_vector (i64 (bitconvert VR64:$src)))), + MOVL_shuffle_mask)))]>; let neverHasSideEffects = 1 in def MMX_MOVQ2FR64rr: SSDIi8<0xD6, MRMDestMem, (outs FR64:$dst), (ins VR64:$src), @@ -322,74 +319,86 @@ let isTwoAddress = 1 in { (outs VR64:$dst), (ins VR64:$src1, VR64:$src2), "punpckhbw\t{$src2, $dst|$dst, $src2}", [(set VR64:$dst, - (v8i8 (mmx_unpckh VR64:$src1, VR64:$src2)))]>; + (v8i8 (vector_shuffle VR64:$src1, VR64:$src2, + MMX_UNPCKH_shuffle_mask)))]>; def MMX_PUNPCKHBWrm : MMXI<0x68, MRMSrcMem, (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2), "punpckhbw\t{$src2, $dst|$dst, $src2}", [(set VR64:$dst, - (v8i8 (mmx_unpckh VR64:$src1, - (bc_v8i8 (load_mmx addr:$src2)))))]>; + (v8i8 (vector_shuffle VR64:$src1, + (bc_v8i8 (load_mmx addr:$src2)), + MMX_UNPCKH_shuffle_mask)))]>; def MMX_PUNPCKHWDrr : MMXI<0x69, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src1, VR64:$src2), "punpckhwd\t{$src2, $dst|$dst, $src2}", [(set VR64:$dst, - (v4i16 (mmx_unpckh VR64:$src1, VR64:$src2)))]>; + (v4i16 (vector_shuffle VR64:$src1, VR64:$src2, + MMX_UNPCKH_shuffle_mask)))]>; def MMX_PUNPCKHWDrm : MMXI<0x69, MRMSrcMem, (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2), "punpckhwd\t{$src2, $dst|$dst, $src2}", [(set VR64:$dst, - (v4i16 (mmx_unpckh VR64:$src1, - (bc_v4i16 (load_mmx addr:$src2)))))]>; + (v4i16 (vector_shuffle VR64:$src1, + (bc_v4i16 (load_mmx addr:$src2)), + MMX_UNPCKH_shuffle_mask)))]>; def MMX_PUNPCKHDQrr : MMXI<0x6A, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src1, VR64:$src2), "punpckhdq\t{$src2, $dst|$dst, $src2}", [(set VR64:$dst, - (v2i32 (mmx_unpckh VR64:$src1, VR64:$src2)))]>; + (v2i32 (vector_shuffle VR64:$src1, VR64:$src2, + MMX_UNPCKH_shuffle_mask)))]>; def MMX_PUNPCKHDQrm : MMXI<0x6A, MRMSrcMem, (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2), "punpckhdq\t{$src2, $dst|$dst, $src2}", [(set VR64:$dst, - (v2i32 (mmx_unpckh VR64:$src1, - (bc_v2i32 (load_mmx addr:$src2)))))]>; + (v2i32 (vector_shuffle VR64:$src1, + (bc_v2i32 (load_mmx addr:$src2)), + MMX_UNPCKH_shuffle_mask)))]>; // Unpack Low Packed Data Instructions def MMX_PUNPCKLBWrr : MMXI<0x60, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src1, VR64:$src2), "punpcklbw\t{$src2, $dst|$dst, $src2}", [(set VR64:$dst, - (v8i8 (mmx_unpckl VR64:$src1, VR64:$src2)))]>; + (v8i8 (vector_shuffle VR64:$src1, VR64:$src2, + MMX_UNPCKL_shuffle_mask)))]>; def MMX_PUNPCKLBWrm : MMXI<0x60, MRMSrcMem, (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2), "punpcklbw\t{$src2, $dst|$dst, $src2}", [(set VR64:$dst, - (v8i8 (mmx_unpckl VR64:$src1, - (bc_v8i8 (load_mmx addr:$src2)))))]>; + (v8i8 (vector_shuffle VR64:$src1, + (bc_v8i8 (load_mmx addr:$src2)), + MMX_UNPCKL_shuffle_mask)))]>; def MMX_PUNPCKLWDrr : MMXI<0x61, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src1, VR64:$src2), "punpcklwd\t{$src2, $dst|$dst, $src2}", [(set VR64:$dst, - (v4i16 (mmx_unpckl VR64:$src1, VR64:$src2)))]>; + (v4i16 (vector_shuffle VR64:$src1, VR64:$src2, + MMX_UNPCKL_shuffle_mask)))]>; def MMX_PUNPCKLWDrm : MMXI<0x61, MRMSrcMem, (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2), "punpcklwd\t{$src2, $dst|$dst, $src2}", [(set VR64:$dst, - (v4i16 (mmx_unpckl VR64:$src1, - (bc_v4i16 (load_mmx addr:$src2)))))]>; + (v4i16 (vector_shuffle VR64:$src1, + (bc_v4i16 (load_mmx addr:$src2)), + MMX_UNPCKL_shuffle_mask)))]>; def MMX_PUNPCKLDQrr : MMXI<0x62, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src1, VR64:$src2), "punpckldq\t{$src2, $dst|$dst, $src2}", [(set VR64:$dst, - (v2i32 (mmx_unpckl VR64:$src1, VR64:$src2)))]>; + (v2i32 (vector_shuffle VR64:$src1, VR64:$src2, + MMX_UNPCKL_shuffle_mask)))]>; def MMX_PUNPCKLDQrm : MMXI<0x62, MRMSrcMem, (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2), "punpckldq\t{$src2, $dst|$dst, $src2}", [(set VR64:$dst, - (v2i32 (mmx_unpckl VR64:$src1, - (bc_v2i32 (load_mmx addr:$src2)))))]>; + (v2i32 (vector_shuffle VR64:$src1, + (bc_v2i32 (load_mmx addr:$src2)), + MMX_UNPCKL_shuffle_mask)))]>; } // -- Pack Instructions @@ -402,13 +411,17 @@ def MMX_PSHUFWri : MMXIi8<0x70, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src1, i8imm:$src2), "pshufw\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR64:$dst, - (v4i16 (mmx_pshufw:$src2 VR64:$src1, (undef))))]>; + (v4i16 (vector_shuffle + VR64:$src1, (undef), + MMX_PSHUFW_shuffle_mask:$src2)))]>; def MMX_PSHUFWmi : MMXIi8<0x70, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src1, i8imm:$src2), "pshufw\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR64:$dst, - (mmx_pshufw:$src2 (bc_v4i16 (load_mmx addr:$src1)), - (undef)))]>; + (v4i16 (vector_shuffle + (bc_v4i16 (load_mmx addr:$src1)), + (undef), + MMX_PSHUFW_shuffle_mask:$src2)))]>; // -- Conversion Instructions let neverHasSideEffects = 1 in { @@ -614,27 +627,34 @@ def : Pat<(bc_v4i16 (v2i32 (scalar_to_vector GR32:$src))), // Patterns to perform canonical versions of vector shuffling. let AddedComplexity = 10 in { - def : Pat<(v8i8 (mmx_unpckl_undef VR64:$src, (undef))), + def : Pat<(v8i8 (vector_shuffle VR64:$src, (undef), + MMX_UNPCKL_v_undef_shuffle_mask)), (MMX_PUNPCKLBWrr VR64:$src, VR64:$src)>; - def : Pat<(v4i16 (mmx_unpckl_undef VR64:$src, (undef))), + def : Pat<(v4i16 (vector_shuffle VR64:$src, (undef), + MMX_UNPCKL_v_undef_shuffle_mask)), (MMX_PUNPCKLWDrr VR64:$src, VR64:$src)>; - def : Pat<(v2i32 (mmx_unpckl_undef VR64:$src, (undef))), + def : Pat<(v2i32 (vector_shuffle VR64:$src, (undef), + MMX_UNPCKL_v_undef_shuffle_mask)), (MMX_PUNPCKLDQrr VR64:$src, VR64:$src)>; } let AddedComplexity = 10 in { - def : Pat<(v8i8 (mmx_unpckh_undef VR64:$src, (undef))), + def : Pat<(v8i8 (vector_shuffle VR64:$src, (undef), + MMX_UNPCKH_v_undef_shuffle_mask)), (MMX_PUNPCKHBWrr VR64:$src, VR64:$src)>; - def : Pat<(v4i16 (mmx_unpckh_undef VR64:$src, (undef))), + def : Pat<(v4i16 (vector_shuffle VR64:$src, (undef), + MMX_UNPCKH_v_undef_shuffle_mask)), (MMX_PUNPCKHWDrr VR64:$src, VR64:$src)>; - def : Pat<(v2i32 (mmx_unpckh_undef VR64:$src, (undef))), + def : Pat<(v2i32 (vector_shuffle VR64:$src, (undef), + MMX_UNPCKH_v_undef_shuffle_mask)), (MMX_PUNPCKHDQrr VR64:$src, VR64:$src)>; } // Patterns to perform vector shuffling with a zeroed out vector. let AddedComplexity = 20 in { - def : Pat<(bc_v2i32 (mmx_unpckl immAllZerosV, - (v2i32 (scalar_to_vector (load_mmx addr:$src))))), + def : Pat<(bc_v2i32 (vector_shuffle immAllZerosV, + (v2i32 (scalar_to_vector (load_mmx addr:$src))), + MMX_UNPCKL_shuffle_mask)), (MMX_PUNPCKLDQrm VR64:$src, VR64:$src)>; } diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index a10f4433e8..3ce35bd625 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -175,108 +175,103 @@ def PSxLDQ_imm : SDNodeXForm<imm, [{ // SHUFFLE_get_shuf_imm xform function: convert vector_shuffle mask to PSHUF*, // SHUFP* etc. imm. -def SHUFFLE_get_shuf_imm : SDNodeXForm<vector_shuffle, [{ +def SHUFFLE_get_shuf_imm : SDNodeXForm<build_vector, [{ return getI8Imm(X86::getShuffleSHUFImmediate(N)); }]>; // SHUFFLE_get_pshufhw_imm xform function: convert vector_shuffle mask to // PSHUFHW imm. -def SHUFFLE_get_pshufhw_imm : SDNodeXForm<vector_shuffle, [{ +def SHUFFLE_get_pshufhw_imm : SDNodeXForm<build_vector, [{ return getI8Imm(X86::getShufflePSHUFHWImmediate(N)); }]>; // SHUFFLE_get_pshuflw_imm xform function: convert vector_shuffle mask to // PSHUFLW imm. -def SHUFFLE_get_pshuflw_imm : SDNodeXForm<vector_shuffle, [{ +def SHUFFLE_get_pshuflw_imm : SDNodeXForm<build_vector, [{ return getI8Imm(X86::getShufflePSHUFLWImmediate(N)); }]>; -def splat_lo : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); - return SVOp->isSplat() && SVOp->getSplatIndex() == 0; -}]>; +def SSE_splat_mask : PatLeaf<(build_vector), [{ + return X86::isSplatMask(N); +}], SHUFFLE_get_shuf_imm>; -def movddup : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isMOVDDUPMask(cast<ShuffleVectorSDNode>(N)); +def SSE_splat_lo_mask : PatLeaf<(build_vector), [{ + return X86::isSplatLoMask(N); }]>; -def movhlps : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isMOVHLPSMask(cast<ShuffleVectorSDNode>(N)); +def MOVDDUP_shuffle_mask : PatLeaf<(build_vector), [{ + return X86::isMOVDDUPMask(N); }]>; -def movhlps_undef : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isMOVHLPS_v_undef_Mask(cast<ShuffleVectorSDNode>(N)); +def MOVHLPS_shuffle_mask : PatLeaf<(build_vector), [{ + return X86::isMOVHLPSMask(N); }]>; -def movhp : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isMOVHPMask(cast<ShuffleVectorSDNode>(N)); +def MOVHLPS_v_undef_shuffle_mask : PatLeaf<(build_vector), [{ + return X86::isMOVHLPS_v_undef_Mask(N); }]>; -def movlp : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isMOVLPMask(cast<ShuffleVectorSDNode>(N)); +def MOVHP_shuffle_mask : PatLeaf<(build_vector), [{ + return X86::isMOVHPMask(N); }]>; -def movl : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isMOVLMask(cast<ShuffleVectorSDNode>(N)); +def MOVLP_shuffle_mask : PatLeaf<(build_vector), [{ + return X86::isMOVLPMask(N); }]>; -def movshdup : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isMOVSHDUPMask(cast<ShuffleVectorSDNode>(N)); +def MOVL_shuffle_mask : PatLeaf<(build_vector), [{ + return X86::isMOVLMask(N); }]>; -def movsldup : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isMOVSLDUPMask(cast<ShuffleVectorSDNode>(N)); +def MOVSHDUP_shuffle_mask : PatLeaf<(build_vector), [{ + return X86::isMOVSHDUPMask(N); }]>; -def unpckl : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isUNPCKLMask(cast<ShuffleVectorSDNode>(N)); +def MOVSLDUP_shuffle_mask : PatLeaf<(build_vector), [{ + return X86::isMOVSLDUPMask(N); }]>; -def unpckh : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isUNPCKHMask(cast<ShuffleVectorSDNode>(N)); +def UNPCKL_shuffle_mask : PatLeaf<(build_vector), [{ + return X86::isUNPCKLMask(N); }]>; -def unpckl_undef : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isUNPCKL_v_undef_Mask(cast<ShuffleVectorSDNode>(N)); +def UNPCKH_shuffle_mask : PatLeaf<(build_vector), [{ + return X86::isUNPCKHMask(N); }]>; -def unpckh_undef : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isUNPCKH_v_undef_Mask(cast<ShuffleVectorSDNode>(N)); +def UNPCKL_v_undef_shuffle_mask : PatLeaf<(build_vector), [{ + return X86::isUNPCKL_v_undef_Mask(N); }]>; -def pshufd : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isPSHUFDMask(cast<ShuffleVectorSDNode>(N)); -}], SHUFFLE_get_shuf_imm>; +def UNPCKH_v_undef_shuffle_mask : PatLeaf<(build_vector), [{ + return X86::isUNPCKH_v_undef_Mask(N); +}]>; -def shufp : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isSHUFPMask(cast<ShuffleVectorSDNode>(N)); +def PSHUFD_shuffle_mask : PatLeaf<(build_vector), [{ + return X86::isPSHUFDMask(N); }], SHUFFLE_get_shuf_imm>; -def pshufhw : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isPSHUFHWMask(cast<ShuffleVectorSDNode>(N)); +def PSHUFHW_shuffle_mask : PatLeaf<(build_vector), [{ + return X86::isPSHUFHWMask(N); }], SHUFFLE_get_pshufhw_imm>; -def pshuflw : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isPSHUFLWMask(cast<ShuffleVectorSDNode>(N)); +def PSHUFLW_shuffle_mask : PatLeaf<(build_vector), [{ + return X86::isPSHUFLWMask(N); }], SHUFFLE_get_pshuflw_imm>; +def SHUFP_unary_shuffle_mask : PatLeaf<(build_vector), [{ + return X86::isPSHUFDMask(N); +}], SHUFFLE_get_shuf_imm>; + +def SHUFP_shuffle_mask : PatLeaf<(build_vector), [{ + return X86::isSHUFPMask(N); +}], SHUFFLE_get_shuf_imm>; + +def PSHUFD_binary_shuffle_mask : PatLeaf<(build_vector), [{ + return X86::isSHUFPMask(N); +}], SHUFFLE_get_shuf_imm>; + + //===----------------------------------------------------------------------===// // SSE scalar FP Instructions //===----------------------------------------------------------------------===// @@ -709,14 +704,16 @@ let Constraints = "$src1 = $dst" in { (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2), "movlps\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (movlp VR128:$src1, - (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))))]>; + (v4f32 (vector_shuffle VR128:$src1, + (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2)))), + MOVLP_shuffle_mask)))]>; def MOVHPSrm : PSI<0x16, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2), "movhps\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (movhp VR128:$src1, - (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))))]>; + (v4f32 (vector_shuffle VR128:$src1, + (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2)))), + MOVHP_shuffle_mask)))]>; } // AddedComplexity } // Constraints = "$src1 = $dst" @@ -731,25 +728,29 @@ def MOVLPSmr : PSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), def MOVHPSmr : PSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), "movhps\t{$src, $dst|$dst, $src}", [(store (f64 (vector_extract - (unpckh (bc_v2f64 (v4f32 VR128:$src)), - (undef)), (iPTR 0))), addr:$dst)]>; + (v2f64 (vector_shuffle + (bc_v2f64 (v4f32 VR128:$src)), (undef), + UNPCKH_shuffle_mask)), (iPTR 0))), + addr:$dst)]>; let Constraints = "$src1 = $dst" in { let AddedComplexity = 20 in { def MOVLHPSrr : PSI<0x16, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "movlhps\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v4f32 (movhp VR128:$src1, VR128:$src2)))]>; + (v4f32 (vector_shuffle VR128:$src1, VR128:$src2, + MOVHP_shuffle_mask)))]>; def MOVHLPSrr : PSI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "movhlps\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v4f32 (movhlps VR128:$src1, VR128:$src2)))]>; + (v4f32 (vector_shuffle VR128:$src1, VR128:$src2, + MOVHLPS_shuffle_mask)))]>; } // AddedComplexity } // Constraints = "$src1 = $dst" let AddedComplexity = 20 in -def : Pat<(v4f32 (movddup VR128:$src, (undef))), +def : Pat<(v4f32 (vector_shuffle VR128:$src, (undef), MOVDDUP_shuffle_mask)), (MOVLHPSrr VR128:$src, VR128:$src)>, Requires<[HasSSE1]>; @@ -907,41 +908,51 @@ let Constraints = "$src1 = $dst" in { let isConvertibleToThreeAddress = 1 in // Convert to pshufd def SHUFPSrri : PSIi8<0xC6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, - VR128:$src2, i8imm:$src3), + VR128:$src2, i32i8imm:$src3), "shufps\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(set VR128:$dst, - (v4f32 (shufp:$src3 VR128:$src1, VR128:$src2)))]>; + (v4f32 (vector_shuffle + VR128:$src1, VR128:$src2, + SHUFP_shuffle_mask:$src3)))]>; def SHUFPSrmi : PSIi8<0xC6, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, - f128mem:$src2, i8imm:$src3), + f128mem:$src2, i32i8imm:$src3), "shufps\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(set VR128:$dst, - (v4f32 (shufp:$src3 - VR128:$src1, (memopv4f32 addr:$src2))))]>; + (v4f32 (vector_shuffle + VR128:$src1, (memopv4f32 addr:$src2), + SHUFP_shuffle_mask:$src3)))]>; let AddedComplexity = 10 in { def UNPCKHPSrr : PSI<0x15, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "unpckhps\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v4f32 (unpckh VR128:$src1, VR128:$src2)))]>; + (v4f32 (vector_shuffle + VR128:$src1, VR128:$src2, + UNPCKH_shuffle_mask)))]>; def UNPCKHPSrm : PSI<0x15, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2), "unpckhps\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v4f32 (unpckh VR128:$src1, - (memopv4f32 addr:$src2))))]>; + (v4f32 (vector_shuffle + VR128:$src1, (memopv4f32 addr:$src2), + UNPCKH_shuffle_mask)))]>; def UNPCKLPSrr : PSI<0x14, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "unpcklps\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v4f32 (unpckl VR128:$src1, VR128:$src2)))]>; + (v4f32 (vector_shuffle + VR128:$src1, VR128:$src2, + UNPCKL_shuffle_mask)))]>; def UNPCKLPSrm : PSI<0x14, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2), "unpcklps\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (unpckl VR128:$src1, (memopv4f32 addr:$src2)))]>; + (v4f32 (vector_shuffle + VR128:$src1, (memopv4f32 addr:$src2), + UNPCKL_shuffle_mask)))]>; } // AddedComplexity } // Constraints = "$src1 = $dst" @@ -1033,7 +1044,8 @@ let neverHasSideEffects = 1 in (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "movss\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v4f32 (movl VR128:$src1, VR128:$src2)))]>; + (v4f32 (vector_shuffle VR128:$src1, VR128:$src2, + MOVL_shuffle_mask)))]>; } // Move to lower bits of a VR128 and zeroing upper bits. @@ -1439,14 +1451,16 @@ let Constraints = "$src1 = $dst" in { (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2), "movlpd\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v2f64 (movlp VR128:$src1, - (scalar_to_vector (loadf64 addr:$src2)))))]>; + (v2f64 (vector_shuffle VR128:$src1, + (scalar_to_vector (loadf64 addr:$src2)), + MOVLP_shuffle_mask)))]>; def MOVHPDrm : PDI<0x16, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2), "movhpd\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v2f64 (movhp VR128:$src1, - (scalar_to_vector (loadf64 addr:$src2)))))]>; + (v2f64 (vector_shuffle VR128:$src1, + (scalar_to_vector (loadf64 addr:$src2)), + MOVHP_shuffle_mask)))]>; } // AddedComplexity } // Constraints = "$src1 = $dst" @@ -1460,8 +1474,9 @@ def MOVLPDmr : PDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), def MOVHPDmr : PDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), "movhpd\t{$src, $dst|$dst, $src}", [(store (f64 (vector_extract - (v2f64 (unpckh VR128:$src, (undef))), - (iPTR 0))), addr:$dst)]>; + (v2f64 (vector_shuffle VR128:$src, (undef), + UNPCKH_shuffle_mask)), (iPTR 0))), + addr:$dst)]>; // SSE2 instructions without OpSize prefix def Int_CVTDQ2PSrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), @@ -1729,39 +1744,48 @@ let Constraints = "$src1 = $dst" in { def SHUFPDrri : PDIi8<0xC6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i8imm:$src3), "shufpd\t{$src3, $src2, $dst|$dst, $src2, $src3}", - [(set VR128:$dst, - (v2f64 (shufp:$src3 VR128:$src1, VR128:$src2)))]>; + [(set VR128:$dst, (v2f64 (vector_shuffle + VR128:$src1, VR128:$src2, + SHUFP_shuffle_mask:$src3)))]>; def SHUFPDrmi : PDIi8<0xC6, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2, i8imm:$src3), "shufpd\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(set VR128:$dst, - (v2f64 (shufp:$src3 - VR128:$src1, (memopv2f64 addr:$src2))))]>; + (v2f64 (vector_shuffle + VR128:$src1, (memopv2f64 addr:$src2), + SHUFP_shuffle_mask:$src3)))]>; let AddedComplexity = 10 in { def UNPCKHPDrr : PDI<0x15, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "unpckhpd\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v2f64 (unpckh VR128:$src1, VR128:$src2)))]>; + (v2f64 (vector_shuffle + VR128:$src1, VR128:$src2, + UNPCKH_shuffle_mask)))]>; def UNPCKHPDrm : PDI<0x15, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2), "unpckhpd\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v2f64 (unpckh VR128:$src1, - (memopv2f64 addr:$src2))))]>; + (v2f64 (vector_shuffle + VR128:$src1, (memopv2f64 addr:$src2), + UNPCKH_shuffle_mask)))]>; def UNPCKLPDrr : PDI<0x14, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "unpcklpd\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v2f64 (unpckl VR128:$src1, VR128:$src2)))]>; + (v2f64 (vector_shuffle + VR128:$src1, VR128:$src2, + UNPCKL_shuffle_mask)))]>; def UNPCKLPDrm : PDI<0x14, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2), "unpcklpd\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (unpckl VR128:$src1, (memopv2f64 addr:$src2)))]>; + (v2f64 (vector_shuffle + VR128:$src1, (memopv2f64 addr:$src2), + UNPCKL_shuffle_mask)))]>; } // AddedComplexity } // Constraints = "$src1 = $dst" @@ -2019,43 +2043,49 @@ defm PACKUSWB : PDI_binop_rm_int<0x67, "packuswb", int_x86_sse2_packuswb_128>; def PSHUFDri : PDIi8<0x70, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, i8imm:$src2), "pshufd\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR128:$dst, (v4i32 (pshufd:$src2 - VR128:$src1, (undef))))]>; + [(set VR128:$dst, (v4i32 (vector_shuffle + VR128:$src1, (undef), + PSHUFD_shuffle_mask:$src2)))]>; def PSHUFDmi : PDIi8<0x70, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2), "pshufd\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR128:$dst, (v4i32 (pshufd:$src2 + [(set VR128:$dst, (v4i32 (vector_shuffle (bc_v4i32(memopv2i64 addr:$src1)), - (undef))))]>; + (undef), + PSHUFD_shuffle_mask:$src2)))]>; // SSE2 with ImmT == Imm8 and XS prefix. def PSHUFHWri : Ii8<0x70, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, i8imm:$src2), "pshufhw\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR128:$dst, (v8i16 (pshufhw:$src2 VR128:$src1, - (undef))))]>, + [(set VR128:$dst, (v8i16 (vector_shuffle + VR128:$src1, (undef), + PSHUFHW_shuffle_mask:$src2)))]>, XS, Requires<[HasSSE2]>; def PSHUFHWmi : Ii8<0x70, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2), "pshufhw\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR128:$dst, (v8i16 (pshufhw:$src2 - (bc_v8i16 (memopv2i64 addr:$src1)), - (undef))))]>, + [(set VR128:$dst, (v8i16 (vector_shuffle + (bc_v8i16 (memopv2i64 addr:$src1)), + (undef), + PSHUFHW_shuffle_mask:$src2)))]>, XS, Requires<[HasSSE2]>; // SSE2 with ImmT == Imm8 and XD prefix. def PSHUFLWri : Ii8<0x70, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, i8imm:$src2), + (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2), "pshuflw\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR128:$dst, (v8i16 (pshuflw:$src2 VR128:$src1, - (undef))))]>, + [(set VR128:$dst, (v8i16 (vector_shuffle + VR128:$src1, (undef), + PSHUFLW_shuffle_mask:$src2)))]>, XD, Requires<[HasSSE2]>; def PSHUFLWmi : Ii8<0x70, MRMSrcMem, - (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2), + (outs VR128:$dst), (ins i128mem:$src1, i32i8imm:$src2), "pshuflw\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR128:$dst, (v8i16 (pshuflw:$src2 - (bc_v8i16 (memopv2i64 addr:$src1)), - (undef))))]>, + [(set VR128:$dst, (v8i16 (vector_shuffle + (bc_v8i16 (memopv2i64 addr:$src1)), + (undef), + PSHUFLW_shuffle_mask:$src2)))]>, XD, Requires<[HasSSE2]>; @@ -2064,91 +2094,107 @@ let Constraints = "$src1 = $dst" in { (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "punpcklbw\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v16i8 (unpckl VR128:$src1, VR128:$src2)))]>; + (v16i8 (vector_shuffle VR128:$src1, VR128:$src2, + UNPCKL_shuffle_mask)))]>; def PUNPCKLBWrm : PDI<0x60, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), "punpcklbw\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (unpckl VR128:$src1, - (bc_v16i8 (memopv2i64 addr:$src2))))]>; + (v16i8 (vector_shuffle VR128:$src1, + (bc_v16i8 (memopv2i64 addr:$src2)), + UNPCKL_shuffle_mask)))]>; def PUNPCKLWDrr : PDI<0x61, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "punpcklwd\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v8i16 (unpckl VR128:$src1, VR128:$src2)))]>; + (v8i16 (vector_shuffle VR128:$src1, VR128:$src2, + UNPCKL_shuffle_mask)))]>; def PUNPCKLWDrm : PDI<0x61, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), "punpcklwd\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (unpckl VR128:$src1, - (bc_v8i16 (memopv2i64 addr:$src2))))]>; + (v8i16 (vector_shuffle VR128:$src1, + (bc_v8i16 (memopv2i64 addr:$src2)), + UNPCKL_shuffle_mask)))]>; def PUNPCKLDQrr : PDI<0x62, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "punpckldq\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v4i32 (unpckl VR128:$src1, VR128:$src2)))]>; + (v4i32 (vector_shuffle VR128:$src1, VR128:$src2, + UNPCKL_shuffle_mask)))]>; def PUNPCKLDQrm : PDI<0x62, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), "punpckldq\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (unpckl VR128:$src1, - (bc_v4i32 (memopv2i64 addr:$src2))))]>; + (v4i32 (vector_shuffle VR128:$src1, + (bc_v4i32 (memopv2i64 addr:$src2)), + UNPCKL_shuffle_mask)))]>; def PUNPCKLQDQrr : PDI<0x6C, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "punpcklqdq\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v2i64 (unpckl VR128:$src1, VR128:$src2)))]>; + (v2i64 (vector_shuffle VR128:$src1, VR128:$src2, + UNPCKL_shuffle_mask)))]>; def PUNPCKLQDQrm : PDI<0x6C, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), "punpcklqdq\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v2i64 (unpckl VR128:$src1, - (memopv2i64 addr:$src2))))]>; + (v2i64 (vector_shuffle VR128:$src1, + (memopv2i64 addr:$src2), + UNPCKL_shuffle_mask)))]>; def PUNPCKHBWrr : PDI<0x68, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "punpckhbw\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v16i8 (unpckh VR128:$src1, VR128:$src2)))]>; + (v16i8 (vector_shuffle VR128:$src1, VR128:$src2, + UNPCKH_shuffle_mask)))]>; def PUNPCKHBWrm : PDI<0x68, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), "punpckhbw\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (unpckh VR128:$src1, - (bc_v16i8 (memopv2i64 addr:$src2))))]>; + [(set VR128:$dst, + (v16i8 (vector_shuffle VR128:$src1, + (bc_v16i8 (memopv2i64 addr:$src2)), + UNPCKH_shuffle_mask)))]>; def PUNPCKHWDrr : PDI<0x69, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "punpckhwd\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v8i16 (unpckh VR128:$src1, VR128:$src2)))]>; + (v8i16 (vector_shuffle VR128:$src1, VR128:$src2, + UNPCKH_shuffle_mask)))]>; def PUNPCKHWDrm : PDI<0x69, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), "punpckhwd\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (unpckh VR128:$src1, - (bc_v8i16 (memopv2i64 addr:$src2))))]>; + (v8i16 (vector_shuffle VR128:$src1, + (bc_v8i16 (memopv2i64 addr:$src2)), + UNPCKH_shuffle_mask)))]>; def PUNPCKHDQrr : PDI<0x6A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "punpckhdq\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v4i32 (unpckh VR128:$src1, VR128:$src2)))]>; + (v4i32 (vector_shuffle VR128:$src1, VR128:$src2, + UNPCKH_shuffle_mask)))]>; def PUNPCKHDQrm : PDI<0x6A, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), "punpckhdq\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (unpckh VR128:$src1, - (bc_v4i32 (memopv2i64 addr:$src2))))]>; + (v4i32 (vector_shuffle VR128:$src1, + (bc_v4i32 (memopv2i64 addr:$src2)), + UNPCKH_shuffle_mask)))]>; def PUNPCKHQDQrr : PDI<0x6D, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "punpckhqdq\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v2i64 (unpckh VR128:$src1, VR128:$src2)))]>; + (v2i64 (vector_shuffle VR128:$src1, VR128:$src2, + UNPCKH_shuffle_mask)))]>; def PUNPCKHQDQrm : PDI<0x6D, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), "punpckhqdq\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v2i64 (unpckh VR128:$src1, - (memopv2i64 addr:$src2))))]>; + (v2i64 (vector_shuffle VR128:$src1, + (memopv2i64 addr:$src2), + UNPCKH_shuffle_mask)))]>; } // Extract / Insert @@ -2311,7 +2357,8 @@ let Constraints = "$src1 = $dst" in { (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "movsd\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v2f64 (movl VR128:$src1, VR128:$src2)))]>; + (v2f64 (vector_shuffle VR128:$src1, VR128:$src2, + MOVL_shuffle_mask)))]>; } // Store / copy lower 64-bits of a XMM register. @@ -2402,35 +2449,44 @@ def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4i32 addr:$src)))), // Move Instructions def MOVSHDUPrr : S3SI<0x16, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "movshdup\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (v4f32 (movshdup - VR128:$src, (undef))))]>; + [(set VR128:$dst, (v4f32 (vector_shuffle + VR128:$src, (undef), + MOVSHDUP_shuffle_mask)))]>; def MOVSHDUPrm : S3SI<0x16, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "movshdup\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (movshdup - (memopv4f32 addr:$src), (undef)))]>; + [(set VR128:$dst, (v4f32 (vector_shuffle + (memopv4f32 addr:$src), (undef), + MOVSHDUP_shuffle_mask)))]>; def MOVSLDUPrr : S3SI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "movsldup\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (v4f32 (movsldup - VR128:$src, (undef))))]>; + [(set VR128:$dst, (v4f32 (vector_shuffle + VR128:$src, (undef), + MOVSLDUP_shuffle_mask)))]>; def MOVSLDUPrm : S3SI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "movsldup\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (movsldup - (memopv4f32 addr:$src), (undef)))]>; + [(set VR128:$dst, (v4f32 (vector_shuffle + (memopv4f32 addr:$src), (undef), + MOVSLDUP_shuffle_mask)))]>; def MOVDDUPrr : S3DI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "movddup\t{$src, $dst|$dst, $src}", - [(set VR128:$dst,(v2f64 (movddup VR128:$src, (undef))))]>; + [(set VR128:$dst, + (v2f64 (vector_shuffle VR128:$src, (undef), + MOVDDUP_shuffle_mask)))]>; def MOVDDUPrm : S3DI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), "movddup\t{$src, $dst|$dst, $src}", [(set VR128:$dst, - (v2f64 (movddup (scalar_to_vector (loadf64 addr:$src)), - (undef))))]>; + (v2f64 (vector_shuffle + (scalar_to_vector (loadf64 addr:$src)), + (undef), MOVDDUP_shuffle_mask)))]>; -def : Pat<(movddup (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src)))), - (undef)), +def : Pat<(vector_shuffle + (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src)))), + (undef), MOVDDUP_shuffle_mask), (MOVDDUPrm addr:$src)>, Requires<[HasSSE3]>; -def : Pat<(movddup (memopv2f64 addr:$src), (undef)), +def : Pat<(vector_shuffle + (memopv2f64 addr:$src), (undef), MOVDDUP_shuffle_mask), (MOVDDUPrm addr:$src)>, Requires<[HasSSE3]>; @@ -2499,18 +2555,22 @@ def MWAIT : I<0xC9, RawFrm, (outs), (ins), "mwait", // vector_shuffle v1, <undef> <1, 1, 3, 3> let AddedComplexity = 15 in -def : Pat<(v4i32 (movshdup VR128:$src, (undef))), +def : Pat<(v4i32 (vector_shuffle VR128:$src, (undef), + MOVSHDUP_shuffle_mask)), (MOVSHDUPrr VR128:$src)>, Requires<[HasSSE3]>; let AddedComplexity = 20 in -def : Pat<(v4i32 (movshdup (bc_v4i32 (memopv2i64 addr:$src)), (undef))), +def : Pat<(v4i32 (vector_shuffle (bc_v4i32 (memopv2i64 addr:$src)), (undef), + MOVSHDUP_shuffle_mask)), (MOVSHDUPrm addr:$src)>, Requires<[HasSSE3]>; // vector_shuffle v1, <undef> <0, 0, 2, 2> let AddedComplexity = 15 in - def : Pat<(v4i32 (movsldup VR128:$src, (undef))), + def : Pat<(v4i32 (vector_shuffle VR128:$src, (undef), + MOVSLDUP_shuffle_mask)), (MOVSLDUPrr VR128:$src)>, Requires<[HasSSE3]>; let AddedComplexity = 20 in - def : Pat<(v4i32 (movsldup (bc_v4i32 (memopv2i64 addr:$src)), (undef))), + def : Pat<(v4i32 (vector_shuffle (bc_v4i32 (memopv2i64 addr:$src)), (undef), + MOVSLDUP_shuffle_mask)), (MOVSLDUPrm addr:$src)>, Requires<[HasSSE3]>; //===----------------------------------------------------------------------===// @@ -2851,173 +2911,207 @@ def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))), // Splat v2f64 / v2i64 let AddedComplexity = 10 in { -def : Pat<(splat_lo (v2f64 VR128:$src), (undef)), +def : Pat<(vector_shuffle (v2f64 VR128:$src), (undef), SSE_splat_lo_mask:$sm), (UNPCKLPDrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>; -def : Pat<(unpckh (v2f64 VR128:$src), (undef)), +def : Pat<(vector_shuffle (v2f64 VR128:$src), (undef), UNPCKH_shuffle_mask:$sm), (UNPCKHPDrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>; -def : Pat<(splat_lo (v2i64 VR128:$src), (undef)), +def : Pat<(vector_shuffle (v2i64 VR128:$src), (undef), SSE_splat_lo_mask:$sm), (PUNPCKLQDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>; -def : Pat<(unpckh (v2i64 VR128:$src), (undef)), +def : Pat<(vector_shuffle (v2i64 VR128:$src), (undef), UNPCKH_shuffle_mask:$sm), (PUNPCKHQDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>; } // Special unary SHUFPSrri case. -def : Pat<(v4f32 (pshufd:$src3 VR128:$src1, (undef))), - (SHUFPSrri VR128:$src1, VR128:$src1, - (SHUFFLE_get_shuf_imm VR128:$src3))>, +def : Pat<(v4f32 (vector_shuffle VR128:$src1, (undef), + SHUFP_unary_shuffle_mask:$sm)), + (SHUFPSrri VR128:$src1, VR128:$src1, SHUFP_unary_shuffle_mask:$sm)>, Requires<[HasSSE1]>; -let AddedComplexity = 5 in -def : Pat<(v4f32 (pshufd:$src2 VR128:$src1, (undef))), - (PSHUFDri VR128:$src1, (SHUFFLE_get_shuf_imm VR128:$src2))>, - Requires<[HasSSE2]>; -// Special unary SHUFPDrri case. -def : Pat<(v2i64 (pshufd:$src3 VR128:$src1, (undef))), - (SHUFPDrri VR128:$src1, VR128:$src1, - (SHUFFLE_get_shuf_imm VR128:$src3))>, - Requires<[HasSSE2]>; // Special unary SHUFPDrri case. -def : Pat<(v2f64 (pshufd:$src3 VR128:$src1, (undef))), - (SHUFPDrri VR128:$src1, VR128:$src1, - (SHUFFLE_get_shuf_imm VR128:$src3))>, +def : Pat<(v2f64 (vector_shuffle VR128:$src1, (undef), + SHUFP_unary_shuffle_mask:$sm)), + (SHUFPDrri VR128:$src1, VR128:$src1, SHUFP_unary_shuffle_mask:$sm)>, Requires<[HasSSE2]>; // Unary v4f32 shuffle with PSHUF* in order to fold a load. -def : Pat<(pshufd:$src2 (bc_v4i32 (memopv4f32 addr:$src1)), (undef)), - (PSHUFDmi addr:$src1, (SHUFFLE_get_shuf_imm VR128:$src2))>, +def : Pat<(vector_shuffle (bc_v4i32 (memopv4f32 addr:$src1)), (undef), + SHUFP_unary_shuffle_mask:$sm), + (PSHUFDmi addr:$src1, SHUFP_unary_shuffle_mask:$sm)>, Requires<[HasSSE2]>; // Special binary v4i32 shuffle cases with SHUFPS. -def : Pat<(v4i32 (shufp:$src3 VR128:$src1, (v4i32 VR128:$src2))), - (SHUFPSrri VR128:$src1, VR128:$src2, - (SHUFFLE_get_shuf_imm VR128:$src3))>, +def : Pat<(v4i32 (vector_shuffle VR128:$src1, (v4i32 VR128:$src2), + PSHUFD_binary_shuffle_mask:$sm)), + (SHUFPSrri VR128:$src1, VR128:$src2, PSHUFD_binary_shuffle_mask:$sm)>, Requires<[HasSSE2]>; -def : Pat<(v4i32 (shufp:$src3 VR128:$src1, (bc_v4i32 (memopv2i64 addr:$src2)))), - (SHUFPSrmi VR128:$src1, addr:$src2, - (SHUFFLE_get_shuf_imm VR128:$src3))>, +def : Pat<(v4i32 (vector_shuffle VR128:$src1, + (bc_v4i32 (memopv2i64 addr:$src2)), PSHUFD_binary_shuffle_mask:$sm)), + (SHUFPSrmi VR128:$src1, addr:$src2, PSHUFD_binary_shuffle_mask:$sm)>, Requires<[HasSSE2]>; // Special binary v2i64 shuffle cases using SHUFPDrri. -def : Pat<(v2i64 (shufp:$src3 VR128:$src1, VR128:$src2)), - (SHUFPDrri VR128:$src1, VR128:$src2, - (SHUFFLE_get_shuf_imm VR128:$src3))>, +def : Pat<(v2i64 (vector_shuffle VR128:$src1, VR128:$src2, + SHUFP_shuffle_mask:$sm)), + (SHUFPDrri VR128:$src1, VR128:$src2, SHUFP_shuffle_mask:$sm)>, Requires<[HasSSE2]>; +// Special unary SHUFPDrri case. +def : Pat<(v2i64 (vector_shuffle VR128:$src1, (undef), + SHUFP_unary_shuffle_mask:$sm)), + (SHUFPDrri VR128:$src1, VR128:$src1, SHUFP_unary_shuffle_mask:$sm)>, + Requires<[HasSSE2]>; // vector_shuffle v1, <undef>, <0, 0, 1, 1, ...> let AddedComplexity = 15 in { -def : Pat<(v4i32 (unpckl_undef:$src2 VR128:$src, (undef))), - (PSHUFDri VR128:$src, (SHUFFLE_get_shuf_imm VR128:$src2))>, +def : Pat<(v4i32 (vector_shuffle VR128:$src, (undef), + UNPCKL_v_undef_shuffle_mask:$sm)), + (PSHUFDri VR128:$src, PSHUFD_shuffle_mask:$sm)>, Requires<[OptForSpeed, HasSSE2]>; -def : Pat<(v4f32 (unpckl_undef:$src2 VR128:$src, (undef))), - (PSHUFDri VR128:$src, (SHUFFLE_get_shuf_imm VR128:$src2))>, +def : Pat<(v4f32 (vector_shuffle VR128:$src, (undef), + UNPCKL_v_undef_shuffle_mask:$sm)), + (PSHUFDri VR128:$src, PSHUFD_shuffle_mask:$sm)>, Requires<[OptForSpeed, HasSSE2]>; } let AddedComplexity = 10 in { -def : Pat<(v4f32 (unpckl_undef VR128:$src, (undef))), +def : Pat<(v4f32 (vector_shuffle VR128:$src, (undef), + UNPCKL_v_undef_shuffle_mask)), (UNPCKLPSrr VR128:$src, VR128:$src)>, Requires<[HasSSE1]>; -def : Pat<(v16i8 (unpckl_undef VR128:$src, (undef))), +def : Pat<(v16i8 (vector_shuffle VR128:$src, (undef), + UNPCKL_v_undef_shuffle_mask)), (PUNPCKLBWrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>; -def : Pat<(v8i16 (unpckl_undef VR128:$src, (undef))), +def : Pat<(v8i16 (vector_shuffle VR128:$src, (undef), + UNPCKL_v_undef_shuffle_mask)), (PUNPCKLWDrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>; -def : Pat<(v4i32 (unpckl_undef VR128:$src, (undef))), +def : Pat<(v4i32 (vector_shuffle VR128:$src, (undef), + UNPCKL_v_undef_shuffle_mask)), (PUNPCKLDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>; } // vector_shuffle v1, <undef>, <2, 2, 3, 3, ...> let AddedComplexity = 15 in { -def : Pat<(v4i32 (unpckh_undef:$src2 VR128:$src, (undef))), - (PSHUFDri VR128:$src, (SHUFFLE_get_shuf_imm VR128:$src2))>, +def : Pat<(v4i32 (vector_shuffle VR128:$src, (undef), + UNPCKH_v_undef_shuffle_mask:$sm)), + (PSHUFDri VR128:$src, PSHUFD_shuffle_mask:$sm)>, Requires<[OptForSpeed, HasSSE2]>; -def : Pat<(v4f32 (unpckh_undef:$src2 VR128:$src, (undef))), - (PSHUFDri VR128:$src, (SHUFFLE_get_shuf_imm VR128:$src2))>, +def : Pat<(v4f32 (vector_shuffle VR128:$src, (undef), + UNPCKH_v_undef_shuffle_mask:$sm)), + (PSHUFDri VR128:$src, PSHUFD_shuffle_mask:$sm)>, Requires<[OptForSpeed, HasSSE2]>; } let AddedComplexity = 10 in { -def : Pat<(v4f32 (unpckh_undef VR128:$src, (undef))), +def : Pat<(v4f32 (vector_shuffle VR128:$src, (undef), + UNPCKH_v_undef_shuffle_mask)), (UNPCKHPSrr VR128:$src, VR128:$src)>, Requires<[HasSSE1]>; -def : Pat<(v16i8 (unpckh_undef VR128:$src, (undef))), +def : Pat<(v16i8 (vector_shuffle VR128:$src, (undef), + UNPCKH_v_undef_shuffle_mask)), (PUNPCKHBWrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>; -def : Pat<(v8i16 (unpckh_undef VR128:$src, (undef))), +def : Pat<(v8i16 (vector_shuffle VR128:$src, (undef), + UNPCKH_v_undef_shuffle_mask)), (PUNPCKHWDrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>; -def : Pat<(v4i32 (unpckh_undef VR128:$src, (undef))), +def : Pat<(v4i32 (vector_shuffle VR128:$src, (undef), + UNPCKH_v_undef_shuffle_mask)), (PUNPCKHDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>; } let AddedComplexity = 20 in { // vector_shuffle v1, v2 <0, 1, 4, 5> using MOVLHPS -def : Pat<(v4i32 (movhp VR128:$src1, VR128:$src2)), +def : Pat<(v4i32 (vector_shuffle VR128:$src1, VR128:$src2, + MOVHP_shuffle_mask)), (MOVLHPSrr VR128:$src1, VR128:$src2)>; // vector_shuffle v1, v2 <6, 7, 2, 3> using MOVHLPS -def : Pat<(v4i32 (movhlps VR128:$src1, VR128:$src2)), +def : Pat<(v4i32 (vector_shuffle VR128:$src1, VR128:$src2, + MOVHLPS_shuffle_mask)), (MOVHLPSrr VR128:$src1, VR128:$src2)>; // vector_shuffle v1, undef <2, ?, ?, ?> using MOVHLPS -def : Pat<(v4f32 (movhlps_undef VR128:$src1, (undef))), +def : Pat<(v4f32 (vector_shuffle VR128:$src1, (undef), + MOVHLPS_v_undef_shuffle_mask)), (MOVHLPSrr VR128:$src1, VR128:$src1)>; -def : Pat<(v4i32 (movhlps_undef VR128:$src1, (undef))), +def : Pat<(v4i32 (vector_shuffle VR128:$src1, (undef), + MOVHLPS_v_undef_shuffle_mask)), (MOVHLPSrr VR128:$src1, VR128:$src1)>; } let AddedComplexity = 20 in { // vector_shuffle v1, (load v2) <4, 5, 2, 3> using MOVLPS // vector_shuffle v1, (load v2) <0, 1, 4, 5> using MOVHPS -def : Pat<(v4f32 (movlp VR128:$src1, (load addr:$src2))), +def : Pat<(v4f32 (vector_shuffle VR128:$src1, (load addr:$src2), + MOVLP_shuffle_mask)), (MOVLPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE1]>; -def : Pat<(v2f64 (movlp VR128:$src1, (load addr:$src2))), +def : Pat<(v2f64 (vector_shuffle VR128:$src1, (load addr:$src2), + MOVLP_shuffle_mask)), (MOVLPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>; -def : Pat<(v4f32 (movhp VR128:$src1, (load addr:$src2))), +def : Pat<(v4f32 (vector_shuffle VR128:$src1, (load addr:$src2), + MOVHP_shuffle_mask)), (MOVHPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE1]>; -def : Pat<(v2f64 (movhp VR128:$src1, (load addr:$src2))), +def : Pat<(v2f64 (vector_shuffle VR128:$src1, (load addr:$src2), + MOVHP_shuffle_mask)), (MOVHPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>; -def : Pat<(v4i32 (movlp VR128:$src1, (load addr:$src2))), +def : Pat<(v4i32 (vector_shuffle VR128:$src1, (load addr:$src2), + MOVLP_shuffle_mask)), (MOVLPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>; -def : Pat<(v2i64 (movlp VR128:$src1, (load addr:$src2))), +def : Pat<(v2i64 (vector_shuffle VR128:$src1, (load addr:$src2), + MOVLP_shuffle_mask)), (MOVLPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>; -def : Pat<(v4i32 (movhp VR128:$src1, (load addr:$src2))), +def : Pat<(v4i32 (vector_shuffle VR128:$src1, (load addr:$src2), + MOVHP_shuffle_mask)), (MOVHPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE1]>; -def : Pat<(v2i64 (movhp VR128:$src1, (load addr:$src2))), +def : Pat<(v2i64 (vector_shuffle VR128:$src1, (load addr:$src2), + MOVHP_shuffle_mask)), (MOVHPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>; } // (store (vector_shuffle (load addr), v2, <4, 5, 2, 3>), addr) using MOVLPS // (store (vector_shuffle (load addr), v2, <0, 1, 4, 5>), addr) using MOVHPS -def : Pat<(store (v4f32 (movlp (load addr:$src1), VR128:$src2)), addr:$src1), +def : Pat<(store (v4f32 (vector_shuffle (load addr:$src1), VR128:$src2, + MOVLP_shuffle_mask)), addr:$src1), (MOVLPSmr addr:$src1, VR128:$src2)>, Requires<[HasSSE1]>; -def : Pat<(store (v2f64 (movlp (load addr:$src1), VR128:$src2)), addr:$src1), +def : Pat<(store (v2f64 (vector_shuffle (load addr:$src1), VR128:$src2, + MOVLP_shuffle_mask)), addr:$src1), (MOVLPDmr addr:$src1, VR128:$src2)>, Requires<[HasSSE2]>; -def : Pat<(store (v4f32 (movhp (load addr:$src1), VR128:$src2)), addr:$src1), +def : Pat<(store (v4f32 (vector_shuffle (load addr:$src1), VR128:$src2, + MOVHP_shuffle_mask)), addr:$src1), (MOVHPSmr addr:$src1, VR128:$src2)>, Requires<[HasSSE1]>; -def : Pat<(store (v2f64 (movhp (load addr:$src1), VR128:$src2)), addr:$src1), +def : Pat<(store (v2f64 (vector_shuffle (load addr:$src1), VR128:$src2, + MOVHP_shuffle_mask)), addr:$src1), (MOVHPDmr addr:$src1, VR128:$src2)>, Requires<[HasSSE2]>; -def : Pat<(store (v4i32 (movlp (bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2)), - addr:$src1), +def : Pat<(store (v4i32 (vector_shuffle + (bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2, + MOVLP_shuffle_mask)), addr:$src1), (MOVLPSmr addr:$src1, VR128:$src2)>, Requires<[HasSSE1]>; -def : Pat<(store (v2i64 (movlp (load addr:$src1), VR128:$src2)), addr:$src1), +def : Pat<(store (v2i64 (vector_shuffle (load addr:$src1), VR128:$src2, + MOVLP_shuffle_mask)), addr:$src1), (MOVLPDmr addr:$src1, VR128:$src2)>, Requires<[HasSSE2]>; -def : Pat<(store (v4i32 (movhp (bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2)), - addr:$src1), +def : Pat<(store (v4i32 (vector_shuffle + (bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2, + MOVHP_shuffle_mask)), addr:$src1), (MOVHPSmr addr:$src1, VR128:$src2)>, Requires<[HasSSE1]>; -def : Pat<(store (v2i64 (movhp (load addr:$src1), VR128:$src2)), addr:$src1), +def : Pat<(store (v2i64 (vector_shuffle (load addr:$src1), VR128:$src2, + MOVHP_shuffle_mask)), addr:$src1), (MOVHPDmr addr:$src1, VR128:$src2)>, Requires<[HasSSE2]>; let AddedComplexity = 15 in { // Setting the lowest element in the vector. -def : Pat<(v4i32 (movl VR128:$src1, VR128:$src2)), +def : Pat<(v4i32 (vector_shuffle VR128:$src1, VR128:$src2, + MOVL_shuffle_mask)), (MOVLPSrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>; -def : Pat<(v2i64 (movl VR128:$src1, VR128:$src2)), +def : Pat<(v2i64 (vector_shuffle VR128:$src1, VR128:$src2, + MOVL_shuffle_mask)), (MOVLPDrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>; // vector_shuffle v1, v2 <4, 5, 2, 3> using MOVLPDrr (movsd) -def : Pat<(v4f32 (movlp VR128:$src1, VR128:$src2)), +def : Pat<(v4f32 (vector_shuffle VR128:$src1, VR128:$src2, + MOVLP_shuffle_mask)), (MOVLPDrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>; -def : Pat<(v4i32 (movlp VR128:$src1, VR128:$src2)), +def : Pat<(v4i32 (vector_shuffle VR128:$src1, VR128:$src2, + MOVLP_shuffle_mask)), (MOVLPDrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>; } // Set lowest element and zero upper elements. let AddedComplexity = 15 in -def : Pat<(v2f64 (movl immAllZerosV_bc, VR128:$src)), +def : Pat<(v2f64 (vector_shuffle immAllZerosV_bc, VR128:$src, + MOVL_shuffle_mask)), (MOVZPQILo2PQIrr VR128:$src)>, Requires<[HasSSE2]>; def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))), (MOVZPQILo2PQIrr VR128:$src)>, Requires<[HasSSE2]>; |