aboutsummaryrefslogtreecommitdiff
path: root/lib/Target/X86/X86ISelLowering.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/X86/X86ISelLowering.cpp')
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp63
1 files changed, 63 insertions, 0 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index c4e44f999d..1c827f0dd3 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -2747,6 +2747,7 @@ static bool isTargetShuffle(unsigned Opcode) {
case X86ISD::PUNPCKHBW:
case X86ISD::PUNPCKHDQ:
case X86ISD::PUNPCKHQDQ:
+ case X86ISD::VPERMIL:
return true;
}
return false;
@@ -2772,6 +2773,7 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
case X86ISD::PSHUFD:
case X86ISD::PSHUFHW:
case X86ISD::PSHUFLW:
+ case X86ISD::VPERMIL:
return DAG.getNode(Opc, dl, VT, V1, DAG.getConstant(TargetMask, MVT::i8));
}
@@ -3422,6 +3424,54 @@ bool X86::isMOVLMask(ShuffleVectorSDNode *N) {
return ::isMOVLMask(M, N->getValueType(0));
}
+/// isVPERMILMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to VPERMIL*.
+static bool isVPERMILMask(const SmallVectorImpl<int> &Mask, EVT VT) {
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned NumLanes = VT.getSizeInBits()/128;
+
+ // Match any permutation of 128-bit vector with 32/64-bit types
+ if (NumLanes == 1) {
+ if (NumElts == 4 || NumElts == 2)
+ return true;
+ return false;
+ }
+
+ // Only match 256-bit with 32/64-bit types
+ if (NumElts != 8 && NumElts != 4)
+ return false;
+
+ // The mask on the high lane should be the same as the low. Actually,
+ // they can differ if any of the corresponding index in a lane is undef.
+ int LaneSize = NumElts/NumLanes;
+ for (int i = 0; i < LaneSize; ++i) {
+ int HighElt = i+LaneSize;
+ if (Mask[i] < 0 || Mask[HighElt] < 0)
+ continue;
+
+ if (Mask[HighElt]-Mask[i] != LaneSize)
+ return false;
+ }
+
+ return true;
+}
+
+/// getShuffleVPERMILImmediateediate - Return the appropriate immediate to shuffle
+/// the specified VECTOR_MASK mask with VPERMIL* instructions.
+static unsigned getShuffleVPERMILImmediate(SDNode *N) {
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
+ EVT VT = SVOp->getValueType(0);
+
+ int NumElts = VT.getVectorNumElements();
+ int NumLanes = VT.getSizeInBits()/128;
+
+ unsigned Mask = 0;
+ for (int i = 0; i < NumElts/NumLanes /* lane size */; ++i)
+ Mask |= SVOp->getMaskElt(i) << (i*2);
+
+ return Mask;
+}
+
/// isCommutedMOVL - Returns true if the shuffle mask is except the reverse
/// of what x86 movss want. X86 movs requires the lowest element to be lowest
/// element of vector 2 and the other elements to come from vector 1 in order.
@@ -4097,6 +4147,10 @@ static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG,
return getShuffleScalarElt(V.getOperand(OpNum).getNode(), Index, DAG,
Depth+1);
}
+ case X86ISD::VPERMIL:
+ ImmN = N->getOperand(N->getNumOperands()-1);
+ DecodeVPERMILMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(),
+ ShuffleMask);
default:
assert("not implemented for target shuffle node");
return SDValue();
@@ -6043,6 +6097,13 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
if (NumElems == 4)
return LowerVECTOR_SHUFFLE_4wide(SVOp, DAG);
+ // Handle VPERMIL permutations
+ if (isVPERMILMask(M, VT)) {
+ unsigned TargetMask = getShuffleVPERMILImmediate(SVOp);
+ if (VT == MVT::v8f32)
+ return getTargetShuffleNode(X86ISD::VPERMIL, dl, VT, V1, TargetMask, DAG);
+ }
+
return SDValue();
}
@@ -9660,6 +9721,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::PUNPCKHWD: return "X86ISD::PUNPCKHWD";
case X86ISD::PUNPCKHDQ: return "X86ISD::PUNPCKHDQ";
case X86ISD::PUNPCKHQDQ: return "X86ISD::PUNPCKHQDQ";
+ case X86ISD::VPERMIL: return "X86ISD::VPERMIL";
case X86ISD::VASTART_SAVE_XMM_REGS: return "X86ISD::VASTART_SAVE_XMM_REGS";
case X86ISD::VAARG_64: return "X86ISD::VAARG_64";
case X86ISD::WIN_ALLOCA: return "X86ISD::WIN_ALLOCA";
@@ -12465,6 +12527,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case X86ISD::PSHUFLW:
case X86ISD::MOVSS:
case X86ISD::MOVSD:
+ case X86ISD::VPERMIL:
case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, DCI);
}