aboutsummaryrefslogtreecommitdiff
path: root/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp
diff options
context:
space:
mode:
authorMon P Wang <wangmp@apple.com>2008-11-16 05:06:27 +0000
committerMon P Wang <wangmp@apple.com>2008-11-16 05:06:27 +0000
commitc7849c22f4804075c0c972e20f9cd701bdb6ab6f (patch)
tree9f4dda703c98f0b9f5a3db4fdc973b15b3e15ac5 /lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp
parent29cd5ba621268c44c8be72397e00c79899ecdf0b (diff)
Improved shuffle normalization to avoid using extract/build when we
can extract using different indexes for two vectors. Added a few tests for vector shuffles. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@59399 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp')
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp218
1 files changed, 122 insertions, 96 deletions
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp
index 436e5ffe5d..4a93452689 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp
@@ -2292,8 +2292,8 @@ void SelectionDAGLowering::visitExtractElement(User &I) {
// Utility for visitShuffleVector - Returns true if the mask is mask starting
// from SIndx and increasing to the element length (undefs are allowed).
static bool SequentialMask(SDValue Mask, unsigned SIndx) {
- unsigned NumElems = Mask.getNumOperands();
- for (unsigned i = 0; i != NumElems; ++i) {
+ unsigned MaskNumElts = Mask.getNumOperands();
+ for (unsigned i = 0; i != MaskNumElts; ++i) {
if (Mask.getOperand(i).getOpcode() != ISD::UNDEF) {
unsigned Idx = cast<ConstantSDNode>(Mask.getOperand(i))->getZExtValue();
if (Idx != i + SIndx)
@@ -2304,161 +2304,187 @@ static bool SequentialMask(SDValue Mask, unsigned SIndx) {
}
void SelectionDAGLowering::visitShuffleVector(User &I) {
- SDValue V1 = getValue(I.getOperand(0));
- SDValue V2 = getValue(I.getOperand(1));
+ SDValue Srcs[2];
+ Srcs[0] = getValue(I.getOperand(0));
+ Srcs[1] = getValue(I.getOperand(1));
SDValue Mask = getValue(I.getOperand(2));
MVT VT = TLI.getValueType(I.getType());
- MVT VT1 = V1.getValueType();
- unsigned MaskNumElts = Mask.getNumOperands();
- unsigned Src1NumElts = VT1.getVectorNumElements();
+ MVT SrcVT = Srcs[0].getValueType();
+ int MaskNumElts = Mask.getNumOperands();
+ int SrcNumElts = SrcVT.getVectorNumElements();
- if (Src1NumElts == MaskNumElts) {
- setValue(&I, DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask));
+ if (SrcNumElts == MaskNumElts) {
+ setValue(&I, DAG.getNode(ISD::VECTOR_SHUFFLE, VT, Srcs[0], Srcs[1], Mask));
return;
}
// Normalize the shuffle vector since mask and vector length don't match.
- if (Src1NumElts < MaskNumElts && MaskNumElts % Src1NumElts == 0) {
- // We can concat vectors to make the mask and input vector match.
- if (Src1NumElts*2 == MaskNumElts && SequentialMask(Mask, 0)) {
- // The shuffle is concatenating two vectors.
- setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, VT, V1, V2));
+ MVT MaskEltVT = Mask.getValueType().getVectorElementType();
+
+ if (SrcNumElts < MaskNumElts && MaskNumElts % SrcNumElts == 0) {
+ // Mask is longer than the source vectors and is a multiple of the source
+ // vectors. We can use concatenate vector to make the mask and vectors
+ // length match.
+ if (SrcNumElts*2 == MaskNumElts && SequentialMask(Mask, 0)) {
+ // The shuffle is concatenating two vectors together.
+ setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, VT, Srcs[0], Srcs[1]));
return;
}
- // Pad both vectors with undefs to the same size as the mask.
- unsigned NumConcat = MaskNumElts / Src1NumElts;
- std::vector<SDValue> UnOps(Src1NumElts,
- DAG.getNode(ISD::UNDEF,
- VT1.getVectorElementType()));
- SDValue UndefVal = DAG.getNode(ISD::BUILD_VECTOR, VT1,
- &UnOps[0], UnOps.size());
+ // Pad both vectors with undefs to make them the same length as the mask.
+ unsigned NumConcat = MaskNumElts / SrcNumElts;
+ SDValue UndefVal = DAG.getNode(ISD::UNDEF, SrcVT);
SmallVector<SDValue, 8> MOps1, MOps2;
- MOps1.push_back(V1);
- MOps2.push_back(V2);
+ MOps1.push_back(Srcs[0]);
+ MOps2.push_back(Srcs[1]);
for (unsigned i = 1; i != NumConcat; ++i) {
MOps1.push_back(UndefVal);
MOps2.push_back(UndefVal);
}
- V1 = DAG.getNode(ISD::CONCAT_VECTORS, VT, &MOps1[0], MOps1.size());
- V2 = DAG.getNode(ISD::CONCAT_VECTORS, VT, &MOps2[0], MOps2.size());
+ Srcs[0] = DAG.getNode(ISD::CONCAT_VECTORS, VT, &MOps1[0], MOps1.size());
+ Srcs[1] = DAG.getNode(ISD::CONCAT_VECTORS, VT, &MOps2[0], MOps2.size());
// Readjust mask for new input vector length.
SmallVector<SDValue, 8> MappedOps;
- for (unsigned i = 0; i != MaskNumElts; ++i) {
+ for (int i = 0; i != MaskNumElts; ++i) {
if (Mask.getOperand(i).getOpcode() == ISD::UNDEF) {
MappedOps.push_back(Mask.getOperand(i));
} else {
- unsigned Idx = cast<ConstantSDNode>(Mask.getOperand(i))->getZExtValue();
- if (Idx < Src1NumElts) {
- MappedOps.push_back(DAG.getConstant(Idx,
- Mask.getOperand(i).getValueType()));
- } else {
- MappedOps.push_back(DAG.getConstant(Idx + MaskNumElts - Src1NumElts,
- Mask.getOperand(i).getValueType()));
- }
+ int Idx = cast<ConstantSDNode>(Mask.getOperand(i))->getZExtValue();
+ if (Idx < SrcNumElts)
+ MappedOps.push_back(DAG.getConstant(Idx, MaskEltVT));
+ else
+ MappedOps.push_back(DAG.getConstant(Idx + MaskNumElts - SrcNumElts,
+ MaskEltVT));
}
}
Mask = DAG.getNode(ISD::BUILD_VECTOR, Mask.getValueType(),
&MappedOps[0], MappedOps.size());
- setValue(&I, DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask));
+ setValue(&I, DAG.getNode(ISD::VECTOR_SHUFFLE, VT, Srcs[0], Srcs[1], Mask));
return;
}
- if (Src1NumElts > MaskNumElts) {
+ if (SrcNumElts > MaskNumElts) {
// Resulting vector is shorter than the incoming vector.
- if (Src1NumElts == MaskNumElts && SequentialMask(Mask,0)) {
+ if (SrcNumElts == MaskNumElts && SequentialMask(Mask,0)) {
// Shuffle extracts 1st vector.
- setValue(&I, V1);
+ setValue(&I, Srcs[0]);
return;
}
- if (Src1NumElts == MaskNumElts && SequentialMask(Mask,MaskNumElts)) {
+ if (SrcNumElts == MaskNumElts && SequentialMask(Mask,MaskNumElts)) {
// Shuffle extracts 2nd vector.
- setValue(&I, V2);
+ setValue(&I, Srcs[1]);
return;
}
- // Analyze the access pattern of the vector to see if we can extract each
- // subvector and then do the shuffle. The analysis is done by calculating
- // the range of elements the mask access on both vectors. If it is useful,
- // we could do better by considering separate what elements are accessed
- // in each vector (i.e., have min/max for each vector).
- int MinRange = Src1NumElts+1;
- int MaxRange = -1;
- for (unsigned i = 0; i != MaskNumElts; ++i) {
+ // Analyze the access pattern of the vector to see if we can extract
+ // two subvectors and do the shuffle. The analysis is done by calculating
+ // the range of elements the mask access on both vectors.
+ int MinRange[2] = { SrcNumElts+1, SrcNumElts+1};
+ int MaxRange[2] = {-1, -1};
+
+ for (int i = 0; i != MaskNumElts; ++i) {
SDValue Arg = Mask.getOperand(i);
if (Arg.getOpcode() != ISD::UNDEF) {
assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
- int Idx = cast<ConstantSDNode>(Mask.getOperand(i))->getZExtValue();
- if (Idx > (int) Src1NumElts)
- Idx -= Src1NumElts;
- if (Idx > MaxRange)
- MaxRange = Idx;
- if (Idx < MinRange)
- MinRange = Idx;
+ int Idx = cast<ConstantSDNode>(Arg)->getZExtValue();
+ int Input = 0;
+ if (Idx >= SrcNumElts) {
+ Input = 1;
+ Idx -= SrcNumElts;
+ }
+ if (Idx > MaxRange[Input])
+ MaxRange[Input] = Idx;
+ if (Idx < MinRange[Input])
+ MinRange[Input] = Idx;
}
}
- // Adjust MinRange to start at an even boundary since this give us
- // better quality splits later.
- if ((unsigned) MinRange < Src1NumElts && MinRange%2 != 0)
- MinRange = MinRange - 1;
- if (MaxRange - MinRange < (int) MaskNumElts) {
- // Extract subvector because the range is less than the new vector length
- unsigned StartIdx = (MinRange/MaskNumElts)*MaskNumElts;
- if (MaxRange - StartIdx < MaskNumElts) {
- V1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, VT, V1,
- DAG.getIntPtrConstant(MinRange));
- V2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, VT, V2,
- DAG.getIntPtrConstant(MinRange));
- // Readjust mask for new input vector length.
- SmallVector<SDValue, 8> MappedOps;
- for (unsigned i = 0; i != MaskNumElts; ++i) {
- if (Mask.getOperand(i).getOpcode() == ISD::UNDEF) {
- MappedOps.push_back(Mask.getOperand(i));
- } else {
- unsigned Idx =
- cast<ConstantSDNode>(Mask.getOperand(i))->getZExtValue();
- if (Idx < Src1NumElts) {
- MappedOps.push_back(DAG.getConstant(Idx - StartIdx,
- Mask.getOperand(i).getValueType()));
- } else {
- Idx = Idx - Src1NumElts - StartIdx + MaskNumElts;
- MappedOps.push_back(DAG.getConstant(Idx,
- Mask.getOperand(i).getValueType()));
- }
- }
+
+ // Check if the access is smaller than the vector size and can we find
+ // a reasonable extract index.
+ int RangeUse[2]; // 0 = Unused, 1 = Extract, 2 = Can not Extract.
+ int StartIdx[2]; // StartIdx to extract from
+ for (int Input=0; Input < 2; ++Input) {
+ if (MinRange[Input] == SrcNumElts+1 && MaxRange[Input] == -1) {
+ RangeUse[Input] = 0; // Unused
+ StartIdx[Input] = 0;
+ } else if (MaxRange[Input] - MinRange[Input] < MaskNumElts) {
+ // Fits within range but we should see if we can find a good
+ // start index that a multiple of the mask length.
+ if (MaxRange[Input] < MaskNumElts) {
+ RangeUse[Input] = 1; // Extract from beginning of the vector
+ StartIdx[Input] = 0;
+ } else {
+ StartIdx[Input] = (MinRange[Input]/MaskNumElts)*MaskNumElts;
+ if (MaxRange[Input] - StartIdx[Input] < MaskNumElts)
+ RangeUse[Input] = 1; // Extract from a multiple of the mask length.
+ else
+ RangeUse[Input] = 2; // Can not extract
}
- Mask = DAG.getNode(ISD::BUILD_VECTOR, Mask.getValueType(),
- &MappedOps[0], MappedOps.size());
+ } else
+ RangeUse[Input] = 2; // Access doesn't fit within range
+ }
- setValue(&I, DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask));
- return;
+ if (RangeUse[0] == 0 && RangeUse[0] == 0) {
+ setValue(&I, DAG.getNode(ISD::UNDEF, VT)); // Vectors are not used.
+ return;
+ }
+ else if (RangeUse[0] < 2 && RangeUse[1] < 2) {
+ // Extract appropriate subvector and generate a vector shuffle
+ for (int Input=0; Input < 2; ++Input) {
+ if (RangeUse[Input] == 0) {
+ Srcs[Input] = DAG.getNode(ISD::UNDEF, VT);
+ } else {
+ Srcs[Input] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, VT, Srcs[Input],
+ DAG.getIntPtrConstant(StartIdx[Input]));
+ }
}
+ // Calculate new mask.
+ SmallVector<SDValue, 8> MappedOps;
+ for (int i = 0; i != MaskNumElts; ++i) {
+ SDValue Arg = Mask.getOperand(i);
+ if (Arg.getOpcode() == ISD::UNDEF) {
+ MappedOps.push_back(Arg);
+ } else {
+ int Idx = cast<ConstantSDNode>(Arg)->getZExtValue();
+ if (Idx < SrcNumElts)
+ MappedOps.push_back(DAG.getConstant(Idx - StartIdx[0], MaskEltVT));
+ else {
+ Idx = Idx - SrcNumElts - StartIdx[1] + MaskNumElts;
+ MappedOps.push_back(DAG.getConstant(Idx, MaskEltVT));
+ }
+ }
+ }
+ Mask = DAG.getNode(ISD::BUILD_VECTOR, Mask.getValueType(),
+ &MappedOps[0], MappedOps.size());
+ setValue(&I, DAG.getNode(ISD::VECTOR_SHUFFLE, VT, Srcs[0], Srcs[1], Mask));
+ return;
}
}
- // We can't use either concat vectors or extract subvectors so we fall back
- // to insert and extracts.
+ // We can't use either concat vectors or extract subvectors so fall back to
+ // replacing the shuffle with extract and build vector.
+ // to insert and build vector.
MVT EltVT = VT.getVectorElementType();
MVT PtrVT = TLI.getPointerTy();
SmallVector<SDValue,8> Ops;
- for (unsigned i = 0; i != MaskNumElts; ++i) {
+ for (int i = 0; i != MaskNumElts; ++i) {
SDValue Arg = Mask.getOperand(i);
if (Arg.getOpcode() == ISD::UNDEF) {
Ops.push_back(DAG.getNode(ISD::UNDEF, EltVT));
} else {
assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
- unsigned Idx = cast<ConstantSDNode>(Arg)->getZExtValue();
- if (Idx < Src1NumElts)
- Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, EltVT, V1,
+ int Idx = cast<ConstantSDNode>(Arg)->getZExtValue();
+ if (Idx < SrcNumElts)
+ Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, EltVT, Srcs[0],
DAG.getConstant(Idx, PtrVT)));
else
- Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, EltVT, V2,
- DAG.getConstant(Idx - Src1NumElts, PtrVT)));
+ Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, EltVT, Srcs[1],
+ DAG.getConstant(Idx - SrcNumElts, PtrVT)));
}
}
setValue(&I, DAG.getNode(ISD::BUILD_VECTOR, VT, &Ops[0], Ops.size()));