diff options
author | Chris Lattner <sabre@nondot.org> | 2010-08-25 22:49:25 +0000 |
---|---|---|
committer | Chris Lattner <sabre@nondot.org> | 2010-08-25 22:49:25 +0000 |
commit | e6f7c267df11a44679c35dec79787fbc276839fb (patch) | |
tree | 831f622cb82ef17861f20320751316a0edf4fe9f /lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | |
parent | e6e0018d3e6b5a7dae8b8ecf7ca3f31476e07800 (diff) |
Change handling of illegal vector types to widen when possible instead of
expanding: e.g. <2 x float> -> <4 x float> instead of -> 2 floats. This
affects two places in the code: handling cross block values and handling
function return and arguments. Since vectors are already widened by
legalizetypes, this gives us much better code and unblocks x86-64 abi
and SPU abi work.
For example, this (which is a silly example of a cross-block value):
define <4 x float> @test2(<4 x float> %A) nounwind {
%B = shufflevector <4 x float> %A, <4 x float> undef, <2 x i32> <i32 0, i32 1>
%C = fadd <2 x float> %B, %B
br label %BB
BB:
%D = fadd <2 x float> %C, %C
%E = shufflevector <2 x float> %D, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
ret <4 x float> %E
}
Now compiles into:
_test2: ## @test2
## BB#0:
addps %xmm0, %xmm0
addps %xmm0, %xmm0
ret
previously it compiled into:
_test2: ## @test2
## BB#0:
addps %xmm0, %xmm0
pshufd $1, %xmm0, %xmm1
## kill: XMM0<def> XMM0<kill> XMM0<def>
insertps $0, %xmm0, %xmm0
insertps $16, %xmm1, %xmm0
addps %xmm0, %xmm0
ret
This implements rdar://8230384
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@112101 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp')
-rw-r--r-- | lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 61 |
1 files changed, 48 insertions, 13 deletions
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index a8a8ecb4c1..35e7520213 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -252,8 +252,21 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, if (PartVT == ValueVT) return Val; - if (PartVT.isVector()) // Vector/Vector bitcast. + if (PartVT.isVector()) { + // If the element type of the source/dest vectors are the same, but the + // parts vector has more elements than the value vector, then we have a + // vector widening case (e.g. <2 x float> -> <4 x float>). Extract the + // elements we want. + if (PartVT.getVectorElementType() == ValueVT.getVectorElementType()) { + assert(PartVT.getVectorNumElements() > ValueVT.getVectorNumElements() && + "Cannot narrow, it would be a lossy transformation"); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val, + DAG.getIntPtrConstant(0)); + } + + // Vector/Vector bitcast. return DAG.getNode(ISD::BIT_CONVERT, DL, ValueVT, Val); + } assert(ValueVT.getVectorElementType() == PartVT && ValueVT.getVectorNumElements() == 1 && @@ -392,16 +405,39 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL, const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (NumParts == 1) { - if (PartVT != ValueVT) { - if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) { - Val = DAG.getNode(ISD::BIT_CONVERT, DL, PartVT, Val); - } else { - assert(ValueVT.getVectorElementType() == PartVT && - ValueVT.getVectorNumElements() == 1 && - "Only trivial vector-to-scalar conversions should get here!"); - Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, - PartVT, Val, DAG.getIntPtrConstant(0)); - } + if (PartVT == ValueVT) { + // Nothing to do. + } else if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) { + // Bitconvert vector->vector case. + Val = DAG.getNode(ISD::BIT_CONVERT, DL, PartVT, Val); + } else if (PartVT.isVector() && + PartVT.getVectorElementType() == ValueVT.getVectorElementType()&& + PartVT.getVectorNumElements() > ValueVT.getVectorNumElements()) { + EVT ElementVT = PartVT.getVectorElementType(); + // Vector widening case, e.g. <2 x float> -> <4 x float>. Shuffle in + // undef elements. + SmallVector<SDValue, 16> Ops; + for (unsigned i = 0, e = ValueVT.getVectorNumElements(); i != e; ++i) + Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, + ElementVT, Val, DAG.getIntPtrConstant(i))); + + for (unsigned i = ValueVT.getVectorNumElements(), + e = PartVT.getVectorNumElements(); i != e; ++i) + Ops.push_back(DAG.getUNDEF(ElementVT)); + + Val = DAG.getNode(ISD::BUILD_VECTOR, DL, PartVT, &Ops[0], Ops.size()); + + // FIXME: Use CONCAT for 2x -> 4x. + + //SDValue UndefElts = DAG.getUNDEF(VectorTy); + //Val = DAG.getNode(ISD::CONCAT_VECTORS, DL, PartVT, Val, UndefElts); + } else { + // Vector -> scalar conversion. + assert(ValueVT.getVectorElementType() == PartVT && + ValueVT.getVectorNumElements() == 1 && + "Only trivial vector-to-scalar conversions should get here!"); + Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, + PartVT, Val, DAG.getIntPtrConstant(0)); } Parts[0] = Val; @@ -428,8 +464,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL, DAG.getIntPtrConstant(i * (NumElements / NumIntermediates))); else Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, - IntermediateVT, Val, - DAG.getIntPtrConstant(i)); + IntermediateVT, Val, DAG.getIntPtrConstant(i)); } // Split the intermediate operands into legal parts. |