diff options
author | Bruno Cardoso Lopes <bruno.cardoso@gmail.com> | 2011-08-17 02:29:10 +0000 |
---|---|---|
committer | Bruno Cardoso Lopes <bruno.cardoso@gmail.com> | 2011-08-17 02:29:10 +0000 |
commit | fc0a7021282a447f4ea2f60e2249da5cdca8d2c3 (patch) | |
tree | 3c03633cef73d08a233474e11d79b76460483434 | |
parent | 23e9ef994ee390060e0630e8b4db85abe4e671fd (diff) |
Now that we have a canonical way to handle 256-bit splats:
vinsertf128 $1 + vpermilps $0, remove the old code that used to first
do the splat in a 128-bit vector and then insert it into a larger one.
This is better because the handling code gets simpler, and it also makes
room for the upcoming vbroadcast!
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@137807 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/Target/X86/X86ISelLowering.cpp | 38 | ||||
-rw-r--r-- | test/CodeGen/X86/avx-splat.ll | 10 |
2 files changed, 5 insertions, 43 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 777851e51b..0722895ea0 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -4205,34 +4205,6 @@ static SDValue getLegalSplat(SelectionDAG &DAG, SDValue V, int EltNo) { return DAG.getNode(ISD::BITCAST, dl, VT, V); } -/// PromoteVectorToScalarSplat - Since there's no native support for -/// scalar_to_vector for 256-bit AVX, a 128-bit scalar_to_vector + -/// INSERT_SUBVECTOR is generated. Recognize this idiom and do the -/// shuffle before the insertion, this yields less instructions in the end. -static SDValue PromoteVectorToScalarSplat(ShuffleVectorSDNode *SV, - SelectionDAG &DAG) { - EVT SrcVT = SV->getValueType(0); - SDValue V1 = SV->getOperand(0); - DebugLoc dl = SV->getDebugLoc(); - int NumElems = SrcVT.getVectorNumElements(); - - assert(SrcVT.is256BitVector() && "unknown howto handle vector type"); - assert(SV->isSplat() && "shuffle must be a splat"); - - int SplatIdx = SV->getSplatIndex(); - const int Mask[4] = { SplatIdx, SplatIdx, SplatIdx, SplatIdx }; - - EVT SVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getVectorElementType(), - NumElems/2); - SDValue SV1 = DAG.getVectorShuffle(SVT, dl, V1.getOperand(1), - DAG.getUNDEF(SVT), Mask); - SDValue InsV = Insert128BitVector(DAG.getUNDEF(SrcVT), SV1, - DAG.getConstant(0, MVT::i32), DAG, dl); - - return Insert128BitVector(InsV, SV1, - DAG.getConstant(NumElems/2, MVT::i32), DAG, dl); -} - /// PromoteSplat - Promote a splat of v4i32, v8i16 or v16i8 to v4f32 and /// v8i32, v16i16 or v32i8 to v8f32. static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG) { @@ -6199,16 +6171,6 @@ SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG, if (NumElem <= 4 && CanXFormVExtractWithShuffleIntoLoad(Op, DAG, TLI)) return Op; - // Since there's no native support for scalar_to_vector for 256-bit AVX, a - // 128-bit scalar_to_vector + INSERT_SUBVECTOR is generated. 
Recognize this - // idiom and do the shuffle before the insertion, this yields less - // instructions in the end. - if (VT.is256BitVector() && - V1.getOpcode() == ISD::INSERT_SUBVECTOR && - V1.getOperand(0).getOpcode() == ISD::UNDEF && - V1.getOperand(1).getOpcode() == ISD::SCALAR_TO_VECTOR) - return PromoteVectorToScalarSplat(SVOp, DAG); - // Handle splats by matching through known shuffle masks if (VT.is128BitVector() && NumElem <= 4) return SDValue(); diff --git a/test/CodeGen/X86/avx-splat.ll b/test/CodeGen/X86/avx-splat.ll index ca35b7f265..417d7b09c5 100644 --- a/test/CodeGen/X86/avx-splat.ll +++ b/test/CodeGen/X86/avx-splat.ll @@ -24,8 +24,8 @@ entry: } ; CHECK: vmovd -; CHECK-NEXT: movlhps ; CHECK-NEXT: vinsertf128 $1 +; CHECK-NEXT: vpermilps $0 define <4 x i64> @funcC(i64 %q) nounwind uwtable readnone ssp { entry: %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0 @@ -35,8 +35,8 @@ entry: ret <4 x i64> %vecinit6.i } -; CHECK: vshufpd -; CHECK-NEXT: vinsertf128 $1 +; CHECK: vinsertf128 $1 +; CHECK-NEXT: vpermilps $0 define <4 x double> @funcD(double %q) nounwind uwtable readnone ssp { entry: %vecinit.i = insertelement <4 x double> undef, double %q, i32 0 @@ -78,8 +78,8 @@ __load_and_broadcast_32.exit1249: ; preds = %load.i1247, %for_ex ret <8 x float> %load_broadcast12281250 } -; CHECK: vpshufd $0 -; CHECK-NEXT: vinsertf128 $1 +; CHECK: vinsertf128 $1 +; CHECK-NEXT: vpermilps $0 define <8 x float> @funcF(i32* %ptr) nounwind { %val = load i32* %ptr, align 4 %ret6 = insertelement <8 x i32> undef, i32 %val, i32 6 |