diff options
author | Michael Liao <michael.liao@intel.com> | 2012-10-10 16:32:15 +0000 |
---|---|---|
committer | Michael Liao <michael.liao@intel.com> | 2012-10-10 16:32:15 +0000 |
commit | 9d796db3e746c31dbdb605510c53b3da98d71b38 (patch) | |
tree | 4f10f38791b33fecaaf7cf7d4fedd2cbe5d45131 /lib/Target | |
parent | 98f01bf34be636b90125f5829f2a5451acb36e52 (diff) |
Add alternative support for FP_EXTEND from v2f32 to v2f64
- Due to the current matching-vector-elements constraint in ISD::FP_EXTEND,
extending from v2f32 to v2f64 is scalarized. Add a customized v2f32 widening
that converts it into a target-specific X86ISD::VFPEXT to work around this
constraint. This patch also reverts a previous attempt to fix this issue, which
worked by recovering the scalarized ISD::FP_EXTEND pattern, and thus
significantly reduces the overhead of supporting non-power-of-2 vector FP extend.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@165625 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target')
-rw-r--r-- | lib/Target/X86/X86ISelLowering.cpp | 101 | ||||
-rw-r--r-- | lib/Target/X86/X86ISelLowering.h | 3 |
2 files changed, 18 insertions, 86 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 7e43e5432d..d551d9d027 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -939,6 +939,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal); setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal); + setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom); + setLoadExtAction(ISD::EXTLOAD, MVT::v2f32, Legal); } @@ -5161,86 +5163,6 @@ X86TargetLowering::LowerVectorBroadcast(SDValue Op, SelectionDAG &DAG) const { return SDValue(); } -// LowerVectorFpExtend - Recognize the scalarized FP_EXTEND from v2f32 to v2f64 -// and convert it into X86ISD::VFPEXT due to the current ISD::FP_EXTEND has the -// constraint of matching input/output vector elements. -SDValue -X86TargetLowering::LowerVectorFpExtend(SDValue &Op, SelectionDAG &DAG) const { - DebugLoc DL = Op.getDebugLoc(); - SDNode *N = Op.getNode(); - EVT VT = Op.getValueType(); - unsigned NumElts = Op.getNumOperands(); - - // Check supported types and sub-targets. - // - // Only v2f32 -> v2f64 needs special handling. - if (VT != MVT::v2f64 || !Subtarget->hasSSE2()) - return SDValue(); - - SDValue VecIn; - EVT VecInVT; - SmallVector<int, 8> Mask; - EVT SrcVT = MVT::Other; - - // Check the patterns could be translated into X86vfpext. - for (unsigned i = 0; i < NumElts; ++i) { - SDValue In = N->getOperand(i); - unsigned Opcode = In.getOpcode(); - - // Skip if the element is undefined. - if (Opcode == ISD::UNDEF) { - Mask.push_back(-1); - continue; - } - - // Quit if one of the elements is not defined from 'fpext'. - if (Opcode != ISD::FP_EXTEND) - return SDValue(); - - // Check how the source of 'fpext' is defined. - SDValue L2In = In.getOperand(0); - EVT L2InVT = L2In.getValueType(); - - // Check the original type - if (SrcVT == MVT::Other) - SrcVT = L2InVT; - else if (SrcVT != L2InVT) // Quit if non-homogenous typed. 
- return SDValue(); - - // Check whether the value being 'fpext'ed is extracted from the same - // source. - Opcode = L2In.getOpcode(); - - // Quit if it's not extracted with a constant index. - if (Opcode != ISD::EXTRACT_VECTOR_ELT || - !isa<ConstantSDNode>(L2In.getOperand(1))) - return SDValue(); - - SDValue ExtractedFromVec = L2In.getOperand(0); - - if (VecIn.getNode() == 0) { - VecIn = ExtractedFromVec; - VecInVT = ExtractedFromVec.getValueType(); - } else if (VecIn != ExtractedFromVec) // Quit if built from more than 1 vec. - return SDValue(); - - Mask.push_back(cast<ConstantSDNode>(L2In.getOperand(1))->getZExtValue()); - } - - // Quit if all operands of BUILD_VECTOR are undefined. - if (!VecIn.getNode()) - return SDValue(); - - // Fill the remaining mask as undef. - for (unsigned i = NumElts; i < VecInVT.getVectorNumElements(); ++i) - Mask.push_back(-1); - - return DAG.getNode(X86ISD::VFPEXT, DL, VT, - DAG.getVectorShuffle(VecInVT, DL, - VecIn, DAG.getUNDEF(VecInVT), - &Mask[0])); -} - SDValue X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); @@ -5273,10 +5195,6 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { if (Broadcast.getNode()) return Broadcast; - SDValue FpExt = LowerVectorFpExtend(Op, DAG); - if (FpExt.getNode()) - return FpExt; - unsigned EVTBits = ExtVT.getSizeInBits(); unsigned NumZero = 0; @@ -8215,6 +8133,20 @@ SDValue X86TargetLowering::LowerFP_TO_UINT(SDValue Op, return FIST; } +SDValue X86TargetLowering::lowerFP_EXTEND(SDValue Op, + SelectionDAG &DAG) const { + DebugLoc DL = Op.getDebugLoc(); + EVT VT = Op.getValueType(); + SDValue In = Op.getOperand(0); + EVT SVT = In.getValueType(); + + assert(SVT == MVT::v2f32 && "Only customize MVT::v2f32 type legalization!"); + + return DAG.getNode(X86ISD::VFPEXT, DL, VT, + DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v4f32, + In, DAG.getUNDEF(SVT))); +} + SDValue X86TargetLowering::LowerFABS(SDValue Op, SelectionDAG 
&DAG) const { LLVMContext *Context = DAG.getContext(); DebugLoc dl = Op.getDebugLoc(); @@ -11407,6 +11339,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG); case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG); case ISD::FP_TO_UINT: return LowerFP_TO_UINT(Op, DAG); + case ISD::FP_EXTEND: return lowerFP_EXTEND(Op, DAG); case ISD::FABS: return LowerFABS(Op, DAG); case ISD::FNEG: return LowerFNEG(Op, DAG); case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG); diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 1cae7ed268..dca65b895e 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -788,6 +788,7 @@ namespace llvm { SDValue LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFABS(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFNEG(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const; @@ -818,8 +819,6 @@ namespace llvm { SDValue LowerVectorAllZeroTest(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerVectorFpExtend(SDValue &Op, SelectionDAG &DAG) const; - virtual SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, |