diff options
-rw-r--r-- | lib/Target/X86/X86ISelLowering.cpp | 94 | ||||
-rw-r--r-- | lib/Target/X86/X86ISelLowering.h | 1 |
2 files changed, 53 insertions, 42 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index d3c21bd703..b6efc28316 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -1125,6 +1125,9 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::VSELECT, MVT::v8i32, Legal); setOperationAction(ISD::VSELECT, MVT::v8f32, Legal); + setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom); + setOperationAction(ISD::SIGN_EXTEND, MVT::v8i32, Custom); + if (Subtarget->hasFMA() || Subtarget->hasFMA4()) { setOperationAction(ISD::FMA, MVT::v8f32, Legal); setOperationAction(ISD::FMA, MVT::v4f64, Legal); @@ -9539,6 +9542,54 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { return DAG.getNode(X86ISD::CMOV, DL, VTs, Ops, array_lengthof(Ops)); } +SDValue X86TargetLowering::LowerSIGN_EXTEND(SDValue Op, + SelectionDAG &DAG) const { + EVT VT = Op->getValueType(0); + SDValue In = Op->getOperand(0); + EVT InVT = In.getValueType(); + DebugLoc dl = Op->getDebugLoc(); + + if ((VT == MVT::v4i64 && InVT == MVT::v4i32) || + (VT == MVT::v8i32 && InVT == MVT::v8i16)) { + + if (Subtarget->hasInt256()) + return DAG.getNode(X86ISD::VSEXT_MOVL, dl, VT, In); + + // Optimize vectors in AVX mode + // Sign extend v8i16 to v8i32 and + // v4i32 to v4i64 + // + // Divide input vector into two parts + // for v4i32 the shuffle mask will be { 0, 1, -1, -1} {2, 3, -1, -1} + // use vpmovsx instruction to extend v4i32 -> v2i64; v8i16 -> v4i32 + // concat the vectors to original VT + + unsigned NumElems = InVT.getVectorNumElements(); + SDValue Undef = DAG.getUNDEF(InVT); + + SmallVector<int,8> ShufMask1(NumElems, -1); + for (unsigned i = 0; i != NumElems/2; ++i) + ShufMask1[i] = i; + + SDValue OpLo = DAG.getVectorShuffle(InVT, dl, In, Undef, &ShufMask1[0]); + + SmallVector<int,8> ShufMask2(NumElems, -1); + for (unsigned i = 0; i != NumElems/2; ++i) + ShufMask2[i] = i + NumElems/2; + + SDValue OpHi = DAG.getVectorShuffle(InVT, dl, In, Undef, &ShufMask2[0]); + + EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(), + VT.getVectorNumElements()/2); + + OpLo = DAG.getNode(X86ISD::VSEXT_MOVL, dl, HalfVT, OpLo); + OpHi = DAG.getNode(X86ISD::VSEXT_MOVL, dl, HalfVT, OpHi); + + return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi); + } + return SDValue(); +} + // isAndOrOfSingleUseSetCCs - Return true if node is an ISD::AND or // ISD::OR of two X86ISD::SETCC nodes each of which has no other use apart // from the AND / OR. @@ -11809,6 +11860,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::FGETSIGN: return LowerFGETSIGN(Op, DAG); case ISD::SETCC: return LowerSETCC(Op, DAG); case ISD::SELECT: return LowerSELECT(Op, DAG); + case ISD::SIGN_EXTEND: return LowerSIGN_EXTEND(Op, DAG); case ISD::BRCOND: return LowerBRCOND(Op, DAG); case ISD::JumpTable: return LowerJumpTable(Op, DAG); case ISD::VASTART: return LowerVASTART(Op, DAG); @@ -16746,54 +16798,12 @@ static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); EVT VT = N->getValueType(0); - SDValue Op = N->getOperand(0); - EVT OpVT = Op.getValueType(); - DebugLoc dl = N->getDebugLoc(); - if (VT.isVector() && VT.getSizeInBits() == 256) { SDValue R = WidenMaskArithmetic(N, DAG, DCI, Subtarget); if (R.getNode()) return R; } - if ((VT == MVT::v4i64 && OpVT == MVT::v4i32) || - (VT == MVT::v8i32 && OpVT == MVT::v8i16)) { - - if (Subtarget->hasInt256()) - return DAG.getNode(X86ISD::VSEXT_MOVL, dl, VT, Op); - - // Optimize vectors in AVX mode - // Sign extend v8i16 to v8i32 and - // v4i32 to v4i64 - // - // Divide input vector into two parts - // for v4i32 the shuffle mask will be { 0, 1, -1, -1} {2, 3, -1, -1} - // use vpmovsx instruction to extend v4i32 -> v2i64; v8i16 -> v4i32 - // concat the vectors to original VT - - unsigned NumElems = OpVT.getVectorNumElements(); - SDValue Undef = DAG.getUNDEF(OpVT); - - SmallVector<int,8> ShufMask1(NumElems, -1); - for (unsigned i = 0; i != NumElems/2; ++i) - ShufMask1[i] = i; - - SDValue OpLo = DAG.getVectorShuffle(OpVT, dl, Op, Undef, &ShufMask1[0]); - - SmallVector<int,8> ShufMask2(NumElems, -1); - for (unsigned i = 0; i != NumElems/2; ++i) - ShufMask2[i] = i + NumElems/2; - - SDValue OpHi = DAG.getVectorShuffle(OpVT, dl, Op, Undef, &ShufMask2[0]); - - EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(), - VT.getVectorNumElements()/2); - - OpLo = DAG.getNode(X86ISD::VSEXT_MOVL, dl, HalfVT, OpLo); - OpHi = DAG.getNode(X86ISD::VSEXT_MOVL, dl, HalfVT, OpHi); - - return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi); - } return SDValue(); } diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index eec1f7e720..a19e7ec1a1 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -824,6 +824,7 @@ namespace llvm { DebugLoc dl, SelectionDAG &DAG) const; SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const; SDValue LowerMEMSET(SDValue Op, SelectionDAG &DAG) const; |