diff options
Diffstat (limited to 'lib')
-rw-r--r-- | lib/Target/X86/X86ISelLowering.cpp | 34 | ||||
-rw-r--r-- | lib/Target/X86/X86ISelLowering.h | 5 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrFragmentsSIMD.td | 8 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrSSE.td | 8 |
4 files changed, 51 insertions, 4 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 1ed35d88c3..d74a87281c 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -917,6 +917,13 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::SHL, MVT::v4i32, Custom); setOperationAction(ISD::SHL, MVT::v16i8, Custom); + setOperationAction(ISD::VSELECT, MVT::v2f64, Custom); + setOperationAction(ISD::VSELECT, MVT::v2i64, Custom); + setOperationAction(ISD::VSELECT, MVT::v16i8, Custom); + setOperationAction(ISD::VSELECT, MVT::v8i16, Custom); + setOperationAction(ISD::VSELECT, MVT::v4i32, Custom); + setOperationAction(ISD::VSELECT, MVT::v4f32, Custom); + // i8 and i16 vectors are custom , because the source register and source // source memory operand types are not the same width. f32 vectors are // custom since the immediate controlling the insert encodes additional @@ -8684,6 +8691,32 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { return DAG.getNode(X86ISD::CMOV, DL, VTs, Ops, array_lengthof(Ops)); } +SDValue X86TargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const { + SDValue Cond = Op.getOperand(0); + SDValue Op1 = Op.getOperand(1); + SDValue Op2 = Op.getOperand(2); + DebugLoc DL = Op.getDebugLoc(); + + SDValue Ops[] = {Cond, Op1, Op2}; + + assert(Op1.getValueType().isVector() && "Op1 must be a vector"); + assert(Op2.getValueType().isVector() && "Op2 must be a vector"); + assert(Cond.getValueType().isVector() && "Cond must be a vector"); + assert(Op1.getValueType() == Op2.getValueType() && "Type mismatch"); + + switch (Op1.getValueType().getSimpleVT().SimpleTy) { + default: break; + case MVT::v2i64: return DAG.getNode(X86ISD::BLENDVPD, DL, Op1.getValueType(), Ops, array_lengthof(Ops)); + case MVT::v2f64: return DAG.getNode(X86ISD::BLENDVPD, DL, Op1.getValueType(), Ops, array_lengthof(Ops)); + case MVT::v4i32: return DAG.getNode(X86ISD::BLENDVPS, DL, Op1.getValueType(), Ops, array_lengthof(Ops)); + case MVT::v4f32: return DAG.getNode(X86ISD::BLENDVPS, DL, Op1.getValueType(), Ops, array_lengthof(Ops)); + case MVT::v16i8: return DAG.getNode(X86ISD::PBLENDVB, DL, Op1.getValueType(), Ops, array_lengthof(Ops)); + } + + return SDValue(); +} + + // isAndOrOfSingleUseSetCCs - Return true if node is an ISD::AND or // ISD::OR of two X86ISD::SETCC nodes each of which has no other use apart // from the AND / OR. @@ -10350,6 +10383,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::FGETSIGN: return LowerFGETSIGN(Op, DAG); case ISD::SETCC: return LowerSETCC(Op, DAG); case ISD::SELECT: return LowerSELECT(Op, DAG); + case ISD::VSELECT: return LowerVSELECT(Op, DAG); case ISD::BRCOND: return LowerBRCOND(Op, DAG); case ISD::JumpTable: return LowerJumpTable(Op, DAG); case ISD::VASTART: return LowerVASTART(Op, DAG); diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 6ff9ca55d3..3051e16485 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -175,8 +175,10 @@ namespace llvm { /// PSIGNB/W/D - Copy integer sign. PSIGNB, PSIGNW, PSIGND, - /// PBLENDVB - Variable blend + /// BLENDVXX family of opcodes PBLENDVB, + BLENDVPD, + BLENDVPS, /// FMAX, FMIN - Floating point max and min. /// @@ -809,6 +811,7 @@ namespace llvm { SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const; SDValue LowerMEMSET(SDValue Op, SelectionDAG &DAG) const; SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index e80038e3a0..7ad9c87d28 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -58,9 +58,15 @@ def X86psignw : SDNode<"X86ISD::PSIGNW", def X86psignd : SDNode<"X86ISD::PSIGND", SDTypeProfile<1, 2, [SDTCisVT<0, v4i32>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>]>>; -def X86pblendv : SDNode<"X86ISD::PBLENDVB", +def X86pblendvb : SDNode<"X86ISD::PBLENDVB", SDTypeProfile<1, 3, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>, SDTCisSameAs<0,3>]>>; +def X86blendvpd : SDNode<"X86ISD::BLENDVPD", + SDTypeProfile<1, 3, [SDTCisVT<0, v2i64>, SDTCisSameAs<0,1>, + SDTCisSameAs<0,2>, SDTCisSameAs<0,3>]>>; +def X86blendvps : SDNode<"X86ISD::BLENDVPS", + SDTypeProfile<1, 3, [SDTCisVT<0, v4i32>, SDTCisSameAs<0,1>, + SDTCisSameAs<0,2>, SDTCisSameAs<0,3>]>>; def X86pextrb : SDNode<"X86ISD::PEXTRB", SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<2>]>>; def X86pextrw : SDNode<"X86ISD::PEXTRW", diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 8eab5d6721..c210a987dc 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -5843,7 +5843,7 @@ defm VBLENDVPDY : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR256, i256mem, defm VBLENDVPSY : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR256, i256mem, memopv32i8, int_x86_avx_blendv_ps_256>; -def : Pat<(X86pblendv VR128:$src1, VR128:$src2, VR128:$src3), +def : Pat<(X86pblendvb VR128:$src1, VR128:$src2, VR128:$src3), (VPBLENDVBrr VR128:$src1, VR128:$src2, VR128:$src3)>, Requires<[HasAVX]>; @@ -5871,8 +5871,12 @@ defm BLENDVPD : SS41I_ternary_int<0x15, "blendvpd", int_x86_sse41_blendvpd>; defm BLENDVPS : SS41I_ternary_int<0x14, "blendvps", int_x86_sse41_blendvps>; defm PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", int_x86_sse41_pblendvb>; -def : Pat<(X86pblendv VR128:$src1, VR128:$src2, XMM0), +def : Pat<(X86pblendvb VR128:$src1, VR128:$src2, XMM0), (PBLENDVBrr0 VR128:$src1, VR128:$src2)>, Requires<[HasSSE41]>; +def : Pat<(X86blendvpd XMM0, VR128:$src1, VR128:$src2), + (BLENDVPDrr0 VR128:$src1, VR128:$src2)>, Requires<[HasSSE41]>; +def : Pat<(X86blendvps XMM0, VR128:$src1, VR128:$src2), + (BLENDVPSrr0 VR128:$src1, VR128:$src2)>, Requires<[HasSSE41]>; let Predicates = [HasAVX] in def VMOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), |