diff options
-rw-r--r-- | lib/Target/X86/X86ISelLowering.cpp | 33 | ||||
-rw-r--r-- | lib/Target/X86/X86ISelLowering.h | 5 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrSSE.td | 18 |
3 files changed, 56 insertions, 0 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index a2f4cbe820..976286678d 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -1664,6 +1664,37 @@ bool X86::isUNPCKHMask(SDNode *N) {
   return true;
 }
 
+/// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form
+/// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef,
+/// <0, 0, 1, 1>
+bool X86::isUNPCKL_v_undef_Mask(SDNode *N) {
+  assert(N->getOpcode() == ISD::BUILD_VECTOR);
+  // Only 4-, 8- and 16-element masks are accepted.
+  unsigned NumElems = N->getNumOperands();
+  if (NumElems != 4 && NumElems != 8 && NumElems != 16)
+    return false;
+  // Walk the mask in pairs: entries 2j and 2j+1 must each be j or undef.
+  for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) {
+    SDOperand BitI = N->getOperand(i);
+    SDOperand BitI1 = N->getOperand(i+1);
+
+    if (BitI.getOpcode() != ISD::UNDEF) {
+      assert(isa<ConstantSDNode>(BitI) && "Invalid VECTOR_SHUFFLE mask!");
+      if (cast<ConstantSDNode>(BitI)->getValue() != j)
+        return false;
+    }
+
+    if (BitI1.getOpcode() != ISD::UNDEF) {
+      assert(isa<ConstantSDNode>(BitI1) && "Invalid VECTOR_SHUFFLE mask!");
+      if (cast<ConstantSDNode>(BitI1)->getValue() != j)
+        return false;
+    }
+  }
+
+  return true;
+}
+
+
 /// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies
 /// a splat of a single element.
 bool X86::isSplatMask(SDNode *N) {
@@ -2604,6 +2635,7 @@ SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
     }
 
     if (X86::isUNPCKLMask(PermMask.Val) ||
+        X86::isUNPCKL_v_undef_Mask(PermMask.Val) ||
         X86::isUNPCKHMask(PermMask.Val))
       // Leave the VECTOR_SHUFFLE alone. It matches {P}UNPCKL*.
       return Op;
@@ -2929,5 +2961,6 @@ X86TargetLowering::isShuffleMaskLegal(SDOperand Mask, MVT::ValueType VT) const {
           isPSHUFHW_PSHUFLWMask(Mask.Val) ||
           X86::isSHUFPMask(Mask.Val) ||
           X86::isUNPCKLMask(Mask.Val) ||
+          X86::isUNPCKL_v_undef_Mask(Mask.Val) ||
           X86::isUNPCKHMask(Mask.Val));
 }
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 9722d6fd4f..ddb2d307ba 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -220,6 +220,11 @@ namespace llvm {
    /// specifies a shuffle of elements that is suitable for input to UNPCKH.
    bool isUNPCKHMask(SDNode *N);
 
+   /// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form
+   /// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef,
+   /// <0, 0, 1, 1>
+   bool isUNPCKL_v_undef_Mask(SDNode *N);
+
    /// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand
    /// specifies a splat of a single element.
    bool isSplatMask(SDNode *N);
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 3fd71c89df..bdf5fe2de8 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -100,6 +100,10 @@ def UNPCKH_shuffle_mask : PatLeaf<(build_vector), [{
   return X86::isUNPCKHMask(N);
 }]>;
 
+def UNPCKL_v_undef_shuffle_mask : PatLeaf<(build_vector), [{
+  return X86::isUNPCKL_v_undef_Mask(N);
+}]>;
+
 def PSHUFD_shuffle_mask : PatLeaf<(build_vector), [{
   return X86::isPSHUFDMask(N);
 }], SHUFFLE_get_shuf_imm>;
@@ -1733,6 +1737,20 @@ def : Pat<(vector_shuffle (loadv4f32 addr:$src1), (undef),
           (v4f32 (PSHUFLWmi addr:$src1, PSHUFLW_fp_shuffle_mask:$sm))>,
       Requires<[HasSSE2]>;
 
+// vector_shuffle v1, <undef>, <0, 0, 1, 1, ...>
+def : Pat<(v4f32 (vector_shuffle VR128:$src, (undef),
+                  UNPCKL_v_undef_shuffle_mask)),
+          (UNPCKLPSrr VR128:$src, VR128:$src)>, Requires<[HasSSE1]>;
+def : Pat<(v16i8 (vector_shuffle VR128:$src, (undef),
+                  UNPCKL_v_undef_shuffle_mask)),
+          (PUNPCKLBWrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
+def : Pat<(v8i16 (vector_shuffle VR128:$src, (undef),
+                  UNPCKL_v_undef_shuffle_mask)),
+          (PUNPCKLWDrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
+def : Pat<(v4i32 (vector_shuffle VR128:$src, (undef),
+                  UNPCKL_v_undef_shuffle_mask)),
+          (PUNPCKLDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
+
 // 128-bit logical shifts
 def : Pat<(int_x86_sse2_psll_dq VR128:$src1, imm:$src2),
           (v2i64 (PSLLDQri VR128:$src1, (PSxLDQ_imm imm:$src2)))>;