diff options
-rw-r--r-- | lib/Target/X86/X86ISelLowering.cpp | 11 | ||||
-rw-r--r-- | lib/Target/X86/X86ISelLowering.h | 4 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrSSE.td | 16 |
3 files changed, 25 insertions, 6 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index fb2b92ab72..d442a18850 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -2868,6 +2868,17 @@ bool X86::isSplatMask(SDNode *N) { return ::isSplatMask(N); } +/// isSplatLoMask - Return true if the specified VECTOR_SHUFFLE operand +/// specifies a splat of zero element. +bool X86::isSplatLoMask(SDNode *N) { + assert(N->getOpcode() == ISD::BUILD_VECTOR); + + for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i) + if (!isUndefOrEqual(N->getOperand(i), 0)) + return false; + return true; +} + /// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle /// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP* /// instructions. diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 8fd2b52e1c..6de2964ec7 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -225,6 +225,10 @@ namespace llvm { /// specifies a splat of a single element. bool isSplatMask(SDNode *N); + /// isSplatLoMask - Return true if the specified VECTOR_SHUFFLE operand + /// specifies a splat of zero element. + bool isSplatLoMask(SDNode *N); + /// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle /// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP* /// instructions. diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index c2fe9acb5b..7361c70cdf 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -104,8 +104,8 @@ def SSE_splat_mask : PatLeaf<(build_vector), [{ return X86::isSplatMask(N); }], SHUFFLE_get_shuf_imm>; -def SSE_splat_v2_mask : PatLeaf<(build_vector), [{ - return X86::isSplatMask(N); +def SSE_splat_lo_mask : PatLeaf<(build_vector), [{ + return X86::isSplatLoMask(N); }]>; def MOVHLPS_shuffle_mask : PatLeaf<(build_vector), [{ @@ -812,13 +812,13 @@ def MOVDDUPrr : S3DI<0x12, MRMSrcReg, (ops VR128:$dst, VR128:$src), "movddup {$src, $dst|$dst, $src}", [(set VR128:$dst, (v2f64 (vector_shuffle VR128:$src, (undef), - SSE_splat_v2_mask)))]>; + SSE_splat_lo_mask)))]>; def MOVDDUPrm : S3DI<0x12, MRMSrcMem, (ops VR128:$dst, f64mem:$src), "movddup {$src, $dst|$dst, $src}", [(set VR128:$dst, (v2f64 (vector_shuffle (scalar_to_vector (loadf64 addr:$src)), (undef), - SSE_splat_v2_mask)))]>; + SSE_splat_lo_mask)))]>; // SSE2 instructions without OpSize prefix def Int_CVTDQ2PSrr : I<0x5B, MRMSrcReg, (ops VR128:$dst, VR128:$src), @@ -1908,10 +1908,14 @@ def : Pat<(v4f32 (vector_shuffle immAllZerosV, // Splat v2f64 / v2i64 let AddedComplexity = 10 in { -def : Pat<(vector_shuffle (v2f64 VR128:$src), (undef), SSE_splat_v2_mask:$sm), +def : Pat<(vector_shuffle (v2f64 VR128:$src), (undef), SSE_splat_lo_mask:$sm), (UNPCKLPDrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>; -def : Pat<(vector_shuffle (v2i64 VR128:$src), (undef), SSE_splat_v2_mask:$sm), +def : Pat<(vector_shuffle (v2f64 VR128:$src), (undef), UNPCKH_shuffle_mask:$sm), + (UNPCKHPDrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>; +def : Pat<(vector_shuffle (v2i64 VR128:$src), (undef), SSE_splat_lo_mask:$sm), (PUNPCKLQDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>; +def : Pat<(vector_shuffle (v2i64 VR128:$src), (undef), UNPCKH_shuffle_mask:$sm), + (PUNPCKHQDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>; } // Splat v4f32 |