aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp11
-rw-r--r--lib/Target/X86/X86ISelLowering.h4
-rw-r--r--lib/Target/X86/X86InstrSSE.td16
3 files changed, 25 insertions, 6 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index fb2b92ab72..d442a18850 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -2868,6 +2868,17 @@ bool X86::isSplatMask(SDNode *N) {
return ::isSplatMask(N);
}
+/// isSplatLoMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a splat of zero element.
+bool X86::isSplatLoMask(SDNode *N) {
+ assert(N->getOpcode() == ISD::BUILD_VECTOR);
+
+ for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i)
+ if (!isUndefOrEqual(N->getOperand(i), 0))
+ return false;
+ return true;
+}
+
/// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle
/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP*
/// instructions.
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 8fd2b52e1c..6de2964ec7 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -225,6 +225,10 @@ namespace llvm {
/// specifies a splat of a single element.
bool isSplatMask(SDNode *N);
+ /// isSplatLoMask - Return true if the specified VECTOR_SHUFFLE operand
+ /// specifies a splat of zero element.
+ bool isSplatLoMask(SDNode *N);
+
/// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle
/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP*
/// instructions.
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index c2fe9acb5b..7361c70cdf 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -104,8 +104,8 @@ def SSE_splat_mask : PatLeaf<(build_vector), [{
return X86::isSplatMask(N);
}], SHUFFLE_get_shuf_imm>;
-def SSE_splat_v2_mask : PatLeaf<(build_vector), [{
- return X86::isSplatMask(N);
+def SSE_splat_lo_mask : PatLeaf<(build_vector), [{
+ return X86::isSplatLoMask(N);
}]>;
def MOVHLPS_shuffle_mask : PatLeaf<(build_vector), [{
@@ -812,13 +812,13 @@ def MOVDDUPrr : S3DI<0x12, MRMSrcReg, (ops VR128:$dst, VR128:$src),
"movddup {$src, $dst|$dst, $src}",
[(set VR128:$dst, (v2f64 (vector_shuffle
VR128:$src, (undef),
- SSE_splat_v2_mask)))]>;
+ SSE_splat_lo_mask)))]>;
def MOVDDUPrm : S3DI<0x12, MRMSrcMem, (ops VR128:$dst, f64mem:$src),
"movddup {$src, $dst|$dst, $src}",
[(set VR128:$dst, (v2f64 (vector_shuffle
(scalar_to_vector (loadf64 addr:$src)),
(undef),
- SSE_splat_v2_mask)))]>;
+ SSE_splat_lo_mask)))]>;
// SSE2 instructions without OpSize prefix
def Int_CVTDQ2PSrr : I<0x5B, MRMSrcReg, (ops VR128:$dst, VR128:$src),
@@ -1908,10 +1908,14 @@ def : Pat<(v4f32 (vector_shuffle immAllZerosV,
// Splat v2f64 / v2i64
let AddedComplexity = 10 in {
-def : Pat<(vector_shuffle (v2f64 VR128:$src), (undef), SSE_splat_v2_mask:$sm),
+def : Pat<(vector_shuffle (v2f64 VR128:$src), (undef), SSE_splat_lo_mask:$sm),
(UNPCKLPDrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
-def : Pat<(vector_shuffle (v2i64 VR128:$src), (undef), SSE_splat_v2_mask:$sm),
+def : Pat<(vector_shuffle (v2f64 VR128:$src), (undef), UNPCKH_shuffle_mask:$sm),
+ (UNPCKHPDrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
+def : Pat<(vector_shuffle (v2i64 VR128:$src), (undef), SSE_splat_lo_mask:$sm),
(PUNPCKLQDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
+def : Pat<(vector_shuffle (v2i64 VR128:$src), (undef), UNPCKH_shuffle_mask:$sm),
+ (PUNPCKHQDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
}
// Splat v4f32