aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- lib/Target/X86/X86InstrSSE.td 18
-rw-r--r-- test/CodeGen/X86/avx-intrinsics-x86.ll 5
2 files changed, 9 insertions, 14 deletions
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index b79783b048..1912a936ce 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -3606,25 +3606,17 @@ def MOVDQUmr : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
XS, Requires<[UseSSE2]>;
}
-// Intrinsic forms of MOVDQU load and store
-def VMOVDQUmr_Int : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
- "vmovdqu\t{$src, $dst|$dst, $src}",
- [(int_x86_sse2_storeu_dq addr:$dst, VR128:$src)],
- IIC_SSE_MOVU_P_MR>,
- XS, VEX, Requires<[HasAVX]>;
-
-def MOVDQUmr_Int : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
- "movdqu\t{$src, $dst|$dst, $src}",
- [(int_x86_sse2_storeu_dq addr:$dst, VR128:$src)],
- IIC_SSE_MOVU_P_MR>,
- XS, Requires<[UseSSE2]>;
-
} // ExeDomain = SSEPackedInt
let Predicates = [HasAVX] in {
+ def : Pat<(int_x86_sse2_storeu_dq addr:$dst, VR128:$src),
+ (VMOVDQUmr addr:$dst, VR128:$src)>;
def : Pat<(int_x86_avx_storeu_dq_256 addr:$dst, VR256:$src),
(VMOVDQUYmr addr:$dst, VR256:$src)>;
}
+let Predicates = [UseSSE2] in
+def : Pat<(int_x86_sse2_storeu_dq addr:$dst, VR128:$src),
+ (MOVDQUmr addr:$dst, VR128:$src)>;
//===---------------------------------------------------------------------===//
// SSE2 - Packed Integer Arithmetic Instructions
diff --git a/test/CodeGen/X86/avx-intrinsics-x86.ll b/test/CodeGen/X86/avx-intrinsics-x86.ll
index 88ecd5a5d3..fa90ae7ef1 100644
--- a/test/CodeGen/X86/avx-intrinsics-x86.ll
+++ b/test/CodeGen/X86/avx-intrinsics-x86.ll
@@ -671,7 +671,9 @@ define void @test_x86_sse2_storeu_dq(i8* %a0, <16 x i8> %a1) {
; CHECK: test_x86_sse2_storeu_dq
; CHECK: movl
; CHECK: vmovdqu
- call void @llvm.x86.sse2.storeu.dq(i8* %a0, <16 x i8> %a1)
+ ; add operation forces the execution domain.
+ %a2 = add <16 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+ call void @llvm.x86.sse2.storeu.dq(i8* %a0, <16 x i8> %a2)
ret void
}
declare void @llvm.x86.sse2.storeu.dq(i8*, <16 x i8>) nounwind
@@ -681,6 +683,7 @@ define void @test_x86_sse2_storeu_pd(i8* %a0, <2 x double> %a1) {
; CHECK: test_x86_sse2_storeu_pd
; CHECK: movl
; CHECK: vmovupd
+ ; fadd operation forces the execution domain.
%a2 = fadd <2 x double> %a1, <double 0x0, double 0x4200000000000000>
call void @llvm.x86.sse2.storeu.pd(i8* %a0, <2 x double> %a2)
ret void