aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--lib/Target/X86/X86InstrFormats.td4
-rw-r--r--lib/Target/X86/X86InstrInfo.td1
-rw-r--r--lib/Target/X86/X86InstrSSE.td36
-rw-r--r--test/CodeGen/X86/apm.ll4
-rw-r--r--test/CodeGen/X86/avx-intrinsics-x86.ll48
5 files changed, 71 insertions, 22 deletions
diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td
index 58fd1f3f8d..2da9b4baeb 100644
--- a/lib/Target/X86/X86InstrFormats.td
+++ b/lib/Target/X86/X86InstrFormats.td
@@ -339,10 +339,6 @@ class VPSI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, !strconcat("v", asm), pattern, SSEPackedSingle>, TB,
Requires<[HasAVX]>;
-class VoPSI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern, SSEPackedSingle>, TB,
- Requires<[HasXMM]>;
// SSE2 Instruction Templates:
//
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 0bc3afa77b..fe83ae9ddd 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -475,6 +475,7 @@ def HasAVX : Predicate<"Subtarget->hasAVX()">;
def HasAVX2 : Predicate<"Subtarget->hasAVX2()">;
def HasXMM : Predicate<"Subtarget->hasXMM()">;
def HasXMMInt : Predicate<"Subtarget->hasXMMInt()">;
+def HasSSE3orAVX : Predicate<"Subtarget->hasSSE3orAVX()">;
def HasPOPCNT : Predicate<"Subtarget->hasPOPCNT()">;
def HasAES : Predicate<"Subtarget->hasAES()">;
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 78143f6977..d6ae3af0d4 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -3253,19 +3253,21 @@ def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
//===----------------------------------------------------------------------===//
// Prefetch intrinsic.
-def PREFETCHT0 : VoPSI<0x18, MRM1m, (outs), (ins i8mem:$src),
- "prefetcht0\t$src", [(prefetch addr:$src, imm, (i32 3), (i32 1))]>;
-def PREFETCHT1 : VoPSI<0x18, MRM2m, (outs), (ins i8mem:$src),
- "prefetcht1\t$src", [(prefetch addr:$src, imm, (i32 2), (i32 1))]>;
-def PREFETCHT2 : VoPSI<0x18, MRM3m, (outs), (ins i8mem:$src),
- "prefetcht2\t$src", [(prefetch addr:$src, imm, (i32 1), (i32 1))]>;
-def PREFETCHNTA : VoPSI<0x18, MRM0m, (outs), (ins i8mem:$src),
- "prefetchnta\t$src", [(prefetch addr:$src, imm, (i32 0), (i32 1))]>;
+let Predicates = [HasXMM] in {
+def PREFETCHT0 : I<0x18, MRM1m, (outs), (ins i8mem:$src),
+ "prefetcht0\t$src", [(prefetch addr:$src, imm, (i32 3), (i32 1))]>, TB;
+def PREFETCHT1 : I<0x18, MRM2m, (outs), (ins i8mem:$src),
+ "prefetcht1\t$src", [(prefetch addr:$src, imm, (i32 2), (i32 1))]>, TB;
+def PREFETCHT2 : I<0x18, MRM3m, (outs), (ins i8mem:$src),
+ "prefetcht2\t$src", [(prefetch addr:$src, imm, (i32 1), (i32 1))]>, TB;
+def PREFETCHNTA : I<0x18, MRM0m, (outs), (ins i8mem:$src),
+ "prefetchnta\t$src", [(prefetch addr:$src, imm, (i32 0), (i32 1))]>, TB;
+}
// Flush cache
def CLFLUSH : I<0xAE, MRM7m, (outs), (ins i8mem:$src),
"clflush\t$src", [(int_x86_sse2_clflush addr:$src)]>,
- TB, Requires<[HasSSE2]>;
+ TB, Requires<[HasXMMInt]>;
// Pause. This "instruction" is encoded as "rep; nop", so even though it
// was introduced with SSE2, it's backward compatible.
@@ -3273,11 +3275,11 @@ def PAUSE : I<0x90, RawFrm, (outs), (ins), "pause", []>, REP;
// Load, store, and memory fence
def SFENCE : I<0xAE, MRM_F8, (outs), (ins),
- "sfence", [(int_x86_sse_sfence)]>, TB, Requires<[HasSSE1]>;
+ "sfence", [(int_x86_sse_sfence)]>, TB, Requires<[HasXMM]>;
def LFENCE : I<0xAE, MRM_E8, (outs), (ins),
- "lfence", [(int_x86_sse2_lfence)]>, TB, Requires<[HasSSE2]>;
+ "lfence", [(int_x86_sse2_lfence)]>, TB, Requires<[HasXMMInt]>;
def MFENCE : I<0xAE, MRM_F0, (outs), (ins),
- "mfence", [(int_x86_sse2_mfence)]>, TB, Requires<[HasSSE2]>;
+ "mfence", [(int_x86_sse2_mfence)]>, TB, Requires<[HasXMMInt]>;
def : Pat<(X86SFence), (SFENCE)>;
def : Pat<(X86LFence), (LFENCE)>;
@@ -5463,17 +5465,19 @@ def : Pat<(v16i8 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
let usesCustomInserter = 1 in {
def MONITOR : PseudoI<(outs), (ins i32mem:$src1, GR32:$src2, GR32:$src3),
- [(int_x86_sse3_monitor addr:$src1, GR32:$src2, GR32:$src3)]>;
+ [(int_x86_sse3_monitor addr:$src1, GR32:$src2, GR32:$src3)]>,
+ Requires<[HasSSE3orAVX]>;
def MWAIT : PseudoI<(outs), (ins GR32:$src1, GR32:$src2),
- [(int_x86_sse3_mwait GR32:$src1, GR32:$src2)]>;
+ [(int_x86_sse3_mwait GR32:$src1, GR32:$src2)]>,
+ Requires<[HasSSE3orAVX]>;
}
let Uses = [EAX, ECX, EDX] in
def MONITORrrr : I<0x01, MRM_C8, (outs), (ins), "monitor", []>, TB,
- Requires<[HasSSE3]>;
+ Requires<[HasSSE3orAVX]>;
let Uses = [ECX, EAX] in
def MWAITrr : I<0x01, MRM_C9, (outs), (ins), "mwait", []>, TB,
- Requires<[HasSSE3]>;
+ Requires<[HasSSE3orAVX]>;
def : InstAlias<"mwait %eax, %ecx", (MWAITrr)>, Requires<[In32BitMode]>;
def : InstAlias<"mwait %rax, %rcx", (MWAITrr)>, Requires<[In64BitMode]>;
diff --git a/test/CodeGen/X86/apm.ll b/test/CodeGen/X86/apm.ll
index b514cf6427..aaedf18481 100644
--- a/test/CodeGen/X86/apm.ll
+++ b/test/CodeGen/X86/apm.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s -check-prefix=WIN64
+; RUN: llc < %s -mtriple=x86_64-linux -mattr=+sse3 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-win32 -mattr=+sse3 | FileCheck %s -check-prefix=WIN64
; PR8573
; CHECK: foo:
diff --git a/test/CodeGen/X86/avx-intrinsics-x86.ll b/test/CodeGen/X86/avx-intrinsics-x86.ll
index f58391469c..b4f04ceb8f 100644
--- a/test/CodeGen/X86/avx-intrinsics-x86.ll
+++ b/test/CodeGen/X86/avx-intrinsics-x86.ll
@@ -2481,4 +2481,52 @@ define void @test_x86_avx_vzeroupper() {
}
declare void @llvm.x86.avx.vzeroupper() nounwind
+; Make sure instructions with no AVX equivalents, but are associated with SSEX feature flags still work
+; CHECK: monitor
+define void @monitor(i8* %P, i32 %E, i32 %H) nounwind {
+entry:
+ tail call void @llvm.x86.sse3.monitor(i8* %P, i32 %E, i32 %H)
+ ret void
+}
+declare void @llvm.x86.sse3.monitor(i8*, i32, i32) nounwind
+
+; CHECK: mwait
+define void @mwait(i32 %E, i32 %H) nounwind {
+entry:
+ tail call void @llvm.x86.sse3.mwait(i32 %E, i32 %H)
+ ret void
+}
+declare void @llvm.x86.sse3.mwait(i32, i32) nounwind
+
+; CHECK: sfence
+define void @sfence() nounwind {
+entry:
+ tail call void @llvm.x86.sse.sfence()
+ ret void
+}
+declare void @llvm.x86.sse.sfence() nounwind
+
+; CHECK: lfence
+define void @lfence() nounwind {
+entry:
+ tail call void @llvm.x86.sse2.lfence()
+ ret void
+}
+declare void @llvm.x86.sse2.lfence() nounwind
+
+; CHECK: mfence
+define void @mfence() nounwind {
+entry:
+ tail call void @llvm.x86.sse2.mfence()
+ ret void
+}
+declare void @llvm.x86.sse2.mfence() nounwind
+
+; CHECK: clflush
+define void @clflush(i8* %p) nounwind {
+entry:
+ tail call void @llvm.x86.sse2.clflush(i8* %p)
+ ret void
+}
+declare void @llvm.x86.sse2.clflush(i8*) nounwind