aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--lib/Target/X86/X86.td14
-rw-r--r--lib/Target/X86/X86Subtarget.cpp20
2 files changed, 21 insertions, 13 deletions
diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td
index 8ad0bc08ac..e3c22d9c3b 100644
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td
@@ -182,17 +182,17 @@ def : Proc<"westmere", [FeatureSSE42, FeatureCMPXCHG16B,
// Sandy Bridge
// SSE is not listed here since llvm treats AVX as a reimplementation of SSE,
// rather than a superset.
-def : Proc<"corei7-avx", [FeatureAVX, FeatureCMPXCHG16B, FeaturePOPCNT,
- FeatureAES, FeaturePCLMUL]>;
+def : Proc<"corei7-avx", [FeatureAVX, FeatureCMPXCHG16B, FeatureFastUAMem,
+ FeaturePOPCNT, FeatureAES, FeaturePCLMUL]>;
// Ivy Bridge
-def : Proc<"core-avx-i", [FeatureAVX, FeatureCMPXCHG16B, FeaturePOPCNT,
- FeatureAES, FeaturePCLMUL,
+def : Proc<"core-avx-i", [FeatureAVX, FeatureCMPXCHG16B, FeatureFastUAMem,
+ FeaturePOPCNT, FeatureAES, FeaturePCLMUL,
FeatureRDRAND, FeatureF16C, FeatureFSGSBase]>;
// Haswell
-def : Proc<"core-avx2", [FeatureAVX2, FeatureCMPXCHG16B, FeaturePOPCNT,
- FeatureAES, FeaturePCLMUL, FeatureRDRAND,
- FeatureF16C, FeatureFSGSBase,
+def : Proc<"core-avx2", [FeatureAVX2, FeatureCMPXCHG16B, FeatureFastUAMem,
+ FeaturePOPCNT, FeatureAES, FeaturePCLMUL,
+ FeatureRDRAND, FeatureF16C, FeatureFSGSBase,
FeatureMOVBE, FeatureLZCNT, FeatureBMI,
FeatureBMI2, FeatureFMA,
FeatureRTM]>;
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index 34f9fad696..bdce4d0221 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -234,12 +234,20 @@ void X86Subtarget::AutoDetectSubtargetFeatures() {
ToggleFeature(X86::FeatureSlowBTMem);
}
- // If it's Nehalem, unaligned memory access is fast.
- // Include Westmere and Sandy Bridge as well.
- // FIXME: add later processors.
- if (IsIntel && ((Family == 6 && Model == 26) ||
- (Family == 6 && Model == 44) ||
- (Family == 6 && Model == 42))) {
+ // If it's an Intel chip since Nehalem and not an Atom chip, unaligned
+ // memory access is fast. We hard code model numbers here because they
+ // aren't strictly increasing for Intel chips it seems.
+ if (IsIntel &&
+ ((Family == 6 && Model == 0x1E) || // Nehalem: Clarksfield, Lynnfield,
+ // Jasper Froest
+ (Family == 6 && Model == 0x2A) || // Nehalem: Bloomfield, Nehalem-EP
+ (Family == 6 && Model == 0x2E) || // Nehalem: Nehalem-EX
+ (Family == 6 && Model == 0x25) || // Westmere: Arrandale, Clarksdale
+ (Family == 6 && Model == 0x2C) || // Westmere: Gulftown, Westmere-EP
+ (Family == 6 && Model == 0x2F) || // Westmere: Westmere-EX
+ (Family == 6 && Model == 0x2A) || // SandyBridge
+ (Family == 6 && Model == 0x2D) || // SandyBridge: SandyBridge-E*
+ (Family == 6 && Model == 0x3A))) {// IvyBridge
IsUAMemFast = true;
ToggleFeature(X86::FeatureFastUAMem);
}