diff options
-rw-r--r-- | lib/Target/X86/X86.td | 14 | ||||
-rw-r--r-- | lib/Target/X86/X86Subtarget.cpp | 20 |
2 files changed, 21 insertions, 13 deletions
diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td index 8ad0bc08ac..e3c22d9c3b 100644 --- a/lib/Target/X86/X86.td +++ b/lib/Target/X86/X86.td @@ -182,17 +182,17 @@ def : Proc<"westmere", [FeatureSSE42, FeatureCMPXCHG16B, // Sandy Bridge // SSE is not listed here since llvm treats AVX as a reimplementation of SSE, // rather than a superset. -def : Proc<"corei7-avx", [FeatureAVX, FeatureCMPXCHG16B, FeaturePOPCNT, - FeatureAES, FeaturePCLMUL]>; +def : Proc<"corei7-avx", [FeatureAVX, FeatureCMPXCHG16B, FeatureFastUAMem, + FeaturePOPCNT, FeatureAES, FeaturePCLMUL]>; // Ivy Bridge -def : Proc<"core-avx-i", [FeatureAVX, FeatureCMPXCHG16B, FeaturePOPCNT, - FeatureAES, FeaturePCLMUL, +def : Proc<"core-avx-i", [FeatureAVX, FeatureCMPXCHG16B, FeatureFastUAMem, + FeaturePOPCNT, FeatureAES, FeaturePCLMUL, FeatureRDRAND, FeatureF16C, FeatureFSGSBase]>; // Haswell -def : Proc<"core-avx2", [FeatureAVX2, FeatureCMPXCHG16B, FeaturePOPCNT, - FeatureAES, FeaturePCLMUL, FeatureRDRAND, - FeatureF16C, FeatureFSGSBase, +def : Proc<"core-avx2", [FeatureAVX2, FeatureCMPXCHG16B, FeatureFastUAMem, + FeaturePOPCNT, FeatureAES, FeaturePCLMUL, + FeatureRDRAND, FeatureF16C, FeatureFSGSBase, FeatureMOVBE, FeatureLZCNT, FeatureBMI, FeatureBMI2, FeatureFMA, FeatureRTM]>; diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index 34f9fad696..bdce4d0221 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -234,12 +234,20 @@ void X86Subtarget::AutoDetectSubtargetFeatures() { ToggleFeature(X86::FeatureSlowBTMem); } - // If it's Nehalem, unaligned memory access is fast. - // Include Westmere and Sandy Bridge as well. - // FIXME: add later processors. - if (IsIntel && ((Family == 6 && Model == 26) || - (Family == 6 && Model == 44) || - (Family == 6 && Model == 42))) { + // If it's an Intel chip since Nehalem and not an Atom chip, unaligned + // memory access is fast. We hard code model numbers here because they + // aren't strictly increasing for Intel chips it seems. + if (IsIntel && + ((Family == 6 && Model == 0x1E) || // Nehalem: Clarksfield, Lynnfield, + // Jasper Froest + (Family == 6 && Model == 0x2A) || // Nehalem: Bloomfield, Nehalem-EP + (Family == 6 && Model == 0x2E) || // Nehalem: Nehalem-EX + (Family == 6 && Model == 0x25) || // Westmere: Arrandale, Clarksdale + (Family == 6 && Model == 0x2C) || // Westmere: Gulftown, Westmere-EP + (Family == 6 && Model == 0x2F) || // Westmere: Westmere-EX + (Family == 6 && Model == 0x2A) || // SandyBridge + (Family == 6 && Model == 0x2D) || // SandyBridge: SandyBridge-E* + (Family == 6 && Model == 0x3A))) {// IvyBridge IsUAMemFast = true; ToggleFeature(X86::FeatureFastUAMem); } |