aboutsummaryrefslogtreecommitdiff
path: root/arch/x86/kernel/cpu/mtrr
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel/cpu/mtrr')
-rw-r--r--arch/x86/kernel/cpu/mtrr/cleanup.c24
-rw-r--r--arch/x86/kernel/cpu/mtrr/cyrix.c2
-rw-r--r--arch/x86/kernel/cpu/mtrr/generic.c167
-rw-r--r--arch/x86/kernel/cpu/mtrr/if.c10
-rw-r--r--arch/x86/kernel/cpu/mtrr/main.c268
5 files changed, 295 insertions, 176 deletions
diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c
index 06130b52f01..5f90b85ff22 100644
--- a/arch/x86/kernel/cpu/mtrr/cleanup.c
+++ b/arch/x86/kernel/cpu/mtrr/cleanup.c
@@ -258,15 +258,15 @@ range_to_mtrr(unsigned int reg, unsigned long range_startk,
/* Compute the maximum size with which we can make a range: */
if (range_startk)
- max_align = ffs(range_startk) - 1;
+ max_align = __ffs(range_startk);
else
- max_align = 32;
+ max_align = BITS_PER_LONG - 1;
- align = fls(range_sizek) - 1;
+ align = __fls(range_sizek);
if (align > max_align)
align = max_align;
- sizek = 1 << align;
+ sizek = 1UL << align;
if (debug_print) {
char start_factor = 'K', size_factor = 'K';
unsigned long start_base, size_base;
@@ -632,9 +632,9 @@ static void __init mtrr_print_out_one_result(int i)
unsigned long gran_base, chunk_base, lose_base;
char gran_factor, chunk_factor, lose_factor;
- gran_base = to_size_factor(result[i].gran_sizek, &gran_factor),
- chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor),
- lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor),
+ gran_base = to_size_factor(result[i].gran_sizek, &gran_factor);
+ chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor);
+ lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor);
pr_info("%sgran_size: %ld%c \tchunk_size: %ld%c \t",
result[i].bad ? "*BAD*" : " ",
@@ -714,15 +714,15 @@ int __init mtrr_cleanup(unsigned address_bits)
if (mtrr_tom2)
x_remove_size = (mtrr_tom2 >> PAGE_SHIFT) - x_remove_base;
- nr_range = x86_get_mtrr_mem_range(range, 0, x_remove_base, x_remove_size);
/*
* [0, 1M) should always be covered by var mtrr with WB
* and fixed mtrrs should take effect before var mtrr for it:
*/
- nr_range = add_range_with_merge(range, RANGE_NUM, nr_range, 0,
+ nr_range = add_range_with_merge(range, RANGE_NUM, 0, 0,
1ULL<<(20 - PAGE_SHIFT));
- /* Sort the ranges: */
- sort_range(range, nr_range);
+ /* add from var mtrr at last */
+ nr_range = x86_get_mtrr_mem_range(range, nr_range,
+ x_remove_base, x_remove_size);
range_sums = sum_ranges(range, nr_range);
printk(KERN_INFO "total RAM covered: %ldM\n",
@@ -827,7 +827,7 @@ int __init amd_special_default_mtrr(void)
if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
return 0;
- if (boot_cpu_data.x86 < 0xf || boot_cpu_data.x86 > 0x11)
+ if (boot_cpu_data.x86 < 0xf)
return 0;
/* In case some hypervisor doesn't pass SYSCFG through: */
if (rdmsr_safe(MSR_K8_SYSCFG, &l, &h) < 0)
diff --git a/arch/x86/kernel/cpu/mtrr/cyrix.c b/arch/x86/kernel/cpu/mtrr/cyrix.c
index 68a3343e579..9e451b0876b 100644
--- a/arch/x86/kernel/cpu/mtrr/cyrix.c
+++ b/arch/x86/kernel/cpu/mtrr/cyrix.c
@@ -167,7 +167,7 @@ static void post_set(void)
setCx86(CX86_CCR3, ccr3);
/* Enable caches */
- write_cr0(read_cr0() & 0xbfffffff);
+ write_cr0(read_cr0() & ~X86_CR0_CD);
/* Restore value of CR4 */
if (cpu_has_pge)
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index fd31a441c61..0e25a1bc5ab 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -1,6 +1,6 @@
/*
* This only handles 32bit MTRR on 32bit hosts. This is strictly wrong
- * because MTRRs can span upto 40 bits (36bits on most modern x86)
+ * because MTRRs can span up to 40 bits (36bits on most modern x86)
*/
#define DEBUG
@@ -12,7 +12,6 @@
#include <asm/processor-flags.h>
#include <asm/cpufeature.h>
#include <asm/tlbflush.h>
-#include <asm/system.h>
#include <asm/mtrr.h>
#include <asm/msr.h>
#include <asm/pat.h>
@@ -64,18 +63,59 @@ static inline void k8_check_syscfg_dram_mod_en(void)
}
}
+/* Get the size of contiguous MTRR range */
+static u64 get_mtrr_size(u64 mask)
+{
+ u64 size;
+
+ mask >>= PAGE_SHIFT;
+ mask |= size_or_mask;
+ size = -mask;
+ size <<= PAGE_SHIFT;
+ return size;
+}
+
/*
- * Returns the effective MTRR type for the region
- * Error returns:
- * - 0xFE - when the range is "not entirely covered" by _any_ var range MTRR
- * - 0xFF - when MTRR is not enabled
+ * Check and return the effective type for MTRR-MTRR type overlap.
+ * Returns 1 if the effective type is UNCACHEABLE, else returns 0
*/
-u8 mtrr_type_lookup(u64 start, u64 end)
+static int check_type_overlap(u8 *prev, u8 *curr)
+{
+ if (*prev == MTRR_TYPE_UNCACHABLE || *curr == MTRR_TYPE_UNCACHABLE) {
+ *prev = MTRR_TYPE_UNCACHABLE;
+ *curr = MTRR_TYPE_UNCACHABLE;
+ return 1;
+ }
+
+ if ((*prev == MTRR_TYPE_WRBACK && *curr == MTRR_TYPE_WRTHROUGH) ||
+ (*prev == MTRR_TYPE_WRTHROUGH && *curr == MTRR_TYPE_WRBACK)) {
+ *prev = MTRR_TYPE_WRTHROUGH;
+ *curr = MTRR_TYPE_WRTHROUGH;
+ }
+
+ if (*prev != *curr) {
+ *prev = MTRR_TYPE_UNCACHABLE;
+ *curr = MTRR_TYPE_UNCACHABLE;
+ return 1;
+ }
+
+ return 0;
+}
+
+/*
+ * Error/Semi-error returns:
+ * 0xFF - when MTRR is not enabled
+ * *repeat == 1 implies [start:end] spanned across MTRR range and type returned
+ * corresponds only to [start:*partial_end].
+ * Caller has to lookup again for [*partial_end:end].
+ */
+static u8 __mtrr_type_lookup(u64 start, u64 end, u64 *partial_end, int *repeat)
{
int i;
u64 base, mask;
u8 prev_match, curr_match;
+ *repeat = 0;
if (!mtrr_state_set)
return 0xFF;
@@ -126,8 +166,34 @@ u8 mtrr_type_lookup(u64 start, u64 end)
start_state = ((start & mask) == (base & mask));
end_state = ((end & mask) == (base & mask));
- if (start_state != end_state)
- return 0xFE;
+
+ if (start_state != end_state) {
+ /*
+ * We have start:end spanning across an MTRR.
+ * We split the region into
+ * either
+ * (start:mtrr_end) (mtrr_end:end)
+ * or
+ * (start:mtrr_start) (mtrr_start:end)
+ * depending on kind of overlap.
+ * Return the type for first region and a pointer to
+ * the start of second region so that caller will
+ * lookup again on the second region.
+ * Note: This way we handle multiple overlaps as well.
+ */
+ if (start_state)
+ *partial_end = base + get_mtrr_size(mask);
+ else
+ *partial_end = base;
+
+ if (unlikely(*partial_end <= start)) {
+ WARN_ON(1);
+ *partial_end = start + PAGE_SIZE;
+ }
+
+ end = *partial_end - 1; /* end is inclusive */
+ *repeat = 1;
+ }
if ((start & mask) != (base & mask))
continue;
@@ -138,21 +204,8 @@ u8 mtrr_type_lookup(u64 start, u64 end)
continue;
}
- if (prev_match == MTRR_TYPE_UNCACHABLE ||
- curr_match == MTRR_TYPE_UNCACHABLE) {
- return MTRR_TYPE_UNCACHABLE;
- }
-
- if ((prev_match == MTRR_TYPE_WRBACK &&
- curr_match == MTRR_TYPE_WRTHROUGH) ||
- (prev_match == MTRR_TYPE_WRTHROUGH &&
- curr_match == MTRR_TYPE_WRBACK)) {
- prev_match = MTRR_TYPE_WRTHROUGH;
- curr_match = MTRR_TYPE_WRTHROUGH;
- }
-
- if (prev_match != curr_match)
- return MTRR_TYPE_UNCACHABLE;
+ if (check_type_overlap(&prev_match, &curr_match))
+ return curr_match;
}
if (mtrr_tom2) {
@@ -166,6 +219,36 @@ u8 mtrr_type_lookup(u64 start, u64 end)
return mtrr_state.def_type;
}
+/*
+ * Returns the effective MTRR type for the region
+ * Error return:
+ * 0xFF - when MTRR is not enabled
+ */
+u8 mtrr_type_lookup(u64 start, u64 end)
+{
+ u8 type, prev_type;
+ int repeat;
+ u64 partial_end;
+
+ type = __mtrr_type_lookup(start, end, &partial_end, &repeat);
+
+ /*
+ * Common path is with repeat = 0.
+ * However, we can have cases where [start:end] spans across some
+ * MTRR range. Do repeated lookups for that case here.
+ */
+ while (repeat) {
+ prev_type = type;
+ start = partial_end;
+ type = __mtrr_type_lookup(start, end, &partial_end, &repeat);
+
+ if (check_type_overlap(&prev_type, &type))
+ return type;
+ }
+
+ return type;
+}
+
/* Get the MSR pair relating to a var range */
static void
get_mtrr_var_range(unsigned int index, struct mtrr_var_range *vr)
@@ -278,11 +361,7 @@ static void __init print_mtrr_state(void)
}
pr_debug("MTRR variable ranges %sabled:\n",
mtrr_state.enabled & 2 ? "en" : "dis");
- if (size_or_mask & 0xffffffffUL)
- high_width = ffs(size_or_mask & 0xffffffffUL) - 1;
- else
- high_width = ffs(size_or_mask>>32) + 32 - 1;
- high_width = (high_width - (32 - PAGE_SHIFT) + 3) / 4;
+ high_width = (__ffs64(size_or_mask) - (32 - PAGE_SHIFT) + 3) / 4;
for (i = 0; i < num_var_ranges; ++i) {
if (mtrr_state.var_ranges[i].mask_lo & (1 << 11))
@@ -431,15 +510,15 @@ generic_get_free_region(unsigned long base, unsigned long size, int replace_reg)
static void generic_get_mtrr(unsigned int reg, unsigned long *base,
unsigned long *size, mtrr_type *type)
{
- unsigned int mask_lo, mask_hi, base_lo, base_hi;
- unsigned int tmp, hi;
- int cpu;
+ u32 mask_lo, mask_hi, base_lo, base_hi;
+ unsigned int hi;
+ u64 tmp, mask;
/*
* get_mtrr doesn't need to update mtrr_state, also it could be called
* from any cpu, so try to print it out directly.
*/
- cpu = get_cpu();
+ get_cpu();
rdmsr(MTRRphysMask_MSR(reg), mask_lo, mask_hi);
@@ -454,17 +533,18 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base,
rdmsr(MTRRphysBase_MSR(reg), base_lo, base_hi);
/* Work out the shifted address mask: */
- tmp = mask_hi << (32 - PAGE_SHIFT) | mask_lo >> PAGE_SHIFT;
- mask_lo = size_or_mask | tmp;
+ tmp = (u64)mask_hi << (32 - PAGE_SHIFT) | mask_lo >> PAGE_SHIFT;
+ mask = size_or_mask | tmp;
/* Expand tmp with high bits to all 1s: */
- hi = fls(tmp);
+ hi = fls64(tmp);
if (hi > 0) {
- tmp |= ~((1<<(hi - 1)) - 1);
+ tmp |= ~((1ULL<<(hi - 1)) - 1);
- if (tmp != mask_lo) {
+ if (tmp != mask) {
printk(KERN_WARNING "mtrr: your BIOS has configured an incorrect mask, fixing it.\n");
- mask_lo = tmp;
+ add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
+ mask = tmp;
}
}
@@ -472,8 +552,8 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base,
* This works correctly if size is a power of two, i.e. a
* contiguous range:
*/
- *size = -mask_lo;
- *base = base_hi << (32 - PAGE_SHIFT) | base_lo >> PAGE_SHIFT;
+ *size = -mask;
+ *base = (u64)base_hi << (32 - PAGE_SHIFT) | base_lo >> PAGE_SHIFT;
*type = base_lo & 0xff;
out_put_cpu:
@@ -603,6 +683,7 @@ static void prepare_set(void) __acquires(set_atomicity_lock)
}
/* Flush all TLBs via a mov %cr3, %reg; mov %reg, %cr3 */
+ count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
__flush_tlb();
/* Save MTRR state */
@@ -610,18 +691,20 @@ static void prepare_set(void) __acquires(set_atomicity_lock)
/* Disable MTRRs, and set the default type to uncached */
mtrr_wrmsr(MSR_MTRRdefType, deftype_lo & ~0xcff, deftype_hi);
+ wbinvd();
}
static void post_set(void) __releases(set_atomicity_lock)
{
/* Flush TLBs (no need to flush caches - they are disabled) */
+ count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
__flush_tlb();
/* Intel (P6) standard MTRRs */
mtrr_wrmsr(MSR_MTRRdefType, deftype_lo, deftype_hi);
/* Enable caches */
- write_cr0(read_cr0() & 0xbfffffff);
+ write_cr0(read_cr0() & ~X86_CR0_CD);
/* Restore value of CR4 */
if (cpu_has_pge)
diff --git a/arch/x86/kernel/cpu/mtrr/if.c b/arch/x86/kernel/cpu/mtrr/if.c
index 79289632cb2..a041e094b8b 100644
--- a/arch/x86/kernel/cpu/mtrr/if.c
+++ b/arch/x86/kernel/cpu/mtrr/if.c
@@ -167,6 +167,7 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg)
{
int err = 0;
mtrr_type type;
+ unsigned long base;
unsigned long size;
struct mtrr_sentry sentry;
struct mtrr_gentry gentry;
@@ -267,14 +268,14 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg)
#endif
if (gentry.regnum >= num_var_ranges)
return -EINVAL;
- mtrr_if->get(gentry.regnum, &gentry.base, &size, &type);
+ mtrr_if->get(gentry.regnum, &base, &size, &type);
/* Hide entries that go above 4GB */
- if (gentry.base + size - 1 >= (1UL << (8 * sizeof(gentry.size) - PAGE_SHIFT))
+ if (base + size - 1 >= (1UL << (8 * sizeof(gentry.size) - PAGE_SHIFT))
|| size >= (1UL << (8 * sizeof(gentry.size) - PAGE_SHIFT)))
gentry.base = gentry.size = gentry.type = 0;
else {
- gentry.base <<= PAGE_SHIFT;
+ gentry.base = base << PAGE_SHIFT;
gentry.size = size << PAGE_SHIFT;
gentry.type = type;
}
@@ -321,11 +322,12 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg)
#endif
if (gentry.regnum >= num_var_ranges)
return -EINVAL;
- mtrr_if->get(gentry.regnum, &gentry.base, &size, &type);
+ mtrr_if->get(gentry.regnum, &base, &size, &type);
/* Hide entries that would overflow */
if (size != (__typeof__(gentry.size))size)
gentry.base = gentry.size = gentry.type = 0;
else {
+ gentry.base = base;
gentry.size = size;
gentry.type = type;
}
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 79556bd9b60..f961de9964c 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -35,6 +35,7 @@
#include <linux/types.h> /* FIXME: kvm_para.h needs this */
+#include <linux/stop_machine.h>
#include <linux/kvm_para.h>
#include <linux/uaccess.h>
#include <linux/module.h>
@@ -44,14 +45,19 @@
#include <linux/cpu.h>
#include <linux/pci.h>
#include <linux/smp.h>
+#include <linux/syscore_ops.h>
#include <asm/processor.h>
#include <asm/e820.h>
#include <asm/mtrr.h>
#include <asm/msr.h>
+#include <asm/pat.h>
#include "mtrr.h"
+/* arch_phys_wc_add returns an MTRR register index plus this offset. */
+#define MTRR_TO_PHYS_WC_OFFSET 1000
+
u32 num_var_ranges;
unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES];
@@ -77,7 +83,6 @@ void set_mtrr_ops(const struct mtrr_ops *ops)
static int have_wrcomb(void)
{
struct pci_dev *dev;
- u8 rev;
dev = pci_get_class(PCI_CLASS_BRIDGE_HOST << 8, NULL);
if (dev != NULL) {
@@ -87,13 +92,11 @@ static int have_wrcomb(void)
* chipsets to be tagged
*/
if (dev->vendor == PCI_VENDOR_ID_SERVERWORKS &&
- dev->device == PCI_DEVICE_ID_SERVERWORKS_LE) {
- pci_read_config_byte(dev, PCI_CLASS_REVISION, &rev);
- if (rev <= 5) {
- pr_info("mtrr: Serverworks LE rev < 6 detected. Write-combining disabled.\n");
- pci_dev_put(dev);
- return 0;
- }
+ dev->device == PCI_DEVICE_ID_SERVERWORKS_LE &&
+ dev->revision <= 5) {
+ pr_info("mtrr: Serverworks LE rev < 6 detected. Write-combining disabled.\n");
+ pci_dev_put(dev);
+ return 0;
}
/*
* Intel 450NX errata # 23. Non ascending cacheline evictions to
@@ -135,8 +138,6 @@ static void __init init_table(void)
}
struct set_mtrr_data {
- atomic_t count;
- atomic_t gate;
unsigned long smp_base;
unsigned long smp_size;
unsigned int smp_reg;
@@ -144,41 +145,36 @@ struct set_mtrr_data {
};
/**
- * ipi_handler - Synchronisation handler. Executed by "other" CPUs.
+ * mtrr_rendezvous_handler - Work done in the synchronization handler. Executed
+ * by all the CPUs.
* @info: pointer to mtrr configuration data
*
* Returns nothing.
*/
-static void ipi_handler(void *info)
+static int mtrr_rendezvous_handler(void *info)
{
-#ifdef CONFIG_SMP
struct set_mtrr_data *data = info;
- unsigned long flags;
-
- local_irq_save(flags);
- atomic_dec(&data->count);
- while (!atomic_read(&data->gate))
- cpu_relax();
-
- /* The master has cleared me to execute */
+ /*
+ * We use this same function to initialize the mtrrs during boot,
+ * resume, runtime cpu online and on an explicit request to set a
+ * specific MTRR.
+ *
+ * During boot or suspend, the state of the boot cpu's mtrrs has been
+ * saved, and we want to replicate that across all the cpus that come
+ * online (either at the end of boot or resume or during a runtime cpu
+ * online). If we're doing that, @reg is set to something special and on
+ * all the cpu's we do mtrr_if->set_all() (On the logical cpu that
+ * started the boot/resume sequence, this might be a duplicate
+ * set_all()).
+ */
if (data->smp_reg != ~0U) {
mtrr_if->set(data->smp_reg, data->smp_base,
data->smp_size, data->smp_type);
- } else if (mtrr_aps_delayed_init) {
- /*
- * Initialize the MTRRs inaddition to the synchronisation.
- */
+ } else if (mtrr_aps_delayed_init || !cpu_online(smp_processor_id())) {
mtrr_if->set_all();
}
-
- atomic_dec(&data->count);
- while (atomic_read(&data->gate))
- cpu_relax();
-
- atomic_dec(&data->count);
- local_irq_restore(flags);
-#endif
+ return 0;
}
static inline int types_compatible(mtrr_type type1, mtrr_type type2)
@@ -198,7 +194,7 @@ static inline int types_compatible(mtrr_type type1, mtrr_type type2)
*
* This is kinda tricky, but fortunately, Intel spelled it out for us cleanly:
*
- * 1. Send IPI to do the following:
+ * 1. Queue work to do the following on all processors:
* 2. Disable Interrupts
* 3. Wait for all procs to do so
* 4. Enter no-fill cache mode
@@ -214,17 +210,11 @@ static inline int types_compatible(mtrr_type type1, mtrr_type type2)
* 14. Wait for buddies to catch up
* 15. Enable interrupts.
*
- * What does that mean for us? Well, first we set data.count to the number
- * of CPUs. As each CPU disables interrupts, it'll decrement it once. We wait
- * until it hits 0 and proceed. We set the data.gate flag and reset data.count.
- * Meanwhile, they are waiting for that flag to be set. Once it's set, each
- * CPU goes through the transition of updating MTRRs.
- * The CPU vendors may each do it differently,
- * so we call mtrr_if->set() callback and let them take care of it.
- * When they're done, they again decrement data->count and wait for data.gate
- * to be reset.
- * When we finish, we wait for data.count to hit 0 and toggle the data.gate flag
- * Everyone then enables interrupts and we all continue on.
+ * What does that mean for us? Well, stop_machine() will ensure that
+ * the rendezvous handler is started on each CPU. And in lockstep they
+ * do the state transition of disabling interrupts, updating MTRR's
+ * (the CPU vendors may each do it differently, so we call mtrr_if->set()
+ * callback and let them take care of it.) and enabling interrupts.
*
* Note that the mechanism is the same for UP systems, too; all the SMP stuff
* becomes nops.
@@ -232,63 +222,26 @@ static inline int types_compatible(mtrr_type type1, mtrr_type type2)
static void
set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type type)
{
- struct set_mtrr_data data;
- unsigned long flags;
-
- data.smp_reg = reg;
- data.smp_base = base;
- data.smp_size = size;
- data.smp_type = type;
- atomic_set(&data.count, num_booting_cpus() - 1);
-
- /* Make sure data.count is visible before unleashing other CPUs */
- smp_wmb();
- atomic_set(&data.gate, 0);
-
- /* Start the ball rolling on other CPUs */
- if (smp_call_function(ipi_handler, &data, 0) != 0)
- panic("mtrr: timed out waiting for other CPUs\n");
+ struct set_mtrr_data data = { .smp_reg = reg,
+ .smp_base = base,
+ .smp_size = size,
+ .smp_type = type
+ };
- local_irq_save(flags);
-
- while (atomic_read(&data.count))
- cpu_relax();
-
- /* Ok, reset count and toggle gate */
- atomic_set(&data.count, num_booting_cpus() - 1);
- smp_wmb();
- atomic_set(&data.gate, 1);
-
- /* Do our MTRR business */
-
- /*
- * HACK!
- * We use this same function to initialize the mtrrs on boot.
- * The state of the boot cpu's mtrrs has been saved, and we want
- * to replicate across all the APs.
- * If we're doing that @reg is set to something special...
- */
- if (reg != ~0U)
- mtrr_if->set(reg, base, size, type);
- else if (!mtrr_aps_delayed_init)
- mtrr_if->set_all();
-
- /* Wait for the others */
- while (atomic_read(&data.count))
- cpu_relax();
-
- atomic_set(&data.count, num_booting_cpus() - 1);
- smp_wmb();
- atomic_set(&data.gate, 0);
-
- /*
- * Wait here for everyone to have seen the gate change
- * So we're the last ones to touch 'data'
- */
- while (atomic_read(&data.count))
- cpu_relax();
+ stop_machine(mtrr_rendezvous_handler, &data, cpu_online_mask);
+}
- local_irq_restore(flags);
+static void set_mtrr_from_inactive_cpu(unsigned int reg, unsigned long base,
+ unsigned long size, mtrr_type type)
+{
+ struct set_mtrr_data data = { .smp_reg = reg,
+ .smp_base = base,
+ .smp_size = size,
+ .smp_type = type
+ };
+
+ stop_machine_from_inactive_cpu(mtrr_rendezvous_handler, &data,
+ cpu_callout_mask);
}
/**
@@ -356,7 +309,8 @@ int mtrr_add_page(unsigned long base, unsigned long size,
return -EINVAL;
}
- if (base & size_or_mask || size & size_or_mask) {
+ if ((base | (base + size - 1)) >>
+ (boot_cpu_data.x86_phys_bits - PAGE_SHIFT)) {
pr_warning("mtrr: base or size exceeds the MTRR width\n");
return -EINVAL;
}
@@ -575,6 +529,73 @@ int mtrr_del(int reg, unsigned long base, unsigned long size)
}
EXPORT_SYMBOL(mtrr_del);
+/**
+ * arch_phys_wc_add - add a WC MTRR and handle errors if PAT is unavailable
+ * @base: Physical base address
+ * @size: Size of region
+ *
+ * If PAT is available, this does nothing. If PAT is unavailable, it
+ * attempts to add a WC MTRR covering size bytes starting at base and
+ * logs an error if this fails.
+ *
+ * Drivers must store the return value to pass to mtrr_del_wc_if_needed,
+ * but drivers should not try to interpret that return value.
+ */
+int arch_phys_wc_add(unsigned long base, unsigned long size)
+{
+ int ret;
+
+ if (pat_enabled)
+ return 0; /* Success! (We don't need to do anything.) */
+
+ ret = mtrr_add(base, size, MTRR_TYPE_WRCOMB, true);
+ if (ret < 0) {
+ pr_warn("Failed to add WC MTRR for [%p-%p]; performance may suffer.",
+ (void *)base, (void *)(base + size - 1));
+ return ret;
+ }
+ return ret + MTRR_TO_PHYS_WC_OFFSET;
+}
+EXPORT_SYMBOL(arch_phys_wc_add);
+
+/*
+ * arch_phys_wc_del - undoes arch_phys_wc_add
+ * @handle: Return value from arch_phys_wc_add
+ *
+ * This cleans up after mtrr_add_wc_if_needed.
+ *
+ * The API guarantees that mtrr_del_wc_if_needed(error code) and
+ * mtrr_del_wc_if_needed(0) do nothing.
+ */
+void arch_phys_wc_del(int handle)
+{
+ if (handle >= 1) {
+ WARN_ON(handle < MTRR_TO_PHYS_WC_OFFSET);
+ mtrr_del(handle - MTRR_TO_PHYS_WC_OFFSET, 0, 0);
+ }
+}
+EXPORT_SYMBOL(arch_phys_wc_del);
+
+/*
+ * phys_wc_to_mtrr_index - translates arch_phys_wc_add's return value
+ * @handle: Return value from arch_phys_wc_add
+ *
+ * This will turn the return value from arch_phys_wc_add into an mtrr
+ * index suitable for debugging.
+ *
+ * Note: There is no legitimate use for this function, except possibly
+ * in printk line. Alas there is an illegitimate use in some ancient
+ * drm ioctls.
+ */
+int phys_wc_to_mtrr_index(int handle)
+{
+ if (handle < MTRR_TO_PHYS_WC_OFFSET)
+ return -1;
+ else
+ return handle - MTRR_TO_PHYS_WC_OFFSET;
+}
+EXPORT_SYMBOL_GPL(phys_wc_to_mtrr_index);
+
/*
* HACK ALERT!
* These should be called implicitly, but we can't yet until all the initcall
@@ -600,7 +621,7 @@ struct mtrr_value {
static struct mtrr_value mtrr_value[MTRR_MAX_VAR_RANGES];
-static int mtrr_save(struct sys_device *sysdev, pm_message_t state)
+static int mtrr_save(void)
{
int i;
@@ -612,7 +633,7 @@ static int mtrr_save(struct sys_device *sysdev, pm_message_t state)
return 0;
}
-static int mtrr_restore(struct sys_device *sysdev)
+static void mtrr_restore(void)
{
int i;
@@ -623,18 +644,18 @@ static int mtrr_restore(struct sys_device *sysdev)
mtrr_value[i].ltype);
}
}
- return 0;
}
-static struct sysdev_driver mtrr_sysdev_driver = {
+static struct syscore_ops mtrr_syscore_ops = {
.suspend = mtrr_save,
.resume = mtrr_restore,
};
int __initdata changed_by_mtrr_cleanup;
+#define SIZE_OR_MASK_BITS(n) (~((1ULL << ((n) - PAGE_SHIFT)) - 1))
/**
* mtrr_bp_init - initialize mtrrs on the boot CPU
*
@@ -652,13 +673,13 @@ void __init mtrr_bp_init(void)
if (cpu_has_mtrr) {
mtrr_if = &generic_mtrr_ops;
- size_or_mask = 0xff000000; /* 36 bits */
+ size_or_mask = SIZE_OR_MASK_BITS(36);
size_and_mask = 0x00f00000;
phys_addr = 36;
/*
* This is an AMD specific MSR, but we assume(hope?) that
- * Intel will implement it to when they extend the address
+ * Intel will implement it too when they extend the address
* bus of the Xeon.
*/
if (cpuid_eax(0x80000000) >= 0x80000008) {
@@ -671,7 +692,7 @@ void __init mtrr_bp_init(void)
boot_cpu_data.x86_mask == 0x4))
phys_addr = 36;
- size_or_mask = ~((1ULL << (phys_addr - PAGE_SHIFT)) - 1);
+ size_or_mask = SIZE_OR_MASK_BITS(phys_addr);
size_and_mask = ~size_or_mask & 0xfffff00000ULL;
} else if (boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR &&
boot_cpu_data.x86 == 6) {
@@ -679,7 +700,7 @@ void __init mtrr_bp_init(void)
* VIA C* family have Intel style MTRRs,
* but don't support PAE
*/
- size_or_mask = 0xfff00000; /* 32 bits */
+ size_or_mask = SIZE_OR_MASK_BITS(32);
size_and_mask = 0;
phys_addr = 32;
}
@@ -689,21 +710,21 @@ void __init mtrr_bp_init(void)
if (cpu_has_k6_mtrr) {
/* Pre-Athlon (K6) AMD CPU MTRRs */
mtrr_if = mtrr_ops[X86_VENDOR_AMD];
- size_or_mask = 0xfff00000; /* 32 bits */
+ size_or_mask = SIZE_OR_MASK_BITS(32);
size_and_mask = 0;
}
break;
case X86_VENDOR_CENTAUR:
if (cpu_has_centaur_mcr) {
mtrr_if = mtrr_ops[X86_VENDOR_CENTAUR];
- size_or_mask = 0xfff00000; /* 32 bits */
+ size_or_mask = SIZE_OR_MASK_BITS(32);
size_and_mask = 0;
}
break;
case X86_VENDOR_CYRIX:
if (cpu_has_cyrix_arr) {
mtrr_if = mtrr_ops[X86_VENDOR_CYRIX];
- size_or_mask = 0xfff00000; /* 32 bits */
+ size_or_mask = SIZE_OR_MASK_BITS(32);
size_and_mask = 0;
}
break;
@@ -743,15 +764,20 @@ void mtrr_ap_init(void)
* 2. cpu hotadd time. We let mtrr_add/del_page hold cpuhotplug
* lock to prevent mtrr entry changes
*/
- set_mtrr(~0U, 0, 0, 0);
+ set_mtrr_from_inactive_cpu(~0U, 0, 0, 0);
}
/**
- * Save current fixed-range MTRR state of the BSP
+ * Save current fixed-range MTRR state of the first cpu in cpu_online_mask.
*/
void mtrr_save_state(void)
{
- smp_call_function_single(0, mtrr_save_fixed_ranges, NULL, 1);
+ int first_cpu;
+
+ get_online_cpus();
+ first_cpu = cpumask_first(cpu_online_mask);
+ smp_call_function_single(first_cpu, mtrr_save_fixed_ranges, NULL, 1);
+ put_online_cpus();
}
void set_mtrr_aps_delayed_init(void)
@@ -763,13 +789,21 @@ void set_mtrr_aps_delayed_init(void)
}
/*
- * MTRR initialization for all AP's
+ * Delayed MTRR initialization for all AP's
*/
void mtrr_aps_init(void)
{
if (!use_intel())
return;
+ /*
+ * Check if someone has requested the delay of AP MTRR initialization,
+ * by doing set_mtrr_aps_delayed_init(), prior to this point. If not,
+ * then we are done.
+ */
+ if (!mtrr_aps_delayed_init)
+ return;
+
set_mtrr(~0U, 0, 0, 0);
mtrr_aps_delayed_init = false;
}
@@ -801,7 +835,7 @@ static int __init mtrr_init_finialize(void)
* TBD: is there any system with such CPU which supports
* suspend/resume? If no, we should remove the code.
*/
- sysdev_driver_register(&cpu_sysdev_class, &mtrr_sysdev_driver);
+ register_syscore_ops(&mtrr_syscore_ops);
return 0;
}