Diffstat (limited to 'arch/x86/kernel/cpu/mtrr/main.c')
 -rw-r--r--  arch/x86/kernel/cpu/mtrr/main.c | 292
 1 file changed, 148 insertions(+), 144 deletions(-)
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 01c0f3ee6cc..f961de9964c 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -45,14 +45,19 @@
 #include <linux/cpu.h>
 #include <linux/pci.h>
 #include <linux/smp.h>
+#include <linux/syscore_ops.h>
 #include <asm/processor.h>
 #include <asm/e820.h>
 #include <asm/mtrr.h>
 #include <asm/msr.h>
+#include <asm/pat.h>
 
 #include "mtrr.h"
 
+/* arch_phys_wc_add returns an MTRR register index plus this offset. */
+#define MTRR_TO_PHYS_WC_OFFSET 1000
+
 u32 num_var_ranges;
 
 unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES];
@@ -78,7 +83,6 @@ void set_mtrr_ops(const struct mtrr_ops *ops)
 static int have_wrcomb(void)
 {
 	struct pci_dev *dev;
-	u8 rev;
 
 	dev = pci_get_class(PCI_CLASS_BRIDGE_HOST << 8, NULL);
 	if (dev != NULL) {
@@ -88,13 +92,11 @@ static int have_wrcomb(void)
 		 * chipsets to be tagged
 		 */
 		if (dev->vendor == PCI_VENDOR_ID_SERVERWORKS &&
-		    dev->device == PCI_DEVICE_ID_SERVERWORKS_LE) {
-			pci_read_config_byte(dev, PCI_CLASS_REVISION, &rev);
-			if (rev <= 5) {
-				pr_info("mtrr: Serverworks LE rev < 6 detected. Write-combining disabled.\n");
-				pci_dev_put(dev);
-				return 0;
-			}
+		    dev->device == PCI_DEVICE_ID_SERVERWORKS_LE &&
+		    dev->revision <= 5) {
+			pr_info("mtrr: Serverworks LE rev < 6 detected. Write-combining disabled.\n");
+			pci_dev_put(dev);
+			return 0;
 		}
 		/*
 		 * Intel 450NX errata # 23. Non ascending cacheline evictions to
@@ -136,56 +138,42 @@ static void __init init_table(void)
 }
 
 struct set_mtrr_data {
-	atomic_t	count;
-	atomic_t	gate;
 	unsigned long	smp_base;
 	unsigned long	smp_size;
 	unsigned int	smp_reg;
 	mtrr_type	smp_type;
 };
 
-static DEFINE_PER_CPU(struct cpu_stop_work, mtrr_work);
-
 /**
- * mtrr_work_handler - Synchronisation handler. Executed by "other" CPUs.
+ * mtrr_rendezvous_handler - Work done in the synchronization handler. Executed
+ * by all the CPUs.
  * @info: pointer to mtrr configuration data
  *
 * Returns nothing.
 */
-static int mtrr_work_handler(void *info)
+static int mtrr_rendezvous_handler(void *info)
 {
-#ifdef CONFIG_SMP
 	struct set_mtrr_data *data = info;
-	unsigned long flags;
 
-	atomic_dec(&data->count);
-	while (!atomic_read(&data->gate))
-		cpu_relax();
-
-	local_irq_save(flags);
-
-	atomic_dec(&data->count);
-	while (atomic_read(&data->gate))
-		cpu_relax();
-
-	/*  The master has cleared me to execute  */
+	/*
+	 * We use this same function to initialize the mtrrs during boot,
+	 * resume, runtime cpu online and on an explicit request to set a
+	 * specific MTRR.
+	 *
+	 * During boot or suspend, the state of the boot cpu's mtrrs has been
+	 * saved, and we want to replicate that across all the cpus that come
+	 * online (either at the end of boot or resume or during a runtime cpu
+	 * online). If we're doing that, @reg is set to something special and on
+	 * all the cpu's we do mtrr_if->set_all() (On the logical cpu that
+	 * started the boot/resume sequence, this might be a duplicate
+	 * set_all()).
+	 */
 	if (data->smp_reg != ~0U) {
 		mtrr_if->set(data->smp_reg, data->smp_base,
 			     data->smp_size, data->smp_type);
-	} else if (mtrr_aps_delayed_init) {
-		/*
-		 * Initialize the MTRRs inaddition to the synchronisation.
-		 */
+	} else if (mtrr_aps_delayed_init || !cpu_online(smp_processor_id())) {
 		mtrr_if->set_all();
 	}
-
-	atomic_dec(&data->count);
-	while (!atomic_read(&data->gate))
-		cpu_relax();
-
-	atomic_dec(&data->count);
-	local_irq_restore(flags);
-#endif
 	return 0;
 }
 
@@ -222,20 +210,11 @@ static inline int types_compatible(mtrr_type type1, mtrr_type type2)
  * 14. Wait for buddies to catch up
  * 15. Enable interrupts.
  *
- * What does that mean for us? Well, first we set data.count to the number
- * of CPUs. As each CPU announces that it started the rendezvous handler by
- * decrementing the count, We reset data.count and set the data.gate flag
- * allowing all the cpu's to proceed with the work. As each cpu disables
- * interrupts, it'll decrement data.count once. We wait until it hits 0 and
- * proceed. We clear the data.gate flag and reset data.count. Meanwhile, they
- * are waiting for that flag to be cleared. Once it's cleared, each
- * CPU goes through the transition of updating MTRRs.
- * The CPU vendors may each do it differently,
- * so we call mtrr_if->set() callback and let them take care of it.
- * When they're done, they again decrement data->count and wait for data.gate
- * to be set.
- * When we finish, we wait for data.count to hit 0 and toggle the data.gate flag
- * Everyone then enables interrupts and we all continue on.
+ * What does that mean for us? Well, stop_machine() will ensure that
+ * the rendezvous handler is started on each CPU. And in lockstep they
+ * do the state transition of disabling interrupts, updating MTRR's
+ * (the CPU vendors may each do it differently, so we call mtrr_if->set()
+ * callback and let them take care of it.) and enabling interrupts.
 *
 * Note that the mechanism is the same for UP systems, too; all the SMP stuff
 * becomes nops.
@@ -243,82 +222,26 @@ static inline int types_compatible(mtrr_type type1, mtrr_type type2)
 static void
 set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type type)
 {
-	struct set_mtrr_data data;
-	unsigned long flags;
-	int cpu;
-
-	preempt_disable();
-
-	data.smp_reg = reg;
-	data.smp_base = base;
-	data.smp_size = size;
-	data.smp_type = type;
-	atomic_set(&data.count, num_booting_cpus() - 1);
-
-	/* Make sure data.count is visible before unleashing other CPUs */
-	smp_wmb();
-	atomic_set(&data.gate, 0);
-
-	/* Start the ball rolling on other CPUs */
-	for_each_online_cpu(cpu) {
-		struct cpu_stop_work *work = &per_cpu(mtrr_work, cpu);
+	struct set_mtrr_data data = { .smp_reg = reg,
+				      .smp_base = base,
+				      .smp_size = size,
+				      .smp_type = type
+				    };
 
-		if (cpu == smp_processor_id())
-			continue;
-
-		stop_one_cpu_nowait(cpu, mtrr_work_handler, &data, work);
-	}
-
-
-	while (atomic_read(&data.count))
-		cpu_relax();
-
-	/* Ok, reset count and toggle gate */
-	atomic_set(&data.count, num_booting_cpus() - 1);
-	smp_wmb();
-	atomic_set(&data.gate, 1);
-
-	local_irq_save(flags);
-
-	while (atomic_read(&data.count))
-		cpu_relax();
-
-	/* Ok, reset count and toggle gate */
-	atomic_set(&data.count, num_booting_cpus() - 1);
-	smp_wmb();
-	atomic_set(&data.gate, 0);
-
-	/* Do our MTRR business */
-
-	/*
-	 * HACK!
-	 * We use this same function to initialize the mtrrs on boot.
-	 * The state of the boot cpu's mtrrs has been saved, and we want
-	 * to replicate across all the APs.
-	 * If we're doing that @reg is set to something special...
-	 */
-	if (reg != ~0U)
-		mtrr_if->set(reg, base, size, type);
-	else if (!mtrr_aps_delayed_init)
-		mtrr_if->set_all();
-
-	/* Wait for the others */
-	while (atomic_read(&data.count))
-		cpu_relax();
-
-	atomic_set(&data.count, num_booting_cpus() - 1);
-	smp_wmb();
-	atomic_set(&data.gate, 1);
-
-	/*
-	 * Wait here for everyone to have seen the gate change
-	 * So we're the last ones to touch 'data'
-	 */
-	while (atomic_read(&data.count))
-		cpu_relax();
+	stop_machine(mtrr_rendezvous_handler, &data, cpu_online_mask);
+}
 
-	local_irq_restore(flags);
-	preempt_enable();
+static void set_mtrr_from_inactive_cpu(unsigned int reg, unsigned long base,
+				      unsigned long size, mtrr_type type)
+{
+	struct set_mtrr_data data = { .smp_reg = reg,
+				      .smp_base = base,
+				      .smp_size = size,
+				      .smp_type = type
+				    };
+
+	stop_machine_from_inactive_cpu(mtrr_rendezvous_handler, &data,
+				       cpu_callout_mask);
 }
 
 /**
@@ -386,7 +309,8 @@ int mtrr_add_page(unsigned long base, unsigned long size,
 		return -EINVAL;
 	}
 
-	if (base & size_or_mask || size & size_or_mask) {
+	if ((base | (base + size - 1)) >>
+	    (boot_cpu_data.x86_phys_bits - PAGE_SHIFT)) {
 		pr_warning("mtrr: base or size exceeds the MTRR width\n");
 		return -EINVAL;
 	}
@@ -605,6 +529,73 @@ int mtrr_del(int reg, unsigned long base, unsigned long size)
 }
 EXPORT_SYMBOL(mtrr_del);
 
+/**
+ * arch_phys_wc_add - add a WC MTRR and handle errors if PAT is unavailable
+ * @base: Physical base address
+ * @size: Size of region
+ *
+ * If PAT is available, this does nothing.  If PAT is unavailable, it
+ * attempts to add a WC MTRR covering size bytes starting at base and
+ * logs an error if this fails.
+ *
+ * Drivers must store the return value to pass to mtrr_del_wc_if_needed,
+ * but drivers should not try to interpret that return value.
+ */
+int arch_phys_wc_add(unsigned long base, unsigned long size)
+{
+	int ret;
+
+	if (pat_enabled)
+		return 0;  /* Success!  (We don't need to do anything.) */
+
+	ret = mtrr_add(base, size, MTRR_TYPE_WRCOMB, true);
+	if (ret < 0) {
+		pr_warn("Failed to add WC MTRR for [%p-%p]; performance may suffer.",
+			(void *)base, (void *)(base + size - 1));
+		return ret;
+	}
+	return ret + MTRR_TO_PHYS_WC_OFFSET;
+}
+EXPORT_SYMBOL(arch_phys_wc_add);
+
+/*
+ * arch_phys_wc_del - undoes arch_phys_wc_add
+ * @handle: Return value from arch_phys_wc_add
+ *
+ * This cleans up after mtrr_add_wc_if_needed.
+ *
+ * The API guarantees that mtrr_del_wc_if_needed(error code) and
+ * mtrr_del_wc_if_needed(0) do nothing.
+ */
+void arch_phys_wc_del(int handle)
+{
+	if (handle >= 1) {
+		WARN_ON(handle < MTRR_TO_PHYS_WC_OFFSET);
+		mtrr_del(handle - MTRR_TO_PHYS_WC_OFFSET, 0, 0);
+	}
+}
+EXPORT_SYMBOL(arch_phys_wc_del);
+
+/*
+ * phys_wc_to_mtrr_index - translates arch_phys_wc_add's return value
+ * @handle: Return value from arch_phys_wc_add
+ *
+ * This will turn the return value from arch_phys_wc_add into an mtrr
+ * index suitable for debugging.
+ *
+ * Note: There is no legitimate use for this function, except possibly
+ * in printk line.  Alas there is an illegitimate use in some ancient
+ * drm ioctls.
+ */
+int phys_wc_to_mtrr_index(int handle)
+{
+	if (handle < MTRR_TO_PHYS_WC_OFFSET)
+		return -1;
+	else
+		return handle - MTRR_TO_PHYS_WC_OFFSET;
+}
+EXPORT_SYMBOL_GPL(phys_wc_to_mtrr_index);
+
 /*
 * HACK ALERT!
 * These should be called implicitly, but we can't yet until all the initcall
@@ -630,7 +621,7 @@ struct mtrr_value {
 
 static struct mtrr_value mtrr_value[MTRR_MAX_VAR_RANGES];
 
-static int mtrr_save(struct sys_device *sysdev, pm_message_t state)
+static int mtrr_save(void)
 {
 	int i;
 
@@ -642,7 +633,7 @@ static int mtrr_save(struct sys_device *sysdev, pm_message_t state)
 	return 0;
 }
 
-static int mtrr_restore(struct sys_device *sysdev)
+static void mtrr_restore(void)
 {
 	int i;
 
@@ -653,18 +644,18 @@ static int mtrr_restore(struct sys_device *sysdev)
 				    mtrr_value[i].ltype);
 		}
 	}
-	return 0;
 }
 
-static struct sysdev_driver mtrr_sysdev_driver = {
+static struct syscore_ops mtrr_syscore_ops = {
 	.suspend	= mtrr_save,
 	.resume		= mtrr_restore,
 };
 
 int __initdata changed_by_mtrr_cleanup;
 
+#define SIZE_OR_MASK_BITS(n)  (~((1ULL << ((n) - PAGE_SHIFT)) - 1))
 /**
 * mtrr_bp_init - initialize mtrrs on the boot CPU
 *
@@ -682,13 +673,13 @@ void __init mtrr_bp_init(void)
 	if (cpu_has_mtrr) {
 		mtrr_if = &generic_mtrr_ops;
-		size_or_mask = 0xff000000;			/* 36 bits */
+		size_or_mask = SIZE_OR_MASK_BITS(36);
 		size_and_mask = 0x00f00000;
 		phys_addr = 36;
 
 		/*
 		 * This is an AMD specific MSR, but we assume(hope?) that
-		 * Intel will implement it to when they extend the address
+		 * Intel will implement it too when they extend the address
 		 * bus of the Xeon.
 		 */
 		if (cpuid_eax(0x80000000) >= 0x80000008) {
@@ -701,7 +692,7 @@ void __init mtrr_bp_init(void)
 			     boot_cpu_data.x86_mask == 0x4))
 				phys_addr = 36;
 
-			size_or_mask = ~((1ULL << (phys_addr - PAGE_SHIFT)) - 1);
+			size_or_mask = SIZE_OR_MASK_BITS(phys_addr);
 			size_and_mask = ~size_or_mask & 0xfffff00000ULL;
 		} else if (boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR &&
 			   boot_cpu_data.x86 == 6) {
@@ -709,7 +700,7 @@ void __init mtrr_bp_init(void)
 			 * VIA C* family have Intel style MTRRs,
 			 * but don't support PAE
 			 */
-			size_or_mask = 0xfff00000;		/* 32 bits */
+			size_or_mask = SIZE_OR_MASK_BITS(32);
 			size_and_mask = 0;
 			phys_addr = 32;
 		}
@@ -719,21 +710,21 @@ void __init mtrr_bp_init(void)
 			if (cpu_has_k6_mtrr) {
 				/* Pre-Athlon (K6) AMD CPU MTRRs */
 				mtrr_if = mtrr_ops[X86_VENDOR_AMD];
-				size_or_mask = 0xfff00000;	/* 32 bits */
+				size_or_mask = SIZE_OR_MASK_BITS(32);
 				size_and_mask = 0;
 			}
 			break;
 		case X86_VENDOR_CENTAUR:
 			if (cpu_has_centaur_mcr) {
 				mtrr_if = mtrr_ops[X86_VENDOR_CENTAUR];
-				size_or_mask = 0xfff00000;	/* 32 bits */
+				size_or_mask = SIZE_OR_MASK_BITS(32);
 				size_and_mask = 0;
 			}
 			break;
 		case X86_VENDOR_CYRIX:
 			if (cpu_has_cyrix_arr) {
 				mtrr_if = mtrr_ops[X86_VENDOR_CYRIX];
-				size_or_mask = 0xfff00000;	/* 32 bits */
+				size_or_mask = SIZE_OR_MASK_BITS(32);
 				size_and_mask = 0;
 			}
 			break;
@@ -773,15 +764,20 @@ void mtrr_ap_init(void)
	 *   2. cpu hotadd time. We let mtrr_add/del_page hold cpuhotplug
	 *      lock to prevent mtrr entry changes
	 */
-	set_mtrr(~0U, 0, 0, 0);
+	set_mtrr_from_inactive_cpu(~0U, 0, 0, 0);
 }
 
 /**
- * Save current fixed-range MTRR state of the BSP
+ * Save current fixed-range MTRR state of the first cpu in cpu_online_mask.
 */
 void mtrr_save_state(void)
 {
-	smp_call_function_single(0, mtrr_save_fixed_ranges, NULL, 1);
+	int first_cpu;
+
+	get_online_cpus();
+	first_cpu = cpumask_first(cpu_online_mask);
+	smp_call_function_single(first_cpu, mtrr_save_fixed_ranges, NULL, 1);
+	put_online_cpus();
 }
 
 void set_mtrr_aps_delayed_init(void)
@@ -793,13 +789,21 @@ void set_mtrr_aps_delayed_init(void)
 }
 
 /*
- * MTRR initialization for all AP's
+ * Delayed MTRR initialization for all AP's
 */
 void mtrr_aps_init(void)
 {
 	if (!use_intel())
 		return;
 
+	/*
+	 * Check if someone has requested the delay of AP MTRR initialization,
+	 * by doing set_mtrr_aps_delayed_init(), prior to this point. If not,
+	 * then we are done.
+	 */
+	if (!mtrr_aps_delayed_init)
+		return;
+
 	set_mtrr(~0U, 0, 0, 0);
 	mtrr_aps_delayed_init = false;
 }
@@ -831,7 +835,7 @@ static int __init mtrr_init_finialize(void)
	 * TBD: is there any system with such CPU which supports
	 * suspend/resume? If no, we should remove the code.
	 */
-	sysdev_driver_register(&cpu_sysdev_class, &mtrr_sysdev_driver);
+	register_syscore_ops(&mtrr_syscore_ops);
	return 0;
 }
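For context, the kernel-doc added above spells out how drivers are expected to call the new helpers: store the opaque value returned by arch_phys_wc_add() and hand it back to arch_phys_wc_del() on teardown, without interpreting it. Below is a minimal sketch of that usage; the foo_dev structure, the foo_map_fb()/foo_unmap_fb() helpers, the pairing with ioremap_wc() and the <linux/io.h> include providing the declarations are assumptions for illustration, not part of this patch.

/*
 * Hypothetical driver fragment showing the intended calling convention
 * of arch_phys_wc_add()/arch_phys_wc_del(); foo_dev, foo_map_fb() and
 * foo_unmap_fb() are made-up names for this sketch.
 */
#include <linux/errno.h>
#include <linux/io.h>

struct foo_dev {
	void __iomem	*fb;		/* write-combined mapping */
	int		 wc_cookie;	/* opaque handle, never interpreted */
};

static int foo_map_fb(struct foo_dev *d, phys_addr_t base, size_t size)
{
	d->fb = ioremap_wc(base, size);
	if (!d->fb)
		return -ENOMEM;

	/*
	 * No-op (returns 0) when PAT is enabled; adds a WC MTRR otherwise.
	 * The return value is only stored and later passed back to
	 * arch_phys_wc_del().
	 */
	d->wc_cookie = arch_phys_wc_add(base, size);
	return 0;
}

static void foo_unmap_fb(struct foo_dev *d)
{
	arch_phys_wc_del(d->wc_cookie);	/* safe for 0 and error values */
	iounmap(d->fb);
}

Per the kernel-doc, arch_phys_wc_del() does nothing when handed 0 or an error code, so the teardown path needs no special-casing regardless of whether PAT was available or the MTRR addition failed.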
