diff options
Diffstat (limited to 'drivers/idle/intel_idle.c')
| -rw-r--r-- | drivers/idle/intel_idle.c | 839 | 
1 files changed, 654 insertions, 185 deletions
diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 41665d2f9f9..4d140bbbe10 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -1,7 +1,7 @@  /*   * intel_idle.c - native hardware idle loop for modern Intel processors   * - * Copyright (c) 2010, Intel Corporation. + * Copyright (c) 2013, Intel Corporation.   * Len Brown <len.brown@intel.com>   *   * This program is free software; you can redistribute it and/or modify it @@ -56,10 +56,14 @@  #include <linux/kernel.h>  #include <linux/cpuidle.h>  #include <linux/clockchips.h> -#include <linux/hrtimer.h>	/* ktime_get_real() */  #include <trace/events/power.h>  #include <linux/sched.h> +#include <linux/notifier.h> +#include <linux/cpu.h> +#include <linux/module.h> +#include <asm/cpu_device_id.h>  #include <asm/mwait.h> +#include <asm/msr.h>  #define INTEL_IDLE_VERSION "0.4"  #define PREFIX "intel_idle: " @@ -69,144 +73,446 @@ static struct cpuidle_driver intel_idle_driver = {  	.owner = THIS_MODULE,  };  /* intel_idle.max_cstate=0 disables driver */ -static int max_cstate = MWAIT_MAX_NUM_CSTATES - 1; +static int max_cstate = CPUIDLE_STATE_MAX - 1;  static unsigned int mwait_substates; +#define LAPIC_TIMER_ALWAYS_RELIABLE 0xFFFFFFFF  /* Reliable LAPIC Timer States, bit 1 for C1 etc.  */  static unsigned int lapic_timer_reliable_states = (1 << 1);	 /* Default to only C1 */ +struct idle_cpu { +	struct cpuidle_state *state_table; + +	/* +	 * Hardware C-state auto-demotion may not always be optimal. +	 * Indicate which enable bits to clear here. 
+	 */ +	unsigned long auto_demotion_disable_flags; +	bool disable_promotion_to_c1e; +}; + +static const struct idle_cpu *icpu;  static struct cpuidle_device __percpu *intel_idle_cpuidle_devices; -static int intel_idle(struct cpuidle_device *dev, struct cpuidle_state *state); +static int intel_idle(struct cpuidle_device *dev, +			struct cpuidle_driver *drv, int index); +static int intel_idle_cpu_init(int cpu);  static struct cpuidle_state *cpuidle_state_table;  /* + * Set this flag for states where the HW flushes the TLB for us + * and so we don't need cross-calls to keep it consistent. + * If this flag is set, SW flushes the TLB, so even if the + * HW doesn't do the flushing, this flag is safe to use. + */ +#define CPUIDLE_FLAG_TLB_FLUSHED	0x10000 + +/* + * MWAIT takes an 8-bit "hint" in EAX "suggesting" + * the C-state (top nibble) and sub-state (bottom nibble) + * 0x00 means "MWAIT(C1)", 0x10 means "MWAIT(C2)" etc. + * + * We store the hint at the top of our "flags" for each state. + */ +#define flg2MWAIT(flags) (((flags) >> 24) & 0xFF) +#define MWAIT2flg(eax) ((eax & 0xFF) << 24) + +/*   * States are indexed by the cstate number,   * which is also the index into the MWAIT hint array.   * Thus C0 is a dummy.   
*/ -static struct cpuidle_state nehalem_cstates[MWAIT_MAX_NUM_CSTATES] = { -	{ /* MWAIT C0 */ }, -	{ /* MWAIT C1 */ -		.name = "NHM-C1", +static struct cpuidle_state nehalem_cstates[] = { +	{ +		.name = "C1-NHM",  		.desc = "MWAIT 0x00", -		.driver_data = (void *) 0x00, -		.flags = CPUIDLE_FLAG_TIME_VALID, +		.flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_TIME_VALID,  		.exit_latency = 3,  		.target_residency = 6,  		.enter = &intel_idle }, -	{ /* MWAIT C2 */ -		.name = "NHM-C3", +	{ +		.name = "C1E-NHM", +		.desc = "MWAIT 0x01", +		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_TIME_VALID, +		.exit_latency = 10, +		.target_residency = 20, +		.enter = &intel_idle }, +	{ +		.name = "C3-NHM",  		.desc = "MWAIT 0x10", -		.driver_data = (void *) 0x10, -		.flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, +		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,  		.exit_latency = 20,  		.target_residency = 80,  		.enter = &intel_idle }, -	{ /* MWAIT C3 */ -		.name = "NHM-C6", +	{ +		.name = "C6-NHM",  		.desc = "MWAIT 0x20", -		.driver_data = (void *) 0x20, -		.flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, +		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,  		.exit_latency = 200,  		.target_residency = 800,  		.enter = &intel_idle }, +	{ +		.enter = NULL }  }; -static struct cpuidle_state snb_cstates[MWAIT_MAX_NUM_CSTATES] = { -	{ /* MWAIT C0 */ }, -	{ /* MWAIT C1 */ -		.name = "SNB-C1", +static struct cpuidle_state snb_cstates[] = { +	{ +		.name = "C1-SNB",  		.desc = "MWAIT 0x00", -		.driver_data = (void *) 0x00, -		.flags = CPUIDLE_FLAG_TIME_VALID, -		.exit_latency = 1, -		.target_residency = 4, +		.flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_TIME_VALID, +		.exit_latency = 2, +		.target_residency = 2,  		.enter = &intel_idle }, -	{ /* MWAIT C2 */ -		.name = "SNB-C3", +	{ +		.name = "C1E-SNB", +		.desc = "MWAIT 0x01", +		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_TIME_VALID, +		.exit_latency = 10, +		
.target_residency = 20, +		.enter = &intel_idle }, +	{ +		.name = "C3-SNB",  		.desc = "MWAIT 0x10", -		.driver_data = (void *) 0x10, -		.flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, +		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,  		.exit_latency = 80, -		.target_residency = 160, +		.target_residency = 211,  		.enter = &intel_idle }, -	{ /* MWAIT C3 */ -		.name = "SNB-C6", +	{ +		.name = "C6-SNB",  		.desc = "MWAIT 0x20", -		.driver_data = (void *) 0x20, -		.flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, +		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,  		.exit_latency = 104, -		.target_residency = 208, +		.target_residency = 345,  		.enter = &intel_idle }, -	{ /* MWAIT C4 */ -		.name = "SNB-C7", +	{ +		.name = "C7-SNB",  		.desc = "MWAIT 0x30", -		.driver_data = (void *) 0x30, -		.flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, +		.flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,  		.exit_latency = 109, +		.target_residency = 345, +		.enter = &intel_idle }, +	{ +		.enter = NULL } +}; + +static struct cpuidle_state byt_cstates[] = { +	{ +		.name = "C1-BYT", +		.desc = "MWAIT 0x00", +		.flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_TIME_VALID, +		.exit_latency = 1, +		.target_residency = 1, +		.enter = &intel_idle }, +	{ +		.name = "C1E-BYT", +		.desc = "MWAIT 0x01", +		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_TIME_VALID, +		.exit_latency = 15, +		.target_residency = 30, +		.enter = &intel_idle }, +	{ +		.name = "C6N-BYT", +		.desc = "MWAIT 0x58", +		.flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, +		.exit_latency = 40, +		.target_residency = 275, +		.enter = &intel_idle }, +	{ +		.name = "C6S-BYT", +		.desc = "MWAIT 0x52", +		.flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, +		.exit_latency = 140, +		.target_residency = 560, +		.enter = &intel_idle }, +	{ +		.name = "C7-BYT", +		
.desc = "MWAIT 0x60", +		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, +		.exit_latency = 1200, +		.target_residency = 1500, +		.enter = &intel_idle }, +	{ +		.name = "C7S-BYT", +		.desc = "MWAIT 0x64", +		.flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, +		.exit_latency = 10000, +		.target_residency = 20000, +		.enter = &intel_idle }, +	{ +		.enter = NULL } +}; + +static struct cpuidle_state ivb_cstates[] = { +	{ +		.name = "C1-IVB", +		.desc = "MWAIT 0x00", +		.flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_TIME_VALID, +		.exit_latency = 1, +		.target_residency = 1, +		.enter = &intel_idle }, +	{ +		.name = "C1E-IVB", +		.desc = "MWAIT 0x01", +		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_TIME_VALID, +		.exit_latency = 10, +		.target_residency = 20, +		.enter = &intel_idle }, +	{ +		.name = "C3-IVB", +		.desc = "MWAIT 0x10", +		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, +		.exit_latency = 59, +		.target_residency = 156, +		.enter = &intel_idle }, +	{ +		.name = "C6-IVB", +		.desc = "MWAIT 0x20", +		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, +		.exit_latency = 80, +		.target_residency = 300, +		.enter = &intel_idle }, +	{ +		.name = "C7-IVB", +		.desc = "MWAIT 0x30", +		.flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, +		.exit_latency = 87, +		.target_residency = 300, +		.enter = &intel_idle }, +	{ +		.enter = NULL } +}; + +static struct cpuidle_state ivt_cstates[] = { +	{ +		.name = "C1-IVT", +		.desc = "MWAIT 0x00", +		.flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_TIME_VALID, +		.exit_latency = 1, +		.target_residency = 1, +		.enter = &intel_idle }, +	{ +		.name = "C1E-IVT", +		.desc = "MWAIT 0x01", +		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_TIME_VALID, +		.exit_latency = 10, +		.target_residency = 80, +		.enter = &intel_idle }, +	{ +		.name = "C3-IVT", +		.desc = "MWAIT 0x10", +		.flags = MWAIT2flg(0x10) | 
CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, +		.exit_latency = 59, +		.target_residency = 156, +		.enter = &intel_idle }, +	{ +		.name = "C6-IVT", +		.desc = "MWAIT 0x20", +		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, +		.exit_latency = 82,  		.target_residency = 300,  		.enter = &intel_idle }, +	{ +		.enter = NULL }  }; -static struct cpuidle_state atom_cstates[MWAIT_MAX_NUM_CSTATES] = { -	{ /* MWAIT C0 */ }, -	{ /* MWAIT C1 */ -		.name = "ATM-C1", +static struct cpuidle_state ivt_cstates_4s[] = { +	{ +		.name = "C1-IVT-4S",  		.desc = "MWAIT 0x00", -		.driver_data = (void *) 0x00, -		.flags = CPUIDLE_FLAG_TIME_VALID, +		.flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_TIME_VALID,  		.exit_latency = 1, -		.target_residency = 4, +		.target_residency = 1,  		.enter = &intel_idle }, -	{ /* MWAIT C2 */ -		.name = "ATM-C2", +	{ +		.name = "C1E-IVT-4S", +		.desc = "MWAIT 0x01", +		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_TIME_VALID, +		.exit_latency = 10, +		.target_residency = 250, +		.enter = &intel_idle }, +	{ +		.name = "C3-IVT-4S", +		.desc = "MWAIT 0x10", +		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, +		.exit_latency = 59, +		.target_residency = 300, +		.enter = &intel_idle }, +	{ +		.name = "C6-IVT-4S", +		.desc = "MWAIT 0x20", +		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, +		.exit_latency = 84, +		.target_residency = 400, +		.enter = &intel_idle }, +	{ +		.enter = NULL } +}; + +static struct cpuidle_state ivt_cstates_8s[] = { +	{ +		.name = "C1-IVT-8S", +		.desc = "MWAIT 0x00", +		.flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_TIME_VALID, +		.exit_latency = 1, +		.target_residency = 1, +		.enter = &intel_idle }, +	{ +		.name = "C1E-IVT-8S", +		.desc = "MWAIT 0x01", +		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_TIME_VALID, +		.exit_latency = 10, +		.target_residency = 500, +		.enter = &intel_idle }, +	{ +		.name = "C3-IVT-8S", +		.desc = "MWAIT 0x10", +		.flags = 
MWAIT2flg(0x10) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, +		.exit_latency = 59, +		.target_residency = 600, +		.enter = &intel_idle }, +	{ +		.name = "C6-IVT-8S", +		.desc = "MWAIT 0x20", +		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, +		.exit_latency = 88, +		.target_residency = 700, +		.enter = &intel_idle }, +	{ +		.enter = NULL } +}; + +static struct cpuidle_state hsw_cstates[] = { +	{ +		.name = "C1-HSW", +		.desc = "MWAIT 0x00", +		.flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_TIME_VALID, +		.exit_latency = 2, +		.target_residency = 2, +		.enter = &intel_idle }, +	{ +		.name = "C1E-HSW", +		.desc = "MWAIT 0x01", +		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_TIME_VALID, +		.exit_latency = 10, +		.target_residency = 20, +		.enter = &intel_idle }, +	{ +		.name = "C3-HSW", +		.desc = "MWAIT 0x10", +		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, +		.exit_latency = 33, +		.target_residency = 100, +		.enter = &intel_idle }, +	{ +		.name = "C6-HSW", +		.desc = "MWAIT 0x20", +		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, +		.exit_latency = 133, +		.target_residency = 400, +		.enter = &intel_idle }, +	{ +		.name = "C7s-HSW", +		.desc = "MWAIT 0x32", +		.flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, +		.exit_latency = 166, +		.target_residency = 500, +		.enter = &intel_idle }, +	{ +		.name = "C8-HSW", +		.desc = "MWAIT 0x40", +		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, +		.exit_latency = 300, +		.target_residency = 900, +		.enter = &intel_idle }, +	{ +		.name = "C9-HSW", +		.desc = "MWAIT 0x50", +		.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, +		.exit_latency = 600, +		.target_residency = 1800, +		.enter = &intel_idle }, +	{ +		.name = "C10-HSW", +		.desc = "MWAIT 0x60", +		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, +		
.exit_latency = 2600, +		.target_residency = 7700, +		.enter = &intel_idle }, +	{ +		.enter = NULL } +}; + +static struct cpuidle_state atom_cstates[] = { +	{ +		.name = "C1E-ATM", +		.desc = "MWAIT 0x00", +		.flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_TIME_VALID, +		.exit_latency = 10, +		.target_residency = 20, +		.enter = &intel_idle }, +	{ +		.name = "C2-ATM",  		.desc = "MWAIT 0x10", -		.driver_data = (void *) 0x10, -		.flags = CPUIDLE_FLAG_TIME_VALID, +		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TIME_VALID,  		.exit_latency = 20,  		.target_residency = 80,  		.enter = &intel_idle }, -	{ /* MWAIT C3 */ }, -	{ /* MWAIT C4 */ -		.name = "ATM-C4", +	{ +		.name = "C4-ATM",  		.desc = "MWAIT 0x30", -		.driver_data = (void *) 0x30, -		.flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, +		.flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,  		.exit_latency = 100,  		.target_residency = 400,  		.enter = &intel_idle }, -	{ /* MWAIT C5 */ }, -	{ /* MWAIT C6 */ -		.name = "ATM-C6", +	{ +		.name = "C6-ATM",  		.desc = "MWAIT 0x52", -		.driver_data = (void *) 0x52, -		.flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, +		.flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,  		.exit_latency = 140,  		.target_residency = 560,  		.enter = &intel_idle }, +	{ +		.enter = NULL } +}; +static struct cpuidle_state avn_cstates[] = { +	{ +		.name = "C1-AVN", +		.desc = "MWAIT 0x00", +		.flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_TIME_VALID, +		.exit_latency = 2, +		.target_residency = 2, +		.enter = &intel_idle }, +	{ +		.name = "C6-AVN", +		.desc = "MWAIT 0x51", +		.flags = MWAIT2flg(0x51) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, +		.exit_latency = 15, +		.target_residency = 45, +		.enter = &intel_idle }, +	{ +		.enter = NULL }  };  /**   * intel_idle   * @dev: cpuidle_device - * @state: cpuidle state + * @drv: cpuidle driver + * @index: index of cpuidle state   * + * Must be called under 
local_irq_disable().   */ -static int intel_idle(struct cpuidle_device *dev, struct cpuidle_state *state) +static int intel_idle(struct cpuidle_device *dev, +		struct cpuidle_driver *drv, int index)  {  	unsigned long ecx = 1; /* break on interrupt flag */ -	unsigned long eax = (unsigned long)cpuidle_get_statedata(state); +	struct cpuidle_state *state = &drv->states[index]; +	unsigned long eax = flg2MWAIT(state->flags);  	unsigned int cstate; -	ktime_t kt_before, kt_after; -	s64 usec_delta;  	int cpu = smp_processor_id();  	cstate = (((eax) >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK) + 1; -	local_irq_disable(); -  	/*  	 * leave_mm() to avoid costly and often unnecessary wakeups  	 * for flushing the user TLB's associated with the active mm. @@ -217,50 +523,167 @@ static int intel_idle(struct cpuidle_device *dev, struct cpuidle_state *state)  	if (!(lapic_timer_reliable_states & (1 << (cstate))))  		clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu); -	kt_before = ktime_get_real(); +	mwait_idle_with_hints(eax, ecx); -	stop_critical_timings(); -#ifndef MODULE -	trace_power_start(POWER_CSTATE, (eax >> 4) + 1, cpu); -#endif -	if (!need_resched()) { +	if (!(lapic_timer_reliable_states & (1 << (cstate)))) +		clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu); -		__monitor((void *)&current_thread_info()->flags, 0, 0); -		smp_mb(); -		if (!need_resched()) -			__mwait(eax, ecx); -	} +	return index; +} -	start_critical_timings(); +static void __setup_broadcast_timer(void *arg) +{ +	unsigned long reason = (unsigned long)arg; +	int cpu = smp_processor_id(); -	kt_after = ktime_get_real(); -	usec_delta = ktime_to_us(ktime_sub(kt_after, kt_before)); +	reason = reason ? 
+		CLOCK_EVT_NOTIFY_BROADCAST_ON : CLOCK_EVT_NOTIFY_BROADCAST_OFF; -	local_irq_enable(); +	clockevents_notify(reason, &cpu); +} -	if (!(lapic_timer_reliable_states & (1 << (cstate)))) -		clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu); +static int cpu_hotplug_notify(struct notifier_block *n, +			      unsigned long action, void *hcpu) +{ +	int hotcpu = (unsigned long)hcpu; +	struct cpuidle_device *dev; + +	switch (action & ~CPU_TASKS_FROZEN) { +	case CPU_ONLINE: + +		if (lapic_timer_reliable_states != LAPIC_TIMER_ALWAYS_RELIABLE) +			smp_call_function_single(hotcpu, __setup_broadcast_timer, +						 (void *)true, 1); + +		/* +		 * Some systems can hotplug a cpu at runtime after +		 * the kernel has booted, we have to initialize the +		 * driver in this case +		 */ +		dev = per_cpu_ptr(intel_idle_cpuidle_devices, hotcpu); +		if (!dev->registered) +			intel_idle_cpu_init(hotcpu); -	return usec_delta; +		break; +	} +	return NOTIFY_OK; +} + +static struct notifier_block cpu_hotplug_notifier = { +	.notifier_call = cpu_hotplug_notify, +}; + +static void auto_demotion_disable(void *dummy) +{ +	unsigned long long msr_bits; + +	rdmsrl(MSR_NHM_SNB_PKG_CST_CFG_CTL, msr_bits); +	msr_bits &= ~(icpu->auto_demotion_disable_flags); +	wrmsrl(MSR_NHM_SNB_PKG_CST_CFG_CTL, msr_bits); +} +static void c1e_promotion_disable(void *dummy) +{ +	unsigned long long msr_bits; + +	rdmsrl(MSR_IA32_POWER_CTL, msr_bits); +	msr_bits &= ~0x2; +	wrmsrl(MSR_IA32_POWER_CTL, msr_bits);  } +static const struct idle_cpu idle_cpu_nehalem = { +	.state_table = nehalem_cstates, +	.auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE, +	.disable_promotion_to_c1e = true, +}; + +static const struct idle_cpu idle_cpu_atom = { +	.state_table = atom_cstates, +}; + +static const struct idle_cpu idle_cpu_lincroft = { +	.state_table = atom_cstates, +	.auto_demotion_disable_flags = ATM_LNC_C6_AUTO_DEMOTE, +}; + +static const struct idle_cpu idle_cpu_snb = { +	.state_table = snb_cstates, +	
.disable_promotion_to_c1e = true, +}; + +static const struct idle_cpu idle_cpu_byt = { +	.state_table = byt_cstates, +	.disable_promotion_to_c1e = true, +}; + +static const struct idle_cpu idle_cpu_ivb = { +	.state_table = ivb_cstates, +	.disable_promotion_to_c1e = true, +}; + +static const struct idle_cpu idle_cpu_ivt = { +	.state_table = ivt_cstates, +	.disable_promotion_to_c1e = true, +}; + +static const struct idle_cpu idle_cpu_hsw = { +	.state_table = hsw_cstates, +	.disable_promotion_to_c1e = true, +}; + +static const struct idle_cpu idle_cpu_avn = { +	.state_table = avn_cstates, +	.disable_promotion_to_c1e = true, +}; + +#define ICPU(model, cpu) \ +	{ X86_VENDOR_INTEL, 6, model, X86_FEATURE_MWAIT, (unsigned long)&cpu } + +static const struct x86_cpu_id intel_idle_ids[] = { +	ICPU(0x1a, idle_cpu_nehalem), +	ICPU(0x1e, idle_cpu_nehalem), +	ICPU(0x1f, idle_cpu_nehalem), +	ICPU(0x25, idle_cpu_nehalem), +	ICPU(0x2c, idle_cpu_nehalem), +	ICPU(0x2e, idle_cpu_nehalem), +	ICPU(0x1c, idle_cpu_atom), +	ICPU(0x26, idle_cpu_lincroft), +	ICPU(0x2f, idle_cpu_nehalem), +	ICPU(0x2a, idle_cpu_snb), +	ICPU(0x2d, idle_cpu_snb), +	ICPU(0x36, idle_cpu_atom), +	ICPU(0x37, idle_cpu_byt), +	ICPU(0x3a, idle_cpu_ivb), +	ICPU(0x3e, idle_cpu_ivt), +	ICPU(0x3c, idle_cpu_hsw), +	ICPU(0x3f, idle_cpu_hsw), +	ICPU(0x45, idle_cpu_hsw), +	ICPU(0x46, idle_cpu_hsw), +	ICPU(0x4D, idle_cpu_avn), +	{} +}; +MODULE_DEVICE_TABLE(x86cpu, intel_idle_ids); +  /*   * intel_idle_probe()   */ -static int intel_idle_probe(void) +static int __init intel_idle_probe(void)  {  	unsigned int eax, ebx, ecx; +	const struct x86_cpu_id *id;  	if (max_cstate == 0) {  		pr_debug(PREFIX "disabled\n");  		return -EPERM;  	} -	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) -		return -ENODEV; - -	if (!boot_cpu_has(X86_FEATURE_MWAIT)) +	id = x86_match_cpu(intel_idle_ids); +	if (!id) { +		if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && +		    boot_cpu_data.x86 == 6) +			pr_debug(PREFIX "does not run on family %d model 
%d\n", +				boot_cpu_data.x86, boot_cpu_data.x86_model);  		return -ENODEV; +	}  	if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF)  		return -ENODEV; @@ -268,51 +691,19 @@ static int intel_idle_probe(void)  	cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &mwait_substates);  	if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) || -		!(ecx & CPUID5_ECX_INTERRUPT_BREAK)) +	    !(ecx & CPUID5_ECX_INTERRUPT_BREAK) || +	    !mwait_substates)  			return -ENODEV;  	pr_debug(PREFIX "MWAIT substates: 0x%x\n", mwait_substates); -	if (boot_cpu_has(X86_FEATURE_ARAT))	/* Always Reliable APIC Timer */ -		lapic_timer_reliable_states = 0xFFFFFFFF; - -	if (boot_cpu_data.x86 != 6)	/* family 6 */ -		return -ENODEV; - -	switch (boot_cpu_data.x86_model) { - -	case 0x1A:	/* Core i7, Xeon 5500 series */ -	case 0x1E:	/* Core i7 and i5 Processor - Lynnfield Jasper Forest */ -	case 0x1F:	/* Core i7 and i5 Processor - Nehalem */ -	case 0x2E:	/* Nehalem-EX Xeon */ -	case 0x2F:	/* Westmere-EX Xeon */ -		lapic_timer_reliable_states = (1 << 1);	 /* C1 */ - -	case 0x25:	/* Westmere */ -	case 0x2C:	/* Westmere */ -		cpuidle_state_table = nehalem_cstates; -		break; +	icpu = (const struct idle_cpu *)id->driver_data; +	cpuidle_state_table = icpu->state_table; -	case 0x1C:	/* 28 - Atom Processor */ -	case 0x26:	/* 38 - Lincroft Atom Processor */ -		lapic_timer_reliable_states = (1 << 1); /* C1 */ -		cpuidle_state_table = atom_cstates; -		break; - -	case 0x2A:	/* SNB */ -	case 0x2D:	/* SNB Xeon */ -		cpuidle_state_table = snb_cstates; -		break; -#ifdef FUTURE_USE -	case 0x17:	/* 23 - Core 2 Duo */ -		lapic_timer_reliable_states = (1 << 2) | (1 << 1); /* C2, C1 */ -#endif - -	default: -		pr_debug(PREFIX "does not run on family %d model %d\n", -			boot_cpu_data.x86, boot_cpu_data.x86_model); -		return -ENODEV; -	} +	if (boot_cpu_has(X86_FEATURE_ARAT))	/* Always Reliable APIC Timer */ +		lapic_timer_reliable_states = LAPIC_TIMER_ALWAYS_RELIABLE; +	else +		on_each_cpu(__setup_broadcast_timer, (void *)true, 1);  	
pr_debug(PREFIX "v" INTEL_IDLE_VERSION  		" model 0x%X\n", boot_cpu_data.x86_model); @@ -339,93 +730,163 @@ static void intel_idle_cpuidle_devices_uninit(void)  	free_percpu(intel_idle_cpuidle_devices);  	return;  } +  /* - * intel_idle_cpuidle_devices_init() - * allocate, initialize, register cpuidle_devices + * intel_idle_state_table_update() + * + * Update the default state_table for this CPU-id + * + * Currently used to access tuned IVT multi-socket targets + * Assumption: num_sockets == (max_package_num + 1)   */ -static int intel_idle_cpuidle_devices_init(void) +void intel_idle_state_table_update(void)  { -	int i, cstate; -	struct cpuidle_device *dev; +	/* IVT uses a different table for 1-2, 3-4, and > 4 sockets */ +	if (boot_cpu_data.x86_model == 0x3e) { /* IVT */ +		int cpu, package_num, num_sockets = 1; + +		for_each_online_cpu(cpu) { +			package_num = topology_physical_package_id(cpu); +			if (package_num + 1 > num_sockets) { +				num_sockets = package_num + 1; + +				if (num_sockets > 4) { +					cpuidle_state_table = ivt_cstates_8s; +					return; +				} +			} +		} -	intel_idle_cpuidle_devices = alloc_percpu(struct cpuidle_device); -	if (intel_idle_cpuidle_devices == NULL) -		return -ENOMEM; +		if (num_sockets > 2) +			cpuidle_state_table = ivt_cstates_4s; +		/* else, 1 and 2 socket systems use default ivt_cstates */ +	} +	return; +} -	for_each_online_cpu(i) { -		dev = per_cpu_ptr(intel_idle_cpuidle_devices, i); +/* + * intel_idle_cpuidle_driver_init() + * allocate, initialize cpuidle_states + */ +static int __init intel_idle_cpuidle_driver_init(void) +{ +	int cstate; +	struct cpuidle_driver *drv = &intel_idle_driver; -		dev->state_count = 1; +	intel_idle_state_table_update(); -		for (cstate = 1; cstate < MWAIT_MAX_NUM_CSTATES; ++cstate) { -			int num_substates; +	drv->state_count = 1; -			if (cstate > max_cstate) { -				printk(PREFIX "max_cstate %d reached\n", -					max_cstate); -				break; -			} +	for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) { 
+		int num_substates, mwait_hint, mwait_cstate; -			/* does the state exist in CPUID.MWAIT? */ -			num_substates = (mwait_substates >> ((cstate) * 4)) -						& MWAIT_SUBSTATE_MASK; -			if (num_substates == 0) -				continue; -			/* is the state not enabled? */ -			if (cpuidle_state_table[cstate].enter == NULL) { -				/* does the driver not know about the state? */ -				if (*cpuidle_state_table[cstate].name == '\0') -					pr_debug(PREFIX "unaware of model 0x%x" -						" MWAIT %d please" -						" contact lenb@kernel.org", -					boot_cpu_data.x86_model, cstate); -				continue; -			} +		if (cpuidle_state_table[cstate].enter == NULL) +			break; -			if ((cstate > 2) && -				!boot_cpu_has(X86_FEATURE_NONSTOP_TSC)) -				mark_tsc_unstable("TSC halts in idle" -					" states deeper than C2"); +		if (cstate + 1 > max_cstate) { +			printk(PREFIX "max_cstate %d reached\n", +				max_cstate); +			break; +		} -			dev->states[dev->state_count] =	/* structure copy */ -				cpuidle_state_table[cstate]; +		mwait_hint = flg2MWAIT(cpuidle_state_table[cstate].flags); +		mwait_cstate = MWAIT_HINT2CSTATE(mwait_hint); -			dev->state_count += 1; -		} +		/* number of sub-states for this state in CPUID.MWAIT */ +		num_substates = (mwait_substates >> ((mwait_cstate + 1) * 4)) +					& MWAIT_SUBSTATE_MASK; -		dev->cpu = i; -		if (cpuidle_register_device(dev)) { -			pr_debug(PREFIX "cpuidle_register_device %d failed!\n", -				 i); -			intel_idle_cpuidle_devices_uninit(); -			return -EIO; -		} +		/* if NO sub-states for this state in CPUID, skip it */ +		if (num_substates == 0) +			continue; + +		if (((mwait_cstate + 1) > 2) && +			!boot_cpu_has(X86_FEATURE_NONSTOP_TSC)) +			mark_tsc_unstable("TSC halts in idle" +					" states deeper than C2"); + +		drv->states[drv->state_count] =	/* structure copy */ +			cpuidle_state_table[cstate]; + +		drv->state_count += 1;  	} +	if (icpu->auto_demotion_disable_flags) +		on_each_cpu(auto_demotion_disable, NULL, 1); + +	if (icpu->disable_promotion_to_c1e)	/* 
each-cpu is redundant */ +		on_each_cpu(c1e_promotion_disable, NULL, 1); +  	return 0;  } +/* + * intel_idle_cpu_init() + * allocate, initialize, register cpuidle_devices + * @cpu: cpu/core to initialize + */ +static int intel_idle_cpu_init(int cpu) +{ +	struct cpuidle_device *dev; + +	dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu); + +	dev->cpu = cpu; + +	if (cpuidle_register_device(dev)) { +		pr_debug(PREFIX "cpuidle_register_device %d failed!\n", cpu); +		intel_idle_cpuidle_devices_uninit(); +		return -EIO; +	} + +	if (icpu->auto_demotion_disable_flags) +		smp_call_function_single(cpu, auto_demotion_disable, NULL, 1); + +	if (icpu->disable_promotion_to_c1e) +		smp_call_function_single(cpu, c1e_promotion_disable, NULL, 1); + +	return 0; +} +  static int __init intel_idle_init(void)  { -	int retval; +	int retval, i; + +	/* Do not load intel_idle at all for now if idle= is passed */ +	if (boot_option_idle_override != IDLE_NO_OVERRIDE) +		return -ENODEV;  	retval = intel_idle_probe();  	if (retval)  		return retval; +	intel_idle_cpuidle_driver_init();  	retval = cpuidle_register_driver(&intel_idle_driver);  	if (retval) { +		struct cpuidle_driver *drv = cpuidle_get_driver();  		printk(KERN_DEBUG PREFIX "intel_idle yielding to %s", -			cpuidle_get_driver()->name); +			drv ? 
drv->name : "none");  		return retval;  	} -	retval = intel_idle_cpuidle_devices_init(); -	if (retval) { -		cpuidle_unregister_driver(&intel_idle_driver); -		return retval; +	intel_idle_cpuidle_devices = alloc_percpu(struct cpuidle_device); +	if (intel_idle_cpuidle_devices == NULL) +		return -ENOMEM; + +	cpu_notifier_register_begin(); + +	for_each_online_cpu(i) { +		retval = intel_idle_cpu_init(i); +		if (retval) { +			cpu_notifier_register_done(); +			cpuidle_unregister_driver(&intel_idle_driver); +			return retval; +		}  	} +	__register_cpu_notifier(&cpu_hotplug_notifier); + +	cpu_notifier_register_done();  	return 0;  } @@ -435,6 +896,14 @@ static void __exit intel_idle_exit(void)  	intel_idle_cpuidle_devices_uninit();  	cpuidle_unregister_driver(&intel_idle_driver); +	cpu_notifier_register_begin(); + +	if (lapic_timer_reliable_states != LAPIC_TIMER_ALWAYS_RELIABLE) +		on_each_cpu(__setup_broadcast_timer, (void *)false, 1); +	__unregister_cpu_notifier(&cpu_hotplug_notifier); + +	cpu_notifier_register_done(); +  	return;  }  | 
