diff options
Diffstat (limited to 'drivers/idle/intel_idle.c')
| -rw-r--r-- | drivers/idle/intel_idle.c | 730 |
1 files changed, 530 insertions, 200 deletions
diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 54ab97bae04..4d140bbbe10 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -1,7 +1,7 @@ /* * intel_idle.c - native hardware idle loop for modern Intel processors * - * Copyright (c) 2010, Intel Corporation. + * Copyright (c) 2013, Intel Corporation. * Len Brown <len.brown@intel.com> * * This program is free software; you can redistribute it and/or modify it @@ -56,12 +56,12 @@ #include <linux/kernel.h> #include <linux/cpuidle.h> #include <linux/clockchips.h> -#include <linux/hrtimer.h> /* ktime_get_real() */ #include <trace/events/power.h> #include <linux/sched.h> #include <linux/notifier.h> #include <linux/cpu.h> #include <linux/module.h> +#include <asm/cpu_device_id.h> #include <asm/mwait.h> #include <asm/msr.h> @@ -73,7 +73,7 @@ static struct cpuidle_driver intel_idle_driver = { .owner = THIS_MODULE, }; /* intel_idle.max_cstate=0 disables driver */ -static int max_cstate = MWAIT_MAX_NUM_CSTATES - 1; +static int max_cstate = CPUIDLE_STATE_MAX - 1; static unsigned int mwait_substates; @@ -81,19 +81,26 @@ static unsigned int mwait_substates; /* Reliable LAPIC Timer States, bit 1 for C1 etc. */ static unsigned int lapic_timer_reliable_states = (1 << 1); /* Default to only C1 */ +struct idle_cpu { + struct cpuidle_state *state_table; + + /* + * Hardware C-state auto-demotion may not always be optimal. + * Indicate which enable bits to clear here. + */ + unsigned long auto_demotion_disable_flags; + bool disable_promotion_to_c1e; +}; + +static const struct idle_cpu *icpu; static struct cpuidle_device __percpu *intel_idle_cpuidle_devices; static int intel_idle(struct cpuidle_device *dev, struct cpuidle_driver *drv, int index); +static int intel_idle_cpu_init(int cpu); static struct cpuidle_state *cpuidle_state_table; /* - * Hardware C-state auto-demotion may not always be optimal. - * Indicate which enable bits to clear here. - */ -static unsigned long long auto_demotion_disable_flags; - -/* * Set this flag for states where the HW flushes the TLB for us * and so we don't need cross-calls to keep it consistent. * If this flag is set, SW flushes the TLB, so even if the @@ -102,129 +109,390 @@ static unsigned long long auto_demotion_disable_flags; #define CPUIDLE_FLAG_TLB_FLUSHED 0x10000 /* + * MWAIT takes an 8-bit "hint" in EAX "suggesting" + * the C-state (top nibble) and sub-state (bottom nibble) + * 0x00 means "MWAIT(C1)", 0x10 means "MWAIT(C2)" etc. + * + * We store the hint at the top of our "flags" for each state. + */ +#define flg2MWAIT(flags) (((flags) >> 24) & 0xFF) +#define MWAIT2flg(eax) ((eax & 0xFF) << 24) + +/* * States are indexed by the cstate number, * which is also the index into the MWAIT hint array. * Thus C0 is a dummy. */ -static struct cpuidle_state nehalem_cstates[MWAIT_MAX_NUM_CSTATES] = { - { /* MWAIT C0 */ }, - { /* MWAIT C1 */ +static struct cpuidle_state nehalem_cstates[] = { + { .name = "C1-NHM", .desc = "MWAIT 0x00", - .flags = CPUIDLE_FLAG_TIME_VALID, + .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_TIME_VALID, .exit_latency = 3, .target_residency = 6, .enter = &intel_idle }, - { /* MWAIT C2 */ + { + .name = "C1E-NHM", + .desc = "MWAIT 0x01", + .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_TIME_VALID, + .exit_latency = 10, + .target_residency = 20, + .enter = &intel_idle }, + { .name = "C3-NHM", .desc = "MWAIT 0x10", - .flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, + .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 20, .target_residency = 80, .enter = &intel_idle }, - { /* MWAIT C3 */ + { .name = "C6-NHM", .desc = "MWAIT 0x20", - .flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, + .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 200, .target_residency = 800, .enter = &intel_idle }, + { + .enter = NULL } }; -static struct cpuidle_state snb_cstates[MWAIT_MAX_NUM_CSTATES] = { - { /* MWAIT C0 */ }, - { /* MWAIT C1 */ +static struct cpuidle_state snb_cstates[] = { + { .name = "C1-SNB", .desc = "MWAIT 0x00", - .flags = CPUIDLE_FLAG_TIME_VALID, - .exit_latency = 1, - .target_residency = 1, + .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_TIME_VALID, + .exit_latency = 2, + .target_residency = 2, + .enter = &intel_idle }, + { + .name = "C1E-SNB", + .desc = "MWAIT 0x01", + .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_TIME_VALID, + .exit_latency = 10, + .target_residency = 20, .enter = &intel_idle }, - { /* MWAIT C2 */ + { .name = "C3-SNB", .desc = "MWAIT 0x10", - .flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, + .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 80, .target_residency = 211, .enter = &intel_idle }, - { /* MWAIT C3 */ + { .name = "C6-SNB", .desc = "MWAIT 0x20", - .flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, + .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 104, .target_residency = 345, .enter = &intel_idle }, - { /* MWAIT C4 */ + { .name = "C7-SNB", .desc = "MWAIT 0x30", - .flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, + .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 109, .target_residency = 345, .enter = &intel_idle }, + { + .enter = NULL } +}; + +static struct cpuidle_state byt_cstates[] = { + { + .name = "C1-BYT", + .desc = "MWAIT 0x00", + .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_TIME_VALID, + .exit_latency = 1, + .target_residency = 1, + .enter = &intel_idle }, + { + .name = "C1E-BYT", + .desc = "MWAIT 0x01", + .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_TIME_VALID, + .exit_latency = 15, + .target_residency = 30, + .enter = &intel_idle }, + { + .name = "C6N-BYT", + .desc = "MWAIT 0x58", + .flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, + .exit_latency = 40, + .target_residency = 275, + .enter = &intel_idle }, + { + .name = "C6S-BYT", + .desc = "MWAIT 0x52", + .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, + .exit_latency = 140, + .target_residency = 560, + .enter = &intel_idle }, + { + .name = "C7-BYT", + .desc = "MWAIT 0x60", + .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, + .exit_latency = 1200, + .target_residency = 1500, + .enter = &intel_idle }, + { + .name = "C7S-BYT", + .desc = "MWAIT 0x64", + .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, + .exit_latency = 10000, + .target_residency = 20000, + .enter = &intel_idle }, + { + .enter = NULL } +}; + +static struct cpuidle_state ivb_cstates[] = { + { + .name = "C1-IVB", + .desc = "MWAIT 0x00", + .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_TIME_VALID, + .exit_latency = 1, + .target_residency = 1, + .enter = &intel_idle }, + { + .name = "C1E-IVB", + .desc = "MWAIT 0x01", + .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_TIME_VALID, + .exit_latency = 10, + .target_residency = 20, + .enter = &intel_idle }, + { + .name = "C3-IVB", + .desc = "MWAIT 0x10", + .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, + .exit_latency = 59, + .target_residency = 156, + .enter = &intel_idle }, + { + .name = "C6-IVB", + .desc = "MWAIT 0x20", + .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, + .exit_latency = 80, + .target_residency = 300, + .enter = &intel_idle }, + { + .name = "C7-IVB", + .desc = "MWAIT 0x30", + .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, + .exit_latency = 87, + .target_residency = 300, + .enter = &intel_idle }, + { + .enter = NULL } +}; + +static struct cpuidle_state ivt_cstates[] = { + { + .name = "C1-IVT", + .desc = "MWAIT 0x00", + .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_TIME_VALID, + .exit_latency = 1, + .target_residency = 1, + .enter = &intel_idle }, + { + .name = "C1E-IVT", + .desc = "MWAIT 0x01", + .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_TIME_VALID, + .exit_latency = 10, + .target_residency = 80, + .enter = &intel_idle }, + { + .name = "C3-IVT", + .desc = "MWAIT 0x10", + .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, + .exit_latency = 59, + .target_residency = 156, + .enter = &intel_idle }, + { + .name = "C6-IVT", + .desc = "MWAIT 0x20", + .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, + .exit_latency = 82, + .target_residency = 300, + .enter = &intel_idle }, + { + .enter = NULL } }; -static struct cpuidle_state atom_cstates[MWAIT_MAX_NUM_CSTATES] = { - { /* MWAIT C0 */ }, - { /* MWAIT C1 */ - .name = "C1-ATM", +static struct cpuidle_state ivt_cstates_4s[] = { + { + .name = "C1-IVT-4S", .desc = "MWAIT 0x00", - .flags = CPUIDLE_FLAG_TIME_VALID, + .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_TIME_VALID, .exit_latency = 1, - .target_residency = 4, + .target_residency = 1, + .enter = &intel_idle }, + { + .name = "C1E-IVT-4S", + .desc = "MWAIT 0x01", + .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_TIME_VALID, + .exit_latency = 10, + .target_residency = 250, + .enter = &intel_idle }, + { + .name = "C3-IVT-4S", + .desc = "MWAIT 0x10", + .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, + .exit_latency = 59, + .target_residency = 300, + .enter = &intel_idle }, + { + .name = "C6-IVT-4S", + .desc = "MWAIT 0x20", + .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, + .exit_latency = 84, + .target_residency = 400, + .enter = &intel_idle }, + { + .enter = NULL } +}; + +static struct cpuidle_state ivt_cstates_8s[] = { + { + .name = "C1-IVT-8S", + .desc = "MWAIT 0x00", + .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_TIME_VALID, + .exit_latency = 1, + .target_residency = 1, + .enter = &intel_idle }, + { + .name = "C1E-IVT-8S", + .desc = "MWAIT 0x01", + .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_TIME_VALID, + .exit_latency = 10, + .target_residency = 500, + .enter = &intel_idle }, + { + .name = "C3-IVT-8S", + .desc = "MWAIT 0x10", + .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, + .exit_latency = 59, + .target_residency = 600, + .enter = &intel_idle }, + { + .name = "C6-IVT-8S", + .desc = "MWAIT 0x20", + .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, + .exit_latency = 88, + .target_residency = 700, + .enter = &intel_idle }, + { + .enter = NULL } +}; + +static struct cpuidle_state hsw_cstates[] = { + { + .name = "C1-HSW", + .desc = "MWAIT 0x00", + .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_TIME_VALID, + .exit_latency = 2, + .target_residency = 2, + .enter = &intel_idle }, + { + .name = "C1E-HSW", + .desc = "MWAIT 0x01", + .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_TIME_VALID, + .exit_latency = 10, + .target_residency = 20, + .enter = &intel_idle }, + { + .name = "C3-HSW", + .desc = "MWAIT 0x10", + .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, + .exit_latency = 33, + .target_residency = 100, + .enter = &intel_idle }, + { + .name = "C6-HSW", + .desc = "MWAIT 0x20", + .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, + .exit_latency = 133, + .target_residency = 400, + .enter = &intel_idle }, + { + .name = "C7s-HSW", + .desc = "MWAIT 0x32", + .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, + .exit_latency = 166, + .target_residency = 500, + .enter = &intel_idle }, + { + .name = "C8-HSW", + .desc = "MWAIT 0x40", + .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, + .exit_latency = 300, + .target_residency = 900, + .enter = &intel_idle }, + { + .name = "C9-HSW", + .desc = "MWAIT 0x50", + .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, + .exit_latency = 600, + .target_residency = 1800, + .enter = &intel_idle }, + { + .name = "C10-HSW", + .desc = "MWAIT 0x60", + .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, + .exit_latency = 2600, + .target_residency = 7700, .enter = &intel_idle }, - { /* MWAIT C2 */ + { + .enter = NULL } +}; + +static struct cpuidle_state atom_cstates[] = { + { + .name = "C1E-ATM", + .desc = "MWAIT 0x00", + .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_TIME_VALID, + .exit_latency = 10, + .target_residency = 20, + .enter = &intel_idle }, + { .name = "C2-ATM", .desc = "MWAIT 0x10", - .flags = CPUIDLE_FLAG_TIME_VALID, + .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TIME_VALID, .exit_latency = 20, .target_residency = 80, .enter = &intel_idle }, - { /* MWAIT C3 */ }, - { /* MWAIT C4 */ + { .name = "C4-ATM", .desc = "MWAIT 0x30", - .flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, + .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 100, .target_residency = 400, .enter = &intel_idle }, - { /* MWAIT C5 */ }, - { /* MWAIT C6 */ + { .name = "C6-ATM", .desc = "MWAIT 0x52", - .flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, + .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 140, .target_residency = 560, .enter = &intel_idle }, + { + .enter = NULL } +}; +static struct cpuidle_state avn_cstates[] = { + { + .name = "C1-AVN", + .desc = "MWAIT 0x00", + .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_TIME_VALID, + .exit_latency = 2, + .target_residency = 2, + .enter = &intel_idle }, + { + .name = "C6-AVN", + .desc = "MWAIT 0x51", + .flags = MWAIT2flg(0x51) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, + .exit_latency = 15, + .target_residency = 45, + .enter = &intel_idle }, + { + .enter = NULL } }; - -static long get_driver_data(int cstate) -{ - int driver_data; - switch (cstate) { - - case 1: /* MWAIT C1 */ - driver_data = 0x00; - break; - case 2: /* MWAIT C2 */ - driver_data = 0x10; - break; - case 3: /* MWAIT C3 */ - driver_data = 0x20; - break; - case 4: /* MWAIT C4 */ - driver_data = 0x30; - break; - case 5: /* MWAIT C5 */ - driver_data = 0x40; - break; - case 6: /* MWAIT C6 */ - driver_data = 0x52; - break; - default: - driver_data = 0x00; - } - return driver_data; -} /** * intel_idle @@ -239,11 +507,8 @@ static int intel_idle(struct cpuidle_device *dev, { unsigned long ecx = 1; /* break on interrupt flag */ struct cpuidle_state *state = &drv->states[index]; - struct cpuidle_state_usage *state_usage = &dev->states_usage[index]; - unsigned long eax = (unsigned long)cpuidle_get_statedata(state_usage); + unsigned long eax = flg2MWAIT(state->flags); unsigned int cstate; - ktime_t kt_before, kt_after; - s64 usec_delta; int cpu = smp_processor_id(); cstate = (((eax) >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK) + 1; @@ -258,30 +523,11 @@ static int intel_idle(struct cpuidle_device *dev, if (!(lapic_timer_reliable_states & (1 << (cstate)))) clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu); - kt_before = ktime_get_real(); - - stop_critical_timings(); - if (!need_resched()) { - - __monitor((void *)¤t_thread_info()->flags, 0, 0); - smp_mb(); - if (!need_resched()) - __mwait(eax, ecx); - } - - start_critical_timings(); - - kt_after = ktime_get_real(); - usec_delta = ktime_to_us(ktime_sub(kt_after, kt_before)); - - local_irq_enable(); + mwait_idle_with_hints(eax, ecx); if (!(lapic_timer_reliable_states & (1 << (cstate)))) clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu); - /* Update cpuidle counters */ - dev->last_residency = (int)usec_delta; - return index; } @@ -296,22 +542,35 @@ static void __setup_broadcast_timer(void *arg) clockevents_notify(reason, &cpu); } -static int setup_broadcast_cpuhp_notify(struct notifier_block *n, - unsigned long action, void *hcpu) +static int cpu_hotplug_notify(struct notifier_block *n, + unsigned long action, void *hcpu) { int hotcpu = (unsigned long)hcpu; + struct cpuidle_device *dev; - switch (action & 0xf) { + switch (action & ~CPU_TASKS_FROZEN) { case CPU_ONLINE: - smp_call_function_single(hotcpu, __setup_broadcast_timer, - (void *)true, 1); + + if (lapic_timer_reliable_states != LAPIC_TIMER_ALWAYS_RELIABLE) + smp_call_function_single(hotcpu, __setup_broadcast_timer, + (void *)true, 1); + + /* + * Some systems can hotplug a cpu at runtime after + * the kernel has booted, we have to initialize the + * driver in this case + */ + dev = per_cpu_ptr(intel_idle_cpuidle_devices, hotcpu); + if (!dev->registered) + intel_idle_cpu_init(hotcpu); + break; } return NOTIFY_OK; } -static struct notifier_block setup_broadcast_notifier = { - .notifier_call = setup_broadcast_cpuhp_notify, +static struct notifier_block cpu_hotplug_notifier = { + .notifier_call = cpu_hotplug_notify, }; static void auto_demotion_disable(void *dummy) @@ -319,27 +578,112 @@ static void auto_demotion_disable(void *dummy) unsigned long long msr_bits; rdmsrl(MSR_NHM_SNB_PKG_CST_CFG_CTL, msr_bits); - msr_bits &= ~auto_demotion_disable_flags; + msr_bits &= ~(icpu->auto_demotion_disable_flags); wrmsrl(MSR_NHM_SNB_PKG_CST_CFG_CTL, msr_bits); } +static void c1e_promotion_disable(void *dummy) +{ + unsigned long long msr_bits; + + rdmsrl(MSR_IA32_POWER_CTL, msr_bits); + msr_bits &= ~0x2; + wrmsrl(MSR_IA32_POWER_CTL, msr_bits); +} + +static const struct idle_cpu idle_cpu_nehalem = { + .state_table = nehalem_cstates, + .auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE, + .disable_promotion_to_c1e = true, +}; + +static const struct idle_cpu idle_cpu_atom = { + .state_table = atom_cstates, +}; + +static const struct idle_cpu idle_cpu_lincroft = { + .state_table = atom_cstates, + .auto_demotion_disable_flags = ATM_LNC_C6_AUTO_DEMOTE, +}; + +static const struct idle_cpu idle_cpu_snb = { + .state_table = snb_cstates, + .disable_promotion_to_c1e = true, +}; + +static const struct idle_cpu idle_cpu_byt = { + .state_table = byt_cstates, + .disable_promotion_to_c1e = true, +}; + +static const struct idle_cpu idle_cpu_ivb = { + .state_table = ivb_cstates, + .disable_promotion_to_c1e = true, +}; + +static const struct idle_cpu idle_cpu_ivt = { + .state_table = ivt_cstates, + .disable_promotion_to_c1e = true, +}; + +static const struct idle_cpu idle_cpu_hsw = { + .state_table = hsw_cstates, + .disable_promotion_to_c1e = true, +}; + +static const struct idle_cpu idle_cpu_avn = { + .state_table = avn_cstates, + .disable_promotion_to_c1e = true, +}; + +#define ICPU(model, cpu) \ + { X86_VENDOR_INTEL, 6, model, X86_FEATURE_MWAIT, (unsigned long)&cpu } + +static const struct x86_cpu_id intel_idle_ids[] = { + ICPU(0x1a, idle_cpu_nehalem), + ICPU(0x1e, idle_cpu_nehalem), + ICPU(0x1f, idle_cpu_nehalem), + ICPU(0x25, idle_cpu_nehalem), + ICPU(0x2c, idle_cpu_nehalem), + ICPU(0x2e, idle_cpu_nehalem), + ICPU(0x1c, idle_cpu_atom), + ICPU(0x26, idle_cpu_lincroft), + ICPU(0x2f, idle_cpu_nehalem), + ICPU(0x2a, idle_cpu_snb), + ICPU(0x2d, idle_cpu_snb), + ICPU(0x36, idle_cpu_atom), + ICPU(0x37, idle_cpu_byt), + ICPU(0x3a, idle_cpu_ivb), + ICPU(0x3e, idle_cpu_ivt), + ICPU(0x3c, idle_cpu_hsw), + ICPU(0x3f, idle_cpu_hsw), + ICPU(0x45, idle_cpu_hsw), + ICPU(0x46, idle_cpu_hsw), + ICPU(0x4D, idle_cpu_avn), + {} +}; +MODULE_DEVICE_TABLE(x86cpu, intel_idle_ids); /* * intel_idle_probe() */ -static int intel_idle_probe(void) +static int __init intel_idle_probe(void) { unsigned int eax, ebx, ecx; + const struct x86_cpu_id *id; if (max_cstate == 0) { pr_debug(PREFIX "disabled\n"); return -EPERM; } - if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) - return -ENODEV; - - if (!boot_cpu_has(X86_FEATURE_MWAIT)) + id = x86_match_cpu(intel_idle_ids); + if (!id) { + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && + boot_cpu_data.x86 == 6) + pr_debug(PREFIX "does not run on family %d model %d\n", + boot_cpu_data.x86, boot_cpu_data.x86_model); return -ENODEV; + } if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF) return -ENODEV; @@ -353,50 +697,13 @@ static int intel_idle_probe(void) pr_debug(PREFIX "MWAIT substates: 0x%x\n", mwait_substates); - - if (boot_cpu_data.x86 != 6) /* family 6 */ - return -ENODEV; - - switch (boot_cpu_data.x86_model) { - - case 0x1A: /* Core i7, Xeon 5500 series */ - case 0x1E: /* Core i7 and i5 Processor - Lynnfield Jasper Forest */ - case 0x1F: /* Core i7 and i5 Processor - Nehalem */ - case 0x2E: /* Nehalem-EX Xeon */ - case 0x2F: /* Westmere-EX Xeon */ - case 0x25: /* Westmere */ - case 0x2C: /* Westmere */ - cpuidle_state_table = nehalem_cstates; - auto_demotion_disable_flags = - (NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE); - break; - - case 0x1C: /* 28 - Atom Processor */ - cpuidle_state_table = atom_cstates; - break; - - case 0x26: /* 38 - Lincroft Atom Processor */ - cpuidle_state_table = atom_cstates; - auto_demotion_disable_flags = ATM_LNC_C6_AUTO_DEMOTE; - break; - - case 0x2A: /* SNB */ - case 0x2D: /* SNB Xeon */ - cpuidle_state_table = snb_cstates; - break; - - default: - pr_debug(PREFIX "does not run on family %d model %d\n", - boot_cpu_data.x86, boot_cpu_data.x86_model); - return -ENODEV; - } + icpu = (const struct idle_cpu *)id->driver_data; + cpuidle_state_table = icpu->state_table; if (boot_cpu_has(X86_FEATURE_ARAT)) /* Always Reliable APIC Timer */ lapic_timer_reliable_states = LAPIC_TIMER_ALWAYS_RELIABLE; - else { + else on_each_cpu(__setup_broadcast_timer, (void *)true, 1); - register_cpu_notifier(&setup_broadcast_notifier); - } pr_debug(PREFIX "v" INTEL_IDLE_VERSION " model 0x%X\n", boot_cpu_data.x86_model); @@ -423,43 +730,77 @@ static void intel_idle_cpuidle_devices_uninit(void) free_percpu(intel_idle_cpuidle_devices); return; } + +/* + * intel_idle_state_table_update() + * + * Update the default state_table for this CPU-id + * + * Currently used to access tuned IVT multi-socket targets + * Assumption: num_sockets == (max_package_num + 1) + */ +void intel_idle_state_table_update(void) +{ + /* IVT uses a different table for 1-2, 3-4, and > 4 sockets */ + if (boot_cpu_data.x86_model == 0x3e) { /* IVT */ + int cpu, package_num, num_sockets = 1; + + for_each_online_cpu(cpu) { + package_num = topology_physical_package_id(cpu); + if (package_num + 1 > num_sockets) { + num_sockets = package_num + 1; + + if (num_sockets > 4) { + cpuidle_state_table = ivt_cstates_8s; + return; + } + } + } + + if (num_sockets > 2) + cpuidle_state_table = ivt_cstates_4s; + /* else, 1 and 2 socket systems use default ivt_cstates */ + } + return; +} + /* * intel_idle_cpuidle_driver_init() * allocate, initialize cpuidle_states */ -static int intel_idle_cpuidle_driver_init(void) +static int __init intel_idle_cpuidle_driver_init(void) { int cstate; struct cpuidle_driver *drv = &intel_idle_driver; + intel_idle_state_table_update(); + drv->state_count = 1; - for (cstate = 1; cstate < MWAIT_MAX_NUM_CSTATES; ++cstate) { - int num_substates; + for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) { + int num_substates, mwait_hint, mwait_cstate; + + if (cpuidle_state_table[cstate].enter == NULL) + break; - if (cstate > max_cstate) { + if (cstate + 1 > max_cstate) { printk(PREFIX "max_cstate %d reached\n", max_cstate); break; } - /* does the state exist in CPUID.MWAIT? */ - num_substates = (mwait_substates >> ((cstate) * 4)) + mwait_hint = flg2MWAIT(cpuidle_state_table[cstate].flags); + mwait_cstate = MWAIT_HINT2CSTATE(mwait_hint); + + /* number of sub-states for this state in CPUID.MWAIT */ + num_substates = (mwait_substates >> ((mwait_cstate + 1) * 4)) & MWAIT_SUBSTATE_MASK; + + /* if NO sub-states for this state in CPUID, skip it */ if (num_substates == 0) continue; - /* is the state not enabled? */ - if (cpuidle_state_table[cstate].enter == NULL) { - /* does the driver not know about the state? */ - if (*cpuidle_state_table[cstate].name == '\0') - pr_debug(PREFIX "unaware of model 0x%x" - " MWAIT %d please" - " contact lenb@kernel.org", - boot_cpu_data.x86_model, cstate); - continue; - } - if ((cstate > 2) && + if (((mwait_cstate + 1) > 2) && !boot_cpu_has(X86_FEATURE_NONSTOP_TSC)) mark_tsc_unstable("TSC halts in idle" " states deeper than C2"); @@ -470,9 +811,12 @@ static int intel_idle_cpuidle_driver_init(void) drv->state_count += 1; } - if (auto_demotion_disable_flags) + if (icpu->auto_demotion_disable_flags) on_each_cpu(auto_demotion_disable, NULL, 1); + if (icpu->disable_promotion_to_c1e) /* each-cpu is redundant */ + on_each_cpu(c1e_promotion_disable, NULL, 1); + return 0; } @@ -482,38 +826,12 @@ static int intel_idle_cpuidle_driver_init(void) * allocate, initialize, register cpuidle_devices * @cpu: cpu/core to initialize */ -int intel_idle_cpu_init(int cpu) +static int intel_idle_cpu_init(int cpu) { - int cstate; struct cpuidle_device *dev; dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu); - dev->state_count = 1; - - for (cstate = 1; cstate < MWAIT_MAX_NUM_CSTATES; ++cstate) { - int num_substates; - - if (cstate > max_cstate) { - printk(PREFIX "max_cstate %d reached\n", - max_cstate); - break; - } - - /* does the state exist in CPUID.MWAIT? */ - num_substates = (mwait_substates >> ((cstate) * 4)) - & MWAIT_SUBSTATE_MASK; - if (num_substates == 0) - continue; - /* is the state not enabled? */ - if (cpuidle_state_table[cstate].enter == NULL) - continue; - - dev->states_usage[dev->state_count].driver_data = - (void *)get_driver_data(cstate); - - dev->state_count += 1; - } dev->cpu = cpu; if (cpuidle_register_device(dev)) { @@ -522,12 +840,14 @@ int intel_idle_cpu_init(int cpu) return -EIO; } - if (auto_demotion_disable_flags) + if (icpu->auto_demotion_disable_flags) smp_call_function_single(cpu, auto_demotion_disable, NULL, 1); + if (icpu->disable_promotion_to_c1e) + smp_call_function_single(cpu, c1e_promotion_disable, NULL, 1); + return 0; } -EXPORT_SYMBOL_GPL(intel_idle_cpu_init); static int __init intel_idle_init(void) { @@ -544,8 +864,9 @@ static int __init intel_idle_init(void) intel_idle_cpuidle_driver_init(); retval = cpuidle_register_driver(&intel_idle_driver); if (retval) { + struct cpuidle_driver *drv = cpuidle_get_driver(); printk(KERN_DEBUG PREFIX "intel_idle yielding to %s", - cpuidle_get_driver()->name); + drv ? drv->name : "none"); return retval; } @@ -553,13 +874,19 @@ static int __init intel_idle_init(void) if (intel_idle_cpuidle_devices == NULL) return -ENOMEM; + cpu_notifier_register_begin(); + for_each_online_cpu(i) { retval = intel_idle_cpu_init(i); if (retval) { + cpu_notifier_register_done(); cpuidle_unregister_driver(&intel_idle_driver); return retval; } } + __register_cpu_notifier(&cpu_hotplug_notifier); + + cpu_notifier_register_done(); return 0; } @@ -569,10 +896,13 @@ static void __exit intel_idle_exit(void) intel_idle_cpuidle_devices_uninit(); cpuidle_unregister_driver(&intel_idle_driver); - if (lapic_timer_reliable_states != LAPIC_TIMER_ALWAYS_RELIABLE) { + cpu_notifier_register_begin(); + + if (lapic_timer_reliable_states != LAPIC_TIMER_ALWAYS_RELIABLE) on_each_cpu(__setup_broadcast_timer, (void *)false, 1); - unregister_cpu_notifier(&setup_broadcast_notifier); - } + __unregister_cpu_notifier(&cpu_hotplug_notifier); + + cpu_notifier_register_done(); return; } |
