aboutsummaryrefslogtreecommitdiff
path: root/drivers/idle/intel_idle.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/idle/intel_idle.c')
-rw-r--r--drivers/idle/intel_idle.c730
1 files changed, 530 insertions, 200 deletions
diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index 54ab97bae04..4d140bbbe10 100644
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -1,7 +1,7 @@
/*
* intel_idle.c - native hardware idle loop for modern Intel processors
*
- * Copyright (c) 2010, Intel Corporation.
+ * Copyright (c) 2013, Intel Corporation.
* Len Brown <len.brown@intel.com>
*
* This program is free software; you can redistribute it and/or modify it
@@ -56,12 +56,12 @@
#include <linux/kernel.h>
#include <linux/cpuidle.h>
#include <linux/clockchips.h>
-#include <linux/hrtimer.h> /* ktime_get_real() */
#include <trace/events/power.h>
#include <linux/sched.h>
#include <linux/notifier.h>
#include <linux/cpu.h>
#include <linux/module.h>
+#include <asm/cpu_device_id.h>
#include <asm/mwait.h>
#include <asm/msr.h>
@@ -73,7 +73,7 @@ static struct cpuidle_driver intel_idle_driver = {
.owner = THIS_MODULE,
};
/* intel_idle.max_cstate=0 disables driver */
-static int max_cstate = MWAIT_MAX_NUM_CSTATES - 1;
+static int max_cstate = CPUIDLE_STATE_MAX - 1;
static unsigned int mwait_substates;
@@ -81,19 +81,26 @@ static unsigned int mwait_substates;
/* Reliable LAPIC Timer States, bit 1 for C1 etc. */
static unsigned int lapic_timer_reliable_states = (1 << 1); /* Default to only C1 */
+struct idle_cpu {
+ struct cpuidle_state *state_table;
+
+ /*
+ * Hardware C-state auto-demotion may not always be optimal.
+ * Indicate which enable bits to clear here.
+ */
+ unsigned long auto_demotion_disable_flags;
+ bool disable_promotion_to_c1e;
+};
+
+static const struct idle_cpu *icpu;
static struct cpuidle_device __percpu *intel_idle_cpuidle_devices;
static int intel_idle(struct cpuidle_device *dev,
struct cpuidle_driver *drv, int index);
+static int intel_idle_cpu_init(int cpu);
static struct cpuidle_state *cpuidle_state_table;
/*
- * Hardware C-state auto-demotion may not always be optimal.
- * Indicate which enable bits to clear here.
- */
-static unsigned long long auto_demotion_disable_flags;
-
-/*
* Set this flag for states where the HW flushes the TLB for us
* and so we don't need cross-calls to keep it consistent.
* If this flag is set, SW flushes the TLB, so even if the
@@ -102,129 +109,390 @@ static unsigned long long auto_demotion_disable_flags;
#define CPUIDLE_FLAG_TLB_FLUSHED 0x10000
/*
+ * MWAIT takes an 8-bit "hint" in EAX "suggesting"
+ * the C-state (top nibble) and sub-state (bottom nibble)
+ * 0x00 means "MWAIT(C1)", 0x10 means "MWAIT(C2)" etc.
+ *
+ * We store the hint at the top of our "flags" for each state.
+ */
+#define flg2MWAIT(flags) (((flags) >> 24) & 0xFF)
+#define MWAIT2flg(eax) ((eax & 0xFF) << 24)
+
+/*
* States are indexed by the cstate number,
* which is also the index into the MWAIT hint array.
* Thus C0 is a dummy.
*/
-static struct cpuidle_state nehalem_cstates[MWAIT_MAX_NUM_CSTATES] = {
- { /* MWAIT C0 */ },
- { /* MWAIT C1 */
+static struct cpuidle_state nehalem_cstates[] = {
+ {
.name = "C1-NHM",
.desc = "MWAIT 0x00",
- .flags = CPUIDLE_FLAG_TIME_VALID,
+ .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_TIME_VALID,
.exit_latency = 3,
.target_residency = 6,
.enter = &intel_idle },
- { /* MWAIT C2 */
+ {
+ .name = "C1E-NHM",
+ .desc = "MWAIT 0x01",
+ .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_TIME_VALID,
+ .exit_latency = 10,
+ .target_residency = 20,
+ .enter = &intel_idle },
+ {
.name = "C3-NHM",
.desc = "MWAIT 0x10",
- .flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
+ .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 20,
.target_residency = 80,
.enter = &intel_idle },
- { /* MWAIT C3 */
+ {
.name = "C6-NHM",
.desc = "MWAIT 0x20",
- .flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
+ .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 200,
.target_residency = 800,
.enter = &intel_idle },
+ {
+ .enter = NULL }
};
-static struct cpuidle_state snb_cstates[MWAIT_MAX_NUM_CSTATES] = {
- { /* MWAIT C0 */ },
- { /* MWAIT C1 */
+static struct cpuidle_state snb_cstates[] = {
+ {
.name = "C1-SNB",
.desc = "MWAIT 0x00",
- .flags = CPUIDLE_FLAG_TIME_VALID,
- .exit_latency = 1,
- .target_residency = 1,
+ .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_TIME_VALID,
+ .exit_latency = 2,
+ .target_residency = 2,
+ .enter = &intel_idle },
+ {
+ .name = "C1E-SNB",
+ .desc = "MWAIT 0x01",
+ .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_TIME_VALID,
+ .exit_latency = 10,
+ .target_residency = 20,
.enter = &intel_idle },
- { /* MWAIT C2 */
+ {
.name = "C3-SNB",
.desc = "MWAIT 0x10",
- .flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
+ .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 80,
.target_residency = 211,
.enter = &intel_idle },
- { /* MWAIT C3 */
+ {
.name = "C6-SNB",
.desc = "MWAIT 0x20",
- .flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
+ .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 104,
.target_residency = 345,
.enter = &intel_idle },
- { /* MWAIT C4 */
+ {
.name = "C7-SNB",
.desc = "MWAIT 0x30",
- .flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
+ .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 109,
.target_residency = 345,
.enter = &intel_idle },
+ {
+ .enter = NULL }
+};
+
+static struct cpuidle_state byt_cstates[] = {
+ {
+ .name = "C1-BYT",
+ .desc = "MWAIT 0x00",
+ .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_TIME_VALID,
+ .exit_latency = 1,
+ .target_residency = 1,
+ .enter = &intel_idle },
+ {
+ .name = "C1E-BYT",
+ .desc = "MWAIT 0x01",
+ .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_TIME_VALID,
+ .exit_latency = 15,
+ .target_residency = 30,
+ .enter = &intel_idle },
+ {
+ .name = "C6N-BYT",
+ .desc = "MWAIT 0x58",
+ .flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
+ .exit_latency = 40,
+ .target_residency = 275,
+ .enter = &intel_idle },
+ {
+ .name = "C6S-BYT",
+ .desc = "MWAIT 0x52",
+ .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
+ .exit_latency = 140,
+ .target_residency = 560,
+ .enter = &intel_idle },
+ {
+ .name = "C7-BYT",
+ .desc = "MWAIT 0x60",
+ .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
+ .exit_latency = 1200,
+ .target_residency = 1500,
+ .enter = &intel_idle },
+ {
+ .name = "C7S-BYT",
+ .desc = "MWAIT 0x64",
+ .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
+ .exit_latency = 10000,
+ .target_residency = 20000,
+ .enter = &intel_idle },
+ {
+ .enter = NULL }
+};
+
+static struct cpuidle_state ivb_cstates[] = {
+ {
+ .name = "C1-IVB",
+ .desc = "MWAIT 0x00",
+ .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_TIME_VALID,
+ .exit_latency = 1,
+ .target_residency = 1,
+ .enter = &intel_idle },
+ {
+ .name = "C1E-IVB",
+ .desc = "MWAIT 0x01",
+ .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_TIME_VALID,
+ .exit_latency = 10,
+ .target_residency = 20,
+ .enter = &intel_idle },
+ {
+ .name = "C3-IVB",
+ .desc = "MWAIT 0x10",
+ .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
+ .exit_latency = 59,
+ .target_residency = 156,
+ .enter = &intel_idle },
+ {
+ .name = "C6-IVB",
+ .desc = "MWAIT 0x20",
+ .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
+ .exit_latency = 80,
+ .target_residency = 300,
+ .enter = &intel_idle },
+ {
+ .name = "C7-IVB",
+ .desc = "MWAIT 0x30",
+ .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
+ .exit_latency = 87,
+ .target_residency = 300,
+ .enter = &intel_idle },
+ {
+ .enter = NULL }
+};
+
+static struct cpuidle_state ivt_cstates[] = {
+ {
+ .name = "C1-IVT",
+ .desc = "MWAIT 0x00",
+ .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_TIME_VALID,
+ .exit_latency = 1,
+ .target_residency = 1,
+ .enter = &intel_idle },
+ {
+ .name = "C1E-IVT",
+ .desc = "MWAIT 0x01",
+ .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_TIME_VALID,
+ .exit_latency = 10,
+ .target_residency = 80,
+ .enter = &intel_idle },
+ {
+ .name = "C3-IVT",
+ .desc = "MWAIT 0x10",
+ .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
+ .exit_latency = 59,
+ .target_residency = 156,
+ .enter = &intel_idle },
+ {
+ .name = "C6-IVT",
+ .desc = "MWAIT 0x20",
+ .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
+ .exit_latency = 82,
+ .target_residency = 300,
+ .enter = &intel_idle },
+ {
+ .enter = NULL }
};
-static struct cpuidle_state atom_cstates[MWAIT_MAX_NUM_CSTATES] = {
- { /* MWAIT C0 */ },
- { /* MWAIT C1 */
- .name = "C1-ATM",
+static struct cpuidle_state ivt_cstates_4s[] = {
+ {
+ .name = "C1-IVT-4S",
.desc = "MWAIT 0x00",
- .flags = CPUIDLE_FLAG_TIME_VALID,
+ .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_TIME_VALID,
.exit_latency = 1,
- .target_residency = 4,
+ .target_residency = 1,
+ .enter = &intel_idle },
+ {
+ .name = "C1E-IVT-4S",
+ .desc = "MWAIT 0x01",
+ .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_TIME_VALID,
+ .exit_latency = 10,
+ .target_residency = 250,
+ .enter = &intel_idle },
+ {
+ .name = "C3-IVT-4S",
+ .desc = "MWAIT 0x10",
+ .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
+ .exit_latency = 59,
+ .target_residency = 300,
+ .enter = &intel_idle },
+ {
+ .name = "C6-IVT-4S",
+ .desc = "MWAIT 0x20",
+ .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
+ .exit_latency = 84,
+ .target_residency = 400,
+ .enter = &intel_idle },
+ {
+ .enter = NULL }
+};
+
+static struct cpuidle_state ivt_cstates_8s[] = {
+ {
+ .name = "C1-IVT-8S",
+ .desc = "MWAIT 0x00",
+ .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_TIME_VALID,
+ .exit_latency = 1,
+ .target_residency = 1,
+ .enter = &intel_idle },
+ {
+ .name = "C1E-IVT-8S",
+ .desc = "MWAIT 0x01",
+ .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_TIME_VALID,
+ .exit_latency = 10,
+ .target_residency = 500,
+ .enter = &intel_idle },
+ {
+ .name = "C3-IVT-8S",
+ .desc = "MWAIT 0x10",
+ .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
+ .exit_latency = 59,
+ .target_residency = 600,
+ .enter = &intel_idle },
+ {
+ .name = "C6-IVT-8S",
+ .desc = "MWAIT 0x20",
+ .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
+ .exit_latency = 88,
+ .target_residency = 700,
+ .enter = &intel_idle },
+ {
+ .enter = NULL }
+};
+
+static struct cpuidle_state hsw_cstates[] = {
+ {
+ .name = "C1-HSW",
+ .desc = "MWAIT 0x00",
+ .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_TIME_VALID,
+ .exit_latency = 2,
+ .target_residency = 2,
+ .enter = &intel_idle },
+ {
+ .name = "C1E-HSW",
+ .desc = "MWAIT 0x01",
+ .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_TIME_VALID,
+ .exit_latency = 10,
+ .target_residency = 20,
+ .enter = &intel_idle },
+ {
+ .name = "C3-HSW",
+ .desc = "MWAIT 0x10",
+ .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
+ .exit_latency = 33,
+ .target_residency = 100,
+ .enter = &intel_idle },
+ {
+ .name = "C6-HSW",
+ .desc = "MWAIT 0x20",
+ .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
+ .exit_latency = 133,
+ .target_residency = 400,
+ .enter = &intel_idle },
+ {
+ .name = "C7s-HSW",
+ .desc = "MWAIT 0x32",
+ .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
+ .exit_latency = 166,
+ .target_residency = 500,
+ .enter = &intel_idle },
+ {
+ .name = "C8-HSW",
+ .desc = "MWAIT 0x40",
+ .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
+ .exit_latency = 300,
+ .target_residency = 900,
+ .enter = &intel_idle },
+ {
+ .name = "C9-HSW",
+ .desc = "MWAIT 0x50",
+ .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
+ .exit_latency = 600,
+ .target_residency = 1800,
+ .enter = &intel_idle },
+ {
+ .name = "C10-HSW",
+ .desc = "MWAIT 0x60",
+ .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
+ .exit_latency = 2600,
+ .target_residency = 7700,
.enter = &intel_idle },
- { /* MWAIT C2 */
+ {
+ .enter = NULL }
+};
+
+static struct cpuidle_state atom_cstates[] = {
+ {
+ .name = "C1E-ATM",
+ .desc = "MWAIT 0x00",
+ .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_TIME_VALID,
+ .exit_latency = 10,
+ .target_residency = 20,
+ .enter = &intel_idle },
+ {
.name = "C2-ATM",
.desc = "MWAIT 0x10",
- .flags = CPUIDLE_FLAG_TIME_VALID,
+ .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TIME_VALID,
.exit_latency = 20,
.target_residency = 80,
.enter = &intel_idle },
- { /* MWAIT C3 */ },
- { /* MWAIT C4 */
+ {
.name = "C4-ATM",
.desc = "MWAIT 0x30",
- .flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
+ .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 100,
.target_residency = 400,
.enter = &intel_idle },
- { /* MWAIT C5 */ },
- { /* MWAIT C6 */
+ {
.name = "C6-ATM",
.desc = "MWAIT 0x52",
- .flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
+ .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 140,
.target_residency = 560,
.enter = &intel_idle },
+ {
+ .enter = NULL }
+};
+static struct cpuidle_state avn_cstates[] = {
+ {
+ .name = "C1-AVN",
+ .desc = "MWAIT 0x00",
+ .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_TIME_VALID,
+ .exit_latency = 2,
+ .target_residency = 2,
+ .enter = &intel_idle },
+ {
+ .name = "C6-AVN",
+ .desc = "MWAIT 0x51",
+ .flags = MWAIT2flg(0x51) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
+ .exit_latency = 15,
+ .target_residency = 45,
+ .enter = &intel_idle },
+ {
+ .enter = NULL }
};
-
-static long get_driver_data(int cstate)
-{
- int driver_data;
- switch (cstate) {
-
- case 1: /* MWAIT C1 */
- driver_data = 0x00;
- break;
- case 2: /* MWAIT C2 */
- driver_data = 0x10;
- break;
- case 3: /* MWAIT C3 */
- driver_data = 0x20;
- break;
- case 4: /* MWAIT C4 */
- driver_data = 0x30;
- break;
- case 5: /* MWAIT C5 */
- driver_data = 0x40;
- break;
- case 6: /* MWAIT C6 */
- driver_data = 0x52;
- break;
- default:
- driver_data = 0x00;
- }
- return driver_data;
-}
/**
* intel_idle
@@ -239,11 +507,8 @@ static int intel_idle(struct cpuidle_device *dev,
{
unsigned long ecx = 1; /* break on interrupt flag */
struct cpuidle_state *state = &drv->states[index];
- struct cpuidle_state_usage *state_usage = &dev->states_usage[index];
- unsigned long eax = (unsigned long)cpuidle_get_statedata(state_usage);
+ unsigned long eax = flg2MWAIT(state->flags);
unsigned int cstate;
- ktime_t kt_before, kt_after;
- s64 usec_delta;
int cpu = smp_processor_id();
cstate = (((eax) >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK) + 1;
@@ -258,30 +523,11 @@ static int intel_idle(struct cpuidle_device *dev,
if (!(lapic_timer_reliable_states & (1 << (cstate))))
clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu);
- kt_before = ktime_get_real();
-
- stop_critical_timings();
- if (!need_resched()) {
-
- __monitor((void *)&current_thread_info()->flags, 0, 0);
- smp_mb();
- if (!need_resched())
- __mwait(eax, ecx);
- }
-
- start_critical_timings();
-
- kt_after = ktime_get_real();
- usec_delta = ktime_to_us(ktime_sub(kt_after, kt_before));
-
- local_irq_enable();
+ mwait_idle_with_hints(eax, ecx);
if (!(lapic_timer_reliable_states & (1 << (cstate))))
clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu);
- /* Update cpuidle counters */
- dev->last_residency = (int)usec_delta;
-
return index;
}
@@ -296,22 +542,35 @@ static void __setup_broadcast_timer(void *arg)
clockevents_notify(reason, &cpu);
}
-static int setup_broadcast_cpuhp_notify(struct notifier_block *n,
- unsigned long action, void *hcpu)
+static int cpu_hotplug_notify(struct notifier_block *n,
+ unsigned long action, void *hcpu)
{
int hotcpu = (unsigned long)hcpu;
+ struct cpuidle_device *dev;
- switch (action & 0xf) {
+ switch (action & ~CPU_TASKS_FROZEN) {
case CPU_ONLINE:
- smp_call_function_single(hotcpu, __setup_broadcast_timer,
- (void *)true, 1);
+
+ if (lapic_timer_reliable_states != LAPIC_TIMER_ALWAYS_RELIABLE)
+ smp_call_function_single(hotcpu, __setup_broadcast_timer,
+ (void *)true, 1);
+
+ /*
+ * Some systems can hotplug a cpu at runtime after
+ * the kernel has booted, we have to initialize the
+ * driver in this case
+ */
+ dev = per_cpu_ptr(intel_idle_cpuidle_devices, hotcpu);
+ if (!dev->registered)
+ intel_idle_cpu_init(hotcpu);
+
break;
}
return NOTIFY_OK;
}
-static struct notifier_block setup_broadcast_notifier = {
- .notifier_call = setup_broadcast_cpuhp_notify,
+static struct notifier_block cpu_hotplug_notifier = {
+ .notifier_call = cpu_hotplug_notify,
};
static void auto_demotion_disable(void *dummy)
@@ -319,27 +578,112 @@ static void auto_demotion_disable(void *dummy)
unsigned long long msr_bits;
rdmsrl(MSR_NHM_SNB_PKG_CST_CFG_CTL, msr_bits);
- msr_bits &= ~auto_demotion_disable_flags;
+ msr_bits &= ~(icpu->auto_demotion_disable_flags);
wrmsrl(MSR_NHM_SNB_PKG_CST_CFG_CTL, msr_bits);
}
+static void c1e_promotion_disable(void *dummy)
+{
+ unsigned long long msr_bits;
+
+ rdmsrl(MSR_IA32_POWER_CTL, msr_bits);
+ msr_bits &= ~0x2;
+ wrmsrl(MSR_IA32_POWER_CTL, msr_bits);
+}
+
+static const struct idle_cpu idle_cpu_nehalem = {
+ .state_table = nehalem_cstates,
+ .auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE,
+ .disable_promotion_to_c1e = true,
+};
+
+static const struct idle_cpu idle_cpu_atom = {
+ .state_table = atom_cstates,
+};
+
+static const struct idle_cpu idle_cpu_lincroft = {
+ .state_table = atom_cstates,
+ .auto_demotion_disable_flags = ATM_LNC_C6_AUTO_DEMOTE,
+};
+
+static const struct idle_cpu idle_cpu_snb = {
+ .state_table = snb_cstates,
+ .disable_promotion_to_c1e = true,
+};
+
+static const struct idle_cpu idle_cpu_byt = {
+ .state_table = byt_cstates,
+ .disable_promotion_to_c1e = true,
+};
+
+static const struct idle_cpu idle_cpu_ivb = {
+ .state_table = ivb_cstates,
+ .disable_promotion_to_c1e = true,
+};
+
+static const struct idle_cpu idle_cpu_ivt = {
+ .state_table = ivt_cstates,
+ .disable_promotion_to_c1e = true,
+};
+
+static const struct idle_cpu idle_cpu_hsw = {
+ .state_table = hsw_cstates,
+ .disable_promotion_to_c1e = true,
+};
+
+static const struct idle_cpu idle_cpu_avn = {
+ .state_table = avn_cstates,
+ .disable_promotion_to_c1e = true,
+};
+
+#define ICPU(model, cpu) \
+ { X86_VENDOR_INTEL, 6, model, X86_FEATURE_MWAIT, (unsigned long)&cpu }
+
+static const struct x86_cpu_id intel_idle_ids[] = {
+ ICPU(0x1a, idle_cpu_nehalem),
+ ICPU(0x1e, idle_cpu_nehalem),
+ ICPU(0x1f, idle_cpu_nehalem),
+ ICPU(0x25, idle_cpu_nehalem),
+ ICPU(0x2c, idle_cpu_nehalem),
+ ICPU(0x2e, idle_cpu_nehalem),
+ ICPU(0x1c, idle_cpu_atom),
+ ICPU(0x26, idle_cpu_lincroft),
+ ICPU(0x2f, idle_cpu_nehalem),
+ ICPU(0x2a, idle_cpu_snb),
+ ICPU(0x2d, idle_cpu_snb),
+ ICPU(0x36, idle_cpu_atom),
+ ICPU(0x37, idle_cpu_byt),
+ ICPU(0x3a, idle_cpu_ivb),
+ ICPU(0x3e, idle_cpu_ivt),
+ ICPU(0x3c, idle_cpu_hsw),
+ ICPU(0x3f, idle_cpu_hsw),
+ ICPU(0x45, idle_cpu_hsw),
+ ICPU(0x46, idle_cpu_hsw),
+ ICPU(0x4D, idle_cpu_avn),
+ {}
+};
+MODULE_DEVICE_TABLE(x86cpu, intel_idle_ids);
/*
* intel_idle_probe()
*/
-static int intel_idle_probe(void)
+static int __init intel_idle_probe(void)
{
unsigned int eax, ebx, ecx;
+ const struct x86_cpu_id *id;
if (max_cstate == 0) {
pr_debug(PREFIX "disabled\n");
return -EPERM;
}
- if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
- return -ENODEV;
-
- if (!boot_cpu_has(X86_FEATURE_MWAIT))
+ id = x86_match_cpu(intel_idle_ids);
+ if (!id) {
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
+ boot_cpu_data.x86 == 6)
+ pr_debug(PREFIX "does not run on family %d model %d\n",
+ boot_cpu_data.x86, boot_cpu_data.x86_model);
return -ENODEV;
+ }
if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF)
return -ENODEV;
@@ -353,50 +697,13 @@ static int intel_idle_probe(void)
pr_debug(PREFIX "MWAIT substates: 0x%x\n", mwait_substates);
-
- if (boot_cpu_data.x86 != 6) /* family 6 */
- return -ENODEV;
-
- switch (boot_cpu_data.x86_model) {
-
- case 0x1A: /* Core i7, Xeon 5500 series */
- case 0x1E: /* Core i7 and i5 Processor - Lynnfield Jasper Forest */
- case 0x1F: /* Core i7 and i5 Processor - Nehalem */
- case 0x2E: /* Nehalem-EX Xeon */
- case 0x2F: /* Westmere-EX Xeon */
- case 0x25: /* Westmere */
- case 0x2C: /* Westmere */
- cpuidle_state_table = nehalem_cstates;
- auto_demotion_disable_flags =
- (NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE);
- break;
-
- case 0x1C: /* 28 - Atom Processor */
- cpuidle_state_table = atom_cstates;
- break;
-
- case 0x26: /* 38 - Lincroft Atom Processor */
- cpuidle_state_table = atom_cstates;
- auto_demotion_disable_flags = ATM_LNC_C6_AUTO_DEMOTE;
- break;
-
- case 0x2A: /* SNB */
- case 0x2D: /* SNB Xeon */
- cpuidle_state_table = snb_cstates;
- break;
-
- default:
- pr_debug(PREFIX "does not run on family %d model %d\n",
- boot_cpu_data.x86, boot_cpu_data.x86_model);
- return -ENODEV;
- }
+ icpu = (const struct idle_cpu *)id->driver_data;
+ cpuidle_state_table = icpu->state_table;
if (boot_cpu_has(X86_FEATURE_ARAT)) /* Always Reliable APIC Timer */
lapic_timer_reliable_states = LAPIC_TIMER_ALWAYS_RELIABLE;
- else {
+ else
on_each_cpu(__setup_broadcast_timer, (void *)true, 1);
- register_cpu_notifier(&setup_broadcast_notifier);
- }
pr_debug(PREFIX "v" INTEL_IDLE_VERSION
" model 0x%X\n", boot_cpu_data.x86_model);
@@ -423,43 +730,77 @@ static void intel_idle_cpuidle_devices_uninit(void)
free_percpu(intel_idle_cpuidle_devices);
return;
}
+
+/*
+ * intel_idle_state_table_update()
+ *
+ * Update the default state_table for this CPU-id
+ *
+ * Currently used to access tuned IVT multi-socket targets
+ * Assumption: num_sockets == (max_package_num + 1)
+ */
+void intel_idle_state_table_update(void)
+{
+ /* IVT uses a different table for 1-2, 3-4, and > 4 sockets */
+ if (boot_cpu_data.x86_model == 0x3e) { /* IVT */
+ int cpu, package_num, num_sockets = 1;
+
+ for_each_online_cpu(cpu) {
+ package_num = topology_physical_package_id(cpu);
+ if (package_num + 1 > num_sockets) {
+ num_sockets = package_num + 1;
+
+ if (num_sockets > 4) {
+ cpuidle_state_table = ivt_cstates_8s;
+ return;
+ }
+ }
+ }
+
+ if (num_sockets > 2)
+ cpuidle_state_table = ivt_cstates_4s;
+ /* else, 1 and 2 socket systems use default ivt_cstates */
+ }
+ return;
+}
+
/*
* intel_idle_cpuidle_driver_init()
* allocate, initialize cpuidle_states
*/
-static int intel_idle_cpuidle_driver_init(void)
+static int __init intel_idle_cpuidle_driver_init(void)
{
int cstate;
struct cpuidle_driver *drv = &intel_idle_driver;
+ intel_idle_state_table_update();
+
drv->state_count = 1;
- for (cstate = 1; cstate < MWAIT_MAX_NUM_CSTATES; ++cstate) {
- int num_substates;
+ for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) {
+ int num_substates, mwait_hint, mwait_cstate;
+
+ if (cpuidle_state_table[cstate].enter == NULL)
+ break;
- if (cstate > max_cstate) {
+ if (cstate + 1 > max_cstate) {
printk(PREFIX "max_cstate %d reached\n",
max_cstate);
break;
}
- /* does the state exist in CPUID.MWAIT? */
- num_substates = (mwait_substates >> ((cstate) * 4))
+ mwait_hint = flg2MWAIT(cpuidle_state_table[cstate].flags);
+ mwait_cstate = MWAIT_HINT2CSTATE(mwait_hint);
+
+ /* number of sub-states for this state in CPUID.MWAIT */
+ num_substates = (mwait_substates >> ((mwait_cstate + 1) * 4))
& MWAIT_SUBSTATE_MASK;
+
+ /* if NO sub-states for this state in CPUID, skip it */
if (num_substates == 0)
continue;
- /* is the state not enabled? */
- if (cpuidle_state_table[cstate].enter == NULL) {
- /* does the driver not know about the state? */
- if (*cpuidle_state_table[cstate].name == '\0')
- pr_debug(PREFIX "unaware of model 0x%x"
- " MWAIT %d please"
- " contact lenb@kernel.org",
- boot_cpu_data.x86_model, cstate);
- continue;
- }
- if ((cstate > 2) &&
+ if (((mwait_cstate + 1) > 2) &&
!boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
mark_tsc_unstable("TSC halts in idle"
" states deeper than C2");
@@ -470,9 +811,12 @@ static int intel_idle_cpuidle_driver_init(void)
drv->state_count += 1;
}
- if (auto_demotion_disable_flags)
+ if (icpu->auto_demotion_disable_flags)
on_each_cpu(auto_demotion_disable, NULL, 1);
+ if (icpu->disable_promotion_to_c1e) /* each-cpu is redundant */
+ on_each_cpu(c1e_promotion_disable, NULL, 1);
+
return 0;
}
@@ -482,38 +826,12 @@ static int intel_idle_cpuidle_driver_init(void)
* allocate, initialize, register cpuidle_devices
* @cpu: cpu/core to initialize
*/
-int intel_idle_cpu_init(int cpu)
+static int intel_idle_cpu_init(int cpu)
{
- int cstate;
struct cpuidle_device *dev;
dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu);
- dev->state_count = 1;
-
- for (cstate = 1; cstate < MWAIT_MAX_NUM_CSTATES; ++cstate) {
- int num_substates;
-
- if (cstate > max_cstate) {
- printk(PREFIX "max_cstate %d reached\n",
- max_cstate);
- break;
- }
-
- /* does the state exist in CPUID.MWAIT? */
- num_substates = (mwait_substates >> ((cstate) * 4))
- & MWAIT_SUBSTATE_MASK;
- if (num_substates == 0)
- continue;
- /* is the state not enabled? */
- if (cpuidle_state_table[cstate].enter == NULL)
- continue;
-
- dev->states_usage[dev->state_count].driver_data =
- (void *)get_driver_data(cstate);
-
- dev->state_count += 1;
- }
dev->cpu = cpu;
if (cpuidle_register_device(dev)) {
@@ -522,12 +840,14 @@ int intel_idle_cpu_init(int cpu)
return -EIO;
}
- if (auto_demotion_disable_flags)
+ if (icpu->auto_demotion_disable_flags)
smp_call_function_single(cpu, auto_demotion_disable, NULL, 1);
+ if (icpu->disable_promotion_to_c1e)
+ smp_call_function_single(cpu, c1e_promotion_disable, NULL, 1);
+
return 0;
}
-EXPORT_SYMBOL_GPL(intel_idle_cpu_init);
static int __init intel_idle_init(void)
{
@@ -544,8 +864,9 @@ static int __init intel_idle_init(void)
intel_idle_cpuidle_driver_init();
retval = cpuidle_register_driver(&intel_idle_driver);
if (retval) {
+ struct cpuidle_driver *drv = cpuidle_get_driver();
printk(KERN_DEBUG PREFIX "intel_idle yielding to %s",
- cpuidle_get_driver()->name);
+ drv ? drv->name : "none");
return retval;
}
@@ -553,13 +874,19 @@ static int __init intel_idle_init(void)
if (intel_idle_cpuidle_devices == NULL)
return -ENOMEM;
+ cpu_notifier_register_begin();
+
for_each_online_cpu(i) {
retval = intel_idle_cpu_init(i);
if (retval) {
+ cpu_notifier_register_done();
cpuidle_unregister_driver(&intel_idle_driver);
return retval;
}
}
+ __register_cpu_notifier(&cpu_hotplug_notifier);
+
+ cpu_notifier_register_done();
return 0;
}
@@ -569,10 +896,13 @@ static void __exit intel_idle_exit(void)
intel_idle_cpuidle_devices_uninit();
cpuidle_unregister_driver(&intel_idle_driver);
- if (lapic_timer_reliable_states != LAPIC_TIMER_ALWAYS_RELIABLE) {
+ cpu_notifier_register_begin();
+
+ if (lapic_timer_reliable_states != LAPIC_TIMER_ALWAYS_RELIABLE)
on_each_cpu(__setup_broadcast_timer, (void *)false, 1);
- unregister_cpu_notifier(&setup_broadcast_notifier);
- }
+ __unregister_cpu_notifier(&cpu_hotplug_notifier);
+
+ cpu_notifier_register_done();
return;
}