aboutsummaryrefslogtreecommitdiff
path: root/arch/x86/kernel/cpu/mcheck
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel/cpu/mcheck')
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce-inject.c5
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c20
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_amd.c135
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_intel.c2
-rw-r--r--arch/x86/kernel/cpu/mcheck/therm_throt.c40
5 files changed, 131 insertions, 71 deletions
diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c
index e7dbde7bfed..a7797197956 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-inject.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
@@ -25,6 +25,7 @@
#include <linux/gfp.h>
#include <asm/mce.h>
#include <asm/apic.h>
+#include <asm/nmi.h>
/* Update fake mce registers on current CPU. */
static void inject_mce(struct mce *m)
@@ -83,7 +84,7 @@ static int mce_raise_notify(struct notifier_block *self,
struct die_args *args = (struct die_args *)data;
int cpu = smp_processor_id();
struct mce *m = &__get_cpu_var(injectm);
- if (val != DIE_NMI_IPI || !cpumask_test_cpu(cpu, mce_inject_cpumask))
+ if (val != DIE_NMI || !cpumask_test_cpu(cpu, mce_inject_cpumask))
return NOTIFY_DONE;
cpumask_clear_cpu(cpu, mce_inject_cpumask);
if (m->inject_flags & MCJ_EXCEPTION)
@@ -95,7 +96,7 @@ static int mce_raise_notify(struct notifier_block *self,
static struct notifier_block mce_raise_nb = {
.notifier_call = mce_raise_notify,
- .priority = 1000,
+ .priority = NMI_LOCAL_NORMAL_PRIOR,
};
/* Inject mce on current CPU */
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 7a35b72d7c0..d916183b7f9 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -326,7 +326,7 @@ static void mce_panic(char *msg, struct mce *final, char *exp)
static int msr_to_offset(u32 msr)
{
- unsigned bank = __get_cpu_var(injectm.bank);
+ unsigned bank = __this_cpu_read(injectm.bank);
if (msr == rip_msr)
return offsetof(struct mce, ip);
@@ -346,7 +346,7 @@ static u64 mce_rdmsrl(u32 msr)
{
u64 v;
- if (__get_cpu_var(injectm).finished) {
+ if (__this_cpu_read(injectm.finished)) {
int offset = msr_to_offset(msr);
if (offset < 0)
@@ -369,7 +369,7 @@ static u64 mce_rdmsrl(u32 msr)
static void mce_wrmsrl(u32 msr, u64 v)
{
- if (__get_cpu_var(injectm).finished) {
+ if (__this_cpu_read(injectm.finished)) {
int offset = msr_to_offset(msr);
if (offset >= 0)
@@ -1159,7 +1159,7 @@ static void mce_start_timer(unsigned long data)
WARN_ON(smp_processor_id() != data);
- if (mce_available(&current_cpu_data)) {
+ if (mce_available(__this_cpu_ptr(&cpu_info))) {
machine_check_poll(MCP_TIMESTAMP,
&__get_cpu_var(mce_poll_banks));
}
@@ -1767,7 +1767,7 @@ static int mce_shutdown(struct sys_device *dev)
static int mce_resume(struct sys_device *dev)
{
__mcheck_cpu_init_generic();
- __mcheck_cpu_init_vendor(&current_cpu_data);
+ __mcheck_cpu_init_vendor(__this_cpu_ptr(&cpu_info));
return 0;
}
@@ -1775,7 +1775,7 @@ static int mce_resume(struct sys_device *dev)
static void mce_cpu_restart(void *data)
{
del_timer_sync(&__get_cpu_var(mce_timer));
- if (!mce_available(&current_cpu_data))
+ if (!mce_available(__this_cpu_ptr(&cpu_info)))
return;
__mcheck_cpu_init_generic();
__mcheck_cpu_init_timer();
@@ -1790,7 +1790,7 @@ static void mce_restart(void)
/* Toggle features for corrected errors */
static void mce_disable_ce(void *all)
{
- if (!mce_available(&current_cpu_data))
+ if (!mce_available(__this_cpu_ptr(&cpu_info)))
return;
if (all)
del_timer_sync(&__get_cpu_var(mce_timer));
@@ -1799,7 +1799,7 @@ static void mce_disable_ce(void *all)
static void mce_enable_ce(void *all)
{
- if (!mce_available(&current_cpu_data))
+ if (!mce_available(__this_cpu_ptr(&cpu_info)))
return;
cmci_reenable();
cmci_recheck();
@@ -2022,7 +2022,7 @@ static void __cpuinit mce_disable_cpu(void *h)
unsigned long action = *(unsigned long *)h;
int i;
- if (!mce_available(&current_cpu_data))
+ if (!mce_available(__this_cpu_ptr(&cpu_info)))
return;
if (!(action & CPU_TASKS_FROZEN))
@@ -2040,7 +2040,7 @@ static void __cpuinit mce_reenable_cpu(void *h)
unsigned long action = *(unsigned long *)h;
int i;
- if (!mce_available(&current_cpu_data))
+ if (!mce_available(__this_cpu_ptr(&cpu_info)))
return;
if (!(action & CPU_TASKS_FROZEN))
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 80c482382d5..5bf2fac52ac 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -31,8 +31,6 @@
#include <asm/mce.h>
#include <asm/msr.h>
-#define PFX "mce_threshold: "
-#define VERSION "version 1.1.1"
#define NR_BANKS 6
#define NR_BLOCKS 9
#define THRESHOLD_MAX 0xFFF
@@ -59,12 +57,6 @@ struct threshold_block {
struct list_head miscj;
};
-/* defaults used early on boot */
-static struct threshold_block threshold_defaults = {
- .interrupt_enable = 0,
- .threshold_limit = THRESHOLD_MAX,
-};
-
struct threshold_bank {
struct kobject *kobj;
struct threshold_block *blocks;
@@ -89,50 +81,101 @@ static void amd_threshold_interrupt(void);
struct thresh_restart {
struct threshold_block *b;
int reset;
+ int set_lvt_off;
+ int lvt_off;
u16 old_limit;
};
+static int lvt_off_valid(struct threshold_block *b, int apic, u32 lo, u32 hi)
+{
+ int msr = (hi & MASK_LVTOFF_HI) >> 20;
+
+ if (apic < 0) {
+ pr_err(FW_BUG "cpu %d, failed to setup threshold interrupt "
+ "for bank %d, block %d (MSR%08X=0x%x%08x)\n", b->cpu,
+ b->bank, b->block, b->address, hi, lo);
+ return 0;
+ }
+
+ if (apic != msr) {
+ pr_err(FW_BUG "cpu %d, invalid threshold interrupt offset %d "
+ "for bank %d, block %d (MSR%08X=0x%x%08x)\n",
+ b->cpu, apic, b->bank, b->block, b->address, hi, lo);
+ return 0;
+ }
+
+ return 1;
+};
+
/* must be called with correct cpu affinity */
/* Called via smp_call_function_single() */
static void threshold_restart_bank(void *_tr)
{
struct thresh_restart *tr = _tr;
- u32 mci_misc_hi, mci_misc_lo;
+ u32 hi, lo;
- rdmsr(tr->b->address, mci_misc_lo, mci_misc_hi);
+ rdmsr(tr->b->address, lo, hi);
- if (tr->b->threshold_limit < (mci_misc_hi & THRESHOLD_MAX))
+ if (tr->b->threshold_limit < (hi & THRESHOLD_MAX))
tr->reset = 1; /* limit cannot be lower than err count */
if (tr->reset) { /* reset err count and overflow bit */
- mci_misc_hi =
- (mci_misc_hi & ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI)) |
+ hi =
+ (hi & ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI)) |
(THRESHOLD_MAX - tr->b->threshold_limit);
} else if (tr->old_limit) { /* change limit w/o reset */
- int new_count = (mci_misc_hi & THRESHOLD_MAX) +
+ int new_count = (hi & THRESHOLD_MAX) +
(tr->old_limit - tr->b->threshold_limit);
- mci_misc_hi = (mci_misc_hi & ~MASK_ERR_COUNT_HI) |
+ hi = (hi & ~MASK_ERR_COUNT_HI) |
(new_count & THRESHOLD_MAX);
}
+ if (tr->set_lvt_off) {
+ if (lvt_off_valid(tr->b, tr->lvt_off, lo, hi)) {
+ /* set new lvt offset */
+ hi &= ~MASK_LVTOFF_HI;
+ hi |= tr->lvt_off << 20;
+ }
+ }
+
tr->b->interrupt_enable ?
- (mci_misc_hi = (mci_misc_hi & ~MASK_INT_TYPE_HI) | INT_TYPE_APIC) :
- (mci_misc_hi &= ~MASK_INT_TYPE_HI);
+ (hi = (hi & ~MASK_INT_TYPE_HI) | INT_TYPE_APIC) :
+ (hi &= ~MASK_INT_TYPE_HI);
- mci_misc_hi |= MASK_COUNT_EN_HI;
- wrmsr(tr->b->address, mci_misc_lo, mci_misc_hi);
+ hi |= MASK_COUNT_EN_HI;
+ wrmsr(tr->b->address, lo, hi);
+}
+
+static void mce_threshold_block_init(struct threshold_block *b, int offset)
+{
+ struct thresh_restart tr = {
+ .b = b,
+ .set_lvt_off = 1,
+ .lvt_off = offset,
+ };
+
+ b->threshold_limit = THRESHOLD_MAX;
+ threshold_restart_bank(&tr);
+};
+
+static int setup_APIC_mce(int reserved, int new)
+{
+ if (reserved < 0 && !setup_APIC_eilvt(new, THRESHOLD_APIC_VECTOR,
+ APIC_EILVT_MSG_FIX, 0))
+ return new;
+
+ return reserved;
}
/* cpu init entry point, called from mce.c with preempt off */
void mce_amd_feature_init(struct cpuinfo_x86 *c)
{
+ struct threshold_block b;
unsigned int cpu = smp_processor_id();
u32 low = 0, high = 0, address = 0;
unsigned int bank, block;
- struct thresh_restart tr;
- int lvt_off = -1;
- u8 offset;
+ int offset = -1;
for (bank = 0; bank < NR_BANKS; ++bank) {
for (block = 0; block < NR_BLOCKS; ++block) {
@@ -163,39 +206,16 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
if (shared_bank[bank] && c->cpu_core_id)
break;
#endif
- offset = (high & MASK_LVTOFF_HI) >> 20;
- if (lvt_off < 0) {
- if (setup_APIC_eilvt(offset,
- THRESHOLD_APIC_VECTOR,
- APIC_EILVT_MSG_FIX, 0)) {
- pr_err(FW_BUG "cpu %d, failed to "
- "setup threshold interrupt "
- "for bank %d, block %d "
- "(MSR%08X=0x%x%08x)",
- smp_processor_id(), bank, block,
- address, high, low);
- continue;
- }
- lvt_off = offset;
- } else if (lvt_off != offset) {
- pr_err(FW_BUG "cpu %d, invalid threshold "
- "interrupt offset %d for bank %d,"
- "block %d (MSR%08X=0x%x%08x)",
- smp_processor_id(), lvt_off, bank,
- block, address, high, low);
- continue;
- }
-
- high &= ~MASK_LVTOFF_HI;
- high |= lvt_off << 20;
- wrmsr(address, low, high);
+ offset = setup_APIC_mce(offset,
+ (high & MASK_LVTOFF_HI) >> 20);
- threshold_defaults.address = address;
- tr.b = &threshold_defaults;
- tr.reset = 0;
- tr.old_limit = 0;
- threshold_restart_bank(&tr);
+ memset(&b, 0, sizeof(b));
+ b.cpu = cpu;
+ b.bank = bank;
+ b.block = block;
+ b.address = address;
+ mce_threshold_block_init(&b, offset);
mce_threshold_vector = amd_threshold_interrupt;
}
}
@@ -298,9 +318,8 @@ store_interrupt_enable(struct threshold_block *b, const char *buf, size_t size)
b->interrupt_enable = !!new;
+ memset(&tr, 0, sizeof(tr));
tr.b = b;
- tr.reset = 0;
- tr.old_limit = 0;
smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1);
@@ -321,10 +340,10 @@ store_threshold_limit(struct threshold_block *b, const char *buf, size_t size)
if (new < 1)
new = 1;
+ memset(&tr, 0, sizeof(tr));
tr.old_limit = b->threshold_limit;
b->threshold_limit = new;
tr.b = b;
- tr.reset = 0;
smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1);
@@ -603,9 +622,9 @@ static __cpuinit int threshold_create_device(unsigned int cpu)
continue;
err = threshold_create_bank(cpu, bank);
if (err)
- goto out;
+ return err;
}
-out:
+
return err;
}
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c
index 6fcd0936194..8694ef56459 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c
@@ -130,7 +130,7 @@ void cmci_recheck(void)
unsigned long flags;
int banks;
- if (!mce_available(&current_cpu_data) || !cmci_supported(&banks))
+ if (!mce_available(__this_cpu_ptr(&cpu_info)) || !cmci_supported(&banks))
return;
local_irq_save(flags);
machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned));
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index 4b683267eca..e12246ff5aa 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -53,8 +53,13 @@ struct thermal_state {
struct _thermal_state core_power_limit;
struct _thermal_state package_throttle;
struct _thermal_state package_power_limit;
+ struct _thermal_state core_thresh0;
+ struct _thermal_state core_thresh1;
};
+/* Callback to handle core threshold interrupts */
+int (*platform_thermal_notify)(__u64 msr_val);
+
static DEFINE_PER_CPU(struct thermal_state, thermal_state);
static atomic_t therm_throt_en = ATOMIC_INIT(0);
@@ -200,6 +205,22 @@ static int therm_throt_process(bool new_event, int event, int level)
return 0;
}
+static int thresh_event_valid(int event)
+{
+ struct _thermal_state *state;
+ unsigned int this_cpu = smp_processor_id();
+ struct thermal_state *pstate = &per_cpu(thermal_state, this_cpu);
+ u64 now = get_jiffies_64();
+
+ state = (event == 0) ? &pstate->core_thresh0 : &pstate->core_thresh1;
+
+ if (time_before64(now, state->next_check))
+ return 0;
+
+ state->next_check = now + CHECK_INTERVAL;
+ return 1;
+}
+
#ifdef CONFIG_SYSFS
/* Add/Remove thermal_throttle interface for CPU device: */
static __cpuinit int thermal_throttle_add_dev(struct sys_device *sys_dev,
@@ -313,6 +334,22 @@ device_initcall(thermal_throttle_init_device);
#define PACKAGE_THROTTLED ((__u64)2 << 62)
#define PACKAGE_POWER_LIMIT ((__u64)3 << 62)
+static void notify_thresholds(__u64 msr_val)
+{
+ /* check whether the interrupt handler is defined;
+ * otherwise simply return
+ */
+ if (!platform_thermal_notify)
+ return;
+
+ /* lower threshold reached */
+ if ((msr_val & THERM_LOG_THRESHOLD0) && thresh_event_valid(0))
+ platform_thermal_notify(msr_val);
+ /* higher threshold reached */
+ if ((msr_val & THERM_LOG_THRESHOLD1) && thresh_event_valid(1))
+ platform_thermal_notify(msr_val);
+}
+
/* Thermal transition interrupt handler */
static void intel_thermal_interrupt(void)
{
@@ -321,6 +358,9 @@ static void intel_thermal_interrupt(void)
rdmsrl(MSR_IA32_THERM_STATUS, msr_val);
+ /* Check for violation of core thermal thresholds*/
+ notify_thresholds(msr_val);
+
if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT,
THERMAL_THROTTLING_EVENT,
CORE_LEVEL) != 0)