aboutsummaryrefslogtreecommitdiff
path: root/arch/x86/kernel/cpu/mcheck
diff options
context:
space:
mode:
author Tony Luck <tony.luck@intel.com> 2011-12-14 15:55:20 -0800
committer Tony Luck <tony.luck@intel.com> 2012-01-03 12:06:53 -0800
commit af104e394e17e328df85c25a9e21448539725b67 (patch)
tree e83de1e677bd3a184540f748f22b719552e92e7a /arch/x86/kernel/cpu/mcheck
parent 85f92694affa7dba7f1978666a69552b5dfc628e (diff)
x86/mce: Add mechanism to safely save information in MCE handler
Machine checks on Intel cpus interrupt execution on all cpus, regardless of interrupt masking. We have a need to save some data about the cause of the machine check (physical address) in the machine check handler that can be retrieved later to attempt recovery in a more flexible execution state.

Signed-off-by: Tony Luck <tony.luck@intel.com>
Diffstat (limited to 'arch/x86/kernel/cpu/mcheck')
-rw-r--r-- arch/x86/kernel/cpu/mcheck/mce.c | 43
1 files changed, 43 insertions, 0 deletions
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 2f1c200f05e..e1579c5a71d 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -887,6 +887,49 @@ static void mce_clear_state(unsigned long *toclear)
}
/*
+ * Need to save faulting physical address associated with a process
+ * in the machine check handler some place where we can grab it back
+ * later in mce_notify_process()
+ */
+#define MCE_INFO_MAX 16 /* number of slots in mce_info[] */
+
+struct mce_info {
+ atomic_t inuse; /* 0 = free, 1 = claimed (set by atomic_cmpxchg in mce_save_info) */
+ struct task_struct *t; /* value of current when the slot was filled */
+ __u64 paddr; /* address handed to mce_save_info() */
+} mce_info[MCE_INFO_MAX]; /* fixed-size table, scanned linearly by the helpers */
+/* Save addr and current in the first free mce_info[] slot; mce_panic() if none is free. */
+static void mce_save_info(__u64 addr)
+{
+ struct mce_info *mi;
+
+ for (mi = mce_info; mi < &mce_info[MCE_INFO_MAX]; mi++) {
+ if (atomic_cmpxchg(&mi->inuse, 0, 1) == 0) { /* saw 0, so this caller now owns the slot */
+ mi->t = current; /* the key mce_find_info() compares against */
+ mi->paddr = addr; /* NOTE(review): t/paddr are stored after inuse is already 1 - confirm no reader can match a stale t */
+ return;
+ }
+ }
+
+ mce_panic("Too many concurrent recoverable errors", NULL, NULL); /* every slot was already in use */
+}
+/* Return the in-use mce_info[] slot whose t equals current, or NULL if there is none. */
+static struct mce_info *mce_find_info(void)
+{
+ struct mce_info *mi;
+
+ for (mi = mce_info; mi < &mce_info[MCE_INFO_MAX]; mi++)
+ if (atomic_read(&mi->inuse) && mi->t == current) /* free slots are skipped even if t matches */
+ return mi; /* first match in table order */
+ return NULL; /* no in-use slot belongs to current */
+}
+/* Mark slot mi free so mce_save_info() can claim it again. */
+static void mce_clear_info(struct mce_info *mi)
+{
+ atomic_set(&mi->inuse, 0); /* only the flag is reset; t and paddr keep their old values */
+}
+
+/*
* The actual machine check handler. This only handles real
* exceptions when something got corrupted coming in through int 18.
*