diff options
author | Tony Luck <tony.luck@intel.com> | 2011-12-14 15:55:20 -0800 |
---|---|---|
committer | Tony Luck <tony.luck@intel.com> | 2012-01-03 12:06:53 -0800 |
commit | af104e394e17e328df85c25a9e21448539725b67 (patch) | |
tree | e83de1e677bd3a184540f748f22b719552e92e7a /arch/x86/kernel/cpu/mcheck | |
parent | 85f92694affa7dba7f1978666a69552b5dfc628e (diff) |
x86/mce: Add mechanism to safely save information in MCE handler
Machine checks on Intel cpus interrupt execution on all cpus, regardless
of interrupt masking. We have a need to save some data about the cause
of the machine check (physical address) in the machine check handler that
can be retrieved later to attempt recovery in a more flexible execution
state.
Signed-off-by: Tony Luck <tony.luck@intel.com>
Diffstat (limited to 'arch/x86/kernel/cpu/mcheck')
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce.c | 43 |
1 files changed, 43 insertions, 0 deletions
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 2f1c200f05e..e1579c5a71d 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -887,6 +887,49 @@ static void mce_clear_state(unsigned long *toclear) } /* + * Need to save faulting physical address associated with a process + * in the machine check handler some place where we can grab it back + * later in mce_notify_process() + */ +#define MCE_INFO_MAX 16 + +struct mce_info { + atomic_t inuse; + struct task_struct *t; + __u64 paddr; +} mce_info[MCE_INFO_MAX]; + +static void mce_save_info(__u64 addr) +{ + struct mce_info *mi; + + for (mi = mce_info; mi < &mce_info[MCE_INFO_MAX]; mi++) { + if (atomic_cmpxchg(&mi->inuse, 0, 1) == 0) { + mi->t = current; + mi->paddr = addr; + return; + } + } + + mce_panic("Too many concurrent recoverable errors", NULL, NULL); +} + +static struct mce_info *mce_find_info(void) +{ + struct mce_info *mi; + + for (mi = mce_info; mi < &mce_info[MCE_INFO_MAX]; mi++) + if (atomic_read(&mi->inuse) && mi->t == current) + return mi; + return NULL; +} + +static void mce_clear_info(struct mce_info *mi) +{ + atomic_set(&mi->inuse, 0); +} + +/* * The actual machine check handler. This only handles real * exceptions when something got corrupted coming in through int 18. * |