From cd42f4a3b2b1c4cbd997363dc57821953d73fd87 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Thu, 15 Dec 2011 10:48:12 -0800 Subject: HWPOISON: Clean up memory_failure() vs. __memory_failure() There is only one caller of memory_failure(), all other users call __memory_failure() and pass in the flags argument explicitly. The lone user of memory_failure() will soon need to pass flags too. Add flags argument to the callsite in mce.c. Delete the old memory_failure() function, and then rename __memory_failure() without the leading "__". Provide clearer message when action optional memory errors are ignored. Acked-by: Borislav Petkov Signed-off-by: Tony Luck --- arch/x86/kernel/cpu/mcheck/mce.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 2af127d4c3d..1a08ce5f345 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -1046,11 +1046,15 @@ out: } EXPORT_SYMBOL_GPL(do_machine_check); -/* dummy to break dependency. actual code is in mm/memory-failure.c */ -void __attribute__((weak)) memory_failure(unsigned long pfn, int vector) +#ifndef CONFIG_MEMORY_FAILURE +int memory_failure(unsigned long pfn, int vector, int flags) { - printk(KERN_ERR "Action optional memory failure at %lx ignored\n", pfn); + printk(KERN_ERR "Uncorrected memory error in page 0x%lx ignored\n" + "Rebuild kernel with CONFIG_MEMORY_FAILURE=y for smarter handling\n", pfn); + + return 0; } +#endif /* * Called after mce notification in process context. This code @@ -1068,7 +1072,7 @@ void mce_notify_process(void) unsigned long pfn; mce_notify_irq(); while (mce_ring_get(&pfn)) - memory_failure(pfn, MCE_VECTOR); + memory_failure(pfn, MCE_VECTOR, 0); } static void mce_process_work(struct work_struct *dummy) -- cgit v1.2.3-18-g5258 From 85f92694affa7dba7f1978666a69552b5dfc628e Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Tue, 13 Dec 2011 09:48:13 -0800 Subject: x86/mce: Create helper function to save addr/misc when needed The MCI_STATUS_MISCV and MCI_STATUS_ADDRV bits in the bank status registers define whether the MISC and ADDR registers respectively contain valid data - provide a helper function to check these bits and read the registers when needed. In addition, processors that support software error recovery (as indicated by the MCG_SER_P bit in the MCG_CAP register) may include some undefined bits in the ADDR register - mask these out. Acked-by: Borislav Petkov Signed-off-by: Tony Luck --- arch/x86/kernel/cpu/mcheck/mce.c | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 1a08ce5f345..2f1c200f05e 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -492,6 +492,27 @@ static void mce_report_event(struct pt_regs *regs) irq_work_queue(&__get_cpu_var(mce_irq_work)); } +/* + * Read ADDR and MISC registers. + */ +static void mce_read_aux(struct mce *m, int i) +{ + if (m->status & MCI_STATUS_MISCV) + m->misc = mce_rdmsrl(MSR_IA32_MCx_MISC(i)); + if (m->status & MCI_STATUS_ADDRV) { + m->addr = mce_rdmsrl(MSR_IA32_MCx_ADDR(i)); + + /* + * Mask the reported address by the reported granularity. + */ + if (mce_ser && (m->status & MCI_STATUS_MISCV)) { + u8 shift = MCI_MISC_ADDR_LSB(m->misc); + m->addr >>= shift; + m->addr <<= shift; + } + } +} + DEFINE_PER_CPU(unsigned, mce_poll_count); /* @@ -542,10 +563,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) (m.status & (mce_ser ? MCI_STATUS_S : MCI_STATUS_UC))) continue; - if (m.status & MCI_STATUS_MISCV) - m.misc = mce_rdmsrl(MSR_IA32_MCx_MISC(i)); - if (m.status & MCI_STATUS_ADDRV) - m.addr = mce_rdmsrl(MSR_IA32_MCx_ADDR(i)); + mce_read_aux(&m, i); if (!(flags & MCP_TIMESTAMP)) m.tsc = 0; @@ -981,10 +999,7 @@ void do_machine_check(struct pt_regs *regs, long error_code) if (severity == MCE_AR_SEVERITY) kill_it = 1; - if (m.status & MCI_STATUS_MISCV) - m.misc = mce_rdmsrl(MSR_IA32_MCx_MISC(i)); - if (m.status & MCI_STATUS_ADDRV) - m.addr = mce_rdmsrl(MSR_IA32_MCx_ADDR(i)); + mce_read_aux(&m, i); /* * Action optional error. Queue address for later processing. -- cgit v1.2.3-18-g5258 From af104e394e17e328df85c25a9e21448539725b67 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Wed, 14 Dec 2011 15:55:20 -0800 Subject: x86/mce: Add mechanism to safely save information in MCE handler Machine checks on Intel cpus interrupt execution on all cpus, regardless of interrupt masking. We have a need to save some data about the cause of the machine check (physical address) in the machine check handler that can be retrieved later to attempt recovery in a more flexible execution state. Signed-off-by: Tony Luck --- arch/x86/kernel/cpu/mcheck/mce.c | 43 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 2f1c200f05e..e1579c5a71d 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -886,6 +886,49 @@ static void mce_clear_state(unsigned long *toclear) } } +/* + * Need to save faulting physical address associated with a process + * in the machine check handler some place where we can grab it back + * later in mce_notify_process() + */ +#define MCE_INFO_MAX 16 + +struct mce_info { + atomic_t inuse; + struct task_struct *t; + __u64 paddr; +} mce_info[MCE_INFO_MAX]; + +static void mce_save_info(__u64 addr) +{ + struct mce_info *mi; + + for (mi = mce_info; mi < &mce_info[MCE_INFO_MAX]; mi++) { + if (atomic_cmpxchg(&mi->inuse, 0, 1) == 0) { + mi->t = current; + mi->paddr = addr; + return; + } + } + + mce_panic("Too many concurrent recoverable errors", NULL, NULL); +} + +static struct mce_info *mce_find_info(void) +{ + struct mce_info *mi; + + for (mi = mce_info; mi < &mce_info[MCE_INFO_MAX]; mi++) + if (atomic_read(&mi->inuse) && mi->t == current) + return mi; + return NULL; +} + +static void mce_clear_info(struct mce_info *mi) +{ + atomic_set(&mi->inuse, 0); +} + /* * The actual machine check handler. This only handles real * exceptions when something got corrupted coming in through int 18. -- cgit v1.2.3-18-g5258 From a8c321fbf9aeced45519248e5901af8cbc240510 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Tue, 3 Jan 2012 11:45:45 -0800 Subject: x86/mce: Handle "action required" errors All non-urgent actions (reporting low severity errors and handling "action-optional" errors) are now handled by a work queue. This means that TIF_MCE_NOTIFY can be used to block execution for a thread experiencing an "action-required" fault until we get all cpus out of the machine check handler (and the thread that hit the fault into mce_notify_process(). We use the new mce_{save,find,clear}_info() API to get information from do_machine_check() to mce_notify_process(), and then use the newly improved memory_failure(..., MF_ACTION_REQUIRED) to handle the error (possibly signalling the process). Update some comments to make the new code flows clearer. Signed-off-by: Tony Luck --- arch/x86/kernel/cpu/mcheck/mce.c | 95 ++++++++++++++++++++++------------------ 1 file changed, 53 insertions(+), 42 deletions(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index e1579c5a71d..56e4e79387c 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -982,7 +982,9 @@ void do_machine_check(struct pt_regs *regs, long error_code) barrier(); /* - * When no restart IP must always kill or panic. + * When no restart IP might need to kill or panic. + * Assume the worst for now, but if we find the + * severity is MCE_AR_SEVERITY we have other options. */ if (!(m.mcgstatus & MCG_STATUS_RIPV)) kill_it = 1; @@ -1036,12 +1038,6 @@ void do_machine_check(struct pt_regs *regs, long error_code) continue; } - /* - * Kill on action required. - */ - if (severity == MCE_AR_SEVERITY) - kill_it = 1; - mce_read_aux(&m, i); /* @@ -1062,6 +1058,9 @@ void do_machine_check(struct pt_regs *regs, long error_code) } } + /* mce_clear_state will clear *final, save locally for use later */ + m = *final; + if (!no_way_out) mce_clear_state(toclear); @@ -1073,27 +1072,22 @@ void do_machine_check(struct pt_regs *regs, long error_code) no_way_out = worst >= MCE_PANIC_SEVERITY; /* - * If we have decided that we just CAN'T continue, and the user - * has not set tolerant to an insane level, give up and die. - * - * This is mainly used in the case when the system doesn't - * support MCE broadcasting or it has been disabled. + * At insane "tolerant" levels we take no action. Otherwise + * we only die if we have no other choice. For less serious + * issues we try to recover, or limit damage to the current + * process. */ - if (no_way_out && tolerant < 3) - mce_panic("Fatal machine check on current CPU", final, msg); - - /* - * If the error seems to be unrecoverable, something should be - * done. Try to kill as little as possible. If we can kill just - * one task, do that. If the user has set the tolerance very - * high, don't try to do anything at all. - */ - - if (kill_it && tolerant < 3) - force_sig(SIGBUS, current); - - /* notify userspace ASAP */ - set_thread_flag(TIF_MCE_NOTIFY); + if (tolerant < 3) { + if (no_way_out) + mce_panic("Fatal machine check on current CPU", &m, msg); + if (worst == MCE_AR_SEVERITY) { + /* schedule action before return to userland */ + mce_save_info(m.addr); + set_thread_flag(TIF_MCE_NOTIFY); + } else if (kill_it) { + force_sig(SIGBUS, current); + } + } if (worst > 0) mce_report_event(regs); @@ -1107,6 +1101,8 @@ EXPORT_SYMBOL_GPL(do_machine_check); #ifndef CONFIG_MEMORY_FAILURE int memory_failure(unsigned long pfn, int vector, int flags) { + /* mce_severity() should not hand us an ACTION_REQUIRED error */ + BUG_ON(flags & MF_ACTION_REQUIRED); printk(KERN_ERR "Uncorrected memory error in page 0x%lx ignored\n" "Rebuild kernel with CONFIG_MEMORY_FAILURE=y for smarter handling\n", pfn); @@ -1115,27 +1111,44 @@ int memory_failure(unsigned long pfn, int vector, int flags) #endif /* - * Called after mce notification in process context. This code - * is allowed to sleep. Call the high level VM handler to process - * any corrupted pages. - * Assume that the work queue code only calls this one at a time - * per CPU. - * Note we don't disable preemption, so this code might run on the wrong - * CPU. In this case the event is picked up by the scheduled work queue. - * This is merely a fast path to expedite processing in some common - * cases. + * Called in process context that interrupted by MCE and marked with + * TIF_MCE_NOTIFY, just before returning to erroneous userland. + * This code is allowed to sleep. + * Attempt possible recovery such as calling the high level VM handler to + * process any corrupted pages, and kill/signal current process if required. + * Action required errors are handled here. */ void mce_notify_process(void) { unsigned long pfn; - mce_notify_irq(); - while (mce_ring_get(&pfn)) - memory_failure(pfn, MCE_VECTOR, 0); + struct mce_info *mi = mce_find_info(); + + if (!mi) + mce_panic("Lost physical address for unconsumed uncorrectable error", NULL, NULL); + pfn = mi->paddr >> PAGE_SHIFT; + + clear_thread_flag(TIF_MCE_NOTIFY); + + pr_err("Uncorrected hardware memory error in user-access at %llx", + mi->paddr); + if (memory_failure(pfn, MCE_VECTOR, MF_ACTION_REQUIRED) < 0) { + pr_err("Memory error not recovered"); + force_sig(SIGBUS, current); + } + mce_clear_info(mi); } +/* + * Action optional processing happens here (picking up + * from the list of faulting pages that do_machine_check() + * placed into the "ring"). + */ static void mce_process_work(struct work_struct *dummy) { - mce_notify_process(); + unsigned long pfn; + + while (mce_ring_get(&pfn)) + memory_failure(pfn, MCE_VECTOR, 0); } #ifdef CONFIG_X86_MCE_INTEL @@ -1225,8 +1238,6 @@ int mce_notify_irq(void) /* Not more than two messages every minute */ static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2); - clear_thread_flag(TIF_MCE_NOTIFY); - if (test_and_clear_bit(0, &mce_need_notify)) { /* wake processes polling /dev/mcelog */ wake_up_interruptible(&mce_chrdev_wait); -- cgit v1.2.3-18-g5258 From 5f7b88d51e89771f64c15903b96b5933dd0bc6d8 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Tue, 3 Jan 2012 11:48:04 -0800 Subject: x86/mce: Recognise machine check bank signature for data path error Action required data path signature is defined in table 15-19 of SDM: +-----------------------------------------------------------------------------+ | SRAR Error | Valid | OVER | UC | EN | MISCV | ADDRV | PCC | S | AR | MCACOD | | Data Load | 1 | 0 | 1 | 1 | 1 | 1 | 0 | 1 | 1 | 0x134 | +-----------------------------------------------------------------------------+ Recognise this, and pass MCE_AR_SEVERITY code back to do_machine_check() if we have the action handler configured (CONFIG_MEMORY_FAILURE=y) Acked-by: Borislav Petkov Signed-off-by: Tony Luck --- arch/x86/kernel/cpu/mcheck/mce-severity.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c index 7395d5f4272..f6c92f99efa 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-severity.c +++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c @@ -54,6 +54,7 @@ static struct severity { #define MASK(x, y) .mask = x, .result = y #define MCI_UC_S (MCI_STATUS_UC|MCI_STATUS_S) #define MCI_UC_SAR (MCI_STATUS_UC|MCI_STATUS_S|MCI_STATUS_AR) +#define MCI_ADDR (MCI_STATUS_ADDRV|MCI_STATUS_MISCV) #define MCACOD 0xffff MCESEV( @@ -102,11 +103,24 @@ static struct severity { SER, BITCLR(MCI_STATUS_S) ), - /* AR add known MCACODs here */ MCESEV( PANIC, "Action required with lost events", SER, BITSET(MCI_STATUS_OVER|MCI_UC_SAR) ), + + /* known AR MCACODs: */ +#ifdef CONFIG_MEMORY_FAILURE + MCESEV( + KEEP, "HT thread notices Action required: data load error", + SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|0x0134), + MCGMASK(MCG_STATUS_EIPV, 0) + ), + MCESEV( + AR, "Action required: data load error", + SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|0x0134), + USER + ), +#endif MCESEV( PANIC, "Action required: unknown MCACOD", SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_SAR) -- cgit v1.2.3-18-g5258 From 08dda402d60a721ac94e79efd7646b332be3e3b2 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Thu, 26 Jan 2012 16:02:22 -0800 Subject: x86/mce: Replace hard coded hex constants with symbolic defines Magic constants like 0x0134 in code just invite questions on where they come from, what they mean, can they be changed. Provide #defines for the architecturally defined MCACOD values with a reference to the Intel Software Developers manual which describes them. Signed-off-by: Tony Luck --- arch/x86/kernel/cpu/mcheck/mce-severity.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c index f6c92f99efa..0c82091b165 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-severity.c +++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c @@ -56,6 +56,12 @@ static struct severity { #define MCI_UC_SAR (MCI_STATUS_UC|MCI_STATUS_S|MCI_STATUS_AR) #define MCI_ADDR (MCI_STATUS_ADDRV|MCI_STATUS_MISCV) #define MCACOD 0xffff +/* Architecturally defined codes from SDM Vol. 3B Chapter 15 */ +#define MCACOD_SCRUB 0x00C0 /* 0xC0-0xCF Memory Scrubbing */ +#define MCACOD_SCRUBMSK 0xfff0 +#define MCACOD_L3WB 0x017A /* L3 Explicit Writeback */ +#define MCACOD_DATA 0x0134 /* Data Load */ +#define MCACOD_INSTR 0x0150 /* Instruction Fetch */ MCESEV( NO, "Invalid", @@ -112,12 +118,12 @@ static struct severity { #ifdef CONFIG_MEMORY_FAILURE MCESEV( KEEP, "HT thread notices Action required: data load error", - SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|0x0134), + SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA), MCGMASK(MCG_STATUS_EIPV, 0) ), MCESEV( AR, "Action required: data load error", - SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|0x0134), + SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA), USER ), #endif @@ -129,11 +135,11 @@ static struct severity { /* known AO MCACODs: */ MCESEV( AO, "Action optional: memory scrubbing error", - SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|0xfff0, MCI_UC_S|0x00c0) + SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCACOD_SCRUBMSK, MCI_UC_S|MCACOD_SCRUB) ), MCESEV( AO, "Action optional: last level cache writeback error", - SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCACOD, MCI_UC_S|0x017a) + SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCACOD, MCI_UC_S|MCACOD_L3WB) ), MCESEV( SOME, "Action optional: unknown MCACOD", -- cgit v1.2.3-18-g5258 From 644e9cbbe3fc032cc92d0936057e166a994dc246 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 26 Jan 2012 00:09:05 +0100 Subject: Add driver auto probing for x86 features v4 There's a growing number of drivers that support a specific x86 feature or CPU. Currently loading these drivers currently on a generic distribution requires various driver specific hacks and it often doesn't work. This patch adds auto probing for drivers based on the x86 cpuid information, in particular based on vendor/family/model number and also based on CPUID feature bits. For example a common issue is not loading the SSE 4.2 accelerated CRC module: this can significantly lower the performance of BTRFS which relies on fast CRC. Another issue is loading the right CPUFREQ driver for the current CPU. Currently distributions often try all all possible driver until one sticks, which is not really a good way to do this. It works with existing udev without any changes. The code exports the x86 information as a generic string in sysfs that can be matched by udev's pattern matching. This scheme does not support numeric ranges, so if you want to handle e.g. ranges of model numbers they have to be encoded in ASCII or simply all models or families listed. Fixing that would require changing udev. Another issue is that udev will happily load all drivers that match, there is currently no nice way to stop a specific driver from being loaded if it's not needed (e.g. if you don't need fast CRC) But there are not that many cpu specific drivers around and they're all not that bloated, so this isn't a particularly serious issue. Originally this patch added the modalias to the normal cpu sysdevs. However sysdevs don't have all the infrastructure needed for udev, so it couldn't really autoload drivers. This patch instead adds the CPU modaliases to the cpuid devices, which are real devices with full support for udev. This implies that the cpuid driver has to be loaded to use this. This patch just adds infrastructure, some driver conversions in followups. Thanks to Kay for helping with some sysfs magic. v2: Constifcation, some updates v4: (trenn@suse.de): - Use kzalloc instead of kmalloc to terminate modalias buffer - Use uppercase hex values to match correctly against hex values containing letters Cc: Dave Jones Cc: Kay Sievers Cc: Jen Axboe Cc: Herbert Xu Cc: Huang Ying Cc: Len Brown Signed-off-by: Andi Kleen Signed-off-by: Thomas Renninger Acked-by: H. Peter Anvin Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/Makefile | 1 + arch/x86/kernel/cpu/match.c | 48 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+) create mode 100644 arch/x86/kernel/cpu/match.c (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 25f24dccdcf..6ab6aa2fdfd 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -16,6 +16,7 @@ obj-y := intel_cacheinfo.o scattered.o topology.o obj-y += proc.o capflags.o powerflags.o common.o obj-y += vmware.o hypervisor.o sched.o mshyperv.o obj-y += rdrand.o +obj-y += match.o obj-$(CONFIG_X86_32) += bugs.o obj-$(CONFIG_X86_64) += bugs_64.o diff --git a/arch/x86/kernel/cpu/match.c b/arch/x86/kernel/cpu/match.c new file mode 100644 index 00000000000..7acc961422e --- /dev/null +++ b/arch/x86/kernel/cpu/match.c @@ -0,0 +1,48 @@ +#include +#include +#include +#include + +/** + * x86_match_cpu - match current CPU again an array of x86_cpu_ids + * @match: Pointer to array of x86_cpu_ids. Last entry terminated with + * {}. + * + * Return the entry if the current CPU matches the entries in the + * passed x86_cpu_id match table. Otherwise NULL. The match table + * contains vendor (X86_VENDOR_*), family, model and feature bits or + * respective wildcard entries. + * + * A typical table entry would be to match a specific CPU + * { X86_VENDOR_INTEL, 6, 0x12 } + * or to match a specific CPU feature + * { X86_FEATURE_MATCH(X86_FEATURE_FOOBAR) } + * + * Fields can be wildcarded with %X86_VENDOR_ANY, %X86_FAMILY_ANY, + * %X86_MODEL_ANY, %X86_FEATURE_ANY or 0 (except for vendor) + * + * Arrays used to match for this should also be declared using + * MODULE_DEVICE_TABLE(x86_cpu, ...) + * + * This always matches against the boot cpu, assuming models and features are + * consistent over all CPUs. + */ +const struct x86_cpu_id *x86_match_cpu(const struct x86_cpu_id *match) +{ + const struct x86_cpu_id *m; + struct cpuinfo_x86 *c = &boot_cpu_data; + + for (m = match; m->vendor | m->family | m->model | m->feature; m++) { + if (m->vendor != X86_VENDOR_ANY && c->x86_vendor != m->vendor) + continue; + if (m->family != X86_FAMILY_ANY && c->x86 != m->family) + continue; + if (m->model != X86_MODEL_ANY && c->x86_model != m->model) + continue; + if (m->feature != X86_FEATURE_ANY && !cpu_has(c, m->feature)) + continue; + return m; + } + return NULL; +} +EXPORT_SYMBOL(x86_match_cpu); -- cgit v1.2.3-18-g5258 From 2f1e097e24defe64a86535b53768f5c8ab0368d1 Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Thu, 26 Jan 2012 00:09:11 +0100 Subject: X86: Introduce HW-Pstate scattered cpuid feature It is rather similar to CPB (boot capability) feature and exists since fam10h (can be looked up in AMD's BKDG). The feature is needed for powernow-k8 to cleanup init functions and to provide proper autoloading matching with the new x86cpu modalias feature. Cc: Kay Sievers Cc: Dave Jones Cc: Borislav Petkov Signed-off-by: Thomas Renninger Acked-by: H. Peter Anvin Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/scattered.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index c7f64e6f537..addf9e82a7f 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -40,6 +40,7 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c) { X86_FEATURE_EPB, CR_ECX, 3, 0x00000006, 0 }, { X86_FEATURE_XSAVEOPT, CR_EAX, 0, 0x0000000d, 1 }, { X86_FEATURE_CPB, CR_EDX, 9, 0x80000007, 0 }, + { X86_FEATURE_HW_PSTATE, CR_EDX, 7, 0x80000007, 0 }, { X86_FEATURE_NPT, CR_EDX, 0, 0x8000000a, 0 }, { X86_FEATURE_LBRV, CR_EDX, 1, 0x8000000a, 0 }, { X86_FEATURE_SVML, CR_EDX, 2, 0x8000000a, 0 }, -- cgit v1.2.3-18-g5258 From fad12ac8c8c2591c7f4e61d19b6a9d76cd49fafa Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Thu, 26 Jan 2012 00:09:14 +0100 Subject: CPU: Introduce ARCH_HAS_CPU_AUTOPROBE and X86 parts This patch is based on Andi Kleen's work: Implement autoprobing/loading of modules serving CPU specific features (x86cpu autoloading). And Kay Siever's work to get rid of sysdev cpu structures and making use of struct device instead. Before, the cpuid driver had to be loaded to get the x86cpu autoloading feature. With this patch autoloading works through the /sys/devices/system/cpu object Cc: Kay Sievers Cc: Dave Jones Cc: Jens Axboe Cc: Herbert Xu Cc: Huang Ying Cc: Len Brown Acked-by: Andi Kleen Signed-off-by: Thomas Renninger Acked-by: H. Peter Anvin Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/match.c | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/match.c b/arch/x86/kernel/cpu/match.c index 7acc961422e..940e2d48307 100644 --- a/arch/x86/kernel/cpu/match.c +++ b/arch/x86/kernel/cpu/match.c @@ -2,6 +2,7 @@ #include #include #include +#include /** * x86_match_cpu - match current CPU again an array of x86_cpu_ids @@ -46,3 +47,46 @@ const struct x86_cpu_id *x86_match_cpu(const struct x86_cpu_id *match) return NULL; } EXPORT_SYMBOL(x86_match_cpu); + +ssize_t arch_print_cpu_modalias(struct device *dev, + struct device_attribute *attr, + char *bufptr) +{ + int size = PAGE_SIZE; + int i, n; + char *buf = bufptr; + + n = snprintf(buf, size, "x86cpu:vendor:%04X:family:%04X:" + "model:%04X:feature:", + boot_cpu_data.x86_vendor, + boot_cpu_data.x86, + boot_cpu_data.x86_model); + size -= n; + buf += n; + size -= 2; + for (i = 0; i < NCAPINTS*32; i++) { + if (boot_cpu_has(i)) { + n = snprintf(buf, size, ",%04X", i); + if (n < 0) { + WARN(1, "x86 features overflow page\n"); + break; + } + size -= n; + buf += n; + } + } + *buf++ = ','; + *buf++ = '\n'; + return buf - bufptr; +} + +int arch_cpu_uevent(struct device *dev, struct kobj_uevent_env *env) +{ + char *buf = kzalloc(PAGE_SIZE, GFP_KERNEL); + if (buf) { + arch_print_cpu_modalias(NULL, NULL, buf); + add_uevent_var(env, "MODALIAS=%s", buf); + kfree(buf); + } + return 0; +} -- cgit v1.2.3-18-g5258 From 21c3fcf3e39353d4f21d50e257cc74f3204b1988 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 12 Feb 2012 09:53:57 -0800 Subject: x86/debug: Fix/improve the show_msr= debug print out Found out that show_msr= is broken, when I asked a user to use it to capture debug info about broken MTRR's whose MTRR settings are probably different between CPUs. Only the first CPUs MSRs are printed, but that is not enough to track down the suspected bug. For years we called print_cpu_msr from print_cpu_info(), but this commit: | commit 2eaad1fddd7450a48ad464229775f97fbfe8af36 | Author: Mike Travis | Date: Thu Dec 10 17:19:36 2009 -0800 | | x86: Limit the number of processor bootup messages removed the print_cpu_info() call from all APs. Put it back - it will only print MSRs when the user specifically requests them via show_msr=. Signed-off-by: Yinghai Lu Cc: Mike Travis Link: http://lkml.kernel.org/r/1329069237-11483-1-git-send-email-yinghai@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index d43cad74f16..8b6a3bb57d8 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -933,7 +933,7 @@ static const struct msr_range msr_range_array[] __cpuinitconst = { { 0xc0011000, 0xc001103b}, }; -static void __cpuinit print_cpu_msr(void) +static void __cpuinit __print_cpu_msr(void) { unsigned index_min, index_max; unsigned index; @@ -997,13 +997,13 @@ void __cpuinit print_cpu_info(struct cpuinfo_x86 *c) else printk(KERN_CONT "\n"); -#ifdef CONFIG_SMP + __print_cpu_msr(); +} + +void __cpuinit print_cpu_msr(struct cpuinfo_x86 *c) +{ if (c->cpu_index < show_msr) - print_cpu_msr(); -#else - if (show_msr) - print_cpu_msr(); -#endif + __print_cpu_msr(); } static __init int setup_disablecpuid(char *arg) -- cgit v1.2.3-18-g5258 From 70142a9dd154f54f7409871ead86f7d77f2c6576 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Sat, 11 Feb 2012 22:56:59 +0000 Subject: x86/cpu: Fix overrun check in arch_print_cpu_modalias() snprintf() does not return a negative value when truncating. Signed-off-by: Ben Hutchings Acked-by: Thomas Renninger Acked-by: H. Peter Anvin Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/match.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/match.c b/arch/x86/kernel/cpu/match.c index 940e2d48307..2dfa52bcdfe 100644 --- a/arch/x86/kernel/cpu/match.c +++ b/arch/x86/kernel/cpu/match.c @@ -67,7 +67,7 @@ ssize_t arch_print_cpu_modalias(struct device *dev, for (i = 0; i < NCAPINTS*32; i++) { if (boot_cpu_has(i)) { n = snprintf(buf, size, ",%04X", i); - if (n < 0) { + if (n >= size) { WARN(1, "x86 features overflow page\n"); break; } -- cgit v1.2.3-18-g5258 From 5467bdda4a326513c2f14b712a22d59115b7ae94 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Sat, 11 Feb 2012 22:57:19 +0000 Subject: x86/cpu: Clean up modalias feature matching We currently include commas on both sides of the feature ID in a modalias, but this prevents the lowest numbered feature of a CPU from being matched. Since all feature IDs have the same length, we do not need to worry about substring matches, so omit commas from the modalias entirely. Avoid generating multiple adjacent wildcards when there is no feature ID to match. Signed-off-by: Ben Hutchings Acked-by: Thomas Renninger Acked-by: H. Peter Anvin Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/match.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/match.c b/arch/x86/kernel/cpu/match.c index 2dfa52bcdfe..5502b289341 100644 --- a/arch/x86/kernel/cpu/match.c +++ b/arch/x86/kernel/cpu/match.c @@ -63,7 +63,7 @@ ssize_t arch_print_cpu_modalias(struct device *dev, boot_cpu_data.x86_model); size -= n; buf += n; - size -= 2; + size -= 1; for (i = 0; i < NCAPINTS*32; i++) { if (boot_cpu_has(i)) { n = snprintf(buf, size, ",%04X", i); @@ -75,7 +75,6 @@ ssize_t arch_print_cpu_modalias(struct device *dev, buf += n; } } - *buf++ = ','; *buf++ = '\n'; return buf - bufptr; } -- cgit v1.2.3-18-g5258 From 8546c008924d5fd1724fa698eaa92b414bafd50d Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 21 Feb 2012 10:25:45 -0800 Subject: i387: Uninline the generic FP helpers that we expose to kernel modules Instead of exporting the very low-level internals of the FPU state save/restore code (ie things like 'fpu_owner_task'), we should export the higher-level interfaces. Inlining these things is pointless anyway: sure, sometimes the end result is small, but while 'stts()' can result in just three x86 instructions, those are not cheap instructions (writing %cr0 is a serializing instruction and a very slow one at that). So the overhead of a function call is not noticeable, and we really don't want random modules mucking about with our internal state save logic anyway. So this unexports 'fpu_owner_task', and instead uninlines and exports the actual functions that modules can use: fpu_kernel_begin/end() and unlazy_fpu(). Signed-off-by: Linus Torvalds Link: http://lkml.kernel.org/r/alpine.LFD.2.02.1202211339590.5354@i5.linux-foundation.org Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/common.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index c0f7d68d318..cb71b01ab66 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1045,7 +1045,6 @@ DEFINE_PER_CPU(char *, irq_stack_ptr) = DEFINE_PER_CPU(unsigned int, irq_count) = -1; DEFINE_PER_CPU(struct task_struct *, fpu_owner_task); -EXPORT_PER_CPU_SYMBOL(fpu_owner_task); /* * Special IST stacks which the CPU switches to when it calls @@ -1115,7 +1114,6 @@ void debug_stack_reset(void) DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task; EXPORT_PER_CPU_SYMBOL(current_task); DEFINE_PER_CPU(struct task_struct *, fpu_owner_task); -EXPORT_PER_CPU_SYMBOL(fpu_owner_task); #ifdef CONFIG_CC_STACKPROTECTOR DEFINE_PER_CPU_ALIGNED(struct stack_canary, stack_canary); -- cgit v1.2.3-18-g5258 From 1361b83a13d4d92e53fbb6c877528713e118b821 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 21 Feb 2012 13:19:22 -0800 Subject: i387: Split up into exported and internal interfaces While various modules include to get access to things we actually *intend* for them to use, most of that header file was really pretty low-level internal stuff that we really don't want to expose to others. So split the header file into two: the small exported interfaces remain in , while the internal definitions that are only used by core architecture code are now in . The guiding principle for this was to expose functions that we export to modules, and leave them in , while stuff that is used by task switching or was marked GPL-only is in . The fpu-internal.h file could be further split up too, especially since arch/x86/kvm/ uses some of the remaining stuff for its module. But that kvm usage should probably be abstracted out a bit, and at least now the internal FPU accessor functions are much more contained. Even if it isn't perhaps as contained as it _could_ be. Signed-off-by: Linus Torvalds Link: http://lkml.kernel.org/r/alpine.LFD.2.02.1202211340330.5354@i5.linux-foundation.org Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/common.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index cb71b01ab66..89620b1725d 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include -- cgit v1.2.3-18-g5258 From d6126ef5f31ca54980cb067af659a360dfcca037 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 26 Jan 2012 15:49:14 -0800 Subject: x86/mce: Convert static array of pointers to per-cpu variables When I previously fixed up the mce_device code, I used a static array of the pointers. It was (rightfully) pointed out to me that I should be using the per_cpu code instead. This patch converts the code over to that structure, moving the variable back into the per_cpu area, like it used to be for 3.2 and earlier. Signed-off-by: Greg Kroah-Hartman Reviewed-by: Srivatsa S. Bhat Link: https://lkml.org/lkml/2012/1/27/165 Signed-off-by: Tony Luck --- arch/x86/kernel/cpu/mcheck/mce.c | 8 ++++---- arch/x86/kernel/cpu/mcheck/mce_amd.c | 9 +++++---- 2 files changed, 9 insertions(+), 8 deletions(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 5a11ae2e9e9..4979a5dfeba 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -1859,7 +1859,7 @@ static struct bus_type mce_subsys = { .dev_name = "machinecheck", }; -struct device *mce_device[CONFIG_NR_CPUS]; +DEFINE_PER_CPU(struct device *, mce_device); __cpuinitdata void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu); @@ -2038,7 +2038,7 @@ static __cpuinit int mce_device_create(unsigned int cpu) goto error2; } cpumask_set_cpu(cpu, mce_device_initialized); - mce_device[cpu] = dev; + per_cpu(mce_device, cpu) = dev; return 0; error2: @@ -2055,7 +2055,7 @@ error: static __cpuinit void mce_device_remove(unsigned int cpu) { - struct device *dev = mce_device[cpu]; + struct device *dev = per_cpu(mce_device, cpu); int i; if (!cpumask_test_cpu(cpu, mce_device_initialized)) @@ -2069,7 +2069,7 @@ static __cpuinit void mce_device_remove(unsigned int cpu) device_unregister(dev); cpumask_clear_cpu(cpu, mce_device_initialized); - mce_device[cpu] = NULL; + per_cpu(mce_device, cpu) = NULL; } /* Make sure there are no machine checks on offlined CPUs. */ diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 786e76a8632..a4bf9d23cdb 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -523,7 +523,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) { int i, err = 0; struct threshold_bank *b = NULL; - struct device *dev = mce_device[cpu]; + struct device *dev = per_cpu(mce_device, cpu); char name[32]; sprintf(name, "threshold_bank%i", bank); @@ -585,7 +585,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) if (i == cpu) continue; - dev = mce_device[i]; + dev = per_cpu(mce_device, i); if (dev) err = sysfs_create_link(&dev->kobj,b->kobj, name); if (err) @@ -665,7 +665,8 @@ static void threshold_remove_bank(unsigned int cpu, int bank) #ifdef CONFIG_SMP /* sibling symlink */ if (shared_bank[bank] && b->blocks->cpu != cpu) { - sysfs_remove_link(&mce_device[cpu]->kobj, name); + dev = per_cpu(mce_device, cpu); + sysfs_remove_link(&dev->kobj, name); per_cpu(threshold_banks, cpu)[bank] = NULL; return; @@ -677,7 +678,7 @@ static void threshold_remove_bank(unsigned int cpu, int bank) if (i == cpu) continue; - dev = mce_device[i]; + dev = per_cpu(mce_device, i); if (dev) sysfs_remove_link(&dev->kobj, name); per_cpu(threshold_banks, i)[bank] = NULL; -- cgit v1.2.3-18-g5258 From fadd85f16a8ec3fee8af599e79a209682dc52348 Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Mon, 23 Jan 2012 15:54:52 -0500 Subject: x86/mce: Fix return value of mce_chrdev_read() when erst is disabled Current kernel MCE code reads ERST at the first reading of /dev/mcelog (maybe in starting mcelogd,) even if the system does not support ERST, which results in a fake "no such device" message (as described in [1].) This problem is not critical, but can confuse system admins. This patch fixes it by filtering the return value from lower (ACPI) layer. [1] http://thread.gmane.org/gmane.linux.kernel/1060250 Reported by: Jon Masters Signed-off-by: Naoya Horiguchi Cc: Andi Kleen Cc: Huang Ying Link: https://lkml.org/lkml/2012/1/23/299 Signed-off-by: Tony Luck --- arch/x86/kernel/cpu/mcheck/mce.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 4979a5dfeba..87c56ba8080 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -1541,6 +1541,12 @@ static int __mce_read_apei(char __user **ubuf, size_t usize) /* Error or no more MCE record */ if (rc <= 0) { mce_apei_read_done = 1; + /* + * When ERST is disabled, mce_chrdev_read() should return + * "no record" instead of "no device." + */ + if (rc == -ENODEV) + return 0; return rc; } rc = -EFAULT; -- cgit v1.2.3-18-g5258 From f649e9388cd46ad1634164e56f96ae092ca59e4a Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Fri, 20 Jan 2012 16:24:09 -0500 Subject: x86: relocate get/set debugreg fcns to include/asm/debugreg. Since we already have a debugreg.h header file, move the assoc. get/set functions to it. In addition to it being the logical home for them, it has a secondary advantage. The functions that are moved use BUG(). So we really need to have linux/bug.h in scope. But asm/processor.h is used about 600 times, vs. only about 15 for debugreg.h -- so adding bug.h to the latter reduces the amount of time we'll be processing it during a compile. Signed-off-by: Paul Gortmaker Acked-by: Ingo Molnar CC: Thomas Gleixner CC: "H. Peter Anvin" --- arch/x86/kernel/cpu/common.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index c0f7d68d318..0d676dd923a 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include -- cgit v1.2.3-18-g5258 From b11e3d782b9c065b3b2fb543bfb0d97801822dc0 Mon Sep 17 00:00:00 2001 From: "Srivatsa S. Bhat" Date: Wed, 7 Mar 2012 11:44:29 +0100 Subject: x86, mce: Fix rcu splat in drain_mce_log_buffer() While booting, the following message is seen: [ 21.665087] =============================== [ 21.669439] [ INFO: suspicious RCU usage. ] [ 21.673798] 3.2.0-0.0.0.28.36b5ec9-default #2 Not tainted [ 21.681353] ------------------------------- [ 21.685864] arch/x86/kernel/cpu/mcheck/mce.c:194 suspicious rcu_dereference_index_check() usage! [ 21.695013] [ 21.695014] other info that might help us debug this: [ 21.695016] [ 21.703488] [ 21.703489] rcu_scheduler_active = 1, debug_locks = 1 [ 21.710426] 3 locks held by modprobe/2139: [ 21.714754] #0: (&__lockdep_no_validate__){......}, at: [] __driver_attach+0x53/0xa0 [ 21.725020] #1: [ 21.725323] ioatdma: Intel(R) QuickData Technology Driver 4.00 [ 21.733206] (&__lockdep_no_validate__){......}, at: [] __driver_attach+0x61/0xa0 [ 21.743015] #2: (i7core_edac_lock){+.+.+.}, at: [] i7core_probe+0x1f/0x5c0 [i7core_edac] [ 21.753708] [ 21.753709] stack backtrace: [ 21.758429] Pid: 2139, comm: modprobe Not tainted 3.2.0-0.0.0.28.36b5ec9-default #2 [ 21.768253] Call Trace: [ 21.770838] [] lockdep_rcu_suspicious+0xcd/0x100 [ 21.777366] [] drain_mcelog_buffer+0x191/0x1b0 [ 21.783715] [] mce_register_decode_chain+0x18/0x20 [ 21.790430] [] i7core_register_mci+0x2fb/0x3e4 [i7core_edac] [ 21.798003] [] i7core_probe+0xd4/0x5c0 [i7core_edac] [ 21.804809] [] local_pci_probe+0x5b/0xe0 [ 21.810631] [] __pci_device_probe+0xd9/0xe0 [ 21.816650] [] ? get_device+0x14/0x20 [ 21.822178] [] pci_device_probe+0x36/0x60 [ 21.828061] [] really_probe+0x7a/0x2b0 [ 21.833676] [] driver_probe_device+0x63/0xc0 [ 21.839868] [] __driver_attach+0x9b/0xa0 [ 21.845718] [] ? driver_probe_device+0xc0/0xc0 [ 21.852027] [] bus_for_each_dev+0x68/0x90 [ 21.857876] [] driver_attach+0x1c/0x20 [ 21.863462] [] bus_add_driver+0x16d/0x2b0 [ 21.869377] [] driver_register+0x7c/0x160 [ 21.875220] [] __pci_register_driver+0x6a/0xf0 [ 21.881494] [] ? 0xffffffffa01fdfff [ 21.886846] [] i7core_init+0x47/0x1000 [i7core_edac] [ 21.893737] [] do_one_initcall+0x3e/0x180 [ 21.899670] [] sys_init_module+0xc5/0x220 [ 21.905542] [] system_call_fastpath+0x16/0x1b Fix this by using ACCESS_ONCE() instead of rcu_dereference_check_mce() over mcelog.next. Since the access to each entry is controlled by the ->finished field, ACCESS_ONCE() should work just fine. An rcu_dereference is unnecessary here. Signed-off-by: Srivatsa S. Bhat Suggested-by: Paul E. McKenney Cc: Tony Luck Signed-off-by: Borislav Petkov --- arch/x86/kernel/cpu/mcheck/mce.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 5a11ae2e9e9..db590aff874 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -191,7 +191,7 @@ static void drain_mcelog_buffer(void) { unsigned int next, i, prev = 0; - next = rcu_dereference_check_mce(mcelog.next); + next = ACCESS_ONCE(mcelog.next); do { struct mce *m; -- cgit v1.2.3-18-g5258 From 307b1cd7ecd7f3dc5ce3d3860957f034f0abe4df Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Fri, 23 Mar 2012 15:02:03 -0700 Subject: bitops: rename for_each_set_bit_cont() in favor of analogous list.h function This renames for_each_set_bit_cont() to for_each_set_bit_from() because it is analogous to list_for_each_entry_from() in list.h rather than list_for_each_entry_continue(). This doesn't remove for_each_set_bit_cont() for now. Signed-off-by: Akinobu Mita Cc: Robert Richter Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/kernel/cpu/perf_event.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 0a18d16cb58..fa2900c0e39 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -643,14 +643,14 @@ static bool __perf_sched_find_counter(struct perf_sched *sched) /* Prefer fixed purpose counters */ if (x86_pmu.num_counters_fixed) { idx = X86_PMC_IDX_FIXED; - for_each_set_bit_cont(idx, c->idxmsk, X86_PMC_IDX_MAX) { + for_each_set_bit_from(idx, c->idxmsk, X86_PMC_IDX_MAX) { if (!__test_and_set_bit(idx, sched->state.used)) goto done; } } /* Grab the first unused counter starting with idx */ idx = sched->state.counter; - for_each_set_bit_cont(idx, c->idxmsk, X86_PMC_IDX_FIXED) { + for_each_set_bit_from(idx, c->idxmsk, X86_PMC_IDX_FIXED) { if (!__test_and_set_bit(idx, sched->state.used)) goto done; } -- cgit v1.2.3-18-g5258