From 27c379f7f89a4d558c685b5d89b5ba2fe79ae701 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 10 Sep 2010 13:47:29 +0200 Subject: generic-ipi: Fix deadlock in __smp_call_function_single Just got my 6 way machine to a state where cpu 0 is in an endless loop within __smp_call_function_single. All other cpus are idle. The call trace on cpu 0 looks like this: __smp_call_function_single scheduler_tick update_process_times tick_sched_timer __run_hrtimer hrtimer_interrupt clock_comparator_work do_extint ext_int_handler ----> timer irq cpu_idle __smp_call_function_single() got called from nohz_balancer_kick() (inlined) with the remote cpu being 1, wait being 0 and the per cpu variable remote_sched_softirq_cb (call_single_data) of the current cpu (0). Then it loops forever when it tries to grab the lock of the call_single_data, since it is already locked and enqueued on cpu 0. My theory how this could have happened: for some reason the scheduler decided to call __smp_call_function_single() on it's own cpu, and sends an IPI to itself. The interrupt stays pending since IRQs are disabled. If then the hypervisor schedules the cpu away it might happen that upon rescheduling both the IPI and the timer IRQ are pending. If then interrupts are enabled again it depends which one gets scheduled first. If the timer interrupt gets delivered first we end up with the local deadlock as seen in the calltrace above. Let's make __smp_call_function_single() check if the target cpu is the current cpu and execute the function immediately just like smp_call_function_single does. That should prevent at least the scenario described here. It might also be that the scheduler is not supposed to call __smp_call_function_single with the remote cpu being the current cpu, but that is a different issue. Signed-off-by: Heiko Carstens Acked-by: Peter Zijlstra Acked-by: Jens Axboe Cc: Venkatesh Pallipadi Cc: Suresh Siddha LKML-Reference: <20100910114729.GB2827@osiris.boeblingen.de.ibm.com> Signed-off-by: Ingo Molnar --- kernel/smp.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/smp.c b/kernel/smp.c index 75c970c715d..ed6aacfcb7e 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -365,9 +365,10 @@ call: EXPORT_SYMBOL_GPL(smp_call_function_any); /** - * __smp_call_function_single(): Run a function on another CPU + * __smp_call_function_single(): Run a function on a specific CPU * @cpu: The CPU to run on. * @data: Pre-allocated and setup data structure + * @wait: If true, wait until function has completed on specified CPU. * * Like smp_call_function_single(), but allow caller to pass in a * pre-allocated data structure. Useful for embedding @data inside @@ -376,8 +377,10 @@ EXPORT_SYMBOL_GPL(smp_call_function_any); void __smp_call_function_single(int cpu, struct call_single_data *data, int wait) { - csd_lock(data); + unsigned int this_cpu; + unsigned long flags; + this_cpu = get_cpu(); /* * Can deadlock when called with interrupts disabled. * We allow cpu's that are not yet online though, as no one else can @@ -387,7 +390,15 @@ void __smp_call_function_single(int cpu, struct call_single_data *data, WARN_ON_ONCE(cpu_online(smp_processor_id()) && wait && irqs_disabled() && !oops_in_progress); - generic_exec_single(cpu, data, wait); + if (cpu == this_cpu) { + local_irq_save(flags); + data->func(data->info); + local_irq_restore(flags); + } else { + csd_lock(data); + generic_exec_single(cpu, data, wait); + } + put_cpu(); } /** -- cgit v1.2.3-18-g5258 From a247c3a97a0216b18a46243eda26081f1928ec37 Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Wed, 22 Sep 2010 13:05:12 -0700 Subject: rmap: fix walk during fork The below bug in fork led to the rmap walk finding the parent huge-pmd twice instead of just once, because the anon_vma_chain objects of the child vma still point to the vma->vm_mm of the parent. The patch fixes it by making the rmap walk accurate during fork. It's not a big deal normally but it worth being accurate considering the cost is the same. Signed-off-by: Andrea Arcangeli Acked-by: Johannes Weiner Acked-by: Rik van Riel Acked-by: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/fork.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/fork.c b/kernel/fork.c index b7e9d60a675..c445f8cc408 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -356,10 +356,10 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) if (IS_ERR(pol)) goto fail_nomem_policy; vma_set_policy(tmp, pol); + tmp->vm_mm = mm; if (anon_vma_fork(tmp, mpnt)) goto fail_nomem_anon_vma_fork; tmp->vm_flags &= ~VM_LOCKED; - tmp->vm_mm = mm; tmp->vm_next = tmp->vm_prev = NULL; file = tmp->vm_file; if (file) { -- cgit v1.2.3-18-g5258 From 399f1e30ac17b77d383444aff480c7390f5adf2a Mon Sep 17 00:00:00 2001 From: "Ira W. Snyder" Date: Thu, 30 Sep 2010 15:15:27 -0700 Subject: kfifo: fix scatterlist usage The kfifo_dma family of functions use sg_mark_end() on the last element in their scatterlist. This forces use of a fresh scatterlist for each DMA operation, which makes recycling a single scatterlist impossible. Change the behavior of the kfifo_dma functions to match the usage of the dma_map_sg function. This means that users must respect the returned nents value. The sample code is updated to reflect the change. This bug is trivial to cause: call kfifo_dma_in_prepare() such that it prepares a scatterlist with a single entry comprising the whole fifo. This is the case when you map the entirety of a newly created empty fifo. This causes the setup_sgl() function to mark the first scatterlist entry as the end of the chain, no matter what comes after it. Afterwards, add and remove some data from the fifo such that another call to kfifo_dma_in_prepare() will create two scatterlist entries. It returns nents=2. However, due to the previous sg_mark_end() call, sg_is_last() will now return true for the first scatterlist element. This causes the sample code to print a single scatterlist element when it should print two. By removing the call to sg_mark_end(), we make the API as similar as possible to the DMA mapping API. All users are required to respect the returned nents. Signed-off-by: Ira W. Snyder Cc: Stefani Seibold Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/kfifo.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'kernel') diff --git a/kernel/kfifo.c b/kernel/kfifo.c index 6b5580c5764..01a0700e873 100644 --- a/kernel/kfifo.c +++ b/kernel/kfifo.c @@ -365,8 +365,6 @@ static unsigned int setup_sgl(struct __kfifo *fifo, struct scatterlist *sgl, n = setup_sgl_buf(sgl, fifo->data + off, nents, l); n += setup_sgl_buf(sgl + n, fifo->data, nents - n, len - l); - if (n) - sg_mark_end(sgl + n - 1); return n; } -- cgit v1.2.3-18-g5258 From 5336377d6225959624146629ce3fc88ee8ecda3d Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 5 Oct 2010 11:29:27 -0700 Subject: modules: Fix module_bug_list list corruption race With all the recent module loading cleanups, we've minimized the code that sits under module_mutex, fixing various deadlocks and making it possible to do most of the module loading in parallel. However, that whole conversion totally missed the rather obscure code that adds a new module to the list for BUG() handling. That code was doubly obscure because (a) the code itself lives in lib/bugs.c (for dubious reasons) and (b) it gets called from the architecture-specific "module_finalize()" rather than from generic code. Calling it from arch-specific code makes no sense what-so-ever to begin with, and is now actively wrong since that code isn't protected by the module loading lock any more. So this commit moves the "module_bug_{finalize,cleanup}()" calls away from the arch-specific code, and into the generic code - and in the process protects it with the module_mutex so that the list operations are now safe. Future fixups: - move the module list handling code into kernel/module.c where it belongs. - get rid of 'module_bug_list' and just use the regular list of modules (called 'modules' - imagine that) that we already create and maintain for other reasons. Reported-and-tested-by: Thomas Gleixner Cc: Rusty Russell Cc: Adrian Bunk Cc: Andrew Morton Cc: stable@kernel.org Signed-off-by: Linus Torvalds --- kernel/module.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'kernel') diff --git a/kernel/module.c b/kernel/module.c index d0b5f8db11b..ccd64199184 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1537,6 +1537,7 @@ static int __unlink_module(void *_mod) { struct module *mod = _mod; list_del(&mod->list); + module_bug_cleanup(mod); return 0; } @@ -2625,6 +2626,7 @@ static struct module *load_module(void __user *umod, if (err < 0) goto ddebug; + module_bug_finalize(info.hdr, info.sechdrs, mod); list_add_rcu(&mod->list, &modules); mutex_unlock(&module_mutex); @@ -2650,6 +2652,8 @@ static struct module *load_module(void __user *umod, mutex_lock(&module_mutex); /* Unlink carefully: kallsyms could be walking list. */ list_del_rcu(&mod->list); + module_bug_cleanup(mod); + ddebug: if (!mod->taints) dynamic_debug_remove(info.debug); -- cgit v1.2.3-18-g5258