From 27c379f7f89a4d558c685b5d89b5ba2fe79ae701 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 10 Sep 2010 13:47:29 +0200 Subject: generic-ipi: Fix deadlock in __smp_call_function_single Just got my 6 way machine to a state where cpu 0 is in an endless loop within __smp_call_function_single. All other cpus are idle. The call trace on cpu 0 looks like this: __smp_call_function_single scheduler_tick update_process_times tick_sched_timer __run_hrtimer hrtimer_interrupt clock_comparator_work do_extint ext_int_handler ----> timer irq cpu_idle __smp_call_function_single() got called from nohz_balancer_kick() (inlined) with the remote cpu being 1, wait being 0 and the per cpu variable remote_sched_softirq_cb (call_single_data) of the current cpu (0). Then it loops forever when it tries to grab the lock of the call_single_data, since it is already locked and enqueued on cpu 0. My theory how this could have happened: for some reason the scheduler decided to call __smp_call_function_single() on it's own cpu, and sends an IPI to itself. The interrupt stays pending since IRQs are disabled. If then the hypervisor schedules the cpu away it might happen that upon rescheduling both the IPI and the timer IRQ are pending. If then interrupts are enabled again it depends which one gets scheduled first. If the timer interrupt gets delivered first we end up with the local deadlock as seen in the calltrace above. Let's make __smp_call_function_single() check if the target cpu is the current cpu and execute the function immediately just like smp_call_function_single does. That should prevent at least the scenario described here. It might also be that the scheduler is not supposed to call __smp_call_function_single with the remote cpu being the current cpu, but that is a different issue. Signed-off-by: Heiko Carstens Acked-by: Peter Zijlstra Acked-by: Jens Axboe Cc: Venkatesh Pallipadi Cc: Suresh Siddha LKML-Reference: <20100910114729.GB2827@osiris.boeblingen.de.ibm.com> Signed-off-by: Ingo Molnar --- kernel/smp.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) (limited to 'kernel/smp.c') diff --git a/kernel/smp.c b/kernel/smp.c index 75c970c715d..ed6aacfcb7e 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -365,9 +365,10 @@ call: EXPORT_SYMBOL_GPL(smp_call_function_any); /** - * __smp_call_function_single(): Run a function on another CPU + * __smp_call_function_single(): Run a function on a specific CPU * @cpu: The CPU to run on. * @data: Pre-allocated and setup data structure + * @wait: If true, wait until function has completed on specified CPU. * * Like smp_call_function_single(), but allow caller to pass in a * pre-allocated data structure. Useful for embedding @data inside @@ -376,8 +377,10 @@ EXPORT_SYMBOL_GPL(smp_call_function_any); void __smp_call_function_single(int cpu, struct call_single_data *data, int wait) { - csd_lock(data); + unsigned int this_cpu; + unsigned long flags; + this_cpu = get_cpu(); /* * Can deadlock when called with interrupts disabled. * We allow cpu's that are not yet online though, as no one else can @@ -387,7 +390,15 @@ void __smp_call_function_single(int cpu, struct call_single_data *data, WARN_ON_ONCE(cpu_online(smp_processor_id()) && wait && irqs_disabled() && !oops_in_progress); - generic_exec_single(cpu, data, wait); + if (cpu == this_cpu) { + local_irq_save(flags); + data->func(data->info); + local_irq_restore(flags); + } else { + csd_lock(data); + generic_exec_single(cpu, data, wait); + } + put_cpu(); } /** -- cgit v1.2.3-70-g09d2 From 3a5f65df5a0fcbaa35e5417c0420d691fee4ac56 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 27 Oct 2010 17:28:36 +0100 Subject: Typedef SMP call function pointer Typedef the pointer to the function to be called by smp_call_function() and friends: typedef void (*smp_call_func_t)(void *info); as it is used in a fair number of places. Signed-off-by: David Howells cc: linux-arch@vger.kernel.org --- include/linux/smp.h | 19 ++++++++++--------- kernel/smp.c | 8 ++++---- 2 files changed, 14 insertions(+), 13 deletions(-) (limited to 'kernel/smp.c') diff --git a/include/linux/smp.h b/include/linux/smp.h index cfa2d20e35f..6dc95cac6b3 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -13,9 +13,10 @@ extern void cpu_idle(void); +typedef void (*smp_call_func_t)(void *info); struct call_single_data { struct list_head list; - void (*func) (void *info); + smp_call_func_t func; void *info; u16 flags; u16 priv; @@ -24,8 +25,8 @@ struct call_single_data { /* total number of cpus in this system (may exceed NR_CPUS) */ extern unsigned int total_cpus; -int smp_call_function_single(int cpuid, void (*func) (void *info), void *info, - int wait); +int smp_call_function_single(int cpuid, smp_call_func_t func, void *info, + int wait); #ifdef CONFIG_SMP @@ -69,15 +70,15 @@ extern void smp_cpus_done(unsigned int max_cpus); /* * Call a function on all other processors */ -int smp_call_function(void(*func)(void *info), void *info, int wait); +int smp_call_function(smp_call_func_t func, void *info, int wait); void smp_call_function_many(const struct cpumask *mask, - void (*func)(void *info), void *info, bool wait); + smp_call_func_t func, void *info, bool wait); void __smp_call_function_single(int cpuid, struct call_single_data *data, int wait); int smp_call_function_any(const struct cpumask *mask, - void (*func)(void *info), void *info, int wait); + smp_call_func_t func, void *info, int wait); /* * Generic and arch helpers @@ -94,7 +95,7 @@ void ipi_call_unlock_irq(void); /* * Call a function on all processors */ -int on_each_cpu(void (*func) (void *info), void *info, int wait); +int on_each_cpu(smp_call_func_t func, void *info, int wait); #define MSG_ALL_BUT_SELF 0x8000 /* Assume <32768 CPU's */ #define MSG_ALL 0x8001 @@ -122,7 +123,7 @@ static inline void smp_send_stop(void) { } * These macros fold the SMP functionality into a single CPU system */ #define raw_smp_processor_id() 0 -static inline int up_smp_call_function(void (*func)(void *), void *info) +static inline int up_smp_call_function(smp_call_func_t func, void *info) { return 0; } @@ -143,7 +144,7 @@ static inline void smp_send_reschedule(int cpu) { } static inline void init_call_single_data(void) { } static inline int -smp_call_function_any(const struct cpumask *mask, void (*func)(void *info), +smp_call_function_any(const struct cpumask *mask, smp_call_func_t func, void *info, int wait) { return smp_call_function_single(0, func, info, wait); diff --git a/kernel/smp.c b/kernel/smp.c index ed6aacfcb7e..12ed8b013e2 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -267,7 +267,7 @@ static DEFINE_PER_CPU_SHARED_ALIGNED(struct call_single_data, csd_data); * * Returns 0 on success, else a negative status code. */ -int smp_call_function_single(int cpu, void (*func) (void *info), void *info, +int smp_call_function_single(int cpu, smp_call_func_t func, void *info, int wait) { struct call_single_data d = { @@ -336,7 +336,7 @@ EXPORT_SYMBOL(smp_call_function_single); * 3) any other online cpu in @mask */ int smp_call_function_any(const struct cpumask *mask, - void (*func)(void *info), void *info, int wait) + smp_call_func_t func, void *info, int wait) { unsigned int cpu; const struct cpumask *nodemask; @@ -416,7 +416,7 @@ void __smp_call_function_single(int cpu, struct call_single_data *data, * must be disabled when calling this function. */ void smp_call_function_many(const struct cpumask *mask, - void (*func)(void *), void *info, bool wait) + smp_call_func_t func, void *info, bool wait) { struct call_function_data *data; unsigned long flags; @@ -500,7 +500,7 @@ EXPORT_SYMBOL(smp_call_function_many); * You must not call this function with disabled interrupts or from a * hardware interrupt handler or from a bottom half handler. */ -int smp_call_function(void (*func)(void *), void *info, int wait) +int smp_call_function(smp_call_func_t func, void *info, int wait) { preempt_disable(); smp_call_function_many(cpu_online_mask, func, info, wait); -- cgit v1.2.3-70-g09d2