diff options
Diffstat (limited to 'kernel/smp.c')
-rw-r--r-- | kernel/smp.c | 262 |
1 files changed, 70 insertions, 192 deletions
diff --git a/kernel/smp.c b/kernel/smp.c index 69f38bd98b4..4dba0f7b72a 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -16,22 +16,12 @@ #include "smpboot.h" #ifdef CONFIG_USE_GENERIC_SMP_HELPERS -static struct { - struct list_head queue; - raw_spinlock_t lock; -} call_function __cacheline_aligned_in_smp = - { - .queue = LIST_HEAD_INIT(call_function.queue), - .lock = __RAW_SPIN_LOCK_UNLOCKED(call_function.lock), - }; - enum { CSD_FLAG_LOCK = 0x01, }; struct call_function_data { - struct call_single_data csd; - atomic_t refs; + struct call_single_data __percpu *csd; cpumask_var_t cpumask; cpumask_var_t cpumask_ipi; }; @@ -60,6 +50,11 @@ hotplug_cfd(struct notifier_block *nfb, unsigned long action, void *hcpu) if (!zalloc_cpumask_var_node(&cfd->cpumask_ipi, GFP_KERNEL, cpu_to_node(cpu))) return notifier_from_errno(-ENOMEM); + cfd->csd = alloc_percpu(struct call_single_data); + if (!cfd->csd) { + free_cpumask_var(cfd->cpumask); + return notifier_from_errno(-ENOMEM); + } break; #ifdef CONFIG_HOTPLUG_CPU @@ -70,6 +65,7 @@ hotplug_cfd(struct notifier_block *nfb, unsigned long action, void *hcpu) case CPU_DEAD_FROZEN: free_cpumask_var(cfd->cpumask); free_cpumask_var(cfd->cpumask_ipi); + free_percpu(cfd->csd); break; #endif }; @@ -104,16 +100,16 @@ void __init call_function_init(void) * previous function call. For multi-cpu calls its even more interesting * as we'll have to ensure no other cpu is observing our csd. */ -static void csd_lock_wait(struct call_single_data *data) +static void csd_lock_wait(struct call_single_data *csd) { - while (data->flags & CSD_FLAG_LOCK) + while (csd->flags & CSD_FLAG_LOCK) cpu_relax(); } -static void csd_lock(struct call_single_data *data) +static void csd_lock(struct call_single_data *csd) { - csd_lock_wait(data); - data->flags = CSD_FLAG_LOCK; + csd_lock_wait(csd); + csd->flags |= CSD_FLAG_LOCK; /* * prevent CPU from reordering the above assignment @@ -123,16 +119,16 @@ static void csd_lock(struct call_single_data *data) smp_mb(); } -static void csd_unlock(struct call_single_data *data) +static void csd_unlock(struct call_single_data *csd) { - WARN_ON(!(data->flags & CSD_FLAG_LOCK)); + WARN_ON(!(csd->flags & CSD_FLAG_LOCK)); /* * ensure we're all done before releasing data: */ smp_mb(); - data->flags &= ~CSD_FLAG_LOCK; + csd->flags &= ~CSD_FLAG_LOCK; } /* @@ -141,7 +137,7 @@ static void csd_unlock(struct call_single_data *data) * ->func, ->info, and ->flags set. */ static -void generic_exec_single(int cpu, struct call_single_data *data, int wait) +void generic_exec_single(int cpu, struct call_single_data *csd, int wait) { struct call_single_queue *dst = &per_cpu(call_single_queue, cpu); unsigned long flags; @@ -149,7 +145,7 @@ void generic_exec_single(int cpu, struct call_single_data *data, int wait) raw_spin_lock_irqsave(&dst->lock, flags); ipi = list_empty(&dst->list); - list_add_tail(&data->list, &dst->list); + list_add_tail(&csd->list, &dst->list); raw_spin_unlock_irqrestore(&dst->lock, flags); /* @@ -167,86 +163,7 @@ void generic_exec_single(int cpu, struct call_single_data *data, int wait) arch_send_call_function_single_ipi(cpu); if (wait) - csd_lock_wait(data); -} - -/* - * Invoked by arch to handle an IPI for call function. Must be called with - * interrupts disabled. - */ -void generic_smp_call_function_interrupt(void) -{ - struct call_function_data *data; - int cpu = smp_processor_id(); - - /* - * Shouldn't receive this interrupt on a cpu that is not yet online. - */ - WARN_ON_ONCE(!cpu_online(cpu)); - - /* - * Ensure entry is visible on call_function_queue after we have - * entered the IPI. See comment in smp_call_function_many. - * If we don't have this, then we may miss an entry on the list - * and never get another IPI to process it. - */ - smp_mb(); - - /* - * It's ok to use list_for_each_rcu() here even though we may - * delete 'pos', since list_del_rcu() doesn't clear ->next - */ - list_for_each_entry_rcu(data, &call_function.queue, csd.list) { - int refs; - smp_call_func_t func; - - /* - * Since we walk the list without any locks, we might - * see an entry that was completed, removed from the - * list and is in the process of being reused. - * - * We must check that the cpu is in the cpumask before - * checking the refs, and both must be set before - * executing the callback on this cpu. - */ - - if (!cpumask_test_cpu(cpu, data->cpumask)) - continue; - - smp_rmb(); - - if (atomic_read(&data->refs) == 0) - continue; - - func = data->csd.func; /* save for later warn */ - func(data->csd.info); - - /* - * If the cpu mask is not still set then func enabled - * interrupts (BUG), and this cpu took another smp call - * function interrupt and executed func(info) twice - * on this cpu. That nested execution decremented refs. - */ - if (!cpumask_test_and_clear_cpu(cpu, data->cpumask)) { - WARN(1, "%pf enabled interrupts and double executed\n", func); - continue; - } - - refs = atomic_dec_return(&data->refs); - WARN_ON(refs < 0); - - if (refs) - continue; - - WARN_ON(!cpumask_empty(data->cpumask)); - - raw_spin_lock(&call_function.lock); - list_del_rcu(&data->csd.list); - raw_spin_unlock(&call_function.lock); - - csd_unlock(&data->csd); - } - + csd_lock_wait(csd); } /* @@ -256,7 +173,6 @@ void generic_smp_call_function_interrupt(void) void generic_smp_call_function_single_interrupt(void) { struct call_single_queue *q = &__get_cpu_var(call_single_queue); - unsigned int data_flags; LIST_HEAD(list); /* @@ -269,25 +185,26 @@ void generic_smp_call_function_single_interrupt(void) raw_spin_unlock(&q->lock); while (!list_empty(&list)) { - struct call_single_data *data; + struct call_single_data *csd; + unsigned int csd_flags; - data = list_entry(list.next, struct call_single_data, list); - list_del(&data->list); + csd = list_entry(list.next, struct call_single_data, list); + list_del(&csd->list); /* - * 'data' can be invalid after this call if flags == 0 + * 'csd' can be invalid after this call if flags == 0 * (when called through generic_exec_single()), * so save them away before making the call: */ - data_flags = data->flags; + csd_flags = csd->flags; - data->func(data->info); + csd->func(csd->info); /* * Unlocked CSDs are valid through generic_exec_single(): */ - if (data_flags & CSD_FLAG_LOCK) - csd_unlock(data); + if (csd_flags & CSD_FLAG_LOCK) + csd_unlock(csd); } } @@ -332,16 +249,16 @@ int smp_call_function_single(int cpu, smp_call_func_t func, void *info, local_irq_restore(flags); } else { if ((unsigned)cpu < nr_cpu_ids && cpu_online(cpu)) { - struct call_single_data *data = &d; + struct call_single_data *csd = &d; if (!wait) - data = &__get_cpu_var(csd_data); + csd = &__get_cpu_var(csd_data); - csd_lock(data); + csd_lock(csd); - data->func = func; - data->info = info; - generic_exec_single(cpu, data, wait); + csd->func = func; + csd->info = info; + generic_exec_single(cpu, csd, wait); } else { err = -ENXIO; /* CPU not online */ } @@ -408,7 +325,7 @@ EXPORT_SYMBOL_GPL(smp_call_function_any); * pre-allocated data structure. Useful for embedding @data inside * other structures, for instance. */ -void __smp_call_function_single(int cpu, struct call_single_data *data, +void __smp_call_function_single(int cpu, struct call_single_data *csd, int wait) { unsigned int this_cpu; @@ -426,11 +343,11 @@ void __smp_call_function_single(int cpu, struct call_single_data *data, if (cpu == this_cpu) { local_irq_save(flags); - data->func(data->info); + csd->func(csd->info); local_irq_restore(flags); } else { - csd_lock(data); - generic_exec_single(cpu, data, wait); + csd_lock(csd); + generic_exec_single(cpu, csd, wait); } put_cpu(); } @@ -452,9 +369,8 @@ void __smp_call_function_single(int cpu, struct call_single_data *data, void smp_call_function_many(const struct cpumask *mask, smp_call_func_t func, void *info, bool wait) { - struct call_function_data *data; - unsigned long flags; - int refs, cpu, next_cpu, this_cpu = smp_processor_id(); + struct call_function_data *cfd; + int cpu, next_cpu, this_cpu = smp_processor_id(); /* * Can deadlock when called with interrupts disabled. @@ -485,86 +401,48 @@ void smp_call_function_many(const struct cpumask *mask, return; } - data = &__get_cpu_var(cfd_data); - csd_lock(&data->csd); - - /* This BUG_ON verifies our reuse assertions and can be removed */ - BUG_ON(atomic_read(&data->refs) || !cpumask_empty(data->cpumask)); - - /* - * The global call function queue list add and delete are protected - * by a lock, but the list is traversed without any lock, relying - * on the rcu list add and delete to allow safe concurrent traversal. - * We reuse the call function data without waiting for any grace - * period after some other cpu removes it from the global queue. - * This means a cpu might find our data block as it is being - * filled out. - * - * We hold off the interrupt handler on the other cpu by - * ordering our writes to the cpu mask vs our setting of the - * refs counter. We assert only the cpu owning the data block - * will set a bit in cpumask, and each bit will only be cleared - * by the subject cpu. Each cpu must first find its bit is - * set and then check that refs is set indicating the element is - * ready to be processed, otherwise it must skip the entry. - * - * On the previous iteration refs was set to 0 by another cpu. - * To avoid the use of transitivity, set the counter to 0 here - * so the wmb will pair with the rmb in the interrupt handler. - */ - atomic_set(&data->refs, 0); /* convert 3rd to 1st party write */ - - data->csd.func = func; - data->csd.info = info; - - /* Ensure 0 refs is visible before mask. Also orders func and info */ - smp_wmb(); + cfd = &__get_cpu_var(cfd_data); - /* We rely on the "and" being processed before the store */ - cpumask_and(data->cpumask, mask, cpu_online_mask); - cpumask_clear_cpu(this_cpu, data->cpumask); - refs = cpumask_weight(data->cpumask); + cpumask_and(cfd->cpumask, mask, cpu_online_mask); + cpumask_clear_cpu(this_cpu, cfd->cpumask); /* Some callers race with other cpus changing the passed mask */ - if (unlikely(!refs)) { - csd_unlock(&data->csd); + if (unlikely(!cpumask_weight(cfd->cpumask))) return; - } /* - * After we put an entry into the list, data->cpumask - * may be cleared again when another CPU sends another IPI for - * a SMP function call, so data->cpumask will be zero. - */ - cpumask_copy(data->cpumask_ipi, data->cpumask); - raw_spin_lock_irqsave(&call_function.lock, flags); - /* - * Place entry at the _HEAD_ of the list, so that any cpu still - * observing the entry in generic_smp_call_function_interrupt() - * will not miss any other list entries: - */ - list_add_rcu(&data->csd.list, &call_function.queue); - /* - * We rely on the wmb() in list_add_rcu to complete our writes - * to the cpumask before this write to refs, which indicates - * data is on the list and is ready to be processed. + * After we put an entry into the list, cfd->cpumask may be cleared + * again when another CPU sends another IPI for a SMP function call, so + * cfd->cpumask will be zero. */ - atomic_set(&data->refs, refs); - raw_spin_unlock_irqrestore(&call_function.lock, flags); + cpumask_copy(cfd->cpumask_ipi, cfd->cpumask); - /* - * Make the list addition visible before sending the ipi. - * (IPIs must obey or appear to obey normal Linux cache - * coherency rules -- see comment in generic_exec_single). - */ - smp_mb(); + for_each_cpu(cpu, cfd->cpumask) { + struct call_single_data *csd = per_cpu_ptr(cfd->csd, cpu); + struct call_single_queue *dst = + &per_cpu(call_single_queue, cpu); + unsigned long flags; + + csd_lock(csd); + csd->func = func; + csd->info = info; + + raw_spin_lock_irqsave(&dst->lock, flags); + list_add_tail(&csd->list, &dst->list); + raw_spin_unlock_irqrestore(&dst->lock, flags); + } /* Send a message to all CPUs in the map */ - arch_send_call_function_ipi_mask(data->cpumask_ipi); + arch_send_call_function_ipi_mask(cfd->cpumask_ipi); - /* Optionally wait for the CPUs to complete */ - if (wait) - csd_lock_wait(&data->csd); + if (wait) { + for_each_cpu(cpu, cfd->cpumask) { + struct call_single_data *csd; + + csd = per_cpu_ptr(cfd->csd, cpu); + csd_lock_wait(csd); + } + } } EXPORT_SYMBOL(smp_call_function_many); |