diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/Makefile | 1 | ||||
-rw-r--r-- | kernel/rcupdate.c | 122 | ||||
-rw-r--r-- | kernel/rcutiny.c | 282 | ||||
-rw-r--r-- | kernel/rcutorture.c | 65 | ||||
-rw-r--r-- | kernel/rcutree.c | 465 | ||||
-rw-r--r-- | kernel/rcutree.h | 69 | ||||
-rw-r--r-- | kernel/rcutree_plugin.h | 309 | ||||
-rw-r--r-- | kernel/rcutree_trace.c | 12 | ||||
-rw-r--r-- | kernel/sched.c | 1 | ||||
-rw-r--r-- | kernel/softirq.c | 2 | ||||
-rw-r--r-- | kernel/srcu.c | 74 |
11 files changed, 1040 insertions, 362 deletions
diff --git a/kernel/Makefile b/kernel/Makefile index d7c13d249b2..dcf6789bf54 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -82,6 +82,7 @@ obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o obj-$(CONFIG_TREE_RCU) += rcutree.o obj-$(CONFIG_TREE_PREEMPT_RCU) += rcutree.o obj-$(CONFIG_TREE_RCU_TRACE) += rcutree_trace.o +obj-$(CONFIG_TINY_RCU) += rcutiny.o obj-$(CONFIG_RELAY) += relay.o obj-$(CONFIG_SYSCTL) += utsname_sysctl.o obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index 400183346ad..9b7fd472387 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c @@ -44,7 +44,6 @@ #include <linux/cpu.h> #include <linux/mutex.h> #include <linux/module.h> -#include <linux/kernel_stat.h> #ifdef CONFIG_DEBUG_LOCK_ALLOC static struct lock_class_key rcu_lock_key; @@ -53,8 +52,6 @@ struct lockdep_map rcu_lock_map = EXPORT_SYMBOL_GPL(rcu_lock_map); #endif -int rcu_scheduler_active __read_mostly; - /* * Awaken the corresponding synchronize_rcu() instance now that a * grace period has elapsed. @@ -66,122 +63,3 @@ void wakeme_after_rcu(struct rcu_head *head) rcu = container_of(head, struct rcu_synchronize, head); complete(&rcu->completion); } - -#ifdef CONFIG_TREE_PREEMPT_RCU - -/** - * synchronize_rcu - wait until a grace period has elapsed. - * - * Control will return to the caller some time after a full grace - * period has elapsed, in other words after all currently executing RCU - * read-side critical sections have completed. RCU read-side critical - * sections are delimited by rcu_read_lock() and rcu_read_unlock(), - * and may be nested. - */ -void synchronize_rcu(void) -{ - struct rcu_synchronize rcu; - - if (!rcu_scheduler_active) - return; - - init_completion(&rcu.completion); - /* Will wake me after RCU finished. */ - call_rcu(&rcu.head, wakeme_after_rcu); - /* Wait for it. */ - wait_for_completion(&rcu.completion); -} -EXPORT_SYMBOL_GPL(synchronize_rcu); - -#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ - -/** - * synchronize_sched - wait until an rcu-sched grace period has elapsed. - * - * Control will return to the caller some time after a full rcu-sched - * grace period has elapsed, in other words after all currently executing - * rcu-sched read-side critical sections have completed. These read-side - * critical sections are delimited by rcu_read_lock_sched() and - * rcu_read_unlock_sched(), and may be nested. Note that preempt_disable(), - * local_irq_disable(), and so on may be used in place of - * rcu_read_lock_sched(). - * - * This means that all preempt_disable code sequences, including NMI and - * hardware-interrupt handlers, in progress on entry will have completed - * before this primitive returns. However, this does not guarantee that - * softirq handlers will have completed, since in some kernels, these - * handlers can run in process context, and can block. - * - * This primitive provides the guarantees made by the (now removed) - * synchronize_kernel() API. In contrast, synchronize_rcu() only - * guarantees that rcu_read_lock() sections will have completed. - * In "classic RCU", these two guarantees happen to be one and - * the same, but can differ in realtime RCU implementations. - */ -void synchronize_sched(void) -{ - struct rcu_synchronize rcu; - - if (rcu_blocking_is_gp()) - return; - - init_completion(&rcu.completion); - /* Will wake me after RCU finished. */ - call_rcu_sched(&rcu.head, wakeme_after_rcu); - /* Wait for it. */ - wait_for_completion(&rcu.completion); -} -EXPORT_SYMBOL_GPL(synchronize_sched); - -/** - * synchronize_rcu_bh - wait until an rcu_bh grace period has elapsed. - * - * Control will return to the caller some time after a full rcu_bh grace - * period has elapsed, in other words after all currently executing rcu_bh - * read-side critical sections have completed. RCU read-side critical - * sections are delimited by rcu_read_lock_bh() and rcu_read_unlock_bh(), - * and may be nested. - */ -void synchronize_rcu_bh(void) -{ - struct rcu_synchronize rcu; - - if (rcu_blocking_is_gp()) - return; - - init_completion(&rcu.completion); - /* Will wake me after RCU finished. */ - call_rcu_bh(&rcu.head, wakeme_after_rcu); - /* Wait for it. */ - wait_for_completion(&rcu.completion); -} -EXPORT_SYMBOL_GPL(synchronize_rcu_bh); - -static int __cpuinit rcu_barrier_cpu_hotplug(struct notifier_block *self, - unsigned long action, void *hcpu) -{ - return rcu_cpu_notify(self, action, hcpu); -} - -void __init rcu_init(void) -{ - int i; - - __rcu_init(); - cpu_notifier(rcu_barrier_cpu_hotplug, 0); - - /* - * We don't need protection against CPU-hotplug here because - * this is called early in boot, before either interrupts - * or the scheduler are operational. - */ - for_each_online_cpu(i) - rcu_barrier_cpu_hotplug(NULL, CPU_UP_PREPARE, (void *)(long)i); -} - -void rcu_scheduler_starting(void) -{ - WARN_ON(num_online_cpus() != 1); - WARN_ON(nr_context_switches() > 0); - rcu_scheduler_active = 1; -} diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c new file mode 100644 index 00000000000..9f6d9ff2572 --- /dev/null +++ b/kernel/rcutiny.c @@ -0,0 +1,282 @@ +/* + * Read-Copy Update mechanism for mutual exclusion, the Bloatwatch edition. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright IBM Corporation, 2008 + * + * Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com> + * + * For detailed explanation of Read-Copy Update mechanism see - + * Documentation/RCU + */ +#include <linux/moduleparam.h> +#include <linux/completion.h> +#include <linux/interrupt.h> +#include <linux/notifier.h> +#include <linux/rcupdate.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/mutex.h> +#include <linux/sched.h> +#include <linux/types.h> +#include <linux/init.h> +#include <linux/time.h> +#include <linux/cpu.h> + +/* Global control variables for rcupdate callback mechanism. */ +struct rcu_ctrlblk { + struct rcu_head *rcucblist; /* List of pending callbacks (CBs). */ + struct rcu_head **donetail; /* ->next pointer of last "done" CB. */ + struct rcu_head **curtail; /* ->next pointer of last CB. */ +}; + +/* Definition for rcupdate control block. */ +static struct rcu_ctrlblk rcu_ctrlblk = { + .donetail = &rcu_ctrlblk.rcucblist, + .curtail = &rcu_ctrlblk.rcucblist, +}; + +static struct rcu_ctrlblk rcu_bh_ctrlblk = { + .donetail = &rcu_bh_ctrlblk.rcucblist, + .curtail = &rcu_bh_ctrlblk.rcucblist, +}; + +#ifdef CONFIG_NO_HZ + +static long rcu_dynticks_nesting = 1; + +/* + * Enter dynticks-idle mode, which is an extended quiescent state + * if we have fully entered that mode (i.e., if the new value of + * dynticks_nesting is zero). + */ +void rcu_enter_nohz(void) +{ + if (--rcu_dynticks_nesting == 0) + rcu_sched_qs(0); /* implies rcu_bh_qsctr_inc(0) */ +} + +/* + * Exit dynticks-idle mode, so that we are no longer in an extended + * quiescent state. + */ +void rcu_exit_nohz(void) +{ + rcu_dynticks_nesting++; +} + +#endif /* #ifdef CONFIG_NO_HZ */ + +/* + * Helper function for rcu_qsctr_inc() and rcu_bh_qsctr_inc(). + * Also disable irqs to avoid confusion due to interrupt handlers + * invoking call_rcu(). + */ +static int rcu_qsctr_help(struct rcu_ctrlblk *rcp) +{ + unsigned long flags; + + local_irq_save(flags); + if (rcp->rcucblist != NULL && + rcp->donetail != rcp->curtail) { + rcp->donetail = rcp->curtail; + local_irq_restore(flags); + return 1; + } + local_irq_restore(flags); + + return 0; +} + +/* + * Record an rcu quiescent state. And an rcu_bh quiescent state while we + * are at it, given that any rcu quiescent state is also an rcu_bh + * quiescent state. Use "+" instead of "||" to defeat short circuiting. + */ +void rcu_sched_qs(int cpu) +{ + if (rcu_qsctr_help(&rcu_ctrlblk) + rcu_qsctr_help(&rcu_bh_ctrlblk)) + raise_softirq(RCU_SOFTIRQ); +} + +/* + * Record an rcu_bh quiescent state. + */ +void rcu_bh_qs(int cpu) +{ + if (rcu_qsctr_help(&rcu_bh_ctrlblk)) + raise_softirq(RCU_SOFTIRQ); +} + +/* + * Check to see if the scheduling-clock interrupt came from an extended + * quiescent state, and, if so, tell RCU about it. + */ +void rcu_check_callbacks(int cpu, int user) +{ + if (user || + (idle_cpu(cpu) && + !in_softirq() && + hardirq_count() <= (1 << HARDIRQ_SHIFT))) + rcu_sched_qs(cpu); + else if (!in_softirq()) + rcu_bh_qs(cpu); +} + +/* + * Helper function for rcu_process_callbacks() that operates on the + * specified rcu_ctrlkblk structure. + */ +static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp) +{ + struct rcu_head *next, *list; + unsigned long flags; + + /* If no RCU callbacks ready to invoke, just return. */ + if (&rcp->rcucblist == rcp->donetail) + return; + + /* Move the ready-to-invoke callbacks to a local list. */ + local_irq_save(flags); + list = rcp->rcucblist; + rcp->rcucblist = *rcp->donetail; + *rcp->donetail = NULL; + if (rcp->curtail == rcp->donetail) + rcp->curtail = &rcp->rcucblist; + rcp->donetail = &rcp->rcucblist; + local_irq_restore(flags); + + /* Invoke the callbacks on the local list. */ + while (list) { + next = list->next; + prefetch(next); + list->func(list); + list = next; + } +} + +/* + * Invoke any callbacks whose grace period has completed. + */ +static void rcu_process_callbacks(struct softirq_action *unused) +{ + __rcu_process_callbacks(&rcu_ctrlblk); + __rcu_process_callbacks(&rcu_bh_ctrlblk); +} + +/* + * Wait for a grace period to elapse. But it is illegal to invoke + * synchronize_sched() from within an RCU read-side critical section. + * Therefore, any legal call to synchronize_sched() is a quiescent + * state, and so on a UP system, synchronize_sched() need do nothing. + * Ditto for synchronize_rcu_bh(). (But Lai Jiangshan points out the + * benefits of doing might_sleep() to reduce latency.) + * + * Cool, huh? (Due to Josh Triplett.) + * + * But we want to make this a static inline later. + */ +void synchronize_sched(void) +{ + cond_resched(); +} +EXPORT_SYMBOL_GPL(synchronize_sched); + +void synchronize_rcu_bh(void) +{ + synchronize_sched(); +} +EXPORT_SYMBOL_GPL(synchronize_rcu_bh); + +/* + * Helper function for call_rcu() and call_rcu_bh(). + */ +static void __call_rcu(struct rcu_head *head, + void (*func)(struct rcu_head *rcu), + struct rcu_ctrlblk *rcp) +{ + unsigned long flags; + + head->func = func; + head->next = NULL; + + local_irq_save(flags); + *rcp->curtail = head; + rcp->curtail = &head->next; + local_irq_restore(flags); +} + +/* + * Post an RCU callback to be invoked after the end of an RCU grace + * period. But since we have but one CPU, that would be after any + * quiescent state. + */ +void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) +{ + __call_rcu(head, func, &rcu_ctrlblk); +} +EXPORT_SYMBOL_GPL(call_rcu); + +/* + * Post an RCU bottom-half callback to be invoked after any subsequent + * quiescent state. + */ +void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) +{ + __call_rcu(head, func, &rcu_bh_ctrlblk); +} +EXPORT_SYMBOL_GPL(call_rcu_bh); + +void rcu_barrier(void) +{ + struct rcu_synchronize rcu; + + init_completion(&rcu.completion); + /* Will wake me after RCU finished. */ + call_rcu(&rcu.head, wakeme_after_rcu); + /* Wait for it. */ + wait_for_completion(&rcu.completion); +} +EXPORT_SYMBOL_GPL(rcu_barrier); + +void rcu_barrier_bh(void) +{ + struct rcu_synchronize rcu; + + init_completion(&rcu.completion); + /* Will wake me after RCU finished. */ + call_rcu_bh(&rcu.head, wakeme_after_rcu); + /* Wait for it. */ + wait_for_completion(&rcu.completion); +} +EXPORT_SYMBOL_GPL(rcu_barrier_bh); + +void rcu_barrier_sched(void) +{ + struct rcu_synchronize rcu; + + init_completion(&rcu.completion); + /* Will wake me after RCU finished. */ + call_rcu_sched(&rcu.head, wakeme_after_rcu); + /* Wait for it. */ + wait_for_completion(&rcu.completion); +} +EXPORT_SYMBOL_GPL(rcu_barrier_sched); + +void __init rcu_init(void) +{ + open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); +} diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c index 697c0a0229d..a621a67ef4e 100644 --- a/kernel/rcutorture.c +++ b/kernel/rcutorture.c @@ -327,6 +327,11 @@ rcu_torture_cb(struct rcu_head *p) cur_ops->deferred_free(rp); } +static int rcu_no_completed(void) +{ + return 0; +} + static void rcu_torture_deferred_free(struct rcu_torture *p) { call_rcu(&p->rtort_rcu, rcu_torture_cb); @@ -388,6 +393,21 @@ static struct rcu_torture_ops rcu_sync_ops = { .name = "rcu_sync" }; +static struct rcu_torture_ops rcu_expedited_ops = { + .init = rcu_sync_torture_init, + .cleanup = NULL, + .readlock = rcu_torture_read_lock, + .read_delay = rcu_read_delay, /* just reuse rcu's version. */ + .readunlock = rcu_torture_read_unlock, + .completed = rcu_no_completed, + .deferred_free = rcu_sync_torture_deferred_free, + .sync = synchronize_rcu_expedited, + .cb_barrier = NULL, + .stats = NULL, + .irq_capable = 1, + .name = "rcu_expedited" +}; + /* * Definitions for rcu_bh torture testing. */ @@ -547,6 +567,25 @@ static struct rcu_torture_ops srcu_ops = { .name = "srcu" }; +static void srcu_torture_synchronize_expedited(void) +{ + synchronize_srcu_expedited(&srcu_ctl); +} + +static struct rcu_torture_ops srcu_expedited_ops = { + .init = srcu_torture_init, + .cleanup = srcu_torture_cleanup, + .readlock = srcu_torture_read_lock, + .read_delay = srcu_read_delay, + .readunlock = srcu_torture_read_unlock, + .completed = srcu_torture_completed, + .deferred_free = rcu_sync_torture_deferred_free, + .sync = srcu_torture_synchronize_expedited, + .cb_barrier = NULL, + .stats = srcu_torture_stats, + .name = "srcu_expedited" +}; + /* * Definitions for sched torture testing. */ @@ -562,11 +601,6 @@ static void sched_torture_read_unlock(int idx) preempt_enable(); } -static int sched_torture_completed(void) -{ - return 0; -} - static void rcu_sched_torture_deferred_free(struct rcu_torture *p) { call_rcu_sched(&p->rtort_rcu, rcu_torture_cb); @@ -583,7 +617,7 @@ static struct rcu_torture_ops sched_ops = { .readlock = sched_torture_read_lock, .read_delay = rcu_read_delay, /* just reuse rcu's version. */ .readunlock = sched_torture_read_unlock, - .completed = sched_torture_completed, + .completed = rcu_no_completed, .deferred_free = rcu_sched_torture_deferred_free, .sync = sched_torture_synchronize, .cb_barrier = rcu_barrier_sched, @@ -592,13 +626,13 @@ static struct rcu_torture_ops sched_ops = { .name = "sched" }; -static struct rcu_torture_ops sched_ops_sync = { +static struct rcu_torture_ops sched_sync_ops = { .init = rcu_sync_torture_init, .cleanup = NULL, .readlock = sched_torture_read_lock, .read_delay = rcu_read_delay, /* just reuse rcu's version. */ .readunlock = sched_torture_read_unlock, - .completed = sched_torture_completed, + .completed = rcu_no_completed, .deferred_free = rcu_sync_torture_deferred_free, .sync = sched_torture_synchronize, .cb_barrier = NULL, @@ -612,7 +646,7 @@ static struct rcu_torture_ops sched_expedited_ops = { .readlock = sched_torture_read_lock, .read_delay = rcu_read_delay, /* just reuse rcu's version. */ .readunlock = sched_torture_read_unlock, - .completed = sched_torture_completed, + .completed = rcu_no_completed, .deferred_free = rcu_sync_torture_deferred_free, .sync = synchronize_sched_expedited, .cb_barrier = NULL, @@ -1097,9 +1131,10 @@ rcu_torture_init(void) int cpu; int firsterr = 0; static struct rcu_torture_ops *torture_ops[] = - { &rcu_ops, &rcu_sync_ops, &rcu_bh_ops, &rcu_bh_sync_ops, - &sched_expedited_ops, - &srcu_ops, &sched_ops, &sched_ops_sync, }; + { &rcu_ops, &rcu_sync_ops, &rcu_expedited_ops, + &rcu_bh_ops, &rcu_bh_sync_ops, + &srcu_ops, &srcu_expedited_ops, + &sched_ops, &sched_sync_ops, &sched_expedited_ops, }; mutex_lock(&fullstop_mutex); @@ -1110,8 +1145,12 @@ rcu_torture_init(void) break; } if (i == ARRAY_SIZE(torture_ops)) { - printk(KERN_ALERT "rcutorture: invalid torture type: \"%s\"\n", + printk(KERN_ALERT "rcu-torture: invalid torture type: \"%s\"\n", torture_type); + printk(KERN_ALERT "rcu-torture types:"); + for (i = 0; i < ARRAY_SIZE(torture_ops); i++) + printk(KERN_ALERT " %s", torture_ops[i]->name); + printk(KERN_ALERT "\n"); mutex_unlock(&fullstop_mutex); return -EINVAL; } diff --git a/kernel/rcutree.c b/kernel/rcutree.c index f3077c0ab18..53ae9598f79 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -46,18 +46,22 @@ #include <linux/cpu.h> #include <linux/mutex.h> #include <linux/time.h> +#include <linux/kernel_stat.h> #include "rcutree.h" /* Data structures. */ +static struct lock_class_key rcu_node_class[NUM_RCU_LVLS]; + #define RCU_STATE_INITIALIZER(name) { \ .level = { &name.node[0] }, \ .levelcnt = { \ NUM_RCU_LVL_0, /* root of hierarchy. */ \ NUM_RCU_LVL_1, \ NUM_RCU_LVL_2, \ - NUM_RCU_LVL_3, /* == MAX_RCU_LVLS */ \ + NUM_RCU_LVL_3, \ + NUM_RCU_LVL_4, /* == MAX_RCU_LVLS */ \ }, \ .signaled = RCU_GP_IDLE, \ .gpnum = -300, \ @@ -77,6 +81,8 @@ DEFINE_PER_CPU(struct rcu_data, rcu_sched_data); struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh_state); DEFINE_PER_CPU(struct rcu_data, rcu_bh_data); +static int rcu_scheduler_active __read_mostly; + /* * Return true if an RCU grace period is in progress. The ACCESS_ONCE()s @@ -98,7 +104,7 @@ void rcu_sched_qs(int cpu) struct rcu_data *rdp; rdp = &per_cpu(rcu_sched_data, cpu); - rdp->passed_quiesc_completed = rdp->completed; + rdp->passed_quiesc_completed = rdp->gpnum - 1; barrier(); rdp->passed_quiesc = 1; rcu_preempt_note_context_switch(cpu); @@ -109,7 +115,7 @@ void rcu_bh_qs(int cpu) struct rcu_data *rdp; rdp = &per_cpu(rcu_bh_data, cpu); - rdp->passed_quiesc_completed = rdp->completed; + rdp->passed_quiesc_completed = rdp->gpnum - 1; barrier(); rdp->passed_quiesc = 1; } @@ -335,28 +341,9 @@ void rcu_irq_exit(void) set_need_resched(); } -/* - * Record the specified "completed" value, which is later used to validate - * dynticks counter manipulations. Specify "rsp->completed - 1" to - * unconditionally invalidate any future dynticks manipulations (which is - * useful at the beginning of a grace period). - */ -static void dyntick_record_completed(struct rcu_state *rsp, long comp) -{ - rsp->dynticks_completed = comp; -} - #ifdef CONFIG_SMP /* - * Recall the previously recorded value of the completion for dynticks. - */ -static long dyntick_recall_completed(struct rcu_state *rsp) -{ - return rsp->dynticks_completed; -} - -/* * Snapshot the specified CPU's dynticks counter so that we can later * credit them with an implicit quiescent state. Return 1 if this CPU * is in dynticks idle mode, which is an extended quiescent state. @@ -419,24 +406,8 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) #else /* #ifdef CONFIG_NO_HZ */ -static void dyntick_record_completed(struct rcu_state *rsp, long comp) -{ -} - #ifdef CONFIG_SMP -/* - * If there are no dynticks, then the only way that a CPU can passively - * be in a quiescent state is to be offline. Unlike dynticks idle, which - * is a point in time during the prior (already finished) grace period, - * an offline CPU is always in a quiescent state, and thus can be - * unconditionally applied. So just return the current value of completed. - */ -static long dyntick_recall_completed(struct rcu_state *rsp) -{ - return rsp->completed; -} - static int dyntick_save_progress_counter(struct rcu_data *rdp) { return 0; @@ -553,13 +524,33 @@ static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp) /* * Update CPU-local rcu_data state to record the newly noticed grace period. * This is used both when we started the grace period and when we notice - * that someone else started the grace period. + * that someone else started the grace period. The caller must hold the + * ->lock of the leaf rcu_node structure corresponding to the current CPU, + * and must have irqs disabled. */ +static void __note_new_gpnum(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp) +{ + if (rdp->gpnum != rnp->gpnum) { + rdp->qs_pending = 1; + rdp->passed_quiesc = 0; + rdp->gpnum = rnp->gpnum; + } +} + static void note_new_gpnum(struct rcu_state *rsp, struct rcu_data *rdp) { - rdp->qs_pending = 1; - rdp->passed_quiesc = 0; - rdp->gpnum = rsp->gpnum; + unsigned long flags; + struct rcu_node *rnp; + + local_irq_save(flags); + rnp = rdp->mynode; + if (rdp->gpnum == ACCESS_ONCE(rnp->gpnum) || /* outside lock. */ + !spin_trylock(&rnp->lock)) { /* irqs already off, retry later. */ + local_irq_restore(flags); + return; + } + __note_new_gpnum(rsp, rnp, rdp); + spin_unlock_irqrestore(&rnp->lock, flags); } /* @@ -583,6 +574,79 @@ check_for_new_grace_period(struct rcu_state *rsp, struct rcu_data *rdp) } /* + * Advance this CPU's callbacks, but only if the current grace period + * has ended. This may be called only from the CPU to whom the rdp + * belongs. In addition, the corresponding leaf rcu_node structure's + * ->lock must be held by the caller, with irqs disabled. + */ +static void +__rcu_process_gp_end(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp) +{ + /* Did another grace period end? */ + if (rdp->completed != rnp->completed) { + + /* Advance callbacks. No harm if list empty. */ + rdp->nxttail[RCU_DONE_TAIL] = rdp->nxttail[RCU_WAIT_TAIL]; + rdp->nxttail[RCU_WAIT_TAIL] = rdp->nxttail[RCU_NEXT_READY_TAIL]; + rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL]; + + /* Remember that we saw this grace-period completion. */ + rdp->completed = rnp->completed; + } +} + +/* + * Advance this CPU's callbacks, but only if the current grace period + * has ended. This may be called only from the CPU to whom the rdp + * belongs. + */ +static void +rcu_process_gp_end(struct rcu_state *rsp, struct rcu_data *rdp) +{ + unsigned long flags; + struct rcu_node *rnp; + + local_irq_save(flags); + rnp = rdp->mynode; + if (rdp->completed == ACCESS_ONCE(rnp->completed) || /* outside lock. */ + !spin_trylock(&rnp->lock)) { /* irqs already off, retry later. */ + local_irq_restore(flags); + return; + } + __rcu_process_gp_end(rsp, rnp, rdp); + spin_unlock_irqrestore(&rnp->lock, flags); +} + +/* + * Do per-CPU grace-period initialization for running CPU. The caller + * must hold the lock of the leaf rcu_node structure corresponding to + * this CPU. + */ +static void +rcu_start_gp_per_cpu(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp) +{ + /* Prior grace period ended, so advance callbacks for current CPU. */ + __rcu_process_gp_end(rsp, rnp, rdp); + + /* + * Because this CPU just now started the new grace period, we know + * that all of its callbacks will be covered by this upcoming grace + * period, even the ones that were registered arbitrarily recently. + * Therefore, advance all outstanding callbacks to RCU_WAIT_TAIL. + * + * Other CPUs cannot be sure exactly when the grace period started. + * Therefore, their recently registered callbacks must pass through + * an additional RCU_NEXT_READY stage, so that they will be handled + * by the next RCU grace period. + */ + rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL]; + rdp->nxttail[RCU_WAIT_TAIL] = rdp->nxttail[RCU_NEXT_TAIL]; + + /* Set state so that this CPU will detect the next quiescent state. */ + __note_new_gpnum(rsp, rnp, rdp); +} + +/* * Start a new RCU grace period if warranted, re-initializing the hierarchy * in preparation for detecting the next grace period. The caller must hold * the root node's ->lock, which is released before return. Hard irqs must @@ -596,7 +660,23 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags) struct rcu_node *rnp = rcu_get_root(rsp); if (!cpu_needs_another_gp(rsp, rdp)) { - spin_unlock_irqrestore(&rnp->lock, flags); + if (rnp->completed == rsp->completed) { + spin_unlock_irqrestore(&rnp->lock, flags); + return; + } + spin_unlock(&rnp->lock); /* irqs remain disabled. */ + + /* + * Propagate new ->completed value to rcu_node structures + * so that other CPUs don't have to wait until the start + * of the next grace period to process their callbacks. + */ + rcu_for_each_node_breadth_first(rsp, rnp) { + spin_lock(&rnp->lock); /* irqs already disabled. */ + rnp->completed = rsp->completed; + spin_unlock(&rnp->lock); /* irqs remain disabled. */ + } + local_irq_restore(flags); return; } @@ -606,29 +686,15 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags) rsp->signaled = RCU_GP_INIT; /* Hold off force_quiescent_state. */ rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS; record_gp_stall_check_time(rsp); - dyntick_record_completed(rsp, rsp->completed - 1); - note_new_gpnum(rsp, rdp); - - /* - * Because this CPU just now started the new grace period, we know - * that all of its callbacks will be covered by this upcoming grace - * period, even the ones that were registered arbitrarily recently. - * Therefore, advance all outstanding callbacks to RCU_WAIT_TAIL. - * - * Other CPUs cannot be sure exactly when the grace period started. - * Therefore, their recently registered callbacks must pass through - * an additional RCU_NEXT_READY stage, so that they will be handled - * by the next RCU grace period. - */ - rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL]; - rdp->nxttail[RCU_WAIT_TAIL] = rdp->nxttail[RCU_NEXT_TAIL]; /* Special-case the common single-level case. */ if (NUM_RCU_NODES == 1) { rcu_preempt_check_blocked_tasks(rnp); rnp->qsmask = rnp->qsmaskinit; rnp->gpnum = rsp->gpnum; + rnp->completed = rsp->completed; rsp->signaled = RCU_SIGNAL_INIT; /* force_quiescent_state OK. */ + rcu_start_gp_per_cpu(rsp, rnp, rdp); spin_unlock_irqrestore(&rnp->lock, flags); return; } @@ -661,6 +727,9 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags) rcu_preempt_check_blocked_tasks(rnp); rnp->qsmask = rnp->qsmaskinit; rnp->gpnum = rsp->gpnum; + rnp->completed = rsp->completed; + if (rnp == rdp->mynode) + rcu_start_gp_per_cpu(rsp, rnp, rdp); spin_unlock(&rnp->lock); /* irqs remain disabled. */ } @@ -672,58 +741,32 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags) } /* - * Advance this CPU's callbacks, but only if the current grace period - * has ended. This may be called only from the CPU to whom the rdp - * belongs. + * Report a full set of quiescent states to the specified rcu_state + * data structure. This involves cleaning up after the prior grace + * period and letting rcu_start_gp() start up the next grace period + * if one is needed. Note that the caller must hold rnp->lock, as + * required by rcu_start_gp(), which will release it. */ -static void -rcu_process_gp_end(struct rcu_state *rsp, struct rcu_data *rdp) -{ - long completed_snap; - unsigned long flags; - - local_irq_save(flags); - completed_snap = ACCESS_ONCE(rsp->completed); /* outside of lock. */ - - /* Did another grace period end? */ - if (rdp->completed != completed_snap) { - - /* Advance callbacks. No harm if list empty. */ - rdp->nxttail[RCU_DONE_TAIL] = rdp->nxttail[RCU_WAIT_TAIL]; - rdp->nxttail[RCU_WAIT_TAIL] = rdp->nxttail[RCU_NEXT_READY_TAIL]; - rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL]; - - /* Remember that we saw this grace-period completion. */ - rdp->completed = completed_snap; - } - local_irq_restore(flags); -} - -/* - * Clean up after the prior grace period and let rcu_start_gp() start up - * the next grace period if one is needed. Note that the caller must - * hold rnp->lock, as required by rcu_start_gp(), which will release it. - */ -static void cpu_quiet_msk_finish(struct rcu_state *rsp, unsigned long flags) +static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags) __releases(rcu_get_root(rsp)->lock) { WARN_ON_ONCE(!rcu_gp_in_progress(rsp)); rsp->completed = rsp->gpnum; rsp->signaled = RCU_GP_IDLE; - rcu_process_gp_end(rsp, rsp->rda[smp_processor_id()]); rcu_start_gp(rsp, flags); /* releases root node's rnp->lock. */ } /* - * Similar to cpu_quiet(), for which it is a helper function. Allows - * a group of CPUs to be quieted at one go, though all the CPUs in the - * group must be represented by the same leaf rcu_node structure. - * That structure's lock must be held upon entry, and it is released - * before return. + * Similar to rcu_report_qs_rdp(), for which it is a helper function. + * Allows quiescent states for a group of CPUs to be reported at one go + * to the specified rcu_node structure, though all the CPUs in the group + * must be represented by the same rcu_node structure (which need not be + * a leaf rcu_node structure, though it often will be). That structure's + * lock must be held upon entry, and it is released before return. */ static void -cpu_quiet_msk(unsigned long mask, struct rcu_state *rsp, struct rcu_node *rnp, - unsigned long flags) +rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp, + struct rcu_node *rnp, unsigned long flags) __releases(rnp->lock) { struct rcu_node *rnp_c; @@ -759,21 +802,23 @@ cpu_quiet_msk(unsigned long mask, struct rcu_state *rsp, struct rcu_node *rnp, /* * Get here if we are the last CPU to pass through a quiescent - * state for this grace period. Invoke cpu_quiet_msk_finish() + * state for this grace period. Invoke rcu_report_qs_rsp() * to clean up and start the next grace period if one is needed. */ - cpu_quiet_msk_finish(rsp, flags); /* releases rnp->lock. */ + rcu_report_qs_rsp(rsp, flags); /* releases rnp->lock. */ } /* - * Record a quiescent state for the specified CPU, which must either be - * the current CPU. The lastcomp argument is used to make sure we are - * still in the grace period of interest. We don't want to end the current - * grace period based on quiescent states detected in an earlier grace - * period! + * Record a quiescent state for the specified CPU to that CPU's rcu_data + * structure. This must be either called from the specified CPU, or + * called when the specified CPU is known to be offline (and when it is + * also known that no other CPU is concurrently trying to help the offline + * CPU). The lastcomp argument is used to make sure we are still in the + * grace period of interest. We don't want to end the current grace period + * based on quiescent states detected in an earlier grace period! */ static void -cpu_quiet(int cpu, struct rcu_state *rsp, struct rcu_data *rdp, long lastcomp) +rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp, long lastcomp) { unsigned long flags; unsigned long mask; @@ -781,15 +826,15 @@ cpu_quiet(int cpu, struct rcu_state *rsp, struct rcu_data *rdp, long lastcomp) rnp = rdp->mynode; spin_lock_irqsave(&rnp->lock, flags); - if (lastcomp != ACCESS_ONCE(rsp->completed)) { + if (lastcomp != rnp->completed) { /* * Someone beat us to it for this grace period, so leave. * The race with GP start is resolved by the fact that we * hold the leaf rcu_node lock, so that the per-CPU bits * cannot yet be initialized -- so we would simply find our - * CPU's bit already cleared in cpu_quiet_msk() if this race - * occurred. + * CPU's bit already cleared in rcu_report_qs_rnp() if this + * race occurred. */ rdp->passed_quiesc = 0; /* try again later! */ spin_unlock_irqrestore(&rnp->lock, flags); @@ -807,7 +852,7 @@ cpu_quiet(int cpu, struct rcu_state *rsp, struct rcu_data *rdp, long lastcomp) */ rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL]; - cpu_quiet_msk(mask, rsp, rnp, flags); /* releases rnp->lock */ + rcu_report_qs_rnp(mask, rsp, rnp, flags); /* rlses rnp->lock */ } } @@ -838,8 +883,11 @@ rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp) if (!rdp->passed_quiesc) return; - /* Tell RCU we are done (but cpu_quiet() will be the judge of that). */ - cpu_quiet(rdp->cpu, rsp, rdp, rdp->passed_quiesc_completed); + /* + * Tell RCU we are done (but rcu_report_qs_rdp() will be the + * judge of that). + */ + rcu_report_qs_rdp(rdp->cpu, rsp, rdp, rdp->passed_quiesc_completed); } #ifdef CONFIG_HOTPLUG_CPU @@ -899,8 +947,8 @@ static void rcu |