aboutsummaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/Makefile1
-rw-r--r--kernel/rcupdate.c122
-rw-r--r--kernel/rcutiny.c282
-rw-r--r--kernel/rcutorture.c65
-rw-r--r--kernel/rcutree.c465
-rw-r--r--kernel/rcutree.h69
-rw-r--r--kernel/rcutree_plugin.h309
-rw-r--r--kernel/rcutree_trace.c12
-rw-r--r--kernel/sched.c1
-rw-r--r--kernel/softirq.c2
-rw-r--r--kernel/srcu.c74
11 files changed, 1040 insertions, 362 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index d7c13d249b2..dcf6789bf54 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -82,6 +82,7 @@ obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o
obj-$(CONFIG_TREE_RCU) += rcutree.o
obj-$(CONFIG_TREE_PREEMPT_RCU) += rcutree.o
obj-$(CONFIG_TREE_RCU_TRACE) += rcutree_trace.o
+obj-$(CONFIG_TINY_RCU) += rcutiny.o
obj-$(CONFIG_RELAY) += relay.o
obj-$(CONFIG_SYSCTL) += utsname_sysctl.o
obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index 400183346ad..9b7fd472387 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -44,7 +44,6 @@
#include <linux/cpu.h>
#include <linux/mutex.h>
#include <linux/module.h>
-#include <linux/kernel_stat.h>
#ifdef CONFIG_DEBUG_LOCK_ALLOC
static struct lock_class_key rcu_lock_key;
@@ -53,8 +52,6 @@ struct lockdep_map rcu_lock_map =
EXPORT_SYMBOL_GPL(rcu_lock_map);
#endif
-int rcu_scheduler_active __read_mostly;
-
/*
* Awaken the corresponding synchronize_rcu() instance now that a
* grace period has elapsed.
@@ -66,122 +63,3 @@ void wakeme_after_rcu(struct rcu_head *head)
rcu = container_of(head, struct rcu_synchronize, head);
complete(&rcu->completion);
}
-
-#ifdef CONFIG_TREE_PREEMPT_RCU
-
-/**
- * synchronize_rcu - wait until a grace period has elapsed.
- *
- * Control will return to the caller some time after a full grace
- * period has elapsed, in other words after all currently executing RCU
- * read-side critical sections have completed. RCU read-side critical
- * sections are delimited by rcu_read_lock() and rcu_read_unlock(),
- * and may be nested.
- */
-void synchronize_rcu(void)
-{
- struct rcu_synchronize rcu;
-
- if (!rcu_scheduler_active)
- return;
-
- init_completion(&rcu.completion);
- /* Will wake me after RCU finished. */
- call_rcu(&rcu.head, wakeme_after_rcu);
- /* Wait for it. */
- wait_for_completion(&rcu.completion);
-}
-EXPORT_SYMBOL_GPL(synchronize_rcu);
-
-#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
-
-/**
- * synchronize_sched - wait until an rcu-sched grace period has elapsed.
- *
- * Control will return to the caller some time after a full rcu-sched
- * grace period has elapsed, in other words after all currently executing
- * rcu-sched read-side critical sections have completed. These read-side
- * critical sections are delimited by rcu_read_lock_sched() and
- * rcu_read_unlock_sched(), and may be nested. Note that preempt_disable(),
- * local_irq_disable(), and so on may be used in place of
- * rcu_read_lock_sched().
- *
- * This means that all preempt_disable code sequences, including NMI and
- * hardware-interrupt handlers, in progress on entry will have completed
- * before this primitive returns. However, this does not guarantee that
- * softirq handlers will have completed, since in some kernels, these
- * handlers can run in process context, and can block.
- *
- * This primitive provides the guarantees made by the (now removed)
- * synchronize_kernel() API. In contrast, synchronize_rcu() only
- * guarantees that rcu_read_lock() sections will have completed.
- * In "classic RCU", these two guarantees happen to be one and
- * the same, but can differ in realtime RCU implementations.
- */
-void synchronize_sched(void)
-{
- struct rcu_synchronize rcu;
-
- if (rcu_blocking_is_gp())
- return;
-
- init_completion(&rcu.completion);
- /* Will wake me after RCU finished. */
- call_rcu_sched(&rcu.head, wakeme_after_rcu);
- /* Wait for it. */
- wait_for_completion(&rcu.completion);
-}
-EXPORT_SYMBOL_GPL(synchronize_sched);
-
-/**
- * synchronize_rcu_bh - wait until an rcu_bh grace period has elapsed.
- *
- * Control will return to the caller some time after a full rcu_bh grace
- * period has elapsed, in other words after all currently executing rcu_bh
- * read-side critical sections have completed. RCU read-side critical
- * sections are delimited by rcu_read_lock_bh() and rcu_read_unlock_bh(),
- * and may be nested.
- */
-void synchronize_rcu_bh(void)
-{
- struct rcu_synchronize rcu;
-
- if (rcu_blocking_is_gp())
- return;
-
- init_completion(&rcu.completion);
- /* Will wake me after RCU finished. */
- call_rcu_bh(&rcu.head, wakeme_after_rcu);
- /* Wait for it. */
- wait_for_completion(&rcu.completion);
-}
-EXPORT_SYMBOL_GPL(synchronize_rcu_bh);
-
-static int __cpuinit rcu_barrier_cpu_hotplug(struct notifier_block *self,
- unsigned long action, void *hcpu)
-{
- return rcu_cpu_notify(self, action, hcpu);
-}
-
-void __init rcu_init(void)
-{
- int i;
-
- __rcu_init();
- cpu_notifier(rcu_barrier_cpu_hotplug, 0);
-
- /*
- * We don't need protection against CPU-hotplug here because
- * this is called early in boot, before either interrupts
- * or the scheduler are operational.
- */
- for_each_online_cpu(i)
- rcu_barrier_cpu_hotplug(NULL, CPU_UP_PREPARE, (void *)(long)i);
-}
-
-void rcu_scheduler_starting(void)
-{
- WARN_ON(num_online_cpus() != 1);
- WARN_ON(nr_context_switches() > 0);
- rcu_scheduler_active = 1;
-}
diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c
new file mode 100644
index 00000000000..9f6d9ff2572
--- /dev/null
+++ b/kernel/rcutiny.c
@@ -0,0 +1,282 @@
+/*
+ * Read-Copy Update mechanism for mutual exclusion, the Bloatwatch edition.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright IBM Corporation, 2008
+ *
+ * Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+ *
+ * For detailed explanation of Read-Copy Update mechanism see -
+ * Documentation/RCU
+ */
+#include <linux/moduleparam.h>
+#include <linux/completion.h>
+#include <linux/interrupt.h>
+#include <linux/notifier.h>
+#include <linux/rcupdate.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/sched.h>
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/time.h>
+#include <linux/cpu.h>
+
+/* Global control variables for rcupdate callback mechanism. */
+struct rcu_ctrlblk {
+ struct rcu_head *rcucblist; /* List of pending callbacks (CBs). */
+ struct rcu_head **donetail; /* ->next pointer of last "done" CB. */
+ struct rcu_head **curtail; /* ->next pointer of last CB. */
+};
+
+/* Definition for rcupdate control block. */
+static struct rcu_ctrlblk rcu_ctrlblk = {
+ .donetail = &rcu_ctrlblk.rcucblist,
+ .curtail = &rcu_ctrlblk.rcucblist,
+};
+
+static struct rcu_ctrlblk rcu_bh_ctrlblk = {
+ .donetail = &rcu_bh_ctrlblk.rcucblist,
+ .curtail = &rcu_bh_ctrlblk.rcucblist,
+};
+
+#ifdef CONFIG_NO_HZ
+
+static long rcu_dynticks_nesting = 1;
+
+/*
+ * Enter dynticks-idle mode, which is an extended quiescent state
+ * if we have fully entered that mode (i.e., if the new value of
+ * dynticks_nesting is zero).
+ */
+void rcu_enter_nohz(void)
+{
+ if (--rcu_dynticks_nesting == 0)
+ rcu_sched_qs(0); /* implies rcu_bh_qsctr_inc(0) */
+}
+
+/*
+ * Exit dynticks-idle mode, so that we are no longer in an extended
+ * quiescent state.
+ */
+void rcu_exit_nohz(void)
+{
+ rcu_dynticks_nesting++;
+}
+
+#endif /* #ifdef CONFIG_NO_HZ */
+
+/*
+ * Helper function for rcu_qsctr_inc() and rcu_bh_qsctr_inc().
+ * Also disable irqs to avoid confusion due to interrupt handlers
+ * invoking call_rcu().
+ */
+static int rcu_qsctr_help(struct rcu_ctrlblk *rcp)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ if (rcp->rcucblist != NULL &&
+ rcp->donetail != rcp->curtail) {
+ rcp->donetail = rcp->curtail;
+ local_irq_restore(flags);
+ return 1;
+ }
+ local_irq_restore(flags);
+
+ return 0;
+}
+
+/*
+ * Record an rcu quiescent state. And an rcu_bh quiescent state while we
+ * are at it, given that any rcu quiescent state is also an rcu_bh
+ * quiescent state. Use "+" instead of "||" to defeat short circuiting.
+ */
+void rcu_sched_qs(int cpu)
+{
+ if (rcu_qsctr_help(&rcu_ctrlblk) + rcu_qsctr_help(&rcu_bh_ctrlblk))
+ raise_softirq(RCU_SOFTIRQ);
+}
+
+/*
+ * Record an rcu_bh quiescent state.
+ */
+void rcu_bh_qs(int cpu)
+{
+ if (rcu_qsctr_help(&rcu_bh_ctrlblk))
+ raise_softirq(RCU_SOFTIRQ);
+}
+
+/*
+ * Check to see if the scheduling-clock interrupt came from an extended
+ * quiescent state, and, if so, tell RCU about it.
+ */
+void rcu_check_callbacks(int cpu, int user)
+{
+ if (user ||
+ (idle_cpu(cpu) &&
+ !in_softirq() &&
+ hardirq_count() <= (1 << HARDIRQ_SHIFT)))
+ rcu_sched_qs(cpu);
+ else if (!in_softirq())
+ rcu_bh_qs(cpu);
+}
+
+/*
+ * Helper function for rcu_process_callbacks() that operates on the
+ * specified rcu_ctrlkblk structure.
+ */
+static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp)
+{
+ struct rcu_head *next, *list;
+ unsigned long flags;
+
+ /* If no RCU callbacks ready to invoke, just return. */
+ if (&rcp->rcucblist == rcp->donetail)
+ return;
+
+ /* Move the ready-to-invoke callbacks to a local list. */
+ local_irq_save(flags);
+ list = rcp->rcucblist;
+ rcp->rcucblist = *rcp->donetail;
+ *rcp->donetail = NULL;
+ if (rcp->curtail == rcp->donetail)
+ rcp->curtail = &rcp->rcucblist;
+ rcp->donetail = &rcp->rcucblist;
+ local_irq_restore(flags);
+
+ /* Invoke the callbacks on the local list. */
+ while (list) {
+ next = list->next;
+ prefetch(next);
+ list->func(list);
+ list = next;
+ }
+}
+
+/*
+ * Invoke any callbacks whose grace period has completed.
+ */
+static void rcu_process_callbacks(struct softirq_action *unused)
+{
+ __rcu_process_callbacks(&rcu_ctrlblk);
+ __rcu_process_callbacks(&rcu_bh_ctrlblk);
+}
+
+/*
+ * Wait for a grace period to elapse. But it is illegal to invoke
+ * synchronize_sched() from within an RCU read-side critical section.
+ * Therefore, any legal call to synchronize_sched() is a quiescent
+ * state, and so on a UP system, synchronize_sched() need do nothing.
+ * Ditto for synchronize_rcu_bh(). (But Lai Jiangshan points out the
+ * benefits of doing might_sleep() to reduce latency.)
+ *
+ * Cool, huh? (Due to Josh Triplett.)
+ *
+ * But we want to make this a static inline later.
+ */
+void synchronize_sched(void)
+{
+ cond_resched();
+}
+EXPORT_SYMBOL_GPL(synchronize_sched);
+
+void synchronize_rcu_bh(void)
+{
+ synchronize_sched();
+}
+EXPORT_SYMBOL_GPL(synchronize_rcu_bh);
+
+/*
+ * Helper function for call_rcu() and call_rcu_bh().
+ */
+static void __call_rcu(struct rcu_head *head,
+ void (*func)(struct rcu_head *rcu),
+ struct rcu_ctrlblk *rcp)
+{
+ unsigned long flags;
+
+ head->func = func;
+ head->next = NULL;
+
+ local_irq_save(flags);
+ *rcp->curtail = head;
+ rcp->curtail = &head->next;
+ local_irq_restore(flags);
+}
+
+/*
+ * Post an RCU callback to be invoked after the end of an RCU grace
+ * period. But since we have but one CPU, that would be after any
+ * quiescent state.
+ */
+void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
+{
+ __call_rcu(head, func, &rcu_ctrlblk);
+}
+EXPORT_SYMBOL_GPL(call_rcu);
+
+/*
+ * Post an RCU bottom-half callback to be invoked after any subsequent
+ * quiescent state.
+ */
+void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
+{
+ __call_rcu(head, func, &rcu_bh_ctrlblk);
+}
+EXPORT_SYMBOL_GPL(call_rcu_bh);
+
+void rcu_barrier(void)
+{
+ struct rcu_synchronize rcu;
+
+ init_completion(&rcu.completion);
+ /* Will wake me after RCU finished. */
+ call_rcu(&rcu.head, wakeme_after_rcu);
+ /* Wait for it. */
+ wait_for_completion(&rcu.completion);
+}
+EXPORT_SYMBOL_GPL(rcu_barrier);
+
+void rcu_barrier_bh(void)
+{
+ struct rcu_synchronize rcu;
+
+ init_completion(&rcu.completion);
+ /* Will wake me after RCU finished. */
+ call_rcu_bh(&rcu.head, wakeme_after_rcu);
+ /* Wait for it. */
+ wait_for_completion(&rcu.completion);
+}
+EXPORT_SYMBOL_GPL(rcu_barrier_bh);
+
+void rcu_barrier_sched(void)
+{
+ struct rcu_synchronize rcu;
+
+ init_completion(&rcu.completion);
+ /* Will wake me after RCU finished. */
+ call_rcu_sched(&rcu.head, wakeme_after_rcu);
+ /* Wait for it. */
+ wait_for_completion(&rcu.completion);
+}
+EXPORT_SYMBOL_GPL(rcu_barrier_sched);
+
+void __init rcu_init(void)
+{
+ open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
+}
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index 697c0a0229d..a621a67ef4e 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
@@ -327,6 +327,11 @@ rcu_torture_cb(struct rcu_head *p)
cur_ops->deferred_free(rp);
}
+static int rcu_no_completed(void)
+{
+ return 0;
+}
+
static void rcu_torture_deferred_free(struct rcu_torture *p)
{
call_rcu(&p->rtort_rcu, rcu_torture_cb);
@@ -388,6 +393,21 @@ static struct rcu_torture_ops rcu_sync_ops = {
.name = "rcu_sync"
};
+static struct rcu_torture_ops rcu_expedited_ops = {
+ .init = rcu_sync_torture_init,
+ .cleanup = NULL,
+ .readlock = rcu_torture_read_lock,
+ .read_delay = rcu_read_delay, /* just reuse rcu's version. */
+ .readunlock = rcu_torture_read_unlock,
+ .completed = rcu_no_completed,
+ .deferred_free = rcu_sync_torture_deferred_free,
+ .sync = synchronize_rcu_expedited,
+ .cb_barrier = NULL,
+ .stats = NULL,
+ .irq_capable = 1,
+ .name = "rcu_expedited"
+};
+
/*
* Definitions for rcu_bh torture testing.
*/
@@ -547,6 +567,25 @@ static struct rcu_torture_ops srcu_ops = {
.name = "srcu"
};
+static void srcu_torture_synchronize_expedited(void)
+{
+ synchronize_srcu_expedited(&srcu_ctl);
+}
+
+static struct rcu_torture_ops srcu_expedited_ops = {
+ .init = srcu_torture_init,
+ .cleanup = srcu_torture_cleanup,
+ .readlock = srcu_torture_read_lock,
+ .read_delay = srcu_read_delay,
+ .readunlock = srcu_torture_read_unlock,
+ .completed = srcu_torture_completed,
+ .deferred_free = rcu_sync_torture_deferred_free,
+ .sync = srcu_torture_synchronize_expedited,
+ .cb_barrier = NULL,
+ .stats = srcu_torture_stats,
+ .name = "srcu_expedited"
+};
+
/*
* Definitions for sched torture testing.
*/
@@ -562,11 +601,6 @@ static void sched_torture_read_unlock(int idx)
preempt_enable();
}
-static int sched_torture_completed(void)
-{
- return 0;
-}
-
static void rcu_sched_torture_deferred_free(struct rcu_torture *p)
{
call_rcu_sched(&p->rtort_rcu, rcu_torture_cb);
@@ -583,7 +617,7 @@ static struct rcu_torture_ops sched_ops = {
.readlock = sched_torture_read_lock,
.read_delay = rcu_read_delay, /* just reuse rcu's version. */
.readunlock = sched_torture_read_unlock,
- .completed = sched_torture_completed,
+ .completed = rcu_no_completed,
.deferred_free = rcu_sched_torture_deferred_free,
.sync = sched_torture_synchronize,
.cb_barrier = rcu_barrier_sched,
@@ -592,13 +626,13 @@ static struct rcu_torture_ops sched_ops = {
.name = "sched"
};
-static struct rcu_torture_ops sched_ops_sync = {
+static struct rcu_torture_ops sched_sync_ops = {
.init = rcu_sync_torture_init,
.cleanup = NULL,
.readlock = sched_torture_read_lock,
.read_delay = rcu_read_delay, /* just reuse rcu's version. */
.readunlock = sched_torture_read_unlock,
- .completed = sched_torture_completed,
+ .completed = rcu_no_completed,
.deferred_free = rcu_sync_torture_deferred_free,
.sync = sched_torture_synchronize,
.cb_barrier = NULL,
@@ -612,7 +646,7 @@ static struct rcu_torture_ops sched_expedited_ops = {
.readlock = sched_torture_read_lock,
.read_delay = rcu_read_delay, /* just reuse rcu's version. */
.readunlock = sched_torture_read_unlock,
- .completed = sched_torture_completed,
+ .completed = rcu_no_completed,
.deferred_free = rcu_sync_torture_deferred_free,
.sync = synchronize_sched_expedited,
.cb_barrier = NULL,
@@ -1097,9 +1131,10 @@ rcu_torture_init(void)
int cpu;
int firsterr = 0;
static struct rcu_torture_ops *torture_ops[] =
- { &rcu_ops, &rcu_sync_ops, &rcu_bh_ops, &rcu_bh_sync_ops,
- &sched_expedited_ops,
- &srcu_ops, &sched_ops, &sched_ops_sync, };
+ { &rcu_ops, &rcu_sync_ops, &rcu_expedited_ops,
+ &rcu_bh_ops, &rcu_bh_sync_ops,
+ &srcu_ops, &srcu_expedited_ops,
+ &sched_ops, &sched_sync_ops, &sched_expedited_ops, };
mutex_lock(&fullstop_mutex);
@@ -1110,8 +1145,12 @@ rcu_torture_init(void)
break;
}
if (i == ARRAY_SIZE(torture_ops)) {
- printk(KERN_ALERT "rcutorture: invalid torture type: \"%s\"\n",
+ printk(KERN_ALERT "rcu-torture: invalid torture type: \"%s\"\n",
torture_type);
+ printk(KERN_ALERT "rcu-torture types:");
+ for (i = 0; i < ARRAY_SIZE(torture_ops); i++)
+ printk(KERN_ALERT " %s", torture_ops[i]->name);
+ printk(KERN_ALERT "\n");
mutex_unlock(&fullstop_mutex);
return -EINVAL;
}
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index f3077c0ab18..53ae9598f79 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -46,18 +46,22 @@
#include <linux/cpu.h>
#include <linux/mutex.h>
#include <linux/time.h>
+#include <linux/kernel_stat.h>
#include "rcutree.h"
/* Data structures. */
+static struct lock_class_key rcu_node_class[NUM_RCU_LVLS];
+
#define RCU_STATE_INITIALIZER(name) { \
.level = { &name.node[0] }, \
.levelcnt = { \
NUM_RCU_LVL_0, /* root of hierarchy. */ \
NUM_RCU_LVL_1, \
NUM_RCU_LVL_2, \
- NUM_RCU_LVL_3, /* == MAX_RCU_LVLS */ \
+ NUM_RCU_LVL_3, \
+ NUM_RCU_LVL_4, /* == MAX_RCU_LVLS */ \
}, \
.signaled = RCU_GP_IDLE, \
.gpnum = -300, \
@@ -77,6 +81,8 @@ DEFINE_PER_CPU(struct rcu_data, rcu_sched_data);
struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh_state);
DEFINE_PER_CPU(struct rcu_data, rcu_bh_data);
+static int rcu_scheduler_active __read_mostly;
+
/*
* Return true if an RCU grace period is in progress. The ACCESS_ONCE()s
@@ -98,7 +104,7 @@ void rcu_sched_qs(int cpu)
struct rcu_data *rdp;
rdp = &per_cpu(rcu_sched_data, cpu);
- rdp->passed_quiesc_completed = rdp->completed;
+ rdp->passed_quiesc_completed = rdp->gpnum - 1;
barrier();
rdp->passed_quiesc = 1;
rcu_preempt_note_context_switch(cpu);
@@ -109,7 +115,7 @@ void rcu_bh_qs(int cpu)
struct rcu_data *rdp;
rdp = &per_cpu(rcu_bh_data, cpu);
- rdp->passed_quiesc_completed = rdp->completed;
+ rdp->passed_quiesc_completed = rdp->gpnum - 1;
barrier();
rdp->passed_quiesc = 1;
}
@@ -335,28 +341,9 @@ void rcu_irq_exit(void)
set_need_resched();
}
-/*
- * Record the specified "completed" value, which is later used to validate
- * dynticks counter manipulations. Specify "rsp->completed - 1" to
- * unconditionally invalidate any future dynticks manipulations (which is
- * useful at the beginning of a grace period).
- */
-static void dyntick_record_completed(struct rcu_state *rsp, long comp)
-{
- rsp->dynticks_completed = comp;
-}
-
#ifdef CONFIG_SMP
/*
- * Recall the previously recorded value of the completion for dynticks.
- */
-static long dyntick_recall_completed(struct rcu_state *rsp)
-{
- return rsp->dynticks_completed;
-}
-
-/*
* Snapshot the specified CPU's dynticks counter so that we can later
* credit them with an implicit quiescent state. Return 1 if this CPU
* is in dynticks idle mode, which is an extended quiescent state.
@@ -419,24 +406,8 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
#else /* #ifdef CONFIG_NO_HZ */
-static void dyntick_record_completed(struct rcu_state *rsp, long comp)
-{
-}
-
#ifdef CONFIG_SMP
-/*
- * If there are no dynticks, then the only way that a CPU can passively
- * be in a quiescent state is to be offline. Unlike dynticks idle, which
- * is a point in time during the prior (already finished) grace period,
- * an offline CPU is always in a quiescent state, and thus can be
- * unconditionally applied. So just return the current value of completed.
- */
-static long dyntick_recall_completed(struct rcu_state *rsp)
-{
- return rsp->completed;
-}
-
static int dyntick_save_progress_counter(struct rcu_data *rdp)
{
return 0;
@@ -553,13 +524,33 @@ static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp)
/*
* Update CPU-local rcu_data state to record the newly noticed grace period.
* This is used both when we started the grace period and when we notice
- * that someone else started the grace period.
+ * that someone else started the grace period. The caller must hold the
+ * ->lock of the leaf rcu_node structure corresponding to the current CPU,
+ * and must have irqs disabled.
*/
+static void __note_new_gpnum(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp)
+{
+ if (rdp->gpnum != rnp->gpnum) {
+ rdp->qs_pending = 1;
+ rdp->passed_quiesc = 0;
+ rdp->gpnum = rnp->gpnum;
+ }
+}
+
static void note_new_gpnum(struct rcu_state *rsp, struct rcu_data *rdp)
{
- rdp->qs_pending = 1;
- rdp->passed_quiesc = 0;
- rdp->gpnum = rsp->gpnum;
+ unsigned long flags;
+ struct rcu_node *rnp;
+
+ local_irq_save(flags);
+ rnp = rdp->mynode;
+ if (rdp->gpnum == ACCESS_ONCE(rnp->gpnum) || /* outside lock. */
+ !spin_trylock(&rnp->lock)) { /* irqs already off, retry later. */
+ local_irq_restore(flags);
+ return;
+ }
+ __note_new_gpnum(rsp, rnp, rdp);
+ spin_unlock_irqrestore(&rnp->lock, flags);
}
/*
@@ -583,6 +574,79 @@ check_for_new_grace_period(struct rcu_state *rsp, struct rcu_data *rdp)
}
/*
+ * Advance this CPU's callbacks, but only if the current grace period
+ * has ended. This may be called only from the CPU to whom the rdp
+ * belongs. In addition, the corresponding leaf rcu_node structure's
+ * ->lock must be held by the caller, with irqs disabled.
+ */
+static void
+__rcu_process_gp_end(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp)
+{
+ /* Did another grace period end? */
+ if (rdp->completed != rnp->completed) {
+
+ /* Advance callbacks. No harm if list empty. */
+ rdp->nxttail[RCU_DONE_TAIL] = rdp->nxttail[RCU_WAIT_TAIL];
+ rdp->nxttail[RCU_WAIT_TAIL] = rdp->nxttail[RCU_NEXT_READY_TAIL];
+ rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL];
+
+ /* Remember that we saw this grace-period completion. */
+ rdp->completed = rnp->completed;
+ }
+}
+
+/*
+ * Advance this CPU's callbacks, but only if the current grace period
+ * has ended. This may be called only from the CPU to whom the rdp
+ * belongs.
+ */
+static void
+rcu_process_gp_end(struct rcu_state *rsp, struct rcu_data *rdp)
+{
+ unsigned long flags;
+ struct rcu_node *rnp;
+
+ local_irq_save(flags);
+ rnp = rdp->mynode;
+ if (rdp->completed == ACCESS_ONCE(rnp->completed) || /* outside lock. */
+ !spin_trylock(&rnp->lock)) { /* irqs already off, retry later. */
+ local_irq_restore(flags);
+ return;
+ }
+ __rcu_process_gp_end(rsp, rnp, rdp);
+ spin_unlock_irqrestore(&rnp->lock, flags);
+}
+
+/*
+ * Do per-CPU grace-period initialization for running CPU. The caller
+ * must hold the lock of the leaf rcu_node structure corresponding to
+ * this CPU.
+ */
+static void
+rcu_start_gp_per_cpu(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp)
+{
+ /* Prior grace period ended, so advance callbacks for current CPU. */
+ __rcu_process_gp_end(rsp, rnp, rdp);
+
+ /*
+ * Because this CPU just now started the new grace period, we know
+ * that all of its callbacks will be covered by this upcoming grace
+ * period, even the ones that were registered arbitrarily recently.
+ * Therefore, advance all outstanding callbacks to RCU_WAIT_TAIL.
+ *
+ * Other CPUs cannot be sure exactly when the grace period started.
+ * Therefore, their recently registered callbacks must pass through
+ * an additional RCU_NEXT_READY stage, so that they will be handled
+ * by the next RCU grace period.
+ */
+ rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL];
+ rdp->nxttail[RCU_WAIT_TAIL] = rdp->nxttail[RCU_NEXT_TAIL];
+
+ /* Set state so that this CPU will detect the next quiescent state. */
+ __note_new_gpnum(rsp, rnp, rdp);
+}
+
+/*
* Start a new RCU grace period if warranted, re-initializing the hierarchy
* in preparation for detecting the next grace period. The caller must hold
* the root node's ->lock, which is released before return. Hard irqs must
@@ -596,7 +660,23 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
struct rcu_node *rnp = rcu_get_root(rsp);
if (!cpu_needs_another_gp(rsp, rdp)) {
- spin_unlock_irqrestore(&rnp->lock, flags);
+ if (rnp->completed == rsp->completed) {
+ spin_unlock_irqrestore(&rnp->lock, flags);
+ return;
+ }
+ spin_unlock(&rnp->lock); /* irqs remain disabled. */
+
+ /*
+ * Propagate new ->completed value to rcu_node structures
+ * so that other CPUs don't have to wait until the start
+ * of the next grace period to process their callbacks.
+ */
+ rcu_for_each_node_breadth_first(rsp, rnp) {
+ spin_lock(&rnp->lock); /* irqs already disabled. */
+ rnp->completed = rsp->completed;
+ spin_unlock(&rnp->lock); /* irqs remain disabled. */
+ }
+ local_irq_restore(flags);
return;
}
@@ -606,29 +686,15 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
rsp->signaled = RCU_GP_INIT; /* Hold off force_quiescent_state. */
rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS;
record_gp_stall_check_time(rsp);
- dyntick_record_completed(rsp, rsp->completed - 1);
- note_new_gpnum(rsp, rdp);
-
- /*
- * Because this CPU just now started the new grace period, we know
- * that all of its callbacks will be covered by this upcoming grace
- * period, even the ones that were registered arbitrarily recently.
- * Therefore, advance all outstanding callbacks to RCU_WAIT_TAIL.
- *
- * Other CPUs cannot be sure exactly when the grace period started.
- * Therefore, their recently registered callbacks must pass through
- * an additional RCU_NEXT_READY stage, so that they will be handled
- * by the next RCU grace period.
- */
- rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL];
- rdp->nxttail[RCU_WAIT_TAIL] = rdp->nxttail[RCU_NEXT_TAIL];
/* Special-case the common single-level case. */
if (NUM_RCU_NODES == 1) {
rcu_preempt_check_blocked_tasks(rnp);
rnp->qsmask = rnp->qsmaskinit;
rnp->gpnum = rsp->gpnum;
+ rnp->completed = rsp->completed;
rsp->signaled = RCU_SIGNAL_INIT; /* force_quiescent_state OK. */
+ rcu_start_gp_per_cpu(rsp, rnp, rdp);
spin_unlock_irqrestore(&rnp->lock, flags);
return;
}
@@ -661,6 +727,9 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
rcu_preempt_check_blocked_tasks(rnp);
rnp->qsmask = rnp->qsmaskinit;
rnp->gpnum = rsp->gpnum;
+ rnp->completed = rsp->completed;
+ if (rnp == rdp->mynode)
+ rcu_start_gp_per_cpu(rsp, rnp, rdp);
spin_unlock(&rnp->lock); /* irqs remain disabled. */
}
@@ -672,58 +741,32 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
}
/*
- * Advance this CPU's callbacks, but only if the current grace period
- * has ended. This may be called only from the CPU to whom the rdp
- * belongs.
+ * Report a full set of quiescent states to the specified rcu_state
+ * data structure. This involves cleaning up after the prior grace
+ * period and letting rcu_start_gp() start up the next grace period
+ * if one is needed. Note that the caller must hold rnp->lock, as
+ * required by rcu_start_gp(), which will release it.
*/
-static void
-rcu_process_gp_end(struct rcu_state *rsp, struct rcu_data *rdp)
-{
- long completed_snap;
- unsigned long flags;
-
- local_irq_save(flags);
- completed_snap = ACCESS_ONCE(rsp->completed); /* outside of lock. */
-
- /* Did another grace period end? */
- if (rdp->completed != completed_snap) {
-
- /* Advance callbacks. No harm if list empty. */
- rdp->nxttail[RCU_DONE_TAIL] = rdp->nxttail[RCU_WAIT_TAIL];
- rdp->nxttail[RCU_WAIT_TAIL] = rdp->nxttail[RCU_NEXT_READY_TAIL];
- rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL];
-
- /* Remember that we saw this grace-period completion. */
- rdp->completed = completed_snap;
- }
- local_irq_restore(flags);
-}
-
-/*
- * Clean up after the prior grace period and let rcu_start_gp() start up
- * the next grace period if one is needed. Note that the caller must
- * hold rnp->lock, as required by rcu_start_gp(), which will release it.
- */
-static void cpu_quiet_msk_finish(struct rcu_state *rsp, unsigned long flags)
+static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags)
__releases(rcu_get_root(rsp)->lock)
{
WARN_ON_ONCE(!rcu_gp_in_progress(rsp));
rsp->completed = rsp->gpnum;
rsp->signaled = RCU_GP_IDLE;
- rcu_process_gp_end(rsp, rsp->rda[smp_processor_id()]);
rcu_start_gp(rsp, flags); /* releases root node's rnp->lock. */
}
/*
- * Similar to cpu_quiet(), for which it is a helper function. Allows
- * a group of CPUs to be quieted at one go, though all the CPUs in the
- * group must be represented by the same leaf rcu_node structure.
- * That structure's lock must be held upon entry, and it is released
- * before return.
+ * Similar to rcu_report_qs_rdp(), for which it is a helper function.
+ * Allows quiescent states for a group of CPUs to be reported at one go
+ * to the specified rcu_node structure, though all the CPUs in the group
+ * must be represented by the same rcu_node structure (which need not be
+ * a leaf rcu_node structure, though it often will be). That structure's
+ * lock must be held upon entry, and it is released before return.
*/
static void
-cpu_quiet_msk(unsigned long mask, struct rcu_state *rsp, struct rcu_node *rnp,
- unsigned long flags)
+rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp,
+ struct rcu_node *rnp, unsigned long flags)
__releases(rnp->lock)
{
struct rcu_node *rnp_c;
@@ -759,21 +802,23 @@ cpu_quiet_msk(unsigned long mask, struct rcu_state *rsp, struct rcu_node *rnp,
/*
* Get here if we are the last CPU to pass through a quiescent
- * state for this grace period. Invoke cpu_quiet_msk_finish()
+ * state for this grace period. Invoke rcu_report_qs_rsp()
* to clean up and start the next grace period if one is needed.
*/
- cpu_quiet_msk_finish(rsp, flags); /* releases rnp->lock. */
+ rcu_report_qs_rsp(rsp, flags); /* releases rnp->lock. */
}
/*
- * Record a quiescent state for the specified CPU, which must either be
- * the current CPU. The lastcomp argument is used to make sure we are
- * still in the grace period of interest. We don't want to end the current
- * grace period based on quiescent states detected in an earlier grace
- * period!
+ * Record a quiescent state for the specified CPU to that CPU's rcu_data
+ * structure. This must be either called from the specified CPU, or
+ * called when the specified CPU is known to be offline (and when it is
+ * also known that no other CPU is concurrently trying to help the offline
+ * CPU). The lastcomp argument is used to make sure we are still in the
+ * grace period of interest. We don't want to end the current grace period
+ * based on quiescent states detected in an earlier grace period!
*/
static void
-cpu_quiet(int cpu, struct rcu_state *rsp, struct rcu_data *rdp, long lastcomp)
+rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp, long lastcomp)
{
unsigned long flags;
unsigned long mask;
@@ -781,15 +826,15 @@ cpu_quiet(int cpu, struct rcu_state *rsp, struct rcu_data *rdp, long lastcomp)
rnp = rdp->mynode;
spin_lock_irqsave(&rnp->lock, flags);
- if (lastcomp != ACCESS_ONCE(rsp->completed)) {
+ if (lastcomp != rnp->completed) {
/*
* Someone beat us to it for this grace period, so leave.
* The race with GP start is resolved by the fact that we
* hold the leaf rcu_node lock, so that the per-CPU bits
* cannot yet be initialized -- so we would simply find our
- * CPU's bit already cleared in cpu_quiet_msk() if this race
- * occurred.
+ * CPU's bit already cleared in rcu_report_qs_rnp() if this
+ * race occurred.
*/
rdp->passed_quiesc = 0; /* try again later! */
spin_unlock_irqrestore(&rnp->lock, flags);
@@ -807,7 +852,7 @@ cpu_quiet(int cpu, struct rcu_state *rsp, struct rcu_data *rdp, long lastcomp)
*/
rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL];
- cpu_quiet_msk(mask, rsp, rnp, flags); /* releases rnp->lock */
+ rcu_report_qs_rnp(mask, rsp, rnp, flags); /* rlses rnp->lock */
}
}
@@ -838,8 +883,11 @@ rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp)
if (!rdp->passed_quiesc)
return;
- /* Tell RCU we are done (but cpu_quiet() will be the judge of that). */
- cpu_quiet(rdp->cpu, rsp, rdp, rdp->passed_quiesc_completed);
+ /*
+ * Tell RCU we are done (but rcu_report_qs_rdp() will be the
+ * judge of that).
+ */
+ rcu_report_qs_rdp(rdp->cpu, rsp, rdp, rdp->passed_quiesc_completed);
}
#ifdef CONFIG_HOTPLUG_CPU
@@ -899,8 +947,8 @@ static void rcu