aboutsummaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2011-05-19 18:14:34 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2011-05-19 18:14:34 -0700
commiteb04f2f04ed1227c266b3219c0aaeda525639718 (patch)
tree7f224483a3cd0e439cd64a8666ec9dc5ed178a3d /kernel
parent5765040ebfc9a28d9dcfaaaaf3d25840d922de96 (diff)
parent80d02085d99039b3b7f3a73c8896226b0cb1ba07 (diff)
Merge branch 'core-rcu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'core-rcu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (78 commits) Revert "rcu: Decrease memory-barrier usage based on semi-formal proof" net,rcu: convert call_rcu(prl_entry_destroy_rcu) to kfree batman,rcu: convert call_rcu(softif_neigh_free_rcu) to kfree_rcu batman,rcu: convert call_rcu(neigh_node_free_rcu) to kfree() batman,rcu: convert call_rcu(gw_node_free_rcu) to kfree_rcu net,rcu: convert call_rcu(kfree_tid_tx) to kfree_rcu() net,rcu: convert call_rcu(xt_osf_finger_free_rcu) to kfree_rcu() net/mac80211,rcu: convert call_rcu(work_free_rcu) to kfree_rcu() net,rcu: convert call_rcu(wq_free_rcu) to kfree_rcu() net,rcu: convert call_rcu(phonet_device_rcu_free) to kfree_rcu() perf,rcu: convert call_rcu(swevent_hlist_release_rcu) to kfree_rcu() perf,rcu: convert call_rcu(free_ctx) to kfree_rcu() net,rcu: convert call_rcu(__nf_ct_ext_free_rcu) to kfree_rcu() net,rcu: convert call_rcu(net_generic_release) to kfree_rcu() net,rcu: convert call_rcu(netlbl_unlhsh_free_addr6) to kfree_rcu() net,rcu: convert call_rcu(netlbl_unlhsh_free_addr4) to kfree_rcu() security,rcu: convert call_rcu(sel_netif_free) to kfree_rcu() net,rcu: convert call_rcu(xps_dev_maps_release) to kfree_rcu() net,rcu: convert call_rcu(xps_map_release) to kfree_rcu() net,rcu: convert call_rcu(rps_map_release) to kfree_rcu() ...
Diffstat (limited to 'kernel')
-rw-r--r--kernel/cgroup.c27
-rw-r--r--kernel/events/core.c20
-rw-r--r--kernel/rcupdate.c32
-rw-r--r--kernel/rcutiny.c45
-rw-r--r--kernel/rcutiny_plugin.h203
-rw-r--r--kernel/rcutorture.c26
-rw-r--r--kernel/rcutree.c526
-rw-r--r--kernel/rcutree.h104
-rw-r--r--kernel/rcutree_plugin.h568
-rw-r--r--kernel/rcutree_trace.c180
-rw-r--r--kernel/softirq.c2
11 files changed, 1333 insertions, 400 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 25c7eb52de1..909a35510af 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -326,12 +326,6 @@ static struct hlist_head *css_set_hash(struct cgroup_subsys_state *css[])
return &css_set_table[index];
}
-static void free_css_set_rcu(struct rcu_head *obj)
-{
- struct css_set *cg = container_of(obj, struct css_set, rcu_head);
- kfree(cg);
-}
-
/* We don't maintain the lists running through each css_set to its
* task until after the first call to cgroup_iter_start(). This
* reduces the fork()/exit() overhead for people who have cgroups
@@ -375,7 +369,7 @@ static void __put_css_set(struct css_set *cg, int taskexit)
}
write_unlock(&css_set_lock);
- call_rcu(&cg->rcu_head, free_css_set_rcu);
+ kfree_rcu(cg, rcu_head);
}
/*
@@ -812,13 +806,6 @@ static int cgroup_call_pre_destroy(struct cgroup *cgrp)
return ret;
}
-static void free_cgroup_rcu(struct rcu_head *obj)
-{
- struct cgroup *cgrp = container_of(obj, struct cgroup, rcu_head);
-
- kfree(cgrp);
-}
-
static void cgroup_diput(struct dentry *dentry, struct inode *inode)
{
/* is dentry a directory ? if so, kfree() associated cgroup */
@@ -856,7 +843,7 @@ static void cgroup_diput(struct dentry *dentry, struct inode *inode)
*/
BUG_ON(!list_empty(&cgrp->pidlists));
- call_rcu(&cgrp->rcu_head, free_cgroup_rcu);
+ kfree_rcu(cgrp, rcu_head);
}
iput(inode);
}
@@ -4623,14 +4610,6 @@ bool css_is_ancestor(struct cgroup_subsys_state *child,
return ret;
}
-static void __free_css_id_cb(struct rcu_head *head)
-{
- struct css_id *id;
-
- id = container_of(head, struct css_id, rcu_head);
- kfree(id);
-}
-
void free_css_id(struct cgroup_subsys *ss, struct cgroup_subsys_state *css)
{
struct css_id *id = css->id;
@@ -4645,7 +4624,7 @@ void free_css_id(struct cgroup_subsys *ss, struct cgroup_subsys_state *css)
spin_lock(&ss->id_lock);
idr_remove(&ss->idr, id->id);
spin_unlock(&ss->id_lock);
- call_rcu(&id->rcu_head, __free_css_id_cb);
+ kfree_rcu(id, rcu_head);
}
EXPORT_SYMBOL_GPL(free_css_id);
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 0fc34a370ba..c09767f7db3 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -586,14 +586,6 @@ static void get_ctx(struct perf_event_context *ctx)
WARN_ON(!atomic_inc_not_zero(&ctx->refcount));
}
-static void free_ctx(struct rcu_head *head)
-{
- struct perf_event_context *ctx;
-
- ctx = container_of(head, struct perf_event_context, rcu_head);
- kfree(ctx);
-}
-
static void put_ctx(struct perf_event_context *ctx)
{
if (atomic_dec_and_test(&ctx->refcount)) {
@@ -601,7 +593,7 @@ static void put_ctx(struct perf_event_context *ctx)
put_ctx(ctx->parent_ctx);
if (ctx->task)
put_task_struct(ctx->task);
- call_rcu(&ctx->rcu_head, free_ctx);
+ kfree_rcu(ctx, rcu_head);
}
}
@@ -5331,14 +5323,6 @@ swevent_hlist_deref(struct swevent_htable *swhash)
lockdep_is_held(&swhash->hlist_mutex));
}
-static void swevent_hlist_release_rcu(struct rcu_head *rcu_head)
-{
- struct swevent_hlist *hlist;
-
- hlist = container_of(rcu_head, struct swevent_hlist, rcu_head);
- kfree(hlist);
-}
-
static void swevent_hlist_release(struct swevent_htable *swhash)
{
struct swevent_hlist *hlist = swevent_hlist_deref(swhash);
@@ -5347,7 +5331,7 @@ static void swevent_hlist_release(struct swevent_htable *swhash)
return;
rcu_assign_pointer(swhash->swevent_hlist, NULL);
- call_rcu(&hlist->rcu_head, swevent_hlist_release_rcu);
+ kfree_rcu(hlist, rcu_head);
}
static void swevent_hlist_put_cpu(struct perf_event *event, int cpu)
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index f3240e98792..7784bd216b6 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -142,10 +142,17 @@ static int rcuhead_fixup_init(void *addr, enum debug_obj_state state)
* Ensure that queued callbacks are all executed.
* If we detect that we are nested in a RCU read-side critical
* section, we should simply fail, otherwise we would deadlock.
+ * In !PREEMPT configurations, there is no way to tell if we are
+ * in a RCU read-side critical section or not, so we never
+ * attempt any fixup and just print a warning.
*/
+#ifndef CONFIG_PREEMPT
+ WARN_ON_ONCE(1);
+ return 0;
+#endif
if (rcu_preempt_depth() != 0 || preempt_count() != 0 ||
irqs_disabled()) {
- WARN_ON(1);
+ WARN_ON_ONCE(1);
return 0;
}
rcu_barrier();
@@ -184,10 +191,17 @@ static int rcuhead_fixup_activate(void *addr, enum debug_obj_state state)
* Ensure that queued callbacks are all executed.
* If we detect that we are nested in a RCU read-side critical
* section, we should simply fail, otherwise we would deadlock.
+ * In !PREEMPT configurations, there is no way to tell if we are
+ * in a RCU read-side critical section or not, so we never
+ * attempt any fixup and just print a warning.
*/
+#ifndef CONFIG_PREEMPT
+ WARN_ON_ONCE(1);
+ return 0;
+#endif
if (rcu_preempt_depth() != 0 || preempt_count() != 0 ||
irqs_disabled()) {
- WARN_ON(1);
+ WARN_ON_ONCE(1);
return 0;
}
rcu_barrier();
@@ -214,15 +228,17 @@ static int rcuhead_fixup_free(void *addr, enum debug_obj_state state)
* Ensure that queued callbacks are all executed.
* If we detect that we are nested in a RCU read-side critical
* section, we should simply fail, otherwise we would deadlock.
- * Note that the machinery to reliably determine whether
- * or not we are in an RCU read-side critical section
- * exists only in the preemptible RCU implementations
- * (TINY_PREEMPT_RCU and TREE_PREEMPT_RCU), which is why
- * DEBUG_OBJECTS_RCU_HEAD is disallowed if !PREEMPT.
+ * In !PREEMPT configurations, there is no way to tell if we are
+ * in a RCU read-side critical section or not, so we never
+ * attempt any fixup and just print a warning.
*/
+#ifndef CONFIG_PREEMPT
+ WARN_ON_ONCE(1);
+ return 0;
+#endif
if (rcu_preempt_depth() != 0 || preempt_count() != 0 ||
irqs_disabled()) {
- WARN_ON(1);
+ WARN_ON_ONCE(1);
return 0;
}
rcu_barrier();
diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c
index 0c343b9a46d..421abfd3641 100644
--- a/kernel/rcutiny.c
+++ b/kernel/rcutiny.c
@@ -40,10 +40,10 @@
static struct task_struct *rcu_kthread_task;
static DECLARE_WAIT_QUEUE_HEAD(rcu_kthread_wq);
static unsigned long have_rcu_kthread_work;
-static void invoke_rcu_kthread(void);
/* Forward declarations for rcutiny_plugin.h. */
struct rcu_ctrlblk;
+static void invoke_rcu_kthread(void);
static void rcu_process_callbacks(struct rcu_ctrlblk *rcp);
static int rcu_kthread(void *arg);
static void __call_rcu(struct rcu_head *head,
@@ -79,36 +79,45 @@ void rcu_exit_nohz(void)
#endif /* #ifdef CONFIG_NO_HZ */
/*
- * Helper function for rcu_qsctr_inc() and rcu_bh_qsctr_inc().
- * Also disable irqs to avoid confusion due to interrupt handlers
+ * Helper function for rcu_sched_qs() and rcu_bh_qs().
+ * Also irqs are disabled to avoid confusion due to interrupt handlers
* invoking call_rcu().
*/
static int rcu_qsctr_help(struct rcu_ctrlblk *rcp)
{
- unsigned long flags;
-
- local_irq_save(flags);
if (rcp->rcucblist != NULL &&
rcp->donetail != rcp->curtail) {
rcp->donetail = rcp->curtail;
- local_irq_restore(flags);
return 1;
}
- local_irq_restore(flags);
return 0;
}
/*
+ * Wake up rcu_kthread() to process callbacks now eligible for invocation
+ * or to boost readers.
+ */
+static void invoke_rcu_kthread(void)
+{
+ have_rcu_kthread_work = 1;
+ wake_up(&rcu_kthread_wq);
+}
+
+/*
* Record an rcu quiescent state. And an rcu_bh quiescent state while we
* are at it, given that any rcu quiescent state is also an rcu_bh
* quiescent state. Use "+" instead of "||" to defeat short circuiting.
*/
void rcu_sched_qs(int cpu)
{
+ unsigned long flags;
+
+ local_irq_save(flags);
if (rcu_qsctr_help(&rcu_sched_ctrlblk) +
rcu_qsctr_help(&rcu_bh_ctrlblk))
invoke_rcu_kthread();
+ local_irq_restore(flags);
}
/*
@@ -116,8 +125,12 @@ void rcu_sched_qs(int cpu)
*/
void rcu_bh_qs(int cpu)
{
+ unsigned long flags;
+
+ local_irq_save(flags);
if (rcu_qsctr_help(&rcu_bh_ctrlblk))
invoke_rcu_kthread();
+ local_irq_restore(flags);
}
/*
@@ -167,7 +180,7 @@ static void rcu_process_callbacks(struct rcu_ctrlblk *rcp)
prefetch(next);
debug_rcu_head_unqueue(list);
local_bh_disable();
- list->func(list);
+ __rcu_reclaim(list);
local_bh_enable();
list = next;
RCU_TRACE(cb_count++);
@@ -208,20 +221,6 @@ static int rcu_kthread(void *arg)
}
/*
- * Wake up rcu_kthread() to process callbacks now eligible for invocation
- * or to boost readers.
- */
-static void invoke_rcu_kthread(void)
-{
- unsigned long flags;
-
- local_irq_save(flags);
- have_rcu_kthread_work = 1;
- wake_up(&rcu_kthread_wq);
- local_irq_restore(flags);
-}
-
-/*
* Wait for a grace period to elapse. But it is illegal to invoke
* synchronize_sched() from within an RCU read-side critical section.
* Therefore, any legal call to synchronize_sched() is a quiescent
diff --git a/kernel/rcutiny_plugin.h b/kernel/rcutiny_plugin.h
index 3cb8e362e88..f259c676195 100644
--- a/kernel/rcutiny_plugin.h
+++ b/kernel/rcutiny_plugin.h
@@ -100,23 +100,28 @@ struct rcu_preempt_ctrlblk {
u8 completed; /* Last grace period completed. */
/* If all three are equal, RCU is idle. */
#ifdef CONFIG_RCU_BOOST
- s8 boosted_this_gp; /* Has boosting already happened? */
unsigned long boost_time; /* When to start boosting (jiffies) */
#endif /* #ifdef CONFIG_RCU_BOOST */
#ifdef CONFIG_RCU_TRACE
unsigned long n_grace_periods;
#ifdef CONFIG_RCU_BOOST
unsigned long n_tasks_boosted;
+ /* Total number of tasks boosted. */
unsigned long n_exp_boosts;
+ /* Number of tasks boosted for expedited GP. */
unsigned long n_normal_boosts;
- unsigned long n_normal_balk_blkd_tasks;
- unsigned long n_normal_balk_gp_tasks;
- unsigned long n_normal_balk_boost_tasks;
- unsigned long n_normal_balk_boosted;
- unsigned long n_normal_balk_notyet;
- unsigned long n_normal_balk_nos;
- unsigned long n_exp_balk_blkd_tasks;
- unsigned long n_exp_balk_nos;
+ /* Number of tasks boosted for normal GP. */
+ unsigned long n_balk_blkd_tasks;
+ /* Refused to boost: no blocked tasks. */
+ unsigned long n_balk_exp_gp_tasks;
+ /* Refused to boost: nothing blocking GP. */
+ unsigned long n_balk_boost_tasks;
+ /* Refused to boost: already boosting. */
+ unsigned long n_balk_notyet;
+ /* Refused to boost: not yet time. */
+ unsigned long n_balk_nos;
+ /* Refused to boost: not sure why, though. */
+ /* This can happen due to race conditions. */
#endif /* #ifdef CONFIG_RCU_BOOST */
#endif /* #ifdef CONFIG_RCU_TRACE */
};
@@ -201,7 +206,6 @@ static struct list_head *rcu_next_node_entry(struct task_struct *t)
#ifdef CONFIG_RCU_BOOST
static void rcu_initiate_boost_trace(void);
-static void rcu_initiate_exp_boost_trace(void);
#endif /* #ifdef CONFIG_RCU_BOOST */
/*
@@ -219,41 +223,21 @@ static void show_tiny_preempt_stats(struct seq_file *m)
"N."[!rcu_preempt_ctrlblk.gp_tasks],
"E."[!rcu_preempt_ctrlblk.exp_tasks]);
#ifdef CONFIG_RCU_BOOST
- seq_printf(m, " ttb=%c btg=",
- "B."[!rcu_preempt_ctrlblk.boost_tasks]);
- switch (rcu_preempt_ctrlblk.boosted_this_gp) {
- case -1:
- seq_puts(m, "exp");
- break;
- case 0:
- seq_puts(m, "no");
- break;
- case 1:
- seq_puts(m, "begun");
- break;
- case 2:
- seq_puts(m, "done");
- break;
- default:
- seq_printf(m, "?%d?", rcu_preempt_ctrlblk.boosted_this_gp);
- }
- seq_printf(m, " ntb=%lu neb=%lu nnb=%lu j=%04x bt=%04x\n",
+ seq_printf(m, "%sttb=%c ntb=%lu neb=%lu nnb=%lu j=%04x bt=%04x\n",
+ " ",
+ "B."[!rcu_preempt_ctrlblk.boost_tasks],
rcu_preempt_ctrlblk.n_tasks_boosted,
rcu_preempt_ctrlblk.n_exp_boosts,
rcu_preempt_ctrlblk.n_normal_boosts,
(int)(jiffies & 0xffff),
(int)(rcu_preempt_ctrlblk.boost_time & 0xffff));
- seq_printf(m, " %s: nt=%lu gt=%lu bt=%lu b=%lu ny=%lu nos=%lu\n",
- "normal balk",
- rcu_preempt_ctrlblk.n_normal_balk_blkd_tasks,
- rcu_preempt_ctrlblk.n_normal_balk_gp_tasks,
- rcu_preempt_ctrlblk.n_normal_balk_boost_tasks,
- rcu_preempt_ctrlblk.n_normal_balk_boosted,
- rcu_preempt_ctrlblk.n_normal_balk_notyet,
- rcu_preempt_ctrlblk.n_normal_balk_nos);
- seq_printf(m, " exp balk: bt=%lu nos=%lu\n",
- rcu_preempt_ctrlblk.n_exp_balk_blkd_tasks,
- rcu_preempt_ctrlblk.n_exp_balk_nos);
+ seq_printf(m, "%s: nt=%lu egt=%lu bt=%lu ny=%lu nos=%lu\n",
+ " balk",
+ rcu_preempt_ctrlblk.n_balk_blkd_tasks,
+ rcu_preempt_ctrlblk.n_balk_exp_gp_tasks,
+ rcu_preempt_ctrlblk.n_balk_boost_tasks,
+ rcu_preempt_ctrlblk.n_balk_notyet,
+ rcu_preempt_ctrlblk.n_balk_nos);
#endif /* #ifdef CONFIG_RCU_BOOST */
}
@@ -271,25 +255,59 @@ static int rcu_boost(void)
{
unsigned long flags;
struct rt_mutex mtx;
- struct list_head *np;
struct task_struct *t;
+ struct list_head *tb;
- if (rcu_preempt_ctrlblk.boost_tasks == NULL)
+ if (rcu_preempt_ctrlblk.boost_tasks == NULL &&
+ rcu_preempt_ctrlblk.exp_tasks == NULL)
return 0; /* Nothing to boost. */
+
raw_local_irq_save(flags);
- rcu_preempt_ctrlblk.boosted_this_gp++;
- t = container_of(rcu_preempt_ctrlblk.boost_tasks, struct task_struct,
- rcu_node_entry);
- np = rcu_next_node_entry(t);
+
+ /*
+ * Recheck with irqs disabled: all tasks in need of boosting
+ * might exit their RCU read-side critical sections on their own
+ * if we are preempted just before disabling irqs.
+ */
+ if (rcu_preempt_ctrlblk.boost_tasks == NULL &&
+ rcu_preempt_ctrlblk.exp_tasks == NULL) {
+ raw_local_irq_restore(flags);
+ return 0;
+ }
+
+ /*
+ * Preferentially boost tasks blocking expedited grace periods.
+ * This cannot starve the normal grace periods because a second
+ * expedited grace period must boost all blocked tasks, including
+ * those blocking the pre-existing normal grace period.
+ */
+ if (rcu_preempt_ctrlblk.exp_tasks != NULL) {
+ tb = rcu_preempt_ctrlblk.exp_tasks;
+ RCU_TRACE(rcu_preempt_ctrlblk.n_exp_boosts++);
+ } else {
+ tb = rcu_preempt_ctrlblk.boost_tasks;
+ RCU_TRACE(rcu_preempt_ctrlblk.n_normal_boosts++);
+ }
+ RCU_TRACE(rcu_preempt_ctrlblk.n_tasks_boosted++);
+
+ /*
+ * We boost task t by manufacturing an rt_mutex that appears to
+ * be held by task t. We leave a pointer to that rt_mutex where
+ * task t can find it, and task t will release the mutex when it
+ * exits its outermost RCU read-side critical section. Then
+ * simply acquiring this artificial rt_mutex will boost task
+ * t's priority. (Thanks to tglx for suggesting this approach!)
+ */
+ t = container_of(tb, struct task_struct, rcu_node_entry);
rt_mutex_init_proxy_locked(&mtx, t);
t->rcu_boost_mutex = &mtx;
t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BOOSTED;
raw_local_irq_restore(flags);
rt_mutex_lock(&mtx);
- RCU_TRACE(rcu_preempt_ctrlblk.n_tasks_boosted++);
- rcu_preempt_ctrlblk.boosted_this_gp++;
- rt_mutex_unlock(&mtx);
- return rcu_preempt_ctrlblk.boost_tasks != NULL;
+ rt_mutex_unlock(&mtx); /* Keep lockdep happy. */
+
+ return rcu_preempt_ctrlblk.boost_tasks != NULL ||
+ rcu_preempt_ctrlblk.exp_tasks != NULL;
}
/*
@@ -304,42 +322,25 @@ static int rcu_boost(void)
*/
static int rcu_initiate_boost(void)
{
- if (!rcu_preempt_blocked_readers_cgp()) {
- RCU_TRACE(rcu_preempt_ctrlblk.n_normal_balk_blkd_tasks++);
+ if (!rcu_preempt_blocked_readers_cgp() &&
+ rcu_preempt_ctrlblk.exp_tasks == NULL) {
+ RCU_TRACE(rcu_preempt_ctrlblk.n_balk_exp_gp_tasks++);
return 0;
}
- if (rcu_preempt_ctrlblk.gp_tasks != NULL &&
- rcu_preempt_ctrlblk.boost_tasks == NULL &&
- rcu_preempt_ctrlblk.boosted_this_gp == 0 &&
- ULONG_CMP_GE(jiffies, rcu_preempt_ctrlblk.boost_time)) {
- rcu_preempt_ctrlblk.boost_tasks = rcu_preempt_ctrlblk.gp_tasks;
+ if (rcu_preempt_ctrlblk.exp_tasks != NULL ||
+ (rcu_preempt_ctrlblk.gp_tasks != NULL &&
+ rcu_preempt_ctrlblk.boost_tasks == NULL &&
+ ULONG_CMP_GE(jiffies, rcu_preempt_ctrlblk.boost_time))) {
+ if (rcu_preempt_ctrlblk.exp_tasks == NULL)
+ rcu_preempt_ctrlblk.boost_tasks =
+ rcu_preempt_ctrlblk.gp_tasks;
invoke_rcu_kthread();
- RCU_TRACE(rcu_preempt_ctrlblk.n_normal_boosts++);
} else
RCU_TRACE(rcu_initiate_boost_trace());
return 1;
}
-/*
- * Initiate boosting for an expedited grace period.
- */
-static void rcu_initiate_expedited_boost(void)
-{
- unsigned long flags;
-
- raw_local_irq_save(flags);
- if (!list_empty(&rcu_preempt_ctrlblk.blkd_tasks)) {
- rcu_preempt_ctrlblk.boost_tasks =
- rcu_preempt_ctrlblk.blkd_tasks.next;
- rcu_preempt_ctrlblk.boosted_this_gp = -1;
- invoke_rcu_kthread();
- RCU_TRACE(rcu_preempt_ctrlblk.n_exp_boosts++);
- } else
- RCU_TRACE(rcu_initiate_exp_boost_trace());
- raw_local_irq_restore(flags);
-}
-
-#define RCU_BOOST_DELAY_JIFFIES DIV_ROUND_UP(CONFIG_RCU_BOOST_DELAY * HZ, 1000);
+#define RCU_BOOST_DELAY_JIFFIES DIV_ROUND_UP(CONFIG_RCU_BOOST_DELAY * HZ, 1000)
/*
* Do priority-boost accounting for the start of a new grace period.
@@ -347,8 +348,6 @@ static void rcu_initiate_expedited_boost(void)
static void rcu_preempt_boost_start_gp(void)
{
rcu_preempt_ctrlblk.boost_time = jiffies + RCU_BOOST_DELAY_JIFFIES;
- if (rcu_preempt_ctrlblk.boosted_this_gp > 0)
- rcu_preempt_ctrlblk.boosted_this_gp = 0;
}
#else /* #ifdef CONFIG_RCU_BOOST */
@@ -372,13 +371,6 @@ static int rcu_initiate_boost(void)
}
/*
- * If there is no RCU priority boosting, we don't initiate expedited boosting.
- */
-static void rcu_initiate_expedited_boost(void)
-{
-}
-
-/*
* If there is no RCU priority boosting, nothing to do at grace-period start.
*/
static void rcu_preempt_boost_start_gp(void)
@@ -418,7 +410,7 @@ static void rcu_preempt_cpu_qs(void)
if (!rcu_preempt_gp_in_progress())
return;
/*
- * Check up on boosting. If there are no readers blocking the
+ * Check up on boosting. If there are readers blocking the
* current grace period, leave.
*/
if (rcu_initiate_boost())
@@ -578,7 +570,7 @@ static void rcu_read_unlock_special(struct task_struct *t)
empty = !rcu_preempt_blocked_readers_cgp();
empty_exp = rcu_preempt_ctrlblk.exp_tasks == NULL;
np = rcu_next_node_entry(t);
- list_del(&t->rcu_node_entry);
+ list_del_init(&t->rcu_node_entry);
if (&t->rcu_node_entry == rcu_preempt_ctrlblk.gp_tasks)
rcu_preempt_ctrlblk.gp_tasks = np;
if (&t->rcu_node_entry == rcu_preempt_ctrlblk.exp_tasks)
@@ -587,7 +579,6 @@ static void rcu_read_unlock_special(struct task_struct *t)
if (&t->rcu_node_entry == rcu_preempt_ctrlblk.boost_tasks)
rcu_preempt_ctrlblk.boost_tasks = np;
#endif /* #ifdef CONFIG_RCU_BOOST */
- INIT_LIST_HEAD(&t->rcu_node_entry);
/*
* If this was the last task on the current list, and if
@@ -812,13 +803,16 @@ void synchronize_rcu_expedited(void)
rpcp->exp_tasks = rpcp->blkd_tasks.next;
if (rpcp->exp_tasks == &rpcp->blkd_tasks)
rpcp->exp_tasks = NULL;
- local_irq_restore(flags);
/* Wait for tail of ->blkd_tasks list to drain. */
- if (rcu_preempted_readers_exp())
- rcu_initiate_expedited_boost();
+ if (!rcu_preempted_readers_exp())
+ local_irq_restore(flags);
+ else {
+ rcu_initiate_boost();
+ local_irq_restore(flags);
wait_event(sync_rcu_preempt_exp_wq,
!rcu_preempted_readers_exp());
+ }
/* Clean up and exit. */
barrier(); /* ensure expedited GP seen before counter increment. */
@@ -931,24 +925,17 @@ void __init rcu_scheduler_starting(void)
static void rcu_initiate_boost_trace(void)
{
- if (rcu_preempt_ctrlblk.gp_tasks == NULL)
- rcu_preempt_ctrlblk.n_normal_balk_gp_tasks++;
+ if (list_empty(&rcu_preempt_ctrlblk.blkd_tasks))
+ rcu_preempt_ctrlblk.n_balk_blkd_tasks++;
+ else if (rcu_preempt_ctrlblk.gp_tasks == NULL &&
+ rcu_preempt_ctrlblk.exp_tasks == NULL)
+ rcu_preempt_ctrlblk.n_balk_exp_gp_tasks++;
else if (rcu_preempt_ctrlblk.boost_tasks != NULL)
- rcu_preempt_ctrlblk.n_normal_balk_boost_tasks++;
- else if (rcu_preempt_ctrlblk.boosted_this_gp != 0)
- rcu_preempt_ctrlblk.n_normal_balk_boosted++;
+ rcu_preempt_ctrlblk.n_balk_boost_tasks++;
else if (!ULONG_CMP_GE(jiffies, rcu_preempt_ctrlblk.boost_time))
- rcu_preempt_ctrlblk.n_normal_balk_notyet++;
- else
- rcu_preempt_ctrlblk.n_normal_balk_nos++;
-}
-
-static void rcu_initiate_exp_boost_trace(void)
-{
- if (list_empty(&rcu_preempt_ctrlblk.blkd_tasks))
- rcu_preempt_ctrlblk.n_exp_balk_blkd_tasks++;
+ rcu_preempt_ctrlblk.n_balk_notyet++;
else
- rcu_preempt_ctrlblk.n_exp_balk_nos++;
+ rcu_preempt_ctrlblk.n_balk_nos++;
}
#endif /* #ifdef CONFIG_RCU_BOOST */
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index c224da41890..2e138db0338 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
@@ -131,7 +131,7 @@ struct rcu_torture {
static LIST_HEAD(rcu_torture_freelist);
static struct rcu_torture __rcu *rcu_torture_current;
-static long rcu_torture_current_version;
+static unsigned long rcu_torture_current_version;
static struct rcu_torture rcu_tortures[10 * RCU_TORTURE_PIPE_LEN];
static DEFINE_SPINLOCK(rcu_torture_lock);
static DEFINE_PER_CPU(long [RCU_TORTURE_PIPE_LEN + 1], rcu_torture_count) =
@@ -146,8 +146,6 @@ static atomic_t n_rcu_torture_mberror;
static atomic_t n_rcu_torture_error;
static long n_rcu_torture_boost_ktrerror;
static long n_rcu_torture_boost_rterror;
-static long n_rcu_torture_boost_allocerror;
-static long n_rcu_torture_boost_afferror;
static long n_rcu_torture_boost_failure;
static long n_rcu_torture_boosts;
static long n_rcu_torture_timers;
@@ -163,11 +161,11 @@ static int stutter_pause_test;
#endif
int rcutorture_runnable = RCUTORTURE_RUNNABLE_INIT;
-#ifdef CONFIG_RCU_BOOST
+#if defined(CONFIG_RCU_BOOST) && !defined(CONFIG_HOTPLUG_CPU)
#define rcu_can_boost() 1
-#else /* #ifdef CONFIG_RCU_BOOST */
+#else /* #if defined(CONFIG_RCU_BOOST) && !defined(CONFIG_HOTPLUG_CPU) */
#define rcu_can_boost() 0
-#endif /* #else #ifdef CONFIG_RCU_BOOST */
+#endif /* #else #if defined(CONFIG_RCU_BOOST) && !defined(CONFIG_HOTPLUG_CPU) */
static unsigned long boost_starttime; /* jiffies of next boost test start. */
DEFINE_MUTEX(boost_mutex); /* protect setting boost_starttime */
@@ -751,6 +749,7 @@ static int rcu_torture_boost(void *arg)
n_rcu_torture_boost_rterror++;
}
+ init_rcu_head_on_stack(&rbi.rcu);
/* Each pass through the following loop does one boost-test cycle. */
do {
/* Wait for the next test interval. */
@@ -810,6 +809,7 @@ checkwait: rcu_stutter_wait("rcu_torture_boost");
/* Clean up and exit. */
VERBOSE_PRINTK_STRING("rcu_torture_boost task stopping");
+ destroy_rcu_head_on_stack(&rbi.rcu);
rcutorture_shutdown_absorb("rcu_torture_boost");
while (!kthread_should_stop() || rbi.inflight)
schedule_timeout_uninterruptible(1);
@@ -886,7 +886,7 @@ rcu_torture_writer(void *arg)
old_rp->rtort_pipe_count++;
cur_ops->deferred_free(old_rp);
}
- rcu_torture_current_version++;
+ rcutorture_record_progress(++rcu_torture_current_version);
oldbatch = cur_ops->completed();
rcu_stutter_wait("rcu_torture_writer");
} while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP);
@@ -1066,8 +1066,8 @@ rcu_torture_printk(char *page)
}
cnt += sprintf(&page[cnt], "%s%s ", torture_type, TORTURE_FLAG);
cnt += sprintf(&page[cnt],
- "rtc: %p ver: %ld tfle: %d rta: %d rtaf: %d rtf: %d "
- "rtmbe: %d rtbke: %ld rtbre: %ld rtbae: %ld rtbafe: %ld "
+ "rtc: %p ver: %lu tfle: %d rta: %d rtaf: %d rtf: %d "
+ "rtmbe: %d rtbke: %ld rtbre: %ld "
"rtbf: %ld rtb: %ld nt: %ld",
rcu_torture_current,
rcu_torture_current_version,
@@ -1078,16 +1078,12 @@ rcu_torture_printk(char *page)
atomic_read(&n_rcu_torture_mberror),
n_rcu_torture_boost_ktrerror,
n_rcu_torture_boost_rterror,
- n_rcu_torture_boost_allocerror,
- n_rcu_torture_boost_afferror,
n_rcu_torture_boost_failure,
n_rcu_torture_boosts,
n_rcu_torture_timers);
if (atomic_read(&n_rcu_torture_mberror) != 0 ||
n_rcu_torture_boost_ktrerror != 0 ||
n_rcu_torture_boost_rterror != 0 ||
- n_rcu_torture_boost_allocerror != 0 ||
- n_rcu_torture_boost_afferror != 0 ||
n_rcu_torture_boost_failure != 0)
cnt += sprintf(&page[cnt], " !!!");
cnt += sprintf(&page[cnt], "\n%s%s ", torture_type, TORTURE_FLAG);
@@ -1331,6 +1327,7 @@ rcu_torture_cleanup(void)
int i;
mutex_lock(&fullstop_mutex);
+ rcutorture_record_test_transition();
if (fullstop == FULLSTOP_SHUTDOWN) {
printk(KERN_WARNING /* but going down anyway, so... */
"Concurrent 'rmmod rcutorture' and shutdown illegal!\n");
@@ -1486,8 +1483,6 @@ rcu_torture_init(void)
atomic_set(&n_rcu_torture_error, 0);
n_rcu_torture_boost_ktrerror = 0;
n_rcu_torture_boost_rterror = 0;
- n_rcu_torture_boost_allocerror = 0;
- n_rcu_torture_boost_afferror = 0;
n_rcu_torture_boost_failure = 0;
n_rcu_torture_boosts = 0;
for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++)
@@ -1624,6 +1619,7 @@ rcu_torture_init(void)
}
}
register_reboot_notifier(&rcutorture_shutdown_nb);
+ rcutorture_record_test_transition();
mutex_unlock(&fullstop_mutex);
return 0;
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index dd4aea806f8..e486f7c3ffb 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -47,6 +47,8 @@
#include <linux/mutex.h>
#include <linux/time.h>
#include <linux/kernel_stat.h>
+#include <linux/wait.h>
+#include <linux/kthread.h>
#include "rcutree.h"
@@ -79,10 +81,41 @@ DEFINE_PER_CPU(struct rcu_data, rcu_sched_data);
struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh_state);
DEFINE_PER_CPU(struct rcu_data, rcu_bh_data);
+static struct rcu_state *rcu_state;
+
int rcu_scheduler_active __read_mostly;
EXPORT_SYMBOL_GPL(rcu_scheduler_active);
/*
+ * Control variables for per-CPU and per-rcu_node kthreads. These
+ * handle all flavors of RCU.
+ */
+static DEFINE_PER_CPU(struct task_struct *, rcu_cpu_kthread_task);
+DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_status);
+DEFINE_PER_CPU(int, rcu_cpu_kthread_cpu);
+DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops);
+static DEFINE_PER_CPU(wait_queue_head_t, rcu_cpu_wq);
+DEFINE_PER_CPU(char, rcu_cpu_has_work);
+static char rcu_kthreads_spawnable;
+
+static void rcu_node_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu);
+static void invoke_rcu_cpu_kthread(void);
+
+#define RCU_KTHREAD_PRIO 1 /* RT priority for per-CPU kthreads. */
+
+/*
+ * Track the rcutorture test sequence number and the update version
+ * number within a given test. The rcutorture_testseq is incremented
+ * on every rcutorture module load and unload, so has an odd value
+ * when a test is running. The rcutorture_vernum is set to zero
+ * when rcutorture starts and is incremented on each rcutorture update.
+ * These variables enable correlating rcutorture output with the
+ * RCU tracing information.
+ */
+unsigned long rcutorture_testseq;
+unsigned long rcutorture_vernum;
+
+/*
* Return true if an RCU grace period is in progress. The ACCESS_ONCE()s
* permit this function to be invoked without holding the root rcu_node
* structure's ->lock, but of course results can be subject to change.
@@ -124,6 +157,7 @@ void rcu_note_context_switch(int cpu)
rcu_sched_qs(cpu);
rcu_preempt_note_context_switch(cpu);
}
+EXPORT_SYMBOL_GPL(rcu_note_context_switch);
#ifdef CONFIG_NO_HZ
DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = {
@@ -140,10 +174,8 @@ module_param(blimit, int, 0);
module_param(qhimark, int, 0);
module_param(qlowmark, int, 0);
-#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
-int rcu_cpu_stall_suppress __read_mostly = RCU_CPU_STALL_SUPPRESS_INIT;
+int rcu_cpu_stall_suppress __read_mostly;
module_param(rcu_cpu_stall_suppress, int, 0644);
-#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
static void force_quiescent_state(struct rcu_state *rsp, int relaxed);
static int rcu_pending(int cpu);
@@ -176,6 +208,31 @@ void rcu_bh_force_quiescent_state(void)
EXPORT_SYMBOL_GPL(rcu_bh_force_quiescent_state);
/*
+ * Record the number of times rcutorture tests have been initiated and
+ * terminated. This information allows the debugfs tracing stats to be
+ * correlated to the rcutorture messages, even when the rcutorture module
+ * is being repeatedly loaded and unloaded. In other words, we cannot
+ * store this state in rcutorture itself.
+ */
+void rcutorture_record_test_transition(void)
+{
+ rcutorture_testseq++;
+ rcutorture_vernum = 0;
+}
+EXPORT_SYMBOL_GPL(rcutorture_record_test_transition);
+
+/*
+ * Record the number of writer passes through the current rcutorture test.
+ * This is also used to correlate debugfs tracing stats with the rcutorture
+ * messages.
+ */
+void rcutorture_record_progress(unsigned long vernum)
+{
+ rcutorture_vernum++;
+}
+EXPORT_SYMBOL_GPL(rcutorture_record_progress);
+
+/*
* Force a quiescent state for RCU-sched.
*/
void rcu_sched_force_quiescent_state(void)
@@ -234,8 +291,8 @@ static int rcu_implicit_offline_qs(struct rcu_data *rdp)
return 1;
}
- /* If preemptable RCU, no point in sending reschedule IPI. */
- if (rdp->preemptable)
+ /* If preemptible RCU, no point in sending reschedule IPI. */
+ if (rdp->preemptible)
return 0;
/* The CPU is online, so send it a reschedule IPI. */
@@ -450,8 +507,6 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
#endif /* #else #ifdef CONFIG_NO_HZ */
-#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
-
int rcu_cpu_stall_suppress __read_mostly;
static void record_gp_stall_check_time(struct rcu_state *rsp)
@@ -537,21 +592,24 @@ static void print_cpu_stall(struct rcu_state *rsp)
static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp)
{
- long delta;
+ unsigned long j;
+ unsigned long js;
struct rcu_node *rnp;
if (rcu_cpu_stall_suppress)
return;
- delta = jiffies - ACCESS_ONCE(rsp->jiffies_stall);
+ j = ACCESS_ONCE(jiffies);
+ js = ACCESS_ONCE(rsp->jiffies_stall);
rnp = rdp->mynode;
- if ((ACCESS_ONCE(rnp->qsmask) & rdp->grpmask) && delta >= 0) {
+ if ((ACCESS_ONCE(rnp->qsmask) & rdp->grpmask) && ULONG_CMP_GE(j, js)) {
/* We haven't checked in, so go dump stack. */
print_cpu_stall(rsp);
- } else if (rcu_gp_in_progress(rsp) && delta >= RCU_STALL_RAT_DELAY) {
+ } else if (rcu_gp_in_progress(rsp) &&
+ ULONG_CMP_GE(j, js + RCU_STALL_RAT_DELAY)) {
- /* They had two time units to dump stack, so complain. */
+ /* They had a few time units to dump stack, so complain. */
print_other_cpu_stall(rsp);
}
}
@@ -587,26 +645,6 @@ static void __init check_cpu_stall_init(void)
atomic_notifier_chain_register(&panic_notifier_list, &rcu_panic_block);
}
-#else /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
-
-static void record_gp_stall_check_time(struct rcu_state *rsp)
-{
-}
-
-static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp)
-{
-}
-
-void rcu_cpu_stall_reset(void)
-{
-}
-
-static void __init check_cpu_stall_init(void)
-{
-}
-
-#endif /* #else #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
-
/*
* Update CPU-local rcu_data state to record the newly noticed grace period.
* This is used both when we started the grace period and wh