author | Linus Torvalds <torvalds@linux-foundation.org> | 2011-05-19 18:14:34 -0700
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2011-05-19 18:14:34 -0700
commit | eb04f2f04ed1227c266b3219c0aaeda525639718 (patch)
tree | 7f224483a3cd0e439cd64a8666ec9dc5ed178a3d /kernel
parent | 5765040ebfc9a28d9dcfaaaaf3d25840d922de96 (diff)
parent | 80d02085d99039b3b7f3a73c8896226b0cb1ba07 (diff)
Merge branch 'core-rcu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'core-rcu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (78 commits)
Revert "rcu: Decrease memory-barrier usage based on semi-formal proof"
net,rcu: convert call_rcu(prl_entry_destroy_rcu) to kfree
batman,rcu: convert call_rcu(softif_neigh_free_rcu) to kfree_rcu
batman,rcu: convert call_rcu(neigh_node_free_rcu) to kfree()
batman,rcu: convert call_rcu(gw_node_free_rcu) to kfree_rcu
net,rcu: convert call_rcu(kfree_tid_tx) to kfree_rcu()
net,rcu: convert call_rcu(xt_osf_finger_free_rcu) to kfree_rcu()
net/mac80211,rcu: convert call_rcu(work_free_rcu) to kfree_rcu()
net,rcu: convert call_rcu(wq_free_rcu) to kfree_rcu()
net,rcu: convert call_rcu(phonet_device_rcu_free) to kfree_rcu()
perf,rcu: convert call_rcu(swevent_hlist_release_rcu) to kfree_rcu()
perf,rcu: convert call_rcu(free_ctx) to kfree_rcu()
net,rcu: convert call_rcu(__nf_ct_ext_free_rcu) to kfree_rcu()
net,rcu: convert call_rcu(net_generic_release) to kfree_rcu()
net,rcu: convert call_rcu(netlbl_unlhsh_free_addr6) to kfree_rcu()
net,rcu: convert call_rcu(netlbl_unlhsh_free_addr4) to kfree_rcu()
security,rcu: convert call_rcu(sel_netif_free) to kfree_rcu()
net,rcu: convert call_rcu(xps_dev_maps_release) to kfree_rcu()
net,rcu: convert call_rcu(xps_map_release) to kfree_rcu()
net,rcu: convert call_rcu(rps_map_release) to kfree_rcu()
...
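
Most of the commits in the shortlog above follow one mechanical pattern: an RCU callback whose only job was to kfree() its enclosing object is deleted, and the call_rcu() site is replaced by kfree_rcu(). Below is a minimal sketch of that conversion, using a hypothetical struct foo; the names are illustrative and not taken from any of the converted files.

```c
/*
 * Sketch of the call_rcu() -> kfree_rcu() conversion, using a
 * hypothetical struct foo; the real patches apply the same change to
 * css_set, perf_event_context, swevent_hlist, and others.
 */
#include <linux/kernel.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct foo {
	int data;
	struct rcu_head rcu_head;
};

/* Before: a callback that exists only to kfree() the enclosing object. */
static void foo_free_rcu(struct rcu_head *head)
{
	struct foo *fp = container_of(head, struct foo, rcu_head);

	kfree(fp);
}

static void foo_release_before(struct foo *fp)
{
	call_rcu(&fp->rcu_head, foo_free_rcu);
}

/*
 * After: kfree_rcu() takes the object and the name of its rcu_head
 * field, so the single-purpose callback can be deleted outright.
 */
static void foo_release_after(struct foo *fp)
{
	kfree_rcu(fp, rcu_head);
}
```

The kernel/cgroup.c and kernel/events/core.c hunks in the diff below are direct instances of this pattern.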
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/cgroup.c | 27
-rw-r--r-- | kernel/events/core.c | 20
-rw-r--r-- | kernel/rcupdate.c | 32
-rw-r--r-- | kernel/rcutiny.c | 45
-rw-r--r-- | kernel/rcutiny_plugin.h | 203
-rw-r--r-- | kernel/rcutorture.c | 26
-rw-r--r-- | kernel/rcutree.c | 526
-rw-r--r-- | kernel/rcutree.h | 104
-rw-r--r-- | kernel/rcutree_plugin.h | 568
-rw-r--r-- | kernel/rcutree_trace.c | 180
-rw-r--r-- | kernel/softirq.c | 2
11 files changed, 1333 insertions, 400 deletions
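
The larger kernel/rcutree.c hunk further down also reworks the CPU-stall check to compare jiffies values with ULONG_CMP_GE() instead of signed subtraction, so the test keeps working across counter wraparound. The snippet below sketches that idea; it is written as a standalone userspace program for illustration, with the macro body modeled on the kernel's ULONG_CMP_GE() rather than quoted from this patch.

```c
/*
 * Wraparound-safe jiffies-style comparison, modeled on the kernel's
 * ULONG_CMP_GE(); the surrounding test program is purely illustrative.
 */
#include <limits.h>
#include <stdio.h>

/*
 * "a is at or after b" for free-running unsigned counters: the
 * difference wraps modulo ULONG_MAX + 1, so it stays in the lower half
 * of the range as long as a has not fallen half the range behind b.
 */
#define ULONG_CMP_GE(a, b)	(ULONG_MAX / 2 >= (a) - (b))

int main(void)
{
	unsigned long js = ULONG_MAX - 5;	/* stall deadline set just before wraparound */
	unsigned long j = 10;			/* "now", after the counter has wrapped */

	/* A naive j >= js is false here, but the modular comparison is true. */
	printf("naive: %d  wraparound-safe: %d\n", j >= js, ULONG_CMP_GE(j, js) ? 1 : 0);
	return 0;
}
```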
diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 25c7eb52de1..909a35510af 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -326,12 +326,6 @@ static struct hlist_head *css_set_hash(struct cgroup_subsys_state *css[]) return &css_set_table[index]; } -static void free_css_set_rcu(struct rcu_head *obj) -{ - struct css_set *cg = container_of(obj, struct css_set, rcu_head); - kfree(cg); -} - /* We don't maintain the lists running through each css_set to its * task until after the first call to cgroup_iter_start(). This * reduces the fork()/exit() overhead for people who have cgroups @@ -375,7 +369,7 @@ static void __put_css_set(struct css_set *cg, int taskexit) } write_unlock(&css_set_lock); - call_rcu(&cg->rcu_head, free_css_set_rcu); + kfree_rcu(cg, rcu_head); } /* @@ -812,13 +806,6 @@ static int cgroup_call_pre_destroy(struct cgroup *cgrp) return ret; } -static void free_cgroup_rcu(struct rcu_head *obj) -{ - struct cgroup *cgrp = container_of(obj, struct cgroup, rcu_head); - - kfree(cgrp); -} - static void cgroup_diput(struct dentry *dentry, struct inode *inode) { /* is dentry a directory ? if so, kfree() associated cgroup */ @@ -856,7 +843,7 @@ static void cgroup_diput(struct dentry *dentry, struct inode *inode) */ BUG_ON(!list_empty(&cgrp->pidlists)); - call_rcu(&cgrp->rcu_head, free_cgroup_rcu); + kfree_rcu(cgrp, rcu_head); } iput(inode); } @@ -4623,14 +4610,6 @@ bool css_is_ancestor(struct cgroup_subsys_state *child, return ret; } -static void __free_css_id_cb(struct rcu_head *head) -{ - struct css_id *id; - - id = container_of(head, struct css_id, rcu_head); - kfree(id); -} - void free_css_id(struct cgroup_subsys *ss, struct cgroup_subsys_state *css) { struct css_id *id = css->id; @@ -4645,7 +4624,7 @@ void free_css_id(struct cgroup_subsys *ss, struct cgroup_subsys_state *css) spin_lock(&ss->id_lock); idr_remove(&ss->idr, id->id); spin_unlock(&ss->id_lock); - call_rcu(&id->rcu_head, __free_css_id_cb); + kfree_rcu(id, rcu_head); } EXPORT_SYMBOL_GPL(free_css_id); diff --git a/kernel/events/core.c b/kernel/events/core.c index 0fc34a370ba..c09767f7db3 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -586,14 +586,6 @@ static void get_ctx(struct perf_event_context *ctx) WARN_ON(!atomic_inc_not_zero(&ctx->refcount)); } -static void free_ctx(struct rcu_head *head) -{ - struct perf_event_context *ctx; - - ctx = container_of(head, struct perf_event_context, rcu_head); - kfree(ctx); -} - static void put_ctx(struct perf_event_context *ctx) { if (atomic_dec_and_test(&ctx->refcount)) { @@ -601,7 +593,7 @@ static void put_ctx(struct perf_event_context *ctx) put_ctx(ctx->parent_ctx); if (ctx->task) put_task_struct(ctx->task); - call_rcu(&ctx->rcu_head, free_ctx); + kfree_rcu(ctx, rcu_head); } } @@ -5331,14 +5323,6 @@ swevent_hlist_deref(struct swevent_htable *swhash) lockdep_is_held(&swhash->hlist_mutex)); } -static void swevent_hlist_release_rcu(struct rcu_head *rcu_head) -{ - struct swevent_hlist *hlist; - - hlist = container_of(rcu_head, struct swevent_hlist, rcu_head); - kfree(hlist); -} - static void swevent_hlist_release(struct swevent_htable *swhash) { struct swevent_hlist *hlist = swevent_hlist_deref(swhash); @@ -5347,7 +5331,7 @@ static void swevent_hlist_release(struct swevent_htable *swhash) return; rcu_assign_pointer(swhash->swevent_hlist, NULL); - call_rcu(&hlist->rcu_head, swevent_hlist_release_rcu); + kfree_rcu(hlist, rcu_head); } static void swevent_hlist_put_cpu(struct perf_event *event, int cpu) diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index 
f3240e98792..7784bd216b6 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c @@ -142,10 +142,17 @@ static int rcuhead_fixup_init(void *addr, enum debug_obj_state state) * Ensure that queued callbacks are all executed. * If we detect that we are nested in a RCU read-side critical * section, we should simply fail, otherwise we would deadlock. + * In !PREEMPT configurations, there is no way to tell if we are + * in a RCU read-side critical section or not, so we never + * attempt any fixup and just print a warning. */ +#ifndef CONFIG_PREEMPT + WARN_ON_ONCE(1); + return 0; +#endif if (rcu_preempt_depth() != 0 || preempt_count() != 0 || irqs_disabled()) { - WARN_ON(1); + WARN_ON_ONCE(1); return 0; } rcu_barrier(); @@ -184,10 +191,17 @@ static int rcuhead_fixup_activate(void *addr, enum debug_obj_state state) * Ensure that queued callbacks are all executed. * If we detect that we are nested in a RCU read-side critical * section, we should simply fail, otherwise we would deadlock. + * In !PREEMPT configurations, there is no way to tell if we are + * in a RCU read-side critical section or not, so we never + * attempt any fixup and just print a warning. */ +#ifndef CONFIG_PREEMPT + WARN_ON_ONCE(1); + return 0; +#endif if (rcu_preempt_depth() != 0 || preempt_count() != 0 || irqs_disabled()) { - WARN_ON(1); + WARN_ON_ONCE(1); return 0; } rcu_barrier(); @@ -214,15 +228,17 @@ static int rcuhead_fixup_free(void *addr, enum debug_obj_state state) * Ensure that queued callbacks are all executed. * If we detect that we are nested in a RCU read-side critical * section, we should simply fail, otherwise we would deadlock. - * Note that the machinery to reliably determine whether - * or not we are in an RCU read-side critical section - * exists only in the preemptible RCU implementations - * (TINY_PREEMPT_RCU and TREE_PREEMPT_RCU), which is why - * DEBUG_OBJECTS_RCU_HEAD is disallowed if !PREEMPT. + * In !PREEMPT configurations, there is no way to tell if we are + * in a RCU read-side critical section or not, so we never + * attempt any fixup and just print a warning. */ +#ifndef CONFIG_PREEMPT + WARN_ON_ONCE(1); + return 0; +#endif if (rcu_preempt_depth() != 0 || preempt_count() != 0 || irqs_disabled()) { - WARN_ON(1); + WARN_ON_ONCE(1); return 0; } rcu_barrier(); diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c index 0c343b9a46d..421abfd3641 100644 --- a/kernel/rcutiny.c +++ b/kernel/rcutiny.c @@ -40,10 +40,10 @@ static struct task_struct *rcu_kthread_task; static DECLARE_WAIT_QUEUE_HEAD(rcu_kthread_wq); static unsigned long have_rcu_kthread_work; -static void invoke_rcu_kthread(void); /* Forward declarations for rcutiny_plugin.h. */ struct rcu_ctrlblk; +static void invoke_rcu_kthread(void); static void rcu_process_callbacks(struct rcu_ctrlblk *rcp); static int rcu_kthread(void *arg); static void __call_rcu(struct rcu_head *head, @@ -79,36 +79,45 @@ void rcu_exit_nohz(void) #endif /* #ifdef CONFIG_NO_HZ */ /* - * Helper function for rcu_qsctr_inc() and rcu_bh_qsctr_inc(). - * Also disable irqs to avoid confusion due to interrupt handlers + * Helper function for rcu_sched_qs() and rcu_bh_qs(). + * Also irqs are disabled to avoid confusion due to interrupt handlers * invoking call_rcu(). 
*/ static int rcu_qsctr_help(struct rcu_ctrlblk *rcp) { - unsigned long flags; - - local_irq_save(flags); if (rcp->rcucblist != NULL && rcp->donetail != rcp->curtail) { rcp->donetail = rcp->curtail; - local_irq_restore(flags); return 1; } - local_irq_restore(flags); return 0; } /* + * Wake up rcu_kthread() to process callbacks now eligible for invocation + * or to boost readers. + */ +static void invoke_rcu_kthread(void) +{ + have_rcu_kthread_work = 1; + wake_up(&rcu_kthread_wq); +} + +/* * Record an rcu quiescent state. And an rcu_bh quiescent state while we * are at it, given that any rcu quiescent state is also an rcu_bh * quiescent state. Use "+" instead of "||" to defeat short circuiting. */ void rcu_sched_qs(int cpu) { + unsigned long flags; + + local_irq_save(flags); if (rcu_qsctr_help(&rcu_sched_ctrlblk) + rcu_qsctr_help(&rcu_bh_ctrlblk)) invoke_rcu_kthread(); + local_irq_restore(flags); } /* @@ -116,8 +125,12 @@ void rcu_sched_qs(int cpu) */ void rcu_bh_qs(int cpu) { + unsigned long flags; + + local_irq_save(flags); if (rcu_qsctr_help(&rcu_bh_ctrlblk)) invoke_rcu_kthread(); + local_irq_restore(flags); } /* @@ -167,7 +180,7 @@ static void rcu_process_callbacks(struct rcu_ctrlblk *rcp) prefetch(next); debug_rcu_head_unqueue(list); local_bh_disable(); - list->func(list); + __rcu_reclaim(list); local_bh_enable(); list = next; RCU_TRACE(cb_count++); @@ -208,20 +221,6 @@ static int rcu_kthread(void *arg) } /* - * Wake up rcu_kthread() to process callbacks now eligible for invocation - * or to boost readers. - */ -static void invoke_rcu_kthread(void) -{ - unsigned long flags; - - local_irq_save(flags); - have_rcu_kthread_work = 1; - wake_up(&rcu_kthread_wq); - local_irq_restore(flags); -} - -/* * Wait for a grace period to elapse. But it is illegal to invoke * synchronize_sched() from within an RCU read-side critical section. * Therefore, any legal call to synchronize_sched() is a quiescent diff --git a/kernel/rcutiny_plugin.h b/kernel/rcutiny_plugin.h index 3cb8e362e88..f259c676195 100644 --- a/kernel/rcutiny_plugin.h +++ b/kernel/rcutiny_plugin.h @@ -100,23 +100,28 @@ struct rcu_preempt_ctrlblk { u8 completed; /* Last grace period completed. */ /* If all three are equal, RCU is idle. */ #ifdef CONFIG_RCU_BOOST - s8 boosted_this_gp; /* Has boosting already happened? */ unsigned long boost_time; /* When to start boosting (jiffies) */ #endif /* #ifdef CONFIG_RCU_BOOST */ #ifdef CONFIG_RCU_TRACE unsigned long n_grace_periods; #ifdef CONFIG_RCU_BOOST unsigned long n_tasks_boosted; + /* Total number of tasks boosted. */ unsigned long n_exp_boosts; + /* Number of tasks boosted for expedited GP. */ unsigned long n_normal_boosts; - unsigned long n_normal_balk_blkd_tasks; - unsigned long n_normal_balk_gp_tasks; - unsigned long n_normal_balk_boost_tasks; - unsigned long n_normal_balk_boosted; - unsigned long n_normal_balk_notyet; - unsigned long n_normal_balk_nos; - unsigned long n_exp_balk_blkd_tasks; - unsigned long n_exp_balk_nos; + /* Number of tasks boosted for normal GP. */ + unsigned long n_balk_blkd_tasks; + /* Refused to boost: no blocked tasks. */ + unsigned long n_balk_exp_gp_tasks; + /* Refused to boost: nothing blocking GP. */ + unsigned long n_balk_boost_tasks; + /* Refused to boost: already boosting. */ + unsigned long n_balk_notyet; + /* Refused to boost: not yet time. */ + unsigned long n_balk_nos; + /* Refused to boost: not sure why, though. */ + /* This can happen due to race conditions. 
*/ #endif /* #ifdef CONFIG_RCU_BOOST */ #endif /* #ifdef CONFIG_RCU_TRACE */ }; @@ -201,7 +206,6 @@ static struct list_head *rcu_next_node_entry(struct task_struct *t) #ifdef CONFIG_RCU_BOOST static void rcu_initiate_boost_trace(void); -static void rcu_initiate_exp_boost_trace(void); #endif /* #ifdef CONFIG_RCU_BOOST */ /* @@ -219,41 +223,21 @@ static void show_tiny_preempt_stats(struct seq_file *m) "N."[!rcu_preempt_ctrlblk.gp_tasks], "E."[!rcu_preempt_ctrlblk.exp_tasks]); #ifdef CONFIG_RCU_BOOST - seq_printf(m, " ttb=%c btg=", - "B."[!rcu_preempt_ctrlblk.boost_tasks]); - switch (rcu_preempt_ctrlblk.boosted_this_gp) { - case -1: - seq_puts(m, "exp"); - break; - case 0: - seq_puts(m, "no"); - break; - case 1: - seq_puts(m, "begun"); - break; - case 2: - seq_puts(m, "done"); - break; - default: - seq_printf(m, "?%d?", rcu_preempt_ctrlblk.boosted_this_gp); - } - seq_printf(m, " ntb=%lu neb=%lu nnb=%lu j=%04x bt=%04x\n", + seq_printf(m, "%sttb=%c ntb=%lu neb=%lu nnb=%lu j=%04x bt=%04x\n", + " ", + "B."[!rcu_preempt_ctrlblk.boost_tasks], rcu_preempt_ctrlblk.n_tasks_boosted, rcu_preempt_ctrlblk.n_exp_boosts, rcu_preempt_ctrlblk.n_normal_boosts, (int)(jiffies & 0xffff), (int)(rcu_preempt_ctrlblk.boost_time & 0xffff)); - seq_printf(m, " %s: nt=%lu gt=%lu bt=%lu b=%lu ny=%lu nos=%lu\n", - "normal balk", - rcu_preempt_ctrlblk.n_normal_balk_blkd_tasks, - rcu_preempt_ctrlblk.n_normal_balk_gp_tasks, - rcu_preempt_ctrlblk.n_normal_balk_boost_tasks, - rcu_preempt_ctrlblk.n_normal_balk_boosted, - rcu_preempt_ctrlblk.n_normal_balk_notyet, - rcu_preempt_ctrlblk.n_normal_balk_nos); - seq_printf(m, " exp balk: bt=%lu nos=%lu\n", - rcu_preempt_ctrlblk.n_exp_balk_blkd_tasks, - rcu_preempt_ctrlblk.n_exp_balk_nos); + seq_printf(m, "%s: nt=%lu egt=%lu bt=%lu ny=%lu nos=%lu\n", + " balk", + rcu_preempt_ctrlblk.n_balk_blkd_tasks, + rcu_preempt_ctrlblk.n_balk_exp_gp_tasks, + rcu_preempt_ctrlblk.n_balk_boost_tasks, + rcu_preempt_ctrlblk.n_balk_notyet, + rcu_preempt_ctrlblk.n_balk_nos); #endif /* #ifdef CONFIG_RCU_BOOST */ } @@ -271,25 +255,59 @@ static int rcu_boost(void) { unsigned long flags; struct rt_mutex mtx; - struct list_head *np; struct task_struct *t; + struct list_head *tb; - if (rcu_preempt_ctrlblk.boost_tasks == NULL) + if (rcu_preempt_ctrlblk.boost_tasks == NULL && + rcu_preempt_ctrlblk.exp_tasks == NULL) return 0; /* Nothing to boost. */ + raw_local_irq_save(flags); - rcu_preempt_ctrlblk.boosted_this_gp++; - t = container_of(rcu_preempt_ctrlblk.boost_tasks, struct task_struct, - rcu_node_entry); - np = rcu_next_node_entry(t); + + /* + * Recheck with irqs disabled: all tasks in need of boosting + * might exit their RCU read-side critical sections on their own + * if we are preempted just before disabling irqs. + */ + if (rcu_preempt_ctrlblk.boost_tasks == NULL && + rcu_preempt_ctrlblk.exp_tasks == NULL) { + raw_local_irq_restore(flags); + return 0; + } + + /* + * Preferentially boost tasks blocking expedited grace periods. + * This cannot starve the normal grace periods because a second + * expedited grace period must boost all blocked tasks, including + * those blocking the pre-existing normal grace period. + */ + if (rcu_preempt_ctrlblk.exp_tasks != NULL) { + tb = rcu_preempt_ctrlblk.exp_tasks; + RCU_TRACE(rcu_preempt_ctrlblk.n_exp_boosts++); + } else { + tb = rcu_preempt_ctrlblk.boost_tasks; + RCU_TRACE(rcu_preempt_ctrlblk.n_normal_boosts++); + } + RCU_TRACE(rcu_preempt_ctrlblk.n_tasks_boosted++); + + /* + * We boost task t by manufacturing an rt_mutex that appears to + * be held by task t. 
We leave a pointer to that rt_mutex where + * task t can find it, and task t will release the mutex when it + * exits its outermost RCU read-side critical section. Then + * simply acquiring this artificial rt_mutex will boost task + * t's priority. (Thanks to tglx for suggesting this approach!) + */ + t = container_of(tb, struct task_struct, rcu_node_entry); rt_mutex_init_proxy_locked(&mtx, t); t->rcu_boost_mutex = &mtx; t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BOOSTED; raw_local_irq_restore(flags); rt_mutex_lock(&mtx); - RCU_TRACE(rcu_preempt_ctrlblk.n_tasks_boosted++); - rcu_preempt_ctrlblk.boosted_this_gp++; - rt_mutex_unlock(&mtx); - return rcu_preempt_ctrlblk.boost_tasks != NULL; + rt_mutex_unlock(&mtx); /* Keep lockdep happy. */ + + return rcu_preempt_ctrlblk.boost_tasks != NULL || + rcu_preempt_ctrlblk.exp_tasks != NULL; } /* @@ -304,42 +322,25 @@ static int rcu_boost(void) */ static int rcu_initiate_boost(void) { - if (!rcu_preempt_blocked_readers_cgp()) { - RCU_TRACE(rcu_preempt_ctrlblk.n_normal_balk_blkd_tasks++); + if (!rcu_preempt_blocked_readers_cgp() && + rcu_preempt_ctrlblk.exp_tasks == NULL) { + RCU_TRACE(rcu_preempt_ctrlblk.n_balk_exp_gp_tasks++); return 0; } - if (rcu_preempt_ctrlblk.gp_tasks != NULL && - rcu_preempt_ctrlblk.boost_tasks == NULL && - rcu_preempt_ctrlblk.boosted_this_gp == 0 && - ULONG_CMP_GE(jiffies, rcu_preempt_ctrlblk.boost_time)) { - rcu_preempt_ctrlblk.boost_tasks = rcu_preempt_ctrlblk.gp_tasks; + if (rcu_preempt_ctrlblk.exp_tasks != NULL || + (rcu_preempt_ctrlblk.gp_tasks != NULL && + rcu_preempt_ctrlblk.boost_tasks == NULL && + ULONG_CMP_GE(jiffies, rcu_preempt_ctrlblk.boost_time))) { + if (rcu_preempt_ctrlblk.exp_tasks == NULL) + rcu_preempt_ctrlblk.boost_tasks = + rcu_preempt_ctrlblk.gp_tasks; invoke_rcu_kthread(); - RCU_TRACE(rcu_preempt_ctrlblk.n_normal_boosts++); } else RCU_TRACE(rcu_initiate_boost_trace()); return 1; } -/* - * Initiate boosting for an expedited grace period. - */ -static void rcu_initiate_expedited_boost(void) -{ - unsigned long flags; - - raw_local_irq_save(flags); - if (!list_empty(&rcu_preempt_ctrlblk.blkd_tasks)) { - rcu_preempt_ctrlblk.boost_tasks = - rcu_preempt_ctrlblk.blkd_tasks.next; - rcu_preempt_ctrlblk.boosted_this_gp = -1; - invoke_rcu_kthread(); - RCU_TRACE(rcu_preempt_ctrlblk.n_exp_boosts++); - } else - RCU_TRACE(rcu_initiate_exp_boost_trace()); - raw_local_irq_restore(flags); -} - -#define RCU_BOOST_DELAY_JIFFIES DIV_ROUND_UP(CONFIG_RCU_BOOST_DELAY * HZ, 1000); +#define RCU_BOOST_DELAY_JIFFIES DIV_ROUND_UP(CONFIG_RCU_BOOST_DELAY * HZ, 1000) /* * Do priority-boost accounting for the start of a new grace period. @@ -347,8 +348,6 @@ static void rcu_initiate_expedited_boost(void) static void rcu_preempt_boost_start_gp(void) { rcu_preempt_ctrlblk.boost_time = jiffies + RCU_BOOST_DELAY_JIFFIES; - if (rcu_preempt_ctrlblk.boosted_this_gp > 0) - rcu_preempt_ctrlblk.boosted_this_gp = 0; } #else /* #ifdef CONFIG_RCU_BOOST */ @@ -372,13 +371,6 @@ static int rcu_initiate_boost(void) } /* - * If there is no RCU priority boosting, we don't initiate expedited boosting. - */ -static void rcu_initiate_expedited_boost(void) -{ -} - -/* * If there is no RCU priority boosting, nothing to do at grace-period start. */ static void rcu_preempt_boost_start_gp(void) @@ -418,7 +410,7 @@ static void rcu_preempt_cpu_qs(void) if (!rcu_preempt_gp_in_progress()) return; /* - * Check up on boosting. If there are no readers blocking the + * Check up on boosting. If there are readers blocking the * current grace period, leave. 
*/ if (rcu_initiate_boost()) @@ -578,7 +570,7 @@ static void rcu_read_unlock_special(struct task_struct *t) empty = !rcu_preempt_blocked_readers_cgp(); empty_exp = rcu_preempt_ctrlblk.exp_tasks == NULL; np = rcu_next_node_entry(t); - list_del(&t->rcu_node_entry); + list_del_init(&t->rcu_node_entry); if (&t->rcu_node_entry == rcu_preempt_ctrlblk.gp_tasks) rcu_preempt_ctrlblk.gp_tasks = np; if (&t->rcu_node_entry == rcu_preempt_ctrlblk.exp_tasks) @@ -587,7 +579,6 @@ static void rcu_read_unlock_special(struct task_struct *t) if (&t->rcu_node_entry == rcu_preempt_ctrlblk.boost_tasks) rcu_preempt_ctrlblk.boost_tasks = np; #endif /* #ifdef CONFIG_RCU_BOOST */ - INIT_LIST_HEAD(&t->rcu_node_entry); /* * If this was the last task on the current list, and if @@ -812,13 +803,16 @@ void synchronize_rcu_expedited(void) rpcp->exp_tasks = rpcp->blkd_tasks.next; if (rpcp->exp_tasks == &rpcp->blkd_tasks) rpcp->exp_tasks = NULL; - local_irq_restore(flags); /* Wait for tail of ->blkd_tasks list to drain. */ - if (rcu_preempted_readers_exp()) - rcu_initiate_expedited_boost(); + if (!rcu_preempted_readers_exp()) + local_irq_restore(flags); + else { + rcu_initiate_boost(); + local_irq_restore(flags); wait_event(sync_rcu_preempt_exp_wq, !rcu_preempted_readers_exp()); + } /* Clean up and exit. */ barrier(); /* ensure expedited GP seen before counter increment. */ @@ -931,24 +925,17 @@ void __init rcu_scheduler_starting(void) static void rcu_initiate_boost_trace(void) { - if (rcu_preempt_ctrlblk.gp_tasks == NULL) - rcu_preempt_ctrlblk.n_normal_balk_gp_tasks++; + if (list_empty(&rcu_preempt_ctrlblk.blkd_tasks)) + rcu_preempt_ctrlblk.n_balk_blkd_tasks++; + else if (rcu_preempt_ctrlblk.gp_tasks == NULL && + rcu_preempt_ctrlblk.exp_tasks == NULL) + rcu_preempt_ctrlblk.n_balk_exp_gp_tasks++; else if (rcu_preempt_ctrlblk.boost_tasks != NULL) - rcu_preempt_ctrlblk.n_normal_balk_boost_tasks++; - else if (rcu_preempt_ctrlblk.boosted_this_gp != 0) - rcu_preempt_ctrlblk.n_normal_balk_boosted++; + rcu_preempt_ctrlblk.n_balk_boost_tasks++; else if (!ULONG_CMP_GE(jiffies, rcu_preempt_ctrlblk.boost_time)) - rcu_preempt_ctrlblk.n_normal_balk_notyet++; - else - rcu_preempt_ctrlblk.n_normal_balk_nos++; -} - -static void rcu_initiate_exp_boost_trace(void) -{ - if (list_empty(&rcu_preempt_ctrlblk.blkd_tasks)) - rcu_preempt_ctrlblk.n_exp_balk_blkd_tasks++; + rcu_preempt_ctrlblk.n_balk_notyet++; else - rcu_preempt_ctrlblk.n_exp_balk_nos++; + rcu_preempt_ctrlblk.n_balk_nos++; } #endif /* #ifdef CONFIG_RCU_BOOST */ diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c index c224da41890..2e138db0338 100644 --- a/kernel/rcutorture.c +++ b/kernel/rcutorture.c @@ -131,7 +131,7 @@ struct rcu_torture { static LIST_HEAD(rcu_torture_freelist); static struct rcu_torture __rcu *rcu_torture_current; -static long rcu_torture_current_version; +static unsigned long rcu_torture_current_version; static struct rcu_torture rcu_tortures[10 * RCU_TORTURE_PIPE_LEN]; static DEFINE_SPINLOCK(rcu_torture_lock); static DEFINE_PER_CPU(long [RCU_TORTURE_PIPE_LEN + 1], rcu_torture_count) = @@ -146,8 +146,6 @@ static atomic_t n_rcu_torture_mberror; static atomic_t n_rcu_torture_error; static long n_rcu_torture_boost_ktrerror; static long n_rcu_torture_boost_rterror; -static long n_rcu_torture_boost_allocerror; -static long n_rcu_torture_boost_afferror; static long n_rcu_torture_boost_failure; static long n_rcu_torture_boosts; static long n_rcu_torture_timers; @@ -163,11 +161,11 @@ static int stutter_pause_test; #endif int rcutorture_runnable = 
RCUTORTURE_RUNNABLE_INIT; -#ifdef CONFIG_RCU_BOOST +#if defined(CONFIG_RCU_BOOST) && !defined(CONFIG_HOTPLUG_CPU) #define rcu_can_boost() 1 -#else /* #ifdef CONFIG_RCU_BOOST */ +#else /* #if defined(CONFIG_RCU_BOOST) && !defined(CONFIG_HOTPLUG_CPU) */ #define rcu_can_boost() 0 -#endif /* #else #ifdef CONFIG_RCU_BOOST */ +#endif /* #else #if defined(CONFIG_RCU_BOOST) && !defined(CONFIG_HOTPLUG_CPU) */ static unsigned long boost_starttime; /* jiffies of next boost test start. */ DEFINE_MUTEX(boost_mutex); /* protect setting boost_starttime */ @@ -751,6 +749,7 @@ static int rcu_torture_boost(void *arg) n_rcu_torture_boost_rterror++; } + init_rcu_head_on_stack(&rbi.rcu); /* Each pass through the following loop does one boost-test cycle. */ do { /* Wait for the next test interval. */ @@ -810,6 +809,7 @@ checkwait: rcu_stutter_wait("rcu_torture_boost"); /* Clean up and exit. */ VERBOSE_PRINTK_STRING("rcu_torture_boost task stopping"); + destroy_rcu_head_on_stack(&rbi.rcu); rcutorture_shutdown_absorb("rcu_torture_boost"); while (!kthread_should_stop() || rbi.inflight) schedule_timeout_uninterruptible(1); @@ -886,7 +886,7 @@ rcu_torture_writer(void *arg) old_rp->rtort_pipe_count++; cur_ops->deferred_free(old_rp); } - rcu_torture_current_version++; + rcutorture_record_progress(++rcu_torture_current_version); oldbatch = cur_ops->completed(); rcu_stutter_wait("rcu_torture_writer"); } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP); @@ -1066,8 +1066,8 @@ rcu_torture_printk(char *page) } cnt += sprintf(&page[cnt], "%s%s ", torture_type, TORTURE_FLAG); cnt += sprintf(&page[cnt], - "rtc: %p ver: %ld tfle: %d rta: %d rtaf: %d rtf: %d " - "rtmbe: %d rtbke: %ld rtbre: %ld rtbae: %ld rtbafe: %ld " + "rtc: %p ver: %lu tfle: %d rta: %d rtaf: %d rtf: %d " + "rtmbe: %d rtbke: %ld rtbre: %ld " "rtbf: %ld rtb: %ld nt: %ld", rcu_torture_current, rcu_torture_current_version, @@ -1078,16 +1078,12 @@ rcu_torture_printk(char *page) atomic_read(&n_rcu_torture_mberror), n_rcu_torture_boost_ktrerror, n_rcu_torture_boost_rterror, - n_rcu_torture_boost_allocerror, - n_rcu_torture_boost_afferror, n_rcu_torture_boost_failure, n_rcu_torture_boosts, n_rcu_torture_timers); if (atomic_read(&n_rcu_torture_mberror) != 0 || n_rcu_torture_boost_ktrerror != 0 || n_rcu_torture_boost_rterror != 0 || - n_rcu_torture_boost_allocerror != 0 || - n_rcu_torture_boost_afferror != 0 || n_rcu_torture_boost_failure != 0) cnt += sprintf(&page[cnt], " !!!"); cnt += sprintf(&page[cnt], "\n%s%s ", torture_type, TORTURE_FLAG); @@ -1331,6 +1327,7 @@ rcu_torture_cleanup(void) int i; mutex_lock(&fullstop_mutex); + rcutorture_record_test_transition(); if (fullstop == FULLSTOP_SHUTDOWN) { printk(KERN_WARNING /* but going down anyway, so... 
*/ "Concurrent 'rmmod rcutorture' and shutdown illegal!\n"); @@ -1486,8 +1483,6 @@ rcu_torture_init(void) atomic_set(&n_rcu_torture_error, 0); n_rcu_torture_boost_ktrerror = 0; n_rcu_torture_boost_rterror = 0; - n_rcu_torture_boost_allocerror = 0; - n_rcu_torture_boost_afferror = 0; n_rcu_torture_boost_failure = 0; n_rcu_torture_boosts = 0; for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++) @@ -1624,6 +1619,7 @@ rcu_torture_init(void) } } register_reboot_notifier(&rcutorture_shutdown_nb); + rcutorture_record_test_transition(); mutex_unlock(&fullstop_mutex); return 0; diff --git a/kernel/rcutree.c b/kernel/rcutree.c index dd4aea806f8..e486f7c3ffb 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -47,6 +47,8 @@ #include <linux/mutex.h> #include <linux/time.h> #include <linux/kernel_stat.h> +#include <linux/wait.h> +#include <linux/kthread.h> #include "rcutree.h" @@ -79,10 +81,41 @@ DEFINE_PER_CPU(struct rcu_data, rcu_sched_data); struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh_state); DEFINE_PER_CPU(struct rcu_data, rcu_bh_data); +static struct rcu_state *rcu_state; + int rcu_scheduler_active __read_mostly; EXPORT_SYMBOL_GPL(rcu_scheduler_active); /* + * Control variables for per-CPU and per-rcu_node kthreads. These + * handle all flavors of RCU. + */ +static DEFINE_PER_CPU(struct task_struct *, rcu_cpu_kthread_task); +DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_status); +DEFINE_PER_CPU(int, rcu_cpu_kthread_cpu); +DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops); +static DEFINE_PER_CPU(wait_queue_head_t, rcu_cpu_wq); +DEFINE_PER_CPU(char, rcu_cpu_has_work); +static char rcu_kthreads_spawnable; + +static void rcu_node_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu); +static void invoke_rcu_cpu_kthread(void); + +#define RCU_KTHREAD_PRIO 1 /* RT priority for per-CPU kthreads. */ + +/* + * Track the rcutorture test sequence number and the update version + * number within a given test. The rcutorture_testseq is incremented + * on every rcutorture module load and unload, so has an odd value + * when a test is running. The rcutorture_vernum is set to zero + * when rcutorture starts and is incremented on each rcutorture update. + * These variables enable correlating rcutorture output with the + * RCU tracing information. + */ +unsigned long rcutorture_testseq; +unsigned long rcutorture_vernum; + +/* * Return true if an RCU grace period is in progress. The ACCESS_ONCE()s * permit this function to be invoked without holding the root rcu_node * structure's ->lock, but of course results can be subject to change. @@ -124,6 +157,7 @@ void rcu_note_context_switch(int cpu) rcu_sched_qs(cpu); rcu_preempt_note_context_switch(cpu); } +EXPORT_SYMBOL_GPL(rcu_note_context_switch); #ifdef CONFIG_NO_HZ DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = { @@ -140,10 +174,8 @@ module_param(blimit, int, 0); module_param(qhimark, int, 0); module_param(qlowmark, int, 0); -#ifdef CONFIG_RCU_CPU_STALL_DETECTOR -int rcu_cpu_stall_suppress __read_mostly = RCU_CPU_STALL_SUPPRESS_INIT; +int rcu_cpu_stall_suppress __read_mostly; module_param(rcu_cpu_stall_suppress, int, 0644); -#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ static void force_quiescent_state(struct rcu_state *rsp, int relaxed); static int rcu_pending(int cpu); @@ -176,6 +208,31 @@ void rcu_bh_force_quiescent_state(void) EXPORT_SYMBOL_GPL(rcu_bh_force_quiescent_state); /* + * Record the number of times rcutorture tests have been initiated and + * terminated. 
This information allows the debugfs tracing stats to be + * correlated to the rcutorture messages, even when the rcutorture module + * is being repeatedly loaded and unloaded. In other words, we cannot + * store this state in rcutorture itself. + */ +void rcutorture_record_test_transition(void) +{ + rcutorture_testseq++; + rcutorture_vernum = 0; +} +EXPORT_SYMBOL_GPL(rcutorture_record_test_transition); + +/* + * Record the number of writer passes through the current rcutorture test. + * This is also used to correlate debugfs tracing stats with the rcutorture + * messages. + */ +void rcutorture_record_progress(unsigned long vernum) +{ + rcutorture_vernum++; +} +EXPORT_SYMBOL_GPL(rcutorture_record_progress); + +/* * Force a quiescent state for RCU-sched. */ void rcu_sched_force_quiescent_state(void) @@ -234,8 +291,8 @@ static int rcu_implicit_offline_qs(struct rcu_data *rdp) return 1; } - /* If preemptable RCU, no point in sending reschedule IPI. */ - if (rdp->preemptable) + /* If preemptible RCU, no point in sending reschedule IPI. */ + if (rdp->preemptible) return 0; /* The CPU is online, so send it a reschedule IPI. */ @@ -450,8 +507,6 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) #endif /* #else #ifdef CONFIG_NO_HZ */ -#ifdef CONFIG_RCU_CPU_STALL_DETECTOR - int rcu_cpu_stall_suppress __read_mostly; static void record_gp_stall_check_time(struct rcu_state *rsp) @@ -537,21 +592,24 @@ static void print_cpu_stall(struct rcu_state *rsp) static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp) { - long delta; + unsigned long j; + unsigned long js; struct rcu_node *rnp; if (rcu_cpu_stall_suppress) return; - delta = jiffies - ACCESS_ONCE(rsp->jiffies_stall); + j = ACCESS_ONCE(jiffies); + js = ACCESS_ONCE(rsp->jiffies_stall); rnp = rdp->mynode; - if ((ACCESS_ONCE(rnp->qsmask) & rdp->grpmask) && delta >= 0) { + if ((ACCESS_ONCE(rnp->qsmask) & rdp->grpmask) && ULONG_CMP_GE(j, js)) { /* We haven't checked in, so go dump stack. */ print_cpu_stall(rsp); - } else if (rcu_gp_in_progress(rsp) && delta >= RCU_STALL_RAT_DELAY) { + } else if (rcu_gp_in_progress(rsp) && + ULONG_CMP_GE(j, js + RCU_STALL_RAT_DELAY)) { - /* They had two time units to dump stack, so complain. */ + /* They had a few time units to dump stack, so complain. */ print_other_cpu_stall(rsp); } } @@ -587,26 +645,6 @@ static void __init check_cpu_stall_init(void) atomic_notifier_chain_register(&panic_notifier_list, &rcu_panic_block); } -#else /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ - -static void record_gp_stall_check_time(struct rcu_state *rsp) -{ -} - -static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp) -{ -} - -void rcu_cpu_stall_reset(void) -{ -} - -static void __init check_cpu_stall_init(void) -{ -} - -#endif /* #else #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ - /* * Update CPU-local rcu_data state to record the newly noticed grace period. * This is used both when we started the grace period and wh |