diff options
author | Ingo Molnar <mingo@elte.hu> | 2010-12-23 12:57:04 +0100 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2010-12-23 12:57:04 +0100 |
commit | 394f4528c523d88daabd50f883a8d6b164075555 (patch) | |
tree | b45a5b87a1ba9be8afe71f1db1537ff19e2b05d8 /kernel | |
parent | 90a8a73c06cc32b609a880d48449d7083327e11a (diff) | |
parent | 3c2dcf2aed5ea22ecf65a9a871c4963faec421b3 (diff) |
Merge branch 'rcu/next' of git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-2.6-rcu into core/rcu
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/rcutiny.c | 105 | ||||
-rw-r--r-- | kernel/rcutiny_plugin.h | 433 | ||||
-rw-r--r-- | kernel/rcutorture.c | 270 | ||||
-rw-r--r-- | kernel/rcutree.c | 156 | ||||
-rw-r--r-- | kernel/rcutree.h | 61 | ||||
-rw-r--r-- | kernel/rcutree_plugin.h | 135 | ||||
-rw-r--r-- | kernel/rcutree_trace.c | 12 | ||||
-rw-r--r-- | kernel/sched.c | 69 | ||||
-rw-r--r-- | kernel/srcu.c | 8 |
9 files changed, 996 insertions, 253 deletions
diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c index d806735342a..03449372474 100644 --- a/kernel/rcutiny.c +++ b/kernel/rcutiny.c @@ -36,31 +36,16 @@ #include <linux/time.h> #include <linux/cpu.h> -/* Global control variables for rcupdate callback mechanism. */ -struct rcu_ctrlblk { - struct rcu_head *rcucblist; /* List of pending callbacks (CBs). */ - struct rcu_head **donetail; /* ->next pointer of last "done" CB. */ - struct rcu_head **curtail; /* ->next pointer of last CB. */ -}; - -/* Definition for rcupdate control block. */ -static struct rcu_ctrlblk rcu_sched_ctrlblk = { - .donetail = &rcu_sched_ctrlblk.rcucblist, - .curtail = &rcu_sched_ctrlblk.rcucblist, -}; - -static struct rcu_ctrlblk rcu_bh_ctrlblk = { - .donetail = &rcu_bh_ctrlblk.rcucblist, - .curtail = &rcu_bh_ctrlblk.rcucblist, -}; - -#ifdef CONFIG_DEBUG_LOCK_ALLOC -int rcu_scheduler_active __read_mostly; -EXPORT_SYMBOL_GPL(rcu_scheduler_active); -#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ +/* Controls for rcu_kthread() kthread, replacing RCU_SOFTIRQ used previously. */ +static struct task_struct *rcu_kthread_task; +static DECLARE_WAIT_QUEUE_HEAD(rcu_kthread_wq); +static unsigned long have_rcu_kthread_work; +static void invoke_rcu_kthread(void); /* Forward declarations for rcutiny_plugin.h. */ -static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp); +struct rcu_ctrlblk; +static void rcu_process_callbacks(struct rcu_ctrlblk *rcp); +static int rcu_kthread(void *arg); static void __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), struct rcu_ctrlblk *rcp); @@ -123,7 +108,7 @@ void rcu_sched_qs(int cpu) { if (rcu_qsctr_help(&rcu_sched_ctrlblk) + rcu_qsctr_help(&rcu_bh_ctrlblk)) - raise_softirq(RCU_SOFTIRQ); + invoke_rcu_kthread(); } /* @@ -132,7 +117,7 @@ void rcu_sched_qs(int cpu) void rcu_bh_qs(int cpu) { if (rcu_qsctr_help(&rcu_bh_ctrlblk)) - raise_softirq(RCU_SOFTIRQ); + invoke_rcu_kthread(); } /* @@ -152,13 +137,14 @@ void rcu_check_callbacks(int cpu, int user) } /* - * Helper function for rcu_process_callbacks() that operates on the - * specified rcu_ctrlkblk structure. + * Invoke the RCU callbacks on the specified rcu_ctrlkblk structure + * whose grace period has elapsed. */ -static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp) +static void rcu_process_callbacks(struct rcu_ctrlblk *rcp) { struct rcu_head *next, *list; unsigned long flags; + RCU_TRACE(int cb_count = 0); /* If no RCU callbacks ready to invoke, just return. */ if (&rcp->rcucblist == rcp->donetail) @@ -180,19 +166,58 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp) next = list->next; prefetch(next); debug_rcu_head_unqueue(list); + local_bh_disable(); list->func(list); + local_bh_enable(); list = next; + RCU_TRACE(cb_count++); } + RCU_TRACE(rcu_trace_sub_qlen(rcp, cb_count)); } /* - * Invoke any callbacks whose grace period has completed. + * This kthread invokes RCU callbacks whose grace periods have + * elapsed. It is awakened as needed, and takes the place of the + * RCU_SOFTIRQ that was used previously for this purpose. + * This is a kthread, but it is never stopped, at least not until + * the system goes down. */ -static void rcu_process_callbacks(struct softirq_action *unused) +static int rcu_kthread(void *arg) { - __rcu_process_callbacks(&rcu_sched_ctrlblk); - __rcu_process_callbacks(&rcu_bh_ctrlblk); - rcu_preempt_process_callbacks(); + unsigned long work; + unsigned long morework; + unsigned long flags; + + for (;;) { + wait_event(rcu_kthread_wq, have_rcu_kthread_work != 0); + morework = rcu_boost(); + local_irq_save(flags); + work = have_rcu_kthread_work; + have_rcu_kthread_work = morework; + local_irq_restore(flags); + if (work) { + rcu_process_callbacks(&rcu_sched_ctrlblk); + rcu_process_callbacks(&rcu_bh_ctrlblk); + rcu_preempt_process_callbacks(); + } + schedule_timeout_interruptible(1); /* Leave CPU for others. */ + } + + return 0; /* Not reached, but needed to shut gcc up. */ +} + +/* + * Wake up rcu_kthread() to process callbacks now eligible for invocation + * or to boost readers. + */ +static void invoke_rcu_kthread(void) +{ + unsigned long flags; + + local_irq_save(flags); + have_rcu_kthread_work = 1; + wake_up(&rcu_kthread_wq); + local_irq_restore(flags); } /* @@ -230,6 +255,7 @@ static void __call_rcu(struct rcu_head *head, local_irq_save(flags); *rcp->curtail = head; rcp->curtail = &head->next; + RCU_TRACE(rcp->qlen++); local_irq_restore(flags); } @@ -282,7 +308,16 @@ void rcu_barrier_sched(void) } EXPORT_SYMBOL_GPL(rcu_barrier_sched); -void __init rcu_init(void) +/* + * Spawn the kthread that invokes RCU callbacks. + */ +static int __init rcu_spawn_kthreads(void) { - open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); + struct sched_param sp; + + rcu_kthread_task = kthread_run(rcu_kthread, NULL, "rcu_kthread"); + sp.sched_priority = RCU_BOOST_PRIO; + sched_setscheduler_nocheck(rcu_kthread_task, SCHED_FIFO, &sp); + return 0; } +early_initcall(rcu_spawn_kthreads); diff --git a/kernel/rcutiny_plugin.h b/kernel/rcutiny_plugin.h index 6ceca4f745f..015abaea962 100644 --- a/kernel/rcutiny_plugin.h +++ b/kernel/rcutiny_plugin.h @@ -22,6 +22,40 @@ * Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com> */ +#include <linux/kthread.h> +#include <linux/debugfs.h> +#include <linux/seq_file.h> + +#ifdef CONFIG_RCU_TRACE +#define RCU_TRACE(stmt) stmt +#else /* #ifdef CONFIG_RCU_TRACE */ +#define RCU_TRACE(stmt) +#endif /* #else #ifdef CONFIG_RCU_TRACE */ + +/* Global control variables for rcupdate callback mechanism. */ +struct rcu_ctrlblk { + struct rcu_head *rcucblist; /* List of pending callbacks (CBs). */ + struct rcu_head **donetail; /* ->next pointer of last "done" CB. */ + struct rcu_head **curtail; /* ->next pointer of last CB. */ + RCU_TRACE(long qlen); /* Number of pending CBs. */ +}; + +/* Definition for rcupdate control block. */ +static struct rcu_ctrlblk rcu_sched_ctrlblk = { + .donetail = &rcu_sched_ctrlblk.rcucblist, + .curtail = &rcu_sched_ctrlblk.rcucblist, +}; + +static struct rcu_ctrlblk rcu_bh_ctrlblk = { + .donetail = &rcu_bh_ctrlblk.rcucblist, + .curtail = &rcu_bh_ctrlblk.rcucblist, +}; + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +int rcu_scheduler_active __read_mostly; +EXPORT_SYMBOL_GPL(rcu_scheduler_active); +#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ + #ifdef CONFIG_TINY_PREEMPT_RCU #include <linux/delay.h> @@ -46,17 +80,45 @@ struct rcu_preempt_ctrlblk { struct list_head *gp_tasks; /* Pointer to the first task blocking the */ /* current grace period, or NULL if there */ - /* is not such task. */ + /* is no such task. */ struct list_head *exp_tasks; /* Pointer to first task blocking the */ /* current expedited grace period, or NULL */ /* if there is no such task. If there */ /* is no current expedited grace period, */ /* then there cannot be any such task. */ +#ifdef CONFIG_RCU_BOOST + struct list_head *boost_tasks; + /* Pointer to first task that needs to be */ + /* priority-boosted, or NULL if no priority */ + /* boosting is needed. If there is no */ + /* current or expedited grace period, there */ + /* can be no such task. */ +#endif /* #ifdef CONFIG_RCU_BOOST */ u8 gpnum; /* Current grace period. */ u8 gpcpu; /* Last grace period blocked by the CPU. */ u8 completed; /* Last grace period completed. */ /* If all three are equal, RCU is idle. */ +#ifdef CONFIG_RCU_BOOST + s8 boosted_this_gp; /* Has boosting already happened? */ + unsigned long boost_time; /* When to start boosting (jiffies) */ +#endif /* #ifdef CONFIG_RCU_BOOST */ +#ifdef CONFIG_RCU_TRACE + unsigned long n_grace_periods; +#ifdef CONFIG_RCU_BOOST + unsigned long n_tasks_boosted; + unsigned long n_exp_boosts; + unsigned long n_normal_boosts; + unsigned long n_normal_balk_blkd_tasks; + unsigned long n_normal_balk_gp_tasks; + unsigned long n_normal_balk_boost_tasks; + unsigned long n_normal_balk_boosted; + unsigned long n_normal_balk_notyet; + unsigned long n_normal_balk_nos; + unsigned long n_exp_balk_blkd_tasks; + unsigned long n_exp_balk_nos; +#endif /* #ifdef CONFIG_RCU_BOOST */ +#endif /* #ifdef CONFIG_RCU_TRACE */ }; static struct rcu_preempt_ctrlblk rcu_preempt_ctrlblk = { @@ -122,6 +184,210 @@ static int rcu_preempt_gp_in_progress(void) } /* + * Advance a ->blkd_tasks-list pointer to the next entry, instead + * returning NULL if at the end of the list. + */ +static struct list_head *rcu_next_node_entry(struct task_struct *t) +{ + struct list_head *np; + + np = t->rcu_node_entry.next; + if (np == &rcu_preempt_ctrlblk.blkd_tasks) + np = NULL; + return np; +} + +#ifdef CONFIG_RCU_TRACE + +#ifdef CONFIG_RCU_BOOST +static void rcu_initiate_boost_trace(void); +static void rcu_initiate_exp_boost_trace(void); +#endif /* #ifdef CONFIG_RCU_BOOST */ + +/* + * Dump additional statistice for TINY_PREEMPT_RCU. + */ +static void show_tiny_preempt_stats(struct seq_file *m) +{ + seq_printf(m, "rcu_preempt: qlen=%ld gp=%lu g%u/p%u/c%u tasks=%c%c%c\n", + rcu_preempt_ctrlblk.rcb.qlen, + rcu_preempt_ctrlblk.n_grace_periods, + rcu_preempt_ctrlblk.gpnum, + rcu_preempt_ctrlblk.gpcpu, + rcu_preempt_ctrlblk.completed, + "T."[list_empty(&rcu_preempt_ctrlblk.blkd_tasks)], + "N."[!rcu_preempt_ctrlblk.gp_tasks], + "E."[!rcu_preempt_ctrlblk.exp_tasks]); +#ifdef CONFIG_RCU_BOOST + seq_printf(m, " ttb=%c btg=", + "B."[!rcu_preempt_ctrlblk.boost_tasks]); + switch (rcu_preempt_ctrlblk.boosted_this_gp) { + case -1: + seq_puts(m, "exp"); + break; + case 0: + seq_puts(m, "no"); + break; + case 1: + seq_puts(m, "begun"); + break; + case 2: + seq_puts(m, "done"); + break; + default: + seq_printf(m, "?%d?", rcu_preempt_ctrlblk.boosted_this_gp); + } + seq_printf(m, " ntb=%lu neb=%lu nnb=%lu j=%04x bt=%04x\n", + rcu_preempt_ctrlblk.n_tasks_boosted, + rcu_preempt_ctrlblk.n_exp_boosts, + rcu_preempt_ctrlblk.n_normal_boosts, + (int)(jiffies & 0xffff), + (int)(rcu_preempt_ctrlblk.boost_time & 0xffff)); + seq_printf(m, " %s: nt=%lu gt=%lu bt=%lu b=%lu ny=%lu nos=%lu\n", + "normal balk", + rcu_preempt_ctrlblk.n_normal_balk_blkd_tasks, + rcu_preempt_ctrlblk.n_normal_balk_gp_tasks, + rcu_preempt_ctrlblk.n_normal_balk_boost_tasks, + rcu_preempt_ctrlblk.n_normal_balk_boosted, + rcu_preempt_ctrlblk.n_normal_balk_notyet, + rcu_preempt_ctrlblk.n_normal_balk_nos); + seq_printf(m, " exp balk: bt=%lu nos=%lu\n", + rcu_preempt_ctrlblk.n_exp_balk_blkd_tasks, + rcu_preempt_ctrlblk.n_exp_balk_nos); +#endif /* #ifdef CONFIG_RCU_BOOST */ +} + +#endif /* #ifdef CONFIG_RCU_TRACE */ + +#ifdef CONFIG_RCU_BOOST + +#include "rtmutex_common.h" + +/* + * Carry out RCU priority boosting on the task indicated by ->boost_tasks, + * and advance ->boost_tasks to the next task in the ->blkd_tasks list. + */ +static int rcu_boost(void) +{ + unsigned long flags; + struct rt_mutex mtx; + struct list_head *np; + struct task_struct *t; + + if (rcu_preempt_ctrlblk.boost_tasks == NULL) + return 0; /* Nothing to boost. */ + raw_local_irq_save(flags); + rcu_preempt_ctrlblk.boosted_this_gp++; + t = container_of(rcu_preempt_ctrlblk.boost_tasks, struct task_struct, + rcu_node_entry); + np = rcu_next_node_entry(t); + rt_mutex_init_proxy_locked(&mtx, t); + t->rcu_boost_mutex = &mtx; + t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BOOSTED; + raw_local_irq_restore(flags); + rt_mutex_lock(&mtx); + RCU_TRACE(rcu_preempt_ctrlblk.n_tasks_boosted++); + rcu_preempt_ctrlblk.boosted_this_gp++; + rt_mutex_unlock(&mtx); + return rcu_preempt_ctrlblk.boost_tasks != NULL; +} + +/* + * Check to see if it is now time to start boosting RCU readers blocking + * the current grace period, and, if so, tell the rcu_kthread_task to + * start boosting them. If there is an expedited boost in progress, + * we wait for it to complete. + * + * If there are no blocked readers blocking the current grace period, + * return 0 to let the caller know, otherwise return 1. Note that this + * return value is independent of whether or not boosting was done. + */ +static int rcu_initiate_boost(void) +{ + if (!rcu_preempt_blocked_readers_cgp()) { + RCU_TRACE(rcu_preempt_ctrlblk.n_normal_balk_blkd_tasks++); + return 0; + } + if (rcu_preempt_ctrlblk.gp_tasks != NULL && + rcu_preempt_ctrlblk.boost_tasks == NULL && + rcu_preempt_ctrlblk.boosted_this_gp == 0 && + ULONG_CMP_GE(jiffies, rcu_preempt_ctrlblk.boost_time)) { + rcu_preempt_ctrlblk.boost_tasks = rcu_preempt_ctrlblk.gp_tasks; + invoke_rcu_kthread(); + RCU_TRACE(rcu_preempt_ctrlblk.n_normal_boosts++); + } else + RCU_TRACE(rcu_initiate_boost_trace()); + return 1; +} + +/* + * Initiate boosting for an expedited grace period. + */ +static void rcu_initiate_expedited_boost(void) +{ + unsigned long flags; + + raw_local_irq_save(flags); + if (!list_empty(&rcu_preempt_ctrlblk.blkd_tasks)) { + rcu_preempt_ctrlblk.boost_tasks = + rcu_preempt_ctrlblk.blkd_tasks.next; + rcu_preempt_ctrlblk.boosted_this_gp = -1; + invoke_rcu_kthread(); + RCU_TRACE(rcu_preempt_ctrlblk.n_exp_boosts++); + } else + RCU_TRACE(rcu_initiate_exp_boost_trace()); + raw_local_irq_restore(flags); +} + +#define RCU_BOOST_DELAY_JIFFIES DIV_ROUND_UP(CONFIG_RCU_BOOST_DELAY * HZ, 1000); + +/* + * Do priority-boost accounting for the start of a new grace period. + */ +static void rcu_preempt_boost_start_gp(void) +{ + rcu_preempt_ctrlblk.boost_time = jiffies + RCU_BOOST_DELAY_JIFFIES; + if (rcu_preempt_ctrlblk.boosted_this_gp > 0) + rcu_preempt_ctrlblk.boosted_this_gp = 0; +} + +#else /* #ifdef CONFIG_RCU_BOOST */ + +/* + * If there is no RCU priority boosting, we don't boost. + */ +static int rcu_boost(void) +{ + return 0; +} + +/* + * If there is no RCU priority boosting, we don't initiate boosting, + * but we do indicate whether there are blocked readers blocking the + * current grace period. + */ +static int rcu_initiate_boost(void) +{ + return rcu_preempt_blocked_readers_cgp(); +} + +/* + * If there is no RCU priority boosting, we don't initiate expedited boosting. + */ +static void rcu_initiate_expedited_boost(void) +{ +} + +/* + * If there is no RCU priority boosting, nothing to do at grace-period start. + */ +static void rcu_preempt_boost_start_gp(void) +{ +} + +#endif /* else #ifdef CONFIG_RCU_BOOST */ + +/* * Record a preemptible-RCU quiescent state for the specified CPU. Note * that this just means that the task currently running on the CPU is * in a quiescent state. There might be any number of tasks blocked @@ -148,11 +414,14 @@ static void rcu_preempt_cpu_qs(void) rcu_preempt_ctrlblk.gpcpu = rcu_preempt_ctrlblk.gpnum; current->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS; + /* If there is no GP then there is nothing more to do. */ + if (!rcu_preempt_gp_in_progress()) + return; /* - * If there is no GP, or if blocked readers are still blocking GP, - * then there is nothing more to do. + * Check up on boosting. If there are no readers blocking the + * current grace period, leave. */ - if (!rcu_preempt_gp_in_progress() || rcu_preempt_blocked_readers_cgp()) + if (rcu_initiate_boost()) return; /* Advance callbacks. */ @@ -164,9 +433,9 @@ static void rcu_preempt_cpu_qs(void) if (!rcu_preempt_blocked_readers_any()) rcu_preempt_ctrlblk.rcb.donetail = rcu_preempt_ctrlblk.nexttail; - /* If there are done callbacks, make RCU_SOFTIRQ process them. */ + /* If there are done callbacks, cause them to be invoked. */ if (*rcu_preempt_ctrlblk.rcb.donetail != NULL) - raise_softirq(RCU_SOFTIRQ); + invoke_rcu_kthread(); } /* @@ -178,12 +447,16 @@ static void rcu_preempt_start_gp(void) /* Official start of GP. */ rcu_preempt_ctrlblk.gpnum++; + RCU_TRACE(rcu_preempt_ctrlblk.n_grace_periods++); /* Any blocked RCU readers block new GP. */ if (rcu_preempt_blocked_readers_any()) rcu_preempt_ctrlblk.gp_tasks = rcu_preempt_ctrlblk.blkd_tasks.next; + /* Set up for RCU priority boosting. */ + rcu_preempt_boost_start_gp(); + /* If there is no running reader, CPU is done with GP. */ if (!rcu_preempt_running_reader()) rcu_preempt_cpu_qs(); @@ -304,14 +577,16 @@ static void rcu_read_unlock_special(struct task_struct *t) */ empty = !rcu_preempt_blocked_readers_cgp(); empty_exp = rcu_preempt_ctrlblk.exp_tasks == NULL; - np = t->rcu_node_entry.next; - if (np == &rcu_preempt_ctrlblk.blkd_tasks) - np = NULL; + np = rcu_next_node_entry(t); list_del(&t->rcu_node_entry); if (&t->rcu_node_entry == rcu_preempt_ctrlblk.gp_tasks) rcu_preempt_ctrlblk.gp_tasks = np; if (&t->rcu_node_entry == rcu_preempt_ctrlblk.exp_tasks) rcu_preempt_ctrlblk.exp_tasks = np; +#ifdef CONFIG_RCU_BOOST + if (&t->rcu_node_entry == rcu_preempt_ctrlblk.boost_tasks) + rcu_preempt_ctrlblk.boost_tasks = np; +#endif /* #ifdef CONFIG_RCU_BOOST */ INIT_LIST_HEAD(&t->rcu_node_entry); /* @@ -331,6 +606,14 @@ static void rcu_read_unlock_special(struct task_struct *t) if (!empty_exp && rcu_preempt_ctrlblk.exp_tasks == NULL) rcu_report_exp_done(); } +#ifdef CONFIG_RCU_BOOST + /* Unboost self if was boosted. */ + if (special & RCU_READ_UNLOCK_BOOSTED) { + t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_BOOSTED; + rt_mutex_unlock(t->rcu_boost_mutex); + t->rcu_boost_mutex = NULL; + } +#endif /* #ifdef CONFIG_RCU_BOOST */ local_irq_restore(flags); } @@ -374,7 +657,7 @@ static void rcu_preempt_check_callbacks(void) rcu_preempt_cpu_qs(); if (&rcu_preempt_ctrlblk.rcb.rcucblist != rcu_preempt_ctrlblk.rcb.donetail) - raise_softirq(RCU_SOFTIRQ); + invoke_rcu_kthread(); if (rcu_preempt_gp_in_progress() && rcu_cpu_blocking_cur_gp() && rcu_preempt_running_reader()) @@ -383,7 +666,7 @@ static void rcu_preempt_check_callbacks(void) /* * TINY_PREEMPT_RCU has an extra callback-list tail pointer to - * update, so this is invoked from __rcu_process_callbacks() to + * update, so this is invoked from rcu_process_callbacks() to * handle that case. Of course, it is invoked for all flavors of * RCU, but RCU callbacks can appear only on one of the lists, and * neither ->nexttail nor ->donetail can possibly be NULL, so there @@ -400,7 +683,7 @@ static void rcu_preempt_remove_callbacks(struct rcu_ctrlblk *rcp) */ static void rcu_preempt_process_callbacks(void) { - __rcu_process_callbacks(&rcu_preempt_ctrlblk.rcb); + rcu_process_callbacks(&rcu_preempt_ctrlblk.rcb); } /* @@ -417,6 +700,7 @@ void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) local_irq_save(flags); *rcu_preempt_ctrlblk.nexttail = head; rcu_preempt_ctrlblk.nexttail = &head->next; + RCU_TRACE(rcu_preempt_ctrlblk.rcb.qlen++); rcu_preempt_start_gp(); /* checks to see if GP needed. */ local_irq_restore(flags); } @@ -532,6 +816,7 @@ void synchronize_rcu_expedited(void) /* Wait for tail of ->blkd_tasks list to drain. */ if (rcu_preempted_readers_exp()) + rcu_initiate_expedited_boost(); wait_event(sync_rcu_preempt_exp_wq, !rcu_preempted_readers_exp()); @@ -572,6 +857,27 @@ void exit_rcu(void) #else /* #ifdef CONFIG_TINY_PREEMPT_RCU */ +#ifdef CONFIG_RCU_TRACE + +/* + * Because preemptible RCU does not exist, it is not necessary to + * dump out its statistics. + */ +static void show_tiny_preempt_stats(struct seq_file *m) +{ +} + +#endif /* #ifdef CONFIG_RCU_TRACE */ + +/* + * Because preemptible RCU does not exist, it is never necessary to + * boost preempted RCU readers. + */ +static int rcu_boost(void) +{ + return 0; +} + /* * Because preemptible RCU does not exist, it never has any callbacks * to check. @@ -599,17 +905,116 @@ static void rcu_preempt_process_callbacks(void) #endif /* #else #ifdef CONFIG_TINY_PREEMPT_RCU */ #ifdef CONFIG_DEBUG_LOCK_ALLOC - #include <linux/kernel_stat.h> /* * During boot, we forgive RCU lockdep issues. After this function is * invoked, we start taking RCU lockdep issues seriously. */ -void rcu_scheduler_starting(void) +void __init rcu_scheduler_starting(void) { WARN_ON(nr_context_switches() > 0); rcu_scheduler_active = 1; } #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ + +#ifdef CONFIG_RCU_BOOST +#define RCU_BOOST_PRIO CONFIG_RCU_BOOST_PRIO +#else /* #ifdef CONFIG_RCU_BOOST */ +#define RCU_BOOST_PRIO 1 +#endif /* #else #ifdef CONFIG_RCU_BOOST */ + +#ifdef CONFIG_RCU_TRACE + +#ifdef CONFIG_RCU_BOOST + +static void rcu_initiate_boost_trace(void) +{ + if (rcu_preempt_ctrlblk.gp_tasks == NULL) + rcu_preempt_ctrlblk.n_normal_balk_gp_tasks++; + else if (rcu_preempt_ctrlblk.boost_tasks != NULL) + rcu_preempt_ctrlblk.n_normal_balk_boost_tasks++; + else if (rcu_preempt_ctrlblk.boosted_this_gp != 0) + rcu_preempt_ctrlblk.n_normal_balk_boosted++; + else if (!ULONG_CMP_GE(jiffies, rcu_preempt_ctrlblk.boost_time)) + rcu_preempt_ctrlblk.n_normal_balk_notyet++; + else + rcu_preempt_ctrlblk.n_normal_balk_nos++; +} + +static void rcu_initiate_exp_boost_trace(void) +{ + if (list_empty(&rcu_preempt_ctrlblk.blkd_tasks)) + rcu_preempt_ctrlblk.n_exp_balk_blkd_tasks++; + else + rcu_preempt_ctrlblk.n_exp_balk_nos++; +} + +#endif /* #ifdef CONFIG_RCU_BOOST */ + +static void rcu_trace_sub_qlen(struct rcu_ctrlblk *rcp, int n) +{ + unsigned long flags; + + raw_local_irq_save(flags); + rcp->qlen -= n; + raw_local_irq_restore(flags); +} + +/* + * Dump statistics for TINY_RCU, such as they are. + */ +static int show_tiny_stats(struct seq_file *m, void *unused) +{ + show_tiny_preempt_stats(m); + seq_printf(m, "rcu_sched: qlen: %ld\n", rcu_sched_ctrlblk.qlen); + seq_printf(m, "rcu_bh: qlen: %ld\n", rcu_bh_ctrlblk.qlen); + return 0; +} + +static int show_tiny_stats_open(struct inode *inode, struct file *file) +{ + return single_open(file, show_tiny_stats, NULL); +} + +static const struct file_operations show_tiny_stats_fops = { + .owner = THIS_MODULE, + .open = show_tiny_stats_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static struct dentry *rcudir; + +static int __init rcutiny_trace_init(void) +{ + struct dentry *retval; + + rcudir = debugfs_create_dir("rcu", NULL); + if (!rcudir) + goto free_out; + retval = debugfs_create_file("rcudata", 0444, rcudir, + NULL, &show_tiny_stats_fops); + if (!retval) + goto free_out; + return 0; +free_out: + debugfs_remove_recursive(rcudir); + return 1; +} + +static void __exit rcutiny_trace_cleanup(void) +{ + debugfs_remove_recursive(rcudir); +} + +module_init(rcutiny_trace_init); +module_exit(rcutiny_trace_cleanup); + +MODULE_AUTHOR("Paul E. McKenney"); +MODULE_DESCRIPTION("Read-Copy Update tracing for tiny implementation"); +MODULE_LICENSE("GPL"); + +#endif /* #ifdef CONFIG_RCU_TRACE */ diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c index 9d8e8fb2515..89613f97ff2 100644 --- a/kernel/rcutorture.c +++ b/kernel/rcutorture.c @@ -47,6 +47,7 @@ #include <linux/srcu.h> #include <linux/slab.h> #include <asm/byteorder.h> +#include <linux/sched.h> MODULE_LICENSE("GPL"); MODULE_AUTHOR("Paul E. McKenney <paulmck@us.ibm.com> and " @@ -64,6 +65,9 @@ static int irqreader = 1; /* RCU readers from irq (timers). */ static int fqs_duration = 0; /* Duration of bursts (us), 0 to disable. */ static int fqs_holdoff = 0; /* Hold time within burst (us). */ static int fqs_stutter = 3; /* Wait time between bursts (s). */ +static int test_boost = 1; /* Test RCU prio boost: 0=no, 1=maybe, 2=yes. */ +static int test_boost_interval = 7; /* Interval between boost tests, seconds. */ +static int test_boost_duration = 4; /* Duration of each boost test, seconds. */ static char *torture_type = "rcu"; /* What RCU implementation to torture. */ module_param(nreaders, int, 0444); @@ -88,6 +92,12 @@ module_param(fqs_holdoff, int, 0444); MODULE_PARM_DESC(fqs_holdoff, "Holdoff time within fqs bursts (us)"); module_param(fqs_stutter, int, 0444); MODULE_PARM_DESC(fqs_stutter, "Wait time between fqs bursts (s)"); +module_param(test_boost, int, 0444); +MODULE_PARM_DESC(test_boost, "Test RCU prio boost: 0=no, 1=maybe, 2=yes."); +module_param(test_boost_interval, int, 0444); +MODULE_PARM_DESC(test_boost_interval, "Interval between boost tests, seconds."); +module_param(test_boost_duration, int, 0444); +MODULE_PARM_DESC(test_boost_duration, "Duration of each boost test, seconds."); module_param(torture_type, charp, 0444); MODULE_PARM_DESC(torture_type, "Type of RCU to torture (rcu, rcu_bh, srcu)"); @@ -109,6 +119,7 @@ static struct task_struct *stats_task; static struct task_struct *shuffler_task; static struct task_struct *stutter_task; static struct task_struct *fqs_task; +static struct task_struct *boost_tasks[NR_CPUS]; #define RCU_TORTURE_PIPE_LEN 10 @@ -134,6 +145,12 @@ static atomic_t n_rcu_torture_alloc_fail; static atomic_t n_rcu_torture_free; static atomic_t n_rcu_torture_mberror; static atomic_t n_rcu_torture_error; +static long n_rcu_torture_boost_ktrerror; +static long n_rcu_torture_boost_rterror; +static long n_rcu_torture_boost_allocerror; +static long n_rcu_torture_boost_afferror; +static long n_rcu_torture_boost_failure; +static long n_rcu_torture_boosts; static long n_rcu_torture_timers; static struct list_head rcu_torture_removed; static cpumask_var_t shuffle_tmp_mask; @@ -147,6 +164,16 @@ static int stutter_pause_test; #endif int rcutorture_runnable = RCUTORTURE_RUNNABLE_INIT; +#ifdef CONFIG_RCU_BOOST +#define rcu_can_boost() 1 +#else /* #ifdef CONFIG_RCU_BOOST */ +#define rcu_can_boost() 0 +#endif /* #else #ifdef CONFIG_RCU_BOOST */ + +static unsigned long boost_starttime; /* jiffies of next boost test start. */ +DEFINE_MUTEX(boost_mutex); /* protect setting boost_starttime */ + /* and boost task create/destroy. */ + /* Mediate rmmod and system shutdown. Concurrent rmmod & shutdown illegal! */ #define FULLSTOP_DONTSTOP 0 /* Normal operation. */ @@ -277,6 +304,7 @@ struct rcu_torture_ops { void (*fqs)(void); int (*stats)(char *page); int irq_capable; + int can_boost; char *name; }; @@ -366,6 +394,7 @@ static struct rcu_torture_ops rcu_ops = { .fqs = rcu_force_quiescent_state, .stats = NULL, .irq_capable = 1, + .can_boost = rcu_can_boost(), .name = "rcu" }; @@ -408,6 +437,7 @@ static struct rcu_torture_ops rcu_sync_ops = { .fqs = rcu_force_quiescent_state, .stats = NULL, .irq_capable = 1, + .can_boost = rcu_can_boost(), .name = "rcu_sync" }; @@ -424,6 +454,7 @@ static struct rcu_torture_ops rcu_expedited_ops = { .fqs = rcu_force_quiescent_state, .stats = NULL, .irq_capable = 1, + .can_boost = rcu_can_boost(), .name = "rcu_expedited" }; @@ -684,6 +715,110 @@ static struct rcu_torture_ops sched_expedited_ops = { }; /* + * RCU torture priority-boost testing. Runs one real-time thread per + * CPU for moderate bursts, repeatedly registering RCU callbacks and + * spinning waiting for them to be invoked. If a given callback takes + * too long to be invoked, we assume that priority inversion has occurred. + */ + +struct rcu_boost_inflight { + struct rcu_head rcu; + int inflight; +}; + +static void rcu_torture_boost_cb(struct rcu_head *head) +{ + struct rcu_boost_inflight *rbip = + container_of(head, struct rcu_boost_inflight, rcu); + + smp_mb(); /* Ensure RCU-core accesses precede clearing ->inflight */ + rbip->inflight = 0; +} + +static int rcu_torture_boost(void *arg) +{ + unsigned long call_rcu_time; + unsigned long endtime; + unsigned long oldstarttime; + struct rcu_boost_inflight rbi = { .inflight = 0 }; + struct sched_param sp; + + VERBOSE_PRINTK_STRING("rcu_torture_boost started"); + + /* Set real-time priority. */ + sp.sched_priority = 1; + if (sched_setscheduler(current, SCHED_FIFO, &sp) < 0) { + VERBOSE_PRINTK_STRING("rcu_torture_boost RT prio failed!"); + n_rcu_torture_boost_rterror++; + } + + /* Each pass through the following loop does one boost-test cycle. */ + do { + /* Wait for the next test interval. */ + oldstarttime = boost_starttime; + while (jiffies - oldstarttime > ULONG_MAX / 2) { + schedule_timeout_uninterruptible(1); + rcu_stutter_wait("rcu_torture_boost"); + if (kthread_should_stop() || + fullstop != FULLSTOP_DONTSTOP) + goto checkwait; + } + + /* Do one boost-test interval. */ + endtime = oldstarttime + test_boost_duration * HZ; + call_rcu_time = jiffies; + while (jiffies - endtime > ULONG_MAX / 2) { + /* If we don't have a callback in flight, post one. */ + if (!rbi.inflight) { + smp_mb(); /* RCU core before ->inflight = 1. */ + rbi.inflight = 1; + call_rcu(&rbi.rcu, rcu_torture_boost_cb); + if (jiffies - call_rcu_time > + test_boost_duration * HZ - HZ / 2) { + VERBOSE_PRINTK_STRING("rcu_torture_boost boosting failed"); + n_rcu_torture_boost_failure++; + } + call_rcu_time = jiffies; + } + cond_resched(); + rcu_stutter_wait("rcu_torture_boost"); + if (kthread_should_stop() || + fullstop != FULLSTOP_DONTSTOP) + goto checkwait; + } + + /* + * Set the start time of the next test interval. + * Yes, this is vulnerable to long delays, but such + * delays simply cause a false negative for the next + * interval. Besides, we are running at RT priority, + * so delays should be relatively rare. + */ + while (oldstarttime == boost_starttime) { + if (mutex_trylock(&boost_mutex)) { + boost_starttime = jiffies + + test_boost_interval * HZ; + n_rcu_torture_boosts++; + mutex_unlock(&boost_mutex); + break; + } + schedule_timeout_uninterruptible(1); + } + + /* Go do the stutter. */ +checkwait: rcu_stutter_wait("rcu_torture_boost"); + } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP); + + /* Clean up and exit. */ + VERBOSE_PRINTK_STRING("rcu_torture_boost task stopping"); + rcutorture_shutdown_absorb("rcu_torture_boost"); + while (!kthread_should_stop() || rbi.inflight) + schedule_timeout_uninterruptible(1); + smp_mb(); /* order accesses to ->inflight before stack-frame death. */ + return 0; +} + +/* * RCU torture force-quiescent-state kthread. Repeatedly induces * bursts of calls to force_quiescent_state(), increasing the probability * of occurrence of some important types of race conditions. @@ -933,7 +1068,8 @@ rcu_torture_printk(char *page) cnt += sprintf(&page[cnt], "%s%s ", torture_type, TORTURE_FLAG); cnt += sprintf(&page[cnt], "rtc: %p ver: %ld tfle: %d rta: %d rtaf: %d rtf: %d " - "rtmbe: %d nt: %ld", + "rtmbe: %d rtbke: %ld rtbre: %ld rtbae: %ld rtbafe: %ld " + "rtbf: %ld rtb: %ld nt: %ld", rcu_torture_current, rcu_torture_current_version, list_empty(&rcu_torture_freelist), @@ -941,8 +1077,19 @@ rcu_torture_printk(char *page) atomic_read(&n_rcu_torture_alloc_fail), atomic_read(&n_rcu_torture_free), atomic_read(&n_rcu_torture_mberror), + n_rcu_torture_boost_ktrerror, + n_rcu_torture_boost_rterror, + n_rcu_torture_boost_allocerror, + n_rcu_torture_boost_afferror, + n_rcu_torture_boost_failure, + n_rcu_torture_boosts, n_rcu_torture_timers); - if (atomic_read(&n_rcu_torture_mberror) != 0) + if (atomic_read(&n_rcu_torture_mberror) != 0 || + n_rcu_torture_boost_ktrerror != 0 || + n_rcu_torture_boost_rterror != 0 || + n_rcu_torture_boost_allocerror != 0 || + n_rcu_torture_boost_afferror != 0 || + n_rcu_torture_boost_failure != 0) cnt += sprintf(&page[cnt], " !!!"); cnt += sprintf(&page[cnt], "\n%s%s ", torture_type, TORTURE_FLAG); if (i > 1) { @@ -1094,22 +1241,91 @@ rcu_torture_stutter(void *arg) } static inline void -rcu_torture_print_module_parms(char *tag) +rcu_torture_print_module_parms(struct rcu_torture_ops *cur_ops, char *tag) { printk(KERN_ALERT "%s" TORTURE_FLAG "--- %s: nreaders=%d nfakewriters=%d " "stat_interval=%d verbose=%d test_no_idle_hz=%d " "shuffle_interval=%d stutter=%d irqreader=%d " - "fqs_duration=%d fqs_holdoff=%d fqs_stutter=%d\n", + "fqs_duration=%d fqs_holdoff=%d fqs_stutter=%d " + "test_boost=%d/%d test_boost_interval=%d " + "test_boost_duration=%d\n", torture_type, tag, nrealreaders, nfakewriters, stat_interval, verbose, test_no_idle_hz, shuffle_interval, - stutter, irqreader, fqs_duration, fqs_holdoff, fqs_stutter); + stutter, irqreader, fqs_duration, fqs_holdoff, fqs_stutter, |