diff options
-rw-r--r-- | Documentation/DocBook/device-drivers.tmpl | 5 | ||||
-rw-r--r-- | Documentation/RCU/checklist.txt | 4 | ||||
-rw-r--r-- | Documentation/RCU/stallwarn.txt | 22 | ||||
-rw-r--r-- | Documentation/kernel-parameters.txt | 95 | ||||
-rw-r--r-- | Documentation/kernel-per-CPU-kthreads.txt | 17 | ||||
-rw-r--r-- | MAINTAINERS | 11 | ||||
-rw-r--r-- | include/linux/rculist.h | 23 | ||||
-rw-r--r-- | include/linux/rcupdate.h | 24 | ||||
-rw-r--r-- | include/linux/rcutiny.h | 17 | ||||
-rw-r--r-- | include/linux/rcutree.h | 2 | ||||
-rw-r--r-- | include/trace/events/rcu.h | 80 | ||||
-rw-r--r-- | kernel/Makefile | 11 | ||||
-rw-r--r-- | kernel/lockdep.c | 4 | ||||
-rw-r--r-- | kernel/rcu/Makefile | 6 | ||||
-rw-r--r-- | kernel/rcu/rcu.h (renamed from kernel/rcu.h) | 7 | ||||
-rw-r--r-- | kernel/rcu/srcu.c (renamed from kernel/srcu.c) | 0 | ||||
-rw-r--r-- | kernel/rcu/tiny.c (renamed from kernel/rcutiny.c) | 37 | ||||
-rw-r--r-- | kernel/rcu/tiny_plugin.h (renamed from kernel/rcutiny_plugin.h) | 0 | ||||
-rw-r--r-- | kernel/rcu/torture.c (renamed from kernel/rcutorture.c) | 6 | ||||
-rw-r--r-- | kernel/rcu/tree.c (renamed from kernel/rcutree.c) | 185 | ||||
-rw-r--r-- | kernel/rcu/tree.h (renamed from kernel/rcutree.h) | 2 | ||||
-rw-r--r-- | kernel/rcu/tree_plugin.h (renamed from kernel/rcutree_plugin.h) | 84 | ||||
-rw-r--r-- | kernel/rcu/tree_trace.c (renamed from kernel/rcutree_trace.c) | 2 | ||||
-rw-r--r-- | kernel/rcu/update.c (renamed from kernel/rcupdate.c) | 10 |
24 files changed, 464 insertions, 190 deletions
diff --git a/Documentation/DocBook/device-drivers.tmpl b/Documentation/DocBook/device-drivers.tmpl index fe397f90a34..6c9d9d37c83 100644 --- a/Documentation/DocBook/device-drivers.tmpl +++ b/Documentation/DocBook/device-drivers.tmpl @@ -87,7 +87,10 @@ X!Iinclude/linux/kobject.h !Ekernel/printk/printk.c !Ekernel/panic.c !Ekernel/sys.c -!Ekernel/rcupdate.c +!Ekernel/rcu/srcu.c +!Ekernel/rcu/tree.c +!Ekernel/rcu/tree_plugin.h +!Ekernel/rcu/update.c </sect1> <sect1><title>Device Resource Management</title> diff --git a/Documentation/RCU/checklist.txt b/Documentation/RCU/checklist.txt index 7703ec73a9b..91266193b8f 100644 --- a/Documentation/RCU/checklist.txt +++ b/Documentation/RCU/checklist.txt @@ -202,8 +202,8 @@ over a rather long period of time, but improvements are always welcome! updater uses call_rcu_sched() or synchronize_sched(), then the corresponding readers must disable preemption, possibly by calling rcu_read_lock_sched() and rcu_read_unlock_sched(). - If the updater uses synchronize_srcu() or call_srcu(), - the the corresponding readers must use srcu_read_lock() and + If the updater uses synchronize_srcu() or call_srcu(), then + the corresponding readers must use srcu_read_lock() and srcu_read_unlock(), and with the same srcu_struct. The rules for the expedited primitives are the same as for their non-expedited counterparts. Mixing things up will result in confusion and diff --git a/Documentation/RCU/stallwarn.txt b/Documentation/RCU/stallwarn.txt index 8e9359de1d2..6f3a0057548 100644 --- a/Documentation/RCU/stallwarn.txt +++ b/Documentation/RCU/stallwarn.txt @@ -12,12 +12,12 @@ CONFIG_RCU_CPU_STALL_TIMEOUT This kernel configuration parameter defines the period of time that RCU will wait from the beginning of a grace period until it issues an RCU CPU stall warning. This time period is normally - sixty seconds. + 21 seconds. This configuration parameter may be changed at runtime via the /sys/module/rcutree/parameters/rcu_cpu_stall_timeout, however this parameter is checked only at the beginning of a cycle. - So if you are 30 seconds into a 70-second stall, setting this + So if you are 10 seconds into a 40-second stall, setting this sysfs parameter to (say) five will shorten the timeout for the -next- stall, or the following warning for the current stall (assuming the stall lasts long enough). It will not affect the @@ -32,7 +32,7 @@ CONFIG_RCU_CPU_STALL_VERBOSE also dump the stacks of any tasks that are blocking the current RCU-preempt grace period. -RCU_CPU_STALL_INFO +CONFIG_RCU_CPU_STALL_INFO This kernel configuration parameter causes the stall warning to print out additional per-CPU diagnostic information, including @@ -43,7 +43,8 @@ RCU_STALL_DELAY_DELTA Although the lockdep facility is extremely useful, it does add some overhead. Therefore, under CONFIG_PROVE_RCU, the RCU_STALL_DELAY_DELTA macro allows five extra seconds before - giving an RCU CPU stall warning message. + giving an RCU CPU stall warning message. (This is a cpp + macro, not a kernel configuration parameter.) RCU_STALL_RAT_DELAY @@ -52,7 +53,8 @@ RCU_STALL_RAT_DELAY However, if the offending CPU does not detect its own stall in the number of jiffies specified by RCU_STALL_RAT_DELAY, then some other CPU will complain. This delay is normally set to - two jiffies. + two jiffies. (This is a cpp macro, not a kernel configuration + parameter.) When a CPU detects that it is stalling, it will print a message similar to the following: @@ -86,7 +88,12 @@ printing, there will be a spurious stall-warning message: INFO: rcu_bh_state detected stalls on CPUs/tasks: { } (detected by 4, 2502 jiffies) -This is rare, but does happen from time to time in real life. +This is rare, but does happen from time to time in real life. It is also +possible for a zero-jiffy stall to be flagged in this case, depending +on how the stall warning and the grace-period initialization happen to +interact. Please note that it is not possible to entirely eliminate this +sort of false positive without resorting to things like stop_machine(), +which is overkill for this sort of problem. If the CONFIG_RCU_CPU_STALL_INFO kernel configuration parameter is set, more information is printed with the stall-warning message, for example: @@ -216,4 +223,5 @@ that portion of the stack which remains the same from trace to trace. If you can reliably trigger the stall, ftrace can be quite helpful. RCU bugs can often be debugged with the help of CONFIG_RCU_TRACE -and with RCU's event tracing. +and with RCU's event tracing. For information on RCU's event tracing, +see include/trace/events/rcu.h. diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index fcbb736d55f..203f4a9d9ef 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -2599,7 +2599,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted. ramdisk_size= [RAM] Sizes of RAM disks in kilobytes See Documentation/blockdev/ramdisk.txt. - rcu_nocbs= [KNL,BOOT] + rcu_nocbs= [KNL] In kernels built with CONFIG_RCU_NOCB_CPU=y, set the specified list of CPUs to be no-callback CPUs. Invocation of these CPUs' RCU callbacks will @@ -2612,7 +2612,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted. real-time workloads. It can also improve energy efficiency for asymmetric multiprocessors. - rcu_nocb_poll [KNL,BOOT] + rcu_nocb_poll [KNL] Rather than requiring that offloaded CPUs (specified by rcu_nocbs= above) explicitly awaken the corresponding "rcuoN" kthreads, @@ -2623,126 +2623,145 @@ bytes respectively. Such letter suffixes can also be entirely omitted. energy efficiency by requiring that the kthreads periodically wake up to do the polling. - rcutree.blimit= [KNL,BOOT] + rcutree.blimit= [KNL] Set maximum number of finished RCU callbacks to process in one batch. - rcutree.fanout_leaf= [KNL,BOOT] + rcutree.rcu_fanout_leaf= [KNL] Increase the number of CPUs assigned to each leaf rcu_node structure. Useful for very large systems. - rcutree.jiffies_till_first_fqs= [KNL,BOOT] + rcutree.jiffies_till_first_fqs= [KNL] Set delay from grace-period initialization to first attempt to force quiescent states. Units are jiffies, minimum value is zero, and maximum value is HZ. - rcutree.jiffies_till_next_fqs= [KNL,BOOT] + rcutree.jiffies_till_next_fqs= [KNL] Set delay between subsequent attempts to force quiescent states. Units are jiffies, minimum value is one, and maximum value is HZ. - rcutree.qhimark= [KNL,BOOT] + rcutree.qhimark= [KNL] Set threshold of queued RCU callbacks over which batch limiting is disabled. - rcutree.qlowmark= [KNL,BOOT] + rcutree.qlowmark= [KNL] Set threshold of queued RCU callbacks below which batch limiting is re-enabled. - rcutree.rcu_cpu_stall_suppress= [KNL,BOOT] - Suppress RCU CPU stall warning messages. - - rcutree.rcu_cpu_stall_timeout= [KNL,BOOT] - Set timeout for RCU CPU stall warning messages. - - rcutree.rcu_idle_gp_delay= [KNL,BOOT] + rcutree.rcu_idle_gp_delay= [KNL] Set wakeup interval for idle CPUs that have RCU callbacks (RCU_FAST_NO_HZ=y). - rcutree.rcu_idle_lazy_gp_delay= [KNL,BOOT] + rcutree.rcu_idle_lazy_gp_delay= [KNL] Set wakeup interval for idle CPUs that have only "lazy" RCU callbacks (RCU_FAST_NO_HZ=y). Lazy RCU callbacks are those which RCU can prove do nothing more than free memory. - rcutorture.fqs_duration= [KNL,BOOT] + rcutorture.fqs_duration= [KNL] Set duration of force_quiescent_state bursts. - rcutorture.fqs_holdoff= [KNL,BOOT] + rcutorture.fqs_holdoff= [KNL] Set holdoff time within force_quiescent_state bursts. - rcutorture.fqs_stutter= [KNL,BOOT] + rcutorture.fqs_stutter= [KNL] Set wait time between force_quiescent_state bursts. - rcutorture.irqreader= [KNL,BOOT] - Test RCU readers from irq handlers. + rcutorture.gp_exp= [KNL] + Use expedited update-side primitives. + + rcutorture.gp_normal= [KNL] + Use normal (non-expedited) update-side primitives. + If both gp_exp and gp_normal are set, do both. + If neither gp_exp nor gp_normal are set, still + do both. - rcutorture.n_barrier_cbs= [KNL,BOOT] + rcutorture.n_barrier_cbs= [KNL] Set callbacks/threads for rcu_barrier() testing. - rcutorture.nfakewriters= [KNL,BOOT] + rcutorture.nfakewriters= [KNL] Set number of concurrent RCU writers. These just stress RCU, they don't participate in the actual test, hence the "fake". - rcutorture.nreaders= [KNL,BOOT] + rcutorture.nreaders= [KNL] Set number of RCU readers. - rcutorture.onoff_holdoff= [KNL,BOOT] + rcutorture.object_debug= [KNL] + Enable debug-object double-call_rcu() testing. + + rcutorture.onoff_holdoff= [KNL] Set time (s) after boot for CPU-hotplug testing. - rcutorture.onoff_interval= [KNL,BOOT] + rcutorture.onoff_interval= [KNL] Set time (s) between CPU-hotplug operations, or zero to disable CPU-hotplug testing. - rcutorture.shuffle_interval= [KNL,BOOT] + rcutorture.rcutorture_runnable= [BOOT] + Start rcutorture running at boot time. + + rcutorture.shuffle_interval= [KNL] Set task-shuffle interval (s). Shuffling tasks allows some CPUs to go into dyntick-idle mode during the rcutorture test. - rcutorture.shutdown_secs= [KNL,BOOT] + rcutorture.shutdown_secs= [KNL] Set time (s) after boot system shutdown. This is useful for hands-off automated testing. - rcutorture.stall_cpu= [KNL,BOOT] + rcutorture.stall_cpu= [KNL] Duration of CPU stall (s) to test RCU CPU stall warnings, zero to disable. - rcutorture.stall_cpu_holdoff= [KNL,BOOT] + rcutorture.stall_cpu_holdoff= [KNL] Time to wait (s) after boot before inducing stall. - rcutorture.stat_interval= [KNL,BOOT] + rcutorture.stat_interval= [KNL] Time (s) between statistics printk()s. - rcutorture.stutter= [KNL,BOOT] + rcutorture.stutter= [KNL] Time (s) to stutter testing, for example, specifying five seconds causes the test to run for five seconds, wait for five seconds, and so on. This tests RCU's ability to transition abruptly to and from idle. - rcutorture.test_boost= [KNL,BOOT] + rcutorture.test_boost= [KNL] Test RCU priority boosting? 0=no, 1=maybe, 2=yes. "Maybe" means test if the RCU implementation under test support RCU priority boosting. - rcutorture.test_boost_duration= [KNL,BOOT] + rcutorture.test_boost_duration= [KNL] Duration (s) of each individual boost test. - rcutorture.test_boost_interval= [KNL,BOOT] + rcutorture.test_boost_interval= [KNL] Interval (s) between each boost test. - rcutorture.test_no_idle_hz= [KNL,BOOT] + rcutorture.test_no_idle_hz= [KNL] Test RCU's dyntick-idle handling. See also the rcutorture.shuffle_interval parameter. - rcutorture.torture_type= [KNL,BOOT] + rcutorture.torture_type= [KNL] Specify the RCU implementation to test. - rcutorture.verbose= [KNL,BOOT] + rcutorture.verbose= [KNL] Enable additional printk() statements. + rcupdate.rcu_expedited= [KNL] + Use expedited grace-period primitives, for + example, synchronize_rcu_expedited() instead + of synchronize_rcu(). This reduces latency, + but can increase CPU utilization, degrade + real-time latency, and degrade energy efficiency. + + rcupdate.rcu_cpu_stall_suppress= [KNL] + Suppress RCU CPU stall warning messages. + + rcupdate.rcu_cpu_stall_timeout= [KNL] + Set timeout for RCU CPU stall warning messages. + rdinit= [KNL] Format: <full_path> Run specified binary instead of /init from the ramdisk, diff --git a/Documentation/kernel-per-CPU-kthreads.txt b/Documentation/kernel-per-CPU-kthreads.txt index 32351bfabf2..827104fb936 100644 --- a/Documentation/kernel-per-CPU-kthreads.txt +++ b/Documentation/kernel-per-CPU-kthreads.txt @@ -181,12 +181,17 @@ To reduce its OS jitter, do any of the following: make sure that this is safe on your particular system. d. It is not possible to entirely get rid of OS jitter from vmstat_update() on CONFIG_SMP=y systems, but you - can decrease its frequency by writing a large value to - /proc/sys/vm/stat_interval. The default value is HZ, - for an interval of one second. Of course, larger values - will make your virtual-memory statistics update more - slowly. Of course, you can also run your workload at - a real-time priority, thus preempting vmstat_update(). + can decrease its frequency by writing a large value + to /proc/sys/vm/stat_interval. The default value is + HZ, for an interval of one second. Of course, larger + values will make your virtual-memory statistics update + more slowly. Of course, you can also run your workload + at a real-time priority, thus preempting vmstat_update(), + but if your workload is CPU-bound, this is a bad idea. + However, there is an RFC patch from Christoph Lameter + (based on an earlier one from Gilad Ben-Yossef) that + reduces or even eliminates vmstat overhead for some + workloads at https://lkml.org/lkml/2013/9/4/379. e. If running on high-end powerpc servers, build with CONFIG_PPC_RTAS_DAEMON=n. This prevents the RTAS daemon from running on each CPU every second or so. diff --git a/MAINTAINERS b/MAINTAINERS index a7c34ef3509..a97694dbf06 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6935,7 +6935,7 @@ M: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com> S: Supported T: git git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git F: Documentation/RCU/torture.txt -F: kernel/rcutorture.c +F: kernel/rcu/torture.c RDC R-321X SoC M: Florian Fainelli <florian@openwrt.org> @@ -6962,8 +6962,9 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git F: Documentation/RCU/ X: Documentation/RCU/torture.txt F: include/linux/rcu* -F: kernel/rcu* -X: kernel/rcutorture.c +X: include/linux/srcu.h +F: kernel/rcu/ +X: kernel/rcu/torture.c REAL TIME CLOCK (RTC) SUBSYSTEM M: Alessandro Zummo <a.zummo@towertech.it> @@ -7650,8 +7651,8 @@ M: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com> W: http://www.rdrop.com/users/paulmck/RCU/ S: Supported T: git git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git -F: include/linux/srcu* -F: kernel/srcu* +F: include/linux/srcu.h +F: kernel/rcu/srcu.c SMACK SECURITY MODULE M: Casey Schaufler <casey@schaufler-ca.com> diff --git a/include/linux/rculist.h b/include/linux/rculist.h index 4106721c4e5..45a0a9e8147 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -19,6 +19,21 @@ */ /* + * INIT_LIST_HEAD_RCU - Initialize a list_head visible to RCU readers + * @list: list to be initialized + * + * You should instead use INIT_LIST_HEAD() for normal initialization and + * cleanup tasks, when readers have no access to the list being initialized. + * However, if the list being initialized is visible to readers, you + * need to keep the compiler from being too mischievous. + */ +static inline void INIT_LIST_HEAD_RCU(struct list_head *list) +{ + ACCESS_ONCE(list->next) = list; + ACCESS_ONCE(list->prev) = list; +} + +/* * return the ->next pointer of a list_head in an rcu safe * way, we must not access it directly */ @@ -191,9 +206,13 @@ static inline void list_splice_init_rcu(struct list_head *list, if (list_empty(list)) return; - /* "first" and "last" tracking list, so initialize it. */ + /* + * "first" and "last" tracking list, so initialize it. RCU readers + * have access to this list, so we must use INIT_LIST_HEAD_RCU() + * instead of INIT_LIST_HEAD(). + */ - INIT_LIST_HEAD(list); + INIT_LIST_HEAD_RCU(list); /* * At this point, the list body still points to the source list. diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index f1f1bc39346..39cbb889e20 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -261,6 +261,10 @@ static inline void rcu_user_hooks_switch(struct task_struct *prev, rcu_irq_exit(); \ } while (0) +#if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) || defined(CONFIG_SMP) +extern bool __rcu_is_watching(void); +#endif /* #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) || defined(CONFIG_SMP) */ + /* * Infrastructure to implement the synchronize_() primitives in * TREE_RCU and rcu_barrier_() primitives in TINY_RCU. @@ -297,10 +301,6 @@ static inline void destroy_rcu_head_on_stack(struct rcu_head *head) } #endif /* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */ -#if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SMP) -extern int rcu_is_cpu_idle(void); -#endif /* #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SMP) */ - #if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU) bool rcu_lockdep_current_cpu_online(void); #else /* #if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU) */ @@ -351,7 +351,7 @@ static inline int rcu_read_lock_held(void) { if (!debug_lockdep_rcu_enabled()) return 1; - if (rcu_is_cpu_idle()) + if (!rcu_is_watching()) return 0; if (!rcu_lockdep_current_cpu_online()) return 0; @@ -402,7 +402,7 @@ static inline int rcu_read_lock_sched_held(void) if (!debug_lockdep_rcu_enabled()) return 1; - if (rcu_is_cpu_idle()) + if (!rcu_is_watching()) return 0; if (!rcu_lockdep_current_cpu_online()) return 0; @@ -771,7 +771,7 @@ static inline void rcu_read_lock(void) __rcu_read_lock(); __acquire(RCU); rcu_lock_acquire(&rcu_lock_map); - rcu_lockdep_assert(!rcu_is_cpu_idle(), + rcu_lockdep_assert(rcu_is_watching(), "rcu_read_lock() used illegally while idle"); } @@ -792,7 +792,7 @@ static inline void rcu_read_lock(void) */ static inline void rcu_read_unlock(void) { - rcu_lockdep_assert(!rcu_is_cpu_idle(), + rcu_lockdep_assert(rcu_is_watching(), "rcu_read_unlock() used illegally while idle"); rcu_lock_release(&rcu_lock_map); __release(RCU); @@ -821,7 +821,7 @@ static inline void rcu_read_lock_bh(void) local_bh_disable(); __acquire(RCU_BH); rcu_lock_acquire(&rcu_bh_lock_map); - rcu_lockdep_assert(!rcu_is_cpu_idle(), + rcu_lockdep_assert(rcu_is_watching(), "rcu_read_lock_bh() used illegally while idle"); } @@ -832,7 +832,7 @@ static inline void rcu_read_lock_bh(void) */ static inline void rcu_read_unlock_bh(void) { - rcu_lockdep_assert(!rcu_is_cpu_idle(), + rcu_lockdep_assert(rcu_is_watching(), "rcu_read_unlock_bh() used illegally while idle"); rcu_lock_release(&rcu_bh_lock_map); __release(RCU_BH); @@ -857,7 +857,7 @@ static inline void rcu_read_lock_sched(void) preempt_disable(); __acquire(RCU_SCHED); rcu_lock_acquire(&rcu_sched_lock_map); - rcu_lockdep_assert(!rcu_is_cpu_idle(), + rcu_lockdep_assert(rcu_is_watching(), "rcu_read_lock_sched() used illegally while idle"); } @@ -875,7 +875,7 @@ static inline notrace void rcu_read_lock_sched_notrace(void) */ static inline void rcu_read_unlock_sched(void) { - rcu_lockdep_assert(!rcu_is_cpu_idle(), + rcu_lockdep_assert(rcu_is_watching(), "rcu_read_unlock_sched() used illegally while idle"); rcu_lock_release(&rcu_sched_lock_map); __release(RCU_SCHED); diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index e31005ee339..09ebcbe9fd7 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -132,4 +132,21 @@ static inline void rcu_scheduler_starting(void) } #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ +#if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) + +static inline bool rcu_is_watching(void) +{ + return __rcu_is_watching(); +} + +#else /* defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) */ + +static inline bool rcu_is_watching(void) +{ + return true; +} + + +#endif /* #else defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) */ + #endif /* __LINUX_RCUTINY_H */ diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 226169d1bd2..4b9c8154874 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -90,4 +90,6 @@ extern void exit_rcu(void); extern void rcu_scheduler_starting(void); extern int rcu_scheduler_active __read_mostly; +extern bool rcu_is_watching(void); + #endif /* __LINUX_RCUTREE_H */ diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h index ee2376cfaab..aca38226641 100644 --- a/include/trace/events/rcu.h +++ b/include/trace/events/rcu.h @@ -39,15 +39,26 @@ TRACE_EVENT(rcu_utilization, #if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) /* - * Tracepoint for grace-period events: starting and ending a grace - * period ("start" and "end", respectively), a CPU noting the start - * of a new grace period or the end of an old grace period ("cpustart" - * and "cpuend", respectively), a CPU passing through a quiescent - * state ("cpuqs"), a CPU coming online or going offline ("cpuonl" - * and "cpuofl", respectively), a CPU being kicked for being too - * long in dyntick-idle mode ("kick"), a CPU accelerating its new - * callbacks to RCU_NEXT_READY_TAIL ("AccReadyCB"), and a CPU - * accelerating its new callbacks to RCU_WAIT_TAIL ("AccWaitCB"). + * Tracepoint for grace-period events. Takes a string identifying the + * RCU flavor, the grace-period number, and a string identifying the + * grace-period-related event as follows: + * + * "AccReadyCB": CPU acclerates new callbacks to RCU_NEXT_READY_TAIL. + * "AccWaitCB": CPU accelerates new callbacks to RCU_WAIT_TAIL. + * "newreq": Request a new grace period. + * "start": Start a grace period. + * "cpustart": CPU first notices a grace-period start. + * "cpuqs": CPU passes through a quiescent state. + * "cpuonl": CPU comes online. + * "cpuofl": CPU goes offline. + * "reqwait": GP kthread sleeps waiting for grace-period request. + * "reqwaitsig": GP kthread awakened by signal from reqwait state. + * "fqswait": GP kthread waiting until time to force quiescent states. + * "fqsstart": GP kthread starts forcing quiescent states. + * "fqsend": GP kthread done forcing quiescent states. + * "fqswaitsig": GP kthread awakened by signal from fqswait state. + * "end": End a grace period. + * "cpuend": CPU first notices a grace-period end. */ TRACE_EVENT(rcu_grace_period, @@ -161,6 +172,46 @@ TRACE_EVENT(rcu_grace_period_init, ); /* + * Tracepoint for RCU no-CBs CPU callback handoffs. This event is intended + * to assist debugging of these handoffs. + * + * The first argument is the name of the RCU flavor, and the second is + * the number of the offloaded CPU are extracted. The third and final + * argument is a string as follows: + * + * "WakeEmpty": Wake rcuo kthread, first CB to empty list. + * "WakeOvf": Wake rcuo kthread, CB list is huge. + * "WakeNot": Don't wake rcuo kthread. + * "WakeNotPoll": Don't wake rcuo kthread because it is polling. + * "Poll": Start of new polling cycle for rcu_nocb_poll. + * "Sleep": Sleep waiting for CBs for !rcu_nocb_poll. + * "WokeEmpty": rcuo kthread woke to find empty list. + * "WokeNonEmpty": rcuo kthread woke to find non-empty list. + * "WaitQueue": Enqueue partially done, timed wait for it to complete. + * "WokeQueue": Partial enqueue now complete. + */ +TRACE_EVENT(rcu_nocb_wake, + + TP_PROTO(const char *rcuname, int cpu, const char *reason), + + TP_ARGS(rcuname, cpu, reason), + + TP_STRUCT__entry( + __field(const char *, rcuname) + __field(int, cpu) + __field(const char *, reason) + ), + + TP_fast_assign( + __entry->rcuname = rcuname; + __entry->cpu = cpu; + __entry->reason = reason; + ), + + TP_printk("%s %d %s", __entry->rcuname, __entry->cpu, __entry->reason) +); + +/* * Tracepoint for tasks blocking within preemptible-RCU read-side * critical sections. Track the type of RCU (which one day might * include SRCU), the grace-period number that the task is blocking @@ -540,17 +591,17 @@ TRACE_EVENT(rcu_invoke_kfree_callback, TRACE_EVENT(rcu_batch_end, TP_PROTO(const char *rcuname, int callbacks_invoked, - bool cb, bool nr, bool iit, bool risk), + char cb, char nr, char iit, char risk), TP_ARGS(rcuname, callbacks_invoked, cb, nr, iit, risk), TP_STRUCT__entry( __field(const char *, rcuname) __field(int, callbacks_invoked) - __field(bool, cb) - __field(bool, nr) - __field(bool, iit) - __field(bool, risk) + __field(char, cb) + __field(char, nr) + __field(char, iit) + __field(char, risk) ), TP_fast_assign( @@ -656,6 +707,7 @@ TRACE_EVENT(rcu_barrier, #define trace_rcu_future_grace_period(rcuname, gpnum, completed, c, \ level, grplo, grphi, event) \ do { } while (0) +#define trace_rcu_nocb_wake(rcuname, cpu, reason) do { } while (0) #define trace_rcu_preempt_task(rcuname, pid, gpnum) do { } while (0) #define trace_rcu_unlock_preempted_task(rcuname, gpnum, pid) do { } while (0) #define trace_rcu_quiescent_state_report(rcuname, gpnum, mask, qsmask, level, \ diff --git a/kernel/Makefile b/kernel/Makefile index 1ce47553fb0..f99d908b555 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -6,9 +6,9 @@ obj-y = fork.o exec_domain.o panic.o \ cpu.o exit.o itimer.o time.o softirq.o resource.o \ sysctl.o sysctl_binary.o capability.o ptrace.o timer.o user.o \ signal.o sys.o kmod.o workqueue.o pid.o task_work.o \ - rcupdate.o extable.o params.o posix-timers.o \ + extable.o params.o posix-timers.o \ kthread.o wait.o sys_ni.o posix-cpu-timers.o mutex.o \ - hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \ + hrtimer.o rwsem.o nsproxy.o semaphore.o \ notifier.o ksysfs.o cred.o reboot.o \ async.o range.o groups.o lglock.o smpboot.o @@ -27,6 +27,7 @@ obj-y += power/ obj-y += printk/ obj-y += cpu/ obj-y += irq/ +obj-y += rcu/ obj-$(CONFIG_CHECKPOINT_RESTORE) += kcmp.o obj-$(CONFIG_FREEZER) += freezer.o @@ -81,12 +82,6 @@ obj-$(CONFIG_KGDB) += debug/ obj-$(CONFIG_DETECT_HUNG_TASK) += hung_task.o obj-$(CONFIG_LOCKUP_DETECTOR) += watchdog.o obj-$(CONFIG_SECCOMP) += seccomp.o -obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o -obj-$(CONFIG_TREE_RCU) += rcutree.o -obj-$(CONFIG_TREE_PREEMPT_RCU) += rcutree.o -obj-$(CONFIG_TREE_RCU_TRACE) += rcutree_trace.o -obj-$(CONFIG_TINY_RCU) += rcutiny.o -obj-$(CONFIG_TINY_PREEMPT_RCU) += rcutiny.o obj-$(CONFIG_RELAY) += relay.o obj-$(CONFIG_SYSCTL) += utsname_sysctl.o obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o diff --git a/kernel/lockdep.c b/kernel/lockdep.c index e16c45b9ee7..4e8e14c34e4 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -4224,7 +4224,7 @@ void lockdep_rcu_suspicious(const char *file, const int line, const char *s) printk("\n%srcu_scheduler_active = %d, debug_locks = %d\n", !rcu_lockdep_current_cpu_online() ? "RCU used illegally from offline CPU!\n" - : rcu_is_cpu_idle() + : !rcu_is_watching() ? "RCU used illegally from idle CPU!\n" : "", rcu_scheduler_active, debug_locks); @@ -4247,7 +4247,7 @@ void lockdep_rcu_suspicious(const char *file, const int line, const char *s) * So complain bitterly if someone does call rcu_read_lock(), * rcu_read_lock_bh() and so on from extended quiescent states. */ - if (rcu_is_cpu_idle()) + if (!rcu_is_watching()) printk("RCU used illegally from extended quiescent state!\n"); lockdep_print_held_locks(curr); diff --git a/kernel/rcu/Makefile b/kernel/rcu/Makefile new file mode 100644 index 00000000000..01e9ec37a3e --- /dev/null +++ b/kernel/rcu/Makefile @@ -0,0 +1,6 @@ +obj-y += update.o srcu.o +obj-$(CONFIG_RCU_TORTURE_TEST) += torture.o +obj-$(CONFIG_TREE_RCU) += tree.o +obj-$(CONFIG_TREE_PREEMPT_RCU) += tree.o +obj-$(CONFIG_TREE_RCU_TRACE) += tree_trace.o +obj-$(CONFIG_TINY_RCU) += tiny.o diff --git a/kernel/rcu.h b/kernel/rcu/rcu.h index 77131966c4a..7859a0a3951 100644 --- a/kernel/rcu.h +++ b/kernel/rcu/rcu.h @@ -122,4 +122,11 @@ int rcu_jiffies_till_stall_check(void); #endif /* #ifdef CONFIG_RCU_STALL_COMMON */ +/* + * Strings used in tracepoints need to be exported via the + * tracing system such that tools like perf and trace-cmd can + * translate the string address pointers to actual text. + */ +#define TPS(x) tracepoint_string(x) + #endif /* __LINUX_RCU_H */ diff --git a/kernel/srcu.c b/kernel/rcu/srcu.c index 01d5ccb8bfe..01d5ccb8bfe 100644 --- a/kernel/srcu.c +++ b/kernel/rcu/srcu.c diff --git a/kernel/rcutiny.c b/kernel/rcu/tiny.c index 9ed6075dc56..0c9a934cfec 100644 --- a/kernel/rcutiny.c +++ b/kernel/rcu/tiny.c @@ -35,6 +35,7 @@ #include <linux/time.h> #include <linux/cpu.h> #include <linux/prefetch.h> +#include <linux/ftrace_event.h> #ifdef CONFIG_RCU_TRACE #include <trace/events/rcu.h> @@ -42,7 +43,7 @@ #include "rcu.h" -/* Forward declarations for rcutiny_plugin.h. */ +/* Forward declarations for tiny_plugin.h. */ struct rcu_ctrlblk; static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp); static void rcu_process_callbacks(struct softirq_action *unused); @@ -52,22 +53,23 @@ static void __call_rcu(struct rcu_head *head, static long long rcu_dynticks_nesting = DYNTICK_TASK_EXIT_IDLE; -#include "rcutiny_plugin.h" +#include "tiny_plugin.h" /* Common code for rcu_idle_enter() and rcu_irq_exit(), see kernel/rcutree.c. */ static void rcu_idle_enter_common(long long newval) { if (newval) { - RCU_TRACE(trace_rcu_dyntick("--=", + RCU_TRACE(trace_rcu_dyntick(TPS("--="), rcu_dynticks_nesting, newval)); rcu_dynticks_nesting = newval; return; } - RCU_TRACE(trace_rcu_dyntick("Start", rcu_dynticks_nesting, newval)); + RCU_TRACE(trace_rcu_dyntick(TPS("Start"), + rcu_dynticks_nesting, newval)); if (!is_idle_task(current)) { - struct task_struct *idle = idle_task(smp_processor_id()); + struct task_struct *idle __maybe_unused = idle_task(smp_processor_id()); - RCU_TRACE(trace_rcu_dyntick("Error on entry: not idle task", + RCU_TRACE(trace_rcu_dyntick(TPS("Entry error: not idle task"), rcu_dynticks_nesting, newval)); ftrace_dump(DUMP_ALL); WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s", @@ -120,15 +122,15 @@ EXPORT_SYMBOL_GPL(rcu_irq_exit); static void rcu_idle_exit_common(long long oldval) { if (oldval) { |