diff options
-rw-r--r-- | Documentation/RCU/checklist.txt | 39 | ||||
-rw-r--r-- | Documentation/RCU/rcubarrier.txt | 15 | ||||
-rw-r--r-- | Documentation/RCU/torture.txt | 9 | ||||
-rw-r--r-- | Documentation/RCU/whatisRCU.txt | 6 | ||||
-rw-r--r-- | Documentation/kernel-parameters.txt | 5 | ||||
-rw-r--r-- | include/linux/init_task.h | 4 | ||||
-rw-r--r-- | include/linux/key.h | 4 | ||||
-rw-r--r-- | include/linux/rcupdate.h | 53 | ||||
-rw-r--r-- | include/trace/events/rcu.h | 45 | ||||
-rw-r--r-- | kernel/rcupdate.c | 44 | ||||
-rw-r--r-- | kernel/rcutiny.c | 4 | ||||
-rw-r--r-- | kernel/rcutiny_plugin.h | 56 | ||||
-rw-r--r-- | kernel/rcutorture.c | 72 | ||||
-rw-r--r-- | kernel/rcutree.c | 478 | ||||
-rw-r--r-- | kernel/rcutree.h | 46 | ||||
-rw-r--r-- | kernel/rcutree_plugin.h | 223 | ||||
-rw-r--r-- | kernel/rcutree_trace.c | 148 | ||||
-rw-r--r-- | kernel/time/tick-sched.c | 2 | ||||
-rw-r--r-- | lib/list_debug.c | 6 |
19 files changed, 669 insertions, 590 deletions
diff --git a/Documentation/RCU/checklist.txt b/Documentation/RCU/checklist.txt index 5c8d7496809..fc103d7a047 100644 --- a/Documentation/RCU/checklist.txt +++ b/Documentation/RCU/checklist.txt @@ -162,9 +162,9 @@ over a rather long period of time, but improvements are always welcome! when publicizing a pointer to a structure that can be traversed by an RCU read-side critical section. -5. If call_rcu(), or a related primitive such as call_rcu_bh() or - call_rcu_sched(), is used, the callback function must be - written to be called from softirq context. In particular, +5. If call_rcu(), or a related primitive such as call_rcu_bh(), + call_rcu_sched(), or call_srcu() is used, the callback function + must be written to be called from softirq context. In particular, it cannot block. 6. Since synchronize_rcu() can block, it cannot be called from @@ -202,11 +202,12 @@ over a rather long period of time, but improvements are always welcome! updater uses call_rcu_sched() or synchronize_sched(), then the corresponding readers must disable preemption, possibly by calling rcu_read_lock_sched() and rcu_read_unlock_sched(). - If the updater uses synchronize_srcu(), the the corresponding - readers must use srcu_read_lock() and srcu_read_unlock(), - and with the same srcu_struct. The rules for the expedited - primitives are the same as for their non-expedited counterparts. - Mixing things up will result in confusion and broken kernels. + If the updater uses synchronize_srcu() or call_srcu(), + the the corresponding readers must use srcu_read_lock() and + srcu_read_unlock(), and with the same srcu_struct. The rules for + the expedited primitives are the same as for their non-expedited + counterparts. Mixing things up will result in confusion and + broken kernels. One exception to this rule: rcu_read_lock() and rcu_read_unlock() may be substituted for rcu_read_lock_bh() and rcu_read_unlock_bh() @@ -333,14 +334,14 @@ over a rather long period of time, but improvements are always welcome! victim CPU from ever going offline.) 14. SRCU (srcu_read_lock(), srcu_read_unlock(), srcu_dereference(), - synchronize_srcu(), and synchronize_srcu_expedited()) may only - be invoked from process context. Unlike other forms of RCU, it - -is- permissible to block in an SRCU read-side critical section - (demarked by srcu_read_lock() and srcu_read_unlock()), hence the - "SRCU": "sleepable RCU". Please note that if you don't need - to sleep in read-side critical sections, you should be using - RCU rather than SRCU, because RCU is almost always faster and - easier to use than is SRCU. + synchronize_srcu(), synchronize_srcu_expedited(), and call_srcu()) + may only be invoked from process context. Unlike other forms of + RCU, it -is- permissible to block in an SRCU read-side critical + section (demarked by srcu_read_lock() and srcu_read_unlock()), + hence the "SRCU": "sleepable RCU". Please note that if you + don't need to sleep in read-side critical sections, you should be + using RCU rather than SRCU, because RCU is almost always faster + and easier to use than is SRCU. If you need to enter your read-side critical section in a hardirq or exception handler, and then exit that same read-side @@ -353,8 +354,8 @@ over a rather long period of time, but improvements are always welcome! cleanup_srcu_struct(). These are passed a "struct srcu_struct" that defines the scope of a given SRCU domain. Once initialized, the srcu_struct is passed to srcu_read_lock(), srcu_read_unlock() - synchronize_srcu(), and synchronize_srcu_expedited(). A given - synchronize_srcu() waits only for SRCU read-side critical + synchronize_srcu(), synchronize_srcu_expedited(), and call_srcu(). + A given synchronize_srcu() waits only for SRCU read-side critical sections governed by srcu_read_lock() and srcu_read_unlock() calls that have been passed the same srcu_struct. This property is what makes sleeping read-side critical sections tolerable -- @@ -374,7 +375,7 @@ over a rather long period of time, but improvements are always welcome! requiring SRCU's read-side deadlock immunity or low read-side realtime latency. - Note that, rcu_assign_pointer() relates to SRCU just as they do + Note that, rcu_assign_pointer() relates to SRCU just as it does to other forms of RCU. 15. The whole point of call_rcu(), synchronize_rcu(), and friends diff --git a/Documentation/RCU/rcubarrier.txt b/Documentation/RCU/rcubarrier.txt index e439a0edee2..38428c12513 100644 --- a/Documentation/RCU/rcubarrier.txt +++ b/Documentation/RCU/rcubarrier.txt @@ -79,8 +79,6 @@ complete. Pseudo-code using rcu_barrier() is as follows: 2. Execute rcu_barrier(). 3. Allow the module to be unloaded. -Quick Quiz #1: Why is there no srcu_barrier()? - The rcutorture module makes use of rcu_barrier in its exit function as follows: @@ -162,7 +160,7 @@ for any pre-existing callbacks to complete. Then lines 55-62 print status and do operation-specific cleanup, and then return, permitting the module-unload operation to be completed. -Quick Quiz #2: Is there any other situation where rcu_barrier() might +Quick Quiz #1: Is there any other situation where rcu_barrier() might be required? Your module might have additional complications. For example, if your @@ -242,7 +240,7 @@ reaches zero, as follows: 4 complete(&rcu_barrier_completion); 5 } -Quick Quiz #3: What happens if CPU 0's rcu_barrier_func() executes +Quick Quiz #2: What happens if CPU 0's rcu_barrier_func() executes immediately (thus incrementing rcu_barrier_cpu_count to the value one), but the other CPU's rcu_barrier_func() invocations are delayed for a full grace period? Couldn't this result in @@ -259,12 +257,7 @@ so that your module may be safely unloaded. Answers to Quick Quizzes -Quick Quiz #1: Why is there no srcu_barrier()? - -Answer: Since there is no call_srcu(), there can be no outstanding SRCU - callbacks. Therefore, there is no need to wait for them. - -Quick Quiz #2: Is there any other situation where rcu_barrier() might +Quick Quiz #1: Is there any other situation where rcu_barrier() might be required? Answer: Interestingly enough, rcu_barrier() was not originally @@ -278,7 +271,7 @@ Answer: Interestingly enough, rcu_barrier() was not originally implementing rcutorture, and found that rcu_barrier() solves this problem as well. -Quick Quiz #3: What happens if CPU 0's rcu_barrier_func() executes +Quick Quiz #2: What happens if CPU 0's rcu_barrier_func() executes immediately (thus incrementing rcu_barrier_cpu_count to the value one), but the other CPU's rcu_barrier_func() invocations are delayed for a full grace period? Couldn't this result in diff --git a/Documentation/RCU/torture.txt b/Documentation/RCU/torture.txt index 4ddf3913fd8..7dce8a17eac 100644 --- a/Documentation/RCU/torture.txt +++ b/Documentation/RCU/torture.txt @@ -174,11 +174,20 @@ torture_type The type of RCU to test, with string values as follows: and synchronize_rcu_bh_expedited(). "srcu": srcu_read_lock(), srcu_read_unlock() and + call_srcu(). + + "srcu_sync": srcu_read_lock(), srcu_read_unlock() and synchronize_srcu(). "srcu_expedited": srcu_read_lock(), srcu_read_unlock() and synchronize_srcu_expedited(). + "srcu_raw": srcu_read_lock_raw(), srcu_read_unlock_raw(), + and call_srcu(). + + "srcu_raw_sync": srcu_read_lock_raw(), srcu_read_unlock_raw(), + and synchronize_srcu(). + "sched": preempt_disable(), preempt_enable(), and call_rcu_sched(). diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt index 6bbe8dcdc3d..69ee188515e 100644 --- a/Documentation/RCU/whatisRCU.txt +++ b/Documentation/RCU/whatisRCU.txt @@ -833,9 +833,9 @@ sched: Critical sections Grace period Barrier SRCU: Critical sections Grace period Barrier - srcu_read_lock synchronize_srcu N/A - srcu_read_unlock synchronize_srcu_expedited - srcu_read_lock_raw + srcu_read_lock synchronize_srcu srcu_barrier + srcu_read_unlock call_srcu + srcu_read_lock_raw synchronize_srcu_expedited srcu_read_unlock_raw srcu_dereference diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index a92c5ebf373..12783fa833c 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -2367,6 +2367,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted. Set maximum number of finished RCU callbacks to process in one batch. + rcutree.fanout_leaf= [KNL,BOOT] + Increase the number of CPUs assigned to each + leaf rcu_node structure. Useful for very large + systems. + rcutree.qhimark= [KNL,BOOT] Set threshold of queued RCU callbacks over which batch limiting is disabled. diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 9e65eff6af3..8a747618699 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -168,8 +168,8 @@ extern struct cred init_cred; .children = LIST_HEAD_INIT(tsk.children), \ .sibling = LIST_HEAD_INIT(tsk.sibling), \ .group_leader = &tsk, \ - RCU_INIT_POINTER(.real_cred, &init_cred), \ - RCU_INIT_POINTER(.cred, &init_cred), \ + RCU_POINTER_INITIALIZER(real_cred, &init_cred), \ + RCU_POINTER_INITIALIZER(cred, &init_cred), \ .comm = INIT_TASK_COMM, \ .thread = INIT_THREAD, \ .fs = &init_fs, \ diff --git a/include/linux/key.h b/include/linux/key.h index 4cd22ed627e..cef3b315ba7 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -303,7 +303,9 @@ static inline bool key_is_instantiated(const struct key *key) rwsem_is_locked(&((struct key *)(KEY))->sem))) #define rcu_assign_keypointer(KEY, PAYLOAD) \ - (rcu_assign_pointer((KEY)->payload.rcudata, PAYLOAD)) +do { \ + rcu_assign_pointer((KEY)->payload.rcudata, (PAYLOAD)); \ +} while (0) #ifdef CONFIG_SYSCTL extern ctl_table key_sysctls[]; diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 9cac722b169..115ead2b515 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -147,6 +147,7 @@ extern void synchronize_sched(void); extern void __rcu_read_lock(void); extern void __rcu_read_unlock(void); +extern void rcu_read_unlock_special(struct task_struct *t); void synchronize_rcu(void); /* @@ -255,6 +256,10 @@ static inline void destroy_rcu_head_on_stack(struct rcu_head *head) } #endif /* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */ +#if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SMP) +extern int rcu_is_cpu_idle(void); +#endif /* #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SMP) */ + #if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU) bool rcu_lockdep_current_cpu_online(void); #else /* #if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU) */ @@ -266,15 +271,6 @@ static inline bool rcu_lockdep_current_cpu_online(void) #ifdef CONFIG_DEBUG_LOCK_ALLOC -#ifdef CONFIG_PROVE_RCU -extern int rcu_is_cpu_idle(void); -#else /* !CONFIG_PROVE_RCU */ -static inline int rcu_is_cpu_idle(void) -{ - return 0; -} -#endif /* else !CONFIG_PROVE_RCU */ - static inline void rcu_lock_acquire(struct lockdep_map *map) { lock_acquire(map, 0, 0, 2, 1, NULL, _THIS_IP_); @@ -431,8 +427,7 @@ extern int rcu_my_thread_group_empty(void); static inline void rcu_preempt_sleep_check(void) { rcu_lockdep_assert(!lock_is_held(&rcu_lock_map), - "Illegal context switch in RCU read-side " - "critical section"); + "Illegal context switch in RCU read-side critical section"); } #else /* #ifdef CONFIG_PROVE_RCU */ static inline void rcu_preempt_sleep_check(void) @@ -513,10 +508,10 @@ static inline void rcu_preempt_sleep_check(void) (_________p1); \ }) #define __rcu_assign_pointer(p, v, space) \ - ({ \ + do { \ smp_wmb(); \ (p) = (typeof(*v) __force space *)(v); \ - }) + } while (0) /** @@ -851,7 +846,7 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) * * Assigns the specified value to the specified RCU-protected * pointer, ensuring that any concurrent RCU readers will see - * any prior initialization. Returns the value assigned. + * any prior initialization. * * Inserts memory barriers on architectures that require them * (which is most of them), and also prevents the compiler from @@ -903,25 +898,17 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) * the reader-accessible portions of the linked structure. */ #define RCU_INIT_POINTER(p, v) \ - p = (typeof(*v) __force __rcu *)(v) - -static __always_inline bool __is_kfree_rcu_offset(unsigned long offset) -{ - return offset < 4096; -} - -static __always_inline -void __kfree_rcu(struct rcu_head *head, unsigned long offset) -{ - typedef void (*rcu_callback)(struct rcu_head *); - - BUILD_BUG_ON(!__builtin_constant_p(offset)); - - /* See the kfree_rcu() header comment. */ - BUILD_BUG_ON(!__is_kfree_rcu_offset(offset)); + do { \ + p = (typeof(*v) __force __rcu *)(v); \ + } while (0) - kfree_call_rcu(head, (rcu_callback)offset); -} +/** + * RCU_POINTER_INITIALIZER() - statically initialize an RCU protected pointer + * + * GCC-style initialization for an RCU-protected pointer in a structure field. + */ +#define RCU_POINTER_INITIALIZER(p, v) \ + .p = (typeof(*v) __force __rcu *)(v) /* * Does the specified offset indicate that the corresponding rcu_head @@ -935,7 +922,7 @@ void __kfree_rcu(struct rcu_head *head, unsigned long offset) #define __kfree_rcu(head, offset) \ do { \ BUILD_BUG_ON(!__is_kfree_rcu_offset(offset)); \ - call_rcu(head, (void (*)(struct rcu_head *))(unsigned long)(offset)); \ + kfree_call_rcu(head, (void (*)(struct rcu_head *))(unsigned long)(offset)); \ } while (0) /** diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h index d274734b2aa..5bde94d8585 100644 --- a/include/trace/events/rcu.h +++ b/include/trace/events/rcu.h @@ -541,6 +541,50 @@ TRACE_EVENT(rcu_torture_read, __entry->rcutorturename, __entry->rhp) ); +/* + * Tracepoint for _rcu_barrier() execution. The string "s" describes + * the _rcu_barrier phase: + * "Begin": rcu_barrier_callback() started. + * "Check": rcu_barrier_callback() checking for piggybacking. + * "EarlyExit": rcu_barrier_callback() piggybacked, thus early exit. + * "Inc1": rcu_barrier_callback() piggyback check counter incremented. + * "Offline": rcu_barrier_callback() found offline CPU + * "OnlineQ": rcu_barrier_callback() found online CPU with callbacks. + * "OnlineNQ": rcu_barrier_callback() found online CPU, no callbacks. + * "IRQ": An rcu_barrier_callback() callback posted on remote CPU. + * "CB": An rcu_barrier_callback() invoked a callback, not the last. + * "LastCB": An rcu_barrier_callback() invoked the last callback. + * "Inc2": rcu_barrier_callback() piggyback check counter incremented. + * The "cpu" argument is the CPU or -1 if meaningless, the "cnt" argument + * is the count of remaining callbacks, and "done" is the piggybacking count. + */ +TRACE_EVENT(rcu_barrier, + + TP_PROTO(char *rcuname, char *s, int cpu, int cnt, unsigned long done), + + TP_ARGS(rcuname, s, cpu, cnt, done), + + TP_STRUCT__entry( + __field(char *, rcuname) + __field(char *, s) + __field(int, cpu) + __field(int, cnt) + __field(unsigned long, done) + ), + + TP_fast_assign( + __entry->rcuname = rcuname; + __entry->s = s; + __entry->cpu = cpu; + __entry->cnt = cnt; + __entry->done = done; + ), + + TP_printk("%s %s cpu %d remaining %d # %lu", + __entry->rcuname, __entry->s, __entry->cpu, __entry->cnt, + __entry->done) +); + #else /* #ifdef CONFIG_RCU_TRACE */ #define trace_rcu_grace_period(rcuname, gpnum, gpevent) do { } while (0) @@ -564,6 +608,7 @@ TRACE_EVENT(rcu_torture_read, #define trace_rcu_batch_end(rcuname, callbacks_invoked, cb, nr, iit, risk) \ do { } while (0) #define trace_rcu_torture_read(rcutorturename, rhp) do { } while (0) +#define trace_rcu_barrier(name, s, cpu, cnt, done) do { } while (0) #endif /* #else #ifdef CONFIG_RCU_TRACE */ diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index 95cba41ce1e..4e6a61b15e8 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c @@ -54,6 +54,50 @@ #ifdef CONFIG_PREEMPT_RCU /* + * Preemptible RCU implementation for rcu_read_lock(). + * Just increment ->rcu_read_lock_nesting, shared state will be updated + * if we block. + */ +void __rcu_read_lock(void) +{ + current->rcu_read_lock_nesting++; + barrier(); /* critical section after entry code. */ +} +EXPORT_SYMBOL_GPL(__rcu_read_lock); + +/* + * Preemptible RCU implementation for rcu_read_unlock(). + * Decrement ->rcu_read_lock_nesting. If the result is zero (outermost + * rcu_read_unlock()) and ->rcu_read_unlock_special is non-zero, then + * invoke rcu_read_unlock_special() to clean up after a context switch + * in an RCU read-side critical section and other special cases. + */ +void __rcu_read_unlock(void) +{ + struct task_struct *t = current; + + if (t->rcu_read_lock_nesting != 1) { + --t->rcu_read_lock_nesting; + } else { + barrier(); /* critical section before exit code. */ + t->rcu_read_lock_nesting = INT_MIN; + barrier(); /* assign before ->rcu_read_unlock_special load */ + if (unlikely(ACCESS_ONCE(t->rcu_read_unlock_special))) + rcu_read_unlock_special(t); + barrier(); /* ->rcu_read_unlock_special load before assign */ + t->rcu_read_lock_nesting = 0; + } +#ifdef CONFIG_PROVE_LOCKING + { + int rrln = ACCESS_ONCE(t->rcu_read_lock_nesting); + + WARN_ON_ONCE(rrln < 0 && rrln > INT_MIN / 2); + } +#endif /* #ifdef CONFIG_PROVE_LOCKING */ +} +EXPORT_SYMBOL_GPL(__rcu_read_unlock); + +/* * Check for a task exiting while in a preemptible-RCU read-side * critical section, clean up if so. No need to issue warnings, * as debug_check_no_locks_held() already does this if lockdep diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c index 37a5444204d..547b1fe5b05 100644 --- a/kernel/rcutiny.c +++ b/kernel/rcutiny.c @@ -172,7 +172,7 @@ void rcu_irq_enter(void) local_irq_restore(flags); } -#ifdef CONFIG_PROVE_RCU +#ifdef CONFIG_DEBUG_LOCK_ALLOC /* * Test whether RCU thinks that the current CPU is idle. @@ -183,7 +183,7 @@ int rcu_is_cpu_idle(void) } EXPORT_SYMBOL(rcu_is_cpu_idle); -#endif /* #ifdef CONFIG_PROVE_RCU */ +#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ /* * Test whether the current CPU was interrupted from idle. Nested diff --git a/kernel/rcutiny_plugin.h b/kernel/rcutiny_plugin.h index fc31a2d6510..918fd1e8509 100644 --- a/kernel/rcutiny_plugin.h +++ b/kernel/rcutiny_plugin.h @@ -132,7 +132,6 @@ static struct rcu_preempt_ctrlblk rcu_preempt_ctrlblk = { RCU_TRACE(.rcb.name = "rcu_preempt") }; -static void rcu_read_unlock_special(struct task_struct *t); static int rcu_preempted_readers_exp(void); static void rcu_report_exp_done(void); @@ -351,8 +350,9 @@ static int rcu_initiate_boost(void) rcu_preempt_ctrlblk.boost_tasks = rcu_preempt_ctrlblk.gp_tasks; invoke_rcu_callbacks(); - } else + } else { RCU_TRACE(rcu_initiate_boost_trace()); + } return 1; } @@ -527,23 +527,11 @@ void rcu_preempt_note_context_switch(void) } /* - * Tiny-preemptible RCU implementation for rcu_read_lock(). - * Just increment ->rcu_read_lock_nesting, shared state will be updated - * if we block. - */ -void __rcu_read_lock(void) -{ - current->rcu_read_lock_nesting++; - barrier(); /* needed if we ever invoke rcu_read_lock in rcutiny.c */ -} -EXPORT_SYMBOL_GPL(__rcu_read_lock); - -/* * Handle special cases during rcu_read_unlock(), such as needing to * notify RCU core processing or task having blocked during the RCU * read-side critical section. */ -static noinline void rcu_read_unlock_special(struct task_struct *t) +void rcu_read_unlock_special(struct task_struct *t) { int empty; int empty_exp; @@ -627,38 +615,6 @@ static noinline void rcu_read_unlock_special(struct task_struct *t) } /* - * Tiny-preemptible RCU implementation for rcu_read_unlock(). - * Decrement ->rcu_read_lock_nesting. If the result is zero (outermost - * rcu_read_unlock()) and ->rcu_read_unlock_special is non-zero, then - * invoke rcu_read_unlock_special() to clean up after a context switch - * in an RCU read-side critical section and other special cases. - */ -void __rcu_read_unlock(void) -{ - struct task_struct *t = current; - - barrier(); /* needed if we ever invoke rcu_read_unlock in rcutiny.c */ - if (t->rcu_read_lock_nesting != 1) - --t->rcu_read_lock_nesting; - else { - t->rcu_read_lock_nesting = INT_MIN; - barrier(); /* assign before ->rcu_read_unlock_special load */ - if (unlikely(ACCESS_ONCE(t->rcu_read_unlock_special))) - rcu_read_unlock_special(t); - barrier(); /* ->rcu_read_unlock_special load before assign */ - t->rcu_read_lock_nesting = 0; - } -#ifdef CONFIG_PROVE_LOCKING - { - int rrln = ACCESS_ONCE(t->rcu_read_lock_nesting); - - WARN_ON_ONCE(rrln < 0 && rrln > INT_MIN / 2); - } -#endif /* #ifdef CONFIG_PROVE_LOCKING */ -} -EXPORT_SYMBOL_GPL(__rcu_read_unlock); - -/* * Check for a quiescent state from the current CPU. When a task blocks, * the task is recorded in the rcu_preempt_ctrlblk structure, which is * checked elsewhere. This is called from the scheduling-clock interrupt. @@ -823,9 +779,9 @@ void synchronize_rcu_expedited(void) rpcp->exp_tasks = NULL; /* Wait for tail of ->blkd_tasks list to drain. */ - if (!rcu_preempted_readers_exp()) + if (!rcu_preempted_readers_exp()) { local_irq_restore(flags); - else { + } else { rcu_initiate_boost(); local_irq_restore(flags); wait_event(sync_rcu_preempt_exp_wq, @@ -846,8 +802,6 @@ EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); */ int rcu_preempt_needs_cpu(void) { - if (!rcu_preempt_running_reader()) - rcu_preempt_cpu_qs(); return rcu_preempt_ctrlblk.rcb.rcucblist != NULL; } diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c index e66b34ab755..25b15033c61 100644 --- a/kernel/rcutorture.c +++ b/kernel/rcutorture.c @@ -49,8 +49,7 @@ #include <asm/byteorder.h> MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Paul E. McKenney <paulmck@us.ibm.com> and " - "Josh Triplett <josh@freedesktop.org>"); +MODULE_AUTHOR("Paul E. McKenney <paulmck@us.ibm.com> and Josh Triplett <josh@freedesktop.org>"); static int nreaders = -1; /* # reader threads, defaults to 2*ncpus */ static int nfakewriters = 4; /* # fake writer threads */ @@ -206,6 +205,7 @@ static unsigned long boost_starttime; /* jiffies of next boost test start. */ DEFINE_MUTEX(boost_mutex); /* protect setting boost_starttime */ /* and boost task create/destroy. */ static atomic_t barrier_cbs_count; /* Barrier callbacks registered. */ +static bool barrier_phase; /* Test phase. */ static atomic_t barrier_cbs_invoked; /* Barrier callbacks invoked. */ static wait_queue_head_t *barrier_cbs_wq; /* Coordinate barrier testing. */ static DECLARE_WAIT_QUEUE_HEAD(barrier_wq); @@ -407,8 +407,9 @@ rcu_torture_cb(struct rcu_head *p) if (++rp->rtort_pipe_count >= RCU_TORTURE_PIPE_LEN) { rp->rtort_mbtest = 0; rcu_torture_free(rp); - } else + } else { cur_ops->deferred_free(rp); + } } static int rcu_no_completed(void) @@ -635,6 +636,17 @@ static void srcu_torture_synchronize(void) synchronize_srcu(&srcu_ctl); } +static void srcu_torture_call(struct rcu_head *head, + void (*func)(struct rcu_head *head)) +{ + call_srcu(&srcu_ctl, head, func); +} + +static void srcu_torture_barrier(void) +{ + srcu_barrier(&srcu_ctl); +} + static int srcu_torture_stats(char *page) { int cnt = 0; @@ -661,8 +673,8 @@ static struct rcu_torture_ops srcu_ops = { .completed = srcu_torture_completed, .deferred_free = srcu_torture_deferred_free, .sync = srcu_torture_synchronize, - .call = NULL, - .cb_barrier = NULL, + .call = srcu_torture_call, + .cb_barrier = srcu_torture_barrier, .stats = srcu_torture_stats, .name = "srcu" }; @@ -1013,7 +1025,11 @@ rcu_torture_fakewriter(void *arg) do { schedule_timeout_uninterruptible(1 + rcu_random(&rand)%10); udelay(rcu_random(&rand) & 0x3ff); - cur_ops->sync(); + if (cur_ops->cb_barrier != NULL && + rcu_random(&rand) % (nfakewriters * 8) == 0) + cur_ops->cb_barrier(); + else + cur_ops->sync(); rcu_stutter_wait("rcu_torture_fakewriter"); } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP); @@ -1183,27 +1199,27 @@ rcu_torture_printk(char *page) } cnt += sprintf(&page[cnt], "%s%s ", torture_type, TORTURE_FLAG); cnt += sprintf(&page[cnt], - "rtc: %p ver: %lu tfle: %d rta: %d rtaf: %d rtf: %d " - "rtmbe: %d rtbke: %ld rtbre: %ld " - "rtbf: %ld rtb: %ld nt: %ld " - "onoff: %ld/%ld:%ld/%ld " - "barrier: %ld/%ld:%ld", + "rtc: %p ver: %lu tfle: %d rta: %d rtaf: %d rtf: %d ", rcu_torture_current, rcu_torture_current_version, list_empty(&rcu_torture_freelist), atomic_read(&n_rcu_torture_alloc), atomic_read(&n_rcu_torture_alloc_fail), - atomic_read(&n_rcu_torture_free), + atomic_read(&n_rcu_torture_free)); + cnt += sprintf(&page[cnt], "rtmbe: %d rtbke: %ld rtbre: %ld ", atomic_read(&n_rcu_torture_mberror), n_rcu_torture_boost_ktrerror, - n_rcu_torture_boost_rterror, + n_rcu_torture_boost_rterror); + cnt += sprintf(&page[cnt], "rtbf: %ld rtb: %ld nt: %ld ", n_rcu_torture_boost_failure, n_rcu_torture_boosts, - n_rcu_torture_timers, + n_rcu_torture_timers); + cnt += sprintf(&page[cnt], "onoff: %ld/%ld:%ld/%ld ", n_online_successes, n_online_attempts, n_offline_successes, - n_offline_attempts, + n_offline_attempts); + cnt += sprintf(&page[cnt], "barrier: %ld/%ld:%ld", n_barrier_successes, n_barrier_attempts, n_rcu_torture_barrier_error); @@ -1445,8 +1461,7 @@ rcu_torture_shutdown(void *arg) delta = shutdown_time - jiffies_snap; if (verbose) printk(KERN_ALERT "%s" TORTURE_FLAG - "rcu_torture_shutdown task: %lu " - "jiffies remaining\n", + "rcu_torture_shutdown task: %lu jiffies remaining\n", torture_type, delta); schedule_timeout_interruptible(delta); jiffies_snap = ACCESS_ONCE(jiffies); @@ -1498,8 +1513,7 @@ rcu_torture_onoff(void *arg) if (cpu_down(cpu) == 0) { if (verbose) printk(KERN_ALERT "%s" TORTURE_FLAG - "rcu_torture_onoff task: " - "offlined %d\n", + "rcu_torture_onoff task: offlined %d\n", torture_type, cpu); n_offline_successes++; } @@ -1512,8 +1526,7 @@ rcu_torture_onoff(void *arg) if (cpu_up(cpu) == 0) { if (verbose) printk(KERN_ALERT "%s" TORTURE_FLAG - "rcu_torture_onoff task: " - "onlined %d\n", + "rcu_torture_onoff task: onlined %d\n", torture_type, cpu); n_online_successes++; } @@ -1631,6 +1644,7 @@ void rcu_torture_barrier_cbf(struct rcu_head *rcu) static int rcu_torture_barrier_cbs(void *arg) { long myid = (long)arg; + bool lastphase = 0; struct rcu_head rcu; init_rcu_head_on_stack(&rcu); @@ -1638,9 +1652,11 @@ static int rcu_torture_barrier_cbs(void *arg) set_user_nice(current, 19); do { wait_event(barrier_cbs_wq[myid], - atomic_read(&barrier_cbs_count) == n_barrier_cbs || + barrier_phase != lastphase || kthread_should_stop() || fullstop != FULLSTOP_DONTSTOP); + lastphase = barrier_phase; + smp_mb(); /* ensure barrier_phase load before ->call(). */ if (kthread_should_stop() || fullstop != FULLSTOP_DONTSTOP) break; cur_ops->call(&rcu, rcu_torture_barrier_cbf); @@ -1665,7 +1681,8 @@ static int rcu_torture_barrier(void *arg) do { atomic_set(&barrier_cbs_invoked, 0); atomic_set(&barrier_cbs_count, n_barrier_cbs); - /* wake_up() path contains the required barriers. */ + smp_mb(); /* Ensure barrier_phase after prior assignments. */ + barrier_phase = !barrier_phase; for (i = 0; i < n_barrier_cbs; i++) wake_up(&barrier_cbs_wq[i]); wait_event(barrier_wq, @@ -1684,7 +1701,7 @@ static int rcu_torture_barrier(void *arg) schedule_timeout_interruptible(HZ / 10); } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP); VERBOSE_PRINTK_STRING("rcu_torture_barrier task stopping"); - rcutorture_shutdown_absorb("rcu_torture_barrier_cbs"); + rcutorture_shutdown_absorb("rcu_torture_barrier"); while (!kthread_should_stop()) schedule_timeout_interruptible(1); return 0; @@ -1908,8 +1925,8 @@ rcu_torture_init(void) static struct rcu_torture_ops *torture_ops[] = { &rcu_ops, &rcu_sync_ops, &rcu_expedited_ops, &rcu_bh_ops, &rcu_bh_sync_ops, &rcu_bh_expedited_ops, - &srcu_ops, &srcu_sync_ops, &srcu_raw_ops, - &srcu_raw_sync_ops, &srcu_expedited_ops, + &srcu_ops, &srcu_sync_ops, &srcu_expedited_ops, + &srcu_raw_ops, &srcu_raw_sync_ops, &sched_ops, &sched_sync_ops, &sched_expedited_ops, }; mutex_lock(&fullstop_mutex); @@ -1931,8 +1948,7 @@ rcu_torture_init(void) return -EINVAL; } if (cur_ops->fqs == NULL && fqs_duration != 0) { - printk(KERN_ALERT "rcu-torture: ->fqs NULL and non-zero " - "fqs_duration, fqs disabled.\n"); + printk(KERN_ALERT "rcu-torture: ->fqs NULL and non-zero fqs_duration, fqs disabled.\n"); fqs_duration = 0; } if (cur_ops->init) diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 4b97bba7396..f280e542e3e 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -60,36 +60,44 @@ /* Data structures. */ -static struct lock_class_key rcu_node_class[NUM_RCU_LVLS]; - -#define RCU_STATE_INITIALIZER(structname) { \ - .level = { &structname##_state.node[0] }, \ - .levelcnt = { \ - NUM_RCU_LVL_0, /* root of hierarchy. */ \ - NUM_RCU_LVL_1, \ - NUM_RCU_LVL_2, \ - NUM_RCU_LVL_3, \ - NUM_RCU_LVL_4, /* == MAX_RCU_LVLS */ \ - }, \ +static struct lock_class_key rcu_node_class[RCU_NUM_LVLS]; + +#define RCU_STATE_INITIALIZER(sname, cr) { \ + .level = { &sname##_state.node[0] }, \ + .call = cr, \ .fqs_state = RCU_GP_IDLE, \ .gpnum = -300, \ .completed = -300, \ - .onofflock = __RAW_SPIN_LOCK_UNLOCKED(&structname##_state.onofflock), \ - .orphan_nxttail = &structname##_state.orphan_nxtlist, \ - .orphan_donetail = &structname##_state.orphan_donelist, \ - .fqslock = __RAW_SPIN_LOCK_UNLOCKED(&structname##_state.fqslock), \ - .n_force_qs = 0, \ - .n_force_qs_ngp = 0, \ - .name = #structname, \ + .onofflock = __RAW_SPIN_LOCK_UNLOCKED(&sname##_state.onofflock), \ + .orphan_nxttail = &sname##_state.orphan_nxtlist, \ + .orphan_donetail = &sname##_state.orphan_donelist, \ + .barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \ + .fqslock = __RAW_SPIN_LOCK_UNLOCKED(&sname##_state.fqslock), \ + .name = #sname, \ } -struct rcu_state rcu_sched_state = RCU_STATE_INITIALIZER(rcu_sched); +struct rcu_state rcu_sched_state = + RCU_STATE_INITIALIZER(rcu_sched, call_rcu_sched); DEFINE_PER_CPU(struct rcu_data, rcu_sched_data); -struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh); +struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh, call_rcu_bh); DEFINE_PER_CPU(struct rcu_data, rcu_bh_data); static struct rcu_state *rcu_state; +LIST_HEAD(rcu_struct_flavors); + +/* Increase (but not decrease) the CONFIG_RCU_FANOUT_LEAF at boot time. */ +static int rcu_fanout_leaf = CONFIG_RCU_FANOUT_LEAF; +module_param(rcu_fanout_leaf, int, 0); +int rcu_num_lvls __read_mostly = RCU_NUM_LVLS; +static int num_rcu_lvl[] = { /* Number of rcu_nodes at specified level. */ + NUM_RCU_LVL_0, + NUM_RCU_LVL_1, + NUM_RCU_LVL_2, + NUM_RCU_LVL_3, + NUM_RCU_LVL_4, +}; +int rcu_num_nodes __read_mostly = NUM_RCU_NODES; /* Total # rcu_nodes in use. */ /* * The rcu_scheduler_active variable transitions from zero to one just |