aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/RCU/checklist.txt39
-rw-r--r--Documentation/RCU/rcubarrier.txt15
-rw-r--r--Documentation/RCU/torture.txt9
-rw-r--r--Documentation/RCU/whatisRCU.txt6
-rw-r--r--Documentation/kernel-parameters.txt5
-rw-r--r--include/linux/init_task.h4
-rw-r--r--include/linux/key.h4
-rw-r--r--include/linux/rcupdate.h53
-rw-r--r--include/trace/events/rcu.h45
-rw-r--r--kernel/rcupdate.c44
-rw-r--r--kernel/rcutiny.c4
-rw-r--r--kernel/rcutiny_plugin.h56
-rw-r--r--kernel/rcutorture.c72
-rw-r--r--kernel/rcutree.c478
-rw-r--r--kernel/rcutree.h46
-rw-r--r--kernel/rcutree_plugin.h223
-rw-r--r--kernel/rcutree_trace.c148
-rw-r--r--kernel/time/tick-sched.c2
-rw-r--r--lib/list_debug.c6
19 files changed, 669 insertions, 590 deletions
diff --git a/Documentation/RCU/checklist.txt b/Documentation/RCU/checklist.txt
index 5c8d7496809..fc103d7a047 100644
--- a/Documentation/RCU/checklist.txt
+++ b/Documentation/RCU/checklist.txt
@@ -162,9 +162,9 @@ over a rather long period of time, but improvements are always welcome!
when publicizing a pointer to a structure that can
be traversed by an RCU read-side critical section.
-5. If call_rcu(), or a related primitive such as call_rcu_bh() or
- call_rcu_sched(), is used, the callback function must be
- written to be called from softirq context. In particular,
+5. If call_rcu(), or a related primitive such as call_rcu_bh(),
+ call_rcu_sched(), or call_srcu() is used, the callback function
+ must be written to be called from softirq context. In particular,
it cannot block.
6. Since synchronize_rcu() can block, it cannot be called from
@@ -202,11 +202,12 @@ over a rather long period of time, but improvements are always welcome!
updater uses call_rcu_sched() or synchronize_sched(), then
the corresponding readers must disable preemption, possibly
by calling rcu_read_lock_sched() and rcu_read_unlock_sched().
- If the updater uses synchronize_srcu(), the the corresponding
- readers must use srcu_read_lock() and srcu_read_unlock(),
- and with the same srcu_struct. The rules for the expedited
- primitives are the same as for their non-expedited counterparts.
- Mixing things up will result in confusion and broken kernels.
+ If the updater uses synchronize_srcu() or call_srcu(),
+ the the corresponding readers must use srcu_read_lock() and
+ srcu_read_unlock(), and with the same srcu_struct. The rules for
+ the expedited primitives are the same as for their non-expedited
+ counterparts. Mixing things up will result in confusion and
+ broken kernels.
One exception to this rule: rcu_read_lock() and rcu_read_unlock()
may be substituted for rcu_read_lock_bh() and rcu_read_unlock_bh()
@@ -333,14 +334,14 @@ over a rather long period of time, but improvements are always welcome!
victim CPU from ever going offline.)
14. SRCU (srcu_read_lock(), srcu_read_unlock(), srcu_dereference(),
- synchronize_srcu(), and synchronize_srcu_expedited()) may only
- be invoked from process context. Unlike other forms of RCU, it
- -is- permissible to block in an SRCU read-side critical section
- (demarked by srcu_read_lock() and srcu_read_unlock()), hence the
- "SRCU": "sleepable RCU". Please note that if you don't need
- to sleep in read-side critical sections, you should be using
- RCU rather than SRCU, because RCU is almost always faster and
- easier to use than is SRCU.
+ synchronize_srcu(), synchronize_srcu_expedited(), and call_srcu())
+ may only be invoked from process context. Unlike other forms of
+ RCU, it -is- permissible to block in an SRCU read-side critical
+ section (demarked by srcu_read_lock() and srcu_read_unlock()),
+ hence the "SRCU": "sleepable RCU". Please note that if you
+ don't need to sleep in read-side critical sections, you should be
+ using RCU rather than SRCU, because RCU is almost always faster
+ and easier to use than is SRCU.
If you need to enter your read-side critical section in a
hardirq or exception handler, and then exit that same read-side
@@ -353,8 +354,8 @@ over a rather long period of time, but improvements are always welcome!
cleanup_srcu_struct(). These are passed a "struct srcu_struct"
that defines the scope of a given SRCU domain. Once initialized,
the srcu_struct is passed to srcu_read_lock(), srcu_read_unlock()
- synchronize_srcu(), and synchronize_srcu_expedited(). A given
- synchronize_srcu() waits only for SRCU read-side critical
+ synchronize_srcu(), synchronize_srcu_expedited(), and call_srcu().
+ A given synchronize_srcu() waits only for SRCU read-side critical
sections governed by srcu_read_lock() and srcu_read_unlock()
calls that have been passed the same srcu_struct. This property
is what makes sleeping read-side critical sections tolerable --
@@ -374,7 +375,7 @@ over a rather long period of time, but improvements are always welcome!
requiring SRCU's read-side deadlock immunity or low read-side
realtime latency.
- Note that, rcu_assign_pointer() relates to SRCU just as they do
+ Note that, rcu_assign_pointer() relates to SRCU just as it does
to other forms of RCU.
15. The whole point of call_rcu(), synchronize_rcu(), and friends
diff --git a/Documentation/RCU/rcubarrier.txt b/Documentation/RCU/rcubarrier.txt
index e439a0edee2..38428c12513 100644
--- a/Documentation/RCU/rcubarrier.txt
+++ b/Documentation/RCU/rcubarrier.txt
@@ -79,8 +79,6 @@ complete. Pseudo-code using rcu_barrier() is as follows:
2. Execute rcu_barrier().
3. Allow the module to be unloaded.
-Quick Quiz #1: Why is there no srcu_barrier()?
-
The rcutorture module makes use of rcu_barrier in its exit function
as follows:
@@ -162,7 +160,7 @@ for any pre-existing callbacks to complete.
Then lines 55-62 print status and do operation-specific cleanup, and
then return, permitting the module-unload operation to be completed.
-Quick Quiz #2: Is there any other situation where rcu_barrier() might
+Quick Quiz #1: Is there any other situation where rcu_barrier() might
be required?
Your module might have additional complications. For example, if your
@@ -242,7 +240,7 @@ reaches zero, as follows:
4 complete(&rcu_barrier_completion);
5 }
-Quick Quiz #3: What happens if CPU 0's rcu_barrier_func() executes
+Quick Quiz #2: What happens if CPU 0's rcu_barrier_func() executes
immediately (thus incrementing rcu_barrier_cpu_count to the
value one), but the other CPU's rcu_barrier_func() invocations
are delayed for a full grace period? Couldn't this result in
@@ -259,12 +257,7 @@ so that your module may be safely unloaded.
Answers to Quick Quizzes
-Quick Quiz #1: Why is there no srcu_barrier()?
-
-Answer: Since there is no call_srcu(), there can be no outstanding SRCU
- callbacks. Therefore, there is no need to wait for them.
-
-Quick Quiz #2: Is there any other situation where rcu_barrier() might
+Quick Quiz #1: Is there any other situation where rcu_barrier() might
be required?
Answer: Interestingly enough, rcu_barrier() was not originally
@@ -278,7 +271,7 @@ Answer: Interestingly enough, rcu_barrier() was not originally
implementing rcutorture, and found that rcu_barrier() solves
this problem as well.
-Quick Quiz #3: What happens if CPU 0's rcu_barrier_func() executes
+Quick Quiz #2: What happens if CPU 0's rcu_barrier_func() executes
immediately (thus incrementing rcu_barrier_cpu_count to the
value one), but the other CPU's rcu_barrier_func() invocations
are delayed for a full grace period? Couldn't this result in
diff --git a/Documentation/RCU/torture.txt b/Documentation/RCU/torture.txt
index 4ddf3913fd8..7dce8a17eac 100644
--- a/Documentation/RCU/torture.txt
+++ b/Documentation/RCU/torture.txt
@@ -174,11 +174,20 @@ torture_type The type of RCU to test, with string values as follows:
and synchronize_rcu_bh_expedited().
"srcu": srcu_read_lock(), srcu_read_unlock() and
+ call_srcu().
+
+ "srcu_sync": srcu_read_lock(), srcu_read_unlock() and
synchronize_srcu().
"srcu_expedited": srcu_read_lock(), srcu_read_unlock() and
synchronize_srcu_expedited().
+ "srcu_raw": srcu_read_lock_raw(), srcu_read_unlock_raw(),
+ and call_srcu().
+
+ "srcu_raw_sync": srcu_read_lock_raw(), srcu_read_unlock_raw(),
+ and synchronize_srcu().
+
"sched": preempt_disable(), preempt_enable(), and
call_rcu_sched().
diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt
index 6bbe8dcdc3d..69ee188515e 100644
--- a/Documentation/RCU/whatisRCU.txt
+++ b/Documentation/RCU/whatisRCU.txt
@@ -833,9 +833,9 @@ sched: Critical sections Grace period Barrier
SRCU: Critical sections Grace period Barrier
- srcu_read_lock synchronize_srcu N/A
- srcu_read_unlock synchronize_srcu_expedited
- srcu_read_lock_raw
+ srcu_read_lock synchronize_srcu srcu_barrier
+ srcu_read_unlock call_srcu
+ srcu_read_lock_raw synchronize_srcu_expedited
srcu_read_unlock_raw
srcu_dereference
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index a92c5ebf373..12783fa833c 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2367,6 +2367,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
Set maximum number of finished RCU callbacks to process
in one batch.
+ rcutree.fanout_leaf= [KNL,BOOT]
+ Increase the number of CPUs assigned to each
+ leaf rcu_node structure. Useful for very large
+ systems.
+
rcutree.qhimark= [KNL,BOOT]
Set threshold of queued
RCU callbacks over which batch limiting is disabled.
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 9e65eff6af3..8a747618699 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -168,8 +168,8 @@ extern struct cred init_cred;
.children = LIST_HEAD_INIT(tsk.children), \
.sibling = LIST_HEAD_INIT(tsk.sibling), \
.group_leader = &tsk, \
- RCU_INIT_POINTER(.real_cred, &init_cred), \
- RCU_INIT_POINTER(.cred, &init_cred), \
+ RCU_POINTER_INITIALIZER(real_cred, &init_cred), \
+ RCU_POINTER_INITIALIZER(cred, &init_cred), \
.comm = INIT_TASK_COMM, \
.thread = INIT_THREAD, \
.fs = &init_fs, \
diff --git a/include/linux/key.h b/include/linux/key.h
index 4cd22ed627e..cef3b315ba7 100644
--- a/include/linux/key.h
+++ b/include/linux/key.h
@@ -303,7 +303,9 @@ static inline bool key_is_instantiated(const struct key *key)
rwsem_is_locked(&((struct key *)(KEY))->sem)))
#define rcu_assign_keypointer(KEY, PAYLOAD) \
- (rcu_assign_pointer((KEY)->payload.rcudata, PAYLOAD))
+do { \
+ rcu_assign_pointer((KEY)->payload.rcudata, (PAYLOAD)); \
+} while (0)
#ifdef CONFIG_SYSCTL
extern ctl_table key_sysctls[];
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 9cac722b169..115ead2b515 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -147,6 +147,7 @@ extern void synchronize_sched(void);
extern void __rcu_read_lock(void);
extern void __rcu_read_unlock(void);
+extern void rcu_read_unlock_special(struct task_struct *t);
void synchronize_rcu(void);
/*
@@ -255,6 +256,10 @@ static inline void destroy_rcu_head_on_stack(struct rcu_head *head)
}
#endif /* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */
+#if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SMP)
+extern int rcu_is_cpu_idle(void);
+#endif /* #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SMP) */
+
#if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU)
bool rcu_lockdep_current_cpu_online(void);
#else /* #if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU) */
@@ -266,15 +271,6 @@ static inline bool rcu_lockdep_current_cpu_online(void)
#ifdef CONFIG_DEBUG_LOCK_ALLOC
-#ifdef CONFIG_PROVE_RCU
-extern int rcu_is_cpu_idle(void);
-#else /* !CONFIG_PROVE_RCU */
-static inline int rcu_is_cpu_idle(void)
-{
- return 0;
-}
-#endif /* else !CONFIG_PROVE_RCU */
-
static inline void rcu_lock_acquire(struct lockdep_map *map)
{
lock_acquire(map, 0, 0, 2, 1, NULL, _THIS_IP_);
@@ -431,8 +427,7 @@ extern int rcu_my_thread_group_empty(void);
static inline void rcu_preempt_sleep_check(void)
{
rcu_lockdep_assert(!lock_is_held(&rcu_lock_map),
- "Illegal context switch in RCU read-side "
- "critical section");
+ "Illegal context switch in RCU read-side critical section");
}
#else /* #ifdef CONFIG_PROVE_RCU */
static inline void rcu_preempt_sleep_check(void)
@@ -513,10 +508,10 @@ static inline void rcu_preempt_sleep_check(void)
(_________p1); \
})
#define __rcu_assign_pointer(p, v, space) \
- ({ \
+ do { \
smp_wmb(); \
(p) = (typeof(*v) __force space *)(v); \
- })
+ } while (0)
/**
@@ -851,7 +846,7 @@ static inline notrace void rcu_read_unlock_sched_notrace(void)
*
* Assigns the specified value to the specified RCU-protected
* pointer, ensuring that any concurrent RCU readers will see
- * any prior initialization. Returns the value assigned.
+ * any prior initialization.
*
* Inserts memory barriers on architectures that require them
* (which is most of them), and also prevents the compiler from
@@ -903,25 +898,17 @@ static inline notrace void rcu_read_unlock_sched_notrace(void)
* the reader-accessible portions of the linked structure.
*/
#define RCU_INIT_POINTER(p, v) \
- p = (typeof(*v) __force __rcu *)(v)
-
-static __always_inline bool __is_kfree_rcu_offset(unsigned long offset)
-{
- return offset < 4096;
-}
-
-static __always_inline
-void __kfree_rcu(struct rcu_head *head, unsigned long offset)
-{
- typedef void (*rcu_callback)(struct rcu_head *);
-
- BUILD_BUG_ON(!__builtin_constant_p(offset));
-
- /* See the kfree_rcu() header comment. */
- BUILD_BUG_ON(!__is_kfree_rcu_offset(offset));
+ do { \
+ p = (typeof(*v) __force __rcu *)(v); \
+ } while (0)
- kfree_call_rcu(head, (rcu_callback)offset);
-}
+/**
+ * RCU_POINTER_INITIALIZER() - statically initialize an RCU protected pointer
+ *
+ * GCC-style initialization for an RCU-protected pointer in a structure field.
+ */
+#define RCU_POINTER_INITIALIZER(p, v) \
+ .p = (typeof(*v) __force __rcu *)(v)
/*
* Does the specified offset indicate that the corresponding rcu_head
@@ -935,7 +922,7 @@ void __kfree_rcu(struct rcu_head *head, unsigned long offset)
#define __kfree_rcu(head, offset) \
do { \
BUILD_BUG_ON(!__is_kfree_rcu_offset(offset)); \
- call_rcu(head, (void (*)(struct rcu_head *))(unsigned long)(offset)); \
+ kfree_call_rcu(head, (void (*)(struct rcu_head *))(unsigned long)(offset)); \
} while (0)
/**
diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h
index d274734b2aa..5bde94d8585 100644
--- a/include/trace/events/rcu.h
+++ b/include/trace/events/rcu.h
@@ -541,6 +541,50 @@ TRACE_EVENT(rcu_torture_read,
__entry->rcutorturename, __entry->rhp)
);
+/*
+ * Tracepoint for _rcu_barrier() execution. The string "s" describes
+ * the _rcu_barrier phase:
+ * "Begin": rcu_barrier_callback() started.
+ * "Check": rcu_barrier_callback() checking for piggybacking.
+ * "EarlyExit": rcu_barrier_callback() piggybacked, thus early exit.
+ * "Inc1": rcu_barrier_callback() piggyback check counter incremented.
+ * "Offline": rcu_barrier_callback() found offline CPU
+ * "OnlineQ": rcu_barrier_callback() found online CPU with callbacks.
+ * "OnlineNQ": rcu_barrier_callback() found online CPU, no callbacks.
+ * "IRQ": An rcu_barrier_callback() callback posted on remote CPU.
+ * "CB": An rcu_barrier_callback() invoked a callback, not the last.
+ * "LastCB": An rcu_barrier_callback() invoked the last callback.
+ * "Inc2": rcu_barrier_callback() piggyback check counter incremented.
+ * The "cpu" argument is the CPU or -1 if meaningless, the "cnt" argument
+ * is the count of remaining callbacks, and "done" is the piggybacking count.
+ */
+TRACE_EVENT(rcu_barrier,
+
+ TP_PROTO(char *rcuname, char *s, int cpu, int cnt, unsigned long done),
+
+ TP_ARGS(rcuname, s, cpu, cnt, done),
+
+ TP_STRUCT__entry(
+ __field(char *, rcuname)
+ __field(char *, s)
+ __field(int, cpu)
+ __field(int, cnt)
+ __field(unsigned long, done)
+ ),
+
+ TP_fast_assign(
+ __entry->rcuname = rcuname;
+ __entry->s = s;
+ __entry->cpu = cpu;
+ __entry->cnt = cnt;
+ __entry->done = done;
+ ),
+
+ TP_printk("%s %s cpu %d remaining %d # %lu",
+ __entry->rcuname, __entry->s, __entry->cpu, __entry->cnt,
+ __entry->done)
+);
+
#else /* #ifdef CONFIG_RCU_TRACE */
#define trace_rcu_grace_period(rcuname, gpnum, gpevent) do { } while (0)
@@ -564,6 +608,7 @@ TRACE_EVENT(rcu_torture_read,
#define trace_rcu_batch_end(rcuname, callbacks_invoked, cb, nr, iit, risk) \
do { } while (0)
#define trace_rcu_torture_read(rcutorturename, rhp) do { } while (0)
+#define trace_rcu_barrier(name, s, cpu, cnt, done) do { } while (0)
#endif /* #else #ifdef CONFIG_RCU_TRACE */
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index 95cba41ce1e..4e6a61b15e8 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -54,6 +54,50 @@
#ifdef CONFIG_PREEMPT_RCU
/*
+ * Preemptible RCU implementation for rcu_read_lock().
+ * Just increment ->rcu_read_lock_nesting, shared state will be updated
+ * if we block.
+ */
+void __rcu_read_lock(void)
+{
+ current->rcu_read_lock_nesting++;
+ barrier(); /* critical section after entry code. */
+}
+EXPORT_SYMBOL_GPL(__rcu_read_lock);
+
+/*
+ * Preemptible RCU implementation for rcu_read_unlock().
+ * Decrement ->rcu_read_lock_nesting. If the result is zero (outermost
+ * rcu_read_unlock()) and ->rcu_read_unlock_special is non-zero, then
+ * invoke rcu_read_unlock_special() to clean up after a context switch
+ * in an RCU read-side critical section and other special cases.
+ */
+void __rcu_read_unlock(void)
+{
+ struct task_struct *t = current;
+
+ if (t->rcu_read_lock_nesting != 1) {
+ --t->rcu_read_lock_nesting;
+ } else {
+ barrier(); /* critical section before exit code. */
+ t->rcu_read_lock_nesting = INT_MIN;
+ barrier(); /* assign before ->rcu_read_unlock_special load */
+ if (unlikely(ACCESS_ONCE(t->rcu_read_unlock_special)))
+ rcu_read_unlock_special(t);
+ barrier(); /* ->rcu_read_unlock_special load before assign */
+ t->rcu_read_lock_nesting = 0;
+ }
+#ifdef CONFIG_PROVE_LOCKING
+ {
+ int rrln = ACCESS_ONCE(t->rcu_read_lock_nesting);
+
+ WARN_ON_ONCE(rrln < 0 && rrln > INT_MIN / 2);
+ }
+#endif /* #ifdef CONFIG_PROVE_LOCKING */
+}
+EXPORT_SYMBOL_GPL(__rcu_read_unlock);
+
+/*
* Check for a task exiting while in a preemptible-RCU read-side
* critical section, clean up if so. No need to issue warnings,
* as debug_check_no_locks_held() already does this if lockdep
diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c
index 37a5444204d..547b1fe5b05 100644
--- a/kernel/rcutiny.c
+++ b/kernel/rcutiny.c
@@ -172,7 +172,7 @@ void rcu_irq_enter(void)
local_irq_restore(flags);
}
-#ifdef CONFIG_PROVE_RCU
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
/*
* Test whether RCU thinks that the current CPU is idle.
@@ -183,7 +183,7 @@ int rcu_is_cpu_idle(void)
}
EXPORT_SYMBOL(rcu_is_cpu_idle);
-#endif /* #ifdef CONFIG_PROVE_RCU */
+#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
/*
* Test whether the current CPU was interrupted from idle. Nested
diff --git a/kernel/rcutiny_plugin.h b/kernel/rcutiny_plugin.h
index fc31a2d6510..918fd1e8509 100644
--- a/kernel/rcutiny_plugin.h
+++ b/kernel/rcutiny_plugin.h
@@ -132,7 +132,6 @@ static struct rcu_preempt_ctrlblk rcu_preempt_ctrlblk = {
RCU_TRACE(.rcb.name = "rcu_preempt")
};
-static void rcu_read_unlock_special(struct task_struct *t);
static int rcu_preempted_readers_exp(void);
static void rcu_report_exp_done(void);
@@ -351,8 +350,9 @@ static int rcu_initiate_boost(void)
rcu_preempt_ctrlblk.boost_tasks =
rcu_preempt_ctrlblk.gp_tasks;
invoke_rcu_callbacks();
- } else
+ } else {
RCU_TRACE(rcu_initiate_boost_trace());
+ }
return 1;
}
@@ -527,23 +527,11 @@ void rcu_preempt_note_context_switch(void)
}
/*
- * Tiny-preemptible RCU implementation for rcu_read_lock().
- * Just increment ->rcu_read_lock_nesting, shared state will be updated
- * if we block.
- */
-void __rcu_read_lock(void)
-{
- current->rcu_read_lock_nesting++;
- barrier(); /* needed if we ever invoke rcu_read_lock in rcutiny.c */
-}
-EXPORT_SYMBOL_GPL(__rcu_read_lock);
-
-/*
* Handle special cases during rcu_read_unlock(), such as needing to
* notify RCU core processing or task having blocked during the RCU
* read-side critical section.
*/
-static noinline void rcu_read_unlock_special(struct task_struct *t)
+void rcu_read_unlock_special(struct task_struct *t)
{
int empty;
int empty_exp;
@@ -627,38 +615,6 @@ static noinline void rcu_read_unlock_special(struct task_struct *t)
}
/*
- * Tiny-preemptible RCU implementation for rcu_read_unlock().
- * Decrement ->rcu_read_lock_nesting. If the result is zero (outermost
- * rcu_read_unlock()) and ->rcu_read_unlock_special is non-zero, then
- * invoke rcu_read_unlock_special() to clean up after a context switch
- * in an RCU read-side critical section and other special cases.
- */
-void __rcu_read_unlock(void)
-{
- struct task_struct *t = current;
-
- barrier(); /* needed if we ever invoke rcu_read_unlock in rcutiny.c */
- if (t->rcu_read_lock_nesting != 1)
- --t->rcu_read_lock_nesting;
- else {
- t->rcu_read_lock_nesting = INT_MIN;
- barrier(); /* assign before ->rcu_read_unlock_special load */
- if (unlikely(ACCESS_ONCE(t->rcu_read_unlock_special)))
- rcu_read_unlock_special(t);
- barrier(); /* ->rcu_read_unlock_special load before assign */
- t->rcu_read_lock_nesting = 0;
- }
-#ifdef CONFIG_PROVE_LOCKING
- {
- int rrln = ACCESS_ONCE(t->rcu_read_lock_nesting);
-
- WARN_ON_ONCE(rrln < 0 && rrln > INT_MIN / 2);
- }
-#endif /* #ifdef CONFIG_PROVE_LOCKING */
-}
-EXPORT_SYMBOL_GPL(__rcu_read_unlock);
-
-/*
* Check for a quiescent state from the current CPU. When a task blocks,
* the task is recorded in the rcu_preempt_ctrlblk structure, which is
* checked elsewhere. This is called from the scheduling-clock interrupt.
@@ -823,9 +779,9 @@ void synchronize_rcu_expedited(void)
rpcp->exp_tasks = NULL;
/* Wait for tail of ->blkd_tasks list to drain. */
- if (!rcu_preempted_readers_exp())
+ if (!rcu_preempted_readers_exp()) {
local_irq_restore(flags);
- else {
+ } else {
rcu_initiate_boost();
local_irq_restore(flags);
wait_event(sync_rcu_preempt_exp_wq,
@@ -846,8 +802,6 @@ EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
*/
int rcu_preempt_needs_cpu(void)
{
- if (!rcu_preempt_running_reader())
- rcu_preempt_cpu_qs();
return rcu_preempt_ctrlblk.rcb.rcucblist != NULL;
}
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index e66b34ab755..25b15033c61 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
@@ -49,8 +49,7 @@
#include <asm/byteorder.h>
MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Paul E. McKenney <paulmck@us.ibm.com> and "
- "Josh Triplett <josh@freedesktop.org>");
+MODULE_AUTHOR("Paul E. McKenney <paulmck@us.ibm.com> and Josh Triplett <josh@freedesktop.org>");
static int nreaders = -1; /* # reader threads, defaults to 2*ncpus */
static int nfakewriters = 4; /* # fake writer threads */
@@ -206,6 +205,7 @@ static unsigned long boost_starttime; /* jiffies of next boost test start. */
DEFINE_MUTEX(boost_mutex); /* protect setting boost_starttime */
/* and boost task create/destroy. */
static atomic_t barrier_cbs_count; /* Barrier callbacks registered. */
+static bool barrier_phase; /* Test phase. */
static atomic_t barrier_cbs_invoked; /* Barrier callbacks invoked. */
static wait_queue_head_t *barrier_cbs_wq; /* Coordinate barrier testing. */
static DECLARE_WAIT_QUEUE_HEAD(barrier_wq);
@@ -407,8 +407,9 @@ rcu_torture_cb(struct rcu_head *p)
if (++rp->rtort_pipe_count >= RCU_TORTURE_PIPE_LEN) {
rp->rtort_mbtest = 0;
rcu_torture_free(rp);
- } else
+ } else {
cur_ops->deferred_free(rp);
+ }
}
static int rcu_no_completed(void)
@@ -635,6 +636,17 @@ static void srcu_torture_synchronize(void)
synchronize_srcu(&srcu_ctl);
}
+static void srcu_torture_call(struct rcu_head *head,
+ void (*func)(struct rcu_head *head))
+{
+ call_srcu(&srcu_ctl, head, func);
+}
+
+static void srcu_torture_barrier(void)
+{
+ srcu_barrier(&srcu_ctl);
+}
+
static int srcu_torture_stats(char *page)
{
int cnt = 0;
@@ -661,8 +673,8 @@ static struct rcu_torture_ops srcu_ops = {
.completed = srcu_torture_completed,
.deferred_free = srcu_torture_deferred_free,
.sync = srcu_torture_synchronize,
- .call = NULL,
- .cb_barrier = NULL,
+ .call = srcu_torture_call,
+ .cb_barrier = srcu_torture_barrier,
.stats = srcu_torture_stats,
.name = "srcu"
};
@@ -1013,7 +1025,11 @@ rcu_torture_fakewriter(void *arg)
do {
schedule_timeout_uninterruptible(1 + rcu_random(&rand)%10);
udelay(rcu_random(&rand) & 0x3ff);
- cur_ops->sync();
+ if (cur_ops->cb_barrier != NULL &&
+ rcu_random(&rand) % (nfakewriters * 8) == 0)
+ cur_ops->cb_barrier();
+ else
+ cur_ops->sync();
rcu_stutter_wait("rcu_torture_fakewriter");
} while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP);
@@ -1183,27 +1199,27 @@ rcu_torture_printk(char *page)
}
cnt += sprintf(&page[cnt], "%s%s ", torture_type, TORTURE_FLAG);
cnt += sprintf(&page[cnt],
- "rtc: %p ver: %lu tfle: %d rta: %d rtaf: %d rtf: %d "
- "rtmbe: %d rtbke: %ld rtbre: %ld "
- "rtbf: %ld rtb: %ld nt: %ld "
- "onoff: %ld/%ld:%ld/%ld "
- "barrier: %ld/%ld:%ld",
+ "rtc: %p ver: %lu tfle: %d rta: %d rtaf: %d rtf: %d ",
rcu_torture_current,
rcu_torture_current_version,
list_empty(&rcu_torture_freelist),
atomic_read(&n_rcu_torture_alloc),
atomic_read(&n_rcu_torture_alloc_fail),
- atomic_read(&n_rcu_torture_free),
+ atomic_read(&n_rcu_torture_free));
+ cnt += sprintf(&page[cnt], "rtmbe: %d rtbke: %ld rtbre: %ld ",
atomic_read(&n_rcu_torture_mberror),
n_rcu_torture_boost_ktrerror,
- n_rcu_torture_boost_rterror,
+ n_rcu_torture_boost_rterror);
+ cnt += sprintf(&page[cnt], "rtbf: %ld rtb: %ld nt: %ld ",
n_rcu_torture_boost_failure,
n_rcu_torture_boosts,
- n_rcu_torture_timers,
+ n_rcu_torture_timers);
+ cnt += sprintf(&page[cnt], "onoff: %ld/%ld:%ld/%ld ",
n_online_successes,
n_online_attempts,
n_offline_successes,
- n_offline_attempts,
+ n_offline_attempts);
+ cnt += sprintf(&page[cnt], "barrier: %ld/%ld:%ld",
n_barrier_successes,
n_barrier_attempts,
n_rcu_torture_barrier_error);
@@ -1445,8 +1461,7 @@ rcu_torture_shutdown(void *arg)
delta = shutdown_time - jiffies_snap;
if (verbose)
printk(KERN_ALERT "%s" TORTURE_FLAG
- "rcu_torture_shutdown task: %lu "
- "jiffies remaining\n",
+ "rcu_torture_shutdown task: %lu jiffies remaining\n",
torture_type, delta);
schedule_timeout_interruptible(delta);
jiffies_snap = ACCESS_ONCE(jiffies);
@@ -1498,8 +1513,7 @@ rcu_torture_onoff(void *arg)
if (cpu_down(cpu) == 0) {
if (verbose)
printk(KERN_ALERT "%s" TORTURE_FLAG
- "rcu_torture_onoff task: "
- "offlined %d\n",
+ "rcu_torture_onoff task: offlined %d\n",
torture_type, cpu);
n_offline_successes++;
}
@@ -1512,8 +1526,7 @@ rcu_torture_onoff(void *arg)
if (cpu_up(cpu) == 0) {
if (verbose)
printk(KERN_ALERT "%s" TORTURE_FLAG
- "rcu_torture_onoff task: "
- "onlined %d\n",
+ "rcu_torture_onoff task: onlined %d\n",
torture_type, cpu);
n_online_successes++;
}
@@ -1631,6 +1644,7 @@ void rcu_torture_barrier_cbf(struct rcu_head *rcu)
static int rcu_torture_barrier_cbs(void *arg)
{
long myid = (long)arg;
+ bool lastphase = 0;
struct rcu_head rcu;
init_rcu_head_on_stack(&rcu);
@@ -1638,9 +1652,11 @@ static int rcu_torture_barrier_cbs(void *arg)
set_user_nice(current, 19);
do {
wait_event(barrier_cbs_wq[myid],
- atomic_read(&barrier_cbs_count) == n_barrier_cbs ||
+ barrier_phase != lastphase ||
kthread_should_stop() ||
fullstop != FULLSTOP_DONTSTOP);
+ lastphase = barrier_phase;
+ smp_mb(); /* ensure barrier_phase load before ->call(). */
if (kthread_should_stop() || fullstop != FULLSTOP_DONTSTOP)
break;
cur_ops->call(&rcu, rcu_torture_barrier_cbf);
@@ -1665,7 +1681,8 @@ static int rcu_torture_barrier(void *arg)
do {
atomic_set(&barrier_cbs_invoked, 0);
atomic_set(&barrier_cbs_count, n_barrier_cbs);
- /* wake_up() path contains the required barriers. */
+ smp_mb(); /* Ensure barrier_phase after prior assignments. */
+ barrier_phase = !barrier_phase;
for (i = 0; i < n_barrier_cbs; i++)
wake_up(&barrier_cbs_wq[i]);
wait_event(barrier_wq,
@@ -1684,7 +1701,7 @@ static int rcu_torture_barrier(void *arg)
schedule_timeout_interruptible(HZ / 10);
} while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP);
VERBOSE_PRINTK_STRING("rcu_torture_barrier task stopping");
- rcutorture_shutdown_absorb("rcu_torture_barrier_cbs");
+ rcutorture_shutdown_absorb("rcu_torture_barrier");
while (!kthread_should_stop())
schedule_timeout_interruptible(1);
return 0;
@@ -1908,8 +1925,8 @@ rcu_torture_init(void)
static struct rcu_torture_ops *torture_ops[] =
{ &rcu_ops, &rcu_sync_ops, &rcu_expedited_ops,
&rcu_bh_ops, &rcu_bh_sync_ops, &rcu_bh_expedited_ops,
- &srcu_ops, &srcu_sync_ops, &srcu_raw_ops,
- &srcu_raw_sync_ops, &srcu_expedited_ops,
+ &srcu_ops, &srcu_sync_ops, &srcu_expedited_ops,
+ &srcu_raw_ops, &srcu_raw_sync_ops,
&sched_ops, &sched_sync_ops, &sched_expedited_ops, };
mutex_lock(&fullstop_mutex);
@@ -1931,8 +1948,7 @@ rcu_torture_init(void)
return -EINVAL;
}
if (cur_ops->fqs == NULL && fqs_duration != 0) {
- printk(KERN_ALERT "rcu-torture: ->fqs NULL and non-zero "
- "fqs_duration, fqs disabled.\n");
+ printk(KERN_ALERT "rcu-torture: ->fqs NULL and non-zero fqs_duration, fqs disabled.\n");
fqs_duration = 0;
}
if (cur_ops->init)
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 4b97bba7396..f280e542e3e 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -60,36 +60,44 @@
/* Data structures. */
-static struct lock_class_key rcu_node_class[NUM_RCU_LVLS];
-
-#define RCU_STATE_INITIALIZER(structname) { \
- .level = { &structname##_state.node[0] }, \
- .levelcnt = { \
- NUM_RCU_LVL_0, /* root of hierarchy. */ \
- NUM_RCU_LVL_1, \
- NUM_RCU_LVL_2, \
- NUM_RCU_LVL_3, \
- NUM_RCU_LVL_4, /* == MAX_RCU_LVLS */ \
- }, \
+static struct lock_class_key rcu_node_class[RCU_NUM_LVLS];
+
+#define RCU_STATE_INITIALIZER(sname, cr) { \
+ .level = { &sname##_state.node[0] }, \
+ .call = cr, \
.fqs_state = RCU_GP_IDLE, \
.gpnum = -300, \
.completed = -300, \
- .onofflock = __RAW_SPIN_LOCK_UNLOCKED(&structname##_state.onofflock), \
- .orphan_nxttail = &structname##_state.orphan_nxtlist, \
- .orphan_donetail = &structname##_state.orphan_donelist, \
- .fqslock = __RAW_SPIN_LOCK_UNLOCKED(&structname##_state.fqslock), \
- .n_force_qs = 0, \
- .n_force_qs_ngp = 0, \
- .name = #structname, \
+ .onofflock = __RAW_SPIN_LOCK_UNLOCKED(&sname##_state.onofflock), \
+ .orphan_nxttail = &sname##_state.orphan_nxtlist, \
+ .orphan_donetail = &sname##_state.orphan_donelist, \
+ .barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \
+ .fqslock = __RAW_SPIN_LOCK_UNLOCKED(&sname##_state.fqslock), \
+ .name = #sname, \
}
-struct rcu_state rcu_sched_state = RCU_STATE_INITIALIZER(rcu_sched);
+struct rcu_state rcu_sched_state =
+ RCU_STATE_INITIALIZER(rcu_sched, call_rcu_sched);
DEFINE_PER_CPU(struct rcu_data, rcu_sched_data);
-struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh);
+struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh, call_rcu_bh);
DEFINE_PER_CPU(struct rcu_data, rcu_bh_data);
static struct rcu_state *rcu_state;
+LIST_HEAD(rcu_struct_flavors);
+
+/* Increase (but not decrease) the CONFIG_RCU_FANOUT_LEAF at boot time. */
+static int rcu_fanout_leaf = CONFIG_RCU_FANOUT_LEAF;
+module_param(rcu_fanout_leaf, int, 0);
+int rcu_num_lvls __read_mostly = RCU_NUM_LVLS;
+static int num_rcu_lvl[] = { /* Number of rcu_nodes at specified level. */
+ NUM_RCU_LVL_0,
+ NUM_RCU_LVL_1,
+ NUM_RCU_LVL_2,
+ NUM_RCU_LVL_3,
+ NUM_RCU_LVL_4,
+};
+int rcu_num_nodes __read_mostly = NUM_RCU_NODES; /* Total # rcu_nodes in use. */
/*
* The rcu_scheduler_active variable transitions from zero to one just