aboutsummaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2012-12-11 18:10:49 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2012-12-11 18:10:49 -0800
commit37ea95a959d4a49846ecbf2dd45326b6b34bf049 (patch)
tree43791e1244ce06d8ca18ecbfd0b0f6dcb86ebb8b /kernel
parentde0c276b31538fcd56611132f20b63eae2891876 (diff)
parent630e1e0bcddfda9566462d4f9a0d58b31c29d467 (diff)
Merge branch 'core-rcu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull RCU update from Ingo Molnar: "The major features of this tree are: 1. A first version of no-callbacks CPUs. This version prohibits offlining CPU 0, but only when enabled via CONFIG_RCU_NOCB_CPU=y. Relaxing this constraint is in progress, but not yet ready for prime time. These commits were posted to LKML at https://lkml.org/lkml/2012/10/30/724. 2. Changes to SRCU that allows statically initialized srcu_struct structures. These commits were posted to LKML at https://lkml.org/lkml/2012/10/30/296. 3. Restructuring of RCU's debugfs output. These commits were posted to LKML at https://lkml.org/lkml/2012/10/30/341. 4. Additional CPU-hotplug/RCU improvements, posted to LKML at https://lkml.org/lkml/2012/10/30/327. Note that the commit eliminating __stop_machine() was judged to be too-high of risk, so is deferred to 3.9. 5. Changes to RCU's idle interface, most notably a new module parameter that redirects normal grace-period operations to their expedited equivalents. These were posted to LKML at https://lkml.org/lkml/2012/10/30/739. 6. Additional diagnostics for RCU's CPU stall warning facility, posted to LKML at https://lkml.org/lkml/2012/10/30/315. The most notable change reduces the default RCU CPU stall-warning time from 60 seconds to 21 seconds, so that it once again happens sooner than the softlockup timeout. 7. Documentation updates, which were posted to LKML at https://lkml.org/lkml/2012/10/30/280. A couple of late-breaking changes were posted at https://lkml.org/lkml/2012/11/16/634 and https://lkml.org/lkml/2012/11/16/547. 8. Miscellaneous fixes, which were posted to LKML at https://lkml.org/lkml/2012/10/30/309. 9. Finally, a fix for an lockdep-RCU splat was posted to LKML at https://lkml.org/lkml/2012/11/7/486." * 'core-rcu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (49 commits) context_tracking: New context tracking susbsystem sched: Mark RCU reader in sched_show_task() rcu: Separate accounting of callbacks from callback-free CPUs rcu: Add callback-free CPUs rcu: Add documentation for the new rcuexp debugfs trace file rcu: Update documentation for TREE_RCU debugfs tracing rcu: Reduce default RCU CPU stall warning timeout rcu: Fix TINY_RCU rcu_is_cpu_rrupt_from_idle check rcu: Clarify memory-ordering properties of grace-period primitives rcu: Add new rcutorture module parameters to start/end test messages rcu: Remove list_for_each_continue_rcu() rcu: Fix batch-limit size problem rcu: Add tracing for synchronize_sched_expedited() rcu: Remove old debugfs interfaces and also RCU flavor name rcu: split 'rcuhier' to each flavor rcu: split 'rcugp' to each flavor rcu: split 'rcuboost' to each flavor rcu: split 'rcubarrier' to each flavor rcu: Fix tracing formatting rcu: Remove the interface "rcudata.csv" ...
Diffstat (limited to 'kernel')
-rw-r--r--kernel/Makefile1
-rw-r--r--kernel/context_tracking.c83
-rw-r--r--kernel/ksysfs.c18
-rw-r--r--kernel/rcu.h2
-rw-r--r--kernel/rcupdate.c3
-rw-r--r--kernel/rcutiny.c2
-rw-r--r--kernel/rcutiny_plugin.h5
-rw-r--r--kernel/rcutorture.c54
-rw-r--r--kernel/rcutree.c347
-rw-r--r--kernel/rcutree.h67
-rw-r--r--kernel/rcutree_plugin.h415
-rw-r--r--kernel/rcutree_trace.c330
-rw-r--r--kernel/sched/core.c23
-rw-r--r--kernel/srcu.c16
14 files changed, 1014 insertions, 352 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index 86e3285ae7e..ac0d533eb7d 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -110,6 +110,7 @@ obj-$(CONFIG_USER_RETURN_NOTIFIER) += user-return-notifier.o
obj-$(CONFIG_PADATA) += padata.o
obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
obj-$(CONFIG_JUMP_LABEL) += jump_label.o
+obj-$(CONFIG_CONTEXT_TRACKING) += context_tracking.o
$(obj)/configs.o: $(obj)/config_data.h
diff --git a/kernel/context_tracking.c b/kernel/context_tracking.c
new file mode 100644
index 00000000000..e0e07fd5550
--- /dev/null
+++ b/kernel/context_tracking.c
@@ -0,0 +1,83 @@
+#include <linux/context_tracking.h>
+#include <linux/rcupdate.h>
+#include <linux/sched.h>
+#include <linux/percpu.h>
+#include <linux/hardirq.h>
+
+struct context_tracking {
+ /*
+ * When active is false, hooks are not set to
+ * minimize overhead: TIF flags are cleared
+ * and calls to user_enter/exit are ignored. This
+ * may be further optimized using static keys.
+ */
+ bool active;
+ enum {
+ IN_KERNEL = 0,
+ IN_USER,
+ } state;
+};
+
+static DEFINE_PER_CPU(struct context_tracking, context_tracking) = {
+#ifdef CONFIG_CONTEXT_TRACKING_FORCE
+ .active = true,
+#endif
+};
+
+void user_enter(void)
+{
+ unsigned long flags;
+
+ /*
+ * Some contexts may involve an exception occuring in an irq,
+ * leading to that nesting:
+ * rcu_irq_enter() rcu_user_exit() rcu_user_exit() rcu_irq_exit()
+ * This would mess up the dyntick_nesting count though. And rcu_irq_*()
+ * helpers are enough to protect RCU uses inside the exception. So
+ * just return immediately if we detect we are in an IRQ.
+ */
+ if (in_interrupt())
+ return;
+
+ WARN_ON_ONCE(!current->mm);
+
+ local_irq_save(flags);
+ if (__this_cpu_read(context_tracking.active) &&
+ __this_cpu_read(context_tracking.state) != IN_USER) {
+ __this_cpu_write(context_tracking.state, IN_USER);
+ rcu_user_enter();
+ }
+ local_irq_restore(flags);
+}
+
+void user_exit(void)
+{
+ unsigned long flags;
+
+ /*
+ * Some contexts may involve an exception occuring in an irq,
+ * leading to that nesting:
+ * rcu_irq_enter() rcu_user_exit() rcu_user_exit() rcu_irq_exit()
+ * This would mess up the dyntick_nesting count though. And rcu_irq_*()
+ * helpers are enough to protect RCU uses inside the exception. So
+ * just return immediately if we detect we are in an IRQ.
+ */
+ if (in_interrupt())
+ return;
+
+ local_irq_save(flags);
+ if (__this_cpu_read(context_tracking.state) == IN_USER) {
+ __this_cpu_write(context_tracking.state, IN_KERNEL);
+ rcu_user_exit();
+ }
+ local_irq_restore(flags);
+}
+
+void context_tracking_task_switch(struct task_struct *prev,
+ struct task_struct *next)
+{
+ if (__this_cpu_read(context_tracking.active)) {
+ clear_tsk_thread_flag(prev, TIF_NOHZ);
+ set_tsk_thread_flag(next, TIF_NOHZ);
+ }
+}
diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c
index c01eac66c0c..6ada93c23a9 100644
--- a/kernel/ksysfs.c
+++ b/kernel/ksysfs.c
@@ -140,6 +140,23 @@ static ssize_t fscaps_show(struct kobject *kobj,
}
KERNEL_ATTR_RO(fscaps);
+int rcu_expedited;
+static ssize_t rcu_expedited_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return sprintf(buf, "%d\n", rcu_expedited);
+}
+static ssize_t rcu_expedited_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t count)
+{
+ if (kstrtoint(buf, 0, &rcu_expedited))
+ return -EINVAL;
+
+ return count;
+}
+KERNEL_ATTR_RW(rcu_expedited);
+
/*
* Make /sys/kernel/notes give the raw contents of our kernel .notes section.
*/
@@ -179,6 +196,7 @@ static struct attribute * kernel_attrs[] = {
&kexec_crash_size_attr.attr,
&vmcoreinfo_attr.attr,
#endif
+ &rcu_expedited_attr.attr,
NULL
};
diff --git a/kernel/rcu.h b/kernel/rcu.h
index 8ba99cdc651..20dfba576c2 100644
--- a/kernel/rcu.h
+++ b/kernel/rcu.h
@@ -109,4 +109,6 @@ static inline bool __rcu_reclaim(char *rn, struct rcu_head *head)
}
}
+extern int rcu_expedited;
+
#endif /* __LINUX_RCU_H */
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index 29ca1c6da59..a2cf76177b4 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -46,12 +46,15 @@
#include <linux/export.h>
#include <linux/hardirq.h>
#include <linux/delay.h>
+#include <linux/module.h>
#define CREATE_TRACE_POINTS
#include <trace/events/rcu.h>
#include "rcu.h"
+module_param(rcu_expedited, int, 0);
+
#ifdef CONFIG_PREEMPT_RCU
/*
diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c
index e4c6a598d6f..e7dce58f9c2 100644
--- a/kernel/rcutiny.c
+++ b/kernel/rcutiny.c
@@ -195,7 +195,7 @@ EXPORT_SYMBOL(rcu_is_cpu_idle);
*/
int rcu_is_cpu_rrupt_from_idle(void)
{
- return rcu_dynticks_nesting <= 0;
+ return rcu_dynticks_nesting <= 1;
}
/*
diff --git a/kernel/rcutiny_plugin.h b/kernel/rcutiny_plugin.h
index 3d019028220..f85016a2309 100644
--- a/kernel/rcutiny_plugin.h
+++ b/kernel/rcutiny_plugin.h
@@ -706,7 +706,10 @@ void synchronize_rcu(void)
return;
/* Once we get past the fastpath checks, same code as rcu_barrier(). */
- rcu_barrier();
+ if (rcu_expedited)
+ synchronize_rcu_expedited();
+ else
+ rcu_barrier();
}
EXPORT_SYMBOL_GPL(synchronize_rcu);
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index aaa7b9f3532..31dea01c85f 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
@@ -339,7 +339,6 @@ rcu_stutter_wait(char *title)
struct rcu_torture_ops {
void (*init)(void);
- void (*cleanup)(void);
int (*readlock)(void);
void (*read_delay)(struct rcu_random_state *rrsp);
void (*readunlock)(int idx);
@@ -431,7 +430,6 @@ static void rcu_torture_deferred_free(struct rcu_torture *p)
static struct rcu_torture_ops rcu_ops = {
.init = NULL,
- .cleanup = NULL,
.readlock = rcu_torture_read_lock,
.read_delay = rcu_read_delay,
.readunlock = rcu_torture_read_unlock,
@@ -475,7 +473,6 @@ static void rcu_sync_torture_init(void)
static struct rcu_torture_ops rcu_sync_ops = {
.init = rcu_sync_torture_init,
- .cleanup = NULL,
.readlock = rcu_torture_read_lock,
.read_delay = rcu_read_delay,
.readunlock = rcu_torture_read_unlock,
@@ -493,7 +490,6 @@ static struct rcu_torture_ops rcu_sync_ops = {
static struct rcu_torture_ops rcu_expedited_ops = {
.init = rcu_sync_torture_init,
- .cleanup = NULL,
.readlock = rcu_torture_read_lock,
.read_delay = rcu_read_delay, /* just reuse rcu's version. */
.readunlock = rcu_torture_read_unlock,
@@ -536,7 +532,6 @@ static void rcu_bh_torture_deferred_free(struct rcu_torture *p)
static struct rcu_torture_ops rcu_bh_ops = {
.init = NULL,
- .cleanup = NULL,
.readlock = rcu_bh_torture_read_lock,
.read_delay = rcu_read_delay, /* just reuse rcu's version. */
.readunlock = rcu_bh_torture_read_unlock,
@@ -553,7 +548,6 @@ static struct rcu_torture_ops rcu_bh_ops = {
static struct rcu_torture_ops rcu_bh_sync_ops = {
.init = rcu_sync_torture_init,
- .cleanup = NULL,
.readlock = rcu_bh_torture_read_lock,
.read_delay = rcu_read_delay, /* just reuse rcu's version. */
.readunlock = rcu_bh_torture_read_unlock,
@@ -570,7 +564,6 @@ static struct rcu_torture_ops rcu_bh_sync_ops = {
static struct rcu_torture_ops rcu_bh_expedited_ops = {
.init = rcu_sync_torture_init,
- .cleanup = NULL,
.readlock = rcu_bh_torture_read_lock,
.read_delay = rcu_read_delay, /* just reuse rcu's version. */
.readunlock = rcu_bh_torture_read_unlock,
@@ -589,19 +582,7 @@ static struct rcu_torture_ops rcu_bh_expedited_ops = {
* Definitions for srcu torture testing.
*/
-static struct srcu_struct srcu_ctl;
-
-static void srcu_torture_init(void)
-{
- init_srcu_struct(&srcu_ctl);
- rcu_sync_torture_init();
-}
-
-static void srcu_torture_cleanup(void)
-{
- synchronize_srcu(&srcu_ctl);
- cleanup_srcu_struct(&srcu_ctl);
-}
+DEFINE_STATIC_SRCU(srcu_ctl);
static int srcu_torture_read_lock(void) __acquires(&srcu_ctl)
{
@@ -672,8 +653,7 @@ static int srcu_torture_stats(char *page)
}
static struct rcu_torture_ops srcu_ops = {
- .init = srcu_torture_init,
- .cleanup = srcu_torture_cleanup,
+ .init = rcu_sync_torture_init,
.readlock = srcu_torture_read_lock,
.read_delay = srcu_read_delay,
.readunlock = srcu_torture_read_unlock,
@@ -687,8 +667,7 @@ static struct rcu_torture_ops srcu_ops = {
};
static struct rcu_torture_ops srcu_sync_ops = {
- .init = srcu_torture_init,
- .cleanup = srcu_torture_cleanup,
+ .init = rcu_sync_torture_init,
.readlock = srcu_torture_read_lock,
.read_delay = srcu_read_delay,
.readunlock = srcu_torture_read_unlock,
@@ -712,8 +691,7 @@ static void srcu_torture_read_unlock_raw(int idx) __releases(&srcu_ctl)
}
static struct rcu_torture_ops srcu_raw_ops = {
- .init = srcu_torture_init,
- .cleanup = srcu_torture_cleanup,
+ .init = rcu_sync_torture_init,
.readlock = srcu_torture_read_lock_raw,
.read_delay = srcu_read_delay,
.readunlock = srcu_torture_read_unlock_raw,
@@ -727,8 +705,7 @@ static struct rcu_torture_ops srcu_raw_ops = {
};
static struct rcu_torture_ops srcu_raw_sync_ops = {
- .init = srcu_torture_init,
- .cleanup = srcu_torture_cleanup,
+ .init = rcu_sync_torture_init,
.readlock = srcu_torture_read_lock_raw,
.read_delay = srcu_read_delay,
.readunlock = srcu_torture_read_unlock_raw,
@@ -747,8 +724,7 @@ static void srcu_torture_synchronize_expedited(void)
}
static struct rcu_torture_ops srcu_expedited_ops = {
- .init = srcu_torture_init,
- .cleanup = srcu_torture_cleanup,
+ .init = rcu_sync_torture_init,
.readlock = srcu_torture_read_lock,
.read_delay = srcu_read_delay,
.readunlock = srcu_torture_read_unlock,
@@ -783,7 +759,6 @@ static void rcu_sched_torture_deferred_free(struct rcu_torture *p)
static struct rcu_torture_ops sched_ops = {
.init = rcu_sync_torture_init,
- .cleanup = NULL,
.readlock = sched_torture_read_lock,
.read_delay = rcu_read_delay, /* just reuse rcu's version. */
.readunlock = sched_torture_read_unlock,
@@ -799,7 +774,6 @@ static struct rcu_torture_ops sched_ops = {
static struct rcu_torture_ops sched_sync_ops = {
.init = rcu_sync_torture_init,
- .cleanup = NULL,
.readlock = sched_torture_read_lock,
.read_delay = rcu_read_delay, /* just reuse rcu's version. */
.readunlock = sched_torture_read_unlock,
@@ -814,7 +788,6 @@ static struct rcu_torture_ops sched_sync_ops = {
static struct rcu_torture_ops sched_expedited_ops = {
.init = rcu_sync_torture_init,
- .cleanup = NULL,
.readlock = sched_torture_read_lock,
.read_delay = rcu_read_delay, /* just reuse rcu's version. */
.readunlock = sched_torture_read_unlock,
@@ -1396,12 +1369,16 @@ rcu_torture_print_module_parms(struct rcu_torture_ops *cur_ops, char *tag)
"fqs_duration=%d fqs_holdoff=%d fqs_stutter=%d "
"test_boost=%d/%d test_boost_interval=%d "
"test_boost_duration=%d shutdown_secs=%d "
+ "stall_cpu=%d stall_cpu_holdoff=%d "
+ "n_barrier_cbs=%d "
"onoff_interval=%d onoff_holdoff=%d\n",
torture_type, tag, nrealreaders, nfakewriters,
stat_interval, verbose, test_no_idle_hz, shuffle_interval,
stutter, irqreader, fqs_duration, fqs_holdoff, fqs_stutter,
test_boost, cur_ops->can_boost,
test_boost_interval, test_boost_duration, shutdown_secs,
+ stall_cpu, stall_cpu_holdoff,
+ n_barrier_cbs,
onoff_interval, onoff_holdoff);
}
@@ -1502,6 +1479,7 @@ rcu_torture_onoff(void *arg)
unsigned long delta;
int maxcpu = -1;
DEFINE_RCU_RANDOM(rand);
+ int ret;
unsigned long starttime;
VERBOSE_PRINTK_STRING("rcu_torture_onoff task started");
@@ -1522,7 +1500,13 @@ rcu_torture_onoff(void *arg)
torture_type, cpu);
starttime = jiffies;
n_offline_attempts++;
- if (cpu_down(cpu) == 0) {
+ ret = cpu_down(cpu);
+ if (ret) {
+ if (verbose)
+ pr_alert("%s" TORTURE_FLAG
+ "rcu_torture_onoff task: offline %d failed: errno %d\n",
+ torture_type, cpu, ret);
+ } else {
if (verbose)
pr_alert("%s" TORTURE_FLAG
"rcu_torture_onoff task: offlined %d\n",
@@ -1936,8 +1920,6 @@ rcu_torture_cleanup(void)
rcu_torture_stats_print(); /* -After- the stats thread is stopped! */
- if (cur_ops->cleanup)
- cur_ops->cleanup();
if (atomic_read(&n_rcu_torture_error) || n_rcu_torture_barrier_error)
rcu_torture_print_module_parms(cur_ops, "End of test: FAILURE");
else if (n_online_successes != n_online_attempts ||
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 74df86bd920..e441b77b614 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -68,9 +68,9 @@ static struct lock_class_key rcu_fqs_class[RCU_NUM_LVLS];
.level = { &sname##_state.node[0] }, \
.call = cr, \
.fqs_state = RCU_GP_IDLE, \
- .gpnum = -300, \
- .completed = -300, \
- .onofflock = __RAW_SPIN_LOCK_UNLOCKED(&sname##_state.onofflock), \
+ .gpnum = 0UL - 300UL, \
+ .completed = 0UL - 300UL, \
+ .orphan_lock = __RAW_SPIN_LOCK_UNLOCKED(&sname##_state.orphan_lock), \
.orphan_nxttail = &sname##_state.orphan_nxtlist, \
.orphan_donetail = &sname##_state.orphan_donelist, \
.barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \
@@ -207,18 +207,15 @@ EXPORT_SYMBOL_GPL(rcu_note_context_switch);
DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = {
.dynticks_nesting = DYNTICK_TASK_EXIT_IDLE,
.dynticks = ATOMIC_INIT(1),
-#if defined(CONFIG_RCU_USER_QS) && !defined(CONFIG_RCU_USER_QS_FORCE)
- .ignore_user_qs = true,
-#endif
};
-static int blimit = 10; /* Maximum callbacks per rcu_do_batch. */
-static int qhimark = 10000; /* If this many pending, ignore blimit. */
-static int qlowmark = 100; /* Once only this many pending, use blimit. */
+static long blimit = 10; /* Maximum callbacks per rcu_do_batch. */
+static long qhimark = 10000; /* If this many pending, ignore blimit. */
+static long qlowmark = 100; /* Once only this many pending, use blimit. */
-module_param(blimit, int, 0444);
-module_param(qhimark, int, 0444);
-module_param(qlowmark, int, 0444);
+module_param(blimit, long, 0444);
+module_param(qhimark, long, 0444);
+module_param(qlowmark, long, 0444);
int rcu_cpu_stall_suppress __read_mostly; /* 1 = suppress stall warnings. */
int rcu_cpu_stall_timeout __read_mostly = CONFIG_RCU_CPU_STALL_TIMEOUT;
@@ -303,7 +300,8 @@ EXPORT_SYMBOL_GPL(rcu_sched_force_quiescent_state);
static int
cpu_has_callbacks_ready_to_invoke(struct rcu_data *rdp)
{
- return &rdp->nxtlist != rdp->nxttail[RCU_DONE_TAIL];
+ return &rdp->nxtlist != rdp->nxttail[RCU_DONE_TAIL] &&
+ rdp->nxttail[RCU_DONE_TAIL] != NULL;
}
/*
@@ -312,8 +310,11 @@ cpu_has_callbacks_ready_to_invoke(struct rcu_data *rdp)
static int
cpu_needs_another_gp(struct rcu_state *rsp, struct rcu_data *rdp)
{
- return *rdp->nxttail[RCU_DONE_TAIL +
- ACCESS_ONCE(rsp->completed) != rdp->completed] &&
+ struct rcu_head **ntp;
+
+ ntp = rdp->nxttail[RCU_DONE_TAIL +
+ (ACCESS_ONCE(rsp->completed) != rdp->completed)];
+ return rdp->nxttail[RCU_DONE_TAIL] && ntp && *ntp &&
!rcu_gp_in_progress(rsp);
}
@@ -416,29 +417,7 @@ EXPORT_SYMBOL_GPL(rcu_idle_enter);
*/
void rcu_user_enter(void)
{
- unsigned long flags;
- struct rcu_dynticks *rdtp;
-
- /*
- * Some contexts may involve an exception occuring in an irq,
- * leading to that nesting:
- * rcu_irq_enter() rcu_user_exit() rcu_user_exit() rcu_irq_exit()
- * This would mess up the dyntick_nesting count though. And rcu_irq_*()
- * helpers are enough to protect RCU uses inside the exception. So
- * just return immediately if we detect we are in an IRQ.
- */
- if (in_interrupt())
- return;
-
- WARN_ON_ONCE(!current->mm);
-
- local_irq_save(flags);
- rdtp = &__get_cpu_var(rcu_dynticks);
- if (!rdtp->ignore_user_qs && !rdtp->in_user) {
- rdtp->in_user = true;
- rcu_eqs_enter(true);
- }
- local_irq_restore(flags);
+ rcu_eqs_enter(1);
}
/**
@@ -575,27 +554,7 @@ EXPORT_SYMBOL_GPL(rcu_idle_exit);
*/
void rcu_user_exit(void)
{
- unsigned long flags;
- struct rcu_dynticks *rdtp;
-
- /*
- * Some contexts may involve an exception occuring in an irq,
- * leading to that nesting:
- * rcu_irq_enter() rcu_user_exit() rcu_user_exit() rcu_irq_exit()
- * This would mess up the dyntick_nesting count though. And rcu_irq_*()
- * helpers are enough to protect RCU uses inside the exception. So
- * just return immediately if we detect we are in an IRQ.
- */
- if (in_interrupt())
- return;
-
- local_irq_save(flags);
- rdtp = &__get_cpu_var(rcu_dynticks);
- if (rdtp->in_user) {
- rdtp->in_user = false;
- rcu_eqs_exit(true);
- }
- local_irq_restore(flags);
+ rcu_eqs_exit(1);
}
/**
@@ -718,21 +677,6 @@ int rcu_is_cpu_idle(void)
}
EXPORT_SYMBOL(rcu_is_cpu_idle);
-#ifdef CONFIG_RCU_USER_QS
-void rcu_user_hooks_switch(struct task_struct *prev,
- struct task_struct *next)
-{
- struct rcu_dynticks *rdtp;
-
- /* Interrupts are disabled in context switch */
- rdtp = &__get_cpu_var(rcu_dynticks);
- if (!rdtp->ignore_user_qs) {
- clear_tsk_thread_flag(prev, TIF_NOHZ);
- set_tsk_thread_flag(next, TIF_NOHZ);
- }
-}
-#endif /* #ifdef CONFIG_RCU_USER_QS */
-
#if defined(CONFIG_PROVE_RCU) && defined(CONFIG_HOTPLUG_CPU)
/*
@@ -873,6 +817,29 @@ static void record_gp_stall_check_time(struct rcu_state *rsp)
rsp->jiffies_stall = jiffies + jiffies_till_stall_check();
}
+/*
+ * Dump stacks of all tasks running on stalled CPUs. This is a fallback
+ * for architectures that do not implement trigger_all_cpu_backtrace().
+ * The NMI-triggered stack traces are more accurate because they are
+ * printed by the target CPU.
+ */
+static void rcu_dump_cpu_stacks(struct rcu_state *rsp)
+{
+ int cpu;
+ unsigned long flags;
+ struct rcu_node *rnp;
+
+ rcu_for_each_leaf_node(rsp, rnp) {
+ raw_spin_lock_irqsave(&rnp->lock, flags);
+ if (rnp->qsmask != 0) {
+ for (cpu = 0; cpu <= rnp->grphi - rnp->grplo; cpu++)
+ if (rnp->qsmask & (1UL << cpu))
+ dump_cpu_task(rnp->grplo + cpu);
+ }
+ raw_spin_unlock_irqrestore(&rnp->lock, flags);
+ }
+}
+
static void print_other_cpu_stall(struct rcu_state *rsp)
{
int cpu;
@@ -880,6 +847,7 @@ static void print_other_cpu_stall(struct rcu_state *rsp)
unsigned long flags;
int ndetected = 0;
struct rcu_node *rnp = rcu_get_root(rsp);
+ long totqlen = 0;
/* Only let one CPU complain about others per time interval. */
@@ -924,12 +892,15 @@ static void print_other_cpu_stall(struct rcu_state *rsp)
raw_spin_unlock_irqrestore(&rnp->lock, flags);
print_cpu_stall_info_end();
- printk(KERN_CONT "(detected by %d, t=%ld jiffies)\n",
- smp_processor_id(), (long)(jiffies - rsp->gp_start));
+ for_each_possible_cpu(cpu)
+ totqlen += per_cpu_ptr(rsp->rda, cpu)->qlen;
+ pr_cont("(detected by %d, t=%ld jiffies, g=%lu, c=%lu, q=%lu)\n",
+ smp_processor_id(), (long)(jiffies - rsp->gp_start),
+ rsp->gpnum, rsp->completed, totqlen);
if (ndetected == 0)
printk(KERN_ERR "INFO: Stall ended before state dump start\n");
else if (!trigger_all_cpu_backtrace())
- dump_stack();
+ rcu_dump_cpu_stacks(rsp);
/* Complain about tasks blocking the grace period. */
@@ -940,8 +911,10 @@ static void print_other_cpu_stall(struct rcu_state *rsp)
static void print_cpu_stall(struct rcu_state *rsp)
{
+ int cpu;
unsigned long flags;
struct rcu_node *rnp = rcu_get_root(rsp);
+ long totqlen = 0;
/*
* OK, time to rat on ourselves...
@@ -952,7 +925,10 @@ static void print_cpu_stall(struct rcu_state *rsp)
print_cpu_stall_info_begin();
print_cpu_stall_info(rsp, smp_processor_id());
print_cpu_stall_info_end();
- printk(KERN_CONT " (t=%lu jiffies)\n", jiffies - rsp->gp_start);
+ for_each_possible_cpu(cpu)
+ totqlen += per_cpu_ptr(rsp->rda, cpu)->qlen;
+ pr_cont(" (t=%lu jiffies g=%lu c=%lu q=%lu)\n",
+ jiffies - rsp->gp_start, rsp->gpnum, rsp->completed, totqlen);
if (!trigger_all_cpu_backtrace())
dump_stack();
@@ -1091,6 +1067,7 @@ static void init_callback_list(struct rcu_data *rdp)
rdp->nxtlist = NULL;
for (i = 0; i < RCU_NEXT_SIZE; i++)
rdp->nxttail[i] = &rdp->nxtlist;
+ init_nocb_callback_list(rdp);
}
/*
@@ -1404,15 +1381,37 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
!cpu_needs_another_gp(rsp, rdp)) {
/*
* Either we have not yet spawned the grace-period
- * task or this CPU does not need another grace period.
+ * task, this CPU does not need another grace period,
+ * or a grace period is already in progress.
* Either way, don't start a new grace period.
*/
raw_spin_unlock_irqrestore(&rnp->lock, flags);
return;
}
+ /*
+ * Because there is no grace period in progress right now,
+ * any callbacks we have up to this point will be satisfied
+ * by the next grace period. So promote all callbacks to be
+ * handled after the end of the next grace period. If the
+ * CPU is not yet aware of the end of the previous grace period,
+ * we need to allow for the callback advancement that will
+ * occur when it does become aware. Deadlock prevents us from
+ * making it aware at this point: We cannot acquire a leaf
+ * rcu_node ->lock while holding the root rcu_node ->lock.
+ */
+ rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL];
+ if (rdp->completed == rsp->completed)
+ rdp->nxttail[RCU_WAIT_TAIL] = rdp->nxttail[RCU_NEXT_TAIL];
+
rsp->gp_flags = RCU_GP_FLAG_INIT;
- raw_spin_unlock_irqrestore(&rnp->lock, flags);
+ raw_spin_unlock(&rnp->lock); /* Interrupts remain disabled. */
+
+ /* Ensure that CPU is aware of completion of last grace period. */
+ rcu_process_gp_end(rsp, rdp);
+ local_irq_restore(flags);
+
+ /* Wake up rcu_gp_kthread() to start the grace period. */
wake_up(&rsp->gp_wq);
}
@@ -1573,16 +1572,20 @@ rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp)
/*
* Send the specified CPU's RCU callbacks to the orphanage. The
* specified CPU must be offline, and the caller must hold the
- * ->onofflock.
+ * ->orphan_lock.
*/
static void
rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp,
struct rcu_node *rnp, struct rcu_data *rdp)
{
+ /* No-CBs CPUs do not have orphanable callbacks. */
+ if (is_nocb_cpu(rdp->cpu))
+ return;
+
/*
* Orphan the callbacks. First adjust the counts. This is safe
- * because ->onofflock excludes _rcu_barrier()'s adoption of
- * the callbacks, thus no memory barrier is required.
+ * because _rcu_barrier() excludes CPU-hotplug operations, so it
+ * cannot be running now. Thus no memory barrier is required.
*/
if (rdp->nxtlist != NULL) {
rsp->qlen_lazy += rdp->qlen_lazy;
@@ -1623,13 +1626,17 @@ rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp,
/*
* Adopt the RCU callbacks from the specified rcu_state structure's
- * orphanage. The caller must hold the ->onofflock.
+ * orphanage. The caller must hold the ->orphan_lock.
*/
static void rcu_adopt_orphan_cbs(struct rcu_state *rsp)
{
int i;
struct rcu_data *rdp = __this_cpu_ptr(rsp->rda);
+ /* No-CBs CPUs are handled specially. */
+ if (rcu_nocb_adopt_orphan_cbs(rsp, rdp))
+ return;
+
/* Do the accounting first. */
rdp->qlen_lazy += rsp->qlen_lazy;
rdp->qlen += rsp->qlen;
@@ -1702,7 +1709,7 @@ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
/* Exclude any attempts to start a new grace period. */
mutex_lock(&rsp->onoff_mutex);
- raw_spin_lock_irqsave(&rsp->onofflock, flags);
+ raw_spin_lock_irqsave(&rsp->orphan_lock, flags);
/* Orphan the dead CPU's callbacks, and adopt them if appropriate. */
rcu_send_cbs_to_orphanage(cpu, rsp, rnp, rdp);
@@ -1729,10 +1736,10 @@ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
/*
* We still hold the leaf rcu_node structure lock here, and
* irqs are still disabled. The reason for this subterfuge is
- * because invoking rcu_report_unblock_qs_rnp() with ->onofflock
+ * because invoking rcu_report_unblock_qs_rnp() with ->orphan_lock
* held leads to deadlock.
*/
- raw_spin_unlock(&rsp->onofflock); /* irqs remain disabled. */
+ raw_spin_unlock(&rsp->orphan_lock); /* irqs remain disabled. */
rnp = rdp->mynode;
if (need_report & RCU_OFL_TASKS_NORM_GP)
rcu_report_unblock_qs_rnp(rnp, flags);
@@ -1769,7 +1776,8 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
{
unsigned long flags;
struct rcu_head *next, *list, **tail;
- int bl, count, count_lazy, i;
+ long bl, count, count_lazy;
+ int i;
/* If no callbacks are ready, just return.*/
if (!cpu_has_callbacks_ready_to_invoke(rdp)) {
@@ -2107,9 +2115,15 @@ static void __call_rcu_core(struct rcu_state *rsp, struct rcu_data *rdp,
}
}
+/*
+ * Helper function for call_rcu() and friends. The cpu argument will
+ * normally be -1, indicating "currently running CPU". It may specify
+ * a CPU only if that CPU is a no-CBs CPU. Currently, only _rcu_barrier()
+ * is expected to specify a CPU.
+ */
static void
__call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
- struct rcu_state *rsp, bool lazy)
+ struct rcu_state *rsp, int cpu, bool lazy)
{
unsigned long flags;
struct rcu_data *rdp;
@@ -2129,9 +2143,14 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
rdp = this_cpu_ptr(rsp->rda);
/* Add the callback to our list. */
- if (unlikely(rdp->nxttail[RCU_NEXT_TAIL] == NULL)) {
+ if (unlikely(rdp->nxttail[RCU_NEXT_TAIL] == NULL) || cpu != -1) {
+ int offline;
+
+ if (cpu != -1)
+ rdp = per_cpu_ptr(rsp->rda, cpu);
+ offline = !__call_rcu_nocb(rdp, head, lazy);
+ WARN_ON_ONCE(offline);
/* _call_rcu() is illegal on offline CPU; leak the callback. */
- WARN_ON_ONCE(1);
local_irq_restore(flags);
return;
}
@@ -2160,7 +2179,7 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
*/
void call_rcu_sched(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
{
- __call_rcu(head, func, &rcu_sched_state, 0);
+ __call_rcu(head, func, &rcu_sched_state, -1, 0);
}
EXPORT_SYMBOL_GPL(call_rcu_sched);
@@ -2169,7 +2188,7 @@ EXPORT_SYMBOL_GPL(call_rcu_sched);
*/
void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
{
- __call_rcu(head, func, &rcu_bh_state, 0);
+ __call_rcu(head, func, &rcu_bh_state, -1, 0);
}
EXPORT_SYMBOL_GPL(call_rcu_bh);
@@ -2205,10 +2224,28 @@ static inline int rcu_blocking_is_gp(void)
* rcu_read_lock_sched().
*
* This means that all preempt_disable code sequences, including NMI and
- * hardware-interrupt handlers, in progress on entry will have completed
- * before this primitive returns. However, this does not guarantee that
- * softirq handlers will have completed, since in some kernels, these
- * handlers can run in process context, and can block.
+ * non-threaded hardware-interrupt handlers, in progress on entry will
+ * have completed before this primitive returns. However, this does not
+ * guarantee that softirq handlers will have completed, since in some
+ * kernels, these handlers can run in process context, and can block.
+ *
+ * Note that this guarantee implies further memory-ordering guarantees.
+ * On systems with more than one CPU, when synchronize_sched() returns,
+ * each CPU is guaranteed to have executed a full memory barrier since the
+ * end of its last RCU-sched read-side critical section whose beginning
+ * preceded the call to synchronize_sched(). In addition, each CPU having
+ * an RCU read-side critical section that extends beyond the return from
+ * synchronize_sched() is guaranteed to have executed a full memory barrier
+ * after the beginning of synchronize_sched() and before the beginning of
+ * that RCU read-side critical section. Note that these guarantees include
+ * CPUs that are offline, idle, or executing in user mode, as well as CPUs
+ * that are executing in the kernel.
+ *
+ * Furthermore, if CPU A invoked synchronize_sched(), which returned
+ * to its caller on CPU B, then both CPU A and CPU B are guaranteed
+ * to have executed a full memory barrier during the execution of
+ * synchronize_sched() -- even if CPU A and CPU B are the same CPU (but
+ * again only if the system has more than one CPU).
*
* This primitive provides the guarantees made by the (now removed)
* synchronize_kernel() API. In contrast, synchronize_rcu() only
@@ -2224,7 +2261,10 @@ void synchronize_sched(void)
"Illegal synchronize_sched() in RCU-sched read-side critical section");
if (rcu_blocking_is_gp())
return;
- wait_rcu_gp(call_rcu_sched);
+ if (rcu_expedited)
+ synchronize_sched_expedited();
+ else
+ wait_rcu_gp(call_rcu_sched);
}
EXPORT_SYMBOL_GPL(synchronize_sched);
@@ -2236,6 +2276,9 @@ EXPORT_SYMBOL_GPL(synchronize_sched);
* read-side critical sections have completed. RCU read-side critical
* sections are delimited by rcu_read_lock_bh() and rcu_read_unlock_bh(),
* and may be nested.
+ *
+ * See the description of synchronize_sched() for more detailed information
+ * on memory ordering guarantees.
*/
void synchronize_rcu_bh(void)
{
@@ -2245,13 +2288,13 @@ void synchronize_rcu_bh(void)
"Illegal synchronize_rcu_bh() in RCU-bh read-side critical section");
if (rcu_blocking_is_gp())
return;
- wait_rcu_gp(call_rcu_bh);
+ if (rcu_expedited)
+ synchronize_rcu_bh_expedited();
+ else
+ wait_rcu_gp(call_rcu_bh);
}
EXPORT_SYMBOL_GPL(synchronize_rcu_bh);
-static atomic_t sync_sched_expedited_started = ATOMIC_INIT(0);
-static atomic_t sync_sched_expedited_done = ATOMIC_INIT(0);
-
static int synchronize_sched_expedited_cpu_stop(void *data)
{
/*
@@ -2308,10 +2351,32 @@ static int synchronize_