aboutsummaryrefslogtreecommitdiff
path: root/kernel/rcu
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/rcu')
-rw-r--r--kernel/rcu/rcutorture.c217
-rw-r--r--kernel/rcu/tiny_plugin.h8
-rw-r--r--kernel/rcu/tree.c299
-rw-r--r--kernel/rcu/tree.h11
-rw-r--r--kernel/rcu/tree_plugin.h120
-rw-r--r--kernel/rcu/update.c18
6 files changed, 441 insertions, 232 deletions
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index bd30bc61bc0..7fa34f86e5b 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -58,9 +58,11 @@ torture_param(int, fqs_duration, 0,
"Duration of fqs bursts (us), 0 to disable");
torture_param(int, fqs_holdoff, 0, "Holdoff time within fqs bursts (us)");
torture_param(int, fqs_stutter, 3, "Wait time between fqs bursts (s)");
+torture_param(bool, gp_cond, false, "Use conditional/async GP wait primitives");
torture_param(bool, gp_exp, false, "Use expedited GP wait primitives");
torture_param(bool, gp_normal, false,
"Use normal (non-expedited) GP wait primitives");
+torture_param(bool, gp_sync, false, "Use synchronous GP wait primitives");
torture_param(int, irqreader, 1, "Allow RCU readers from irq handlers");
torture_param(int, n_barrier_cbs, 0,
"# of callbacks/kthreads for barrier testing");
@@ -138,6 +140,18 @@ static long n_barrier_attempts;
static long n_barrier_successes;
static struct list_head rcu_torture_removed;
+static int rcu_torture_writer_state;
+#define RTWS_FIXED_DELAY 0
+#define RTWS_DELAY 1
+#define RTWS_REPLACE 2
+#define RTWS_DEF_FREE 3
+#define RTWS_EXP_SYNC 4
+#define RTWS_COND_GET 5
+#define RTWS_COND_SYNC 6
+#define RTWS_SYNC 7
+#define RTWS_STUTTER 8
+#define RTWS_STOPPING 9
+
#if defined(MODULE) || defined(CONFIG_RCU_TORTURE_TEST_RUNNABLE)
#define RCUTORTURE_RUNNABLE_INIT 1
#else
@@ -214,6 +228,7 @@ rcu_torture_free(struct rcu_torture *p)
*/
struct rcu_torture_ops {
+ int ttype;
void (*init)(void);
int (*readlock)(void);
void (*read_delay)(struct torture_random_state *rrsp);
@@ -222,6 +237,8 @@ struct rcu_torture_ops {
void (*deferred_free)(struct rcu_torture *p);
void (*sync)(void);
void (*exp_sync)(void);
+ unsigned long (*get_state)(void);
+ void (*cond_sync)(unsigned long oldstate);
void (*call)(struct rcu_head *head, void (*func)(struct rcu_head *rcu));
void (*cb_barrier)(void);
void (*fqs)(void);
@@ -273,10 +290,48 @@ static int rcu_torture_completed(void)
return rcu_batches_completed();
}
+/*
+ * Update callback in the pipe. This should be invoked after a grace period.
+ */
+static bool
+rcu_torture_pipe_update_one(struct rcu_torture *rp)
+{
+ int i;
+
+ i = rp->rtort_pipe_count;
+ if (i > RCU_TORTURE_PIPE_LEN)
+ i = RCU_TORTURE_PIPE_LEN;
+ atomic_inc(&rcu_torture_wcount[i]);
+ if (++rp->rtort_pipe_count >= RCU_TORTURE_PIPE_LEN) {
+ rp->rtort_mbtest = 0;
+ return true;
+ }
+ return false;
+}
+
+/*
+ * Update all callbacks in the pipe. Suitable for synchronous grace-period
+ * primitives.
+ */
+static void
+rcu_torture_pipe_update(struct rcu_torture *old_rp)
+{
+ struct rcu_torture *rp;
+ struct rcu_torture *rp1;
+
+ if (old_rp)
+ list_add(&old_rp->rtort_free, &rcu_torture_removed);
+ list_for_each_entry_safe(rp, rp1, &rcu_torture_removed, rtort_free) {
+ if (rcu_torture_pipe_update_one(rp)) {
+ list_del(&rp->rtort_free);
+ rcu_torture_free(rp);
+ }
+ }
+}
+
static void
rcu_torture_cb(struct rcu_head *p)
{
- int i;
struct rcu_torture *rp = container_of(p, struct rcu_torture, rtort_rcu);
if (torture_must_stop_irq()) {
@@ -284,16 +339,10 @@ rcu_torture_cb(struct rcu_head *p)
/* The next initialization will pick up the pieces. */
return;
}
- i = rp->rtort_pipe_count;
- if (i > RCU_TORTURE_PIPE_LEN)
- i = RCU_TORTURE_PIPE_LEN;
- atomic_inc(&rcu_torture_wcount[i]);
- if (++rp->rtort_pipe_count >= RCU_TORTURE_PIPE_LEN) {
- rp->rtort_mbtest = 0;
+ if (rcu_torture_pipe_update_one(rp))
rcu_torture_free(rp);
- } else {
+ else
cur_ops->deferred_free(rp);
- }
}
static int rcu_no_completed(void)
@@ -312,6 +361,7 @@ static void rcu_sync_torture_init(void)
}
static struct rcu_torture_ops rcu_ops = {
+ .ttype = RCU_FLAVOR,
.init = rcu_sync_torture_init,
.readlock = rcu_torture_read_lock,
.read_delay = rcu_read_delay,
@@ -320,6 +370,8 @@ static struct rcu_torture_ops rcu_ops = {
.deferred_free = rcu_torture_deferred_free,
.sync = synchronize_rcu,
.exp_sync = synchronize_rcu_expedited,
+ .get_state = get_state_synchronize_rcu,
+ .cond_sync = cond_synchronize_rcu,
.call = call_rcu,
.cb_barrier = rcu_barrier,
.fqs = rcu_force_quiescent_state,
@@ -355,6 +407,7 @@ static void rcu_bh_torture_deferred_free(struct rcu_torture *p)
}
static struct rcu_torture_ops rcu_bh_ops = {
+ .ttype = RCU_BH_FLAVOR,
.init = rcu_sync_torture_init,
.readlock = rcu_bh_torture_read_lock,
.read_delay = rcu_read_delay, /* just reuse rcu's version. */
@@ -397,6 +450,7 @@ call_rcu_busted(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
}
static struct rcu_torture_ops rcu_busted_ops = {
+ .ttype = INVALID_RCU_FLAVOR,
.init = rcu_sync_torture_init,
.readlock = rcu_torture_read_lock,
.read_delay = rcu_read_delay, /* just reuse rcu's version. */
@@ -479,9 +533,11 @@ static void srcu_torture_stats(char *page)
page += sprintf(page, "%s%s per-CPU(idx=%d):",
torture_type, TORTURE_FLAG, idx);
for_each_possible_cpu(cpu) {
- page += sprintf(page, " %d(%lu,%lu)", cpu,
- per_cpu_ptr(srcu_ctl.per_cpu_ref, cpu)->c[!idx],
- per_cpu_ptr(srcu_ctl.per_cpu_ref, cpu)->c[idx]);
+ long c0, c1;
+
+ c0 = (long)per_cpu_ptr(srcu_ctl.per_cpu_ref, cpu)->c[!idx];
+ c1 = (long)per_cpu_ptr(srcu_ctl.per_cpu_ref, cpu)->c[idx];
+ page += sprintf(page, " %d(%ld,%ld)", cpu, c0, c1);
}
sprintf(page, "\n");
}
@@ -492,6 +548,7 @@ static void srcu_torture_synchronize_expedited(void)
}
static struct rcu_torture_ops srcu_ops = {
+ .ttype = SRCU_FLAVOR,
.init = rcu_sync_torture_init,
.readlock = srcu_torture_read_lock,
.read_delay = srcu_read_delay,
@@ -527,6 +584,7 @@ static void rcu_sched_torture_deferred_free(struct rcu_torture *p)
}
static struct rcu_torture_ops sched_ops = {
+ .ttype = RCU_SCHED_FLAVOR,
.init = rcu_sync_torture_init,
.readlock = sched_torture_read_lock,
.read_delay = rcu_read_delay, /* just reuse rcu's version. */
@@ -688,23 +746,59 @@ rcu_torture_fqs(void *arg)
static int
rcu_torture_writer(void *arg)
{
- bool exp;
+ unsigned long gp_snap;
+ bool gp_cond1 = gp_cond, gp_exp1 = gp_exp, gp_normal1 = gp_normal;
+ bool gp_sync1 = gp_sync;
int i;
struct rcu_torture *rp;
- struct rcu_torture *rp1;
struct rcu_torture *old_rp;
static DEFINE_TORTURE_RANDOM(rand);
+ int synctype[] = { RTWS_DEF_FREE, RTWS_EXP_SYNC,
+ RTWS_COND_GET, RTWS_SYNC };
+ int nsynctypes = 0;
VERBOSE_TOROUT_STRING("rcu_torture_writer task started");
- set_user_nice(current, MAX_NICE);
+
+ /* Initialize synctype[] array. If none set, take default. */
+ if (!gp_cond1 && !gp_exp1 && !gp_normal1 && !gp_sync)
+ gp_cond1 = gp_exp1 = gp_normal1 = gp_sync1 = true;
+ if (gp_cond1 && cur_ops->get_state && cur_ops->cond_sync)
+ synctype[nsynctypes++] = RTWS_COND_GET;
+ else if (gp_cond && (!cur_ops->get_state || !cur_ops->cond_sync))
+ pr_alert("rcu_torture_writer: gp_cond without primitives.\n");
+ if (gp_exp1 && cur_ops->exp_sync)
+ synctype[nsynctypes++] = RTWS_EXP_SYNC;
+ else if (gp_exp && !cur_ops->exp_sync)
+ pr_alert("rcu_torture_writer: gp_exp without primitives.\n");
+ if (gp_normal1 && cur_ops->deferred_free)
+ synctype[nsynctypes++] = RTWS_DEF_FREE;
+ else if (gp_normal && !cur_ops->deferred_free)
+ pr_alert("rcu_torture_writer: gp_normal without primitives.\n");
+ if (gp_sync1 && cur_ops->sync)
+ synctype[nsynctypes++] = RTWS_SYNC;
+ else if (gp_sync && !cur_ops->sync)
+ pr_alert("rcu_torture_writer: gp_sync without primitives.\n");
+ if (WARN_ONCE(nsynctypes == 0,
+ "rcu_torture_writer: No update-side primitives.\n")) {
+ /*
+ * No updates primitives, so don't try updating.
+ * The resulting test won't be testing much, hence the
+ * above WARN_ONCE().
+ */
+ rcu_torture_writer_state = RTWS_STOPPING;
+ torture_kthread_stopping("rcu_torture_writer");
+ }
do {
+ rcu_torture_writer_state = RTWS_FIXED_DELAY;
schedule_timeout_uninterruptible(1);
rp = rcu_torture_alloc();
if (rp == NULL)
continue;
rp->rtort_pipe_count = 0;
+ rcu_torture_writer_state = RTWS_DELAY;
udelay(torture_random(&rand) & 0x3ff);
+ rcu_torture_writer_state = RTWS_REPLACE;
old_rp = rcu_dereference_check(rcu_torture_current,
current == writer_task);
rp->rtort_mbtest = 1;
@@ -716,35 +810,42 @@ rcu_torture_writer(void *arg)
i = RCU_TORTURE_PIPE_LEN;
atomic_inc(&rcu_torture_wcount[i]);
old_rp->rtort_pipe_count++;
- if (gp_normal == gp_exp)
- exp = !!(torture_random(&rand) & 0x80);
- else
- exp = gp_exp;
- if (!exp) {
+ switch (synctype[torture_random(&rand) % nsynctypes]) {
+ case RTWS_DEF_FREE:
+ rcu_torture_writer_state = RTWS_DEF_FREE;
cur_ops->deferred_free(old_rp);
- } else {
+ break;
+ case RTWS_EXP_SYNC:
+ rcu_torture_writer_state = RTWS_EXP_SYNC;
cur_ops->exp_sync();
- list_add(&old_rp->rtort_free,
- &rcu_torture_removed);
- list_for_each_entry_safe(rp, rp1,
- &rcu_torture_removed,
- rtort_free) {
- i = rp->rtort_pipe_count;
- if (i > RCU_TORTURE_PIPE_LEN)
- i = RCU_TORTURE_PIPE_LEN;
- atomic_inc(&rcu_torture_wcount[i]);
- if (++rp->rtort_pipe_count >=
- RCU_TORTURE_PIPE_LEN) {
- rp->rtort_mbtest = 0;
- list_del(&rp->rtort_free);
- rcu_torture_free(rp);
- }
- }
+ rcu_torture_pipe_update(old_rp);
+ break;
+ case RTWS_COND_GET:
+ rcu_torture_writer_state = RTWS_COND_GET;
+ gp_snap = cur_ops->get_state();
+ i = torture_random(&rand) % 16;
+ if (i != 0)
+ schedule_timeout_interruptible(i);
+ udelay(torture_random(&rand) % 1000);
+ rcu_torture_writer_state = RTWS_COND_SYNC;
+ cur_ops->cond_sync(gp_snap);
+ rcu_torture_pipe_update(old_rp);
+ break;
+ case RTWS_SYNC:
+ rcu_torture_writer_state = RTWS_SYNC;
+ cur_ops->sync();
+ rcu_torture_pipe_update(old_rp);
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ break;
}
}
rcutorture_record_progress(++rcu_torture_current_version);
+ rcu_torture_writer_state = RTWS_STUTTER;
stutter_wait("rcu_torture_writer");
} while (!torture_must_stop());
+ rcu_torture_writer_state = RTWS_STOPPING;
torture_kthread_stopping("rcu_torture_writer");
return 0;
}
@@ -784,7 +885,7 @@ rcu_torture_fakewriter(void *arg)
return 0;
}
-void rcutorture_trace_dump(void)
+static void rcutorture_trace_dump(void)
{
static atomic_t beenhere = ATOMIC_INIT(0);
@@ -918,11 +1019,13 @@ rcu_torture_reader(void *arg)
__this_cpu_inc(rcu_torture_batch[completed]);
preempt_enable();
cur_ops->readunlock(idx);
- schedule();
+ cond_resched();
stutter_wait("rcu_torture_reader");
} while (!torture_must_stop());
- if (irqreader && cur_ops->irq_capable)
+ if (irqreader && cur_ops->irq_capable) {
del_timer_sync(&t);
+ destroy_timer_on_stack(&t);
+ }
torture_kthread_stopping("rcu_torture_reader");
return 0;
}
@@ -937,6 +1040,7 @@ rcu_torture_printk(char *page)
int i;
long pipesummary[RCU_TORTURE_PIPE_LEN + 1] = { 0 };
long batchsummary[RCU_TORTURE_PIPE_LEN + 1] = { 0 };
+ static unsigned long rtcv_snap = ULONG_MAX;
for_each_possible_cpu(cpu) {
for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++) {
@@ -997,6 +1101,22 @@ rcu_torture_printk(char *page)
page += sprintf(page, "\n");
if (cur_ops->stats)
cur_ops->stats(page);
+ if (rtcv_snap == rcu_torture_current_version &&
+ rcu_torture_current != NULL) {
+ int __maybe_unused flags;
+ unsigned long __maybe_unused gpnum;
+ unsigned long __maybe_unused completed;
+
+ rcutorture_get_gp_data(cur_ops->ttype,
+ &flags, &gpnum, &completed);
+ page += sprintf(page,
+ "??? Writer stall state %d g%lu c%lu f%#x\n",
+ rcu_torture_writer_state,
+ gpnum, completed, flags);
+ show_rcu_gp_kthreads();
+ rcutorture_trace_dump();
+ }
+ rtcv_snap = rcu_torture_current_version;
}
/*
@@ -1146,7 +1266,7 @@ static int __init rcu_torture_stall_init(void)
}
/* Callback function for RCU barrier testing. */
-void rcu_torture_barrier_cbf(struct rcu_head *rcu)
+static void rcu_torture_barrier_cbf(struct rcu_head *rcu)
{
atomic_inc(&barrier_cbs_invoked);
}
@@ -1416,7 +1536,8 @@ rcu_torture_init(void)
&rcu_ops, &rcu_bh_ops, &rcu_busted_ops, &srcu_ops, &sched_ops,
};
- torture_init_begin(torture_type, verbose, &rcutorture_runnable);
+ if (!torture_init_begin(torture_type, verbose, &rcutorture_runnable))
+ return -EBUSY;
/* Process args and tell the world that the torturer is on the job. */
for (i = 0; i < ARRAY_SIZE(torture_ops); i++) {
@@ -1441,10 +1562,13 @@ rcu_torture_init(void)
if (cur_ops->init)
cur_ops->init(); /* no "goto unwind" prior to this point!!! */
- if (nreaders >= 0)
+ if (nreaders >= 0) {
nrealreaders = nreaders;
- else
- nrealreaders = 2 * num_online_cpus();
+ } else {
+ nrealreaders = num_online_cpus() - 1;
+ if (nrealreaders <= 0)
+ nrealreaders = 1;
+ }
rcu_torture_print_module_parms(cur_ops, "Start of test");
/* Set up the freelist. */
@@ -1533,7 +1657,8 @@ rcu_torture_init(void)
fqs_duration = 0;
if (fqs_duration) {
/* Create the fqs thread */
- torture_create_kthread(rcu_torture_fqs, NULL, fqs_task);
+ firsterr = torture_create_kthread(rcu_torture_fqs, NULL,
+ fqs_task);
if (firsterr)
goto unwind;
}
diff --git a/kernel/rcu/tiny_plugin.h b/kernel/rcu/tiny_plugin.h
index 43152852056..858c5656912 100644
--- a/kernel/rcu/tiny_plugin.h
+++ b/kernel/rcu/tiny_plugin.h
@@ -144,7 +144,7 @@ static void check_cpu_stall(struct rcu_ctrlblk *rcp)
return;
rcp->ticks_this_gp++;
j = jiffies;
- js = rcp->jiffies_stall;
+ js = ACCESS_ONCE(rcp->jiffies_stall);
if (*rcp->curtail && ULONG_CMP_GE(j, js)) {
pr_err("INFO: %s stall on CPU (%lu ticks this GP) idle=%llx (t=%lu jiffies q=%ld)\n",
rcp->name, rcp->ticks_this_gp, rcu_dynticks_nesting,
@@ -152,17 +152,17 @@ static void check_cpu_stall(struct rcu_ctrlblk *rcp)
dump_stack();
}
if (*rcp->curtail && ULONG_CMP_GE(j, js))
- rcp->jiffies_stall = jiffies +
+ ACCESS_ONCE(rcp->jiffies_stall) = jiffies +
3 * rcu_jiffies_till_stall_check() + 3;
else if (ULONG_CMP_GE(j, js))
- rcp->jiffies_stall = jiffies + rcu_jiffies_till_stall_check();
+ ACCESS_ONCE(rcp->jiffies_stall) = jiffies + rcu_jiffies_till_stall_check();
}
static void reset_cpu_stall_ticks(struct rcu_ctrlblk *rcp)
{
rcp->ticks_this_gp = 0;
rcp->gp_start = jiffies;
- rcp->jiffies_stall = jiffies + rcu_jiffies_till_stall_check();
+ ACCESS_ONCE(rcp->jiffies_stall) = jiffies + rcu_jiffies_till_stall_check();
}
static void check_cpu_stalls(void)
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 0c47e300210..3bbe48939e4 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -243,7 +243,7 @@ static ulong jiffies_till_next_fqs = ULONG_MAX;
module_param(jiffies_till_first_fqs, ulong, 0644);
module_param(jiffies_till_next_fqs, ulong, 0644);
-static void rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp,
+static bool rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp,
struct rcu_data *rdp);
static void force_qs_rnp(struct rcu_state *rsp,
int (*f)(struct rcu_data *rsp, bool *isidle,
@@ -271,6 +271,15 @@ long rcu_batches_completed_bh(void)
EXPORT_SYMBOL_GPL(rcu_batches_completed_bh);
/*
+ * Force a quiescent state.
+ */
+void rcu_force_quiescent_state(void)
+{
+ force_quiescent_state(rcu_state);
+}
+EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
+
+/*
* Force a quiescent state for RCU BH.
*/
void rcu_bh_force_quiescent_state(void)
@@ -280,6 +289,21 @@ void rcu_bh_force_quiescent_state(void)
EXPORT_SYMBOL_GPL(rcu_bh_force_quiescent_state);
/*
+ * Show the state of the grace-period kthreads.
+ */
+void show_rcu_gp_kthreads(void)
+{
+ struct rcu_state *rsp;
+
+ for_each_rcu_flavor(rsp) {
+ pr_info("%s: wait state: %d ->state: %#lx\n",
+ rsp->name, rsp->gp_state, rsp->gp_kthread->state);
+ /* sched_show_task(rsp->gp_kthread); */
+ }
+}
+EXPORT_SYMBOL_GPL(show_rcu_gp_kthreads);
+
+/*
* Record the number of times rcutorture tests have been initiated and
* terminated. This information allows the debugfs tracing stats to be
* correlated to the rcutorture messages, even when the rcutorture module
@@ -294,6 +318,39 @@ void rcutorture_record_test_transition(void)
EXPORT_SYMBOL_GPL(rcutorture_record_test_transition);
/*
+ * Send along grace-period-related data for rcutorture diagnostics.
+ */
+void rcutorture_get_gp_data(enum rcutorture_type test_type, int *flags,
+ unsigned long *gpnum, unsigned long *completed)
+{
+ struct rcu_state *rsp = NULL;
+
+ switch (test_type) {
+ case RCU_FLAVOR:
+ rsp = rcu_state;
+ break;
+ case RCU_BH_FLAVOR:
+ rsp = &rcu_bh_state;
+ break;
+ case RCU_SCHED_FLAVOR:
+ rsp = &rcu_sched_state;
+ break;
+ default:
+ break;
+ }
+ if (rsp != NULL) {
+ *flags = ACCESS_ONCE(rsp->gp_flags);
+ *gpnum = ACCESS_ONCE(rsp->gpnum);
+ *completed = ACCESS_ONCE(rsp->completed);
+ return;
+ }
+ *flags = 0;
+ *gpnum = 0;
+ *completed = 0;
+}
+EXPORT_SYMBOL_GPL(rcutorture_get_gp_data);
+
+/*
* Record the number of writer passes through the current rcutorture test.
* This is also used to correlate debugfs tracing stats with the rcutorture
* messages.
@@ -324,6 +381,28 @@ cpu_has_callbacks_ready_to_invoke(struct rcu_data *rdp)
}
/*
+ * Return the root node of the specified rcu_state structure.
+ */
+static struct rcu_node *rcu_get_root(struct rcu_state *rsp)
+{
+ return &rsp->node[0];
+}
+
+/*
+ * Is there any need for future grace periods?
+ * Interrupts must be disabled. If the caller does not hold the root
+ * rnp_node structure's ->lock, the results are advisory only.
+ */
+static int rcu_future_needs_gp(struct rcu_state *rsp)
+{
+ struct rcu_node *rnp = rcu_get_root(rsp);
+ int idx = (ACCESS_ONCE(rnp->completed) + 1) & 0x1;
+ int *fp = &rnp->need_future_gp[idx];
+
+ return ACCESS_ONCE(*fp);
+}
+
+/*
* Does the current CPU require a not-yet-started grace period?
* The caller must have disabled interrupts to prevent races with
* normal callback registry.
@@ -335,7 +414,7 @@ cpu_needs_another_gp(struct rcu_state *rsp, struct rcu_data *rdp)
if (rcu_gp_in_progress(rsp))
return 0; /* No, a grace period is already in progress. */
- if (rcu_nocb_needs_gp(rsp))
+ if (rcu_future_needs_gp(rsp))
return 1; /* Yes, a no-CBs CPU needs one. */
if (!rdp->nxttail[RCU_NEXT_TAIL])
return 0; /* No, this is a no-CBs (or offline) CPU. */
@@ -350,14 +429,6 @@ cpu_needs_another_gp(struct rcu_state *rsp, struct rcu_data *rdp)
}
/*
- * Return the root node of the specified rcu_state structure.
- */
-static struct rcu_node *rcu_get_root(struct rcu_state *rsp)
-{
- return &rsp->node[0];
-}
-
-/*
* rcu_eqs_enter_common - current CPU is moving towards extended quiescent state
*
* If the new value of the ->dynticks_nesting counter now is zero,
@@ -758,7 +829,12 @@ static int dyntick_save_progress_counter(struct rcu_data *rdp,
{
rdp->dynticks_snap = atomic_add_return(0, &rdp->dynticks->dynticks);
rcu_sysidle_check_cpu(rdp, isidle, maxj);
- return (rdp->dynticks_snap & 0x1) == 0;
+ if ((rdp->dynticks_snap & 0x1) == 0) {
+ trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, TPS("dti"));
+ return 1;
+ } else {
+ return 0;
+ }
}
/*
@@ -851,7 +927,7 @@ static void record_gp_stall_check_time(struct rcu_state *rsp)
rsp->gp_start = j;
smp_wmb(); /* Record start time before stall time. */
j1 = rcu_jiffies_till_stall_check();
- rsp->jiffies_stall = j + j1;
+ ACCESS_ONCE(rsp->jiffies_stall) = j + j1;
rsp->jiffies_resched = j + j1 / 2;
}
@@ -890,12 +966,12 @@ static void print_other_cpu_stall(struct rcu_state *rsp)
/* Only let one CPU complain about others per time interval. */
raw_spin_lock_irqsave(&rnp->lock, flags);
- delta = jiffies - rsp->jiffies_stall;
+ delta = jiffies - ACCESS_ONCE(rsp->jiffies_stall);
if (delta < RCU_STALL_RAT_DELAY || !rcu_gp_in_progress(rsp)) {
raw_spin_unlock_irqrestore(&rnp->lock, flags);
return;
}
- rsp->jiffies_stall = jiffies + 3 * rcu_jiffies_till_stall_check() + 3;
+ ACCESS_ONCE(rsp->jiffies_stall) = jiffies + 3 * rcu_jiffies_till_stall_check() + 3;
raw_spin_unlock_irqrestore(&rnp->lock, flags);
/*
@@ -932,9 +1008,9 @@ static void print_other_cpu_stall(struct rcu_state *rsp)
print_cpu_stall_info_end();
for_each_possible_cpu(cpu)
totqlen += per_cpu_ptr(rsp->rda, cpu)->qlen;
- pr_cont("(detected by %d, t=%ld jiffies, g=%lu, c=%lu, q=%lu)\n",
+ pr_cont("(detected by %d, t=%ld jiffies, g=%ld, c=%ld, q=%lu)\n",
smp_processor_id(), (long)(jiffies - rsp->gp_start),
- rsp->gpnum, rsp->completed, totqlen);
+ (long)rsp->gpnum, (long)rsp->completed, totqlen);
if (ndetected == 0)
pr_err("INFO: Stall ended before state dump start\n");
else if (!trigger_all_cpu_backtrace())
@@ -947,12 +1023,6 @@ static void print_other_cpu_stall(struct rcu_state *rsp)
force_quiescent_state(rsp); /* Kick them all. */
}
-/*
- * This function really isn't for public consumption, but RCU is special in
- * that context switches can allow the state machine to make progress.
- */
-extern void resched_cpu(int cpu);
-
static void print_cpu_stall(struct rcu_state *rsp)
{
int cpu;
@@ -971,14 +1041,15 @@ static void print_cpu_stall(struct rcu_state *rsp)
print_cpu_stall_info_end();
for_each_possible_cpu(cpu)
totqlen += per_cpu_ptr(rsp->rda, cpu)->qlen;
- pr_cont(" (t=%lu jiffies g=%lu c=%lu q=%lu)\n",
- jiffies - rsp->gp_start, rsp->gpnum, rsp->completed, totqlen);
+ pr_cont(" (t=%lu jiffies g=%ld c=%ld q=%lu)\n",
+ jiffies - rsp->gp_start,
+ (long)rsp->gpnum, (long)rsp->completed, totqlen);
if (!trigger_all_cpu_backtrace())
dump_stack();
raw_spin_lock_irqsave(&rnp->lock, flags);
- if (ULONG_CMP_GE(jiffies, rsp->jiffies_stall))
- rsp->jiffies_stall = jiffies +
+ if (ULONG_CMP_GE(jiffies, ACCESS_ONCE(rsp->jiffies_stall)))
+ ACCESS_ONCE(rsp->jiffies_stall) = jiffies +
3 * rcu_jiffies_till_stall_check() + 3;
raw_spin_unlock_irqrestore(&rnp->lock, flags);
@@ -1062,7 +1133,7 @@ void rcu_cpu_stall_reset(void)
struct rcu_state *rsp;
for_each_rcu_flavor(rsp)
- rsp->jiffies_stall = jiffies + ULONG_MAX / 2;
+ ACCESS_ONCE(rsp->jiffies_stall) = jiffies + ULONG_MAX / 2;
}
/*
@@ -1123,15 +1194,18 @@ static void trace_rcu_future_gp(struct rcu_node *rnp, struct rcu_data *rdp,
/*
* Start some future grace period, as needed to handle newly arrived
* callbacks. The required future grace periods are recorded in each
- * rcu_node structure's ->need_future_gp field.
+ * rcu_node structure's ->need_future_gp field. Returns true if there
+ * is reason to awaken the grace-period kthread.
*
* The caller must hold the specified rcu_node structure's ->lock.
*/
-static unsigned long __maybe_unused
-rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp)
+static bool __maybe_unused
+rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp,
+ unsigned long *c_out)
{
unsigned long c;
int i;
+ bool ret = false;
struct rcu_node *rnp_root = rcu_get_root(rdp->rsp);
/*
@@ -1142,7 +1216,7 @@ rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp)
trace_rcu_future_gp(rnp, rdp, c, TPS("Startleaf"));
if (rnp->need_future_gp[c & 0x1]) {
trace_rcu_future_gp(rnp, rdp, c, TPS("Prestartleaf"));
- return c;
+ goto out;
}
/*
@@ -1156,7 +1230,7 @@ rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp)
ACCESS_ONCE(rnp->gpnum) != ACCESS_ONCE(rnp->completed)) {
rnp->need_future_gp[c & 0x1]++;
trace_rcu_future_gp(rnp, rdp, c, TPS("Startedleaf"));
- return c;
+ goto out;
}
/*
@@ -1197,12 +1271,15 @@ rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp)
trace_rcu_future_gp(rnp, rdp, c, TPS("Startedleafroot"));
} else {
trace_rcu_future_gp(rnp, rdp, c, TPS("Startedroot"));
- rcu_start_gp_advanced(rdp->rsp, rnp_root, rdp);
+ ret = rcu_start_gp_advanced(rdp->rsp, rnp_root, rdp);
}
unlock_out:
if (rnp != rnp_root)
raw_spin_unlock(&rnp_root->lock);
- return c;
+out:
+ if (c_out != NULL)
+ *c_out = c;
+ return ret;
}
/*
@@ -1226,25 +1303,43 @@ static int rcu_future_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp)
}
/*
+ * Awaken the grace-period kthread for the specified flavor of RCU.
+ * Don't do a self-awaken, and don't bother awakening when there is
+ * nothing for the grace-period kthread to do (as in several CPUs
+ * raced to awaken, and we lost), and finally don't try to awaken
+ * a kthread that has not yet been created.
+ */
+static void rcu_gp_kthread_wake(struct rcu_state *rsp)
+{
+ if (current == rsp->gp_kthread ||
+ !ACCESS_ONCE(rsp->gp_flags) ||
+ !rsp->gp_kthread)
+ return;
+ wake_up(&rsp->gp_wq);
+}
+
+/*
* If there is room, assign a ->completed number to any callbacks on
* this CPU that have not already been assigned. Also accelerate any
* callbacks that were previously assigned a ->completed number that has
* since proven to be too conservative, which can happen if callbacks get
* assigned a ->completed number while RCU is idle, but with reference to
* a non-root rcu_node structure. This function is idempotent, so it does
- * not hurt to call it repeatedly.
+ * not hurt to call it repeatedly. Returns an flag saying that we should
+ * awaken the RCU grace-period kthread.
*
* The caller must hold rnp->lock with interrupts disabled.
*/
-static void rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
+static bool rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
struct rcu_data *rdp)
{
unsigned long c;
int i;
+ bool ret;
/* If the CPU has no callbacks, nothing to do. */
if (!rdp->nxttail[RCU_NEXT_TAIL] || !*rdp->nxttail[RCU_DONE_TAIL])
- return;
+ return false;
/*
* Starting from the sublist containing the callbacks most
@@ -1273,7 +1368,7 @@ static void rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
* be grouped into.
*/
if (++i >= RCU_NEXT_TAIL)
- return;
+ return false;
/*
* Assign all subsequent callbacks' ->completed number to the next
@@ -1285,13 +1380,14 @@ static void rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
rdp->nxtcompleted[i] = c;
}
/* Record any needed additional grace periods. */
- rcu_start_future_gp(rnp, rdp);
+ ret = rcu_start_future_gp(rnp, rdp, NULL);
/* Trace depending on how much we were able to accelerate. */
if (!*rdp->nxttail[RCU_WAIT_TAIL])
trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("AccWaitCB"));
else
trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("AccReadyCB"));
+ return ret;
}
/*
@@ -1300,17 +1396,18 @@ static void rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
* assign ->completed numbers to any callbacks in the RCU_NEXT_TAIL
* sublist. This function is idempotent, so it does not hurt to
* invoke it repeatedly. As long as it is not invoked -too- often...
+ * Returns true if the RCU grace-period kthread needs to be awakened.
*
* The caller must hold rnp->lock with interrupts disabled.
*/
-static void rcu_advance_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
+static bool rcu_advance_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
struct rcu_data *rdp)
{
int i, j;
/* If the CPU has no callbacks, nothing to do. */
if (!rdp->nxttail[RCU_NEXT_TAIL] || !*rdp->nxttail[RCU_DONE_TAIL])
- return;
+ return false;
/*
* Find all callbacks whose ->completed numbers indicate that they
@@ -1334,26 +1431,30 @@ static void rcu_advance_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
}
/* Classify any remaining callbacks. */
- rcu_accelerate_cbs(rsp, rnp, rdp);
+ return rcu_accelerate_cbs(rsp, rnp, rdp);
}
/*
* Update CPU-local rcu_data state to record the beginnings and ends of
* grace periods. The caller must hold the ->lock of the leaf rcu_node
* structure corresponding to the current CPU, and must have irqs disabled.
+ * Returns true if the grace-period kthread needs to be awakened.
*/
-static void __note_gp_changes(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp)
+static bool __note_gp_changes(struct rcu_state *rsp, struct rcu_node *rnp,
+ struct rcu_data *rdp)
{
+ bool ret;
+
/* Handle the ends of any preceding grace periods first. */
if (rdp->completed == rnp->completed) {
/* No grace period end, so just accelerate recent callbacks. */
- rcu_accelerate_cbs(rsp, rnp, rdp);
+ ret = rcu_accelerate_cbs(rsp, rnp, rdp);
} else {
/* Advance callbacks. */
- rcu_advance_cbs(rsp, rnp, rdp);
+ ret = rcu_advance_cbs(rsp, rnp, rdp);
/* Remember that we saw this grace-period completion. */
rdp->completed = rnp->completed;
@@ -1372,11 +1473,13 @@ static void __note_gp_changes(struct rcu_state *rsp, struct rcu_node *rnp, struc
rdp->qs_pending = !!(rnp->qsmask & rdp->grpmask);
zero_cpu_stall_ticks(rdp);
}
+ return ret;
}
static void note_gp_changes(struct rcu_state *rsp, struct rcu_data *rdp)
{
unsigned long flags;
+ bool needwake;
struct rcu_node *rnp;
local_irq_save(flags);
@@ -1388,8 +1491,10 @@ static void note_gp_changes(struct rcu_state *rsp, struct rcu_data *rdp)
return;
}
smp_mb__after_unlock_lock();
- __note_gp_changes(rsp, rnp, rdp);
+ needwake = __note_gp_changes(rsp, rnp, rdp);
raw_spin_unlock_irqrestore(&rnp->lock, flags);
+ if (needwake)
+ rcu_gp_kthread_wake(rsp);
}
/*
@@ -1403,12 +1508,12 @@ static int rcu_gp_init(struct rcu_state *rsp)
rcu_bind_gp_kthread();
raw_spin_lock_irq(&rnp->lock);
smp_mb__after_unlock_lock();
- if (rsp->gp_flags == 0) {
+ if (!ACCESS_ONCE(rsp->gp_flags)) {
/* Spurious wakeup, tell caller to go back to sleep. */
raw_spin_unlock_irq(&rnp->lock);
return 0;
}
- rsp->gp_flags = 0; /* Clear all flags: New grace period. */
+ ACCESS_ONCE(rsp->gp_flags) = 0; /* Clear all flags: New grace period. */
if (WARN_ON_ONCE(rcu_gp_in_progress(rsp))) {
/*
@@ -1453,7 +1558,7 @@ static int rcu_gp_init(struct rcu_state *rsp)
WARN_ON_ONCE(rnp->completed != rsp->completed);
ACCESS_ONCE(rnp->completed) = rsp->completed;
if (rnp == rdp->mynode)
- __note_gp_changes(rsp, rnp, rdp);
+ (void)__note_gp_changes(rsp, rnp, rdp);
rcu_preempt_boost_start_gp(rnp);
trace_rcu_grace_period_init(rsp->name, rnp->gpnum,
rnp->level, rnp->grplo,
@@ -1501,7 +1606,7 @@ static int rcu_gp_fqs(struct rcu_state *rsp, int fqs_state_in)
if (ACCESS_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) {
raw_spin_lock_irq(&rnp->lock);
smp_mb__after_unlock_lock();
- rsp->gp_flags &= ~RCU_GP_FLAG_FQS;
+ ACCESS_ONCE(rsp->gp_flags) &= ~RCU_GP_FLAG_FQS;
raw_spin_unlock_irq(&rnp->lock);
}
return fqs_state;
@@ -1513,6 +1618,7 @@ static int rcu_gp_fqs(struct rcu_state *rsp, int fqs_state_in)
static void rcu_gp_cleanup(struct rcu_state *rsp)
{
unsigned long gp_duration;
+ bool needgp = false;
int nocb = 0;
struct rcu_data *rdp;
struct rcu_node *rnp = rcu_get_root(rsp);
@@ -1548,7 +1654,7 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
ACCESS_ONCE(rnp->completed) = rsp->gpnum;
rdp = this_cpu_ptr(rsp->rda);
if (rnp == rdp->mynode)
- __note_gp_changes(rsp, rnp, rdp);
+ needgp = __note_gp_changes(rsp, rnp, rdp) || needgp;
/* smp_mb() provided by prior unlock-lock pair. */
nocb += rcu_future_gp_cleanup(rsp, rnp);
raw_spin_unlock_irq(&rnp->lock);
@@ -1564,9 +1670,10 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
trace_rcu_grace_period(rsp->name, rsp->completed, TPS("end"));
rsp->fqs_state = RCU_GP_IDLE;
rdp = this_cpu_ptr(rsp->rda);
- rcu_advance_cbs(rsp, rnp, rdp); /* Reduce false positives below. */
- if (cpu_needs_another_gp(rsp, rdp)) {
- rsp->gp_flags = RCU_GP_FLAG_INIT;
+ /* Advance CBs to reduce false positives below. */
+ needgp = rcu_advance_cbs(rsp, rnp, rdp) || needgp;
+ if (needgp || cpu_needs_another_gp(rsp, rdp)) {
+ ACCESS_ONCE(rsp->gp_flags) = RCU_GP_FLAG_INIT;
trace_rcu_grace_period(rsp->name,
ACCESS_ONCE(rsp->gpnum),
TPS("newreq"));
@@ -1593,6 +1700,7 @@ static int __noreturn rcu_gp_kthread(void *arg)
trace_rcu_grace_period(rsp->name,
ACCESS_ONCE(rsp->gpnum),
TPS("reqwait"));
+ rsp->gp_state = RCU_GP_WAIT_GPS;
wait_event_interruptible(rsp->gp_wq,
ACCESS_ONCE(rsp->gp_flags) &
RCU_GP_FLAG_INIT);
@@ -1620,6 +1728,7 @@ static int __noreturn rcu_gp_kthread(void *arg)
trace_rcu_grace_period(rsp->name,
ACCESS_ONCE(rsp->gpnum),
TPS("fqswait"));
+ rsp->gp_state = RCU_GP_WAIT_FQS;
ret = wait_event_interruptible_timeout(rsp->gp_wq,
((gf = ACCESS_ONCE(rsp->gp_flags)) &
RCU_GP_FLAG_FQS) ||
@@ -1665,14 +1774,6 @@ static int __noreturn rcu_gp_kthread(void *arg)
}
}
-static void rsp_wakeup(struct irq_work *work)
-{
- struct rcu_state *rsp = container_of(work, struct rcu_state, wakeup_work);
-
- /* Wake up rcu_gp_kthread() to start the grace period. */
- wake_up(&rsp->gp_wq);
-}
-
/*
* Start a new RCU grace period if warranted, re-initializing the hierarchy
* in preparation for detecting the next grace period. The caller must hold
@@ -1681,8 +1782,10 @@ static void rsp_wakeup(struct irq_work *work)
* Note that it is legal for a dying CPU (which is marked as offline) to
* invoke this function. This can happen when the dying CPU reports its
* quiescent state.
+ *
+ * Returns true if the grace-period kthread must be awakened.
*/
-static void
+static bool
rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp,
struct rcu_data *rdp)
{
@@ -1693,20 +1796,18 @@ rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp,
* or a grace period is already in progress.
* Either way, don't start a new grace period.
*/
- return;
+ return false;
}
- rsp->gp_flags = RCU_GP_FLAG_INIT;
+ ACCESS_ONCE(rsp->gp_flags) = RCU_GP_FLAG_INIT;
trace_rcu_grace_period(rsp->name, ACCESS_ONCE(rsp->gpnum),
TPS("newreq"));
/*
* We can't do wakeups while holding the rnp->lock, as that
* could cause possible deadlocks with the rq->lock. Defer
- * the wakeup to interrupt context. And don't bother waking
- * up the running kthread.
+ * the wakeup to our caller.
*/
- if (current != rsp->gp_kthread)
- irq_work_queue(&rsp->wakeup_work);
+ return true;
}
/*
@@ -1715,12 +1816,14 @@ rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp,
* is invoked indirectly from rcu_advance_cbs(), which would result in
* endless recursion -- or would do so if it wasn't for the self-deadlock
* that is encountered beforehand.
+ *
+ * Returns true if the grace-period kthread needs to be awakened.
*/
-static void
-rcu_start_gp(struct rcu_state *rsp)
+static bool rcu_start_gp(struct rcu_state *rsp)
{
struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
struct rcu_node *rnp = rcu_get_root(rsp);
+ bool ret = false;
/*
* If there is no grace period in progress right now, any
@@ -1730,8 +1833,9 @@ rcu_start_gp(struct rcu_state *rsp)
* resulting in pointless grace periods. So, advance callbacks
* then start the grace period!
*/
- rcu_advance_cbs(rsp, rnp, rdp);
- rcu_start_gp_advanced(rsp, rnp, rdp);
+ ret = rcu_advance_cbs(rsp, rnp, rdp) || ret;
+ ret = rcu_start_gp_advanced(rsp, rnp, rdp) || ret;
+ return ret;
}
/*
@@ -1820,6 +1924,7 @@ rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp)
{
unsigned long flags;
unsigned long mask;
+ bool needwake;
struct rcu_node *rnp;
rnp = rdp->mynode;
@@ -1848,9 +1953,11 @@ rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp)
* This GP can't end until cpu checks in, so all of our
* callbacks can be processed during the next GP.
*/
- rcu_accelerate_cbs(rsp, rnp, rdp);
+ needwake = rcu_accelerate_cbs(rsp, rnp, rdp);
rcu_report_qs_rnp(mask, rsp, rnp, flags); /* rlses rnp->lock */
+ if (needwake)
+ rcu_gp_kthread_wake(rsp);
}
}
@@ -1951,7 +2058,7 @@ rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp,
static void rcu_adopt_orphan_cbs(struct rcu_state *rsp, unsigned long flags)
{
int i;
- struct rcu_data *rdp = __this_cpu_ptr(rsp->rda);
+ struct rcu_data *rdp = raw_cpu_ptr(rsp->rda);
/* No-CBs CPUs are handled specially. */
if (rcu_nocb_adopt_orphan_cbs(rsp, rdp, flags))
@@ -2320,7 +2427,7 @@ static void force_quiescent_state(struct rcu_state *rsp)
raw_spin_unlock_irqrestore(&rnp_old->lock, flags);
return; /* Someone beat us to it. */
}
- rsp->gp_flags |= RCU_GP_FLAG_FQS;
+ ACCESS_ONCE(rsp->gp_flags) |= RCU_GP_FLAG_FQS;
raw_spin_unlock_irqrestore(&rnp_old->lock, flags);
wake_up(&rsp->gp_wq); /* Memory barrier implied by wake_up() path. */
}
@@ -2334,7 +2441,8 @@ static void
__rcu_process_callbacks(struct rcu_state *rsp)
{
unsigned long flags;
- struct rcu_data *rdp = __this_cpu_ptr(rsp->rda);
+ bool needwake;
+ struct rcu_data *rdp = raw_cpu_ptr(rsp->rda);
WARN_ON_ONCE(rdp->beenonline == 0);
@@ -2345,8 +2453,10 @@ __rcu_process_callbacks(struct rcu_state *rsp)
local_irq_save(flags);
if (cpu_needs_another_gp(rsp, rdp)) {
raw_spin_lock(&rcu_get_root(rsp)->lock); /* irqs disabled. */
- rcu_start_gp(rsp);
+ needwake = rcu_start_gp(rsp);
raw_spin_unlock_irqrestore(&rcu_get_root(rsp)->lock, flags);
+ if (needwake)
+ rcu_gp_kthread_wake(rsp);
} else {
local_irq_restore(flags);
}
@@ -2404,6 +2514,8 @@ static void invoke_rcu_core(void)
static void __call_rcu_core(struct rcu_state *rsp, struct rcu_data *rdp,
struct rcu_head *head, unsigned long flags)
{
+ bool needwake;
+
/*
* If called from an extended quiescent state, invoke the RCU
* core in order to force a re-evaluation of RCU's idleness.
@@ -2433,8 +2545,10 @@ static void __call_rcu_core(struct rcu_state *rsp, struct rcu_data *rdp,
raw_spin_lock(&rnp_root->lock);
smp_mb__after_unlock_lock();
- rcu_start_gp(rsp);
+ needwake = rcu_start_gp(rsp);
raw_spin_unlock(&rnp_root->lock);
+ if (needwake)
+ rcu_gp_kthread_wake(rsp);
} else {
/* Give the grace period a kick. */
rdp->blimit = LONG_MAX;
@@ -2537,6 +2651,20 @@ void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
EXPORT_SYMBOL_GPL(call_rcu_bh);
/*
+ * Queue an RCU callback for lazy invocation after a grace period.
+ * This will likely be later named something like "call_rcu_lazy()",
+ * but this change will require some way of tagging the lazy RCU
+ * callbacks in the list of pending callbacks. Until then, this
+ * function may only be called from __kfree_rcu().
+ */
+void kfree_call_rcu(struct rcu_head *head,
+ void (*func)(struct rcu_head *rcu))
+{
+ __call_rcu(head, func, rcu_state, -1, 1);
+}
+EXPORT_SYMBOL_GPL(kfree_call_rcu);
+
+/*
* Because a context switch is a grace period for RCU-sched and RCU-bh,
* any blocking grace-period wait automatically implies a grace period
* if there is only one CPU online at any point time during execution
@@ -2988,7 +3116,7 @@ static void rcu_barrier_callback(struct rcu_head *rhp)
static void rcu_barrier_func(void *type)
{
struct rcu_state *rsp = type;
- struct rcu_data *rdp = __this_cpu_ptr(rsp->rda);
+ struct rcu_data *rdp = raw_cpu_ptr(rsp->rda);
_rcu_barrier_trace(rsp, "IRQ", -1, rsp->n_barrier_done);
atomic_inc(&rsp->barrier_cpu_count);
@@ -3160,7 +3288,7 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)
* that this CPU cannot possibly have any RCU callbacks in flight yet.
*/
static void
-rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible)
+rcu_init_percpu_data(int cpu, struct rcu_state *rsp)
{
unsigned long flags;
unsigned long mask;
@@ -3173,7 +3301,6 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible)
/* Set up local state, ensuring consistent view of global state. */
raw_spin_lock_irqsave(&rnp->lock, flags);
rdp->beenonline = 1; /* We have now been online. */
- rdp->preemptible = preemptible;
rdp->qlen_last_fqs_check = 0;
rdp->n_force_qs_snap = rsp->n_force_qs;
rdp->blimit = blimit;
@@ -3217,8 +3344,7 @@ static void rcu_prepare_cpu(int cpu)
struct rcu_state *rsp;
for_each_rcu_flavor(rsp)
- rcu_init_percpu_data(cpu, rsp,
- strcmp(rsp->name, "rcu_preempt") == 0);
+ rcu_init_percpu_data(cpu, rsp);
}
/*
@@ -3402,8 +3528,8 @@ static void __init rcu_init_one(struct rcu_state *rsp,
rnp->qsmaskinit = 0;
rnp->grplo = j * cpustride;
rnp->grphi = (j + 1) * cpustride - 1;
- if (rnp->grphi >= NR_CPUS)
- rnp->grphi = NR_CPUS - 1;
+ if (rnp->grphi >= nr_cpu_ids)
+ rnp->grphi = nr_cpu_ids - 1;
if (i == 0) {
rnp->grpnum = 0;
rnp->grpmask = 0;
@@ -3422,7 +3548,6 @@ static void __init rcu_init_one(struct rcu_state *rsp,
rsp->rda = rda;
init_waitqueue_head(&rsp->gp_wq);
- init_irq_work(&rsp->wakeup_work, rsp_wakeup);
rnp = rsp->level[rcu_num_lvls - 1];
for_each_possible_cpu(i) {
while (i > rnp->grphi)
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index 75dc3c39a02..bf2c1e66969 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -252,7 +252,6 @@ struct rcu_data {
bool passed_quiesce; /* User-mode/idle loop etc. */
bool qs_pending; /* Core waits for quiesc state. */
bool beenonline; /* CPU online at least once. */
- bool preemptible; /* Preemptible RCU? */
struct rcu_node *mynode; /* This CPU's leaf of hierarchy */
unsigned long grpmask; /* Mask to apply to leaf qsmask. */
#ifdef CONFIG_RCU_CPU_STALL_INFO
@@ -406,7 +405,8 @@ struct rcu_state {
unsigned long completed; /* # of last completed gp. */
struct task_struct *gp_kthread; /* Task for grace periods. */
wait_queue_head_t gp_wq; /* Where GP task waits. */
- int gp_flags; /* Commands for GP task. */
+ short gp_flags; /* Commands for GP task. */
+ short gp_state; /* GP kthread sleep state. */
/* End of fields guarded by root rcu_node's lock. */
@@ -462,13 +462,17 @@ struct rcu_state {
const char *name; /* Name of structure. */
char abbr; /* Abbreviated name. */
struct list_head flavors; /* List of RCU flavors. */
- struct irq_work wakeup_work; /* Postponed wakeups */
};
/* Values for rcu_state structure's gp_flags field. */
#define RCU_GP_FLAG_INIT 0x1 /* Need grace-period initialization. */
#define RCU_GP_FLAG_FQS 0x2 /* Need grace-period quiescent-state forcing. */
+/* Values for rcu_state structure's gp_flags field. */
+#define RCU_GP_WAIT_INIT 0 /* Initial state. */
+#define RCU_GP_WAIT_GPS 1 /* Wait for grace-period start. */
+#define RCU_GP_WAIT_FQS 2 /* Wait for force-quiescent-state time. */
+
extern struct list_head rcu_struct_flavors;
/* Sequence through rcu_state structures for each RCU flavor. */
@@ -547,7 +551,6 @@ static void print_cpu_stall_info(struct rcu_state *rsp, int cpu);
static void print_cpu_stall_info_end(void);
static void zero_cpu_stall_ticks(struct rcu_data *rdp);
static void increment_cpu_stall_ticks(void);
-static int rcu_nocb_needs_gp(struct rcu_state *rsp);
static void rcu_nocb_gp_set(struct rcu_node *rnp, int nrq);
static void rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp);
static void rcu_init_one_nocb(struct rcu_node *rnp);
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 962d1d58992..2e579c38bd9 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -149,15 +149,6 @@ long rcu_batches_completed(void)
EXPORT_SYMBOL_GPL(rcu_batches_completed);
/*
- * Force a quiescent state for preemptible RCU.
- */
-void rcu_force_quiescent_state(void)
-{
- force_quiescent_state(&rcu_preempt_state);
-}
-EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
-
-/*
* Record a preemptible-RCU quiescent state for the specified CPU. Note
* that this just means that the task currently running on the CPU is
* not in a quiescent state. There might be any number of tasks blocked
@@ -688,20 +679,6 @@ void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
}
EXPORT_SYMBOL_GPL(call_rcu);
-/*
- * Queue an RCU callback for lazy invocation after a grace period.
- * This will likely be later named something like "call_rcu_lazy()",
- * but this change will require some way of tagging the lazy RCU
- * callbacks in the list of pending callbacks. Until then, this
- * function may only be called from __kfree_rcu().
- */
-void kfree_call_rcu(struct rcu_head *head,
- void (*func)(struct rcu_head *rcu))
-{
- __call_rcu(head, func, &rcu_preempt_state, -1, 1);
-}
-EXPORT_SYMBOL_GPL(kfree_call_rcu);
-
/**
* synchronize_rcu - wait until a grace period has elapsed.
*
@@ -991,16 +968,6 @@ long rcu_batches_completed(void)
EXPORT_SYMBOL_GPL(rcu_batches_completed);
/*
- * Force a quiescent state for RCU, which, because there is no preemptible
- * RCU, becomes the same as rcu-sched.
- */
-void rcu_force_quiescent_state(void)
-{
- rcu_sched_force_quiescent_state();
-}
-EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
-
-/*
* Because preemptible RCU does not exist, we never have to check for
* CPUs being in quiescent states.
*/
@@ -1080,22 +1047,6 @@ static void rcu_preempt_check_callbacks(int cpu)
}
/*
- * Queue an RCU callback for lazy invocation after a grace period.
- * This will likely be later named something like "call_rcu_lazy()",
- * but this change will require some way of tagging the lazy RCU
- * callbacks in the list of pending callbacks. Until then, this
- * function may only be called from __kfree_rcu().
- *
- * Because there is no preemptible RCU, we use RCU-sched instead.
- */
-void kfree_call_rcu(struct rcu_head *head,
- void (*func)(struct rcu_head *rcu))
-{
- __call_rcu(head, func, &rcu_sched_state, -1, 1);
-}
-EXPORT_SYMBOL_GPL(kfree_call_rcu);
-
-/*
* Wait for an rcu-preempt grace period, but make it happen quickly.
* But because preemptible RCU does not exist, map to rcu-sched.
*/
@@ -1744,6 +1695,7 @@ int rcu_needs_cpu(int cpu, unsigned long *dj)
static void rcu_prepare_for_idle(int cpu)
{
#ifndef CONFIG_RCU_NOCB_CPU_ALL
+ bool needwake;
struct rcu_data *rdp;
struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
struct rcu_node *rnp;
@@ -1792,8 +1744,10 @@ static void rcu_prepare_for_idle(int cpu)
rnp = rdp->mynode;
raw_spin_lock(&rnp->lock); /* irqs already disabled. */
smp_mb__after_unlock_lock();
- rcu_accelerate_cbs(rsp, rnp, rdp);
+ needwake = rcu_accelerate_cbs(rsp, rnp, rdp);
raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
+ if (needwake)
+ rcu_gp_kthread_wake(rsp);
}
#endif /* #ifndef CONFIG_RCU_NOCB_CPU_ALL */
}
@@ -1855,7 +1809,7 @@ static void rcu_oom_notify_cpu(void *unused)
struct rcu_data *rdp;
for_each_rcu_flavor(rsp) {
- rdp = __this_cpu_ptr(rsp->rda);
+ rdp = raw_cpu_ptr(rsp->rda);
if (rdp->qlen_lazy != 0) {
atomic_inc(&oom_callback_count);
rsp->call(&rdp->oom_head, rcu_oom_callback);
@@ -1997,7 +1951,7 @@ static void increment_cpu_stall_ticks(void)
struct rcu_state *rsp;
for_each_rcu_flavor(rsp)
- __this_cpu_ptr(rsp->rda)->ticks_this_gp++;
+ raw_cpu_inc(rsp->rda->ticks_this_gp);
}
#else /* #ifdef CONFIG_RCU_CPU_STALL_INFO */
@@ -2068,19 +2022,6 @@ static int __init parse_rcu_nocb_poll(char *arg)
early_param("rcu_nocb_poll", parse_rcu_nocb_poll);
/*
- * Do any no-CBs CPUs need another grace period?
- *
- * Interrupts must be disabled. If the caller does not hold the root
- * rnp_node structure's ->lock, the results are advisory only.
- */
-static int rcu_nocb_needs_gp(struct rcu_state *rsp)
-{
- struct rcu_node *rnp = rcu_get_root(rsp);
-
- return rnp->need_future_gp[(ACCESS_ONCE(rnp->completed) + 1) & 0x1];
-}
-
-/*
* Wake up any no-CBs CPUs' kthreads that were waiting on the just-ended
* grace period.
*/
@@ -2109,7 +2050,7 @@ static void rcu_init_one_nocb(struct rcu_node *rnp)
}
#ifndef CONFIG_RCU_NOCB_CPU_ALL
-/* Is the specified CPU a no-CPUs CPU? */
+/* Is the specified CPU a no-CBs CPU? */
bool rcu_is_nocb_cpu(int cpu)
{
if (have_rcu_nocb_mask)
@@ -2243,12 +2184,15 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp)
unsigned long c;
bool d;
unsigned long flags;
+ bool needwake;
struct rcu_node *rnp = rdp->mynode;
raw_spin_lock_irqsave(&rnp->lock, flags);
smp_mb__after_unlock_lock();
- c = rcu_start_future_gp(rnp, rdp);
+ needwake = rcu_start_future_gp(rnp, rdp, &c);
raw_spin_unlock_irqrestore(&rnp->lock, flags);
+ if (needwake)
+ rcu_gp_kthread_wake(rdp->rsp);
/*
* Wait for the grace period. Do so interruptibly to avoid messing
@@ -2402,11 +2346,6 @@ static bool init_nocb_callback_list(struct rcu_data *rdp)
#else /* #ifdef CONFIG_RCU_NOCB_CPU */
-static int rcu_nocb_needs_gp(struct rcu_state *rsp)
-{
- return 0;
-}
-
static void rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp)
{
}
@@ -2657,20 +2596,6 @@ static bool is_sysidle_rcu_state(struct rcu_state *rsp)
}
/*
- * Bind the grace-period kthread for the sysidle flavor of RCU to the
- * timekeeping CPU.
- */
-static void rcu_bind_gp_kthread(void)
-{
- int cpu = ACCESS_ONCE(tick_do_timer_cpu);
-
- if (cpu < 0 || cpu >= nr_cpu_ids)
- return;
- if (raw_smp_processor_id() != cpu)
- set_cpus_allowed_ptr(current, cpumask_of(cpu));
-}
-
-/*
* Return a delay in jiffies based on the number of CPUs, rcu_node
* leaf fanout, and jiffies tick rate. The idea is to allow larger
* systems more time to transition to full-idle state in order to
@@ -2734,7 +2659,8 @@ static void rcu_sysidle(unsigned long j)
static void rcu_sysidle_cancel(void)
{
smp_mb();
- ACCESS_ONCE(full_sysidle_state) = RCU_SYSIDLE_NOT;
+ if (full_sysidle_state > RCU_SYSIDLE_SHORT)
+ ACCESS_ONCE(full_sysidle_state) = RCU_SYSIDLE_NOT;
}
/*
@@ -2880,10 +2806,6 @@ static bool is_sysidle_rcu_state(struct rcu_state *rsp)
return false;
}
-static void rcu_bind_gp_kthread(void)
-{
-}
-
static void rcu_sysidle_report_gp(struct rcu_state *rsp, int isidle,
unsigned long maxj)
{
@@ -2914,3 +2836,19 @@ static bool rcu_nohz_full_cpu(struct rcu_state *rsp)
#endif /* #ifdef CONFIG_NO_HZ_FULL */
return 0;
}
+
+/*
+ * Bind the grace-period kthread for the sysidle flavor of RCU to the
+ * timekeeping CPU.
+ */
+static void rcu_bind_gp_kthread(void)
+{
+#ifdef CONFIG_NO_HZ_FULL
+ int cpu = ACCESS_ONCE(tick_do_timer_cpu);
+
+ if (cpu < 0 || cpu >= nr_cpu_ids)
+ return;
+ if (raw_smp_processor_id() != cpu)
+ set_cpus_allowed_ptr(current, cpumask_of(cpu));
+#endif /* #ifdef CONFIG_NO_HZ_FULL */
+}
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index 4c0a9b0af46..ed7a0d72562 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -338,3 +338,21 @@ static int __init check_cpu_stall_init(void)
early_initcall(check_cpu_stall_init);
#endif /* #ifdef CONFIG_RCU_STALL_COMMON */
+
+/*
+ * Hooks for cond_resched() and friends to avoid RCU CPU stall warnings.
+ */
+
+DEFINE_PER_CPU(int, rcu_cond_resched_count);
+
+/*
+ * Report a set of RCU quiescent states, for use by cond_resched()
+ * and friends. Out of line due to being called infrequently.
+ */
+void rcu_resched(void)
+{
+ preempt_disable();
+ __this_cpu_write(rcu_cond_resched_count, 0);
+ rcu_note_context_switch(smp_processor_id());
+ preempt_enable();
+}