From 353af9c9a866b07492b15a4efd18ec93123d3a1d Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Thu, 20 Dec 2012 09:35:02 -0800 Subject: rcu: Prevent soft-lockup complaints about no-CBs CPUs The wait_event() at the head of the rcu_nocb_kthread() can result in soft-lockup complaints if the CPU in question does not register RCU callbacks for an extended period. This commit therefore changes the wait_event() to a wait_event_interruptible(). Reported-by: Frederic Weisbecker Signed-off-by: Paul Gortmaker Signed-off-by: Paul E. McKenney --- kernel/rcutree_plugin.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index f6e5ec2932b..43dba2d798a 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -2366,10 +2366,11 @@ static int rcu_nocb_kthread(void *arg) for (;;) { /* If not polling, wait for next batch of callbacks. */ if (!rcu_nocb_poll) - wait_event(rdp->nocb_wq, rdp->nocb_head); + wait_event_interruptible(rdp->nocb_wq, rdp->nocb_head); list = ACCESS_ONCE(rdp->nocb_head); if (!list) { schedule_timeout_interruptible(1); + flush_signals(current); continue; } -- cgit v1.2.3-70-g09d2 From 1b0048a44c502c5ab850203e6e0a6498d7d8676d Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Thu, 20 Dec 2012 13:19:22 -0800 Subject: rcu: Make rcu_nocb_poll an early_param instead of module_param The as-documented rcu_nocb_poll will fail to enable this feature for two reasons. (1) there is an extra "s" in the documented name which is not in the code, and (2) since it uses module_param, it really is expecting a prefix, akin to "rcutree.fanout_leaf" and the prefix isn't documented. However, there are several reasons why we might not want to simply fix the typo and add the prefix: 1) we'd end up with rcutree.rcu_nocb_poll, and rather probably make a change to rcutree.nocb_poll 2) if we did #1, then the prefix wouldn't be consistent with the rcu_nocbs= parameter (i.e. one with, one without prefix) 3) the use of module_param in a header file is less than desired, since it isn't immediately obvious that it will get processed via rcutree.c and get the prefix from that (although use of module_param_named() could clarify that.) 4) the implied export of /sys/module/rcutree/parameters/rcu_nocb_poll data to userspace via module_param() doesn't really buy us anything, as it is read-only and we can tell if it is enabled already without it, since there is a printk at early boot telling us so. In light of all that, just change it from a module_param() to an early_setup() call, and worry about adding it to /sys later on if we decide to allow a dynamic setting of it. Also change the variable to be tagged as read_mostly, since it will only ever be fiddled with at most, once at boot. Signed-off-by: Paul Gortmaker Signed-off-by: Paul E. McKenney --- Documentation/kernel-parameters.txt | 2 +- kernel/rcutree_plugin.h | 10 ++++++++-- 2 files changed, 9 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 363e348bff9..6c723811c0a 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -2438,7 +2438,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted. real-time workloads. It can also improve energy efficiency for asymmetric multiprocessors. - rcu_nocbs_poll [KNL,BOOT] + rcu_nocb_poll [KNL,BOOT] Rather than requiring that offloaded CPUs (specified by rcu_nocbs= above) explicitly awaken the corresponding "rcuoN" kthreads, diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 43dba2d798a..c1cc7e17ff9 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -40,8 +40,7 @@ #ifdef CONFIG_RCU_NOCB_CPU static cpumask_var_t rcu_nocb_mask; /* CPUs to have callbacks offloaded. */ static bool have_rcu_nocb_mask; /* Was rcu_nocb_mask allocated? */ -static bool rcu_nocb_poll; /* Offload kthread are to poll. */ -module_param(rcu_nocb_poll, bool, 0444); +static bool __read_mostly rcu_nocb_poll; /* Offload kthread are to poll. */ static char __initdata nocb_buf[NR_CPUS * 5]; #endif /* #ifdef CONFIG_RCU_NOCB_CPU */ @@ -2159,6 +2158,13 @@ static int __init rcu_nocb_setup(char *str) } __setup("rcu_nocbs=", rcu_nocb_setup); +static int __init parse_rcu_nocb_poll(char *arg) +{ + rcu_nocb_poll = 1; + return 0; +} +early_param("rcu_nocb_poll", parse_rcu_nocb_poll); + /* Is the specified CPU a no-CPUs CPU? */ static bool is_nocb_cpu(int cpu) { -- cgit v1.2.3-70-g09d2 From aa7f67304d1a03180f463258aa6f15a8b434e77d Mon Sep 17 00:00:00 2001 From: Shawn Bohrer Date: Mon, 14 Jan 2013 11:55:31 -0600 Subject: sched/rt: Use root_domain of rt_rq not current processor When the system has multiple domains do_sched_rt_period_timer() can run on any CPU and may iterate over all rt_rq in cpu_online_mask. This means when balance_runtime() is run for a given rt_rq that rt_rq may be in a different rd than the current processor. Thus if we use smp_processor_id() to get rd in do_balance_runtime() we may borrow runtime from a rt_rq that is not part of our rd. This changes do_balance_runtime to get the rd from the passed in rt_rq ensuring that we borrow runtime only from the correct rd for the given rt_rq. This fixes a BUG at kernel/sched/rt.c:687! in __disable_runtime when we try reclaim runtime lent to other rt_rq but runtime has been lent to a rt_rq in another rd. Signed-off-by: Shawn Bohrer Acked-by: Steven Rostedt Acked-by: Mike Galbraith Cc: peterz@infradead.org Cc: Link: http://lkml.kernel.org/r/1358186131-29494-1-git-send-email-sbohrer@rgmadvisors.com Signed-off-by: Ingo Molnar --- kernel/sched/rt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 418feb01344..4f02b284735 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -566,7 +566,7 @@ static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq) static int do_balance_runtime(struct rt_rq *rt_rq) { struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq); - struct root_domain *rd = cpu_rq(smp_processor_id())->rd; + struct root_domain *rd = rq_of_rt_rq(rt_rq)->rd; int i, weight, more = 0; u64 rt_period; -- cgit v1.2.3-70-g09d2 From 38dc3348e36d6cbe6ad51d771e4db948cda5b0e3 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 25 Jan 2013 14:14:22 +0000 Subject: sched: Fix warning in kernel/sched/fair.c a4c96ae319 "sched: Unthrottle rt runqueues in __disable_runtime()" turned the unthrottle_offline_cfs_rqs function into a static symbol, which now triggers a warning about it being potentially unused: kernel/sched/fair.c:2055:13: warning: 'unthrottle_offline_cfs_rqs' defined but not used [-Wunused-function] Marking it __maybe_unused shuts up the gcc warning and lets the compiler safely drop the function body when it's not being used. To reproduce, build the ARM bcm2835_defconfig. Signed-off-by: Arnd Bergmann Cc: Peter Boonstoppel Cc: Peter Zijlstra Cc: Paul Turner Cc: linux-arm-kernel@list.infradead.org Link: http://lkml.kernel.org/r/1359123276-15833-6-git-send-email-arnd@arndb.de Signed-off-by: Ingo Molnar --- kernel/sched/fair.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 5eea8707234..81fa5364340 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -2663,7 +2663,7 @@ static void destroy_cfs_bandwidth(struct cfs_bandwidth *cfs_b) hrtimer_cancel(&cfs_b->slack_timer); } -static void unthrottle_offline_cfs_rqs(struct rq *rq) +static void __maybe_unused unthrottle_offline_cfs_rqs(struct rq *rq) { struct cfs_rq *cfs_rq; -- cgit v1.2.3-70-g09d2 From cff3c124a7e82ca0ea1d6864b27ef18c403c0773 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 25 Jan 2013 14:14:23 +0000 Subject: sched/debug: Fix format string for 32-bit platforms The type returned from atomic64_t can be either unsigned long or unsigned long long, depending on the architecture. Using a cast to unsigned long long lets us use the same format string for all architectures. Without this patch, building with scheduler debugging enabled results in: kernel/sched/debug.c: In function 'print_cfs_rq': kernel/sched/debug.c:225:2: warning: format '%ld' expects argument of type 'long int', but argument 4 has type 'long long int' [-Wformat] kernel/sched/debug.c:225:2: warning: format '%ld' expects argument of type 'long int', but argument 3 has type 'long long int' [-Wformat] Signed-off-by: Arnd Bergmann Cc: Peter Zijlstra Cc: Paul Turner Cc: linux-arm-kernel@list.infradead.org Link: http://lkml.kernel.org/r/1359123276-15833-7-git-send-email-arnd@arndb.de Signed-off-by: Ingo Molnar --- kernel/sched/debug.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index 2cd3c1b4e58..7ae4c4c5420 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c @@ -222,8 +222,8 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) cfs_rq->runnable_load_avg); SEQ_printf(m, " .%-30s: %lld\n", "blocked_load_avg", cfs_rq->blocked_load_avg); - SEQ_printf(m, " .%-30s: %ld\n", "tg_load_avg", - atomic64_read(&cfs_rq->tg->load_avg)); + SEQ_printf(m, " .%-30s: %lld\n", "tg_load_avg", + (unsigned long long)atomic64_read(&cfs_rq->tg->load_avg)); SEQ_printf(m, " .%-30s: %lld\n", "tg_load_contrib", cfs_rq->tg_load_contrib); SEQ_printf(m, " .%-30s: %d\n", "tg_runnable_contrib", -- cgit v1.2.3-70-g09d2 From 0231bb5336758426b44ccd798ccd3c5419c95d58 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 1 Feb 2013 11:23:45 +0100 Subject: perf: Fix event group context move When we have group with mixed events (hw/sw) we want to end up with group leader being in hw context. So if group leader is initialy sw event, we move all the events under hw context. The move is done for each event by removing it from its context and adding it back into proper one. As a part of the removal the event is automatically disabled, which is not what we want at this stage of creating groups. The fix is to initialize event state after removal from sw context. This fix resulted from the following discussion: http://thread.gmane.org/gmane.linux.kernel.perf.user/1144 Reported-by: Andreas Hollmann Signed-off-by: Jiri Olsa Cc: Arnaldo Carvalho de Melo Cc: Namhyung Kim Cc: Corey Ashford Cc: Frederic Weisbecker Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Vince Weaver Link: http://lkml.kernel.org/r/1359714225-4231-1-git-send-email-jolsa@redhat.com Signed-off-by: Ingo Molnar --- kernel/events/core.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/events/core.c b/kernel/events/core.c index 301079d06f2..7b6646a8c06 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -907,6 +907,15 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx) ctx->nr_stat++; } +/* + * Initialize event state based on the perf_event_attr::disabled. + */ +static inline void perf_event__state_init(struct perf_event *event) +{ + event->state = event->attr.disabled ? PERF_EVENT_STATE_OFF : + PERF_EVENT_STATE_INACTIVE; +} + /* * Called at perf_event creation and when events are attached/detached from a * group. @@ -6179,8 +6188,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu, event->overflow_handler = overflow_handler; event->overflow_handler_context = context; - if (attr->disabled) - event->state = PERF_EVENT_STATE_OFF; + perf_event__state_init(event); pmu = NULL; @@ -6609,9 +6617,17 @@ SYSCALL_DEFINE5(perf_event_open, mutex_lock(&gctx->mutex); perf_remove_from_context(group_leader); + + /* + * Removing from the context ends up with disabled + * event. What we want here is event in the initial + * startup state, ready to be add into new context. + */ + perf_event__state_init(group_leader); list_for_each_entry(sibling, &group_leader->sibling_list, group_entry) { perf_remove_from_context(sibling); + perf_event__state_init(sibling); put_ctx(gctx); } mutex_unlock(&gctx->mutex); -- cgit v1.2.3-70-g09d2