From 353af9c9a866b07492b15a4efd18ec93123d3a1d Mon Sep 17 00:00:00 2001
From: Paul Gortmaker <paul.gortmaker@windriver.com>
Date: Thu, 20 Dec 2012 09:35:02 -0800
Subject: rcu: Prevent soft-lockup complaints about no-CBs CPUs

The wait_event() at the head of the rcu_nocb_kthread() can result in
soft-lockup complaints if the CPU in question does not register RCU
callbacks for an extended period.  This commit therefore changes
the wait_event() to a wait_event_interruptible().

Reported-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 kernel/rcutree_plugin.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'kernel')
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index f6e5ec2932b..43dba2d798a 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -2366,10 +2366,11 @@ static int rcu_nocb_kthread(void *arg)
 	for (;;) {
 		/* If not polling, wait for next batch of callbacks. */
 		if (!rcu_nocb_poll)
-			wait_event(rdp->nocb_wq, rdp->nocb_head);
+			wait_event_interruptible(rdp->nocb_wq, rdp->nocb_head);
 		list = ACCESS_ONCE(rdp->nocb_head);
 		if (!list) {
 			schedule_timeout_interruptible(1);
+			flush_signals(current);
 			continue;
 		}
 
-- 
cgit v1.2.3-70-g09d2


From 1b0048a44c502c5ab850203e6e0a6498d7d8676d Mon Sep 17 00:00:00 2001
From: Paul Gortmaker <paul.gortmaker@windriver.com>
Date: Thu, 20 Dec 2012 13:19:22 -0800
Subject: rcu: Make rcu_nocb_poll an early_param instead of module_param

The as-documented rcu_nocb_poll will fail to enable this feature
for two reasons.  (1) there is an extra "s" in the documented
name which is not in the code, and (2) since it uses module_param,
it really is expecting a prefix, akin to "rcutree.fanout_leaf"
and the prefix isn't documented.

However, there are several reasons why we might not want to
simply fix the typo and add the prefix:

1) we'd end up with rcutree.rcu_nocb_poll, and rather probably make
a change to rcutree.nocb_poll

2) if we did #1, then the prefix wouldn't be consistent with the
rcu_nocbs=<cpumap> parameter (i.e. one with, one without prefix)

3) the use of module_param in a header file is less than desired,
since it isn't immediately obvious that it will get processed
via rcutree.c and get the prefix from that (although use of
module_param_named() could clarify that.)

4) the implied export of /sys/module/rcutree/parameters/rcu_nocb_poll
data to userspace via module_param() doesn't really buy us anything,
as it is read-only and we can tell if it is enabled already without
it, since there is a printk at early boot telling us so.

In light of all that, just change it from a module_param() to an
early_setup() call, and worry about adding it to /sys later on if
we decide to allow a dynamic setting of it.

Also change the variable to be tagged as read_mostly, since it
will only ever be fiddled with at most, once at boot.

Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 Documentation/kernel-parameters.txt |  2 +-
 kernel/rcutree_plugin.h             | 10 ++++++++--
 2 files changed, 9 insertions(+), 3 deletions(-)

(limited to 'kernel')

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 363e348bff9..6c723811c0a 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2438,7 +2438,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			real-time workloads.  It can also improve energy
 			efficiency for asymmetric multiprocessors.
 
-	rcu_nocbs_poll	[KNL,BOOT]
+	rcu_nocb_poll	[KNL,BOOT]
 			Rather than requiring that offloaded CPUs
 			(specified by rcu_nocbs= above) explicitly
 			awaken the corresponding "rcuoN" kthreads,
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index 43dba2d798a..c1cc7e17ff9 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -40,8 +40,7 @@
 #ifdef CONFIG_RCU_NOCB_CPU
 static cpumask_var_t rcu_nocb_mask; /* CPUs to have callbacks offloaded. */
 static bool have_rcu_nocb_mask;	    /* Was rcu_nocb_mask allocated? */
-static bool rcu_nocb_poll;	    /* Offload kthread are to poll. */
-module_param(rcu_nocb_poll, bool, 0444);
+static bool __read_mostly rcu_nocb_poll;    /* Offload kthread are to poll. */
 static char __initdata nocb_buf[NR_CPUS * 5];
 #endif /* #ifdef CONFIG_RCU_NOCB_CPU */
 
@@ -2159,6 +2158,13 @@ static int __init rcu_nocb_setup(char *str)
 }
 __setup("rcu_nocbs=", rcu_nocb_setup);
 
+static int __init parse_rcu_nocb_poll(char *arg)
+{
+	rcu_nocb_poll = 1;
+	return 0;
+}
+early_param("rcu_nocb_poll", parse_rcu_nocb_poll);
+
 /* Is the specified CPU a no-CPUs CPU? */
 static bool is_nocb_cpu(int cpu)
 {
-- 
cgit v1.2.3-70-g09d2


From aa7f67304d1a03180f463258aa6f15a8b434e77d Mon Sep 17 00:00:00 2001
From: Shawn Bohrer <sbohrer@rgmadvisors.com>
Date: Mon, 14 Jan 2013 11:55:31 -0600
Subject: sched/rt: Use root_domain of rt_rq not current processor

When the system has multiple domains do_sched_rt_period_timer()
can run on any CPU and may iterate over all rt_rq in
cpu_online_mask.  This means when balance_runtime() is run for a
given rt_rq that rt_rq may be in a different rd than the current
processor.  Thus if we use smp_processor_id() to get rd in
do_balance_runtime() we may borrow runtime from a rt_rq that is
not part of our rd.

This changes do_balance_runtime to get the rd from the passed in
rt_rq ensuring that we borrow runtime only from the correct rd
for the given rt_rq.

This fixes a BUG at kernel/sched/rt.c:687! in __disable_runtime
when we try reclaim runtime lent to other rt_rq but runtime has
been lent to a rt_rq in another rd.

Signed-off-by: Shawn Bohrer <sbohrer@rgmadvisors.com>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Acked-by: Mike Galbraith <bitbucket@online.de>
Cc: peterz@infradead.org
Cc: <stable@kernel.org>
Link: http://lkml.kernel.org/r/1358186131-29494-1-git-send-email-sbohrer@rgmadvisors.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/sched/rt.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 418feb01344..4f02b284735 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -566,7 +566,7 @@ static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq)
 static int do_balance_runtime(struct rt_rq *rt_rq)
 {
 	struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
-	struct root_domain *rd = cpu_rq(smp_processor_id())->rd;
+	struct root_domain *rd = rq_of_rt_rq(rt_rq)->rd;
 	int i, weight, more = 0;
 	u64 rt_period;
 
-- 
cgit v1.2.3-70-g09d2


From 38dc3348e36d6cbe6ad51d771e4db948cda5b0e3 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 25 Jan 2013 14:14:22 +0000
Subject: sched: Fix warning in kernel/sched/fair.c

a4c96ae319 "sched: Unthrottle rt runqueues in
__disable_runtime()" turned the unthrottle_offline_cfs_rqs
function into a static symbol, which now triggers a warning
about it being potentially unused:

  kernel/sched/fair.c:2055:13: warning: 'unthrottle_offline_cfs_rqs' defined but not used [-Wunused-function]

Marking it __maybe_unused shuts up the gcc warning and lets the
compiler safely drop the function body when it's not being used.

To reproduce, build the ARM bcm2835_defconfig.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Cc: Peter Boonstoppel <pboonstoppel@nvidia.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Turner <pjt@google.com>
Cc: linux-arm-kernel@list.infradead.org
Link: http://lkml.kernel.org/r/1359123276-15833-6-git-send-email-arnd@arndb.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/sched/fair.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 5eea8707234..81fa5364340 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2663,7 +2663,7 @@ static void destroy_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
 	hrtimer_cancel(&cfs_b->slack_timer);
 }
 
-static void unthrottle_offline_cfs_rqs(struct rq *rq)
+static void __maybe_unused unthrottle_offline_cfs_rqs(struct rq *rq)
 {
 	struct cfs_rq *cfs_rq;
 
-- 
cgit v1.2.3-70-g09d2


From cff3c124a7e82ca0ea1d6864b27ef18c403c0773 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 25 Jan 2013 14:14:23 +0000
Subject: sched/debug: Fix format string for 32-bit platforms

The type returned from atomic64_t can be either unsigned
long or unsigned long long, depending on the architecture.
Using a cast to unsigned long long lets us use the same
format string for all architectures.

Without this patch, building with scheduler debugging
enabled results in:

  kernel/sched/debug.c: In function 'print_cfs_rq':
  kernel/sched/debug.c:225:2: warning: format '%ld' expects argument of type 'long int', but argument 4 has type 'long long int' [-Wformat]
  kernel/sched/debug.c:225:2: warning: format '%ld' expects argument of type 'long int', but argument 3 has type 'long long int' [-Wformat]

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Paul Turner <pjt@google.com>
Cc: linux-arm-kernel@list.infradead.org
Link: http://lkml.kernel.org/r/1359123276-15833-7-git-send-email-arnd@arndb.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/sched/debug.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'kernel')

diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 2cd3c1b4e58..7ae4c4c5420 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -222,8 +222,8 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
 			cfs_rq->runnable_load_avg);
 	SEQ_printf(m, "  .%-30s: %lld\n", "blocked_load_avg",
 			cfs_rq->blocked_load_avg);
-	SEQ_printf(m, "  .%-30s: %ld\n", "tg_load_avg",
-			atomic64_read(&cfs_rq->tg->load_avg));
+	SEQ_printf(m, "  .%-30s: %lld\n", "tg_load_avg",
+			(unsigned long long)atomic64_read(&cfs_rq->tg->load_avg));
 	SEQ_printf(m, "  .%-30s: %lld\n", "tg_load_contrib",
 			cfs_rq->tg_load_contrib);
 	SEQ_printf(m, "  .%-30s: %d\n", "tg_runnable_contrib",
-- 
cgit v1.2.3-70-g09d2


From 0231bb5336758426b44ccd798ccd3c5419c95d58 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@redhat.com>
Date: Fri, 1 Feb 2013 11:23:45 +0100
Subject: perf: Fix event group context move

When we have group with mixed events (hw/sw) we want to end up
with group leader being in hw context. So if group leader is
initialy sw event, we move all the events under hw context.

The move is done for each event by removing it from its context
and adding it back into proper one. As a part of the removal the
event is automatically disabled, which is not what we want at
this stage of creating groups.

The fix is to initialize event state after removal from sw
context.

This fix resulted from the following discussion:

  http://thread.gmane.org/gmane.linux.kernel.perf.user/1144

Reported-by: Andreas Hollmann <hollmann@in.tum.de>
Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Stephane Eranian <eranian@google.com>
Cc: Vince Weaver <vince@deater.net>
Link: http://lkml.kernel.org/r/1359714225-4231-1-git-send-email-jolsa@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/events/core.c | 20 ++++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

(limited to 'kernel')

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 301079d06f2..7b6646a8c06 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -907,6 +907,15 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx)
 		ctx->nr_stat++;
 }
 
+/*
+ * Initialize event state based on the perf_event_attr::disabled.
+ */
+static inline void perf_event__state_init(struct perf_event *event)
+{
+	event->state = event->attr.disabled ? PERF_EVENT_STATE_OFF :
+					      PERF_EVENT_STATE_INACTIVE;
+}
+
 /*
  * Called at perf_event creation and when events are attached/detached from a
  * group.
@@ -6179,8 +6188,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
 	event->overflow_handler	= overflow_handler;
 	event->overflow_handler_context = context;
 
-	if (attr->disabled)
-		event->state = PERF_EVENT_STATE_OFF;
+	perf_event__state_init(event);
 
 	pmu = NULL;
 
@@ -6609,9 +6617,17 @@ SYSCALL_DEFINE5(perf_event_open,
 
 		mutex_lock(&gctx->mutex);
 		perf_remove_from_context(group_leader);
+
+		/*
+		 * Removing from the context ends up with disabled
+		 * event. What we want here is event in the initial
+		 * startup state, ready to be add into new context.
+		 */
+		perf_event__state_init(group_leader);
 		list_for_each_entry(sibling, &group_leader->sibling_list,
 				    group_entry) {
 			perf_remove_from_context(sibling);
+			perf_event__state_init(sibling);
 			put_ctx(gctx);
 		}
 		mutex_unlock(&gctx->mutex);
-- 
cgit v1.2.3-70-g09d2