From ac5637611150281f398bb7a47e3fcb69a09e7803 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 7 Feb 2012 17:58:03 +0100
Subject: genirq: Unmask oneshot irqs when thread was not woken

When the primary handler of an interrupt which is marked IRQ_ONESHOT
returns IRQ_HANDLED or IRQ_NONE, then the interrupt thread is not
woken and the unmask logic of the interrupt line is never
invoked. This keeps the interrupt masked forever.

This was not noticed as most IRQ_ONESHOT users wake the thread
unconditionally (usually because they cannot access the underlying
device from hard interrupt context). Though this behaviour was nowhere
documented and not necessarily intentional. Some drivers can avoid the
thread wakeup in certain cases and run into the situation where the
interrupt line s kept masked.

Handle it gracefully.

Reported-and-tested-by: Lothar Wassmann <lw@karo-electronics.de>
Cc: stable@vger.kernel.org
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/irq/chip.c | 25 +++++++++++++++++++++++--
 1 file changed, 23 insertions(+), 2 deletions(-)

(limited to 'kernel')

diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index f7c543a801d..b742edc0bdd 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -330,6 +330,24 @@ out_unlock:
 }
 EXPORT_SYMBOL_GPL(handle_simple_irq);
 
+/*
+ * Called unconditionally from handle_level_irq() and only for oneshot
+ * interrupts from handle_fasteoi_irq()
+ */
+static void cond_unmask_irq(struct irq_desc *desc)
+{
+	/*
+	 * We need to unmask in the following cases:
+	 * - Standard level irq (IRQF_ONESHOT is not set)
+	 * - Oneshot irq which did not wake the thread (caused by a
+	 *   spurious interrupt or a primary handler handling it
+	 *   completely).
+	 */
+	if (!irqd_irq_disabled(&desc->irq_data) &&
+	    irqd_irq_masked(&desc->irq_data) && !desc->threads_oneshot)
+		unmask_irq(desc);
+}
+
 /**
  *	handle_level_irq - Level type irq handler
  *	@irq:	the interrupt number
@@ -362,8 +380,8 @@ handle_level_irq(unsigned int irq, struct irq_desc *desc)
 
 	handle_irq_event(desc);
 
-	if (!irqd_irq_disabled(&desc->irq_data) && !(desc->istate & IRQS_ONESHOT))
-		unmask_irq(desc);
+	cond_unmask_irq(desc);
+
 out_unlock:
 	raw_spin_unlock(&desc->lock);
 }
@@ -417,6 +435,9 @@ handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc)
 	preflow_handler(desc);
 	handle_irq_event(desc);
 
+	if (desc->istate & IRQS_ONESHOT)
+		cond_unmask_irq(desc);
+
 out_eoi:
 	desc->irq_data.chip->irq_eoi(&desc->irq_data);
 out_unlock:
-- 
cgit v1.2.3-70-g09d2


From b4bc724e82e80478cba5fe9825b62e71ddf78757 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 8 Feb 2012 11:57:52 +0100
Subject: genirq: Handle pending irqs in irq_startup()

An interrupt might be pending when irq_startup() is called, but the
startup code does not invoke the resend logic. In some cases this
prevents the device from issuing another interrupt which renders the
device non functional.

Call the resend function in irq_startup() to keep things going.

Reported-and-tested-by: Russell King <rmk+kernel@arm.linux.org.uk>
Cc: stable@vger.kernel.org
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/irq/autoprobe.c |  4 ++--
 kernel/irq/chip.c      | 17 ++++++++++-------
 kernel/irq/internals.h |  2 +-
 kernel/irq/manage.c    |  2 +-
 4 files changed, 14 insertions(+), 11 deletions(-)

(limited to 'kernel')

diff --git a/kernel/irq/autoprobe.c b/kernel/irq/autoprobe.c
index 342d8f44e40..0119b9d467a 100644
--- a/kernel/irq/autoprobe.c
+++ b/kernel/irq/autoprobe.c
@@ -53,7 +53,7 @@ unsigned long probe_irq_on(void)
 			if (desc->irq_data.chip->irq_set_type)
 				desc->irq_data.chip->irq_set_type(&desc->irq_data,
 							 IRQ_TYPE_PROBE);
-			irq_startup(desc);
+			irq_startup(desc, false);
 		}
 		raw_spin_unlock_irq(&desc->lock);
 	}
@@ -70,7 +70,7 @@ unsigned long probe_irq_on(void)
 		raw_spin_lock_irq(&desc->lock);
 		if (!desc->action && irq_settings_can_probe(desc)) {
 			desc->istate |= IRQS_AUTODETECT | IRQS_WAITING;
-			if (irq_startup(desc))
+			if (irq_startup(desc, false))
 				desc->istate |= IRQS_PENDING;
 		}
 		raw_spin_unlock_irq(&desc->lock);
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index b742edc0bdd..fb7db75ee0c 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -157,19 +157,22 @@ static void irq_state_set_masked(struct irq_desc *desc)
 	irqd_set(&desc->irq_data, IRQD_IRQ_MASKED);
 }
 
-int irq_startup(struct irq_desc *desc)
+int irq_startup(struct irq_desc *desc, bool resend)
 {
+	int ret = 0;
+
 	irq_state_clr_disabled(desc);
 	desc->depth = 0;
 
 	if (desc->irq_data.chip->irq_startup) {
-		int ret = desc->irq_data.chip->irq_startup(&desc->irq_data);
+		ret = desc->irq_data.chip->irq_startup(&desc->irq_data);
 		irq_state_clr_masked(desc);
-		return ret;
+	} else {
+		irq_enable(desc);
 	}
-
-	irq_enable(desc);
-	return 0;
+	if (resend)
+		check_irq_resend(desc, desc->irq_data.irq);
+	return ret;
 }
 
 void irq_shutdown(struct irq_desc *desc)
@@ -646,7 +649,7 @@ __irq_set_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained,
 		irq_settings_set_noprobe(desc);
 		irq_settings_set_norequest(desc);
 		irq_settings_set_nothread(desc);
-		irq_startup(desc);
+		irq_startup(desc, true);
 	}
 out:
 	irq_put_desc_busunlock(desc, flags);
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index b7952316016..40378ff877e 100644
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -67,7 +67,7 @@ extern int __irq_set_trigger(struct irq_desc *desc, unsigned int irq,
 extern void __disable_irq(struct irq_desc *desc, unsigned int irq, bool susp);
 extern void __enable_irq(struct irq_desc *desc, unsigned int irq, bool resume);
 
-extern int irq_startup(struct irq_desc *desc);
+extern int irq_startup(struct irq_desc *desc, bool resend);
 extern void irq_shutdown(struct irq_desc *desc);
 extern void irq_enable(struct irq_desc *desc);
 extern void irq_disable(struct irq_desc *desc);
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index a9a9dbe49fe..32313c08444 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -1027,7 +1027,7 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
 			desc->istate |= IRQS_ONESHOT;
 
 		if (irq_settings_can_autoenable(desc))
-			irq_startup(desc);
+			irq_startup(desc, true);
 		else
 			/* Undo nested disables: */
 			desc->depth = 1;
-- 
cgit v1.2.3-70-g09d2


From 8c79a045fd590a26e81e75f5d8d4ec5c7d23e565 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Mon, 30 Jan 2012 14:51:37 +0100
Subject: sched/events: Revert trace_sched_stat_sleeptime()

Commit 1ac9bc69 ("sched/tracing: Add a new tracepoint for sleeptime")
added a new sched:sched_stat_sleeptime tracepoint.

It's broken: the first sample we get on a task might be bad because
of a stale sleep_start value that wasn't reset at the last task switch
because the tracepoint was not active.

It also breaks the existing schedstat samples due to the side
effects of:

-               se->statistics.sleep_start = 0;
...
-               se->statistics.block_start = 0;

Nor do I see means to fix it without adding overhead to the scheduler
fast path, which I'm not willing to for the sake of redundant
instrumentation.

Most importantly, sleep time information can already be constructed
by tracing context switches and wakeups, and taking the timestamp
difference between the schedule-out, the wakeup and the schedule-in.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Andrew Vagin <avagin@openvz.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Link: http://lkml.kernel.org/n/tip-pc4c9qhl8q6vg3bs4j6k0rbd@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/trace/events/sched.h | 50 --------------------------------------------
 kernel/sched/core.c          |  1 -
 kernel/sched/fair.c          |  2 ++
 3 files changed, 2 insertions(+), 51 deletions(-)

(limited to 'kernel')

diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index 6ba596b07a7..e33ed1bfa11 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -370,56 +370,6 @@ TRACE_EVENT(sched_stat_runtime,
 			(unsigned long long)__entry->vruntime)
 );
 
-#ifdef CREATE_TRACE_POINTS
-static inline u64 trace_get_sleeptime(struct task_struct *tsk)
-{
-#ifdef CONFIG_SCHEDSTATS
-	u64 block, sleep;
-
-	block = tsk->se.statistics.block_start;
-	sleep = tsk->se.statistics.sleep_start;
-	tsk->se.statistics.block_start = 0;
-	tsk->se.statistics.sleep_start = 0;
-
-	return block ? block : sleep ? sleep : 0;
-#else
-	return 0;
-#endif
-}
-#endif
-
-/*
- * Tracepoint for accounting sleeptime (time the task is sleeping
- * or waiting for I/O).
- */
-TRACE_EVENT(sched_stat_sleeptime,
-
-	TP_PROTO(struct task_struct *tsk, u64 now),
-
-	TP_ARGS(tsk, now),
-
-	TP_STRUCT__entry(
-		__array( char,	comm,	TASK_COMM_LEN	)
-		__field( pid_t,	pid			)
-		__field( u64,	sleeptime		)
-	),
-
-	TP_fast_assign(
-		memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
-		__entry->pid		= tsk->pid;
-		__entry->sleeptime = trace_get_sleeptime(tsk);
-		__entry->sleeptime = __entry->sleeptime ?
-				now - __entry->sleeptime : 0;
-	)
-	TP_perf_assign(
-		__perf_count(__entry->sleeptime);
-	),
-
-	TP_printk("comm=%s pid=%d sleeptime=%Lu [ns]",
-			__entry->comm, __entry->pid,
-			(unsigned long long)__entry->sleeptime)
-);
-
 /*
  * Tracepoint for showing priority inheritance modifying a tasks
  * priority.
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 5255c9d2e05..b342f57879e 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1932,7 +1932,6 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
 	local_irq_enable();
 #endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */
 	finish_lock_switch(rq, prev);
-	trace_sched_stat_sleeptime(current, rq->clock);
 
 	fire_sched_in_preempt_notifiers(current);
 	if (mm)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 7c6414fc669..aca16b843b7 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1003,6 +1003,7 @@ static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
 		if (unlikely(delta > se->statistics.sleep_max))
 			se->statistics.sleep_max = delta;
 
+		se->statistics.sleep_start = 0;
 		se->statistics.sum_sleep_runtime += delta;
 
 		if (tsk) {
@@ -1019,6 +1020,7 @@ static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
 		if (unlikely(delta > se->statistics.block_max))
 			se->statistics.block_max = delta;
 
+		se->statistics.block_start = 0;
 		se->statistics.sum_sleep_runtime += delta;
 
 		if (tsk) {
-- 
cgit v1.2.3-70-g09d2


From 8f2f748b0656257153bcf0941df8d6060acc5ca6 Mon Sep 17 00:00:00 2001
From: "Srivatsa S. Bhat" <srivatsa.bhat@linux.vnet.ibm.com>
Date: Thu, 23 Feb 2012 15:27:15 +0530
Subject: CPU hotplug, cpusets, suspend: Don't touch cpusets during
 suspend/resume

Currently, during CPU hotplug, the cpuset callbacks modify the cpusets
to reflect the state of the system, and this handling is asymmetric.
That is, upon CPU offline, that CPU is removed from all cpusets. However
when it comes back online, it is put back only to the root cpuset.

This gives rise to a significant problem during suspend/resume. During
suspend, we offline all non-boot cpus and during resume we online them back.
Which means, after a resume, all cpusets (except the root cpuset) will be
restricted to just one single CPU (the boot cpu). But the whole point of
suspend/resume is to restore the system to a state which is as close as
possible to how it was before suspend.

So to fix this, don't touch cpusets during suspend/resume. That is, modify
the cpuset-related CPU hotplug callback to just ignore CPU hotplug when it
is initiated as part of the suspend/resume sequence.

Reported-by: Prashanth Nageshappa <prashanth@linux.vnet.ibm.com>
Signed-off-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: stable@vger.kernel.org
Link: http://lkml.kernel.org/r/4F460D7B.1020703@linux.vnet.ibm.com
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/sched/core.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'kernel')

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index b342f57879e..33a0676ea74 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -6728,7 +6728,7 @@ int __init sched_create_sysfs_power_savings_entries(struct device *dev)
 static int cpuset_cpu_active(struct notifier_block *nfb, unsigned long action,
 			     void *hcpu)
 {
-	switch (action & ~CPU_TASKS_FROZEN) {
+	switch (action) {
 	case CPU_ONLINE:
 	case CPU_DOWN_FAILED:
 		cpuset_update_active_cpus();
@@ -6741,7 +6741,7 @@ static int cpuset_cpu_active(struct notifier_block *nfb, unsigned long action,
 static int cpuset_cpu_inactive(struct notifier_block *nfb, unsigned long action,
 			       void *hcpu)
 {
-	switch (action & ~CPU_TASKS_FROZEN) {
+	switch (action) {
 	case CPU_DOWN_PREPARE:
 		cpuset_update_active_cpus();
 		return NOTIFY_OK;
-- 
cgit v1.2.3-70-g09d2


From 30ce2f7eef095d1b8d070740f1948629814fe3c7 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung.kim@lge.com>
Date: Tue, 28 Feb 2012 10:19:38 +0900
Subject: perf/hwbp: Fix a possible memory leak

If kzalloc() for TYPE_DATA failed on a given cpu, previous chunk
of TYPE_INST will be leaked. Fix it.

Thanks to Peter Zijlstra for suggesting this better solution. It
should work as long as the initial value of the region is all
0's and that's the case of static (per-cpu) memory allocation.

Signed-off-by: Namhyung Kim <namhyung.kim@lge.com>
Acked-by: Frederic Weisbecker <fweisbec@gmail.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Link: http://lkml.kernel.org/r/1330391978-28070-1-git-send-email-namhyung.kim@lge.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/events/hw_breakpoint.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'kernel')

diff --git a/kernel/events/hw_breakpoint.c b/kernel/events/hw_breakpoint.c
index b7971d6f38b..ee706ce44aa 100644
--- a/kernel/events/hw_breakpoint.c
+++ b/kernel/events/hw_breakpoint.c
@@ -651,10 +651,10 @@ int __init init_hw_breakpoint(void)
 
  err_alloc:
 	for_each_possible_cpu(err_cpu) {
-		if (err_cpu == cpu)
-			break;
 		for (i = 0; i < TYPE_MAX; i++)
 			kfree(per_cpu(nr_task_bp_pinned[i], cpu));
+		if (err_cpu == cpu)
+			break;
 	}
 
 	return -ENOMEM;
-- 
cgit v1.2.3-70-g09d2