From ac5637611150281f398bb7a47e3fcb69a09e7803 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 7 Feb 2012 17:58:03 +0100 Subject: genirq: Unmask oneshot irqs when thread was not woken When the primary handler of an interrupt which is marked IRQ_ONESHOT returns IRQ_HANDLED or IRQ_NONE, then the interrupt thread is not woken and the unmask logic of the interrupt line is never invoked. This keeps the interrupt masked forever. This was not noticed as most IRQ_ONESHOT users wake the thread unconditionally (usually because they cannot access the underlying device from hard interrupt context). Though this behaviour was nowhere documented and not necessarily intentional. Some drivers can avoid the thread wakeup in certain cases and run into the situation where the interrupt line s kept masked. Handle it gracefully. Reported-and-tested-by: Lothar Wassmann Cc: stable@vger.kernel.org Signed-off-by: Thomas Gleixner --- kernel/irq/chip.c | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index f7c543a801d..b742edc0bdd 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -330,6 +330,24 @@ out_unlock: } EXPORT_SYMBOL_GPL(handle_simple_irq); +/* + * Called unconditionally from handle_level_irq() and only for oneshot + * interrupts from handle_fasteoi_irq() + */ +static void cond_unmask_irq(struct irq_desc *desc) +{ + /* + * We need to unmask in the following cases: + * - Standard level irq (IRQF_ONESHOT is not set) + * - Oneshot irq which did not wake the thread (caused by a + * spurious interrupt or a primary handler handling it + * completely). + */ + if (!irqd_irq_disabled(&desc->irq_data) && + irqd_irq_masked(&desc->irq_data) && !desc->threads_oneshot) + unmask_irq(desc); +} + /** * handle_level_irq - Level type irq handler * @irq: the interrupt number @@ -362,8 +380,8 @@ handle_level_irq(unsigned int irq, struct irq_desc *desc) handle_irq_event(desc); - if (!irqd_irq_disabled(&desc->irq_data) && !(desc->istate & IRQS_ONESHOT)) - unmask_irq(desc); + cond_unmask_irq(desc); + out_unlock: raw_spin_unlock(&desc->lock); } @@ -417,6 +435,9 @@ handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc) preflow_handler(desc); handle_irq_event(desc); + if (desc->istate & IRQS_ONESHOT) + cond_unmask_irq(desc); + out_eoi: desc->irq_data.chip->irq_eoi(&desc->irq_data); out_unlock: -- cgit v1.2.3-70-g09d2 From b4bc724e82e80478cba5fe9825b62e71ddf78757 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 8 Feb 2012 11:57:52 +0100 Subject: genirq: Handle pending irqs in irq_startup() An interrupt might be pending when irq_startup() is called, but the startup code does not invoke the resend logic. In some cases this prevents the device from issuing another interrupt which renders the device non functional. Call the resend function in irq_startup() to keep things going. Reported-and-tested-by: Russell King Cc: stable@vger.kernel.org Signed-off-by: Thomas Gleixner --- kernel/irq/autoprobe.c | 4 ++-- kernel/irq/chip.c | 17 ++++++++++------- kernel/irq/internals.h | 2 +- kernel/irq/manage.c | 2 +- 4 files changed, 14 insertions(+), 11 deletions(-) (limited to 'kernel') diff --git a/kernel/irq/autoprobe.c b/kernel/irq/autoprobe.c index 342d8f44e40..0119b9d467a 100644 --- a/kernel/irq/autoprobe.c +++ b/kernel/irq/autoprobe.c @@ -53,7 +53,7 @@ unsigned long probe_irq_on(void) if (desc->irq_data.chip->irq_set_type) desc->irq_data.chip->irq_set_type(&desc->irq_data, IRQ_TYPE_PROBE); - irq_startup(desc); + irq_startup(desc, false); } raw_spin_unlock_irq(&desc->lock); } @@ -70,7 +70,7 @@ unsigned long probe_irq_on(void) raw_spin_lock_irq(&desc->lock); if (!desc->action && irq_settings_can_probe(desc)) { desc->istate |= IRQS_AUTODETECT | IRQS_WAITING; - if (irq_startup(desc)) + if (irq_startup(desc, false)) desc->istate |= IRQS_PENDING; } raw_spin_unlock_irq(&desc->lock); diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index b742edc0bdd..fb7db75ee0c 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -157,19 +157,22 @@ static void irq_state_set_masked(struct irq_desc *desc) irqd_set(&desc->irq_data, IRQD_IRQ_MASKED); } -int irq_startup(struct irq_desc *desc) +int irq_startup(struct irq_desc *desc, bool resend) { + int ret = 0; + irq_state_clr_disabled(desc); desc->depth = 0; if (desc->irq_data.chip->irq_startup) { - int ret = desc->irq_data.chip->irq_startup(&desc->irq_data); + ret = desc->irq_data.chip->irq_startup(&desc->irq_data); irq_state_clr_masked(desc); - return ret; + } else { + irq_enable(desc); } - - irq_enable(desc); - return 0; + if (resend) + check_irq_resend(desc, desc->irq_data.irq); + return ret; } void irq_shutdown(struct irq_desc *desc) @@ -646,7 +649,7 @@ __irq_set_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained, irq_settings_set_noprobe(desc); irq_settings_set_norequest(desc); irq_settings_set_nothread(desc); - irq_startup(desc); + irq_startup(desc, true); } out: irq_put_desc_busunlock(desc, flags); diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h index b7952316016..40378ff877e 100644 --- a/kernel/irq/internals.h +++ b/kernel/irq/internals.h @@ -67,7 +67,7 @@ extern int __irq_set_trigger(struct irq_desc *desc, unsigned int irq, extern void __disable_irq(struct irq_desc *desc, unsigned int irq, bool susp); extern void __enable_irq(struct irq_desc *desc, unsigned int irq, bool resume); -extern int irq_startup(struct irq_desc *desc); +extern int irq_startup(struct irq_desc *desc, bool resend); extern void irq_shutdown(struct irq_desc *desc); extern void irq_enable(struct irq_desc *desc); extern void irq_disable(struct irq_desc *desc); diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index a9a9dbe49fe..32313c08444 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -1027,7 +1027,7 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) desc->istate |= IRQS_ONESHOT; if (irq_settings_can_autoenable(desc)) - irq_startup(desc); + irq_startup(desc, true); else /* Undo nested disables: */ desc->depth = 1; -- cgit v1.2.3-70-g09d2 From 8c79a045fd590a26e81e75f5d8d4ec5c7d23e565 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 30 Jan 2012 14:51:37 +0100 Subject: sched/events: Revert trace_sched_stat_sleeptime() Commit 1ac9bc69 ("sched/tracing: Add a new tracepoint for sleeptime") added a new sched:sched_stat_sleeptime tracepoint. It's broken: the first sample we get on a task might be bad because of a stale sleep_start value that wasn't reset at the last task switch because the tracepoint was not active. It also breaks the existing schedstat samples due to the side effects of: - se->statistics.sleep_start = 0; ... - se->statistics.block_start = 0; Nor do I see means to fix it without adding overhead to the scheduler fast path, which I'm not willing to for the sake of redundant instrumentation. Most importantly, sleep time information can already be constructed by tracing context switches and wakeups, and taking the timestamp difference between the schedule-out, the wakeup and the schedule-in. Signed-off-by: Peter Zijlstra Cc: Andrew Vagin Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Link: http://lkml.kernel.org/n/tip-pc4c9qhl8q6vg3bs4j6k0rbd@git.kernel.org Signed-off-by: Ingo Molnar --- include/trace/events/sched.h | 50 -------------------------------------------- kernel/sched/core.c | 1 - kernel/sched/fair.c | 2 ++ 3 files changed, 2 insertions(+), 51 deletions(-) (limited to 'kernel') diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index 6ba596b07a7..e33ed1bfa11 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -370,56 +370,6 @@ TRACE_EVENT(sched_stat_runtime, (unsigned long long)__entry->vruntime) ); -#ifdef CREATE_TRACE_POINTS -static inline u64 trace_get_sleeptime(struct task_struct *tsk) -{ -#ifdef CONFIG_SCHEDSTATS - u64 block, sleep; - - block = tsk->se.statistics.block_start; - sleep = tsk->se.statistics.sleep_start; - tsk->se.statistics.block_start = 0; - tsk->se.statistics.sleep_start = 0; - - return block ? block : sleep ? sleep : 0; -#else - return 0; -#endif -} -#endif - -/* - * Tracepoint for accounting sleeptime (time the task is sleeping - * or waiting for I/O). - */ -TRACE_EVENT(sched_stat_sleeptime, - - TP_PROTO(struct task_struct *tsk, u64 now), - - TP_ARGS(tsk, now), - - TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - __field( u64, sleeptime ) - ), - - TP_fast_assign( - memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); - __entry->pid = tsk->pid; - __entry->sleeptime = trace_get_sleeptime(tsk); - __entry->sleeptime = __entry->sleeptime ? - now - __entry->sleeptime : 0; - ) - TP_perf_assign( - __perf_count(__entry->sleeptime); - ), - - TP_printk("comm=%s pid=%d sleeptime=%Lu [ns]", - __entry->comm, __entry->pid, - (unsigned long long)__entry->sleeptime) -); - /* * Tracepoint for showing priority inheritance modifying a tasks * priority. diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 5255c9d2e05..b342f57879e 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1932,7 +1932,6 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev) local_irq_enable(); #endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */ finish_lock_switch(rq, prev); - trace_sched_stat_sleeptime(current, rq->clock); fire_sched_in_preempt_notifiers(current); if (mm) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 7c6414fc669..aca16b843b7 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -1003,6 +1003,7 @@ static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se) if (unlikely(delta > se->statistics.sleep_max)) se->statistics.sleep_max = delta; + se->statistics.sleep_start = 0; se->statistics.sum_sleep_runtime += delta; if (tsk) { @@ -1019,6 +1020,7 @@ static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se) if (unlikely(delta > se->statistics.block_max)) se->statistics.block_max = delta; + se->statistics.block_start = 0; se->statistics.sum_sleep_runtime += delta; if (tsk) { -- cgit v1.2.3-70-g09d2 From 8f2f748b0656257153bcf0941df8d6060acc5ca6 Mon Sep 17 00:00:00 2001 From: "Srivatsa S. Bhat" Date: Thu, 23 Feb 2012 15:27:15 +0530 Subject: CPU hotplug, cpusets, suspend: Don't touch cpusets during suspend/resume Currently, during CPU hotplug, the cpuset callbacks modify the cpusets to reflect the state of the system, and this handling is asymmetric. That is, upon CPU offline, that CPU is removed from all cpusets. However when it comes back online, it is put back only to the root cpuset. This gives rise to a significant problem during suspend/resume. During suspend, we offline all non-boot cpus and during resume we online them back. Which means, after a resume, all cpusets (except the root cpuset) will be restricted to just one single CPU (the boot cpu). But the whole point of suspend/resume is to restore the system to a state which is as close as possible to how it was before suspend. So to fix this, don't touch cpusets during suspend/resume. That is, modify the cpuset-related CPU hotplug callback to just ignore CPU hotplug when it is initiated as part of the suspend/resume sequence. Reported-by: Prashanth Nageshappa Signed-off-by: Srivatsa S. Bhat Cc: Linus Torvalds Cc: Andrew Morton Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/4F460D7B.1020703@linux.vnet.ibm.com Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- kernel/sched/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/sched/core.c b/kernel/sched/core.c index b342f57879e..33a0676ea74 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -6728,7 +6728,7 @@ int __init sched_create_sysfs_power_savings_entries(struct device *dev) static int cpuset_cpu_active(struct notifier_block *nfb, unsigned long action, void *hcpu) { - switch (action & ~CPU_TASKS_FROZEN) { + switch (action) { case CPU_ONLINE: case CPU_DOWN_FAILED: cpuset_update_active_cpus(); @@ -6741,7 +6741,7 @@ static int cpuset_cpu_active(struct notifier_block *nfb, unsigned long action, static int cpuset_cpu_inactive(struct notifier_block *nfb, unsigned long action, void *hcpu) { - switch (action & ~CPU_TASKS_FROZEN) { + switch (action) { case CPU_DOWN_PREPARE: cpuset_update_active_cpus(); return NOTIFY_OK; -- cgit v1.2.3-70-g09d2 From 30ce2f7eef095d1b8d070740f1948629814fe3c7 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 28 Feb 2012 10:19:38 +0900 Subject: perf/hwbp: Fix a possible memory leak If kzalloc() for TYPE_DATA failed on a given cpu, previous chunk of TYPE_INST will be leaked. Fix it. Thanks to Peter Zijlstra for suggesting this better solution. It should work as long as the initial value of the region is all 0's and that's the case of static (per-cpu) memory allocation. Signed-off-by: Namhyung Kim Acked-by: Frederic Weisbecker Acked-by: Peter Zijlstra Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Link: http://lkml.kernel.org/r/1330391978-28070-1-git-send-email-namhyung.kim@lge.com Signed-off-by: Ingo Molnar --- kernel/events/hw_breakpoint.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/events/hw_breakpoint.c b/kernel/events/hw_breakpoint.c index b7971d6f38b..ee706ce44aa 100644 --- a/kernel/events/hw_breakpoint.c +++ b/kernel/events/hw_breakpoint.c @@ -651,10 +651,10 @@ int __init init_hw_breakpoint(void) err_alloc: for_each_possible_cpu(err_cpu) { - if (err_cpu == cpu) - break; for (i = 0; i < TYPE_MAX; i++) kfree(per_cpu(nr_task_bp_pinned[i], cpu)); + if (err_cpu == cpu) + break; } return -ENOMEM; -- cgit v1.2.3-70-g09d2