From 7576958a9d5a4a677ad7dd40901cdbb6c1110c98 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 14 Feb 2011 14:04:46 +0100 Subject: workqueue: wake up a worker when a rescuer is leaving a gcwq After executing the matching works, a rescuer leaves the gcwq whether there are more pending works or not. This may decrease the concurrency level to zero and stall execution until a new work item is queued on the gcwq. Make rescuer wake up a regular worker when it leaves a gcwq if there are more works to execute, so that execution isn't stalled. Signed-off-by: Tejun Heo Reported-by: Ray Jui Cc: stable@kernel.org --- kernel/workqueue.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'kernel') diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 11869faa681..90a17ca2ad0 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -2047,6 +2047,15 @@ repeat: move_linked_works(work, scheduled, &n); process_scheduled_works(rescuer); + + /* + * Leave this gcwq. If keep_working() is %true, notify a + * regular worker; otherwise, we end up with 0 concurrency + * and stalling the execution. + */ + if (keep_working(gcwq)) + wake_up_worker(gcwq); + spin_unlock_irq(&gcwq->lock); } -- cgit v1.2.3-70-g09d2 From 4fe757dd48a9e95e1a071291f15dda5421dacb66 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 15 Feb 2011 22:26:07 +0100 Subject: perf: Fix throttle logic It was possible to call pmu::start() on an already running event. In particular this lead so some wreckage as the hrtimer events would re-initialize active timers. This was due to throttled events being activated again by scheduling. Scheduling in a context would add and force start events, resulting in running events with a possible throttle status. The next tick to hit that task will then try to unthrottle the event and call ->start() on an already running event. Reported-by: Jeff Moyer Cc: Signed-off-by: Peter Zijlstra LKML-Reference: Signed-off-by: Ingo Molnar --- kernel/perf_event.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 999835b6112..656222fcf76 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -782,6 +782,10 @@ retry: raw_spin_unlock_irq(&ctx->lock); } +#define MAX_INTERRUPTS (~0ULL) + +static void perf_log_throttle(struct perf_event *event, int enable); + static int event_sched_in(struct perf_event *event, struct perf_cpu_context *cpuctx, @@ -794,6 +798,17 @@ event_sched_in(struct perf_event *event, event->state = PERF_EVENT_STATE_ACTIVE; event->oncpu = smp_processor_id(); + + /* + * Unthrottle events, since we scheduled we might have missed several + * ticks already, also for a heavily scheduling task there is little + * guarantee it'll get a tick in a timely manner. + */ + if (unlikely(event->hw.interrupts == MAX_INTERRUPTS)) { + perf_log_throttle(event, 1); + event->hw.interrupts = 0; + } + /* * The new state must be visible before we turn it on in the hardware: */ @@ -1596,10 +1611,6 @@ void __perf_event_task_sched_in(struct task_struct *task) } } -#define MAX_INTERRUPTS (~0ULL) - -static void perf_log_throttle(struct perf_event *event, int enable); - static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count) { u64 frequency = event->attr.sample_freq; -- cgit v1.2.3-70-g09d2 From 58a69cb47ec6991bf006a3e5d202e8571b0327a4 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 16 Feb 2011 09:25:31 +0100 Subject: workqueue, freezer: unify spelling of 'freeze' + 'able' to 'freezable' There are two spellings in use for 'freeze' + 'able' - 'freezable' and 'freezeable'. The former is the more prominent one. The latter is mostly used by workqueue and in a few other odd places. Unify the spelling to 'freezable'. Signed-off-by: Tejun Heo Reported-by: Alan Stern Acked-by: "Rafael J. Wysocki" Acked-by: Greg Kroah-Hartman Acked-by: Dmitry Torokhov Cc: David Woodhouse Cc: Alex Dubov Cc: "David S. Miller" Cc: Steven Whitehouse --- Documentation/workqueue.txt | 4 ++-- drivers/memstick/core/memstick.c | 2 +- drivers/misc/tifm_core.c | 2 +- drivers/misc/vmw_balloon.c | 2 +- drivers/mtd/nand/r852.c | 2 +- drivers/mtd/sm_ftl.c | 2 +- drivers/net/can/mcp251x.c | 2 +- drivers/tty/serial/max3100.c | 2 +- drivers/tty/serial/max3107.c | 2 +- fs/gfs2/glock.c | 4 ++-- fs/gfs2/main.c | 2 +- include/linux/freezer.h | 2 +- include/linux/sched.h | 2 +- include/linux/workqueue.h | 8 ++++---- kernel/power/main.c | 2 +- kernel/power/process.c | 6 +++--- kernel/workqueue.c | 24 ++++++++++++------------ 17 files changed, 35 insertions(+), 35 deletions(-) (limited to 'kernel') diff --git a/Documentation/workqueue.txt b/Documentation/workqueue.txt index 996a27d9b8d..01c513fac40 100644 --- a/Documentation/workqueue.txt +++ b/Documentation/workqueue.txt @@ -190,9 +190,9 @@ resources, scheduled and executed. * Long running CPU intensive workloads which can be better managed by the system scheduler. - WQ_FREEZEABLE + WQ_FREEZABLE - A freezeable wq participates in the freeze phase of the system + A freezable wq participates in the freeze phase of the system suspend operations. Work items on the wq are drained and no new work item starts execution until thawed. diff --git a/drivers/memstick/core/memstick.c b/drivers/memstick/core/memstick.c index e9a3eab7b0c..8c1d85e27be 100644 --- a/drivers/memstick/core/memstick.c +++ b/drivers/memstick/core/memstick.c @@ -621,7 +621,7 @@ static int __init memstick_init(void) { int rc; - workqueue = create_freezeable_workqueue("kmemstick"); + workqueue = create_freezable_workqueue("kmemstick"); if (!workqueue) return -ENOMEM; diff --git a/drivers/misc/tifm_core.c b/drivers/misc/tifm_core.c index 5f6852dff40..44d4475a09d 100644 --- a/drivers/misc/tifm_core.c +++ b/drivers/misc/tifm_core.c @@ -329,7 +329,7 @@ static int __init tifm_init(void) { int rc; - workqueue = create_freezeable_workqueue("tifm"); + workqueue = create_freezable_workqueue("tifm"); if (!workqueue) return -ENOMEM; diff --git a/drivers/misc/vmw_balloon.c b/drivers/misc/vmw_balloon.c index 4d2ea8e8014..6df5a55da11 100644 --- a/drivers/misc/vmw_balloon.c +++ b/drivers/misc/vmw_balloon.c @@ -785,7 +785,7 @@ static int __init vmballoon_init(void) if (x86_hyper != &x86_hyper_vmware) return -ENODEV; - vmballoon_wq = create_freezeable_workqueue("vmmemctl"); + vmballoon_wq = create_freezable_workqueue("vmmemctl"); if (!vmballoon_wq) { pr_err("failed to create workqueue\n"); return -ENOMEM; diff --git a/drivers/mtd/nand/r852.c b/drivers/mtd/nand/r852.c index d9d7efbc77c..6322d1fb5d6 100644 --- a/drivers/mtd/nand/r852.c +++ b/drivers/mtd/nand/r852.c @@ -930,7 +930,7 @@ int r852_probe(struct pci_dev *pci_dev, const struct pci_device_id *id) init_completion(&dev->dma_done); - dev->card_workqueue = create_freezeable_workqueue(DRV_NAME); + dev->card_workqueue = create_freezable_workqueue(DRV_NAME); if (!dev->card_workqueue) goto error9; diff --git a/drivers/mtd/sm_ftl.c b/drivers/mtd/sm_ftl.c index 67822cf6c02..ac0d6a8613b 100644 --- a/drivers/mtd/sm_ftl.c +++ b/drivers/mtd/sm_ftl.c @@ -1258,7 +1258,7 @@ static struct mtd_blktrans_ops sm_ftl_ops = { static __init int sm_module_init(void) { int error = 0; - cache_flush_workqueue = create_freezeable_workqueue("smflush"); + cache_flush_workqueue = create_freezable_workqueue("smflush"); if (IS_ERR(cache_flush_workqueue)) return PTR_ERR(cache_flush_workqueue); diff --git a/drivers/net/can/mcp251x.c b/drivers/net/can/mcp251x.c index 7ab534aee45..7513c4523ac 100644 --- a/drivers/net/can/mcp251x.c +++ b/drivers/net/can/mcp251x.c @@ -940,7 +940,7 @@ static int mcp251x_open(struct net_device *net) goto open_unlock; } - priv->wq = create_freezeable_workqueue("mcp251x_wq"); + priv->wq = create_freezable_workqueue("mcp251x_wq"); INIT_WORK(&priv->tx_work, mcp251x_tx_work_handler); INIT_WORK(&priv->restart_work, mcp251x_restart_work_handler); diff --git a/drivers/tty/serial/max3100.c b/drivers/tty/serial/max3100.c index beb1afa27d8..7b951adac54 100644 --- a/drivers/tty/serial/max3100.c +++ b/drivers/tty/serial/max3100.c @@ -601,7 +601,7 @@ static int max3100_startup(struct uart_port *port) s->rts = 0; sprintf(b, "max3100-%d", s->minor); - s->workqueue = create_freezeable_workqueue(b); + s->workqueue = create_freezable_workqueue(b); if (!s->workqueue) { dev_warn(&s->spi->dev, "cannot create workqueue\n"); return -EBUSY; diff --git a/drivers/tty/serial/max3107.c b/drivers/tty/serial/max3107.c index 910870edf70..750b4f62731 100644 --- a/drivers/tty/serial/max3107.c +++ b/drivers/tty/serial/max3107.c @@ -833,7 +833,7 @@ static int max3107_startup(struct uart_port *port) struct max3107_port *s = container_of(port, struct max3107_port, port); /* Initialize work queue */ - s->workqueue = create_freezeable_workqueue("max3107"); + s->workqueue = create_freezable_workqueue("max3107"); if (!s->workqueue) { dev_err(&s->spi->dev, "Workqueue creation failed\n"); return -EBUSY; diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 08a8beb152e..7cd9a5a68d5 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -1779,11 +1779,11 @@ int __init gfs2_glock_init(void) #endif glock_workqueue = alloc_workqueue("glock_workqueue", WQ_MEM_RECLAIM | - WQ_HIGHPRI | WQ_FREEZEABLE, 0); + WQ_HIGHPRI | WQ_FREEZABLE, 0); if (IS_ERR(glock_workqueue)) return PTR_ERR(glock_workqueue); gfs2_delete_workqueue = alloc_workqueue("delete_workqueue", - WQ_MEM_RECLAIM | WQ_FREEZEABLE, + WQ_MEM_RECLAIM | WQ_FREEZABLE, 0); if (IS_ERR(gfs2_delete_workqueue)) { destroy_workqueue(glock_workqueue); diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c index ebef7ab6e17..85ba027d1c4 100644 --- a/fs/gfs2/main.c +++ b/fs/gfs2/main.c @@ -144,7 +144,7 @@ static int __init init_gfs2_fs(void) error = -ENOMEM; gfs_recovery_wq = alloc_workqueue("gfs_recovery", - WQ_MEM_RECLAIM | WQ_FREEZEABLE, 0); + WQ_MEM_RECLAIM | WQ_FREEZABLE, 0); if (!gfs_recovery_wq) goto fail_wq; diff --git a/include/linux/freezer.h b/include/linux/freezer.h index da7e52b099f..1effc8b56b4 100644 --- a/include/linux/freezer.h +++ b/include/linux/freezer.h @@ -109,7 +109,7 @@ static inline void freezer_count(void) } /* - * Check if the task should be counted as freezeable by the freezer + * Check if the task should be counted as freezable by the freezer */ static inline int freezer_should_skip(struct task_struct *p) { diff --git a/include/linux/sched.h b/include/linux/sched.h index d747f948b34..777d8a5ed06 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1744,7 +1744,7 @@ extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t * #define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */ #define PF_MEMPOLICY 0x10000000 /* Non-default NUMA mempolicy */ #define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */ -#define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezeable */ +#define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezable */ #define PF_FREEZER_NOSIG 0x80000000 /* Freezer won't send signals to it */ /* diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 1ac11586a2f..f7998a3bf02 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -250,7 +250,7 @@ static inline unsigned int work_static(struct work_struct *work) { return 0; } enum { WQ_NON_REENTRANT = 1 << 0, /* guarantee non-reentrance */ WQ_UNBOUND = 1 << 1, /* not bound to any cpu */ - WQ_FREEZEABLE = 1 << 2, /* freeze during suspend */ + WQ_FREEZABLE = 1 << 2, /* freeze during suspend */ WQ_MEM_RECLAIM = 1 << 3, /* may be used for memory reclaim */ WQ_HIGHPRI = 1 << 4, /* high priority */ WQ_CPU_INTENSIVE = 1 << 5, /* cpu instensive workqueue */ @@ -318,7 +318,7 @@ __alloc_workqueue_key(const char *name, unsigned int flags, int max_active, /** * alloc_ordered_workqueue - allocate an ordered workqueue * @name: name of the workqueue - * @flags: WQ_* flags (only WQ_FREEZEABLE and WQ_MEM_RECLAIM are meaningful) + * @flags: WQ_* flags (only WQ_FREEZABLE and WQ_MEM_RECLAIM are meaningful) * * Allocate an ordered workqueue. An ordered workqueue executes at * most one work item at any given time in the queued order. They are @@ -335,8 +335,8 @@ alloc_ordered_workqueue(const char *name, unsigned int flags) #define create_workqueue(name) \ alloc_workqueue((name), WQ_MEM_RECLAIM, 1) -#define create_freezeable_workqueue(name) \ - alloc_workqueue((name), WQ_FREEZEABLE | WQ_UNBOUND | WQ_MEM_RECLAIM, 1) +#define create_freezable_workqueue(name) \ + alloc_workqueue((name), WQ_FREEZABLE | WQ_UNBOUND | WQ_MEM_RECLAIM, 1) #define create_singlethread_workqueue(name) \ alloc_workqueue((name), WQ_UNBOUND | WQ_MEM_RECLAIM, 1) diff --git a/kernel/power/main.c b/kernel/power/main.c index 7b5db6a8561..701853042c2 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -326,7 +326,7 @@ EXPORT_SYMBOL_GPL(pm_wq); static int __init pm_start_workqueue(void) { - pm_wq = alloc_workqueue("pm", WQ_FREEZEABLE, 0); + pm_wq = alloc_workqueue("pm", WQ_FREEZABLE, 0); return pm_wq ? 0 : -ENOMEM; } diff --git a/kernel/power/process.c b/kernel/power/process.c index d6d2a10320e..0cf3a27a6c9 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -22,7 +22,7 @@ */ #define TIMEOUT (20 * HZ) -static inline int freezeable(struct task_struct * p) +static inline int freezable(struct task_struct * p) { if ((p == current) || (p->flags & PF_NOFREEZE) || @@ -53,7 +53,7 @@ static int try_to_freeze_tasks(bool sig_only) todo = 0; read_lock(&tasklist_lock); do_each_thread(g, p) { - if (frozen(p) || !freezeable(p)) + if (frozen(p) || !freezable(p)) continue; if (!freeze_task(p, sig_only)) @@ -167,7 +167,7 @@ static void thaw_tasks(bool nosig_only) read_lock(&tasklist_lock); do_each_thread(g, p) { - if (!freezeable(p)) + if (!freezable(p)) continue; if (nosig_only && should_send_signal(p)) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 90a17ca2ad0..88a3e34f51f 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -2965,7 +2965,7 @@ struct workqueue_struct *__alloc_workqueue_key(const char *name, */ spin_lock(&workqueue_lock); - if (workqueue_freezing && wq->flags & WQ_FREEZEABLE) + if (workqueue_freezing && wq->flags & WQ_FREEZABLE) for_each_cwq_cpu(cpu, wq) get_cwq(cpu, wq)->max_active = 0; @@ -3077,7 +3077,7 @@ void workqueue_set_max_active(struct workqueue_struct *wq, int max_active) spin_lock_irq(&gcwq->lock); - if (!(wq->flags & WQ_FREEZEABLE) || + if (!(wq->flags & WQ_FREEZABLE) || !(gcwq->flags & GCWQ_FREEZING)) get_cwq(gcwq->cpu, wq)->max_active = max_active; @@ -3327,7 +3327,7 @@ static int __cpuinit trustee_thread(void *__gcwq) * want to get it over with ASAP - spam rescuers, wake up as * many idlers as necessary and create new ones till the * worklist is empty. Note that if the gcwq is frozen, there - * may be frozen works in freezeable cwqs. Don't declare + * may be frozen works in freezable cwqs. Don't declare * completion while frozen. */ while (gcwq->nr_workers != gcwq->nr_idle || @@ -3585,9 +3585,9 @@ EXPORT_SYMBOL_GPL(work_on_cpu); /** * freeze_workqueues_begin - begin freezing workqueues * - * Start freezing workqueues. After this function returns, all - * freezeable workqueues will queue new works to their frozen_works - * list instead of gcwq->worklist. + * Start freezing workqueues. After this function returns, all freezable + * workqueues will queue new works to their frozen_works list instead of + * gcwq->worklist. * * CONTEXT: * Grabs and releases workqueue_lock and gcwq->lock's. @@ -3613,7 +3613,7 @@ void freeze_workqueues_begin(void) list_for_each_entry(wq, &workqueues, list) { struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); - if (cwq && wq->flags & WQ_FREEZEABLE) + if (cwq && wq->flags & WQ_FREEZABLE) cwq->max_active = 0; } @@ -3624,7 +3624,7 @@ void freeze_workqueues_begin(void) } /** - * freeze_workqueues_busy - are freezeable workqueues still busy? + * freeze_workqueues_busy - are freezable workqueues still busy? * * Check whether freezing is complete. This function must be called * between freeze_workqueues_begin() and thaw_workqueues(). @@ -3633,8 +3633,8 @@ void freeze_workqueues_begin(void) * Grabs and releases workqueue_lock. * * RETURNS: - * %true if some freezeable workqueues are still busy. %false if - * freezing is complete. + * %true if some freezable workqueues are still busy. %false if freezing + * is complete. */ bool freeze_workqueues_busy(void) { @@ -3654,7 +3654,7 @@ bool freeze_workqueues_busy(void) list_for_each_entry(wq, &workqueues, list) { struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); - if (!cwq || !(wq->flags & WQ_FREEZEABLE)) + if (!cwq || !(wq->flags & WQ_FREEZABLE)) continue; BUG_ON(cwq->nr_active < 0); @@ -3699,7 +3699,7 @@ void thaw_workqueues(void) list_for_each_entry(wq, &workqueues, list) { struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); - if (!cwq || !(wq->flags & WQ_FREEZEABLE)) + if (!cwq || !(wq->flags & WQ_FREEZABLE)) continue; /* restore max_active and repopulate worklist */ -- cgit v1.2.3-70-g09d2 From 3233cdbd9fa347a6d6897a94cc6ed0302ae83c4f Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 16 Feb 2011 18:10:19 +0100 Subject: workqueue: make sure MAYDAY_INITIAL_TIMEOUT is at least 2 jiffies long MAYDAY_INITIAL_TIMEOUT is defined as HZ / 100 and depending on configuration may end up 0 or 1. Even when it's 1, depending on when the mayday timer is added in the current jiffy interval, it may expire way before a jiffy has passed. Make sure MAYDAY_INITIAL_TIMEOUT is at least two to guarantee that at least a full jiffy has passed before calling rescuers. Signed-off-by: Tejun Heo Reported-by: Ray Jui Cc: stable@kernel.org --- kernel/workqueue.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 88a3e34f51f..ee6578b578a 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -79,7 +79,9 @@ enum { MAX_IDLE_WORKERS_RATIO = 4, /* 1/4 of busy can be idle */ IDLE_WORKER_TIMEOUT = 300 * HZ, /* keep idle ones for 5 mins */ - MAYDAY_INITIAL_TIMEOUT = HZ / 100, /* call for help after 10ms */ + MAYDAY_INITIAL_TIMEOUT = HZ / 100 >= 2 ? HZ / 100 : 2, + /* call for help after 10ms + (min two ticks) */ MAYDAY_INTERVAL = HZ / 10, /* and then every 100ms */ CREATE_COOLDOWN = HZ, /* time to breath after fail */ TRUSTEE_COOLDOWN = HZ / 10, /* for trustee draining */ -- cgit v1.2.3-70-g09d2 From 2e725a065b0153f0c449318da1923a120477633d Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Sat, 12 Feb 2011 21:06:51 +0100 Subject: PM / Hibernate: Return error code when alloc_image_page() fails Currently we return 0 in swsusp_alloc() when alloc_image_page() fails. Fix that. Also remove unneeded "error" variable since the only useful value of error is -ENOMEM. [rjw: Fixed up the changelog and changed subject.] Signed-off-by: Stanislaw Gruszka Cc: stable@kernel.org Signed-off-by: Rafael J. Wysocki --- kernel/power/snapshot.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'kernel') diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 0dac75ea445..64db648ff91 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -1519,11 +1519,8 @@ static int swsusp_alloc(struct memory_bitmap *orig_bm, struct memory_bitmap *copy_bm, unsigned int nr_pages, unsigned int nr_highmem) { - int error = 0; - if (nr_highmem > 0) { - error = get_highmem_buffer(PG_ANY); - if (error) + if (get_highmem_buffer(PG_ANY)) goto err_out; if (nr_highmem > alloc_highmem) { nr_highmem -= alloc_highmem; @@ -1546,7 +1543,7 @@ swsusp_alloc(struct memory_bitmap *orig_bm, struct memory_bitmap *copy_bm, err_out: swsusp_free(); - return error; + return -ENOMEM; } asmlinkage int swsusp_save(void) -- cgit v1.2.3-70-g09d2 From c1ee6264280e740a9d3ff3feef38642cf0a57013 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 17 Feb 2011 17:45:15 +0100 Subject: genirq: Prevent access beyond allocated_irqs bitmap Lars-Peter Clausen pointed out: I stumbled upon this while looking through the existing archs using SPARSE_IRQ. Even with SPARSE_IRQ the NR_IRQS is still the upper limit for the number of IRQs. Both PXA and MMP set NR_IRQS to IRQ_BOARD_START, with IRQ_BOARD_START being the number of IRQs used by the core. In various machine files the nr_irqs field of the ARM machine defintion struct is then set to "IRQ_BOARD_START + NR_BOARD_IRQS". As a result "nr_irqs" will greater then NR_IRQS which then again causes the "allocated_irqs" bitmap in the core irq code to be accessed beyond its size overwriting unrelated data. The core code really misses a sanity check there. This went unnoticed so far as by chance the compiler/linker places data behind that bitmap which gets initialized later on those affected platforms. So the obvious fix would be to add a sanity check in early_irq_init() and break all affected platforms. Though that check wants to be backported to stable as well, which will require to fix all known problematic platforms and probably some more yet not known ones as well. Lots of churn. A way simpler solution is to allocate a slightly larger bitmap and avoid the whole churn w/o breaking anything. Add a few warnings when an arch returns utter crap. Reported-by: Lars-Peter Clausen Signed-off-by: Thomas Gleixner Cc: stable@kernel.org # .37 Cc: Haojian Zhuang Cc: Eric Miao Cc: Peter Zijlstra --- kernel/irq/internals.h | 6 ++++++ kernel/irq/irqdesc.c | 11 ++++++++++- kernel/irq/resend.c | 2 +- 3 files changed, 17 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h index 4571ae7e085..99c3bc8a6fb 100644 --- a/kernel/irq/internals.h +++ b/kernel/irq/internals.h @@ -3,6 +3,12 @@ */ #include +#ifdef CONFIG_SPARSE_IRQ +# define IRQ_BITMAP_BITS (NR_IRQS + 8196) +#else +# define IRQ_BITMAP_BITS NR_IRQS +#endif + extern int noirqdebug; #define irq_data_to_desc(data) container_of(data, struct irq_desc, irq_data) diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index 282f20230e6..2039bea31bd 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -94,7 +94,7 @@ int nr_irqs = NR_IRQS; EXPORT_SYMBOL_GPL(nr_irqs); static DEFINE_MUTEX(sparse_irq_lock); -static DECLARE_BITMAP(allocated_irqs, NR_IRQS); +static DECLARE_BITMAP(allocated_irqs, IRQ_BITMAP_BITS); #ifdef CONFIG_SPARSE_IRQ @@ -217,6 +217,15 @@ int __init early_irq_init(void) initcnt = arch_probe_nr_irqs(); printk(KERN_INFO "NR_IRQS:%d nr_irqs:%d %d\n", NR_IRQS, nr_irqs, initcnt); + if (WARN_ON(nr_irqs > IRQ_BITMAP_BITS)) + nr_irqs = IRQ_BITMAP_BITS; + + if (WARN_ON(initcnt > IRQ_BITMAP_BITS)) + initcnt = IRQ_BITMAP_BITS; + + if (initcnt > nr_irqs) + nr_irqs = initcnt; + for (i = 0; i < initcnt; i++) { desc = alloc_desc(i, node); set_bit(i, allocated_irqs); diff --git a/kernel/irq/resend.c b/kernel/irq/resend.c index 891115a929a..dc49358b73f 100644 --- a/kernel/irq/resend.c +++ b/kernel/irq/resend.c @@ -23,7 +23,7 @@ #ifdef CONFIG_HARDIRQS_SW_RESEND /* Bitmap to handle software resend of interrupts: */ -static DECLARE_BITMAP(irqs_resend, NR_IRQS); +static DECLARE_BITMAP(irqs_resend, IRQ_BITMAP_BITS); /* * Run software resends of IRQ's -- cgit v1.2.3-70-g09d2 From 6d83f94db95cfe65d2a6359cccdf61cf087c2598 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 18 Feb 2011 23:27:23 +0100 Subject: genirq: Disable the SHIRQ_DEBUG call in request_threaded_irq for now With CONFIG_SHIRQ_DEBUG=y we call a newly installed interrupt handler in request_threaded_irq(). The original implementation (commit a304e1b8) called the handler _BEFORE_ it was installed, but that caused problems with handlers calling disable_irq_nosync(). See commit 377bf1e4. It's braindead in the first place to call disable_irq_nosync in shared handlers, but .... Moving this call after we installed the handler looks innocent, but it is very subtle broken on SMP. Interrupt handlers rely on the fact, that the irq core prevents reentrancy. Now this debug call violates that promise because we run the handler w/o the IRQ_INPROGRESS protection - which we cannot apply here because that would result in a possibly forever masked interrupt line. A concurrent real hardware interrupt on a different CPU results in handler reentrancy and can lead to complete wreckage, which was unfortunately observed in reality and took a fricking long time to debug. Leave the code here for now. We want this debug feature, but that's not easy to fix. We really should get rid of those disable_irq_nosync() abusers and remove that function completely. Signed-off-by: Thomas Gleixner Cc: Anton Vorontsov Cc: David Woodhouse Cc: Arjan van de Ven Cc: stable@kernel.org # .28 -> .37 --- kernel/irq/manage.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 0caa59f747d..9033c1c7082 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -1100,7 +1100,7 @@ int request_threaded_irq(unsigned int irq, irq_handler_t handler, if (retval) kfree(action); -#ifdef CONFIG_DEBUG_SHIRQ +#ifdef CONFIG_DEBUG_SHIRQ_FIXME if (!retval && (irqflags & IRQF_SHARED)) { /* * It's a shared IRQ -- the driver ought to be prepared for it -- cgit v1.2.3-70-g09d2 From 3a142a0672b48a853f00af61f184c7341ac9c99d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 25 Feb 2011 22:34:23 +0100 Subject: clockevents: Prevent oneshot mode when broadcast device is periodic When the per cpu timer is marked CLOCK_EVT_FEAT_C3STOP, then we only can switch into oneshot mode, when the backup broadcast device supports oneshot mode as well. Otherwise we would try to switch the broadcast device into an unsupported mode unconditionally. This went unnoticed so far as the current available broadcast devices support oneshot mode. Seth unearthed this problem while debugging and working around an hpet related BIOS wreckage. Add the necessary check to tick_is_oneshot_available(). Reported-and-tested-by: Seth Forshee Signed-off-by: Thomas Gleixner LKML-Reference: Cc: stable@kernel.org # .21 -> --- kernel/time/tick-broadcast.c | 10 ++++++++++ kernel/time/tick-common.c | 6 +++++- kernel/time/tick-internal.h | 3 +++ 3 files changed, 18 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index 48b2761b566..a3b5aff6260 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -600,4 +600,14 @@ int tick_broadcast_oneshot_active(void) return tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT; } +/* + * Check whether the broadcast device supports oneshot. + */ +bool tick_broadcast_oneshot_available(void) +{ + struct clock_event_device *bc = tick_broadcast_device.evtdev; + + return bc ? bc->features & CLOCK_EVT_FEAT_ONESHOT : false; +} + #endif diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index 051bc80a0c4..ed228ef6f6b 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -51,7 +51,11 @@ int tick_is_oneshot_available(void) { struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev); - return dev && (dev->features & CLOCK_EVT_FEAT_ONESHOT); + if (!dev || !(dev->features & CLOCK_EVT_FEAT_ONESHOT)) + return 0; + if (!(dev->features & CLOCK_EVT_FEAT_C3STOP)) + return 1; + return tick_broadcast_oneshot_available(); } /* diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h index 290eefbc1f6..f65d3a723a6 100644 --- a/kernel/time/tick-internal.h +++ b/kernel/time/tick-internal.h @@ -36,6 +36,7 @@ extern void tick_shutdown_broadcast_oneshot(unsigned int *cpup); extern int tick_resume_broadcast_oneshot(struct clock_event_device *bc); extern int tick_broadcast_oneshot_active(void); extern void tick_check_oneshot_broadcast(int cpu); +bool tick_broadcast_oneshot_available(void); # else /* BROADCAST */ static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc) { @@ -46,6 +47,7 @@ static inline void tick_broadcast_switch_to_oneshot(void) { } static inline void tick_shutdown_broadcast_oneshot(unsigned int *cpup) { } static inline int tick_broadcast_oneshot_active(void) { return 0; } static inline void tick_check_oneshot_broadcast(int cpu) { } +static inline bool tick_broadcast_oneshot_available(void) { return true; } # endif /* !BROADCAST */ #else /* !ONESHOT */ @@ -76,6 +78,7 @@ static inline int tick_resume_broadcast_oneshot(struct clock_event_device *bc) return 0; } static inline int tick_broadcast_oneshot_active(void) { return 0; } +static inline bool tick_broadcast_oneshot_available(void) { return false; } #endif /* !TICK_ONESHOT */ /* -- cgit v1.2.3-70-g09d2 From 2d3a8497f8cc5aca14b722cd37d51f6c15ff9f74 Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Thu, 3 Mar 2011 10:53:20 -0500 Subject: blktrace: Remove blk_fill_rwbs_rq. If we enable trace events to trace block actions, We use blk_fill_rwbs_rq to analyze the corresponding actions in request's cmd_flags, but we only choose the minor 2 bits from it, so most of other flags(e.g, REQ_SYNC) are missing. For example, with a sync write we get: write_test-2409 [001] 160.013869: block_rq_insert: 3,64 W 0 () 258135 + = 8 [write_test] Since now we have integrated the flags of both bio and request, it is safe to pass rq->cmd_flags directly to blk_fill_rwbs and blk_fill_rwbs_rq isn't needed any more. With this patch, after a sync write we get: write_test-2417 [000] 226.603878: block_rq_insert: 3,64 WS 0 () 258135 += 8 [write_test] Signed-off-by: Tao Ma Acked-by: Jeff Moyer Signed-off-by: Jens Axboe --- include/linux/blktrace_api.h | 1 - include/trace/events/block.h | 6 +++--- kernel/trace/blktrace.c | 16 ---------------- 3 files changed, 3 insertions(+), 20 deletions(-) (limited to 'kernel') diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index 3395cf7130f..b22fb0d3db0 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -245,7 +245,6 @@ static inline int blk_cmd_buf_len(struct request *rq) extern void blk_dump_cmd(char *buf, struct request *rq); extern void blk_fill_rwbs(char *rwbs, u32 rw, int bytes); -extern void blk_fill_rwbs_rq(char *rwbs, struct request *rq); #endif /* CONFIG_EVENT_TRACING && CONFIG_BLOCK */ diff --git a/include/trace/events/block.h b/include/trace/events/block.h index aba421d68f6..78f18adb49c 100644 --- a/include/trace/events/block.h +++ b/include/trace/events/block.h @@ -31,7 +31,7 @@ DECLARE_EVENT_CLASS(block_rq_with_error, 0 : blk_rq_sectors(rq); __entry->errors = rq->errors; - blk_fill_rwbs_rq(__entry->rwbs, rq); + blk_fill_rwbs(__entry->rwbs, rq->cmd_flags, blk_rq_bytes(rq)); blk_dump_cmd(__get_str(cmd), rq); ), @@ -118,7 +118,7 @@ DECLARE_EVENT_CLASS(block_rq, __entry->bytes = (rq->cmd_type == REQ_TYPE_BLOCK_PC) ? blk_rq_bytes(rq) : 0; - blk_fill_rwbs_rq(__entry->rwbs, rq); + blk_fill_rwbs(__entry->rwbs, rq->cmd_flags, blk_rq_bytes(rq)); blk_dump_cmd(__get_str(cmd), rq); memcpy(__entry->comm, current->comm, TASK_COMM_LEN); ), @@ -563,7 +563,7 @@ TRACE_EVENT(block_rq_remap, __entry->nr_sector = blk_rq_sectors(rq); __entry->old_dev = dev; __entry->old_sector = from; - blk_fill_rwbs_rq(__entry->rwbs, rq); + blk_fill_rwbs(__entry->rwbs, rq->cmd_flags, blk_rq_bytes(rq)); ), TP_printk("%d,%d %s %llu + %u <- (%d,%d) %llu", diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index d95721f3370..cbafed7d4f3 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -1827,21 +1827,5 @@ void blk_fill_rwbs(char *rwbs, u32 rw, int bytes) rwbs[i] = '\0'; } -void blk_fill_rwbs_rq(char *rwbs, struct request *rq) -{ - int rw = rq->cmd_flags & 0x03; - int bytes; - - if (rq->cmd_flags & REQ_DISCARD) - rw |= REQ_DISCARD; - - if (rq->cmd_flags & REQ_SECURE) - rw |= REQ_SECURE; - - bytes = blk_rq_bytes(rq); - - blk_fill_rwbs(rwbs, rw, bytes); -} - #endif /* CONFIG_EVENT_TRACING */ -- cgit v1.2.3-70-g09d2 From e3e89cc535223433a619d0969db3fa05cdd946b8 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 4 Mar 2011 09:23:30 -0800 Subject: Mark ptrace_{traceme,attach,detach} static They are only used inside kernel/ptrace.c, and have been for a long time. We don't want to go back to the bad-old-days when architectures did things on their own, so make them static and private. Signed-off-by: Linus Torvalds --- include/linux/ptrace.h | 3 --- kernel/ptrace.c | 6 +++--- 2 files changed, 3 insertions(+), 6 deletions(-) (limited to 'kernel') diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index 092a04f874a..a1147e5dd24 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -102,11 +102,8 @@ extern long arch_ptrace(struct task_struct *child, long request, unsigned long addr, unsigned long data); -extern int ptrace_traceme(void); extern int ptrace_readdata(struct task_struct *tsk, unsigned long src, char __user *dst, int len); extern int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned long dst, int len); -extern int ptrace_attach(struct task_struct *tsk); -extern int ptrace_detach(struct task_struct *, unsigned int); extern void ptrace_disable(struct task_struct *); extern int ptrace_check_attach(struct task_struct *task, int kill); extern int ptrace_request(struct task_struct *child, long request, diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 1708b1e2972..e2302e40b36 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -163,7 +163,7 @@ bool ptrace_may_access(struct task_struct *task, unsigned int mode) return !err; } -int ptrace_attach(struct task_struct *task) +static int ptrace_attach(struct task_struct *task) { int retval; @@ -219,7 +219,7 @@ out: * Performs checks and sets PT_PTRACED. * Should be used by all ptrace implementations for PTRACE_TRACEME. */ -int ptrace_traceme(void) +static int ptrace_traceme(void) { int ret = -EPERM; @@ -293,7 +293,7 @@ static bool __ptrace_detach(struct task_struct *tracer, struct task_struct *p) return false; } -int ptrace_detach(struct task_struct *child, unsigned int data) +static int ptrace_detach(struct task_struct *child, unsigned int data) { bool dead = false; -- cgit v1.2.3-70-g09d2 From b75f38d659e6fc747eda64cb72f3920e29dd44a4 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Fri, 4 Mar 2011 17:36:21 -0800 Subject: cpuset: add a missing unlock in cpuset_write_resmask() Don't forget to release cgroup_mutex if alloc_trial_cpuset() fails. [akpm@linux-foundation.org: avoid multiple return points] Signed-off-by: Li Zefan Cc: Paul Menage Acked-by: David Rientjes Cc: Miao Xie Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/cpuset.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 4349935c2ad..e92e9818903 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -1575,8 +1575,10 @@ static int cpuset_write_resmask(struct cgroup *cgrp, struct cftype *cft, return -ENODEV; trialcs = alloc_trial_cpuset(cs); - if (!trialcs) - return -ENOMEM; + if (!trialcs) { + retval = -ENOMEM; + goto out; + } switch (cft->private) { case FILE_CPULIST: @@ -1591,6 +1593,7 @@ static int cpuset_write_resmask(struct cgroup *cgrp, struct cftype *cft, } free_trial_cpuset(trialcs); +out: cgroup_unlock(); return retval; } -- cgit v1.2.3-70-g09d2