From 06f71b922ce5a05352acd706564ca4ae1f2add0e Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Thu, 11 Mar 2010 14:04:46 -0800 Subject: timer: Print function name for timer callbacks modifying preemption count MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A function scheduled with a timer must not exit with a different preempt count than it was entered. To make helping users running into the corresponding BUG() easier also print the name of the bad function not only its address. [ tglx: Sanitized printk ] Signed-off-by: Uwe Kleine-König Cc: johnstul@us.ibm.com Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Thomas Gleixner --- kernel/timer.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'kernel/timer.c') diff --git a/kernel/timer.c b/kernel/timer.c index c61a7949387..f82f4bfe2d8 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -1027,11 +1027,8 @@ static inline void __run_timers(struct tvec_base *base) lock_map_release(&lockdep_map); if (preempt_count != preempt_count()) { - printk(KERN_ERR "huh, entered %p " - "with preempt_count %08x, exited" - " with %08x?\n", - fn, preempt_count, - preempt_count()); + printk(KERN_ERR "timer: %pF preempt leak: %08x -> %08x\n", + fn, preempt_count, preempt_count()); BUG(); } } -- cgit v1.2.3-70-g09d2 From 576da126a6c7364d70dfd58d0bbe43d05cf5859f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 12 Mar 2010 21:10:29 +0100 Subject: timer: Split out timer function call The ident level is starting to be annoying. More white space than actual code. Split out the timer function call into its own function. Signed-off-by: Thomas Gleixner --- kernel/timer.c | 72 +++++++++++++++++++++++++++++----------------------------- 1 file changed, 36 insertions(+), 36 deletions(-) (limited to 'kernel/timer.c') diff --git a/kernel/timer.c b/kernel/timer.c index f82f4bfe2d8..45229694dc6 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -953,6 +953,41 @@ static int cascade(struct tvec_base *base, struct tvec *tv, int index) return index; } +static void call_timer_fn(struct timer_list *timer, void (*fn)(unsigned long), + unsigned long data) +{ + int preempt_count = preempt_count(); + +#ifdef CONFIG_LOCKDEP + /* + * It is permissible to free the timer from inside the + * function that is called from it, this we need to take into + * account for lockdep too. To avoid bogus "held lock freed" + * warnings as well as problems when looking into + * timer->lockdep_map, make a copy and use that here. + */ + struct lockdep_map lockdep_map = timer->lockdep_map; +#endif + /* + * Couple the lock chain with the lock chain at + * del_timer_sync() by acquiring the lock_map around the fn() + * call here and in del_timer_sync(). + */ + lock_map_acquire(&lockdep_map); + + trace_timer_expire_entry(timer); + fn(data); + trace_timer_expire_exit(timer); + + lock_map_release(&lockdep_map); + + if (preempt_count != preempt_count()) { + printk(KERN_ERR "timer: %pF preempt leak: %08x -> %08x\n", + fn, preempt_count, preempt_count()); + BUG(); + } +} + #define INDEX(N) ((base->timer_jiffies >> (TVR_BITS + (N) * TVN_BITS)) & TVN_MASK) /** @@ -996,42 +1031,7 @@ static inline void __run_timers(struct tvec_base *base) detach_timer(timer, 1); spin_unlock_irq(&base->lock); - { - int preempt_count = preempt_count(); - -#ifdef CONFIG_LOCKDEP - /* - * It is permissible to free the timer from - * inside the function that is called from - * it, this we need to take into account for - * lockdep too. To avoid bogus "held lock - * freed" warnings as well as problems when - * looking into timer->lockdep_map, make a - * copy and use that here. - */ - struct lockdep_map lockdep_map = - timer->lockdep_map; -#endif - /* - * Couple the lock chain with the lock chain at - * del_timer_sync() by acquiring the lock_map - * around the fn() call here and in - * del_timer_sync(). - */ - lock_map_acquire(&lockdep_map); - - trace_timer_expire_entry(timer); - fn(data); - trace_timer_expire_exit(timer); - - lock_map_release(&lockdep_map); - - if (preempt_count != preempt_count()) { - printk(KERN_ERR "timer: %pF preempt leak: %08x -> %08x\n", - fn, preempt_count, preempt_count()); - BUG(); - } - } + call_timer_fn(timer, fn, data); spin_lock_irq(&base->lock); } } -- cgit v1.2.3-70-g09d2 From 802702e0c2618465b813242d4dfee6a233ba0beb Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 12 Mar 2010 20:13:23 +0100 Subject: timer: Try to survive timer callback preempt_count leak If a timer callback leaks preempt_count we currently assert a BUG(). That makes it unnecessarily hard to retrieve information about the problem especially on laptops and headless stations. There is a decent chance to survive the preempt_count leak by restoring the preempt_count to the value before the callback. That allows in many cases to get valuable information about the root cause of the problem. We carried that fixup in preempt-rt for years and were able to decode such wreckage quite a few times. Signed-off-by: Thomas Gleixner Cc: Linux Torvalds Cc: Andrew Morton Cc: Arjan van de Veen --- kernel/timer.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'kernel/timer.c') diff --git a/kernel/timer.c b/kernel/timer.c index 45229694dc6..7e12e7bc7ce 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -982,9 +982,15 @@ static void call_timer_fn(struct timer_list *timer, void (*fn)(unsigned long), lock_map_release(&lockdep_map); if (preempt_count != preempt_count()) { - printk(KERN_ERR "timer: %pF preempt leak: %08x -> %08x\n", - fn, preempt_count, preempt_count()); - BUG(); + WARN_ONCE(1, "timer: %pF preempt leak: %08x -> %08x\n", + fn, preempt_count, preempt_count()); + /* + * Restore the preempt count. That gives us a decent + * chance to survive and extract information. If the + * callback kept a lock held, bad luck, but not worse + * than the BUG() we had. + */ + preempt_count() = preempt_count; } } -- cgit v1.2.3-70-g09d2 From 3bbb9ec946428b96657126768f65487a48dd090c Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Thu, 11 Mar 2010 14:04:36 -0800 Subject: timers: Introduce the concept of timer slack for legacy timers While HR timers have had the concept of timer slack for quite some time now, the legacy timers lacked this concept, and had to make do with round_jiffies() and friends. Timer slack is important for power management; grouping timers reduces the number of wakeups which in turn reduces power consumption. This patch introduces timer slack to the legacy timers using the following pieces: * A slack field in the timer struct * An api (set_timer_slack) that callers can use to set explicit timer slack * A default slack of 0.4% of the requested delay for callers that do not set any explicit slack * Rounding code that is part of mod_timer() that tries to group timers around jiffies values every 'power of two' (so quick timers will group around every 2, but longer timers will group around every 4, 8, 16, 32 etc) Signed-off-by: Arjan van de Ven Cc: johnstul@us.ibm.com Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Thomas Gleixner --- include/linux/timer.h | 10 ++++++++- kernel/timer.c | 56 +++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 65 insertions(+), 1 deletion(-) (limited to 'kernel/timer.c') diff --git a/include/linux/timer.h b/include/linux/timer.h index a2d1eb6cb3f..ea965b857a5 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -10,13 +10,19 @@ struct tvec_base; struct timer_list { + /* + * All fields that change during normal runtime grouped to the + * same cacheline + */ struct list_head entry; unsigned long expires; + struct tvec_base *base; void (*function)(unsigned long); unsigned long data; - struct tvec_base *base; + int slack; + #ifdef CONFIG_TIMER_STATS void *start_site; char start_comm[16]; @@ -165,6 +171,8 @@ extern int mod_timer(struct timer_list *timer, unsigned long expires); extern int mod_timer_pending(struct timer_list *timer, unsigned long expires); extern int mod_timer_pinned(struct timer_list *timer, unsigned long expires); +extern void set_timer_slack(struct timer_list *time, int slack_hz); + #define TIMER_NOT_PINNED 0 #define TIMER_PINNED 1 /* diff --git a/kernel/timer.c b/kernel/timer.c index 7e12e7bc7ce..49773f38c9b 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -318,6 +318,24 @@ unsigned long round_jiffies_up_relative(unsigned long j) } EXPORT_SYMBOL_GPL(round_jiffies_up_relative); +/** + * set_timer_slack - set the allowed slack for a timer + * @slack_hz: the amount of time (in jiffies) allowed for rounding + * + * Set the amount of time, in jiffies, that a certain timer has + * in terms of slack. By setting this value, the timer subsystem + * will schedule the actual timer somewhere between + * the time mod_timer() asks for, and that time plus the slack. + * + * By setting the slack to -1, a percentage of the delay is used + * instead. + */ +void set_timer_slack(struct timer_list *timer, int slack_hz) +{ + timer->slack = slack_hz; +} +EXPORT_SYMBOL_GPL(set_timer_slack); + static inline void set_running_timer(struct tvec_base *base, struct timer_list *timer) @@ -549,6 +567,7 @@ static void __init_timer(struct timer_list *timer, { timer->entry.next = NULL; timer->base = __raw_get_cpu_var(tvec_bases); + timer->slack = -1; #ifdef CONFIG_TIMER_STATS timer->start_site = NULL; timer->start_pid = -1; @@ -714,6 +733,41 @@ int mod_timer_pending(struct timer_list *timer, unsigned long expires) } EXPORT_SYMBOL(mod_timer_pending); +/* + * Decide where to put the timer while taking the slack into account + * + * Algorithm: + * 1) calculate the maximum (absolute) time + * 2) calculate the highest bit where the expires and new max are different + * 3) use this bit to make a mask + * 4) use the bitmask to round down the maximum time, so that all last + * bits are zeros + */ +static inline +unsigned long apply_slack(struct timer_list *timer, unsigned long expires) +{ + unsigned long expires_limit, mask; + int bit; + + expires_limit = expires + timer->slack; + + if (timer->slack < 0) /* auto slack: use 0.4% */ + expires_limit = expires + (expires - jiffies)/256; + + mask = expires ^ expires_limit; + + if (mask == 0) + return expires; + + bit = find_last_bit(&mask, BITS_PER_LONG); + + mask = (1 << bit) - 1; + + expires_limit = expires_limit & ~(mask); + + return expires_limit; +} + /** * mod_timer - modify a timer's timeout * @timer: the timer to be modified @@ -744,6 +798,8 @@ int mod_timer(struct timer_list *timer, unsigned long expires) if (timer_pending(timer) && timer->expires == expires) return 1; + expires = apply_slack(timer, expires); + return __mod_timer(timer, expires, false, TIMER_NOT_PINNED); } EXPORT_SYMBOL(mod_timer); -- cgit v1.2.3-70-g09d2 From f00e047efdf9d31c8a7dd7875b411f97cfa7d8e5 Mon Sep 17 00:00:00 2001 From: Jeff Chua Date: Mon, 24 May 2010 07:16:24 +0800 Subject: timers: Fix slack calculation for expired timers commit 3bbb9ec946 (timers: Introduce the concept of timer slack for legacy timers) does not take the case into account when the timer is already expired. This broke wireless drivers. The solution is not to apply slack to already expired timers. Signed-off-by: Thomas Gleixner Cc: Arjan van de Ven --- kernel/timer.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'kernel/timer.c') diff --git a/kernel/timer.c b/kernel/timer.c index 9199f3c5221..be394af5bc2 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -750,13 +750,14 @@ unsigned long apply_slack(struct timer_list *timer, unsigned long expires) unsigned long expires_limit, mask; int bit; - expires_limit = expires + timer->slack; + expires_limit = expires; - if (timer->slack < 0) /* auto slack: use 0.4% */ + if (timer->slack > -1) + expires_limit = expires + timer->slack; + else if (time_after(expires, jiffies)) /* auto slack: use 0.4% */ expires_limit = expires + (expires - jiffies)/256; mask = expires ^ expires_limit; - if (mask == 0) return expires; -- cgit v1.2.3-70-g09d2 From 8e63d7795e30b4091e303cc8c060509bd8eea742 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 25 May 2010 20:43:30 +0200 Subject: timers: Fix slack calculation really commit f00e047ef (timers: Fix slack calculation for expired timers) fixed the issue of slack on expired timers only partially. Linus noticed that jiffies is volatile so it is reloaded twice, which generates bad code. But its worse. This can defeat the time_after() check if jiffies are incremented between time_after() and the slack calculation. Fix it by reading jiffies into a local variable, which prevents the compiler from loading it twice. While at it make the > -1 check into >= 0 which is easier to read. Signed-off-by: Thomas Gleixner Cc: Arjan van de Ven Cc: Linus Torvalds --- kernel/timer.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'kernel/timer.c') diff --git a/kernel/timer.c b/kernel/timer.c index be394af5bc2..d8decb8d46b 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -747,16 +747,19 @@ EXPORT_SYMBOL(mod_timer_pending); static inline unsigned long apply_slack(struct timer_list *timer, unsigned long expires) { - unsigned long expires_limit, mask; + unsigned long expires_limit, mask, now; int bit; expires_limit = expires; - if (timer->slack > -1) + if (timer->slack >= 0) { expires_limit = expires + timer->slack; - else if (time_after(expires, jiffies)) /* auto slack: use 0.4% */ - expires_limit = expires + (expires - jiffies)/256; - + } else { + now = jiffies; + /* No slack, if already expired else auto slack 0.4% */ + if (time_after(expires, now)) + expires_limit = expires + (expires - now)/256; + } mask = expires ^ expires_limit; if (mask == 0) return expires; -- cgit v1.2.3-70-g09d2 From 2abfb9e1d470f7082e5e20e4b11a271a0124211b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 26 May 2010 16:07:13 +0200 Subject: timers: Move local variable into else section Fix nit-picking coding style detail. Signed-off-by: Thomas Gleixner --- kernel/timer.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'kernel/timer.c') diff --git a/kernel/timer.c b/kernel/timer.c index d8decb8d46b..22118342a45 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -747,7 +747,7 @@ EXPORT_SYMBOL(mod_timer_pending); static inline unsigned long apply_slack(struct timer_list *timer, unsigned long expires) { - unsigned long expires_limit, mask, now; + unsigned long expires_limit, mask; int bit; expires_limit = expires; @@ -755,7 +755,8 @@ unsigned long apply_slack(struct timer_list *timer, unsigned long expires) if (timer->slack >= 0) { expires_limit = expires + timer->slack; } else { - now = jiffies; + unsigned long now = jiffies; + /* No slack, if already expired else auto slack 0.4% */ if (time_after(expires, now)) expires_limit = expires + (expires - now)/256; -- cgit v1.2.3-70-g09d2 From 80b5184cc537718122e036afe7e62d202b70d077 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Wed, 26 May 2010 14:43:32 -0700 Subject: kernel/: convert cpu notifier to return encapsulate errno value By the previous modification, the cpu notifier can return encapsulate errno value. This converts the cpu notifiers for kernel/*.c Signed-off-by: Akinobu Mita Cc: Ingo Molnar Cc: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/padata.c | 4 ++-- kernel/profile.c | 4 ++-- kernel/relay.c | 2 +- kernel/smp.c | 2 +- kernel/softirq.c | 2 +- kernel/timer.c | 7 +++++-- kernel/workqueue.c | 9 +++++---- 7 files changed, 17 insertions(+), 13 deletions(-) (limited to 'kernel/timer.c') diff --git a/kernel/padata.c b/kernel/padata.c index b1c9857f840..fdd8ae609ce 100644 --- a/kernel/padata.c +++ b/kernel/padata.c @@ -659,7 +659,7 @@ static int padata_cpu_callback(struct notifier_block *nfb, err = __padata_add_cpu(pinst, cpu); mutex_unlock(&pinst->lock); if (err) - return NOTIFY_BAD; + return notifier_from_errno(err); break; case CPU_DOWN_PREPARE: @@ -670,7 +670,7 @@ static int padata_cpu_callback(struct notifier_block *nfb, err = __padata_remove_cpu(pinst, cpu); mutex_unlock(&pinst->lock); if (err) - return NOTIFY_BAD; + return notifier_from_errno(err); break; case CPU_UP_CANCELED: diff --git a/kernel/profile.c b/kernel/profile.c index dfadc5b729f..1e6a0d94ea6 100644 --- a/kernel/profile.c +++ b/kernel/profile.c @@ -372,7 +372,7 @@ static int __cpuinit profile_cpu_callback(struct notifier_block *info, GFP_KERNEL | __GFP_ZERO, 0); if (!page) - return NOTIFY_BAD; + return notifier_from_errno(-ENOMEM); per_cpu(cpu_profile_hits, cpu)[1] = page_address(page); } if (!per_cpu(cpu_profile_hits, cpu)[0]) { @@ -388,7 +388,7 @@ out_free: page = virt_to_page(per_cpu(cpu_profile_hits, cpu)[1]); per_cpu(cpu_profile_hits, cpu)[1] = NULL; __free_page(page); - return NOTIFY_BAD; + return notifier_from_errno(-ENOMEM); case CPU_ONLINE: case CPU_ONLINE_FROZEN: if (prof_cpu_mask != NULL) diff --git a/kernel/relay.c b/kernel/relay.c index 4268287148c..c7cf397fb92 100644 --- a/kernel/relay.c +++ b/kernel/relay.c @@ -539,7 +539,7 @@ static int __cpuinit relay_hotcpu_callback(struct notifier_block *nb, "relay_hotcpu_callback: cpu %d buffer " "creation failed\n", hotcpu); mutex_unlock(&relay_channels_mutex); - return NOTIFY_BAD; + return notifier_from_errno(-ENOMEM); } } mutex_unlock(&relay_channels_mutex); diff --git a/kernel/smp.c b/kernel/smp.c index 3fc69733618..75c970c715d 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -52,7 +52,7 @@ hotplug_cfd(struct notifier_block *nfb, unsigned long action, void *hcpu) case CPU_UP_PREPARE_FROZEN: if (!zalloc_cpumask_var_node(&cfd->cpumask, GFP_KERNEL, cpu_to_node(cpu))) - return NOTIFY_BAD; + return notifier_from_errno(-ENOMEM); break; #ifdef CONFIG_HOTPLUG_CPU diff --git a/kernel/softirq.c b/kernel/softirq.c index 0db913a5c60..825e1126008 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -808,7 +808,7 @@ static int __cpuinit cpu_callback(struct notifier_block *nfb, p = kthread_create(run_ksoftirqd, hcpu, "ksoftirqd/%d", hotcpu); if (IS_ERR(p)) { printk("ksoftirqd for %i failed\n", hotcpu); - return NOTIFY_BAD; + return notifier_from_errno(PTR_ERR(p)); } kthread_bind(p, hotcpu); per_cpu(ksoftirqd, hotcpu) = p; diff --git a/kernel/timer.c b/kernel/timer.c index be394af5bc2..e3b8c697bde 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -1680,11 +1680,14 @@ static int __cpuinit timer_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) { long cpu = (long)hcpu; + int err; + switch(action) { case CPU_UP_PREPARE: case CPU_UP_PREPARE_FROZEN: - if (init_timers_cpu(cpu) < 0) - return NOTIFY_BAD; + err = init_timers_cpu(cpu); + if (err < 0) + return notifier_from_errno(err); break; #ifdef CONFIG_HOTPLUG_CPU case CPU_DEAD: diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 77dabbf64b8..327d2deb445 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1110,7 +1110,7 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb, unsigned int cpu = (unsigned long)hcpu; struct cpu_workqueue_struct *cwq; struct workqueue_struct *wq; - int ret = NOTIFY_OK; + int err = 0; action &= ~CPU_TASKS_FROZEN; @@ -1124,12 +1124,13 @@ undo: switch (action) { case CPU_UP_PREPARE: - if (!create_workqueue_thread(cwq, cpu)) + err = create_workqueue_thread(cwq, cpu); + if (!err) break; printk(KERN_ERR "workqueue [%s] for %i failed\n", wq->name, cpu); action = CPU_UP_CANCELED; - ret = NOTIFY_BAD; + err = -ENOMEM; goto undo; case CPU_ONLINE: @@ -1150,7 +1151,7 @@ undo: cpumask_clear_cpu(cpu, cpu_populated_map); } - return ret; + return notifier_from_errno(err); } #ifdef CONFIG_SMP -- cgit v1.2.3-70-g09d2