From c4823bce033be74c0fcfbcae2f1be0854fdc2e18 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 12 Mar 2007 16:17:42 +0000 Subject: [PATCH] fix deadlock in audit_log_task_context() GFP_KERNEL allocations in non-blocking context; fixed by killing an idiotic use of security_getprocattr(). Acked-by: Stephen Smalley Acked-by: James Morris Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- kernel/auditsc.c | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) (limited to 'kernel') diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 359955800dd..628c7ac590a 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -739,28 +739,26 @@ static inline void audit_free_context(struct audit_context *context) void audit_log_task_context(struct audit_buffer *ab) { char *ctx = NULL; - ssize_t len = 0; + unsigned len; + int error; + u32 sid; + + selinux_get_task_sid(current, &sid); + if (!sid) + return; - len = security_getprocattr(current, "current", NULL, 0); - if (len < 0) { - if (len != -EINVAL) + error = selinux_sid_to_string(sid, &ctx, &len); + if (error) { + if (error != -EINVAL) goto error_path; return; } - ctx = kmalloc(len, GFP_KERNEL); - if (!ctx) - goto error_path; - - len = security_getprocattr(current, "current", ctx, len); - if (len < 0 ) - goto error_path; - audit_log_format(ab, " subj=%s", ctx); + kfree(ctx); return; error_path: - kfree(ctx); audit_panic("error in audit_log_task_context"); return; } -- cgit v1.2.3-70-g09d2 From 886c5952950ffed0e8ca3eb9efdc2728bfc144d3 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 16 Mar 2007 13:38:06 -0800 Subject: [PATCH] swsusp: Fix resume error path in platform mode If swsusp is using the platform mode during the resume and the image cannot be read, the platform mode should be switched off before software_resume() returns. Make it happen. Signed-off-by: Rafael J. 
Wysocki Acked-by: Pavel Machek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/power/disk.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel') diff --git a/kernel/power/disk.c b/kernel/power/disk.c index 406b20adb27..a200c5fc42e 100644 --- a/kernel/power/disk.c +++ b/kernel/power/disk.c @@ -251,6 +251,7 @@ static int software_resume(void) error = swsusp_read(); if (error) { swsusp_free(); + platform_finish(); goto Thaw; } -- cgit v1.2.3-70-g09d2 From 94985134b7b46848267ed6b734320db01c974e72 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 16 Mar 2007 13:38:06 -0800 Subject: [PATCH] swsusp: disable nonboot CPUs before entering platform suspend Prevent the WARN_ON() in arch/x86_64/kernel/acpi/sleep.c:init_low_mapping() from triggering by disabling nonboot CPUs before we finally enter the platform suspend. Signed-off-by: Rafael J. Wysocki Acked-by: Pavel Machek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/power/disk.c | 1 + kernel/power/user.c | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/power/disk.c b/kernel/power/disk.c index a200c5fc42e..873cdf8ea5a 100644 --- a/kernel/power/disk.c +++ b/kernel/power/disk.c @@ -58,6 +58,7 @@ static inline int platform_prepare(void) static void power_down(suspend_disk_method_t mode) { + disable_nonboot_cpus(); switch(mode) { case PM_DISK_PLATFORM: if (pm_ops && pm_ops->enter) { diff --git a/kernel/power/user.c b/kernel/power/user.c index dd09efe7df5..d6a8dcc26ae 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -398,9 +398,10 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, case PMOPS_ENTER: if (data->platform_suspend) { + disable_nonboot_cpus(); kernel_shutdown_prepare(SYSTEM_SUSPEND_DISK); error = pm_ops->enter(PM_SUSPEND_DISK); - error = 0; + enable_nonboot_cpus(); } break; -- cgit v1.2.3-70-g09d2 From 13788ccc41ceea5893f9c747c59bc0b28f2416c2 Mon Sep 17 00:00:00 2001 From: Thomas 
Gleixner Date: Fri, 16 Mar 2007 13:38:20 -0800 Subject: [PATCH] hrtimer: prevent overrun DoS in hrtimer_forward() hrtimer_forward() does not check for the possible overflow of timer->expires. This can happen on 64 bit machines with large interval values and currently results in an endless loop in the softirq because the expiry value becomes negative and therefore the timer is expired all the time. Check for this condition and set the expiry value to the max. expiry time in the future. The fix should be applied to stable kernel series as well. Signed-off-by: Thomas Gleixner Acked-by: Ingo Molnar Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/hrtimer.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'kernel') diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index ec4cb9f3e3b..5e7122d3f46 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -644,6 +644,12 @@ hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval) orun++; } timer->expires = ktime_add(timer->expires, interval); + /* + * Make sure, that the result did not wrap with a very large + * interval. + */ + if (timer->expires.tv64 < 0) + timer->expires = ktime_set(KTIME_SEC_MAX, 0); return orun; } -- cgit v1.2.3-70-g09d2 From ad28d94abb1313bdf27e196676292c493f92f824 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 16 Mar 2007 13:38:21 -0800 Subject: [PATCH] hrtimer: fix up unlocked access to wall_to_monotonic commit f4304ab21513b834c8fe3403927c60c2b81a72d7 (HZ free NTP) moved the access to wall_to_monotonic in hrtimer_get_softirq_time() out of the xtime_lock protection. Move it back into the seq_lock section. 
Signed-off-by: Thomas Gleixner Acked-by: John Stultz Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/hrtimer.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 5e7122d3f46..6a7938a0d51 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -135,7 +135,7 @@ EXPORT_SYMBOL_GPL(ktime_get_ts); static void hrtimer_get_softirq_time(struct hrtimer_cpu_base *base) { ktime_t xtim, tomono; - struct timespec xts; + struct timespec xts, tom; unsigned long seq; do { @@ -145,10 +145,11 @@ static void hrtimer_get_softirq_time(struct hrtimer_cpu_base *base) #else xts = xtime; #endif + tom = wall_to_monotonic; } while (read_seqretry(&xtime_lock, seq)); xtim = timespec_to_ktime(xts); - tomono = timespec_to_ktime(wall_to_monotonic); + tomono = timespec_to_ktime(tom); base->clock_base[CLOCK_REALTIME].softirq_time = xtim; base->clock_base[CLOCK_MONOTONIC].softirq_time = ktime_add(xtim, tomono); -- cgit v1.2.3-70-g09d2 From b257bc051f06607beb3004d9a1c297085e728bec Mon Sep 17 00:00:00 2001 From: Andrew Johnson Date: Fri, 16 Mar 2007 13:38:24 -0800 Subject: [PATCH] swsusp: fix suspend when console is in VT_AUTO+KD_GRAPHICS mode When the console is in VT_AUTO+KD_GRAPHICS mode, switching to the SUSPEND_CONSOLE fails, resulting in vt_waitactive() waiting indefinitely or until the task is interrupted. This patch tests if a console switch can occur in set_console() and returns early if a console switch is not possible. [akpm@linux-foundation.org: cleanup] Signed-off-by: Andrew Johnson Acked-by: Pavel Machek Cc: "Antonino A. Daplas" Cc: "Rafael J. 
Wysocki" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/vt.c | 20 +++++++++++++++++++- drivers/char/vt_ioctl.c | 2 +- include/linux/kbd_kern.h | 2 +- include/linux/vt_kern.h | 1 + kernel/power/console.c | 10 +++++++++- 5 files changed, 31 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/drivers/char/vt.c b/drivers/char/vt.c index c3f8e383933..0fefe2a2805 100644 --- a/drivers/char/vt.c +++ b/drivers/char/vt.c @@ -2185,10 +2185,28 @@ static void console_callback(struct work_struct *ignored) release_console_sem(); } -void set_console(int nr) +int set_console(int nr) { + struct vc_data *vc = vc_cons[fg_console].d; + + if (!vc_cons_allocated(nr) || vt_dont_switch || + (vc->vt_mode.mode == VT_AUTO && vc->vc_mode == KD_GRAPHICS)) { + + /* + * Console switch will fail in console_callback() or + * change_console() so there is no point scheduling + * the callback + * + * Existing set_console() users don't check the return + * value so this shouldn't break anything + */ + return -EINVAL; + } + want_console = nr; schedule_console_callback(); + + return 0; } struct tty_driver *console_driver; diff --git a/drivers/char/vt_ioctl.c b/drivers/char/vt_ioctl.c index 3a5d301e783..1fa2da8f4fb 100644 --- a/drivers/char/vt_ioctl.c +++ b/drivers/char/vt_ioctl.c @@ -34,7 +34,7 @@ #include #include -static char vt_dont_switch; +char vt_dont_switch; extern struct tty_driver *console_driver; #define VT_IS_IN_USE(i) (console_driver->ttys[i] && console_driver->ttys[i]->count) diff --git a/include/linux/kbd_kern.h b/include/linux/kbd_kern.h index 06c58c423fe..506ad20c18f 100644 --- a/include/linux/kbd_kern.h +++ b/include/linux/kbd_kern.h @@ -75,7 +75,7 @@ extern int do_poke_blanked_console; extern void (*kbd_ledfunc)(unsigned int led); -extern void set_console(int nr); +extern int set_console(int nr); extern void schedule_console_callback(void); static inline void set_leds(void) diff --git a/include/linux/vt_kern.h b/include/linux/vt_kern.h index 
37a1a41f5b6..e0db669998f 100644 --- a/include/linux/vt_kern.h +++ b/include/linux/vt_kern.h @@ -83,6 +83,7 @@ void reset_vc(struct vc_data *vc); #define CON_BUF_SIZE (CONFIG_BASE_SMALL ? 256 : PAGE_SIZE) extern char con_buf[CON_BUF_SIZE]; extern struct semaphore con_buf_sem; +extern char vt_dont_switch; struct vt_spawn_console { spinlock_t lock; diff --git a/kernel/power/console.c b/kernel/power/console.c index 623786d4415..89bcf4973ee 100644 --- a/kernel/power/console.c +++ b/kernel/power/console.c @@ -27,7 +27,15 @@ int pm_prepare_console(void) return 1; } - set_console(SUSPEND_CONSOLE); + if (set_console(SUSPEND_CONSOLE)) { + /* + * We're unable to switch to the SUSPEND_CONSOLE. + * Let the calling function know so it can decide + * what to do. + */ + release_console_sem(); + return 1; + } release_console_sem(); if (vt_waitactive(SUSPEND_CONSOLE)) { -- cgit v1.2.3-70-g09d2 From 21778867b1c8e0feb567addb6dc0a7e2ca6ecdec Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 16 Mar 2007 13:38:31 -0800 Subject: [PATCH] futex: PI state locking fix Testing of -rt by IBM uncovered a locking bug in wake_futex_pi(): the PI state needs to be locked before we access it. 
Signed-off-by: Ingo Molnar Acked-by: Thomas Gleixner Cc: Chuck Ebbert Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/futex.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'kernel') diff --git a/kernel/futex.c b/kernel/futex.c index e749e7df14b..5a270b5e3f9 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -565,6 +565,7 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this) if (!pi_state) return -EINVAL; + spin_lock(&pi_state->pi_mutex.wait_lock); new_owner = rt_mutex_next_owner(&pi_state->pi_mutex); /* @@ -604,6 +605,7 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this) pi_state->owner = new_owner; spin_unlock_irq(&new_owner->pi_lock); + spin_unlock(&pi_state->pi_mutex.wait_lock); rt_mutex_unlock(&pi_state->pi_mutex); return 0; -- cgit v1.2.3-70-g09d2 From e29e175b0f40cffc86068156733def14a7a533ab Mon Sep 17 00:00:00 2001 From: Zilvinas Valinskas Date: Fri, 16 Mar 2007 13:38:34 -0800 Subject: [PATCH] initialise pi_lock if CONFIG_RT_MUTEXES=N Fixes a bogus lockdep warning which causes lockdep to disable itself. 
Acked-by: Ingo Molnar Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/fork.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/fork.c b/kernel/fork.c index d154cc78648..6af959c034d 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -933,8 +933,8 @@ asmlinkage long sys_set_tid_address(int __user *tidptr) static inline void rt_mutex_init_task(struct task_struct *p) { -#ifdef CONFIG_RT_MUTEXES spin_lock_init(&p->pi_lock); +#ifdef CONFIG_RT_MUTEXES plist_head_init(&p->pi_waiters, &p->pi_lock); p->pi_blocked_on = NULL; #endif -- cgit v1.2.3-70-g09d2 From cd05a1f818073a623455a58e756c5b419fc98db9 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 17 Mar 2007 00:25:52 +0100 Subject: [PATCH] clockevents: Fix suspend/resume to disk hangs I finally found a dual core box, which survives suspend/resume without crashing in the middle of nowhere. Sigh, I never figured out from the code and the bug reports what's going on. The observed hangs are caused by a stale state transition of the clock event devices, which keeps the RCU synchronization away from completion, when the non boot CPU is brought back up. The suspend/resume in oneshot mode needs the similar care as the periodic mode during suspend to RAM. My assumption that the state transitions during the different shutdown/bringups of s2disk would go through the periodic boot phase and then switch over to highres resp. nohz mode were simply wrong. Add the appropriate suspend / resume handling for the non periodic modes. 
Signed-off-by: Thomas Gleixner Signed-off-by: Linus Torvalds --- kernel/time/tick-broadcast.c | 27 ++++++++++++++++++++++----- kernel/time/tick-common.c | 13 +++++++------ kernel/time/tick-internal.h | 11 ++++++++++- kernel/time/tick-oneshot.c | 12 ++++++++++++ 4 files changed, 51 insertions(+), 12 deletions(-) (limited to 'kernel') diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index 5567745470f..eadfce2fff7 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -307,12 +307,19 @@ int tick_resume_broadcast(void) spin_lock_irqsave(&tick_broadcast_lock, flags); bc = tick_broadcast_device.evtdev; - if (bc) { - if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC && - !cpus_empty(tick_broadcast_mask)) - tick_broadcast_start_periodic(bc); - broadcast = cpu_isset(smp_processor_id(), tick_broadcast_mask); + if (bc) { + switch (tick_broadcast_device.mode) { + case TICKDEV_MODE_PERIODIC: + if(!cpus_empty(tick_broadcast_mask)) + tick_broadcast_start_periodic(bc); + broadcast = cpu_isset(smp_processor_id(), + tick_broadcast_mask); + break; + case TICKDEV_MODE_ONESHOT: + broadcast = tick_resume_broadcast_oneshot(bc); + break; + } } spin_unlock_irqrestore(&tick_broadcast_lock, flags); @@ -347,6 +354,16 @@ static int tick_broadcast_set_event(ktime_t expires, int force) } } +int tick_resume_broadcast_oneshot(struct clock_event_device *bc) +{ + clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT); + + if(!cpus_empty(tick_broadcast_oneshot_mask)) + tick_broadcast_set_event(ktime_get(), 1); + + return cpu_isset(smp_processor_id(), tick_broadcast_oneshot_mask); +} + /* * Reprogram the broadcast device: * diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index 43ba1bdec14..bfda3f7f071 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -298,18 +298,17 @@ static void tick_shutdown(unsigned int *cpup) spin_unlock_irqrestore(&tick_device_lock, flags); } -static void tick_suspend_periodic(void) 
+static void tick_suspend(void) { struct tick_device *td = &__get_cpu_var(tick_cpu_device); unsigned long flags; spin_lock_irqsave(&tick_device_lock, flags); - if (td->mode == TICKDEV_MODE_PERIODIC) - clockevents_set_mode(td->evtdev, CLOCK_EVT_MODE_SHUTDOWN); + clockevents_set_mode(td->evtdev, CLOCK_EVT_MODE_SHUTDOWN); spin_unlock_irqrestore(&tick_device_lock, flags); } -static void tick_resume_periodic(void) +static void tick_resume(void) { struct tick_device *td = &__get_cpu_var(tick_cpu_device); unsigned long flags; @@ -317,6 +316,8 @@ static void tick_resume_periodic(void) spin_lock_irqsave(&tick_device_lock, flags); if (td->mode == TICKDEV_MODE_PERIODIC) tick_setup_periodic(td->evtdev, 0); + else + tick_resume_oneshot(); spin_unlock_irqrestore(&tick_device_lock, flags); } @@ -348,13 +349,13 @@ static int tick_notify(struct notifier_block *nb, unsigned long reason, break; case CLOCK_EVT_NOTIFY_SUSPEND: - tick_suspend_periodic(); + tick_suspend(); tick_suspend_broadcast(); break; case CLOCK_EVT_NOTIFY_RESUME: if (!tick_resume_broadcast()) - tick_resume_periodic(); + tick_resume(); break; default: diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h index 75890efd24f..c9d203bde51 100644 --- a/kernel/time/tick-internal.h +++ b/kernel/time/tick-internal.h @@ -19,12 +19,13 @@ extern void tick_setup_oneshot(struct clock_event_device *newdev, extern int tick_program_event(ktime_t expires, int force); extern void tick_oneshot_notify(void); extern int tick_switch_to_oneshot(void (*handler)(struct clock_event_device *)); - +extern void tick_resume_oneshot(void); # ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST extern void tick_broadcast_setup_oneshot(struct clock_event_device *bc); extern void tick_broadcast_oneshot_control(unsigned long reason); extern void tick_broadcast_switch_to_oneshot(void); extern void tick_shutdown_broadcast_oneshot(unsigned int *cpup); +extern int tick_resume_broadcast_oneshot(struct clock_event_device *bc); # else /* BROADCAST */ 
static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc) { @@ -43,6 +44,10 @@ void tick_setup_oneshot(struct clock_event_device *newdev, { BUG(); } +static inline void tick_resume_oneshot(void) +{ + BUG(); +} static inline int tick_program_event(ktime_t expires, int force) { return 0; @@ -54,6 +59,10 @@ static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc) } static inline void tick_broadcast_oneshot_control(unsigned long reason) { } static inline void tick_shutdown_broadcast_oneshot(unsigned int *cpup) { } +static inline int tick_resume_broadcast_oneshot(struct clock_event_device *bc) +{ + return 0; +} #endif /* !TICK_ONESHOT */ /* diff --git a/kernel/time/tick-oneshot.c b/kernel/time/tick-oneshot.c index 2e8b7ff863c..f6997ab0c3c 100644 --- a/kernel/time/tick-oneshot.c +++ b/kernel/time/tick-oneshot.c @@ -40,6 +40,18 @@ int tick_program_event(ktime_t expires, int force) } } +/** + * tick_resume_onshot - resume oneshot mode + */ +void tick_resume_oneshot(void) +{ + struct tick_device *td = &__get_cpu_var(tick_cpu_device); + struct clock_event_device *dev = td->evtdev; + + clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT); + tick_program_event(ktime_get(), 1); +} + /** * tick_setup_oneshot - setup the event device for oneshot mode (hres or nohz) */ -- cgit v1.2.3-70-g09d2 From 93c9a7ff50a5b39dbdf80129c5da89e6d6256bea Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 22 Mar 2007 00:11:20 -0800 Subject: [PATCH] swsusp: Fix SNAPSHOT_S2RAM ioctl The SNAPSHOT_S2RAM ioctl does not disable the nonboot CPUs before entering the suspend, although it should do this. Signed-off-by: Rafael J. 
Wysocki Acked-by: Pavel Machek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/power/user.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/power/user.c b/kernel/power/user.c index d6a8dcc26ae..bf211fee122 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -368,9 +368,12 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, if (error) { printk(KERN_ERR "Failed to suspend some devices.\n"); } else { - /* Enter S3, system is already frozen */ - suspend_enter(PM_SUSPEND_MEM); - + error = disable_nonboot_cpus(); + if (!error) { + /* Enter S3, system is already frozen */ + suspend_enter(PM_SUSPEND_MEM); + enable_nonboot_cpus(); + } /* Wake up devices */ device_resume(); } -- cgit v1.2.3-70-g09d2 From 058560fbd70190ea6b50b5df4d814bc30911d06b Mon Sep 17 00:00:00 2001 From: Pavel Machek Date: Thu, 22 Mar 2007 00:11:25 -0800 Subject: [PATCH] fix extra BIOS invocation during resume It causes extra moon icons blinking on x60, and breaks at least two other systems. During resume, we do not know whether the "reboot"/"shutdown" method was used, so we assume "platform" and call BIOS, anyway... This is 2.6.21 material, and should fix 2 or 3 regressions from 2.6.20. Signed-off-by: Pavel Machek Acked-by: "Rafael J. 
Wysocki" Cc: Adrian Bunk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/power/disk.c | 8 -------- 1 file changed, 8 deletions(-) (limited to 'kernel') diff --git a/kernel/power/disk.c b/kernel/power/disk.c index 873cdf8ea5a..dee0ff40bef 100644 --- a/kernel/power/disk.c +++ b/kernel/power/disk.c @@ -241,18 +241,11 @@ static int software_resume(void) goto Done; } - error = platform_prepare(); - if (error) { - swsusp_free(); - goto Thaw; - } - pr_debug("PM: Reading swsusp image.\n"); error = swsusp_read(); if (error) { swsusp_free(); - platform_finish(); goto Thaw; } @@ -270,7 +263,6 @@ static int software_resume(void) enable_nonboot_cpus(); Free: swsusp_free(); - platform_finish(); device_resume(); resume_console(); Thaw: -- cgit v1.2.3-70-g09d2 From 9c35dd7f8bda1849dcb430be99325504221048df Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Thu, 22 Mar 2007 00:11:28 -0800 Subject: [PATCH] lockdep: debug_show_all_locks & debug_show_held_locks vs. debug_locks lockdep's data shouldn't be used when debug_locks == 0 because it's not updated after this, so it's more misleading than helpful. PS: probably lockdep's current-> fields should be reset after it turns debug_locks off: so, after printing a bug report, but before return from exported functions, but there are really a lot of these possibilities (e.g. after DEBUG_LOCKS_WARN_ON), so, something could be missed. (Of course direct use of this fields isn't recommended either.) 
Reported-by: Folkert van Heusden Inspired-by: Oleg Nesterov Signed-off-by: Jarek Poplawski Acked-by: Peter Zijlstra Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/lockdep.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'kernel') diff --git a/kernel/lockdep.c b/kernel/lockdep.c index 8dc24c92dc6..7065a687ac5 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -2742,6 +2742,10 @@ void debug_show_all_locks(void) int count = 10; int unlock = 1; + if (unlikely(!debug_locks)) { + printk("INFO: lockdep is turned off.\n"); + return; + } printk("\nShowing all locks held in the system:\n"); /* @@ -2785,6 +2789,10 @@ EXPORT_SYMBOL_GPL(debug_show_all_locks); void debug_show_held_locks(struct task_struct *task) { + if (unlikely(!debug_locks)) { + printk("INFO: lockdep is turned off.\n"); + return; + } lockdep_print_held_locks(task); } -- cgit v1.2.3-70-g09d2 From 0444b3035e5f4981f4d1d96f9f0c3cbada1e6d69 Mon Sep 17 00:00:00 2001 From: James Morris Date: Fri, 23 Mar 2007 00:09:58 -0700 Subject: [PATCH] time: fix formatting in /proc/timer_list Fix the print formatting of three unsigned long fields in /proc/timer_list, which are currently being formatted as signed long. 
Signed-off-by: James Morris Acked-by: Ingo Molnar Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/time/timer_list.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c index f82c635c3d5..59df5e8555a 100644 --- a/kernel/time/timer_list.c +++ b/kernel/time/timer_list.c @@ -194,9 +194,9 @@ print_tickdevice(struct seq_file *m, struct tick_device *td) return; } SEQ_printf(m, "%s\n", dev->name); - SEQ_printf(m, " max_delta_ns: %ld\n", dev->max_delta_ns); - SEQ_printf(m, " min_delta_ns: %ld\n", dev->min_delta_ns); - SEQ_printf(m, " mult: %ld\n", dev->mult); + SEQ_printf(m, " max_delta_ns: %lu\n", dev->max_delta_ns); + SEQ_printf(m, " min_delta_ns: %lu\n", dev->min_delta_ns); + SEQ_printf(m, " mult: %lu\n", dev->mult); SEQ_printf(m, " shift: %d\n", dev->shift); SEQ_printf(m, " mode: %d\n", dev->mode); SEQ_printf(m, " next_event: %Ld nsecs\n", -- cgit v1.2.3-70-g09d2 From 9501b6cf5541f0d576d566a463f1e7d3eaaab4eb Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 25 Mar 2007 14:31:17 +0200 Subject: [PATCH] dynticks: fix hrtimer rounding error in next_timer_interrupt The rework of next_timer_interrupt() fixed the timer wheel bugs, but invented a rounding error versus the next hrtimer event. This is caused by the conversion of the hrtimer internal representation to relative jiffies. This causes bug #8100: http://bugzilla.kernel.org/show_bug.cgi?id=8100 next_timer_interrupt() returns "now" in such a case and causes the code in tick_nohz_stop_sched_tick() to trigger the timer softirq, which is bogus as no timer is due for expiry. This results in an endless context switching between idle and ksoftirqd until a timer is due for expiry. Modify the hrtimer evaluation so that, it returns now + 1, when the conversion results in a delta < 1 jiffie. 
It's confirmed to resolve bug #8100 Reported-by: Emil Karlson Signed-off-by: Thomas Gleixner Signed-off-by: Linus Torvalds --- kernel/timer.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/timer.c b/kernel/timer.c index 797cccb8643..440048acaea 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -695,15 +695,28 @@ static unsigned long cmp_next_hrtimer_event(unsigned long now, { ktime_t hr_delta = hrtimer_get_next_event(); struct timespec tsdelta; + unsigned long delta; if (hr_delta.tv64 == KTIME_MAX) return expires; - if (hr_delta.tv64 <= TICK_NSEC) - return now; + /* + * Expired timer available, let it expire in the next tick + */ + if (hr_delta.tv64 <= 0) + return now + 1; tsdelta = ktime_to_timespec(hr_delta); - now += timespec_to_jiffies(&tsdelta); + delta = timespec_to_jiffies(&tsdelta); + /* + * Take rounding errors in to account and make sure, that it + * expires in the next tick. Otherwise we go into an endless + * ping pong due to tick_nohz_stop_sched_tick() retriggering + * the timer softirq + */ + if (delta < 1) + delta = 1; + now += delta; if (time_before(now, expires)) return now; return expires; -- cgit v1.2.3-70-g09d2 From 948ac6d71cf868b431adb3139d8dfbd9c4e4a6ca Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 25 Mar 2007 14:42:51 +0200 Subject: [PATCH] clocksource: Fix thinko in watchdog selection The watchdog implementation excludes low res / non continuous clocksources from being selected as a watchdog reference unintentionally. Allow using jiffies/PIT as a watchdog reference as long as no better clocksource is available. This is necessary to detect TSC breakage on systems, which have no pmtimer/hpet. The main goal of the initial patch (preventing to switch to highres/nohz when no reliable fallback clocksource is available) is still guaranteed by the checks in clocksource_watchdog(). 
Signed-off-by: Thomas Gleixner Signed-off-by: Linus Torvalds --- kernel/time/clocksource.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 5b0e46b56fd..fe5c7db2424 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -151,7 +151,8 @@ static void clocksource_check_watchdog(struct clocksource *cs) watchdog_timer.expires = jiffies + WATCHDOG_INTERVAL; add_timer(&watchdog_timer); } - } else if (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS) { + } else { + if (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS) cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES; if (!watchdog || cs->rating > watchdog->rating) { -- cgit v1.2.3-70-g09d2 From 291bc047e125ff02c9affe06a7df28bed57b054d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 26 Mar 2007 11:21:08 +0200 Subject: [PATCH] clockevents: remove bad designed sysfs support for now The current sysfs support of clockevents does not obey the "only one value per file" rule. The real fix is not 2.6.21 material. Therefor remove the sysfs support for now. 
Signed-off-by: Thomas Gleixner Acked-by: Ingo Molnar Acked-by: Greg Kroah-Hartman Signed-off-by: Linus Torvalds --- kernel/time/clockevents.c | 69 ----------------------------------------------- 1 file changed, 69 deletions(-) (limited to 'kernel') diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 67932ea78c1..76212b2a99d 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -274,72 +274,3 @@ void clockevents_notify(unsigned long reason, void *arg) } EXPORT_SYMBOL_GPL(clockevents_notify); -#ifdef CONFIG_SYSFS - -/** - * clockevents_show_registered - sysfs interface for listing clockevents - * @dev: unused - * @buf: char buffer to be filled with clock events list - * - * Provides sysfs interface for listing registered clock event devices - */ -static ssize_t clockevents_show_registered(struct sys_device *dev, char *buf) -{ - struct list_head *tmp; - char *p = buf; - int cpu; - - spin_lock(&clockevents_lock); - - list_for_each(tmp, &clockevent_devices) { - struct clock_event_device *ce; - - ce = list_entry(tmp, struct clock_event_device, list); - p += sprintf(p, "%-20s F:%04x M:%d", ce->name, - ce->features, ce->mode); - p += sprintf(p, " C:"); - if (!cpus_equal(ce->cpumask, cpu_possible_map)) { - for_each_cpu_mask(cpu, ce->cpumask) - p += sprintf(p, " %d", cpu); - } else { - /* - * FIXME: Add the cpu which is handling this sucker - */ - } - p += sprintf(p, "\n"); - } - - spin_unlock(&clockevents_lock); - - return p - buf; -} - -/* - * Sysfs setup bits: - */ -static SYSDEV_ATTR(registered, 0600, - clockevents_show_registered, NULL); - -static struct sysdev_class clockevents_sysclass = { - set_kset_name("clockevents"), -}; - -static struct sys_device clockevents_sys_device = { - .id = 0, - .cls = &clockevents_sysclass, -}; - -static int __init clockevents_sysfs_init(void) -{ - int error = sysdev_class_register(&clockevents_sysclass); - - if (!error) - error = sysdev_register(&clockevents_sys_device); - if (!error) - error = 
sysdev_create_file( - &clockevents_sys_device, - &attr_registered); - return error; -} -device_initcall(clockevents_sysfs_init); -#endif -- cgit v1.2.3-70-g09d2 From d62ac21aa075c8ddf3d02a98d28afce635e77e8e Mon Sep 17 00:00:00 2001 From: john stultz Date: Mon, 26 Mar 2007 21:32:26 -0800 Subject: [PATCH] ntp: avoid time_offset overflows I've been seeing some odd NTP behavior recently on a few boxes and finally narrowed it down to time_offset overflowing when converted to SHIFT_UPDATE units (which was a side effect from my HZfreeNTP patch). This patch converts time_offset from a long to a s64 which resolves the issue. [tglx@linutronix.de: signedness fixes] Signed-off-by: John Stultz Cc: Roman Zippel Cc: john stultz Cc: Ingo Molnar Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/time/ntp.c | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) (limited to 'kernel') diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index eb12509e00b..cb25649c6f5 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -32,7 +32,7 @@ static u64 tick_length, tick_length_base; /* TIME_ERROR prevents overwriting the CMOS clock */ static int time_state = TIME_OK; /* clock synchronization status */ int time_status = STA_UNSYNC; /* clock status bits */ -static long time_offset; /* time adjustment (ns) */ +static s64 time_offset; /* time adjustment (ns) */ static long time_constant = 2; /* pll time constant */ long time_maxerror = NTP_PHASE_LIMIT; /* maximum error (us) */ long time_esterror = NTP_PHASE_LIMIT; /* estimated error (us) */ @@ -196,7 +196,7 @@ void __attribute__ ((weak)) notify_arch_cmos_timer(void) */ int do_adjtimex(struct timex *txc) { - long ltemp, mtemp, save_adjust; + long mtemp, save_adjust, rem; s64 freq_adj, temp64; int result; @@ -277,14 +277,14 @@ int do_adjtimex(struct timex *txc) time_adjust = txc->offset; } else if (time_status & STA_PLL) { - ltemp = txc->offset * NSEC_PER_USEC; + time_offset = 
txc->offset * NSEC_PER_USEC; /* * Scale the phase adjustment and * clamp to the operating range. */ - time_offset = min(ltemp, MAXPHASE * NSEC_PER_USEC); - time_offset = max(time_offset, -MAXPHASE * NSEC_PER_USEC); + time_offset = min(time_offset, (s64)MAXPHASE * NSEC_PER_USEC); + time_offset = max(time_offset, (s64)-MAXPHASE * NSEC_PER_USEC); /* * Select whether the frequency is to be controlled @@ -297,11 +297,11 @@ int do_adjtimex(struct timex *txc) mtemp = xtime.tv_sec - time_reftime; time_reftime = xtime.tv_sec; - freq_adj = (s64)time_offset * mtemp; + freq_adj = time_offset * mtemp; freq_adj = shift_right(freq_adj, time_constant * 2 + (SHIFT_PLL + 2) * 2 - SHIFT_NSEC); if (mtemp >= MINSEC && (time_status & STA_FLL || mtemp > MAXSEC)) { - temp64 = (s64)time_offset << (SHIFT_NSEC - SHIFT_FLL); + temp64 = time_offset << (SHIFT_NSEC - SHIFT_FLL); if (time_offset < 0) { temp64 = -temp64; do_div(temp64, mtemp); @@ -314,8 +314,10 @@ int do_adjtimex(struct timex *txc) freq_adj += time_freq; freq_adj = min(freq_adj, (s64)MAXFREQ_NSEC); time_freq = max(freq_adj, (s64)-MAXFREQ_NSEC); - time_offset = (time_offset / NTP_INTERVAL_FREQ) - << SHIFT_UPDATE; + time_offset = div_long_long_rem_signed(time_offset, + NTP_INTERVAL_FREQ, + &rem); + time_offset <<= SHIFT_UPDATE; } /* STA_PLL */ } /* txc->modes & ADJ_OFFSET */ if (txc->modes & ADJ_TICK) @@ -328,12 +330,12 @@ leave: if ((time_status & (STA_UNSYNC|STA_CLOCKERR)) != 0) result = TIME_ERROR; if ((txc->modes & ADJ_OFFSET_SINGLESHOT) == ADJ_OFFSET_SINGLESHOT) - txc->offset = save_adjust; + txc->offset = save_adjust; else - txc->offset = shift_right(time_offset, SHIFT_UPDATE) - * NTP_INTERVAL_FREQ / 1000; - txc->freq = (time_freq / NSEC_PER_USEC) - << (SHIFT_USEC - SHIFT_NSEC); + txc->offset = ((long)shift_right(time_offset, SHIFT_UPDATE)) * + NTP_INTERVAL_FREQ / 1000; + txc->freq = (time_freq / NSEC_PER_USEC) << + (SHIFT_USEC - SHIFT_NSEC); txc->maxerror = time_maxerror; txc->esterror = time_esterror; txc->status = 
time_status; -- cgit v1.2.3-70-g09d2 From 436ce71638eceb0f9dd7608157807c37b29c3db7 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 27 Mar 2007 12:09:13 +0200 Subject: [PATCH] Revert "swsusp: disable nonboot CPUs before entering platform suspend" This reverts commit 94985134b7b46848267ed6b734320db01c974e72 and instead removes the WARN_ON() that caused that commit in the first place. The problem is that we call disable_nonboot_cpus() in swsusp before powering down the system in order to avoid triggering the WARN_ON() in arch/x86_64/kernel/acpi/sleep.c:init_low_mapping() and this doesn't work well on Thomas' system. So instead, remove the WARN_ON() in arch/x86_64/kernel/acpi/sleep.c: init_low_mapping(), which triggers every time during the suspend to disk in the platform mode, as the potential problem it is related to doesn't seem to occur in practice. [ I think we might want to disallow the case of multiple users of that mm, or something. Normally, playing with the current process page tables on the current CPU should be fine as long as we don't have other threads using those tables at the same time.. Anyway, not pretty, but better than the warning or the lockup - Linus ] Signed-off-by: Rafael J. Wysocki Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/acpi/sleep.c | 4 +++- kernel/power/disk.c | 1 - kernel/power/user.c | 3 +-- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/arch/x86_64/kernel/acpi/sleep.c b/arch/x86_64/kernel/acpi/sleep.c index 23178ce6c78..e1548fbe95a 100644 --- a/arch/x86_64/kernel/acpi/sleep.c +++ b/arch/x86_64/kernel/acpi/sleep.c @@ -66,8 +66,10 @@ static void init_low_mapping(void) { pgd_t *slot0 = pgd_offset(current->mm, 0UL); low_ptr = *slot0; + /* FIXME: We're playing with the current task's page tables here, which + * is potentially dangerous on SMP systems. 
+ */ set_pgd(slot0, *pgd_offset(current->mm, PAGE_OFFSET)); - WARN_ON(num_online_cpus() != 1); local_flush_tlb(); } diff --git a/kernel/power/disk.c b/kernel/power/disk.c index dee0ff40bef..aec19b063e3 100644 --- a/kernel/power/disk.c +++ b/kernel/power/disk.c @@ -58,7 +58,6 @@ static inline int platform_prepare(void) static void power_down(suspend_disk_method_t mode) { - disable_nonboot_cpus(); switch(mode) { case PM_DISK_PLATFORM: if (pm_ops && pm_ops->enter) { diff --git a/kernel/power/user.c b/kernel/power/user.c index bf211fee122..7cf6713b232 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -401,10 +401,9 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, case PMOPS_ENTER: if (data->platform_suspend) { - disable_nonboot_cpus(); kernel_shutdown_prepare(SYSTEM_SUSPEND_DISK); error = pm_ops->enter(PM_SUSPEND_DISK); - enable_nonboot_cpus(); + error = 0; } break; -- cgit v1.2.3-70-g09d2 From 935c631db827cc3a96df4dcc6fec374b994fdbd1 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Mar 2007 13:17:18 +0200 Subject: [PATCH] hrtimers: fix reprogramming SMP race hrtimer_start() incorrectly set the 'reprogram' flag to enqueue_hrtimer(), which should only be 1 if the hrtimer is queued to the current CPU. Doing otherwise could result in a reprogramming of the current CPU's clockevents device, with a timer that is not queued to it - resulting in a bogus next expiry value. 
Signed-off-by: Ingo Molnar Cc: Michal Piotrowski Acked-by: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/hrtimer.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 6a7938a0d51..067ba2c0532 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -814,7 +814,12 @@ hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode) timer_stats_hrtimer_set_start_info(timer); - enqueue_hrtimer(timer, new_base, base == new_base); + /* + * Only allow reprogramming if the new base is on this CPU. + * (it might still be on another CPU if the timer was pending) + */ + enqueue_hrtimer(timer, new_base, + new_base->cpu_base == &__get_cpu_var(hrtimer_bases)); unlock_hrtimer_base(timer, &flags); -- cgit v1.2.3-70-g09d2 From 14e9d5730adfca26452b3a2838a80af6950556f5 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 28 Mar 2007 23:38:16 -0600 Subject: [PATCH] pid: Properly detect orphaned process groups in exit_notify In commit 0475ac0845f9295bc5f69af45f58dff2c104c8d1 when converting the orphaned process group handling to use struct pid I made a small mistake. I accidentally replaced an == with a !=. Besides just being a dumb thing to do apparently this has a bad side effect. The improper orphaned process group detection causes kwin to die after a suspend/resume cycle. I'm amazed this patch has been around as long as it has without anyone else noticing something funny going on. And the following people deserve credit for spotting and helping to reproduce this. Thanks to: Sid Boyce Thanks to: "Michael Wu" Signed-off-by: "Eric W. 
Biederman" Signed-off-by: Linus Torvalds --- kernel/exit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/exit.c b/kernel/exit.c index f132349c032..b55ed4cc910 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -790,7 +790,7 @@ static void exit_notify(struct task_struct *tsk) pgrp = task_pgrp(tsk); if ((task_pgrp(t) != pgrp) && - (task_session(t) != task_session(tsk)) && + (task_session(t) == task_session(tsk)) && will_become_orphaned_pgrp(pgrp, tsk) && has_stopped_jobs(pgrp)) { __kill_pgrp_info(SIGHUP, SEND_SIG_PRIV, pgrp); -- cgit v1.2.3-70-g09d2 From 0c84ce268b69855919b6ac7edc8f11caf21e9c88 Mon Sep 17 00:00:00 2001 From: Kay Sievers Date: Sun, 1 Apr 2007 23:49:48 -0700 Subject: [PATCH] driver core: fix built-in drivers sysfs links built-in drivers had broken sysfs links that caused bootup hangs for certain driver unregistry sequences. Signed-off-by: Ingo Molnar Acked-by: Kay Sievers Signed-off-by: Greg KH Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/device.h | 1 + kernel/module.c | 18 ++++++++++++++---- 2 files changed, 15 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/include/linux/device.h b/include/linux/device.h index caad9bba965..5cf30e95c8b 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -128,6 +128,7 @@ struct device_driver { struct module * owner; const char * mod_name; /* used for built-in modules */ + struct module_kobject * mkobj; int (*probe) (struct device * dev); int (*remove) (struct device * dev); diff --git a/kernel/module.c b/kernel/module.c index fbc51de6444..dcdb32b8b13 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2384,8 +2384,13 @@ void module_add_driver(struct module *mod, struct device_driver *drv) /* Lookup built-in module entry in /sys/modules */ mkobj = kset_find_obj(&module_subsys.kset, drv->mod_name); - if (mkobj) + if (mkobj) { mk = container_of(mkobj, struct module_kobject, kobj); + /* remember our module 
structure */ + drv->mkobj = mk; + /* kset_find_obj took a reference */ + kobject_put(mkobj); + } } if (!mk) @@ -2405,17 +2410,22 @@ EXPORT_SYMBOL(module_add_driver); void module_remove_driver(struct device_driver *drv) { + struct module_kobject *mk = NULL; char *driver_name; if (!drv) return; sysfs_remove_link(&drv->kobj, "module"); - if (drv->owner && drv->owner->mkobj.drivers_dir) { + + if (drv->owner) + mk = &drv->owner->mkobj; + else if (drv->mkobj) + mk = drv->mkobj; + if (mk && mk->drivers_dir) { driver_name = make_driver_name(drv); if (driver_name) { - sysfs_remove_link(drv->owner->mkobj.drivers_dir, - driver_name); + sysfs_remove_link(mk->drivers_dir, driver_name); kfree(driver_name); } } -- cgit v1.2.3-70-g09d2 From 1d64b9cb1dc2a7cd521444e3d908adeccd026356 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 1 Apr 2007 23:49:49 -0700 Subject: [PATCH] Fix microcode-related suspend problem Fix the regression resulting from the recent change of suspend code ordering that causes systems based on Intel x86 CPUs using the microcode driver to hang during the resume. The problem occurs since the microcode driver uses request_firmware() in its CPU hotplug notifier, which is called after tasks have been frozen and hangs. It can be fixed by telling the microcode driver to use the microcode stored in memory during the resume instead of trying to load it from disk. Signed-off-by: Rafael J. 
Wysocki Adrian Bunk Cc: Tigran Aivazian Cc: Pavel Machek Cc: Maxim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/microcode.c | 71 +++++++++++++++++++++++++++++++++++++++++--- include/linux/cpu.h | 4 +++ kernel/cpu.c | 32 ++++++++++---------- 3 files changed, 87 insertions(+), 20 deletions(-) (limited to 'kernel') diff --git a/arch/i386/kernel/microcode.c b/arch/i386/kernel/microcode.c index b8f16633a6e..cbe7ec8dbb9 100644 --- a/arch/i386/kernel/microcode.c +++ b/arch/i386/kernel/microcode.c @@ -567,6 +567,53 @@ static int cpu_request_microcode(int cpu) return error; } +static int apply_microcode_on_cpu(int cpu) +{ + struct cpuinfo_x86 *c = cpu_data + cpu; + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + cpumask_t old; + unsigned int val[2]; + int err = 0; + + if (!uci->mc) + return -EINVAL; + + old = current->cpus_allowed; + set_cpus_allowed(current, cpumask_of_cpu(cpu)); + + /* Check if the microcode we have in memory matches the CPU */ + if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 || + cpu_has(c, X86_FEATURE_IA64) || uci->sig != cpuid_eax(0x00000001)) + err = -EINVAL; + + if (!err && ((c->x86_model >= 5) || (c->x86 > 6))) { + /* get processor flags from MSR 0x17 */ + rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]); + if (uci->pf != (1 << ((val[1] >> 18) & 7))) + err = -EINVAL; + } + + if (!err) { + wrmsr(MSR_IA32_UCODE_REV, 0, 0); + /* see notes above for revision 1.07. 
Apparent chip bug */ + sync_core(); + /* get the current revision from MSR 0x8B */ + rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]); + if (uci->rev != val[1]) + err = -EINVAL; + } + + if (!err) + apply_microcode(cpu); + else + printk(KERN_ERR "microcode: Could not apply microcode to CPU%d:" + " sig=0x%x, pf=0x%x, rev=0x%x\n", + cpu, uci->sig, uci->pf, uci->rev); + + set_cpus_allowed(current, old); + return err; +} + static void microcode_init_cpu(int cpu) { cpumask_t old; @@ -577,7 +624,8 @@ static void microcode_init_cpu(int cpu) set_cpus_allowed(current, cpumask_of_cpu(cpu)); mutex_lock(&microcode_mutex); collect_cpu_info(cpu); - if (uci->valid && system_state == SYSTEM_RUNNING) + if (uci->valid && system_state == SYSTEM_RUNNING && + !suspend_cpu_hotplug) cpu_request_microcode(cpu); mutex_unlock(&microcode_mutex); set_cpus_allowed(current, old); @@ -663,13 +711,24 @@ static int mc_sysdev_add(struct sys_device *sys_dev) return 0; pr_debug("Microcode:CPU %d added\n", cpu); - memset(uci, 0, sizeof(*uci)); + /* If suspend_cpu_hotplug is set, the system is resuming and we should + * use the data from before the suspend. + */ + if (suspend_cpu_hotplug) { + err = apply_microcode_on_cpu(cpu); + if (err) + microcode_fini_cpu(cpu); + } + if (!uci->valid) + memset(uci, 0, sizeof(*uci)); err = sysfs_create_group(&sys_dev->kobj, &mc_attr_group); if (err) return err; - microcode_init_cpu(cpu); + if (!uci->valid) + microcode_init_cpu(cpu); + return 0; } @@ -680,7 +739,11 @@ static int mc_sysdev_remove(struct sys_device *sys_dev) if (!cpu_online(cpu)) return 0; pr_debug("Microcode:CPU %d removed\n", cpu); - microcode_fini_cpu(cpu); + /* If suspend_cpu_hotplug is set, the system is suspending and we should + * keep the microcode in memory for the resume. 
+ */ + if (!suspend_cpu_hotplug) + microcode_fini_cpu(cpu); sysfs_remove_group(&sys_dev->kobj, &mc_attr_group); return 0; } diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 769ddc6df49..c22b0dfcbcd 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -127,9 +127,13 @@ static inline int cpu_is_offline(int cpu) { return 0; } #endif /* CONFIG_HOTPLUG_CPU */ #ifdef CONFIG_SUSPEND_SMP +extern int suspend_cpu_hotplug; + extern int disable_nonboot_cpus(void); extern void enable_nonboot_cpus(void); #else +#define suspend_cpu_hotplug 0 + static inline int disable_nonboot_cpus(void) { return 0; } static inline void enable_nonboot_cpus(void) {} #endif diff --git a/kernel/cpu.c b/kernel/cpu.c index 3d4206ada5c..36e70845cfc 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -254,6 +254,12 @@ int __cpuinit cpu_up(unsigned int cpu) } #ifdef CONFIG_SUSPEND_SMP +/* Needed to prevent the microcode driver from requesting firmware in its CPU + * hotplug notifier during the suspend/resume. 
+ */ +int suspend_cpu_hotplug; +EXPORT_SYMBOL(suspend_cpu_hotplug); + static cpumask_t frozen_cpus; int disable_nonboot_cpus(void) @@ -261,16 +267,8 @@ int disable_nonboot_cpus(void) int cpu, first_cpu, error = 0; mutex_lock(&cpu_add_remove_lock); - first_cpu = first_cpu(cpu_present_map); - if (!cpu_online(first_cpu)) { - error = _cpu_up(first_cpu); - if (error) { - printk(KERN_ERR "Could not bring CPU%d up.\n", - first_cpu); - goto out; - } - } - + suspend_cpu_hotplug = 1; + first_cpu = first_cpu(cpu_online_map); /* We take down all of the non-boot CPUs in one shot to avoid races * with the userspace trying to use the CPU hotplug at the same time */ @@ -296,7 +294,7 @@ int disable_nonboot_cpus(void) } else { printk(KERN_ERR "Non-boot CPUs are not disabled\n"); } -out: + suspend_cpu_hotplug = 0; mutex_unlock(&cpu_add_remove_lock); return error; } @@ -308,20 +306,22 @@ void enable_nonboot_cpus(void) /* Allow everyone to use the CPU hotplug again */ mutex_lock(&cpu_add_remove_lock); cpu_hotplug_disabled = 0; - mutex_unlock(&cpu_add_remove_lock); if (cpus_empty(frozen_cpus)) - return; + goto out; + suspend_cpu_hotplug = 1; printk("Enabling non-boot CPUs ...\n"); for_each_cpu_mask(cpu, frozen_cpus) { - error = cpu_up(cpu); + error = _cpu_up(cpu); if (!error) { printk("CPU%d is up\n", cpu); continue; } - printk(KERN_WARNING "Error taking CPU%d up: %d\n", - cpu, error); + printk(KERN_WARNING "Error taking CPU%d up: %d\n", cpu, error); } cpus_clear(frozen_cpus); + suspend_cpu_hotplug = 0; +out: + mutex_unlock(&cpu_add_remove_lock); } #endif -- cgit v1.2.3-70-g09d2 From 456a09dce9ca9b0013cabcda918aee851a04471d Mon Sep 17 00:00:00 2001 From: Thomas Bittermann Date: Wed, 4 Apr 2007 22:20:54 +0200 Subject: [PATCH] kernel/time.c: add missing symbol exports This patch adds 2 missing symbol exports: jiffies_to_timeval() and timeval_to_jiffies(). The (not yet merged) dm-raid4-5 module will need them, and they used to be indirectly exported by virtue of being inline functions. 
Commit 8b9365d753d9870bb6451504c13570b81923228f ("[PATCH] Uninline jiffies.h functions") uninlined them, and thus modules now need them explicitly exported to use them. Signed-off-by: Thomas Bittermann Acked-by: Andrew Morton Acked-by: Ingo Molnar Acked-by: Thomas Gleixner Acked-by: john stultz Signed-off-by: Linus Torvalds --- kernel/time.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'kernel') diff --git a/kernel/time.c b/kernel/time.c index c6c80ea5d0e..2f47888e46c 100644 --- a/kernel/time.c +++ b/kernel/time.c @@ -635,6 +635,7 @@ timeval_to_jiffies(const struct timeval *value) (((u64)usec * USEC_CONVERSION + USEC_ROUND) >> (USEC_JIFFIE_SC - SEC_JIFFIE_SC))) >> SEC_JIFFIE_SC; } +EXPORT_SYMBOL(timeval_to_jiffies); void jiffies_to_timeval(const unsigned long jiffies, struct timeval *value) { @@ -649,6 +650,7 @@ void jiffies_to_timeval(const unsigned long jiffies, struct timeval *value) tv_usec /= NSEC_PER_USEC; value->tv_usec = tv_usec; } +EXPORT_SYMBOL(jiffies_to_timeval); /* * Convert jiffies/jiffies_64 to clock_t and back. -- cgit v1.2.3-70-g09d2 From c75fd0ee6e1750e6e527ed1d4aeee66739d9ad79 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 4 Apr 2007 19:08:21 -0700 Subject: [PATCH] swsusp: fix memory shrinker Fix a bug in the swsusp's memory shrinker that causes some systems using highmem to refuse to suspend to disk if image_size is set above 1/2 of available RAM. Special thanks to Jiri Slaby for reporting the problem and assistance in debugging it. Signed-off-by: Rafael J. 
Wysocki Cc: Jiri Slaby Cc: Pavel Machek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/power/swsusp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c index 7fb834397a0..175370824f3 100644 --- a/kernel/power/swsusp.c +++ b/kernel/power/swsusp.c @@ -229,13 +229,13 @@ int swsusp_shrink_memory(void) size += highmem_size; for_each_zone (zone) if (populated_zone(zone)) { + tmp += snapshot_additional_pages(zone); if (is_highmem(zone)) { highmem_size -= zone_page_state(zone, NR_FREE_PAGES); } else { tmp -= zone_page_state(zone, NR_FREE_PAGES); tmp += zone->lowmem_reserve[ZONE_NORMAL]; - tmp += snapshot_additional_pages(zone); } } -- cgit v1.2.3-70-g09d2 From 98de9e3ba23422b5c45b91c93aec1cb1e17514dc Mon Sep 17 00:00:00 2001 From: john stultz Date: Wed, 4 Apr 2007 19:08:24 -0700 Subject: [PATCH] fix jiffies clocksource inittime In debugging a problem w/ the -rt tree, I noticed that on systems that mark the tsc as unstable before it is registered, the TSC would still be selected and used for a short period of time. Digging in it looks to be a result of the mix of the clocksource list changes and my clocksource initialization changes. With the -rt tree, using a bad TSC, even for a short period of time can result in a hang at boot. I was not able to reproduce this hang w/ mainline, but I'm not completely certain that someone won't trip on it. This patch resolves the issue by initializing the jiffies clocksource earlier so a bad TSC won't get selected just because nothing else is yet registered. 
Signed-off-by: John Stultz Acked-by: Thomas Gleixner Cc: Ingo Molnar Cc: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/time/jiffies.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c index 3be8da8fed7..4c256fdb887 100644 --- a/kernel/time/jiffies.c +++ b/kernel/time/jiffies.c @@ -69,4 +69,4 @@ static int __init init_jiffies_clocksource(void) return clocksource_register(&clocksource_jiffies); } -module_init(init_jiffies_clocksource); +core_initcall(init_jiffies_clocksource); -- cgit v1.2.3-70-g09d2 From 995f054f2a342f8505fed4f8395d12c0f5966414 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 7 Apr 2007 12:05:00 +0200 Subject: [PATCH] high-res timers: resume fix Soeren Sonnenburg reported that upon resume he is getting this backtrace: [] smp_apic_timer_interrupt+0x57/0x90 [] retrigger_next_event+0x0/0xb0 [] apic_timer_interrupt+0x28/0x30 [] retrigger_next_event+0x0/0xb0 [] __kfifo_put+0x8/0x90 [] on_each_cpu+0x35/0x60 [] clock_was_set+0x18/0x20 [] timekeeping_resume+0x7c/0xa0 [] __sysdev_resume+0x11/0x80 [] sysdev_resume+0x47/0x80 [] device_power_up+0x5/0x10 it turns out that on resume we mistakenly re-enable interrupts too early. Do the timer retrigger only on the current CPU. 
Signed-off-by: Ingo Molnar Acked-by: Thomas Gleixner Acked-by: Soeren Sonnenburg Signed-off-by: Linus Torvalds --- include/linux/hrtimer.h | 3 +++ kernel/hrtimer.c | 12 ++++++++++++ kernel/timer.c | 2 +- 3 files changed, 16 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 5bdbc744e77..17c29dca835 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -206,6 +206,7 @@ struct hrtimer_cpu_base { struct clock_event_device; extern void clock_was_set(void); +extern void hres_timers_resume(void); extern void hrtimer_interrupt(struct clock_event_device *dev); /* @@ -236,6 +237,8 @@ static inline ktime_t hrtimer_cb_get_time(struct hrtimer *timer) */ static inline void clock_was_set(void) { } +static inline void hres_timers_resume(void) { } + /* * In non high resolution mode the time reference is taken from * the base softirq time variable. diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 067ba2c0532..b74860aaf5f 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -458,6 +458,18 @@ void clock_was_set(void) on_each_cpu(retrigger_next_event, NULL, 0, 1); } +/* + * During resume we might have to reprogram the high resolution timer + * interrupt (on the local CPU): + */ +void hres_timers_resume(void) +{ + WARN_ON_ONCE(num_online_cpus() > 1); + + /* Retrigger the CPU local events: */ + retrigger_next_event(NULL); +} + /* * Check, whether the timer is on the callback pending list */ diff --git a/kernel/timer.c b/kernel/timer.c index 440048acaea..dd6c2c1c561 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -1016,7 +1016,7 @@ static int timekeeping_resume(struct sys_device *dev) clockevents_notify(CLOCK_EVT_NOTIFY_RESUME, NULL); /* Resume hrtimers */ - clock_was_set(); + hres_timers_resume(); return 0; } -- cgit v1.2.3-70-g09d2 From 7f30e49ee1c2c1e95ce9d2ccce5221ddb793dd60 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sat, 7 Apr 2007 14:59:41 +0900 Subject: [PATCH] irq-devres: fix 
failure path of devm_request_irq() devres should be deallocated with devres_free() not kfree(). This bug corrupts slab on IRQ request failure. Fix it. Signed-off-by: Tejun Heo Cc: Andrew Morton Cc: Greg KH Signed-off-by: Linus Torvalds --- kernel/irq/devres.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/irq/devres.c b/kernel/irq/devres.c index 85a430da0fb..d8ee241115f 100644 --- a/kernel/irq/devres.c +++ b/kernel/irq/devres.c @@ -54,7 +54,7 @@ int devm_request_irq(struct device *dev, unsigned int irq, rc = request_irq(irq, handler, irqflags, devname, dev_id); if (rc) { - kfree(dr); + devres_free(dr); return rc; } -- cgit v1.2.3-70-g09d2 From 35f6f753b79705bc4b62da5dcc218d75ffa88370 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 6 Apr 2007 21:18:06 +0200 Subject: [PATCH] sched: get rid of p->children use in show_task() the p->parent PID printout gives us all the information about the task tree that we need - the eldest_child()/older_sibling()/ younger_sibling() printouts are mostly historic and i do not remember ever having used those fields. (IMO in fact they confuse the SysRq-T output.) So remove them. This code has sentimental value though, those fields and printouts are one of the oldest ones still surviving from Linux v0.95's kernel/sched.c: if (p->p_ysptr || p->p_osptr) printk(" Younger sib=%d, older sib=%d\n\r", p->p_ysptr ? p->p_ysptr->pid : -1, p->p_osptr ? p->p_osptr->pid : -1); else printk("\n\r"); written 15 years ago, in early 1992. 
Signed-off-by: Ingo Molnar Signed-off-by: Linus 'snif' Torvalds --- kernel/sched.c | 35 +---------------------------------- 1 file changed, 1 insertion(+), 34 deletions(-) (limited to 'kernel') diff --git a/kernel/sched.c b/kernel/sched.c index a4ca632c477..cdad3b04242 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -4687,27 +4687,6 @@ out_unlock: return retval; } -static inline struct task_struct *eldest_child(struct task_struct *p) -{ - if (list_empty(&p->children)) - return NULL; - return list_entry(p->children.next,struct task_struct,sibling); -} - -static inline struct task_struct *older_sibling(struct task_struct *p) -{ - if (p->sibling.prev==&p->parent->children) - return NULL; - return list_entry(p->sibling.prev,struct task_struct,sibling); -} - -static inline struct task_struct *younger_sibling(struct task_struct *p) -{ - if (p->sibling.next==&p->parent->children) - return NULL; - return list_entry(p->sibling.next,struct task_struct,sibling); -} - static const char stat_nam[] = "RSDTtZX"; static void show_task(struct task_struct *p) @@ -4738,19 +4717,7 @@ static void show_task(struct task_struct *p) free = (unsigned long)n - (unsigned long)end_of_stack(p); } #endif - printk("%5lu %5d %6d ", free, p->pid, p->parent->pid); - if ((relative = eldest_child(p))) - printk("%5d ", relative->pid); - else - printk(" "); - if ((relative = younger_sibling(p))) - printk("%7d", relative->pid); - else - printk(" "); - if ((relative = older_sibling(p))) - printk(" %5d", relative->pid); - else - printk(" "); + printk("%5lu %5d %6d", free, p->pid, p->parent->pid); if (!p->mm) printk(" (L-TLB)\n"); else -- cgit v1.2.3-70-g09d2 From d354d2f4a6fc1b722c2e464a8b3cfd2f6afb304b Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 7 Apr 2007 10:18:33 -0700 Subject: sched.c: Remove unused variable 'relative' Getting rid of the p->children printout in show_task() left behind an unused variable. 
Signed-off-by: Linus Torvalds --- kernel/sched.c | 1 - 1 file changed, 1 deletion(-) (limited to 'kernel') diff --git a/kernel/sched.c b/kernel/sched.c index cdad3b04242..b9a68373014 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -4691,7 +4691,6 @@ static const char stat_nam[] = "RSDTtZX"; static void show_task(struct task_struct *p) { - struct task_struct *relative; unsigned long free = 0; unsigned state; -- cgit v1.2.3-70-g09d2 From fe20e581a72979917e35d5146458ceba79be391f Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 11 Apr 2007 23:28:41 -0700 Subject: [PATCH] fix kernel oops with badly formatted module option Catch malformed kernel parameter usage of "param = value". Spaces are not supported, but don't cause a kernel fault on such usage, just report an error. Signed-off-by: Randy Dunlap Acked-by: Larry Finger Acked-by: Rusty Russell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/params.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'kernel') diff --git a/kernel/params.c b/kernel/params.c index e265b13195b..1fc4ac746cd 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -356,6 +356,10 @@ int param_set_copystring(const char *val, struct kernel_param *kp) { struct kparam_string *kps = kp->arg; + if (!val) { + printk(KERN_ERR "%s: missing param set value\n", kp->name); + return -EINVAL; + } if (strlen(val)+1 > kps->maxlen) { printk(KERN_ERR "%s: string doesn't fit in %u chars.\n", kp->name, kps->maxlen-1); -- cgit v1.2.3-70-g09d2 From 91fcd412e957f433e9f1abeb0b1926dbeb66ca80 Mon Sep 17 00:00:00 2001 From: Bastian Blank Date: Mon, 23 Apr 2007 14:41:14 -0700 Subject: Allow reading tainted flag as user The commit 34f5a39899f3f3e815da64f48ddb72942d86c366 restricted reading of the tainted value. The attached patch changes this back to a write-only check and restores the read behaviour of older versions. 
Signed-off-by: Bastian Blank Cc: Theodore Ts'o Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/sysctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 1b255df4fcd..c904748f229 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1676,7 +1676,7 @@ static int proc_dointvec_taint(ctl_table *table, int write, struct file *filp, { int op; - if (!capable(CAP_SYS_ADMIN)) + if (write && !capable(CAP_SYS_ADMIN)) return -EPERM; op = OP_OR; -- cgit v1.2.3-70-g09d2