Diffstat (limited to 'kernel/power')

 -rw-r--r--  kernel/power/Kconfig        |  75
 -rw-r--r--  kernel/power/Makefile       |   5
 -rw-r--r--  kernel/power/autosleep.c    | 128
 -rw-r--r--  kernel/power/block_io.c    |   2
 -rw-r--r--  kernel/power/console.c     | 118
 -rw-r--r--  kernel/power/hibernate.c   | 257
 -rw-r--r--  kernel/power/main.c        | 287
 -rw-r--r--  kernel/power/power.h       |  51
 -rw-r--r--  kernel/power/poweroff.c    |   4
 -rw-r--r--  kernel/power/process.c     |  84
 -rw-r--r--  kernel/power/qos.c         | 198
 -rw-r--r--  kernel/power/snapshot.c    |  79
 -rw-r--r--  kernel/power/suspend.c     | 272
 -rw-r--r--  kernel/power/suspend_test.c|  35
 -rw-r--r--  kernel/power/swap.c        | 172
 -rw-r--r--  kernel/power/user.c        |  70
 -rw-r--r--  kernel/power/wakelock.c    | 268

 17 files changed, 1653 insertions(+), 452 deletions(-)
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index deb5461e321..9a83d780fac 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -100,9 +100,35 @@ config PM_SLEEP_SMP
 	depends on SMP
 	depends on ARCH_SUSPEND_POSSIBLE || ARCH_HIBERNATION_POSSIBLE
 	depends on PM_SLEEP
-	select HOTPLUG
 	select HOTPLUG_CPU

+config PM_AUTOSLEEP
+	bool "Opportunistic sleep"
+	depends on PM_SLEEP
+	default n
+	---help---
+	  Allow the kernel to trigger a system transition into a global sleep
+	  state automatically whenever there are no active wakeup sources.
+
+config PM_WAKELOCKS
+	bool "User space wakeup sources interface"
+	depends on PM_SLEEP
+	default n
+	---help---
+	  Allow user space to create, activate and deactivate wakeup source
+	  objects with the help of a sysfs-based interface.
+
+config PM_WAKELOCKS_LIMIT
+	int "Maximum number of user space wakeup sources (0 = no limit)"
+	range 0 100000
+	default 100
+	depends on PM_WAKELOCKS
+
+config PM_WAKELOCKS_GC
+	bool "Garbage collector for user space wakeup sources"
+	depends on PM_WAKELOCKS
+	default y
+
 config PM_RUNTIME
 	bool "Run-time PM core functionality"
 	depends on !IA64_HP_SIM
@@ -148,10 +174,26 @@ config PM_TEST_SUSPEND
 	You probably want to have your system's RTC driver statically
 	linked, ensuring that it's available when this test runs.

-config CAN_PM_TRACE
+config PM_SLEEP_DEBUG
 	def_bool y
 	depends on PM_DEBUG && PM_SLEEP

+config DPM_WATCHDOG
+	bool "Device suspend/resume watchdog"
+	depends on PM_DEBUG && PSTORE
+	---help---
+	  Sets up a watchdog timer to capture drivers that are
+	  locked up attempting to suspend/resume a device.
+	  A detected lockup causes system panic with message
+	  captured in pstore device for inspection in subsequent
+	  boot session.
+
+config DPM_WATCHDOG_TIMEOUT
+	int "Watchdog timeout in seconds"
+	range 1 120
+	default 12
+	depends on DPM_WATCHDOG
+
 config PM_TRACE
 	bool
 	help
@@ -169,7 +211,7 @@ config PM_TRACE

 config PM_TRACE_RTC
 	bool "Suspend/resume event tracing"
-	depends on CAN_PM_TRACE
+	depends on PM_SLEEP_DEBUG
 	depends on X86
 	select PM_TRACE
 	---help---
@@ -215,8 +257,7 @@ config ARCH_HAS_OPP
 	bool

 config PM_OPP
-	bool "Operating Performance Point (OPP) Layer library"
-	depends on ARCH_HAS_OPP
+	bool
 	---help---
 	  SOCs have a standard set of tuples consisting of frequency and
 	  voltage pairs that the device will support per voltage domain. This
@@ -236,6 +277,30 @@ config PM_GENERIC_DOMAINS
 	bool
 	depends on PM

+config WQ_POWER_EFFICIENT_DEFAULT
+	bool "Enable workqueue power-efficient mode by default"
+	depends on PM
+	default n
+	help
+	  Per-cpu workqueues are generally preferred because they show
+	  better performance thanks to cache locality; unfortunately,
+	  per-cpu workqueues tend to be more power hungry than unbound
+	  workqueues.
+
+	  Enabling the workqueue.power_efficient kernel parameter makes the
+	  per-cpu workqueues which were observed to contribute
+	  significantly to power consumption unbound, leading to measurably
+	  lower power usage at the cost of small performance overhead.
+
+	  This config option determines whether workqueue.power_efficient
+	  is enabled by default.
+
+	  If in doubt, say N.
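The option above only sets a default; individual workqueues opt in with the
WQ_POWER_EFFICIENT flag at creation time. A minimal driver-side sketch (not
part of this patch; the module, queue name and callback are hypothetical):

#include <linux/module.h>
#include <linux/workqueue.h>

static void stats_fn(struct work_struct *work)
{
	/* infrequent background work; cache locality matters little here */
}
static DECLARE_WORK(stats_work, stats_fn);

static struct workqueue_struct *stats_wq;

static int __init stats_init(void)
{
	/*
	 * With workqueue.power_efficient set (the default controlled by
	 * WQ_POWER_EFFICIENT_DEFAULT), this queue becomes unbound, letting
	 * the scheduler pick an already-awake CPU instead of waking one.
	 */
	stats_wq = alloc_workqueue("stats_wq", WQ_POWER_EFFICIENT, 0);
	if (!stats_wq)
		return -ENOMEM;
	queue_work(stats_wq, &stats_work);
	return 0;
}

static void __exit stats_exit(void)
{
	destroy_workqueue(stats_wq);
}

module_init(stats_init);
module_exit(stats_exit);
MODULE_LICENSE("GPL");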
+
+config PM_GENERIC_DOMAINS_SLEEP
+	def_bool y
+	depends on PM_SLEEP && PM_GENERIC_DOMAINS
+
 config PM_GENERIC_DOMAINS_RUNTIME
 	def_bool y
 	depends on PM_RUNTIME && PM_GENERIC_DOMAINS
diff --git a/kernel/power/Makefile b/kernel/power/Makefile
index 07e0e28ffba..29472bff11e 100644
--- a/kernel/power/Makefile
+++ b/kernel/power/Makefile
@@ -1,12 +1,15 @@
 ccflags-$(CONFIG_PM_DEBUG)	:= -DDEBUG

-obj-$(CONFIG_PM)		+= main.o qos.o
+obj-y				+= qos.o
+obj-$(CONFIG_PM)		+= main.o
 obj-$(CONFIG_VT_CONSOLE_SLEEP)	+= console.o
 obj-$(CONFIG_FREEZER)		+= process.o
 obj-$(CONFIG_SUSPEND)		+= suspend.o
 obj-$(CONFIG_PM_TEST_SUSPEND)	+= suspend_test.o
 obj-$(CONFIG_HIBERNATION)	+= hibernate.o snapshot.o swap.o user.o \
 				   block_io.o
+obj-$(CONFIG_PM_AUTOSLEEP)	+= autosleep.o
+obj-$(CONFIG_PM_WAKELOCKS)	+= wakelock.o

 obj-$(CONFIG_MAGIC_SYSRQ)	+= poweroff.o
diff --git a/kernel/power/autosleep.c b/kernel/power/autosleep.c
new file mode 100644
index 00000000000..9012ecf7b81
--- /dev/null
+++ b/kernel/power/autosleep.c
@@ -0,0 +1,128 @@
+/*
+ * kernel/power/autosleep.c
+ *
+ * Opportunistic sleep support.
+ *
+ * Copyright (C) 2012 Rafael J. Wysocki <rjw@sisk.pl>
+ */
+
+#include <linux/device.h>
+#include <linux/mutex.h>
+#include <linux/pm_wakeup.h>
+
+#include "power.h"
+
+static suspend_state_t autosleep_state;
+static struct workqueue_struct *autosleep_wq;
+/*
+ * Note: it is only safe to mutex_lock(&autosleep_lock) if a wakeup_source
+ * is active, otherwise a deadlock with try_to_suspend() is possible.
+ * Alternatively mutex_lock_interruptible() can be used.  This will then fail
+ * if an auto_sleep cycle tries to freeze processes.
+ */
+static DEFINE_MUTEX(autosleep_lock);
+static struct wakeup_source *autosleep_ws;
+
+static void try_to_suspend(struct work_struct *work)
+{
+	unsigned int initial_count, final_count;
+
+	if (!pm_get_wakeup_count(&initial_count, true))
+		goto out;
+
+	mutex_lock(&autosleep_lock);
+
+	if (!pm_save_wakeup_count(initial_count) ||
+		system_state != SYSTEM_RUNNING) {
+		mutex_unlock(&autosleep_lock);
+		goto out;
+	}
+
+	if (autosleep_state == PM_SUSPEND_ON) {
+		mutex_unlock(&autosleep_lock);
+		return;
+	}
+	if (autosleep_state >= PM_SUSPEND_MAX)
+		hibernate();
+	else
+		pm_suspend(autosleep_state);
+
+	mutex_unlock(&autosleep_lock);
+
+	if (!pm_get_wakeup_count(&final_count, false))
+		goto out;
+
+	/*
+	 * If the wakeup occurred for an unknown reason, wait to prevent the
+	 * system from trying to suspend and waking up in a tight loop.
+	 */
+	if (final_count == initial_count)
+		schedule_timeout_uninterruptible(HZ / 2);
+
+ out:
+	queue_up_suspend_work();
+}
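try_to_suspend() above is the in-kernel counterpart of the handshake user
space performs through /sys/power/wakeup_count before a manual suspend. A
userspace sketch of that protocol (error handling trimmed; the sysfs paths
are the interface this series exposes):

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	char count[32];
	ssize_t n;
	int fd;

	/* Reading blocks until in-flight wakeup events settle. */
	fd = open("/sys/power/wakeup_count", O_RDWR);
	if (fd < 0)
		return 1;
	n = read(fd, count, sizeof(count) - 1);
	if (n <= 0)
		return 1;

	/* Writing the count back fails if a wakeup happened meanwhile. */
	if (write(fd, count, n) < 0) {
		close(fd);
		return 1;	/* retry later instead of racing a wakeup */
	}
	close(fd);

	fd = open("/sys/power/state", O_WRONLY);
	if (fd < 0)
		return 1;
	write(fd, "mem", 3);	/* aborted if wakeup events arrive first */
	close(fd);
	return 0;
}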
+
+static DECLARE_WORK(suspend_work, try_to_suspend);
+
+void queue_up_suspend_work(void)
+{
+	if (autosleep_state > PM_SUSPEND_ON)
+		queue_work(autosleep_wq, &suspend_work);
+}
+
+suspend_state_t pm_autosleep_state(void)
+{
+	return autosleep_state;
+}
+
+int pm_autosleep_lock(void)
+{
+	return mutex_lock_interruptible(&autosleep_lock);
+}
+
+void pm_autosleep_unlock(void)
+{
+	mutex_unlock(&autosleep_lock);
+}
+
+int pm_autosleep_set_state(suspend_state_t state)
+{
+
+#ifndef CONFIG_HIBERNATION
+	if (state >= PM_SUSPEND_MAX)
+		return -EINVAL;
+#endif
+
+	__pm_stay_awake(autosleep_ws);
+
+	mutex_lock(&autosleep_lock);
+
+	autosleep_state = state;
+
+	__pm_relax(autosleep_ws);
+
+	if (state > PM_SUSPEND_ON) {
+		pm_wakep_autosleep_enabled(true);
+		queue_up_suspend_work();
+	} else {
+		pm_wakep_autosleep_enabled(false);
+	}
+
+	mutex_unlock(&autosleep_lock);
+	return 0;
+}
+
+int __init pm_autosleep_init(void)
+{
+	autosleep_ws = wakeup_source_register("autosleep");
+	if (!autosleep_ws)
+		return -ENOMEM;
+
+	autosleep_wq = alloc_ordered_workqueue("autosleep", 0);
+	if (autosleep_wq)
+		return 0;
+
+	wakeup_source_unregister(autosleep_ws);
+	return -ENOMEM;
+}
diff --git a/kernel/power/block_io.c b/kernel/power/block_io.c
index d09dd10c5a5..9a58bc25881 100644
--- a/kernel/power/block_io.c
+++ b/kernel/power/block_io.c
@@ -32,7 +32,7 @@ static int submit(int rw, struct block_device *bdev, sector_t sector,
 	struct bio *bio;

 	bio = bio_alloc(__GFP_WAIT | __GFP_HIGH, 1);
-	bio->bi_sector = sector;
+	bio->bi_iter.bi_sector = sector;
 	bio->bi_bdev = bdev;
 	bio->bi_end_io = end_swap_bio_read;
diff --git a/kernel/power/console.c b/kernel/power/console.c
index b1dc456474b..aba9c545a0e 100644
--- a/kernel/power/console.c
+++ b/kernel/power/console.c
@@ -4,18 +4,133 @@
  *	Originally from swsusp.
  */

+#include <linux/console.h>
 #include <linux/vt_kern.h>
 #include <linux/kbd_kern.h>
 #include <linux/vt.h>
 #include <linux/module.h>
+#include <linux/slab.h>
 #include "power.h"

 #define SUSPEND_CONSOLE	(MAX_NR_CONSOLES-1)

 static int orig_fgconsole, orig_kmsg;

+static DEFINE_MUTEX(vt_switch_mutex);
+
+struct pm_vt_switch {
+	struct list_head head;
+	struct device *dev;
+	bool required;
+};
+
+static LIST_HEAD(pm_vt_switch_list);
+
+
+/**
+ * pm_vt_switch_required - indicate VT switch at suspend requirements
+ * @dev: device
+ * @required: if true, caller needs VT switch at suspend/resume time
+ *
+ * The different console drivers may or may not require VT switches across
+ * suspend/resume, depending on how they handle restoring video state and
+ * what may be running.
+ *
+ * Drivers can indicate support for switchless suspend/resume, which can
+ * save time and flicker, by using this routine and passing 'false' as
+ * the argument.  If any loaded driver needs VT switching, or the
+ * no_console_suspend argument has been passed on the command line, VT
+ * switches will occur.
+ */
+void pm_vt_switch_required(struct device *dev, bool required)
+{
+	struct pm_vt_switch *entry, *tmp;
+
+	mutex_lock(&vt_switch_mutex);
+	list_for_each_entry(tmp, &pm_vt_switch_list, head) {
+		if (tmp->dev == dev) {
+			/* already registered, update requirement */
+			tmp->required = required;
+			goto out;
+		}
+	}
+
+	entry = kmalloc(sizeof(*entry), GFP_KERNEL);
+	if (!entry)
+		goto out;
+
+	entry->required = required;
+	entry->dev = dev;
+
+	list_add(&entry->head, &pm_vt_switch_list);
+out:
+	mutex_unlock(&vt_switch_mutex);
+}
+EXPORT_SYMBOL(pm_vt_switch_required);
+
+/**
+ * pm_vt_switch_unregister - stop tracking a device's VT switching needs
+ * @dev: device
+ *
+ * Remove @dev from the vt switch list.
+ */
+void pm_vt_switch_unregister(struct device *dev)
+{
+	struct pm_vt_switch *tmp;
+
+	mutex_lock(&vt_switch_mutex);
+	list_for_each_entry(tmp, &pm_vt_switch_list, head) {
+		if (tmp->dev == dev) {
+			list_del(&tmp->head);
+			kfree(tmp);
+			break;
+		}
+	}
+	mutex_unlock(&vt_switch_mutex);
+}
+EXPORT_SYMBOL(pm_vt_switch_unregister);
+
+/*
+ * There are three cases when a VT switch on suspend/resume is required:
+ *   1) no driver has indicated a requirement one way or another, so preserve
+ *      the old behavior
+ *   2) console suspend is disabled, we want to see debug messages across
+ *      suspend/resume
+ *   3) any registered driver indicates it needs a VT switch
+ *
+ * If none of these conditions is present, meaning we have at least one driver
+ * that doesn't need the switch, and none that do, we can avoid it to make
+ * resume look a little prettier (and suspend too, but that's usually hidden,
+ * e.g. when closing the lid on a laptop).
+ */
+static bool pm_vt_switch(void)
+{
+	struct pm_vt_switch *entry;
+	bool ret = true;
+
+	mutex_lock(&vt_switch_mutex);
+	if (list_empty(&pm_vt_switch_list))
+		goto out;
+
+	if (!console_suspend_enabled)
+		goto out;
+
+	list_for_each_entry(entry, &pm_vt_switch_list, head) {
+		if (entry->required)
+			goto out;
+	}
+
+	ret = false;
+out:
+	mutex_unlock(&vt_switch_mutex);
+	return ret;
+}
+
 int pm_prepare_console(void)
 {
+	if (!pm_vt_switch())
+		return 0;
+
 	orig_fgconsole = vt_move_to_console(SUSPEND_CONSOLE, 1);
 	if (orig_fgconsole < 0)
 		return 1;
@@ -26,6 +141,9 @@ int pm_prepare_console(void)

 void pm_restore_console(void)
 {
+	if (!pm_vt_switch())
+		return;
+
 	if (orig_fgconsole >= 0) {
 		vt_move_to_console(orig_fgconsole, 0);
 		vt_kmsg_redirect(orig_kmsg);
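A display driver that restores its own video state would use the interface
above roughly like this (sketch only; foo_probe()/foo_remove() are
hypothetical names, not part of the patch):

#include <linux/platform_device.h>
#include <linux/pm.h>

static int foo_probe(struct platform_device *pdev)
{
	/*
	 * This driver repaints the screen itself on resume, so the core
	 * may skip the VT switch to the suspend console, unless some other
	 * registered driver still requires it.
	 */
	pm_vt_switch_required(&pdev->dev, false);
	return 0;
}

static int foo_remove(struct platform_device *pdev)
{
	/* Stop influencing the VT-switch decision once we are gone. */
	pm_vt_switch_unregister(&pdev->dev);
	return 0;
}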
 */
@@ -16,7 +17,6 @@
 #include <linux/string.h>
 #include <linux/device.h>
 #include <linux/async.h>
-#include <linux/kmod.h>
 #include <linux/delay.h>
 #include <linux/fs.h>
 #include <linux/mount.h>
@@ -26,25 +26,31 @@
 #include <linux/freezer.h>
 #include <linux/gfp.h>
 #include <linux/syscore_ops.h>
-#include <scsi/scsi_scan.h>
+#include <linux/ctype.h>
+#include <linux/genhd.h>
+#include <trace/events/power.h>

 #include "power.h"

 static int nocompress;
 static int noresume;
+static int nohibernate;
 static int resume_wait;
-static int resume_delay;
+static unsigned int resume_delay;
 static char resume_file[256] = CONFIG_PM_STD_PARTITION;
 dev_t swsusp_resume_device;
 sector_t swsusp_resume_block;
-int in_suspend __nosavedata;
+__visible int in_suspend __nosavedata;

 enum {
 	HIBERNATION_INVALID,
 	HIBERNATION_PLATFORM,
 	HIBERNATION_SHUTDOWN,
 	HIBERNATION_REBOOT,
+#ifdef CONFIG_SUSPEND
+	HIBERNATION_SUSPEND,
+#endif
 	/* keep last */
 	__HIBERNATION_AFTER_LAST
 };
@@ -57,6 +63,11 @@ bool freezer_test_done;

 static const struct platform_hibernation_ops *hibernation_ops;

+bool hibernation_available(void)
+{
+	return (nohibernate == 0);
+}
+
 /**
  * hibernation_set_ops - Set the global hibernate operations.
  * @ops: Hibernation operations to use in subsequent hibernation transitions.
@@ -78,6 +89,7 @@ void hibernation_set_ops(const struct platform_hibernation_ops *ops)

 	unlock_system_sleep();
 }
+EXPORT_SYMBOL_GPL(hibernation_set_ops);

 static bool entering_platform_hibernation;

@@ -223,19 +235,23 @@ static void platform_recover(int platform_mode)
 void swsusp_show_speed(struct timeval *start, struct timeval *stop,
 			unsigned nr_pages, char *msg)
 {
-	s64 elapsed_centisecs64;
-	int centisecs;
-	int k;
-	int kps;
+	u64 elapsed_centisecs64;
+	unsigned int centisecs;
+	unsigned int k;
+	unsigned int kps;

 	elapsed_centisecs64 = timeval_to_ns(stop) - timeval_to_ns(start);
+	/*
+	 * If "(s64)elapsed_centisecs64 < 0", it will print a huge elapsed
+	 * time, which is obvious enough to show what went wrong.
+	 */
 	do_div(elapsed_centisecs64, NSEC_PER_SEC / 100);
 	centisecs = elapsed_centisecs64;
 	if (centisecs == 0)
 		centisecs = 1;	/* avoid div-by-zero */
 	k = nr_pages * (PAGE_SIZE / 1024);
 	kps = (k * 100) / centisecs;
-	printk(KERN_INFO "PM: %s %d kbytes in %d.%02d seconds (%d.%02d MB/s)\n",
+	printk(KERN_INFO "PM: %s %u kbytes in %u.%02u seconds (%u.%02u MB/s)\n",
 		msg, k,
 		centisecs / 100, centisecs % 100,
 		kps / 1000, (kps % 1000) / 10);
@@ -245,8 +261,8 @@ void swsusp_show_speed(struct timeval *start, struct timeval *stop,
 * create_image - Create a hibernation image.
 * @platform_mode: Whether or not to use the platform driver.
 *
- * Execute device drivers' .freeze_noirq() callbacks, create a hibernation image
- * and execute the drivers' .thaw_noirq() callbacks.
+ * Execute device drivers' "late" and "noirq" freeze callbacks, create a
+ * hibernation image and run the drivers' "noirq" and "early" thaw callbacks.
 *
 * Control reappears in this routine after the subsequent restore.
 */
@@ -254,7 +270,7 @@ static int create_image(int platform_mode)
 {
 	int error;

-	error = dpm_suspend_noirq(PMSG_FREEZE);
+	error = dpm_suspend_end(PMSG_FREEZE);
 	if (error) {
 		printk(KERN_ERR "PM: Some devices failed to power down, "
 			"aborting hibernation\n");
@@ -283,16 +299,18 @@ static int create_image(int platform_mode)

 	in_suspend = 1;
 	save_processor_state();
+	trace_suspend_resume(TPS("machine_suspend"), PM_EVENT_HIBERNATE, true);
 	error = swsusp_arch_suspend();
+	trace_suspend_resume(TPS("machine_suspend"), PM_EVENT_HIBERNATE, false);
 	if (error)
 		printk(KERN_ERR "PM: Error %d creating hibernation image\n",
 			error);
 	/* Restore control flow magically appears here */
 	restore_processor_state();
-	if (!in_suspend) {
+	if (!in_suspend)
 		events_check_enabled = false;
-		platform_leave(platform_mode);
-	}
+
+	platform_leave(platform_mode);

  Power_up:
 	syscore_resume();
@@ -306,7 +324,7 @@ static int create_image(int platform_mode)

  Platform_finish:
 	platform_finish(platform_mode);
-	dpm_resume_noirq(in_suspend ?
+	dpm_resume_start(in_suspend ?
 		(error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE);

 	return error;
@@ -343,16 +361,17 @@ int hibernation_snapshot(int platform_mode)
 		 * successful freezer test.
 		 */
 		freezer_test_done = true;
-		goto Cleanup;
+		goto Thaw;
 	}

 	error = dpm_prepare(PMSG_FREEZE);
 	if (error) {
 		dpm_complete(PMSG_RECOVER);
-		goto Cleanup;
+		goto Thaw;
 	}

 	suspend_console();
+	ftrace_stop();
 	pm_restrict_gfp_mask();
 	error = dpm_suspend(PMSG_FREEZE);
@@ -378,6 +397,7 @@ int hibernation_snapshot(int platform_mode)
 	if (error || !in_suspend)
 		pm_restore_gfp_mask();

+	ftrace_start();
 	resume_console();
 	dpm_complete(msg);

@@ -385,6 +405,8 @@ int hibernation_snapshot(int platform_mode)
 	platform_end(platform_mode);
 	return error;

+ Thaw:
+	thaw_kernel_threads();
  Cleanup:
 	swsusp_free();
 	goto Close;
@@ -394,16 +416,16 @@ int hibernation_snapshot(int platform_mode)
 * resume_target_kernel - Restore system state from a hibernation image.
 * @platform_mode: Whether or not to use the platform driver.
 *
- * Execute device drivers' .freeze_noirq() callbacks, restore the contents of
- * highmem that have not been restored yet from the image and run the low-level
- * code that will restore the remaining contents of memory and switch to the
- * just restored target kernel.
+ * Execute device drivers' "noirq" and "late" freeze callbacks, restore the
+ * contents of highmem that have not been restored yet from the image and run
+ * the low-level code that will restore the remaining contents of memory and
+ * switch to the just restored target kernel.
 */
 static int resume_target_kernel(bool platform_mode)
 {
 	int error;

-	error = dpm_suspend_noirq(PMSG_QUIESCE);
+	error = dpm_suspend_end(PMSG_QUIESCE);
 	if (error) {
 		printk(KERN_ERR "PM: Some devices failed to power down, "
 			"aborting resume\n");
@@ -460,7 +482,7 @@ static int resume_target_kernel(bool platform_mode)
  Cleanup:
 	platform_restore_cleanup(platform_mode);

-	dpm_resume_noirq(PMSG_RECOVER);
+	dpm_resume_start(PMSG_RECOVER);

 	return error;
 }
@@ -478,6 +500,7 @@ int hibernation_restore(int platform_mode)

 	pm_prepare_console();
 	suspend_console();
+	ftrace_stop();
 	pm_restrict_gfp_mask();
 	error = dpm_suspend_start(PMSG_QUIESCE);
 	if (!error) {
 		error = resume_target_kernel(platform_mode);
 		dpm_resume_end(PMSG_RECOVER);
 	}
 	pm_restore_gfp_mask();
+	ftrace_start();
 	resume_console();
 	pm_restore_console();
 	return error;
@@ -511,6 +535,7 @@ int hibernation_platform_enter(void)

 	entering_platform_hibernation = true;
 	suspend_console();
+	ftrace_stop();
 	error = dpm_suspend_start(PMSG_HIBERNATE);
 	if (error) {
 		if (hibernation_ops->recover)
 			hibernation_ops->recover();
 		goto Resume_devices;
 	}

-	error = dpm_suspend_noirq(PMSG_HIBERNATE);
+	error = dpm_suspend_end(PMSG_HIBERNATE);
 	if (error)
 		goto Resume_devices;
@@ -549,11 +574,12 @@ int hibernation_platform_enter(void)

  Platform_finish:
 	hibernation_ops->finish();
-	dpm_resume_noirq(PMSG_RESTORE);
+	dpm_resume_start(PMSG_RESTORE);

  Resume_devices:
 	entering_platform_hibernation = false;
 	dpm_resume_end(PMSG_RESTORE);
+	ftrace_start();
 	resume_console();

  Close:
@@ -571,6 +597,10 @@ int hibernation_platform_enter(void)
 */
 static void power_down(void)
 {
+#ifdef CONFIG_SUSPEND
+	int error;
+#endif
+
 	switch (hibernation_mode) {
 	case HIBERNATION_REBOOT:
 		kernel_restart(NULL);
@@ -578,8 +608,28 @@ static void power_down(void)
 	case HIBERNATION_PLATFORM:
 		hibernation_platform_enter();
 	case HIBERNATION_SHUTDOWN:
-		kernel_power_off();
+		if (pm_power_off)
+			kernel_power_off();
 		break;
+#ifdef CONFIG_SUSPEND
+	case HIBERNATION_SUSPEND:
+		error = suspend_devices_and_enter(PM_SUSPEND_MEM);
+		if (error) {
+			if (hibernation_ops)
+				hibernation_mode = HIBERNATION_PLATFORM;
+			else
+				hibernation_mode = HIBERNATION_SHUTDOWN;
+			power_down();
+		}
+		/*
+		 * Restore swap signature.
+		 */
+		error = swsusp_unmark();
+		if (error)
+			printk(KERN_ERR "PM: Swap will be unusable! "
+					"Try swapon -a.\n");
+		return;
+#endif
 	}
 	kernel_halt();
 	/*
@@ -587,7 +637,8 @@ static void power_down(void)
 	 * corruption after resume.
 	 */
 	printk(KERN_CRIT "PM: Please power down manually\n");
-	while(1);
+	while (1)
+		cpu_relax();
 }

 /**
@@ -597,6 +648,11 @@ int hibernate(void)
 {
 	int error;

+	if (!hibernation_available()) {
+		pr_debug("PM: Hibernation not available.\n");
+		return -EPERM;
+	}
+
 	lock_system_sleep();
 	/* The snapshot device should not be opened while we're running */
 	if (!atomic_add_unless(&snapshot_device_available, -1, 0)) {
@@ -609,30 +665,23 @@ int hibernate(void)
 	if (error)
 		goto Exit;

-	error = usermodehelper_disable();
-	if (error)
-		goto Exit;
-
-	/* Allocate memory management structures */
-	error = create_basic_memory_bitmaps();
-	if (error)
-		goto Exit;
-
 	printk(KERN_INFO "PM: Syncing filesystems ... ");
"); sys_sync(); printk("done.\n"); error = freeze_processes(); if (error) - goto Finish; + goto Exit; - error = hibernation_snapshot(hibernation_mode == HIBERNATION_PLATFORM); + lock_device_hotplug(); + /* Allocate memory management structures */ + error = create_basic_memory_bitmaps(); if (error) goto Thaw; - if (freezer_test_done) { - freezer_test_done = false; - goto Thaw; - } + + error = hibernation_snapshot(hibernation_mode == HIBERNATION_PLATFORM); + if (error || freezer_test_done) + goto Free_bitmaps; if (in_suspend) { unsigned int flags = 0; @@ -655,11 +704,14 @@ int hibernate(void) pr_debug("PM: Image restored successfully.\n"); } + Free_bitmaps: + free_basic_memory_bitmaps(); Thaw: + unlock_device_hotplug(); thaw_processes(); - Finish: - free_basic_memory_bitmaps(); - usermodehelper_enable(); + + /* Don't bother checking whether freezer_test_done is true */ + freezer_test_done = false; Exit: pm_notifier_call_chain(PM_POST_HIBERNATION); pm_restore_console(); @@ -693,7 +745,7 @@ static int software_resume(void) /* * If the user said "noresume".. bail out early. */ - if (noresume) + if (noresume || !hibernation_available()) return 0; /* @@ -726,6 +778,17 @@ static int software_resume(void) /* Check if the device is there */ swsusp_resume_device = name_to_dev_t(resume_file); + + /* + * name_to_dev_t is ineffective to verify parition if resume_file is in + * integer format. (e.g. major:minor) + */ + if (isdigit(resume_file[0]) && resume_wait) { + int partno; + while (!get_gendisk(swsusp_resume_device, &partno)) + msleep(10); + } + if (!swsusp_resume_device) { /* * Some device discovery might still be in progress; we need @@ -739,13 +802,6 @@ static int software_resume(void) async_synchronize_full(); } - /* - * We can't depend on SCSI devices being available after loading - * one of their modules until scsi_complete_async_scans() is - * called and the resume device usually is a SCSI one. 
-	 */
-	scsi_complete_async_scans();
-
 	swsusp_resume_device = name_to_dev_t(resume_file);
 	if (!swsusp_resume_device) {
 		error = -ENODEV;
@@ -772,27 +828,20 @@ static int software_resume(void)
 	pm_prepare_console();
 	error = pm_notifier_call_chain(PM_RESTORE_PREPARE);
 	if (error)
-		goto close_finish;
-
-	error = usermodehelper_disable();
-	if (error)
-		goto close_finish;
-
-	error = create_basic_memory_bitmaps();
-	if (error) {
-		usermodehelper_enable();
-		goto close_finish;
-	}
+		goto Close_Finish;

 	pr_debug("PM: Preparing processes for restore.\n");
 	error = freeze_processes();
-	if (error) {
-		swsusp_close(FMODE_READ);
-		goto Done;
-	}
+	if (error)
+		goto Close_Finish;

 	pr_debug("PM: Loading hibernation image.\n");

+	lock_device_hotplug();
+	error = create_basic_memory_bitmaps();
+	if (error)
+		goto Thaw;
+
 	error = swsusp_read(&flags);
 	swsusp_close(FMODE_READ);
 	if (!error)
@@ -800,10 +849,10 @@ static int software_resume(void)

 	printk(KERN_ERR "PM: Failed to load hibernation image, recovering.\n");
 	swsusp_free();
-	thaw_processes();
- Done:
 	free_basic_memory_bitmaps();
-	usermodehelper_enable();
+ Thaw:
+	unlock_device_hotplug();
+	thaw_processes();
  Finish:
 	pm_notifier_call_chain(PM_POST_RESTORE);
 	pm_restore_console();
@@ -813,18 +862,21 @@ static int software_resume(void)
 	mutex_unlock(&pm_mutex);
 	pr_debug("PM: Hibernation image not present or could not be loaded.\n");
 	return error;
-close_finish:
+ Close_Finish:
 	swsusp_close(FMODE_READ);
 	goto Finish;
 }

-late_initcall(software_resume);
+late_initcall_sync(software_resume);

 static const char * const hibernation_modes[] = {
 	[HIBERNATION_PLATFORM]	= "platform",
 	[HIBERNATION_SHUTDOWN]	= "shutdown",
 	[HIBERNATION_REBOOT]	= "reboot",
+#ifdef CONFIG_SUSPEND
+	[HIBERNATION_SUSPEND]	= "suspend",
+#endif
 };

 /*
@@ -859,12 +911,18 @@ static ssize_t disk_show(struct kobject *kobj, struct kobj_attribute *attr,
 	int i;
 	char *start = buf;

+	if (!hibernation_available())
+		return sprintf(buf, "[disabled]\n");
+
 	for (i = HIBERNATION_FIRST; i <= HIBERNATION_MAX; i++) {
 		if (!hibernation_modes[i])
 			continue;
 		switch (i) {
 		case HIBERNATION_SHUTDOWN:
 		case HIBERNATION_REBOOT:
+#ifdef CONFIG_SUSPEND
+		case HIBERNATION_SUSPEND:
+#endif
 			break;
 		case HIBERNATION_PLATFORM:
 			if (hibernation_ops)
@@ -890,6 +948,9 @@ static ssize_t disk_store(struct kobject *kobj, struct kobj_attribute *attr,
 	char *p;
 	int mode = HIBERNATION_INVALID;

+	if (!hibernation_available())
+		return -EPERM;
+
 	p = memchr(buf, '\n', n);
 	len = p ? p - buf : n;
@@ -905,6 +966,9 @@ static ssize_t disk_store(struct kobject *kobj, struct kobj_attribute *attr,
 	switch (mode) {
 	case HIBERNATION_SHUTDOWN:
 	case HIBERNATION_REBOOT:
+#ifdef CONFIG_SUSPEND
+	case HIBERNATION_SUSPEND:
+#endif
 		hibernation_mode = mode;
 		break;
 	case HIBERNATION_PLATFORM:
@@ -935,16 +999,20 @@ static ssize_t resume_show(struct kobject *kobj, struct kobj_attribute *attr,
 static ssize_t resume_store(struct kobject *kobj, struct kobj_attribute *attr,
 				const char *buf, size_t n)
 {
-	unsigned int maj, min;
 	dev_t res;
-	int ret = -EINVAL;
+	int len = n;
+	char *name;

-	if (sscanf(buf, "%u:%u", &maj, &min) != 2)
-		goto out;
+	if (len && buf[len-1] == '\n')
+		len--;
+	name = kstrndup(buf, len, GFP_KERNEL);
+	if (!name)
+		return -ENOMEM;

-	res = MKDEV(maj,min);
-	if (maj != MAJOR(res) || min != MINOR(res))
-		goto out;
+	res = name_to_dev_t(name);
+	kfree(name);
+	if (!res)
+		return -EINVAL;

 	lock_system_sleep();
 	swsusp_resume_device = res;
@@ -952,9 +1020,7 @@ static ssize_t resume_store(struct kobject *kobj, struct kobj_attribute *attr,
 	printk(KERN_INFO "PM: Starting manual resume from disk\n");
 	noresume = 0;
 	software_resume();
-	ret = n;
- out:
-	return ret;
+	return n;
 }

 power_attr(resume);
@@ -1052,6 +1118,10 @@ static int __init hibernate_setup(char *str)
 		noresume = 1;
 	else if (!strncmp(str, "nocompress", 10))
 		nocompress = 1;
+	else if (!strncmp(str, "no", 2)) {
+		noresume = 1;
+		nohibernate = 1;
+	}
 	return 1;
 }
@@ -1069,13 +1139,30 @@ static int __init resumewait_setup(char *str)

 static int __init resumedelay_setup(char *str)
 {
-	resume_delay = simple_strtoul(str, NULL, 0);
+	int rc = kstrtouint(str, 0, &resume_delay);
+
+	if (rc)
+		return rc;
 	return 1;
 }
+
+static int __init nohibernate_setup(char *str)
+{
+	noresume = 1;
+	nohibernate = 1;
+	return 1;
+}
+
+static int __init kaslr_nohibernate_setup(char *str)
+{
+	return nohibernate_setup(str);
+}
+
 __setup("noresume", noresume_setup);
 __setup("resume_offset=", resume_offset_setup);
 __setup("resume=", resume_setup);
 __setup("hibernate=", hibernate_setup);
 __setup("resumewait", resumewait_setup);
 __setup("resumedelay=", resumedelay_setup);
+__setup("nohibernate", nohibernate_setup);
+__setup("kaslr", kaslr_nohibernate_setup);
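With HIBERNATION_SUSPEND and the reworked resume_store() above in place, the
new user-visible behavior can be exercised from user space. A sketch (the
sysfs paths are the ABI touched by this patch; the device spec in the comment
is a made-up example):

#include <fcntl.h>
#include <string.h>
#include <unistd.h>

static void echo(const char *path, const char *val)
{
	int fd = open(path, O_WRONLY);

	if (fd >= 0) {
		write(fd, val, strlen(val));
		close(fd);
	}
}

int main(void)
{
	/* Hybrid suspend: write the image, then suspend to RAM. */
	echo("/sys/power/disk", "suspend");
	echo("/sys/power/state", "disk");

	/*
	 * At early boot an initrd may instead trigger manual resume;
	 * resume_store() now accepts any name_to_dev_t() form, e.g.
	 * "8:3" or "/dev/sda3", and starts the resume immediately:
	 *
	 *	echo("/sys/power/resume", "8:3");
	 */
	return 0;
}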
diff --git a/kernel/power/main.c b/kernel/power/main.c
index 9824b41e5a1..8e90f330f13 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -59,7 +59,7 @@ static ssize_t pm_async_store(struct kobject *kobj, struct kobj_attribute *attr,
 {
 	unsigned long val;

-	if (strict_strtoul(buf, 10, &val))
+	if (kstrtoul(buf, 10, &val))
 		return -EINVAL;

 	if (val > 1)
@@ -165,16 +165,20 @@ static int suspend_stats_show(struct seq_file *s, void *unused)
 	last_errno %= REC_FAILED_NUM;
 	last_step = suspend_stats.last_failed_step + REC_FAILED_NUM - 1;
 	last_step %= REC_FAILED_NUM;
-	seq_printf(s, "%s: %d\n%s: %d\n%s: %d\n%s: %d\n"
-			"%s: %d\n%s: %d\n%s: %d\n%s: %d\n",
+	seq_printf(s, "%s: %d\n%s: %d\n%s: %d\n%s: %d\n%s: %d\n"
+			"%s: %d\n%s: %d\n%s: %d\n%s: %d\n%s: %d\n",
 			"success", suspend_stats.success,
 			"fail", suspend_stats.fail,
 			"failed_freeze", suspend_stats.failed_freeze,
 			"failed_prepare", suspend_stats.failed_prepare,
 			"failed_suspend", suspend_stats.failed_suspend,
+			"failed_suspend_late",
+				suspend_stats.failed_suspend_late,
 			"failed_suspend_noirq",
 				suspend_stats.failed_suspend_noirq,
 			"failed_resume", suspend_stats.failed_resume,
+			"failed_resume_early",
+				suspend_stats.failed_resume_early,
 			"failed_resume_noirq",
 				suspend_stats.failed_resume_noirq);
 	seq_printf(s,	"failures:\n  last_failed_dev:\t%-s\n",
@@ -231,76 +235,130 @@ late_initcall(pm_debugfs_init);

 #endif /* CONFIG_PM_SLEEP */

+#ifdef CONFIG_PM_SLEEP_DEBUG
+/*
+ * pm_print_times: print time taken by devices to suspend and resume.
+ *
+ * show() returns whether printing of suspend and resume times is enabled.
+ * store() accepts 0 or 1.  0 disables printing and 1 enables it.
+ */
+bool pm_print_times_enabled;
+
+static ssize_t pm_print_times_show(struct kobject *kobj,
+				   struct kobj_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%d\n", pm_print_times_enabled);
+}
+
+static ssize_t pm_print_times_store(struct kobject *kobj,
+				    struct kobj_attribute *attr,
+				    const char *buf, size_t n)
+{
+	unsigned long val;
+
+	if (kstrtoul(buf, 10, &val))
+		return -EINVAL;
+
+	if (val > 1)
+		return -EINVAL;
+
+	pm_print_times_enabled = !!val;
+	return n;
+}
+
+power_attr(pm_print_times);
+
+static inline void pm_print_times_init(void)
+{
+	pm_print_times_enabled = !!initcall_debug;
+}
+#else /* !CONFIG_PM_SLEEP_DEBUG */
+static inline void pm_print_times_init(void) {}
+#endif /* CONFIG_PM_SLEEP_DEBUG */
+
 struct kobject *power_kobj;

 /**
- * state - control system power state.
+ * state - control system sleep states.
 *
- * show() returns what states are supported, which is hard-coded to
- * 'standby' (Power-On Suspend), 'mem' (Suspend-to-RAM), and
- * 'disk' (Suspend-to-Disk).
+ * show() returns available sleep state labels, which may be "mem", "standby",
+ * "freeze" and "disk" (hibernation).  See Documentation/power/states.txt for a
+ * description of what they mean.
 *
- * store() accepts one of those strings, translates it into the
- * proper enumerated value, and initiates a suspend transition.
+ * store() accepts one of those strings, translates it into the proper
+ * enumerated value, and initiates a suspend transition.
 */
 static ssize_t state_show(struct kobject *kobj, struct kobj_attribute *attr,
 			  char *buf)
 {
 	char *s = buf;
 #ifdef CONFIG_SUSPEND
-	int i;
+	suspend_state_t i;
+
+	for (i = PM_SUSPEND_MIN; i < PM_SUSPEND_MAX; i++)
+		if (pm_states[i].state)
+			s += sprintf(s,"%s ", pm_states[i].label);

-	for (i = 0; i < PM_SUSPEND_MAX; i++) {
-		if (pm_states[i] && valid_state(i))
-			s += sprintf(s,"%s ", pm_states[i]);
-	}
 #endif
-#ifdef CONFIG_HIBERNATION
-	s += sprintf(s, "%s\n", "disk");
-#else
+	if (hibernation_available())
+		s += sprintf(s, "disk ");
 	if (s != buf)
 		/* convert the last space to a newline */
 		*(s-1) = '\n';
-#endif
 	return (s - buf);
 }

-static ssize_t state_store(struct kobject *kobj, struct kobj_attribute *attr,
-			   const char *buf, size_t n)
+static suspend_state_t decode_state(const char *buf, size_t n)
 {
 #ifdef CONFIG_SUSPEND
-	suspend_state_t state = PM_SUSPEND_STANDBY;
-	const char * const *s;
+	suspend_state_t state = PM_SUSPEND_MIN;
+	struct pm_sleep_state *s;
 #endif
 	char *p;
 	int len;
-	int error = -EINVAL;

 	p = memchr(buf, '\n', n);
 	len = p ? p - buf : n;

-	/* First, check if we are requested to hibernate */
-	if (len == 4 && !strncmp(buf, "disk", len)) {
-		error = hibernate();
-		goto Exit;
-	}
+	/* Check hibernation first. */
+	if (len == 4 && !strncmp(buf, "disk", len))
+		return PM_SUSPEND_MAX;

 #ifdef CONFIG_SUSPEND
-	for (s = &pm_states[state]; state < PM_SUSPEND_MAX; s++, state++) {
-		if (*s && len == strlen(*s) && !strncmp(buf, *s, len))
-			break;
-	}
-	if (state < PM_SUSPEND_MAX && *s) {
-		error = enter_state(state);
-		if (error) {
-			suspend_stats.fail++;
-			dpm_save_failed_errno(error);
-		} else
-			suspend_stats.success++;
-	}
+	for (s = &pm_states[state]; state < PM_SUSPEND_MAX; s++, state++)
+		if (s->state && len == strlen(s->label)
+		    && !strncmp(buf, s->label, len))
+			return s->state;
 #endif

- Exit:
+	return PM_SUSPEND_ON;
+}
+
+static ssize_t state_store(struct kobject *kobj, struct kobj_attribute *attr,
+			   const char *buf, size_t n)
+{
+	suspend_state_t state;
+	int error;
+
+	error = pm_autosleep_lock();
+	if (error)
+		return error;
+
+	if (pm_autosleep_state() > PM_SUSPEND_ON) {
+		error = -EBUSY;
+		goto out;
+	}
+
+	state = decode_state(buf, n);
+	if (state < PM_SUSPEND_MAX)
+		error = pm_suspend(state);
+	else if (state == PM_SUSPEND_MAX)
+		error = hibernate();
+	else
+		error = -EINVAL;
+
+ out:
+	pm_autosleep_unlock();
 	return error ? error : n;
 }

@@ -341,7 +399,8 @@ static ssize_t wakeup_count_show(struct kobject *kobj,
 {
 	unsigned int val;

-	return pm_get_wakeup_count(&val) ? sprintf(buf, "%u\n", val) : -EINTR;
+	return pm_get_wakeup_count(&val, true) ?
+		sprintf(buf, "%u\n", val) : -EINTR;
 }

 static ssize_t wakeup_count_store(struct kobject *kobj,
@@ -349,15 +408,108 @@ static ssize_t wakeup_count_store(struct kobject *kobj,
 				const char *buf, size_t n)
 {
 	unsigned int val;
+	int error;
+
+	error = pm_autosleep_lock();
+	if (error)
+		return error;
+
+	if (pm_autosleep_state() > PM_SUSPEND_ON) {
+		error = -EBUSY;
+		goto out;
+	}

+	error = -EINVAL;
 	if (sscanf(buf, "%u", &val) == 1) {
 		if (pm_save_wakeup_count(val))
-			return n;
+			error = n;
+		else
+			pm_print_active_wakeup_sources();
 	}

-	return -EINVAL;
+ out:
+	pm_autosleep_unlock();
+	return error;
 }

 power_attr(wakeup_count);
+
+#ifdef CONFIG_PM_AUTOSLEEP
+static ssize_t autosleep_show(struct kobject *kobj,
+			      struct kobj_attribute *attr,
+			      char *buf)
+{
+	suspend_state_t state = pm_autosleep_state();
+
+	if (state == PM_SUSPEND_ON)
+		return sprintf(buf, "off\n");
+
+#ifdef CONFIG_SUSPEND
+	if (state < PM_SUSPEND_MAX)
+		return sprintf(buf, "%s\n", pm_states[state].state ?
+					pm_states[state].label : "error");
+#endif
+#ifdef CONFIG_HIBERNATION
+	return sprintf(buf, "disk\n");
+#else
+	return sprintf(buf, "error");
+#endif
+}
+
+static ssize_t autosleep_store(struct kobject *kobj,
+			       struct kobj_attribute *attr,
+			       const char *buf, size_t n)
+{
+	suspend_state_t state = decode_state(buf, n);
+	int error;
+
+	if (state == PM_SUSPEND_ON
+	    && strcmp(buf, "off") && strcmp(buf, "off\n"))
+		return -EINVAL;
+
+	error = pm_autosleep_set_state(state);
+	return error ? error : n;
+}
+
+power_attr(autosleep);
+#endif /* CONFIG_PM_AUTOSLEEP */
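The autosleep attribute accepts the same labels as /sys/power/state, plus
"off". A minimal userspace sketch of turning opportunistic suspend on and
off again:

#include <fcntl.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/sys/power/autosleep", O_WRONLY);

	if (fd < 0)
		return 1;
	/* Suspend to RAM whenever no wakeup sources are active. */
	write(fd, "mem", 3);
	/* ... later, write(fd, "off", 3); disables autosleep again. */
	close(fd);
	return 0;
}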
error : n; +} + +power_attr(wake_lock); + +static ssize_t wake_unlock_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + return pm_show_wakelocks(buf, false); +} + +static ssize_t wake_unlock_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t n) +{ + int error = pm_wake_unlock(buf); + return error ? error : n; +} + +power_attr(wake_unlock); + +#endif /* CONFIG_PM_WAKELOCKS */ #endif /* CONFIG_PM_SLEEP */ #ifdef CONFIG_PM_TRACE @@ -377,6 +529,10 @@ pm_trace_store(struct kobject *kobj, struct kobj_attribute *attr, if (sscanf(buf, "%d", &val) == 1) { pm_trace_enabled = !!val; + if (pm_trace_enabled) { + pr_warn("PM: Enabling pm_trace changes system date and time during resume.\n" + "PM: Correct system time has to be restored manually after resume.\n"); + } return n; } return -EINVAL; @@ -402,6 +558,30 @@ power_attr(pm_trace_dev_match); #endif /* CONFIG_PM_TRACE */ +#ifdef CONFIG_FREEZER +static ssize_t pm_freeze_timeout_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return sprintf(buf, "%u\n", freeze_timeout_msecs); +} + +static ssize_t pm_freeze_timeout_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t n) +{ + unsigned long val; + + if (kstrtoul(buf, 10, &val)) + return -EINVAL; + + freeze_timeout_msecs = val; + return n; +} + +power_attr(pm_freeze_timeout); + +#endif /* CONFIG_FREEZER*/ + static struct attribute * g[] = { &state_attr.attr, #ifdef CONFIG_PM_TRACE @@ -411,9 +591,22 @@ static struct attribute * g[] = { #ifdef CONFIG_PM_SLEEP &pm_async_attr.attr, &wakeup_count_attr.attr, +#ifdef CONFIG_PM_AUTOSLEEP + &autosleep_attr.attr, +#endif +#ifdef CONFIG_PM_WAKELOCKS + &wake_lock_attr.attr, + &wake_unlock_attr.attr, +#endif #ifdef CONFIG_PM_DEBUG &pm_test_attr.attr, #endif +#ifdef CONFIG_PM_SLEEP_DEBUG + &pm_print_times_attr.attr, +#endif +#endif +#ifdef CONFIG_FREEZER + &pm_freeze_timeout_attr.attr, #endif NULL, }; @@ -446,7 +639,11 @@ static int __init pm_init(void) power_kobj = kobject_create_and_add("power", NULL); if (!power_kobj) return -ENOMEM; - return sysfs_create_group(power_kobj, &attr_group); + error = sysfs_create_group(power_kobj, &attr_group); + if (error) + return error; + pm_print_times_init(); + return pm_autosleep_init(); } core_initcall(pm_init); diff --git a/kernel/power/power.h b/kernel/power/power.h index 21724eee520..c60f13b5270 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h @@ -2,6 +2,7 @@ #include <linux/suspend_ioctls.h> #include <linux/utsname.h> #include <linux/freezer.h> +#include <linux/compiler.h> struct swsusp_info { struct new_utsname uts; @@ -11,7 +12,7 @@ struct swsusp_info { unsigned long image_pages; unsigned long pages; unsigned long size; -} __attribute__((aligned(PAGE_SIZE))); +} __aligned(PAGE_SIZE); #ifdef CONFIG_HIBERNATION /* kernel/power/snapshot.c */ @@ -49,6 +50,8 @@ static inline char *check_image_kernel(struct swsusp_info *info) */ #define SPARE_PAGES ((1024 * 1024) >> PAGE_SHIFT) +asmlinkage int swsusp_save(void); + /* kernel/power/hibernate.c */ extern bool freezer_test_done; @@ -156,6 +159,9 @@ extern void swsusp_free(void); extern int swsusp_read(unsigned int *flags_p); extern int swsusp_write(unsigned int flags); extern void swsusp_close(fmode_t); +#ifdef CONFIG_SUSPEND +extern int swsusp_unmark(void); +#endif /* kernel/power/block_io.c */ extern struct block_device *hib_resume_bdev; @@ -172,19 +178,20 @@ extern void swsusp_show_speed(struct timeval *, struct timeval *, unsigned int, char *); #ifdef 
diff --git a/kernel/power/power.h b/kernel/power/power.h
index 21724eee520..c60f13b5270 100644
--- a/kernel/power/power.h
+++ b/kernel/power/power.h
@@ -2,6 +2,7 @@
 #include <linux/suspend_ioctls.h>
 #include <linux/utsname.h>
 #include <linux/freezer.h>
+#include <linux/compiler.h>

 struct swsusp_info {
 	struct new_utsname	uts;
@@ -11,7 +12,7 @@ struct swsusp_info {
 	unsigned long		image_pages;
 	unsigned long		pages;
 	unsigned long		size;
-} __attribute__((aligned(PAGE_SIZE)));
+} __aligned(PAGE_SIZE);

 #ifdef CONFIG_HIBERNATION
 /* kernel/power/snapshot.c */
@@ -49,6 +50,8 @@ static inline char *check_image_kernel(struct swsusp_info *info)
 */
 #define SPARE_PAGES	((1024 * 1024) >> PAGE_SHIFT)

+asmlinkage int swsusp_save(void);
+
 /* kernel/power/hibernate.c */
 extern bool freezer_test_done;

@@ -156,6 +159,9 @@ extern void swsusp_free(void);
 extern int swsusp_read(unsigned int *flags_p);
 extern int swsusp_write(unsigned int flags);
 extern void swsusp_close(fmode_t);
+#ifdef CONFIG_SUSPEND
+extern int swsusp_unmark(void);
+#endif

 /* kernel/power/block_io.c */
 extern struct block_device *hib_resume_bdev;
@@ -172,19 +178,20 @@ extern void swsusp_show_speed(struct timeval *, struct timeval *,
 				unsigned int, char *);

 #ifdef CONFIG_SUSPEND
+struct pm_sleep_state {
+	const char *label;
+	suspend_state_t state;
+};
+
 /* kernel/power/suspend.c */
-extern const char *const pm_states[];
+extern struct pm_sleep_state pm_states[];

-extern bool valid_state(suspend_state_t state);
 extern int suspend_devices_and_enter(suspend_state_t state);
-extern int enter_state(suspend_state_t state);
 #else /* !CONFIG_SUSPEND */
 static inline int suspend_devices_and_enter(suspend_state_t state)
 {
 	return -ENOSYS;
 }
-static inline int enter_state(suspend_state_t state) { return -ENOSYS; }
-static inline bool valid_state(suspend_state_t state) { return false; }
 #endif /* !CONFIG_SUSPEND */

 #ifdef CONFIG_PM_TEST_SUSPEND
@@ -234,16 +241,14 @@ static inline int suspend_freeze_processes(void)
 	int error;

 	error = freeze_processes();
-
 	/*
 	 * freeze_processes() automatically thaws every task if freezing
 	 * fails. So we need not do anything extra upon error.
 	 */
 	if (error)
-		goto Finish;
+		return error;

 	error = freeze_kernel_threads();
-
 	/*
 	 * freeze_kernel_threads() thaws only kernel threads upon freezing
 	 * failure. So we have to thaw the userspace tasks ourselves.
@@ -251,7 +256,6 @@ static inline int suspend_freeze_processes(void)
 	if (error)
 		thaw_processes();

- Finish:
 	return error;
 }

@@ -269,3 +273,30 @@ static inline void suspend_thaw_processes(void)
 {
 }
 #endif
+
+#ifdef CONFIG_PM_AUTOSLEEP
+
+/* kernel/power/autosleep.c */
+extern int pm_autosleep_init(void);
+extern int pm_autosleep_lock(void);
+extern void pm_autosleep_unlock(void);
+extern suspend_state_t pm_autosleep_state(void);
+extern int pm_autosleep_set_state(suspend_state_t state);
+
+#else /* !CONFIG_PM_AUTOSLEEP */
+
+static inline int pm_autosleep_init(void) { return 0; }
+static inline int pm_autosleep_lock(void) { return 0; }
+static inline void pm_autosleep_unlock(void) {}
+static inline suspend_state_t pm_autosleep_state(void) { return PM_SUSPEND_ON; }
+
+#endif /* !CONFIG_PM_AUTOSLEEP */
+
+#ifdef CONFIG_PM_WAKELOCKS
+
+/* kernel/power/wakelock.c */
+extern ssize_t pm_show_wakelocks(char *buf, bool show_active);
+extern int pm_wake_lock(const char *buf);
+extern int pm_wake_unlock(const char *buf);
+
+#endif /* !CONFIG_PM_WAKELOCKS */
diff --git a/kernel/power/poweroff.c b/kernel/power/poweroff.c
index d52359374e8..7ef6866b521 100644
--- a/kernel/power/poweroff.c
+++ b/kernel/power/poweroff.c
@@ -32,12 +32,12 @@ static void handle_poweroff(int key)

 static struct sysrq_key_op	sysrq_poweroff_op = {
 	.handler	= handle_poweroff,
-	.help_msg	= "powerOff",
+	.help_msg	= "poweroff(o)",
 	.action_msg	= "Power Off",
 	.enable_mask	= SYSRQ_ENABLE_BOOT,
 };

-static int pm_sysrq_init(void)
+static int __init pm_sysrq_init(void)
 {
 	register_sysrq_key('o', &sysrq_poweroff_op);
 	return 0;
diff --git a/kernel/power/process.c b/kernel/power/process.c
index 7e426459e60..4ee194eb524 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -16,11 +16,13 @@
 #include <linux/freezer.h>
 #include <linux/delay.h>
 #include <linux/workqueue.h>
+#include <linux/kmod.h>
+#include <trace/events/power.h>

 /*
 * Timeout for stopping processes
 */
-#define TIMEOUT	(20 * HZ)
+unsigned int __read_mostly freeze_timeout_msecs = 20 * MSEC_PER_SEC;

 static int try_to_freeze_tasks(bool user_only)
 {
@@ -29,13 +31,14 @@ static int try_to_freeze_tasks(bool user_only)
 	unsigned int todo;
 	bool wq_busy = false;
 	struct timeval start, end;
-	u64 elapsed_csecs64;
-	unsigned int elapsed_csecs;
+	u64 elapsed_msecs64;
+	unsigned int elapsed_msecs;
 	bool wakeup = false;
+	int sleep_usecs = USEC_PER_MSEC;

 	do_gettimeofday(&start);

-	end_time = jiffies + TIMEOUT;
+	end_time = jiffies + msecs_to_jiffies(freeze_timeout_msecs);

 	if (!user_only)
 		freeze_workqueues_begin();

@@ -47,20 +50,7 @@ static int try_to_freeze_tasks(bool user_only)
 			if (p == current || !freeze_task(p))
 				continue;

-			/*
-			 * Now that we've done set_freeze_flag, don't
-			 * perturb a task in TASK_STOPPED or TASK_TRACED.
-			 * It is "frozen enough". If the task does wake
-			 * up, it will immediately call try_to_freeze.
-			 *
-			 * Because freeze_task() goes through p's
-			 * scheduler lock after setting TIF_FREEZE, it's
-			 * guaranteed that either we see TASK_RUNNING or
-			 * try_to_stop() after schedule() in ptrace/signal
-			 * stop sees TIF_FREEZE.
-			 */
-			if (!task_is_stopped_or_traced(p) &&
-			    !freezer_should_skip(p))
+			if (!freezer_should_skip(p))
 				todo++;
 		} while_each_thread(g, p);
 		read_unlock(&tasklist_lock);
@@ -80,34 +70,39 @@ static int try_to_freeze_tasks(bool user_only)

 		/*
 		 * We need to retry, but first give the freezing tasks some
-		 * time to enter the regrigerator.
+		 * time to enter the refrigerator.  Start with an initial
+		 * 1 ms sleep followed by exponential backoff until 8 ms.
 		 */
-		msleep(10);
+		usleep_range(sleep_usecs / 2, sleep_usecs);
+		if (sleep_usecs < 8 * USEC_PER_MSEC)
+			sleep_usecs *= 2;
 	}

 	do_gettimeofday(&end);
-	elapsed_csecs64 = timeval_to_ns(&end) - timeval_to_ns(&start);
-	do_div(elapsed_csecs64, NSEC_PER_SEC / 100);
-	elapsed_csecs = elapsed_csecs64;
+	elapsed_msecs64 = timeval_to_ns(&end) - timeval_to_ns(&start);
+	do_div(elapsed_msecs64, NSEC_PER_MSEC);
+	elapsed_msecs = elapsed_msecs64;

 	if (todo) {
 		printk("\n");
-		printk(KERN_ERR "Freezing of tasks %s after %d.%02d seconds "
+		printk(KERN_ERR "Freezing of tasks %s after %d.%03d seconds "
 		       "(%d tasks refusing to freeze, wq_busy=%d):\n",
 		       wakeup ? "aborted" : "failed",
-		       elapsed_csecs / 100, elapsed_csecs % 100,
+		       elapsed_msecs / 1000, elapsed_msecs % 1000,
 		       todo - wq_busy, wq_busy);

-		read_lock(&tasklist_lock);
-		do_each_thread(g, p) {
-			if (!wakeup && !freezer_should_skip(p) &&
-			    p != current && freezing(p) && !frozen(p))
-				sched_show_task(p);
-		} while_each_thread(g, p);
-		read_unlock(&tasklist_lock);
+		if (!wakeup) {
+			read_lock(&tasklist_lock);
+			do_each_thread(g, p) {
+				if (p != current && !freezer_should_skip(p)
+				    && freezing(p) && !frozen(p))
+					sched_show_task(p);
+			} while_each_thread(g, p);
+			read_unlock(&tasklist_lock);
+		}
 	} else {
-		printk("(elapsed %d.%02d seconds) ", elapsed_csecs / 100,
-			elapsed_csecs % 100);
+		printk("(elapsed %d.%03d seconds) ", elapsed_msecs / 1000,
+			elapsed_msecs % 1000);
 	}

 	return todo ? -EBUSY : 0;
 }
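freeze_timeout_msecs replaces the old compile-time TIMEOUT and is exported
through the pm_freeze_timeout attribute added in main.c above, so the freeze
deadline is now tunable at run time. A one-shot userspace sketch (value in
milliseconds):

#include <fcntl.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/sys/power/pm_freeze_timeout", O_WRONLY);

	if (fd < 0)
		return 1;
	/* Give slow tasks 60 s to enter the refrigerator instead of 20. */
	write(fd, "60000", 5);
	close(fd);
	return 0;
}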
@@ -115,6 +110,8 @@ static int try_to_freeze_tasks(bool user_only)

 /**
 * freeze_processes - Signal user space processes to enter the refrigerator.
+ * The current thread will not be frozen.  The same process that calls
+ * freeze_processes must later call thaw_processes.
 *
 * On success, returns 0.  On failure, -errno and system is fully thawed.
 */
@@ -122,6 +119,13 @@ int freeze_processes(void)
 {
 	int error;

+	error = __usermodehelper_disable(UMH_FREEZING);
+	if (error)
+		return error;
+
+	/* Make sure this task doesn't get frozen */
+	current->flags |= PF_SUSPEND_TASK;
+
 	if (!pm_freezing)
 		atomic_inc(&system_freezing_cnt);

@@ -130,6 +134,7 @@ int freeze_processes(void)
 	error = try_to_freeze_tasks(true);
 	if (!error) {
 		printk("done.");
+		__usermodehelper_set_disable_depth(UMH_DISABLED);
 		oom_killer_disable();
 	}
 	printk("\n");
@@ -169,7 +174,9 @@ int freeze_kernel_threads(void)
 void thaw_processes(void)
 {
 	struct task_struct *g, *p;
+	struct task_struct *curr = current;

+	trace_suspend_resume(TPS("thaw_processes"), 0, true);
 	if (pm_freezing)
 		atomic_dec(&system_freezing_cnt);
 	pm_freezing = false;
@@ -179,16 +186,25 @@ void thaw_processes(void)

 	printk("Restarting tasks ... ");

+	__usermodehelper_set_disable_depth(UMH_FREEZING);
 	thaw_workqueues();

 	read_lock(&tasklist_lock);
 	do_each_thread(g, p) {
+		/* No other threads should have PF_SUSPEND_TASK set */
+		WARN_ON((p != curr) && (p->flags & PF_SUSPEND_TASK));
 		__thaw_task(p);
 	} while_each_thread(g, p);
 	read_unlock(&tasklist_lock);

+	WARN_ON(!(curr->flags & PF_SUSPEND_TASK));
+	curr->flags &= ~PF_SUSPEND_TASK;
+
+	usermodehelper_enable();
+
 	schedule();
 	printk("done.\n");
+	trace_suspend_resume(TPS("thaw_processes"), 0, false);
 }

 void thaw_kernel_threads(void)
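For context on what freeze_processes()/thaw_processes() act on: a kernel
thread only participates in freezing if it opts in. A minimal sketch (the
thread function name is hypothetical; the freezer calls are the standard
include/linux/freezer.h API):

#include <linux/freezer.h>
#include <linux/kthread.h>

static int poller_fn(void *data)
{
	set_freezable();	/* kthreads are not freezable by default */

	while (!kthread_should_stop()) {
		/* Parks here while the freezer has tasks frozen. */
		try_to_freeze();

		/* ... one unit of work ... */
		schedule_timeout_interruptible(HZ);
	}
	return 0;
}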
diff --git a/kernel/power/qos.c b/kernel/power/qos.c
index 995e3bd3417..884b7705886 100644
--- a/kernel/power/qos.c
+++ b/kernel/power/qos.c
@@ -44,6 +44,7 @@
 #include <linux/uaccess.h>
 #include <linux/export.h>
+#include <trace/events/power.h>

 /*
 * locking rule: all changes to constraints or notifiers lists
@@ -65,6 +66,7 @@ static struct pm_qos_constraints cpu_dma_constraints = {
 	.list = PLIST_HEAD_INIT(cpu_dma_constraints.list),
 	.target_value = PM_QOS_CPU_DMA_LAT_DEFAULT_VALUE,
 	.default_value = PM_QOS_CPU_DMA_LAT_DEFAULT_VALUE,
+	.no_constraint_value = PM_QOS_CPU_DMA_LAT_DEFAULT_VALUE,
 	.type = PM_QOS_MIN,
 	.notifiers = &cpu_dma_lat_notifier,
 };
@@ -78,6 +80,7 @@ static struct pm_qos_constraints network_lat_constraints = {
 	.list = PLIST_HEAD_INIT(network_lat_constraints.list),
 	.target_value = PM_QOS_NETWORK_LAT_DEFAULT_VALUE,
 	.default_value = PM_QOS_NETWORK_LAT_DEFAULT_VALUE,
+	.no_constraint_value = PM_QOS_NETWORK_LAT_DEFAULT_VALUE,
 	.type = PM_QOS_MIN,
 	.notifiers = &network_lat_notifier,
 };
@@ -92,6 +95,7 @@ static struct pm_qos_constraints network_tput_constraints = {
 	.list = PLIST_HEAD_INIT(network_tput_constraints.list),
 	.target_value = PM_QOS_NETWORK_THROUGHPUT_DEFAULT_VALUE,
 	.default_value = PM_QOS_NETWORK_THROUGHPUT_DEFAULT_VALUE,
+	.no_constraint_value = PM_QOS_NETWORK_THROUGHPUT_DEFAULT_VALUE,
 	.type = PM_QOS_MAX,
 	.notifiers = &network_throughput_notifier,
 };
@@ -127,7 +131,7 @@ static const struct file_operations pm_qos_power_fops = {
 static inline int pm_qos_get_value(struct pm_qos_constraints *c)
 {
 	if (plist_head_empty(&c->list))
-		return c->default_value;
+		return c->no_constraint_value;

 	switch (c->type) {
 	case PM_QOS_MIN:
@@ -139,6 +143,7 @@ static inline int pm_qos_get_value(struct pm_qos_constraints *c)
 	default:
 		/* runtime check for not using enum */
 		BUG();
+		return PM_QOS_DEFAULT_VALUE;
 	}
 }

@@ -168,6 +173,7 @@ int pm_qos_update_target(struct pm_qos_constraints *c, struct plist_node *node,
 {
 	unsigned long flags;
 	int prev_value, curr_value, new_value;
+	int ret;

 	spin_lock_irqsave(&pm_qos_lock, flags);
 	prev_value = pm_qos_get_value(c);
@@ -201,14 +207,81 @@ int pm_qos_update_target(struct pm_qos_constraints *c, struct plist_node *node,

 	spin_unlock_irqrestore(&pm_qos_lock, flags);

+	trace_pm_qos_update_target(action, prev_value, curr_value);
 	if (prev_value != curr_value) {
-		blocking_notifier_call_chain(c->notifiers,
-					     (unsigned long)curr_value,
-					     NULL);
-		return 1;
+		ret = 1;
+		if (c->notifiers)
+			blocking_notifier_call_chain(c->notifiers,
+						     (unsigned long)curr_value,
+						     NULL);
 	} else {
-		return 0;
+		ret = 0;
 	}
+	return ret;
+}
+
+/**
+ * pm_qos_flags_remove_req - Remove device PM QoS flags request.
+ * @pqf: Device PM QoS flags set to remove the request from.
+ * @req: Request to remove from the set.
+ */
+static void pm_qos_flags_remove_req(struct pm_qos_flags *pqf,
+				    struct pm_qos_flags_request *req)
+{
+	s32 val = 0;
+
+	list_del(&req->node);
+	list_for_each_entry(req, &pqf->list, node)
+		val |= req->flags;
+
+	pqf->effective_flags = val;
+}
+
+/**
+ * pm_qos_update_flags - Update a set of PM QoS flags.
+ * @pqf: Set of flags to update.
+ * @req: Request to add to the set, to modify, or to remove from the set.
+ * @action: Action to take on the set.
+ * @val: Value of the request to add or modify.
+ *
+ * Update the given set of PM QoS flags and call notifiers if the aggregate
+ * value has changed.  Returns 1 if the aggregate constraint value has
+ * changed, 0 otherwise.
+ */
+bool pm_qos_update_flags(struct pm_qos_flags *pqf,
+			 struct pm_qos_flags_request *req,
+			 enum pm_qos_req_action action, s32 val)
+{
+	unsigned long irqflags;
+	s32 prev_value, curr_value;
+
+	spin_lock_irqsave(&pm_qos_lock, irqflags);
+
+	prev_value = list_empty(&pqf->list) ? 0 : pqf->effective_flags;
+
+	switch (action) {
+	case PM_QOS_REMOVE_REQ:
+		pm_qos_flags_remove_req(pqf, req);
+		break;
+	case PM_QOS_UPDATE_REQ:
+		pm_qos_flags_remove_req(pqf, req);
+	case PM_QOS_ADD_REQ:
+		req->flags = val;
+		INIT_LIST_HEAD(&req->node);
+		list_add_tail(&req->node, &pqf->list);
+		pqf->effective_flags |= val;
+		break;
+	default:
+		/* no action */
+		;
+	}
+
+	curr_value = list_empty(&pqf->list) ? 0 : pqf->effective_flags;
+
+	spin_unlock_irqrestore(&pm_qos_lock, irqflags);
+
+	trace_pm_qos_update_flags(action, prev_value, curr_value);
+	return prev_value != curr_value;
 }

 /**
@@ -229,6 +302,32 @@ int pm_qos_request_active(struct pm_qos_request *req)
 }
 EXPORT_SYMBOL_GPL(pm_qos_request_active);

+static void __pm_qos_update_request(struct pm_qos_request *req,
+				    s32 new_value)
+{
+	trace_pm_qos_update_request(req->pm_qos_class, new_value);
+
+	if (new_value != req->node.prio)
+		pm_qos_update_target(
+			pm_qos_array[req->pm_qos_class]->constraints,
+			&req->node, PM_QOS_UPDATE_REQ, new_value);
+}
+
+/**
+ * pm_qos_work_fn - the timeout handler of pm_qos_update_request_timeout
+ * @work: work struct for the delayed work (timeout)
+ *
+ * This cancels the timeout request by falling back to the default at timeout.
+ */
+static void pm_qos_work_fn(struct work_struct *work)
+{
+	struct pm_qos_request *req = container_of(to_delayed_work(work),
+						  struct pm_qos_request,
+						  work);
+
+	__pm_qos_update_request(req, PM_QOS_DEFAULT_VALUE);
+}
+
 /**
 * pm_qos_add_request - inserts new qos request into the list
 * @req: pointer to a preallocated handle
@@ -253,6 +352,8 @@ void pm_qos_add_request(struct pm_qos_request *req,
 		return;
 	}
 	req->pm_qos_class = pm_qos_class;
+	INIT_DELAYED_WORK(&req->work, pm_qos_work_fn);
+	trace_pm_qos_add_request(pm_qos_class, value);
 	pm_qos_update_target(pm_qos_array[pm_qos_class]->constraints,
 			     &req->node, PM_QOS_ADD_REQ, value);
 }
@@ -279,12 +380,39 @@ void pm_qos_update_request(struct pm_qos_request *req,
 		return;
 	}

+	cancel_delayed_work_sync(&req->work);
+	__pm_qos_update_request(req, new_value);
+}
+EXPORT_SYMBOL_GPL(pm_qos_update_request);
+
+/**
+ * pm_qos_update_request_timeout - modifies an existing qos request temporarily.
+ * @req : handle to list element holding a pm_qos request to use
+ * @new_value: defines the temporary qos request
+ * @timeout_us: the effective duration of this qos request in usecs.
+ *
+ * After timeout_us, this qos request is cancelled automatically.
+ */
+void pm_qos_update_request_timeout(struct pm_qos_request *req, s32 new_value,
+				   unsigned long timeout_us)
+{
+	if (!req)
+		return;
+	if (WARN(!pm_qos_request_active(req),
+		 "%s called for unknown object.", __func__))
+		return;
+
+	cancel_delayed_work_sync(&req->work);
+
+	trace_pm_qos_update_request_timeout(req->pm_qos_class,
+					    new_value, timeout_us);
 	if (new_value != req->node.prio)
 		pm_qos_update_target(
 			pm_qos_array[req->pm_qos_class]->constraints,
 			&req->node, PM_QOS_UPDATE_REQ, new_value);
+
+	schedule_delayed_work(&req->work, usecs_to_jiffies(timeout_us));
 }
-EXPORT_SYMBOL_GPL(pm_qos_update_request);
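A driver-side sketch of the new timeout variant (the caller names and values
are illustrative): the delayed work initialized in pm_qos_add_request() drops
the request back to PM_QOS_DEFAULT_VALUE when the timeout fires.

#include <linux/pm_qos.h>

static struct pm_qos_request dma_req;

static void foo_open(void)
{
	pm_qos_add_request(&dma_req, PM_QOS_CPU_DMA_LATENCY,
			   PM_QOS_DEFAULT_VALUE);
}

static void foo_burst(void)
{
	/* Cap CPU wakeup latency at 20 us for the next 500 us only. */
	pm_qos_update_request_timeout(&dma_req, 20, 500);
}

static void foo_close(void)
{
	pm_qos_remove_request(&dma_req);	/* also cancels the timer */
}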

 /**
 * pm_qos_remove_request - modifies an existing qos request
@@ -305,6 +433,9 @@ void pm_qos_remove_request(struct pm_qos_request *req)
 		return;
 	}

+	cancel_delayed_work_sync(&req->work);
+
+	trace_pm_qos_remove_request(req->pm_qos_class, PM_QOS_DEFAULT_VALUE);
 	pm_qos_update_target(pm_qos_array[req->pm_qos_class]->constraints,
 			     &req->node, PM_QOS_REMOVE_REQ,
 			     PM_QOS_DEFAULT_VALUE);
@@ -366,7 +497,7 @@ static int find_pm_qos_object_by_minor(int minor)
 {
 	int pm_qos_class;

-	for (pm_qos_class = 0;
+	for (pm_qos_class = PM_QOS_CPU_DMA_LATENCY;
 		pm_qos_class < PM_QOS_NUM_CLASSES; pm_qos_class++) {
 		if (minor ==
 			pm_qos_array[pm_qos_class]->pm_qos_power_miscdev.minor)
@@ -380,7 +511,7 @@ static int pm_qos_power_open(struct inode *inode, struct file *filp)
 	long pm_qos_class;

 	pm_qos_class = find_pm_qos_object_by_minor(iminor(inode));
-	if (pm_qos_class >= 0) {
+	if (pm_qos_class >= PM_QOS_CPU_DMA_LATENCY) {
 		struct pm_qos_request *req = kzalloc(sizeof(*req), GFP_KERNEL);
 		if (!req)
 			return -ENOMEM;
@@ -433,30 +564,12 @@ static ssize_t pm_qos_power_write(struct file *filp, const char __user *buf,
 	if (count == sizeof(s32)) {
 		if (copy_from_user(&value, buf, sizeof(s32)))
 			return -EFAULT;
-	} else if (count <= 11) { /* ASCII perhaps? */
-		char ascii_value[11];
-		unsigned long int ulval;
+	} else {
 		int ret;

-		if (copy_from_user(ascii_value, buf, count))
-			return -EFAULT;
-
-		if (count > 10) {
-			if (ascii_value[10] == '\n')
-				ascii_value[10] = '\0';
-			else
-				return -EINVAL;
-		} else {
-			ascii_value[count] = '\0';
-		}
-		ret = strict_strtoul(ascii_value, 16, &ulval);
-		if (ret) {
-			pr_debug("%s, 0x%lx, 0x%x\n", ascii_value, ulval, ret);
-			return -EINVAL;
-		}
-		value = (s32)lower_32_bits(ulval);
-	} else {
-		return -EINVAL;
+		ret = kstrtos32_from_user(buf, count, 16, &value);
+		if (ret)
+			return ret;
 	}

 	req = filp->private_data;
@@ -469,21 +582,18 @@ static ssize_t pm_qos_power_write(struct file *filp, const char __user *buf,
 static int __init pm_qos_power_init(void)
 {
 	int ret = 0;
+	int i;

-	ret = register_pm_qos_misc(&cpu_dma_pm_qos);
-	if (ret < 0) {
-		printk(KERN_ERR "pm_qos_param: cpu_dma_latency setup failed\n");
-		return ret;
-	}
-	ret = register_pm_qos_misc(&network_lat_pm_qos);
-	if (ret < 0) {
-		printk(KERN_ERR "pm_qos_param: network_latency setup failed\n");
-		return ret;
+	BUILD_BUG_ON(ARRAY_SIZE(pm_qos_array) != PM_QOS_NUM_CLASSES);
+
+	for (i = PM_QOS_CPU_DMA_LATENCY; i < PM_QOS_NUM_CLASSES; i++) {
+		ret = register_pm_qos_misc(pm_qos_array[i]);
+		if (ret < 0) {
+			printk(KERN_ERR "pm_qos_param: %s setup failed\n",
+			       pm_qos_array[i]->name);
+			return ret;
+		}
 	}
-	ret = register_pm_qos_misc(&network_throughput_pm_qos);
-	if (ret < 0)
-		printk(KERN_ERR
-			"pm_qos_param: network_throughput setup failed\n");

 	return ret;
 }
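User space reaches the same constraints through the misc devices registered
above. Per pm_qos_power_write(), a 4-byte write is taken as a binary s32 and
anything else is parsed as hex ASCII; the constraint lasts as long as the
file stays open. A sketch:

#include <fcntl.h>
#include <stdint.h>
#include <unistd.h>

int main(void)
{
	int32_t lat_us = 10;	/* request <= 10 us CPU DMA latency */
	int fd = open("/dev/cpu_dma_latency", O_WRONLY);

	if (fd < 0)
		return 1;
	write(fd, &lat_us, sizeof(lat_us));
	pause();		/* constraint drops when the fd is closed */
	close(fd);
	return 0;
}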
PAGE_SHIFT, + ((unsigned long long) region->end_pfn << PAGE_SHIFT) + - 1); for (pfn = region->start_pfn; pfn < region->end_pfn; pfn++) if (pfn_valid(pfn)) { @@ -741,7 +744,10 @@ int create_basic_memory_bitmaps(void) struct memory_bitmap *bm1, *bm2; int error = 0; - BUG_ON(forbidden_pages_map || free_pages_map); + if (forbidden_pages_map && free_pages_map) + return 0; + else + BUG_ON(forbidden_pages_map || free_pages_map); bm1 = kzalloc(sizeof(struct memory_bitmap), GFP_KERNEL); if (!bm1) @@ -787,7 +793,8 @@ void free_basic_memory_bitmaps(void) { struct memory_bitmap *bm1, *bm2; - BUG_ON(!(forbidden_pages_map && free_pages_map)); + if (WARN_ON(!(forbidden_pages_map && free_pages_map))) + return; bm1 = forbidden_pages_map; bm2 = free_pages_map; @@ -882,7 +889,7 @@ static unsigned int count_highmem_pages(void) continue; mark_free_pages(zone); - max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages; + max_zone_pfn = zone_end_pfn(zone); for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) if (saveable_highmem_page(zone, pfn)) n++; @@ -946,7 +953,7 @@ static unsigned int count_data_pages(void) continue; mark_free_pages(zone); - max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages; + max_zone_pfn = zone_end_pfn(zone); for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) if (saveable_page(zone, pfn)) n++; @@ -1000,20 +1007,20 @@ static void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn) s_page = pfn_to_page(src_pfn); d_page = pfn_to_page(dst_pfn); if (PageHighMem(s_page)) { - src = kmap_atomic(s_page, KM_USER0); - dst = kmap_atomic(d_page, KM_USER1); + src = kmap_atomic(s_page); + dst = kmap_atomic(d_page); do_copy_page(dst, src); - kunmap_atomic(dst, KM_USER1); - kunmap_atomic(src, KM_USER0); + kunmap_atomic(dst); + kunmap_atomic(src); } else { if (PageHighMem(d_page)) { /* Page pointed to by src may contain some kernel * data modified by kmap_atomic() */ safe_copy_page(buffer, s_page); - dst = kmap_atomic(d_page, KM_USER0); + dst = kmap_atomic(d_page); copy_page(dst, buffer); - kunmap_atomic(dst, KM_USER0); + kunmap_atomic(dst); } else { safe_copy_page(page_address(d_page), s_page); } @@ -1039,7 +1046,7 @@ copy_data_pages(struct memory_bitmap *copy_bm, struct memory_bitmap *orig_bm) unsigned long max_zone_pfn; mark_free_pages(zone); - max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages; + max_zone_pfn = zone_end_pfn(zone); for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) if (page_is_saveable(zone, pfn)) memory_bm_set_bit(orig_bm, pfn); @@ -1091,7 +1098,7 @@ void swsusp_free(void) unsigned long pfn, max_zone_pfn; for_each_populated_zone(zone) { - max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages; + max_zone_pfn = zone_end_pfn(zone); for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) if (pfn_valid(pfn)) { struct page *page = pfn_to_page(pfn); @@ -1262,7 +1269,7 @@ static void free_unnecessary_pages(void) * [number of saveable pages] - [number of pages that can be freed in theory] * * where the second term is the sum of (1) reclaimable slab pages, (2) active - * and (3) inactive anonymouns pages, (4) active and (5) inactive file pages, + * and (3) inactive anonymous pages, (4) active and (5) inactive file pages, * minus mapped file pages. */ static unsigned long minimum_image_size(unsigned long saveable) @@ -1397,7 +1404,11 @@ int hibernate_preallocate_memory(void) * highmem and non-highmem zones separately. 
*/ pages_highmem = preallocate_image_highmem(highmem / 2); - alloc = (count - max_size) - pages_highmem; + alloc = count - max_size; + if (alloc > pages_highmem) + alloc -= pages_highmem; + else + alloc = 0; pages = preallocate_image_memory(alloc, avail_normal); if (pages < alloc) { /* We have exhausted non-highmem pages, try highmem. */ @@ -1575,7 +1586,7 @@ swsusp_alloc(struct memory_bitmap *orig_bm, struct memory_bitmap *copy_bm, return -ENOMEM; } -asmlinkage int swsusp_save(void) +asmlinkage __visible int swsusp_save(void) { unsigned int nr_pages, nr_highmem; @@ -1650,7 +1661,7 @@ unsigned long snapshot_get_image_size(void) static int init_header(struct swsusp_info *info) { memset(info, 0, sizeof(struct swsusp_info)); - info->num_physpages = num_physpages; + info->num_physpages = get_num_physpages(); info->image_pages = nr_copy_pages; info->pages = snapshot_get_image_size(); info->size = info->pages; @@ -1728,9 +1739,9 @@ int snapshot_read_next(struct snapshot_handle *handle) */ void *kaddr; - kaddr = kmap_atomic(page, KM_USER0); + kaddr = kmap_atomic(page); copy_page(buffer, kaddr); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); handle->buffer = buffer; } else { handle->buffer = page_address(page); @@ -1753,7 +1764,7 @@ static int mark_unsafe_pages(struct memory_bitmap *bm) /* Clear page flags */ for_each_populated_zone(zone) { - max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages; + max_zone_pfn = zone_end_pfn(zone); for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) if (pfn_valid(pfn)) swsusp_unset_page_free(pfn_to_page(pfn)); @@ -1794,7 +1805,7 @@ static int check_header(struct swsusp_info *info) char *reason; reason = check_image_kernel(info); - if (!reason && info->num_physpages != num_physpages) + if (!reason && info->num_physpages != get_num_physpages()) reason = "memory size"; if (reason) { printk(KERN_ERR "PM: Image mismatch: %s\n", reason); @@ -2014,9 +2025,9 @@ static void copy_last_highmem_page(void) if (last_highmem_page) { void *dst; - dst = kmap_atomic(last_highmem_page, KM_USER0); + dst = kmap_atomic(last_highmem_page); copy_page(dst, buffer); - kunmap_atomic(dst, KM_USER0); + kunmap_atomic(dst); last_highmem_page = NULL; } } @@ -2309,13 +2320,13 @@ swap_two_pages_data(struct page *p1, struct page *p2, void *buf) { void *kaddr1, *kaddr2; - kaddr1 = kmap_atomic(p1, KM_USER0); - kaddr2 = kmap_atomic(p2, KM_USER1); + kaddr1 = kmap_atomic(p1); + kaddr2 = kmap_atomic(p2); copy_page(buf, kaddr1); copy_page(kaddr1, kaddr2); copy_page(kaddr2, buf); - kunmap_atomic(kaddr2, KM_USER1); - kunmap_atomic(kaddr1, KM_USER0); + kunmap_atomic(kaddr2); + kunmap_atomic(kaddr1); } /** diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 4fd51beed87..ed35a4790af 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -12,9 +12,9 @@ #include <linux/delay.h> #include <linux/errno.h> #include <linux/init.h> -#include <linux/kmod.h> #include <linux/console.h> #include <linux/cpu.h> +#include <linux/cpuidle.h> #include <linux/syscalls.h> #include <linux/gfp.h> #include <linux/io.h> @@ -25,44 +25,119 @@ #include <linux/export.h> #include <linux/suspend.h> #include <linux/syscore_ops.h> +#include <linux/ftrace.h> #include <trace/events/power.h> +#include <linux/compiler.h> #include "power.h" -const char *const pm_states[PM_SUSPEND_MAX] = { - [PM_SUSPEND_STANDBY] = "standby", - [PM_SUSPEND_MEM] = "mem", +struct pm_sleep_state pm_states[PM_SUSPEND_MAX] = { + [PM_SUSPEND_FREEZE] = { .label = "freeze", .state = PM_SUSPEND_FREEZE }, + 
[PM_SUSPEND_STANDBY] = { .label = "standby", }, + [PM_SUSPEND_MEM] = { .label = "mem", }, }; static const struct platform_suspend_ops *suspend_ops; +static const struct platform_freeze_ops *freeze_ops; -/** - * suspend_set_ops - Set the global suspend method table. - * @ops: Pointer to ops structure. - */ -void suspend_set_ops(const struct platform_suspend_ops *ops) +static bool need_suspend_ops(suspend_state_t state) +{ + return state > PM_SUSPEND_FREEZE; +} + +static DECLARE_WAIT_QUEUE_HEAD(suspend_freeze_wait_head); +static bool suspend_freeze_wake; + +void freeze_set_ops(const struct platform_freeze_ops *ops) { lock_system_sleep(); - suspend_ops = ops; + freeze_ops = ops; unlock_system_sleep(); } -EXPORT_SYMBOL_GPL(suspend_set_ops); -bool valid_state(suspend_state_t state) +static void freeze_begin(void) +{ + suspend_freeze_wake = false; +} + +static void freeze_enter(void) +{ + cpuidle_use_deepest_state(true); + cpuidle_resume(); + wait_event(suspend_freeze_wait_head, suspend_freeze_wake); + cpuidle_pause(); + cpuidle_use_deepest_state(false); +} + +void freeze_wake(void) +{ + suspend_freeze_wake = true; + wake_up(&suspend_freeze_wait_head); +} +EXPORT_SYMBOL_GPL(freeze_wake); + +static bool valid_state(suspend_state_t state) { /* - * All states need lowlevel support and need to be valid to the lowlevel + * PM_SUSPEND_STANDBY and PM_SUSPEND_MEM states need low level + * support and need to be valid to the low level * implementation, no valid callback implies that none are valid. */ return suspend_ops && suspend_ops->valid && suspend_ops->valid(state); } +/* + * If this is set, the "mem" label always corresponds to the deepest sleep state + * available, the "standby" label corresponds to the second deepest sleep state + * available (if any), and the "freeze" label corresponds to the remaining + * available sleep state (if there is one). + */ +static bool relative_states; + +static int __init sleep_states_setup(char *str) +{ + relative_states = !strncmp(str, "1", 1); + if (relative_states) { + pm_states[PM_SUSPEND_MEM].state = PM_SUSPEND_FREEZE; + pm_states[PM_SUSPEND_FREEZE].state = 0; + } + return 1; +} + +__setup("relative_sleep_states=", sleep_states_setup); + /** - * suspend_valid_only_mem - generic memory-only valid callback + * suspend_set_ops - Set the global suspend method table. + * @ops: Suspend operations to use. + */ +void suspend_set_ops(const struct platform_suspend_ops *ops) +{ + suspend_state_t i; + int j = PM_SUSPEND_MAX - 1; + + lock_system_sleep(); + + suspend_ops = ops; + for (i = PM_SUSPEND_MEM; i >= PM_SUSPEND_STANDBY; i--) + if (valid_state(i)) + pm_states[j--].state = i; + else if (!relative_states) + pm_states[j--].state = 0; + + pm_states[j--].state = PM_SUSPEND_FREEZE; + while (j >= PM_SUSPEND_MIN) + pm_states[j--].state = 0; + + unlock_system_sleep(); +} +EXPORT_SYMBOL_GPL(suspend_set_ops); + +/** + * suspend_valid_only_mem - Generic memory-only valid callback. * - * Platform drivers that implement mem suspend only and only need - * to check for that in their .valid callback can use this instead - * of rolling their own .valid callback. + * Platform drivers that implement mem suspend only and only need to check for + * that in their .valid() callback can use this instead of rolling their own + * .valid() callback. */ int suspend_valid_only_mem(suspend_state_t state) { @@ -83,16 +158,17 @@ static int suspend_test(int level) } /** - * suspend_prepare - Do prep work before entering low-power state. + * suspend_prepare - Prepare for entering system sleep state. 
* - * This is common code that is called for each state that we're entering. - * Run suspend notifiers, allocate a console and stop all processes. + * Common code run for every system sleep state that can be entered (except for + * hibernation). Run suspend notifiers, allocate the "suspend" console and + * freeze processes. */ -static int suspend_prepare(void) +static int suspend_prepare(suspend_state_t state) { int error; - if (!suspend_ops || !suspend_ops->enter) + if (need_suspend_ops(state) && (!suspend_ops || !suspend_ops->enter)) return -EPERM; pm_prepare_console(); @@ -101,17 +177,14 @@ static int suspend_prepare(void) if (error) goto Finish; - error = usermodehelper_disable(); - if (error) - goto Finish; - + trace_suspend_resume(TPS("freeze_processes"), 0, true); error = suspend_freeze_processes(); + trace_suspend_resume(TPS("freeze_processes"), 0, false); if (!error) return 0; suspend_stats.failed_freeze++; dpm_save_failed_step(SUSPEND_FREEZE); - usermodehelper_enable(); Finish: pm_notifier_call_chain(PM_POST_SUSPEND); pm_restore_console(); @@ -119,21 +192,21 @@ static int suspend_prepare(void) } /* default implementation */ -void __attribute__ ((weak)) arch_suspend_disable_irqs(void) +void __weak arch_suspend_disable_irqs(void) { local_irq_disable(); } /* default implementation */ -void __attribute__ ((weak)) arch_suspend_enable_irqs(void) +void __weak arch_suspend_enable_irqs(void) { local_irq_enable(); } /** - * suspend_enter - enter the desired system sleep state. - * @state: State to enter - * @wakeup: Returns information that suspend should not be entered again. + * suspend_enter - Make the system enter the given sleep state. + * @state: System sleep state to enter. + * @wakeup: Returns information that the sleep state should not be re-entered. * * This function should be called after devices have been suspended. */ @@ -141,19 +214,19 @@ static int suspend_enter(suspend_state_t state, bool *wakeup) { int error; - if (suspend_ops->prepare) { + if (need_suspend_ops(state) && suspend_ops->prepare) { error = suspend_ops->prepare(); if (error) goto Platform_finish; } - error = dpm_suspend_noirq(PMSG_SUSPEND); + error = dpm_suspend_end(PMSG_SUSPEND); if (error) { printk(KERN_ERR "PM: Some devices failed to power down\n"); goto Platform_finish; } - if (suspend_ops->prepare_late) { + if (need_suspend_ops(state) && suspend_ops->prepare_late) { error = suspend_ops->prepare_late(); if (error) goto Platform_wake; @@ -162,6 +235,20 @@ static int suspend_enter(suspend_state_t state, bool *wakeup) if (suspend_test(TEST_PLATFORM)) goto Platform_wake; + /* + * PM_SUSPEND_FREEZE equals + * frozen processes + suspended devices + idle processors. + * Thus we should invoke freeze_enter() soon after + * all the devices are suspended. 
+ */ + if (state == PM_SUSPEND_FREEZE) { + trace_suspend_resume(TPS("machine_suspend"), state, true); + freeze_enter(); + trace_suspend_resume(TPS("machine_suspend"), state, false); + goto Platform_wake; + } + + ftrace_stop(); error = disable_nonboot_cpus(); if (error || suspend_test(TEST_CPUS)) goto Enable_cpus; @@ -173,7 +260,11 @@ static int suspend_enter(suspend_state_t state, bool *wakeup) if (!error) { *wakeup = pm_wakeup_pending(); if (!(suspend_test(TEST_CORE) || *wakeup)) { + trace_suspend_resume(TPS("machine_suspend"), + state, true); error = suspend_ops->enter(state); + trace_suspend_resume(TPS("machine_suspend"), + state, false); events_check_enabled = false; } syscore_resume(); @@ -184,44 +275,47 @@ static int suspend_enter(suspend_state_t state, bool *wakeup) Enable_cpus: enable_nonboot_cpus(); + ftrace_start(); Platform_wake: - if (suspend_ops->wake) + if (need_suspend_ops(state) && suspend_ops->wake) suspend_ops->wake(); - dpm_resume_noirq(PMSG_RESUME); + dpm_resume_start(PMSG_RESUME); Platform_finish: - if (suspend_ops->finish) + if (need_suspend_ops(state) && suspend_ops->finish) suspend_ops->finish(); return error; } /** - * suspend_devices_and_enter - suspend devices and enter the desired system - * sleep state. - * @state: state to enter + * suspend_devices_and_enter - Suspend devices and enter system sleep state. + * @state: System sleep state to enter. */ int suspend_devices_and_enter(suspend_state_t state) { int error; bool wakeup = false; - if (!suspend_ops) + if (need_suspend_ops(state) && !suspend_ops) return -ENOSYS; - trace_machine_suspend(state); - if (suspend_ops->begin) { + if (need_suspend_ops(state) && suspend_ops->begin) { error = suspend_ops->begin(state); if (error) goto Close; + } else if (state == PM_SUSPEND_FREEZE && freeze_ops && freeze_ops->begin) { + error = freeze_ops->begin(); + if (error) + goto Close; } suspend_console(); suspend_test_start(); error = dpm_suspend_start(PMSG_SUSPEND); if (error) { - printk(KERN_ERR "PM: Some devices failed to suspend\n"); + pr_err("PM: Some devices failed to suspend, or early wake event detected\n"); goto Recover_platform; } suspend_test_finish("suspend devices"); @@ -230,7 +324,7 @@ int suspend_devices_and_enter(suspend_state_t state) do { error = suspend_enter(state, &wakeup); - } while (!error && !wakeup + } while (!error && !wakeup && need_suspend_ops(state) && suspend_ops->suspend_again && suspend_ops->suspend_again()); Resume_devices: @@ -239,64 +333,78 @@ int suspend_devices_and_enter(suspend_state_t state) suspend_test_finish("resume devices"); resume_console(); Close: - if (suspend_ops->end) + if (need_suspend_ops(state) && suspend_ops->end) suspend_ops->end(); - trace_machine_suspend(PWR_EVENT_EXIT); + else if (state == PM_SUSPEND_FREEZE && freeze_ops && freeze_ops->end) + freeze_ops->end(); + return error; Recover_platform: - if (suspend_ops->recover) + if (need_suspend_ops(state) && suspend_ops->recover) suspend_ops->recover(); goto Resume_devices; } /** - * suspend_finish - Do final work before exiting suspend sequence. + * suspend_finish - Clean up before finishing the suspend sequence. * - * Call platform code to clean up, restart processes, and free the - * console that we've allocated. This is not called for suspend-to-disk. + * Call platform code to clean up, restart processes, and free the console that + * we've allocated. This routine is not called for hibernation. 
 */
 static void suspend_finish(void)
 {
 	suspend_thaw_processes();
-	usermodehelper_enable();
 	pm_notifier_call_chain(PM_POST_SUSPEND);
 	pm_restore_console();
 }
 
 /**
- * enter_state - Do common work of entering low-power state.
- * @state:		pm_state structure for state we're entering.
+ * enter_state - Do common work needed to enter system sleep state.
+ * @state: System sleep state to enter.
  *
- * Make sure we're the only ones trying to enter a sleep state. Fail
- * if someone has beat us to it, since we don't want anything weird to
- * happen when we wake up.
- * Then, do the setup for suspend, enter the state, and cleaup (after
- * we've woken up).
+ * Make sure that no one else is trying to put the system into a sleep state.
+ * Fail if that's not the case. Otherwise, prepare for system suspend, make the
+ * system enter the given sleep state and clean up after wakeup.
  */
-int enter_state(suspend_state_t state)
+static int enter_state(suspend_state_t state)
 {
 	int error;
 
-	if (!valid_state(state))
-		return -ENODEV;
-
+	trace_suspend_resume(TPS("suspend_enter"), state, true);
+	if (state == PM_SUSPEND_FREEZE) {
+#ifdef CONFIG_PM_DEBUG
+		if (pm_test_level != TEST_NONE && pm_test_level <= TEST_CPUS) {
+			pr_warning("PM: Unsupported test mode for freeze state, "
+				   "please choose none/freezer/devices/platform.\n");
+			return -EAGAIN;
+		}
+#endif
+	} else if (!valid_state(state)) {
+		return -EINVAL;
+	}
 	if (!mutex_trylock(&pm_mutex))
 		return -EBUSY;
 
+	if (state == PM_SUSPEND_FREEZE)
+		freeze_begin();
+
+	trace_suspend_resume(TPS("sync_filesystems"), 0, true);
 	printk(KERN_INFO "PM: Syncing filesystems ... ");
 	sys_sync();
 	printk("done.\n");
+	trace_suspend_resume(TPS("sync_filesystems"), 0, false);
 
-	pr_debug("PM: Preparing system for %s sleep\n", pm_states[state]);
-	error = suspend_prepare();
+	pr_debug("PM: Preparing system for %s sleep\n", pm_states[state].label);
+	error = suspend_prepare(state);
 	if (error)
 		goto Unlock;
 
 	if (suspend_test(TEST_FREEZER))
 		goto Finish;
 
-	pr_debug("PM: Entering %s sleep\n", pm_states[state]);
+	trace_suspend_resume(TPS("suspend_enter"), state, false);
+	pr_debug("PM: Entering %s sleep\n", pm_states[state].label);
 	pm_restrict_gfp_mask();
 	error = suspend_devices_and_enter(state);
 	pm_restore_gfp_mask();
@@ -310,24 +418,26 @@ int enter_state(suspend_state_t state)
 }
 
 /**
- * pm_suspend - Externally visible function for suspending system.
- * @state: Enumerated value of state to enter.
+ * pm_suspend - Externally visible function for suspending the system.
+ * @state: System sleep state to enter.
  *
- * Determine whether or not value is within range, get state
- * structure, and enter (above).
+ * Check if the value of @state represents one of the supported states,
+ * execute enter_state() and update system suspend statistics.
*/ int pm_suspend(suspend_state_t state) { - int ret; - if (state > PM_SUSPEND_ON && state < PM_SUSPEND_MAX) { - ret = enter_state(state); - if (ret) { - suspend_stats.fail++; - dpm_save_failed_errno(ret); - } else - suspend_stats.success++; - return ret; + int error; + + if (state <= PM_SUSPEND_ON || state >= PM_SUSPEND_MAX) + return -EINVAL; + + error = enter_state(state); + if (error) { + suspend_stats.fail++; + dpm_save_failed_errno(error); + } else { + suspend_stats.success++; } - return -EINVAL; + return error; } EXPORT_SYMBOL(pm_suspend); diff --git a/kernel/power/suspend_test.c b/kernel/power/suspend_test.c index 25596e450ac..269b097e78e 100644 --- a/kernel/power/suspend_test.c +++ b/kernel/power/suspend_test.c @@ -92,13 +92,13 @@ static void __init test_wakealarm(struct rtc_device *rtc, suspend_state_t state) } if (state == PM_SUSPEND_MEM) { - printk(info_test, pm_states[state]); + printk(info_test, pm_states[state].label); status = pm_suspend(state); if (status == -ENODEV) state = PM_SUSPEND_STANDBY; } if (state == PM_SUSPEND_STANDBY) { - printk(info_test, pm_states[state]); + printk(info_test, pm_states[state].label); status = pm_suspend(state); } if (status < 0) @@ -112,7 +112,7 @@ static void __init test_wakealarm(struct rtc_device *rtc, suspend_state_t state) rtc_set_alarm(rtc, &alm); } -static int __init has_wakealarm(struct device *dev, void *name_ptr) +static int __init has_wakealarm(struct device *dev, const void *data) { struct rtc_device *candidate = to_rtc_device(dev); @@ -121,7 +121,6 @@ static int __init has_wakealarm(struct device *dev, void *name_ptr) if (!device_may_wakeup(candidate->dev.parent)) return 0; - *(const char **)name_ptr = dev_name(dev); return 1; } @@ -137,18 +136,16 @@ static char warn_bad_state[] __initdata = static int __init setup_test_suspend(char *value) { - unsigned i; + suspend_state_t i; /* "=mem" ==> "mem" */ value++; - for (i = 0; i < PM_SUSPEND_MAX; i++) { - if (!pm_states[i]) - continue; - if (strcmp(pm_states[i], value) != 0) - continue; - test_state = (__force suspend_state_t) i; - return 0; - } + for (i = PM_SUSPEND_MIN; i < PM_SUSPEND_MAX; i++) + if (!strcmp(pm_states[i].label, value)) { + test_state = pm_states[i].state; + return 0; + } + printk(warn_bad_state, value); return 0; } @@ -159,21 +156,21 @@ static int __init test_suspend(void) static char warn_no_rtc[] __initdata = KERN_WARNING "PM: no wakealarm-capable RTC driver is ready\n"; - char *pony = NULL; struct rtc_device *rtc = NULL; + struct device *dev; /* PM is initialized by now; is that state testable? */ if (test_state == PM_SUSPEND_ON) goto done; - if (!valid_state(test_state)) { - printk(warn_bad_state, pm_states[test_state]); + if (!pm_states[test_state].state) { + printk(warn_bad_state, pm_states[test_state].label); goto done; } /* RTCs have initialized by now too ... can we use one? */ - class_find_device(rtc_class, NULL, &pony, has_wakealarm); - if (pony) - rtc = rtc_class_open(pony); + dev = class_find_device(rtc_class, NULL, NULL, has_wakealarm); + if (dev) + rtc = rtc_class_open(dev_name(dev)); if (!rtc) { printk(warn_no_rtc); goto done; diff --git a/kernel/power/swap.c b/kernel/power/swap.c index 8742fd013a9..aaa3261dea5 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -6,7 +6,7 @@ * * Copyright (C) 1998,2001-2005 Pavel Machek <pavel@ucw.cz> * Copyright (C) 2006 Rafael J. 
Wysocki <rjw@sisk.pl> - * Copyright (C) 2010 Bojan Smojver <bojan@rexursive.com> + * Copyright (C) 2010-2012 Bojan Smojver <bojan@rexursive.com> * * This file is released under the GPLv2. * @@ -51,6 +51,23 @@ #define MAP_PAGE_ENTRIES (PAGE_SIZE / sizeof(sector_t) - 1) +/* + * Number of free pages that are not high. + */ +static inline unsigned long low_free_pages(void) +{ + return nr_free_pages() - nr_free_highpages(); +} + +/* + * Number of pages required to be kept free while writing the image. Always + * half of all available low pages before the writing starts. + */ +static inline unsigned long reqd_free_pages(void) +{ + return low_free_pages() / 2; +} + struct swap_map_page { sector_t entries[MAP_PAGE_ENTRIES]; sector_t next_swap; @@ -72,7 +89,7 @@ struct swap_map_handle { sector_t cur_swap; sector_t first_sector; unsigned int k; - unsigned long nr_free_pages, written; + unsigned long reqd_free_pages; u32 crc32; }; @@ -84,7 +101,7 @@ struct swsusp_header { unsigned int flags; /* Flags to pass to the "boot" kernel */ char orig_sig[10]; char sig[10]; -} __attribute__((packed)); +} __packed; static struct swsusp_header *swsusp_header; @@ -109,7 +126,7 @@ static int swsusp_extents_insert(unsigned long swap_offset) /* Figure out where to put the new node */ while (*new) { - ext = container_of(*new, struct swsusp_extent, node); + ext = rb_entry(*new, struct swsusp_extent, node); parent = *new; if (swap_offset < ext->start) { /* Try to merge */ @@ -265,14 +282,17 @@ static int write_page(void *buf, sector_t offset, struct bio **bio_chain) return -ENOSPC; if (bio_chain) { - src = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH); + src = (void *)__get_free_page(__GFP_WAIT | __GFP_NOWARN | + __GFP_NORETRY); if (src) { copy_page(src, buf); } else { ret = hib_wait_on_bio_chain(bio_chain); /* Free pages */ if (ret) return ret; - src = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH); + src = (void *)__get_free_page(__GFP_WAIT | + __GFP_NOWARN | + __GFP_NORETRY); if (src) { copy_page(src, buf); } else { @@ -316,8 +336,7 @@ static int get_swap_writer(struct swap_map_handle *handle) goto err_rel; } handle->k = 0; - handle->nr_free_pages = nr_free_pages() >> 1; - handle->written = 0; + handle->reqd_free_pages = reqd_free_pages(); handle->first_sector = handle->cur_swap; return 0; err_rel: @@ -351,12 +370,17 @@ static int swap_write_page(struct swap_map_handle *handle, void *buf, clear_page(handle->cur); handle->cur_swap = offset; handle->k = 0; - } - if (bio_chain && ++handle->written > handle->nr_free_pages) { - error = hib_wait_on_bio_chain(bio_chain); - if (error) - goto out; - handle->written = 0; + + if (bio_chain && low_free_pages() <= handle->reqd_free_pages) { + error = hib_wait_on_bio_chain(bio_chain); + if (error) + goto out; + /* + * Recalculate the number of required free pages, to + * make sure we never take more than half. + */ + handle->reqd_free_pages = reqd_free_pages(); + } } out: return error; @@ -403,8 +427,9 @@ static int swap_writer_finish(struct swap_map_handle *handle, /* Maximum number of threads for compression/decompression. */ #define LZO_THREADS 3 -/* Maximum number of pages for read buffering. */ -#define LZO_READ_PAGES (MAP_PAGE_ENTRIES * 8) +/* Minimum/maximum number of pages for read buffering. */ +#define LZO_MIN_RD_PAGES 1024 +#define LZO_MAX_RD_PAGES 8192 /** @@ -423,9 +448,9 @@ static int save_image(struct swap_map_handle *handle, struct timeval start; struct timeval stop; - printk(KERN_INFO "PM: Saving image data pages (%u pages) ... 
", + printk(KERN_INFO "PM: Saving image data pages (%u pages)...\n", nr_to_write); - m = nr_to_write / 100; + m = nr_to_write / 10; if (!m) m = 1; nr_pages = 0; @@ -439,7 +464,8 @@ static int save_image(struct swap_map_handle *handle, if (ret) break; if (!(nr_pages % m)) - printk(KERN_CONT "\b\b\b\b%3d%%", nr_pages / m); + printk(KERN_INFO "PM: Image saving progress: %3d%%\n", + nr_pages / m * 10); nr_pages++; } err2 = hib_wait_on_bio_chain(&bio); @@ -447,9 +473,7 @@ static int save_image(struct swap_map_handle *handle, if (!ret) ret = err2; if (!ret) - printk(KERN_CONT "\b\b\b\bdone\n"); - else - printk(KERN_CONT "\n"); + printk(KERN_INFO "PM: Image saving done.\n"); swsusp_show_speed(&start, &stop, nr_to_write, "Wrote"); return ret; } @@ -543,7 +567,7 @@ static int lzo_compress_threadfn(void *data) /** * save_image_lzo - Save the suspend image data compressed with LZO. - * @handle: Swap mam handle to use for saving the image. + * @handle: Swap map handle to use for saving the image. * @snapshot: Image to read data from. * @nr_to_write: Number of pages to save. */ @@ -615,12 +639,6 @@ static int save_image_lzo(struct swap_map_handle *handle, } /* - * Adjust number of free pages after all allocations have been done. - * We don't want to run out of pages when writing. - */ - handle->nr_free_pages = nr_free_pages() >> 1; - - /* * Start the CRC32 thread. */ init_waitqueue_head(&crc->go); @@ -641,11 +659,17 @@ static int save_image_lzo(struct swap_map_handle *handle, goto out_clean; } + /* + * Adjust the number of required free pages after all allocations have + * been done. We don't want to run out of pages when writing. + */ + handle->reqd_free_pages = reqd_free_pages(); + printk(KERN_INFO "PM: Using %u thread(s) for compression.\n" - "PM: Compressing and saving image data (%u pages) ... ", + "PM: Compressing and saving image data (%u pages)...\n", nr_threads, nr_to_write); - m = nr_to_write / 100; + m = nr_to_write / 10; if (!m) m = 1; nr_pages = 0; @@ -665,8 +689,10 @@ static int save_image_lzo(struct swap_map_handle *handle, data_of(*snapshot), PAGE_SIZE); if (!(nr_pages % m)) - printk(KERN_CONT "\b\b\b\b%3d%%", - nr_pages / m); + printk(KERN_INFO + "PM: Image saving progress: " + "%3d%%\n", + nr_pages / m * 10); nr_pages++; } if (!off) @@ -736,11 +762,8 @@ out_finish: do_gettimeofday(&stop); if (!ret) ret = err2; - if (!ret) { - printk(KERN_CONT "\b\b\b\bdone\n"); - } else { - printk(KERN_CONT "\n"); - } + if (!ret) + printk(KERN_INFO "PM: Image saving done.\n"); swsusp_show_speed(&start, &stop, nr_to_write, "Wrote"); out_clean: if (crc) { @@ -948,9 +971,9 @@ static int load_image(struct swap_map_handle *handle, int err2; unsigned nr_pages; - printk(KERN_INFO "PM: Loading image data pages (%u pages) ... 
", + printk(KERN_INFO "PM: Loading image data pages (%u pages)...\n", nr_to_read); - m = nr_to_read / 100; + m = nr_to_read / 10; if (!m) m = 1; nr_pages = 0; @@ -968,7 +991,8 @@ static int load_image(struct swap_map_handle *handle, if (ret) break; if (!(nr_pages % m)) - printk("\b\b\b\b%3d%%", nr_pages / m); + printk(KERN_INFO "PM: Image loading progress: %3d%%\n", + nr_pages / m * 10); nr_pages++; } err2 = hib_wait_on_bio_chain(&bio); @@ -976,12 +1000,11 @@ static int load_image(struct swap_map_handle *handle, if (!ret) ret = err2; if (!ret) { - printk("\b\b\b\bdone\n"); + printk(KERN_INFO "PM: Image loading done.\n"); snapshot_write_finalize(snapshot); if (!snapshot_image_loaded(snapshot)) ret = -ENODATA; - } else - printk("\n"); + } swsusp_show_speed(&start, &stop, nr_to_read, "Read"); return ret; } @@ -1051,7 +1074,7 @@ static int load_image_lzo(struct swap_map_handle *handle, unsigned i, thr, run_threads, nr_threads; unsigned ring = 0, pg = 0, ring_size = 0, have = 0, want, need, asked = 0; - unsigned long read_pages; + unsigned long read_pages = 0; unsigned char **page = NULL; struct dec_data *data = NULL; struct crc_data *crc = NULL; @@ -1063,7 +1086,7 @@ static int load_image_lzo(struct swap_map_handle *handle, nr_threads = num_online_cpus() - 1; nr_threads = clamp_val(nr_threads, 1, LZO_THREADS); - page = vmalloc(sizeof(*page) * LZO_READ_PAGES); + page = vmalloc(sizeof(*page) * LZO_MAX_RD_PAGES); if (!page) { printk(KERN_ERR "PM: Failed to allocate LZO page\n"); ret = -ENOMEM; @@ -1128,15 +1151,22 @@ static int load_image_lzo(struct swap_map_handle *handle, } /* - * Adjust number of pages for read buffering, in case we are short. + * Set the number of pages for read buffering. + * This is complete guesswork, because we'll only know the real + * picture once prepare_image() is called, which is much later on + * during the image load phase. We'll assume the worst case and + * say that none of the image pages are from high memory. */ - read_pages = (nr_free_pages() - snapshot_get_image_size()) >> 1; - read_pages = clamp_val(read_pages, LZO_CMP_PAGES, LZO_READ_PAGES); + if (low_free_pages() > snapshot_get_image_size()) + read_pages = (low_free_pages() - snapshot_get_image_size()) / 2; + read_pages = clamp_val(read_pages, LZO_MIN_RD_PAGES, LZO_MAX_RD_PAGES); for (i = 0; i < read_pages; i++) { page[i] = (void *)__get_free_page(i < LZO_CMP_PAGES ? __GFP_WAIT | __GFP_HIGH : - __GFP_WAIT); + __GFP_WAIT | __GFP_NOWARN | + __GFP_NORETRY); + if (!page[i]) { if (i < LZO_CMP_PAGES) { ring_size = i; @@ -1153,9 +1183,9 @@ static int load_image_lzo(struct swap_map_handle *handle, printk(KERN_INFO "PM: Using %u thread(s) for decompression.\n" - "PM: Loading and decompressing image data (%u pages) ... 
", + "PM: Loading and decompressing image data (%u pages)...\n", nr_threads, nr_to_read); - m = nr_to_read / 100; + m = nr_to_read / 10; if (!m) m = 1; nr_pages = 0; @@ -1287,7 +1317,10 @@ static int load_image_lzo(struct swap_map_handle *handle, data[thr].unc + off, PAGE_SIZE); if (!(nr_pages % m)) - printk("\b\b\b\b%3d%%", nr_pages / m); + printk(KERN_INFO + "PM: Image loading progress: " + "%3d%%\n", + nr_pages / m * 10); nr_pages++; ret = snapshot_write_next(snapshot); @@ -1312,7 +1345,7 @@ out_finish: } do_gettimeofday(&stop); if (!ret) { - printk("\b\b\b\bdone\n"); + printk(KERN_INFO "PM: Image loading done.\n"); snapshot_write_finalize(snapshot); if (!snapshot_image_loaded(snapshot)) ret = -ENODATA; @@ -1325,8 +1358,7 @@ out_finish: } } } - } else - printk("\n"); + } swsusp_show_speed(&start, &stop, nr_to_read, "Read"); out_clean: for (i = 0; i < ring_size; i++) @@ -1440,6 +1472,34 @@ void swsusp_close(fmode_t mode) blkdev_put(hib_resume_bdev, mode); } +/** + * swsusp_unmark - Unmark swsusp signature in the resume device + */ + +#ifdef CONFIG_SUSPEND +int swsusp_unmark(void) +{ + int error; + + hib_bio_read_page(swsusp_resume_block, swsusp_header, NULL); + if (!memcmp(HIBERNATE_SIG,swsusp_header->sig, 10)) { + memcpy(swsusp_header->sig,swsusp_header->orig_sig, 10); + error = hib_bio_write_page(swsusp_resume_block, + swsusp_header, NULL); + } else { + printk(KERN_ERR "PM: Cannot find swsusp signature!\n"); + error = -ENODEV; + } + + /* + * We just returned from suspend, we don't need the image any more. + */ + free_all_swap_pages(root_swap); + + return error; +} +#endif + static int swsusp_header_init(void) { swsusp_header = (struct swsusp_header*) __get_free_page(GFP_KERNEL); diff --git a/kernel/power/user.c b/kernel/power/user.c index 3e100075b13..526e8911460 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -12,7 +12,6 @@ #include <linux/suspend.h> #include <linux/syscalls.h> #include <linux/reboot.h> -#include <linux/kmod.h> #include <linux/string.h> #include <linux/device.h> #include <linux/miscdevice.h> @@ -25,7 +24,6 @@ #include <linux/console.h> #include <linux/cpu.h> #include <linux/freezer.h> -#include <scsi/scsi_scan.h> #include <asm/uaccess.h> @@ -38,9 +36,10 @@ static struct snapshot_data { struct snapshot_handle handle; int swap; int mode; - char frozen; - char ready; - char platform_support; + bool frozen; + bool ready; + bool platform_support; + bool free_bitmaps; } snapshot_state; atomic_t snapshot_device_available = ATOMIC_INIT(1); @@ -50,6 +49,9 @@ static int snapshot_open(struct inode *inode, struct file *filp) struct snapshot_data *data; int error; + if (!hibernation_available()) + return -EPERM; + lock_system_sleep(); if (!atomic_add_unless(&snapshot_device_available, -1, 0)) { @@ -62,11 +64,6 @@ static int snapshot_open(struct inode *inode, struct file *filp) error = -ENOSYS; goto Unlock; } - if(create_basic_memory_bitmaps()) { - atomic_inc(&snapshot_device_available); - error = -ENOMEM; - goto Unlock; - } nonseekable_open(inode, filp); data = &snapshot_state; filp->private_data = data; @@ -76,6 +73,7 @@ static int snapshot_open(struct inode *inode, struct file *filp) data->swap = swsusp_resume_device ? swap_type_of(swsusp_resume_device, 0, NULL) : -1; data->mode = O_RDONLY; + data->free_bitmaps = false; error = pm_notifier_call_chain(PM_HIBERNATION_PREPARE); if (error) pm_notifier_call_chain(PM_POST_HIBERNATION); @@ -85,21 +83,23 @@ static int snapshot_open(struct inode *inode, struct file *filp) * appear. 
*/ wait_for_device_probe(); - scsi_complete_async_scans(); data->swap = -1; data->mode = O_WRONLY; error = pm_notifier_call_chain(PM_RESTORE_PREPARE); + if (!error) { + error = create_basic_memory_bitmaps(); + data->free_bitmaps = !error; + } if (error) pm_notifier_call_chain(PM_POST_RESTORE); } - if (error) { - free_basic_memory_bitmaps(); + if (error) atomic_inc(&snapshot_device_available); - } - data->frozen = 0; - data->ready = 0; - data->platform_support = 0; + + data->frozen = false; + data->ready = false; + data->platform_support = false; Unlock: unlock_system_sleep(); @@ -114,12 +114,14 @@ static int snapshot_release(struct inode *inode, struct file *filp) lock_system_sleep(); swsusp_free(); - free_basic_memory_bitmaps(); data = filp->private_data; free_all_swap_pages(data->swap); if (data->frozen) { pm_restore_gfp_mask(); + free_basic_memory_bitmaps(); thaw_processes(); + } else if (data->free_bitmaps) { + free_basic_memory_bitmaps(); } pm_notifier_call_chain(data->mode == O_RDONLY ? PM_POST_HIBERNATION : PM_POST_RESTORE); @@ -210,6 +212,7 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd, if (!mutex_trylock(&pm_mutex)) return -EBUSY; + lock_device_hotplug(); data = filp->private_data; switch (cmd) { @@ -222,24 +225,26 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd, sys_sync(); printk("done.\n"); - error = usermodehelper_disable(); + error = freeze_processes(); if (error) break; - error = freeze_processes(); + error = create_basic_memory_bitmaps(); if (error) - usermodehelper_enable(); + thaw_processes(); else - data->frozen = 1; + data->frozen = true; + break; case SNAPSHOT_UNFREEZE: if (!data->frozen || data->ready) break; pm_restore_gfp_mask(); + free_basic_memory_bitmaps(); + data->free_bitmaps = false; thaw_processes(); - usermodehelper_enable(); - data->frozen = 0; + data->frozen = false; break; case SNAPSHOT_CREATE_IMAGE: @@ -249,16 +254,10 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd, } pm_restore_gfp_mask(); error = hibernation_snapshot(data->platform_support); - if (error) { - thaw_kernel_threads(); - } else { + if (!error) { error = put_user(in_suspend, (int __user *)arg); - if (!error && !freezer_test_done) - data->ready = 1; - if (freezer_test_done) { - freezer_test_done = false; - thaw_kernel_threads(); - } + data->ready = !freezer_test_done && !error; + freezer_test_done = false; } break; @@ -275,7 +274,7 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd, case SNAPSHOT_FREE: swsusp_free(); memset(&data->handle, 0, sizeof(struct snapshot_handle)); - data->ready = 0; + data->ready = false; /* * It is necessary to thaw kernel threads here, because * SNAPSHOT_CREATE_IMAGE may be invoked directly after @@ -339,7 +338,7 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd, * PM_HIBERNATION_PREPARE */ error = suspend_devices_and_enter(PM_SUSPEND_MEM); - data->ready = 0; + data->ready = false; break; case SNAPSHOT_PLATFORM_SUPPORT: @@ -387,6 +386,7 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd, } + unlock_device_hotplug(); mutex_unlock(&pm_mutex); return error; diff --git a/kernel/power/wakelock.c b/kernel/power/wakelock.c new file mode 100644 index 00000000000..019069c84ff --- /dev/null +++ b/kernel/power/wakelock.c @@ -0,0 +1,268 @@ +/* + * kernel/power/wakelock.c + * + * User space wakeup sources support. + * + * Copyright (C) 2012 Rafael J. 
Wysocki <rjw@sisk.pl> + * + * This code is based on the analogous interface allowing user space to + * manipulate wakelocks on Android. + */ + +#include <linux/capability.h> +#include <linux/ctype.h> +#include <linux/device.h> +#include <linux/err.h> +#include <linux/hrtimer.h> +#include <linux/list.h> +#include <linux/rbtree.h> +#include <linux/slab.h> + +#include "power.h" + +static DEFINE_MUTEX(wakelocks_lock); + +struct wakelock { + char *name; + struct rb_node node; + struct wakeup_source ws; +#ifdef CONFIG_PM_WAKELOCKS_GC + struct list_head lru; +#endif +}; + +static struct rb_root wakelocks_tree = RB_ROOT; + +ssize_t pm_show_wakelocks(char *buf, bool show_active) +{ + struct rb_node *node; + struct wakelock *wl; + char *str = buf; + char *end = buf + PAGE_SIZE; + + mutex_lock(&wakelocks_lock); + + for (node = rb_first(&wakelocks_tree); node; node = rb_next(node)) { + wl = rb_entry(node, struct wakelock, node); + if (wl->ws.active == show_active) + str += scnprintf(str, end - str, "%s ", wl->name); + } + if (str > buf) + str--; + + str += scnprintf(str, end - str, "\n"); + + mutex_unlock(&wakelocks_lock); + return (str - buf); +} + +#if CONFIG_PM_WAKELOCKS_LIMIT > 0 +static unsigned int number_of_wakelocks; + +static inline bool wakelocks_limit_exceeded(void) +{ + return number_of_wakelocks > CONFIG_PM_WAKELOCKS_LIMIT; +} + +static inline void increment_wakelocks_number(void) +{ + number_of_wakelocks++; +} + +static inline void decrement_wakelocks_number(void) +{ + number_of_wakelocks--; +} +#else /* CONFIG_PM_WAKELOCKS_LIMIT = 0 */ +static inline bool wakelocks_limit_exceeded(void) { return false; } +static inline void increment_wakelocks_number(void) {} +static inline void decrement_wakelocks_number(void) {} +#endif /* CONFIG_PM_WAKELOCKS_LIMIT */ + +#ifdef CONFIG_PM_WAKELOCKS_GC +#define WL_GC_COUNT_MAX 100 +#define WL_GC_TIME_SEC 300 + +static LIST_HEAD(wakelocks_lru_list); +static unsigned int wakelocks_gc_count; + +static inline void wakelocks_lru_add(struct wakelock *wl) +{ + list_add(&wl->lru, &wakelocks_lru_list); +} + +static inline void wakelocks_lru_most_recent(struct wakelock *wl) +{ + list_move(&wl->lru, &wakelocks_lru_list); +} + +static void wakelocks_gc(void) +{ + struct wakelock *wl, *aux; + ktime_t now; + + if (++wakelocks_gc_count <= WL_GC_COUNT_MAX) + return; + + now = ktime_get(); + list_for_each_entry_safe_reverse(wl, aux, &wakelocks_lru_list, lru) { + u64 idle_time_ns; + bool active; + + spin_lock_irq(&wl->ws.lock); + idle_time_ns = ktime_to_ns(ktime_sub(now, wl->ws.last_time)); + active = wl->ws.active; + spin_unlock_irq(&wl->ws.lock); + + if (idle_time_ns < ((u64)WL_GC_TIME_SEC * NSEC_PER_SEC)) + break; + + if (!active) { + wakeup_source_remove(&wl->ws); + rb_erase(&wl->node, &wakelocks_tree); + list_del(&wl->lru); + kfree(wl->name); + kfree(wl); + decrement_wakelocks_number(); + } + } + wakelocks_gc_count = 0; +} +#else /* !CONFIG_PM_WAKELOCKS_GC */ +static inline void wakelocks_lru_add(struct wakelock *wl) {} +static inline void wakelocks_lru_most_recent(struct wakelock *wl) {} +static inline void wakelocks_gc(void) {} +#endif /* !CONFIG_PM_WAKELOCKS_GC */ + +static struct wakelock *wakelock_lookup_add(const char *name, size_t len, + bool add_if_not_found) +{ + struct rb_node **node = &wakelocks_tree.rb_node; + struct rb_node *parent = *node; + struct wakelock *wl; + + while (*node) { + int diff; + + parent = *node; + wl = rb_entry(*node, struct wakelock, node); + diff = strncmp(name, wl->name, len); + if (diff == 0) { + if (wl->name[len]) + diff = -1; + 
else
+				return wl;
+		}
+		if (diff < 0)
+			node = &(*node)->rb_left;
+		else
+			node = &(*node)->rb_right;
+	}
+	if (!add_if_not_found)
+		return ERR_PTR(-EINVAL);
+
+	if (wakelocks_limit_exceeded())
+		return ERR_PTR(-ENOSPC);
+
+	/* Not found, we have to add a new one. */
+	wl = kzalloc(sizeof(*wl), GFP_KERNEL);
+	if (!wl)
+		return ERR_PTR(-ENOMEM);
+
+	wl->name = kstrndup(name, len, GFP_KERNEL);
+	if (!wl->name) {
+		kfree(wl);
+		return ERR_PTR(-ENOMEM);
+	}
+	wl->ws.name = wl->name;
+	wakeup_source_add(&wl->ws);
+	rb_link_node(&wl->node, parent, node);
+	rb_insert_color(&wl->node, &wakelocks_tree);
+	wakelocks_lru_add(wl);
+	increment_wakelocks_number();
+	return wl;
+}
+
+int pm_wake_lock(const char *buf)
+{
+	const char *str = buf;
+	struct wakelock *wl;
+	u64 timeout_ns = 0;
+	size_t len;
+	int ret = 0;
+
+	if (!capable(CAP_BLOCK_SUSPEND))
+		return -EPERM;
+
+	while (*str && !isspace(*str))
+		str++;
+
+	len = str - buf;
+	if (!len)
+		return -EINVAL;
+
+	if (*str && *str != '\n') {
+		/* Find out if there's a valid timeout string appended. */
+		ret = kstrtou64(skip_spaces(str), 10, &timeout_ns);
+		if (ret)
+			return -EINVAL;
+	}
+
+	mutex_lock(&wakelocks_lock);
+
+	wl = wakelock_lookup_add(buf, len, true);
+	if (IS_ERR(wl)) {
+		ret = PTR_ERR(wl);
+		goto out;
+	}
+	if (timeout_ns) {
+		u64 timeout_ms = timeout_ns + NSEC_PER_MSEC - 1;
+
+		do_div(timeout_ms, NSEC_PER_MSEC);
+		__pm_wakeup_event(&wl->ws, timeout_ms);
+	} else {
+		__pm_stay_awake(&wl->ws);
+	}
+
+	wakelocks_lru_most_recent(wl);
+
+ out:
+	mutex_unlock(&wakelocks_lock);
+	return ret;
+}
+
+int pm_wake_unlock(const char *buf)
+{
+	struct wakelock *wl;
+	size_t len;
+	int ret = 0;
+
+	if (!capable(CAP_BLOCK_SUSPEND))
+		return -EPERM;
+
+	len = strlen(buf);
+	if (!len)
+		return -EINVAL;
+
+	if (buf[len-1] == '\n')
+		len--;
+
+	if (!len)
+		return -EINVAL;
+
+	mutex_lock(&wakelocks_lock);
+
+	wl = wakelock_lookup_add(buf, len, false);
+	if (IS_ERR(wl)) {
+		ret = PTR_ERR(wl);
+		goto out;
+	}
+	__pm_relax(&wl->ws);
+
+	wakelocks_lru_most_recent(wl);
+	wakelocks_gc();
+
+ out:
+	mutex_unlock(&wakelocks_lock);
+	return ret;
+}
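
The pm_qos_update_request_timeout() helper added to kernel/power/qos.c gives callers a self-expiring constraint: the delayed work armed at the end of the function resets the request to PM_QOS_DEFAULT_VALUE once timeout_us elapses, so a driver no longer needs a private timer for short-lived latency demands. A minimal caller sketch against this API; the demo_* names and the 20 us / 100 ms figures are illustrative assumptions, not taken from the patch:

#include <linux/module.h>
#include <linux/pm_qos.h>

static struct pm_qos_request demo_req;	/* hypothetical driver state */

static int __init demo_init(void)
{
	/* Register the request with no constraint expressed yet. */
	pm_qos_add_request(&demo_req, PM_QOS_CPU_DMA_LATENCY,
			   PM_QOS_DEFAULT_VALUE);

	/*
	 * Ask for <= 20 us CPU/DMA latency, but only for the next 100 ms;
	 * pm_qos_work_fn() then drops the request back to the default
	 * value without any further action from this module.
	 */
	pm_qos_update_request_timeout(&demo_req, 20, 100 * 1000);
	return 0;
}

static void __exit demo_exit(void)
{
	/* The remove path cancels any pending timeout work first. */
	pm_qos_remove_request(&demo_req);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");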
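
On the character-device side, pm_qos_power_write() still accepts a raw binary s32, while ASCII input is now parsed by kstrtos32_from_user() in base 16 instead of the hand-rolled strict_strtoul() sequence. Either way the constraint is tied to the open file descriptor and is dropped when it is closed. A user-space sketch using the binary form (the 20 us target is an arbitrary example):

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int32_t latency_us = 20;	/* example target: 20 us */
	int fd = open("/dev/cpu_dma_latency", O_WRONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	/* A write of exactly sizeof(s32) bytes is taken as a raw value. */
	if (write(fd, &latency_us, sizeof(latency_us)) != sizeof(latency_us)) {
		perror("write");
		close(fd);
		return 1;
	}
	puts("constraint held for 10 s; dropped on close()");
	sleep(10);
	close(fd);
	return 0;
}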
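
The interaction between suspend_set_ops() and relative_sleep_states is easiest to see on a concrete case. Working through the loop above for a platform whose ->valid() accepts only PM_SUSPEND_MEM:

  relative_sleep_states=1:
    i = PM_SUSPEND_MEM      valid   -> pm_states[PM_SUSPEND_MEM].state = PM_SUSPEND_MEM
    i = PM_SUSPEND_STANDBY  invalid -> slot not consumed (j unchanged)
    next free slot                  -> pm_states[PM_SUSPEND_STANDBY].state = PM_SUSPEND_FREEZE
    remaining slots                 -> pm_states[PM_SUSPEND_FREEZE].state = 0 (disabled)

  relative_sleep_states=0 (default):
    "mem" -> PM_SUSPEND_MEM, "standby" -> disabled, "freeze" -> PM_SUSPEND_FREEZE

So with relative states the "standby" label falls back to the next shallower available state instead of disappearing, which is exactly what the comment above sleep_states_setup() promises.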
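
Since pm_states[] now carries a .label/.state pair per slot, the new freeze state is entered the same way as the others, by writing its label to /sys/power/state (the sysfs plumbing lives in kernel/power/main.c, outside this section). A minimal trigger, which must run as root and assumes a kernel built with this series:

#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/sys/power/state", "w");

	if (!f) {
		perror("fopen /sys/power/state");
		return 1;
	}
	/*
	 * "freeze" = frozen processes + suspended devices + idle CPUs;
	 * the kernel blocks in freeze_enter() until an interrupt that
	 * counts as a wakeup event calls freeze_wake().
	 */
	if (fputs("freeze", f) == EOF || fclose(f) == EOF) {
		perror("write");
		return 1;
	}
	return 0;
}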
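
pm_wake_lock() parses "<name>[ <timeout>]" with the timeout given in nanoseconds (rounded up to milliseconds for __pm_wakeup_event()), while pm_wake_unlock() takes the bare name; the PM core exposes the pair as /sys/power/wake_lock and /sys/power/wake_unlock. A user-space sketch, assuming the writer has CAP_BLOCK_SUSPEND; the "demo" name and the 5 s timeout are arbitrary:

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

/* Write one string to a sysfs file; returns 0 on success. */
static int sysfs_write(const char *path, const char *val)
{
	ssize_t len = (ssize_t)strlen(val);
	int fd = open(path, O_WRONLY);
	int ok;

	if (fd < 0)
		return -1;
	ok = write(fd, val, len) == len;
	close(fd);
	return ok ? 0 : -1;
}

int main(void)
{
	/* Acquire "demo" for at most 5 s (5e9 ns); auto-released after. */
	if (sysfs_write("/sys/power/wake_lock", "demo 5000000000"))
		perror("wake_lock");

	/* ... work that must not race with opportunistic sleep ... */

	/* Release early by name; no timeout field on the unlock side. */
	if (sysfs_write("/sys/power/wake_unlock", "demo"))
		perror("wake_unlock");
	return 0;
}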
