aboutsummaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r-- kernel/.gitignore | 5
-rw-r--r-- kernel/acct.c | 16
-rw-r--r-- kernel/configs.c | 2
-rw-r--r-- kernel/futex.c | 7
-rw-r--r-- kernel/irq/manage.c | 2
-rw-r--r-- kernel/irq/proc.c | 4
-rw-r--r-- kernel/ksysfs.c | 30
-rw-r--r-- kernel/module.c | 3
-rw-r--r-- kernel/panic.c | 4
-rw-r--r-- kernel/power/disk.c | 92
-rw-r--r-- kernel/power/power.h | 24
-rw-r--r-- kernel/power/snapshot.c | 89
-rw-r--r-- kernel/power/swsusp.c | 1020
-rw-r--r-- kernel/sysctl.c | 14
14 files changed, 715 insertions, 597 deletions
diff --git a/kernel/.gitignore b/kernel/.gitignore
new file mode 100644
index 00000000000..f2ab70073bd
--- /dev/null
+++ b/kernel/.gitignore
@@ -0,0 +1,5 @@
+#
+# Generated files
+#
+config_data.h
+config_data.gz
diff --git a/kernel/acct.c b/kernel/acct.c
index 6312d6bd43e..38d57fa6b78 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -427,6 +427,7 @@ static void do_acct_process(long exitcode, struct file *file)
u64 elapsed;
u64 run_time;
struct timespec uptime;
+ unsigned long jiffies;
/*
* First check to see if there is enough free_space to continue
@@ -467,12 +468,12 @@ static void do_acct_process(long exitcode, struct file *file)
#endif
do_div(elapsed, AHZ);
ac.ac_btime = xtime.tv_sec - elapsed;
- ac.ac_utime = encode_comp_t(jiffies_to_AHZ(
- current->signal->utime +
- current->group_leader->utime));
- ac.ac_stime = encode_comp_t(jiffies_to_AHZ(
- current->signal->stime +
- current->group_leader->stime));
+ jiffies = cputime_to_jiffies(cputime_add(current->group_leader->utime,
+ current->signal->utime));
+ ac.ac_utime = encode_comp_t(jiffies_to_AHZ(jiffies));
+ jiffies = cputime_to_jiffies(cputime_add(current->group_leader->stime,
+ current->signal->stime));
+ ac.ac_stime = encode_comp_t(jiffies_to_AHZ(jiffies));
/* we really need to bite the bullet and change layout */
ac.ac_uid = current->uid;
ac.ac_gid = current->gid;
@@ -580,7 +581,8 @@ void acct_process(long exitcode)
void acct_update_integrals(struct task_struct *tsk)
{
if (likely(tsk->mm)) {
- long delta = tsk->stime - tsk->acct_stimexpd;
+ long delta =
+ cputime_to_jiffies(tsk->stime) - tsk->acct_stimexpd;
if (delta == 0)
return;
diff --git a/kernel/configs.c b/kernel/configs.c
index 986f7af31e0..009e1ebdcb8 100644
--- a/kernel/configs.c
+++ b/kernel/configs.c
@@ -3,7 +3,7 @@
* Echo the kernel .config file used to build the kernel
*
* Copyright (C) 2002 Khalid Aziz <khalid_aziz@hp.com>
- * Copyright (C) 2002 Randy Dunlap <rddunlap@osdl.org>
+ * Copyright (C) 2002 Randy Dunlap <rdunlap@xenotime.net>
* Copyright (C) 2002 Al Stone <ahs3@fc.hp.com>
* Copyright (C) 2002 Hewlett-Packard Company
*
diff --git a/kernel/futex.c b/kernel/futex.c
index 5e71a6bf6f6..5efa2f97803 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -356,6 +356,13 @@ retry:
if (bh1 != bh2)
spin_unlock(&bh2->lock);
+#ifndef CONFIG_MMU
+ /* we don't get EFAULT from MMU faults if we don't have an MMU,
+ * but we might get them from range checking */
+ ret = op_ret;
+ goto out;
+#endif
+
if (unlikely(op_ret != -EFAULT)) {
ret = op_ret;
goto out;
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 81c49a4d679..97d5559997d 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -366,6 +366,8 @@ int request_irq(unsigned int irq,
action->next = NULL;
action->dev_id = dev_id;
+ select_smp_affinity(irq);
+
retval = setup_irq(irq, action);
if (retval)
kfree(action);
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index f26e534c658..8a64a4844cd 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -68,7 +68,9 @@ static int irq_affinity_write_proc(struct file *file, const char __user *buffer,
*/
cpus_and(tmp, new_value, cpu_online_map);
if (cpus_empty(tmp))
- return -EINVAL;
+ /* Special case for empty set - allow the architecture
+ code to set default SMP affinity. */
+ return select_smp_affinity(irq) ? -EINVAL : full_count;
proc_set_irq_affinity(irq, new_value);
diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c
index 015fb69ad94..99af8b05eea 100644
--- a/kernel/ksysfs.c
+++ b/kernel/ksysfs.c
@@ -15,6 +15,9 @@
#include <linux/module.h>
#include <linux/init.h>
+u64 uevent_seqnum;
+char uevent_helper[UEVENT_HELPER_PATH_LEN] = "/sbin/hotplug";
+
#define KERNEL_ATTR_RO(_name) \
static struct subsys_attribute _name##_attr = __ATTR_RO(_name)
@@ -23,11 +26,29 @@ static struct subsys_attribute _name##_attr = \
__ATTR(_name, 0644, _name##_show, _name##_store)
#ifdef CONFIG_HOTPLUG
-static ssize_t hotplug_seqnum_show(struct subsystem *subsys, char *page)
+/* current uevent sequence number */
+static ssize_t uevent_seqnum_show(struct subsystem *subsys, char *page)
+{
+ return sprintf(page, "%llu\n", (unsigned long long)uevent_seqnum);
+}
+KERNEL_ATTR_RO(uevent_seqnum);
+
+/* uevent helper program, used during early boot */
+static ssize_t uevent_helper_show(struct subsystem *subsys, char *page)
+{
+ return sprintf(page, "%s\n", uevent_helper);
+}
+static ssize_t uevent_helper_store(struct subsystem *subsys, const char *page, size_t count)
{
- return sprintf(page, "%llu\n", (unsigned long long)hotplug_seqnum);
+ if (count+1 > UEVENT_HELPER_PATH_LEN)
+ return -ENOENT;
+ memcpy(uevent_helper, page, count);
+ uevent_helper[count] = '\0';
+ if (count && uevent_helper[count-1] == '\n')
+ uevent_helper[count-1] = '\0';
+ return count;
}
-KERNEL_ATTR_RO(hotplug_seqnum);
+KERNEL_ATTR_RW(uevent_helper);
#endif
#ifdef CONFIG_KEXEC
@@ -45,7 +66,8 @@ EXPORT_SYMBOL_GPL(kernel_subsys);
static struct attribute * kernel_attrs[] = {
#ifdef CONFIG_HOTPLUG
- &hotplug_seqnum_attr.attr,
+ &uevent_seqnum_attr.attr,
+ &uevent_helper_attr.attr,
#endif
#ifdef CONFIG_KEXEC
&crash_notes_attr.attr,
diff --git a/kernel/module.c b/kernel/module.c
index 2ea929d51ad..4b06bbad49c 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -1854,8 +1854,7 @@ static struct module *load_module(void __user *umod,
kfree(args);
free_hdr:
vfree(hdr);
- if (err < 0) return ERR_PTR(err);
- else return ptr;
+ return ERR_PTR(err);
truncated:
printk(KERN_ERR "Module len %lu truncated\n", len);
diff --git a/kernel/panic.c b/kernel/panic.c
index aabc5f86fa3..c5c4ab25583 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -60,7 +60,7 @@ NORET_TYPE void panic(const char * fmt, ...)
long i;
static char buf[1024];
va_list args;
-#if defined(CONFIG_ARCH_S390)
+#if defined(CONFIG_S390)
unsigned long caller = (unsigned long) __builtin_return_address(0);
#endif
@@ -125,7 +125,7 @@ NORET_TYPE void panic(const char * fmt, ...)
printk(KERN_EMERG "Press Stop-A (L1-A) to return to the boot prom\n");
}
#endif
-#if defined(CONFIG_ARCH_S390)
+#if defined(CONFIG_S390)
disabled_wait(caller);
#endif
local_irq_enable();
diff --git a/kernel/power/disk.c b/kernel/power/disk.c
index 027322a564f..e24446f8d8c 100644
--- a/kernel/power/disk.c
+++ b/kernel/power/disk.c
@@ -24,10 +24,11 @@
extern suspend_disk_method_t pm_disk_mode;
+extern int swsusp_shrink_memory(void);
extern int swsusp_suspend(void);
-extern int swsusp_write(void);
+extern int swsusp_write(struct pbe *pblist, unsigned int nr_pages);
extern int swsusp_check(void);
-extern int swsusp_read(void);
+extern int swsusp_read(struct pbe **pblist_ptr);
extern void swsusp_close(void);
extern int swsusp_resume(void);
@@ -73,31 +74,6 @@ static void power_down(suspend_disk_method_t mode)
static int in_suspend __nosavedata = 0;
-/**
- * free_some_memory - Try to free as much memory as possible
- *
- * ... but do not OOM-kill anyone
- *
- * Notice: all userland should be stopped at this point, or
- * livelock is possible.
- */
-
-static void free_some_memory(void)
-{
- unsigned int i = 0;
- unsigned int tmp;
- unsigned long pages = 0;
- char *p = "-\\|/";
-
- printk("Freeing memory... ");
- while ((tmp = shrink_all_memory(10000))) {
- pages += tmp;
- printk("\b%c", p[i++ % 4]);
- }
- printk("\bdone (%li pages freed)\n", pages);
-}
-
-
static inline void platform_finish(void)
{
if (pm_disk_mode == PM_DISK_PLATFORM) {
@@ -127,8 +103,8 @@ static int prepare_processes(void)
}
/* Free memory before shutting down devices. */
- free_some_memory();
- return 0;
+ if (!(error = swsusp_shrink_memory()))
+ return 0;
thaw:
thaw_processes();
enable_nonboot_cpus();
@@ -176,7 +152,7 @@ int pm_suspend_disk(void)
if (in_suspend) {
device_resume();
pr_debug("PM: writing image.\n");
- error = swsusp_write();
+ error = swsusp_write(pagedir_nosave, nr_copy_pages);
if (!error)
power_down(pm_disk_mode);
else {
@@ -247,7 +223,7 @@ static int software_resume(void)
pr_debug("PM: Reading swsusp image.\n");
- if ((error = swsusp_read())) {
+ if ((error = swsusp_read(&pagedir_nosave))) {
swsusp_free();
goto Thaw;
}
@@ -363,37 +339,55 @@ static ssize_t resume_show(struct subsystem * subsys, char *buf)
MINOR(swsusp_resume_device));
}
-static ssize_t resume_store(struct subsystem * subsys, const char * buf, size_t n)
+static ssize_t resume_store(struct subsystem *subsys, const char *buf, size_t n)
{
- int len;
- char *p;
unsigned int maj, min;
- int error = -EINVAL;
dev_t res;
+ int ret = -EINVAL;
- p = memchr(buf, '\n', n);
- len = p ? p - buf : n;
+ if (sscanf(buf, "%u:%u", &maj, &min) != 2)
+ goto out;
- if (sscanf(buf, "%u:%u", &maj, &min) == 2) {
- res = MKDEV(maj,min);
- if (maj == MAJOR(res) && min == MINOR(res)) {
- down(&pm_sem);
- swsusp_resume_device = res;
- up(&pm_sem);
- printk("Attempting manual resume\n");
- noresume = 0;
- software_resume();
- }
- }
+ res = MKDEV(maj,min);
+ if (maj != MAJOR(res) || min != MINOR(res))
+ goto out;
- return error >= 0 ? n : error;
+ down(&pm_sem);
+ swsusp_resume_device = res;
+ up(&pm_sem);
+ printk("Attempting manual resume\n");
+ noresume = 0;
+ software_resume();
+ ret = n;
+out:
+ return ret;
}
power_attr(resume);
+static ssize_t image_size_show(struct subsystem * subsys, char *buf)
+{
+ return sprintf(buf, "%u\n", image_size);
+}
+
+static ssize_t image_size_store(struct subsystem * subsys, const char * buf, size_t n)
+{
+ unsigned int size;
+
+ if (sscanf(buf, "%u", &size) == 1) {
+ image_size = size;
+ return n;
+ }
+
+ return -EINVAL;
+}
+
+power_attr(image_size);
+
static struct attribute * g[] = {
&disk_attr.attr,
&resume_attr.attr,
+ &image_size_attr.attr,
NULL,
};
diff --git a/kernel/power/power.h b/kernel/power/power.h
index 6c042b5ee14..7e8492fd142 100644
--- a/kernel/power/power.h
+++ b/kernel/power/power.h
@@ -9,19 +9,13 @@
#define SUSPEND_CONSOLE (MAX_NR_CONSOLES-1)
#endif
-#define MAX_PBES ((PAGE_SIZE - sizeof(struct new_utsname) \
- - 4 - 3*sizeof(unsigned long) - sizeof(int) \
- - sizeof(void *)) / sizeof(swp_entry_t))
-
struct swsusp_info {
struct new_utsname uts;
u32 version_code;
unsigned long num_physpages;
int cpus;
unsigned long image_pages;
- unsigned long pagedir_pages;
- suspend_pagedir_t * suspend_pagedir;
- swp_entry_t pagedir[MAX_PBES];
+ unsigned long pages;
} __attribute__((aligned(PAGE_SIZE)));
@@ -48,25 +42,27 @@ static struct subsys_attribute _name##_attr = { \
extern struct subsystem power_subsys;
-extern int freeze_processes(void);
-extern void thaw_processes(void);
-
extern int pm_prepare_console(void);
extern void pm_restore_console(void);
-
/* References to section boundaries */
extern const void __nosave_begin, __nosave_end;
extern unsigned int nr_copy_pages;
-extern suspend_pagedir_t *pagedir_nosave;
-extern suspend_pagedir_t *pagedir_save;
+extern struct pbe *pagedir_nosave;
+
+/* Preferred image size in MB (default 500) */
+extern unsigned int image_size;
extern asmlinkage int swsusp_arch_suspend(void);
extern asmlinkage int swsusp_arch_resume(void);
+extern unsigned int count_data_pages(void);
extern void free_pagedir(struct pbe *pblist);
+extern void release_eaten_pages(void);
extern struct pbe *alloc_pagedir(unsigned nr_pages, gfp_t gfp_mask, int safe_needed);
-extern void create_pbe_list(struct pbe *pblist, unsigned nr_pages);
extern void swsusp_free(void);
extern int alloc_data_pages(struct pbe *pblist, gfp_t gfp_mask, int safe_needed);
+extern unsigned int snapshot_nr_pages(void);
+extern struct pbe *snapshot_pblist(void);
+extern void snapshot_pblist_set(struct pbe *pblist);
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 4a6dbcefd37..41f66365f0d 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -33,7 +33,35 @@
#include "power.h"
+struct pbe *pagedir_nosave;
+unsigned int nr_copy_pages;
+
#ifdef CONFIG_HIGHMEM
+unsigned int count_highmem_pages(void)
+{
+ struct zone *zone;
+ unsigned long zone_pfn;
+ unsigned int n = 0;
+
+ for_each_zone (zone)
+ if (is_highmem(zone)) {
+ mark_free_pages(zone);
+ for (zone_pfn = 0; zone_pfn < zone->spanned_pages; zone_pfn++) {
+ struct page *page;
+ unsigned long pfn = zone_pfn + zone->zone_start_pfn;
+ if (!pfn_valid(pfn))
+ continue;
+ page = pfn_to_page(pfn);
+ if (PageReserved(page))
+ continue;
+ if (PageNosaveFree(page))
+ continue;
+ n++;
+ }
+ }
+ return n;
+}
+
struct highmem_page {
char *data;
struct page *page;
@@ -149,17 +177,15 @@ static int saveable(struct zone *zone, unsigned long *zone_pfn)
BUG_ON(PageReserved(page) && PageNosave(page));
if (PageNosave(page))
return 0;
- if (PageReserved(page) && pfn_is_nosave(pfn)) {
- pr_debug("[nosave pfn 0x%lx]", pfn);
+ if (PageReserved(page) && pfn_is_nosave(pfn))
return 0;
- }
if (PageNosaveFree(page))
return 0;
return 1;
}
-static unsigned count_data_pages(void)
+unsigned int count_data_pages(void)
{
struct zone *zone;
unsigned long zone_pfn;
@@ -244,7 +270,7 @@ static inline void fill_pb_page(struct pbe *pbpage)
* of memory pages allocated with alloc_pagedir()
*/
-void create_pbe_list(struct pbe *pblist, unsigned int nr_pages)
+static inline void create_pbe_list(struct pbe *pblist, unsigned int nr_pages)
{
struct pbe *pbpage, *p;
unsigned int num = PBES_PER_PAGE;
@@ -261,7 +287,35 @@ void create_pbe_list(struct pbe *pblist, unsigned int nr_pages)
p->next = p + 1;
p->next = NULL;
}
- pr_debug("create_pbe_list(): initialized %d PBEs\n", num);
+}
+
+/**
+ * On resume it is necessary to trace and eventually free the unsafe
+ * pages that have been allocated, because they are needed for I/O
+ * (on x86-64 we likely will "eat" these pages once again while
+ * creating the temporary page translation tables)
+ */
+
+struct eaten_page {
+ struct eaten_page *next;
+ char padding[PAGE_SIZE - sizeof(void *)];
+};
+
+static struct eaten_page *eaten_pages = NULL;
+
+void release_eaten_pages(void)
+{
+ struct eaten_page *p, *q;
+
+ p = eaten_pages;
+ while (p) {
+ q = p->next;
+ /* We don't want swsusp_free() to free this page again */
+ ClearPageNosave(virt_to_page(p));
+ free_page((unsigned long)p);
+ p = q;
+ }
+ eaten_pages = NULL;
}
/**
@@ -282,9 +336,12 @@ static inline void *alloc_image_page(gfp_t gfp_mask, int safe_needed)
if (safe_needed)
do {
res = (void *)get_zeroed_page(gfp_mask);
- if (res && PageNosaveFree(virt_to_page(res)))
+ if (res && PageNosaveFree(virt_to_page(res))) {
/* This is for swsusp_free() */
SetPageNosave(virt_to_page(res));
+ ((struct eaten_page *)res)->next = eaten_pages;
+ eaten_pages = res;
+ }
} while (res && PageNosaveFree(virt_to_page(res)));
else
res = (void *)get_zeroed_page(gfp_mask);
@@ -332,7 +389,8 @@ struct pbe *alloc_pagedir(unsigned int nr_pages, gfp_t gfp_mask, int safe_needed
if (!pbe) { /* get_zeroed_page() failed */
free_pagedir(pblist);
pblist = NULL;
- }
+ } else
+ create_pbe_list(pblist, nr_pages);
return pblist;
}
@@ -370,8 +428,14 @@ void swsusp_free(void)
static int enough_free_mem(unsigned int nr_pages)
{
- pr_debug("swsusp: available memory: %u pages\n", nr_free_pages());
- return nr_free_pages() > (nr_pages + PAGES_FOR_IO +
+ struct zone *zone;
+ unsigned int n = 0;
+
+ for_each_zone (zone)
+ if (!is_highmem(zone))
+ n += zone->free_pages;
+ pr_debug("swsusp: available memory: %u pages\n", n);
+ return n > (nr_pages + PAGES_FOR_IO +
(nr_pages + PBES_PER_PAGE - 1) / PBES_PER_PAGE);
}
@@ -395,7 +459,6 @@ static struct pbe *swsusp_alloc(unsigned int nr_pages)
printk(KERN_ERR "suspend: Allocating pagedir failed.\n");
return NULL;
}
- create_pbe_list(pblist, nr_pages);
if (alloc_data_pages(pblist, GFP_ATOMIC | __GFP_COLD, 0)) {
printk(KERN_ERR "suspend: Allocating image pages failed.\n");
@@ -421,10 +484,6 @@ asmlinkage int swsusp_save(void)
(nr_pages + PBES_PER_PAGE - 1) / PBES_PER_PAGE,
PAGES_FOR_IO, nr_free_pages());
- /* This is needed because of the fixed size of swsusp_info */
- if (MAX_PBES < (nr_pages + PBES_PER_PAGE - 1) / PBES_PER_PAGE)
- return -ENOSPC;
-
if (!enough_free_mem(nr_pages)) {
printk(KERN_ERR "swsusp: Not enough free memory\n");
return -ENOMEM;
diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c
index c05f46e7348..55a18d26abe 100644
--- a/kernel/power/swsusp.c
+++ b/kernel/power/swsusp.c
@@ -30,8 +30,8 @@
* Alex Badea <vampire@go.ro>:
* Fixed runaway init
*
- * Andreas Steinmetz <ast@domdv.de>:
- * Added encrypted suspend option
+ * Rafael J. Wysocki <rjw@sisk.pl>
+ * Added the swap map data structure and reworked the handling of swap
*
* More state savers are welcome. Especially for the scsi layer...
*
@@ -67,44 +67,33 @@
#include <asm/tlbflush.h>
#include <asm/io.h>
-#include <linux/random.h>
-#include <linux/crypto.h>
-#include <asm/scatterlist.h>
-
#include "power.h"
+/*
+ * Preferred image size in MB (tunable via /sys/power/image_size).
+ * When it is set to N, swsusp will do its best to ensure the image
+ * size will not exceed N MB, but if that is impossible, it will
+ * try to create the smallest image possible.
+ */
+unsigned int image_size = 500;
+
#ifdef CONFIG_HIGHMEM
+unsigned int count_highmem_pages(void);
int save_highmem(void);
int restore_highmem(void);
#else
static int save_highmem(void) { return 0; }
static int restore_highmem(void) { return 0; }
+static unsigned int count_highmem_pages(void) { return 0; }
#endif
-#define CIPHER "aes"
-#define MAXKEY 32
-#define MAXIV 32
-
extern char resume_file[];
-/* Local variables that should not be affected by save */
-unsigned int nr_copy_pages __nosavedata = 0;
-
-/* Suspend pagedir is allocated before final copy, therefore it
- must be freed after resume
-
- Warning: this is even more evil than it seems. Pagedirs this file
- talks about are completely different from page directories used by
- MMU hardware.
- */
-suspend_pagedir_t *pagedir_nosave __nosavedata = NULL;
-
#define SWSUSP_SIG "S1SUSPEND"
static struct swsusp_header {
- char reserved[PAGE_SIZE - 20 - MAXKEY - MAXIV - sizeof(swp_entry_t)];
- u8 key_iv[MAXKEY+MAXIV];
- swp_entry_t swsusp_info;
+ char reserved[PAGE_SIZE - 20 - sizeof(swp_entry_t)];
+ swp_entry_t image;
char orig_sig[10];
char sig[10];
} __attribute__((packed, aligned(PAGE_SIZE))) swsusp_header;
@@ -115,140 +104,9 @@ static struct swsusp_info swsusp_info;
* Saving part...
*/
-/* We memorize in swapfile_used what swap devices are used for suspension */
-#define SWAPFILE_UNUSED 0
-#define SWAPFILE_SUSPEND 1 /* This is the suspending device */
-#define SWAPFILE_IGNORED 2 /* Those are other swap devices ignored for suspension */
-
-static unsigned short swapfile_used[MAX_SWAPFILES];
-static unsigned short root_swap;
-
-static int write_page(unsigned long addr, swp_entry_t *loc);
-static int bio_read_page(pgoff_t page_off, void *page);
-
-static u8 key_iv[MAXKEY+MAXIV];
-
-#ifdef CONFIG_SWSUSP_ENCRYPT
-
-static int crypto_init(int mode, void **mem)
-{
- int error = 0;
- int len;
- char *modemsg;
- struct crypto_tfm *tfm;
-
- modemsg = mode ? "suspend not possible" : "resume not possible";
-
- tfm = crypto_alloc_tfm(CIPHER, CRYPTO_TFM_MODE_CBC);
- if(!tfm) {
- printk(KERN_ERR "swsusp: no tfm, %s\n", modemsg);
- error = -EINVAL;
- goto out;
- }
-
- if(MAXKEY < crypto_tfm_alg_min_keysize(tfm)) {
- printk(KERN_ERR "swsusp: key buffer too small, %s\n", modemsg);
- error = -ENOKEY;
- goto fail;
- }
-
- if (mode)
- get_random_bytes(key_iv, MAXKEY+MAXIV);
-
- len = crypto_tfm_alg_max_keysize(tfm);
- if (len > MAXKEY)
- len = MAXKEY;
-
- if (crypto_cipher_setkey(tfm, key_iv, len)) {
- printk(KERN_ERR "swsusp: key setup failure, %s\n", modemsg);
- error = -EKEYREJECTED;
- goto fail;
- }
-
- len = crypto_tfm_alg_ivsize(tfm);
-
- if (MAXIV < len) {
- printk(KERN_ERR "swsusp: iv buffer too small, %s\n", modemsg);
- error = -EOVERFLOW;
- goto fail;
- }
-
- crypto_cipher_set_iv(tfm, key_iv+MAXKEY, len);
-
- *mem=(void *)tfm;
-
- goto out;
-
-fail: crypto_free_tfm(tfm);
-out: return error;
-}
-
-static __inline__ void crypto_exit(void *mem)
-{
- crypto_free_tfm((struct crypto_tfm *)mem);
-}
-
-static __inline__ int crypto_write(struct pbe *p, void *mem)
-{
- int error = 0;
- struct scatterlist src, dst;
-
- src.page = virt_to_page(p->address);
- src.offset = 0;
- src.length = PAGE_SIZE;
- dst.page = virt_to_page((void *)&swsusp_header);
- dst.offset = 0;
- dst.length = PAGE_SIZE;
-
- error = crypto_cipher_encrypt((struct crypto_tfm *)mem, &dst, &src,
- PAGE_SIZE);
-
- if (!error)
- error = write_page((unsigned long)&swsusp_header,
- &(p->swap_address));
- return error;
-}
-
-static __inline__ int crypto_read(struct pbe *p, void *mem)
-{
- int error = 0;
- struct scatterlist src, dst;
-
- error = bio_read_page(swp_offset(p->swap_address), (void *)p->address);
- if (!error) {
- src.offset = 0;
- src.length = PAGE_SIZE;
- dst.offset = 0;
- dst.length = PAGE_SIZE;
- src.page = dst.page = virt_to_page((void *)p->address);
-
- error = crypto_cipher_decrypt((struct crypto_tfm *)mem, &dst,
- &src, PAGE_SIZE);
- }
- return error;
-}
-#else
-static __inline__ int crypto_init(int mode, void *mem)
-{
- return 0;
-}
-
-static __inline__ void crypto_exit(void *mem)
-{
-}
-
-static __inline__ int crypto_write(struct pbe *p, void *mem)
-{
- return write_page(p->address, &(p->swap_address));
-}
+static unsigned short root_swap = 0xffff;
-static __inline__ int crypto_read(struct pbe *p, void *mem)
-{
- return bio_read_page(swp_offset(p->swap_address), (void *)p->address);
-}
-#endif
-
-static int mark_swapfiles(swp_entry_t prev)
+static int mark_swapfiles(swp_entry_t start)
{
int error;
@@ -259,8 +117,7 @@ static int mark_swapfiles(swp_entry_t prev)
!memcmp("SWAPSPACE2",swsusp_header.sig, 10)) {
memcpy(swsusp_header.orig_sig,swsusp_header.sig, 10);
memcpy(swsusp_header.sig,SWSUSP_SIG, 10);
- memcpy(swsusp_header.key_iv, key_iv, MAXKEY+MAXIV);
- swsusp_header.swsusp_info = prev;
+ swsusp_header.image = start;
error = rw_swap_page_sync(WRITE,
swp_entry(root_swap, 0),
virt_to_page((unsigned long)
@@ -283,7 +140,7 @@ static int mark_swapfiles(swp_entry_t prev)
* devfs, since the resume code can only recognize the form /dev/hda4,
* but the suspend code would see the long name.)
*/
-static int is_resume_device(const struct swap_info_struct *swap_info)
+static inline int is_resume_device(const struct swap_info_struct *swap_info)
{
struct file *file = swap_info->swap_file;
struct inode *inode = file->f_dentry->d_inode;
@@ -294,54 +151,22 @@ static int is_resume_device(const struct swap_info_struct *swap_info)
static int swsusp_swap_check(void) /* This is called before saving image */
{
- int i, len;
-
- len=strlen(resume_file);
- root_swap = 0xFFFF;
-
- spin_lock(&swap_lock);
- for (i=0; i<MAX_SWAPFILES; i++) {
- if (!(swap_info[i].flags & SWP_WRITEOK)) {
- swapfile_used[i]=SWAPFILE_UNUSED;
- } else {
- if (!len) {
- printk(KERN_WARNING "resume= option should be used to set suspend device" );
- if (root_swap == 0xFFFF) {
- swapfile_used[i] = SWAPFILE_SUSPEND;
- root_swap = i;
- } else
- swapfile_used[i] = SWAPFILE_IGNORED;
- } else {
- /* we ignore all swap devices that are not the resume_file */
- if (is_resume_device(&swap_info[i])) {
- swapfile_used[i] = SWAPFILE_SUSPEND;
- root_swap = i;
- } else {
- swapfile_used[i] = SWAPFILE_IGNORED;
- }
- }
- }
- }
- spin_unlock(&swap_lock);
- return (root_swap != 0xffff) ? 0 : -ENODEV;
-}
-
-/**
- * This is called after saving image so modification
- * will be lost after resume... and that's what we want.
- * we make the device unusable. A new call to
- * lock_swapdevices can unlock the devices.
- */
-static void lock_swapdevices(void)
-{
int i;
+ if (!swsusp_resume_device)
+ return -ENODEV;
spin_lock(&swap_lock);
- for (i = 0; i< MAX_SWAPFILES; i++)
- if (swapfile_used[i] == SWAPFILE_IGNORED) {
- swap_info[i].flags ^= SWP_WRITEOK;
+ for (i = 0; i < MAX_SWAPFILES; i++) {
+ if (!(swap_info[i].flags & SWP_WRITEOK))
+ continue;
+ if (is_resume_device(swap_info + i)) {
+ spin_unlock(&swap_lock);
+ root_swap = i;
+ return 0;
}
+ }
spin_unlock(&swap_lock);
+ return -ENODEV;
}
/**
@@ -359,72 +184,217 @@ static void lock_swapdevices(void)
static int write_page(unsigned long addr, swp_entry_t *loc)
{
swp_entry_t entry;
- int error = 0;
+ int error = -ENOSPC;
- entry = get_swap_page();
- if (swp_offset(entry) &&
- swapfile_used[swp_type(entry)] == SWAPFILE_SUSPEND) {
- error = rw_swap_page_sync(WRITE, entry,
- virt_to_page(addr));
- if (error == -EIO)
- error = 0;
- if (!error)
+ entry = get_swap_page_of_type(root_swap);
+ if (swp_offset(entry)) {
+ error = rw_swap_page_sync(WRITE, entry, virt_to_page(addr));
+ if (!error || error == -EIO)
*loc = entry;
- } else
- error = -ENOSPC;
+ }
return error;
}
/**
- * data_free - Free the swap entries used by the saved image.
+ * Swap map-handling functions
+ *
+ * The swap map is a data structure used for keeping track of each page
+ * written to the swap. It consists of many swap_map_page structures,
+ * each of which contains an array of MAP_PAGE_SIZE swap entries.
+ * These structures are linked together with the help of either the
+ * .next (in memory) or the .next_swap (in swap) member.
*
- * Walk the list of used swap entries and free each one.
- * This is only used for cleanup when suspend fails.
+ * The swap map is created during suspend. At that time we need to keep
+ * it in memory, because we have to free all of the allocated swap
+ * entries if an error occurs. The memory needed is preallocated
+ * so that we know in advance if there's enough of it.
+ *
+ * The first swap_map_page structure is filled with the swap entries that
+ * correspond to the first MAP_PAGE_SIZE data pages written to swap and
+ * so on. After all of the data pages have been written, the order
+ * of the swap_map_page structures in the map is reversed so that they
+ * can be read from swap in the original order. This causes the data
+ * pages to be loaded in exactly the same order in which they have been
+ * saved.
+ *
+ * During resume we only need to use one swap_map_page structure
+ * at a time, which means that we only need to use two memory pages for
+ * reading the image - one for reading the swap_map_page structures
+ * and the second for reading the data pages from swap.
*/
-static void data_free(void)
+
+#define MAP_PAGE_SIZE ((PAGE_SIZE - sizeof(swp_entry_t) - sizeof(void *)) \
+ / sizeof(swp_entry_t))
+
+struct swap_map_page {
+ swp_entry_t entries[MAP_PAGE_SIZE];
+ swp_entry_t next_swap;
+ struct swap_map_page *next;
+};
+
+static inline void free_swap_map(struct swap_map_page *swap_map)
{
- swp_entry_t entry;
- struct pbe *p;
+ struct swap_map_page *swp;
- for_each_pbe (p, pagedir_nosave) {
- entry = p->swap_address;
- if (entry.val)
- swap_free(entry);
- else
- break;
+ while (swap_map) {
+ swp = swap_map->next;
+ free_page((unsigned long)swap_map);
+ swap_map = swp;
}
}
+static struct swap_map_page *alloc_swap_map(unsigned int nr_pages)
+{
+ struct swap_map_page *swap_map, *swp;
+ unsigned n = 0;
+
+ if (!nr_pages)
+ return NULL;
+
+ pr_debug("alloc_swap_map(): nr_pages = %d\n", nr_pages);
+ swap_map = (struct swap_map_page *)get_zeroed_page(GFP_ATOMIC);
+ swp = swap_map;
+ for (n = MAP_PAGE_SIZE; n < nr_pages; n += MAP_PAGE_SIZE) {
+ swp->next = (struct swap_map_page *)get_zeroed_page(GFP_ATOMIC);
+ swp = swp->next;
+ if (!swp) {
+ free_swap_map(swap_map);
+ return NULL;
+ }
+ }
+ return swap_map;
+}
+
/**
- * data_write - Write saved image to swap.
- *
- * Walk the list of pages in the image and sync each one to swap.
+ * reverse_swap_map - reverse the order of pages in the swap map
+ * @swap_map
*/
-static int data_write(void)
+
+static inline struct swap_map_page *reverse_swap_map(struct swap_map_page *swap_map)
{
- int error = 0, i = 0;
- unsigned int mod = nr_copy_pages / 100;
- struct pbe *p;
- void *tfm;
+ struct swap_map_page *prev, *next;
+
+ prev = NULL;
+ while (swap_map) {
+ next = swap_map->next;
+ swap_map->next = prev;
+ prev = swap_map;
+ swap_map = next;
+ }
+ return prev;
+}
- if ((error = crypto_init(1, &tfm)))
- return error;
+/**
+ * free_swap_map_entries - free the swap entries allocated to store
+ * the swap map @swap_map (this is only called in case of an error)
+ */
+static inline void free_swap_map_entries(struct swap_map_page *swap_map)
+{
+ while (swap_map) {
+ if (swap_map->next_swap.val)
+ swap_free(swap_map->next_swap);
+ swap_map = swap_map->next;
+ }
+}
- if (!mod)
- mod = 1;
+/**
+ * save_swap_map - save the swap map used for tracing the data pages
+ * stored in the swap
+ */
- printk( "Writing data to swap (%d pages)... ", nr_copy_pages );
- for_each_pbe (p, pagedir_nosave) {
- if (!(i%mod))
- printk( "\b\b\b\b%3d%%", i / mod );
- if ((error = crypto_write(p, tfm))) {
- crypto_exit(tfm);
+static int save_swap_map(struct swap_map_page *swap_map, swp_entry_t *start)
+{
+ swp_entry_t entry = (swp_entry_t){0};
+ int error;
+
+ while (swap_map) {
+ swap_map->next_swap = entry;
+ if ((error = write_page((unsigned long)swap_map, &entry)))
<