diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/audit.c | 14 | ||||
-rw-r--r-- | kernel/auditfilter.c | 2 | ||||
-rw-r--r-- | kernel/auditsc.c | 6 | ||||
-rw-r--r-- | kernel/cpu.c | 138 | ||||
-rw-r--r-- | kernel/cpuset.c | 7 | ||||
-rw-r--r-- | kernel/fork.c | 5 | ||||
-rw-r--r-- | kernel/irq/handle.c | 2 | ||||
-rw-r--r-- | kernel/lockdep.c | 9 | ||||
-rw-r--r-- | kernel/module.c | 6 | ||||
-rw-r--r-- | kernel/panic.c | 13 | ||||
-rw-r--r-- | kernel/pid.c | 12 | ||||
-rw-r--r-- | kernel/power/Kconfig | 22 | ||||
-rw-r--r-- | kernel/power/Makefile | 2 | ||||
-rw-r--r-- | kernel/power/disk.c | 11 | ||||
-rw-r--r-- | kernel/power/main.c | 40 | ||||
-rw-r--r-- | kernel/power/power.h | 59 | ||||
-rw-r--r-- | kernel/power/smp.c | 62 | ||||
-rw-r--r-- | kernel/power/snapshot.c | 1155 | ||||
-rw-r--r-- | kernel/power/swap.c | 270 | ||||
-rw-r--r-- | kernel/power/swsusp.c | 14 | ||||
-rw-r--r-- | kernel/power/user.c | 17 | ||||
-rw-r--r-- | kernel/printk.c | 3 | ||||
-rw-r--r-- | kernel/profile.c | 16 | ||||
-rw-r--r-- | kernel/ptrace.c | 54 | ||||
-rw-r--r-- | kernel/relay.c | 2 | ||||
-rw-r--r-- | kernel/resource.c | 32 | ||||
-rw-r--r-- | kernel/sched.c | 54 | ||||
-rw-r--r-- | kernel/signal.c | 5 | ||||
-rw-r--r-- | kernel/spinlock.c | 5 | ||||
-rw-r--r-- | kernel/sys.c | 31 | ||||
-rw-r--r-- | kernel/sysctl.c | 147 | ||||
-rw-r--r-- | kernel/unwind.c | 35 |
32 files changed, 1506 insertions, 744 deletions
diff --git a/kernel/audit.c b/kernel/audit.c index 963fd15c962..f9889ee7782 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -244,7 +244,7 @@ static int audit_set_rate_limit(int limit, uid_t loginuid, u32 sid) char *ctx = NULL; u32 len; int rc; - if ((rc = selinux_ctxid_to_string(sid, &ctx, &len))) + if ((rc = selinux_sid_to_string(sid, &ctx, &len))) return rc; else audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, @@ -267,7 +267,7 @@ static int audit_set_backlog_limit(int limit, uid_t loginuid, u32 sid) char *ctx = NULL; u32 len; int rc; - if ((rc = selinux_ctxid_to_string(sid, &ctx, &len))) + if ((rc = selinux_sid_to_string(sid, &ctx, &len))) return rc; else audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, @@ -293,7 +293,7 @@ static int audit_set_enabled(int state, uid_t loginuid, u32 sid) char *ctx = NULL; u32 len; int rc; - if ((rc = selinux_ctxid_to_string(sid, &ctx, &len))) + if ((rc = selinux_sid_to_string(sid, &ctx, &len))) return rc; else audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, @@ -321,7 +321,7 @@ static int audit_set_failure(int state, uid_t loginuid, u32 sid) char *ctx = NULL; u32 len; int rc; - if ((rc = selinux_ctxid_to_string(sid, &ctx, &len))) + if ((rc = selinux_sid_to_string(sid, &ctx, &len))) return rc; else audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, @@ -538,7 +538,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) if (status_get->mask & AUDIT_STATUS_PID) { int old = audit_pid; if (sid) { - if ((err = selinux_ctxid_to_string( + if ((err = selinux_sid_to_string( sid, &ctx, &len))) return err; else @@ -576,7 +576,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) "user pid=%d uid=%u auid=%u", pid, uid, loginuid); if (sid) { - if (selinux_ctxid_to_string( + if (selinux_sid_to_string( sid, &ctx, &len)) { audit_log_format(ab, " ssid=%u", sid); @@ -614,7 +614,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) loginuid, sid); break; case AUDIT_SIGNAL_INFO: - err = selinux_ctxid_to_string(audit_sig_sid, &ctx, &len); + err = selinux_sid_to_string(audit_sig_sid, &ctx, &len); if (err) return err; sig_data = kmalloc(sizeof(*sig_data) + len, GFP_KERNEL); diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c index a44879b0c72..1a58a81fb09 100644 --- a/kernel/auditfilter.c +++ b/kernel/auditfilter.c @@ -1398,7 +1398,7 @@ static void audit_log_rule_change(uid_t loginuid, u32 sid, char *action, if (sid) { char *ctx = NULL; u32 len; - if (selinux_ctxid_to_string(sid, &ctx, &len)) + if (selinux_sid_to_string(sid, &ctx, &len)) audit_log_format(ab, " ssid=%u", sid); else audit_log_format(ab, " subj=%s", ctx); diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 1bd8827a010..fb83c5cb8c3 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -385,7 +385,7 @@ static int audit_filter_rules(struct task_struct *tsk, logged upon error */ if (f->se_rule) { if (need_sid) { - selinux_task_ctxid(tsk, &sid); + selinux_get_task_sid(tsk, &sid); need_sid = 0; } result = selinux_audit_rule_match(sid, f->type, @@ -898,7 +898,7 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts if (axi->osid != 0) { char *ctx = NULL; u32 len; - if (selinux_ctxid_to_string( + if (selinux_sid_to_string( axi->osid, &ctx, &len)) { audit_log_format(ab, " osid=%u", axi->osid); @@ -1005,7 +1005,7 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts if (n->osid != 0) { char *ctx = NULL; u32 len; - if (selinux_ctxid_to_string( + if (selinux_sid_to_string( n->osid, &ctx, &len)) { audit_log_format(ab, " osid=%u", n->osid); call_panic = 2; diff --git a/kernel/cpu.c b/kernel/cpu.c index f230f9ae01c..32c96628463 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -21,6 +21,11 @@ static DEFINE_MUTEX(cpu_bitmask_lock); static __cpuinitdata BLOCKING_NOTIFIER_HEAD(cpu_chain); +/* If set, cpu_up and cpu_down will return -EBUSY and do nothing. + * Should always be manipulated under cpu_add_remove_lock + */ +static int cpu_hotplug_disabled; + #ifdef CONFIG_HOTPLUG_CPU /* Crappy recursive lock-takers in cpufreq! Complain loudly about idiots */ @@ -108,30 +113,25 @@ static int take_cpu_down(void *unused) return 0; } -int cpu_down(unsigned int cpu) +/* Requires cpu_add_remove_lock to be held */ +static int _cpu_down(unsigned int cpu) { int err; struct task_struct *p; cpumask_t old_allowed, tmp; - mutex_lock(&cpu_add_remove_lock); - if (num_online_cpus() == 1) { - err = -EBUSY; - goto out; - } + if (num_online_cpus() == 1) + return -EBUSY; - if (!cpu_online(cpu)) { - err = -EINVAL; - goto out; - } + if (!cpu_online(cpu)) + return -EINVAL; err = blocking_notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE, (void *)(long)cpu); if (err == NOTIFY_BAD) { printk("%s: attempt to take down CPU %u failed\n", __FUNCTION__, cpu); - err = -EINVAL; - goto out; + return -EINVAL; } /* Ensure that we are not runnable on dying cpu */ @@ -179,22 +179,32 @@ out_thread: err = kthread_stop(p); out_allowed: set_cpus_allowed(current, old_allowed); -out: + return err; +} + +int cpu_down(unsigned int cpu) +{ + int err = 0; + + mutex_lock(&cpu_add_remove_lock); + if (cpu_hotplug_disabled) + err = -EBUSY; + else + err = _cpu_down(cpu); + mutex_unlock(&cpu_add_remove_lock); return err; } #endif /*CONFIG_HOTPLUG_CPU*/ -int __devinit cpu_up(unsigned int cpu) +/* Requires cpu_add_remove_lock to be held */ +static int __devinit _cpu_up(unsigned int cpu) { int ret; void *hcpu = (void *)(long)cpu; - mutex_lock(&cpu_add_remove_lock); - if (cpu_online(cpu) || !cpu_present(cpu)) { - ret = -EINVAL; - goto out; - } + if (cpu_online(cpu) || !cpu_present(cpu)) + return -EINVAL; ret = blocking_notifier_call_chain(&cpu_chain, CPU_UP_PREPARE, hcpu); if (ret == NOTIFY_BAD) { @@ -219,7 +229,95 @@ out_notify: if (ret != 0) blocking_notifier_call_chain(&cpu_chain, CPU_UP_CANCELED, hcpu); + + return ret; +} + +int __devinit cpu_up(unsigned int cpu) +{ + int err = 0; + + mutex_lock(&cpu_add_remove_lock); + if (cpu_hotplug_disabled) + err = -EBUSY; + else + err = _cpu_up(cpu); + + mutex_unlock(&cpu_add_remove_lock); + return err; +} + +#ifdef CONFIG_SUSPEND_SMP +static cpumask_t frozen_cpus; + +int disable_nonboot_cpus(void) +{ + int cpu, first_cpu, error; + + mutex_lock(&cpu_add_remove_lock); + first_cpu = first_cpu(cpu_present_map); + if (!cpu_online(first_cpu)) { + error = _cpu_up(first_cpu); + if (error) { + printk(KERN_ERR "Could not bring CPU%d up.\n", + first_cpu); + goto out; + } + } + error = set_cpus_allowed(current, cpumask_of_cpu(first_cpu)); + if (error) { + printk(KERN_ERR "Could not run on CPU%d\n", first_cpu); + goto out; + } + /* We take down all of the non-boot CPUs in one shot to avoid races + * with the userspace trying to use the CPU hotplug at the same time + */ + cpus_clear(frozen_cpus); + printk("Disabling non-boot CPUs ...\n"); + for_each_online_cpu(cpu) { + if (cpu == first_cpu) + continue; + error = _cpu_down(cpu); + if (!error) { + cpu_set(cpu, frozen_cpus); + printk("CPU%d is down\n", cpu); + } else { + printk(KERN_ERR "Error taking CPU%d down: %d\n", + cpu, error); + break; + } + } + if (!error) { + BUG_ON(num_online_cpus() > 1); + /* Make sure the CPUs won't be enabled by someone else */ + cpu_hotplug_disabled = 1; + } else { + printk(KERN_ERR "Non-boot CPUs are not disabled"); + } out: mutex_unlock(&cpu_add_remove_lock); - return ret; + return error; +} + +void enable_nonboot_cpus(void) +{ + int cpu, error; + + /* Allow everyone to use the CPU hotplug again */ + mutex_lock(&cpu_add_remove_lock); + cpu_hotplug_disabled = 0; + mutex_unlock(&cpu_add_remove_lock); + + printk("Enabling non-boot CPUs ...\n"); + for_each_cpu_mask(cpu, frozen_cpus) { + error = cpu_up(cpu); + if (!error) { + printk("CPU%d is up\n", cpu); + continue; + } + printk(KERN_WARNING "Error taking CPU%d up: %d\n", + cpu, error); + } + cpus_clear(frozen_cpus); } +#endif diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 4ea6f0dc2fc..1b32c2c04c1 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -289,7 +289,6 @@ static struct inode *cpuset_new_inode(mode_t mode) inode->i_mode = mode; inode->i_uid = current->fsuid; inode->i_gid = current->fsgid; - inode->i_blksize = PAGE_CACHE_SIZE; inode->i_blocks = 0; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; inode->i_mapping->backing_dev_info = &cpuset_backing_dev_info; @@ -2245,7 +2244,7 @@ int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl) int i; for (i = 0; zl->zones[i]; i++) { - int nid = zl->zones[i]->zone_pgdat->node_id; + int nid = zone_to_nid(zl->zones[i]); if (node_isset(nid, current->mems_allowed)) return 1; @@ -2316,9 +2315,9 @@ int __cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask) const struct cpuset *cs; /* current cpuset ancestors */ int allowed; /* is allocation in zone z allowed? */ - if (in_interrupt()) + if (in_interrupt() || (gfp_mask & __GFP_THISNODE)) return 1; - node = z->zone_pgdat->node_id; + node = zone_to_nid(z); might_sleep_if(!(gfp_mask & __GFP_HARDWALL)); if (node_isset(node, current->mems_allowed)) return 1; diff --git a/kernel/fork.c b/kernel/fork.c index 8f76adf1c6a..802b1cf0e63 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -45,6 +45,7 @@ #include <linux/cn_proc.h> #include <linux/delayacct.h> #include <linux/taskstats_kern.h> +#include <linux/random.h> #include <asm/pgtable.h> #include <asm/pgalloc.h> @@ -175,6 +176,10 @@ static struct task_struct *dup_task_struct(struct task_struct *orig) tsk->thread_info = ti; setup_thread_stack(tsk, orig); +#ifdef CONFIG_CC_STACKPROTECTOR + tsk->stack_canary = get_random_int(); +#endif + /* One for us, one for whoever does the "release_task()" (usually parent) */ atomic_set(&tsk->usage,2); atomic_set(&tsk->fs_excl, 0); diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index 48a53f68af9..4c6cdbaed66 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -154,6 +154,7 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct pt_regs *regs, return retval; } +#ifndef CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ /** * __do_IRQ - original all in one highlevel IRQ handler * @irq: the interrupt number @@ -253,6 +254,7 @@ out: return 1; } +#endif #ifdef CONFIG_TRACE_IRQFLAGS diff --git a/kernel/lockdep.c b/kernel/lockdep.c index 9bad1788451..c088e5542e8 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -224,7 +224,14 @@ static int save_trace(struct stack_trace *trace) trace->max_entries = MAX_STACK_TRACE_ENTRIES - nr_stack_trace_entries; trace->entries = stack_trace + nr_stack_trace_entries; - save_stack_trace(trace, NULL, 0, 3); + trace->skip = 3; + trace->all_contexts = 0; + + /* Make sure to not recurse in case the the unwinder needs to tak +e locks. */ + lockdep_off(); + save_stack_trace(trace, NULL); + lockdep_on(); trace->max_entries = trace->nr_entries; diff --git a/kernel/module.c b/kernel/module.c index 2a19cd47c04..b7fe6e84096 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1054,6 +1054,12 @@ static int mod_sysfs_setup(struct module *mod, { int err; + if (!module_subsys.kset.subsys) { + printk(KERN_ERR "%s: module_subsys not initialized\n", + mod->name); + err = -EINVAL; + goto out; + } memset(&mod->mkobj.kobj, 0, sizeof(mod->mkobj.kobj)); err = kobject_set_name(&mod->mkobj.kobj, "%s", mod->name); if (err) diff --git a/kernel/panic.c b/kernel/panic.c index 8010b9b17ac..6ceb664fb52 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -21,6 +21,7 @@ #include <linux/debug_locks.h> int panic_on_oops; +int panic_on_unrecovered_nmi; int tainted; static int pause_on_oops; static int pause_on_oops_flag; @@ -270,3 +271,15 @@ void oops_exit(void) { do_oops_enter_exit(); } + +#ifdef CONFIG_CC_STACKPROTECTOR +/* + * Called when gcc's -fstack-protector feature is used, and + * gcc detects corruption of the on-stack canary value + */ +void __stack_chk_fail(void) +{ + panic("stack-protector: Kernel stack is corrupted"); +} +EXPORT_SYMBOL(__stack_chk_fail); +#endif diff --git a/kernel/pid.c b/kernel/pid.c index 93e212f2067..8387e8c6819 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -223,9 +223,6 @@ int fastcall attach_pid(struct task_struct *task, enum pid_type type, int nr) struct pid_link *link; struct pid *pid; - WARN_ON(!task->pid); /* to be removed soon */ - WARN_ON(!nr); /* to be removed soon */ - link = &task->pids[type]; link->pid = pid = find_pid(nr); hlist_add_head_rcu(&link->node, &pid->tasks[type]); @@ -252,6 +249,15 @@ void fastcall detach_pid(struct task_struct *task, enum pid_type type) free_pid(pid); } +/* transfer_pid is an optimization of attach_pid(new), detach_pid(old) */ +void fastcall transfer_pid(struct task_struct *old, struct task_struct *new, + enum pid_type type) +{ + new->pids[type].pid = old->pids[type].pid; + hlist_replace_rcu(&old->pids[type].node, &new->pids[type].node); + old->pids[type].pid = NULL; +} + struct task_struct * fastcall pid_task(struct pid *pid, enum pid_type type) { struct task_struct *result = NULL; diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index 619ecabf7c5..825068ca347 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -36,6 +36,17 @@ config PM_DEBUG code. This is helpful when debugging and reporting various PM bugs, like suspend support. +config DISABLE_CONSOLE_SUSPEND + bool "Keep console(s) enabled during suspend/resume (DANGEROUS)" + depends on PM && PM_DEBUG + default n + ---help--- + This option turns off the console suspend mechanism that prevents + debug messages from reaching the console during the suspend/resume + operations. This may be helpful when debugging device drivers' + suspend/resume routines, but may itself lead to problems, for example + if netconsole is used. + config PM_TRACE bool "Suspend/resume event tracing" depends on PM && PM_DEBUG && X86_32 && EXPERIMENTAL @@ -53,6 +64,17 @@ config PM_TRACE CAUTION: this option will cause your machine's real-time clock to be set to an invalid time after a resume. +config PM_SYSFS_DEPRECATED + bool "Driver model /sys/devices/.../power/state files (DEPRECATED)" + depends on PM && SYSFS + default n + help + The driver model started out with a sysfs file intended to provide + a userspace hook for device power management. This feature has never + worked very well, except for limited testing purposes, and so it will + be removed. It's not clear that a generic mechanism could really + handle the wide variability of device power states; any replacements + are likely to be bus or driver specific. config SOFTWARE_SUSPEND bool "Software Suspend" diff --git a/kernel/power/Makefile b/kernel/power/Makefile index 8d0af3d37a4..38725f526af 100644 --- a/kernel/power/Makefile +++ b/kernel/power/Makefile @@ -7,6 +7,4 @@ obj-y := main.o process.o console.o obj-$(CONFIG_PM_LEGACY) += pm.o obj-$(CONFIG_SOFTWARE_SUSPEND) += swsusp.o disk.o snapshot.o swap.o user.o -obj-$(CONFIG_SUSPEND_SMP) += smp.o - obj-$(CONFIG_MAGIC_SYSRQ) += poweroff.o diff --git a/kernel/power/disk.c b/kernel/power/disk.c index e13e7406784..d7223494279 100644 --- a/kernel/power/disk.c +++ b/kernel/power/disk.c @@ -18,6 +18,7 @@ #include <linux/fs.h> #include <linux/mount.h> #include <linux/pm.h> +#include <linux/cpu.h> #include "power.h" @@ -72,7 +73,10 @@ static int prepare_processes(void) int error; pm_prepare_console(); - disable_nonboot_cpus(); + + error = disable_nonboot_cpus(); + if (error) + goto enable_cpus; if (freeze_processes()) { error = -EBUSY; @@ -84,6 +88,7 @@ static int prepare_processes(void) return 0; thaw: thaw_processes(); +enable_cpus: enable_nonboot_cpus(); pm_restore_console(); return error; @@ -98,7 +103,7 @@ static void unprepare_processes(void) } /** - * pm_suspend_disk - The granpappy of power management. + * pm_suspend_disk - The granpappy of hibernation power management. * * If we're going through the firmware, then get it over with quickly. * @@ -207,7 +212,7 @@ static int software_resume(void) pr_debug("PM: Preparing devices for restore.\n"); - if ((error = device_suspend(PMSG_FREEZE))) { + if ((error = device_suspend(PMSG_PRETHAW))) { printk("Some devices failed to suspend\n"); swsusp_free(); goto Thaw; diff --git a/kernel/power/main.c b/kernel/power/main.c index 6d295c77679..873228c71da 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -16,6 +16,8 @@ #include <linux/init.h> #include <linux/pm.h> #include <linux/console.h> +#include <linux/cpu.h> +#include <linux/resume-trace.h> #include "power.h" @@ -51,7 +53,7 @@ void pm_set_ops(struct pm_ops * ops) static int suspend_prepare(suspend_state_t state) { - int error = 0; + int error; unsigned int free_pages; if (!pm_ops || !pm_ops->enter) @@ -59,12 +61,9 @@ static int suspend_prepare(suspend_state_t state) pm_prepare_console(); - disable_nonboot_cpus(); - - if (num_online_cpus() != 1) { - error = -EPERM; + error = disable_nonboot_cpus(); + if (error) goto Enable_cpu; - } if (freeze_processes()) { error = -EAGAIN; @@ -283,10 +282,39 @@ static ssize_t state_store(struct subsystem * subsys, const char * buf, size_t n power_attr(state); +#ifdef CONFIG_PM_TRACE +int pm_trace_enabled; + +static ssize_t pm_trace_show(struct subsystem * subsys, char * buf) +{ + return sprintf(buf, "%d\n", pm_trace_enabled); +} + +static ssize_t +pm_trace_store(struct subsystem * subsys, const char * buf, size_t n) +{ + int val; + + if (sscanf(buf, "%d", &val) == 1) { + pm_trace_enabled = !!val; + return n; + } + return -EINVAL; +} + +power_attr(pm_trace); + +static struct attribute * g[] = { + &state_attr.attr, + &pm_trace_attr.attr, + NULL, +}; +#else static struct attribute * g[] = { &state_attr.attr, NULL, }; +#endif /* CONFIG_PM_TRACE */ static struct attribute_group attr_group = { .attrs = g, diff --git a/kernel/power/power.h b/kernel/power/power.h index 57a792982fb..bfe999f7b27 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h @@ -38,8 +38,6 @@ extern struct subsystem power_subsys; /* References to section boundaries */ extern const void __nosave_begin, __nosave_end; -extern struct pbe *pagedir_nosave; - /* Preferred image size in bytes (default 500 MB) */ extern unsigned long image_size; extern int in_suspend; @@ -50,21 +48,62 @@ extern asmlinkage int swsusp_arch_resume(void); extern unsigned int count_data_pages(void); +/** + * Auxiliary structure used for reading the snapshot image data and + * metadata from and writing them to the list of page backup entries + * (PBEs) which is the main data structure of swsusp. + * + * Using struct snapshot_handle we can transfer the image, including its + * metadata, as a continuous sequence of bytes with the help of + * snapshot_read_next() and snapshot_write_next(). + * + * The code that writes the image to a storage or transfers it to + * the user land is required to use snapshot_read_next() for this + * purpose and it should not make any assumptions regarding the internal + * structure of the image. Similarly, the code that reads the image from + * a storage or transfers it from the user land is required to use + * snapshot_write_next(). + * + * This may allow us to change the internal structure of the image + * in the future with considerably less effort. + */ + struct snapshot_handle { - loff_t offset; - unsigned int page; - unsigned int page_offset; - unsigned int prev; - struct pbe *pbe, *last_pbe; - void *buffer; - unsigned int buf_offset; + loff_t offset; /* number of the last byte ready for reading + * or writing in the sequence + */ + unsigned int cur; /* number of the block of PAGE_SIZE bytes the + * next operation will refer to (ie. current) + */ + unsigned int cur_offset; /* offset with respect to the current + * block (for the next operation) + */ + unsigned int prev; /* number of the block of PAGE_SIZE bytes that + * was the current one previously + */ + void *buffer; /* address of the block to read from + * or write to + */ + unsigned int buf_offset; /* location to read from or write to, + * given as a displacement from 'buffer' + */ + int sync_read; /* Set to one to notify the caller of + * snapshot_write_next() that it may + * need to call wait_on_bio_chain() + */ }; +/* This macro returns the address from/to which the caller of + * snapshot_read_next()/snapshot_write_next() is allowed to + * read/write data after the function returns + */ #define data_of(handle) ((handle).buffer + (handle).buf_offset) +extern unsigned int snapshot_additional_pages(struct zone *zone); extern int snapshot_read_next(struct snapshot_handle *handle, size_t count); extern int snapshot_write_next(struct snapshot_handle *handle, size_t count); -int snapshot_image_loaded(struct snapshot_handle *handle); +extern int snapshot_image_loaded(struct snapshot_handle *handle); +extern void snapshot_free_unused_memory(struct snapshot_handle *handle); #define SNAPSHOT_IOC_MAGIC '3' #define SNAPSHOT_FREEZE _IO(SNAPSHOT_IOC_MAGIC, 1) diff --git a/kernel/power/smp.c b/kernel/power/smp.c deleted file mode 100644 index 5957312b2d6..00000000000 --- a/kernel/power/smp.c +++ /dev/null @@ -1,62 +0,0 @@ -/* - * drivers/power/smp.c - Functions for stopping other CPUs. - * - * Copyright 2004 Pavel Machek <pavel@suse.cz> - * Copyright (C) 2002-2003 Nigel Cunningham <ncunningham@clear.net.nz> - * - * This file is released under the GPLv2. - */ - -#undef DEBUG - -#include <linux/smp_lock.h> -#include <linux/interrupt.h> -#include <linux/suspend.h> -#include <linux/module.h> -#include <linux/cpu.h> -#include <asm/atomic.h> -#include <asm/tlbflush.h> - -/* This is protected by pm_sem semaphore */ -static cpumask_t frozen_cpus; - -void disable_nonboot_cpus(void) -{ - int cpu, error; - - error = 0; - cpus_clear(frozen_cpus); - printk("Freezing cpus ...\n"); - for_each_online_cpu(cpu) { - if (cpu == 0) - continue; - error = cpu_down(cpu); - if (!error) { - cpu_set(cpu, frozen_cpus); - printk("CPU%d is down\n", cpu); - continue; - } - printk("Error taking cpu %d down: %d\n", cpu, error); - } - BUG_ON(raw_smp_processor_id() != 0); - if (error) - panic("cpus not sleeping"); -} - -void enable_nonboot_cpus(void) -{ - int cpu, error; - - printk("Thawing cpus ...\n"); - for_each_cpu_mask(cpu, frozen_cpus) { - error = cpu_up(cpu); - if (!error) { - printk("CPU%d is up\n", cpu); - continue; - } - printk("Error taking cpu %d up: %d\n", cpu, error); - panic("Not enough cpus"); - } - cpus_clear(frozen_cpus); -} - diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 75d4886e648..1b84313cbab 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -34,10 +34,12 @@ #include "power.h" -struct pbe *pagedir_nosave; +/* List of PBEs used for creating and restoring the suspend image */ +struct pbe *restore_pblist; + static unsigned int nr_copy_pages; static unsigned int nr_meta_pages; -static unsigned long *buffer; +static void *buffer; #ifdef CONFIG_HIGHMEM unsigned int count_highmem_pages(void) @@ -156,240 +158,637 @@ static inline int save_highmem(void) {return 0;} static inline int restore_highmem(void) {return 0;} #endif -static int pfn_is_nosave(unsigned long pfn) +/** + * @safe_needed - on resume, for storing the PBE list and the image, + * we can only use memory pages that do not conflict with the pages + * used before suspend. + * + * The unsafe pages are marked with the PG_nosave_free flag + * and we count them using unsafe_pages + */ + +#define PG_ANY 0 +#define PG_SAFE 1 +#define PG_UNSAFE_CLEAR 1 +#define PG_UNSAFE_KEEP 0 + +static unsigned int allocated_unsafe_pages; + +static void *alloc_image_page(gfp_t gfp_mask, int safe_needed) |