aboutsummaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/audit.c14
-rw-r--r--kernel/auditfilter.c2
-rw-r--r--kernel/auditsc.c6
-rw-r--r--kernel/cpu.c138
-rw-r--r--kernel/cpuset.c7
-rw-r--r--kernel/fork.c5
-rw-r--r--kernel/irq/handle.c2
-rw-r--r--kernel/lockdep.c9
-rw-r--r--kernel/module.c6
-rw-r--r--kernel/panic.c13
-rw-r--r--kernel/pid.c12
-rw-r--r--kernel/power/Kconfig22
-rw-r--r--kernel/power/Makefile2
-rw-r--r--kernel/power/disk.c11
-rw-r--r--kernel/power/main.c40
-rw-r--r--kernel/power/power.h59
-rw-r--r--kernel/power/smp.c62
-rw-r--r--kernel/power/snapshot.c1155
-rw-r--r--kernel/power/swap.c270
-rw-r--r--kernel/power/swsusp.c14
-rw-r--r--kernel/power/user.c17
-rw-r--r--kernel/printk.c3
-rw-r--r--kernel/profile.c16
-rw-r--r--kernel/ptrace.c54
-rw-r--r--kernel/relay.c2
-rw-r--r--kernel/resource.c32
-rw-r--r--kernel/sched.c54
-rw-r--r--kernel/signal.c5
-rw-r--r--kernel/spinlock.c5
-rw-r--r--kernel/sys.c31
-rw-r--r--kernel/sysctl.c147
-rw-r--r--kernel/unwind.c35
32 files changed, 1506 insertions, 744 deletions
diff --git a/kernel/audit.c b/kernel/audit.c
index 963fd15c962..f9889ee7782 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -244,7 +244,7 @@ static int audit_set_rate_limit(int limit, uid_t loginuid, u32 sid)
char *ctx = NULL;
u32 len;
int rc;
- if ((rc = selinux_ctxid_to_string(sid, &ctx, &len)))
+ if ((rc = selinux_sid_to_string(sid, &ctx, &len)))
return rc;
else
audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
@@ -267,7 +267,7 @@ static int audit_set_backlog_limit(int limit, uid_t loginuid, u32 sid)
char *ctx = NULL;
u32 len;
int rc;
- if ((rc = selinux_ctxid_to_string(sid, &ctx, &len)))
+ if ((rc = selinux_sid_to_string(sid, &ctx, &len)))
return rc;
else
audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
@@ -293,7 +293,7 @@ static int audit_set_enabled(int state, uid_t loginuid, u32 sid)
char *ctx = NULL;
u32 len;
int rc;
- if ((rc = selinux_ctxid_to_string(sid, &ctx, &len)))
+ if ((rc = selinux_sid_to_string(sid, &ctx, &len)))
return rc;
else
audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
@@ -321,7 +321,7 @@ static int audit_set_failure(int state, uid_t loginuid, u32 sid)
char *ctx = NULL;
u32 len;
int rc;
- if ((rc = selinux_ctxid_to_string(sid, &ctx, &len)))
+ if ((rc = selinux_sid_to_string(sid, &ctx, &len)))
return rc;
else
audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
@@ -538,7 +538,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
if (status_get->mask & AUDIT_STATUS_PID) {
int old = audit_pid;
if (sid) {
- if ((err = selinux_ctxid_to_string(
+ if ((err = selinux_sid_to_string(
sid, &ctx, &len)))
return err;
else
@@ -576,7 +576,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
"user pid=%d uid=%u auid=%u",
pid, uid, loginuid);
if (sid) {
- if (selinux_ctxid_to_string(
+ if (selinux_sid_to_string(
sid, &ctx, &len)) {
audit_log_format(ab,
" ssid=%u", sid);
@@ -614,7 +614,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
loginuid, sid);
break;
case AUDIT_SIGNAL_INFO:
- err = selinux_ctxid_to_string(audit_sig_sid, &ctx, &len);
+ err = selinux_sid_to_string(audit_sig_sid, &ctx, &len);
if (err)
return err;
sig_data = kmalloc(sizeof(*sig_data) + len, GFP_KERNEL);
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index a44879b0c72..1a58a81fb09 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -1398,7 +1398,7 @@ static void audit_log_rule_change(uid_t loginuid, u32 sid, char *action,
if (sid) {
char *ctx = NULL;
u32 len;
- if (selinux_ctxid_to_string(sid, &ctx, &len))
+ if (selinux_sid_to_string(sid, &ctx, &len))
audit_log_format(ab, " ssid=%u", sid);
else
audit_log_format(ab, " subj=%s", ctx);
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 1bd8827a010..fb83c5cb8c3 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -385,7 +385,7 @@ static int audit_filter_rules(struct task_struct *tsk,
logged upon error */
if (f->se_rule) {
if (need_sid) {
- selinux_task_ctxid(tsk, &sid);
+ selinux_get_task_sid(tsk, &sid);
need_sid = 0;
}
result = selinux_audit_rule_match(sid, f->type,
@@ -898,7 +898,7 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
if (axi->osid != 0) {
char *ctx = NULL;
u32 len;
- if (selinux_ctxid_to_string(
+ if (selinux_sid_to_string(
axi->osid, &ctx, &len)) {
audit_log_format(ab, " osid=%u",
axi->osid);
@@ -1005,7 +1005,7 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
if (n->osid != 0) {
char *ctx = NULL;
u32 len;
- if (selinux_ctxid_to_string(
+ if (selinux_sid_to_string(
n->osid, &ctx, &len)) {
audit_log_format(ab, " osid=%u", n->osid);
call_panic = 2;
diff --git a/kernel/cpu.c b/kernel/cpu.c
index f230f9ae01c..32c96628463 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -21,6 +21,11 @@ static DEFINE_MUTEX(cpu_bitmask_lock);
static __cpuinitdata BLOCKING_NOTIFIER_HEAD(cpu_chain);
+/* If set, cpu_up and cpu_down will return -EBUSY and do nothing.
+ * Should always be manipulated under cpu_add_remove_lock
+ */
+static int cpu_hotplug_disabled;
+
#ifdef CONFIG_HOTPLUG_CPU
/* Crappy recursive lock-takers in cpufreq! Complain loudly about idiots */
@@ -108,30 +113,25 @@ static int take_cpu_down(void *unused)
return 0;
}
-int cpu_down(unsigned int cpu)
+/* Requires cpu_add_remove_lock to be held */
+static int _cpu_down(unsigned int cpu)
{
int err;
struct task_struct *p;
cpumask_t old_allowed, tmp;
- mutex_lock(&cpu_add_remove_lock);
- if (num_online_cpus() == 1) {
- err = -EBUSY;
- goto out;
- }
+ if (num_online_cpus() == 1)
+ return -EBUSY;
- if (!cpu_online(cpu)) {
- err = -EINVAL;
- goto out;
- }
+ if (!cpu_online(cpu))
+ return -EINVAL;
err = blocking_notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE,
(void *)(long)cpu);
if (err == NOTIFY_BAD) {
printk("%s: attempt to take down CPU %u failed\n",
__FUNCTION__, cpu);
- err = -EINVAL;
- goto out;
+ return -EINVAL;
}
/* Ensure that we are not runnable on dying cpu */
@@ -179,22 +179,32 @@ out_thread:
err = kthread_stop(p);
out_allowed:
set_cpus_allowed(current, old_allowed);
-out:
+ return err;
+}
+
+int cpu_down(unsigned int cpu)
+{
+ int err = 0;
+
+ mutex_lock(&cpu_add_remove_lock);
+ if (cpu_hotplug_disabled)
+ err = -EBUSY;
+ else
+ err = _cpu_down(cpu);
+
mutex_unlock(&cpu_add_remove_lock);
return err;
}
#endif /*CONFIG_HOTPLUG_CPU*/
-int __devinit cpu_up(unsigned int cpu)
+/* Requires cpu_add_remove_lock to be held */
+static int __devinit _cpu_up(unsigned int cpu)
{
int ret;
void *hcpu = (void *)(long)cpu;
- mutex_lock(&cpu_add_remove_lock);
- if (cpu_online(cpu) || !cpu_present(cpu)) {
- ret = -EINVAL;
- goto out;
- }
+ if (cpu_online(cpu) || !cpu_present(cpu))
+ return -EINVAL;
ret = blocking_notifier_call_chain(&cpu_chain, CPU_UP_PREPARE, hcpu);
if (ret == NOTIFY_BAD) {
@@ -219,7 +229,95 @@ out_notify:
if (ret != 0)
blocking_notifier_call_chain(&cpu_chain,
CPU_UP_CANCELED, hcpu);
+
+ return ret;
+}
+
+int __devinit cpu_up(unsigned int cpu)
+{
+ int err = 0;
+
+ mutex_lock(&cpu_add_remove_lock);
+ if (cpu_hotplug_disabled)
+ err = -EBUSY;
+ else
+ err = _cpu_up(cpu);
+
+ mutex_unlock(&cpu_add_remove_lock);
+ return err;
+}
+
+#ifdef CONFIG_SUSPEND_SMP
+static cpumask_t frozen_cpus;
+
+int disable_nonboot_cpus(void)
+{
+ int cpu, first_cpu, error;
+
+ mutex_lock(&cpu_add_remove_lock);
+ first_cpu = first_cpu(cpu_present_map);
+ if (!cpu_online(first_cpu)) {
+ error = _cpu_up(first_cpu);
+ if (error) {
+ printk(KERN_ERR "Could not bring CPU%d up.\n",
+ first_cpu);
+ goto out;
+ }
+ }
+ error = set_cpus_allowed(current, cpumask_of_cpu(first_cpu));
+ if (error) {
+ printk(KERN_ERR "Could not run on CPU%d\n", first_cpu);
+ goto out;
+ }
+ /* We take down all of the non-boot CPUs in one shot to avoid races
+ * with the userspace trying to use the CPU hotplug at the same time
+ */
+ cpus_clear(frozen_cpus);
+ printk("Disabling non-boot CPUs ...\n");
+ for_each_online_cpu(cpu) {
+ if (cpu == first_cpu)
+ continue;
+ error = _cpu_down(cpu);
+ if (!error) {
+ cpu_set(cpu, frozen_cpus);
+ printk("CPU%d is down\n", cpu);
+ } else {
+ printk(KERN_ERR "Error taking CPU%d down: %d\n",
+ cpu, error);
+ break;
+ }
+ }
+ if (!error) {
+ BUG_ON(num_online_cpus() > 1);
+ /* Make sure the CPUs won't be enabled by someone else */
+ cpu_hotplug_disabled = 1;
+ } else {
+ printk(KERN_ERR "Non-boot CPUs are not disabled");
+ }
out:
mutex_unlock(&cpu_add_remove_lock);
- return ret;
+ return error;
+}
+
+void enable_nonboot_cpus(void)
+{
+ int cpu, error;
+
+ /* Allow everyone to use the CPU hotplug again */
+ mutex_lock(&cpu_add_remove_lock);
+ cpu_hotplug_disabled = 0;
+ mutex_unlock(&cpu_add_remove_lock);
+
+ printk("Enabling non-boot CPUs ...\n");
+ for_each_cpu_mask(cpu, frozen_cpus) {
+ error = cpu_up(cpu);
+ if (!error) {
+ printk("CPU%d is up\n", cpu);
+ continue;
+ }
+ printk(KERN_WARNING "Error taking CPU%d up: %d\n",
+ cpu, error);
+ }
+ cpus_clear(frozen_cpus);
}
+#endif
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 4ea6f0dc2fc..1b32c2c04c1 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -289,7 +289,6 @@ static struct inode *cpuset_new_inode(mode_t mode)
inode->i_mode = mode;
inode->i_uid = current->fsuid;
inode->i_gid = current->fsgid;
- inode->i_blksize = PAGE_CACHE_SIZE;
inode->i_blocks = 0;
inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
inode->i_mapping->backing_dev_info = &cpuset_backing_dev_info;
@@ -2245,7 +2244,7 @@ int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl)
int i;
for (i = 0; zl->zones[i]; i++) {
- int nid = zl->zones[i]->zone_pgdat->node_id;
+ int nid = zone_to_nid(zl->zones[i]);
if (node_isset(nid, current->mems_allowed))
return 1;
@@ -2316,9 +2315,9 @@ int __cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask)
const struct cpuset *cs; /* current cpuset ancestors */
int allowed; /* is allocation in zone z allowed? */
- if (in_interrupt())
+ if (in_interrupt() || (gfp_mask & __GFP_THISNODE))
return 1;
- node = z->zone_pgdat->node_id;
+ node = zone_to_nid(z);
might_sleep_if(!(gfp_mask & __GFP_HARDWALL));
if (node_isset(node, current->mems_allowed))
return 1;
diff --git a/kernel/fork.c b/kernel/fork.c
index 8f76adf1c6a..802b1cf0e63 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -45,6 +45,7 @@
#include <linux/cn_proc.h>
#include <linux/delayacct.h>
#include <linux/taskstats_kern.h>
+#include <linux/random.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
@@ -175,6 +176,10 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
tsk->thread_info = ti;
setup_thread_stack(tsk, orig);
+#ifdef CONFIG_CC_STACKPROTECTOR
+ tsk->stack_canary = get_random_int();
+#endif
+
/* One for us, one for whoever does the "release_task()" (usually parent) */
atomic_set(&tsk->usage,2);
atomic_set(&tsk->fs_excl, 0);
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 48a53f68af9..4c6cdbaed66 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -154,6 +154,7 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct pt_regs *regs,
return retval;
}
+#ifndef CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ
/**
* __do_IRQ - original all in one highlevel IRQ handler
* @irq: the interrupt number
@@ -253,6 +254,7 @@ out:
return 1;
}
+#endif
#ifdef CONFIG_TRACE_IRQFLAGS
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 9bad1788451..c088e5542e8 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -224,7 +224,14 @@ static int save_trace(struct stack_trace *trace)
trace->max_entries = MAX_STACK_TRACE_ENTRIES - nr_stack_trace_entries;
trace->entries = stack_trace + nr_stack_trace_entries;
- save_stack_trace(trace, NULL, 0, 3);
+ trace->skip = 3;
+ trace->all_contexts = 0;
+
+ /* Make sure to not recurse in case the the unwinder needs to tak
+e locks. */
+ lockdep_off();
+ save_stack_trace(trace, NULL);
+ lockdep_on();
trace->max_entries = trace->nr_entries;
diff --git a/kernel/module.c b/kernel/module.c
index 2a19cd47c04..b7fe6e84096 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -1054,6 +1054,12 @@ static int mod_sysfs_setup(struct module *mod,
{
int err;
+ if (!module_subsys.kset.subsys) {
+ printk(KERN_ERR "%s: module_subsys not initialized\n",
+ mod->name);
+ err = -EINVAL;
+ goto out;
+ }
memset(&mod->mkobj.kobj, 0, sizeof(mod->mkobj.kobj));
err = kobject_set_name(&mod->mkobj.kobj, "%s", mod->name);
if (err)
diff --git a/kernel/panic.c b/kernel/panic.c
index 8010b9b17ac..6ceb664fb52 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -21,6 +21,7 @@
#include <linux/debug_locks.h>
int panic_on_oops;
+int panic_on_unrecovered_nmi;
int tainted;
static int pause_on_oops;
static int pause_on_oops_flag;
@@ -270,3 +271,15 @@ void oops_exit(void)
{
do_oops_enter_exit();
}
+
+#ifdef CONFIG_CC_STACKPROTECTOR
+/*
+ * Called when gcc's -fstack-protector feature is used, and
+ * gcc detects corruption of the on-stack canary value
+ */
+void __stack_chk_fail(void)
+{
+ panic("stack-protector: Kernel stack is corrupted");
+}
+EXPORT_SYMBOL(__stack_chk_fail);
+#endif
diff --git a/kernel/pid.c b/kernel/pid.c
index 93e212f2067..8387e8c6819 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -223,9 +223,6 @@ int fastcall attach_pid(struct task_struct *task, enum pid_type type, int nr)
struct pid_link *link;
struct pid *pid;
- WARN_ON(!task->pid); /* to be removed soon */
- WARN_ON(!nr); /* to be removed soon */
-
link = &task->pids[type];
link->pid = pid = find_pid(nr);
hlist_add_head_rcu(&link->node, &pid->tasks[type]);
@@ -252,6 +249,15 @@ void fastcall detach_pid(struct task_struct *task, enum pid_type type)
free_pid(pid);
}
+/* transfer_pid is an optimization of attach_pid(new), detach_pid(old) */
+void fastcall transfer_pid(struct task_struct *old, struct task_struct *new,
+ enum pid_type type)
+{
+ new->pids[type].pid = old->pids[type].pid;
+ hlist_replace_rcu(&old->pids[type].node, &new->pids[type].node);
+ old->pids[type].pid = NULL;
+}
+
struct task_struct * fastcall pid_task(struct pid *pid, enum pid_type type)
{
struct task_struct *result = NULL;
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index 619ecabf7c5..825068ca347 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -36,6 +36,17 @@ config PM_DEBUG
code. This is helpful when debugging and reporting various PM bugs,
like suspend support.
+config DISABLE_CONSOLE_SUSPEND
+ bool "Keep console(s) enabled during suspend/resume (DANGEROUS)"
+ depends on PM && PM_DEBUG
+ default n
+ ---help---
+ This option turns off the console suspend mechanism that prevents
+ debug messages from reaching the console during the suspend/resume
+ operations. This may be helpful when debugging device drivers'
+ suspend/resume routines, but may itself lead to problems, for example
+ if netconsole is used.
+
config PM_TRACE
bool "Suspend/resume event tracing"
depends on PM && PM_DEBUG && X86_32 && EXPERIMENTAL
@@ -53,6 +64,17 @@ config PM_TRACE
CAUTION: this option will cause your machine's real-time clock to be
set to an invalid time after a resume.
+config PM_SYSFS_DEPRECATED
+ bool "Driver model /sys/devices/.../power/state files (DEPRECATED)"
+ depends on PM && SYSFS
+ default n
+ help
+ The driver model started out with a sysfs file intended to provide
+ a userspace hook for device power management. This feature has never
+ worked very well, except for limited testing purposes, and so it will
+ be removed. It's not clear that a generic mechanism could really
+ handle the wide variability of device power states; any replacements
+ are likely to be bus or driver specific.
config SOFTWARE_SUSPEND
bool "Software Suspend"
diff --git a/kernel/power/Makefile b/kernel/power/Makefile
index 8d0af3d37a4..38725f526af 100644
--- a/kernel/power/Makefile
+++ b/kernel/power/Makefile
@@ -7,6 +7,4 @@ obj-y := main.o process.o console.o
obj-$(CONFIG_PM_LEGACY) += pm.o
obj-$(CONFIG_SOFTWARE_SUSPEND) += swsusp.o disk.o snapshot.o swap.o user.o
-obj-$(CONFIG_SUSPEND_SMP) += smp.o
-
obj-$(CONFIG_MAGIC_SYSRQ) += poweroff.o
diff --git a/kernel/power/disk.c b/kernel/power/disk.c
index e13e7406784..d7223494279 100644
--- a/kernel/power/disk.c
+++ b/kernel/power/disk.c
@@ -18,6 +18,7 @@
#include <linux/fs.h>
#include <linux/mount.h>
#include <linux/pm.h>
+#include <linux/cpu.h>
#include "power.h"
@@ -72,7 +73,10 @@ static int prepare_processes(void)
int error;
pm_prepare_console();
- disable_nonboot_cpus();
+
+ error = disable_nonboot_cpus();
+ if (error)
+ goto enable_cpus;
if (freeze_processes()) {
error = -EBUSY;
@@ -84,6 +88,7 @@ static int prepare_processes(void)
return 0;
thaw:
thaw_processes();
+enable_cpus:
enable_nonboot_cpus();
pm_restore_console();
return error;
@@ -98,7 +103,7 @@ static void unprepare_processes(void)
}
/**
- * pm_suspend_disk - The granpappy of power management.
+ * pm_suspend_disk - The granpappy of hibernation power management.
*
* If we're going through the firmware, then get it over with quickly.
*
@@ -207,7 +212,7 @@ static int software_resume(void)
pr_debug("PM: Preparing devices for restore.\n");
- if ((error = device_suspend(PMSG_FREEZE))) {
+ if ((error = device_suspend(PMSG_PRETHAW))) {
printk("Some devices failed to suspend\n");
swsusp_free();
goto Thaw;
diff --git a/kernel/power/main.c b/kernel/power/main.c
index 6d295c77679..873228c71da 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -16,6 +16,8 @@
#include <linux/init.h>
#include <linux/pm.h>
#include <linux/console.h>
+#include <linux/cpu.h>
+#include <linux/resume-trace.h>
#include "power.h"
@@ -51,7 +53,7 @@ void pm_set_ops(struct pm_ops * ops)
static int suspend_prepare(suspend_state_t state)
{
- int error = 0;
+ int error;
unsigned int free_pages;
if (!pm_ops || !pm_ops->enter)
@@ -59,12 +61,9 @@ static int suspend_prepare(suspend_state_t state)
pm_prepare_console();
- disable_nonboot_cpus();
-
- if (num_online_cpus() != 1) {
- error = -EPERM;
+ error = disable_nonboot_cpus();
+ if (error)
goto Enable_cpu;
- }
if (freeze_processes()) {
error = -EAGAIN;
@@ -283,10 +282,39 @@ static ssize_t state_store(struct subsystem * subsys, const char * buf, size_t n
power_attr(state);
+#ifdef CONFIG_PM_TRACE
+int pm_trace_enabled;
+
+static ssize_t pm_trace_show(struct subsystem * subsys, char * buf)
+{
+ return sprintf(buf, "%d\n", pm_trace_enabled);
+}
+
+static ssize_t
+pm_trace_store(struct subsystem * subsys, const char * buf, size_t n)
+{
+ int val;
+
+ if (sscanf(buf, "%d", &val) == 1) {
+ pm_trace_enabled = !!val;
+ return n;
+ }
+ return -EINVAL;
+}
+
+power_attr(pm_trace);
+
+static struct attribute * g[] = {
+ &state_attr.attr,
+ &pm_trace_attr.attr,
+ NULL,
+};
+#else
static struct attribute * g[] = {
&state_attr.attr,
NULL,
};
+#endif /* CONFIG_PM_TRACE */
static struct attribute_group attr_group = {
.attrs = g,
diff --git a/kernel/power/power.h b/kernel/power/power.h
index 57a792982fb..bfe999f7b27 100644
--- a/kernel/power/power.h
+++ b/kernel/power/power.h
@@ -38,8 +38,6 @@ extern struct subsystem power_subsys;
/* References to section boundaries */
extern const void __nosave_begin, __nosave_end;
-extern struct pbe *pagedir_nosave;
-
/* Preferred image size in bytes (default 500 MB) */
extern unsigned long image_size;
extern int in_suspend;
@@ -50,21 +48,62 @@ extern asmlinkage int swsusp_arch_resume(void);
extern unsigned int count_data_pages(void);
+/**
+ * Auxiliary structure used for reading the snapshot image data and
+ * metadata from and writing them to the list of page backup entries
+ * (PBEs) which is the main data structure of swsusp.
+ *
+ * Using struct snapshot_handle we can transfer the image, including its
+ * metadata, as a continuous sequence of bytes with the help of
+ * snapshot_read_next() and snapshot_write_next().
+ *
+ * The code that writes the image to a storage or transfers it to
+ * the user land is required to use snapshot_read_next() for this
+ * purpose and it should not make any assumptions regarding the internal
+ * structure of the image. Similarly, the code that reads the image from
+ * a storage or transfers it from the user land is required to use
+ * snapshot_write_next().
+ *
+ * This may allow us to change the internal structure of the image
+ * in the future with considerably less effort.
+ */
+
struct snapshot_handle {
- loff_t offset;
- unsigned int page;
- unsigned int page_offset;
- unsigned int prev;
- struct pbe *pbe, *last_pbe;
- void *buffer;
- unsigned int buf_offset;
+ loff_t offset; /* number of the last byte ready for reading
+ * or writing in the sequence
+ */
+ unsigned int cur; /* number of the block of PAGE_SIZE bytes the
+ * next operation will refer to (ie. current)
+ */
+ unsigned int cur_offset; /* offset with respect to the current
+ * block (for the next operation)
+ */
+ unsigned int prev; /* number of the block of PAGE_SIZE bytes that
+ * was the current one previously
+ */
+ void *buffer; /* address of the block to read from
+ * or write to
+ */
+ unsigned int buf_offset; /* location to read from or write to,
+ * given as a displacement from 'buffer'
+ */
+ int sync_read; /* Set to one to notify the caller of
+ * snapshot_write_next() that it may
+ * need to call wait_on_bio_chain()
+ */
};
+/* This macro returns the address from/to which the caller of
+ * snapshot_read_next()/snapshot_write_next() is allowed to
+ * read/write data after the function returns
+ */
#define data_of(handle) ((handle).buffer + (handle).buf_offset)
+extern unsigned int snapshot_additional_pages(struct zone *zone);
extern int snapshot_read_next(struct snapshot_handle *handle, size_t count);
extern int snapshot_write_next(struct snapshot_handle *handle, size_t count);
-int snapshot_image_loaded(struct snapshot_handle *handle);
+extern int snapshot_image_loaded(struct snapshot_handle *handle);
+extern void snapshot_free_unused_memory(struct snapshot_handle *handle);
#define SNAPSHOT_IOC_MAGIC '3'
#define SNAPSHOT_FREEZE _IO(SNAPSHOT_IOC_MAGIC, 1)
diff --git a/kernel/power/smp.c b/kernel/power/smp.c
deleted file mode 100644
index 5957312b2d6..00000000000
--- a/kernel/power/smp.c
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * drivers/power/smp.c - Functions for stopping other CPUs.
- *
- * Copyright 2004 Pavel Machek <pavel@suse.cz>
- * Copyright (C) 2002-2003 Nigel Cunningham <ncunningham@clear.net.nz>
- *
- * This file is released under the GPLv2.
- */
-
-#undef DEBUG
-
-#include <linux/smp_lock.h>
-#include <linux/interrupt.h>
-#include <linux/suspend.h>
-#include <linux/module.h>
-#include <linux/cpu.h>
-#include <asm/atomic.h>
-#include <asm/tlbflush.h>
-
-/* This is protected by pm_sem semaphore */
-static cpumask_t frozen_cpus;
-
-void disable_nonboot_cpus(void)
-{
- int cpu, error;
-
- error = 0;
- cpus_clear(frozen_cpus);
- printk("Freezing cpus ...\n");
- for_each_online_cpu(cpu) {
- if (cpu == 0)
- continue;
- error = cpu_down(cpu);
- if (!error) {
- cpu_set(cpu, frozen_cpus);
- printk("CPU%d is down\n", cpu);
- continue;
- }
- printk("Error taking cpu %d down: %d\n", cpu, error);
- }
- BUG_ON(raw_smp_processor_id() != 0);
- if (error)
- panic("cpus not sleeping");
-}
-
-void enable_nonboot_cpus(void)
-{
- int cpu, error;
-
- printk("Thawing cpus ...\n");
- for_each_cpu_mask(cpu, frozen_cpus) {
- error = cpu_up(cpu);
- if (!error) {
- printk("CPU%d is up\n", cpu);
- continue;
- }
- printk("Error taking cpu %d up: %d\n", cpu, error);
- panic("Not enough cpus");
- }
- cpus_clear(frozen_cpus);
-}
-
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 75d4886e648..1b84313cbab 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -34,10 +34,12 @@
#include "power.h"
-struct pbe *pagedir_nosave;
+/* List of PBEs used for creating and restoring the suspend image */
+struct pbe *restore_pblist;
+
static unsigned int nr_copy_pages;
static unsigned int nr_meta_pages;
-static unsigned long *buffer;
+static void *buffer;
#ifdef CONFIG_HIGHMEM
unsigned int count_highmem_pages(void)
@@ -156,240 +158,637 @@ static inline int save_highmem(void) {return 0;}
static inline int restore_highmem(void) {return 0;}
#endif
-static int pfn_is_nosave(unsigned long pfn)
+/**
+ * @safe_needed - on resume, for storing the PBE list and the image,
+ * we can only use memory pages that do not conflict with the pages
+ * used before suspend.
+ *
+ * The unsafe pages are marked with the PG_nosave_free flag
+ * and we count them using unsafe_pages
+ */
+
+#define PG_ANY 0
+#define PG_SAFE 1
+#define PG_UNSAFE_CLEAR 1
+#define PG_UNSAFE_KEEP 0
+
+static unsigned int allocated_unsafe_pages;
+
+static void *alloc_image_page(gfp_t gfp_mask, int safe_needed)