aboutsummaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/Kconfig.hz2
-rw-r--r--kernel/capability.c338
-rw-r--r--kernel/fork.c9
-rw-r--r--kernel/irq/manage.c64
-rw-r--r--kernel/kmod.c4
-rw-r--r--kernel/kthread.c4
-rw-r--r--kernel/power/Kconfig11
-rw-r--r--kernel/power/main.c194
-rw-r--r--kernel/power/poweroff.c4
-rw-r--r--kernel/power/process.c2
-rw-r--r--kernel/power/snapshot.c88
-rw-r--r--kernel/printk.c2
-rw-r--r--kernel/sched_rt.c2
-rw-r--r--kernel/softirq.c2
-rw-r--r--kernel/sys_ni.c4
-rw-r--r--kernel/sysctl.c10
-rw-r--r--kernel/time/tick-sched.c12
-rw-r--r--kernel/workqueue.c39
18 files changed, 567 insertions, 224 deletions
diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz
index 2a202a84675..382dd5a8b2d 100644
--- a/kernel/Kconfig.hz
+++ b/kernel/Kconfig.hz
@@ -55,4 +55,4 @@ config HZ
default 1000 if HZ_1000
config SCHED_HRTICK
- def_bool HIGH_RES_TIMERS
+ def_bool HIGH_RES_TIMERS && USE_GENERIC_SMP_HELPERS
diff --git a/kernel/capability.c b/kernel/capability.c
index 901e0fdc3ff..0101e847603 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -115,11 +115,208 @@ static int cap_validate_magic(cap_user_header_t header, unsigned *tocopy)
return 0;
}
+#ifndef CONFIG_SECURITY_FILE_CAPABILITIES
+
+/*
+ * Without filesystem capability support, we nominally support one process
+ * setting the capabilities of another
+ */
+static inline int cap_get_target_pid(pid_t pid, kernel_cap_t *pEp,
+ kernel_cap_t *pIp, kernel_cap_t *pPp)
+{
+ struct task_struct *target;
+ int ret;
+
+ spin_lock(&task_capability_lock);
+ read_lock(&tasklist_lock);
+
+ if (pid && pid != task_pid_vnr(current)) {
+ target = find_task_by_vpid(pid);
+ if (!target) {
+ ret = -ESRCH;
+ goto out;
+ }
+ } else
+ target = current;
+
+ ret = security_capget(target, pEp, pIp, pPp);
+
+out:
+ read_unlock(&tasklist_lock);
+ spin_unlock(&task_capability_lock);
+
+ return ret;
+}
+
+/*
+ * cap_set_pg - set capabilities for all processes in a given process
+ * group. We call this holding task_capability_lock and tasklist_lock.
+ */
+static inline int cap_set_pg(int pgrp_nr, kernel_cap_t *effective,
+ kernel_cap_t *inheritable,
+ kernel_cap_t *permitted)
+{
+ struct task_struct *g, *target;
+ int ret = -EPERM;
+ int found = 0;
+ struct pid *pgrp;
+
+ spin_lock(&task_capability_lock);
+ read_lock(&tasklist_lock);
+
+ pgrp = find_vpid(pgrp_nr);
+ do_each_pid_task(pgrp, PIDTYPE_PGID, g) {
+ target = g;
+ while_each_thread(g, target) {
+ if (!security_capset_check(target, effective,
+ inheritable, permitted)) {
+ security_capset_set(target, effective,
+ inheritable, permitted);
+ ret = 0;
+ }
+ found = 1;
+ }
+ } while_each_pid_task(pgrp, PIDTYPE_PGID, g);
+
+ read_unlock(&tasklist_lock);
+ spin_unlock(&task_capability_lock);
+
+ if (!found)
+ ret = 0;
+ return ret;
+}
+
/*
- * For sys_getproccap() and sys_setproccap(), any of the three
- * capability set pointers may be NULL -- indicating that that set is
- * uninteresting and/or not to be changed.
+ * cap_set_all - set capabilities for all processes other than init
+ * and self. We call this holding task_capability_lock and tasklist_lock.
*/
+static inline int cap_set_all(kernel_cap_t *effective,
+ kernel_cap_t *inheritable,
+ kernel_cap_t *permitted)
+{
+ struct task_struct *g, *target;
+ int ret = -EPERM;
+ int found = 0;
+
+ spin_lock(&task_capability_lock);
+ read_lock(&tasklist_lock);
+
+ do_each_thread(g, target) {
+ if (target == current
+ || is_container_init(target->group_leader))
+ continue;
+ found = 1;
+ if (security_capset_check(target, effective, inheritable,
+ permitted))
+ continue;
+ ret = 0;
+ security_capset_set(target, effective, inheritable, permitted);
+ } while_each_thread(g, target);
+
+ read_unlock(&tasklist_lock);
+ spin_unlock(&task_capability_lock);
+
+ if (!found)
+ ret = 0;
+
+ return ret;
+}
+
+/*
+ * Given the target pid does not refer to the current process we
+ * need more elaborate support... (This support is not present when
+ * filesystem capabilities are configured.)
+ */
+static inline int do_sys_capset_other_tasks(pid_t pid, kernel_cap_t *effective,
+ kernel_cap_t *inheritable,
+ kernel_cap_t *permitted)
+{
+ struct task_struct *target;
+ int ret;
+
+ if (!capable(CAP_SETPCAP))
+ return -EPERM;
+
+ if (pid == -1) /* all procs other than current and init */
+ return cap_set_all(effective, inheritable, permitted);
+
+ else if (pid < 0) /* all procs in process group */
+ return cap_set_pg(-pid, effective, inheritable, permitted);
+
+ /* target != current */
+ spin_lock(&task_capability_lock);
+ read_lock(&tasklist_lock);
+
+ target = find_task_by_vpid(pid);
+ if (!target)
+ ret = -ESRCH;
+ else {
+ ret = security_capset_check(target, effective, inheritable,
+ permitted);
+
+ /* having verified that the proposed changes are legal,
+ we now put them into effect. */
+ if (!ret)
+ security_capset_set(target, effective, inheritable,
+ permitted);
+ }
+
+ read_unlock(&tasklist_lock);
+ spin_unlock(&task_capability_lock);
+
+ return ret;
+}
+
+#else /* ie., def CONFIG_SECURITY_FILE_CAPABILITIES */
+
+/*
+ * If we have configured with filesystem capability support, then the
+ * only thing that can change the capabilities of the current process
+ * is the current process. As such, we can't be in this code at the
+ * same time as we are in the process of setting capabilities in this
+ * process. The net result is that we can limit our use of locks to
+ * when we are reading the caps of another process.
+ */
+static inline int cap_get_target_pid(pid_t pid, kernel_cap_t *pEp,
+ kernel_cap_t *pIp, kernel_cap_t *pPp)
+{
+ int ret;
+
+ if (pid && (pid != task_pid_vnr(current))) {
+ struct task_struct *target;
+
+ spin_lock(&task_capability_lock);
+ read_lock(&tasklist_lock);
+
+ target = find_task_by_vpid(pid);
+ if (!target)
+ ret = -ESRCH;
+ else
+ ret = security_capget(target, pEp, pIp, pPp);
+
+ read_unlock(&tasklist_lock);
+ spin_unlock(&task_capability_lock);
+ } else
+ ret = security_capget(current, pEp, pIp, pPp);
+
+ return ret;
+}
+
+/*
+ * With filesystem capability support configured, the kernel does not
+ * permit the changing of capabilities in one process by another
+ * process. (CAP_SETPCAP has much less broad semantics when configured
+ * this way.)
+ */
+static inline int do_sys_capset_other_tasks(pid_t pid,
+ kernel_cap_t *effective,
+ kernel_cap_t *inheritable,
+ kernel_cap_t *permitted)
+{
+ return -EPERM;
+}
+
+#endif /* ie., ndef CONFIG_SECURITY_FILE_CAPABILITIES */
/*
* Atomically modify the effective capabilities returning the original
@@ -155,7 +352,6 @@ asmlinkage long sys_capget(cap_user_header_t header, cap_user_data_t dataptr)
{
int ret = 0;
pid_t pid;
- struct task_struct *target;
unsigned tocopy;
kernel_cap_t pE, pI, pP;
@@ -169,23 +365,7 @@ asmlinkage long sys_capget(cap_user_header_t header, cap_user_data_t dataptr)
if (pid < 0)
return -EINVAL;
- spin_lock(&task_capability_lock);
- read_lock(&tasklist_lock);
-
- if (pid && pid != task_pid_vnr(current)) {
- target = find_task_by_vpid(pid);
- if (!target) {
- ret = -ESRCH;
- goto out;
- }
- } else
- target = current;
-
- ret = security_capget(target, &pE, &pI, &pP);
-
-out:
- read_unlock(&tasklist_lock);
- spin_unlock(&task_capability_lock);
+ ret = cap_get_target_pid(pid, &pE, &pI, &pP);
if (!ret) {
struct __user_cap_data_struct kdata[_KERNEL_CAPABILITY_U32S];
@@ -216,7 +396,6 @@ out:
* before modification is attempted and the application
* fails.
*/
-
if (copy_to_user(dataptr, kdata, tocopy
* sizeof(struct __user_cap_data_struct))) {
return -EFAULT;
@@ -226,70 +405,8 @@ out:
return ret;
}
-/*
- * cap_set_pg - set capabilities for all processes in a given process
- * group. We call this holding task_capability_lock and tasklist_lock.
- */
-static inline int cap_set_pg(int pgrp_nr, kernel_cap_t *effective,
- kernel_cap_t *inheritable,
- kernel_cap_t *permitted)
-{
- struct task_struct *g, *target;
- int ret = -EPERM;
- int found = 0;
- struct pid *pgrp;
-
- pgrp = find_vpid(pgrp_nr);
- do_each_pid_task(pgrp, PIDTYPE_PGID, g) {
- target = g;
- while_each_thread(g, target) {
- if (!security_capset_check(target, effective,
- inheritable,
- permitted)) {
- security_capset_set(target, effective,
- inheritable,
- permitted);
- ret = 0;
- }
- found = 1;
- }
- } while_each_pid_task(pgrp, PIDTYPE_PGID, g);
-
- if (!found)
- ret = 0;
- return ret;
-}
-
-/*
- * cap_set_all - set capabilities for all processes other than init
- * and self. We call this holding task_capability_lock and tasklist_lock.
- */
-static inline int cap_set_all(kernel_cap_t *effective,
- kernel_cap_t *inheritable,
- kernel_cap_t *permitted)
-{
- struct task_struct *g, *target;
- int ret = -EPERM;
- int found = 0;
-
- do_each_thread(g, target) {
- if (target == current || is_container_init(target->group_leader))
- continue;
- found = 1;
- if (security_capset_check(target, effective, inheritable,
- permitted))
- continue;
- ret = 0;
- security_capset_set(target, effective, inheritable, permitted);
- } while_each_thread(g, target);
-
- if (!found)
- ret = 0;
- return ret;
-}
-
/**
- * sys_capset - set capabilities for a process or a group of processes
+ * sys_capset - set capabilities for a process or (*) a group of processes
* @header: pointer to struct that contains capability version and
* target pid data
* @data: pointer to struct that contains the effective, permitted,
@@ -313,7 +430,6 @@ asmlinkage long sys_capset(cap_user_header_t header, const cap_user_data_t data)
struct __user_cap_data_struct kdata[_KERNEL_CAPABILITY_U32S];
unsigned i, tocopy;
kernel_cap_t inheritable, permitted, effective;
- struct task_struct *target;
int ret;
pid_t pid;
@@ -324,9 +440,6 @@ asmlinkage long sys_capset(cap_user_header_t header, const cap_user_data_t data)
if (get_user(pid, &header->pid))
return -EFAULT;
- if (pid && pid != task_pid_vnr(current) && !capable(CAP_SETPCAP))
- return -EPERM;
-
if (copy_from_user(&kdata, data, tocopy
* sizeof(struct __user_cap_data_struct))) {
return -EFAULT;
@@ -344,40 +457,31 @@ asmlinkage long sys_capset(cap_user_header_t header, const cap_user_data_t data)
i++;
}
- spin_lock(&task_capability_lock);
- read_lock(&tasklist_lock);
-
- if (pid > 0 && pid != task_pid_vnr(current)) {
- target = find_task_by_vpid(pid);
- if (!target) {
- ret = -ESRCH;
- goto out;
- }
- } else
- target = current;
-
- ret = 0;
-
- /* having verified that the proposed changes are legal,
- we now put them into effect. */
- if (pid < 0) {
- if (pid == -1) /* all procs other than current and init */
- ret = cap_set_all(&effective, &inheritable, &permitted);
+ if (pid && (pid != task_pid_vnr(current)))
+ ret = do_sys_capset_other_tasks(pid, &effective, &inheritable,
+ &permitted);
+ else {
+ /*
+ * This lock is required even when filesystem
+ * capability support is configured - it protects the
+ * sys_capget() call from returning incorrect data in
+ * the case that the targeted process is not the
+ * current one.
+ */
+ spin_lock(&task_capability_lock);
- else /* all procs in process group */
- ret = cap_set_pg(-pid, &effective, &inheritable,
- &permitted);
- } else {
- ret = security_capset_check(target, &effective, &inheritable,
+ ret = security_capset_check(current, &effective, &inheritable,
&permitted);
+ /*
+ * Having verified that the proposed changes are
+ * legal, we now put them into effect.
+ */
if (!ret)
- security_capset_set(target, &effective, &inheritable,
+ security_capset_set(current, &effective, &inheritable,
&permitted);
+ spin_unlock(&task_capability_lock);
}
-out:
- read_unlock(&tasklist_lock);
- spin_unlock(&task_capability_lock);
return ret;
}
diff --git a/kernel/fork.c b/kernel/fork.c
index adefc1131f2..552c8d8e77a 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -33,6 +33,7 @@
#include <linux/cpu.h>
#include <linux/cgroup.h>
#include <linux/security.h>
+#include <linux/hugetlb.h>
#include <linux/swap.h>
#include <linux/syscalls.h>
#include <linux/jiffies.h>
@@ -307,6 +308,14 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
}
/*
+ * Clear hugetlb-related page reserves for children. This only
+ * affects MAP_PRIVATE mappings. Faults generated by the child
+ * are not guaranteed to succeed, even if read-only
+ */
+ if (is_vm_hugetlb_page(tmp))
+ reset_vma_resv_huge_pages(tmp);
+
+ /*
* Link in the new vma and copy the page table entries.
*/
*pprev = tmp;
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 3cfc0fefb5e..5bc6e5ecc49 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -308,6 +308,30 @@ void compat_irq_chip_set_default_handler(struct irq_desc *desc)
desc->handle_irq = NULL;
}
+static int __irq_set_trigger(struct irq_chip *chip, unsigned int irq,
+ unsigned long flags)
+{
+ int ret;
+
+ if (!chip || !chip->set_type) {
+ /*
+ * IRQF_TRIGGER_* but the PIC does not support multiple
+ * flow-types?
+ */
+ pr_warning("No set_type function for IRQ %d (%s)\n", irq,
+ chip ? (chip->name ? : "unknown") : "unknown");
+ return 0;
+ }
+
+ ret = chip->set_type(irq, flags & IRQF_TRIGGER_MASK);
+
+ if (ret)
+ pr_err("setting flow type for irq %u failed (%pF)\n",
+ irq, chip->set_type);
+
+ return ret;
+}
+
/*
* Internal function to register an irqaction - typically used to
* allocate special interrupts that are part of the architecture.
@@ -319,6 +343,7 @@ int setup_irq(unsigned int irq, struct irqaction *new)
const char *old_name = NULL;
unsigned long flags;
int shared = 0;
+ int ret;
if (irq >= NR_IRQS)
return -EINVAL;
@@ -376,35 +401,23 @@ int setup_irq(unsigned int irq, struct irqaction *new)
shared = 1;
}
- *p = new;
-
- /* Exclude IRQ from balancing */
- if (new->flags & IRQF_NOBALANCING)
- desc->status |= IRQ_NO_BALANCING;
-
if (!shared) {
irq_chip_set_defaults(desc->chip);
-#if defined(CONFIG_IRQ_PER_CPU)
- if (new->flags & IRQF_PERCPU)
- desc->status |= IRQ_PER_CPU;
-#endif
-
/* Setup the type (level, edge polarity) if configured: */
if (new->flags & IRQF_TRIGGER_MASK) {
- if (desc->chip->set_type)
- desc->chip->set_type(irq,
- new->flags & IRQF_TRIGGER_MASK);
- else
- /*
- * IRQF_TRIGGER_* but the PIC does not support
- * multiple flow-types?
- */
- printk(KERN_WARNING "No IRQF_TRIGGER set_type "
- "function for IRQ %d (%s)\n", irq,
- desc->chip->name);
+ ret = __irq_set_trigger(desc->chip, irq, new->flags);
+
+ if (ret) {
+ spin_unlock_irqrestore(&desc->lock, flags);
+ return ret;
+ }
} else
compat_irq_chip_set_default_handler(desc);
+#if defined(CONFIG_IRQ_PER_CPU)
+ if (new->flags & IRQF_PERCPU)
+ desc->status |= IRQ_PER_CPU;
+#endif
desc->status &= ~(IRQ_AUTODETECT | IRQ_WAITING |
IRQ_INPROGRESS | IRQ_SPURIOUS_DISABLED);
@@ -423,6 +436,13 @@ int setup_irq(unsigned int irq, struct irqaction *new)
/* Set default affinity mask once everything is setup */
irq_select_affinity(irq);
}
+
+ *p = new;
+
+ /* Exclude IRQ from balancing */
+ if (new->flags & IRQF_NOBALANCING)
+ desc->status |= IRQ_NO_BALANCING;
+
/* Reset broken irq detection when installing new handler */
desc->irq_count = 0;
desc->irqs_unhandled = 0;
diff --git a/kernel/kmod.c b/kernel/kmod.c
index 90d7af1c165..2989f67c444 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -417,12 +417,12 @@ int call_usermodehelper_stdinpipe(struct subprocess_info *sub_info,
{
struct file *f;
- f = create_write_pipe();
+ f = create_write_pipe(0);
if (IS_ERR(f))
return PTR_ERR(f);
*filp = f;
- f = create_read_pipe(f);
+ f = create_read_pipe(f, 0);
if (IS_ERR(f)) {
free_write_pipe(*filp);
return PTR_ERR(f);
diff --git a/kernel/kthread.c b/kernel/kthread.c
index ac3fb732664..6111c27491b 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -106,7 +106,7 @@ static void create_kthread(struct kthread_create_info *create)
*/
sched_setscheduler(create->result, SCHED_NORMAL, &param);
set_user_nice(create->result, KTHREAD_NICE_LEVEL);
- set_cpus_allowed(create->result, CPU_MASK_ALL);
+ set_cpus_allowed_ptr(create->result, CPU_MASK_ALL_PTR);
}
complete(&create->done);
}
@@ -233,7 +233,7 @@ int kthreadd(void *unused)
set_task_comm(tsk, "kthreadd");
ignore_signals(tsk);
set_user_nice(tsk, KTHREAD_NICE_LEVEL);
- set_cpus_allowed(tsk, CPU_MASK_ALL);
+ set_cpus_allowed_ptr(tsk, CPU_MASK_ALL_PTR);
current->flags |= PF_NOFREEZE | PF_FREEZER_NOSIG;
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index 59dfdf1e1d2..dcd165f92a8 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -94,6 +94,17 @@ config SUSPEND
powered and thus its contents are preserved, such as the
suspend-to-RAM state (e.g. the ACPI S3 state).
+config PM_TEST_SUSPEND
+ bool "Test suspend/resume and wakealarm during bootup"
+ depends on SUSPEND && PM_DEBUG && RTC_LIB=y
+ ---help---
+ This option will let you suspend your machine during bootup, and
+ make it wake up a few seconds later using an RTC wakeup alarm.
+ Enable this with a kernel parameter like "test_suspend=mem".
+
+ You probably want to have your system's RTC driver statically
+ linked, ensuring that it's available when this test runs.
+
config SUSPEND_FREEZER
bool "Enable freezer for suspend to RAM/standby" \
if ARCH_WANTS_FREEZER_CONTROL || BROKEN
diff --git a/kernel/power/main.c b/kernel/power/main.c
index 3398f4651aa..95bff23ecda 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -132,6 +132,61 @@ static inline int suspend_test(int level) { return 0; }
#ifdef CONFIG_SUSPEND
+#ifdef CONFIG_PM_TEST_SUSPEND
+
+/*
+ * We test the system suspend code by setting an RTC wakealarm a short
+ * time in the future, then suspending. Suspending the devices won't
+ * normally take long ... some systems only need a few milliseconds.
+ *
+ * The time it takes is system-specific though, so when we test this
+ * during system bootup we allow a LOT of time.
+ */
+#define TEST_SUSPEND_SECONDS 5
+
+static unsigned long suspend_test_start_time;
+
+static void suspend_test_start(void)
+{
+ /* FIXME Use better timebase than "jiffies", ideally a clocksource.
+ * What we want is a hardware counter that will work correctly even
+ * during the irqs-are-off stages of the suspend/resume cycle...
+ */
+ suspend_test_start_time = jiffies;
+}
+
+static void suspend_test_finish(const char *label)
+{
+ long nj = jiffies - suspend_test_start_time;
+ unsigned msec;
+
+ msec = jiffies_to_msecs(abs(nj));
+ pr_info("PM: %s took %d.%03d seconds\n", label,
+ msec / 1000, msec % 1000);
+
+ /* Warning on suspend means the RTC alarm period needs to be
+ * larger -- the system was sooo slooowwww to suspend that the
+ * alarm (should have) fired before the system went to sleep!
+ *
+ * Warning on either suspend or resume also means the system
+ * has some performance issues. The stack dump of a WARN_ON
+ * is more likely to get the right attention than a printk...
+ */
+ WARN_ON(msec > (TEST_SUSPEND_SECONDS * 1000));
+}
+
+#else
+
+static void suspend_test_start(void)
+{
+}
+
+static void suspend_test_finish(const char *label)
+{
+}
+
+#endif
+
/* This is just an arbitrary number */
#define FREE_PAGE_NUMBER (100)
@@ -266,12 +321,13 @@ int suspend_devices_and_enter(suspend_state_t state)
goto Close;
}
suspend_console();
+ suspend_test_start();
error = device_suspend(PMSG_SUSPEND);
if (error) {
printk(KERN_ERR "PM: Some devices failed to suspend\n");
goto Recover_platform;
}
-
+ suspend_test_finish("suspend devices");
if (suspend_test(TEST_DEVICES))
goto Recover_platform;
@@ -293,7 +349,9 @@ int suspend_devices_and_enter(suspend_state_t state)
if (suspend_ops->finish)
suspend_ops->finish();
Resume_devices:
+ suspend_test_start();
device_resume(PMSG_RESUME);
+ suspend_test_finish("resume devices");
resume_console();
Close:
if (suspend_ops->end)
@@ -521,3 +579,137 @@ static int __init pm_init(void)
}
core_initcall(pm_init);
+
+
+#ifdef CONFIG_PM_TEST_SUSPEND
+
+#include <linux/rtc.h>
+
+/*
+ * To test system suspend, we need a hands-off mechanism to resume the
+ * system. RTCs wake alarms are a common self-contained mechanism.
+ */
+
+static void __init test_wakealarm(struct rtc_device *rtc, suspend_state_t state)
+{
+ static char err_readtime[] __initdata =
+ KERN_ERR "PM: can't read %s time, err %d\n";
+ static char err_wakealarm [] __initdata =
+ KERN_ERR "PM: can't set %s wakealarm, err %d\n";
+ static char err_suspend[] __initdata =
+ KERN_ERR "PM: suspend test failed, error %d\n";
+ static char info_test[] __initdata =
+ KERN_INFO "PM: test RTC wakeup from '%s' suspend\n";
+
+ unsigned long now;
+ struct rtc_wkalrm alm;
+ int status;
+
+ /* this may fail if the RTC hasn't been initialized */
+ status = rtc_read_time(rtc, &alm.time);
+ if (status < 0) {
+ printk(err_readtime, rtc->dev.bus_id, status);
+ return;
+ }
+ rtc_tm_to_time(&alm.time, &now);
+
+ memset(&alm, 0, sizeof alm);
+ rtc_time_to_tm(now + TEST_SUSPEND_SECONDS, &alm.time);
+ alm.enabled = true;
+
+ status = rtc_set_alarm(rtc, &alm);
+ if (status < 0) {
+ printk(err_wakealarm, rtc->dev.bus_id, status);
+ return;
+ }
+
+ if (state == PM_SUSPEND_MEM) {
+ printk(info_test, pm_states[state]);
+ status = pm_suspend(state);
+ if (status == -ENODEV)
+ state = PM_SUSPEND_STANDBY;
+ }
+ if (state == PM_SUSPEND_STANDBY) {
+ printk(info_test, pm_states[state]);
+ status = pm_suspend(state);
+ }
+ if (status < 0)
+ printk(err_suspend, status);
+}
+
+static int __init has_wakealarm(struct device *dev, void *name_ptr)
+{
+ struct rtc_device *candidate = to_rtc_device(dev);
+
+ if (!candidate->ops->set_alarm)
+ return 0;
+ if (!device_may_wakeup(candidate->dev.parent))
+ return 0;
+
+ *(char **)name_ptr = dev->bus_id;
+ return 1;
+}
+
+/*
+ * Kernel options like "test_suspend=mem" force suspend/resume sanity tests
+ * at startup time. They're normally disabled, for faster boot and because
+ * we can't know which states really work on this particular system.
+ */
+static suspend_state_t test_state __initdata = PM_SUSPEND_ON;
+
+static char warn_bad_state[] __initdata =
+ KERN_WARNING "PM: can't test '%s' suspend state\n";
+
+static int __init setup_test_suspend(char *value)
+{
+ unsigned i;
+
+ /* "=mem" ==> "mem" */
+ value++;
+ for (i = 0; i < PM_SUSPEND_MAX; i++) {
+ if (!pm_states[i])
+ continue;
+ if (strcmp(pm_states[i], value) != 0)
+ continue;
+ test_state = (__force suspend_state_t) i;
+ return 0;
+ }
+ printk(warn_bad_state, value);
+ return 0;
+}
+__setup("test_suspend", setup_test_suspend);
+
+static int __init test_suspend(void)
+{
+ static char warn_no_rtc[] __initdata =
+ KERN_WARNING "PM: no wakealarm-capable RTC driver is ready\n";
+
+ char *pony = NULL;
+ struct rtc_device *rtc = NULL;
+
+ /* PM is initialized by now; is that state testable? */
+ if (test_state == PM_SUSPEND_ON)
+ goto done;
+ if (!valid_state(test_state)) {
+ printk(warn_bad_state, pm_states[test_state]);
+ goto done;
+ }
+
+ /* RTCs have initialized by now too ... can we use one? */
+ class_find_device(rtc_class, NULL, &pony, has_wakealarm);
+ if (pony)
+ rtc = rtc_class_open(pony);
+ if (!rtc) {
+ printk(warn_no_rtc);
+ goto done;
+ }
+
+ /* go for it */
+ test_wakealarm(rtc, test_state);
+ rtc_class_close(rtc);
+done:
+ return 0;
+}
+late_initcall(test_suspend);
+
+#endif /* CONFIG_PM_TEST_SUSPEND */
diff --git a/kernel/power/poweroff.c b/kernel/power/poweroff.c
index 678ec736076..72016f05147 100644
--- a/kernel/power/poweroff.c
+++ b/kernel/power/poweroff.c
@@ -10,6 +10,7 @@
#include <linux/pm.h>
#include <linux/workqueue.h>
#include <linux/reboot.h>
+#include <linux/cpumask.h>
/*
* When the user hits Sys-Rq o to power down the machine this is the
@@ -25,7 +26,8 @@ static DECLARE_WORK(poweroff_work, do_poweroff);
static void handle_poweroff(int key, struct tty_struct *tty)
{
- schedule_work(&poweroff_work);
+ /* run sysrq poweroff on boot cpu */
+ schedule_work_on(first_cpu(cpu_online_map), &poweroff_work);
}
static struct sysrq_key_op sysrq_poweroff_op = {
diff --git a/kernel/power/process.c b/kernel/power/process.c
index 5fb87652f21..278946aecaf 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -149,7 +149,7 @@ static int try_to_freeze_tasks(bool sig_only)
unsigned long end_time;
unsigned int todo;
struct timeval start, end;
- s64 elapsed_csecs64;
+ u64 elapsed_csecs64;
unsigned int elapsed_csecs;
do_gettimeofday(&start);
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 5f91a07c4ea..5d2ab836e99 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -205,8 +205,7 @@ static void chain_free(struct chain_allocator *ca, int clear_page_nosave)
* objects. The main list's elements are of type struct zone_bitmap
* and each of them corresonds to one zone. For each zone bitmap
* object there is a list of objects of type struct bm_block that
- * represent each blocks of bit chunks in which information is
- * stored.
+ * represent each blocks of bitmap in which information is stored.
*
* struct memory_bitmap contains a pointer to the main list of zone
* bitmap objects, a struct bm_position used for browsing the bitmap,
@@ -224,26 +223,27 @@ static void chain_free(struct chain_allocator *ca, int clear_page_nosave)
* pfns that correspond to the start and end of the represented zone.
*
* struct bm_block contains a pointer to the memory page in which
- * information is stored (in the form of a block of bit chunks
- * of type unsigned long each). It also contains the pfns that
- * correspond to the start and end of the represented memory area and
- * the number of bit chunks in the block.
+ * information is stored (in the form of a block of bitmap)
+ * It also contains the pfns that correspond to the start and end of
+ * the represented memory area.
*/
#define BM_END_OF_MAP (~0UL)
-#define BM_CHUNKS_PER_BLOCK (PAGE_SIZE / sizeof(long))
-#define BM_BITS_PER_CHUNK (sizeof(long) << 3)
#define BM_BITS_PER_BLOCK (PAGE_SIZE << 3)
struct bm_block {
struct bm_block *next; /* next element of the list */
unsigned long start_pfn; /* pfn represented by the first bit */
unsigned long end_pfn; /* pfn represented by the last bit plus 1 */
- unsigned int size; /* number of bit chunks */
- unsigned long *data; /* chunks of bits representing pages */
+ unsigned long *data; /* bitmap representing pages */
};
+static inline unsigned long bm_block_bits(struct bm_block *bb)
+{
+ return bb->end_pfn - bb->start_pfn;
+}
+
struct zone_bitmap {
struct zone_bitmap *next; /* next element of the list */
unsigned long start_pfn; /* minimal pfn in this zone */
@@ -257,7 +257,6 @@ struct zone_bitmap {
struct bm_position {
struct zone_bitmap *zone_bm;
struct bm_block *block;
- int chunk;
int bit;
};
@@ -272,12 +271,6 @@ struct memory_bitmap {
/* Functions that operate on memory bitmaps */
-static inline void memory_bm_reset_chunk(struct memory_bitmap *bm)
-{
- bm->cur.chunk = 0;
- bm->cur.bit = -1;
-}
-
static void memory_bm_position_reset(struct memory_bitmap *bm)
{
struct zone_bitmap *zone_bm;
@@ -285,7 +278,7 @@ static void memory_bm_position_reset(struct memory_bitmap *bm)
zone_bm = bm->zone_bm_list;
bm->cur.zone_bm = zone_bm;
bm->cur.block = zone_bm->bm_blocks;
- memory_bm_reset_chunk(bm);
+ bm->cur.bit = 0;
}
static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free);
@@ -394,12 +387,10 @@ memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, int safe_needed)
bb->start_pfn = pfn;
if (nr >= BM_BITS_PER_BLOCK) {
pfn += BM_BITS_PER_BLOCK;
- bb->size = BM_CHUNKS_PER_BLOCK;
nr -= BM_BITS_PER_BLOCK;
} else {
/* This is executed only once in the loop */
pfn += nr;
- bb->size = DIV_ROUND_UP(nr, BM_BITS_PER_CHUNK);
}
bb->end_pfn = pfn;
bb = bb->next;
@@ -478,8 +469,8 @@ static int memory_bm_find_bit(struct memory_bitmap *bm, unsigned long pfn,
}
zone_bm->cur_block = bb;
pfn -= bb->start_pfn;
- *bit_nr = pfn % BM_BITS_PER_CHUNK;
- *addr = bb->data + pfn / BM_BITS_PER_CHUNK;
+ *bit_nr = pfn;
+ *addr = bb->data;
return 0;
}
@@ -528,36 +519,6 @@ static int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn)
return test_bit(bit, addr);
}
-/* Two auxiliary functions for memory_bm_next_pf