aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/ABI/testing/sysfs-devices-power35
-rw-r--r--Documentation/ABI/testing/sysfs-power59
-rw-r--r--Documentation/power/suspend-and-cpuhotplug.txt2
-rw-r--r--drivers/base/power/main.c10
-rw-r--r--drivers/base/power/sysfs.c54
-rw-r--r--drivers/base/power/wakeup.c174
-rw-r--r--fs/eventpoll.c90
-rw-r--r--include/linux/capability.h5
-rw-r--r--include/linux/eventpoll.h12
-rw-r--r--include/linux/pm_wakeup.h15
-rw-r--r--include/linux/suspend.h14
-rw-r--r--include/trace/events/power.h34
-rw-r--r--kernel/power/Kconfig27
-rw-r--r--kernel/power/Makefile2
-rw-r--r--kernel/power/autosleep.c127
-rw-r--r--kernel/power/main.c160
-rw-r--r--kernel/power/power.h27
-rw-r--r--kernel/power/swap.c62
-rw-r--r--kernel/power/wakelock.c259
19 files changed, 1050 insertions, 118 deletions
diff --git a/Documentation/ABI/testing/sysfs-devices-power b/Documentation/ABI/testing/sysfs-devices-power
index 840f7d64d48..45000f0db4d 100644
--- a/Documentation/ABI/testing/sysfs-devices-power
+++ b/Documentation/ABI/testing/sysfs-devices-power
@@ -96,16 +96,26 @@ Description:
is read-only. If the device is not enabled to wake up the
system from sleep states, this attribute is not present.
-What: /sys/devices/.../power/wakeup_hit_count
-Date: September 2010
+What: /sys/devices/.../power/wakeup_abort_count
+Date: February 2012
Contact: Rafael J. Wysocki <rjw@sisk.pl>
Description:
- The /sys/devices/.../wakeup_hit_count attribute contains the
+ The /sys/devices/.../wakeup_abort_count attribute contains the
number of times the processing of a wakeup event associated with
- the device might prevent the system from entering a sleep state.
- This attribute is read-only. If the device is not enabled to
- wake up the system from sleep states, this attribute is not
- present.
+ the device might have aborted system transition into a sleep
+ state in progress. This attribute is read-only. If the device
+ is not enabled to wake up the system from sleep states, this
+ attribute is not present.
+
+What: /sys/devices/.../power/wakeup_expire_count
+Date: February 2012
+Contact: Rafael J. Wysocki <rjw@sisk.pl>
+Description:
+ The /sys/devices/.../wakeup_expire_count attribute contains the
+ number of times a wakeup event associated with the device has
+ been reported with a timeout that expired. This attribute is
+ read-only. If the device is not enabled to wake up the system
+ from sleep states, this attribute is not present.
What: /sys/devices/.../power/wakeup_active
Date: September 2010
@@ -148,6 +158,17 @@ Description:
not enabled to wake up the system from sleep states, this
attribute is not present.
+What: /sys/devices/.../power/wakeup_prevent_sleep_time_ms
+Date: February 2012
+Contact: Rafael J. Wysocki <rjw@sisk.pl>
+Description:
+ The /sys/devices/.../wakeup_prevent_sleep_time_ms attribute
+ contains the total time the device has been preventing
+ opportunistic transitions to sleep states from occuring.
+ This attribute is read-only. If the device is not enabled to
+ wake up the system from sleep states, this attribute is not
+ present.
+
What: /sys/devices/.../power/autosuspend_delay_ms
Date: September 2010
Contact: Alan Stern <stern@rowland.harvard.edu>
diff --git a/Documentation/ABI/testing/sysfs-power b/Documentation/ABI/testing/sysfs-power
index b464d12761b..31725ffeeb3 100644
--- a/Documentation/ABI/testing/sysfs-power
+++ b/Documentation/ABI/testing/sysfs-power
@@ -172,3 +172,62 @@ Description:
Reading from this file will display the current value, which is
set to 1 MB by default.
+
+What: /sys/power/autosleep
+Date: April 2012
+Contact: Rafael J. Wysocki <rjw@sisk.pl>
+Description:
+ The /sys/power/autosleep file can be written one of the strings
+ returned by reads from /sys/power/state. If that happens, a
+ work item attempting to trigger a transition of the system to
+ the sleep state represented by that string is queued up. This
+ attempt will only succeed if there are no active wakeup sources
+ in the system at that time. After every execution, regardless
+ of whether or not the attempt to put the system to sleep has
+ succeeded, the work item requeues itself until user space
+ writes "off" to /sys/power/autosleep.
+
+ Reading from this file causes the last string successfully
+ written to it to be returned.
+
+What: /sys/power/wake_lock
+Date: February 2012
+Contact: Rafael J. Wysocki <rjw@sisk.pl>
+Description:
+ The /sys/power/wake_lock file allows user space to create
+ wakeup source objects and activate them on demand (if one of
+ those wakeup sources is active, reads from the
+ /sys/power/wakeup_count file block or return false). When a
+ string without white space is written to /sys/power/wake_lock,
+ it will be assumed to represent a wakeup source name. If there
+ is a wakeup source object with that name, it will be activated
+ (unless active already). Otherwise, a new wakeup source object
+ will be registered, assigned the given name and activated.
+ If a string written to /sys/power/wake_lock contains white
+ space, the part of the string preceding the white space will be
+ regarded as a wakeup source name and handled as descrived above.
+ The other part of the string will be regarded as a timeout (in
+ nanoseconds) such that the wakeup source will be automatically
+ deactivated after it has expired. The timeout, if present, is
+ set regardless of the current state of the wakeup source object
+ in question.
+
+ Reads from this file return a string consisting of the names of
+ wakeup sources created with the help of it that are active at
+ the moment, separated with spaces.
+
+
+What: /sys/power/wake_unlock
+Date: February 2012
+Contact: Rafael J. Wysocki <rjw@sisk.pl>
+Description:
+ The /sys/power/wake_unlock file allows user space to deactivate
+ wakeup sources created with the help of /sys/power/wake_lock.
+ When a string is written to /sys/power/wake_unlock, it will be
+ assumed to represent the name of a wakeup source to deactivate.
+ If a wakeup source object of that name exists and is active at
+ the moment, it will be deactivated.
+
+ Reads from this file return a string consisting of the names of
+ wakeup sources created with the help of /sys/power/wake_lock
+ that are inactive at the moment, separated with spaces.
diff --git a/Documentation/power/suspend-and-cpuhotplug.txt b/Documentation/power/suspend-and-cpuhotplug.txt
index f28f9a6f034..e13dafc8e8f 100644
--- a/Documentation/power/suspend-and-cpuhotplug.txt
+++ b/Documentation/power/suspend-and-cpuhotplug.txt
@@ -29,7 +29,7 @@ More details follow:
Write 'mem' to
/sys/power/state
- syfs file
+ sysfs file
|
v
Acquire pm_mutex lock
diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index b462c0e341c..e0fb5b0435a 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -889,6 +889,11 @@ static int dpm_suspend_noirq(pm_message_t state)
if (!list_empty(&dev->power.entry))
list_move(&dev->power.entry, &dpm_noirq_list);
put_device(dev);
+
+ if (pm_wakeup_pending()) {
+ error = -EBUSY;
+ break;
+ }
}
mutex_unlock(&dpm_list_mtx);
if (error)
@@ -962,6 +967,11 @@ static int dpm_suspend_late(pm_message_t state)
if (!list_empty(&dev->power.entry))
list_move(&dev->power.entry, &dpm_late_early_list);
put_device(dev);
+
+ if (pm_wakeup_pending()) {
+ error = -EBUSY;
+ break;
+ }
}
mutex_unlock(&dpm_list_mtx);
if (error)
diff --git a/drivers/base/power/sysfs.c b/drivers/base/power/sysfs.c
index 95c12f6cb5b..48be2ad4dd2 100644
--- a/drivers/base/power/sysfs.c
+++ b/drivers/base/power/sysfs.c
@@ -314,22 +314,41 @@ static ssize_t wakeup_active_count_show(struct device *dev,
static DEVICE_ATTR(wakeup_active_count, 0444, wakeup_active_count_show, NULL);
-static ssize_t wakeup_hit_count_show(struct device *dev,
- struct device_attribute *attr, char *buf)
+static ssize_t wakeup_abort_count_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ unsigned long count = 0;
+ bool enabled = false;
+
+ spin_lock_irq(&dev->power.lock);
+ if (dev->power.wakeup) {
+ count = dev->power.wakeup->wakeup_count;
+ enabled = true;
+ }
+ spin_unlock_irq(&dev->power.lock);
+ return enabled ? sprintf(buf, "%lu\n", count) : sprintf(buf, "\n");
+}
+
+static DEVICE_ATTR(wakeup_abort_count, 0444, wakeup_abort_count_show, NULL);
+
+static ssize_t wakeup_expire_count_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
{
unsigned long count = 0;
bool enabled = false;
spin_lock_irq(&dev->power.lock);
if (dev->power.wakeup) {
- count = dev->power.wakeup->hit_count;
+ count = dev->power.wakeup->expire_count;
enabled = true;
}
spin_unlock_irq(&dev->power.lock);
return enabled ? sprintf(buf, "%lu\n", count) : sprintf(buf, "\n");
}
-static DEVICE_ATTR(wakeup_hit_count, 0444, wakeup_hit_count_show, NULL);
+static DEVICE_ATTR(wakeup_expire_count, 0444, wakeup_expire_count_show, NULL);
static ssize_t wakeup_active_show(struct device *dev,
struct device_attribute *attr, char *buf)
@@ -398,6 +417,27 @@ static ssize_t wakeup_last_time_show(struct device *dev,
}
static DEVICE_ATTR(wakeup_last_time_ms, 0444, wakeup_last_time_show, NULL);
+
+#ifdef CONFIG_PM_AUTOSLEEP
+static ssize_t wakeup_prevent_sleep_time_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ s64 msec = 0;
+ bool enabled = false;
+
+ spin_lock_irq(&dev->power.lock);
+ if (dev->power.wakeup) {
+ msec = ktime_to_ms(dev->power.wakeup->prevent_sleep_time);
+ enabled = true;
+ }
+ spin_unlock_irq(&dev->power.lock);
+ return enabled ? sprintf(buf, "%lld\n", msec) : sprintf(buf, "\n");
+}
+
+static DEVICE_ATTR(wakeup_prevent_sleep_time_ms, 0444,
+ wakeup_prevent_sleep_time_show, NULL);
+#endif /* CONFIG_PM_AUTOSLEEP */
#endif /* CONFIG_PM_SLEEP */
#ifdef CONFIG_PM_ADVANCED_DEBUG
@@ -486,11 +526,15 @@ static struct attribute *wakeup_attrs[] = {
&dev_attr_wakeup.attr,
&dev_attr_wakeup_count.attr,
&dev_attr_wakeup_active_count.attr,
- &dev_attr_wakeup_hit_count.attr,
+ &dev_attr_wakeup_abort_count.attr,
+ &dev_attr_wakeup_expire_count.attr,
&dev_attr_wakeup_active.attr,
&dev_attr_wakeup_total_time_ms.attr,
&dev_attr_wakeup_max_time_ms.attr,
&dev_attr_wakeup_last_time_ms.attr,
+#ifdef CONFIG_PM_AUTOSLEEP
+ &dev_attr_wakeup_prevent_sleep_time_ms.attr,
+#endif
#endif
NULL,
};
diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c
index 2a3e581b8dc..cbb463b3a75 100644
--- a/drivers/base/power/wakeup.c
+++ b/drivers/base/power/wakeup.c
@@ -14,16 +14,15 @@
#include <linux/suspend.h>
#include <linux/seq_file.h>
#include <linux/debugfs.h>
+#include <trace/events/power.h>
#include "power.h"
-#define TIMEOUT 100
-
/*
* If set, the suspend/hibernate code will abort transitions to a sleep state
* if wakeup events are registered during or immediately before the transition.
*/
-bool events_check_enabled;
+bool events_check_enabled __read_mostly;
/*
* Combined counters of registered wakeup events and wakeup events in progress.
@@ -52,6 +51,8 @@ static void pm_wakeup_timer_fn(unsigned long data);
static LIST_HEAD(wakeup_sources);
+static DECLARE_WAIT_QUEUE_HEAD(wakeup_count_wait_queue);
+
/**
* wakeup_source_prepare - Prepare a new wakeup source for initialization.
* @ws: Wakeup source to prepare.
@@ -132,6 +133,7 @@ void wakeup_source_add(struct wakeup_source *ws)
spin_lock_init(&ws->lock);
setup_timer(&ws->timer, pm_wakeup_timer_fn, (unsigned long)ws);
ws->active = false;
+ ws->last_time = ktime_get();
spin_lock_irq(&events_lock);
list_add_rcu(&ws->entry, &wakeup_sources);
@@ -374,12 +376,33 @@ EXPORT_SYMBOL_GPL(device_set_wakeup_enable);
*/
static void wakeup_source_activate(struct wakeup_source *ws)
{
+ unsigned int cec;
+
ws->active = true;
ws->active_count++;
ws->last_time = ktime_get();
+ if (ws->autosleep_enabled)
+ ws->start_prevent_time = ws->last_time;
/* Increment the counter of events in progress. */
- atomic_inc(&combined_event_count);
+ cec = atomic_inc_return(&combined_event_count);
+
+ trace_wakeup_source_activate(ws->name, cec);
+}
+
+/**
+ * wakeup_source_report_event - Report wakeup event using the given source.
+ * @ws: Wakeup source to report the event for.
+ */
+static void wakeup_source_report_event(struct wakeup_source *ws)
+{
+ ws->event_count++;
+ /* This is racy, but the counter is approximate anyway. */
+ if (events_check_enabled)
+ ws->wakeup_count++;
+
+ if (!ws->active)
+ wakeup_source_activate(ws);
}
/**
@@ -397,10 +420,7 @@ void __pm_stay_awake(struct wakeup_source *ws)
spin_lock_irqsave(&ws->lock, flags);
- ws->event_count++;
- if (!ws->active)
- wakeup_source_activate(ws);
-
+ wakeup_source_report_event(ws);
del_timer(&ws->timer);
ws->timer_expires = 0;
@@ -432,6 +452,17 @@ void pm_stay_awake(struct device *dev)
}
EXPORT_SYMBOL_GPL(pm_stay_awake);
+#ifdef CONFIG_PM_AUTOSLEEP
+static void update_prevent_sleep_time(struct wakeup_source *ws, ktime_t now)
+{
+ ktime_t delta = ktime_sub(now, ws->start_prevent_time);
+ ws->prevent_sleep_time = ktime_add(ws->prevent_sleep_time, delta);
+}
+#else
+static inline void update_prevent_sleep_time(struct wakeup_source *ws,
+ ktime_t now) {}
+#endif
+
/**
* wakup_source_deactivate - Mark given wakeup source as inactive.
* @ws: Wakeup source to handle.
@@ -442,6 +473,7 @@ EXPORT_SYMBOL_GPL(pm_stay_awake);
*/
static void wakeup_source_deactivate(struct wakeup_source *ws)
{
+ unsigned int cnt, inpr, cec;
ktime_t duration;
ktime_t now;
@@ -468,14 +500,23 @@ static void wakeup_source_deactivate(struct wakeup_source *ws)
if (ktime_to_ns(duration) > ktime_to_ns(ws->max_time))
ws->max_time = duration;
+ ws->last_time = now;
del_timer(&ws->timer);
ws->timer_expires = 0;
+ if (ws->autosleep_enabled)
+ update_prevent_sleep_time(ws, now);
+
/*
* Increment the counter of registered wakeup events and decrement the
* couter of wakeup events in progress simultaneously.
*/
- atomic_add(MAX_IN_PROGRESS, &combined_event_count);
+ cec = atomic_add_return(MAX_IN_PROGRESS, &combined_event_count);
+ trace_wakeup_source_deactivate(ws->name, cec);
+
+ split_counters(&cnt, &inpr);
+ if (!inpr && waitqueue_active(&wakeup_count_wait_queue))
+ wake_up(&wakeup_count_wait_queue);
}
/**
@@ -536,8 +577,10 @@ static void pm_wakeup_timer_fn(unsigned long data)
spin_lock_irqsave(&ws->lock, flags);
if (ws->active && ws->timer_expires
- && time_after_eq(jiffies, ws->timer_expires))
+ && time_after_eq(jiffies, ws->timer_expires)) {
wakeup_source_deactivate(ws);
+ ws->expire_count++;
+ }
spin_unlock_irqrestore(&ws->lock, flags);
}
@@ -564,9 +607,7 @@ void __pm_wakeup_event(struct wakeup_source *ws, unsigned int msec)
spin_lock_irqsave(&ws->lock, flags);
- ws->event_count++;
- if (!ws->active)
- wakeup_source_activate(ws);
+ wakeup_source_report_event(ws);
if (!msec) {
wakeup_source_deactivate(ws);
@@ -609,24 +650,6 @@ void pm_wakeup_event(struct device *dev, unsigned int msec)
EXPORT_SYMBOL_GPL(pm_wakeup_event);
/**
- * pm_wakeup_update_hit_counts - Update hit counts of all active wakeup sources.
- */
-static void pm_wakeup_update_hit_counts(void)
-{
- unsigned long flags;
- struct wakeup_source *ws;
-
- rcu_read_lock();
- list_for_each_entry_rcu(ws, &wakeup_sources, entry) {
- spin_lock_irqsave(&ws->lock, flags);
- if (ws->active)
- ws->hit_count++;
- spin_unlock_irqrestore(&ws->lock, flags);
- }
- rcu_read_unlock();
-}
-
-/**
* pm_wakeup_pending - Check if power transition in progress should be aborted.
*
* Compare the current number of registered wakeup events with its preserved
@@ -648,32 +671,38 @@ bool pm_wakeup_pending(void)
events_check_enabled = !ret;
}
spin_unlock_irqrestore(&events_lock, flags);
- if (ret)
- pm_wakeup_update_hit_counts();
return ret;
}
/**
* pm_get_wakeup_count - Read the number of registered wakeup events.
* @count: Address to store the value at.
+ * @block: Whether or not to block.
*
- * Store the number of registered wakeup events at the address in @count. Block
- * if the current number of wakeup events being processed is nonzero.
+ * Store the number of registered wakeup events at the address in @count. If
+ * @block is set, block until the current number of wakeup events being
+ * processed is zero.
*
- * Return 'false' if the wait for the number of wakeup events being processed to
- * drop down to zero has been interrupted by a signal (and the current number
- * of wakeup events being processed is still nonzero). Otherwise return 'true'.
+ * Return 'false' if the current number of wakeup events being processed is
+ * nonzero. Otherwise return 'true'.
*/
-bool pm_get_wakeup_count(unsigned int *count)
+bool pm_get_wakeup_count(unsigned int *count, bool block)
{
unsigned int cnt, inpr;
- for (;;) {
- split_counters(&cnt, &inpr);
- if (inpr == 0 || signal_pending(current))
- break;
- pm_wakeup_update_hit_counts();
- schedule_timeout_interruptible(msecs_to_jiffies(TIMEOUT));
+ if (block) {
+ DEFINE_WAIT(wait);
+
+ for (;;) {
+ prepare_to_wait(&wakeup_count_wait_queue, &wait,
+ TASK_INTERRUPTIBLE);
+ split_counters(&cnt, &inpr);
+ if (inpr == 0 || signal_pending(current))
+ break;
+
+ schedule();
+ }
+ finish_wait(&wakeup_count_wait_queue, &wait);
}
split_counters(&cnt, &inpr);
@@ -703,11 +732,37 @@ bool pm_save_wakeup_count(unsigned int count)
events_check_enabled = true;
}
spin_unlock_irq(&events_lock);
- if (!events_check_enabled)
- pm_wakeup_update_hit_counts();
return events_check_enabled;
}
+#ifdef CONFIG_PM_AUTOSLEEP
+/**
+ * pm_wakep_autosleep_enabled - Modify autosleep_enabled for all wakeup sources.
+ * @enabled: Whether to set or to clear the autosleep_enabled flags.
+ */
+void pm_wakep_autosleep_enabled(bool set)
+{
+ struct wakeup_source *ws;
+ ktime_t now = ktime_get();
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(ws, &wakeup_sources, entry) {
+ spin_lock_irq(&ws->lock);
+ if (ws->autosleep_enabled != set) {
+ ws->autosleep_enabled = set;
+ if (ws->active) {
+ if (set)
+ ws->start_prevent_time = now;
+ else
+ update_prevent_sleep_time(ws, now);
+ }
+ }
+ spin_unlock_irq(&ws->lock);
+ }
+ rcu_read_unlock();
+}
+#endif /* CONFIG_PM_AUTOSLEEP */
+
static struct dentry *wakeup_sources_stats_dentry;
/**
@@ -723,27 +778,37 @@ static int print_wakeup_source_stats(struct seq_file *m,
ktime_t max_time;
unsigned long active_count;
ktime_t active_time;
+ ktime_t prevent_sleep_time;
int ret;
spin_lock_irqsave(&ws->lock, flags);
total_time = ws->total_time;
max_time = ws->max_time;
+ prevent_sleep_time = ws->prevent_sleep_time;
active_count = ws->active_count;
if (ws->active) {
- active_time = ktime_sub(ktime_get(), ws->last_time);
+ ktime_t now = ktime_get();
+
+ active_time = ktime_sub(now, ws->last_time);
total_time = ktime_add(total_time, active_time);
if (active_time.tv64 > max_time.tv64)
max_time = active_time;
+
+ if (ws->autosleep_enabled)
+ prevent_sleep_time = ktime_add(prevent_sleep_time,
+ ktime_sub(now, ws->start_prevent_time));
} else {
active_time = ktime_set(0, 0);
}
- ret = seq_printf(m, "%-12s\t%lu\t\t%lu\t\t%lu\t\t"
- "%lld\t\t%lld\t\t%lld\t\t%lld\n",
- ws->name, active_count, ws->event_count, ws->hit_count,
+ ret = seq_printf(m, "%-12s\t%lu\t\t%lu\t\t%lu\t\t%lu\t\t"
+ "%lld\t\t%lld\t\t%lld\t\t%lld\t\t%lld\n",
+ ws->name, active_count, ws->event_count,
+ ws->wakeup_count, ws->expire_count,
ktime_to_ms(active_time), ktime_to_ms(total_time),
- ktime_to_ms(max_time), ktime_to_ms(ws->last_time));
+ ktime_to_ms(max_time), ktime_to_ms(ws->last_time),
+ ktime_to_ms(prevent_sleep_time));
spin_unlock_irqrestore(&ws->lock, flags);
@@ -758,8 +823,9 @@ static int wakeup_sources_stats_show(struct seq_file *m, void *unused)
{
struct wakeup_source *ws;
- seq_puts(m, "name\t\tactive_count\tevent_count\thit_count\t"
- "active_since\ttotal_time\tmax_time\tlast_change\n");
+ seq_puts(m, "name\t\tactive_count\tevent_count\twakeup_count\t"
+ "expire_count\tactive_since\ttotal_time\tmax_time\t"
+ "last_change\tprevent_suspend_time\n");
rcu_read_lock();
list_for_each_entry_rcu(ws, &wakeup_sources, entry)
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index c0b3c70ee87..2cf0f2153be 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -33,6 +33,7 @@
#include <linux/bitops.h>
#include <linux/mutex.h>
#include <linux/anon_inodes.h>
+#include <linux/device.h>
#include <asm/uaccess.h>
#include <asm/io.h>
#include <asm/mman.h>
@@ -87,7 +88,7 @@
*/
/* Epoll private bits inside the event mask */
-#define EP_PRIVATE_BITS (EPOLLONESHOT | EPOLLET)
+#define EP_PRIVATE_BITS (EPOLLWAKEUP | EPOLLONESHOT | EPOLLET)
/* Maximum number of nesting allowed inside epoll sets */
#define EP_MAX_NESTS 4
@@ -154,6 +155,9 @@ struct epitem {
/* List header used to link this item to the "struct file" items list */
struct list_head fllink;
+ /* wakeup_source used when EPOLLWAKEUP is set */
+ struct wakeup_source *ws;
+
/* The structure that describe the interested events and the source fd */
struct epoll_event event;
};
@@ -194,6 +198,9 @@ struct eventpoll {
*/
struct epitem *ovflist;
+ /* wakeup_source used when ep_scan_ready_list is running */
+ struct wakeup_source *ws;
+
/* The user that created the eventpoll descriptor */
struct user_struct *user;
@@ -588,8 +595,10 @@ static int ep_scan_ready_list(struct eventpoll *ep,
* queued into ->ovflist but the "txlist" might already
* contain them, and the list_splice() below takes care of them.
*/
- if (!ep_is_linked(&epi->rdllink))
+ if (!ep_is_linked(&epi->rdllink)) {
list_add_tail(&epi->rdllink, &ep->rdllist);
+ __pm_stay_awake(epi->ws);
+ }
}
/*
* We need to set back ep->ovflist to EP_UNACTIVE_PTR, so that after
@@ -602,6 +611,7 @@ static int ep_scan_ready_list(struct eventpoll *ep,
* Quickly re-inject items left on "txlist".
*/
list_splice(&txlist, &ep->rdllist);
+ __pm_relax(ep->ws);
if (!list_empty(&ep->rdllist)) {
/*
@@ -656,6 +666,8 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi)
list_del_init(&epi->rdllink);
spin_unlock_irqrestore(&ep->lock, flags);
+ wakeup_source_unregister(epi->ws);
+
/* At this point it is safe to free the eventpoll item */
kmem_cache_free(epi_cache, epi);
@@ -706,6 +718,7 @@ static void ep_free(struct eventpoll *ep)
mutex_unlock(&epmutex);
mutex_destroy(&ep->mtx);
free_uid(ep->user);
+ wakeup_source_unregister(ep->ws);
kfree(ep);
}
@@ -737,6 +750,7 @@ static int ep_read_events_proc(struct eventpoll *ep, struct list_head *head,
* callback, but it's not actually ready, as far as
* caller requested events goes. We can remove it here.
*/
+ __pm_relax(epi->ws);
list_del_init(&epi->rdllink);
}
}
@@ -927,13 +941,23 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k
if (epi->next == EP_UNACTIVE_PTR) {
epi->next = ep->ovflist;
ep->ovflist = epi;
+ if (epi->ws) {
+ /*
+ * Activate ep->ws since epi->ws may get
+ * deactivated at any time.
+ */
+ __pm_stay_awake(ep->ws);
+ }
+
}
goto out_unlock;
}
/* If this file is already in the ready list we exit soon */
- if (!ep_is_linked(&epi->rdllink))
+ if (!ep_is_linked(&epi->rdllink)) {
list_add_tail(&epi->rdllink, &ep->rdllist);
+ __pm_stay_awake(epi->ws);
+ }
/*
* Wake up ( if active ) both the eventpoll wait list and the ->poll()
@@ -1091,6 +1115,30 @@ static int reverse_path_check(void)
return error;
}
+static int ep_create_wakeup_source(struct epitem *epi)
+{
+ const char *name;
+
+ if (!epi->ep->ws) {
+ epi->ep->ws = wakeup_source_register("eventpoll");
+ if (!epi->ep->ws)
+ return -ENOMEM;
+ }
+
+ name = epi->ffd.file->f_path.dentry->d_name.name;
+ epi->ws = wakeup_source_register(name);
+ if (!epi->ws)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static void ep_destroy_wakeup_source(struct epitem *epi)
+{
+ wakeup_source_unregister(epi->ws);
+ epi->ws = NULL;
+}
+
/*
* Must be called with "mtx" held.
*/
@@ -1118,6 +1166,13 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
epi->event = *event;
epi->nwait = 0;
epi->next = EP_UNACTIVE_PTR;
+ if (epi->event.events & EPOLLWAKEUP) {
+ error = ep_create_wakeup_source(epi);
+ if (error)
+ goto error_create_wakeup_source;
+ } else {
+ epi->ws = NULL;
+ }
/* Initialize the poll table using the queue callback */
epq.epi = epi;
@@ -1164,6 +1219,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
/* If the file is already "ready" we drop it inside the ready list */
if ((revents & event->events) && !ep_is_linked(&epi->rdllink)) {
list_add_tail(&epi->rdllink, &ep->rdllist);
+ __pm_stay_awake(epi->ws);
/* Notify waiting tasks that events are available */
if (waitqueue_active(&ep->wq))
@@ -1204,6 +1260,9 @@ error_unregister:
list_del_init(&epi->rdllink);
spin_unlock_irqrestore(&ep->lock, flags);
+ wakeup_source_unregister(epi->ws);
+
+error_create_wakeup_source:
kmem_cache_free(epi_cache, epi);
return error;
@@ -1229,6 +1288,12 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even
epi->event.events = event->events;
pt._key = event->events;
epi->event.data = event->data; /* protected by mtx */
+ if (epi->event.events & EPOLLWAKEUP) {
+ if (!epi->ws)
+ ep_create_wakeup_source(epi);
+ } else if (epi->ws) {
+ ep_destroy_wakeup_source(epi);
+ }
/*
* Get current event bits. We can safely use the file* here because
@@ -1244,6 +1309,7 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even
spin_lock_irq(&ep->lock);
if (!ep_is_linked(&epi->rdllink)) {
list_add_tail(&epi->rdllink, &ep->rdllist);
+ __pm_stay_awake(epi->ws);
/* Notify waiting tasks that events are available */
if (waitqueue_active(&ep->wq))
@@ -1282,6 +1348,18 @@ static int ep_send_events_proc(struct eventpoll *ep, struct list_head *head,
!list_empty(head) && eventcnt < esed->maxevents;) {
epi = list_first_entry(head, struct epitem, rdllink);
+ /*
+ * Activate ep->ws before deactivating epi->ws to prevent
+ * triggering auto-suspend here (in case we reactive epi->ws
+ * below).
+ *
+ * This could be rearranged to delay the deactivation of epi->ws
+ * instead, but then epi->ws would temporarily be out of sync
+ * with ep_is_linked().
+ */
+ if (epi->ws && epi->ws->active)
+ __pm_stay_awake(ep->ws);
+ __pm_relax(epi->ws);
list_del_init(&epi->rdllink);
pt._key = epi->event.events;
@@ -1298,6 +1376,7 @@ static int ep_send_events_proc(struct eventpoll *ep, struct list_head *head,
if (__put_user(revents, &uevent->events) ||
__put_user(epi->event.data, &uevent->data)) {
list_add(&epi->rdllink, head);
+ __pm_stay_awake(epi->ws);
return eventcnt ? eventcnt : -EFAULT;
}
eventcnt++;
@@ -1317,6 +1396,7 @@ static int ep_send_events_proc(struct eventpoll *ep, struct list_head *head,
* poll callback will queue them in ep->ovflist.
*/
list_add_tail(&epi->rdllink, &ep->rdllist);
+ __pm_stay_awake(epi->ws);
}
}
}
@@ -1629,6 +1709,10 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
if (!tfile->f_op || !tfile->f_op->poll)
goto error_tgt_fput;
+ /* Check if EPOLLWAKEUP is allowed */
+ if ((epds.events & EPOLLWAKEUP) && !capable(CAP_EPOLLWAKEUP))
+ goto error_tgt_fput;
+
/*
* We have to check that the file structure underneath the file descriptor
* the user passed to us _is_ an eventpoll file. And also we do not permit
diff --git a/include/linux/capability.h b/include/linux/capability.h
index 12d52dedb22..c398cff3dab 100644
--- a/include/linux/capability.h
+++ b/include/linux/capability.h
@@ -360,8 +360,11 @@ struct cpu_vfs_cap_data {
#define CAP_WAKE_ALARM 35
+/* Allow preventing system suspends while epoll events are pending */
-#define CAP_LAST_CAP CAP_WAKE_ALARM
+#define CAP_EPOLLWAKEUP 36
+
+#define CAP_LAST_CAP CAP_EPOLLWAKEUP
#define cap_valid(x) ((x) >= 0 && (x) <= CAP_LAST_CAP)
diff --git a/include/linux/eventpoll.h b/include/linux/eventpoll.h
index 657ab55beda..6f8be328770 100644
--- a/include/linux/eventpoll.h
+++ b/include/linux/eventpoll.h
@@ -26,6 +26,18 @@
#define EPOLL_CTL_DEL 2
#define EPOLL_CTL_MOD 3
+/*
+ * Request the handling of system wakeup events so as to prevent system suspends
+ * from happening while those events are being processed.
+ *
+ * Assuming neither EPOLLET nor EPOLLONESHOT is set, system suspends will not be
+ * re-allowed until epoll_wait is called again after consuming the wakeup
+ * event(s).
+ *
+ * Requires CAP_EPOLLWAKEUP
+ */
+#define EPOLLWAKEUP (1 << 29)
+
/* Set the One Shot behaviour for the target file descriptor */
#define EPOLLONESHOT (1 << 30)
diff --git a/include/linux/pm_wakeup.h b/include/linux/pm_wakeup.h
index d9f05113e5f..569781faa50 100644
--- a/include/linux/pm_wakeup.h
+++ b/include/linux/pm_wakeup.h
@@ -33,12 +33,15 @@
*
* @total_time: Total time this wakeup source has been active.
* @max_time: Maximum time this wakeup source has been continuously active.
- * @last_time: Monotonic clock when the wakeup source's was activated last time.
+ * @last_time: Monotonic clock when the wakeup source's was touched last time.
+ * @prevent_sleep_time: Total time this source has been preventing autosleep.
* @event_count: Number of signaled wakeup events.
* @active_count: Number of times the wakeup sorce was activated.
* @relax_count: Number of times the wakeup sorce was deactivated.
- * @hit_count: Number of times the wakeup sorce might abort system suspend.
+ * @expire_count: Number of times the wakeup source's timeout has expired.
+ * @wakeup_count: Number of times the wakeup source might abort suspend.
* @active: Status of the wakeup source.
+ * @has_timeout: The wakeup source has been activated with a timeout.
*/
struct wakeup_source {
const char *name;
@@ -49,11 +52,15 @@ struct wakeup_source {
ktime_t total_time;
ktime_t max_time;
ktime_t last_time;
+ ktime_t start_prevent_time;
+ ktime_t prevent_sleep_time;
unsigned long event_count;
unsigned long active_count;
unsigned long relax_count;
- unsigned long hit_count;
- unsigned int active:1;
+ unsigned long expire_count;
+ unsigned long wakeup_count;
+ bool active:1;
+ bool autosleep_enabled:1;
};
#ifdef CONFIG_PM_SLEEP
diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index ac1c114c499..cd83059fb59 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -356,8 +356,9 @@ extern int unregister_pm_notifier(struct notifier_block *nb);
extern bool events_check_enabled;
extern bool pm_wakeup_pending(void);
-extern bool pm_get_wakeup_count(unsigned int *count);</