Diffstat (limited to 'drivers/base/power')
-rw-r--r--   drivers/base/power/Makefile           |   3
-rw-r--r--   drivers/base/power/clock_ops.c        |  37
-rw-r--r--   drivers/base/power/common.c           |  18
-rw-r--r--   drivers/base/power/domain.c           | 935
-rw-r--r--   drivers/base/power/domain_governor.c  | 167
-rw-r--r--   drivers/base/power/generic_ops.c      | 190
-rw-r--r--   drivers/base/power/main.c             | 827
-rw-r--r--   drivers/base/power/opp.c              | 271
-rw-r--r--   drivers/base/power/power.h            |  48
-rw-r--r--   drivers/base/power/qos.c              | 691
-rw-r--r--   drivers/base/power/runtime.c          | 368
-rw-r--r--   drivers/base/power/sysfs.c            | 251
-rw-r--r--   drivers/base/power/wakeup.c           | 298
13 files changed, 3175 insertions, 929 deletions
diff --git a/drivers/base/power/Makefile b/drivers/base/power/Makefile index 2e58ebb1f6c..1cb8544598d 100644 --- a/drivers/base/power/Makefile +++ b/drivers/base/power/Makefile @@ -1,6 +1,5 @@ -obj-$(CONFIG_PM) += sysfs.o generic_ops.o common.o qos.o +obj-$(CONFIG_PM) += sysfs.o generic_ops.o common.o qos.o runtime.o obj-$(CONFIG_PM_SLEEP) += main.o wakeup.o -obj-$(CONFIG_PM_RUNTIME) += runtime.o obj-$(CONFIG_PM_TRACE_RTC) += trace.o obj-$(CONFIG_PM_OPP) += opp.o obj-$(CONFIG_PM_GENERIC_DOMAINS) += domain.o domain_governor.o diff --git a/drivers/base/power/clock_ops.c b/drivers/base/power/clock_ops.c index 428e55e012d..b99e6c06ee6 100644 --- a/drivers/base/power/clock_ops.c +++ b/drivers/base/power/clock_ops.c @@ -6,8 +6,8 @@ * This file is released under the GPLv2. */ -#include <linux/init.h> #include <linux/kernel.h> +#include <linux/device.h> #include <linux/io.h> #include <linux/pm.h> #include <linux/pm_clock.h> @@ -32,6 +32,21 @@ struct pm_clock_entry { }; /** + * pm_clk_enable - Enable a clock, reporting any errors + * @dev: The device for the given clock + * @clk: The clock being enabled. + */ +static inline int __pm_clk_enable(struct device *dev, struct clk *clk) +{ + int ret = clk_enable(clk); + if (ret) + dev_err(dev, "%s: failed to enable clk %p, error %d\n", + __func__, clk, ret); + + return ret; +} + +/** * pm_clk_acquire - Acquire a device clock. * @dev: Device whose clock is to be acquired. * @ce: PM clock entry corresponding to the clock. @@ -42,6 +57,7 @@ static void pm_clk_acquire(struct device *dev, struct pm_clock_entry *ce) if (IS_ERR(ce->clk)) { ce->status = PCE_STATUS_ERROR; } else { + clk_prepare(ce->clk); ce->status = PCE_STATUS_ACQUIRED; dev_dbg(dev, "Clock %s managed by runtime PM.\n", ce->con_id); } @@ -100,8 +116,10 @@ static void __pm_clk_remove(struct pm_clock_entry *ce) if (ce->status == PCE_STATUS_ENABLED) clk_disable(ce->clk); - if (ce->status >= PCE_STATUS_ACQUIRED) + if (ce->status >= PCE_STATUS_ACQUIRED) { + clk_unprepare(ce->clk); clk_put(ce->clk); + } } kfree(ce->con_id); @@ -168,8 +186,7 @@ void pm_clk_init(struct device *dev) */ int pm_clk_create(struct device *dev) { - int ret = dev_pm_get_subsys_data(dev); - return ret < 0 ? 
ret : 0; + return dev_pm_get_subsys_data(dev); } /** @@ -249,6 +266,7 @@ int pm_clk_resume(struct device *dev) struct pm_subsys_data *psd = dev_to_psd(dev); struct pm_clock_entry *ce; unsigned long flags; + int ret; dev_dbg(dev, "%s()\n", __func__); @@ -259,8 +277,9 @@ int pm_clk_resume(struct device *dev) list_for_each_entry(ce, &psd->clock_list, node) { if (ce->status < PCE_STATUS_ERROR) { - clk_enable(ce->clk); - ce->status = PCE_STATUS_ENABLED; + ret = __pm_clk_enable(dev, ce->clk); + if (!ret) + ce->status = PCE_STATUS_ENABLED; } } @@ -376,7 +395,7 @@ int pm_clk_resume(struct device *dev) spin_lock_irqsave(&psd->lock, flags); list_for_each_entry(ce, &psd->clock_list, node) - clk_enable(ce->clk); + __pm_clk_enable(dev, ce->clk); spin_unlock_irqrestore(&psd->lock, flags); @@ -396,7 +415,7 @@ static void enable_clock(struct device *dev, const char *con_id) clk = clk_get(dev, con_id); if (!IS_ERR(clk)) { - clk_enable(clk); + clk_prepare_enable(clk); clk_put(clk); dev_info(dev, "Runtime PM disabled, clock forced on.\n"); } @@ -413,7 +432,7 @@ static void disable_clock(struct device *dev, const char *con_id) clk = clk_get(dev, con_id); if (!IS_ERR(clk)) { - clk_disable(clk); + clk_disable_unprepare(clk); clk_put(clk); dev_info(dev, "Runtime PM disabled, clock forced off.\n"); } diff --git a/drivers/base/power/common.c b/drivers/base/power/common.c index 4af7c1cbf90..df2e5eeaeb0 100644 --- a/drivers/base/power/common.c +++ b/drivers/base/power/common.c @@ -6,8 +6,8 @@ * This file is released under the GPLv2. */ -#include <linux/init.h> #include <linux/kernel.h> +#include <linux/device.h> #include <linux/export.h> #include <linux/slab.h> #include <linux/pm_clock.h> @@ -23,7 +23,6 @@ int dev_pm_get_subsys_data(struct device *dev) { struct pm_subsys_data *psd; - int ret = 0; psd = kzalloc(sizeof(*psd), GFP_KERNEL); if (!psd) @@ -39,7 +38,6 @@ int dev_pm_get_subsys_data(struct device *dev) dev->power.subsys_data = psd; pm_clk_init(dev); psd = NULL; - ret = 1; } spin_unlock_irq(&dev->power.lock); @@ -47,7 +45,7 @@ int dev_pm_get_subsys_data(struct device *dev) /* kfree() verifies that its argument is nonzero. */ kfree(psd); - return ret; + return 0; } EXPORT_SYMBOL_GPL(dev_pm_get_subsys_data); @@ -62,24 +60,24 @@ EXPORT_SYMBOL_GPL(dev_pm_get_subsys_data); int dev_pm_put_subsys_data(struct device *dev) { struct pm_subsys_data *psd; - int ret = 0; + int ret = 1; spin_lock_irq(&dev->power.lock); psd = dev_to_psd(dev); - if (!psd) { - ret = -EINVAL; + if (!psd) goto out; - } if (--psd->refcount == 0) { dev->power.subsys_data = NULL; - kfree(psd); - ret = 1; + } else { + psd = NULL; + ret = 0; } out: spin_unlock_irq(&dev->power.lock); + kfree(psd); return ret; } diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 978bbf7ac6a..eee55c1e5fd 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -6,11 +6,11 @@ * This file is released under the GPLv2. 
*/ -#include <linux/init.h> #include <linux/kernel.h> #include <linux/io.h> #include <linux/pm_runtime.h> #include <linux/pm_domain.h> +#include <linux/pm_qos.h> #include <linux/slab.h> #include <linux/err.h> #include <linux/sched.h> @@ -38,11 +38,13 @@ ktime_t __start = ktime_get(); \ type __retval = GENPD_DEV_CALLBACK(genpd, type, callback, dev); \ s64 __elapsed = ktime_to_ns(ktime_sub(ktime_get(), __start)); \ - struct generic_pm_domain_data *__gpd_data = dev_gpd_data(dev); \ - if (__elapsed > __gpd_data->td.field) { \ - __gpd_data->td.field = __elapsed; \ - dev_warn(dev, name " latency exceeded, new value %lld ns\n", \ + struct gpd_timing_data *__td = &dev_gpd_data(dev)->td; \ + if (!__retval && __elapsed > __td->field) { \ + __td->field = __elapsed; \ + dev_dbg(dev, name " latency exceeded, new value %lld ns\n", \ __elapsed); \ + genpd->max_off_time_changed = true; \ + __td->constraint_changed = true; \ } \ __retval; \ }) @@ -50,6 +52,24 @@ static LIST_HEAD(gpd_list); static DEFINE_MUTEX(gpd_list_lock); +static struct generic_pm_domain *pm_genpd_lookup_name(const char *domain_name) +{ + struct generic_pm_domain *genpd = NULL, *gpd; + + if (IS_ERR_OR_NULL(domain_name)) + return NULL; + + mutex_lock(&gpd_list_lock); + list_for_each_entry(gpd, &gpd_list, gpd_list_node) { + if (!strcmp(gpd->name, domain_name)) { + genpd = gpd; + break; + } + } + mutex_unlock(&gpd_list_lock); + return genpd; +} + #ifdef CONFIG_PM struct generic_pm_domain *dev_to_genpd(struct device *dev) @@ -72,19 +92,6 @@ static int genpd_start_dev(struct generic_pm_domain *genpd, struct device *dev) start_latency_ns, "start"); } -static int genpd_save_dev(struct generic_pm_domain *genpd, struct device *dev) -{ - return GENPD_DEV_TIMED_CALLBACK(genpd, int, save_state, dev, - save_state_latency_ns, "state save"); -} - -static int genpd_restore_dev(struct generic_pm_domain *genpd, struct device *dev) -{ - return GENPD_DEV_TIMED_CALLBACK(genpd, int, restore_state, dev, - restore_state_latency_ns, - "state restore"); -} - static bool genpd_sd_counter_dec(struct generic_pm_domain *genpd) { bool ret = false; @@ -98,7 +105,7 @@ static bool genpd_sd_counter_dec(struct generic_pm_domain *genpd) static void genpd_sd_counter_inc(struct generic_pm_domain *genpd) { atomic_inc(&genpd->sd_count); - smp_mb__after_atomic_inc(); + smp_mb__after_atomic(); } static void genpd_acquire_lock(struct generic_pm_domain *genpd) @@ -136,6 +143,19 @@ static void genpd_set_active(struct generic_pm_domain *genpd) genpd->status = GPD_STATE_ACTIVE; } +static void genpd_recalc_cpu_exit_latency(struct generic_pm_domain *genpd) +{ + s64 usecs64; + + if (!genpd->cpu_data) + return; + + usecs64 = genpd->power_on_latency_ns; + do_div(usecs64, NSEC_PER_USEC); + usecs64 += genpd->cpu_data->saved_exit_latency; + genpd->cpu_data->idle_state->exit_latency = usecs64; +} + /** * __pm_genpd_poweron - Restore power to a given PM domain and its masters. * @genpd: PM domain to power up. @@ -143,7 +163,7 @@ static void genpd_set_active(struct generic_pm_domain *genpd) * Restore power to @genpd and all of its masters so that it is possible to * resume a device belonging to it. 
*/ -int __pm_genpd_poweron(struct generic_pm_domain *genpd) +static int __pm_genpd_poweron(struct generic_pm_domain *genpd) __releases(&genpd->lock) __acquires(&genpd->lock) { struct gpd_link *link; @@ -173,6 +193,13 @@ int __pm_genpd_poweron(struct generic_pm_domain *genpd) return 0; } + if (genpd->cpu_data) { + cpuidle_pause_and_lock(); + genpd->cpu_data->idle_state->disabled = true; + cpuidle_resume_and_unlock(); + goto out; + } + /* * The list is guaranteed not to change while the loop below is being * executed, unless one of the masters' .power_on() callbacks fiddles @@ -211,6 +238,8 @@ int __pm_genpd_poweron(struct generic_pm_domain *genpd) elapsed_ns = ktime_to_ns(ktime_sub(ktime_get(), time_start)); if (elapsed_ns > genpd->power_on_latency_ns) { genpd->power_on_latency_ns = elapsed_ns; + genpd->max_off_time_changed = true; + genpd_recalc_cpu_exit_latency(genpd); if (genpd->name) pr_warning("%s: Power-on latency exceeded, " "new value %lld ns\n", genpd->name, @@ -218,6 +247,7 @@ int __pm_genpd_poweron(struct generic_pm_domain *genpd) } } + out: genpd_set_active(genpd); return 0; @@ -243,10 +273,88 @@ int pm_genpd_poweron(struct generic_pm_domain *genpd) return ret; } +/** + * pm_genpd_name_poweron - Restore power to a given PM domain and its masters. + * @domain_name: Name of the PM domain to power up. + */ +int pm_genpd_name_poweron(const char *domain_name) +{ + struct generic_pm_domain *genpd; + + genpd = pm_genpd_lookup_name(domain_name); + return genpd ? pm_genpd_poweron(genpd) : -EINVAL; +} + #endif /* CONFIG_PM */ #ifdef CONFIG_PM_RUNTIME +static int genpd_start_dev_no_timing(struct generic_pm_domain *genpd, + struct device *dev) +{ + return GENPD_DEV_CALLBACK(genpd, int, start, dev); +} + +static int genpd_save_dev(struct generic_pm_domain *genpd, struct device *dev) +{ + return GENPD_DEV_TIMED_CALLBACK(genpd, int, save_state, dev, + save_state_latency_ns, "state save"); +} + +static int genpd_restore_dev(struct generic_pm_domain *genpd, struct device *dev) +{ + return GENPD_DEV_TIMED_CALLBACK(genpd, int, restore_state, dev, + restore_state_latency_ns, + "state restore"); +} + +static int genpd_dev_pm_qos_notifier(struct notifier_block *nb, + unsigned long val, void *ptr) +{ + struct generic_pm_domain_data *gpd_data; + struct device *dev; + + gpd_data = container_of(nb, struct generic_pm_domain_data, nb); + + mutex_lock(&gpd_data->lock); + dev = gpd_data->base.dev; + if (!dev) { + mutex_unlock(&gpd_data->lock); + return NOTIFY_DONE; + } + mutex_unlock(&gpd_data->lock); + + for (;;) { + struct generic_pm_domain *genpd; + struct pm_domain_data *pdd; + + spin_lock_irq(&dev->power.lock); + + pdd = dev->power.subsys_data ? + dev->power.subsys_data->domain_data : NULL; + if (pdd && pdd->dev) { + to_gpd_data(pdd)->td.constraint_changed = true; + genpd = dev_to_genpd(dev); + } else { + genpd = ERR_PTR(-ENODATA); + } + + spin_unlock_irq(&dev->power.lock); + + if (!IS_ERR(genpd)) { + mutex_lock(&genpd->lock); + genpd->max_off_time_changed = true; + mutex_unlock(&genpd->lock); + } + + dev = dev->parent; + if (!dev || dev->power.ignore_children) + break; + } + + return NOTIFY_DONE; +} + /** * __pm_genpd_save_device - Save the pre-suspend state of a device. * @pdd: Domain data of the device to save the state of. 
@@ -288,19 +396,16 @@ static void __pm_genpd_restore_device(struct pm_domain_data *pdd, { struct generic_pm_domain_data *gpd_data = to_gpd_data(pdd); struct device *dev = pdd->dev; + bool need_restore = gpd_data->need_restore; - if (!gpd_data->need_restore) - return; - + gpd_data->need_restore = false; mutex_unlock(&genpd->lock); genpd_start_dev(genpd, dev); - genpd_restore_dev(genpd, dev); - genpd_stop_dev(genpd, dev); + if (need_restore) + genpd_restore_dev(genpd, dev); mutex_lock(&genpd->lock); - - gpd_data->need_restore = false; } /** @@ -327,8 +432,7 @@ static bool genpd_abort_poweroff(struct generic_pm_domain *genpd) */ void genpd_queue_power_off_work(struct generic_pm_domain *genpd) { - if (!work_pending(&genpd->power_off_work)) - queue_work(pm_wq, &genpd->power_off_work); + queue_work(pm_wq, &genpd->power_off_work); } /** @@ -364,10 +468,19 @@ static int pm_genpd_poweroff(struct generic_pm_domain *genpd) return -EBUSY; not_suspended = 0; - list_for_each_entry(pdd, &genpd->dev_list, list_node) + list_for_each_entry(pdd, &genpd->dev_list, list_node) { + enum pm_qos_flags_status stat; + + stat = dev_pm_qos_flags(pdd->dev, + PM_QOS_FLAG_NO_POWER_OFF + | PM_QOS_FLAG_REMOTE_WAKEUP); + if (stat > PM_QOS_FLAGS_NONE) + return -EBUSY; + if (pdd->dev->driver && (!pm_runtime_suspended(pdd->dev) || pdd->dev->power.irq_safe)) not_suspended++; + } if (not_suspended > genpd->in_progress) return -EBUSY; @@ -407,6 +520,21 @@ static int pm_genpd_poweroff(struct generic_pm_domain *genpd) } } + if (genpd->cpu_data) { + /* + * If cpu_data is set, cpuidle should turn the domain off when + * the CPU in it is idle. In that case we don't decrement the + * subdomain counts of the master domains, so that power is not + * removed from the current domain prematurely as a result of + * cutting off the masters' power. + */ + genpd->status = GPD_STATE_POWER_OFF; + cpuidle_pause_and_lock(); + genpd->cpu_data->idle_state->disabled = false; + cpuidle_resume_and_unlock(); + goto out; + } + if (genpd->power_off) { ktime_t time_start; s64 elapsed_ns; @@ -435,6 +563,7 @@ static int pm_genpd_poweroff(struct generic_pm_domain *genpd) elapsed_ns = ktime_to_ns(ktime_sub(ktime_get(), time_start)); if (elapsed_ns > genpd->power_off_latency_ns) { genpd->power_off_latency_ns = elapsed_ns; + genpd->max_off_time_changed = true; if (genpd->name) pr_warning("%s: Power-off latency exceeded, " "new value %lld ns\n", genpd->name, @@ -443,17 +572,6 @@ static int pm_genpd_poweroff(struct generic_pm_domain *genpd) } genpd->status = GPD_STATE_POWER_OFF; - genpd->power_off_time = ktime_get(); - - /* Update PM QoS information for devices in the domain. */ - list_for_each_entry_reverse(pdd, &genpd->dev_list, list_node) { - struct gpd_timing_data *td = &to_gpd_data(pdd)->td; - - pm_runtime_update_max_time_suspended(pdd->dev, - td->start_latency_ns + - td->restore_state_latency_ns + - genpd->power_on_latency_ns); - } list_for_each_entry(link, &genpd->slave_links, slave_node) { genpd_sd_counter_dec(link->master); @@ -511,9 +629,6 @@ static int pm_genpd_runtime_suspend(struct device *dev) if (ret) return ret; - pm_runtime_update_max_time_suspended(dev, - dev_gpd_data(dev)->td.start_latency_ns); - /* * If power.irq_safe is set, this routine will be run with interrupts * off, so it can't use mutexes. @@ -554,7 +669,7 @@ static int pm_genpd_runtime_resume(struct device *dev) /* If power.irq_safe, the PM domain is never powered off. 
*/ if (dev->power.irq_safe) - goto out; + return genpd_start_dev_no_timing(genpd, dev); mutex_lock(&genpd->lock); ret = __pm_genpd_poweron(genpd); @@ -587,12 +702,17 @@ static int pm_genpd_runtime_resume(struct device *dev) wake_up_all(&genpd->status_wait_queue); mutex_unlock(&genpd->lock); - out: - genpd_start_dev(genpd, dev); - return 0; } +static bool pd_ignore_unused; +static int __init pd_ignore_unused_setup(char *__unused) +{ + pd_ignore_unused = true; + return 1; +} +__setup("pd_ignore_unused", pd_ignore_unused_setup); + /** * pm_genpd_poweroff_unused - Power off all PM domains with no devices in use. */ @@ -600,6 +720,11 @@ void pm_genpd_poweroff_unused(void) { struct generic_pm_domain *genpd; + if (pd_ignore_unused) { + pr_warn("genpd: Not disabling unused power domains\n"); + return; + } + mutex_lock(&gpd_list_lock); list_for_each_entry(genpd, &gpd_list, gpd_list_node) @@ -610,6 +735,12 @@ void pm_genpd_poweroff_unused(void) #else +static inline int genpd_dev_pm_qos_notifier(struct notifier_block *nb, + unsigned long val, void *ptr) +{ + return NOTIFY_DONE; +} + static inline void genpd_power_off_work_fn(struct work_struct *work) {} #define pm_genpd_runtime_suspend NULL @@ -619,6 +750,24 @@ static inline void genpd_power_off_work_fn(struct work_struct *work) {} #ifdef CONFIG_PM_SLEEP +/** + * pm_genpd_present - Check if the given PM domain has been initialized. + * @genpd: PM domain to check. + */ +static bool pm_genpd_present(struct generic_pm_domain *genpd) +{ + struct generic_pm_domain *gpd; + + if (IS_ERR_OR_NULL(genpd)) + return false; + + list_for_each_entry(gpd, &gpd_list, gpd_list_node) + if (gpd == genpd) + return true; + + return false; +} + static bool genpd_dev_active_wakeup(struct generic_pm_domain *genpd, struct device *dev) { @@ -672,9 +821,10 @@ static int genpd_thaw_dev(struct generic_pm_domain *genpd, struct device *dev) * Check if the given PM domain can be powered off (during system suspend or * hibernation) and do that if so. Also, in that case propagate to its masters. * - * This function is only called in "noirq" stages of system power transitions, - * so it need not acquire locks (all of the "noirq" callbacks are executed - * sequentially, so it is guaranteed that it will never run twice in parallel). + * This function is only called in "noirq" and "syscore" stages of system power + * transitions, so it need not acquire locks (all of the "noirq" callbacks are + * executed sequentially, so it is guaranteed that it will never run twice in + * parallel). */ static void pm_genpd_sync_poweroff(struct generic_pm_domain *genpd) { @@ -699,6 +849,33 @@ static void pm_genpd_sync_poweroff(struct generic_pm_domain *genpd) } /** + * pm_genpd_sync_poweron - Synchronously power on a PM domain and its masters. + * @genpd: PM domain to power on. + * + * This function is only called in "noirq" and "syscore" stages of system power + * transitions, so it need not acquire locks (all of the "noirq" callbacks are + * executed sequentially, so it is guaranteed that it will never run twice in + * parallel). + */ +static void pm_genpd_sync_poweron(struct generic_pm_domain *genpd) +{ + struct gpd_link *link; + + if (genpd->status != GPD_STATE_POWER_OFF) + return; + + list_for_each_entry(link, &genpd->slave_links, slave_node) { + pm_genpd_sync_poweron(link->master); + genpd_sd_counter_inc(link->master); + } + + if (genpd->power_on) + genpd->power_on(genpd); + + genpd->status = GPD_STATE_ACTIVE; +} + +/** * resume_needed - Check whether to resume a device before system suspend. 
* @dev: Device to check. * @genpd: PM domain the device belongs to. @@ -755,7 +932,7 @@ static int pm_genpd_prepare(struct device *dev) pm_wakeup_event(dev, 0); if (pm_wakeup_pending()) { - pm_runtime_put_sync(dev); + pm_runtime_put(dev); return -EBUSY; } @@ -764,8 +941,10 @@ static int pm_genpd_prepare(struct device *dev) genpd_acquire_lock(genpd); - if (genpd->prepared_count++ == 0) + if (genpd->prepared_count++ == 0) { + genpd->suspended_count = 0; genpd->suspend_power_off = genpd->status == GPD_STATE_POWER_OFF; + } genpd_release_lock(genpd); @@ -794,7 +973,7 @@ static int pm_genpd_prepare(struct device *dev) pm_runtime_enable(dev); } - pm_runtime_put_sync(dev); + pm_runtime_put(dev); return ret; } @@ -820,17 +999,16 @@ static int pm_genpd_suspend(struct device *dev) } /** - * pm_genpd_suspend_noirq - Late suspend of a device from an I/O PM domain. + * pm_genpd_suspend_late - Late suspend of a device from an I/O PM domain. * @dev: Device to suspend. * * Carry out a late suspend of a device under the assumption that its * pm_domain field points to the domain member of an object of type * struct generic_pm_domain representing a PM domain consisting of I/O devices. */ -static int pm_genpd_suspend_noirq(struct device *dev) +static int pm_genpd_suspend_late(struct device *dev) { struct generic_pm_domain *genpd; - int ret; dev_dbg(dev, "%s()\n", __func__); @@ -838,14 +1016,28 @@ static int pm_genpd_suspend_noirq(struct device *dev) if (IS_ERR(genpd)) return -EINVAL; - if (genpd->suspend_power_off) - return 0; + return genpd->suspend_power_off ? 0 : genpd_suspend_late(genpd, dev); +} - ret = genpd_suspend_late(genpd, dev); - if (ret) - return ret; +/** + * pm_genpd_suspend_noirq - Completion of suspend of device in an I/O PM domain. + * @dev: Device to suspend. + * + * Stop the device and remove power from the domain if all devices in it have + * been stopped. + */ +static int pm_genpd_suspend_noirq(struct device *dev) +{ + struct generic_pm_domain *genpd; + + dev_dbg(dev, "%s()\n", __func__); + + genpd = dev_to_genpd(dev); + if (IS_ERR(genpd)) + return -EINVAL; - if (dev->power.wakeup_path && genpd_dev_active_wakeup(genpd, dev)) + if (genpd->suspend_power_off + || (dev->power.wakeup_path && genpd_dev_active_wakeup(genpd, dev))) return 0; genpd_stop_dev(genpd, dev); @@ -862,13 +1054,10 @@ static int pm_genpd_suspend_noirq(struct device *dev) } /** - * pm_genpd_resume_noirq - Early resume of a device from an I/O power domain. + * pm_genpd_resume_noirq - Start of resume of device in an I/O PM domain. * @dev: Device to resume. * - * Carry out an early resume of a device under the assumption that its - * pm_domain field points to the domain member of an object of type - * struct generic_pm_domain representing a power domain consisting of I/O - * devices. + * Restore power to the device's PM domain, if necessary, and start the device. */ static int pm_genpd_resume_noirq(struct device *dev) { @@ -880,7 +1069,8 @@ static int pm_genpd_resume_noirq(struct device *dev) if (IS_ERR(genpd)) return -EINVAL; - if (genpd->suspend_power_off) + if (genpd->suspend_power_off + || (dev->power.wakeup_path && genpd_dev_active_wakeup(genpd, dev))) return 0; /* @@ -888,15 +1078,36 @@ static int pm_genpd_resume_noirq(struct device *dev) * guaranteed that this function will never run twice in parallel for * the same PM domain, so it is not necessary to use locking here. 
*/ - pm_genpd_poweron(genpd); + pm_genpd_sync_poweron(genpd); genpd->suspended_count--; - genpd_start_dev(genpd, dev); - return genpd_resume_early(genpd, dev); + return genpd_start_dev(genpd, dev); } /** - * pm_genpd_resume - Resume a device belonging to an I/O power domain. + * pm_genpd_resume_early - Early resume of a device in an I/O PM domain. + * @dev: Device to resume. + * + * Carry out an early resume of a device under the assumption that its + * pm_domain field points to the domain member of an object of type + * struct generic_pm_domain representing a power domain consisting of I/O + * devices. + */ +static int pm_genpd_resume_early(struct device *dev) +{ + struct generic_pm_domain *genpd; + + dev_dbg(dev, "%s()\n", __func__); + + genpd = dev_to_genpd(dev); + if (IS_ERR(genpd)) + return -EINVAL; + + return genpd->suspend_power_off ? 0 : genpd_resume_early(genpd, dev); +} + +/** + * pm_genpd_resume - Resume of device in an I/O PM domain. * @dev: Device to resume. * * Resume a device under the assumption that its pm_domain field points to the @@ -917,7 +1128,7 @@ static int pm_genpd_resume(struct device *dev) } /** - * pm_genpd_freeze - Freeze a device belonging to an I/O power domain. + * pm_genpd_freeze - Freezing a device in an I/O PM domain. * @dev: Device to freeze. * * Freeze a device under the assumption that its pm_domain field points to the @@ -938,7 +1149,29 @@ static int pm_genpd_freeze(struct device *dev) } /** - * pm_genpd_freeze_noirq - Late freeze of a device from an I/O power domain. + * pm_genpd_freeze_late - Late freeze of a device in an I/O PM domain. + * @dev: Device to freeze. + * + * Carry out a late freeze of a device under the assumption that its + * pm_domain field points to the domain member of an object of type + * struct generic_pm_domain representing a power domain consisting of I/O + * devices. + */ +static int pm_genpd_freeze_late(struct device *dev) +{ + struct generic_pm_domain *genpd; + + dev_dbg(dev, "%s()\n", __func__); + + genpd = dev_to_genpd(dev); + if (IS_ERR(genpd)) + return -EINVAL; + + return genpd->suspend_power_off ? 0 : genpd_freeze_late(genpd, dev); +} + +/** + * pm_genpd_freeze_noirq - Completion of freezing a device in an I/O PM domain. * @dev: Device to freeze. * * Carry out a late freeze of a device under the assumption that its @@ -949,7 +1182,6 @@ static int pm_genpd_freeze(struct device *dev) static int pm_genpd_freeze_noirq(struct device *dev) { struct generic_pm_domain *genpd; - int ret; dev_dbg(dev, "%s()\n", __func__); @@ -957,20 +1189,31 @@ static int pm_genpd_freeze_noirq(struct device *dev) if (IS_ERR(genpd)) return -EINVAL; - if (genpd->suspend_power_off) - return 0; + return genpd->suspend_power_off ? 0 : genpd_stop_dev(genpd, dev); +} - ret = genpd_freeze_late(genpd, dev); - if (ret) - return ret; +/** + * pm_genpd_thaw_noirq - Early thaw of device in an I/O PM domain. + * @dev: Device to thaw. + * + * Start the device, unless power has been removed from the domain already + * before the system transition. + */ +static int pm_genpd_thaw_noirq(struct device *dev) +{ + struct generic_pm_domain *genpd; - genpd_stop_dev(genpd, dev); + dev_dbg(dev, "%s()\n", __func__); - return 0; + genpd = dev_to_genpd(dev); + if (IS_ERR(genpd)) + return -EINVAL; + + return genpd->suspend_power_off ? 0 : genpd_start_dev(genpd, dev); } /** - * pm_genpd_thaw_noirq - Early thaw of a device from an I/O power domain. + * pm_genpd_thaw_early - Early thaw of device in an I/O PM domain. * @dev: Device to thaw. 
* * Carry out an early thaw of a device under the assumption that its @@ -978,7 +1221,7 @@ static int pm_genpd_freeze_noirq(struct device *dev) * struct generic_pm_domain representing a power domain consisting of I/O * devices. */ -static int pm_genpd_thaw_noirq(struct device *dev) +static int pm_genpd_thaw_early(struct device *dev) { struct generic_pm_domain *genpd; @@ -988,12 +1231,7 @@ static int pm_genpd_thaw_noirq(struct device *dev) if (IS_ERR(genpd)) return -EINVAL; - if (genpd->suspend_power_off) - return 0; - - genpd_start_dev(genpd, dev); - - return genpd_thaw_early(genpd, dev); + return genpd->suspend_power_off ? 0 : genpd_thaw_early(genpd, dev); } /** @@ -1018,13 +1256,11 @@ static int pm_genpd_thaw(struct device *dev) } /** - * pm_genpd_restore_noirq - Early restore of a device from an I/O power domain. + * pm_genpd_restore_noirq - Start of restore of device in an I/O PM domain. * @dev: Device to resume. * - * Carry out an early restore of a device under the assumption that its - * pm_domain field points to the domain member of an object of type - * struct generic_pm_domain representing a power domain consisting of I/O - * devices. + * Make sure the domain will be in the same power state as before the + * hibernation the system is resuming from and start the device if necessary. */ static int pm_genpd_restore_noirq(struct device *dev) { @@ -1040,23 +1276,35 @@ static int pm_genpd_restore_noirq(struct device *dev) * Since all of the "noirq" callbacks are executed sequentially, it is * guaranteed that this function will never run twice in parallel for * the same PM domain, so it is not necessary to use locking here. + * + * At this point suspended_count == 0 means we are being run for the + * first time for the given domain in the present cycle. */ - genpd->status = GPD_STATE_POWER_OFF; - if (genpd->suspend_power_off) { + if (genpd->suspended_count++ == 0) { /* - * The boot kernel might put the domain into the power on state, - * so make sure it really is powered off. + * The boot kernel might put the domain into arbitrary state, + * so make it appear as powered off to pm_genpd_sync_poweron(), + * so that it tries to power it on in case it was really off. */ - if (genpd->power_off) - genpd->power_off(genpd); - return 0; + genpd->status = GPD_STATE_POWER_OFF; + if (genpd->suspend_power_off) { + /* + * If the domain was off before the hibernation, make + * sure it will be off going forward. + */ + if (genpd->power_off) + genpd->power_off(genpd); + + return 0; + } } - pm_genpd_poweron(genpd); - genpd->suspended_count--; - genpd_start_dev(genpd, dev); + if (genpd->suspend_power_off) + return 0; + + pm_genpd_sync_poweron(genpd); - return genpd_resume_early(genpd, dev); + return genpd_start_dev(genpd, dev); } /** @@ -1091,19 +1339,48 @@ static void pm_genpd_complete(struct device *dev) pm_generic_complete(dev); pm_runtime_set_active(dev); pm_runtime_enable(dev); - pm_runtime_idle(dev); + pm_request_idle(dev); + } +} + +/** + * pm_genpd_syscore_switch - Switch power during system core suspend or resume. + * @dev: Device that normally is marked as "always on" to switch power for. + * + * This routine may only be called during the system core (syscore) suspend or + * resume phase for devices whose "always on" flags are set. 
+ */ +void pm_genpd_syscore_switch(struct device *dev, bool suspend) +{ + struct generic_pm_domain *genpd; + + genpd = dev_to_genpd(dev); + if (!pm_genpd_present(genpd)) + return; + + if (suspend) { + genpd->suspended_count++; + pm_genpd_sync_poweroff(genpd); + } else { + pm_genpd_sync_poweron(genpd); + genpd->suspended_count--; } } +EXPORT_SYMBOL_GPL(pm_genpd_syscore_switch); #else #define pm_genpd_prepare NULL #define pm_genpd_suspend NULL +#define pm_genpd_suspend_late NULL #define pm_genpd_suspend_noirq NULL +#define pm_genpd_resume_early NULL #define pm_genpd_resume_noirq NULL #define pm_genpd_resume NULL #define pm_genpd_freeze NULL +#define pm_genpd_freeze_late NULL #define pm_genpd_freeze_noirq NULL +#define pm_genpd_thaw_early NULL #define pm_genpd_thaw_noirq NULL #define pm_genpd_thaw NULL #define pm_genpd_restore_noirq NULL @@ -1111,6 +1388,27 @@ static void pm_genpd_complete(struct device *dev) #endif /* CONFIG_PM_SLEEP */ +static struct generic_pm_domain_data *__pm_genpd_alloc_dev_data(struct device *dev) +{ + struct generic_pm_domain_data *gpd_data; + + gpd_data = kzalloc(sizeof(*gpd_data), GFP_KERNEL); + if (!gpd_data) + return NULL; + + mutex_init(&gpd_data->lock); + gpd_data->nb.notifier_call = genpd_dev_pm_qos_notifier; + dev_pm_qos_add_notifier(dev, &gpd_data->nb); + return gpd_data; +} + +static void __pm_genpd_free_dev_data(struct device *dev, + struct generic_pm_domain_data *gpd_data) +{ + dev_pm_qos_remove_notifier(dev, &gpd_data->nb); + kfree(gpd_data); +} + /** * __pm_genpd_add_device - Add a device to an I/O PM domain. * @genpd: PM domain to add the device to. @@ -1120,7 +1418,7 @@ static void pm_genpd_complete(struct device *dev) int __pm_genpd_add_device(struct generic_pm_domain *genpd, struct device *dev, struct gpd_timing_data *td) { - struct generic_pm_domain_data *gpd_data; + struct generic_pm_domain_data *gpd_data_new, *gpd_data = NULL; struct pm_domain_data *pdd; int ret = 0; @@ -1129,12 +1427,11 @@ int __pm_genpd_add_device(struct generic_pm_domain *genpd, struct device *dev, if (IS_ERR_OR_NULL(genpd) || IS_ERR_OR_NULL(dev)) return -EINVAL; - genpd_acquire_lock(genpd); + gpd_data_new = __pm_genpd_alloc_dev_data(dev); + if (!gpd_data_new) + return -ENOMEM; - if (genpd->status == GPD_STATE_POWER_OFF) { - ret = -EINVAL; - goto out; - } + genpd_acquire_lock(genpd); if (genpd->prepared_count > 0) { ret = -EAGAIN; @@ -1147,30 +1444,91 @@ int __pm_genpd_add_device(struct generic_pm_domain *genpd, struct device *dev, goto out; } - gpd_data = kzalloc(sizeof(*gpd_data), GFP_KERNEL); - if (!gpd_data) { - ret = -ENOMEM; + ret = dev_pm_get_subsys_data(dev); + if (ret) goto out; - } genpd->device_count++; + genpd->max_off_time_changed = true; + + spin_lock_irq(&dev->power.lock); dev->pm_domain = &genpd->domain; - dev_pm_get_subsys_data(dev); - dev->power.subsys_data->domain_data = &gpd_data->base; - gpd_data->base.dev = dev; - gpd_data->need_restore = false; - list_add_tail(&gpd_data->base.list_node, &genpd->dev_list); + if (dev->power.subsys_data->domain_data) { + gpd_data = to_gpd_data(dev->power.subsys_data->domain_data); + } else { + gpd_data = gpd_data_new; + dev->power.subsys_data->domain_data = &gpd_data->base; + } + gpd_data->refcount++; if (td) gpd_data->td = *td; + spin_unlock_irq(&dev->power.lock); + + mutex_lock(&gpd_data->lock); + gpd_data->base.dev = dev; + list_add_tail(&gpd_data->base.list_node, &genpd->dev_list); + gpd_data->need_restore = genpd->status == GPD_STATE_POWER_OFF; + gpd_data->td.constraint_changed = true; + 
gpd_data->td.effective_constraint_ns = -1; + mutex_unlock(&gpd_data->lock); + out: genpd_release_lock(genpd); + if (gpd_data != gpd_data_new) + __pm_genpd_free_dev_data(dev, gpd_data_new); + return ret; } /** + * __pm_genpd_of_add_device - Add a device to an I/O PM domain. + * @genpd_node: Device tree node pointer representing a PM domain to which the + * the device is added to. + * @dev: Device to be added. + * @td: Set of PM QoS timing parameters to attach to the device. + */ +int __pm_genpd_of_add_device(struct device_node *genpd_node, struct device *dev, + struct gpd_timing_data *td) +{ + struct generic_pm_domain *genpd = NULL, *gpd; + + dev_dbg(dev, "%s()\n", __func__); + + if (IS_ERR_OR_NULL(genpd_node) || IS_ERR_OR_NULL(dev)) + return -EINVAL; + + mutex_lock(&gpd_list_lock); + list_for_each_entry(gpd, &gpd_list, gpd_list_node) { + if (gpd->of_node == genpd_node) { + genpd = gpd; + break; + } + } + mutex_unlock(&gpd_list_lock); + + if (!genpd) + return -EINVAL; + + return __pm_genpd_add_device(genpd, dev, td); +} + + +/** + * __pm_genpd_name_add_device - Find I/O PM domain and add a device to it. + * @domain_name: Name of the PM domain to add the device to. + * @dev: Device to be added. + * @td: Set of PM QoS timing parameters to attach to the device. + */ +int __pm_genpd_name_add_device(const char *domain_name, struct device *dev, + struct gpd_timing_data *td) +{ + return __pm_genpd_add_device(pm_genpd_lookup_name(domain_name), dev, td); +} + +/** * pm_genpd_remove_device - Remove a device from an I/O PM domain. * @genpd: PM domain to remove the device from. * @dev: Device to be removed. @@ -1178,12 +1536,16 @@ int __pm_genpd_add_device(struct generic_pm_domain *genpd, struct device *dev, int pm_genpd_remove_device(struct generic_pm_domain *genpd, struct device *dev) { + struct generic_pm_domain_data *gpd_data; struct pm_domain_data *pdd; - int ret = -EINVAL; + bool remove = false; + int ret = 0; dev_dbg(dev, "%s()\n", __func__); - if (IS_ERR_OR_NULL(genpd) || IS_ERR_OR_NULL(dev)) + if (IS_ERR_OR_NULL(genpd) || IS_ERR_OR_NULL(dev) + || IS_ERR_OR_NULL(dev->pm_domain) + || pd_to_genpd(dev->pm_domain) != genpd) return -EINVAL; genpd_acquire_lock(genpd); @@ -1193,22 +1555,34 @@ int pm_genpd_remove_device(struct generic_pm_domain *genpd, goto out; } - list_for_each_entry(pdd, &genpd->dev_list, list_node) { - if (pdd->dev != dev) - continue; - - list_del_init(&pdd->list_node); - pdd->dev = NULL; - dev_pm_put_subsys_data(dev); - dev->pm_domain = NULL; - kfree(to_gpd_data(pdd)); + genpd->device_count--; + genpd->max_off_time_changed = true; - genpd->device_count--; + spin_lock_irq(&dev->power.lock); - ret = 0; - break; + dev->pm_domain = NULL; + pdd = dev->power.subsys_data->domain_data; + list_del_init(&pdd->list_node); + gpd_data = to_gpd_data(pdd); + if (--gpd_data->refcount == 0) { + dev->power.subsys_data->domain_data = NULL; + remove = true; } + spin_unlock_irq(&dev->power.lock); + + mutex_lock(&gpd_data->lock); + pdd->dev = NULL; + mutex_unlock(&gpd_data->lock); + + genpd_release_lock(genpd); + + dev_pm_put_subsys_data(dev); + if (remove) + __pm_genpd_free_dev_data(dev, gpd_data); + + return 0; + out: genpd_release_lock(genpd); @@ -1216,6 +1590,26 @@ int pm_genpd_remove_device(struct generic_pm_domain *genpd, } /** + * pm_genpd_dev_need_restore - Set/unset the device's "need restore" flag. + * @dev: Device to set/unset the flag for. + * @val: The new value of the device's "need restore" flag. 
+ */ +void pm_genpd_dev_need_restore(struct device *dev, bool val) +{ + struct pm_subsys_data *psd; + unsigned long flags; + + spin_lock_irqsave(&dev->power.lock, flags); + + psd = dev_to_psd(dev); + if (psd && psd->domain_data) + to_gpd_data(psd->domain_data)->need_restore = val; + + spin_unlock_irqrestore(&dev->power.lock, flags); +} +EXPORT_SYMBOL_GPL(pm_genpd_dev_need_restore); + +/** * pm_genpd_add_subdomain - Add a subdomain to an I/O PM domain. * @genpd: Master PM domain to add the subdomain to. * @subdomain: Subdomain to be added. @@ -1226,7 +1620,8 @@ int pm_genpd_add_subdomain(struct generic_pm_domain *genpd, struct gpd_link *link; int ret = 0; - if (IS_ERR_OR_NULL(genpd) || IS_ERR_OR_NULL(subdomain)) + if (IS_ERR_OR_NULL(genpd) || IS_ERR_OR_NULL(subdomain) + || genpd == subdomain) return -EINVAL; start: @@ -1246,7 +1641,7 @@ int pm_genpd_add_subdomain(struct generic_pm_domain *genpd, goto out; } - list_for_each_entry(link, &genpd->slave_links, slave_node) { + list_for_each_entry(link, &genpd->master_links, master_node) { if (link->slave == subdomain && link->master == genpd) { ret = -EINVAL; goto out; @@ -1273,6 +1668,35 @@ int pm_genpd_add_subdomain(struct generic_pm_domain *genpd, } /** + * pm_genpd_add_subdomain_names - Add a subdomain to an I/O PM domain. + * @master_name: Name of the master PM domain to add the subdomain to. + * @subdomain_name: Name of the subdomain to be added. + */ +int pm_genpd_add_subdomain_names(const char *master_name, + const char *subdomain_name) +{ + struct generic_pm_domain *master = NULL, *subdomain = NULL, *gpd; + + if (IS_ERR_OR_NULL(master_name) || IS_ERR_OR_NULL(subdomain_name)) + return -EINVAL; + + mutex_lock(&gpd_list_lock); + list_for_each_entry(gpd, &gpd_list, gpd_list_node) { + if (!master && !strcmp(gpd->name, master_name)) + master = gpd; + + if (!subdomain && !strcmp(gpd->name, subdomain_name)) + subdomain = gpd; + + if (master && subdomain) + break; + } + mutex_unlock(&gpd_list_lock); + + return pm_genpd_add_subdomain(master, subdomain); +} + +/** * pm_genpd_remove_subdomain - Remove a subdomain from an I/O PM domain. * @genpd: Master PM domain to remove the subdomain from. * @subdomain: Subdomain to be removed. @@ -1324,33 +1748,52 @@ int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd, * @dev: Device to add the callbacks to. * @ops: Set of callbacks to add. * @td: Timing data to add to the device along with the callbacks (optional). + * + * Every call to this routine should be balanced with a call to + * __pm_genpd_remove_callbacks() and they must not be nested. 
*/ int pm_genpd_add_callbacks(struct device *dev, struct gpd_dev_ops *ops, struct gpd_timing_data *td) { - struct pm_domain_data *pdd; + struct generic_pm_domain_data *gpd_data_new, *gpd_data = NULL; int ret = 0; - if (!(dev && dev->power.subsys_data && ops)) + if (!(dev && ops)) return -EINVAL; + gpd_data_new = __pm_genpd_alloc_dev_data(dev); + if (!gpd_data_new) + return -ENOMEM; + pm_runtime_disable(dev); device_pm_lock(); - pdd = dev->power.subsys_data->domain_data; - if (pdd) { - struct generic_pm_domain_data *gpd_data = to_gpd_data(pdd); + ret = dev_pm_get_subsys_data(dev); + if (ret) + goto out; + + spin_lock_irq(&dev->power.lock); - gpd_data->ops = *ops; - if (td) - gpd_data->td = *td; + if (dev->power.subsys_data->domain_data) { + gpd_data = to_gpd_data(dev->power.subsys_data->domain_data); } else { - ret = -EINVAL; + gpd_data = gpd_data_new; + dev->power.subsys_data->domain_data = &gpd_data->base; } + gpd_data->refcount++; + gpd_data->ops = *ops; + if (td) + gpd_data->td = *td; + spin_unlock_irq(&dev->power.lock); + + out: device_pm_unlock(); pm_runtime_enable(dev); + if (gpd_data != gpd_data_new) + __pm_genpd_free_dev_data(dev, gpd_data_new); + return ret; } EXPORT_SYMBOL_GPL(pm_genpd_add_callbacks); @@ -1359,10 +1802,13 @@ EXPORT_SYMBOL_GPL(pm_genpd_add_callbacks); * __pm_genpd_remove_callbacks - Remove PM domain callbacks from a given device. * @dev: Device to remove the callbacks from. * @clear_td: If set, clear the device's timing data too. + * + * This routine can only be called after pm_genpd_add_callbacks(). */ int __pm_genpd_remove_callbacks(struct device *dev, bool clear_td) { - struct pm_domain_data *pdd; + struct generic_pm_domain_data *gpd_data = NULL; + bool remove = false; int ret = 0; if (!(dev && dev->power.subsys_data)) @@ -1371,24 +1817,156 @@ int __pm_genpd_remove_callbacks(struct device *dev, bool clear_td) pm_runtime_disable(dev); device_pm_lock(); - pdd = dev->power.subsys_data->domain_data; - if (pdd) { - struct generic_pm_domain_data *gpd_data = to_gpd_data(pdd); + spin_lock_irq(&dev->power.lock); - gpd_data->ops = (struct gpd_dev_ops){ 0 }; + if (dev->power.subsys_data->domain_data) { + gpd_data = to_gpd_data(dev->power.subsys_data->domain_data); + gpd_data->ops = (struct gpd_dev_ops){ NULL }; if (clear_td) gpd_data->td = (struct gpd_timing_data){ 0 }; + + if (--gpd_data->refcount == 0) { + dev->power.subsys_data->domain_data = NULL; + remove = true; + } } else { ret = -EINVAL; } + spin_unlock_irq(&dev->power.lock); + device_pm_unlock(); pm_runtime_enable(dev); - return ret; + if (ret) + return ret; + + dev_pm_put_subsys_data(dev); + if (remove) + __pm_genpd_free_dev_data(dev, gpd_data); + + return 0; } EXPORT_SYMBOL_GPL(__pm_genpd_remove_callbacks); +/** + * pm_genpd_attach_cpuidle - Connect the given PM domain with cpuidle. + * @genpd: PM domain to be connected with cpuidle. + * @state: cpuidle state this domain can disable/enable. + * + * Make a PM domain behave as though it contained a CPU core, that is, instead + * of calling its power down routine it will enable the given cpuidle state so + * that the cpuidle subsystem can power it down (if possible and desirable). 
+ */ +int pm_genpd_attach_cpuidle(struct generic_pm_domain *genpd, int state) +{ + struct cpuidle_driver *cpuidle_drv; + struct gpd_cpu_data *cpu_data; + struct cpuidle_state *idle_state; + int ret = 0; + + if (IS_ERR_OR_NULL(genpd) || state < 0) + return -EINVAL; + + genpd_acquire_lock(genpd); + + if (genpd->cpu_data) { + ret = -EEXIST; + goto out; + } + cpu_data = kzalloc(sizeof(*cpu_data), GFP_KERNEL); + if (!cpu_data) { + ret = -ENOMEM; + goto out; + } + cpuidle_drv = cpuidle_driver_ref(); + if (!cpuidle_drv) { + ret = -ENODEV; + goto err_drv; + } + if (cpuidle_drv->state_count <= state) { + ret = -EINVAL; + goto err; + } + idle_state = &cpuidle_drv->states[state]; + if (!idle_state->disabled) { + ret = -EAGAIN; + goto err; + } + cpu_data->idle_state = idle_state; + cpu_data->saved_exit_latency = idle_state->exit_latency; + genpd->cpu_data = cpu_data; + genpd_recalc_cpu_exit_latency(genpd); + + out: + genpd_release_lock(genpd); + return ret; + + err: + cpuidle_driver_unref(); + + err_drv: + kfree(cpu_data); + goto out; +} + +/** + * pm_genpd_name_attach_cpuidle - Find PM domain and connect cpuidle to it. + * @name: Name of the domain to connect to cpuidle. + * @state: cpuidle state this domain can manipulate. + */ +int pm_genpd_name_attach_cpuidle(const char *name, int state) +{ + return pm_genpd_attach_cpuidle(pm_genpd_lookup_name(name), state); +} + +/** + * pm_genpd_detach_cpuidle - Remove the cpuidle connection from a PM domain. + * @genpd: PM domain to remove the cpuidle connection from. + * + * Remove the cpuidle connection set up by pm_genpd_attach_cpuidle() from the + * given PM domain. + */ +int pm_genpd_detach_cpuidle(struct generic_pm_domain *genpd) +{ + struct gpd_cpu_data *cpu_data; + struct cpuidle_state *idle_state; + int ret = 0; + + if (IS_ERR_OR_NULL(genpd)) + return -EINVAL; + + genpd_acquire_lock(genpd); + + cpu_data = genpd->cpu_data; + if (!cpu_data) { + ret = -ENODEV; + goto out; + } + idle_state = cpu_data->idle_state; + if (!idle_state->disabled) { + ret = -EAGAIN; + goto out; + } + idle_state->exit_latency = cpu_data->saved_exit_latency; + cpuidle_driver_unref(); + genpd->cpu_data = NULL; + kfree(cpu_data); + + out: + genpd_release_lock(genpd); + return ret; +} + +/** + * pm_genpd_name_detach_cpuidle - Find PM domain and disconnect cpuidle from it. + * @name: Name of the domain to disconnect cpuidle from. + */ +int pm_genpd_name_detach_cpuidle(const char *name) +{ + return pm_genpd_detach_cpuidle(pm_genpd_lookup_name(name)); +} + /* Default device callbacks for generic PM domains. */ /** @@ -1398,16 +1976,24 @@ EXPORT_SYMBOL_GPL(__pm_genpd_remove_callbacks); static int pm_genpd_default_save_state(struct device *dev) { int (*cb)(struct device *__dev); - struct device_driver *drv = dev->driver; cb = dev_gpd_data(dev)->ops.save_state; if (cb) return cb(dev); - if (drv && drv->pm && drv->pm->runtime_suspend) - return drv->pm->runtime_suspend(dev); + if (dev->type && dev->type->pm) + cb = dev->type->pm->runtime_suspend; + else if (dev->class && dev->class->pm) + cb = dev->class->pm->runtime_suspend; + else if (dev->bus && dev->bus->pm) + cb = dev->bus->pm->runtime_suspend; + else + cb = NULL; - return 0; + if (!cb && dev->driver && dev->driver->pm) + cb = dev->driver->pm->runtime_suspend; + + return cb ? 
cb(dev) : 0; } /** @@ -1417,16 +2003,24 @@ static int pm_genpd_default_save_state(struct device *dev) static int pm_genpd_default_restore_state(struct device *dev) { int (*cb)(struct device *__dev); - struct device_driver *drv = dev->driver; cb = dev_gpd_data(dev)->ops.restore_state; if (cb) return cb(dev); - if (drv && drv->pm && drv->pm->runtime_resume) - return drv->pm->runtime_resume(dev); + if (dev->type && dev->type->pm) + cb = dev->type->pm->runtime_resume; + else if (dev->class && dev->class->pm) + cb = dev->class->pm->runtime_resume; + else if (dev->bus && dev->bus->pm) + cb = dev->bus->pm->runtime_resume; + else + cb = NULL; - return 0; + if (!cb && dev->driver && dev->driver->pm) + cb = dev->driver->pm->runtime_resume; + + return cb ? cb(dev) : 0; } #ifdef CONFIG_PM_SLEEP @@ -1450,7 +2044,7 @@ static int pm_genpd_default_suspend_late(struct device *dev) { int (*cb)(struct device *__dev) = dev_gpd_data(dev)->ops.suspend_late; - return cb ? cb(dev) : pm_generic_suspend_noirq(dev); + return cb ? cb(dev) : pm_generic_suspend_late(dev); } /** @@ -1461,7 +2055,7 @@ static int pm_genpd_default_resume_early(struct device *dev) { int (*cb)(struct device *__dev) = dev_gpd_data(dev)->ops.resume_early; - return cb ? cb(dev) : pm_generic_resume_noirq(dev); + return cb ? cb(dev) : pm_generic_resume_early(dev); } /** @@ -1494,7 +2088,7 @@ static int pm_genpd_default_freeze_late(struct device *dev) { int (*cb)(struct device *__dev) = dev_gpd_data(dev)->ops.freeze_late; - return cb ? cb(dev) : pm_generic_freeze_noirq(dev); + return cb ? cb(dev) : pm_generic_freeze_late(dev); } /** @@ -1505,7 +2099,7 @@ static int pm_genpd_default_thaw_early(struct device *dev) { int (*cb)(struct device *__dev) = dev_gpd_data(dev)->ops.thaw_early; - return cb ? cb(dev) : pm_generic_thaw_noirq(dev); + return cb ? 
cb(dev) : pm_generic_thaw_early(dev); } /** @@ -1557,23 +2151,28 @@ void pm_genpd_init(struct generic_pm_domain *genpd, genpd->poweroff_task = NULL; genpd->resume_count = 0; genpd->device_count = 0; - genpd->suspended_count = 0; genpd->max_off_time_ns = -1; + genpd->max_off_time_changed = true; genpd->domain.ops.runtime_suspend = pm_genpd_runtime_suspend; genpd->domain.ops.runtime_resume = pm_genpd_runtime_resume; - genpd->domain.ops.runtime_idle = pm_generic_runtime_idle; genpd->domain.ops.prepare = pm_genpd_prepare; genpd->domain.ops.suspend = pm_genpd_suspend; + genpd->domain.ops.suspend_late = pm_genpd_suspend_late; genpd->domain.ops.suspend_noirq = pm_genpd_suspend_noirq; genpd->domain.ops.resume_noirq = pm_genpd_resume_noirq; + genpd->domain.ops.resume_early = pm_genpd_resume_early; genpd->domain.ops.resume = pm_genpd_resume; genpd->domain.ops.freeze = pm_genpd_freeze; + genpd->domain.ops.freeze_late = pm_genpd_freeze_late; genpd->domain.ops.freeze_noirq = pm_genpd_freeze_noirq; genpd->domain.ops.thaw_noirq = pm_genpd_thaw_noirq; + genpd->domain.ops.thaw_early = pm_genpd_thaw_early; genpd->domain.ops.thaw = pm_genpd_thaw; genpd->domain.ops.poweroff = pm_genpd_suspend; + genpd->domain.ops.poweroff_late = pm_genpd_suspend_late; genpd->domain.ops.poweroff_noirq = pm_genpd_suspend_noirq; genpd->domain.ops.restore_noirq = pm_genpd_restore_noirq; + genpd->domain.ops.restore_early = pm_genpd_resume_early; genpd->domain.ops.restore = pm_genpd_resume; genpd->domain.ops.complete = pm_genpd_complete; genpd->dev_ops.save_state = pm_genpd_default_save_state; diff --git a/drivers/base/power/domain_governor.c b/drivers/base/power/domain_governor.c index 66a265bf586..a089e3bcdfb 100644 --- a/drivers/base/power/domain_governor.c +++ b/drivers/base/power/domain_governor.c @@ -6,7 +6,6 @@ * This file is released under the GPLv2. */ -#include <linux/init.h> #include <linux/kernel.h> #include <linux/pm_domain.h> #include <linux/pm_qos.h> @@ -14,6 +13,31 @@ #ifdef CONFIG_PM_RUNTIME +static int dev_update_qos_constraint(struct device *dev, void *data) +{ + s64 *constraint_ns_p = data; + s32 constraint_ns = -1; + + if (dev->power.subsys_data && dev->power.subsys_data->domain_data) + constraint_ns = dev_gpd_data(dev)->td.effective_constraint_ns; + + if (constraint_ns < 0) { + constraint_ns = dev_pm_qos_read_value(dev); + constraint_ns *= NSEC_PER_USEC; + } + if (constraint_ns == 0) + return 0; + + /* + * constraint_ns cannot be negative here, because the device has been + * suspended. + */ + if (constraint_ns < *constraint_ns_p || *constraint_ns_p == 0) + *constraint_ns_p = constraint_ns; + + return 0; +} + /** * default_stop_ok - Default PM domain governor routine for stopping devices. * @dev: Device to check. 
@@ -21,14 +45,52 @@ bool default_stop_ok(struct device *dev) { struct gpd_timing_data *td = &dev_gpd_data(dev)->td; + unsigned long flags; + s64 constraint_ns; dev_dbg(dev, "%s()\n", __func__); - if (dev->power.max_time_suspended_ns < 0 || td->break_even_ns == 0) - return true; + spin_lock_irqsave(&dev->power.lock, flags); + + if (!td->constraint_changed) { + bool ret = td->cached_stop_ok; - return td->stop_latency_ns + td->start_latency_ns < td->break_even_ns - && td->break_even_ns < dev->power.max_time_suspended_ns; + spin_unlock_irqrestore(&dev->power.lock, flags); + return ret; + } + td->constraint_changed = false; + td->cached_stop_ok = false; + td->effective_constraint_ns = -1; + constraint_ns = __dev_pm_qos_read_value(dev); + + spin_unlock_irqrestore(&dev->power.lock, flags); + + if (constraint_ns < 0) + return false; + + constraint_ns *= NSEC_PER_USEC; + /* + * We can walk the children without any additional locking, because + * they all have been suspended at this point and their + * effective_constraint_ns fields won't be modified in parallel with us. + */ + if (!dev->power.ignore_children) + device_for_each_child(dev, &constraint_ns, + dev_update_qos_constraint); + + if (constraint_ns > 0) { + constraint_ns -= td->start_latency_ns; + if (constraint_ns == 0) + return false; + } + td->effective_constraint_ns = constraint_ns; + td->cached_stop_ok = constraint_ns > td->stop_latency_ns || + constraint_ns == 0; + /* + * The children have been suspended already, so we don't need to take + * their stop latencies into account here. + */ + return td->cached_stop_ok; } /** @@ -42,9 +104,27 @@ static bool default_power_down_ok(struct dev_pm_domain *pd) struct generic_pm_domain *genpd = pd_to_genpd(pd); struct gpd_link *link; struct pm_domain_data *pdd; - s64 min_dev_off_time_ns; + s64 min_off_time_ns; s64 off_on_time_ns; - ktime_t time_now = ktime_get(); + + if (genpd->max_off_time_changed) { + struct gpd_link *link; + + /* + * We have to invalidate the cached results for the masters, so + * use the observation that default_power_down_ok() is not + * going to be called for any master until this instance + * returns. + */ + list_for_each_entry(link, &genpd->slave_links, slave_node) + link->master->max_off_time_changed = true; + + genpd->max_off_time_changed = false; + genpd->cached_power_down_ok = false; + genpd->max_off_time_ns = -1; + } else { + return genpd->cached_power_down_ok; + } off_on_time_ns = genpd->power_off_latency_ns + genpd->power_on_latency_ns; @@ -61,6 +141,7 @@ static bool default_power_down_ok(struct dev_pm_domain *pd) to_gpd_data(pdd)->td.save_state_latency_ns; } + min_off_time_ns = -1; /* * Check if subdomains can be off for enough time. * @@ -73,8 +154,6 @@ static bool default_power_down_ok(struct dev_pm_domain *pd) if (sd_max_off_ns < 0) continue; - sd_max_off_ns -= ktime_to_ns(ktime_sub(time_now, - sd->power_off_time)); /* * Check if the subdomain is allowed to be off long enough for * the current domain to turn off and on (that's how much time @@ -82,60 +161,64 @@ static bool default_power_down_ok(struct dev_pm_domain *pd) */ if (sd_max_off_ns <= off_on_time_ns) return false; + + if (min_off_time_ns > sd_max_off_ns || min_off_time_ns < 0) + min_off_time_ns = sd_max_off_ns; } /* * Check if the devices in the domain can be off enough time. 
*/ - min_dev_off_time_ns = -1; list_for_each_entry(pdd, &genpd->dev_list, list_node) { struct gpd_timing_data *td; - struct device *dev = pdd->dev; - s64 dev_off_time_ns; + s64 constraint_ns; - if (!dev->driver || dev->power.max_time_suspended_ns < 0) + if (!pdd->dev->driver) continue; + /* + * Check if the device is allowed to be off long enough for the + * domain to turn off and on (that's how much time it will + * have to wait worst case). + */ td = &to_gpd_data(pdd)->td; - dev_off_time_ns = dev->power.max_time_suspended_ns - - (td->start_latency_ns + td->restore_state_latency_ns + - ktime_to_ns(ktime_sub(time_now, - dev->power.suspend_time))); - if (dev_off_time_ns <= off_on_time_ns) - return false; - - if (min_dev_off_time_ns > dev_off_time_ns - || min_dev_off_time_ns < 0) - min_dev_off_time_ns = dev_off_time_ns; - } + constraint_ns = td->effective_constraint_ns; + /* default_stop_ok() need not be called before us. */ + if (constraint_ns < 0) { + constraint_ns = dev_pm_qos_read_value(pdd->dev); + constraint_ns *= NSEC_PER_USEC; + } + if (constraint_ns == 0) + continue; - if (min_dev_off_time_ns < 0) { /* - * There are no latency constraints, so the domain can spend - * arbitrary time in the "off" state. + * constraint_ns cannot be negative here, because the device has + * been suspended. */ - genpd->max_off_time_ns = -1; - return true; + constraint_ns -= td->restore_state_latency_ns; + if (constraint_ns <= off_on_time_ns) + return false; + + if (min_off_time_ns > constraint_ns || min_off_time_ns < 0) + min_off_time_ns = constraint_ns; } + genpd->cached_power_down_ok = true; + /* - * The difference between the computed minimum delta and the time needed - * to turn the domain on is the maximum theoretical time this domain can - * spend in the "off" state. + * If the computed minimum device off time is negative, there are no + * latency constraints, so the domain can spend arbitrary time in the + * "off" state. */ - min_dev_off_time_ns -= genpd->power_on_latency_ns; + if (min_off_time_ns < 0) + return true; /* - * If the difference between the computed minimum delta and the time - * needed to turn the domain off and back on on is smaller than the - * domain's power break even time, removing power from the domain is not - * worth it. + * The difference between the computed minimum subdomain or device off + * time and the time needed to turn the domain on is the maximum + * theoretical time this domain can spend in the "off" state. */ - if (genpd->break_even_ns > - min_dev_off_time_ns - genpd->power_off_latency_ns) - return false; - - genpd->max_off_time_ns = min_dev_off_time_ns; + genpd->max_off_time_ns = min_off_time_ns - genpd->power_on_latency_ns; return true; } diff --git a/drivers/base/power/generic_ops.c b/drivers/base/power/generic_ops.c index 10bdd793f0b..96a92db83ca 100644 --- a/drivers/base/power/generic_ops.c +++ b/drivers/base/power/generic_ops.c @@ -10,30 +10,7 @@ #include <linux/pm_runtime.h> #include <linux/export.h> -#ifdef CONFIG_PM_RUNTIME -/** - * pm_generic_runtime_idle - Generic runtime idle callback for subsystems. - * @dev: Device to handle. - * - * If PM operations are defined for the @dev's driver and they include - * ->runtime_idle(), execute it and return its error code, if nonzero. - * Otherwise, execute pm_runtime_suspend() for the device and return 0. - */ -int pm_generic_runtime_idle(struct device *dev) -{ - const struct dev_pm_ops *pm = dev->driver ? 
dev->driver->pm : NULL; - - if (pm && pm->runtime_idle) { - int ret = pm->runtime_idle(dev); - if (ret) - return ret; - } - - pm_runtime_suspend(dev); - return 0; -} -EXPORT_SYMBOL_GPL(pm_generic_runtime_idle); - +#ifdef CONFIG_PM /** * pm_generic_runtime_suspend - Generic runtime suspend callback for subsystems. * @dev: Device to suspend. @@ -71,7 +48,7 @@ int pm_generic_runtime_resume(struct device *dev) return ret; } EXPORT_SYMBOL_GPL(pm_generic_runtime_resume); -#endif /* CONFIG_PM_RUNTIME */ +#endif /* CONFIG_PM */ #ifdef CONFIG_PM_SLEEP /** @@ -92,59 +69,28 @@ int pm_generic_prepare(struct device *dev) } /** - * __pm_generic_call - Generic suspend/freeze/poweroff/thaw subsystem callback. - * @dev: Device to handle. - * @event: PM transition of the system under way. - * @bool: Whether or not this is the "noirq" stage. - * - * Execute the PM callback corresponding to @event provided by the driver of - * @dev, if defined, and return its error code. Return 0 if the callback is - * not present. + * pm_generic_suspend_noirq - Generic suspend_noirq callback for subsystems. + * @dev: Device to suspend. */ -static int __pm_generic_call(struct device *dev, int event, bool noirq) +int pm_generic_suspend_noirq(struct device *dev) { const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; - int (*callback)(struct device *); - - if (!pm) - return 0; - - switch (event) { - case PM_EVENT_SUSPEND: - callback = noirq ? pm->suspend_noirq : pm->suspend; - break; - case PM_EVENT_FREEZE: - callback = noirq ? pm->freeze_noirq : pm->freeze; - break; - case PM_EVENT_HIBERNATE: - callback = noirq ? pm->poweroff_noirq : pm->poweroff; - break; - case PM_EVENT_RESUME: - callback = noirq ? pm->resume_noirq : pm->resume; - break; - case PM_EVENT_THAW: - callback = noirq ? pm->thaw_noirq : pm->thaw; - break; - case PM_EVENT_RESTORE: - callback = noirq ? pm->restore_noirq : pm->restore; - break; - default: - callback = NULL; - break; - } - - return callback ? callback(dev) : 0; + + return pm && pm->suspend_noirq ? pm->suspend_noirq(dev) : 0; } +EXPORT_SYMBOL_GPL(pm_generic_suspend_noirq); /** - * pm_generic_suspend_noirq - Generic suspend_noirq callback for subsystems. + * pm_generic_suspend_late - Generic suspend_late callback for subsystems. * @dev: Device to suspend. */ -int pm_generic_suspend_noirq(struct device *dev) +int pm_generic_suspend_late(struct device *dev) { - return __pm_generic_call(dev, PM_EVENT_SUSPEND, true); + const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; + + return pm && pm->suspend_late ? pm->suspend_late(dev) : 0; } -EXPORT_SYMBOL_GPL(pm_generic_suspend_noirq); +EXPORT_SYMBOL_GPL(pm_generic_suspend_late); /** * pm_generic_suspend - Generic suspend callback for subsystems. @@ -152,7 +98,9 @@ EXPORT_SYMBOL_GPL(pm_generic_suspend_noirq); */ int pm_generic_suspend(struct device *dev) { - return __pm_generic_call(dev, PM_EVENT_SUSPEND, false); + const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; + + return pm && pm->suspend ? pm->suspend(dev) : 0; } EXPORT_SYMBOL_GPL(pm_generic_suspend); @@ -162,17 +110,33 @@ EXPORT_SYMBOL_GPL(pm_generic_suspend); */ int pm_generic_freeze_noirq(struct device *dev) { - return __pm_generic_call(dev, PM_EVENT_FREEZE, true); + const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; + + return pm && pm->freeze_noirq ? pm->freeze_noirq(dev) : 0; } EXPORT_SYMBOL_GPL(pm_generic_freeze_noirq); /** + * pm_generic_freeze_late - Generic freeze_late callback for subsystems. + * @dev: Device to freeze. 
+ */ +int pm_generic_freeze_late(struct device *dev) +{ + const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; + + return pm && pm->freeze_late ? pm->freeze_late(dev) : 0; +} +EXPORT_SYMBOL_GPL(pm_generic_freeze_late); + +/** * pm_generic_freeze - Generic freeze callback for subsystems. * @dev: Device to freeze. */ int pm_generic_freeze(struct device *dev) { - return __pm_generic_call(dev, PM_EVENT_FREEZE, false); + const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; + + return pm && pm->freeze ? pm->freeze(dev) : 0; } EXPORT_SYMBOL_GPL(pm_generic_freeze); @@ -182,17 +146,33 @@ EXPORT_SYMBOL_GPL(pm_generic_freeze); */ int pm_generic_poweroff_noirq(struct device *dev) { - return __pm_generic_call(dev, PM_EVENT_HIBERNATE, true); + const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; + + return pm && pm->poweroff_noirq ? pm->poweroff_noirq(dev) : 0; } EXPORT_SYMBOL_GPL(pm_generic_poweroff_noirq); /** + * pm_generic_poweroff_late - Generic poweroff_late callback for subsystems. + * @dev: Device to handle. + */ +int pm_generic_poweroff_late(struct device *dev) +{ + const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; + + return pm && pm->poweroff_late ? pm->poweroff_late(dev) : 0; +} +EXPORT_SYMBOL_GPL(pm_generic_poweroff_late); + +/** * pm_generic_poweroff - Generic poweroff callback for subsystems. * @dev: Device to handle. */ int pm_generic_poweroff(struct device *dev) { - return __pm_generic_call(dev, PM_EVENT_HIBERNATE, false); + const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; + + return pm && pm->poweroff ? pm->poweroff(dev) : 0; } EXPORT_SYMBOL_GPL(pm_generic_poweroff); @@ -202,17 +182,33 @@ EXPORT_SYMBOL_GPL(pm_generic_poweroff); */ int pm_generic_thaw_noirq(struct device *dev) { - return __pm_generic_call(dev, PM_EVENT_THAW, true); + const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; + + return pm && pm->thaw_noirq ? pm->thaw_noirq(dev) : 0; } EXPORT_SYMBOL_GPL(pm_generic_thaw_noirq); /** + * pm_generic_thaw_early - Generic thaw_early callback for subsystems. + * @dev: Device to thaw. + */ +int pm_generic_thaw_early(struct device *dev) +{ + const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; + + return pm && pm->thaw_early ? pm->thaw_early(dev) : 0; +} +EXPORT_SYMBOL_GPL(pm_generic_thaw_early); + +/** * pm_generic_thaw - Generic thaw callback for subsystems. * @dev: Device to thaw. */ int pm_generic_thaw(struct device *dev) { - return __pm_generic_call(dev, PM_EVENT_THAW, false); + const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; + + return pm && pm->thaw ? pm->thaw(dev) : 0; } EXPORT_SYMBOL_GPL(pm_generic_thaw); @@ -222,17 +218,33 @@ EXPORT_SYMBOL_GPL(pm_generic_thaw); */ int pm_generic_resume_noirq(struct device *dev) { - return __pm_generic_call(dev, PM_EVENT_RESUME, true); + const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; + + return pm && pm->resume_noirq ? pm->resume_noirq(dev) : 0; } EXPORT_SYMBOL_GPL(pm_generic_resume_noirq); /** + * pm_generic_resume_early - Generic resume_early callback for subsystems. + * @dev: Device to resume. + */ +int pm_generic_resume_early(struct device *dev) +{ + const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; + + return pm && pm->resume_early ? pm->resume_early(dev) : 0; +} +EXPORT_SYMBOL_GPL(pm_generic_resume_early); + +/** * pm_generic_resume - Generic resume callback for subsystems. * @dev: Device to resume. 
*/ int pm_generic_resume(struct device *dev) { - return __pm_generic_call(dev, PM_EVENT_RESUME, false); + const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; + + return pm && pm->resume ? pm->resume(dev) : 0; } EXPORT_SYMBOL_GPL(pm_generic_resume); @@ -242,22 +254,38 @@ EXPORT_SYMBOL_GPL(pm_generic_resume); */ int pm_generic_restore_noirq(struct device *dev) { - return __pm_generic_call(dev, PM_EVENT_RESTORE, true); + const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; + + return pm && pm->restore_noirq ? pm->restore_noirq(dev) : 0; } EXPORT_SYMBOL_GPL(pm_generic_restore_noirq); /** + * pm_generic_restore_early - Generic restore_early callback for subsystems. + * @dev: Device to resume. + */ +int pm_generic_restore_early(struct device *dev) +{ + const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; + + return pm && pm->restore_early ? pm->restore_early(dev) : 0; +} +EXPORT_SYMBOL_GPL(pm_generic_restore_early); + +/** * pm_generic_restore - Generic restore callback for subsystems. * @dev: Device to restore. */ int pm_generic_restore(struct device *dev) { - return __pm_generic_call(dev, PM_EVENT_RESTORE, false); + const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; + + return pm && pm->restore ? pm->restore(dev) : 0; } EXPORT_SYMBOL_GPL(pm_generic_restore); /** - * pm_generic_complete - Generic routine competing a device power transition. + * pm_generic_complete - Generic routine completing a device power transition. * @dev: Device to handle. * * Complete a device power transition during a system-wide power transition. @@ -273,6 +301,6 @@ void pm_generic_complete(struct device *dev) * Let runtime PM try to suspend devices that haven't been in use before * going into the system-wide sleep state we're resuming from. */ - pm_runtime_idle(dev); + pm_request_idle(dev); } #endif /* CONFIG_PM_SLEEP */ diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index e2cc3d2e0ec..bf412961a93 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -28,6 +28,10 @@ #include <linux/sched.h> #include <linux/async.h> #include <linux/suspend.h> +#include <trace/events/power.h> +#include <linux/cpufreq.h> +#include <linux/cpuidle.h> +#include <linux/timer.h> #include "../base.h" #include "power.h" @@ -45,9 +49,10 @@ typedef int (*pm_callback_t)(struct device *); */ LIST_HEAD(dpm_list); -LIST_HEAD(dpm_prepared_list); -LIST_HEAD(dpm_suspended_list); -LIST_HEAD(dpm_noirq_list); +static LIST_HEAD(dpm_prepared_list); +static LIST_HEAD(dpm_suspended_list); +static LIST_HEAD(dpm_late_early_list); +static LIST_HEAD(dpm_noirq_list); struct suspend_stats suspend_stats; static DEFINE_MUTEX(dpm_list_mtx); @@ -55,21 +60,44 @@ static pm_message_t pm_transition; static int async_error; +static char *pm_verb(int event) +{ + switch (event) { + case PM_EVENT_SUSPEND: + return "suspend"; + case PM_EVENT_RESUME: + return "resume"; + case PM_EVENT_FREEZE: + return "freeze"; + case PM_EVENT_QUIESCE: + return "quiesce"; + case PM_EVENT_HIBERNATE: + return "hibernate"; + case PM_EVENT_THAW: + return "thaw"; + case PM_EVENT_RESTORE: + return "restore"; + case PM_EVENT_RECOVER: + return "recover"; + default: + return "(unknown PM event)"; + } +} + /** - * device_pm_init - Initialize the PM-related part of a device object. + * device_pm_sleep_init - Initialize system suspend-related device fields. * @dev: Device object being initialized. 
*/ -void device_pm_init(struct device *dev) +void device_pm_sleep_init(struct device *dev) { dev->power.is_prepared = false; dev->power.is_suspended = false; + dev->power.is_noirq_suspended = false; + dev->power.is_late_suspended = false; init_completion(&dev->power.completion); complete_all(&dev->power.completion); dev->power.wakeup = NULL; - spin_lock_init(&dev->power.lock); - pm_runtime_init(dev); INIT_LIST_HEAD(&dev->power.entry); - dev->power.power_state = PMSG_INVALID; } /** @@ -101,7 +129,6 @@ void device_pm_add(struct device *dev) dev_warn(dev, "parent %s should not be sleeping\n", dev_name(dev->parent)); list_add_tail(&dev->power.entry, &dpm_list); - dev_pm_qos_constraints_init(dev); mutex_unlock(&dpm_list_mtx); } @@ -115,7 +142,6 @@ void device_pm_remove(struct device *dev) dev->bus ? dev->bus->name : "No Bus", dev_name(dev)); complete_all(&dev->power.completion); mutex_lock(&dpm_list_mtx); - dev_pm_qos_constraints_destroy(dev); list_del_init(&dev->power.entry); mutex_unlock(&dpm_list_mtx); device_wakeup_disable(dev); @@ -165,7 +191,7 @@ static ktime_t initcall_debug_start(struct device *dev) { ktime_t calltime = ktime_set(0, 0); - if (initcall_debug) { + if (pm_print_times_enabled) { pr_info("calling %s+ @ %i, parent: %s\n", dev_name(dev), task_pid_nr(current), dev->parent ? dev_name(dev->parent) : "none"); @@ -176,15 +202,17 @@ static ktime_t initcall_debug_start(struct device *dev) } static void initcall_debug_report(struct device *dev, ktime_t calltime, - int error) + int error, pm_message_t state, char *info) { - ktime_t delta, rettime; + ktime_t rettime; + s64 nsecs; + + rettime = ktime_get(); + nsecs = (s64) ktime_to_ns(ktime_sub(rettime, calltime)); - if (initcall_debug) { - rettime = ktime_get(); - delta = ktime_sub(rettime, calltime); + if (pm_print_times_enabled) { pr_info("call %s+ returned %d after %Ld usecs\n", dev_name(dev), - error, (unsigned long long)ktime_to_ns(delta) >> 10); + error, (unsigned long long)nsecs >> 10); } } @@ -246,61 +274,71 @@ static pm_callback_t pm_op(const struct dev_pm_ops *ops, pm_message_t state) } /** - * pm_noirq_op - Return the PM operation appropriate for given PM event. + * pm_late_early_op - Return the PM operation appropriate for given PM event. * @ops: PM operations to choose from. * @state: PM transition of the system being carried out. * - * The driver of @dev will not receive interrupts while this function is being - * executed. + * Runtime PM is disabled for @dev while this function is being executed. */ -static pm_callback_t pm_noirq_op(const struct dev_pm_ops *ops, pm_message_t state) +static pm_callback_t pm_late_early_op(const struct dev_pm_ops *ops, + pm_message_t state) { switch (state.event) { #ifdef CONFIG_SUSPEND case PM_EVENT_SUSPEND: - return ops->suspend_noirq; + return ops->suspend_late; case PM_EVENT_RESUME: - return ops->resume_noirq; + return ops->resume_early; #endif /* CONFIG_SUSPEND */ #ifdef CONFIG_HIBERNATE_CALLBACKS case PM_EVENT_FREEZE: case PM_EVENT_QUIESCE: - return ops->freeze_noirq; + return ops->freeze_late; case PM_EVENT_HIBERNATE: - return ops->poweroff_noirq; + return ops->poweroff_late; case PM_EVENT_THAW: case PM_EVENT_RECOVER: - return ops->thaw_noirq; + return ops->thaw_early; case PM_EVENT_RESTORE: - return ops->restore_noirq; + return ops->restore_early; #endif /* CONFIG_HIBERNATE_CALLBACKS */ } return NULL; } -static char *pm_verb(int event) +/** + * pm_noirq_op - Return the PM operation appropriate for given PM event. + * @ops: PM operations to choose from. 
+ * @state: PM transition of the system being carried out. + * + * The driver of @dev will not receive interrupts while this function is being + * executed. + */ +static pm_callback_t pm_noirq_op(const struct dev_pm_ops *ops, pm_message_t state) { - switch (event) { + switch (state.event) { +#ifdef CONFIG_SUSPEND case PM_EVENT_SUSPEND: - return "suspend"; + return ops->suspend_noirq; case PM_EVENT_RESUME: - return "resume"; + return ops->resume_noirq; +#endif /* CONFIG_SUSPEND */ +#ifdef CONFIG_HIBERNATE_CALLBACKS case PM_EVENT_FREEZE: - return "freeze"; case PM_EVENT_QUIESCE: - return "quiesce"; + return ops->freeze_noirq; case PM_EVENT_HIBERNATE: - return "hibernate"; + return ops->poweroff_noirq; case PM_EVENT_THAW: - return "thaw"; - case PM_EVENT_RESTORE: - return "restore"; case PM_EVENT_RECOVER: - return "recover"; - default: - return "(unknown PM event)"; + return ops->thaw_noirq; + case PM_EVENT_RESTORE: + return ops->restore_noirq; +#endif /* CONFIG_HIBERNATE_CALLBACKS */ } + + return NULL; } static void pm_dev_dbg(struct device *dev, pm_message_t state, char *info) @@ -346,14 +384,81 @@ static int dpm_run_callback(pm_callback_t cb, struct device *dev, calltime = initcall_debug_start(dev); pm_dev_dbg(dev, state, info); + trace_device_pm_callback_start(dev, info, state.event); error = cb(dev); + trace_device_pm_callback_end(dev, error); suspend_report_result(cb, error); - initcall_debug_report(dev, calltime, error); + initcall_debug_report(dev, calltime, error, state, info); return error; } +#ifdef CONFIG_DPM_WATCHDOG +struct dpm_watchdog { + struct device *dev; + struct task_struct *tsk; + struct timer_list timer; +}; + +#define DECLARE_DPM_WATCHDOG_ON_STACK(wd) \ + struct dpm_watchdog wd + +/** + * dpm_watchdog_handler - Driver suspend / resume watchdog handler. + * @data: Watchdog object address. + * + * Called when a driver has timed out suspending or resuming. + * There's not much we can do here to recover so panic() to + * capture a crash-dump in pstore. + */ +static void dpm_watchdog_handler(unsigned long data) +{ + struct dpm_watchdog *wd = (void *)data; + + dev_emerg(wd->dev, "**** DPM device timeout ****\n"); + show_stack(wd->tsk, NULL); + panic("%s %s: unrecoverable failure\n", + dev_driver_string(wd->dev), dev_name(wd->dev)); +} + +/** + * dpm_watchdog_set - Enable pm watchdog for given device. + * @wd: Watchdog. Must be allocated on the stack. + * @dev: Device to handle. + */ +static void dpm_watchdog_set(struct dpm_watchdog *wd, struct device *dev) +{ + struct timer_list *timer = &wd->timer; + + wd->dev = dev; + wd->tsk = current; + + init_timer_on_stack(timer); + /* use same timeout value for both suspend and resume */ + timer->expires = jiffies + HZ * CONFIG_DPM_WATCHDOG_TIMEOUT; + timer->function = dpm_watchdog_handler; + timer->data = (unsigned long)wd; + add_timer(timer); +} + +/** + * dpm_watchdog_clear - Disable suspend/resume watchdog. + * @wd: Watchdog to disable. + */ +static void dpm_watchdog_clear(struct dpm_watchdog *wd) +{ + struct timer_list *timer = &wd->timer; + + del_timer_sync(timer); + destroy_timer_on_stack(timer); +} +#else +#define DECLARE_DPM_WATCHDOG_ON_STACK(wd) +#define dpm_watchdog_set(x, y) +#define dpm_watchdog_clear(x) +#endif + /*------------------------- Resume routines -------------------------*/ /** @@ -364,7 +469,7 @@ static int dpm_run_callback(pm_callback_t cb, struct device *dev, * The driver of @dev will not receive interrupts while this function is being * executed. 
*/ -static int device_resume_noirq(struct device *dev, pm_message_t state) +static int device_resume_noirq(struct device *dev, pm_message_t state, bool async) { pm_callback_t callback = NULL; char *info = NULL; @@ -373,67 +478,246 @@ static int device_resume_noirq(struct device *dev, pm_message_t state) TRACE_DEVICE(dev); TRACE_RESUME(0); + if (dev->power.syscore || dev->power.direct_complete) + goto Out; + + if (!dev->power.is_noirq_suspended) + goto Out; + + dpm_wait(dev->parent, async); + if (dev->pm_domain) { - info = "EARLY power domain "; + info = "noirq power domain "; callback = pm_noirq_op(&dev->pm_domain->ops, state); } else if (dev->type && dev->type->pm) { - info = "EARLY type "; + info = "noirq type "; callback = pm_noirq_op(dev->type->pm, state); } else if (dev->class && dev->class->pm) { - info = "EARLY class "; + info = "noirq class "; callback = pm_noirq_op(dev->class->pm, state); } else if (dev->bus && dev->bus->pm) { - info = "EARLY bus "; + info = "noirq bus "; callback = pm_noirq_op(dev->bus->pm, state); } if (!callback && dev->driver && dev->driver->pm) { - info = "EARLY driver "; + info = "noirq driver "; callback = pm_noirq_op(dev->driver->pm, state); } error = dpm_run_callback(callback, dev, state, info); + dev->power.is_noirq_suspended = false; + Out: + complete_all(&dev->power.completion); TRACE_RESUME(error); return error; } +static bool is_async(struct device *dev) +{ + return dev->power.async_suspend && pm_async_enabled + && !pm_trace_is_enabled(); +} + +static void async_resume_noirq(void *data, async_cookie_t cookie) +{ + struct device *dev = (struct device *)data; + int error; + + error = device_resume_noirq(dev, pm_transition, true); + if (error) + pm_dev_err(dev, pm_transition, " async", error); + + put_device(dev); +} + /** - * dpm_resume_noirq - Execute "early resume" callbacks for non-sysdev devices. + * dpm_resume_noirq - Execute "noirq resume" callbacks for all devices. * @state: PM transition of the system being carried out. * - * Call the "noirq" resume handlers for all devices marked as DPM_OFF_IRQ and + * Call the "noirq" resume handlers for all devices in dpm_noirq_list and * enable device drivers to receive interrupts. */ -void dpm_resume_noirq(pm_message_t state) +static void dpm_resume_noirq(pm_message_t state) { + struct device *dev; ktime_t starttime = ktime_get(); + trace_suspend_resume(TPS("dpm_resume_noirq"), state.event, true); mutex_lock(&dpm_list_mtx); + pm_transition = state; + + /* + * Advanced the async threads upfront, + * in case the starting of async threads is + * delayed by non-async resuming devices. 
+ */ + list_for_each_entry(dev, &dpm_noirq_list, power.entry) { + reinit_completion(&dev->power.completion); + if (is_async(dev)) { + get_device(dev); + async_schedule(async_resume_noirq, dev); + } + } + while (!list_empty(&dpm_noirq_list)) { - struct device *dev = to_device(dpm_noirq_list.next); - int error; + dev = to_device(dpm_noirq_list.next); + get_device(dev); + list_move_tail(&dev->power.entry, &dpm_late_early_list); + mutex_unlock(&dpm_list_mtx); + + if (!is_async(dev)) { + int error; + + error = device_resume_noirq(dev, state, false); + if (error) { + suspend_stats.failed_resume_noirq++; + dpm_save_failed_step(SUSPEND_RESUME_NOIRQ); + dpm_save_failed_dev(dev_name(dev)); + pm_dev_err(dev, state, " noirq", error); + } + } + + mutex_lock(&dpm_list_mtx); + put_device(dev); + } + mutex_unlock(&dpm_list_mtx); + async_synchronize_full(); + dpm_show_time(starttime, state, "noirq"); + resume_device_irqs(); + cpuidle_resume(); + trace_suspend_resume(TPS("dpm_resume_noirq"), state.event, false); +} + +/** + * device_resume_early - Execute an "early resume" callback for given device. + * @dev: Device to handle. + * @state: PM transition of the system being carried out. + * + * Runtime PM is disabled for @dev while this function is being executed. + */ +static int device_resume_early(struct device *dev, pm_message_t state, bool async) +{ + pm_callback_t callback = NULL; + char *info = NULL; + int error = 0; + + TRACE_DEVICE(dev); + TRACE_RESUME(0); + + if (dev->power.syscore || dev->power.direct_complete) + goto Out; + + if (!dev->power.is_late_suspended) + goto Out; + + dpm_wait(dev->parent, async); + + if (dev->pm_domain) { + info = "early power domain "; + callback = pm_late_early_op(&dev->pm_domain->ops, state); + } else if (dev->type && dev->type->pm) { + info = "early type "; + callback = pm_late_early_op(dev->type->pm, state); + } else if (dev->class && dev->class->pm) { + info = "early class "; + callback = pm_late_early_op(dev->class->pm, state); + } else if (dev->bus && dev->bus->pm) { + info = "early bus "; + callback = pm_late_early_op(dev->bus->pm, state); + } + if (!callback && dev->driver && dev->driver->pm) { + info = "early driver "; + callback = pm_late_early_op(dev->driver->pm, state); + } + + error = dpm_run_callback(callback, dev, state, info); + dev->power.is_late_suspended = false; + + Out: + TRACE_RESUME(error); + + pm_runtime_enable(dev); + complete_all(&dev->power.completion); + return error; +} + +static void async_resume_early(void *data, async_cookie_t cookie) +{ + struct device *dev = (struct device *)data; + int error; + + error = device_resume_early(dev, pm_transition, true); + if (error) + pm_dev_err(dev, pm_transition, " async", error); + + put_device(dev); +} + +/** + * dpm_resume_early - Execute "early resume" callbacks for all devices. + * @state: PM transition of the system being carried out. + */ +static void dpm_resume_early(pm_message_t state) +{ + struct device *dev; + ktime_t starttime = ktime_get(); + + trace_suspend_resume(TPS("dpm_resume_early"), state.event, true); + mutex_lock(&dpm_list_mtx); + pm_transition = state; + + /* + * Advanced the async threads upfront, + * in case the starting of async threads is + * delayed by non-async resuming devices. 
+ */ + list_for_each_entry(dev, &dpm_late_early_list, power.entry) { + reinit_completion(&dev->power.completion); + if (is_async(dev)) { + get_device(dev); + async_schedule(async_resume_early, dev); + } + } + + while (!list_empty(&dpm_late_early_list)) { + dev = to_device(dpm_late_early_list.next); get_device(dev); list_move_tail(&dev->power.entry, &dpm_suspended_list); mutex_unlock(&dpm_list_mtx); - error = device_resume_noirq(dev, state); - if (error) { - suspend_stats.failed_resume_noirq++; - dpm_save_failed_step(SUSPEND_RESUME_NOIRQ); - dpm_save_failed_dev(dev_name(dev)); - pm_dev_err(dev, state, " early", error); - } + if (!is_async(dev)) { + int error; + error = device_resume_early(dev, state, false); + if (error) { + suspend_stats.failed_resume_early++; + dpm_save_failed_step(SUSPEND_RESUME_EARLY); + dpm_save_failed_dev(dev_name(dev)); + pm_dev_err(dev, state, " early", error); + } + } mutex_lock(&dpm_list_mtx); put_device(dev); } mutex_unlock(&dpm_list_mtx); + async_synchronize_full(); dpm_show_time(starttime, state, "early"); - resume_device_irqs(); + trace_suspend_resume(TPS("dpm_resume_early"), state.event, false); } -EXPORT_SYMBOL_GPL(dpm_resume_noirq); + +/** + * dpm_resume_start - Execute "noirq" and "early" device callbacks. + * @state: PM transition of the system being carried out. + */ +void dpm_resume_start(pm_message_t state) +{ + dpm_resume_noirq(state); + dpm_resume_early(state); +} +EXPORT_SYMBOL_GPL(dpm_resume_start); /** * device_resume - Execute "resume" callbacks for given device. @@ -446,12 +730,22 @@ static int device_resume(struct device *dev, pm_message_t state, bool async) pm_callback_t callback = NULL; char *info = NULL; int error = 0; - bool put = false; + DECLARE_DPM_WATCHDOG_ON_STACK(wd); TRACE_DEVICE(dev); TRACE_RESUME(0); + if (dev->power.syscore) + goto Complete; + + if (dev->power.direct_complete) { + /* Match the pm_runtime_disable() in __device_suspend(). */ + pm_runtime_enable(dev); + goto Complete; + } + dpm_wait(dev->parent, async); + dpm_watchdog_set(&wd, dev); device_lock(dev); /* @@ -463,9 +757,6 @@ static int device_resume(struct device *dev, pm_message_t state, bool async) if (!dev->power.is_suspended) goto Unlock; - pm_runtime_enable(dev); - put = true; - if (dev->pm_domain) { info = "power domain "; callback = pm_op(&dev->pm_domain->ops, state); @@ -513,13 +804,13 @@ static int device_resume(struct device *dev, pm_message_t state, bool async) Unlock: device_unlock(dev); + dpm_watchdog_clear(&wd); + + Complete: complete_all(&dev->power.completion); TRACE_RESUME(error); - if (put) - pm_runtime_put_sync(dev); - return error; } @@ -534,12 +825,6 @@ static void async_resume(void *data, async_cookie_t cookie) put_device(dev); } -static bool is_async(struct device *dev) -{ - return dev->power.async_suspend && pm_async_enabled - && !pm_trace_is_enabled(); -} - /** * dpm_resume - Execute "resume" callbacks for non-sysdev devices. * @state: PM transition of the system being carried out. 
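For context on the new phases wired up above: this series splits system sleep into prepare, suspend, suspend_late and suspend_noirq on the way down, and resume_noirq, resume_early, resume and complete on the way back up, with dpm_suspend_end() covering the "late" plus "noirq" suspend steps and dpm_resume_start() covering the "noirq" plus "early" resume steps. Runtime PM is disabled before a device's late callback runs and re-enabled right after its early callback. The sketch below is illustrative only and is not part of the patch; foo_suspend_late() and foo_resume_early() are hypothetical driver callbacks, shown simply to indicate which dev_pm_ops fields pm_late_early_op() selects for PM_EVENT_SUSPEND and PM_EVENT_RESUME.

	#include <linux/device.h>
	#include <linux/pm.h>

	/* Hypothetical example callbacks -- not from this patch. */
	static int foo_suspend_late(struct device *dev)
	{
		/* Runs after ->suspend(), with runtime PM already disabled. */
		return 0;
	}

	static int foo_resume_early(struct device *dev)
	{
		/* Runs before ->resume(); runtime PM is re-enabled afterwards. */
		return 0;
	}

	static const struct dev_pm_ops foo_pm_ops = {
		/* A driver's ordinary .suspend/.resume callbacks would sit alongside. */
		.suspend_late	= foo_suspend_late,
		.resume_early	= foo_resume_early,
	};

A driver that fills in these fields gets called from dpm_suspend_late()/dpm_resume_early() via the dpm_late_early_list handling shown in the hunks above, and may additionally set power.async_suspend to let the async paths introduced here overlap its late/early work with other devices.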
@@ -552,6 +837,7 @@ void dpm_resume(pm_message_t state) struct device *dev; ktime_t starttime = ktime_get(); + trace_suspend_resume(TPS("dpm_resume"), state.event, true); might_sleep(); mutex_lock(&dpm_list_mtx); @@ -559,7 +845,7 @@ void dpm_resume(pm_message_t state) async_error = 0; list_for_each_entry(dev, &dpm_suspended_list, power.entry) { - INIT_COMPLETION(dev->power.completion); + reinit_completion(&dev->power.completion); if (is_async(dev)) { get_device(dev); async_schedule(async_resume, dev); @@ -591,6 +877,9 @@ void dpm_resume(pm_message_t state) mutex_unlock(&dpm_list_mtx); async_synchronize_full(); dpm_show_time(starttime, state, NULL); + + cpufreq_resume(); + trace_suspend_resume(TPS("dpm_resume"), state.event, false); } /** @@ -603,6 +892,9 @@ static void device_complete(struct device *dev, pm_message_t state) void (*callback)(struct device *) = NULL; char *info = NULL; + if (dev->power.syscore) + return; + device_lock(dev); if (dev->pm_domain) { @@ -626,10 +918,14 @@ static void device_complete(struct device *dev, pm_message_t state) if (callback) { pm_dev_dbg(dev, state, info); + trace_device_pm_callback_start(dev, info, state.event); callback(dev); + trace_device_pm_callback_end(dev, 0); } device_unlock(dev); + + pm_runtime_put(dev); } /** @@ -643,6 +939,7 @@ void dpm_complete(pm_message_t state) { struct list_head list; + trace_suspend_resume(TPS("dpm_complete"), state.event, true); might_sleep(); INIT_LIST_HEAD(&list); @@ -662,6 +959,7 @@ void dpm_complete(pm_message_t state) } list_splice(&list, &dpm_list); mutex_unlock(&dpm_list_mtx); + trace_suspend_resume(TPS("dpm_complete"), state.event, false); } /** @@ -710,60 +1008,111 @@ static pm_message_t resume_event(pm_message_t sleep_state) * The driver of @dev will not receive interrupts while this function is being * executed. 
*/ -static int device_suspend_noirq(struct device *dev, pm_message_t state) +static int __device_suspend_noirq(struct device *dev, pm_message_t state, bool async) { pm_callback_t callback = NULL; char *info = NULL; + int error = 0; + + if (async_error) + goto Complete; + + if (pm_wakeup_pending()) { + async_error = -EBUSY; + goto Complete; + } + + if (dev->power.syscore || dev->power.direct_complete) + goto Complete; + + dpm_wait_for_children(dev, async); if (dev->pm_domain) { - info = "LATE power domain "; + info = "noirq power domain "; callback = pm_noirq_op(&dev->pm_domain->ops, state); } else if (dev->type && dev->type->pm) { - info = "LATE type "; + info = "noirq type "; callback = pm_noirq_op(dev->type->pm, state); } else if (dev->class && dev->class->pm) { - info = "LATE class "; + info = "noirq class "; callback = pm_noirq_op(dev->class->pm, state); } else if (dev->bus && dev->bus->pm) { - info = "LATE bus "; + info = "noirq bus "; callback = pm_noirq_op(dev->bus->pm, state); } if (!callback && dev->driver && dev->driver->pm) { - info = "LATE driver "; + info = "noirq driver "; callback = pm_noirq_op(dev->driver->pm, state); } - return dpm_run_callback(callback, dev, state, info); + error = dpm_run_callback(callback, dev, state, info); + if (!error) + dev->power.is_noirq_suspended = true; + else + async_error = error; + +Complete: + complete_all(&dev->power.completion); + return error; +} + +static void async_suspend_noirq(void *data, async_cookie_t cookie) +{ + struct device *dev = (struct device *)data; + int error; + + error = __device_suspend_noirq(dev, pm_transition, true); + if (error) { + dpm_save_failed_dev(dev_name(dev)); + pm_dev_err(dev, pm_transition, " async", error); + } + + put_device(dev); +} + +static int device_suspend_noirq(struct device *dev) +{ + reinit_completion(&dev->power.completion); + + if (pm_async_enabled && dev->power.async_suspend) { + get_device(dev); + async_schedule(async_suspend_noirq, dev); + return 0; + } + return __device_suspend_noirq(dev, pm_transition, false); } /** - * dpm_suspend_noirq - Execute "late suspend" callbacks for non-sysdev devices. + * dpm_suspend_noirq - Execute "noirq suspend" callbacks for all devices. * @state: PM transition of the system being carried out. * * Prevent device drivers from receiving interrupts and call the "noirq" suspend * handlers for all non-sysdev devices. 
*/ -int dpm_suspend_noirq(pm_message_t state) +static int dpm_suspend_noirq(pm_message_t state) { ktime_t starttime = ktime_get(); int error = 0; + trace_suspend_resume(TPS("dpm_suspend_noirq"), state.event, true); + cpuidle_pause(); suspend_device_irqs(); mutex_lock(&dpm_list_mtx); - while (!list_empty(&dpm_suspended_list)) { - struct device *dev = to_device(dpm_suspended_list.prev); + pm_transition = state; + async_error = 0; + + while (!list_empty(&dpm_late_early_list)) { + struct device *dev = to_device(dpm_late_early_list.prev); get_device(dev); mutex_unlock(&dpm_list_mtx); - error = device_suspend_noirq(dev, state); + error = device_suspend_noirq(dev); mutex_lock(&dpm_list_mtx); if (error) { - pm_dev_err(dev, state, " late", error); - suspend_stats.failed_suspend_noirq++; - dpm_save_failed_step(SUSPEND_SUSPEND_NOIRQ); + pm_dev_err(dev, state, " noirq", error); dpm_save_failed_dev(dev_name(dev)); put_device(dev); break; @@ -771,15 +1120,178 @@ int dpm_suspend_noirq(pm_message_t state) if (!list_empty(&dev->power.entry)) list_move(&dev->power.entry, &dpm_noirq_list); put_device(dev); + + if (async_error) + break; } mutex_unlock(&dpm_list_mtx); - if (error) + async_synchronize_full(); + if (!error) + error = async_error; + + if (error) { + suspend_stats.failed_suspend_noirq++; + dpm_save_failed_step(SUSPEND_SUSPEND_NOIRQ); dpm_resume_noirq(resume_event(state)); + } else { + dpm_show_time(starttime, state, "noirq"); + } + trace_suspend_resume(TPS("dpm_suspend_noirq"), state.event, false); + return error; +} + +/** + * device_suspend_late - Execute a "late suspend" callback for given device. + * @dev: Device to handle. + * @state: PM transition of the system being carried out. + * + * Runtime PM is disabled for @dev while this function is being executed. 
+ */ +static int __device_suspend_late(struct device *dev, pm_message_t state, bool async) +{ + pm_callback_t callback = NULL; + char *info = NULL; + int error = 0; + + __pm_runtime_disable(dev, false); + + if (async_error) + goto Complete; + + if (pm_wakeup_pending()) { + async_error = -EBUSY; + goto Complete; + } + + if (dev->power.syscore || dev->power.direct_complete) + goto Complete; + + dpm_wait_for_children(dev, async); + + if (dev->pm_domain) { + info = "late power domain "; + callback = pm_late_early_op(&dev->pm_domain->ops, state); + } else if (dev->type && dev->type->pm) { + info = "late type "; + callback = pm_late_early_op(dev->type->pm, state); + } else if (dev->class && dev->class->pm) { + info = "late class "; + callback = pm_late_early_op(dev->class->pm, state); + } else if (dev->bus && dev->bus->pm) { + info = "late bus "; + callback = pm_late_early_op(dev->bus->pm, state); + } + + if (!callback && dev->driver && dev->driver->pm) { + info = "late driver "; + callback = pm_late_early_op(dev->driver->pm, state); + } + + error = dpm_run_callback(callback, dev, state, info); + if (!error) + dev->power.is_late_suspended = true; else + async_error = error; + +Complete: + complete_all(&dev->power.completion); + return error; +} + +static void async_suspend_late(void *data, async_cookie_t cookie) +{ + struct device *dev = (struct device *)data; + int error; + + error = __device_suspend_late(dev, pm_transition, true); + if (error) { + dpm_save_failed_dev(dev_name(dev)); + pm_dev_err(dev, pm_transition, " async", error); + } + put_device(dev); +} + +static int device_suspend_late(struct device *dev) +{ + reinit_completion(&dev->power.completion); + + if (pm_async_enabled && dev->power.async_suspend) { + get_device(dev); + async_schedule(async_suspend_late, dev); + return 0; + } + + return __device_suspend_late(dev, pm_transition, false); +} + +/** + * dpm_suspend_late - Execute "late suspend" callbacks for all devices. + * @state: PM transition of the system being carried out. + */ +static int dpm_suspend_late(pm_message_t state) +{ + ktime_t starttime = ktime_get(); + int error = 0; + + trace_suspend_resume(TPS("dpm_suspend_late"), state.event, true); + mutex_lock(&dpm_list_mtx); + pm_transition = state; + async_error = 0; + + while (!list_empty(&dpm_suspended_list)) { + struct device *dev = to_device(dpm_suspended_list.prev); + + get_device(dev); + mutex_unlock(&dpm_list_mtx); + + error = device_suspend_late(dev); + + mutex_lock(&dpm_list_mtx); + if (error) { + pm_dev_err(dev, state, " late", error); + dpm_save_failed_dev(dev_name(dev)); + put_device(dev); + break; + } + if (!list_empty(&dev->power.entry)) + list_move(&dev->power.entry, &dpm_late_early_list); + put_device(dev); + + if (async_error) + break; + } + mutex_unlock(&dpm_list_mtx); + async_synchronize_full(); + if (error) { + suspend_stats.failed_suspend_late++; + dpm_save_failed_step(SUSPEND_SUSPEND_LATE); + dpm_resume_early(resume_event(state)); + } else { dpm_show_time(starttime, state, "late"); + } + trace_suspend_resume(TPS("dpm_suspend_late"), state.event, false); return error; } -EXPORT_SYMBOL_GPL(dpm_suspend_noirq); + +/** + * dpm_suspend_end - Execute "late" and "noirq" device suspend callbacks. + * @state: PM transition of the system being carried out. 
+ */ +int dpm_suspend_end(pm_message_t state) +{ + int error = dpm_suspend_late(state); + if (error) + return error; + + error = dpm_suspend_noirq(state); + if (error) { + dpm_resume_early(resume_event(state)); + return error; + } + + return 0; +} +EXPORT_SYMBOL_GPL(dpm_suspend_end); /** * legacy_suspend - Execute a legacy (bus or class) suspend callback for device. @@ -788,17 +1300,20 @@ EXPORT_SYMBOL_GPL(dpm_suspend_noirq); * @cb: Suspend callback to execute. */ static int legacy_suspend(struct device *dev, pm_message_t state, - int (*cb)(struct device *dev, pm_message_t state)) + int (*cb)(struct device *dev, pm_message_t state), + char *info) { int error; ktime_t calltime; calltime = initcall_debug_start(dev); + trace_device_pm_callback_start(dev, info, state.event); error = cb(dev, state); + trace_device_pm_callback_end(dev, error); suspend_report_result(cb, error); - initcall_debug_report(dev, calltime, error); + initcall_debug_report(dev, calltime, error, state, info); return error; } @@ -814,22 +1329,42 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async) pm_callback_t callback = NULL; char *info = NULL; int error = 0; + DECLARE_DPM_WATCHDOG_ON_STACK(wd); dpm_wait_for_children(dev, async); if (async_error) - return 0; + goto Complete; - pm_runtime_get_noresume(dev); + /* + * If a device configured to wake up the system from sleep states + * has been suspended at run time and there's a resume request pending + * for it, this is equivalent to the device signaling wakeup, so the + * system suspend operation should be aborted. + */ if (pm_runtime_barrier(dev) && device_may_wakeup(dev)) pm_wakeup_event(dev, 0); if (pm_wakeup_pending()) { - pm_runtime_put_sync(dev); async_error = -EBUSY; - return 0; + goto Complete; + } + + if (dev->power.syscore) + goto Complete; + + if (dev->power.direct_complete) { + if (pm_runtime_status_suspended(dev)) { + pm_runtime_disable(dev); + if (pm_runtime_suspended_if_enabled(dev)) + goto Complete; + + pm_runtime_enable(dev); + } + dev->power.direct_complete = false; } + dpm_watchdog_set(&wd, dev); device_lock(dev); if (dev->pm_domain) { @@ -851,7 +1386,8 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async) goto Run; } else if (dev->class->suspend) { pm_dev_dbg(dev, state, "legacy class "); - error = legacy_suspend(dev, state, dev->class->suspend); + error = legacy_suspend(dev, state, dev->class->suspend, + "legacy class "); goto End; } } @@ -862,7 +1398,8 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async) callback = pm_op(dev->bus->pm, state); } else if (dev->bus->suspend) { pm_dev_dbg(dev, state, "legacy bus "); - error = legacy_suspend(dev, state, dev->bus->suspend); + error = legacy_suspend(dev, state, dev->bus->suspend, + "legacy bus "); goto End; } } @@ -877,21 +1414,28 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async) End: if (!error) { + struct device *parent = dev->parent; + dev->power.is_suspended = true; - if (dev->power.wakeup_path - && dev->parent && !dev->parent->power.ignore_children) - dev->parent->power.wakeup_path = true; + if (parent) { + spin_lock_irq(&parent->power.lock); + + dev->parent->power.direct_complete = false; + if (dev->power.wakeup_path + && !dev->parent->power.ignore_children) + dev->parent->power.wakeup_path = true; + + spin_unlock_irq(&parent->power.lock); + } } device_unlock(dev); - complete_all(&dev->power.completion); + dpm_watchdog_clear(&wd); - if (error) { - pm_runtime_put_sync(dev); + 
Complete: + complete_all(&dev->power.completion); + if (error) async_error = error; - } else if (dev->power.is_suspended) { - __pm_runtime_disable(dev, false); - } return error; } @@ -912,7 +1456,7 @@ static void async_suspend(void *data, async_cookie_t cookie) static int device_suspend(struct device *dev) { - INIT_COMPLETION(dev->power.completion); + reinit_completion(&dev->power.completion); if (pm_async_enabled && dev->power.async_suspend) { get_device(dev); @@ -932,8 +1476,11 @@ int dpm_suspend(pm_message_t state) ktime_t starttime = ktime_get(); int error = 0; + trace_suspend_resume(TPS("dpm_suspend"), state.event, true); might_sleep(); + cpufreq_suspend(); + mutex_lock(&dpm_list_mtx); pm_transition = state; async_error = 0; @@ -967,6 +1514,7 @@ int dpm_suspend(pm_message_t state) dpm_save_failed_step(SUSPEND_SUSPEND); } else dpm_show_time(starttime, state, NULL); + trace_suspend_resume(TPS("dpm_suspend"), state.event, false); return error; } @@ -982,7 +1530,18 @@ static int device_prepare(struct device *dev, pm_message_t state) { int (*callback)(struct device *) = NULL; char *info = NULL; - int error = 0; + int ret = 0; + + if (dev->power.syscore) + return 0; + + /* + * If a device's parent goes into runtime suspend at the wrong time, + * it won't be possible to resume the device. To prevent this we + * block runtime suspend here, during the prepare phase, and allow + * it again during the complete phase. + */ + pm_runtime_get_noresume(dev); device_lock(dev); @@ -1008,13 +1567,29 @@ static int device_prepare(struct device *dev, pm_message_t state) } if (callback) { - error = callback(dev); - suspend_report_result(callback, error); + trace_device_pm_callback_start(dev, info, state.event); + ret = callback(dev); + trace_device_pm_callback_end(dev, ret); } device_unlock(dev); - return error; + if (ret < 0) { + suspend_report_result(callback, ret); + pm_runtime_put(dev); + return ret; + } + /* + * A positive return value from ->prepare() means "this device appears + * to be runtime-suspended and its state is fine, so if it really is + * runtime-suspended, you can leave it in that state provided that you + * will do the same thing with all of its descendants". This only + * applies to suspend transitions, however. + */ + spin_lock_irq(&dev->power.lock); + dev->power.direct_complete = ret > 0 && state.event == PM_EVENT_SUSPEND; + spin_unlock_irq(&dev->power.lock); + return 0; } /** @@ -1027,6 +1602,7 @@ int dpm_prepare(pm_message_t state) { int error = 0; + trace_suspend_resume(TPS("dpm_prepare"), state.event, true); might_sleep(); mutex_lock(&dpm_list_mtx); @@ -1057,6 +1633,7 @@ int dpm_prepare(pm_message_t state) put_device(dev); } mutex_unlock(&dpm_list_mtx); + trace_suspend_resume(TPS("dpm_prepare"), state.event, false); return error; } @@ -1099,3 +1676,25 @@ int device_pm_wait_for_dev(struct device *subordinate, struct device *dev) return async_error; } EXPORT_SYMBOL_GPL(device_pm_wait_for_dev); + +/** + * dpm_for_each_dev - device iterator. + * @data: data for the callback. + * @fn: function to be called for each device. + * + * Iterate over devices in dpm_list, and call @fn for each device, + * passing it @data. 
+ */ +void dpm_for_each_dev(void *data, void (*fn)(struct device *, void *)) +{ + struct device *dev; + + if (!fn) + return; + + device_pm_lock(); + list_for_each_entry(dev, &dpm_list, power.entry) + fn(dev, data); + device_pm_unlock(); +} +EXPORT_SYMBOL_GPL(dpm_for_each_dev); diff --git a/drivers/base/power/opp.c b/drivers/base/power/opp.c index 95706fa24c7..89ced955faf 100644 --- a/drivers/base/power/opp.c +++ b/drivers/base/power/opp.c @@ -14,13 +14,14 @@ #include <linux/kernel.h> #include <linux/errno.h> #include <linux/err.h> -#include <linux/init.h> #include <linux/slab.h> -#include <linux/cpufreq.h> +#include <linux/device.h> #include <linux/list.h> #include <linux/rculist.h> #include <linux/rcupdate.h> -#include <linux/opp.h> +#include <linux/pm_opp.h> +#include <linux/of.h> +#include <linux/export.h> /* * Internal data structure organization with the OPP layer library is as @@ -39,7 +40,7 @@ */ /** - * struct opp - Generic OPP description structure + * struct dev_pm_opp - Generic OPP description structure * @node: opp list node. The nodes are maintained throughout the lifetime * of boot. It is expected only an optimal set of OPPs are * added to the library by the SoC framework. @@ -52,10 +53,11 @@ * @rate: Frequency in hertz * @u_volt: Nominal voltage in microvolts corresponding to this OPP * @dev_opp: points back to the device_opp struct this opp belongs to + * @head: RCU callback head used for deferred freeing * * This structure stores the OPP information for a given device. */ -struct opp { +struct dev_pm_opp { struct list_head node; bool available; @@ -63,6 +65,7 @@ struct opp { unsigned long u_volt; struct device_opp *dev_opp; + struct rcu_head head; }; /** @@ -131,7 +134,7 @@ static struct device_opp *find_device_opp(struct device *dev) } /** - * opp_get_voltage() - Gets the voltage corresponding to an available opp + * dev_pm_opp_get_voltage() - Gets the voltage corresponding to an available opp * @opp: opp for which voltage has to be returned for * * Return voltage in micro volt corresponding to the opp, else @@ -145,9 +148,9 @@ static struct device_opp *find_device_opp(struct device *dev) * prior to unlocking with rcu_read_unlock() to maintain the integrity of the * pointer. */ -unsigned long opp_get_voltage(struct opp *opp) +unsigned long dev_pm_opp_get_voltage(struct dev_pm_opp *opp) { - struct opp *tmp_opp; + struct dev_pm_opp *tmp_opp; unsigned long v = 0; tmp_opp = rcu_dereference(opp); @@ -158,9 +161,10 @@ unsigned long opp_get_voltage(struct opp *opp) return v; } +EXPORT_SYMBOL_GPL(dev_pm_opp_get_voltage); /** - * opp_get_freq() - Gets the frequency corresponding to an available opp + * dev_pm_opp_get_freq() - Gets the frequency corresponding to an available opp * @opp: opp for which frequency has to be returned for * * Return frequency in hertz corresponding to the opp, else @@ -174,9 +178,9 @@ unsigned long opp_get_voltage(struct opp *opp) * prior to unlocking with rcu_read_unlock() to maintain the integrity of the * pointer. 
*/ -unsigned long opp_get_freq(struct opp *opp) +unsigned long dev_pm_opp_get_freq(struct dev_pm_opp *opp) { - struct opp *tmp_opp; + struct dev_pm_opp *tmp_opp; unsigned long f = 0; tmp_opp = rcu_dereference(opp); @@ -187,9 +191,10 @@ unsigned long opp_get_freq(struct opp *opp) return f; } +EXPORT_SYMBOL_GPL(dev_pm_opp_get_freq); /** - * opp_get_opp_count() - Get number of opps available in the opp list + * dev_pm_opp_get_opp_count() - Get number of opps available in the opp list * @dev: device for which we do this operation * * This function returns the number of available opps if there are any, @@ -199,10 +204,10 @@ unsigned long opp_get_freq(struct opp *opp) * internally references two RCU protected structures: device_opp and opp which * are safe as long as we are under a common RCU locked section. */ -int opp_get_opp_count(struct device *dev) +int dev_pm_opp_get_opp_count(struct device *dev) { struct device_opp *dev_opp; - struct opp *temp_opp; + struct dev_pm_opp *temp_opp; int count = 0; dev_opp = find_device_opp(dev); @@ -219,16 +224,20 @@ int opp_get_opp_count(struct device *dev) return count; } +EXPORT_SYMBOL_GPL(dev_pm_opp_get_opp_count); /** - * opp_find_freq_exact() - search for an exact frequency + * dev_pm_opp_find_freq_exact() - search for an exact frequency * @dev: device for which we do this operation * @freq: frequency to search for * @available: true/false - match for available opp * * Searches for exact match in the opp list and returns pointer to the matching * opp if found, else returns ERR_PTR in case of error and should be handled - * using IS_ERR. + * using IS_ERR. Error return values can be: + * EINVAL: for bad pointer + * ERANGE: no match found for search + * ENODEV: if device not found in list of registered devices * * Note: available is a modifier for the search. if available=true, then the * match is for exact matching frequency and is available in the stored OPP @@ -243,11 +252,12 @@ int opp_get_opp_count(struct device *dev) * under the locked area. The pointer returned must be used prior to unlocking * with rcu_read_unlock() to maintain the integrity of the pointer. */ -struct opp *opp_find_freq_exact(struct device *dev, unsigned long freq, - bool available) +struct dev_pm_opp *dev_pm_opp_find_freq_exact(struct device *dev, + unsigned long freq, + bool available) { struct device_opp *dev_opp; - struct opp *temp_opp, *opp = ERR_PTR(-ENODEV); + struct dev_pm_opp *temp_opp, *opp = ERR_PTR(-ERANGE); dev_opp = find_device_opp(dev); if (IS_ERR(dev_opp)) { @@ -266,9 +276,10 @@ struct opp *opp_find_freq_exact(struct device *dev, unsigned long freq, return opp; } +EXPORT_SYMBOL_GPL(dev_pm_opp_find_freq_exact); /** - * opp_find_freq_ceil() - Search for an rounded ceil freq + * dev_pm_opp_find_freq_ceil() - Search for an rounded ceil freq * @dev: device for which we do this operation * @freq: Start frequency * @@ -276,7 +287,11 @@ struct opp *opp_find_freq_exact(struct device *dev, unsigned long freq, * for a device. * * Returns matching *opp and refreshes *freq accordingly, else returns - * ERR_PTR in case of error and should be handled using IS_ERR. + * ERR_PTR in case of error and should be handled using IS_ERR. Error return + * values can be: + * EINVAL: for bad pointer + * ERANGE: no match found for search + * ENODEV: if device not found in list of registered devices * * Locking: This function must be called under rcu_read_lock(). opp is a rcu * protected pointer. 
The reason for the same is that the opp pointer which is @@ -284,10 +299,11 @@ struct opp *opp_find_freq_exact(struct device *dev, unsigned long freq, * under the locked area. The pointer returned must be used prior to unlocking * with rcu_read_unlock() to maintain the integrity of the pointer. */ -struct opp *opp_find_freq_ceil(struct device *dev, unsigned long *freq) +struct dev_pm_opp *dev_pm_opp_find_freq_ceil(struct device *dev, + unsigned long *freq) { struct device_opp *dev_opp; - struct opp *temp_opp, *opp = ERR_PTR(-ENODEV); + struct dev_pm_opp *temp_opp, *opp = ERR_PTR(-ERANGE); if (!dev || !freq) { dev_err(dev, "%s: Invalid argument freq=%p\n", __func__, freq); @@ -296,7 +312,7 @@ struct opp *opp_find_freq_ceil(struct device *dev, unsigned long *freq) dev_opp = find_device_opp(dev); if (IS_ERR(dev_opp)) - return opp; + return ERR_CAST(dev_opp); list_for_each_entry_rcu(temp_opp, &dev_opp->opp_list, node) { if (temp_opp->available && temp_opp->rate >= *freq) { @@ -308,9 +324,10 @@ struct opp *opp_find_freq_ceil(struct device *dev, unsigned long *freq) return opp; } +EXPORT_SYMBOL_GPL(dev_pm_opp_find_freq_ceil); /** - * opp_find_freq_floor() - Search for a rounded floor freq + * dev_pm_opp_find_freq_floor() - Search for a rounded floor freq * @dev: device for which we do this operation * @freq: Start frequency * @@ -318,7 +335,11 @@ struct opp *opp_find_freq_ceil(struct device *dev, unsigned long *freq) * for a device. * * Returns matching *opp and refreshes *freq accordingly, else returns - * ERR_PTR in case of error and should be handled using IS_ERR. + * ERR_PTR in case of error and should be handled using IS_ERR. Error return + * values can be: + * EINVAL: for bad pointer + * ERANGE: no match found for search + * ENODEV: if device not found in list of registered devices * * Locking: This function must be called under rcu_read_lock(). opp is a rcu * protected pointer. The reason for the same is that the opp pointer which is @@ -326,10 +347,11 @@ struct opp *opp_find_freq_ceil(struct device *dev, unsigned long *freq) * under the locked area. The pointer returned must be used prior to unlocking * with rcu_read_unlock() to maintain the integrity of the pointer. */ -struct opp *opp_find_freq_floor(struct device *dev, unsigned long *freq) +struct dev_pm_opp *dev_pm_opp_find_freq_floor(struct device *dev, + unsigned long *freq) { struct device_opp *dev_opp; - struct opp *temp_opp, *opp = ERR_PTR(-ENODEV); + struct dev_pm_opp *temp_opp, *opp = ERR_PTR(-ERANGE); if (!dev || !freq) { dev_err(dev, "%s: Invalid argument freq=%p\n", __func__, freq); @@ -338,7 +360,7 @@ struct opp *opp_find_freq_floor(struct device *dev, unsigned long *freq) dev_opp = find_device_opp(dev); if (IS_ERR(dev_opp)) - return opp; + return ERR_CAST(dev_opp); list_for_each_entry_rcu(temp_opp, &dev_opp->opp_list, node) { if (temp_opp->available) { @@ -354,31 +376,39 @@ struct opp *opp_find_freq_floor(struct device *dev, unsigned long *freq) return opp; } +EXPORT_SYMBOL_GPL(dev_pm_opp_find_freq_floor); /** - * opp_add() - Add an OPP table from a table definitions + * dev_pm_opp_add() - Add an OPP table from a table definitions * @dev: device for which we do this operation * @freq: Frequency in Hz for this OPP * @u_volt: Voltage in uVolts for this OPP * * This function adds an opp definition to the opp list and returns status. * The opp is made available by default and it can be controlled using - * opp_enable/disable functions. + * dev_pm_opp_enable/disable functions. 
* * Locking: The internal device_opp and opp structures are RCU protected. * Hence this function internally uses RCU updater strategy with mutex locks * to keep the integrity of the internal data structures. Callers should ensure * that this function is *NOT* called under RCU protection or in contexts where * mutex cannot be locked. + * + * Return: + * 0: On success OR + * Duplicate OPPs (both freq and volt are same) and opp->available + * -EEXIST: Freq are same and volt are different OR + * Duplicate OPPs (both freq and volt are same) and !opp->available + * -ENOMEM: Memory allocation failure */ -int opp_add(struct device *dev, unsigned long freq, unsigned long u_volt) +int dev_pm_opp_add(struct device *dev, unsigned long freq, unsigned long u_volt) { struct device_opp *dev_opp = NULL; - struct opp *opp, *new_opp; + struct dev_pm_opp *opp, *new_opp; struct list_head *head; /* allocate new OPP node */ - new_opp = kzalloc(sizeof(struct opp), GFP_KERNEL); + new_opp = kzalloc(sizeof(*new_opp), GFP_KERNEL); if (!new_opp) { dev_warn(dev, "%s: Unable to create new OPP node\n", __func__); return -ENOMEM; @@ -419,15 +449,31 @@ int opp_add(struct device *dev, unsigned long freq, unsigned long u_volt) new_opp->u_volt = u_volt; new_opp->available = true; - /* Insert new OPP in order of increasing frequency */ + /* + * Insert new OPP in order of increasing frequency + * and discard if already present + */ head = &dev_opp->opp_list; list_for_each_entry_rcu(opp, &dev_opp->opp_list, node) { - if (new_opp->rate < opp->rate) + if (new_opp->rate <= opp->rate) break; else head = &opp->node; } + /* Duplicate OPPs ? */ + if (new_opp->rate == opp->rate) { + int ret = opp->available && new_opp->u_volt == opp->u_volt ? + 0 : -EEXIST; + + dev_warn(dev, "%s: duplicate OPPs detected. Existing: freq: %lu, volt: %lu, enabled: %d. 
New: freq: %lu, volt: %lu, enabled: %d\n", + __func__, opp->rate, opp->u_volt, opp->available, + new_opp->rate, new_opp->u_volt, new_opp->available); + mutex_unlock(&dev_opp_list_lock); + kfree(new_opp); + return ret; + } + list_add_rcu(&new_opp->node, head); mutex_unlock(&dev_opp_list_lock); @@ -438,6 +484,7 @@ int opp_add(struct device *dev, unsigned long freq, unsigned long u_volt) srcu_notifier_call_chain(&dev_opp->head, OPP_EVENT_ADD, new_opp); return 0; } +EXPORT_SYMBOL_GPL(dev_pm_opp_add); /** * opp_set_availability() - helper to set the availability of an opp @@ -462,11 +509,11 @@ static int opp_set_availability(struct device *dev, unsigned long freq, bool availability_req) { struct device_opp *tmp_dev_opp, *dev_opp = ERR_PTR(-ENODEV); - struct opp *new_opp, *tmp_opp, *opp = ERR_PTR(-ENODEV); + struct dev_pm_opp *new_opp, *tmp_opp, *opp = ERR_PTR(-ENODEV); int r = 0; /* keep the node allocated */ - new_opp = kmalloc(sizeof(struct opp), GFP_KERNEL); + new_opp = kmalloc(sizeof(*new_opp), GFP_KERNEL); if (!new_opp) { dev_warn(dev, "%s: Unable to create OPP\n", __func__); return -ENOMEM; @@ -510,7 +557,7 @@ static int opp_set_availability(struct device *dev, unsigned long freq, list_replace_rcu(&opp->node, &new_opp->node); mutex_unlock(&dev_opp_list_lock); - synchronize_rcu(); + kfree_rcu(opp, head); /* Notify the change of the OPP availability */ if (availability_req) @@ -520,25 +567,22 @@ static int opp_set_availability(struct device *dev, unsigned long freq, srcu_notifier_call_chain(&dev_opp->head, OPP_EVENT_DISABLE, new_opp); - /* clean up old opp */ - new_opp = opp; - goto out; + return 0; unlock: mutex_unlock(&dev_opp_list_lock); -out: kfree(new_opp); return r; } /** - * opp_enable() - Enable a specific OPP + * dev_pm_opp_enable() - Enable a specific OPP * @dev: device for which we do this operation * @freq: OPP frequency to enable * * Enables a provided opp. If the operation is valid, this returns 0, else the * corresponding error value. It is meant to be used for users an OPP available - * after being temporarily made unavailable with opp_disable. + * after being temporarily made unavailable with dev_pm_opp_disable. * * Locking: The internal device_opp and opp structures are RCU protected. * Hence this function indirectly uses RCU and mutex locks to keep the @@ -546,20 +590,21 @@ out: * this function is *NOT* called under RCU protection or in contexts where * mutex locking or synchronize_rcu() blocking calls cannot be used. */ -int opp_enable(struct device *dev, unsigned long freq) +int dev_pm_opp_enable(struct device *dev, unsigned long freq) { return opp_set_availability(dev, freq, true); } +EXPORT_SYMBOL_GPL(dev_pm_opp_enable); /** - * opp_disable() - Disable a specific OPP + * dev_pm_opp_disable() - Disable a specific OPP * @dev: device for which we do this operation * @freq: OPP frequency to disable * * Disables a provided opp. If the operation is valid, this returns * 0, else the corresponding error value. It is meant to be a temporary * control by users to make this OPP not available until the circumstances are - * right to make it available again (with a call to opp_enable). + * right to make it available again (with a call to dev_pm_opp_enable). * * Locking: The internal device_opp and opp structures are RCU protected. 
* Hence this function indirectly uses RCU and mutex locks to keep the @@ -567,109 +612,67 @@ int opp_enable(struct device *dev, unsigned long freq) * this function is *NOT* called under RCU protection or in contexts where * mutex locking or synchronize_rcu() blocking calls cannot be used. */ -int opp_disable(struct device *dev, unsigned long freq) +int dev_pm_opp_disable(struct device *dev, unsigned long freq) { return opp_set_availability(dev, freq, false); } +EXPORT_SYMBOL_GPL(dev_pm_opp_disable); -#ifdef CONFIG_CPU_FREQ /** - * opp_init_cpufreq_table() - create a cpufreq table for a device - * @dev: device for which we do this operation - * @table: Cpufreq table returned back to caller - * - * Generate a cpufreq table for a provided device- this assumes that the - * opp list is already initialized and ready for usage. - * - * This function allocates required memory for the cpufreq table. It is - * expected that the caller does the required maintenance such as freeing - * the table as required. - * - * Returns -EINVAL for bad pointers, -ENODEV if the device is not found, -ENOMEM - * if no memory available for the operation (table is not populated), returns 0 - * if successful and table is populated. - * - * WARNING: It is important for the callers to ensure refreshing their copy of - * the table if any of the mentioned functions have been invoked in the interim. - * - * Locking: The internal device_opp and opp structures are RCU protected. - * To simplify the logic, we pretend we are updater and hold relevant mutex here - * Callers should ensure that this function is *NOT* called under RCU protection - * or in contexts where mutex locking cannot be used. + * dev_pm_opp_get_notifier() - find notifier_head of the device with opp + * @dev: device pointer used to lookup device OPPs. */ -int opp_init_cpufreq_table(struct device *dev, - struct cpufreq_frequency_table **table) +struct srcu_notifier_head *dev_pm_opp_get_notifier(struct device *dev) { - struct device_opp *dev_opp; - struct opp *opp; - struct cpufreq_frequency_table *freq_table; - int i = 0; - - /* Pretend as if I am an updater */ - mutex_lock(&dev_opp_list_lock); - - dev_opp = find_device_opp(dev); - if (IS_ERR(dev_opp)) { - int r = PTR_ERR(dev_opp); - mutex_unlock(&dev_opp_list_lock); - dev_err(dev, "%s: Device OPP not found (%d)\n", __func__, r); - return r; - } - - freq_table = kzalloc(sizeof(struct cpufreq_frequency_table) * - (opp_get_opp_count(dev) + 1), GFP_KERNEL); - if (!freq_table) { - mutex_unlock(&dev_opp_list_lock); - dev_warn(dev, "%s: Unable to allocate frequency table\n", - __func__); - return -ENOMEM; - } - - list_for_each_entry(opp, &dev_opp->opp_list, node) { - if (opp->available) { - freq_table[i].index = i; - freq_table[i].frequency = opp->rate / 1000; - i++; - } - } - mutex_unlock(&dev_opp_list_lock); - - freq_table[i].index = i; - freq_table[i].frequency = CPUFREQ_TABLE_END; + struct device_opp *dev_opp = find_device_opp(dev); - *table = &freq_table[0]; + if (IS_ERR(dev_opp)) + return ERR_CAST(dev_opp); /* matching type */ - return 0; + return &dev_opp->head; } +#ifdef CONFIG_OF /** - * opp_free_cpufreq_table() - free the cpufreq table - * @dev: device for which we do this operation - * @table: table to free + * of_init_opp_table() - Initialize opp table from device tree + * @dev: device pointer used to lookup device OPPs. * - * Free up the table allocated by opp_init_cpufreq_table + * Register the initial OPP table with the OPP library for given device. 
*/ -void opp_free_cpufreq_table(struct device *dev, - struct cpufreq_frequency_table **table) +int of_init_opp_table(struct device *dev) { - if (!table) - return; + const struct property *prop; + const __be32 *val; + int nr; - kfree(*table); - *table = NULL; -} -#endif /* CONFIG_CPU_FREQ */ + prop = of_find_property(dev->of_node, "operating-points", NULL); + if (!prop) + return -ENODEV; + if (!prop->value) + return -ENODATA; -/** - * opp_get_notifier() - find notifier_head of the device with opp - * @dev: device pointer used to lookup device OPPs. - */ -struct srcu_notifier_head *opp_get_notifier(struct device *dev) -{ - struct device_opp *dev_opp = find_device_opp(dev); + /* + * Each OPP is a set of tuples consisting of frequency and + * voltage like <freq-kHz vol-uV>. + */ + nr = prop->length / sizeof(u32); + if (nr % 2) { + dev_err(dev, "%s: Invalid OPP list\n", __func__); + return -EINVAL; + } - if (IS_ERR(dev_opp)) - return ERR_CAST(dev_opp); /* matching type */ + val = prop->value; + while (nr) { + unsigned long freq = be32_to_cpup(val++) * 1000; + unsigned long volt = be32_to_cpup(val++); - return &dev_opp->head; + if (dev_pm_opp_add(dev, freq, volt)) + dev_warn(dev, "%s: Failed to add OPP %ld\n", + __func__, freq); + nr -= 2; + } + + return 0; } +EXPORT_SYMBOL_GPL(of_init_opp_table); +#endif diff --git a/drivers/base/power/power.h b/drivers/base/power/power.h index 9bf62323aaf..a21223d9592 100644 --- a/drivers/base/power/power.h +++ b/drivers/base/power/power.h @@ -1,12 +1,32 @@ #include <linux/pm_qos.h> +static inline void device_pm_init_common(struct device *dev) +{ + if (!dev->power.early_init) { + spin_lock_init(&dev->power.lock); + dev->power.qos = NULL; + dev->power.early_init = true; + } +} + #ifdef CONFIG_PM_RUNTIME +static inline void pm_runtime_early_init(struct device *dev) +{ + dev->power.disable_depth = 1; + device_pm_init_common(dev); +} + extern void pm_runtime_init(struct device *dev); extern void pm_runtime_remove(struct device *dev); #else /* !CONFIG_PM_RUNTIME */ +static inline void pm_runtime_early_init(struct device *dev) +{ + device_pm_init_common(dev); +} + static inline void pm_runtime_init(struct device *dev) {} static inline void pm_runtime_remove(struct device *dev) {} @@ -25,7 +45,7 @@ static inline struct device *to_device(struct list_head *entry) return container_of(entry, struct device, power.entry); } -extern void device_pm_init(struct device *dev); +extern void device_pm_sleep_init(struct device *dev); extern void device_pm_add(struct device *); extern void device_pm_remove(struct device *); extern void device_pm_move_before(struct device *, struct device *); @@ -34,21 +54,12 @@ extern void device_pm_move_last(struct device *); #else /* !CONFIG_PM_SLEEP */ -static inline void device_pm_init(struct device *dev) -{ - spin_lock_init(&dev->power.lock); - dev->power.power_state = PMSG_INVALID; - pm_runtime_init(dev); -} +static inline void device_pm_sleep_init(struct device *dev) {} -static inline void device_pm_add(struct device *dev) -{ - dev_pm_qos_constraints_init(dev); -} +static inline void device_pm_add(struct device *dev) {} static inline void device_pm_remove(struct device *dev) { - dev_pm_qos_constraints_destroy(dev); pm_runtime_remove(dev); } @@ -60,6 +71,13 @@ static inline void device_pm_move_last(struct device *dev) {} #endif /* !CONFIG_PM_SLEEP */ +static inline void device_pm_init(struct device *dev) +{ + device_pm_init_common(dev); + device_pm_sleep_init(dev); + pm_runtime_init(dev); +} + #ifdef CONFIG_PM /* @@ -71,6 +89,10 @@ extern 
void dpm_sysfs_remove(struct device *dev); extern void rpm_sysfs_remove(struct device *dev); extern int wakeup_sysfs_add(struct device *dev); extern void wakeup_sysfs_remove(struct device *dev); +extern int pm_qos_sysfs_add_resume_latency(struct device *dev); +extern void pm_qos_sysfs_remove_resume_latency(struct device *dev); +extern int pm_qos_sysfs_add_flags(struct device *dev); +extern void pm_qos_sysfs_remove_flags(struct device *dev); #else /* CONFIG_PM */ @@ -79,5 +101,7 @@ static inline void dpm_sysfs_remove(struct device *dev) {} static inline void rpm_sysfs_remove(struct device *dev) {} static inline int wakeup_sysfs_add(struct device *dev) { return 0; } static inline void wakeup_sysfs_remove(struct device *dev) {} +static inline int pm_qos_sysfs_add(struct device *dev) { return 0; } +static inline void pm_qos_sysfs_remove(struct device *dev) {} #endif diff --git a/drivers/base/power/qos.c b/drivers/base/power/qos.c index c5d35883746..36b9eb4862c 100644 --- a/drivers/base/power/qos.c +++ b/drivers/base/power/qos.c @@ -40,13 +40,63 @@ #include <linux/device.h> #include <linux/mutex.h> #include <linux/export.h> +#include <linux/pm_runtime.h> +#include <linux/err.h> +#include <trace/events/power.h> +#include "power.h" static DEFINE_MUTEX(dev_pm_qos_mtx); +static DEFINE_MUTEX(dev_pm_qos_sysfs_mtx); static BLOCKING_NOTIFIER_HEAD(dev_pm_notifiers); /** + * __dev_pm_qos_flags - Check PM QoS flags for a given device. + * @dev: Device to check the PM QoS flags for. + * @mask: Flags to check against. + * + * This routine must be called with dev->power.lock held. + */ +enum pm_qos_flags_status __dev_pm_qos_flags(struct device *dev, s32 mask) +{ + struct dev_pm_qos *qos = dev->power.qos; + struct pm_qos_flags *pqf; + s32 val; + + if (IS_ERR_OR_NULL(qos)) + return PM_QOS_FLAGS_UNDEFINED; + + pqf = &qos->flags; + if (list_empty(&pqf->list)) + return PM_QOS_FLAGS_UNDEFINED; + + val = pqf->effective_flags & mask; + if (val) + return (val == mask) ? PM_QOS_FLAGS_ALL : PM_QOS_FLAGS_SOME; + + return PM_QOS_FLAGS_NONE; +} + +/** + * dev_pm_qos_flags - Check PM QoS flags for a given device (locked). + * @dev: Device to check the PM QoS flags for. + * @mask: Flags to check against. + */ +enum pm_qos_flags_status dev_pm_qos_flags(struct device *dev, s32 mask) +{ + unsigned long irqflags; + enum pm_qos_flags_status ret; + + spin_lock_irqsave(&dev->power.lock, irqflags); + ret = __dev_pm_qos_flags(dev, mask); + spin_unlock_irqrestore(&dev->power.lock, irqflags); + + return ret; +} +EXPORT_SYMBOL_GPL(dev_pm_qos_flags); + +/** * __dev_pm_qos_read_value - Get PM QoS constraint for a given device. * @dev: Device to get the PM QoS constraint value for. * @@ -54,9 +104,8 @@ static BLOCKING_NOTIFIER_HEAD(dev_pm_notifiers); */ s32 __dev_pm_qos_read_value(struct device *dev) { - struct pm_qos_constraints *c = dev->power.constraints; - - return c ? pm_qos_read_value(c) : 0; + return IS_ERR_OR_NULL(dev->power.qos) ? + 0 : pm_qos_read_value(&dev->power.qos->resume_latency); } /** @@ -75,30 +124,47 @@ s32 dev_pm_qos_read_value(struct device *dev) return ret; } -/* - * apply_constraint - * @req: constraint request to apply - * @action: action to perform add/update/remove, of type enum pm_qos_req_action - * @value: defines the qos request +/** + * apply_constraint - Add/modify/remove device PM QoS request. + * @req: Constraint request to apply + * @action: Action to perform (add/update/remove). + * @value: Value to assign to the QoS request. 
* * Internal function to update the constraints list using the PM QoS core * code and if needed call the per-device and the global notification * callbacks */ static int apply_constraint(struct dev_pm_qos_request *req, - enum pm_qos_req_action action, int value) + enum pm_qos_req_action action, s32 value) { - int ret, curr_value; - - ret = pm_qos_update_target(req->dev->power.constraints, - &req->node, action, value); - - if (ret) { - /* Call the global callbacks if needed */ - curr_value = pm_qos_read_value(req->dev->power.constraints); - blocking_notifier_call_chain(&dev_pm_notifiers, - (unsigned long)curr_value, - req); + struct dev_pm_qos *qos = req->dev->power.qos; + int ret; + + switch(req->type) { + case DEV_PM_QOS_RESUME_LATENCY: + ret = pm_qos_update_target(&qos->resume_latency, + &req->data.pnode, action, value); + if (ret) { + value = pm_qos_read_value(&qos->resume_latency); + blocking_notifier_call_chain(&dev_pm_notifiers, + (unsigned long)value, + req); + } + break; + case DEV_PM_QOS_LATENCY_TOLERANCE: + ret = pm_qos_update_target(&qos->latency_tolerance, + &req->data.pnode, action, value); + if (ret) { + value = pm_qos_read_value(&qos->latency_tolerance); + req->dev->power.set_latency_tolerance(req->dev, value); + } + break; + case DEV_PM_QOS_FLAGS: + ret = pm_qos_update_flags(&qos->flags, &req->data.flr, + action, value); + break; + default: + ret = -EINVAL; } return ret; @@ -113,47 +179,47 @@ static int apply_constraint(struct dev_pm_qos_request *req, */ static int dev_pm_qos_constraints_allocate(struct device *dev) { + struct dev_pm_qos *qos; struct pm_qos_constraints *c; struct blocking_notifier_head *n; - c = kzalloc(sizeof(*c), GFP_KERNEL); - if (!c) + qos = kzalloc(sizeof(*qos), GFP_KERNEL); + if (!qos) return -ENOMEM; n = kzalloc(sizeof(*n), GFP_KERNEL); if (!n) { - kfree(c); + kfree(qos); return -ENOMEM; } BLOCKING_INIT_NOTIFIER_HEAD(n); + c = &qos->resume_latency; plist_head_init(&c->list); - c->target_value = PM_QOS_DEV_LAT_DEFAULT_VALUE; - c->default_value = PM_QOS_DEV_LAT_DEFAULT_VALUE; + c->target_value = PM_QOS_RESUME_LATENCY_DEFAULT_VALUE; + c->default_value = PM_QOS_RESUME_LATENCY_DEFAULT_VALUE; + c->no_constraint_value = PM_QOS_RESUME_LATENCY_DEFAULT_VALUE; c->type = PM_QOS_MIN; c->notifiers = n; + c = &qos->latency_tolerance; + plist_head_init(&c->list); + c->target_value = PM_QOS_LATENCY_TOLERANCE_DEFAULT_VALUE; + c->default_value = PM_QOS_LATENCY_TOLERANCE_DEFAULT_VALUE; + c->no_constraint_value = PM_QOS_LATENCY_TOLERANCE_NO_CONSTRAINT; + c->type = PM_QOS_MIN; + + INIT_LIST_HEAD(&qos->flags.list); + spin_lock_irq(&dev->power.lock); - dev->power.constraints = c; + dev->power.qos = qos; spin_unlock_irq(&dev->power.lock); return 0; } -/** - * dev_pm_qos_constraints_init - Initalize device's PM QoS constraints pointer. - * @dev: target device - * - * Called from the device PM subsystem during device insertion under - * device_pm_lock(). 
- */ -void dev_pm_qos_constraints_init(struct device *dev) -{ - mutex_lock(&dev_pm_qos_mtx); - dev->power.constraints = NULL; - dev->power.power_state = PMSG_ON; - mutex_unlock(&dev_pm_qos_mtx); -} +static void __dev_pm_qos_hide_latency_limit(struct device *dev); +static void __dev_pm_qos_hide_flags(struct device *dev); /** * dev_pm_qos_constraints_destroy @@ -163,18 +229,32 @@ void dev_pm_qos_constraints_init(struct device *dev) */ void dev_pm_qos_constraints_destroy(struct device *dev) { + struct dev_pm_qos *qos; struct dev_pm_qos_request *req, *tmp; struct pm_qos_constraints *c; + struct pm_qos_flags *f; + + mutex_lock(&dev_pm_qos_sysfs_mtx); + + /* + * If the device's PM QoS resume latency limit or PM QoS flags have been + * exposed to user space, they have to be hidden at this point. + */ + pm_qos_sysfs_remove_resume_latency(dev); + pm_qos_sysfs_remove_flags(dev); mutex_lock(&dev_pm_qos_mtx); - dev->power.power_state = PMSG_INVALID; - c = dev->power.constraints; - if (!c) + __dev_pm_qos_hide_latency_limit(dev); + __dev_pm_qos_hide_flags(dev); + + qos = dev->power.qos; + if (!qos) goto out; - /* Flush the constraints list for the device */ - plist_for_each_entry_safe(req, tmp, &c->list, node) { + /* Flush the constraints lists for the device. */ + c = &qos->resume_latency; + plist_for_each_entry_safe(req, tmp, &c->list, data.pnode) { /* * Update constraints list and call the notification * callbacks if needed @@ -182,22 +262,69 @@ void dev_pm_qos_constraints_destroy(struct device *dev) apply_constraint(req, PM_QOS_REMOVE_REQ, PM_QOS_DEFAULT_VALUE); memset(req, 0, sizeof(*req)); } + c = &qos->latency_tolerance; + plist_for_each_entry_safe(req, tmp, &c->list, data.pnode) { + apply_constraint(req, PM_QOS_REMOVE_REQ, PM_QOS_DEFAULT_VALUE); + memset(req, 0, sizeof(*req)); + } + f = &qos->flags; + list_for_each_entry_safe(req, tmp, &f->list, data.flr.node) { + apply_constraint(req, PM_QOS_REMOVE_REQ, PM_QOS_DEFAULT_VALUE); + memset(req, 0, sizeof(*req)); + } spin_lock_irq(&dev->power.lock); - dev->power.constraints = NULL; + dev->power.qos = ERR_PTR(-ENODEV); spin_unlock_irq(&dev->power.lock); kfree(c->notifiers); - kfree(c); + kfree(qos); out: mutex_unlock(&dev_pm_qos_mtx); + + mutex_unlock(&dev_pm_qos_sysfs_mtx); +} + +static bool dev_pm_qos_invalid_request(struct device *dev, + struct dev_pm_qos_request *req) +{ + return !req || (req->type == DEV_PM_QOS_LATENCY_TOLERANCE + && !dev->power.set_latency_tolerance); +} + +static int __dev_pm_qos_add_request(struct device *dev, + struct dev_pm_qos_request *req, + enum dev_pm_qos_req_type type, s32 value) +{ + int ret = 0; + + if (!dev || dev_pm_qos_invalid_request(dev, req)) + return -EINVAL; + + if (WARN(dev_pm_qos_request_active(req), + "%s() called for already added request\n", __func__)) + return -EINVAL; + + if (IS_ERR(dev->power.qos)) + ret = -ENODEV; + else if (!dev->power.qos) + ret = dev_pm_qos_constraints_allocate(dev); + + trace_dev_pm_qos_add_request(dev_name(dev), type, value); + if (!ret) { + req->dev = dev; + req->type = type; + ret = apply_constraint(req, PM_QOS_ADD_REQ, value); + } + return ret; } /** * dev_pm_qos_add_request - inserts new qos request into the list * @dev: target device for the constraint * @req: pointer to a preallocated handle + * @type: type of the request * @value: defines the qos request * * This function inserts a new entry in the device constraints list of @@ -211,48 +338,62 @@ void dev_pm_qos_constraints_destroy(struct device *dev) * -EINVAL in case of wrong parameters, -ENOMEM if there's not enough 
memory * to allocate for data structures, -ENODEV if the device has just been removed * from the system. + * + * Callers should ensure that the target device is not RPM_SUSPENDED before + * using this function for requests of type DEV_PM_QOS_FLAGS. */ int dev_pm_qos_add_request(struct device *dev, struct dev_pm_qos_request *req, - s32 value) + enum dev_pm_qos_req_type type, s32 value) { + int ret; + + mutex_lock(&dev_pm_qos_mtx); + ret = __dev_pm_qos_add_request(dev, req, type, value); + mutex_unlock(&dev_pm_qos_mtx); + return ret; +} +EXPORT_SYMBOL_GPL(dev_pm_qos_add_request); + +/** + * __dev_pm_qos_update_request - Modify an existing device PM QoS request. + * @req : PM QoS request to modify. + * @new_value: New value to request. + */ +static int __dev_pm_qos_update_request(struct dev_pm_qos_request *req, + s32 new_value) +{ + s32 curr_value; int ret = 0; - if (!dev || !req) /*guard against callers passing in null */ + if (!req) /*guard against callers passing in null */ return -EINVAL; - if (WARN(dev_pm_qos_request_active(req), - "%s() called for already added request\n", __func__)) + if (WARN(!dev_pm_qos_request_active(req), + "%s() called for unknown object\n", __func__)) return -EINVAL; - req->dev = dev; - - mutex_lock(&dev_pm_qos_mtx); - - if (!dev->power.constraints) { - if (dev->power.power_state.event == PM_EVENT_INVALID) { - /* The device has been removed from the system. */ - req->dev = NULL; - ret = -ENODEV; - goto out; - } else { - /* - * Allocate the constraints data on the first call to - * add_request, i.e. only if the data is not already - * allocated and if the device has not been removed. - */ - ret = dev_pm_qos_constraints_allocate(dev); - } + if (IS_ERR_OR_NULL(req->dev->power.qos)) + return -ENODEV; + + switch(req->type) { + case DEV_PM_QOS_RESUME_LATENCY: + case DEV_PM_QOS_LATENCY_TOLERANCE: + curr_value = req->data.pnode.prio; + break; + case DEV_PM_QOS_FLAGS: + curr_value = req->data.flr.flags; + break; + default: + return -EINVAL; } - if (!ret) - ret = apply_constraint(req, PM_QOS_ADD_REQ, value); - - out: - mutex_unlock(&dev_pm_qos_mtx); + trace_dev_pm_qos_update_request(dev_name(req->dev), req->type, + new_value); + if (curr_value != new_value) + ret = apply_constraint(req, PM_QOS_UPDATE_REQ, new_value); return ret; } -EXPORT_SYMBOL_GPL(dev_pm_qos_add_request); /** * dev_pm_qos_update_request - modifies an existing qos request @@ -268,11 +409,24 @@ EXPORT_SYMBOL_GPL(dev_pm_qos_add_request); * 0 if the aggregated constraint value has not changed, * -EINVAL in case of wrong parameters, -ENODEV if the device has been * removed from the system + * + * Callers should ensure that the target device is not RPM_SUSPENDED before + * using this function for requests of type DEV_PM_QOS_FLAGS. 
*/ -int dev_pm_qos_update_request(struct dev_pm_qos_request *req, - s32 new_value) +int dev_pm_qos_update_request(struct dev_pm_qos_request *req, s32 new_value) { - int ret = 0; + int ret; + + mutex_lock(&dev_pm_qos_mtx); + ret = __dev_pm_qos_update_request(req, new_value); + mutex_unlock(&dev_pm_qos_mtx); + return ret; +} +EXPORT_SYMBOL_GPL(dev_pm_qos_update_request); + +static int __dev_pm_qos_remove_request(struct dev_pm_qos_request *req) +{ + int ret; if (!req) /*guard against callers passing in null */ return -EINVAL; @@ -281,21 +435,15 @@ int dev_pm_qos_update_request(struct dev_pm_qos_request *req, "%s() called for unknown object\n", __func__)) return -EINVAL; - mutex_lock(&dev_pm_qos_mtx); + if (IS_ERR_OR_NULL(req->dev->power.qos)) + return -ENODEV; - if (req->dev->power.constraints) { - if (new_value != req->node.prio) - ret = apply_constraint(req, PM_QOS_UPDATE_REQ, - new_value); - } else { - /* Return if the device has been removed */ - ret = -ENODEV; - } - - mutex_unlock(&dev_pm_qos_mtx); + trace_dev_pm_qos_remove_request(dev_name(req->dev), req->type, + PM_QOS_DEFAULT_VALUE); + ret = apply_constraint(req, PM_QOS_REMOVE_REQ, PM_QOS_DEFAULT_VALUE); + memset(req, 0, sizeof(*req)); return ret; } -EXPORT_SYMBOL_GPL(dev_pm_qos_update_request); /** * dev_pm_qos_remove_request - modifies an existing qos request @@ -308,29 +456,16 @@ EXPORT_SYMBOL_GPL(dev_pm_qos_update_request); * 0 if the aggregated constraint value has not changed, * -EINVAL in case of wrong parameters, -ENODEV if the device has been * removed from the system + * + * Callers should ensure that the target device is not RPM_SUSPENDED before + * using this function for requests of type DEV_PM_QOS_FLAGS. */ int dev_pm_qos_remove_request(struct dev_pm_qos_request *req) { - int ret = 0; - - if (!req) /*guard against callers passing in null */ - return -EINVAL; - - if (WARN(!dev_pm_qos_request_active(req), - "%s() called for unknown object\n", __func__)) - return -EINVAL; + int ret; mutex_lock(&dev_pm_qos_mtx); - - if (req->dev->power.constraints) { - ret = apply_constraint(req, PM_QOS_REMOVE_REQ, - PM_QOS_DEFAULT_VALUE); - memset(req, 0, sizeof(*req)); - } else { - /* Return if the device has been removed */ - ret = -ENODEV; - } - + ret = __dev_pm_qos_remove_request(req); mutex_unlock(&dev_pm_qos_mtx); return ret; } @@ -345,21 +480,27 @@ EXPORT_SYMBOL_GPL(dev_pm_qos_remove_request); * * Will register the notifier into a notification chain that gets called * upon changes to the target value for the device. + * + * If the device's constraints object doesn't exist when this routine is called, + * it will be created (or error code will be returned if that fails). */ int dev_pm_qos_add_notifier(struct device *dev, struct notifier_block *notifier) { - int retval = 0; + int ret = 0; mutex_lock(&dev_pm_qos_mtx); - /* Silently return if the constraints object is not present. */ - if (dev->power.constraints) - retval = blocking_notifier_chain_register( - dev->power.constraints->notifiers, - notifier); + if (IS_ERR(dev->power.qos)) + ret = -ENODEV; + else if (!dev->power.qos) + ret = dev_pm_qos_constraints_allocate(dev); + + if (!ret) + ret = blocking_notifier_chain_register(dev->power.qos->resume_latency.notifiers, + notifier); mutex_unlock(&dev_pm_qos_mtx); - return retval; + return ret; } EXPORT_SYMBOL_GPL(dev_pm_qos_add_notifier); @@ -381,10 +522,9 @@ int dev_pm_qos_remove_notifier(struct device *dev, mutex_lock(&dev_pm_qos_mtx); /* Silently return if the constraints object is not present. 
*/ - if (dev->power.constraints) - retval = blocking_notifier_chain_unregister( - dev->power.constraints->notifiers, - notifier); + if (!IS_ERR_OR_NULL(dev->power.qos)) + retval = blocking_notifier_chain_unregister(dev->power.qos->resume_latency.notifiers, + notifier); mutex_unlock(&dev_pm_qos_mtx); return retval; @@ -425,23 +565,322 @@ EXPORT_SYMBOL_GPL(dev_pm_qos_remove_global_notifier); * dev_pm_qos_add_ancestor_request - Add PM QoS request for device's ancestor. * @dev: Device whose ancestor to add the request for. * @req: Pointer to the preallocated handle. + * @type: Type of the request. * @value: Constraint latency value. */ int dev_pm_qos_add_ancestor_request(struct device *dev, - struct dev_pm_qos_request *req, s32 value) + struct dev_pm_qos_request *req, + enum dev_pm_qos_req_type type, s32 value) { struct device *ancestor = dev->parent; - int error = -ENODEV; + int ret = -ENODEV; - while (ancestor && !ancestor->power.ignore_children) - ancestor = ancestor->parent; + switch (type) { + case DEV_PM_QOS_RESUME_LATENCY: + while (ancestor && !ancestor->power.ignore_children) + ancestor = ancestor->parent; + break; + case DEV_PM_QOS_LATENCY_TOLERANCE: + while (ancestor && !ancestor->power.set_latency_tolerance) + ancestor = ancestor->parent; + + break; + default: + ancestor = NULL; + } if (ancestor) - error = dev_pm_qos_add_request(ancestor, req, value); + ret = dev_pm_qos_add_request(ancestor, req, type, value); - if (error) + if (ret < 0) req->dev = NULL; - return error; + return ret; } EXPORT_SYMBOL_GPL(dev_pm_qos_add_ancestor_request); + +#ifdef CONFIG_PM_RUNTIME +static void __dev_pm_qos_drop_user_request(struct device *dev, + enum dev_pm_qos_req_type type) +{ + struct dev_pm_qos_request *req = NULL; + + switch(type) { + case DEV_PM_QOS_RESUME_LATENCY: + req = dev->power.qos->resume_latency_req; + dev->power.qos->resume_latency_req = NULL; + break; + case DEV_PM_QOS_LATENCY_TOLERANCE: + req = dev->power.qos->latency_tolerance_req; + dev->power.qos->latency_tolerance_req = NULL; + break; + case DEV_PM_QOS_FLAGS: + req = dev->power.qos->flags_req; + dev->power.qos->flags_req = NULL; + break; + } + __dev_pm_qos_remove_request(req); + kfree(req); +} + +static void dev_pm_qos_drop_user_request(struct device *dev, + enum dev_pm_qos_req_type type) +{ + mutex_lock(&dev_pm_qos_mtx); + __dev_pm_qos_drop_user_request(dev, type); + mutex_unlock(&dev_pm_qos_mtx); +} + +/** + * dev_pm_qos_expose_latency_limit - Expose PM QoS latency limit to user space. + * @dev: Device whose PM QoS latency limit is to be exposed to user space. + * @value: Initial value of the latency limit. 
+ */ +int dev_pm_qos_expose_latency_limit(struct device *dev, s32 value) +{ + struct dev_pm_qos_request *req; + int ret; + + if (!device_is_registered(dev) || value < 0) + return -EINVAL; + + req = kzalloc(sizeof(*req), GFP_KERNEL); + if (!req) + return -ENOMEM; + + ret = dev_pm_qos_add_request(dev, req, DEV_PM_QOS_RESUME_LATENCY, value); + if (ret < 0) { + kfree(req); + return ret; + } + + mutex_lock(&dev_pm_qos_sysfs_mtx); + + mutex_lock(&dev_pm_qos_mtx); + + if (IS_ERR_OR_NULL(dev->power.qos)) + ret = -ENODEV; + else if (dev->power.qos->resume_latency_req) + ret = -EEXIST; + + if (ret < 0) { + __dev_pm_qos_remove_request(req); + kfree(req); + mutex_unlock(&dev_pm_qos_mtx); + goto out; + } + dev->power.qos->resume_latency_req = req; + + mutex_unlock(&dev_pm_qos_mtx); + + ret = pm_qos_sysfs_add_resume_latency(dev); + if (ret) + dev_pm_qos_drop_user_request(dev, DEV_PM_QOS_RESUME_LATENCY); + + out: + mutex_unlock(&dev_pm_qos_sysfs_mtx); + return ret; +} +EXPORT_SYMBOL_GPL(dev_pm_qos_expose_latency_limit); + +static void __dev_pm_qos_hide_latency_limit(struct device *dev) +{ + if (!IS_ERR_OR_NULL(dev->power.qos) && dev->power.qos->resume_latency_req) + __dev_pm_qos_drop_user_request(dev, DEV_PM_QOS_RESUME_LATENCY); +} + +/** + * dev_pm_qos_hide_latency_limit - Hide PM QoS latency limit from user space. + * @dev: Device whose PM QoS latency limit is to be hidden from user space. + */ +void dev_pm_qos_hide_latency_limit(struct device *dev) +{ + mutex_lock(&dev_pm_qos_sysfs_mtx); + + pm_qos_sysfs_remove_resume_latency(dev); + + mutex_lock(&dev_pm_qos_mtx); + __dev_pm_qos_hide_latency_limit(dev); + mutex_unlock(&dev_pm_qos_mtx); + + mutex_unlock(&dev_pm_qos_sysfs_mtx); +} +EXPORT_SYMBOL_GPL(dev_pm_qos_hide_latency_limit); + +/** + * dev_pm_qos_expose_flags - Expose PM QoS flags of a device to user space. + * @dev: Device whose PM QoS flags are to be exposed to user space. + * @val: Initial values of the flags. + */ +int dev_pm_qos_expose_flags(struct device *dev, s32 val) +{ + struct dev_pm_qos_request *req; + int ret; + + if (!device_is_registered(dev)) + return -EINVAL; + + req = kzalloc(sizeof(*req), GFP_KERNEL); + if (!req) + return -ENOMEM; + + ret = dev_pm_qos_add_request(dev, req, DEV_PM_QOS_FLAGS, val); + if (ret < 0) { + kfree(req); + return ret; + } + + pm_runtime_get_sync(dev); + mutex_lock(&dev_pm_qos_sysfs_mtx); + + mutex_lock(&dev_pm_qos_mtx); + + if (IS_ERR_OR_NULL(dev->power.qos)) + ret = -ENODEV; + else if (dev->power.qos->flags_req) + ret = -EEXIST; + + if (ret < 0) { + __dev_pm_qos_remove_request(req); + kfree(req); + mutex_unlock(&dev_pm_qos_mtx); + goto out; + } + dev->power.qos->flags_req = req; + + mutex_unlock(&dev_pm_qos_mtx); + + ret = pm_qos_sysfs_add_flags(dev); + if (ret) + dev_pm_qos_drop_user_request(dev, DEV_PM_QOS_FLAGS); + + out: + mutex_unlock(&dev_pm_qos_sysfs_mtx); + pm_runtime_put(dev); + return ret; +} +EXPORT_SYMBOL_GPL(dev_pm_qos_expose_flags); + +static void __dev_pm_qos_hide_flags(struct device *dev) +{ + if (!IS_ERR_OR_NULL(dev->power.qos) && dev->power.qos->flags_req) + __dev_pm_qos_drop_user_request(dev, DEV_PM_QOS_FLAGS); +} + +/** + * dev_pm_qos_hide_flags - Hide PM QoS flags of a device from user space. + * @dev: Device whose PM QoS flags are to be hidden from user space. 
+ */ +void dev_pm_qos_hide_flags(struct device *dev) +{ + pm_runtime_get_sync(dev); + mutex_lock(&dev_pm_qos_sysfs_mtx); + + pm_qos_sysfs_remove_flags(dev); + + mutex_lock(&dev_pm_qos_mtx); + __dev_pm_qos_hide_flags(dev); + mutex_unlock(&dev_pm_qos_mtx); + + mutex_unlock(&dev_pm_qos_sysfs_mtx); + pm_runtime_put(dev); +} +EXPORT_SYMBOL_GPL(dev_pm_qos_hide_flags); + +/** + * dev_pm_qos_update_flags - Update PM QoS flags request owned by user space. + * @dev: Device to update the PM QoS flags request for. + * @mask: Flags to set/clear. + * @set: Whether to set or clear the flags (true means set). + */ +int dev_pm_qos_update_flags(struct device *dev, s32 mask, bool set) +{ + s32 value; + int ret; + + pm_runtime_get_sync(dev); + mutex_lock(&dev_pm_qos_mtx); + + if (IS_ERR_OR_NULL(dev->power.qos) || !dev->power.qos->flags_req) { + ret = -EINVAL; + goto out; + } + + value = dev_pm_qos_requested_flags(dev); + if (set) + value |= mask; + else + value &= ~mask; + + ret = __dev_pm_qos_update_request(dev->power.qos->flags_req, value); + + out: + mutex_unlock(&dev_pm_qos_mtx); + pm_runtime_put(dev); + return ret; +} + +/** + * dev_pm_qos_get_user_latency_tolerance - Get user space latency tolerance. + * @dev: Device to obtain the user space latency tolerance for. + */ +s32 dev_pm_qos_get_user_latency_tolerance(struct device *dev) +{ + s32 ret; + + mutex_lock(&dev_pm_qos_mtx); + ret = IS_ERR_OR_NULL(dev->power.qos) + || !dev->power.qos->latency_tolerance_req ? + PM_QOS_LATENCY_TOLERANCE_NO_CONSTRAINT : + dev->power.qos->latency_tolerance_req->data.pnode.prio; + mutex_unlock(&dev_pm_qos_mtx); + return ret; +} + +/** + * dev_pm_qos_update_user_latency_tolerance - Update user space latency tolerance. + * @dev: Device to update the user space latency tolerance for. + * @val: New user space latency tolerance for @dev (negative values disable). 
+ */ +int dev_pm_qos_update_user_latency_tolerance(struct device *dev, s32 val) +{ + int ret; + + mutex_lock(&dev_pm_qos_mtx); + + if (IS_ERR_OR_NULL(dev->power.qos) + || !dev->power.qos->latency_tolerance_req) { + struct dev_pm_qos_request *req; + + if (val < 0) { + ret = -EINVAL; + goto out; + } + req = kzalloc(sizeof(*req), GFP_KERNEL); + if (!req) { + ret = -ENOMEM; + goto out; + } + ret = __dev_pm_qos_add_request(dev, req, DEV_PM_QOS_LATENCY_TOLERANCE, val); + if (ret < 0) { + kfree(req); + goto out; + } + dev->power.qos->latency_tolerance_req = req; + } else { + if (val < 0) { + __dev_pm_qos_drop_user_request(dev, DEV_PM_QOS_LATENCY_TOLERANCE); + ret = 0; + } else { + ret = __dev_pm_qos_update_request(dev->power.qos->latency_tolerance_req, val); + } + } + + out: + mutex_unlock(&dev_pm_qos_mtx); + return ret; +} +#else /* !CONFIG_PM_RUNTIME */ +static void __dev_pm_qos_hide_latency_limit(struct device *dev) {} +static void __dev_pm_qos_hide_flags(struct device *dev) {} +#endif /* CONFIG_PM_RUNTIME */ diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index 541f821d4ea..67c7938e430 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -13,6 +13,43 @@ #include <trace/events/rpm.h> #include "power.h" +#define RPM_GET_CALLBACK(dev, cb) \ +({ \ + int (*__rpm_cb)(struct device *__d); \ + \ + if (dev->pm_domain) \ + __rpm_cb = dev->pm_domain->ops.cb; \ + else if (dev->type && dev->type->pm) \ + __rpm_cb = dev->type->pm->cb; \ + else if (dev->class && dev->class->pm) \ + __rpm_cb = dev->class->pm->cb; \ + else if (dev->bus && dev->bus->pm) \ + __rpm_cb = dev->bus->pm->cb; \ + else \ + __rpm_cb = NULL; \ + \ + if (!__rpm_cb && dev->driver && dev->driver->pm) \ + __rpm_cb = dev->driver->pm->cb; \ + \ + __rpm_cb; \ +}) + +static int (*rpm_get_suspend_cb(struct device *dev))(struct device *) +{ + return RPM_GET_CALLBACK(dev, runtime_suspend); +} + +static int (*rpm_get_resume_cb(struct device *dev))(struct device *) +{ + return RPM_GET_CALLBACK(dev, runtime_resume); +} + +#ifdef CONFIG_PM_RUNTIME +static int (*rpm_get_idle_cb(struct device *dev))(struct device *) +{ + return RPM_GET_CALLBACK(dev, runtime_idle); +} + static int rpm_resume(struct device *dev, int rpmflags); static int rpm_suspend(struct device *dev, int rpmflags); @@ -124,6 +161,76 @@ unsigned long pm_runtime_autosuspend_expiration(struct device *dev) } EXPORT_SYMBOL_GPL(pm_runtime_autosuspend_expiration); +static int dev_memalloc_noio(struct device *dev, void *data) +{ + return dev->power.memalloc_noio; +} + +/* + * pm_runtime_set_memalloc_noio - Set a device's memalloc_noio flag. + * @dev: Device to handle. + * @enable: True for setting the flag and False for clearing the flag. + * + * Set the flag for all devices in the path from the device to the + * root device in the device tree if @enable is true, otherwise clear + * the flag for devices in the path whose siblings don't set the flag. + * + * The function should only be called by block device, or network + * device driver for solving the deadlock problem during runtime + * resume/suspend: + * + * If memory allocation with GFP_KERNEL is called inside runtime + * resume/suspend callback of any one of its ancestors(or the + * block device itself), the deadlock may be triggered inside the + * memory allocation since it might not complete until the block + * device becomes active and the involed page I/O finishes. The + * situation is pointed out first by Alan Stern. Network device + * are involved in iSCSI kind of situation. 
+ * + * The lock of dev_hotplug_mutex is held in the function for handling + * hotplug race because pm_runtime_set_memalloc_noio() may be called + * in async probe(). + * + * The function should be called between device_add() and device_del() + * on the affected device(block/network device). + */ +void pm_runtime_set_memalloc_noio(struct device *dev, bool enable) +{ + static DEFINE_MUTEX(dev_hotplug_mutex); + + mutex_lock(&dev_hotplug_mutex); + for (;;) { + bool enabled; + + /* hold power lock since bitfield is not SMP-safe. */ + spin_lock_irq(&dev->power.lock); + enabled = dev->power.memalloc_noio; + dev->power.memalloc_noio = enable; + spin_unlock_irq(&dev->power.lock); + + /* + * not need to enable ancestors any more if the device + * has been enabled. + */ + if (enabled && enable) + break; + + dev = dev->parent; + + /* + * clear flag of the parent device only if all the + * children don't set the flag because ancestor's + * flag was set by any one of the descendants. + */ + if (!dev || (!enable && + device_for_each_child(dev, NULL, + dev_memalloc_noio))) + break; + } + mutex_unlock(&dev_hotplug_mutex); +} +EXPORT_SYMBOL_GPL(pm_runtime_set_memalloc_noio); + /** * rpm_check_suspend_allowed - Test whether a device may be suspended. * @dev: Device to test. @@ -147,6 +254,8 @@ static int rpm_check_suspend_allowed(struct device *dev) || (dev->power.request_pending && dev->power.request == RPM_REQ_RESUME)) retval = -EAGAIN; + else if (__dev_pm_qos_read_value(dev) < 0) + retval = -EPERM; else if (dev->power.runtime_status == RPM_SUSPENDED) retval = 1; @@ -186,7 +295,8 @@ static int __rpm_callback(int (*cb)(struct device *), struct device *dev) * Check if the device's runtime PM status allows it to be suspended. If * another idle notification has been started earlier, return immediately. If * the RPM_ASYNC flag is set then queue an idle-notification request; otherwise - * run the ->runtime_idle() callback directly. + * run the ->runtime_idle() callback directly. If the ->runtime_idle callback + * doesn't exist or if it returns 0, call rpm_suspend with the RPM_AUTO flag. * * This function must be called under dev->power.lock with interrupts disabled. */ @@ -221,11 +331,8 @@ static int rpm_idle(struct device *dev, int rpmflags) /* Pending requests need to be canceled. */ dev->power.request = RPM_REQ_NONE; - if (dev->power.no_callbacks) { - /* Assume ->runtime_idle() callback would have suspended. */ - retval = rpm_suspend(dev, rpmflags); + if (dev->power.no_callbacks) goto out; - } /* Carry out an asynchronous or a synchronous idle notification. 
*/ if (rpmflags & RPM_ASYNC) { @@ -234,34 +341,23 @@ static int rpm_idle(struct device *dev, int rpmflags) dev->power.request_pending = true; queue_work(pm_wq, &dev->power.work); } - goto out; + trace_rpm_return_int(dev, _THIS_IP_, 0); + return 0; } dev->power.idle_notification = true; - if (dev->pm_domain) - callback = dev->pm_domain->ops.runtime_idle; - else if (dev->type && dev->type->pm) - callback = dev->type->pm->runtime_idle; - else if (dev->class && dev->class->pm) - callback = dev->class->pm->runtime_idle; - else if (dev->bus && dev->bus->pm) - callback = dev->bus->pm->runtime_idle; - else - callback = NULL; - - if (!callback && dev->driver && dev->driver->pm) - callback = dev->driver->pm->runtime_idle; + callback = rpm_get_idle_cb(dev); if (callback) - __rpm_callback(callback, dev); + retval = __rpm_callback(callback, dev); dev->power.idle_notification = false; wake_up_all(&dev->power.wait_queue); out: trace_rpm_return_int(dev, _THIS_IP_, retval); - return retval; + return retval ? retval : rpm_suspend(dev, rpmflags | RPM_AUTO); } /** @@ -276,51 +372,27 @@ static int rpm_callback(int (*cb)(struct device *), struct device *dev) if (!cb) return -ENOSYS; - retval = __rpm_callback(cb, dev); - - dev->power.runtime_error = retval; - return retval != -EACCES ? retval : -EIO; -} - -struct rpm_qos_data { - ktime_t time_now; - s64 constraint_ns; -}; - -/** - * rpm_update_qos_constraint - Update a given PM QoS constraint data. - * @dev: Device whose timing data to use. - * @data: PM QoS constraint data to update. - * - * Use the suspend timing data of @dev to update PM QoS constraint data pointed - * to by @data. - */ -static int rpm_update_qos_constraint(struct device *dev, void *data) -{ - struct rpm_qos_data *qos = data; - unsigned long flags; - s64 delta_ns; - int ret = 0; - - spin_lock_irqsave(&dev->power.lock, flags); + if (dev->power.memalloc_noio) { + unsigned int noio_flag; - if (dev->power.max_time_suspended_ns < 0) - goto out; - - delta_ns = dev->power.max_time_suspended_ns - - ktime_to_ns(ktime_sub(qos->time_now, dev->power.suspend_time)); - if (delta_ns <= 0) { - ret = -EBUSY; - goto out; + /* + * Deadlock might be caused if memory allocation with + * GFP_KERNEL happens inside runtime_suspend and + * runtime_resume callbacks of one block device's + * ancestor or the block device itself. Network + * device might be thought as part of iSCSI block + * device, so network device and its ancestor should + * be marked as memalloc_noio too. + */ + noio_flag = memalloc_noio_save(); + retval = __rpm_callback(cb, dev); + memalloc_noio_restore(noio_flag); + } else { + retval = __rpm_callback(cb, dev); } - if (qos->constraint_ns > delta_ns || qos->constraint_ns == 0) - qos->constraint_ns = delta_ns; - - out: - spin_unlock_irqrestore(&dev->power.lock, flags); - - return ret; + dev->power.runtime_error = retval; + return retval != -EACCES ? retval : -EIO; } /** @@ -349,7 +421,6 @@ static int rpm_suspend(struct device *dev, int rpmflags) { int (*callback)(struct device *); struct device *parent = NULL; - struct rpm_qos_data qos; int retval; trace_rpm_suspend(dev, rpmflags); @@ -430,7 +501,6 @@ static int rpm_suspend(struct device *dev, int rpmflags) goto repeat; } - dev->power.deferred_resume = false; if (dev->power.no_callbacks) goto no_callback; /* Assume success. */ @@ -445,51 +515,9 @@ static int rpm_suspend(struct device *dev, int rpmflags) goto out; } - qos.constraint_ns = __dev_pm_qos_read_value(dev); - if (qos.constraint_ns < 0) { - /* Negative constraint means "never suspend". 
*/ - retval = -EPERM; - goto out; - } - qos.constraint_ns *= NSEC_PER_USEC; - qos.time_now = ktime_get(); - __update_runtime_status(dev, RPM_SUSPENDING); - if (!dev->power.ignore_children) { - if (dev->power.irq_safe) - spin_unlock(&dev->power.lock); - else - spin_unlock_irq(&dev->power.lock); - - retval = device_for_each_child(dev, &qos, - rpm_update_qos_constraint); - - if (dev->power.irq_safe) - spin_lock(&dev->power.lock); - else - spin_lock_irq(&dev->power.lock); - - if (retval) - goto fail; - } - - dev->power.suspend_time = qos.time_now; - dev->power.max_time_suspended_ns = qos.constraint_ns ? : -1; - - if (dev->pm_domain) - callback = dev->pm_domain->ops.runtime_suspend; - else if (dev->type && dev->type->pm) - callback = dev->type->pm->runtime_suspend; - else if (dev->class && dev->class->pm) - callback = dev->class->pm->runtime_suspend; - else if (dev->bus && dev->bus->pm) - callback = dev->bus->pm->runtime_suspend; - else - callback = NULL; - - if (!callback && dev->driver && dev->driver->pm) - callback = dev->driver->pm->runtime_suspend; + callback = rpm_get_suspend_cb(dev); retval = rpm_callback(callback, dev); if (retval) @@ -506,6 +534,7 @@ static int rpm_suspend(struct device *dev, int rpmflags) wake_up_all(&dev->power.wait_queue); if (dev->power.deferred_resume) { + dev->power.deferred_resume = false; rpm_resume(dev, 0); retval = -EAGAIN; goto out; @@ -529,9 +558,9 @@ static int rpm_suspend(struct device *dev, int rpmflags) fail: __update_runtime_status(dev, RPM_ACTIVE); - dev->power.suspend_time = ktime_set(0, 0); - dev->power.max_time_suspended_ns = -1; dev->power.deferred_resume = false; + wake_up_all(&dev->power.wait_queue); + if (retval == -EAGAIN || retval == -EBUSY) { dev->power.runtime_error = 0; @@ -547,7 +576,6 @@ static int rpm_suspend(struct device *dev, int rpmflags) } else { pm_runtime_cancel_pending(dev); } - wake_up_all(&dev->power.wait_queue); goto out; } @@ -580,6 +608,9 @@ static int rpm_resume(struct device *dev, int rpmflags) repeat: if (dev->power.runtime_error) retval = -EINVAL; + else if (dev->power.disable_depth == 1 && dev->power.is_suspended + && dev->power.runtime_status == RPM_ACTIVE) + retval = 1; else if (dev->power.disable_depth > 0) retval = -EACCES; if (retval) @@ -651,6 +682,7 @@ static int rpm_resume(struct device *dev, int rpmflags) || dev->parent->power.runtime_status == RPM_ACTIVE) { atomic_inc(&dev->parent->power.child_count); spin_unlock(&dev->parent->power.lock); + retval = 1; goto no_callback; /* Assume success. */ } spin_unlock(&dev->parent->power.lock); @@ -703,24 +735,9 @@ static int rpm_resume(struct device *dev, int rpmflags) if (dev->power.no_callbacks) goto no_callback; /* Assume success. 
*/ - dev->power.suspend_time = ktime_set(0, 0); - dev->power.max_time_suspended_ns = -1; - __update_runtime_status(dev, RPM_RESUMING); - if (dev->pm_domain) - callback = dev->pm_domain->ops.runtime_resume; - else if (dev->type && dev->type->pm) - callback = dev->type->pm->runtime_resume; - else if (dev->class && dev->class->pm) - callback = dev->class->pm->runtime_resume; - else if (dev->bus && dev->bus->pm) - callback = dev->bus->pm->runtime_resume; - else - callback = NULL; - - if (!callback && dev->driver && dev->driver->pm) - callback = dev->driver->pm->runtime_resume; + callback = rpm_get_resume_cb(dev); retval = rpm_callback(callback, dev); if (retval) { @@ -734,7 +751,7 @@ static int rpm_resume(struct device *dev, int rpmflags) } wake_up_all(&dev->power.wait_queue); - if (!retval) + if (retval >= 0) rpm_idle(dev, RPM_ASYNC); out: @@ -1114,7 +1131,7 @@ EXPORT_SYMBOL_GPL(pm_runtime_barrier); * @dev: Device to handle. * @check_resume: If set, check if there's a resume request for the device. * - * Increment power.disable_depth for the device and if was zero previously, + * Increment power.disable_depth for the device and if it was zero previously, * cancel all pending runtime PM requests for the device and wait for all * operations in progress to complete. The device can be either active or * suspended after its runtime PM has been disabled. @@ -1368,9 +1385,6 @@ void pm_runtime_init(struct device *dev) setup_timer(&dev->power.suspend_timer, pm_suspend_timer_fn, (unsigned long)dev); - dev->power.suspend_time = ktime_set(0, 0); - dev->power.max_time_suspended_ns = -1; - init_waitqueue_head(&dev->power.wait_queue); } @@ -1386,30 +1400,88 @@ void pm_runtime_remove(struct device *dev) if (dev->power.runtime_status == RPM_ACTIVE) pm_runtime_set_suspended(dev); if (dev->power.irq_safe && dev->parent) - pm_runtime_put_sync(dev->parent); + pm_runtime_put(dev->parent); } +#endif /** - * pm_runtime_update_max_time_suspended - Update device's suspend time data. - * @dev: Device to handle. - * @delta_ns: Value to subtract from the device's max_time_suspended_ns field. + * pm_runtime_force_suspend - Force a device into suspend state if needed. + * @dev: Device to suspend. * - * Update the device's power.max_time_suspended_ns field by subtracting - * @delta_ns from it. The resulting value of power.max_time_suspended_ns is - * never negative. + * Disable runtime PM so we safely can check the device's runtime PM status and + * if it is active, invoke it's .runtime_suspend callback to bring it into + * suspend state. Keep runtime PM disabled to preserve the state unless we + * encounter errors. + * + * Typically this function may be invoked from a system suspend callback to make + * sure the device is put into low power state. */ -void pm_runtime_update_max_time_suspended(struct device *dev, s64 delta_ns) +int pm_runtime_force_suspend(struct device *dev) { - unsigned long flags; + int (*callback)(struct device *); + int ret = 0; - spin_lock_irqsave(&dev->power.lock, flags); + pm_runtime_disable(dev); + + /* + * Note that pm_runtime_status_suspended() returns false while + * !CONFIG_PM_RUNTIME, which means the device will be put into low + * power state. 
+ */ + if (pm_runtime_status_suspended(dev)) + return 0; - if (delta_ns > 0 && dev->power.max_time_suspended_ns > 0) { - if (dev->power.max_time_suspended_ns > delta_ns) - dev->power.max_time_suspended_ns -= delta_ns; - else - dev->power.max_time_suspended_ns = 0; + callback = rpm_get_suspend_cb(dev); + + if (!callback) { + ret = -ENOSYS; + goto err; } - spin_unlock_irqrestore(&dev->power.lock, flags); + ret = callback(dev); + if (ret) + goto err; + + pm_runtime_set_suspended(dev); + return 0; +err: + pm_runtime_enable(dev); + return ret; +} +EXPORT_SYMBOL_GPL(pm_runtime_force_suspend); + +/** + * pm_runtime_force_resume - Force a device into resume state. + * @dev: Device to resume. + * + * Prior invoking this function we expect the user to have brought the device + * into low power state by a call to pm_runtime_force_suspend(). Here we reverse + * those actions and brings the device into full power. We update the runtime PM + * status and re-enables runtime PM. + * + * Typically this function may be invoked from a system resume callback to make + * sure the device is put into full power state. + */ +int pm_runtime_force_resume(struct device *dev) +{ + int (*callback)(struct device *); + int ret = 0; + + callback = rpm_get_resume_cb(dev); + + if (!callback) { + ret = -ENOSYS; + goto out; + } + + ret = callback(dev); + if (ret) + goto out; + + pm_runtime_set_active(dev); + pm_runtime_mark_last_busy(dev); +out: + pm_runtime_enable(dev); + return ret; } +EXPORT_SYMBOL_GPL(pm_runtime_force_resume); diff --git a/drivers/base/power/sysfs.c b/drivers/base/power/sysfs.c index adf41be0ea6..95b181d1ca6 100644 --- a/drivers/base/power/sysfs.c +++ b/drivers/base/power/sysfs.c @@ -5,6 +5,7 @@ #include <linux/device.h> #include <linux/string.h> #include <linux/export.h> +#include <linux/pm_qos.h> #include <linux/pm_runtime.h> #include <linux/atomic.h> #include <linux/jiffies.h> @@ -205,7 +206,7 @@ static ssize_t autosuspend_delay_ms_store(struct device *dev, if (!dev->power.use_autosuspend) return -EIO; - if (strict_strtol(buf, 10, &delay) != 0 || delay != (int) delay) + if (kstrtol(buf, 10, &delay) != 0 || delay != (int) delay) return -EINVAL; device_lock(dev); @@ -217,6 +218,121 @@ static ssize_t autosuspend_delay_ms_store(struct device *dev, static DEVICE_ATTR(autosuspend_delay_ms, 0644, autosuspend_delay_ms_show, autosuspend_delay_ms_store); +static ssize_t pm_qos_resume_latency_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + return sprintf(buf, "%d\n", dev_pm_qos_requested_resume_latency(dev)); +} + +static ssize_t pm_qos_resume_latency_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t n) +{ + s32 value; + int ret; + + if (kstrtos32(buf, 0, &value)) + return -EINVAL; + + if (value < 0) + return -EINVAL; + + ret = dev_pm_qos_update_request(dev->power.qos->resume_latency_req, + value); + return ret < 0 ? 
ret : n; +} + +static DEVICE_ATTR(pm_qos_resume_latency_us, 0644, + pm_qos_resume_latency_show, pm_qos_resume_latency_store); + +static ssize_t pm_qos_latency_tolerance_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + s32 value = dev_pm_qos_get_user_latency_tolerance(dev); + + if (value < 0) + return sprintf(buf, "auto\n"); + else if (value == PM_QOS_LATENCY_ANY) + return sprintf(buf, "any\n"); + + return sprintf(buf, "%d\n", value); +} + +static ssize_t pm_qos_latency_tolerance_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t n) +{ + s32 value; + int ret; + + if (kstrtos32(buf, 0, &value)) { + if (!strcmp(buf, "auto") || !strcmp(buf, "auto\n")) + value = PM_QOS_LATENCY_TOLERANCE_NO_CONSTRAINT; + else if (!strcmp(buf, "any") || !strcmp(buf, "any\n")) + value = PM_QOS_LATENCY_ANY; + } + ret = dev_pm_qos_update_user_latency_tolerance(dev, value); + return ret < 0 ? ret : n; +} + +static DEVICE_ATTR(pm_qos_latency_tolerance_us, 0644, + pm_qos_latency_tolerance_show, pm_qos_latency_tolerance_store); + +static ssize_t pm_qos_no_power_off_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + return sprintf(buf, "%d\n", !!(dev_pm_qos_requested_flags(dev) + & PM_QOS_FLAG_NO_POWER_OFF)); +} + +static ssize_t pm_qos_no_power_off_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t n) +{ + int ret; + + if (kstrtoint(buf, 0, &ret)) + return -EINVAL; + + if (ret != 0 && ret != 1) + return -EINVAL; + + ret = dev_pm_qos_update_flags(dev, PM_QOS_FLAG_NO_POWER_OFF, ret); + return ret < 0 ? ret : n; +} + +static DEVICE_ATTR(pm_qos_no_power_off, 0644, + pm_qos_no_power_off_show, pm_qos_no_power_off_store); + +static ssize_t pm_qos_remote_wakeup_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + return sprintf(buf, "%d\n", !!(dev_pm_qos_requested_flags(dev) + & PM_QOS_FLAG_REMOTE_WAKEUP)); +} + +static ssize_t pm_qos_remote_wakeup_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t n) +{ + int ret; + + if (kstrtoint(buf, 0, &ret)) + return -EINVAL; + + if (ret != 0 && ret != 1) + return -EINVAL; + + ret = dev_pm_qos_update_flags(dev, PM_QOS_FLAG_REMOTE_WAKEUP, ret); + return ret < 0 ? ret : n; +} + +static DEVICE_ATTR(pm_qos_remote_wakeup, 0644, + pm_qos_remote_wakeup_show, pm_qos_remote_wakeup_store); #endif /* CONFIG_PM_RUNTIME */ #ifdef CONFIG_PM_SLEEP @@ -288,22 +404,41 @@ static ssize_t wakeup_active_count_show(struct device *dev, static DEVICE_ATTR(wakeup_active_count, 0444, wakeup_active_count_show, NULL); -static ssize_t wakeup_hit_count_show(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t wakeup_abort_count_show(struct device *dev, + struct device_attribute *attr, + char *buf) { unsigned long count = 0; bool enabled = false; spin_lock_irq(&dev->power.lock); if (dev->power.wakeup) { - count = dev->power.wakeup->hit_count; + count = dev->power.wakeup->wakeup_count; enabled = true; } spin_unlock_irq(&dev->power.lock); return enabled ? 
sprintf(buf, "%lu\n", count) : sprintf(buf, "\n"); } -static DEVICE_ATTR(wakeup_hit_count, 0444, wakeup_hit_count_show, NULL); +static DEVICE_ATTR(wakeup_abort_count, 0444, wakeup_abort_count_show, NULL); + +static ssize_t wakeup_expire_count_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + unsigned long count = 0; + bool enabled = false; + + spin_lock_irq(&dev->power.lock); + if (dev->power.wakeup) { + count = dev->power.wakeup->expire_count; + enabled = true; + } + spin_unlock_irq(&dev->power.lock); + return enabled ? sprintf(buf, "%lu\n", count) : sprintf(buf, "\n"); +} + +static DEVICE_ATTR(wakeup_expire_count, 0444, wakeup_expire_count_show, NULL); static ssize_t wakeup_active_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -372,6 +507,27 @@ static ssize_t wakeup_last_time_show(struct device *dev, } static DEVICE_ATTR(wakeup_last_time_ms, 0444, wakeup_last_time_show, NULL); + +#ifdef CONFIG_PM_AUTOSLEEP +static ssize_t wakeup_prevent_sleep_time_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + s64 msec = 0; + bool enabled = false; + + spin_lock_irq(&dev->power.lock); + if (dev->power.wakeup) { + msec = ktime_to_ms(dev->power.wakeup->prevent_sleep_time); + enabled = true; + } + spin_unlock_irq(&dev->power.lock); + return enabled ? sprintf(buf, "%lld\n", msec) : sprintf(buf, "\n"); +} + +static DEVICE_ATTR(wakeup_prevent_sleep_time_ms, 0444, + wakeup_prevent_sleep_time_show, NULL); +#endif /* CONFIG_PM_AUTOSLEEP */ #endif /* CONFIG_PM_SLEEP */ #ifdef CONFIG_PM_ADVANCED_DEBUG @@ -408,6 +564,8 @@ static DEVICE_ATTR(runtime_enabled, 0444, rtpm_enabled_show, NULL); #endif +#ifdef CONFIG_PM_SLEEP + static ssize_t async_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -434,6 +592,8 @@ static ssize_t async_store(struct device *dev, struct device_attribute *attr, } static DEVICE_ATTR(async, 0644, async_show, async_store); + +#endif #endif /* CONFIG_PM_ADVANCED_DEBUG */ static struct attribute *power_attrs[] = { @@ -460,11 +620,15 @@ static struct attribute *wakeup_attrs[] = { &dev_attr_wakeup.attr, &dev_attr_wakeup_count.attr, &dev_attr_wakeup_active_count.attr, - &dev_attr_wakeup_hit_count.attr, + &dev_attr_wakeup_abort_count.attr, + &dev_attr_wakeup_expire_count.attr, &dev_attr_wakeup_active.attr, &dev_attr_wakeup_total_time_ms.attr, &dev_attr_wakeup_max_time_ms.attr, &dev_attr_wakeup_last_time_ms.attr, +#ifdef CONFIG_PM_AUTOSLEEP + &dev_attr_wakeup_prevent_sleep_time_ms.attr, +#endif #endif NULL, }; @@ -490,6 +654,40 @@ static struct attribute_group pm_runtime_attr_group = { .attrs = runtime_attrs, }; +static struct attribute *pm_qos_resume_latency_attrs[] = { +#ifdef CONFIG_PM_RUNTIME + &dev_attr_pm_qos_resume_latency_us.attr, +#endif /* CONFIG_PM_RUNTIME */ + NULL, +}; +static struct attribute_group pm_qos_resume_latency_attr_group = { + .name = power_group_name, + .attrs = pm_qos_resume_latency_attrs, +}; + +static struct attribute *pm_qos_latency_tolerance_attrs[] = { +#ifdef CONFIG_PM_RUNTIME + &dev_attr_pm_qos_latency_tolerance_us.attr, +#endif /* CONFIG_PM_RUNTIME */ + NULL, +}; +static struct attribute_group pm_qos_latency_tolerance_attr_group = { + .name = power_group_name, + .attrs = pm_qos_latency_tolerance_attrs, +}; + +static struct attribute *pm_qos_flags_attrs[] = { +#ifdef CONFIG_PM_RUNTIME + &dev_attr_pm_qos_no_power_off.attr, + &dev_attr_pm_qos_remote_wakeup.attr, +#endif /* CONFIG_PM_RUNTIME */ + NULL, +}; +static struct attribute_group pm_qos_flags_attr_group = { + .name = 
power_group_name, + .attrs = pm_qos_flags_attrs, +}; + int dpm_sysfs_add(struct device *dev) { int rc; @@ -503,18 +701,23 @@ int dpm_sysfs_add(struct device *dev) if (rc) goto err_out; } - if (device_can_wakeup(dev)) { rc = sysfs_merge_group(&dev->kobj, &pm_wakeup_attr_group); - if (rc) { - if (pm_runtime_callbacks_present(dev)) - sysfs_unmerge_group(&dev->kobj, - &pm_runtime_attr_group); - goto err_out; - } + if (rc) + goto err_runtime; + } + if (dev->power.set_latency_tolerance) { + rc = sysfs_merge_group(&dev->kobj, + &pm_qos_latency_tolerance_attr_group); + if (rc) + goto err_wakeup; } return 0; + err_wakeup: + sysfs_unmerge_group(&dev->kobj, &pm_wakeup_attr_group); + err_runtime: + sysfs_unmerge_group(&dev->kobj, &pm_runtime_attr_group); err_out: sysfs_remove_group(&dev->kobj, &pm_attr_group); return rc; @@ -530,6 +733,26 @@ void wakeup_sysfs_remove(struct device *dev) sysfs_unmerge_group(&dev->kobj, &pm_wakeup_attr_group); } +int pm_qos_sysfs_add_resume_latency(struct device *dev) +{ + return sysfs_merge_group(&dev->kobj, &pm_qos_resume_latency_attr_group); +} + +void pm_qos_sysfs_remove_resume_latency(struct device *dev) +{ + sysfs_unmerge_group(&dev->kobj, &pm_qos_resume_latency_attr_group); +} + +int pm_qos_sysfs_add_flags(struct device *dev) +{ + return sysfs_merge_group(&dev->kobj, &pm_qos_flags_attr_group); +} + +void pm_qos_sysfs_remove_flags(struct device *dev) +{ + sysfs_unmerge_group(&dev->kobj, &pm_qos_flags_attr_group); +} + void rpm_sysfs_remove(struct device *dev) { sysfs_unmerge_group(&dev->kobj, &pm_runtime_attr_group); @@ -537,6 +760,8 @@ void rpm_sysfs_remove(struct device *dev) void dpm_sysfs_remove(struct device *dev) { + sysfs_unmerge_group(&dev->kobj, &pm_qos_latency_tolerance_attr_group); + dev_pm_qos_constraints_destroy(dev); rpm_sysfs_remove(dev); sysfs_unmerge_group(&dev->kobj, &pm_wakeup_attr_group); sysfs_remove_group(&dev->kobj, &pm_attr_group); diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c index caf995fb774..eb1bd2ecad8 100644 --- a/drivers/base/power/wakeup.c +++ b/drivers/base/power/wakeup.c @@ -14,16 +14,15 @@ #include <linux/suspend.h> #include <linux/seq_file.h> #include <linux/debugfs.h> +#include <trace/events/power.h> #include "power.h" -#define TIMEOUT 100 - /* * If set, the suspend/hibernate code will abort transitions to a sleep state * if wakeup events are registered during or immediately before the transition. */ -bool events_check_enabled; +bool events_check_enabled __read_mostly; /* * Combined counters of registered wakeup events and wakeup events in progress. @@ -52,6 +51,25 @@ static void pm_wakeup_timer_fn(unsigned long data); static LIST_HEAD(wakeup_sources); +static DECLARE_WAIT_QUEUE_HEAD(wakeup_count_wait_queue); + +/** + * wakeup_source_prepare - Prepare a new wakeup source for initialization. + * @ws: Wakeup source to prepare. + * @name: Pointer to the name of the new wakeup source. + * + * Callers must ensure that the @name string won't be freed when @ws is still in + * use. + */ +void wakeup_source_prepare(struct wakeup_source *ws, const char *name) +{ + if (ws) { + memset(ws, 0, sizeof(*ws)); + ws->name = name; + } +} +EXPORT_SYMBOL_GPL(wakeup_source_prepare); + /** * wakeup_source_create - Create a struct wakeup_source object. * @name: Name of the new wakeup source. 
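To illustrate the reworked per-device PM QoS interface above: requests now carry an explicit type, so a kernel user passes DEV_PM_QOS_RESUME_LATENCY (or one of the other dev_pm_qos_req_type values) when adding one. A minimal sketch follows; the request variable, the device pointer and the 100/200 microsecond figures are illustrative only, and the declarations are assumed to come from <linux/pm_qos.h>.

#include <linux/pm_qos.h>

static struct dev_pm_qos_request my_latency_req;	/* illustrative */

static int my_constrain_resume_latency(struct device *dev)
{
	int ret;

	/* Ask that resuming this device take no more than 100 us. */
	ret = dev_pm_qos_add_request(dev, &my_latency_req,
				     DEV_PM_QOS_RESUME_LATENCY, 100);
	if (ret < 0)	/* 0 and 1 both mean the request is in place */
		return ret;

	/* Relax the constraint later if 100 us turns out to be too tight. */
	dev_pm_qos_update_request(&my_latency_req, 200);

	/* Drop the request once the constraint is no longer needed. */
	return dev_pm_qos_remove_request(&my_latency_req);
}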
@@ -60,37 +78,44 @@ struct wakeup_source *wakeup_source_create(const char *name)
 {
 	struct wakeup_source *ws;

-	ws = kzalloc(sizeof(*ws), GFP_KERNEL);
+	ws = kmalloc(sizeof(*ws), GFP_KERNEL);
 	if (!ws)
 		return NULL;

-	spin_lock_init(&ws->lock);
-	if (name)
-		ws->name = kstrdup(name, GFP_KERNEL);
-
+	wakeup_source_prepare(ws, name ? kstrdup(name, GFP_KERNEL) : NULL);
 	return ws;
 }
 EXPORT_SYMBOL_GPL(wakeup_source_create);

 /**
+ * wakeup_source_drop - Prepare a struct wakeup_source object for destruction.
+ * @ws: Wakeup source to prepare for destruction.
+ *
+ * Callers must ensure that __pm_stay_awake() or __pm_wakeup_event() will never
+ * be run in parallel with this function for the same wakeup source object.
+ */
+void wakeup_source_drop(struct wakeup_source *ws)
+{
+	if (!ws)
+		return;
+
+	del_timer_sync(&ws->timer);
+	__pm_relax(ws);
+}
+EXPORT_SYMBOL_GPL(wakeup_source_drop);
+
+/**
  * wakeup_source_destroy - Destroy a struct wakeup_source object.
  * @ws: Wakeup source to destroy.
+ *
+ * Use only for wakeup source objects created with wakeup_source_create().
  */
 void wakeup_source_destroy(struct wakeup_source *ws)
 {
 	if (!ws)
 		return;

-	spin_lock_irq(&ws->lock);
-	while (ws->active) {
-		spin_unlock_irq(&ws->lock);
-
-		schedule_timeout_interruptible(msecs_to_jiffies(TIMEOUT));
-
-		spin_lock_irq(&ws->lock);
-	}
-	spin_unlock_irq(&ws->lock);
-
+	wakeup_source_drop(ws);
 	kfree(ws->name);
 	kfree(ws);
 }
@@ -102,15 +127,19 @@ EXPORT_SYMBOL_GPL(wakeup_source_destroy);
  */
 void wakeup_source_add(struct wakeup_source *ws)
 {
+	unsigned long flags;
+
 	if (WARN_ON(!ws))
 		return;

+	spin_lock_init(&ws->lock);
 	setup_timer(&ws->timer, pm_wakeup_timer_fn, (unsigned long)ws);
 	ws->active = false;
+	ws->last_time = ktime_get();

-	spin_lock_irq(&events_lock);
+	spin_lock_irqsave(&events_lock, flags);
 	list_add_rcu(&ws->entry, &wakeup_sources);
-	spin_unlock_irq(&events_lock);
+	spin_unlock_irqrestore(&events_lock, flags);
 }
 EXPORT_SYMBOL_GPL(wakeup_source_add);

@@ -120,12 +149,14 @@ EXPORT_SYMBOL_GPL(wakeup_source_add);
  */
 void wakeup_source_remove(struct wakeup_source *ws)
 {
+	unsigned long flags;
+
 	if (WARN_ON(!ws))
 		return;

-	spin_lock_irq(&events_lock);
+	spin_lock_irqsave(&events_lock, flags);
 	list_del_rcu(&ws->entry);
-	spin_unlock_irq(&events_lock);
+	spin_unlock_irqrestore(&events_lock, flags);
 	synchronize_rcu();
 }
 EXPORT_SYMBOL_GPL(wakeup_source_remove);
@@ -152,8 +183,10 @@ EXPORT_SYMBOL_GPL(wakeup_source_register);
  */
 void wakeup_source_unregister(struct wakeup_source *ws)
 {
-	wakeup_source_remove(ws);
-	wakeup_source_destroy(ws);
+	if (ws) {
+		wakeup_source_remove(ws);
+		wakeup_source_destroy(ws);
+	}
 }
 EXPORT_SYMBOL_GPL(wakeup_source_unregister);

@@ -285,10 +318,16 @@ int device_init_wakeup(struct device *dev, bool enable)
 {
 	int ret = 0;

+	if (!dev)
+		return -EINVAL;
+
 	if (enable) {
 		device_set_wakeup_capable(dev, true);
 		ret = device_wakeup_enable(dev);
 	} else {
+		if (dev->power.can_wakeup)
+			device_wakeup_disable(dev);
+
 		device_set_wakeup_capable(dev, false);
 	}

@@ -347,13 +386,39 @@ EXPORT_SYMBOL_GPL(device_set_wakeup_enable);
  */
 static void wakeup_source_activate(struct wakeup_source *ws)
 {
+	unsigned int cec;
+
+	/*
+	 * active wakeup source should bring the system
+	 * out of PM_SUSPEND_FREEZE state
+	 */
+	freeze_wake();
+
 	ws->active = true;
 	ws->active_count++;
-	ws->timer_expires = jiffies;
 	ws->last_time = ktime_get();
+	if (ws->autosleep_enabled)
+		ws->start_prevent_time = ws->last_time;

 	/* Increment the counter of events in progress. */
-	atomic_inc(&combined_event_count);
+	cec = atomic_inc_return(&combined_event_count);
+
+	trace_wakeup_source_activate(ws->name, cec);
+}
+
+/**
+ * wakeup_source_report_event - Report wakeup event using the given source.
+ * @ws: Wakeup source to report the event for.
+ */
+static void wakeup_source_report_event(struct wakeup_source *ws)
+{
+	ws->event_count++;
+	/* This is racy, but the counter is approximate anyway. */
+	if (events_check_enabled)
+		ws->wakeup_count++;
+
+	if (!ws->active)
+		wakeup_source_activate(ws);
 }

 /**
@@ -370,9 +435,11 @@ void __pm_stay_awake(struct wakeup_source *ws)
 		return;

 	spin_lock_irqsave(&ws->lock, flags);
-	ws->event_count++;
-	if (!ws->active)
-		wakeup_source_activate(ws);
+
+	wakeup_source_report_event(ws);
+	del_timer(&ws->timer);
+	ws->timer_expires = 0;
+
 	spin_unlock_irqrestore(&ws->lock, flags);
 }
 EXPORT_SYMBOL_GPL(__pm_stay_awake);
@@ -401,6 +468,17 @@ void pm_stay_awake(struct device *dev)
 }
 EXPORT_SYMBOL_GPL(pm_stay_awake);

+#ifdef CONFIG_PM_AUTOSLEEP
+static void update_prevent_sleep_time(struct wakeup_source *ws, ktime_t now)
+{
+	ktime_t delta = ktime_sub(now, ws->start_prevent_time);
+	ws->prevent_sleep_time = ktime_add(ws->prevent_sleep_time, delta);
+}
+#else
+static inline void update_prevent_sleep_time(struct wakeup_source *ws,
+					     ktime_t now) {}
+#endif
+
 /**
  * wakup_source_deactivate - Mark given wakeup source as inactive.
  * @ws: Wakeup source to handle.
@@ -411,6 +489,7 @@ EXPORT_SYMBOL_GPL(pm_stay_awake);
  */
 static void wakeup_source_deactivate(struct wakeup_source *ws)
 {
+	unsigned int cnt, inpr, cec;
 	ktime_t duration;
 	ktime_t now;

@@ -437,13 +516,23 @@ static void wakeup_source_deactivate(struct wakeup_source *ws)
 	if (ktime_to_ns(duration) > ktime_to_ns(ws->max_time))
 		ws->max_time = duration;

+	ws->last_time = now;
 	del_timer(&ws->timer);
+	ws->timer_expires = 0;
+
+	if (ws->autosleep_enabled)
+		update_prevent_sleep_time(ws, now);

 	/*
 	 * Increment the counter of registered wakeup events and decrement the
 	 * couter of wakeup events in progress simultaneously.
 	 */
-	atomic_add(MAX_IN_PROGRESS, &combined_event_count);
+	cec = atomic_add_return(MAX_IN_PROGRESS, &combined_event_count);
+	trace_wakeup_source_deactivate(ws->name, cec);
+
+	split_counters(&cnt, &inpr);
+	if (!inpr && waitqueue_active(&wakeup_count_wait_queue))
+		wake_up(&wakeup_count_wait_queue);
 }

 /**
@@ -492,11 +581,24 @@ EXPORT_SYMBOL_GPL(pm_relax);
  * pm_wakeup_timer_fn - Delayed finalization of a wakeup event.
  * @data: Address of the wakeup source object associated with the event source.
  *
- * Call __pm_relax() for the wakeup source whose address is stored in @data.
+ * Call wakeup_source_deactivate() for the wakeup source whose address is stored
+ * in @data if it is currently active and its timer has not been canceled and
+ * the expiration time of the timer is not in future.
  */
 static void pm_wakeup_timer_fn(unsigned long data)
 {
-	__pm_relax((struct wakeup_source *)data);
+	struct wakeup_source *ws = (struct wakeup_source *)data;
+	unsigned long flags;
+
+	spin_lock_irqsave(&ws->lock, flags);
+
+	if (ws->active && ws->timer_expires
+	    && time_after_eq(jiffies, ws->timer_expires)) {
+		wakeup_source_deactivate(ws);
+		ws->expire_count++;
+	}
+
+	spin_unlock_irqrestore(&ws->lock, flags);
 }

 /**
@@ -521,9 +623,7 @@ void __pm_wakeup_event(struct wakeup_source *ws, unsigned int msec)

 	spin_lock_irqsave(&ws->lock, flags);

-	ws->event_count++;
-	if (!ws->active)
-		wakeup_source_activate(ws);
+	wakeup_source_report_event(ws);

 	if (!msec) {
 		wakeup_source_deactivate(ws);
@@ -534,7 +634,7 @@ void __pm_wakeup_event(struct wakeup_source *ws, unsigned int msec)
 	if (!expires)
 		expires = 1;

-	if (time_after(expires, ws->timer_expires)) {
+	if (!ws->timer_expires || time_after(expires, ws->timer_expires)) {
 		mod_timer(&ws->timer, expires);
 		ws->timer_expires = expires;
 	}
@@ -565,23 +665,31 @@ void pm_wakeup_event(struct device *dev, unsigned int msec)
 }
 EXPORT_SYMBOL_GPL(pm_wakeup_event);

-/**
- * pm_wakeup_update_hit_counts - Update hit counts of all active wakeup sources.
- */
-static void pm_wakeup_update_hit_counts(void)
+void pm_print_active_wakeup_sources(void)
 {
-	unsigned long flags;
 	struct wakeup_source *ws;
+	int active = 0;
+	struct wakeup_source *last_activity_ws = NULL;

 	rcu_read_lock();
 	list_for_each_entry_rcu(ws, &wakeup_sources, entry) {
-		spin_lock_irqsave(&ws->lock, flags);
-		if (ws->active)
-			ws->hit_count++;
-		spin_unlock_irqrestore(&ws->lock, flags);
+		if (ws->active) {
+			pr_info("active wakeup source: %s\n", ws->name);
+			active = 1;
+		} else if (!active &&
+			   (!last_activity_ws ||
+			    ktime_to_ns(ws->last_time) >
+			    ktime_to_ns(last_activity_ws->last_time))) {
+			last_activity_ws = ws;
+		}
 	}
+
+	if (!active && last_activity_ws)
+		pr_info("last active wakeup source: %s\n",
+			last_activity_ws->name);
 	rcu_read_unlock();
 }
+EXPORT_SYMBOL_GPL(pm_print_active_wakeup_sources);

 /**
  * pm_wakeup_pending - Check if power transition in progress should be aborted.
@@ -605,32 +713,44 @@ bool pm_wakeup_pending(void)
 		events_check_enabled = !ret;
 	}
 	spin_unlock_irqrestore(&events_lock, flags);
-	if (ret)
-		pm_wakeup_update_hit_counts();
+
+	if (ret) {
+		pr_info("PM: Wakeup pending, aborting suspend\n");
+		pm_print_active_wakeup_sources();
+	}
+
 	return ret;
 }

 /**
  * pm_get_wakeup_count - Read the number of registered wakeup events.
  * @count: Address to store the value at.
+ * @block: Whether or not to block.
+ *
- * Store the number of registered wakeup events at the address in @count. Block
- * if the current number of wakeup events being processed is nonzero.
+ * Store the number of registered wakeup events at the address in @count. If
+ * @block is set, block until the current number of wakeup events being
+ * processed is zero.
+ *
- * Return 'false' if the wait for the number of wakeup events being processed to
- * drop down to zero has been interrupted by a signal (and the current number
- * of wakeup events being processed is still nonzero). Otherwise return 'true'.
+ * Return 'false' if the current number of wakeup events being processed is
+ * nonzero. Otherwise return 'true'.
  */
-bool pm_get_wakeup_count(unsigned int *count)
+bool pm_get_wakeup_count(unsigned int *count, bool block)
 {
 	unsigned int cnt, inpr;

-	for (;;) {
-		split_counters(&cnt, &inpr);
-		if (inpr == 0 || signal_pending(current))
-			break;
-		pm_wakeup_update_hit_counts();
-		schedule_timeout_interruptible(msecs_to_jiffies(TIMEOUT));
+	if (block) {
+		DEFINE_WAIT(wait);
+
+		for (;;) {
+			prepare_to_wait(&wakeup_count_wait_queue, &wait,
+					TASK_INTERRUPTIBLE);
+			split_counters(&cnt, &inpr);
+			if (inpr == 0 || signal_pending(current))
+				break;
+
+			schedule();
+		}
+		finish_wait(&wakeup_count_wait_queue, &wait);
 	}

 	split_counters(&cnt, &inpr);
@@ -651,20 +771,47 @@ bool pm_get_wakeup_count(unsigned int *count)
 bool pm_save_wakeup_count(unsigned int count)
 {
 	unsigned int cnt, inpr;
+	unsigned long flags;

 	events_check_enabled = false;
-	spin_lock_irq(&events_lock);
+	spin_lock_irqsave(&events_lock, flags);
 	split_counters(&cnt, &inpr);
 	if (cnt == count && inpr == 0) {
 		saved_count = count;
 		events_check_enabled = true;
 	}
-	spin_unlock_irq(&events_lock);
-	if (!events_check_enabled)
-		pm_wakeup_update_hit_counts();
+	spin_unlock_irqrestore(&events_lock, flags);
 	return events_check_enabled;
 }

+#ifdef CONFIG_PM_AUTOSLEEP
+/**
+ * pm_wakep_autosleep_enabled - Modify autosleep_enabled for all wakeup sources.
+ * @enabled: Whether to set or to clear the autosleep_enabled flags.
+ */
+void pm_wakep_autosleep_enabled(bool set)
+{
+	struct wakeup_source *ws;
+	ktime_t now = ktime_get();
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(ws, &wakeup_sources, entry) {
+		spin_lock_irq(&ws->lock);
+		if (ws->autosleep_enabled != set) {
+			ws->autosleep_enabled = set;
+			if (ws->active) {
+				if (set)
+					ws->start_prevent_time = now;
+				else
+					update_prevent_sleep_time(ws, now);
+			}
+		}
+		spin_unlock_irq(&ws->lock);
+	}
+	rcu_read_unlock();
+}
+#endif /* CONFIG_PM_AUTOSLEEP */
+
 static struct dentry *wakeup_sources_stats_dentry;

 /**
@@ -680,27 +827,37 @@ static int print_wakeup_source_stats(struct seq_file *m,
 	ktime_t max_time;
 	unsigned long active_count;
 	ktime_t active_time;
+	ktime_t prevent_sleep_time;
 	int ret;

 	spin_lock_irqsave(&ws->lock, flags);
 	total_time = ws->total_time;
 	max_time = ws->max_time;
+	prevent_sleep_time = ws->prevent_sleep_time;
 	active_count = ws->active_count;
 	if (ws->active) {
-		active_time = ktime_sub(ktime_get(), ws->last_time);
+		ktime_t now = ktime_get();
+
+		active_time = ktime_sub(now, ws->last_time);
 		total_time = ktime_add(total_time, active_time);
 		if (active_time.tv64 > max_time.tv64)
 			max_time = active_time;
+
+		if (ws->autosleep_enabled)
+			prevent_sleep_time = ktime_add(prevent_sleep_time,
+				ktime_sub(now, ws->start_prevent_time));
 	} else {
 		active_time = ktime_set(0, 0);
 	}

-	ret = seq_printf(m, "%-12s\t%lu\t\t%lu\t\t%lu\t\t"
-			"%lld\t\t%lld\t\t%lld\t\t%lld\n",
-			ws->name, active_count, ws->event_count, ws->hit_count,
+	ret = seq_printf(m, "%-12s\t%lu\t\t%lu\t\t%lu\t\t%lu\t\t"
+			"%lld\t\t%lld\t\t%lld\t\t%lld\t\t%lld\n",
+			ws->name, active_count, ws->event_count,
+			ws->wakeup_count, ws->expire_count,
 			ktime_to_ms(active_time), ktime_to_ms(total_time),
-			ktime_to_ms(max_time), ktime_to_ms(ws->last_time));
+			ktime_to_ms(max_time), ktime_to_ms(ws->last_time),
+			ktime_to_ms(prevent_sleep_time));

 	spin_unlock_irqrestore(&ws->lock, flags);

@@ -715,8 +872,9 @@ static int wakeup_sources_stats_show(struct seq_file *m, void *unused)
 {
 	struct wakeup_source *ws;

-	seq_puts(m, "name\t\tactive_count\tevent_count\thit_count\t"
-		"active_since\ttotal_time\tmax_time\tlast_change\n");
+	seq_puts(m, "name\t\tactive_count\tevent_count\twakeup_count\t"
+		"expire_count\tactive_since\ttotal_time\tmax_time\t"
+		"last_change\tprevent_suspend_time\n");

 	rcu_read_lock();
 	list_for_each_entry_rcu(ws, &wakeup_sources, entry)

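For completeness, the pm_get_wakeup_count()/pm_save_wakeup_count() changes back the /sys/power/wakeup_count handshake used by suspend initiators in userspace. A rough userspace sketch of that protocol (error handling trimmed; an illustration under the assumption that the sysfs read path requests blocking behaviour, not part of the patch):

	#include <stdio.h>

	int main(void)
	{
		char count[32];
		FILE *f;

		/* Reading blocks until no wakeup events are in progress. */
		f = fopen("/sys/power/wakeup_count", "r");
		if (!f || !fgets(count, sizeof(count), f))
			return 1;
		fclose(f);

		/* Writing the value back arms events_check_enabled; the write
		 * fails if new wakeup events were registered meanwhile. */
		f = fopen("/sys/power/wakeup_count", "w");
		if (!f || fputs(count, f) == EOF || fclose(f) == EOF)
			return 1;	/* wakeup happened - retry, do not suspend */

		/* Only now is it safe to request suspend. */
		f = fopen("/sys/power/state", "w");
		if (!f)
			return 1;
		fputs("mem\n", f);
		fclose(f);
		return 0;
	}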