60 files changed, 19353 insertions, 3417 deletions
diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig
index fd96345bc35..23b8726962a 100644
--- a/drivers/base/Kconfig
+++ b/drivers/base/Kconfig
@@ -1,11 +1,10 @@
 menu "Generic Driver Options"
 
-config UEVENT_HELPER_PATH
-	string "path to uevent helper"
-	depends on HOTPLUG
-	default ""
+config UEVENT_HELPER
+	bool "Support for uevent helper"
+	default y
 	help
-	  Path to uevent helper program forked by the kernel for
+	  The uevent helper program is forked by the kernel for
 	  every uevent.
 	  Before the switch to the netlink-based uevent source, this was
 	  used to hook hotplug scripts into kernel device events. It
@@ -16,9 +15,18 @@ config UEVENT_HELPER_PATH
 	  that it creates a high system load, or on smaller systems
 	  it is known to create out-of-memory situations during bootup.
 
+config UEVENT_HELPER_PATH
+	string "path to uevent helper"
+	depends on UEVENT_HELPER
+	default ""
+	help
+	  To disable user space helper program execution by default,
+	  specify an empty string here. This setting can still be altered
+	  via /proc/sys/kernel/hotplug or via /sys/kernel/uevent_helper
+	  later at runtime.
+
 config DEVTMPFS
 	bool "Maintain a devtmpfs filesystem to mount at /dev"
-	depends on HOTPLUG
 	help
 	  This creates a tmpfs/ramfs filesystem instance early at bootup.
 	  In this filesystem, the kernel driver core maintains device
@@ -46,13 +54,13 @@ config DEVTMPFS_MOUNT
 	  with the commandline parameter: devtmpfs.mount=0|1.
 	  This option does not affect initramfs based booting, here
 	  the devtmpfs filesystem always needs to be mounted manually
-	  after the roots is mounted.
+	  after the rootfs is mounted.
 	  With this option enabled, it allows to bring up a system in
 	  rescue mode with init=/bin/sh, even when the /dev directory
 	  on the rootfs is completely empty.
 
 config STANDALONE
-	bool "Select only drivers that don't need compile-time external firmware" if EXPERIMENTAL
+	bool "Select only drivers that don't need compile-time external firmware"
 	default y
 	help
 	  Select this option if you don't have magic firmware for drivers that
@@ -65,17 +73,17 @@ config PREVENT_FIRMWARE_BUILD
 	default y
 	help
 	  Say yes to avoid building firmware. Firmware is usually shipped
-	  with the driver, and only when updating the firmware a rebuild
-	  should be made.
-	  If unsure say Y here.
+	  with the driver and only when updating the firmware should a
+	  rebuild be made.
+	  If unsure, say Y here.
 
 config FW_LOADER
-	tristate "Userspace firmware loading support" if EMBEDDED
+	tristate "Userspace firmware loading support" if EXPERT
 	default y
 	---help---
-	  This option is provided for the case where no in-kernel-tree modules
-	  require userspace firmware loading support, but a module built outside
-	  the kernel tree does.
+	  This option is provided for the case where none of the in-tree modules
+	  require userspace firmware loading support, but a module built
+	  out-of-tree does.
 
 config FIRMWARE_IN_KERNEL
 	bool "Include in-kernel firmware blobs in kernel binary"
@@ -83,22 +91,22 @@ config FIRMWARE_IN_KERNEL
 	default y
 	help
 	  The kernel source tree includes a number of firmware 'blobs'
-	  which are used by various drivers. The recommended way to
-	  use these is to run "make firmware_install" and to copy the
-	  resulting binary files created in usr/lib/firmware directory
-	  of the kernel tree to the /lib/firmware on your system so
+	  that are used by various drivers. The recommended way to
+	  use these is to run "make firmware_install", which, after
+	  converting ihex files to binary, copies all of the needed
+	  binary files in firmware/ to /lib/firmware/ on your system so
 	  that they can be loaded by userspace helpers on request.
 
 	  Enabling this option will build each required firmware blob
 	  into the kernel directly, where request_firmware() will find
 	  them without having to call out to userspace. This may be
-	  useful if your root file system requires a device which uses
-	  such firmware, and do not wish to use an initrd.
+	  useful if your root file system requires a device that uses
+	  such firmware and you do not wish to use an initrd.
 
 	  This single option controls the inclusion of firmware for
-	  every driver which uses request_firmware() and ships its
-	  firmware in the kernel source tree, to avoid a proliferation
-	  of 'Include firmware for xxx device' options.
+	  every driver that uses request_firmware() and ships its
+	  firmware in the kernel source tree, which avoids a
+	  proliferation of 'Include firmware for xxx device' options.
 
 	  Say 'N' and let firmware be loaded from userspace.
 
@@ -106,27 +114,27 @@ config EXTRA_FIRMWARE
 	string "External firmware blobs to build into the kernel binary"
 	depends on FW_LOADER
 	help
-	  This option allows firmware to be built into the kernel, for the
-	  cases where the user either cannot or doesn't want to provide it from
+	  This option allows firmware to be built into the kernel for the case
+	  where the user either cannot or doesn't want to provide it from
 	  userspace at runtime (for example, when the firmware in question is
 	  required for accessing the boot device, and the user doesn't want to
 	  use an initrd).
 
-	  This option is a string, and takes the (space-separated) names of the
-	  firmware files -- the same names which appear in MODULE_FIRMWARE()
+	  This option is a string and takes the (space-separated) names of the
+	  firmware files -- the same names that appear in MODULE_FIRMWARE()
 	  and request_firmware() in the source. These files should exist under
 	  the directory specified by the EXTRA_FIRMWARE_DIR option, which is
-	  by default the firmware/ subdirectory of the kernel source tree.
+	  by default the firmware subdirectory of the kernel source tree.
 
-	  So, for example, you might set CONFIG_EXTRA_FIRMWARE="usb8388.bin",
-	  copy the usb8388.bin file into the firmware/ directory, and build the
-	  kernel. Then any request_firmware("usb8388.bin") will be
-	  satisfied internally without needing to call out to userspace.
+	  For example, you might set CONFIG_EXTRA_FIRMWARE="usb8388.bin", copy
+	  the usb8388.bin file into the firmware directory, and build the kernel.
+	  Then any request_firmware("usb8388.bin") will be satisfied internally
+	  without needing to call out to userspace.
 
 	  WARNING: If you include additional firmware files into your binary
-	  kernel image which are not available under the terms of the GPL,
+	  kernel image that are not available under the terms of the GPL,
 	  then it may be a violation of the GPL to distribute the resulting
-	  image -- since it combines both GPL and non-GPL work. You should
+	  image since it combines both GPL and non-GPL work. You should
 	  consult a lawyer of your own before distributing such an image.
 
 config EXTRA_FIRMWARE_DIR
@@ -136,10 +144,20 @@ config EXTRA_FIRMWARE_DIR
 	help
 	  This option controls the directory in which the kernel build system
 	  looks for the firmware files listed in the EXTRA_FIRMWARE option.
-	  The default is the firmware/ directory in the kernel source tree,
-	  but by changing this option you can point it elsewhere, such as
-	  the /lib/firmware/ directory or another separate directory
-	  containing firmware files.
+	  The default is firmware/ in the kernel source tree, but by changing
+	  this option you can point it elsewhere, such as /lib/firmware/ or
+	  some other directory containing the firmware files.
+
+config FW_LOADER_USER_HELPER
+	bool "Fallback user-helper invocation for firmware loading"
+	depends on FW_LOADER
+	default y
+	help
+	  This option enables / disables the invocation of user-helper
+	  (e.g. udev) for loading firmware files as a fallback after the
+	  direct file loading in kernel fails.  The user-mode helper is
+	  no longer required unless you have a special firmware file that
+	  resides in a non-standard path.
 
 config DEBUG_DRIVER
 	bool "Driver Core verbose debug messages"
@@ -168,4 +186,104 @@ config SYS_HYPERVISOR
 	bool
 	default n
 
+config GENERIC_CPU_DEVICES
+	bool
+	default n
+
+config GENERIC_CPU_AUTOPROBE
+	bool
+
+config SOC_BUS
+	bool
+
+source "drivers/base/regmap/Kconfig"
+
+config DMA_SHARED_BUFFER
+	bool
+	default n
+	select ANON_INODES
+	help
+	  This option enables the framework for buffer-sharing between
+	  multiple drivers. A buffer is associated with a file using a driver
+	  API extension; the file's descriptor can then be passed on to other
+	  drivers.
+
+config DMA_CMA
+	bool "DMA Contiguous Memory Allocator"
+	depends on HAVE_DMA_CONTIGUOUS && CMA
+	help
+	  This enables the Contiguous Memory Allocator which allows drivers
+	  to allocate big physically-contiguous blocks of memory for use with
+	  hardware components that support neither I/O map nor scatter-gather.
+
+	  For more information see <include/linux/dma-contiguous.h>.
+	  If unsure, say "n".
+
+if  DMA_CMA
+comment "Default contiguous memory area size:"
+
+config CMA_SIZE_MBYTES
+	int "Size in Mega Bytes"
+	depends on !CMA_SIZE_SEL_PERCENTAGE
+	default 16
+	help
+	  Defines the size (in MiB) of the default memory area for Contiguous
+	  Memory Allocator.
+
+config CMA_SIZE_PERCENTAGE
+	int "Percentage of total memory"
+	depends on !CMA_SIZE_SEL_MBYTES
+	default 10
+	help
+	  Defines the size of the default memory area for Contiguous Memory
+	  Allocator as a percentage of the total memory in the system.
+
+choice
+	prompt "Selected region size"
+	default CMA_SIZE_SEL_MBYTES
+
+config CMA_SIZE_SEL_MBYTES
+	bool "Use mega bytes value only"
+
+config CMA_SIZE_SEL_PERCENTAGE
+	bool "Use percentage value only"
+
+config CMA_SIZE_SEL_MIN
+	bool "Use lower value (minimum)"
+
+config CMA_SIZE_SEL_MAX
+	bool "Use higher value (maximum)"
+
+endchoice
+
+config CMA_ALIGNMENT
+	int "Maximum PAGE_SIZE order of alignment for contiguous buffers"
+	range 4 12
+	default 8
+	help
+	  DMA mapping framework by default aligns all buffers to the smallest
+	  PAGE_SIZE order which is greater than or equal to the requested buffer
+	  size. This works well for buffers up to a few hundred kilobytes, but
+	  for larger buffers it just wastes memory. With this parameter you can
+	  specify the maximum PAGE_SIZE order for contiguous buffers. Larger
+	  buffers will be aligned only to this specified order. The order is
+	  expressed as a power of two multiplied by the PAGE_SIZE.
+
+	  For example, if your system defaults to 4KiB pages, the order value
+	  of 8 means that the buffers will be aligned up to 1MiB only.
+
+	  If unsure, leave the default value "8".
+
+config CMA_AREAS
+	int "Maximum count of the CMA device-private areas"
+	default 7
+	help
+	  CMA allows creating CMA areas for particular devices. This parameter
+	  sets the maximum number of such device private CMA areas in the
+	  system.
+
+	  If unsure, leave the default value "7".
+
+endif
+
 endmenu
diff --git a/drivers/base/Makefile b/drivers/base/Makefile
index 5f51c3b4451..04b314e0fa5 100644
--- a/drivers/base/Makefile
+++ b/drivers/base/Makefile
@@ -1,23 +1,27 @@
 # Makefile for the Linux device tree
 
-obj-y			:= core.o sys.o bus.o dd.o \
+obj-y			:= component.o core.o bus.o dd.o syscore.o \
 			   driver.o class.o platform.o \
 			   cpu.o firmware.o init.o map.o devres.o \
-			   attribute_container.o transport_class.o
+			   attribute_container.o transport_class.o \
+			   topology.o container.o
 obj-$(CONFIG_DEVTMPFS)	+= devtmpfs.o
+obj-$(CONFIG_DMA_CMA) += dma-contiguous.o
 obj-y			+= power/
 obj-$(CONFIG_HAS_DMA)	+= dma-mapping.o
 obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT) += dma-coherent.o
+obj-$(CONFIG_DMA_SHARED_BUFFER) += dma-buf.o reservation.o
 obj-$(CONFIG_ISA)	+= isa.o
 obj-$(CONFIG_FW_LOADER)	+= firmware_class.o
 obj-$(CONFIG_NUMA)	+= node.o
 obj-$(CONFIG_MEMORY_HOTPLUG_SPARSE) += memory.o
-obj-$(CONFIG_SMP)	+= topology.o
-obj-$(CONFIG_IOMMU_API) += iommu.o
 ifeq ($(CONFIG_SYSFS),y)
 obj-$(CONFIG_MODULES)	+= module.o
 endif
 obj-$(CONFIG_SYS_HYPERVISOR) += hypervisor.o
+obj-$(CONFIG_REGMAP)	+= regmap/
+obj-$(CONFIG_SOC_BUS) += soc.o
+obj-$(CONFIG_PINCTRL) += pinctrl.o
 
 ccflags-$(CONFIG_DEBUG_DRIVER) := -DDEBUG
 
diff --git a/drivers/base/attribute_container.c b/drivers/base/attribute_container.c
index 8fc200b2e2c..b84ca8f13f9 100644
--- a/drivers/base/attribute_container.c
+++ b/drivers/base/attribute_container.c
@@ -12,7 +12,6 @@
  */
 
 #include <linux/attribute_container.h>
-#include <linux/init.h>
 #include <linux/device.h>
 #include <linux/kernel.h>
 #include <linux/slab.h>
@@ -158,7 +157,7 @@ attribute_container_add_device(struct device *dev,
 
 		ic = kzalloc(sizeof(*ic), GFP_KERNEL);
 		if (!ic) {
-			dev_printk(KERN_ERR, dev, "failed to allocate class container\n");
+			dev_err(dev, "failed to allocate class container\n");
 			continue;
 		}
 
@@ -167,7 +166,7 @@ attribute_container_add_device(struct device *dev,
 		ic->classdev.parent = get_device(dev);
 		ic->classdev.class = cont->class;
 		cont->class->dev_release = attribute_container_release;
-		dev_set_name(&ic->classdev, dev_name(dev));
+		dev_set_name(&ic->classdev, "%s", dev_name(dev));
 		if (fn)
 			fn(cont, dev, &ic->classdev);
 		else
diff --git a/drivers/base/base.h b/drivers/base/base.h
index 2ca7f5b7b82..251c5d30f96 100644
--- a/drivers/base/base.h
+++ b/drivers/base/base.h
@@ -1,31 +1,47 @@
+#include <linux/notifier.h>
 
 /**
- * struct bus_type_private - structure to hold the private to the driver core portions of the bus_type structure.
+ * struct subsys_private - structure holding the portions of the bus_type/class structure that are private to the driver core.
  *
- * @subsys - the struct kset that defines this bus.  This is the main kobject
- * @drivers_kset - the list of drivers associated with this bus
- * @devices_kset - the list of devices associated with this bus
+ * @subsys - the struct kset that defines this subsystem
+ * @devices_kset - the subsystem's 'devices' directory
+ * @interfaces - list of subsystem interfaces associated
+ * @mutex - protects the devices and interfaces lists.
+ *
+ * @drivers_kset - the list of drivers associated
  * @klist_devices - the klist to iterate over the @devices_kset
  * @klist_drivers - the klist to iterate over the @drivers_kset
  * @bus_notifier - the bus notifier list for anything that cares about things
- * on this bus.
+ *                 on this bus.
  * @bus - pointer back to the struct bus_type that this structure is associated
- * with.
+ *        with.
+ *
+ * @glue_dirs - "glue" directory to put in-between the parent device to
+ *              avoid namespace conflicts
+ * @class - pointer back to the struct class that this structure is associated
+ *          with.
  *
  * This structure is the one that is the actual kobject allowing struct
- * bus_type to be statically allocated safely.  Nothing outside of the driver
- * core should ever touch these fields.
+ * bus_type/class to be statically allocated safely.  Nothing outside of the
+ * driver core should ever touch these fields.
  */
-struct bus_type_private {
+struct subsys_private {
 	struct kset subsys;
-	struct kset *drivers_kset;
 	struct kset *devices_kset;
+	struct list_head interfaces;
+	struct mutex mutex;
+
+	struct kset *drivers_kset;
 	struct klist klist_devices;
 	struct klist klist_drivers;
 	struct blocking_notifier_head bus_notifier;
 	unsigned int drivers_autoprobe:1;
 	struct bus_type *bus;
+
+	struct kset glue_dirs;
+	struct class *class;
 };
+#define to_subsys_private(obj) container_of(obj, struct subsys_private, subsys.kobj)
 
 struct driver_private {
 	struct kobject kobj;
@@ -36,33 +52,6 @@ struct driver_private {
 };
 #define to_driver(obj) container_of(obj, struct driver_private, kobj)
 
-
-/**
- * struct class_private - structure to hold the private to the driver core portions of the class structure.
- *
- * @class_subsys - the struct kset that defines this class.  This is the main kobject
- * @class_devices - list of devices associated with this class
- * @class_interfaces - list of class_interfaces associated with this class
- * @class_dirs - "glue" directory for virtual devices associated with this class
- * @class_mutex - mutex to protect the children, devices, and interfaces lists.
- * @class - pointer back to the struct class that this structure is associated
- * with.
- *
- * This structure is the one that is the actual kobject allowing struct
- * class to be statically allocated safely.  Nothing outside of the driver
- * core should ever touch these fields.
- */
-struct class_private {
-	struct kset class_subsys;
-	struct klist class_devices;
-	struct list_head class_interfaces;
-	struct kset class_dirs;
-	struct mutex class_mutex;
-	struct class *class;
-};
-#define to_class(obj)	\
-	container_of(obj, struct class_private, class_subsys.kobj)
-
 /**
  * struct device_private - structure to hold the private to the driver core portions of the device structure.
  *
@@ -70,8 +59,10 @@ struct class_private {
  * @knode_parent - node in sibling list
  * @knode_driver - node in driver list
  * @knode_bus - node in bus list
- * @driver_data - private pointer for driver specific info.  Will turn into a
- * list soon.
+ * @deferred_probe - entry in deferred_probe_list which is used to retry the
+ *	binding of drivers which were unable to get all the resources needed by
+ *	the device; typically because it depends on another driver getting
+ *	probed first.
  * @device - pointer back to the struct class that this structure is
  * associated with.
  *
@@ -82,7 +73,7 @@ struct device_private {
 	struct klist_node knode_parent;
 	struct klist_node knode_driver;
 	struct klist_node knode_bus;
-	void *driver_data;
+	struct list_head deferred_probe;
 	struct device *device;
 };
 #define to_device_private_parent(obj)	\
@@ -105,8 +96,10 @@ extern int hypervisor_init(void);
 static inline int hypervisor_init(void) { return 0; }
 #endif
 extern int platform_bus_init(void);
-extern int system_bus_init(void);
-extern int cpu_dev_init(void);
+extern void cpu_dev_init(void);
+extern void container_dev_init(void);
+
+struct kobject *virtual_device_parent(struct device *dev);
 
 extern int bus_add_device(struct device *dev);
 extern void bus_probe_device(struct device *dev);
@@ -117,18 +110,28 @@ extern void bus_remove_driver(struct device_driver *drv);
 
 extern void driver_detach(struct device_driver *drv);
 extern int driver_probe_device(struct device_driver *drv, struct device *dev);
+extern void driver_deferred_probe_del(struct device *dev);
 static inline int driver_match_device(struct device_driver *drv,
 				      struct device *dev)
 {
 	return drv->bus->match ? drv->bus->match(dev, drv) : 1;
 }
 
-extern void sysdev_shutdown(void);
+extern int driver_add_groups(struct device_driver *drv,
+			     const struct attribute_group **groups);
+extern void driver_remove_groups(struct device_driver *drv,
+				 const struct attribute_group **groups);
+
+extern int device_add_groups(struct device *dev,
+			     const struct attribute_group **groups);
+extern void device_remove_groups(struct device *dev,
+				 const struct attribute_group **groups);
 
 extern char *make_class_name(const char *name, struct kobject *kobj);
 
 extern int devres_release_all(struct device *dev);
 
+/* /sys/devices directory */
 extern struct kset *devices_kset;
 
 #if defined(CONFIG_MODULES) && defined(CONFIG_SYSFS)
diff --git a/drivers/base/bus.c b/drivers/base/bus.c
index 33c270a64db..83e910a5756 100644
--- a/drivers/base/bus.c
+++ b/drivers/base/bus.c
@@ -16,11 +16,15 @@
 #include <linux/slab.h>
 #include <linux/init.h>
 #include <linux/string.h>
+#include <linux/mutex.h>
+#include <linux/sysfs.h>
 #include "base.h"
 #include "power/power.h"
 
+/* /sys/devices/system */
+static struct kset *system_kset;
+
 #define to_bus_attr(_attr) container_of(_attr, struct bus_attribute, attr)
-#define to_bus(obj) container_of(obj, struct bus_type_private, subsys.kobj)
 
 /*
  * sysfs bindings for drivers
@@ -96,11 +100,11 @@ static ssize_t bus_attr_show(struct kobject *kobj, struct attribute *attr,
 			     char *buf)
 {
 	struct bus_attribute *bus_attr = to_bus_attr(attr);
-	struct bus_type_private *bus_priv = to_bus(kobj);
+	struct subsys_private *subsys_priv = to_subsys_private(kobj);
 	ssize_t ret = 0;
 
 	if (bus_attr->show)
-		ret = bus_attr->show(bus_priv->bus, buf);
+		ret = bus_attr->show(subsys_priv->bus, buf);
 	return ret;
 }
 
@@ -108,11 +112,11 @@ static ssize_t bus_attr_store(struct kobject *kobj, struct attribute *attr,
 			      const char *buf, size_t count)
 {
 	struct bus_attribute *bus_attr = to_bus_attr(attr);
-	struct bus_type_private *bus_priv = to_bus(kobj);
+	struct subsys_private *subsys_priv = to_subsys_private(kobj);
 	ssize_t ret = 0;
 
 	if (bus_attr->store)
-		ret = bus_attr->store(bus_priv->bus, buf, count);
+		ret = bus_attr->store(subsys_priv->bus, buf, count);
 	return ret;
 }
 
@@ -142,8 +146,19 @@ void bus_remove_file(struct bus_type *bus, struct bus_attribute *attr)
 }
 EXPORT_SYMBOL_GPL(bus_remove_file);
 
+static void bus_release(struct kobject *kobj)
+{
+	struct subsys_private *priv =
+		container_of(kobj, typeof(*priv), subsys.kobj);
+	struct bus_type *bus = priv->bus;
+
+	kfree(priv);
+	bus->p = NULL;
+}
+
 static struct kobj_type bus_ktype = {
 	.sysfs_ops	= &bus_sysfs_ops,
+	.release	= bus_release,
 };
 
 static int bus_uevent_filter(struct kset *kset, struct kobject *kobj)
@@ -161,11 +176,9 @@ static const struct kset_uevent_ops bus_uevent_ops = {
 
 static struct kset *bus_kset;
 
-
-#ifdef CONFIG_HOTPLUG
 /* Manually detach a device from its associated driver. */
-static ssize_t driver_unbind(struct device_driver *drv,
-			     const char *buf, size_t count)
+static ssize_t unbind_store(struct device_driver *drv, const char *buf,
+			    size_t count)
 {
 	struct bus_type *bus = bus_get(drv->bus);
 	struct device *dev;
@@ -184,15 +197,15 @@ static ssize_t driver_unbind(struct device_driver *drv,
 	bus_put(bus);
 	return err;
 }
-static DRIVER_ATTR(unbind, S_IWUSR, NULL, driver_unbind);
+static DRIVER_ATTR_WO(unbind);
 
 /*
  * Manually attach a device to a driver.
  * Note: the driver must want to bind to the device,
  * it is not possible to override the driver's id table.
  */
-static ssize_t driver_bind(struct device_driver *drv,
-			   const char *buf, size_t count)
+static ssize_t bind_store(struct device_driver *drv, const char *buf,
+			  size_t count)
 {
 	struct bus_type *bus = bus_get(drv->bus);
 	struct device *dev;
@@ -220,7 +233,7 @@ static ssize_t driver_bind(struct device_driver *drv,
 	bus_put(bus);
 	return err;
 }
-static DRIVER_ATTR(bind, S_IWUSR, NULL, driver_bind);
+static DRIVER_ATTR_WO(bind);
 
 static ssize_t show_drivers_autoprobe(struct bus_type *bus, char *buf)
 {
@@ -249,7 +262,6 @@ static ssize_t store_drivers_probe(struct bus_type *bus,
 		return -EINVAL;
 	return count;
 }
-#endif
 
 static struct device *next_device(struct klist_iter *i)
 {
@@ -290,7 +302,7 @@ int bus_for_each_dev(struct bus_type *bus, struct device *start,
 	struct device *dev;
 	int error = 0;
 
-	if (!bus)
+	if (!bus || !bus->p)
 		return -EINVAL;
 
 	klist_iter_init_node(&bus->p->klist_devices, &i,
@@ -324,7 +336,7 @@ struct device *bus_find_device(struct bus_type *bus,
 	struct klist_iter i;
 	struct device *dev;
 
-	if (!bus)
+	if (!bus || !bus->p)
 		return NULL;
 
 	klist_iter_init_node(&bus->p->klist_devices, &i,
@@ -361,6 +373,47 @@ struct device *bus_find_device_by_name(struct bus_type *bus,
 }
 EXPORT_SYMBOL_GPL(bus_find_device_by_name);
 
+/**
+ * subsys_find_device_by_id - find a device with a specific enumeration number
+ * @subsys: subsystem
+ * @id: index 'id' in struct device
+ * @hint: device to check first
+ *
+ * Check the hint's next object and return it directly if it matches;
+ * otherwise fall back to a full list search. A reference is taken on the
+ * returned device. Returns NULL if nothing matches or @subsys has no ->p.
+ */
+struct device *subsys_find_device_by_id(struct bus_type *subsys, unsigned int id,
+					struct device *hint)
+{
+	struct klist_iter i;
+	struct device *dev;
+
+	if (!subsys || !subsys->p)
+		return NULL;
+
+	if (hint) {
+		klist_iter_init_node(&subsys->p->klist_devices, &i, &hint->p->knode_bus);
+		dev = next_device(&i);
+		if (dev && dev->id == id && get_device(dev)) {
+			klist_iter_exit(&i);
+			return dev;
+		}
+		klist_iter_exit(&i);
+	}
+
+	klist_iter_init_node(&subsys->p->klist_devices, &i, NULL);
+	while ((dev = next_device(&i))) {
+		if (dev->id == id && get_device(dev)) {
+			klist_iter_exit(&i);
+			return dev;
+		}
+	}
+	klist_iter_exit(&i);
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(subsys_find_device_by_id);
+
 static struct device_driver *next_driver(struct klist_iter *i)
 {
 	struct klist_node *n = klist_next(i);
@@ -419,7 +472,7 @@ static int device_add_attrs(struct bus_type *bus, struct device *dev)
 	if (!bus->dev_attrs)
 		return 0;
 
-	for (i = 0; attr_name(bus->dev_attrs[i]); i++) {
+	for (i = 0; bus->dev_attrs[i].attr.name; i++) {
 		error = device_create_file(dev, &bus->dev_attrs[i]);
 		if (error) {
 			while (--i >= 0)
@@ -435,7 +488,7 @@ static void device_remove_attrs(struct bus_type *bus, struct device *dev)
 	int i;
 
 	if (bus->dev_attrs) {
-		for (i = 0; attr_name(bus->dev_attrs[i]); i++)
+		for (i = 0; bus->dev_attrs[i].attr.name; i++)
 			device_remove_file(dev, &bus->dev_attrs[i]);
 	}
 }
@@ -458,6 +511,9 @@ int bus_add_device(struct device *dev)
 		error = device_add_attrs(bus, dev);
 		if (error)
 			goto out_put;
+		error = device_add_groups(dev, bus->dev_groups);
+		if (error)
+			goto out_groups;
 		error = sysfs_create_link(&bus->p->devices_kset->kobj,
 						&dev->kobj, dev_name(dev));
 		if (error)
@@ -472,6 +528,8 @@ int bus_add_device(struct device *dev)
 
 out_subsys:
 	sysfs_remove_link(&bus->p->devices_kset->kobj, dev_name(dev));
+out_groups:
+	device_remove_groups(dev, bus->dev_groups);
 out_id:
 	device_remove_attrs(bus, dev);
 out_put:
@@ -488,76 +546,62 @@ out_put:
 void bus_probe_device(struct device *dev)
 {
 	struct bus_type *bus = dev->bus;
+	struct subsys_interface *sif;
 	int ret;
 
-	if (bus && bus->p->drivers_autoprobe) {
+	if (!bus)
+		return;
+
+	if (bus->p->drivers_autoprobe) {
 		ret = device_attach(dev);
 		WARN_ON(ret < 0);
 	}
+
+	mutex_lock(&bus->p->mutex);
+	list_for_each_entry(sif, &bus->p->interfaces, node)
+		if (sif->add_dev)
+			sif->add_dev(dev, sif);
+	mutex_unlock(&bus->p->mutex);
 }
 
 /**
  * bus_remove_device - remove device from bus
  * @dev: device to be removed
  *
- * - Remove symlink from bus's directory.
+ * - Remove device from all interfaces.
+ * - Remove symlink from bus' directory.
  * - Delete device from bus's list.
  * - Detach from its driver.
  * - Drop reference taken in bus_add_device().
  */
 void bus_remove_device(struct device *dev)
 {
-	if (dev->bus) {
-		sysfs_remove_link(&dev->kobj, "subsystem");
-		sysfs_remove_link(&dev->bus->p->devices_kset->kobj,
-				  dev_name(dev));
-		device_remove_attrs(dev->bus, dev);
-		if (klist_node_attached(&dev->p->knode_bus))
-			klist_del(&dev->p->knode_bus);
-
-		pr_debug("bus: '%s': remove device %s\n",
-			 dev->bus->name, dev_name(dev));
-		device_release_driver(dev);
-		bus_put(dev->bus);
-	}
-}
-
-static int driver_add_attrs(struct bus_type *bus, struct device_driver *drv)
-{
-	int error = 0;
-	int i;
-
-	if (bus->drv_attrs) {
-		for (i = 0; attr_name(bus->drv_attrs[i]); i++) {
-			error = driver_create_file(drv, &bus->drv_attrs[i]);
-			if (error)
-				goto err;
-		}
-	}
-done:
-	return error;
-err:
-	while (--i >= 0)
-		driver_remove_file(drv, &bus->drv_attrs[i]);
-	goto done;
-}
+	struct bus_type *bus = dev->bus;
+	struct subsys_interface *sif;
 
-static void driver_remove_attrs(struct bus_type *bus,
-				struct device_driver *drv)
-{
-	int i;
+	if (!bus)
+		return;
 
-	if (bus->drv_attrs) {
-		for (i = 0; attr_name(bus->drv_attrs[i]); i++)
-			driver_remove_file(drv, &bus->drv_attrs[i]);
-	}
+	mutex_lock(&bus->p->mutex);
+	list_for_each_entry(sif, &bus->p->interfaces, node)
+		if (sif->remove_dev)
+			sif->remove_dev(dev, sif);
+	mutex_unlock(&bus->p->mutex);
+
+	sysfs_remove_link(&dev->kobj, "subsystem");
+	sysfs_remove_link(&dev->bus->p->devices_kset->kobj,
+			  dev_name(dev));
+	device_remove_attrs(dev->bus, dev);
+	device_remove_groups(dev, dev->bus->dev_groups);
+	if (klist_node_attached(&dev->p->knode_bus))
+		klist_del(&dev->p->knode_bus);
+
+	pr_debug("bus: '%s': remove device %s\n",
+		 dev->bus->name, dev_name(dev));
+	device_release_driver(dev);
+	bus_put(dev->bus);
 }
 
-#ifdef CONFIG_HOTPLUG
-/*
- * Thanks to drivers making their tables __devinit, we can't allow manual
- * bind and unbind from userspace unless CONFIG_HOTPLUG is enabled.
- */
 static int __must_check add_bind_files(struct device_driver *drv)
 {
 	int ret;
@@ -601,15 +645,9 @@ static void remove_probe_files(struct bus_type *bus)
 	bus_remove_file(bus, &bus_attr_drivers_autoprobe);
 	bus_remove_file(bus, &bus_attr_drivers_probe);
 }
-#else
-static inline int add_bind_files(struct device_driver *drv) { return 0; }
-static inline void remove_bind_files(struct device_driver *drv) {}
-static inline int add_probe_files(struct bus_type *bus) { return 0; }
-static inline void remove_probe_files(struct bus_type *bus) {}
-#endif
-
-static ssize_t driver_uevent_store(struct device_driver *drv,
-				   const char *buf, size_t count)
+
+static ssize_t uevent_store(struct device_driver *drv, const char *buf,
+			    size_t count)
 {
 	enum kobject_action action;
 
@@ -617,7 +655,7 @@ static ssize_t driver_uevent_store(struct device_driver *drv,
 		kobject_uevent(&drv->p->kobj, action);
 	return count;
 }
-static DRIVER_ATTR(uevent, S_IWUSR, NULL, driver_uevent_store);
+static DRIVER_ATTR_WO(uevent);
 
 /**
  * bus_add_driver - Add a driver to the bus.
@@ -649,12 +687,12 @@ int bus_add_driver(struct device_driver *drv)
 	if (error)
 		goto out_unregister;
 
+	klist_add_tail(&priv->knode_bus, &bus->p->klist_drivers);
 	if (drv->bus->p->drivers_autoprobe) {
 		error = driver_attach(drv);
 		if (error)
 			goto out_unregister;
 	}
-	klist_add_tail(&priv->knode_bus, &bus->p->klist_drivers);
 	module_add_driver(drv->owner, drv);
 
 	error = driver_create_file(drv, &driver_attr_uevent);
@@ -662,10 +700,10 @@ int bus_add_driver(struct device_driver *drv)
 		printk(KERN_ERR "%s: uevent attr (%s) failed\n",
 			__func__, drv->name);
 	}
-	error = driver_add_attrs(bus, drv);
+	error = driver_add_groups(drv, bus->drv_groups);
 	if (error) {
 		/* How the hell do we get out of this pickle? Give up */
-		printk(KERN_ERR "%s: driver_add_attrs(%s) failed\n",
+		printk(KERN_ERR "%s: driver_create_groups(%s) failed\n",
 			__func__, drv->name);
 	}
 
@@ -678,7 +716,6 @@ int bus_add_driver(struct device_driver *drv)
 		}
 	}
 
-	kobject_uevent(&priv->kobj, KOBJ_ADD);
 	return 0;
 
 out_unregister:
@@ -705,7 +742,7 @@ void bus_remove_driver(struct device_driver *drv)
 
 	if (!drv->suppress_bind_attrs)
 		remove_bind_files(drv);
-	driver_remove_attrs(drv->bus, drv);
+	driver_remove_groups(drv, drv->bus->drv_groups);
 	driver_remove_file(drv, &driver_attr_uevent);
 	klist_remove(&drv->p->knode_bus);
 	pr_debug("bus: '%s': remove driver %s\n", drv->bus->name, drv->name);
@@ -784,40 +821,16 @@ struct bus_type *find_bus(char *name)
 }
 #endif  /*  0  */
 
-
-/**
- * bus_add_attrs - Add default attributes for this bus.
- * @bus: Bus that has just been registered.
- */
-
-static int bus_add_attrs(struct bus_type *bus)
+static int bus_add_groups(struct bus_type *bus,
+			  const struct attribute_group **groups)
 {
-	int error = 0;
-	int i;
-
-	if (bus->bus_attrs) {
-		for (i = 0; attr_name(bus->bus_attrs[i]); i++) {
-			error = bus_create_file(bus, &bus->bus_attrs[i]);
-			if (error)
-				goto err;
-		}
-	}
-done:
-	return error;
-err:
-	while (--i >= 0)
-		bus_remove_file(bus, &bus->bus_attrs[i]);
-	goto done;
+	return sysfs_create_groups(&bus->p->subsys.kobj, groups);
 }
 
-static void bus_remove_attrs(struct bus_type *bus)
+static void bus_remove_groups(struct bus_type *bus,
+			      const struct attribute_group **groups)
 {
-	int i;
-
-	if (bus->bus_attrs) {
-		for (i = 0; attr_name(bus->bus_attrs[i]); i++)
-			bus_remove_file(bus, &bus->bus_attrs[i]);
-	}
+	sysfs_remove_groups(&bus->p->subsys.kobj, groups);
 }
 
 static void klist_devices_get(struct klist_node *n)
@@ -848,19 +861,20 @@ static ssize_t bus_uevent_store(struct bus_type *bus,
 static BUS_ATTR(uevent, S_IWUSR, NULL, bus_uevent_store);
 
 /**
- * bus_register - register a bus with the system.
- * @bus: bus.
+ * bus_register - register a driver-core subsystem
+ * @bus: bus to register
  *
- * Once we have that, we registered the bus with the kobject
+ * Once we have that, we register the bus with the kobject
  * infrastructure, then register the children subsystems it has:
- * the devices and drivers that belong to the bus.
+ * the devices and drivers that belong to the subsystem.
  */
 int bus_register(struct bus_type *bus)
 {
 	int retval;
-	struct bus_type_private *priv;
+	struct subsys_private *priv;
+	struct lock_class_key *key = &bus->lock_key;
 
-	priv = kzalloc(sizeof(struct bus_type_private), GFP_KERNEL);
+	priv = kzalloc(sizeof(struct subsys_private), GFP_KERNEL);
 	if (!priv)
 		return -ENOMEM;
 
@@ -899,6 +913,8 @@ int bus_register(struct bus_type *bus)
 		goto bus_drivers_fail;
 	}
 
+	INIT_LIST_HEAD(&priv->interfaces);
+	__mutex_init(&priv->mutex, "subsys mutex", key);
 	klist_init(&priv->klist_devices, klist_devices_get, klist_devices_put);
 	klist_init(&priv->klist_drivers, NULL, NULL);
 
@@ -906,14 +922,14 @@ int bus_register(struct bus_type *bus)
 	if (retval)
 		goto bus_probe_files_fail;
 
-	retval = bus_add_attrs(bus);
+	retval = bus_add_groups(bus, bus->bus_groups);
 	if (retval)
-		goto bus_attrs_fail;
+		goto bus_groups_fail;
 
 	pr_debug("bus: '%s': registered\n", bus->name);
 	return 0;
 
-bus_attrs_fail:
+bus_groups_fail:
 	remove_probe_files(bus);
 bus_probe_files_fail:
 	kset_unregister(bus->p->drivers_kset);
@@ -940,14 +956,14 @@ EXPORT_SYMBOL_GPL(bus_register);
 void bus_unregister(struct bus_type *bus)
 {
 	pr_debug("bus: '%s': unregistering\n", bus->name);
-	bus_remove_attrs(bus);
+	if (bus->dev_root)
+		device_unregister(bus->dev_root);
+	bus_remove_groups(bus, bus->bus_groups);
 	remove_probe_files(bus);
 	kset_unregister(bus->p->drivers_kset);
 	kset_unregister(bus->p->devices_kset);
 	bus_remove_file(bus, &bus_attr_uevent);
 	kset_unregister(&bus->p->subsys);
-	kfree(bus->p);
-	bus->p = NULL;
 }
 EXPORT_SYMBOL_GPL(bus_unregister);
 
@@ -976,7 +992,7 @@ struct klist *bus_get_device_klist(struct bus_type *bus)
 EXPORT_SYMBOL_GPL(bus_get_device_klist);
 
 /*
- * Yes, this forcably breaks the klist abstraction temporarily.  It
+ * Yes, this forcibly breaks the klist abstraction temporarily.  It
  * just wants to sort the klist, not change reference counts and
  * take/drop locks rapidly in the process.  It does all this while
  * holding the lock for the list, so objects can't otherwise be
@@ -1029,10 +1045,228 @@ void bus_sort_breadthfirst(struct bus_type *bus,
 }
 EXPORT_SYMBOL_GPL(bus_sort_breadthfirst);
 
+/**
+ * subsys_dev_iter_init - initialize subsys device iterator
+ * @iter: subsys iterator to initialize
+ * @subsys: the subsys we wanna iterate over
+ * @start: the device to start iterating from, if any
+ * @type: device_type of the devices to iterate over, NULL for all
+ *
+ * Initialize subsys iterator @iter such that it iterates over devices
+ * of @subsys.  If @start is set, the list iteration will start there,
+ * otherwise if it is NULL, the iteration starts at the beginning of
+ * the list.
+ */
+void subsys_dev_iter_init(struct subsys_dev_iter *iter, struct bus_type *subsys,
+			  struct device *start, const struct device_type *type)
+{
+	struct klist_node *start_knode = NULL;
+
+	if (start)
+		start_knode = &start->p->knode_bus;
+	klist_iter_init_node(&subsys->p->klist_devices, &iter->ki, start_knode);
+	iter->type = type;
+}
+EXPORT_SYMBOL_GPL(subsys_dev_iter_init);
+
+/**
+ * subsys_dev_iter_next - iterate to the next device
+ * @iter: subsys iterator to proceed
+ *
+ * Proceed @iter to the next device and return it.  Returns NULL if
+ * iteration is complete.
+ *
+ * The returned device is referenced and won't be released till
+ * iterator is advanced to the next device or exited.  The caller is
+ * free to do whatever it wants to do with the device including
+ * calling back into subsys code.
+ */
+struct device *subsys_dev_iter_next(struct subsys_dev_iter *iter)
+{
+	struct klist_node *knode;
+	struct device *dev;
+
+	for (;;) {
+		knode = klist_next(&iter->ki);
+		if (!knode)
+			return NULL;
+		dev = container_of(knode, struct device_private, knode_bus)->device;
+		if (!iter->type || iter->type == dev->type)
+			return dev;
+	}
+}
+EXPORT_SYMBOL_GPL(subsys_dev_iter_next);
+
+/**
+ * subsys_dev_iter_exit - finish iteration
+ * @iter: subsys iterator to finish
+ *
+ * Finish an iteration.  Always call this function after iteration is
+ * complete whether the iteration ran till the end or not.
+ */
+void subsys_dev_iter_exit(struct subsys_dev_iter *iter)
+{
+	klist_iter_exit(&iter->ki);
+}
+EXPORT_SYMBOL_GPL(subsys_dev_iter_exit);
+
+int subsys_interface_register(struct subsys_interface *sif)
+{
+	struct bus_type *subsys;
+	struct subsys_dev_iter iter;
+	struct device *dev;
+
+	if (!sif || !sif->subsys)
+		return -ENODEV;
+
+	subsys = bus_get(sif->subsys);
+	if (!subsys)
+		return -EINVAL;
+
+	mutex_lock(&subsys->p->mutex);
+	list_add_tail(&sif->node, &subsys->p->interfaces);
+	if (sif->add_dev) {
+		subsys_dev_iter_init(&iter, subsys, NULL, NULL);
+		while ((dev = subsys_dev_iter_next(&iter)))
+			sif->add_dev(dev, sif);
+		subsys_dev_iter_exit(&iter);
+	}
+	mutex_unlock(&subsys->p->mutex);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(subsys_interface_register);
+
+void subsys_interface_unregister(struct subsys_interface *sif)
+{
+	struct bus_type *subsys;
+	struct subsys_dev_iter iter;
+	struct device *dev;
+
+	if (!sif || !sif->subsys)
+		return;
+
+	subsys = sif->subsys;
+
+	mutex_lock(&subsys->p->mutex);
+	list_del_init(&sif->node);
+	if (sif->remove_dev) {
+		subsys_dev_iter_init(&iter, subsys, NULL, NULL);
+		while ((dev = subsys_dev_iter_next(&iter)))
+			sif->remove_dev(dev, sif);
+		subsys_dev_iter_exit(&iter);
+	}
+	mutex_unlock(&subsys->p->mutex);
+
+	bus_put(subsys);
+}
+EXPORT_SYMBOL_GPL(subsys_interface_unregister);
+
+static void system_root_device_release(struct device *dev)
+{
+	kfree(dev);
+}
+
+static int subsys_register(struct bus_type *subsys,
+			   const struct attribute_group **groups,
+			   struct kobject *parent_of_root)
+{
+	struct device *dev;
+	int err;
+
+	err = bus_register(subsys);
+	if (err < 0)
+		return err;
+
+	dev = kzalloc(sizeof(struct device), GFP_KERNEL);
+	if (!dev) {
+		err = -ENOMEM;
+		goto err_dev;
+	}
+
+	err = dev_set_name(dev, "%s", subsys->name);
+	if (err < 0)
+		goto err_name;
+
+	dev->kobj.parent = parent_of_root;
+	dev->groups = groups;
+	dev->release = system_root_device_release;
+
+	err = device_register(dev);
+	if (err < 0)
+		goto err_dev_reg;
+
+	subsys->dev_root = dev;
+	return 0;
+
+err_dev_reg:
+	put_device(dev);
+	dev = NULL;
+err_name:
+	kfree(dev);
+err_dev:
+	bus_unregister(subsys);
+	return err;
+}
+
+/**
+ * subsys_system_register - register a subsystem at /sys/devices/system/
+ * @subsys: system subsystem
+ * @groups: default attributes for the root device
+ *
+ * All 'system' subsystems have a /sys/devices/system/<name> root device
+ * with the name of the subsystem. The root device can carry subsystem-
+ * wide attributes. All registered devices are below this single root
+ * device and are named after the subsystem with a simple enumeration
+ * number appended. The registered devices are not explicitly named;
+ * only 'id' in the device needs to be set.
+ *
+ * Do not use this interface for anything new, it exists for compatibility
+ * with bad ideas only. New subsystems should use plain subsystems, and
+ * subsystem-wide attributes should be added to the subsystem directory
+ * itself rather than to some fake root device placed in
+ * /sys/devices/system/<name>.
+ */
+int subsys_system_register(struct bus_type *subsys,
+			   const struct attribute_group **groups)
+{
+	return subsys_register(subsys, groups, &system_kset->kobj);
+}
+EXPORT_SYMBOL_GPL(subsys_system_register);
+
+/**
+ * subsys_virtual_register - register a subsystem at /sys/devices/virtual/
+ * @subsys: virtual subsystem
+ * @groups: default attributes for the root device
+ *
+ * All 'virtual' subsystems have a /sys/devices/virtual/<name> root device
+ * with the name of the subsystem.  The root device can carry subsystem-wide
+ * attributes.  All registered devices are below this single root device.
+ * There's no restriction on device naming.  This is for kernel software
+ * constructs which need sysfs interface.
+ */
+int subsys_virtual_register(struct bus_type *subsys,
+			    const struct attribute_group **groups)
+{
+	struct kobject *virtual_dir;
+
+	virtual_dir = virtual_device_parent(NULL);
+	if (!virtual_dir)
+		return -ENOMEM;
+
+	return subsys_register(subsys, groups, virtual_dir);
+}
+EXPORT_SYMBOL_GPL(subsys_virtual_register);
+
 int __init buses_init(void)
 {
 	bus_kset = kset_create_and_add("bus", &bus_uevent_ops, NULL);
 	if (!bus_kset)
 		return -ENOMEM;
+
+	system_kset = kset_create_and_add("system", NULL, &devices_kset->kobj);
+	if (!system_kset)
+		return -ENOMEM;
+
 	return 0;
 }
diff --git a/drivers/base/class.c b/drivers/base/class.c
index 9c63a5687d6..f96f70419a7 100644
--- a/drivers/base/class.c
+++ b/drivers/base/class.c
@@ -27,7 +27,7 @@ static ssize_t class_attr_show(struct kobject *kobj, struct attribute *attr,
 			       char *buf)
 {
 	struct class_attribute *class_attr = to_class_attr(attr);
-	struct class_private *cp = to_class(kobj);
+	struct subsys_private *cp = to_subsys_private(kobj);
 	ssize_t ret = -EIO;
 
 	if (class_attr->show)
@@ -39,7 +39,7 @@ static ssize_t class_attr_store(struct kobject *kobj, struct attribute *attr,
 				const char *buf, size_t count)
 {
 	struct class_attribute *class_attr = to_class_attr(attr);
-	struct class_private *cp = to_class(kobj);
+	struct subsys_private *cp = to_subsys_private(kobj);
 	ssize_t ret = -EIO;
 
 	if (class_attr->store)
@@ -49,7 +49,7 @@ static ssize_t class_attr_store(struct kobject *kobj, struct attribute *attr,
 
 static void class_release(struct kobject *kobj)
 {
-	struct class_private *cp = to_class(kobj);
+	struct subsys_private *cp = to_subsys_private(kobj);
 	struct class *class = cp->class;
 
 	pr_debug("class '%s': release.\n", class->name);
@@ -65,15 +65,15 @@ static void class_release(struct kobject *kobj)
 
 static const struct kobj_ns_type_operations *class_child_ns_type(struct kobject *kobj)
 {
-	struct class_private *cp = to_class(kobj);
+	struct subsys_private *cp = to_subsys_private(kobj);
 	struct class *class = cp->class;
 
 	return class->ns_type;
 }
 
 static const struct sysfs_ops class_sysfs_ops = {
-	.show	= class_attr_show,
-	.store	= class_attr_store,
+	.show	   = class_attr_show,
+	.store	   = class_attr_store,
 };
 
 static struct kobj_type class_ktype = {
@@ -82,38 +82,40 @@ static struct kobj_type class_ktype = {
 	.child_ns_type	= class_child_ns_type,
 };
 
-/* Hotplug events for classes go to the class class_subsys */
+/* Hotplug events for classes go to the class subsys */
 static struct kset *class_kset;
 
 
-int class_create_file(struct class *cls, const struct class_attribute *attr)
+int class_create_file_ns(struct class *cls, const struct class_attribute *attr,
+			 const void *ns)
 {
 	int error;
 	if (cls)
-		error = sysfs_create_file(&cls->p->class_subsys.kobj,
-					  &attr->attr);
+		error = sysfs_create_file_ns(&cls->p->subsys.kobj,
+					     &attr->attr, ns);
 	else
 		error = -EINVAL;
 	return error;
 }
 
-void class_remove_file(struct class *cls, const struct class_attribute *attr)
+void class_remove_file_ns(struct class *cls, const struct class_attribute *attr,
+			  const void *ns)
 {
 	if (cls)
-		sysfs_remove_file(&cls->p->class_subsys.kobj, &attr->attr);
+		sysfs_remove_file_ns(&cls->p->subsys.kobj, &attr->attr, ns);
 }
 
 static struct class *class_get(struct class *cls)
 {
 	if (cls)
-		kset_get(&cls->p->class_subsys);
+		kset_get(&cls->p->subsys);
 	return cls;
 }
 
 static void class_put(struct class *cls)
 {
 	if (cls)
-		kset_put(&cls->p->class_subsys);
+		kset_put(&cls->p->subsys);
 }
 
 static int add_class_attrs(struct class *cls)
@@ -122,7 +124,7 @@ static int add_class_attrs(struct class *cls)
 	int error = 0;
 
 	if (cls->class_attrs) {
-		for (i = 0; attr_name(cls->class_attrs[i]); i++) {
+		for (i = 0; cls->class_attrs[i].attr.name; i++) {
 			error = class_create_file(cls, &cls->class_attrs[i]);
 			if (error)
 				goto error;
@@ -141,7 +143,7 @@ static void remove_class_attrs(struct class *cls)
 	int i;
 
 	if (cls->class_attrs) {
-		for (i = 0; attr_name(cls->class_attrs[i]); i++)
+		for (i = 0; cls->class_attrs[i].attr.name; i++)
 			class_remove_file(cls, &cls->class_attrs[i]);
 	}
 }
@@ -162,7 +164,7 @@ static void klist_class_dev_put(struct klist_node *n)
 
 int __class_register(struct class *cls, struct lock_class_key *key)
 {
-	struct class_private *cp;
+	struct subsys_private *cp;
 	int error;
 
 	pr_debug("device class '%s': registering\n", cls->name);
@@ -170,11 +172,11 @@ int __class_register(struct class *cls, struct lock_class_key *key)
 	cp = kzalloc(sizeof(*cp), GFP_KERNEL);
 	if (!cp)
 		return -ENOMEM;
-	klist_init(&cp->class_devices, klist_class_dev_get, klist_class_dev_put);
-	INIT_LIST_HEAD(&cp->class_interfaces);
-	kset_init(&cp->class_dirs);
-	__mutex_init(&cp->class_mutex, "struct class mutex", key);
-	error = kobject_set_name(&cp->class_subsys.kobj, "%s", cls->name);
+	klist_init(&cp->klist_devices, klist_class_dev_get, klist_class_dev_put);
+	INIT_LIST_HEAD(&cp->interfaces);
+	kset_init(&cp->glue_dirs);
+	__mutex_init(&cp->mutex, "subsys mutex", key);
+	error = kobject_set_name(&cp->subsys.kobj, "%s", cls->name);
 	if (error) {
 		kfree(cp);
 		return error;
@@ -187,15 +189,15 @@ int __class_register(struct class *cls, struct lock_class_key *key)
 #if defined(CONFIG_BLOCK)
 	/* let the block class directory show up in the root of sysfs */
 	if (!sysfs_deprecated || cls != &block_class)
-		cp->class_subsys.kobj.kset = class_kset;
+		cp->subsys.kobj.kset = class_kset;
 #else
-	cp->class_subsys.kobj.kset = class_kset;
+	cp->subsys.kobj.kset = class_kset;
 #endif
-	cp->class_subsys.kobj.ktype = &class_ktype;
+	cp->subsys.kobj.ktype = &class_ktype;
 	cp->class = cls;
 	cls->p = cp;
 
-	error = kset_register(&cp->class_subsys);
+	error = kset_register(&cp->subsys);
 	if (error) {
 		kfree(cp);
 		return error;
@@ -210,7 +212,7 @@ void class_unregister(struct class *cls)
 {
 	pr_debug("device class '%s': unregistering\n", cls->name);
 	remove_class_attrs(cls);
-	kset_unregister(&cls->p->class_subsys);
+	kset_unregister(&cls->p->subsys);
 }
 
 static void class_create_release(struct class *cls)
@@ -295,7 +297,7 @@ void class_dev_iter_init(struct class_dev_iter *iter, struct class *class,
 
 	if (start)
 		start_knode = &start->knode_class;
-	klist_iter_init_node(&class->p->class_devices, &iter->ki, start_knode);
+	klist_iter_init_node(&class->p->klist_devices, &iter->ki, start_knode);
 	iter->type = type;
 }
 EXPORT_SYMBOL_GPL(class_dev_iter_init);
@@ -407,8 +409,8 @@ EXPORT_SYMBOL_GPL(class_for_each_device);
  * code.  There's no locking restriction.
  */
 struct device *class_find_device(struct class *class, struct device *start,
-				 void *data,
-				 int (*match)(struct device *, void *))
+				 const void *data,
+				 int (*match)(struct device *, const void *))
 {
 	struct class_dev_iter iter;
 	struct device *dev;
@@ -447,15 +449,15 @@ int class_interface_register(struct class_interface *class_intf)
 	if (!parent)
 		return -EINVAL;
 
-	mutex_lock(&parent->p->class_mutex);
-	list_add_tail(&class_intf->node, &parent->p->class_interfaces);
+	mutex_lock(&parent->p->mutex);
+	list_add_tail(&class_intf->node, &parent->p->interfaces);
 	if (class_intf->add_dev) {
 		class_dev_iter_init(&iter, parent, NULL, NULL);
 		while ((dev = class_dev_iter_next(&iter)))
 			class_intf->add_dev(dev, class_intf);
 		class_dev_iter_exit(&iter);
 	}
-	mutex_unlock(&parent->p->class_mutex);
+	mutex_unlock(&parent->p->mutex);
 
 	return 0;
 }
@@ -469,7 +471,7 @@ void class_interface_unregister(struct class_interface *class_intf)
 	if (!parent)
 		return;
 
-	mutex_lock(&parent->p->class_mutex);
+	mutex_lock(&parent->p->mutex);
 	list_del_init(&class_intf->node);
 	if (class_intf->remove_dev) {
 		class_dev_iter_init(&iter, parent, NULL, NULL);
@@ -477,13 +479,13 @@ void class_interface_unregister(struct class_interface *class_intf)
 			class_intf->remove_dev(dev, class_intf);
 		class_dev_iter_exit(&iter);
 	}
-	mutex_unlock(&parent->p->class_mutex);
+	mutex_unlock(&parent->p->mutex);
 
 	class_put(parent);
 }
 
-ssize_t show_class_attr_string(struct class *class, struct class_attribute *attr,
-                        	char *buf)
+ssize_t show_class_attr_string(struct class *class,
+			       struct class_attribute *attr, char *buf)
 {
 	struct class_attribute_string *cs;
 	cs = container_of(attr, struct class_attribute_string, attr);
@@ -587,8 +589,8 @@ int __init classes_init(void)
 	return 0;
 }
 
-EXPORT_SYMBOL_GPL(class_create_file);
-EXPORT_SYMBOL_GPL(class_remove_file);
+EXPORT_SYMBOL_GPL(class_create_file_ns);
+EXPORT_SYMBOL_GPL(class_remove_file_ns);
 EXPORT_SYMBOL_GPL(class_unregister);
 EXPORT_SYMBOL_GPL(class_destroy);
 
diff --git a/drivers/base/component.c b/drivers/base/component.c
new file mode 100644
index 00000000000..c4778995cd7
--- /dev/null
+++ b/drivers/base/component.c
@@ -0,0 +1,390 @@
+/*
+ * Componentized device handling.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This is work in progress.  We gather up the component devices into a list,
+ * and bind them when instructed.  At the moment, we're specific to the DRM
+ * subsystem, and only handle one master device, but this doesn't have to be
+ * the case.
+ */
+#include <linux/component.h>
+#include <linux/device.h>
+#include <linux/kref.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/slab.h>
+
+struct master {
+	struct list_head node;
+	struct list_head components;
+	bool bound;
+
+	const struct component_master_ops *ops;
+	struct device *dev;
+};
+
+struct component {
+	struct list_head node;
+	struct list_head master_node;
+	struct master *master;
+	bool bound;
+
+	const struct component_ops *ops;
+	struct device *dev;
+};
+
+static DEFINE_MUTEX(component_mutex);
+static LIST_HEAD(component_list);
+static LIST_HEAD(masters);
+
+static struct master *__master_find(struct device *dev,
+	const struct component_master_ops *ops)
+{
+	struct master *m;
+
+	list_for_each_entry(m, &masters, node)
+		if (m->dev == dev && (!ops || m->ops == ops))
+			return m;
+
+	return NULL;
+}
+
+/* Attach an unattached component to a master. */
+static void component_attach_master(struct master *master, struct component *c)
+{
+	c->master = master;
+
+	list_add_tail(&c->master_node, &master->components);
+}
+
+/* Detach a component from a master. */
+static void component_detach_master(struct master *master, struct component *c)
+{
+	list_del(&c->master_node);
+
+	c->master = NULL;
+}
+
+int component_master_add_child(struct master *master,
+	int (*compare)(struct device *, void *), void *compare_data)
+{
+	struct component *c;
+	int ret = -ENXIO;
+
+	list_for_each_entry(c, &component_list, node) {
+		if (c->master)
+			continue;
+
+		if (compare(c->dev, compare_data)) {
+			component_attach_master(master, c);
+			ret = 0;
+			break;
+		}
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(component_master_add_child);
+
+/* Detach all attached components from this master */
+static void master_remove_components(struct master *master)
+{
+	while (!list_empty(&master->components)) {
+		struct component *c = list_first_entry(&master->components,
+					struct component, master_node);
+
+		WARN_ON(c->master != master);
+
+		component_detach_master(master, c);
+	}
+}
+
+/*
+ * Try to bring up a master.  If component is NULL, we're interested in
+ * this master, otherwise it's a component which must be present to try
+ * and bring up the master.  Components attached during a failed or
+ * incomplete attempt are always detached again before returning.
+ *
+ * Returns 1 for successful bringup, 0 if not ready, or -ve errno.
+ */
+static int try_to_bring_up_master(struct master *master,
+	struct component *component)
+{
+	int ret = 0;
+
+	if (master->bound)
+		return 0;
+
+	/*
+	 * Search the list of components, looking for components that
+	 * belong to this master, and attach them to the master.
+	 */
+	if (master->ops->add_components(master->dev, master)) {
+		/* Failed to find all components */
+		goto out;
+	}
+
+	/* The component that triggered this attempt must be one of ours. */
+	if (component && component->master != master)
+		goto out;
+
+	if (!devres_open_group(master->dev, NULL, GFP_KERNEL)) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	/* Found all components */
+	ret = master->ops->bind(master->dev);
+	if (ret < 0) {
+		devres_release_group(master->dev, NULL);
+		dev_info(master->dev, "master bind failed: %d\n", ret);
+		goto out;
+	}
+
+	master->bound = true;
+	return 1;
+
+out:
+	/* Never leave components attached to a master that isn't bound. */
+	master_remove_components(master);
+
+	return ret;
+}
+
+static int try_to_bring_up_masters(struct component *component)
+{
+	struct master *m;
+	int ret = 0;
+
+	list_for_each_entry(m, &masters, node) {
+		ret = try_to_bring_up_master(m, component);
+		if (ret != 0)
+			break;
+	}
+
+	return ret;
+}
+
+static void take_down_master(struct master *master)
+{
+	if (master->bound) {
+		master->ops->unbind(master->dev);
+		devres_release_group(master->dev, NULL);
+		master->bound = false;
+	}
+
+	master_remove_components(master);
+}
+
+int component_master_add(struct device *dev,
+	const struct component_master_ops *ops)
+{
+	struct master *master;
+	int ret;
+
+	master = kzalloc(sizeof(*master), GFP_KERNEL);
+	if (!master)
+		return -ENOMEM;
+
+	master->dev = dev;
+	master->ops = ops;
+	INIT_LIST_HEAD(&master->components);
+
+	/* Add to the list of available masters. */
+	mutex_lock(&component_mutex);
+	list_add(&master->node, &masters);
+
+	ret = try_to_bring_up_master(master, NULL);
+
+	if (ret < 0) {
+		/* Delete off the list if we weren't successful */
+		list_del(&master->node);
+		kfree(master);
+	}
+	mutex_unlock(&component_mutex);
+
+	return ret < 0 ? ret : 0;
+}
+EXPORT_SYMBOL_GPL(component_master_add);
+
+void component_master_del(struct device *dev,
+	const struct component_master_ops *ops)
+{
+	struct master *master;
+
+	mutex_lock(&component_mutex);
+	master = __master_find(dev, ops);
+	if (master) {
+		take_down_master(master);
+
+		list_del(&master->node);
+		kfree(master);
+	}
+	mutex_unlock(&component_mutex);
+}
+EXPORT_SYMBOL_GPL(component_master_del);
+
+static void component_unbind(struct component *component,
+	struct master *master, void *data)
+{
+	WARN_ON(!component->bound);
+
+	component->ops->unbind(component->dev, master->dev, data);
+	component->bound = false;
+
+	/* Release all resources claimed in the binding of this component */
+	devres_release_group(component->dev, component);
+}
+
+void component_unbind_all(struct device *master_dev, void *data)
+{
+	struct master *master;
+	struct component *c;
+
+	WARN_ON(!mutex_is_locked(&component_mutex));
+
+	master = __master_find(master_dev, NULL);
+	if (!master)
+		return;
+
+	list_for_each_entry_reverse(c, &master->components, master_node)
+		component_unbind(c, master, data);
+}
+EXPORT_SYMBOL_GPL(component_unbind_all);
+
+static int component_bind(struct component *component, struct master *master,
+	void *data)
+{
+	int ret;
+
+	/*
+	 * Each component initialises inside its own devres group.
+	 * This allows us to roll-back a failed component without
+	 * affecting anything else.
+	 */
+	if (!devres_open_group(master->dev, NULL, GFP_KERNEL))
+		return -ENOMEM;
+
+	/*
+	 * Also open a group for the device itself: this allows us
+	 * to release the resources claimed against the sub-device
+	 * at the appropriate moment.
+	 */
+	if (!devres_open_group(component->dev, component, GFP_KERNEL)) {
+		devres_release_group(master->dev, NULL);
+		return -ENOMEM;
+	}
+
+	dev_dbg(master->dev, "binding %s (ops %ps)\n",
+		dev_name(component->dev), component->ops);
+
+	ret = component->ops->bind(component->dev, master->dev, data);
+	if (!ret) {
+		component->bound = true;
+
+		/*
+		 * Close the component device's group so that resources
+		 * allocated in the binding are encapsulated for removal
+		 * at unbind.  Remove the group on the DRM device as we
+		 * can clean those resources up independently.
+		 */
+		devres_close_group(component->dev, NULL);
+		devres_remove_group(master->dev, NULL);
+
+		dev_info(master->dev, "bound %s (ops %ps)\n",
+			 dev_name(component->dev), component->ops);
+	} else {
+		devres_release_group(component->dev, NULL);
+		devres_release_group(master->dev, NULL);
+
+		dev_err(master->dev, "failed to bind %s (ops %ps): %d\n",
+			dev_name(component->dev), component->ops, ret);
+	}
+
+	return ret;
+}
+
+int component_bind_all(struct device *master_dev, void *data)
+{
+	struct master *master;
+	struct component *c;
+	int ret = 0;
+
+	WARN_ON(!mutex_is_locked(&component_mutex));
+
+	master = __master_find(master_dev, NULL);
+	if (!master)
+		return -EINVAL;
+
+	list_for_each_entry(c, &master->components, master_node) {
+		ret = component_bind(c, master, data);
+		if (ret)
+			break;
+	}
+
+	if (ret != 0) {
+		list_for_each_entry_continue_reverse(c, &master->components,
+						     master_node)
+			component_unbind(c, master, data);
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(component_bind_all);
+
+int component_add(struct device *dev, const struct component_ops *ops)
+{
+	struct component *component;
+	int ret;
+
+	component = kzalloc(sizeof(*component), GFP_KERNEL);
+	if (!component)
+		return -ENOMEM;
+
+	component->ops = ops;
+	component->dev = dev;
+
+	dev_dbg(dev, "adding component (ops %ps)\n", ops);
+
+	mutex_lock(&component_mutex);
+	list_add_tail(&component->node, &component_list);
+
+	ret = try_to_bring_up_masters(component);
+	if (ret < 0) {
+		list_del(&component->node);
+
+		kfree(component);
+	}
+	mutex_unlock(&component_mutex);
+
+	return ret < 0 ? ret : 0;
+}
+EXPORT_SYMBOL_GPL(component_add);
+
+void component_del(struct device *dev, const struct component_ops *ops)
+{
+	struct component *c, *component = NULL;
+
+	mutex_lock(&component_mutex);
+	list_for_each_entry(c, &component_list, node)
+		if (c->dev == dev && c->ops == ops) {
+			list_del(&c->node);
+			component = c;
+			break;
+		}
+
+	if (component && component->master)
+		take_down_master(component->master);
+
+	mutex_unlock(&component_mutex);
+
+	WARN_ON(!component);
+	kfree(component);
+}
+EXPORT_SYMBOL_GPL(component_del);
+
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/base/container.c b/drivers/base/container.c
new file mode 100644
index 00000000000..ecbfbe2e908
--- /dev/null
+++ b/drivers/base/container.c
@@ -0,0 +1,44 @@
+/*
+ * System bus type for containers.
+ *
+ * Copyright (C) 2013, Intel Corporation
+ * Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/container.h>
+
+#include "base.h"
+
+#define CONTAINER_BUS_NAME	"container"
+
+static int trivial_online(struct device *dev)
+{
+	return 0;
+}
+
+static int container_offline(struct device *dev)
+{
+	struct container_dev *cdev = to_container_dev(dev);
+
+	return cdev->offline ? cdev->offline(cdev) : 0;
+}
+
+struct bus_type container_subsys = {
+	.name = CONTAINER_BUS_NAME,
+	.dev_name = CONTAINER_BUS_NAME,
+	.online = trivial_online,
+	.offline = container_offline,
+};
+
+void __init container_dev_init(void)
+{
+	int ret;
+
+	ret = subsys_system_register(&container_subsys, NULL);
+	if (ret)
+		pr_err("%s() failed: %d\n", __func__, ret);
+}
diff --git a/drivers/base/core.c b/drivers/base/core.c
index 6ed645411c4..20da3ad1696 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -18,10 +18,14 @@
 #include <linux/string.h>
 #include <linux/kdev_t.h>
 #include <linux/notifier.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
 #include <linux/genhd.h>
 #include <linux/kallsyms.h>
 #include <linux/mutex.h>
-#include <linux/async.h>
+#include <linux/pm_runtime.h>
+#include <linux/netdevice.h>
+#include <linux/sysfs.h>
 
 #include "base.h"
 #include "power/power.h"
@@ -32,9 +36,9 @@ long sysfs_deprecated = 1;
 #else
 long sysfs_deprecated = 0;
 #endif
-static __init int sysfs_deprecated_setup(char *arg)
+static int __init sysfs_deprecated_setup(char *arg)
 {
-	return strict_strtol(arg, 10, &sysfs_deprecated);
+	return kstrtol(arg, 10, &sysfs_deprecated);
 }
 early_param("sysfs.deprecated", sysfs_deprecated_setup);
 #endif
@@ -45,6 +49,28 @@ static struct kobject *dev_kobj;
 struct kobject *sysfs_dev_char_kobj;
 struct kobject *sysfs_dev_block_kobj;
 
+static DEFINE_MUTEX(device_hotplug_lock);
+
+void lock_device_hotplug(void)
+{
+	mutex_lock(&device_hotplug_lock);
+}
+
+void unlock_device_hotplug(void)
+{
+	mutex_unlock(&device_hotplug_lock);
+}
+
+int lock_device_hotplug_sysfs(void)
+{
+	if (mutex_trylock(&device_hotplug_lock))
+		return 0;
+
+	/* Avoid busy looping (5 ms of sleep should do). */
+	msleep(5);
+	return restart_syscall();
+}
+
 #ifdef CONFIG_BLOCK
 static inline int device_is_not_partition(struct device *dev)
 {
@@ -62,7 +88,7 @@ static inline int device_is_not_partition(struct device *dev)
  * @dev: struct device to get the name of
  *
  * Will return the device's driver's name if it is bound to a device.  If
- * the device is not bound to a device, it will return the name of the bus
+ * the device is not bound to a driver, it will return the name of the bus
  * it is attached to.  If it is not attached to a bus either, an empty
  * string will be returned.
  */
@@ -81,14 +107,13 @@ const char *dev_driver_string(const struct device *dev)
 }
 EXPORT_SYMBOL(dev_driver_string);
 
-#define to_dev(obj) container_of(obj, struct device, kobj)
 #define to_dev_attr(_attr) container_of(_attr, struct device_attribute, attr)
 
 static ssize_t dev_attr_show(struct kobject *kobj, struct attribute *attr,
 			     char *buf)
 {
 	struct device_attribute *dev_attr = to_dev_attr(attr);
-	struct device *dev = to_dev(kobj);
+	struct device *dev = kobj_to_dev(kobj);
 	ssize_t ret = -EIO;
 
 	if (dev_attr->show)
@@ -104,7 +129,7 @@ static ssize_t dev_attr_store(struct kobject *kobj, struct attribute *attr,
 			      const char *buf, size_t count)
 {
 	struct device_attribute *dev_attr = to_dev_attr(attr);
-	struct device *dev = to_dev(kobj);
+	struct device *dev = kobj_to_dev(kobj);
 	ssize_t ret = -EIO;
 
 	if (dev_attr->store)
@@ -117,20 +142,102 @@ static const struct sysfs_ops dev_sysfs_ops = {
 	.store	= dev_attr_store,
 };
 
+#define to_ext_attr(x) container_of(x, struct dev_ext_attribute, attr)
+
+ssize_t device_store_ulong(struct device *dev,
+			   struct device_attribute *attr,
+			   const char *buf, size_t size)
+{
+	struct dev_ext_attribute *ea = to_ext_attr(attr);
+	char *end;
+	unsigned long new = simple_strtoul(buf, &end, 0);
+	if (end == buf)
+		return -EINVAL;
+	*(unsigned long *)(ea->var) = new;
+	/* Always return full write size even if we didn't consume all input */
+	return size;
+}
+EXPORT_SYMBOL_GPL(device_store_ulong);
+
+ssize_t device_show_ulong(struct device *dev,
+			  struct device_attribute *attr,
+			  char *buf)
+{
+	struct dev_ext_attribute *ea = to_ext_attr(attr);
+	return snprintf(buf, PAGE_SIZE, "%lx\n", *(unsigned long *)(ea->var));
+}
+EXPORT_SYMBOL_GPL(device_show_ulong);
+
+ssize_t device_store_int(struct device *dev,
+			 struct device_attribute *attr,
+			 const char *buf, size_t size)
+{
+	struct dev_ext_attribute *ea = to_ext_attr(attr);
+	char *end;
+	long new = simple_strtol(buf, &end, 0);
+	if (end == buf || new > INT_MAX || new < INT_MIN)
+		return -EINVAL;
+	*(int *)(ea->var) = new;
+	/* Always return full write size even if we didn't consume all input */
+	return size;
+}
+EXPORT_SYMBOL_GPL(device_store_int);
+
+ssize_t device_show_int(struct device *dev,
+			struct device_attribute *attr,
+			char *buf)
+{
+	struct dev_ext_attribute *ea = to_ext_attr(attr);
+
+	return snprintf(buf, PAGE_SIZE, "%d\n", *(int *)(ea->var));
+}
+EXPORT_SYMBOL_GPL(device_show_int);
+
+ssize_t device_store_bool(struct device *dev, struct device_attribute *attr,
+			  const char *buf, size_t size)
+{
+	struct dev_ext_attribute *ea = to_ext_attr(attr);
+
+	if (strtobool(buf, ea->var) < 0)
+		return -EINVAL;
+
+	return size;
+}
+EXPORT_SYMBOL_GPL(device_store_bool);
+
+ssize_t device_show_bool(struct device *dev, struct device_attribute *attr,
+			 char *buf)
+{
+	struct dev_ext_attribute *ea = to_ext_attr(attr);
+
+	return snprintf(buf, PAGE_SIZE, "%d\n", *(bool *)(ea->var));
+}
+EXPORT_SYMBOL_GPL(device_show_bool);
 
 /**
- *	device_release - free device structure.
- *	@kobj:	device's kobject.
+ * device_release - free device structure.
+ * @kobj: device's kobject.
  *
- *	This is called once the reference count for the object
- *	reaches 0. We forward the call to the device's release
- *	method, which should handle actually freeing the structure.
+ * This is called once the reference count for the object
+ * reaches 0. We forward the call to the device's release
+ * method, which should handle actually freeing the structure.
  */
 static void device_release(struct kobject *kobj)
 {
-	struct device *dev = to_dev(kobj);
+	struct device *dev = kobj_to_dev(kobj);
 	struct device_private *p = dev->p;
 
+	/*
+	 * Some platform devices are driven without a driver attached,
+	 * and managed resources may have been acquired.  Make sure
+	 * all resources are released.
+	 *
+	 * Drivers can still add resources to a device after it has
+	 * been deleted but is still alive, so release devres here to
+	 * avoid a possible memory leak.
+	 */
+	devres_release_all(dev);
+
 	if (dev->release)
 		dev->release(dev);
 	else if (dev->type && dev->type->release)
@@ -146,7 +253,7 @@ static void device_release(struct kobject *kobj)
 
 static const void *device_namespace(struct kobject *kobj)
 {
-	struct device *dev = to_dev(kobj);
+	struct device *dev = kobj_to_dev(kobj);
 	const void *ns = NULL;
 
 	if (dev->class && dev->class->ns_type)
@@ -167,7 +274,7 @@ static int dev_uevent_filter(struct kset *kset, struct kobject *kobj)
 	struct kobj_type *ktype = get_ktype(kobj);
 
 	if (ktype == &device_ktype) {
-		struct device *dev = to_dev(kobj);
+		struct device *dev = kobj_to_dev(kobj);
 		if (dev->bus)
 			return 1;
 		if (dev->class)
@@ -178,7 +285,7 @@ static int dev_uevent_filter(struct kset *kset, struct kobject *kobj)
 
 static const char *dev_uevent_name(struct kset *kset, struct kobject *kobj)
 {
-	struct device *dev = to_dev(kobj);
+	struct device *dev = kobj_to_dev(kobj);
 
 	if (dev->bus)
 		return dev->bus->name;
@@ -190,23 +297,29 @@ static const char *dev_uevent_name(struct kset *kset, struct kobject *kobj)
 static int dev_uevent(struct kset *kset, struct kobject *kobj,
 		      struct kobj_uevent_env *env)
 {
-	struct device *dev = to_dev(kobj);
+	struct device *dev = kobj_to_dev(kobj);
 	int retval = 0;
 
 	/* add device node properties if present */
 	if (MAJOR(dev->devt)) {
 		const char *tmp;
 		const char *name;
-		mode_t mode = 0;
+		umode_t mode = 0;
+		kuid_t uid = GLOBAL_ROOT_UID;
+		kgid_t gid = GLOBAL_ROOT_GID;
 
 		add_uevent_var(env, "MAJOR=%u", MAJOR(dev->devt));
 		add_uevent_var(env, "MINOR=%u", MINOR(dev->devt));
-		name = device_get_devnode(dev, &mode, &tmp);
+		name = device_get_devnode(dev, &mode, &uid, &gid, &tmp);
 		if (name) {
 			add_uevent_var(env, "DEVNAME=%s", name);
-			kfree(tmp);
 			if (mode)
 				add_uevent_var(env, "DEVMODE=%#o", mode & 0777);
+			if (!uid_eq(uid, GLOBAL_ROOT_UID))
+				add_uevent_var(env, "DEVUID=%u", from_kuid(&init_user_ns, uid));
+			if (!gid_eq(gid, GLOBAL_ROOT_GID))
+				add_uevent_var(env, "DEVGID=%u", from_kgid(&init_user_ns, gid));
+			kfree(tmp);
 		}
 	}
 
@@ -216,6 +329,9 @@ static int dev_uevent(struct kset *kset, struct kobject *kobj,
 	if (dev->driver)
 		add_uevent_var(env, "DRIVER=%s", dev->driver->name);
 
+	/* Add common DT information about the device */
+	of_device_uevent(dev, env);
+
 	/* have the bus specific function add its stuff */
 	if (dev->bus && dev->bus->uevent) {
 		retval = dev->bus->uevent(dev, env);
@@ -251,7 +367,7 @@ static const struct kset_uevent_ops device_uevent_ops = {
 	.uevent =	dev_uevent,
 };
 
-static ssize_t show_uevent(struct device *dev, struct device_attribute *attr,
+static ssize_t uevent_show(struct device *dev, struct device_attribute *attr,
 			   char *buf)
 {
 	struct kobject *top_kobj;
@@ -294,7 +410,7 @@ out:
 	return count;
 }
 
-static ssize_t store_uevent(struct device *dev, struct device_attribute *attr,
+static ssize_t uevent_store(struct device *dev, struct device_attribute *attr,
 			    const char *buf, size_t count)
 {
 	enum kobject_action action;
@@ -305,77 +421,58 @@ static ssize_t store_uevent(struct device *dev, struct device_attribute *attr,
 		dev_err(dev, "uevent: unknown action-string\n");
 	return count;
 }
+static DEVICE_ATTR_RW(uevent);
 
-static struct device_attribute uevent_attr =
-	__ATTR(uevent, S_IRUGO | S_IWUSR, show_uevent, store_uevent);
-
-static int device_add_attributes(struct device *dev,
-				 struct device_attribute *attrs)
+static ssize_t online_show(struct device *dev, struct device_attribute *attr,
+			   char *buf)
 {
-	int error = 0;
-	int i;
+	bool val;
 
-	if (attrs) {
-		for (i = 0; attr_name(attrs[i]); i++) {
-			error = device_create_file(dev, &attrs[i]);
-			if (error)
-				break;
-		}
-		if (error)
-			while (--i >= 0)
-				device_remove_file(dev, &attrs[i]);
-	}
-	return error;
+	device_lock(dev);
+	val = !dev->offline;
+	device_unlock(dev);
+	return sprintf(buf, "%u\n", val);
 }
 
-static void device_remove_attributes(struct device *dev,
-				     struct device_attribute *attrs)
+static ssize_t online_store(struct device *dev, struct device_attribute *attr,
+			    const char *buf, size_t count)
 {
-	int i;
+	bool val;
+	int ret;
 
-	if (attrs)
-		for (i = 0; attr_name(attrs[i]); i++)
-			device_remove_file(dev, &attrs[i]);
-}
+	ret = strtobool(buf, &val);
+	if (ret < 0)
+		return ret;
 
-static int device_add_groups(struct device *dev,
-			     const struct attribute_group **groups)
-{
-	int error = 0;
-	int i;
+	ret = lock_device_hotplug_sysfs();
+	if (ret)
+		return ret;
 
-	if (groups) {
-		for (i = 0; groups[i]; i++) {
-			error = sysfs_create_group(&dev->kobj, groups[i]);
-			if (error) {
-				while (--i >= 0)
-					sysfs_remove_group(&dev->kobj,
-							   groups[i]);
-				break;
-			}
-		}
-	}
-	return error;
+	ret = val ? device_online(dev) : device_offline(dev);
+	unlock_device_hotplug();
+	return ret < 0 ? ret : count;
 }
+static DEVICE_ATTR_RW(online);
 
-static void device_remove_groups(struct device *dev,
-				 const struct attribute_group **groups)
+int device_add_groups(struct device *dev, const struct attribute_group **groups)
 {
-	int i;
+	return sysfs_create_groups(&dev->kobj, groups);
+}
 
-	if (groups)
-		for (i = 0; groups[i]; i++)
-			sysfs_remove_group(&dev->kobj, groups[i]);
+void device_remove_groups(struct device *dev,
+			  const struct attribute_group **groups)
+{
+	sysfs_remove_groups(&dev->kobj, groups);
 }
 
 static int device_add_attrs(struct device *dev)
 {
 	struct class *class = dev->class;
-	struct device_type *type = dev->type;
+	const struct device_type *type = dev->type;
 	int error;
 
 	if (class) {
-		error = device_add_attributes(dev, class->dev_attrs);
+		error = device_add_groups(dev, class->dev_groups);
 		if (error)
 			return error;
 	}
@@ -383,21 +480,29 @@ static int device_add_attrs(struct device *dev)
 	if (type) {
 		error = device_add_groups(dev, type->groups);
 		if (error)
-			goto err_remove_class_attrs;
+			goto err_remove_class_groups;
 	}
 
 	error = device_add_groups(dev, dev->groups);
 	if (error)
 		goto err_remove_type_groups;
 
+	if (device_supports_offline(dev) && !dev->offline_disabled) {
+		error = device_create_file(dev, &dev_attr_online);
+		if (error)
+			goto err_remove_dev_groups;
+	}
+
 	return 0;
 
+ err_remove_dev_groups:
+	device_remove_groups(dev, dev->groups);
  err_remove_type_groups:
 	if (type)
 		device_remove_groups(dev, type->groups);
- err_remove_class_attrs:
+ err_remove_class_groups:
 	if (class)
-		device_remove_attributes(dev, class->dev_attrs);
+		device_remove_groups(dev, class->dev_groups);
 
 	return error;
 }
@@ -405,28 +510,26 @@ static int device_add_attrs(struct device *dev)
 static void device_remove_attrs(struct device *dev)
 {
 	struct class *class = dev->class;
-	struct device_type *type = dev->type;
+	const struct device_type *type = dev->type;
 
+	device_remove_file(dev, &dev_attr_online);
 	device_remove_groups(dev, dev->groups);
 
 	if (type)
 		device_remove_groups(dev, type->groups);
 
 	if (class)
-		device_remove_attributes(dev, class->dev_attrs);
+		device_remove_groups(dev, class->dev_groups);
 }
 
-
-static ssize_t show_dev(struct device *dev, struct device_attribute *attr,
+static ssize_t dev_show(struct device *dev, struct device_attribute *attr,
 			char *buf)
 {
 	return print_dev_t(buf, dev->devt);
 }
+static DEVICE_ATTR_RO(dev);
 
-static struct device_attribute devt_attr =
-	__ATTR(dev, S_IRUGO, show_dev, NULL);
-
-/* kset to create /sys/devices/  */
+/* /sys/devices/ */
 struct kset *devices_kset;
 
 /**
@@ -438,10 +541,20 @@ int device_create_file(struct device *dev,
 		       const struct device_attribute *attr)
 {
 	int error = 0;
-	if (dev)
+
+	if (dev) {
+		WARN(((attr->attr.mode & S_IWUGO) && !attr->store),
+			"Attribute %s: write permission without 'store'\n",
+			attr->attr.name);
+		WARN(((attr->attr.mode & S_IRUGO) && !attr->show),
+			"Attribute %s: read permission without 'show'\n",
+			attr->attr.name);
 		error = sysfs_create_file(&dev->kobj, &attr->attr);
+	}
+
 	return error;
 }
+EXPORT_SYMBOL_GPL(device_create_file);
 
 /**
  * device_remove_file - remove sysfs attribute file.
@@ -454,6 +567,24 @@ void device_remove_file(struct device *dev,
 	if (dev)
 		sysfs_remove_file(&dev->kobj, &attr->attr);
 }
+EXPORT_SYMBOL_GPL(device_remove_file);
+
+/**
+ * device_remove_file_self - remove sysfs attribute file from its own method.
+ * @dev: device.
+ * @attr: device attribute descriptor.
+ *
+ * See kernfs_remove_self() for details.
+ */
+bool device_remove_file_self(struct device *dev,
+			     const struct device_attribute *attr)
+{
+	if (dev)
+		return sysfs_remove_file_self(&dev->kobj, &attr->attr);
+	else
+		return false;
+}
+EXPORT_SYMBOL_GPL(device_remove_file_self);
 
 /**
  * device_create_bin_file - create sysfs binary attribute file for device.
@@ -483,39 +614,6 @@ void device_remove_bin_file(struct device *dev,
 }
 EXPORT_SYMBOL_GPL(device_remove_bin_file);
 
-/**
- * device_schedule_callback_owner - helper to schedule a callback for a device
- * @dev: device.
- * @func: callback function to invoke later.
- * @owner: module owning the callback routine
- *
- * Attribute methods must not unregister themselves or their parent device
- * (which would amount to the same thing).  Attempts to do so will deadlock,
- * since unregistration is mutually exclusive with driver callbacks.
- *
- * Instead methods can call this routine, which will attempt to allocate
- * and schedule a workqueue request to call back @func with @dev as its
- * argument in the workqueue's process context.  @dev will be pinned until
- * @func returns.
- *
- * This routine is usually called via the inline device_schedule_callback(),
- * which automatically sets @owner to THIS_MODULE.
- *
- * Returns 0 if the request was submitted, -ENOMEM if storage could not
- * be allocated, -ENODEV if a reference to @owner isn't available.
- *
- * NOTE: This routine won't work if CONFIG_SYSFS isn't set!  It uses an
- * underlying sysfs routine (since it is intended for use by attribute
- * methods), and if sysfs isn't available you'll get nothing but -ENOSYS.
- */
-int device_schedule_callback_owner(struct device *dev,
-		void (*func)(struct device *), struct module *owner)
-{
-	return sysfs_schedule_callback(&dev->kobj,
-			(void (*)(void *)) func, dev, owner);
-}
-EXPORT_SYMBOL_GPL(device_schedule_callback_owner);
-
 static void klist_children_get(struct klist_node *n)
 {
 	struct device_private *p = to_device_private_parent(n);
@@ -544,6 +642,11 @@ static void klist_children_put(struct klist_node *n)
  * may be used for reference counting of @dev after calling this
  * function.
  *
+ * All fields in @dev must be initialized by the caller to 0, except
+ * for those explicitly set to some other value.  The simplest
+ * approach is to use kzalloc() to allocate the structure containing
+ * @dev.
+ *
  * NOTE: Use put_device() to give up your reference instead of freeing
  * @dev directly once you have called this function.
  */
@@ -559,8 +662,9 @@ void device_initialize(struct device *dev)
 	device_pm_init(dev);
 	set_dev_node(dev, -1);
 }
+EXPORT_SYMBOL_GPL(device_initialize);
 
-static struct kobject *virtual_device_parent(struct device *dev)
+struct kobject *virtual_device_parent(struct device *dev)
 {
 	static struct kobject *virtual_dir = NULL;
 
@@ -610,7 +714,7 @@ class_dir_create_and_add(struct class *class, struct kobject *parent_kobj)
 	dir->class = class;
 	kobject_init(&dir->kobj, &class_dir_ktype);
 
-	dir->kobj.kset = &class->p->class_dirs;
+	dir->kobj.kset = &class->p->glue_dirs;
 
 	retval = kobject_add(&dir->kobj, parent_kobj, "%s", class->name);
 	if (retval < 0) {
@@ -635,7 +739,7 @@ static struct kobject *get_device_parent(struct device *dev,
 		if (sysfs_deprecated && dev->class == &block_class) {
 			if (parent && parent->class == &block_class)
 				return &parent->kobj;
-			return &block_class.p->class_subsys.kobj;
+			return &block_class.p->subsys.kobj;
 		}
 #endif
 
@@ -654,13 +758,13 @@ static struct kobject *get_device_parent(struct device *dev,
 		mutex_lock(&gdp_mutex);
 
 		/* find our class-directory at the parent and reference it */
-		spin_lock(&dev->class->p->class_dirs.list_lock);
-		list_for_each_entry(k, &dev->class->p->class_dirs.list, entry)
+		spin_lock(&dev->class->p->glue_dirs.list_lock);
+		list_for_each_entry(k, &dev->class->p->glue_dirs.list, entry)
 			if (k->parent == parent_kobj) {
 				kobj = kobject_get(k);
 				break;
 			}
-		spin_unlock(&dev->class->p->class_dirs.list_lock);
+		spin_unlock(&dev->class->p->glue_dirs.list_lock);
 		if (kobj) {
 			mutex_unlock(&gdp_mutex);
 			return kobj;
@@ -673,6 +777,10 @@ static struct kobject *get_device_parent(struct device *dev,
 		return k;
 	}
 
+	/* subsystems can specify a default root directory for their devices */
+	if (!parent && dev->bus && dev->bus->dev_root)
+		return &dev->bus->dev_root->kobj;
+
 	if (parent)
 		return &parent->kobj;
 	return NULL;
@@ -682,7 +790,7 @@ static void cleanup_glue_dir(struct device *dev, struct kobject *glue_dir)
 {
 	/* see if we live in a "glue" directory */
 	if (!glue_dir || !dev->class ||
-	    glue_dir->kset != &dev->class->p->class_dirs)
+	    glue_dir->kset != &dev->class->p->glue_dirs)
 		return;
 
 	kobject_put(glue_dir);
@@ -693,14 +801,6 @@ static void cleanup_device_parent(struct device *dev)
 	cleanup_glue_dir(dev, dev->kobj.parent);
 }
 
-static void setup_parent(struct device *dev, struct device *parent)
-{
-	struct kobject *kobj;
-	kobj = get_device_parent(dev, parent);
-	if (kobj)
-		dev->kobj.parent = kobj;
-}
-
 static int device_add_class_symlinks(struct device *dev)
 {
 	int error;
@@ -709,7 +809,7 @@ static int device_add_class_symlinks(struct device *dev)
 		return 0;
 
 	error = sysfs_create_link(&dev->kobj,
-				  &dev->class->p->class_subsys.kobj,
+				  &dev->class->p->subsys.kobj,
 				  "subsystem");
 	if (error)
 		goto out;
@@ -728,7 +828,7 @@ static int device_add_class_symlinks(struct device *dev)
 #endif
 
 	/* link in the class directory pointing to the device */
-	error = sysfs_create_link(&dev->class->p->class_subsys.kobj,
+	error = sysfs_create_link(&dev->class->p->subsys.kobj,
 				  &dev->kobj, dev_name(dev));
 	if (error)
 		goto out_device;
@@ -756,7 +856,7 @@ static void device_remove_class_symlinks(struct device *dev)
 	if (sysfs_deprecated && dev->class == &block_class)
 		return;
 #endif
-	sysfs_delete_link(&dev->class->p->class_subsys.kobj, &dev->kobj, dev_name(dev));
+	sysfs_delete_link(&dev->class->p->subsys.kobj, &dev->kobj, dev_name(dev));
 }
 
 /**
@@ -784,8 +884,8 @@ EXPORT_SYMBOL_GPL(dev_set_name);
  * to NULL prevents an entry from being created.  class->dev_kobj must
  * be set (or cleared) before any devices are registered to the class
  * otherwise device_create_sys_dev_entry() and
- * device_remove_sys_dev_entry() will disagree about the the presence
- * of the link.
+ * device_remove_sys_dev_entry() will disagree about the presence of
+ * the link.
  */
 static struct kobject *device_to_dev_kobj(struct device *dev)
 {
@@ -832,6 +932,7 @@ int device_private_init(struct device *dev)
 	dev->p->device = dev;
 	klist_init(&dev->p->klist_children, klist_children_get,
 		   klist_children_put);
+	INIT_LIST_HEAD(&dev->p->deferred_probe);
 	return 0;
 }
 
@@ -846,6 +947,13 @@ int device_private_init(struct device *dev)
  * to the global and sibling lists for the device, then
  * adds it to the other relevant subsystems of the driver model.
  *
+ * Do not call this routine or device_register() more than once for
+ * any device structure.  The driver model core is not designed to work
+ * with devices that get unregistered and then spring back to life.
+ * (Among other things, it's very hard to guarantee that all references
+ * to the previous incarnation of @dev have been dropped.)  Allocate
+ * and register a fresh new struct device instead.
+ *
  * NOTE: _Never_ directly free @dev after calling this function, even
  * if it returned an error! Always use put_device() to give up your
  * reference instead.
@@ -853,6 +961,7 @@ int device_private_init(struct device *dev)
 int device_add(struct device *dev)
 {
 	struct device *parent = NULL;
+	struct kobject *kobj;
 	struct class_interface *class_intf;
 	int error = -EINVAL;
 
@@ -876,6 +985,10 @@ int device_add(struct device *dev)
 		dev->init_name = NULL;
 	}
 
+	/* subsystems can specify simple device enumeration */
+	if (!dev_name(dev) && dev->bus && dev->bus->dev_name)
+		dev_set_name(dev, "%s%u", dev->bus->dev_name, dev->id);
+
 	if (!dev_name(dev)) {
 		error = -EINVAL;
 		goto name_error;
@@ -884,7 +997,9 @@ int device_add(struct device *dev)
 	pr_debug("device: '%s': %s\n", dev_name(dev), __func__);
 
 	parent = get_device(dev->parent);
-	setup_parent(dev, parent);
+	kobj = get_device_parent(dev, parent);
+	if (kobj)
+		dev->kobj.parent = kobj;
 
 	/* use parent numa_node */
 	if (parent)
@@ -900,12 +1015,12 @@ int device_add(struct device *dev)
 	if (platform_notify)
 		platform_notify(dev);
 
-	error = device_create_file(dev, &uevent_attr);
+	error = device_create_file(dev, &dev_attr_uevent);
 	if (error)
 		goto attrError;
 
 	if (MAJOR(dev->devt)) {
-		error = device_create_file(dev, &devt_attr);
+		error = device_create_file(dev, &dev_attr_dev);
 		if (error)
 			goto ueventattrError;
 
@@ -931,7 +1046,7 @@ int device_add(struct device *dev)
 	device_pm_add(dev);
 
 	/* Notify clients of device addition.  This call must come
-	 * after dpm_sysf_add() and before kobject_uevent().
+	 * after dpm_sysfs_add() and before kobject_uevent().
 	 */
 	if (dev->bus)
 		blocking_notifier_call_chain(&dev->bus->p->bus_notifier,
@@ -944,17 +1059,17 @@ int device_add(struct device *dev)
 			       &parent->p->klist_children);
 
 	if (dev->class) {
-		mutex_lock(&dev->class->p->class_mutex);
+		mutex_lock(&dev->class->p->mutex);
 		/* tie the class to the device */
 		klist_add_tail(&dev->knode_class,
-			       &dev->class->p->class_devices);
+			       &dev->class->p->klist_devices);
 
 		/* notify any interfaces that the device is here */
 		list_for_each_entry(class_intf,
-				    &dev->class->p->class_interfaces, node)
+				    &dev->class->p->interfaces, node)
 			if (class_intf->add_dev)
 				class_intf->add_dev(dev, class_intf);
-		mutex_unlock(&dev->class->p->class_mutex);
+		mutex_unlock(&dev->class->p->mutex);
 	}
 done:
 	put_device(dev);
@@ -972,9 +1087,9 @@ done:
 		device_remove_sys_dev_entry(dev);
  devtattrError:
 	if (MAJOR(dev->devt))
-		device_remove_file(dev, &devt_attr);
+		device_remove_file(dev, &dev_attr_dev);
  ueventattrError:
-	device_remove_file(dev, &uevent_attr);
+	device_remove_file(dev, &dev_attr_uevent);
  attrError:
 	kobject_uevent(&dev->kobj, KOBJ_REMOVE);
 	kobject_del(&dev->kobj);
@@ -987,6 +1102,7 @@ name_error:
 	dev->p = NULL;
 	goto done;
 }
+EXPORT_SYMBOL_GPL(device_add);
 
 /**
  * device_register - register a device with the system.
@@ -999,6 +1115,9 @@ name_error:
  * have a clearly defined need to use and refcount the device
  * before it is added to the hierarchy.
  *
+ * For more information, see the kerneldoc for device_initialize()
+ * and device_add().
+ *
  * NOTE: _Never_ directly free @dev after calling this function, even
  * if it returned an error! Always use put_device() to give up the
  * reference initialized in this function instead.
@@ -1008,6 +1127,7 @@ int device_register(struct device *dev)
 	device_initialize(dev);
 	return device_add(dev);
 }
+EXPORT_SYMBOL_GPL(device_register);
 
 /**
  * get_device - increment reference count for device.
@@ -1019,8 +1139,9 @@ int device_register(struct device *dev)
  */
 struct device *get_device(struct device *dev)
 {
-	return dev ? to_dev(kobject_get(&dev->kobj)) : NULL;
+	return dev ? kobj_to_dev(kobject_get(&dev->kobj)) : NULL;
 }
+EXPORT_SYMBOL_GPL(get_device);
 
 /**
  * put_device - decrement reference count.
@@ -1032,6 +1153,7 @@ void put_device(struct device *dev)
 	if (dev)
 		kobject_put(&dev->kobj);
 }
+EXPORT_SYMBOL_GPL(put_device);
 
 /**
  * device_del - delete device from system.
@@ -1057,38 +1179,32 @@ void device_del(struct device *dev)
 	if (dev->bus)
 		blocking_notifier_call_chain(&dev->bus->p->bus_notifier,
 					     BUS_NOTIFY_DEL_DEVICE, dev);
-	device_pm_remove(dev);
 	dpm_sysfs_remove(dev);
 	if (parent)
 		klist_del(&dev->p->knode_parent);
 	if (MAJOR(dev->devt)) {
 		devtmpfs_delete_node(dev);
 		device_remove_sys_dev_entry(dev);
-		device_remove_file(dev, &devt_attr);
+		device_remove_file(dev, &dev_attr_dev);
 	}
 	if (dev->class) {
 		device_remove_class_symlinks(dev);
 
-		mutex_lock(&dev->class->p->class_mutex);
+		mutex_lock(&dev->class->p->mutex);
 		/* notify any interfaces that the device is now gone */
 		list_for_each_entry(class_intf,
-				    &dev->class->p->class_interfaces, node)
+				    &dev->class->p->interfaces, node)
 			if (class_intf->remove_dev)
 				class_intf->remove_dev(dev, class_intf);
 		/* remove the device from the class list */
 		klist_del(&dev->knode_class);
-		mutex_unlock(&dev->class->p->class_mutex);
+		mutex_unlock(&dev->class->p->mutex);
 	}
-	device_remove_file(dev, &uevent_attr);
+	device_remove_file(dev, &dev_attr_uevent);
 	device_remove_attrs(dev);
 	bus_remove_device(dev);
-
-	/*
-	 * Some platform devices are driven without driver attached
-	 * and managed resources may have been acquired.  Make sure
-	 * all resources are released.
-	 */
-	devres_release_all(dev);
+	device_pm_remove(dev);
+	driver_deferred_probe_del(dev);
 
 	/* Notify the platform of the removal, in case they
 	 * need to do anything...
@@ -1100,6 +1216,7 @@ void device_del(struct device *dev)
 	kobject_del(&dev->kobj);
 	put_device(parent);
 }
+EXPORT_SYMBOL_GPL(device_del);
 
 /**
  * device_unregister - unregister device from system.
@@ -1118,6 +1235,7 @@ void device_unregister(struct device *dev)
 	device_del(dev);
 	put_device(dev);
 }
+EXPORT_SYMBOL_GPL(device_unregister);
 
 static struct device *next_device(struct klist_iter *i)
 {
@@ -1136,6 +1254,8 @@ static struct device *next_device(struct klist_iter *i)
  * device_get_devnode - path of device node file
  * @dev: device
  * @mode: returned file access mode
+ * @uid: returned file owner
+ * @gid: returned file group
  * @tmp: possibly allocated string
  *
  * Return the relative path of a possible device node.
@@ -1144,7 +1264,8 @@ static struct device *next_device(struct klist_iter *i)
  * freed by the caller.
  */
 const char *device_get_devnode(struct device *dev,
-			       mode_t *mode, const char **tmp)
+			       umode_t *mode, kuid_t *uid, kgid_t *gid,
+			       const char **tmp)
 {
 	char *s;
 
@@ -1152,7 +1273,7 @@ const char *device_get_devnode(struct device *dev,
 
 	/* the device type may provide a specific name */
 	if (dev->type && dev->type->devnode)
-		*tmp = dev->type->devnode(dev, mode);
+		*tmp = dev->type->devnode(dev, mode, uid, gid);
 	if (*tmp)
 		return *tmp;
 
@@ -1178,8 +1299,8 @@ const char *device_get_devnode(struct device *dev,
 /**
  * device_for_each_child - device child iterator.
  * @parent: parent struct device.
- * @data: data for the callback.
  * @fn: function to be called for each device.
+ * @data: data for the callback.
  *
  * Iterate over @parent's child devices, and call @fn for each,
  * passing it @data.
@@ -1203,12 +1324,13 @@ int device_for_each_child(struct device *parent, void *data,
 	klist_iter_exit(&i);
 	return error;
 }
+EXPORT_SYMBOL_GPL(device_for_each_child);
 
 /**
  * device_find_child - device iterator for locating a particular device.
  * @parent: parent struct device
- * @data: Data to pass to match function
  * @match: Callback function to check device
+ * @data: Data to pass to match function
  *
  * This is similar to the device_for_each_child() function above, but it
  * returns a reference to a device that is 'found' for later use, as
@@ -1218,6 +1340,8 @@ int device_for_each_child(struct device *parent, void *data,
  * if it does.  If the callback returns non-zero and a reference to the
  * current device can be obtained, this function will return to the caller
  * and not iterate over any more devices.
+ *
+ * NOTE: you will need to drop the reference with put_device() after use.
  */
 struct device *device_find_child(struct device *parent, void *data,
 				 int (*match)(struct device *dev, void *data))
@@ -1235,6 +1359,7 @@ struct device *device_find_child(struct device *parent, void *data,
 	klist_iter_exit(&i);
 	return child;
 }
+EXPORT_SYMBOL_GPL(device_find_child);
 
 int __init devices_init(void)
 {
@@ -1262,28 +1387,96 @@ int __init devices_init(void)
 	return -ENOMEM;
 }
 
-EXPORT_SYMBOL_GPL(device_for_each_child);
-EXPORT_SYMBOL_GPL(device_find_child);
+static int device_check_offline(struct device *dev, void *not_used)
+{
+	int ret;
 
-EXPORT_SYMBOL_GPL(device_initialize);
-EXPORT_SYMBOL_GPL(device_add);
-EXPORT_SYMBOL_GPL(device_register);
+	ret = device_for_each_child(dev, NULL, device_check_offline);
+	if (ret)
+		return ret;
 
-EXPORT_SYMBOL_GPL(device_del);
-EXPORT_SYMBOL_GPL(device_unregister);
-EXPORT_SYMBOL_GPL(get_device);
-EXPORT_SYMBOL_GPL(put_device);
+	return device_supports_offline(dev) && !dev->offline ? -EBUSY : 0;
+}
 
-EXPORT_SYMBOL_GPL(device_create_file);
-EXPORT_SYMBOL_GPL(device_remove_file);
+/**
+ * device_offline - Prepare the device for hot-removal.
+ * @dev: Device to be put offline.
+ *
+ * Execute the device bus type's .offline() callback, if present, to prepare
+ * the device for a subsequent hot-removal.  If that succeeds, the device must
+ * not be used until either it is removed or its bus type's .online() callback
+ * is executed.
+ *
+ * Call under device_hotplug_lock.
+ */
+int device_offline(struct device *dev)
+{
+	int ret;
+
+	if (dev->offline_disabled)
+		return -EPERM;
+
+	ret = device_for_each_child(dev, NULL, device_check_offline);
+	if (ret)
+		return ret;
+
+	device_lock(dev);
+	if (device_supports_offline(dev)) {
+		if (dev->offline) {
+			ret = 1;
+		} else {
+			ret = dev->bus->offline(dev);
+			if (!ret) {
+				kobject_uevent(&dev->kobj, KOBJ_OFFLINE);
+				dev->offline = true;
+			}
+		}
+	}
+	device_unlock(dev);
 
-struct root_device
+	return ret;
+}
+
+/**
+ * device_online - Put the device back online after successful device_offline().
+ * @dev: Device to be put back online.
+ *
+ * If device_offline() has been successfully executed for @dev, but the device
+ * has not been removed subsequently, execute its bus type's .online() callback
+ * to indicate that the device can be used again.
+ *
+ * Call under device_hotplug_lock.
+ */
+int device_online(struct device *dev)
 {
+	int ret = 0;
+
+	device_lock(dev);
+	if (device_supports_offline(dev)) {
+		if (dev->offline) {
+			ret = dev->bus->online(dev);
+			if (!ret) {
+				kobject_uevent(&dev->kobj, KOBJ_ONLINE);
+				dev->offline = false;
+			}
+		} else {
+			ret = 1;
+		}
+	}
+	device_unlock(dev);
+
+	return ret;
+}
+
+struct root_device {
 	struct device dev;
 	struct module *owner;
 };
 
-#define to_root_device(dev) container_of(dev, struct root_device, dev)
+static inline struct root_device *to_root_device(struct device *d)
+{
+	return container_of(d, struct root_device, dev);
+}
 
 static void root_device_release(struct device *dev)
 {
@@ -1377,34 +1570,11 @@ static void device_create_release(struct device *dev)
 	kfree(dev);
 }
 
-/**
- * device_create_vargs - creates a device and registers it with sysfs
- * @class: pointer to the struct class that this device should be registered to
- * @parent: pointer to the parent struct device of this new device, if any
- * @devt: the dev_t for the char device to be added
- * @drvdata: the data to be added to the device for callbacks
- * @fmt: string for the device's name
- * @args: va_list for the device's name
- *
- * This function can be used by char device classes.  A struct device
- * will be created in sysfs, registered to the specified class.
- *
- * A "dev" file will be created, showing the dev_t for the device, if
- * the dev_t is not 0,0.
- * If a pointer to a parent struct device is passed in, the newly created
- * struct device will be a child of that device in sysfs.
- * The pointer to the struct device will be returned from the call.
- * Any further sysfs files that might be required can be created using this
- * pointer.
- *
- * Returns &struct device pointer on success, or ERR_PTR() on error.
- *
- * Note: the struct class passed to this function must have previously
- * been created with a call to class_create().
- */
-struct device *device_create_vargs(struct class *class, struct device *parent,
-				   dev_t devt, void *drvdata, const char *fmt,
-				   va_list args)
+static struct device *
+device_create_groups_vargs(struct class *class, struct device *parent,
+			   dev_t devt, void *drvdata,
+			   const struct attribute_group **groups,
+			   const char *fmt, va_list args)
 {
 	struct device *dev = NULL;
 	int retval = -ENODEV;
@@ -1418,9 +1588,11 @@ struct device *device_create_vargs(struct class *class, struct device *parent,
 		goto error;
 	}
 
+	device_initialize(dev);
 	dev->devt = devt;
 	dev->class = class;
 	dev->parent = parent;
+	dev->groups = groups;
 	dev->release = device_create_release;
 	dev_set_drvdata(dev, drvdata);
 
@@ -1428,7 +1600,7 @@ struct device *device_create_vargs(struct class *class, struct device *parent,
 	if (retval)
 		goto error;
 
-	retval = device_register(dev);
+	retval = device_add(dev);
 	if (retval)
 		goto error;
 
@@ -1438,6 +1610,39 @@ error:
 	put_device(dev);
 	return ERR_PTR(retval);
 }
+
+/**
+ * device_create_vargs - creates a device and registers it with sysfs
+ * @class: pointer to the struct class that this device should be registered to
+ * @parent: pointer to the parent struct device of this new device, if any
+ * @devt: the dev_t for the char device to be added
+ * @drvdata: the data to be added to the device for callbacks
+ * @fmt: string for the device's name
+ * @args: va_list for the device's name
+ *
+ * This function can be used by char device classes.  A struct device
+ * will be created in sysfs, registered to the specified class.
+ *
+ * A "dev" file will be created, showing the dev_t for the device, if
+ * the dev_t is not 0,0.
+ * If a pointer to a parent struct device is passed in, the newly created
+ * struct device will be a child of that device in sysfs.
+ * The pointer to the struct device will be returned from the call.
+ * Any further sysfs files that might be required can be created using this
+ * pointer.
+ *
+ * Returns &struct device pointer on success, or ERR_PTR() on error.
+ *
+ * Note: the struct class passed to this function must have previously
+ * been created with a call to class_create().
+ */
+struct device *device_create_vargs(struct class *class, struct device *parent,
+				   dev_t devt, void *drvdata, const char *fmt,
+				   va_list args)
+{
+	return device_create_groups_vargs(class, parent, devt, drvdata, NULL,
+					  fmt, args);
+}
 EXPORT_SYMBOL_GPL(device_create_vargs);
 
 /**
@@ -1477,9 +1682,53 @@ struct device *device_create(struct class *class, struct device *parent,
 }
 EXPORT_SYMBOL_GPL(device_create);
 
-static int __match_devt(struct device *dev, void *data)
+/**
+ * device_create_with_groups - creates a device and registers it with sysfs
+ * @class: pointer to the struct class that this device should be registered to
+ * @parent: pointer to the parent struct device of this new device, if any
+ * @devt: the dev_t for the char device to be added
+ * @drvdata: the data to be added to the device for callbacks
+ * @groups: NULL-terminated list of attribute groups to be created
+ * @fmt: string for the device's name
+ *
+ * This function can be used by char device classes.  A struct device
+ * will be created in sysfs, registered to the specified class.
+ * Additional attributes specified in the groups parameter will also
+ * be created automatically.
+ *
+ * A "dev" file will be created, showing the dev_t for the device, if
+ * the dev_t is not 0,0.
+ * If a pointer to a parent struct device is passed in, the newly created
+ * struct device will be a child of that device in sysfs.
+ * The pointer to the struct device will be returned from the call.
+ * Any further sysfs files that might be required can be created using this
+ * pointer.
+ *
+ * Returns &struct device pointer on success, or ERR_PTR() on error.
+ *
+ * Note: the struct class passed to this function must have previously
+ * been created with a call to class_create().
+ */
+struct device *device_create_with_groups(struct class *class,
+					 struct device *parent, dev_t devt,
+					 void *drvdata,
+					 const struct attribute_group **groups,
+					 const char *fmt, ...)
+{
+	va_list vargs;
+	struct device *dev;
+
+	va_start(vargs, fmt);
+	dev = device_create_groups_vargs(class, parent, devt, drvdata, groups,
+					 fmt, vargs);
+	va_end(vargs);
+	return dev;
+}
+EXPORT_SYMBOL_GPL(device_create_with_groups);
+
+static int __match_devt(struct device *dev, const void *data)
 {
-	dev_t *devt = data;
+	const dev_t *devt = data;
 
 	return dev->devt == *devt;
 }
@@ -1513,11 +1762,39 @@ EXPORT_SYMBOL_GPL(device_destroy);
  * exclusion between two different calls of device_rename
  * on the same device to ensure that new_name is valid and
  * won't conflict with other devices.
+ *
+ * Note: Don't call this function.  Currently, the networking layer calls this
+ * function, but that will change.  The following text from Kay Sievers offers
+ * some insight:
+ *
+ * Renaming devices is racy at many levels, symlinks and other stuff are not
+ * replaced atomically, and you get a "move" uevent, but it's not easy to
+ * connect the event to the old and new device. Device nodes are not renamed at
+ * all, there isn't even support for that in the kernel now.
+ *
+ * In the meantime, during renaming, your target name might be taken by another
+ * driver, creating conflicts. Or the old name is taken directly after you
+ * renamed it -- then you get events for the same DEVPATH, before you even see
+ * the "move" event. It's just a mess, and nothing new should ever rely on
+ * kernel device renaming. Besides that, it's not even implemented now for
+ * other things than (driver-core wise very simple) network devices.
+ *
+ * We are currently about to change network renaming in udev to completely
+ * disallow renaming of devices in the same namespace as the kernel uses,
+ * because we can't solve the problems properly, that arise with swapping names
+ * of multiple interfaces without races. Means, renaming of eth[0-9]* will only
+ * be allowed to some other name than eth[0-9]*, for the aforementioned
+ * reasons.
+ *
+ * Make up a "real" name in the driver before you register anything, or add
+ * some other attributes for userspace to find the device, or use udev to add
+ * symlinks -- but never rename kernel devices later, it's a complete mess. We
+ * don't even want to get into that and try to implement the missing pieces in
+ * the core. We really have other pieces to fix in the driver core mess. :)
  */
 int device_rename(struct device *dev, const char *new_name)
 {
-	char *old_class_name = NULL;
-	char *new_class_name = NULL;
+	struct kobject *kobj = &dev->kobj;
 	char *old_device_name = NULL;
 	int error;
 
@@ -1525,8 +1802,7 @@ int device_rename(struct device *dev, const char *new_name)
 	if (!dev)
 		return -EINVAL;
 
-	pr_debug("device: '%s': %s: renaming to '%s'\n", dev_name(dev),
-		 __func__, new_name);
+	dev_dbg(dev, "renaming to %s\n", new_name);
 
 	old_device_name = kstrdup(dev_name(dev), GFP_KERNEL);
 	if (!old_device_name) {
@@ -1535,21 +1811,20 @@ int device_rename(struct device *dev, const char *new_name)
 	}
 
 	if (dev->class) {
-		error = sysfs_rename_link(&dev->class->p->class_subsys.kobj,
-			&dev->kobj, old_device_name, new_name);
+		error = sysfs_rename_link_ns(&dev->class->p->subsys.kobj,
+					     kobj, old_device_name,
+					     new_name, kobject_namespace(kobj));
 		if (error)
 			goto out;
 	}
 
-	error = kobject_rename(&dev->kobj, new_name);
+	error = kobject_rename(kobj, new_name);
 	if (error)
 		goto out;
 
 out:
 	put_device(dev);
 
-	kfree(new_class_name);
-	kfree(old_class_name);
 	kfree(old_device_name);
 
 	return error;
@@ -1609,25 +1884,25 @@ int device_move(struct device *dev, struct device *new_parent,
 		set_dev_node(dev, dev_to_node(new_parent));
 	}
 
-	if (!dev->class)
-		goto out_put;
-	error = device_move_class_links(dev, old_parent, new_parent);
-	if (error) {
-		/* We ignore errors on cleanup since we're hosed anyway... */
-		device_move_class_links(dev, new_parent, old_parent);
-		if (!kobject_move(&dev->kobj, &old_parent->kobj)) {
-			if (new_parent)
-				klist_remove(&dev->p->knode_parent);
-			dev->parent = old_parent;
-			if (old_parent) {
-				klist_add_tail(&dev->p->knode_parent,
-					       &old_parent->p->klist_children);
-				set_dev_node(dev, dev_to_node(old_parent));
+	if (dev->class) {
+		error = device_move_class_links(dev, old_parent, new_parent);
+		if (error) {
+			/* We ignore errors on cleanup since we're hosed anyway... */
+			device_move_class_links(dev, new_parent, old_parent);
+			if (!kobject_move(&dev->kobj, &old_parent->kobj)) {
+				if (new_parent)
+					klist_remove(&dev->p->knode_parent);
+				dev->parent = old_parent;
+				if (old_parent) {
+					klist_add_tail(&dev->p->knode_parent,
+						       &old_parent->p->klist_children);
+					set_dev_node(dev, dev_to_node(old_parent));
+				}
 			}
+			cleanup_glue_dir(dev, new_parent_kobj);
+			put_device(new_parent);
+			goto out;
 		}
-		cleanup_glue_dir(dev, new_parent_kobj);
-		put_device(new_parent);
-		goto out;
 	}
 	switch (dpm_order) {
 	case DPM_ORDER_NONE:
@@ -1642,7 +1917,7 @@ int device_move(struct device *dev, struct device *new_parent,
 		device_pm_move_last(dev);
 		break;
 	}
-out_put:
+
 	put_device(old_parent);
 out:
 	device_pm_unlock();
@@ -1656,7 +1931,7 @@ EXPORT_SYMBOL_GPL(device_move);
  */
 void device_shutdown(void)
 {
-	struct device *dev;
+	struct device *dev, *parent;
 
 	spin_lock(&devices_kset->list_lock);
 	/*
@@ -1667,6 +1942,13 @@ void device_shutdown(void)
 	while (!list_empty(&devices_kset->list)) {
 		dev = list_entry(devices_kset->list.prev, struct device,
 				kobj.entry);
+
+		/*
+		 * Hold a reference to the device's parent so that
+		 * it cannot be freed while the parent's lock is
+		 * held below.
+		 */
+		parent = get_device(dev->parent);
 		get_device(dev);
 		/*
 		 * Make sure the device is off the kset list, in the
@@ -1675,19 +1957,35 @@ void device_shutdown(void)
 		list_del_init(&dev->kobj.entry);
 		spin_unlock(&devices_kset->list_lock);
 
+		/* hold lock to avoid race with probe/release */
+		if (parent)
+			device_lock(parent);
+		device_lock(dev);
+
+		/* Don't allow any more runtime suspends */
+		pm_runtime_get_noresume(dev);
+		pm_runtime_barrier(dev);
+
 		if (dev->bus && dev->bus->shutdown) {
-			dev_dbg(dev, "shutdown\n");
+			if (initcall_debug)
+				dev_info(dev, "shutdown\n");
 			dev->bus->shutdown(dev);
 		} else if (dev->driver && dev->driver->shutdown) {
-			dev_dbg(dev, "shutdown\n");
+			if (initcall_debug)
+				dev_info(dev, "shutdown\n");
 			dev->driver->shutdown(dev);
 		}
+
+		device_unlock(dev);
+		if (parent)
+			device_unlock(parent);
+
 		put_device(dev);
+		put_device(parent);
 
 		spin_lock(&devices_kset->list_lock);
 	}
 	spin_unlock(&devices_kset->list_lock);
-	async_synchronize_full();
 }
 
 /*
@@ -1695,6 +1993,80 @@ void device_shutdown(void)
  */
 
 #ifdef CONFIG_PRINTK
+static int
+create_syslog_header(const struct device *dev, char *hdr, size_t hdrlen)
+{
+	const char *subsys;
+	size_t pos = 0;
+
+	if (dev->class)
+		subsys = dev->class->name;
+	else if (dev->bus)
+		subsys = dev->bus->name;
+	else
+		return 0;
+
+	pos += snprintf(hdr + pos, hdrlen - pos, "SUBSYSTEM=%s", subsys);
+
+	/*
+	 * Add device identifier DEVICE=:
+	 *   b12:8         block dev_t
+	 *   c127:3        char dev_t
+	 *   n8            netdev ifindex
+	 *   +sound:card0  subsystem:devname
+	 */
+	if (MAJOR(dev->devt)) {
+		char c;
+
+		if (strcmp(subsys, "block") == 0)
+			c = 'b';
+		else
+			c = 'c';
+		pos++;
+		pos += snprintf(hdr + pos, hdrlen - pos,
+				"DEVICE=%c%u:%u",
+				c, MAJOR(dev->devt), MINOR(dev->devt));
+	} else if (strcmp(subsys, "net") == 0) {
+		struct net_device *net = to_net_dev(dev);
+
+		pos++;
+		pos += snprintf(hdr + pos, hdrlen - pos,
+				"DEVICE=n%u", net->ifindex);
+	} else {
+		pos++;
+		pos += snprintf(hdr + pos, hdrlen - pos,
+				"DEVICE=+%s:%s", subsys, dev_name(dev));
+	}
+
+	return pos;
+}
+
+int dev_vprintk_emit(int level, const struct device *dev,
+		     const char *fmt, va_list args)
+{
+	char hdr[128];
+	size_t hdrlen;
+
+	hdrlen = create_syslog_header(dev, hdr, sizeof(hdr));
+
+	return vprintk_emit(0, level, hdrlen ? hdr : NULL, hdrlen, fmt, args);
+}
+EXPORT_SYMBOL(dev_vprintk_emit);
+
+int dev_printk_emit(int level, const struct device *dev, const char *fmt, ...)
+{
+	va_list args;
+	int r;
+
+	va_start(args, fmt);
+
+	r = dev_vprintk_emit(level, dev, fmt, args);
+
+	va_end(args);
+
+	return r;
+}
+EXPORT_SYMBOL(dev_printk_emit);
 
 static int __dev_printk(const char *level, const struct device *dev,
 			struct va_format *vaf)
@@ -1702,8 +2074,9 @@ static int __dev_printk(const char *level, const struct device *dev,
 	if (!dev)
 		return printk("%s(NULL device *): %pV", level, vaf);
 
-	return printk("%s%s %s: %pV",
-		      level, dev_driver_string(dev), dev_name(dev), vaf);
+	return dev_printk_emit(level[1] - '0', dev,
+			       "%s %s: %pV",
+			       dev_driver_string(dev), dev_name(dev), vaf);
 }
 
 int dev_printk(const char *level, const struct device *dev,
@@ -1719,6 +2092,7 @@ int dev_printk(const char *level, const struct device *dev,
 	vaf.va = &args;
 
 	r = __dev_printk(level, dev, &vaf);
+
 	va_end(args);
 
 	return r;
@@ -1738,6 +2112,7 @@ int func(const struct device *dev, const char *fmt, ...)	\
 	vaf.va = &args;						\
 								\
 	r = __dev_printk(kern_level, dev, &vaf);		\
+								\
 	va_end(args);						\
 								\
 	return r;						\
diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c
index 251acea3d35..006b1bc5297 100644
--- a/drivers/base/cpu.c
+++ b/drivers/base/cpu.c
@@ -1,8 +1,8 @@
 /*
- * drivers/base/cpu.c - basic CPU class support
+ * CPU subsystem support
  */
 
-#include <linux/sysdev.h>
+#include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/sched.h>
@@ -11,113 +11,139 @@
 #include <linux/device.h>
 #include <linux/node.h>
 #include <linux/gfp.h>
+#include <linux/slab.h>
+#include <linux/percpu.h>
+#include <linux/acpi.h>
+#include <linux/of.h>
+#include <linux/cpufeature.h>
 
 #include "base.h"
 
-static struct sysdev_class_attribute *cpu_sysdev_class_attrs[];
+static DEFINE_PER_CPU(struct device *, cpu_sys_devices);
 
-struct sysdev_class cpu_sysdev_class = {
-	.name = "cpu",
-	.attrs = cpu_sysdev_class_attrs,
-};
-EXPORT_SYMBOL(cpu_sysdev_class);
+static int cpu_subsys_match(struct device *dev, struct device_driver *drv)
+{
+	/* ACPI style match is the only one that may succeed. */
+	if (acpi_driver_match_device(dev, drv))
+		return 1;
 
-static DEFINE_PER_CPU(struct sys_device *, cpu_sys_devices);
+	return 0;
+}
 
 #ifdef CONFIG_HOTPLUG_CPU
-static ssize_t show_online(struct sys_device *dev, struct sysdev_attribute *attr,
-			   char *buf)
+static void change_cpu_under_node(struct cpu *cpu,
+			unsigned int from_nid, unsigned int to_nid)
 {
-	struct cpu *cpu = container_of(dev, struct cpu, sysdev);
-
-	return sprintf(buf, "%u\n", !!cpu_online(cpu->sysdev.id));
+	int cpuid = cpu->dev.id;
+	unregister_cpu_under_node(cpuid, from_nid);
+	register_cpu_under_node(cpuid, to_nid);
+	cpu->node_id = to_nid;
 }
 
-static ssize_t __ref store_online(struct sys_device *dev, struct sysdev_attribute *attr,
-				 const char *buf, size_t count)
+static int __ref cpu_subsys_online(struct device *dev)
 {
-	struct cpu *cpu = container_of(dev, struct cpu, sysdev);
-	ssize_t ret;
-
-	cpu_hotplug_driver_lock();
-	switch (buf[0]) {
-	case '0':
-		ret = cpu_down(cpu->sysdev.id);
-		if (!ret)
-			kobject_uevent(&dev->kobj, KOBJ_OFFLINE);
-		break;
-	case '1':
-		ret = cpu_up(cpu->sysdev.id);
-		if (!ret)
-			kobject_uevent(&dev->kobj, KOBJ_ONLINE);
-		break;
-	default:
-		ret = -EINVAL;
-	}
-	cpu_hotplug_driver_unlock();
+	struct cpu *cpu = container_of(dev, struct cpu, dev);
+	int cpuid = dev->id;
+	int from_nid, to_nid;
+	int ret;
+
+	from_nid = cpu_to_node(cpuid);
+	if (from_nid == NUMA_NO_NODE)
+		return -ENODEV;
+
+	ret = cpu_up(cpuid);
+	/*
+	 * When hot adding memory to memoryless node and enabling a cpu
+	 * on the node, node number of the cpu may internally change.
+	 */
+	to_nid = cpu_to_node(cpuid);
+	if (from_nid != to_nid)
+		change_cpu_under_node(cpu, from_nid, to_nid);
 
-	if (ret >= 0)
-		ret = count;
 	return ret;
 }
-static SYSDEV_ATTR(online, 0644, show_online, store_online);
 
-static void __cpuinit register_cpu_control(struct cpu *cpu)
+static int cpu_subsys_offline(struct device *dev)
 {
-	sysdev_create_file(&cpu->sysdev, &attr_online);
+	return cpu_down(dev->id);
 }
+
 void unregister_cpu(struct cpu *cpu)
 {
-	int logical_cpu = cpu->sysdev.id;
+	int logical_cpu = cpu->dev.id;
 
 	unregister_cpu_under_node(logical_cpu, cpu_to_node(logical_cpu));
 
-	sysdev_remove_file(&cpu->sysdev, &attr_online);
-
-	sysdev_unregister(&cpu->sysdev);
+	device_unregister(&cpu->dev);
 	per_cpu(cpu_sys_devices, logical_cpu) = NULL;
 	return;
 }
 
 #ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
-static ssize_t cpu_probe_store(struct sysdev_class *class,
-			       struct sysdev_class_attribute *attr,
+static ssize_t cpu_probe_store(struct device *dev,
+			       struct device_attribute *attr,
 			       const char *buf,
 			       size_t count)
 {
-	return arch_cpu_probe(buf, count);
+	ssize_t cnt;
+	int ret;
+
+	ret = lock_device_hotplug_sysfs();
+	if (ret)
+		return ret;
+
+	cnt = arch_cpu_probe(buf, count);
+
+	unlock_device_hotplug();
+	return cnt;
 }
 
-static ssize_t cpu_release_store(struct sysdev_class *class,
-				 struct sysdev_class_attribute *attr,
+static ssize_t cpu_release_store(struct device *dev,
+				 struct device_attribute *attr,
 				 const char *buf,
 				 size_t count)
 {
-	return arch_cpu_release(buf, count);
-}
+	ssize_t cnt;
+	int ret;
 
-static SYSDEV_CLASS_ATTR(probe, S_IWUSR, NULL, cpu_probe_store);
-static SYSDEV_CLASS_ATTR(release, S_IWUSR, NULL, cpu_release_store);
-#endif /* CONFIG_ARCH_CPU_PROBE_RELEASE */
+	ret = lock_device_hotplug_sysfs();
+	if (ret)
+		return ret;
 
-#else /* ... !CONFIG_HOTPLUG_CPU */
-static inline void register_cpu_control(struct cpu *cpu)
-{
+	cnt = arch_cpu_release(buf, count);
+
+	unlock_device_hotplug();
+	return cnt;
 }
+
+static DEVICE_ATTR(probe, S_IWUSR, NULL, cpu_probe_store);
+static DEVICE_ATTR(release, S_IWUSR, NULL, cpu_release_store);
+#endif /* CONFIG_ARCH_CPU_PROBE_RELEASE */
 #endif /* CONFIG_HOTPLUG_CPU */
 
+struct bus_type cpu_subsys = {
+	.name = "cpu",
+	.dev_name = "cpu",
+	.match = cpu_subsys_match,
+#ifdef CONFIG_HOTPLUG_CPU
+	.online = cpu_subsys_online,
+	.offline = cpu_subsys_offline,
+#endif
+};
+EXPORT_SYMBOL_GPL(cpu_subsys);
+
 #ifdef CONFIG_KEXEC
 #include <linux/kexec.h>
 
-static ssize_t show_crash_notes(struct sys_device *dev, struct sysdev_attribute *attr,
+static ssize_t show_crash_notes(struct device *dev, struct device_attribute *attr,
 				char *buf)
 {
-	struct cpu *cpu = container_of(dev, struct cpu, sysdev);
+	struct cpu *cpu = container_of(dev, struct cpu, dev);
 	ssize_t rc;
 	unsigned long long addr;
 	int cpunum;
 
-	cpunum = cpu->sysdev.id;
+	cpunum = cpu->dev.id;
 
 	/*
 	 * Might be reading other cpu's data based on which cpu read thread
@@ -129,20 +155,55 @@ static ssize_t show_crash_notes(struct sys_device *dev, struct sysdev_attribute
 	rc = sprintf(buf, "%Lx\n", addr);
 	return rc;
 }
-static SYSDEV_ATTR(crash_notes, 0400, show_crash_notes, NULL);
+static DEVICE_ATTR(crash_notes, 0400, show_crash_notes, NULL);
+
+static ssize_t show_crash_notes_size(struct device *dev,
+				     struct device_attribute *attr,
+				     char *buf)
+{
+	ssize_t rc;
+
+	rc = sprintf(buf, "%zu\n", sizeof(note_buf_t));
+	return rc;
+}
+static DEVICE_ATTR(crash_notes_size, 0400, show_crash_notes_size, NULL);
+
+static struct attribute *crash_note_cpu_attrs[] = {
+	&dev_attr_crash_notes.attr,
+	&dev_attr_crash_notes_size.attr,
+	NULL
+};
+
+static struct attribute_group crash_note_cpu_attr_group = {
+	.attrs = crash_note_cpu_attrs,
+};
+#endif
+
+static const struct attribute_group *common_cpu_attr_groups[] = {
+#ifdef CONFIG_KEXEC
+	&crash_note_cpu_attr_group,
 #endif
+	NULL
+};
+
+static const struct attribute_group *hotplugable_cpu_attr_groups[] = {
+#ifdef CONFIG_KEXEC
+	&crash_note_cpu_attr_group,
+#endif
+	NULL
+};
 
 /*
  * Print cpu online, possible, present, and system maps
  */
 
 struct cpu_attr {
-	struct sysdev_class_attribute attr;
+	struct device_attribute attr;
 	const struct cpumask *const * const map;
 };
 
-static ssize_t show_cpus_attr(struct sysdev_class *class,
-			      struct sysdev_class_attribute *attr,
+static ssize_t show_cpus_attr(struct device *dev,
+			      struct device_attribute *attr,
 			      char *buf)
 {
 	struct cpu_attr *ca = container_of(attr, struct cpu_attr, attr);
@@ -153,10 +214,10 @@ static ssize_t show_cpus_attr(struct sysdev_class *class,
 	return n;
 }
 
-#define _CPU_ATTR(name, map)						\
-	{ _SYSDEV_CLASS_ATTR(name, 0444, show_cpus_attr, NULL), map }
+#define _CPU_ATTR(name, map) \
+	{ __ATTR(name, 0444, show_cpus_attr, NULL), map }
 
-/* Keep in sync with cpu_sysdev_class_attrs */
+/* Keep in sync with cpu_subsys_attrs */
 static struct cpu_attr cpu_attrs[] = {
 	_CPU_ATTR(online, &cpu_online_mask),
 	_CPU_ATTR(possible, &cpu_possible_mask),
@@ -166,19 +227,19 @@ static struct cpu_attr cpu_attrs[] = {
 /*
  * Print values for NR_CPUS and offlined cpus
  */
-static ssize_t print_cpus_kernel_max(struct sysdev_class *class,
-				     struct sysdev_class_attribute *attr, char *buf)
+static ssize_t print_cpus_kernel_max(struct device *dev,
+				     struct device_attribute *attr, char *buf)
 {
 	int n = snprintf(buf, PAGE_SIZE-2, "%d\n", NR_CPUS - 1);
 	return n;
 }
-static SYSDEV_CLASS_ATTR(kernel_max, 0444, print_cpus_kernel_max, NULL);
+static DEVICE_ATTR(kernel_max, 0444, print_cpus_kernel_max, NULL);
 
 /* arch-optional setting to enable display of offline cpus >= nr_cpu_ids */
 unsigned int total_cpus;
 
-static ssize_t print_cpus_offline(struct sysdev_class *class,
-				  struct sysdev_class_attribute *attr, char *buf)
+static ssize_t print_cpus_offline(struct device *dev,
+				  struct device_attribute *attr, char *buf)
 {
 	int n = 0, len = PAGE_SIZE-2;
 	cpumask_var_t offline;
@@ -205,7 +266,61 @@ static ssize_t print_cpus_offline(struct sysdev_class *class,
 	n += snprintf(&buf[n], len - n, "\n");
 	return n;
 }
-static SYSDEV_CLASS_ATTR(offline, 0444, print_cpus_offline, NULL);
+static DEVICE_ATTR(offline, 0444, print_cpus_offline, NULL);
+
+static void cpu_device_release(struct device *dev)
+{
+	/*
+	 * This is an empty function to prevent the driver core from spitting a
+	 * warning at us.  Yes, I know this is directly opposite of what the
+	 * documentation for the driver core and kobjects say, and the author
+	 * of this code has already been publicly ridiculed for doing
+	 * something as foolish as this.  However, at this point in time, it is
+	 * the only way to handle the issue of statically allocated cpu
+	 * devices.  The different architectures will have their cpu device
+	 * code reworked to properly handle this in the near future, so this
+	 * function will then be changed to correctly free up the memory held
+	 * by the cpu device.
+	 *
+	 * Never copy this way of doing things, or you too will be made fun of
+	 * on the linux-kernel list, you have been warned.
+	 */
+}
+
+#ifdef CONFIG_GENERIC_CPU_AUTOPROBE
+static ssize_t print_cpu_modalias(struct device *dev,
+				  struct device_attribute *attr,
+				  char *buf)
+{
+	ssize_t n;
+	u32 i;
+
+	n = sprintf(buf, "cpu:type:" CPU_FEATURE_TYPEFMT ":feature:",
+		    CPU_FEATURE_TYPEVAL);
+
+	for (i = 0; i < MAX_CPU_FEATURES; i++)
+		if (cpu_have_feature(i)) {
+			if (PAGE_SIZE < n + sizeof(",XXXX\n")) {
+				WARN(1, "CPU features overflow page\n");
+				break;
+			}
+			n += sprintf(&buf[n], ",%04X", i);
+		}
+	buf[n++] = '\n';
+	return n;
+}
+
+static int cpu_uevent(struct device *dev, struct kobj_uevent_env *env)
+{
+	char *buf = kzalloc(PAGE_SIZE, GFP_KERNEL);
+	if (buf) {
+		print_cpu_modalias(NULL, NULL, buf);
+		add_uevent_var(env, "MODALIAS=%s", buf);
+		kfree(buf);
+	}
+	return 0;
+}
+#endif
 
 /*
  * register_cpu - Setup a sysfs device for a CPU.
@@ -215,60 +330,98 @@ static SYSDEV_CLASS_ATTR(offline, 0444, print_cpus_offline, NULL);
  *
  * Initialize and register the CPU device.
  */
-int __cpuinit register_cpu(struct cpu *cpu, int num)
+int register_cpu(struct cpu *cpu, int num)
 {
 	int error;
-	cpu->node_id = cpu_to_node(num);
-	cpu->sysdev.id = num;
-	cpu->sysdev.cls = &cpu_sysdev_class;
-
-	error = sysdev_register(&cpu->sysdev);
 
-	if (!error && cpu->hotpluggable)
-		register_cpu_control(cpu);
+	cpu->node_id = cpu_to_node(num);
+	memset(&cpu->dev, 0x00, sizeof(struct device));
+	cpu->dev.id = num;
+	cpu->dev.bus = &cpu_subsys;
+	cpu->dev.release = cpu_device_release;
+	cpu->dev.offline_disabled = !cpu->hotpluggable;
+	cpu->dev.offline = !cpu_online(num);
+	cpu->dev.of_node = of_get_cpu_node(num, NULL);
+#ifdef CONFIG_GENERIC_CPU_AUTOPROBE
+	cpu->dev.bus->uevent = cpu_uevent;
+#endif
+	cpu->dev.groups = common_cpu_attr_groups;
+	if (cpu->hotpluggable)
+		cpu->dev.groups = hotplugable_cpu_attr_groups;
+	error = device_register(&cpu->dev);
 	if (!error)
-		per_cpu(cpu_sys_devices, num) = &cpu->sysdev;
+		per_cpu(cpu_sys_devices, num) = &cpu->dev;
 	if (!error)
 		register_cpu_under_node(num, cpu_to_node(num));
 
-#ifdef CONFIG_KEXEC
-	if (!error)
-		error = sysdev_create_file(&cpu->sysdev, &attr_crash_notes);
-#endif
 	return error;
 }
 
-struct sys_device *get_cpu_sysdev(unsigned cpu)
+struct device *get_cpu_device(unsigned cpu)
 {
 	if (cpu < nr_cpu_ids && cpu_possible(cpu))
 		return per_cpu(cpu_sys_devices, cpu);
 	else
 		return NULL;
 }
-EXPORT_SYMBOL_GPL(get_cpu_sysdev);
-
-int __init cpu_dev_init(void)
-{
-	int err;
+EXPORT_SYMBOL_GPL(get_cpu_device);
 
-	err = sysdev_class_register(&cpu_sysdev_class);
-#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
-	if (!err)
-		err = sched_create_sysfs_power_savings_entries(&cpu_sysdev_class);
+#ifdef CONFIG_GENERIC_CPU_AUTOPROBE
+static DEVICE_ATTR(modalias, 0444, print_cpu_modalias, NULL);
 #endif
 
-	return err;
-}
-
-static struct sysdev_class_attribute *cpu_sysdev_class_attrs[] = {
+static struct attribute *cpu_root_attrs[] = {
 #ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
-	&attr_probe,
-	&attr_release,
+	&dev_attr_probe.attr,
+	&dev_attr_release.attr,
+#endif
+	&cpu_attrs[0].attr.attr,
+	&cpu_attrs[1].attr.attr,
+	&cpu_attrs[2].attr.attr,
+	&dev_attr_kernel_max.attr,
+	&dev_attr_offline.attr,
+#ifdef CONFIG_GENERIC_CPU_AUTOPROBE
+	&dev_attr_modalias.attr,
 #endif
-	&cpu_attrs[0].attr,
-	&cpu_attrs[1].attr,
-	&cpu_attrs[2].attr,
-	&attr_kernel_max,
-	&attr_offline,
 	NULL
 };
+
+static struct attribute_group cpu_root_attr_group = {
+	.attrs = cpu_root_attrs,
+};
+
+static const struct attribute_group *cpu_root_attr_groups[] = {
+	&cpu_root_attr_group,
+	NULL,
+};
+
+bool cpu_is_hotpluggable(unsigned cpu)
+{
+	struct device *dev = get_cpu_device(cpu);
+	return dev && container_of(dev, struct cpu, dev)->hotpluggable;
+}
+EXPORT_SYMBOL_GPL(cpu_is_hotpluggable);
+
+#ifdef CONFIG_GENERIC_CPU_DEVICES
+static DEFINE_PER_CPU(struct cpu, cpu_devices);
+#endif
+
+static void __init cpu_dev_register_generic(void)
+{
+#ifdef CONFIG_GENERIC_CPU_DEVICES
+	int i;
+
+	for_each_possible_cpu(i) {
+		if (register_cpu(&per_cpu(cpu_devices, i), i))
+			panic("Failed to register CPU device");
+	}
+#endif
+}
+
+void __init cpu_dev_init(void)
+{
+	if (subsys_system_register(&cpu_subsys, cpu_root_attr_groups))
+		panic("Failed to register CPU subsystem");
+
+	cpu_dev_register_generic();
+}
diff --git a/drivers/base/dd.c b/drivers/base/dd.c
index da57ee9d63f..e4ffbcf2f51 100644
--- a/drivers/base/dd.c
+++ b/drivers/base/dd.c
@@ -24,10 +24,173 @@
 #include <linux/wait.h>
 #include <linux/async.h>
 #include <linux/pm_runtime.h>
+#include <linux/pinctrl/devinfo.h>
 
 #include "base.h"
 #include "power/power.h"
 
+/*
+ * Deferred Probe infrastructure.
+ *
+ * Sometimes driver probe order matters, but the kernel doesn't always have
+ * dependency information which means some drivers will get probed before a
+ * resource they depend on is available.  For example, an SDHCI driver may
+ * first need a GPIO line from an i2c GPIO controller before it can be
+ * initialized.  If a required resource is not available yet, a driver can
+ * request probing to be deferred by returning -EPROBE_DEFER from its probe hook
+ *
+ * Deferred probe maintains two lists of devices, a pending list and an active
+ * list.  A driver returning -EPROBE_DEFER causes the device to be added to the
+ * pending list.  A successful driver probe will trigger moving all devices
+ * from the pending to the active list so that the workqueue will eventually
+ * retry them.
+ *
+ * The deferred_probe_mutex must be held any time the deferred_probe_*_list
+ * of the (struct device*)->p->deferred_probe pointers are manipulated
+ */
+static DEFINE_MUTEX(deferred_probe_mutex);
+static LIST_HEAD(deferred_probe_pending_list);
+static LIST_HEAD(deferred_probe_active_list);
+static struct workqueue_struct *deferred_wq;
+static atomic_t deferred_trigger_count = ATOMIC_INIT(0);
+
+/**
+ * deferred_probe_work_func() - Retry probing devices in the active list.
+ */
+static void deferred_probe_work_func(struct work_struct *work)
+{
+	struct device *dev;
+	struct device_private *private;
+	/*
+	 * This block processes every device in the deferred 'active' list.
+	 * Each device is removed from the active list and passed to
+	 * bus_probe_device() to re-attempt the probe.  The loop continues
+	 * until every device in the active list is removed and retried.
+	 *
+	 * Note: Once the device is removed from the list and the mutex is
+	 * released, it is possible for the device to get freed by another
+	 * thread and cause an illegal pointer dereference.  This code uses
+	 * get/put_device() to ensure the device structure cannot disappear
+	 * from under our feet.
+	 */
+	mutex_lock(&deferred_probe_mutex);
+	while (!list_empty(&deferred_probe_active_list)) {
+		private = list_first_entry(&deferred_probe_active_list,
+					typeof(*dev->p), deferred_probe);
+		dev = private->device;
+		list_del_init(&private->deferred_probe);
+
+		get_device(dev);
+
+		/*
+		 * Drop the mutex while probing each device; the probe path may
+		 * manipulate the deferred list
+		 */
+		mutex_unlock(&deferred_probe_mutex);
+
+		/*
+		 * Force the device to the end of the dpm_list since
+		 * the PM code assumes that the order we add things to
+		 * the list is a good order for suspend but deferred
+		 * probe makes that very unsafe.
+		 */
+		device_pm_lock();
+		device_pm_move_last(dev);
+		device_pm_unlock();
+
+		dev_dbg(dev, "Retrying from deferred list\n");
+		bus_probe_device(dev);
+
+		mutex_lock(&deferred_probe_mutex);
+
+		put_device(dev);
+	}
+	mutex_unlock(&deferred_probe_mutex);
+}
+static DECLARE_WORK(deferred_probe_work, deferred_probe_work_func);
+
+static void driver_deferred_probe_add(struct device *dev)
+{
+	mutex_lock(&deferred_probe_mutex);
+	if (list_empty(&dev->p->deferred_probe)) {
+		dev_dbg(dev, "Added to deferred list\n");
+		list_add_tail(&dev->p->deferred_probe, &deferred_probe_pending_list);
+	}
+	mutex_unlock(&deferred_probe_mutex);
+}
+
+void driver_deferred_probe_del(struct device *dev)
+{
+	mutex_lock(&deferred_probe_mutex);
+	if (!list_empty(&dev->p->deferred_probe)) {
+		dev_dbg(dev, "Removed from deferred list\n");
+		list_del_init(&dev->p->deferred_probe);
+	}
+	mutex_unlock(&deferred_probe_mutex);
+}
+
+static bool driver_deferred_probe_enable = false;
+/**
+ * driver_deferred_probe_trigger() - Kick off re-probing deferred devices
+ *
+ * This function moves all devices from the pending list to the active
+ * list and schedules the deferred probe workqueue to process them.  It
+ * should be called anytime a driver is successfully bound to a device.
+ *
+ * Note, there is a race condition in multi-threaded probe. In the case where
+ * more than one device is probing at the same time, it is possible for one
+ * probe to complete successfully while another is about to defer. If the second
+ * depends on the first, then it will get put on the pending list after the
+ * trigger event has already occurred and will be stuck there.
+ *
+ * The atomic 'deferred_trigger_count' is used to determine if a successful
+ * trigger has occurred in the midst of probing a driver. If the trigger count
+ * changes in the midst of a probe, then deferred processing should be triggered
+ * again.
+ */
+static void driver_deferred_probe_trigger(void)
+{
+	if (!driver_deferred_probe_enable)
+		return;
+
+	/*
+	 * A successful probe means that all the devices in the pending list
+	 * should be triggered to be reprobed.  Move all the deferred devices
+	 * into the active list so they can be retried by the workqueue
+	 */
+	mutex_lock(&deferred_probe_mutex);
+	atomic_inc(&deferred_trigger_count);
+	list_splice_tail_init(&deferred_probe_pending_list,
+			      &deferred_probe_active_list);
+	mutex_unlock(&deferred_probe_mutex);
+
+	/*
+	 * Kick the re-probe thread.  It may already be scheduled, but it is
+	 * safe to kick it again.
+	 */
+	queue_work(deferred_wq, &deferred_probe_work);
+}
+
+/**
+ * deferred_probe_initcall() - Enable probing of deferred devices
+ *
+ * We don't want to get in the way when the bulk of drivers are getting probed.
+ * Instead, this initcall makes sure that deferred probing is delayed until
+ * late_initcall time.
+ */
+static int deferred_probe_initcall(void)
+{
+	deferred_wq = create_singlethread_workqueue("deferwq");
+	if (WARN_ON(!deferred_wq))
+		return -ENOMEM;
+
+	driver_deferred_probe_enable = true;
+	driver_deferred_probe_trigger();
+	/* Sort as many dependencies as possible before exiting initcalls */
+	flush_workqueue(deferred_wq);
+	return 0;
+}
+late_initcall(deferred_probe_initcall);
 
 static void driver_bound(struct device *dev)
 {
@@ -37,11 +200,18 @@ static void driver_bound(struct device *dev)
 		return;
 	}
 
-	pr_debug("driver: '%s': %s: bound to device '%s'\n", dev_name(dev),
-		 __func__, dev->driver->name);
+	pr_debug("driver: '%s': %s: bound to device '%s'\n", dev->driver->name,
+		 __func__, dev_name(dev));
 
 	klist_add_tail(&dev->p->knode_driver, &dev->driver->p->klist_devices);
 
+	/*
+	 * Make sure the device is no longer in one of the deferred lists and
+	 * kick off retrying all pending devices
+	 */
+	driver_deferred_probe_del(dev);
+	driver_deferred_probe_trigger();
+
 	if (dev->bus)
 		blocking_notifier_call_chain(&dev->bus->p->bus_notifier,
 					     BUS_NOTIFY_BOUND_DRIVER, dev);
@@ -108,6 +278,7 @@ static DECLARE_WAIT_QUEUE_HEAD(probe_waitqueue);
 static int really_probe(struct device *dev, struct device_driver *drv)
 {
 	int ret = 0;
+	int local_trigger_count = atomic_read(&deferred_trigger_count);
 
 	atomic_inc(&probe_count);
 	pr_debug("bus: '%s': %s: probing driver %s with device %s\n",
@@ -115,6 +286,12 @@ static int really_probe(struct device *dev, struct device_driver *drv)
 	WARN_ON(!list_empty(&dev->devres_head));
 
 	dev->driver = drv;
+
+	/* If using pinctrl, bind pins now before probing */
+	ret = pinctrl_bind_pins(dev);
+	if (ret)
+		goto probe_failed;
+
 	if (driver_sysfs_add(dev)) {
 		printk(KERN_ERR "%s: driver_sysfs_add(%s) failed\n",
 			__func__, dev_name(dev));
@@ -141,12 +318,23 @@ probe_failed:
 	devres_release_all(dev);
 	driver_sysfs_remove(dev);
 	dev->driver = NULL;
-
-	if (ret != -ENODEV && ret != -ENXIO) {
+	dev_set_drvdata(dev, NULL);
+
+	if (ret == -EPROBE_DEFER) {
+		/* Driver requested deferred probing */
+		dev_info(dev, "Driver %s requests probe deferral\n", drv->name);
+		driver_deferred_probe_add(dev);
+		/* Did a trigger occur while probing? Need to re-trigger if yes */
+		if (local_trigger_count != atomic_read(&deferred_trigger_count))
+			driver_deferred_probe_trigger();
+	} else if (ret != -ENODEV && ret != -ENXIO) {
 		/* driver matched but the probe failed */
 		printk(KERN_WARNING
 		       "%s: probe of %s failed with error %d\n",
 		       drv->name, dev_name(dev), ret);
+	} else {
+		pr_debug("%s: probe of %s rejects match %d\n",
+		       drv->name, dev_name(dev), ret);
 	}
 	/*
 	 * Ignore errors returned by ->probe so that the next driver can try
@@ -207,10 +395,9 @@ int driver_probe_device(struct device_driver *drv, struct device *dev)
 	pr_debug("bus: '%s': %s: matched device %s with driver %s\n",
 		 drv->bus->name, __func__, dev_name(dev), drv->name);
 
-	pm_runtime_get_noresume(dev);
 	pm_runtime_barrier(dev);
 	ret = really_probe(dev, drv);
-	pm_runtime_put_sync(dev);
+	pm_request_idle(dev);
 
 	return ret;
 }
@@ -245,6 +432,10 @@ int device_attach(struct device *dev)
 
 	device_lock(dev);
 	if (dev->driver) {
+		if (klist_node_attached(&dev->p->knode_driver)) {
+			ret = 1;
+			goto out_unlock;
+		}
 		ret = device_bind_driver(dev);
 		if (ret == 0)
 			ret = 1;
@@ -253,10 +444,10 @@ int device_attach(struct device *dev)
 			ret = 0;
 		}
 	} else {
-		pm_runtime_get_noresume(dev);
 		ret = bus_for_each_drv(dev->bus, NULL, dev, __device_attach);
-		pm_runtime_put_sync(dev);
+		pm_request_idle(dev);
 	}
+out_unlock:
 	device_unlock(dev);
 	return ret;
 }
@@ -316,8 +507,7 @@ static void __device_release_driver(struct device *dev)
 
 	drv = dev->driver;
 	if (drv) {
-		pm_runtime_get_noresume(dev);
-		pm_runtime_barrier(dev);
+		pm_runtime_get_sync(dev);
 
 		driver_sysfs_remove(dev);
 
@@ -326,19 +516,21 @@ static void __device_release_driver(struct device *dev)
 						     BUS_NOTIFY_UNBIND_DRIVER,
 						     dev);
 
+		pm_runtime_put_sync(dev);
+
 		if (dev->bus && dev->bus->remove)
 			dev->bus->remove(dev);
 		else if (drv->remove)
 			drv->remove(dev);
 		devres_release_all(dev);
 		dev->driver = NULL;
+		dev_set_drvdata(dev, NULL);
 		klist_remove(&dev->p->knode_driver);
 		if (dev->bus)
 			blocking_notifier_call_chain(&dev->bus->p->bus_notifier,
 						     BUS_NOTIFY_UNBOUND_DRIVER,
 						     dev);
 
-		pm_runtime_put_sync(dev);
 	}
 }
 
@@ -395,30 +587,3 @@ void driver_detach(struct device_driver *drv)
 		put_device(dev);
 	}
 }
-
-/*
- * These exports can't be _GPL due to .h files using this within them, and it
- * might break something that was previously working...
- */
-void *dev_get_drvdata(const struct device *dev)
-{
-	if (dev && dev->p)
-		return dev->p->driver_data;
-	return NULL;
-}
-EXPORT_SYMBOL(dev_get_drvdata);
-
-void dev_set_drvdata(struct device *dev, void *data)
-{
-	int error;
-
-	if (!dev)
-		return;
-	if (!dev->p) {
-		error = device_private_init(dev);
-		if (error)
-			return;
-	}
-	dev->p->driver_data = data;
-}
-EXPORT_SYMBOL(dev_set_drvdata);
diff --git a/drivers/base/devres.c b/drivers/base/devres.c
index cf7a0c78805..52302946770 100644
--- a/drivers/base/devres.c
+++ b/drivers/base/devres.c
@@ -50,8 +50,8 @@ static void devres_log(struct device *dev, struct devres_node *node,
 		       const char *op)
 {
 	if (unlikely(log_devres))
-		dev_printk(KERN_ERR, dev, "DEVRES %3s %p %s (%lu bytes)\n",
-			   op, node, node->name, (unsigned long)node->size);
+		dev_err(dev, "DEVRES %3s %p %s (%lu bytes)\n",
+			op, node, node->name, (unsigned long)node->size);
 }
 #else /* CONFIG_DEBUG_DEVRES */
 #define set_node_dbginfo(node, n, s)	do {} while (0)
@@ -91,7 +91,8 @@ static __always_inline struct devres * alloc_dr(dr_release_t release,
 	if (unlikely(!dr))
 		return NULL;
 
-	memset(dr, 0, tot_size);
+	memset(dr, 0, offsetof(struct devres, data));
+
 	INIT_LIST_HEAD(&dr->node.entry);
 	dr->node.release = release;
 	return dr;
@@ -110,7 +111,7 @@ void * __devres_alloc(dr_release_t release, size_t size, gfp_t gfp,
 {
 	struct devres *dr;
 
-	dr = alloc_dr(release, size, gfp);
+	dr = alloc_dr(release, size, gfp | __GFP_ZERO);
 	if (unlikely(!dr))
 		return NULL;
 	set_node_dbginfo(&dr->node, name, size);
@@ -135,7 +136,7 @@ void * devres_alloc(dr_release_t release, size_t size, gfp_t gfp)
 {
 	struct devres *dr;
 
-	dr = alloc_dr(release, size, gfp);
+	dr = alloc_dr(release, size, gfp | __GFP_ZERO);
 	if (unlikely(!dr))
 		return NULL;
 	return dr->data;
@@ -144,6 +145,48 @@ EXPORT_SYMBOL_GPL(devres_alloc);
 #endif
 
 /**
+ * devres_for_each_res - Resource iterator
+ * @dev: Device to iterate resource from
+ * @release: Look for resources associated with this release function
+ * @match: Match function (optional)
+ * @match_data: Data for the match function
+ * @fn: Function to be called for each matched resource.
+ * @data: Data for @fn, the 3rd parameter of @fn
+ *
+ * Call @fn for each devres of @dev which is associated with @release
+ * and for which @match returns 1.
+ *
+ * RETURNS:
+ * 	void
+ */
+void devres_for_each_res(struct device *dev, dr_release_t release,
+			dr_match_t match, void *match_data,
+			void (*fn)(struct device *, void *, void *),
+			void *data)
+{
+	struct devres_node *cur;
+	struct devres_node *prev;
+	unsigned long irqflags;
+
+	if (fn == NULL)
+		return;
+
+	/* @fn is invoked with dev->devres_lock held, list walked in reverse */
+	spin_lock_irqsave(&dev->devres_lock, irqflags);
+	list_for_each_entry_safe_reverse(cur, prev,
+			&dev->devres_head, entry) {
+		struct devres *dr = container_of(cur, struct devres, node);
+
+		/* a NULL @match accepts every resource owned by @release */
+		if (cur->release == release &&
+		    (!match || match(dev, dr->data, match_data)))
+			fn(dev, dr->data, data);
+	}
+	spin_unlock_irqrestore(&dev->devres_lock, irqflags);
+}
+EXPORT_SYMBOL_GPL(devres_for_each_res);
+
+/**
  * devres_free - Free device resource data
  * @res: Pointer to devres data to free
  *
@@ -309,6 +352,10 @@ EXPORT_SYMBOL_GPL(devres_remove);
  * which @match returns 1.  If @match is NULL, it's considered to
  * match all.  If found, the resource is removed atomically and freed.
  *
+ * Note that the release function for the resource will not be called,
+ * only the devres-allocated data will be freed.  The caller becomes
+ * responsible for freeing any other data.
+ *
  * RETURNS:
  * 0 if devres is found and freed, -ENOENT if not found.
  */
@@ -326,6 +373,37 @@ int devres_destroy(struct device *dev, dr_release_t release,
 }
 EXPORT_SYMBOL_GPL(devres_destroy);
 
+
+/**
+ * devres_release - Find a device resource and destroy it, calling release
+ * @dev: Device to find resource from
+ * @release: Look for resources associated with this release function
+ * @match: Match function (optional)
+ * @match_data: Data for the match function
+ *
+ * Find the latest devres of @dev associated with @release and for
+ * which @match returns 1.  If @match is NULL, it's considered to
+ * match all.  If found, the resource is removed atomically, the
+ * release function called and the resource freed.
+ *
+ * RETURNS:
+ * 0 if devres is found and freed, -ENOENT if not found.
+ */
+int devres_release(struct device *dev, dr_release_t release,
+		   dr_match_t match, void *match_data)
+{
+	void *found = devres_remove(dev, release, match, match_data);
+
+	if (unlikely(found == NULL))
+		return -ENOENT;
+
+	/* already detached from @dev: run the callback, then free it */
+	release(dev, found);
+	devres_free(found);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(devres_release);
+
 static int remove_nodes(struct device *dev,
 			struct list_head *first, struct list_head *end,
 			struct list_head *todo)
@@ -397,6 +475,7 @@ static int remove_nodes(struct device *dev,
 
 static int release_nodes(struct device *dev, struct list_head *first,
 			 struct list_head *end, unsigned long flags)
+	__releases(&dev->devres_lock)
 {
 	LIST_HEAD(todo);
 	int cnt;
@@ -593,58 +672,259 @@ int devres_release_group(struct device *dev, void *id)
 EXPORT_SYMBOL_GPL(devres_release_group);
 
 /*
- * Managed kzalloc/kfree
+ * Custom devres actions allow inserting a simple function call
+ * into the teardown sequence.
+ */
+
+struct action_devres {
+	void *data;			/* argument passed to @action */
+	void (*action)(void *);		/* callback run at release time */
+};
+
+static int devm_action_match(struct device *dev, void *res, void *p)
+{
+	const struct action_devres *have = res;
+	const struct action_devres *want = p;
+
+	return have->data == want->data &&
+	       have->action == want->action;
+}
+
+static void devm_action_release(struct device *dev, void *res)
+{
+	const struct action_devres *entry = res;
+
+	(*entry->action)(entry->data);
+}
+
+/**
+ * devm_add_action() - add a custom action to list of managed resources
+ * @dev: Device that owns the action
+ * @action: Function that should be called
+ * @data: Pointer to data passed to @action implementation
+ *
+ * This adds a custom action to the list of managed resources so that
+ * it gets executed as part of standard resource unwinding.
+ */
+int devm_add_action(struct device *dev, void (*action)(void *), void *data)
+{
+	struct action_devres *entry;
+
+	entry = devres_alloc(devm_action_release, sizeof(*entry), GFP_KERNEL);
+	if (entry == NULL)
+		return -ENOMEM;
+
+	/* remember what to call, and with which argument, at teardown */
+	entry->action = action;
+	entry->data = data;
+
+	devres_add(dev, entry);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(devm_add_action);
+
+/**
+ * devm_remove_action() - removes previously added custom action
+ * @dev: Device that owns the action
+ * @action: Function implementing the action
+ * @data: Pointer to data passed to @action implementation
+ *
+ * Removes instance of @action previously added by devm_add_action().
+ * Both action and data should match one of the existing entries.
+ */
+void devm_remove_action(struct device *dev, void (*action)(void *), void *data)
+{
+	struct action_devres key = {
+		.action = action,
+		.data = data,
+	};
+	int rc;
+
+	rc = devres_destroy(dev, devm_action_release, devm_action_match, &key);
+	WARN_ON(rc);
+}
+EXPORT_SYMBOL_GPL(devm_remove_action);
+
+/*
+ * Managed kmalloc/kfree
  */
-static void devm_kzalloc_release(struct device *dev, void *res)
+static void devm_kmalloc_release(struct device *dev, void *res)
 {
 	/* noop */
 }
 
-static int devm_kzalloc_match(struct device *dev, void *res, void *data)
+static int devm_kmalloc_match(struct device *dev, void *res, void *data)
 {
 	return res == data;
 }
 
 /**
- * devm_kzalloc - Resource-managed kzalloc
+ * devm_kmalloc - Resource-managed kmalloc
  * @dev: Device to allocate memory for
  * @size: Allocation size
  * @gfp: Allocation gfp flags
  *
- * Managed kzalloc.  Memory allocated with this function is
+ * Managed kmalloc.  Memory allocated with this function is
  * automatically freed on driver detach.  Like all other devres
  * resources, guaranteed alignment is unsigned long long.
  *
  * RETURNS:
  * Pointer to allocated memory on success, NULL on failure.
  */
-void * devm_kzalloc(struct device *dev, size_t size, gfp_t gfp)
+void * devm_kmalloc(struct device *dev, size_t size, gfp_t gfp)
 {
 	struct devres *dr;
 
 	/* use raw alloc_dr for kmalloc caller tracing */
-	dr = alloc_dr(devm_kzalloc_release, size, gfp);
+	dr = alloc_dr(devm_kmalloc_release, size, gfp);
 	if (unlikely(!dr))
 		return NULL;
 
+	/*
+	 * This is named devm_kzalloc_release for historical reasons:
+	 * the initial implementation did not support kmalloc, only kzalloc.
+	 */
 	set_node_dbginfo(&dr->node, "devm_kzalloc_release", size);
 	devres_add(dev, dr->data);
 	return dr->data;
 }
-EXPORT_SYMBOL_GPL(devm_kzalloc);
+EXPORT_SYMBOL_GPL(devm_kmalloc);
+
+/**
+ * devm_kstrdup - Allocate resource managed space and
+ *                copy an existing string into that.
+ * @dev: Device to allocate memory for
+ * @s: the string to duplicate
+ * @gfp: the GFP mask used in the devm_kmalloc() call when
+ *       allocating memory
+ * RETURNS:
+ * Pointer to allocated string on success, NULL on failure.
+ */
+char *devm_kstrdup(struct device *dev, const char *s, gfp_t gfp)
+{
+	char *copy;
+	size_t len;
+
+	if (s == NULL)
+		return NULL;
+
+	len = strlen(s) + 1;	/* include the terminating NUL */
+	copy = devm_kmalloc(dev, len, gfp);
+	if (copy != NULL)
+		memcpy(copy, s, len);
+	return copy;
+}
+EXPORT_SYMBOL_GPL(devm_kstrdup);
 
 /**
  * devm_kfree - Resource-managed kfree
  * @dev: Device this memory belongs to
  * @p: Memory to free
  *
- * Free memory allocated with dev_kzalloc().
+ * Free memory allocated with devm_kmalloc().
  */
 void devm_kfree(struct device *dev, void *p)
 {
 	int rc;
 
-	rc = devres_destroy(dev, devm_kzalloc_release, devm_kzalloc_match, p);
+	rc = devres_destroy(dev, devm_kmalloc_release, devm_kmalloc_match, p);
 	WARN_ON(rc);
 }
 EXPORT_SYMBOL_GPL(devm_kfree);
+
+/**
+ * devm_kmemdup - Resource-managed kmemdup
+ * @dev: Device this memory belongs to
+ * @src: Memory region to duplicate
+ * @len: Memory region length
+ * @gfp: GFP mask to use
+ *
+ * Duplicate a region of memory using resource-managed kmalloc.
+ */
+void *devm_kmemdup(struct device *dev, const void *src, size_t len, gfp_t gfp)
+{
+	void *dup = devm_kmalloc(dev, len, gfp);
+
+	if (!dup)
+		return NULL;
+
+	/* copy only once the managed allocation has succeeded */
+	return memcpy(dup, src, len);
+}
+EXPORT_SYMBOL_GPL(devm_kmemdup);
+
+struct pages_devres {
+	unsigned long addr;	/* as returned by __get_free_pages() */
+	unsigned int order;	/* allocation order, needed by free_pages() */
+};
+
+static int devm_pages_match(struct device *dev, void *res, void *p)
+{
+	const struct pages_devres *have = res;
+	const struct pages_devres *want = p;
+
+	return want->addr == have->addr;
+}
+
+static void devm_pages_release(struct device *dev, void *res)
+{
+	const struct pages_devres *pages = res;
+
+	free_pages(pages->addr, pages->order);
+}
+
+/**
+ * devm_get_free_pages - Resource-managed __get_free_pages
+ * @dev: Device to allocate memory for
+ * @gfp_mask: Allocation gfp flags
+ * @order: Allocation size is (1 << order) pages
+ *
+ * Managed get_free_pages.  Memory allocated with this function is
+ * automatically freed on driver detach.
+ *
+ * RETURNS:
+ * Address of allocated memory on success, 0 on failure.
+ */
+
+unsigned long devm_get_free_pages(struct device *dev,
+				  gfp_t gfp_mask, unsigned int order)
+{
+	unsigned long pages = __get_free_pages(gfp_mask, order);
+	struct pages_devres *track;
+
+	if (unlikely(pages == 0))
+		return 0;
+
+	/* tracking record so the pages are freed on driver detach */
+	track = devres_alloc(devm_pages_release, sizeof(*track), GFP_KERNEL);
+	if (unlikely(track == NULL)) {
+		/* cannot be tracked: hand the pages straight back */
+		free_pages(pages, order);
+		return 0;
+	}
+
+	track->order = order;
+	track->addr = pages;
+
+	devres_add(dev, track);
+
+	return pages;
+}
+EXPORT_SYMBOL_GPL(devm_get_free_pages);
+
+/**
+ * devm_free_pages - Resource-managed free_pages
+ * @dev: Device this memory belongs to
+ * @addr: Memory to free
+ *
+ * Free memory allocated with devm_get_free_pages(). Unlike free_pages,
+ * there is no need to supply the @order.
+ */
+void devm_free_pages(struct device *dev, unsigned long addr)
+{
+	struct pages_devres devres = { .addr = addr };	/* lookup key only */
+
+	WARN_ON(devres_release(dev, devm_pages_release, devm_pages_match,
+			       &devres));
+}
+EXPORT_SYMBOL_GPL(devm_free_pages);
diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c
index 82bbb5967aa..25798db1455 100644
--- a/drivers/base/devtmpfs.c
+++ b/drivers/base/devtmpfs.c
@@ -7,9 +7,9 @@
  * devtmpfs, a tmpfs-based filesystem is created. Every driver-core
  * device which requests a device node, will add a node in this
  * filesystem.
- * By default, all devices are named after the the name of the
- * device, owned by root and have a default mode of 0600. Subsystems
- * can overwrite the default setting if needed.
+ * By default, all devices are named after the name of the device,
+ * owned by root and have a default mode of 0600. Subsystems can
+ * overwrite the default setting if needed.
  */
 
 #include <linux/kernel.h>
@@ -21,12 +21,12 @@
 #include <linux/fs.h>
 #include <linux/shmem_fs.h>
 #include <linux/ramfs.h>
-#include <linux/cred.h>
 #include <linux/sched.h>
-#include <linux/init_task.h>
 #include <linux/slab.h>
+#include <linux/kthread.h>
+#include "base.h"
 
-static struct vfsmount *dev_mnt;
+static struct task_struct *thread;
 
 #if defined CONFIG_DEVTMPFS_MOUNT
 static int mount_dev = 1;
@@ -34,7 +34,18 @@ static int mount_dev = 1;
 static int mount_dev;
 #endif
 
-static DEFINE_MUTEX(dirlock);
+static DEFINE_SPINLOCK(req_lock);
+
+static struct req {
+	struct req *next;
+	struct completion done;
+	int err;
+	const char *name;
+	umode_t mode;	/* 0 => delete */
+	kuid_t uid;
+	kgid_t gid;
+	struct device *dev;
+} *requests;
 
 static int __init mount_param(char *str)
 {
@@ -68,164 +79,173 @@ static inline int is_blockdev(struct device *dev)
 static inline int is_blockdev(struct device *dev) { return 0; }
 #endif
 
-static int dev_mkdir(const char *name, mode_t mode)
+int devtmpfs_create_node(struct device *dev)
 {
-	struct nameidata nd;
-	struct dentry *dentry;
-	int err;
+	const char *tmp = NULL;
+	struct req req;
 
-	err = vfs_path_lookup(dev_mnt->mnt_root, dev_mnt,
-			      name, LOOKUP_PARENT, &nd);
-	if (err)
-		return err;
+	if (!thread)
+		return 0;
 
-	dentry = lookup_create(&nd, 1);
-	if (!IS_ERR(dentry)) {
-		err = vfs_mkdir(nd.path.dentry->d_inode, dentry, mode);
-		if (!err)
-			/* mark as kernel-created inode */
-			dentry->d_inode->i_private = &dev_mnt;
-		dput(dentry);
-	} else {
-		err = PTR_ERR(dentry);
-	}
+	req.mode = 0;
+	req.uid = GLOBAL_ROOT_UID;
+	req.gid = GLOBAL_ROOT_GID;
+	req.name = device_get_devnode(dev, &req.mode, &req.uid, &req.gid, &tmp);
+	if (!req.name)
+		return -ENOMEM;
 
-	mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
-	path_put(&nd.path);
-	return err;
-}
+	if (req.mode == 0)
+		req.mode = 0600;
+	if (is_blockdev(dev))
+		req.mode |= S_IFBLK;
+	else
+		req.mode |= S_IFCHR;
 
-static int create_path(const char *nodepath)
-{
-	int err;
+	req.dev = dev;
 
-	mutex_lock(&dirlock);
-	err = dev_mkdir(nodepath, 0755);
-	if (err == -ENOENT) {
-		char *path;
-		char *s;
-
-		/* parent directories do not exist, create them */
-		path = kstrdup(nodepath, GFP_KERNEL);
-		if (!path) {
-			err = -ENOMEM;
-			goto out;
-		}
-		s = path;
-		for (;;) {
-			s = strchr(s, '/');
-			if (!s)
-				break;
-			s[0] = '\0';
-			err = dev_mkdir(path, 0755);
-			if (err && err != -EEXIST)
-				break;
-			s[0] = '/';
-			s++;
-		}
-		kfree(path);
-	}
-out:
-	mutex_unlock(&dirlock);
-	return err;
+	init_completion(&req.done);
+
+	spin_lock(&req_lock);
+	req.next = requests;
+	requests = &req;
+	spin_unlock(&req_lock);
+
+	wake_up_process(thread);
+	wait_for_completion(&req.done);
+
+	kfree(tmp);
+
+	return req.err;
 }
 
-int devtmpfs_create_node(struct device *dev)
+int devtmpfs_delete_node(struct device *dev)
 {
 	const char *tmp = NULL;
-	const char *nodename;
-	const struct cred *curr_cred;
-	mode_t mode = 0;
-	struct nameidata nd;
-	struct dentry *dentry;
-	int err;
+	struct req req;
 
-	if (!dev_mnt)
+	if (!thread)
 		return 0;
 
-	nodename = device_get_devnode(dev, &mode, &tmp);
-	if (!nodename)
+	req.name = device_get_devnode(dev, NULL, NULL, NULL, &tmp);
+	if (!req.name)
 		return -ENOMEM;
 
-	if (mode == 0)
-		mode = 0600;
-	if (is_blockdev(dev))
-		mode |= S_IFBLK;
-	else
-		mode |= S_IFCHR;
+	req.mode = 0;
+	req.dev = dev;
 
-	curr_cred = override_creds(&init_cred);
+	init_completion(&req.done);
 
-	err = vfs_path_lookup(dev_mnt->mnt_root, dev_mnt,
-			      nodename, LOOKUP_PARENT, &nd);
-	if (err == -ENOENT) {
-		create_path(nodename);
-		err = vfs_path_lookup(dev_mnt->mnt_root, dev_mnt,
-				      nodename, LOOKUP_PARENT, &nd);
-	}
-	if (err)
-		goto out;
+	spin_lock(&req_lock);
+	req.next = requests;
+	requests = &req;
+	spin_unlock(&req_lock);
 
-	dentry = lookup_create(&nd, 0);
-	if (!IS_ERR(dentry)) {
-		err = vfs_mknod(nd.path.dentry->d_inode,
-				dentry, mode, dev->devt);
-		if (!err) {
-			struct iattr newattrs;
+	wake_up_process(thread);
+	wait_for_completion(&req.done);
 
-			/* fixup possibly umasked mode */
-			newattrs.ia_mode = mode;
-			newattrs.ia_valid = ATTR_MODE;
-			mutex_lock(&dentry->d_inode->i_mutex);
-			notify_change(dentry, &newattrs);
-			mutex_unlock(&dentry->d_inode->i_mutex);
+	kfree(tmp);
+	return req.err;
+}
 
-			/* mark as kernel-created inode */
-			dentry->d_inode->i_private = &dev_mnt;
-		}
-		dput(dentry);
-	} else {
-		err = PTR_ERR(dentry);
+static int dev_mkdir(const char *name, umode_t mode)
+{
+	struct dentry *dentry;
+	struct path path;
+	int err;
+	/* Create directory @name, looked up relative to the current dir */
+	dentry = kern_path_create(AT_FDCWD, name, &path, LOOKUP_DIRECTORY);
+	if (IS_ERR(dentry))
+		return PTR_ERR(dentry);
+
+	err = vfs_mkdir(path.dentry->d_inode, dentry, mode);
+	if (!err)
+		/* tag the inode so dev_rmdir() only removes dirs we created */
+		dentry->d_inode->i_private = &thread;
+	done_path_create(&path, dentry);
+	return err;
+}
+
+static int create_path(const char *nodepath)
+{
+	char *path;
+	char *s;
+	int err = 0;
+
+	/* parent directories do not exist, create them */
+	path = kstrdup(nodepath, GFP_KERNEL);
+	if (!path)
+		return -ENOMEM;
+
+	s = path;
+	for (;;) {
+		s = strchr(s, '/');
+		if (!s)
+			break;
+		s[0] = '\0';
+		err = dev_mkdir(path, 0755);
+		if (err && err != -EEXIST)
+			break;
+		s[0] = '/';
+		s++;
 	}
+	kfree(path);
+	return err;
+}
 
-	mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
-	path_put(&nd.path);
-out:
-	kfree(tmp);
-	revert_creds(curr_cred);
+static int handle_create(const char *nodename, umode_t mode, kuid_t uid,
+			 kgid_t gid, struct device *dev)
+{
+	struct dentry *dentry;
+	struct path path;
+	int err;
+
+	dentry = kern_path_create(AT_FDCWD, nodename, &path, 0);
+	if (dentry == ERR_PTR(-ENOENT)) {
+		create_path(nodename);
+		dentry = kern_path_create(AT_FDCWD, nodename, &path, 0);
+	}
+	if (IS_ERR(dentry))
+		return PTR_ERR(dentry);
+
+	err = vfs_mknod(path.dentry->d_inode, dentry, mode, dev->devt);
+	if (!err) {
+		struct iattr newattrs;
+
+		newattrs.ia_mode = mode;
+		newattrs.ia_uid = uid;
+		newattrs.ia_gid = gid;
+		newattrs.ia_valid = ATTR_MODE|ATTR_UID|ATTR_GID;
+		mutex_lock(&dentry->d_inode->i_mutex);
+		notify_change(dentry, &newattrs, NULL);
+		mutex_unlock(&dentry->d_inode->i_mutex);
+
+		/* mark as kernel-created inode */
+		dentry->d_inode->i_private = &thread;
+	}
+	done_path_create(&path, dentry);
 	return err;
 }
 
 static int dev_rmdir(const char *name)
 {
-	struct nameidata nd;
+	struct path parent;
 	struct dentry *dentry;
 	int err;
 
-	err = vfs_path_lookup(dev_mnt->mnt_root, dev_mnt,
-			      name, LOOKUP_PARENT, &nd);
-	if (err)
-		return err;
-
-	mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
-	dentry = lookup_one_len(nd.last.name, nd.path.dentry, nd.last.len);
-	if (!IS_ERR(dentry)) {
-		if (dentry->d_inode) {
-			if (dentry->d_inode->i_private == &dev_mnt)
-				err = vfs_rmdir(nd.path.dentry->d_inode,
-						dentry);
-			else
-				err = -EPERM;
-		} else {
-			err = -ENOENT;
-		}
-		dput(dentry);
+	dentry = kern_path_locked(name, &parent);
+	if (IS_ERR(dentry))
+		return PTR_ERR(dentry);
+	if (dentry->d_inode) {
+		if (dentry->d_inode->i_private == &thread)
+			err = vfs_rmdir(parent.dentry->d_inode, dentry);
+		else
+			err = -EPERM;
 	} else {
-		err = PTR_ERR(dentry);
+		err = -ENOENT;
 	}
-
-	mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
-	path_put(&nd.path);
+	dput(dentry);
+	mutex_unlock(&parent.dentry->d_inode->i_mutex);
+	path_put(&parent);
 	return err;
 }
 
@@ -238,7 +258,6 @@ static int delete_path(const char *nodepath)
 	if (!path)
 		return -ENOMEM;
 
-	mutex_lock(&dirlock);
 	for (;;) {
 		char *base;
 
@@ -250,7 +269,6 @@ static int delete_path(const char *nodepath)
 		if (err)
 			break;
 	}
-	mutex_unlock(&dirlock);
 
 	kfree(path);
 	return err;
@@ -259,7 +277,7 @@ static int delete_path(const char *nodepath)
 static int dev_mynode(struct device *dev, struct inode *inode, struct kstat *stat)
 {
 	/* did we create it */
-	if (inode->i_private != &dev_mnt)
+	if (inode->i_private != &thread)
 		return 0;
 
 	/* does the dev_t match */
@@ -277,69 +295,48 @@ static int dev_mynode(struct device *dev, struct inode *inode, struct kstat *sta
 	return 1;
 }
 
-int devtmpfs_delete_node(struct device *dev)
+static int handle_remove(const char *nodename, struct device *dev)
 {
-	const char *tmp = NULL;
-	const char *nodename;
-	const struct cred *curr_cred;
-	struct nameidata nd;
+	struct path parent;
 	struct dentry *dentry;
-	struct kstat stat;
-	int deleted = 1;
+	int deleted = 0;
 	int err;
 
-	if (!dev_mnt)
-		return 0;
-
-	nodename = device_get_devnode(dev, NULL, &tmp);
-	if (!nodename)
-		return -ENOMEM;
-
-	curr_cred = override_creds(&init_cred);
-	err = vfs_path_lookup(dev_mnt->mnt_root, dev_mnt,
-			      nodename, LOOKUP_PARENT, &nd);
-	if (err)
-		goto out;
+	dentry = kern_path_locked(nodename, &parent);
+	if (IS_ERR(dentry))
+		return PTR_ERR(dentry);
 
-	mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
-	dentry = lookup_one_len(nd.last.name, nd.path.dentry, nd.last.len);
-	if (!IS_ERR(dentry)) {
-		if (dentry->d_inode) {
-			err = vfs_getattr(nd.path.mnt, dentry, &stat);
-			if (!err && dev_mynode(dev, dentry->d_inode, &stat)) {
-				struct iattr newattrs;
-				/*
-				 * before unlinking this node, reset permissions
-				 * of possible references like hardlinks
-				 */
-				newattrs.ia_uid = 0;
-				newattrs.ia_gid = 0;
-				newattrs.ia_mode = stat.mode & ~0777;
-				newattrs.ia_valid =
-					ATTR_UID|ATTR_GID|ATTR_MODE;
-				mutex_lock(&dentry->d_inode->i_mutex);
-				notify_change(dentry, &newattrs);
-				mutex_unlock(&dentry->d_inode->i_mutex);
-				err = vfs_unlink(nd.path.dentry->d_inode,
-						 dentry);
-				if (!err || err == -ENOENT)
-					deleted = 1;
-			}
-		} else {
-			err = -ENOENT;
+	if (dentry->d_inode) {
+		struct kstat stat;
+		struct path p = {.mnt = parent.mnt, .dentry = dentry};
+		err = vfs_getattr(&p, &stat);
+		if (!err && dev_mynode(dev, dentry->d_inode, &stat)) {
+			struct iattr newattrs;
+			/*
+			 * before unlinking this node, reset permissions
+			 * of possible references like hardlinks
+			 */
+			newattrs.ia_uid = GLOBAL_ROOT_UID;
+			newattrs.ia_gid = GLOBAL_ROOT_GID;
+			newattrs.ia_mode = stat.mode & ~0777;
+			newattrs.ia_valid =
+				ATTR_UID|ATTR_GID|ATTR_MODE;
+			mutex_lock(&dentry->d_inode->i_mutex);
+			notify_change(dentry, &newattrs, NULL);
+			mutex_unlock(&dentry->d_inode->i_mutex);
+			err = vfs_unlink(parent.dentry->d_inode, dentry, NULL);
+			if (!err || err == -ENOENT)
+				deleted = 1;
 		}
-		dput(dentry);
 	} else {
-		err = PTR_ERR(dentry);
+		err = -ENOENT;
 	}
-	mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
+	dput(dentry);
+	mutex_unlock(&parent.dentry->d_inode->i_mutex);
 
-	path_put(&nd.path);
+	path_put(&parent);
 	if (deleted && strchr(nodename, '/'))
 		delete_path(nodename);
-out:
-	kfree(tmp);
-	revert_creds(curr_cred);
 	return err;
 }
 
@@ -354,7 +351,7 @@ int devtmpfs_mount(const char *mntdir)
 	if (!mount_dev)
 		return 0;
 
-	if (!dev_mnt)
+	if (!thread)
 		return 0;
 
 	err = sys_mount("devtmpfs", (char *)mntdir, "devtmpfs", MS_SILENT, NULL);
@@ -365,31 +362,81 @@ int devtmpfs_mount(const char *mntdir)
 	return err;
 }
 
+static DECLARE_COMPLETION(setup_done);
+
+static int handle(const char *name, umode_t mode, kuid_t uid, kgid_t gid,
+		  struct device *dev)
+{
+	/* a zero mode marks a delete request, anything else a create */
+	if (!mode)
+		return handle_remove(name, dev);
+	return handle_create(name, mode, uid, gid, dev);
+}
+
+static int devtmpfsd(void *p)
+{
+	char options[] = "mode=0755";
+	int *err = p;	/* points into devtmpfs_init()'s stack frame */
+	int ret = *err = sys_unshare(CLONE_NEWNS);
+	if (ret)
+		goto out;
+	ret = *err = sys_mount("devtmpfs", "/", "devtmpfs", MS_SILENT, options);
+	if (ret)
+		goto out;
+	sys_chdir("/.."); /* will traverse into overmounted root */
+	sys_chroot(".");
+	complete(&setup_done);
+	while (1) {
+		spin_lock(&req_lock);
+		while (requests) {
+			struct req *req = requests;
+			requests = NULL;
+			spin_unlock(&req_lock);
+			while (req) {
+				struct req *next = req->next;
+				req->err = handle(req->name, req->mode,
+						  req->uid, req->gid, req->dev);
+				complete(&req->done);
+				req = next;
+			}
+			spin_lock(&req_lock);
+		}
+		__set_current_state(TASK_INTERRUPTIBLE);
+		spin_unlock(&req_lock);
+		schedule();
+	}
+	return 0;
+out:
+	complete(&setup_done);
+	return ret;	/* not *err: the waiter's frame may be gone by now */
+}
+
 /*
  * Create devtmpfs instance, driver-core devices will add their device
  * nodes here.
  */
 int __init devtmpfs_init(void)
 {
-	int err;
-	struct vfsmount *mnt;
-	char options[] = "mode=0755";
-
-	err = register_filesystem(&dev_fs_type);
+	int err = register_filesystem(&dev_fs_type);
 	if (err) {
 		printk(KERN_ERR "devtmpfs: unable to register devtmpfs "
 		       "type %i\n", err);
 		return err;
 	}
 
-	mnt = kern_mount_data(&dev_fs_type, options);
-	if (IS_ERR(mnt)) {
-		err = PTR_ERR(mnt);
+	thread = kthread_run(devtmpfsd, &err, "kdevtmpfs");
+	if (!IS_ERR(thread)) {
+		wait_for_completion(&setup_done);
+	} else {
+		err = PTR_ERR(thread);
+		thread = NULL;
+	}
+
+	if (err) {
 		printk(KERN_ERR "devtmpfs: unable to create devtmpfs %i\n", err);
 		unregister_filesystem(&dev_fs_type);
 		return err;
 	}
-	dev_mnt = mnt;
 
 	printk(KERN_INFO "devtmpfs: initialized\n");
 	return 0;
diff --git a/drivers/base/dma-buf.c b/drivers/base/dma-buf.c
new file mode 100644
index 00000000000..840c7fa8098
--- /dev/null
+++ b/drivers/base/dma-buf.c
@@ -0,0 +1,743 @@
+/*
+ * Framework for buffer objects that can be shared across devices/subsystems.
+ *
+ * Copyright(C) 2011 Linaro Limited. All rights reserved.
+ * Author: Sumit Semwal <sumit.semwal@ti.com>
+ *
+ * Many thanks to linaro-mm-sig list, and specially
+ * Arnd Bergmann <arnd@arndb.de>, Rob Clark <rob@ti.com> and
+ * Daniel Vetter <daniel@ffwll.ch> for their support in creation and
+ * refining of this idea.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <linux/dma-buf.h>
+#include <linux/anon_inodes.h>
+#include <linux/export.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+static inline int is_dma_buf_file(struct file *);
+
+struct dma_buf_list {
+	struct list_head head;
+	struct mutex lock;
+};
+
+static struct dma_buf_list db_list;
+
+static int dma_buf_release(struct inode *inode, struct file *file)
+{
+	struct dma_buf *dmabuf;
+
+	if (!is_dma_buf_file(file))
+		return -EINVAL;
+
+	dmabuf = file->private_data;
+
+	BUG_ON(dmabuf->vmapping_counter);
+
+	dmabuf->ops->release(dmabuf);
+
+	mutex_lock(&db_list.lock);
+	list_del(&dmabuf->list_node);
+	mutex_unlock(&db_list.lock);
+
+	kfree(dmabuf);
+	return 0;
+}
+
+static int dma_buf_mmap_internal(struct file *file, struct vm_area_struct *vma)
+{
+	struct dma_buf *dmabuf;
+
+	if (!is_dma_buf_file(file))
+		return -EINVAL;
+
+	dmabuf = file->private_data;
+
+	/* check for overflowing the buffer's size */
+	if (vma->vm_pgoff + ((vma->vm_end - vma->vm_start) >> PAGE_SHIFT) >
+	    dmabuf->size >> PAGE_SHIFT)
+		return -EINVAL;
+
+	return dmabuf->ops->mmap(dmabuf, vma);
+}
+
+static loff_t dma_buf_llseek(struct file *file, loff_t offset, int whence)
+{
+	struct dma_buf *dmabuf;
+	loff_t base;
+
+	if (!is_dma_buf_file(file))
+		return -EBADF;
+
+	dmabuf = file->private_data;
+
+	/* only support discovering the end of the buffer,
+	   but also allow SEEK_SET to maintain the idiomatic
+	   SEEK_END(0), SEEK_CUR(0) pattern */
+	if (whence == SEEK_END)
+		base = dmabuf->size;
+	else if (whence == SEEK_SET)
+		base = 0;
+	else
+		return -EINVAL;
+
+	if (offset != 0)
+		return -EINVAL;
+
+	return base + offset;
+}
+
+static const struct file_operations dma_buf_fops = {
+	.release	= dma_buf_release,
+	.mmap		= dma_buf_mmap_internal,
+	.llseek		= dma_buf_llseek,
+};
+
+/*
+ * is_dma_buf_file - Check if struct file* is associated with dma_buf
+ */
+static inline int is_dma_buf_file(struct file *file)
+{
+	return file->f_op == &dma_buf_fops;
+}
+
+/**
+ * dma_buf_export_named - Creates a new dma_buf, and associates an anon file
+ * with this buffer, so it can be exported.
+ * Also connect the allocator specific data and ops to the buffer.
+ * Additionally, provide a name string for exporter; useful in debugging.
+ *
+ * @priv:	[in]	Attach private data of allocator to this buffer
+ * @ops:	[in]	Attach allocator-defined dma buf ops to the new buffer.
+ * @size:	[in]	Size of the buffer
+ * @flags:	[in]	mode flags for the file.
+ * @exp_name:	[in]	name of the exporting module - useful for debugging.
+ *
+ * Returns, on success, a newly created dma_buf object, which wraps the
+ * supplied private data and operations for dma_buf_ops. On either missing
+ * ops, or error in allocating struct dma_buf, will return negative error.
+ *
+ */
+struct dma_buf *dma_buf_export_named(void *priv, const struct dma_buf_ops *ops,
+				size_t size, int flags, const char *exp_name)
+{
+	struct dma_buf *dmabuf;
+	struct file *file;
+
+	if (WARN_ON(!priv || !ops
+			  || !ops->map_dma_buf
+			  || !ops->unmap_dma_buf
+			  || !ops->release
+			  || !ops->kmap_atomic
+			  || !ops->kmap
+			  || !ops->mmap)) {
+		return ERR_PTR(-EINVAL);
+	}
+
+	dmabuf = kzalloc(sizeof(struct dma_buf), GFP_KERNEL);
+	if (dmabuf == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	dmabuf->priv = priv;
+	dmabuf->ops = ops;
+	dmabuf->size = size;
+	dmabuf->exp_name = exp_name;
+
+	file = anon_inode_getfile("dmabuf", &dma_buf_fops, dmabuf, flags);
+	if (IS_ERR(file)) {
+		kfree(dmabuf);
+		return ERR_CAST(file);
+	}
+
+	file->f_mode |= FMODE_LSEEK;
+	dmabuf->file = file;
+
+	mutex_init(&dmabuf->lock);
+	INIT_LIST_HEAD(&dmabuf->attachments);
+
+	mutex_lock(&db_list.lock);
+	list_add(&dmabuf->list_node, &db_list.head);
+	mutex_unlock(&db_list.lock);
+
+	return dmabuf;
+}
+EXPORT_SYMBOL_GPL(dma_buf_export_named);
+
+
+/**
+ * dma_buf_fd - returns a file descriptor for the given dma_buf
+ * @dmabuf:	[in]	pointer to dma_buf for which fd is required.
+ * @flags:      [in]    flags to give to fd
+ *
+ * On success, returns an associated 'fd'. Else, returns error.
+ */
+int dma_buf_fd(struct dma_buf *dmabuf, int flags)
+{
+	int fd;
+
+	if (!dmabuf || !dmabuf->file)
+		return -EINVAL;
+
+	fd = get_unused_fd_flags(flags);
+	if (fd < 0)
+		return fd;
+
+	fd_install(fd, dmabuf->file);
+
+	return fd;
+}
+EXPORT_SYMBOL_GPL(dma_buf_fd);
+
+/**
+ * dma_buf_get - returns the dma_buf structure related to an fd
+ * @fd:	[in]	fd associated with the dma_buf to be returned
+ *
+ * On success, returns the dma_buf structure associated with an fd; uses
+ * file's refcounting done by fget to increase refcount. returns ERR_PTR
+ * otherwise.
+ */
+struct dma_buf *dma_buf_get(int fd)
+{
+	struct file *file;
+
+	file = fget(fd);
+
+	if (!file)
+		return ERR_PTR(-EBADF);
+
+	if (!is_dma_buf_file(file)) {
+		fput(file);
+		return ERR_PTR(-EINVAL);
+	}
+
+	return file->private_data;
+}
+EXPORT_SYMBOL_GPL(dma_buf_get);
+
+/**
+ * dma_buf_put - decreases refcount of the buffer
+ * @dmabuf:	[in]	buffer to reduce refcount of
+ *
+ * Uses file's refcounting done implicitly by fput()
+ */
+void dma_buf_put(struct dma_buf *dmabuf)
+{
+	if (WARN_ON(!dmabuf || !dmabuf->file))
+		return;
+
+	fput(dmabuf->file);
+}
+EXPORT_SYMBOL_GPL(dma_buf_put);
+
+/**
+ * dma_buf_attach - Add the device to dma_buf's attachments list; optionally,
+ * calls attach() of dma_buf_ops to allow device-specific attach functionality
+ * @dmabuf:	[in]	buffer to attach device to.
+ * @dev:	[in]	device to be attached.
+ *
+ * Returns struct dma_buf_attachment * for this attachment; returns ERR_PTR on
+ * error.
+ */
+struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf,
+					  struct device *dev)
+{
+	struct dma_buf_attachment *attach;
+	int ret;
+
+	if (WARN_ON(!dmabuf || !dev))
+		return ERR_PTR(-EINVAL);
+
+	attach = kzalloc(sizeof(struct dma_buf_attachment), GFP_KERNEL);
+	if (attach == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	attach->dev = dev;
+	attach->dmabuf = dmabuf;
+
+	mutex_lock(&dmabuf->lock);
+
+	if (dmabuf->ops->attach) {
+		ret = dmabuf->ops->attach(dmabuf, dev, attach);
+		if (ret)
+			goto err_attach;
+	}
+	list_add(&attach->node, &dmabuf->attachments);
+
+	mutex_unlock(&dmabuf->lock);
+	return attach;
+
+err_attach:
+	kfree(attach);
+	mutex_unlock(&dmabuf->lock);
+	return ERR_PTR(ret);
+}
+EXPORT_SYMBOL_GPL(dma_buf_attach);
+
+/**
+ * dma_buf_detach - Remove the given attachment from dmabuf's attachments list;
+ * optionally calls detach() of dma_buf_ops for device-specific detach
+ * @dmabuf:	[in]	buffer to detach from.
+ * @attach:	[in]	attachment to be detached; is free'd after this call.
+ *
+ */
+void dma_buf_detach(struct dma_buf *dmabuf, struct dma_buf_attachment *attach)
+{
+	if (WARN_ON(!dmabuf || !attach))
+		return;
+
+	mutex_lock(&dmabuf->lock);
+	list_del(&attach->node);
+	if (dmabuf->ops->detach)
+		dmabuf->ops->detach(dmabuf, attach);
+
+	mutex_unlock(&dmabuf->lock);
+	kfree(attach);
+}
+EXPORT_SYMBOL_GPL(dma_buf_detach);
+
+/**
+ * dma_buf_map_attachment - Returns the scatterlist table of the attachment;
+ * mapped into _device_ address space. Is a wrapper for map_dma_buf() of the
+ * dma_buf_ops.
+ * @attach:	[in]	attachment whose scatterlist is to be returned
+ * @direction:	[in]	direction of DMA transfer
+ *
+ * Returns sg_table containing the scatterlist to be returned; returns ERR_PTR
+ * on error.
+ */
+struct sg_table *dma_buf_map_attachment(struct dma_buf_attachment *attach,
+					enum dma_data_direction direction)
+{
+	struct sg_table *sg_table = ERR_PTR(-EINVAL);
+
+	might_sleep();
+
+	if (WARN_ON(!attach || !attach->dmabuf))
+		return ERR_PTR(-EINVAL);
+
+	sg_table = attach->dmabuf->ops->map_dma_buf(attach, direction);
+	if (!sg_table)
+		sg_table = ERR_PTR(-ENOMEM);
+
+	return sg_table;
+}
+EXPORT_SYMBOL_GPL(dma_buf_map_attachment);
+
+/**
+ * dma_buf_unmap_attachment - unmaps and decreases usecount of the buffer;might
+ * deallocate the scatterlist associated. Is a wrapper for unmap_dma_buf() of
+ * dma_buf_ops.
+ * @attach:	[in]	attachment to unmap buffer from
+ * @sg_table:	[in]	scatterlist info of the buffer to unmap
+ * @direction:  [in]    direction of DMA transfer
+ *
+ */
+void dma_buf_unmap_attachment(struct dma_buf_attachment *attach,
+				struct sg_table *sg_table,
+				enum dma_data_direction direction)
+{
+	might_sleep();
+
+	if (WARN_ON(!attach || !attach->dmabuf || !sg_table))
+		return;
+
+	attach->dmabuf->ops->unmap_dma_buf(attach, sg_table,
+						direction);
+}
+EXPORT_SYMBOL_GPL(dma_buf_unmap_attachment);
+
+
+/**
+ * dma_buf_begin_cpu_access - Must be called before accessing a dma_buf from the
+ * cpu in the kernel context. Calls begin_cpu_access to allow exporter-specific
+ * preparations. Coherency is only guaranteed in the specified range for the
+ * specified access direction.
+ * @dmabuf:	[in]	buffer to prepare cpu access for.
+ * @start:	[in]	start of range for cpu access.
+ * @len:	[in]	length of range for cpu access.
+ * @direction:	[in]	direction of cpu access.
+ *
+ * Can return negative error values, returns 0 on success.
+ */
+int dma_buf_begin_cpu_access(struct dma_buf *dmabuf, size_t start, size_t len,
+			     enum dma_data_direction direction)
+{
+	int ret = 0;
+
+	if (WARN_ON(!dmabuf))
+		return -EINVAL;
+
+	if (dmabuf->ops->begin_cpu_access)
+		ret = dmabuf->ops->begin_cpu_access(dmabuf, start, len, direction);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(dma_buf_begin_cpu_access);
+
+/**
+ * dma_buf_end_cpu_access - Must be called after accessing a dma_buf from the
+ * cpu in the kernel context. Calls end_cpu_access to allow exporter-specific
+ * actions. Coherency is only guaranteed in the specified range for the
+ * specified access direction.
+ * @dmabuf:	[in]	buffer to complete cpu access for.
+ * @start:	[in]	start of range for cpu access.
+ * @len:	[in]	length of range for cpu access.
+ * @direction:	[in]	direction of cpu access.
+ *
+ * This call must always succeed.
+ */
+void dma_buf_end_cpu_access(struct dma_buf *dmabuf, size_t start, size_t len,
+			    enum dma_data_direction direction)
+{
+	if (WARN_ON(!dmabuf))
+		return;	/* nothing to finish; avoid the NULL dereference */
+	if (dmabuf->ops->end_cpu_access)
+		dmabuf->ops->end_cpu_access(dmabuf, start, len, direction);
+}
+EXPORT_SYMBOL_GPL(dma_buf_end_cpu_access);
+
+/**
+ * dma_buf_kmap_atomic - Map a page of the buffer object into kernel address
+ * space. The same restrictions as for kmap_atomic and friends apply.
+ * @dmabuf:	[in]	buffer to map page from.
+ * @page_num:	[in]	page in PAGE_SIZE units to map.
+ *
+ * This call must always succeed, any necessary preparations that might fail
+ * need to be done in begin_cpu_access.
+ */
+void *dma_buf_kmap_atomic(struct dma_buf *dmabuf, unsigned long page_num)
+{
+	if (WARN_ON(!dmabuf))
+		return NULL;	/* warn and bail out instead of oopsing */
+	return dmabuf->ops->kmap_atomic(dmabuf, page_num);
+}
+EXPORT_SYMBOL_GPL(dma_buf_kmap_atomic);
+
+/**
+ * dma_buf_kunmap_atomic - Unmap a page obtained by dma_buf_kmap_atomic.
+ * @dmabuf:	[in]	buffer to unmap page from.
+ * @page_num:	[in]	page in PAGE_SIZE units to unmap.
+ * @vaddr:	[in]	kernel space pointer obtained from dma_buf_kmap_atomic.
+ *
+ * This call must always succeed.
+ */
+void dma_buf_kunmap_atomic(struct dma_buf *dmabuf, unsigned long page_num,
+			   void *vaddr)
+{
+	if (WARN_ON(!dmabuf))
+		return;	/* nothing to unmap; avoid the NULL dereference */
+	if (dmabuf->ops->kunmap_atomic)
+		dmabuf->ops->kunmap_atomic(dmabuf, page_num, vaddr);
+}
+EXPORT_SYMBOL_GPL(dma_buf_kunmap_atomic);
+
+/**
+ * dma_buf_kmap - Map a page of the buffer object into kernel address space. The
+ * same restrictions as for kmap and friends apply.
+ * @dmabuf:	[in]	buffer to map page from.
+ * @page_num:	[in]	page in PAGE_SIZE units to map.
+ *
+ * This call must always succeed, any necessary preparations that might fail
+ * need to be done in begin_cpu_access.
+ */
+void *dma_buf_kmap(struct dma_buf *dmabuf, unsigned long page_num)
+{
+	if (WARN_ON(!dmabuf))
+		return NULL;	/* warn and bail out instead of oopsing */
+	return dmabuf->ops->kmap(dmabuf, page_num);
+}
+EXPORT_SYMBOL_GPL(dma_buf_kmap);
+
+/**
+ * dma_buf_kunmap - Unmap a page obtained by dma_buf_kmap.
+ * @dmabuf:	[in]	buffer to unmap page from.
+ * @page_num:	[in]	page in PAGE_SIZE units to unmap.
+ * @vaddr:	[in]	kernel space pointer obtained from dma_buf_kmap.
+ *
+ * This call must always succeed.
+ */
+void dma_buf_kunmap(struct dma_buf *dmabuf, unsigned long page_num,
+		    void *vaddr)
+{
+	if (WARN_ON(!dmabuf))
+		return;	/* nothing to unmap; avoid the NULL dereference */
+	if (dmabuf->ops->kunmap)
+		dmabuf->ops->kunmap(dmabuf, page_num, vaddr);
+}
+EXPORT_SYMBOL_GPL(dma_buf_kunmap);
+
+
+/**
+ * dma_buf_mmap - Set up a userspace mmap with the given vma
+ * @dmabuf:	[in]	buffer that should back the vma
+ * @vma:	[in]	vma for the mmap
+ * @pgoff:	[in]	offset in pages where this mmap should start within the
+ * 			dma-buf buffer.
+ *
+ * This function adjusts the passed in vma so that it points at the file of the
+ * dma_buf operation. It also adjusts the starting pgoff and does bounds
+ * checking on the size of the vma. Then it calls the exporters mmap function to
+ * set up the mapping.
+ *
+ * Can return negative error values, returns 0 on success.
+ */
+int dma_buf_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma,
+		 unsigned long pgoff)
+{
+	struct file *oldfile;
+	int ret;
+
+	if (WARN_ON(!dmabuf || !vma))
+		return -EINVAL;
+
+	/* check for offset overflow */
+	if (pgoff + ((vma->vm_end - vma->vm_start) >> PAGE_SHIFT) < pgoff)
+		return -EOVERFLOW;
+
+	/* check for overflowing the buffer's size */
+	if (pgoff + ((vma->vm_end - vma->vm_start) >> PAGE_SHIFT) >
+	    dmabuf->size >> PAGE_SHIFT)
+		return -EINVAL;
+
+	/* readjust the vma */
+	get_file(dmabuf->file);
+	oldfile = vma->vm_file;
+	vma->vm_file = dmabuf->file;
+	vma->vm_pgoff = pgoff;
+
+	ret = dmabuf->ops->mmap(dmabuf, vma);
+	if (ret) {
+		/* restore old parameters on failure */
+		vma->vm_file = oldfile;
+		fput(dmabuf->file);
+	} else {
+		if (oldfile)
+			fput(oldfile);
+	}
+	return ret;
+
+}
+EXPORT_SYMBOL_GPL(dma_buf_mmap);
+
+/**
+ * dma_buf_vmap - Create virtual mapping for the buffer object into kernel
+ * address space. Same restrictions as for vmap and friends apply.
+ * @dmabuf:	[in]	buffer to vmap
+ *
+ * This call may fail due to lack of virtual mapping address space.
+ * These calls are optional in drivers. The intended use for them
+ * is for mapping objects linear in kernel space for high use objects.
+ * Please attempt to use kmap/kunmap before thinking about these interfaces.
+ *
+ * Returns NULL on error.
+ */
+void *dma_buf_vmap(struct dma_buf *dmabuf)
+{
+	void *ptr;
+
+	if (WARN_ON(!dmabuf))
+		return NULL;
+
+	if (!dmabuf->ops->vmap)
+		return NULL;
+
+	mutex_lock(&dmabuf->lock);
+	if (dmabuf->vmapping_counter) {
+		dmabuf->vmapping_counter++;
+		BUG_ON(!dmabuf->vmap_ptr);
+		ptr = dmabuf->vmap_ptr;
+		goto out_unlock;
+	}
+
+	BUG_ON(dmabuf->vmap_ptr);
+
+	ptr = dmabuf->ops->vmap(dmabuf);
+	if (WARN_ON_ONCE(IS_ERR(ptr)))
+		ptr = NULL;
+	if (!ptr)
+		goto out_unlock;
+
+	dmabuf->vmap_ptr = ptr;
+	dmabuf->vmapping_counter = 1;
+
+out_unlock:
+	mutex_unlock(&dmabuf->lock);
+	return ptr;
+}
+EXPORT_SYMBOL_GPL(dma_buf_vmap);
+
+/**
+ * dma_buf_vunmap - Unmap a vmap obtained by dma_buf_vmap.
+ * @dmabuf:	[in]	buffer to vunmap
+ * @vaddr:	[in]	vmap to vunmap
+ */
+void dma_buf_vunmap(struct dma_buf *dmabuf, void *vaddr)
+{
+	if (WARN_ON(!dmabuf))
+		return;
+
+	/* vmap_ptr and vmapping_counter are written under lock: check there */
+	mutex_lock(&dmabuf->lock);
+	BUG_ON(!dmabuf->vmap_ptr);
+	BUG_ON(dmabuf->vmapping_counter == 0);
+	BUG_ON(dmabuf->vmap_ptr != vaddr);
+	if (--dmabuf->vmapping_counter == 0) {
+		if (dmabuf->ops->vunmap)
+			dmabuf->ops->vunmap(dmabuf, vaddr);
+		dmabuf->vmap_ptr = NULL;
+	}
+	mutex_unlock(&dmabuf->lock);
+}
+EXPORT_SYMBOL_GPL(dma_buf_vunmap);
+
+#ifdef CONFIG_DEBUG_FS
+static int dma_buf_describe(struct seq_file *s)
+{
+	int ret;
+	struct dma_buf *buf_obj;
+	struct dma_buf_attachment *attach_obj;
+	int count = 0, attach_count;
+	size_t size = 0;
+
+	ret = mutex_lock_interruptible(&db_list.lock);
+
+	if (ret)
+		return ret;
+
+	seq_puts(s, "\nDma-buf Objects:\n");
+	seq_puts(s, "size\tflags\tmode\tcount\texp_name\n");
+
+	list_for_each_entry(buf_obj, &db_list.head, list_node) {
+		ret = mutex_lock_interruptible(&buf_obj->lock);
+
+		if (ret) {
+			seq_puts(s,
+				 "\tERROR locking buffer object: skipping\n");
+			continue;
+		}
+
+		seq_printf(s, "%08zu\t%08x\t%08x\t%08ld\t%s\n",
+				buf_obj->size,
+				buf_obj->file->f_flags, buf_obj->file->f_mode,
+				(long)(buf_obj->file->f_count.counter),
+				buf_obj->exp_name);
+
+		seq_puts(s, "\tAttached Devices:\n");
+		attach_count = 0;
+
+		list_for_each_entry(attach_obj, &buf_obj->attachments, node) {
+			seq_puts(s, "\t");
+
+			seq_printf(s, "%s\n", dev_name(attach_obj->dev));
+			attach_count++;
+		}
+
+		seq_printf(s, "Total %d devices attached\n\n",
+				attach_count);
+
+		count++;
+		size += buf_obj->size;
+		mutex_unlock(&buf_obj->lock);
+	}
+
+	seq_printf(s, "\nTotal %d objects, %zu bytes\n", count, size);
+
+	mutex_unlock(&db_list.lock);
+	return 0;
+}
+
+static int dma_buf_show(struct seq_file *s, void *unused)
+{
+	/* private is the int-returning callback; propagate its error code */
+	int (*func)(struct seq_file *) = s->private;
+	return func(s);
+}
+
+static int dma_buf_debug_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, dma_buf_show, inode->i_private);
+}
+
+static const struct file_operations dma_buf_debug_fops = {
+	.open           = dma_buf_debug_open,
+	.read           = seq_read,
+	.llseek         = seq_lseek,
+	.release        = single_release,
+};
+
+static struct dentry *dma_buf_debugfs_dir;
+
+static int dma_buf_init_debugfs(void)
+{
+	int err = 0;
+	dma_buf_debugfs_dir = debugfs_create_dir("dma_buf", NULL);
+	if (IS_ERR(dma_buf_debugfs_dir)) {
+		err = PTR_ERR(dma_buf_debugfs_dir);
+		dma_buf_debugfs_dir = NULL;
+		return err;
+	}
+
+	err = dma_buf_debugfs_create_file("bufinfo", dma_buf_describe);
+
+	if (err)
+		pr_debug("dma_buf: debugfs: failed to create node bufinfo\n");
+
+	return err;
+}
+
+static void dma_buf_uninit_debugfs(void)
+{
+	if (dma_buf_debugfs_dir)
+		debugfs_remove_recursive(dma_buf_debugfs_dir);
+}
+
+int dma_buf_debugfs_create_file(const char *name,
+				int (*write)(struct seq_file *))
+{
+	struct dentry *d;
+
+	d = debugfs_create_file(name, S_IRUGO, dma_buf_debugfs_dir,
+			write, &dma_buf_debug_fops);
+
+	return PTR_ERR_OR_ZERO(d);
+}
+#else
+static inline int dma_buf_init_debugfs(void)
+{
+	return 0;
+}
+static inline void dma_buf_uninit_debugfs(void)
+{
+}
+#endif
+
+static int __init dma_buf_init(void)
+{
+	mutex_init(&db_list.lock);
+	INIT_LIST_HEAD(&db_list.head);
+	dma_buf_init_debugfs();
+	return 0;
+}
+subsys_initcall(dma_buf_init);
+
+static void __exit dma_buf_deinit(void)
+{
+	dma_buf_uninit_debugfs();
+}
+__exitcall(dma_buf_deinit);
diff --git a/drivers/base/dma-coherent.c b/drivers/base/dma-coherent.c
index f369e279598..7d6e84a5142 100644
--- a/drivers/base/dma-coherent.c
+++ b/drivers/base/dma-coherent.c
@@ -4,17 +4,19 @@
  */
 #include <linux/slab.h>
 #include <linux/kernel.h>
+#include <linux/module.h>
 #include <linux/dma-mapping.h>
 
 struct dma_coherent_mem {
 	void		*virt_base;
 	dma_addr_t	device_base;
+	unsigned long	pfn_base;
 	int		size;
 	int		flags;
 	unsigned long	*bitmap;
 };
 
-int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr,
+int dma_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr,
 				dma_addr_t device_addr, size_t size, int flags)
 {
 	void __iomem *mem_base = NULL;
@@ -30,7 +32,7 @@ int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr,
 
 	/* FIXME: this routine just ignores DMA_MEMORY_INCLUDES_CHILDREN */
 
-	mem_base = ioremap(bus_addr, size);
+	mem_base = ioremap(phys_addr, size);
 	if (!mem_base)
 		goto out;
 
@@ -43,6 +45,7 @@ int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr,
 
 	dev->dma_mem->virt_base = mem_base;
 	dev->dma_mem->device_base = device_addr;
+	dev->dma_mem->pfn_base = PFN_DOWN(phys_addr);
 	dev->dma_mem->size = pages;
 	dev->dma_mem->flags = flags;
 
@@ -175,3 +178,43 @@ int dma_release_from_coherent(struct device *dev, int order, void *vaddr)
 	return 0;
 }
 EXPORT_SYMBOL(dma_release_from_coherent);
+
+/**
+ * dma_mmap_from_coherent() - try to mmap the memory allocated from
+ * per-device coherent memory pool to userspace
+ * @dev:	device from which the memory was allocated
+ * @vma:	vm_area for the userspace memory
+ * @vaddr:	cpu address returned by dma_alloc_from_coherent
+ * @size:	size of the memory buffer allocated by dma_alloc_from_coherent
+ * @ret:	result from remap_pfn_range()
+ *
+ * This checks whether the memory was allocated from the per-device
+ * coherent memory pool and if so, maps that memory to the provided vma.
+ *
+ * Returns 1 if we correctly mapped the memory, or 0 if the caller should
+ * proceed with mapping memory from generic pools.
+ */
+int dma_mmap_from_coherent(struct device *dev, struct vm_area_struct *vma,
+			   void *vaddr, size_t size, int *ret)
+{
+	struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL;
+
+	if (mem && vaddr >= mem->virt_base && vaddr + size <=
+		   (mem->virt_base + (mem->size << PAGE_SHIFT))) {
+		unsigned long off = vma->vm_pgoff;
+		int start = (vaddr - mem->virt_base) >> PAGE_SHIFT;
+		int user_count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+		int count = PAGE_ALIGN(size) >> PAGE_SHIFT; /* round up */
+
+		*ret = -ENXIO;
+		if (off < count && user_count <= count - off) {
+			unsigned long pfn = mem->pfn_base + start + off;
+			*ret = remap_pfn_range(vma, vma->vm_start, pfn,
+					       user_count << PAGE_SHIFT,
+					       vma->vm_page_prot);
+		}
+		return 1;
+	}
+	return 0;
+}
+EXPORT_SYMBOL(dma_mmap_from_coherent);
diff --git a/drivers/base/dma-contiguous.c b/drivers/base/dma-contiguous.c
new file mode 100644
index 00000000000..6467c919c50
--- /dev/null
+++ b/drivers/base/dma-contiguous.c
@@ -0,0 +1,409 @@
+/*
+ * Contiguous Memory Allocator for DMA mapping framework
+ * Copyright (c) 2010-2011 by Samsung Electronics.
+ * Written by:
+ *	Marek Szyprowski <m.szyprowski@samsung.com>
+ *	Michal Nazarewicz <mina86@mina86.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License or (at your optional) any later version of the license.
+ */
+
+#define pr_fmt(fmt) "cma: " fmt
+
+#ifdef CONFIG_CMA_DEBUG
+#ifndef DEBUG
+#  define DEBUG
+#endif
+#endif
+
+#include <asm/page.h>
+#include <asm/dma-contiguous.h>
+
+#include <linux/memblock.h>
+#include <linux/err.h>
+#include <linux/mm.h>
+#include <linux/mutex.h>
+#include <linux/page-isolation.h>
+#include <linux/sizes.h>
+#include <linux/slab.h>
+#include <linux/swap.h>
+#include <linux/mm_types.h>
+#include <linux/dma-contiguous.h>
+
+struct cma {
+	unsigned long	base_pfn;
+	unsigned long	count;
+	unsigned long	*bitmap;
+	struct mutex	lock;
+};
+
+struct cma *dma_contiguous_default_area;
+
+#ifdef CONFIG_CMA_SIZE_MBYTES
+#define CMA_SIZE_MBYTES CONFIG_CMA_SIZE_MBYTES
+#else
+#define CMA_SIZE_MBYTES 0
+#endif
+
+/*
+ * Default global CMA area size can be defined in kernel's .config.
+ * This is useful mainly for distro maintainers to create a kernel
+ * that works correctly for most supported systems.
+ * The size can be set in bytes or as a percentage of the total memory
+ * in the system.
+ *
+ * Users, who want to set the size of global CMA area for their system
+ * should use cma= kernel parameter.
+ */
+static const phys_addr_t size_bytes = CMA_SIZE_MBYTES * SZ_1M;
+static phys_addr_t size_cmdline = -1;
+static phys_addr_t base_cmdline;
+static phys_addr_t limit_cmdline;
+
+static int __init early_cma(char *p)
+{
+	pr_debug("%s(%s)\n", __func__, p);
+	size_cmdline = memparse(p, &p);
+	if (*p != '@')
+		return 0;
+	base_cmdline = memparse(p + 1, &p);
+	if (*p != '-') {
+		limit_cmdline = base_cmdline + size_cmdline;
+		return 0;
+	}
+	limit_cmdline = memparse(p + 1, &p);
+
+	return 0;
+}
+early_param("cma", early_cma);
+
+#ifdef CONFIG_CMA_SIZE_PERCENTAGE
+
+static phys_addr_t __init __maybe_unused cma_early_percent_memory(void)
+{
+	struct memblock_region *reg;
+	unsigned long total_pages = 0;
+
+	/*
+	 * We cannot use memblock_phys_mem_size() here, because
+	 * memblock_analyze() has not been called yet.
+	 */
+	for_each_memblock(memory, reg)
+		total_pages += memblock_region_memory_end_pfn(reg) -
+			       memblock_region_memory_base_pfn(reg);
+
+	return (total_pages * CONFIG_CMA_SIZE_PERCENTAGE / 100) << PAGE_SHIFT;
+}
+
+#else
+
+static inline __maybe_unused phys_addr_t cma_early_percent_memory(void)
+{
+	return 0;
+}
+
+#endif
+
+/**
+ * dma_contiguous_reserve() - reserve area(s) for contiguous memory handling
+ * @limit: End address of the reserved memory (optional, 0 for any).
+ *
+ * This function reserves memory from early allocator. It should be
+ * called by arch specific code once the early allocator (memblock or bootmem)
+ * has been activated and all other subsystems have already allocated/reserved
+ * memory.
+ */
+void __init dma_contiguous_reserve(phys_addr_t limit)
+{
+	phys_addr_t selected_size = 0;
+	phys_addr_t selected_base = 0;
+	phys_addr_t selected_limit = limit;
+	bool fixed = false;
+
+	pr_debug("%s(limit %08lx)\n", __func__, (unsigned long)limit);
+
+	if (size_cmdline != -1) {
+		selected_size = size_cmdline;
+		selected_base = base_cmdline;
+		selected_limit = min_not_zero(limit_cmdline, limit);
+		if (base_cmdline + size_cmdline == limit_cmdline)
+			fixed = true;
+	} else {
+#ifdef CONFIG_CMA_SIZE_SEL_MBYTES
+		selected_size = size_bytes;
+#elif defined(CONFIG_CMA_SIZE_SEL_PERCENTAGE)
+		selected_size = cma_early_percent_memory();
+#elif defined(CONFIG_CMA_SIZE_SEL_MIN)
+		selected_size = min(size_bytes, cma_early_percent_memory());
+#elif defined(CONFIG_CMA_SIZE_SEL_MAX)
+		selected_size = max(size_bytes, cma_early_percent_memory());
+#endif
+	}
+
+	if (selected_size && !dma_contiguous_default_area) {
+		pr_debug("%s: reserving %ld MiB for global area\n", __func__,
+			 (unsigned long)selected_size / SZ_1M);
+
+		dma_contiguous_reserve_area(selected_size, selected_base,
+					    selected_limit,
+					    &dma_contiguous_default_area,
+					    fixed);
+	}
+}
+
+static DEFINE_MUTEX(cma_mutex);
+
+static int __init cma_activate_area(struct cma *cma)
+{
+	int bitmap_size = BITS_TO_LONGS(cma->count) * sizeof(long);
+	unsigned long base_pfn = cma->base_pfn, pfn = base_pfn;
+	unsigned i = cma->count >> pageblock_order;
+	struct zone *zone;
+	int ret = -ENOMEM;
+
+	cma->bitmap = kzalloc(bitmap_size, GFP_KERNEL);
+	if (!cma->bitmap)
+		goto err;
+
+	WARN_ON_ONCE(!pfn_valid(pfn));
+	zone = page_zone(pfn_to_page(pfn));
+	ret = -EINVAL;
+
+	do {
+		unsigned j;
+		base_pfn = pfn;
+		for (j = pageblock_nr_pages; j; --j, pfn++) {
+			WARN_ON_ONCE(!pfn_valid(pfn));
+			/*
+			 * alloc_contig_range needs the whole pfn range in one
+			 * zone, so reject CMA areas that span several zones.
+			 */
+			if (page_zone(pfn_to_page(pfn)) != zone)
+				goto err;
+		}
+		init_cma_reserved_pageblock(pfn_to_page(base_pfn));
+	} while (--i);
+
+	mutex_init(&cma->lock);
+	return 0;
+
+err:
+	kfree(cma->bitmap);
+	cma->count = 0;	/* allocators bail out on !cma->count */
+	return ret;
+}
+
+static struct cma cma_areas[MAX_CMA_AREAS];
+static unsigned cma_area_count;
+
+static int __init cma_init_reserved_areas(void)
+{
+	int i;
+
+	for (i = 0; i < cma_area_count; i++) {
+		int ret = cma_activate_area(&cma_areas[i]);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+core_initcall(cma_init_reserved_areas);
+
+/**
+ * dma_contiguous_reserve_area() - reserve custom contiguous area
+ * @size: Size of the reserved area (in bytes),
+ * @base: Base address of the reserved area (optional, use 0 for any).
+ * @limit: End address of the reserved memory (optional, 0 for any).
+ * @res_cma: Pointer to store the created cma region.
+ * @fixed: hint about where to place the reserved area
+ *
+ * This function reserves memory from early allocator. It should be
+ * called by arch specific code once the early allocator (memblock or bootmem)
+ * has been activated and all other subsystems have already allocated/reserved
+ * memory. This function allows to create custom reserved areas for specific
+ * devices.
+ *
+ * If @fixed is true, reserve contiguous area at exactly @base.  If false,
+ * reserve in range from @base to @limit.
+ */
+int __init dma_contiguous_reserve_area(phys_addr_t size, phys_addr_t base,
+				       phys_addr_t limit, struct cma **res_cma,
+				       bool fixed)
+{
+	struct cma *cma = &cma_areas[cma_area_count];
+	phys_addr_t alignment;
+	int ret = 0;
+
+	pr_debug("%s(size %lx, base %08lx, limit %08lx)\n", __func__,
+		 (unsigned long)size, (unsigned long)base,
+		 (unsigned long)limit);
+
+	/* Sanity checks */
+	if (cma_area_count == ARRAY_SIZE(cma_areas)) {
+		pr_err("Not enough slots for CMA reserved regions!\n");
+		return -ENOSPC;
+	}
+
+	if (!size)
+		return -EINVAL;
+
+	/* Sanitise input arguments */
+	alignment = PAGE_SIZE << max(MAX_ORDER - 1, pageblock_order);
+	base = ALIGN(base, alignment);
+	size = ALIGN(size, alignment);
+	limit &= ~(alignment - 1);
+
+	/* Reserve memory */
+	if (base && fixed) {
+		if (memblock_is_region_reserved(base, size) ||
+		    memblock_reserve(base, size) < 0) {
+			ret = -EBUSY;
+			goto err;
+		}
+	} else {
+		phys_addr_t addr = memblock_alloc_range(size, alignment, base,
+							limit);
+		if (!addr) {
+			ret = -ENOMEM;
+			goto err;
+		} else {
+			base = addr;
+		}
+	}
+
+	/*
+	 * Each reserved area must be initialised later, when more kernel
+	 * subsystems (like slab allocator) are available.
+	 */
+	cma->base_pfn = PFN_DOWN(base);
+	cma->count = size >> PAGE_SHIFT;
+	*res_cma = cma;
+	cma_area_count++;
+
+	pr_info("CMA: reserved %ld MiB at %08lx\n", (unsigned long)size / SZ_1M,
+		(unsigned long)base);
+
+	/* Architecture specific contiguous memory fixup. */
+	dma_contiguous_early_fixup(base, size);
+	return 0;
+err:
+	pr_err("CMA: failed to reserve %ld MiB\n", (unsigned long)size / SZ_1M);
+	return ret;
+}
+
+static void clear_cma_bitmap(struct cma *cma, unsigned long pfn, int count)
+{
+	mutex_lock(&cma->lock);
+	bitmap_clear(cma->bitmap, pfn - cma->base_pfn, count);
+	mutex_unlock(&cma->lock);
+}
+
+/**
+ * dma_alloc_from_contiguous() - allocate pages from contiguous area
+ * @dev:   Pointer to device for which the allocation is performed.
+ * @count: Requested number of pages.
+ * @align: Requested alignment of pages (in PAGE_SIZE order).
+ *
+ * This function allocates memory buffer for specified device. It uses
+ * device specific contiguous memory area if available or the default
+ * global one. Requires architecture specific dev_get_cma_area() helper
+ * function.
+ */
+struct page *dma_alloc_from_contiguous(struct device *dev, int count,
+				       unsigned int align)
+{
+	unsigned long mask, pfn, pageno, start = 0;
+	struct cma *cma = dev_get_cma_area(dev);
+	struct page *page = NULL;
+	int ret;
+
+	if (!cma || !cma->count)
+		return NULL;
+
+	if (align > CONFIG_CMA_ALIGNMENT)
+		align = CONFIG_CMA_ALIGNMENT;
+
+	pr_debug("%s(cma %p, count %d, align %d)\n", __func__, (void *)cma,
+		 count, align);
+
+	if (!count)
+		return NULL;
+
+	mask = (1 << align) - 1;
+
+
+	for (;;) {
+		mutex_lock(&cma->lock);
+		pageno = bitmap_find_next_zero_area(cma->bitmap, cma->count,
+						    start, count, mask);
+		if (pageno >= cma->count) {
+			mutex_unlock(&cma->lock);
+			break;
+		}
+		bitmap_set(cma->bitmap, pageno, count);
+		/*
+		 * It's safe to drop the lock here. We've marked this region for
+		 * our exclusive use. If the migration fails we will take the
+		 * lock again and unmark it.
+		 */
+		mutex_unlock(&cma->lock);
+
+		pfn = cma->base_pfn + pageno;
+		mutex_lock(&cma_mutex);
+		ret = alloc_contig_range(pfn, pfn + count, MIGRATE_CMA);
+		mutex_unlock(&cma_mutex);
+		if (ret == 0) {
+			page = pfn_to_page(pfn);
+			break;
+		} else if (ret != -EBUSY) {
+			clear_cma_bitmap(cma, pfn, count);
+			break;
+		}
+		clear_cma_bitmap(cma, pfn, count);
+		pr_debug("%s(): memory range at %p is busy, retrying\n",
+			 __func__, pfn_to_page(pfn));
+		/* try again with a bit different memory target */
+		start = pageno + mask + 1;
+	}
+
+	pr_debug("%s(): returned %p\n", __func__, page);
+	return page;
+}
+
+/**
+ * dma_release_from_contiguous() - release allocated pages
+ * @dev:   Pointer to device for which the pages were allocated.
+ * @pages: Allocated pages.
+ * @count: Number of allocated pages.
+ *
+ * This function releases memory allocated by dma_alloc_from_contiguous().
+ * It returns false when provided pages do not belong to contiguous area and
+ * true otherwise.
+ */
+bool dma_release_from_contiguous(struct device *dev, struct page *pages,
+				 int count)
+{
+	struct cma *cma = dev_get_cma_area(dev);
+	unsigned long pfn;
+
+	if (!cma || !pages)
+		return false;
+
+	pr_debug("%s(page %p)\n", __func__, (void *)pages);
+
+	pfn = page_to_pfn(pages);
+
+	if (pfn < cma->base_pfn || pfn >= cma->base_pfn + cma->count)
+		return false;
+
+	VM_BUG_ON(pfn + count > cma->base_pfn + cma->count);
+
+	free_contig_range(pfn, count);
+	clear_cma_bitmap(cma, pfn, count);
+
+	return true;
+}
diff --git a/drivers/base/dma-mapping.c b/drivers/base/dma-mapping.c
index 763d59c1eb6..6cd08e145bf 100644
--- a/drivers/base/dma-mapping.c
+++ b/drivers/base/dma-mapping.c
@@ -8,7 +8,9 @@
  */
 
 #include <linux/dma-mapping.h>
+#include <linux/export.h>
 #include <linux/gfp.h>
+#include <asm-generic/dma-coherent.h>
 
 /*
  * Managed DMA API
@@ -173,7 +175,7 @@ static void dmam_coherent_decl_release(struct device *dev, void *res)
 /**
  * dmam_declare_coherent_memory - Managed dma_declare_coherent_memory()
  * @dev: Device to declare coherent memory for
- * @bus_addr: Bus address of coherent memory to be declared
+ * @phys_addr: Physical address of coherent memory to be declared
  * @device_addr: Device address of coherent memory to be declared
  * @size: Size of coherent memory to be declared
  * @flags: Flags
@@ -183,7 +185,7 @@ static void dmam_coherent_decl_release(struct device *dev, void *res)
  * RETURNS:
  * 0 on success, -errno on failure.
  */
-int dmam_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr,
+int dmam_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr,
 				 dma_addr_t device_addr, size_t size, int flags)
 {
 	void *res;
@@ -193,7 +195,7 @@ int dmam_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr,
 	if (!res)
 		return -ENOMEM;
 
-	rc = dma_declare_coherent_memory(dev, bus_addr, device_addr, size,
+	rc = dma_declare_coherent_memory(dev, phys_addr, device_addr, size,
 					 flags);
 	if (rc == 0)
 		devres_add(dev, res);
@@ -217,3 +219,51 @@ void dmam_release_declared_memory(struct device *dev)
 EXPORT_SYMBOL(dmam_release_declared_memory);
 
 #endif
+
+/*
+ * Create scatter-list for the already allocated DMA buffer.
+ */
+int dma_common_get_sgtable(struct device *dev, struct sg_table *sgt,
+		 void *cpu_addr, dma_addr_t handle, size_t size)
+{
+	struct page *page = virt_to_page(cpu_addr);
+	int ret;
+
+	ret = sg_alloc_table(sgt, 1, GFP_KERNEL);
+	if (unlikely(ret))
+		return ret;
+
+	sg_set_page(sgt->sgl, page, PAGE_ALIGN(size), 0);
+	return 0;
+}
+EXPORT_SYMBOL(dma_common_get_sgtable);
+
+/*
+ * Create userspace mapping for the DMA-coherent memory.
+ */
+int dma_common_mmap(struct device *dev, struct vm_area_struct *vma,
+		    void *cpu_addr, dma_addr_t dma_addr, size_t size)
+{
+	int ret = -ENXIO;
+#ifdef CONFIG_MMU
+	unsigned long user_count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+	unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT;
+	unsigned long pfn = page_to_pfn(virt_to_page(cpu_addr));
+	unsigned long off = vma->vm_pgoff;
+
+	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+
+	if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret))
+		return ret;
+
+	if (off < count && user_count <= (count - off)) {
+		ret = remap_pfn_range(vma, vma->vm_start,
+				      pfn + off,
+				      user_count << PAGE_SHIFT,
+				      vma->vm_page_prot);
+	}
+#endif	/* CONFIG_MMU */
+
+	return ret;
+}
+EXPORT_SYMBOL(dma_common_mmap);
diff --git a/drivers/base/driver.c b/drivers/base/driver.c
index b631f7c5945..9e29943e56c 100644
--- a/drivers/base/driver.c
+++ b/drivers/base/driver.c
@@ -15,6 +15,7 @@
 #include <linux/errno.h>
 #include <linux/slab.h>
 #include <linux/string.h>
+#include <linux/sysfs.h>
 #include "base.h"
 
 static struct device *next_device(struct klist_iter *i)
@@ -80,7 +81,7 @@ struct device *driver_find_device(struct device_driver *drv,
 	struct klist_iter i;
 	struct device *dev;
 
-	if (!drv)
+	if (!drv || !drv->p)
 		return NULL;
 
 	klist_iter_init_node(&drv->p->klist_devices, &i,
@@ -123,92 +124,16 @@ void driver_remove_file(struct device_driver *drv,
 }
 EXPORT_SYMBOL_GPL(driver_remove_file);
 
-/**
- * driver_add_kobj - add a kobject below the specified driver
- * @drv: requesting device driver
- * @kobj: kobject to add below this driver
- * @fmt: format string that names the kobject
- *
- * You really don't want to do this, this is only here due to one looney
- * iseries driver, go poke those developers if you are annoyed about
- * this...
- */
-int driver_add_kobj(struct device_driver *drv, struct kobject *kobj,
-		    const char *fmt, ...)
+int driver_add_groups(struct device_driver *drv,
+		      const struct attribute_group **groups)
 {
-	va_list args;
-	char *name;
-	int ret;
-
-	va_start(args, fmt);
-	name = kvasprintf(GFP_KERNEL, fmt, args);
-	va_end(args);
-
-	if (!name)
-		return -ENOMEM;
-
-	ret = kobject_add(kobj, &drv->p->kobj, "%s", name);
-	kfree(name);
-	return ret;
+	return sysfs_create_groups(&drv->p->kobj, groups);
 }
-EXPORT_SYMBOL_GPL(driver_add_kobj);
 
-/**
- * get_driver - increment driver reference count.
- * @drv: driver.
- */
-struct device_driver *get_driver(struct device_driver *drv)
+void driver_remove_groups(struct device_driver *drv,
+			  const struct attribute_group **groups)
 {
-	if (drv) {
-		struct driver_private *priv;
-		struct kobject *kobj;
-
-		kobj = kobject_get(&drv->p->kobj);
-		priv = to_driver(kobj);
-		return priv->driver;
-	}
-	return NULL;
-}
-EXPORT_SYMBOL_GPL(get_driver);
-
-/**
- * put_driver - decrement driver's refcount.
- * @drv: driver.
- */
-void put_driver(struct device_driver *drv)
-{
-	kobject_put(&drv->p->kobj);
-}
-EXPORT_SYMBOL_GPL(put_driver);
-
-static int driver_add_groups(struct device_driver *drv,
-			     const struct attribute_group **groups)
-{
-	int error = 0;
-	int i;
-
-	if (groups) {
-		for (i = 0; groups[i]; i++) {
-			error = sysfs_create_group(&drv->p->kobj, groups[i]);
-			if (error) {
-				while (--i >= 0)
-					sysfs_remove_group(&drv->p->kobj,
-							   groups[i]);
-				break;
-			}
-		}
-	}
-	return error;
-}
-
-static void driver_remove_groups(struct device_driver *drv,
-				 const struct attribute_group **groups)
-{
-	int i;
-
-	if (groups)
-		for (i = 0; groups[i]; i++)
-			sysfs_remove_group(&drv->p->kobj, groups[i]);
+	sysfs_remove_groups(&drv->p->kobj, groups);
 }
 
 /**
@@ -234,7 +159,6 @@ int driver_register(struct device_driver *drv)
 
 	other = driver_find(drv->name, drv->bus);
 	if (other) {
-		put_driver(other);
 		printk(KERN_ERR "Error: Driver '%s' is already registered, "
 			"aborting...\n", drv->name);
 		return -EBUSY;
@@ -244,8 +168,12 @@ int driver_register(struct device_driver *drv)
 	if (ret)
 		return ret;
 	ret = driver_add_groups(drv, drv->groups);
-	if (ret)
+	if (ret) {
 		bus_remove_driver(drv);
+		return ret;
+	}
+	kobject_uevent(&drv->p->kobj, KOBJ_ADD);
+
 	return ret;
 }
 EXPORT_SYMBOL_GPL(driver_register);
@@ -275,7 +203,9 @@ EXPORT_SYMBOL_GPL(driver_unregister);
  * Call kset_find_obj() to iterate over list of drivers on
  * a bus to find driver by name. Return driver if found.
  *
- * Note that kset_find_obj increments driver's reference count.
+ * This routine provides no locking to prevent the driver it returns
+ * from being unregistered or unloaded while the caller is using it.
+ * The caller is responsible for preventing this.
  */
 struct device_driver *driver_find(const char *name, struct bus_type *bus)
 {
@@ -283,6 +213,8 @@ struct device_driver *driver_find(const char *name, struct bus_type *bus)
 	struct driver_private *priv;
 
 	if (k) {
+		/* Drop reference added by kset_find_obj() */
+		kobject_put(k);
 		priv = to_driver(k);
 		return priv->driver;
 	}
diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c
index 40af43ebd92..d276e33880b 100644
--- a/drivers/base/firmware_class.c
+++ b/drivers/base/firmware_class.c
@@ -16,12 +16,22 @@
 #include <linux/interrupt.h>
 #include <linux/bitops.h>
 #include <linux/mutex.h>
-#include <linux/kthread.h>
+#include <linux/workqueue.h>
 #include <linux/highmem.h>
 #include <linux/firmware.h>
 #include <linux/slab.h>
+#include <linux/sched.h>
+#include <linux/file.h>
+#include <linux/list.h>
+#include <linux/async.h>
+#include <linux/pm.h>
+#include <linux/suspend.h>
+#include <linux/syscore_ops.h>
+#include <linux/reboot.h>
 
-#define to_dev(obj) container_of(obj, struct device, kobj)
+#include <generated/utsrelease.h>
+
+#include "base.h"
 
 MODULE_AUTHOR("Manuel Estrada Sainz");
 MODULE_DESCRIPTION("Multi purpose firmware loading support");
@@ -81,38 +91,420 @@ enum {
 
 static int loading_timeout = 60;	/* In seconds */
 
-/* fw_lock could be moved to 'struct firmware_priv' but since it is just
- * guarding for corner cases a global lock should be OK */
-static DEFINE_MUTEX(fw_lock);
+static inline long firmware_loading_timeout(void)
+{
+	return loading_timeout > 0 ? loading_timeout * HZ : MAX_SCHEDULE_TIMEOUT;
+}
 
-struct firmware_priv {
+/* firmware behavior options */
+#define FW_OPT_UEVENT	(1U << 0)
+#define FW_OPT_NOWAIT	(1U << 1)
+#ifdef CONFIG_FW_LOADER_USER_HELPER
+#define FW_OPT_FALLBACK	(1U << 2)
+#else
+#define FW_OPT_FALLBACK	0
+#endif
+
+struct firmware_cache {
+	/* firmware_buf instances are added to the list below */
+	spinlock_t lock;
+	struct list_head head;
+	int state;
+
+#ifdef CONFIG_PM_SLEEP
+	/*
+	 * Names of firmware images which have been cached successfully
+	 * will be added into the below list so that device uncache
+	 * helper can trace which firmware images have been cached
+	 * before.
+	 */
+	spinlock_t name_lock;
+	struct list_head fw_names;
+
+	struct delayed_work work;
+
+	struct notifier_block   pm_notify;
+#endif
+};
+
+struct firmware_buf {
+	struct kref ref;
+	struct list_head list;
 	struct completion completion;
-	struct firmware *fw;
+	struct firmware_cache *fwc;
 	unsigned long status;
+	void *data;
+	size_t size;
+#ifdef CONFIG_FW_LOADER_USER_HELPER
+	bool is_paged_buf;
+	bool need_uevent;
 	struct page **pages;
 	int nr_pages;
 	int page_array_size;
-	struct timer_list timeout;
-	struct device dev;
-	bool nowait;
+	struct list_head pending_list;
+#endif
 	char fw_id[];
 };
 
+struct fw_cache_entry {
+	struct list_head list;
+	char name[];
+};
+
+struct fw_name_devm {
+	unsigned long magic;
+	char name[];
+};
+
+#define to_fwbuf(d) container_of(d, struct firmware_buf, ref)
+
+#define	FW_LOADER_NO_CACHE	0
+#define	FW_LOADER_START_CACHE	1
+
+static int fw_cache_piggyback_on_request(const char *name);
+
+/* fw_lock could be moved to 'struct firmware_priv' but since it is just
+ * guarding for corner cases a global lock should be OK */
+static DEFINE_MUTEX(fw_lock);
+
+static struct firmware_cache fw_cache;
+
+static struct firmware_buf *__allocate_fw_buf(const char *fw_name,
+					      struct firmware_cache *fwc)
+{
+	struct firmware_buf *buf;
+
+	buf = kzalloc(sizeof(*buf) + strlen(fw_name) + 1 , GFP_ATOMIC);
+
+	if (!buf)
+		return buf;
+
+	kref_init(&buf->ref);
+	strcpy(buf->fw_id, fw_name);
+	buf->fwc = fwc;
+	init_completion(&buf->completion);
+#ifdef CONFIG_FW_LOADER_USER_HELPER
+	INIT_LIST_HEAD(&buf->pending_list);
+#endif
+
+	pr_debug("%s: fw-%s buf=%p\n", __func__, fw_name, buf);
+
+	return buf;
+}
+
+static struct firmware_buf *__fw_lookup_buf(const char *fw_name)
+{
+	struct firmware_buf *tmp;
+	struct firmware_cache *fwc = &fw_cache;
+
+	list_for_each_entry(tmp, &fwc->head, list)
+		if (!strcmp(tmp->fw_id, fw_name))
+			return tmp;
+	return NULL;
+}
+
+static int fw_lookup_and_allocate_buf(const char *fw_name,
+				      struct firmware_cache *fwc,
+				      struct firmware_buf **buf)
+{
+	struct firmware_buf *tmp;
+
+	spin_lock(&fwc->lock);
+	tmp = __fw_lookup_buf(fw_name);
+	if (tmp) {
+		kref_get(&tmp->ref);
+		spin_unlock(&fwc->lock);
+		*buf = tmp;
+		return 1;
+	}
+	tmp = __allocate_fw_buf(fw_name, fwc);
+	if (tmp)
+		list_add(&tmp->list, &fwc->head);
+	spin_unlock(&fwc->lock);
+
+	*buf = tmp;
+
+	return tmp ? 0 : -ENOMEM;
+}
+
+static void __fw_free_buf(struct kref *ref)
+	__releases(&fwc->lock)
+{
+	struct firmware_buf *buf = to_fwbuf(ref);
+	struct firmware_cache *fwc = buf->fwc;
+
+	pr_debug("%s: fw-%s buf=%p data=%p size=%u\n",
+		 __func__, buf->fw_id, buf, buf->data,
+		 (unsigned int)buf->size);
+
+	list_del(&buf->list);
+	spin_unlock(&fwc->lock);
+
+#ifdef CONFIG_FW_LOADER_USER_HELPER
+	if (buf->is_paged_buf) {
+		int i;
+		vunmap(buf->data);
+		for (i = 0; i < buf->nr_pages; i++)
+			__free_page(buf->pages[i]);
+		kfree(buf->pages);
+	} else
+#endif
+		vfree(buf->data);
+	kfree(buf);
+}
+
+static void fw_free_buf(struct firmware_buf *buf)
+{
+	struct firmware_cache *fwc = buf->fwc;
+	spin_lock(&fwc->lock);
+	if (!kref_put(&buf->ref, __fw_free_buf))
+		spin_unlock(&fwc->lock);
+}
+
+/* direct firmware loading support */
+static char fw_path_para[256];
+static const char * const fw_path[] = {
+	fw_path_para,
+	"/lib/firmware/updates/" UTS_RELEASE,
+	"/lib/firmware/updates",
+	"/lib/firmware/" UTS_RELEASE,
+	"/lib/firmware"
+};
+
+/*
+ * Typical usage is to pass 'firmware_class.path=$CUSTOMIZED_PATH' on
+ * the kernel command line, because firmware_class is generally built
+ * into the kernel rather than as a module.
+ */
+module_param_string(path, fw_path_para, sizeof(fw_path_para), 0644);
+MODULE_PARM_DESC(path, "customized firmware image search path with a higher priority than default path");
+
+/* Don't inline this: 'struct kstat' is biggish */
+static noinline_for_stack int fw_file_size(struct file *file)
+{
+	struct kstat st;
+	if (vfs_getattr(&file->f_path, &st))
+		return -1;
+	if (!S_ISREG(st.mode))
+		return -1;
+	if (st.size != (int)st.size)
+		return -1;
+	return st.size;
+}
+
+/* Read all of @file into a vmalloc() buffer and attach it to @fw_buf.
+ * Returns 0 on success, negative errno otherwise (@fw_buf left untouched). */
+static int fw_read_file_contents(struct file *file, struct firmware_buf *fw_buf)
+{
+	int size, rc;
+	char *buf;
+
+	size = fw_file_size(file);
+	if (size <= 0)
+		return -EINVAL;
+	buf = vmalloc(size);
+	if (!buf)
+		return -ENOMEM;
+	rc = kernel_read(file, 0, buf, size);
+	if (rc != size) {
+		if (rc >= 0)	/* short read, including 0 bytes, is a failure */
+			rc = -EIO;
+		vfree(buf);
+		return rc;
+	}
+	fw_buf->data = buf;
+	fw_buf->size = size;
+	return 0;
+}
+
+/* Load buf->fw_id from the first fw_path[] entry that has it; 0 or -errno. */
+static int fw_get_filesystem_firmware(struct device *device,
+				       struct firmware_buf *buf)
+{
+	int i, rc = -ENOENT;
+	char *path = __getname();
+
+	if (!path)	/* __getname() allocates and may return NULL */
+		return -ENOMEM;
+	for (i = 0; i < ARRAY_SIZE(fw_path); i++) {
+		struct file *file;
+
+		/* skip the unset customized path */
+		if (!fw_path[i][0])
+			continue;
+		snprintf(path, PATH_MAX, "%s/%s", fw_path[i], buf->fw_id);
+		file = filp_open(path, O_RDONLY, 0);
+		if (IS_ERR(file))
+			continue;
+		rc = fw_read_file_contents(file, buf);
+		fput(file);
+		if (rc)
+			dev_warn(device, "firmware, attempted to load %s, but failed with error %d\n",
+				path, rc);
+		else
+			break;
+	}
+	__putname(path);
+
+	if (!rc) {
+		dev_dbg(device, "firmware: direct-loading firmware %s\n",
+			buf->fw_id);
+		mutex_lock(&fw_lock);
+		set_bit(FW_STATUS_DONE, &buf->status);
+		complete_all(&buf->completion);
+		mutex_unlock(&fw_lock);
+	}
+
+	return rc;
+}
+
+/* firmware holds the ownership of pages */
+static void firmware_free_data(const struct firmware *fw)
+{
+	/* Loaded directly? */
+	if (!fw->priv) {
+		vfree(fw->data);
+		return;
+	}
+	fw_free_buf(fw->priv);
+}
+
+/* copy the pages buffer info from buf into firmware */
+static void fw_set_page_data(struct firmware_buf *buf, struct firmware *fw)
+{
+	fw->priv = buf;
+#ifdef CONFIG_FW_LOADER_USER_HELPER
+	fw->pages = buf->pages;
+#endif
+	fw->size = buf->size;
+	fw->data = buf->data;
+
+	pr_debug("%s: fw-%s buf=%p data=%p size=%u\n",
+		 __func__, buf->fw_id, buf, buf->data,
+		 (unsigned int)buf->size);
+}
+
+#ifdef CONFIG_PM_SLEEP
+static void fw_name_devm_release(struct device *dev, void *res)
+{
+	struct fw_name_devm *fwn = res;
+
+	if (fwn->magic == (unsigned long)&fw_cache)
+		pr_debug("%s: fw_name-%s devm-%p released\n",
+				__func__, fwn->name, res);
+}
+
+static int fw_devm_match(struct device *dev, void *res,
+		void *match_data)
+{
+	struct fw_name_devm *fwn = res;
+
+	return (fwn->magic == (unsigned long)&fw_cache) &&
+		!strcmp(fwn->name, match_data);
+}
+
+static struct fw_name_devm *fw_find_devm_name(struct device *dev,
+		const char *name)
+{
+	struct fw_name_devm *fwn;
+
+	fwn = devres_find(dev, fw_name_devm_release,
+			  fw_devm_match, (void *)name);
+	return fwn;
+}
+
+/* add firmware name into devres list */
+static int fw_add_devm_name(struct device *dev, const char *name)
+{
+	struct fw_name_devm *fwn;
+
+	fwn = fw_find_devm_name(dev, name);
+	if (fwn)
+		return 1;
+
+	fwn = devres_alloc(fw_name_devm_release, sizeof(struct fw_name_devm) +
+			   strlen(name) + 1, GFP_KERNEL);
+	if (!fwn)
+		return -ENOMEM;
+
+	fwn->magic = (unsigned long)&fw_cache;
+	strcpy(fwn->name, name);
+	devres_add(dev, fwn);
+
+	return 0;
+}
+#else
+static int fw_add_devm_name(struct device *dev, const char *name)
+{
+	return 0;
+}
+#endif
+
+
+/*
+ * user-mode helper code
+ */
+#ifdef CONFIG_FW_LOADER_USER_HELPER
+struct firmware_priv {
+	struct delayed_work timeout_work;
+	bool nowait;
+	struct device dev;
+	struct firmware_buf *buf;
+	struct firmware *fw;
+};
+
 static struct firmware_priv *to_firmware_priv(struct device *dev)
 {
 	return container_of(dev, struct firmware_priv, dev);
 }
 
+static void __fw_load_abort(struct firmware_buf *buf)
+{
+	/*
+	 * There is a small window in which user can write to 'loading'
+	 * between loading done and disappearance of 'loading'
+	 */
+	if (test_bit(FW_STATUS_DONE, &buf->status))
+		return;
+
+	list_del_init(&buf->pending_list);
+	set_bit(FW_STATUS_ABORT, &buf->status);
+	complete_all(&buf->completion);
+}
+
 static void fw_load_abort(struct firmware_priv *fw_priv)
 {
-	set_bit(FW_STATUS_ABORT, &fw_priv->status);
-	wmb();
-	complete(&fw_priv->completion);
+	struct firmware_buf *buf = fw_priv->buf;
+
+	__fw_load_abort(buf);
+
+	/* avoid user action after loading abort */
+	fw_priv->buf = NULL;
+}
+
+#define is_fw_load_aborted(buf)	\
+	test_bit(FW_STATUS_ABORT, &(buf)->status)
+
+static LIST_HEAD(pending_fw_head);
+
+/* reboot notifier to avoid deadlock with usermode_lock */
+static int fw_shutdown_notify(struct notifier_block *unused1,
+			      unsigned long unused2, void *unused3)
+{
+	mutex_lock(&fw_lock);
+	while (!list_empty(&pending_fw_head))
+		__fw_load_abort(list_first_entry(&pending_fw_head,
+					       struct firmware_buf,
+					       pending_list));
+	mutex_unlock(&fw_lock);
+	return NOTIFY_DONE;
 }
 
-static ssize_t firmware_timeout_show(struct class *class,
-				     struct class_attribute *attr,
-				     char *buf)
+static struct notifier_block fw_shutdown_nb = {
+	.notifier_call = fw_shutdown_notify,
+};
+
+static ssize_t timeout_show(struct class *class, struct class_attribute *attr,
+			    char *buf)
 {
 	return sprintf(buf, "%d\n", loading_timeout);
 }
@@ -130,9 +522,8 @@ static ssize_t firmware_timeout_show(struct class *class,
  *
  *	Note: zero means 'wait forever'.
  **/
-static ssize_t firmware_timeout_store(struct class *class,
-				      struct class_attribute *attr,
-				      const char *buf, size_t count)
+static ssize_t timeout_store(struct class *class, struct class_attribute *attr,
+			     const char *buf, size_t count)
 {
 	loading_timeout = simple_strtol(buf, NULL, 10);
 	if (loading_timeout < 0)
@@ -142,29 +533,22 @@ static ssize_t firmware_timeout_store(struct class *class,
 }
 
 static struct class_attribute firmware_class_attrs[] = {
-	__ATTR(timeout, S_IWUSR | S_IRUGO,
-		firmware_timeout_show, firmware_timeout_store),
+	__ATTR_RW(timeout),
 	__ATTR_NULL
 };
 
 static void fw_dev_release(struct device *dev)
 {
 	struct firmware_priv *fw_priv = to_firmware_priv(dev);
-	int i;
 
-	for (i = 0; i < fw_priv->nr_pages; i++)
-		__free_page(fw_priv->pages[i]);
-	kfree(fw_priv->pages);
 	kfree(fw_priv);
-
-	module_put(THIS_MODULE);
 }
 
 static int firmware_uevent(struct device *dev, struct kobj_uevent_env *env)
 {
 	struct firmware_priv *fw_priv = to_firmware_priv(dev);
 
-	if (add_uevent_var(env, "FIRMWARE=%s", fw_priv->fw_id))
+	if (add_uevent_var(env, "FIRMWARE=%s", fw_priv->buf->fw_id))
 		return -ENOMEM;
 	if (add_uevent_var(env, "TIMEOUT=%i", loading_timeout))
 		return -ENOMEM;
@@ -185,26 +569,35 @@ static ssize_t firmware_loading_show(struct device *dev,
 				     struct device_attribute *attr, char *buf)
 {
 	struct firmware_priv *fw_priv = to_firmware_priv(dev);
-	int loading = test_bit(FW_STATUS_LOADING, &fw_priv->status);
+	int loading = 0;
 
-	return sprintf(buf, "%d\n", loading);
-}
+	mutex_lock(&fw_lock);
+	if (fw_priv->buf)
+		loading = test_bit(FW_STATUS_LOADING, &fw_priv->buf->status);
+	mutex_unlock(&fw_lock);
 
-static void firmware_free_data(const struct firmware *fw)
-{
-	int i;
-	vunmap(fw->data);
-	if (fw->pages) {
-		for (i = 0; i < PFN_UP(fw->size); i++)
-			__free_page(fw->pages[i]);
-		kfree(fw->pages);
-	}
+	return sprintf(buf, "%d\n", loading);
 }
 
 /* Some architectures don't have PAGE_KERNEL_RO */
 #ifndef PAGE_KERNEL_RO
 #define PAGE_KERNEL_RO PAGE_KERNEL
 #endif
+
+/* one pages buffer should be mapped/unmapped only once */
+static int fw_map_pages_buf(struct firmware_buf *buf)
+{
+	if (!buf->is_paged_buf)
+		return 0;
+
+	if (buf->data)
+		vunmap(buf->data);
+	buf->data = vmap(buf->pages, buf->nr_pages, 0, PAGE_KERNEL_RO);
+	if (!buf->data)
+		return -ENOMEM;
+	return 0;
+}
+
 /**
  * firmware_loading_store - set value in the 'loading' control file
  * @dev: device pointer
@@ -223,46 +616,44 @@ static ssize_t firmware_loading_store(struct device *dev,
 				      const char *buf, size_t count)
 {
 	struct firmware_priv *fw_priv = to_firmware_priv(dev);
+	struct firmware_buf *fw_buf;
 	int loading = simple_strtol(buf, NULL, 10);
 	int i;
 
+	mutex_lock(&fw_lock);
+	fw_buf = fw_priv->buf;
+	if (!fw_buf)
+		goto out;
+
 	switch (loading) {
 	case 1:
-		mutex_lock(&fw_lock);
-		if (!fw_priv->fw) {
-			mutex_unlock(&fw_lock);
-			break;
+		/* discarding any previous partial load */
+		if (!test_bit(FW_STATUS_DONE, &fw_buf->status)) {
+			for (i = 0; i < fw_buf->nr_pages; i++)
+				__free_page(fw_buf->pages[i]);
+			kfree(fw_buf->pages);
+			fw_buf->pages = NULL;
+			fw_buf->page_array_size = 0;
+			fw_buf->nr_pages = 0;
+			set_bit(FW_STATUS_LOADING, &fw_buf->status);
 		}
-		firmware_free_data(fw_priv->fw);
-		memset(fw_priv->fw, 0, sizeof(struct firmware));
-		/* If the pages are not owned by 'struct firmware' */
-		for (i = 0; i < fw_priv->nr_pages; i++)
-			__free_page(fw_priv->pages[i]);
-		kfree(fw_priv->pages);
-		fw_priv->pages = NULL;
-		fw_priv->page_array_size = 0;
-		fw_priv->nr_pages = 0;
-		set_bit(FW_STATUS_LOADING, &fw_priv->status);
-		mutex_unlock(&fw_lock);
 		break;
 	case 0:
-		if (test_bit(FW_STATUS_LOADING, &fw_priv->status)) {
-			vunmap(fw_priv->fw->data);
-			fw_priv->fw->data = vmap(fw_priv->pages,
-						 fw_priv->nr_pages,
-						 0, PAGE_KERNEL_RO);
-			if (!fw_priv->fw->data) {
-				dev_err(dev, "%s: vmap() failed\n", __func__);
-				goto err;
-			}
-			/* Pages are now owned by 'struct firmware' */
-			fw_priv->fw->pages = fw_priv->pages;
-			fw_priv->pages = NULL;
-
-			fw_priv->page_array_size = 0;
-			fw_priv->nr_pages = 0;
-			complete(&fw_priv->completion);
-			clear_bit(FW_STATUS_LOADING, &fw_priv->status);
+		if (test_bit(FW_STATUS_LOADING, &fw_buf->status)) {
+			set_bit(FW_STATUS_DONE, &fw_buf->status);
+			clear_bit(FW_STATUS_LOADING, &fw_buf->status);
+
+			/*
+			 * Several loading requests may be pending on
+			 * the same firmware buf, so let all requests
+			 * see the mapped 'buf->data' once the loading
+			 * is completed.
+			 */
+			if (fw_map_pages_buf(fw_buf))
+				dev_err(dev, "%s: map pages failed\n",
+					__func__);
+			list_del_init(&fw_buf->pending_list);
+			complete_all(&fw_buf->completion);
 			break;
 		}
 		/* fallthrough */
@@ -270,11 +661,11 @@ static ssize_t firmware_loading_store(struct device *dev,
 		dev_err(dev, "%s: unexpected value (%d)\n", __func__, loading);
 		/* fallthrough */
 	case -1:
-	err:
 		fw_load_abort(fw_priv);
 		break;
 	}
-
+out:
+	mutex_unlock(&fw_lock);
 	return count;
 }
 
@@ -284,23 +675,23 @@ static ssize_t firmware_data_read(struct file *filp, struct kobject *kobj,
 				  struct bin_attribute *bin_attr,
 				  char *buffer, loff_t offset, size_t count)
 {
-	struct device *dev = to_dev(kobj);
+	struct device *dev = kobj_to_dev(kobj);
 	struct firmware_priv *fw_priv = to_firmware_priv(dev);
-	struct firmware *fw;
+	struct firmware_buf *buf;
 	ssize_t ret_count;
 
 	mutex_lock(&fw_lock);
-	fw = fw_priv->fw;
-	if (!fw || test_bit(FW_STATUS_DONE, &fw_priv->status)) {
+	buf = fw_priv->buf;
+	if (!buf || test_bit(FW_STATUS_DONE, &buf->status)) {
 		ret_count = -ENODEV;
 		goto out;
 	}
-	if (offset > fw->size) {
+	if (offset > buf->size) {
 		ret_count = 0;
 		goto out;
 	}
-	if (count > fw->size - offset)
-		count = fw->size - offset;
+	if (count > buf->size - offset)
+		count = buf->size - offset;
 
 	ret_count = count;
 
@@ -310,11 +701,11 @@ static ssize_t firmware_data_read(struct file *filp, struct kobject *kobj,
 		int page_ofs = offset & (PAGE_SIZE-1);
 		int page_cnt = min_t(size_t, PAGE_SIZE - page_ofs, count);
 
-		page_data = kmap(fw_priv->pages[page_nr]);
+		page_data = kmap(buf->pages[page_nr]);
 
 		memcpy(buffer, page_data + page_ofs, page_cnt);
 
-		kunmap(fw_priv->pages[page_nr]);
+		kunmap(buf->pages[page_nr]);
 		buffer += page_cnt;
 		offset += page_cnt;
 		count -= page_cnt;
@@ -326,12 +717,13 @@ out:
 
 static int fw_realloc_buffer(struct firmware_priv *fw_priv, int min_size)
 {
+	struct firmware_buf *buf = fw_priv->buf;
 	int pages_needed = ALIGN(min_size, PAGE_SIZE) >> PAGE_SHIFT;
 
 	/* If the array of pages is too small, grow it... */
-	if (fw_priv->page_array_size < pages_needed) {
+	if (buf->page_array_size < pages_needed) {
 		int new_array_size = max(pages_needed,
-					 fw_priv->page_array_size * 2);
+					 buf->page_array_size * 2);
 		struct page **new_pages;
 
 		new_pages = kmalloc(new_array_size * sizeof(void *),
@@ -340,24 +732,24 @@ static int fw_realloc_buffer(struct firmware_priv *fw_priv, int min_size)
 			fw_load_abort(fw_priv);
 			return -ENOMEM;
 		}
-		memcpy(new_pages, fw_priv->pages,
-		       fw_priv->page_array_size * sizeof(void *));
-		memset(&new_pages[fw_priv->page_array_size], 0, sizeof(void *) *
-		       (new_array_size - fw_priv->page_array_size));
-		kfree(fw_priv->pages);
-		fw_priv->pages = new_pages;
-		fw_priv->page_array_size = new_array_size;
+		memcpy(new_pages, buf->pages,
+		       buf->page_array_size * sizeof(void *));
+		memset(&new_pages[buf->page_array_size], 0, sizeof(void *) *
+		       (new_array_size - buf->page_array_size));
+		kfree(buf->pages);
+		buf->pages = new_pages;
+		buf->page_array_size = new_array_size;
 	}
 
-	while (fw_priv->nr_pages < pages_needed) {
-		fw_priv->pages[fw_priv->nr_pages] =
+	while (buf->nr_pages < pages_needed) {
+		buf->pages[buf->nr_pages] =
 			alloc_page(GFP_KERNEL | __GFP_HIGHMEM);
 
-		if (!fw_priv->pages[fw_priv->nr_pages]) {
+		if (!buf->pages[buf->nr_pages]) {
 			fw_load_abort(fw_priv);
 			return -ENOMEM;
 		}
-		fw_priv->nr_pages++;
+		buf->nr_pages++;
 	}
 	return 0;
 }
@@ -378,20 +770,21 @@ static ssize_t firmware_data_write(struct file *filp, struct kobject *kobj,
 				   struct bin_attribute *bin_attr,
 				   char *buffer, loff_t offset, size_t count)
 {
-	struct device *dev = to_dev(kobj);
+	struct device *dev = kobj_to_dev(kobj);
 	struct firmware_priv *fw_priv = to_firmware_priv(dev);
-	struct firmware *fw;
+	struct firmware_buf *buf;
 	ssize_t retval;
 
 	if (!capable(CAP_SYS_RAWIO))
 		return -EPERM;
 
 	mutex_lock(&fw_lock);
-	fw = fw_priv->fw;
-	if (!fw || test_bit(FW_STATUS_DONE, &fw_priv->status)) {
+	buf = fw_priv->buf;
+	if (!buf || test_bit(FW_STATUS_DONE, &buf->status)) {
 		retval = -ENODEV;
 		goto out;
 	}
+
 	retval = fw_realloc_buffer(fw_priv, offset + count);
 	if (retval)
 		goto out;
@@ -404,17 +797,17 @@ static ssize_t firmware_data_write(struct file *filp, struct kobject *kobj,
 		int page_ofs = offset & (PAGE_SIZE - 1);
 		int page_cnt = min_t(size_t, PAGE_SIZE - page_ofs, count);
 
-		page_data = kmap(fw_priv->pages[page_nr]);
+		page_data = kmap(buf->pages[page_nr]);
 
 		memcpy(page_data + page_ofs, buffer, page_cnt);
 
-		kunmap(fw_priv->pages[page_nr]);
+		kunmap(buf->pages[page_nr]);
 		buffer += page_cnt;
 		offset += page_cnt;
 		count -= page_cnt;
 	}
 
-	fw->size = max_t(size_t, offset, fw->size);
+	buf->size = max_t(size_t, offset, buf->size);
 out:
 	mutex_unlock(&fw_lock);
 	return retval;
@@ -427,151 +820,320 @@ static struct bin_attribute firmware_attr_data = {
 	.write = firmware_data_write,
 };
 
-static void firmware_class_timeout(u_long data)
+static void firmware_class_timeout_work(struct work_struct *work)
 {
-	struct firmware_priv *fw_priv = (struct firmware_priv *) data;
+	struct firmware_priv *fw_priv = container_of(work,
+			struct firmware_priv, timeout_work.work);
 
+	mutex_lock(&fw_lock);
 	fw_load_abort(fw_priv);
+	mutex_unlock(&fw_lock);
 }
 
 static struct firmware_priv *
 fw_create_instance(struct firmware *firmware, const char *fw_name,
-		   struct device *device, bool uevent, bool nowait)
+		   struct device *device, unsigned int opt_flags)
 {
 	struct firmware_priv *fw_priv;
 	struct device *f_dev;
-	int error;
 
-	fw_priv = kzalloc(sizeof(*fw_priv) + strlen(fw_name) + 1 , GFP_KERNEL);
+	fw_priv = kzalloc(sizeof(*fw_priv), GFP_KERNEL);
 	if (!fw_priv) {
 		dev_err(device, "%s: kmalloc failed\n", __func__);
-		error = -ENOMEM;
-		goto err_out;
+		fw_priv = ERR_PTR(-ENOMEM);
+		goto exit;
 	}
 
+	fw_priv->nowait = !!(opt_flags & FW_OPT_NOWAIT);
 	fw_priv->fw = firmware;
-	fw_priv->nowait = nowait;
-	strcpy(fw_priv->fw_id, fw_name);
-	init_completion(&fw_priv->completion);
-	setup_timer(&fw_priv->timeout,
-		    firmware_class_timeout, (u_long) fw_priv);
+	INIT_DELAYED_WORK(&fw_priv->timeout_work,
+		firmware_class_timeout_work);
 
 	f_dev = &fw_priv->dev;
 
 	device_initialize(f_dev);
-	dev_set_name(f_dev, "%s", dev_name(device));
+	dev_set_name(f_dev, "%s", fw_name);
 	f_dev->parent = device;
 	f_dev->class = &firmware_class;
+exit:
+	return fw_priv;
+}
 
-	dev_set_uevent_suppress(f_dev, true);
+/* load a firmware via user helper */
+static int _request_firmware_load(struct firmware_priv *fw_priv,
+				  unsigned int opt_flags, long timeout)
+{
+	int retval = 0;
+	struct device *f_dev = &fw_priv->dev;
+	struct firmware_buf *buf = fw_priv->buf;
 
-	/* Need to pin this module until class device is destroyed */
-	__module_get(THIS_MODULE);
+	/* fall back on userspace loading */
+	buf->is_paged_buf = true;
+
+	dev_set_uevent_suppress(f_dev, true);
 
-	error = device_add(f_dev);
-	if (error) {
-		dev_err(device, "%s: device_register failed\n", __func__);
+	retval = device_add(f_dev);
+	if (retval) {
+		dev_err(f_dev, "%s: device_register failed\n", __func__);
 		goto err_put_dev;
 	}
 
-	error = device_create_bin_file(f_dev, &firmware_attr_data);
-	if (error) {
-		dev_err(device, "%s: sysfs_create_bin_file failed\n", __func__);
+	retval = device_create_bin_file(f_dev, &firmware_attr_data);
+	if (retval) {
+		dev_err(f_dev, "%s: sysfs_create_bin_file failed\n", __func__);
 		goto err_del_dev;
 	}
 
-	error = device_create_file(f_dev, &dev_attr_loading);
-	if (error) {
-		dev_err(device, "%s: device_create_file failed\n", __func__);
+	mutex_lock(&fw_lock);
+	list_add(&buf->pending_list, &pending_fw_head);
+	mutex_unlock(&fw_lock);
+
+	retval = device_create_file(f_dev, &dev_attr_loading);
+	if (retval) {
+		mutex_lock(&fw_lock);
+		list_del_init(&buf->pending_list);
+		mutex_unlock(&fw_lock);
+		dev_err(f_dev, "%s: device_create_file failed\n", __func__);
 		goto err_del_bin_attr;
 	}
 
-	if (uevent)
+	if (opt_flags & FW_OPT_UEVENT) {
+		buf->need_uevent = true;
 		dev_set_uevent_suppress(f_dev, false);
+		dev_dbg(f_dev, "firmware: requesting %s\n", buf->fw_id);
+		if (timeout != MAX_SCHEDULE_TIMEOUT)
+			queue_delayed_work(system_power_efficient_wq,
+					   &fw_priv->timeout_work, timeout);
 
-	return fw_priv;
+		kobject_uevent(&fw_priv->dev.kobj, KOBJ_ADD);
+	}
+
+	wait_for_completion(&buf->completion);
+
+	cancel_delayed_work_sync(&fw_priv->timeout_work);
+	if (!buf->data)
+		retval = -ENOMEM;
 
+	device_remove_file(f_dev, &dev_attr_loading);
 err_del_bin_attr:
 	device_remove_bin_file(f_dev, &firmware_attr_data);
 err_del_dev:
 	device_del(f_dev);
 err_put_dev:
 	put_device(f_dev);
-err_out:
-	return ERR_PTR(error);
+	return retval;
 }
 
-static void fw_destroy_instance(struct firmware_priv *fw_priv)
+static int fw_load_from_user_helper(struct firmware *firmware,
+				    const char *name, struct device *device,
+				    unsigned int opt_flags, long timeout)
 {
-	struct device *f_dev = &fw_priv->dev;
+	struct firmware_priv *fw_priv;
 
-	device_remove_file(f_dev, &dev_attr_loading);
-	device_remove_bin_file(f_dev, &firmware_attr_data);
-	device_unregister(f_dev);
+	fw_priv = fw_create_instance(firmware, name, device, opt_flags);
+	if (IS_ERR(fw_priv))
+		return PTR_ERR(fw_priv);
+
+	fw_priv->buf = firmware->priv;
+	return _request_firmware_load(fw_priv, opt_flags, timeout);
 }
 
-static int _request_firmware(const struct firmware **firmware_p,
-			     const char *name, struct device *device,
-			     bool uevent, bool nowait)
+#ifdef CONFIG_PM_SLEEP
+/* kill pending requests without uevent to avoid blocking suspend */
+static void kill_requests_without_uevent(void)
 {
-	struct firmware_priv *fw_priv;
-	struct firmware *firmware;
-	int retval = 0;
+	struct firmware_buf *buf;
+	struct firmware_buf *next;
 
-	if (!firmware_p)
-		return -EINVAL;
+	mutex_lock(&fw_lock);
+	list_for_each_entry_safe(buf, next, &pending_fw_head, pending_list) {
+		if (!buf->need_uevent)
+			 __fw_load_abort(buf);
+	}
+	mutex_unlock(&fw_lock);
+}
+#endif
+
+#else /* CONFIG_FW_LOADER_USER_HELPER */
+static inline int
+fw_load_from_user_helper(struct firmware *firmware, const char *name,
+			 struct device *device, unsigned int opt_flags,
+			 long timeout)
+{
+	return -ENOENT;
+}
+
+/* No abort during direct loading */
+#define is_fw_load_aborted(buf) false
+
+#ifdef CONFIG_PM_SLEEP
+static inline void kill_requests_without_uevent(void) { }
+#endif
+
+#endif /* CONFIG_FW_LOADER_USER_HELPER */
+
+
+/* wait until the shared firmware_buf becomes ready (or error) */
+static int sync_cached_firmware_buf(struct firmware_buf *buf)
+{
+	int ret = 0;
+
+	mutex_lock(&fw_lock);
+	while (!test_bit(FW_STATUS_DONE, &buf->status)) {
+		if (is_fw_load_aborted(buf)) {
+			ret = -ENOENT;
+			break;
+		}
+		mutex_unlock(&fw_lock);
+		wait_for_completion(&buf->completion);
+		mutex_lock(&fw_lock);
+	}
+	mutex_unlock(&fw_lock);
+	return ret;
+}
+
+/* prepare firmware and firmware_buf structs;
+ * return 0 if a firmware is already assigned, 1 if need to load one,
+ * or a negative error code
+ */
+static int
+_request_firmware_prepare(struct firmware **firmware_p, const char *name,
+			  struct device *device)
+{
+	struct firmware *firmware;
+	struct firmware_buf *buf;
+	int ret;
 
 	*firmware_p = firmware = kzalloc(sizeof(*firmware), GFP_KERNEL);
 	if (!firmware) {
 		dev_err(device, "%s: kmalloc(struct firmware) failed\n",
 			__func__);
-		retval = -ENOMEM;
-		goto out;
+		return -ENOMEM;
 	}
 
 	if (fw_get_builtin_firmware(firmware, name)) {
 		dev_dbg(device, "firmware: using built-in firmware %s\n", name);
-		return 0;
+		return 0; /* assigned */
 	}
 
-	if (uevent)
-		dev_dbg(device, "firmware: requesting %s\n", name);
+	ret = fw_lookup_and_allocate_buf(name, &fw_cache, &buf);
 
-	fw_priv = fw_create_instance(firmware, name, device, uevent, nowait);
-	if (IS_ERR(fw_priv)) {
-		retval = PTR_ERR(fw_priv);
-		goto out;
-	}
+	/*
+	 * bind with 'buf' now to avoid warning in failure path
+	 * of requesting firmware.
+	 */
+	firmware->priv = buf;
 
-	if (uevent) {
-		if (loading_timeout > 0)
-			mod_timer(&fw_priv->timeout,
-				  round_jiffies_up(jiffies +
-						   loading_timeout * HZ));
-
-		kobject_uevent(&fw_priv->dev.kobj, KOBJ_ADD);
+	if (ret > 0) {
+		ret = sync_cached_firmware_buf(buf);
+		if (!ret) {
+			fw_set_page_data(buf, firmware);
+			return 0; /* assigned */
+		}
 	}
 
-	wait_for_completion(&fw_priv->completion);
+	if (ret < 0)
+		return ret;
+	return 1; /* need to load */
+}
 
-	set_bit(FW_STATUS_DONE, &fw_priv->status);
-	del_timer_sync(&fw_priv->timeout);
+static int assign_firmware_buf(struct firmware *fw, struct device *device,
+			       unsigned int opt_flags)
+{
+	struct firmware_buf *buf = fw->priv;
 
 	mutex_lock(&fw_lock);
-	if (!fw_priv->fw->size || test_bit(FW_STATUS_ABORT, &fw_priv->status))
-		retval = -ENOENT;
-	fw_priv->fw = NULL;
+	if (!buf->size || is_fw_load_aborted(buf)) {
+		mutex_unlock(&fw_lock);
+		return -ENOENT;
+	}
+
+	/*
+	 * add firmware name into devres list so that we can auto cache
+	 * and uncache firmware for device.
+	 *
+	 * device may have been deleted already, but the problem
+	 * should be fixed in devres or driver core.
+	 */
+	/* don't cache firmware handled without uevent */
+	if (device && (opt_flags & FW_OPT_UEVENT))
+		fw_add_devm_name(device, buf->fw_id);
+
+	/*
+	 * After caching firmware image is started, let it piggyback
+	 * on request firmware.
+	 */
+	if (buf->fwc->state == FW_LOADER_START_CACHE) {
+		if (fw_cache_piggyback_on_request(buf->fw_id))
+			kref_get(&buf->ref);
+	}
+
+	/* pass the pages buffer to driver at the last minute */
+	fw_set_page_data(buf, fw);
 	mutex_unlock(&fw_lock);
+	return 0;
+}
 
-	fw_destroy_instance(fw_priv);
+/* called from request_firmware() and request_firmware_work_func() */
+static int
+_request_firmware(const struct firmware **firmware_p, const char *name,
+		  struct device *device, unsigned int opt_flags)
+{
+	struct firmware *fw;
+	long timeout;
+	int ret;
 
-out:
-	if (retval) {
-		release_firmware(firmware);
-		*firmware_p = NULL;
+	if (!firmware_p)
+		return -EINVAL;
+
+	ret = _request_firmware_prepare(&fw, name, device);
+	if (ret <= 0) /* error or already assigned */
+		goto out;
+
+	ret = 0;
+	timeout = firmware_loading_timeout();
+	if (opt_flags & FW_OPT_NOWAIT) {
+		timeout = usermodehelper_read_lock_wait(timeout);
+		if (!timeout) {
+			dev_dbg(device, "firmware: %s loading timed out\n",
+				name);
+			ret = -EBUSY;
+			goto out;
+		}
+	} else {
+		ret = usermodehelper_read_trylock();
+		if (WARN_ON(ret)) {
+			dev_err(device, "firmware: %s will not be loaded\n",
+				name);
+			goto out;
+		}
 	}
 
-	return retval;
+	ret = fw_get_filesystem_firmware(device, fw->priv);
+	if (ret) {
+		if (opt_flags & FW_OPT_FALLBACK) {
+			dev_warn(device,
+				 "Direct firmware load failed with error %d\n",
+				 ret);
+			dev_warn(device, "Falling back to user helper\n");
+			ret = fw_load_from_user_helper(fw, name, device,
+						       opt_flags, timeout);
+		}
+	}
+
+	if (!ret)
+		ret = assign_firmware_buf(fw, device, opt_flags);
+
+	usermodehelper_read_unlock();
+
+ out:
+	if (ret < 0) {
+		release_firmware(fw);
+		fw = NULL;
+	}
+
+	*firmware_p = fw;
+	return ret;
 }
 
 /**
@@ -588,14 +1150,50 @@ out:
  *      @name will be used as $FIRMWARE in the uevent environment and
  *      should be distinctive enough not to be confused with any other
  *      firmware image for this or any other device.
+ *
+ *	Caller must hold the reference count of @device.
+ *
+ *	The function can be called safely inside device's suspend and
+ *	resume callback.
  **/
 int
 request_firmware(const struct firmware **firmware_p, const char *name,
                  struct device *device)
 {
-        int uevent = 1;
-        return _request_firmware(firmware_p, name, device, uevent, false);
+	int ret;
+
+	/* Need to pin this module until return */
+	__module_get(THIS_MODULE);
+	ret = _request_firmware(firmware_p, name, device,
+				FW_OPT_UEVENT | FW_OPT_FALLBACK);
+	module_put(THIS_MODULE);
+	return ret;
+}
+EXPORT_SYMBOL(request_firmware);
+
+#ifdef CONFIG_FW_LOADER_USER_HELPER
+/**
+ * request_firmware_direct: - load firmware directly without usermode helper
+ * @firmware_p: pointer to firmware image
+ * @name: name of firmware file
+ * @device: device for which firmware is being loaded
+ *
+ * This function works pretty much like request_firmware(), but this doesn't
+ * fall back to usermode helper even if the firmware couldn't be loaded
+ * directly from fs.  Hence it's useful for loading optional firmwares, which
+ * aren't always present, without extra long timeouts of udev.
+ **/
+int request_firmware_direct(const struct firmware **firmware_p,
+			    const char *name, struct device *device)
+{
+	int ret;
+	__module_get(THIS_MODULE);
+	ret = _request_firmware(firmware_p, name, device, FW_OPT_UEVENT);
+	module_put(THIS_MODULE);
+	return ret;
 }
+EXPORT_SYMBOL_GPL(request_firmware_direct);
+#endif
 
 /**
  * release_firmware: - release the resource associated with a firmware image
@@ -609,6 +1207,7 @@ void release_firmware(const struct firmware *fw)
 		kfree(fw);
 	}
 }
+EXPORT_SYMBOL(release_firmware);
 
 /* Async support */
 struct firmware_work {
@@ -618,28 +1217,23 @@ struct firmware_work {
 	struct device *device;
 	void *context;
 	void (*cont)(const struct firmware *fw, void *context);
-	int uevent;
+	unsigned int opt_flags;
 };
 
-static int request_firmware_work_func(void *arg)
+static void request_firmware_work_func(struct work_struct *work)
 {
-	struct firmware_work *fw_work = arg;
+	struct firmware_work *fw_work;
 	const struct firmware *fw;
-	int ret;
 
-	if (!arg) {
-		WARN_ON(1);
-		return 0;
-	}
+	fw_work = container_of(work, struct firmware_work, work);
 
-	ret = _request_firmware(&fw, fw_work->name, fw_work->device,
-				fw_work->uevent, true);
+	_request_firmware(&fw, fw_work->name, fw_work->device,
+			  fw_work->opt_flags);
 	fw_work->cont(fw, fw_work->context);
+	put_device(fw_work->device); /* taken in request_firmware_nowait() */
 
 	module_put(fw_work->module);
 	kfree(fw_work);
-
-	return ret;
 }
 
 /**
@@ -655,17 +1249,22 @@ static int request_firmware_work_func(void *arg)
  * @cont: function will be called asynchronously when the firmware
  *	request is over.
  *
- *	Asynchronous variant of request_firmware() for user contexts where
- *	it is not possible to sleep for long time. It can't be called
- *	in atomic contexts.
+ *	Caller must hold the reference count of @device.
+ *
+ *	Asynchronous variant of request_firmware() for user contexts:
+ *		- sleep for as small periods as possible since it may
+ *		increase kernel boot time of built-in device drivers
+ *		requesting firmware in their ->probe() methods, if
+ *		@gfp is GFP_KERNEL.
+ *
+ *		- can't sleep at all if @gfp is GFP_ATOMIC.
  **/
 int
 request_firmware_nowait(
-	struct module *module, int uevent,
+	struct module *module, bool uevent,
 	const char *name, struct device *device, gfp_t gfp, void *context,
 	void (*cont)(const struct firmware *fw, void *context))
 {
-	struct task_struct *task;
 	struct firmware_work *fw_work;
 
 	fw_work = kzalloc(sizeof (struct firmware_work), gfp);
@@ -677,38 +1276,397 @@ request_firmware_nowait(
 	fw_work->device = device;
 	fw_work->context = context;
 	fw_work->cont = cont;
-	fw_work->uevent = uevent;
+	fw_work->opt_flags = FW_OPT_NOWAIT | FW_OPT_FALLBACK |
+		(uevent ? FW_OPT_UEVENT : 0);
 
 	if (!try_module_get(module)) {
 		kfree(fw_work);
 		return -EFAULT;
 	}
 
-	task = kthread_run(request_firmware_work_func, fw_work,
-			    "firmware/%s", name);
-	if (IS_ERR(task)) {
-		fw_work->cont(NULL, fw_work->context);
-		module_put(fw_work->module);
-		kfree(fw_work);
-		return PTR_ERR(task);
+	get_device(fw_work->device);
+	INIT_WORK(&fw_work->work, request_firmware_work_func);
+	schedule_work(&fw_work->work);
+	return 0;
+}
+EXPORT_SYMBOL(request_firmware_nowait);
+
+#ifdef CONFIG_PM_SLEEP
+static ASYNC_DOMAIN_EXCLUSIVE(fw_cache_domain);
+
+/**
+ * cache_firmware - cache one firmware image in kernel memory space
+ * @fw_name: the firmware image name
+ *
+ * Cache firmware in kernel memory so that drivers can use it when
+ * system isn't ready for them to request firmware image from userspace.
+ * Once it returns successfully, driver can use request_firmware or its
+ * nowait version to get the cached firmware without any interaction
+ * with userspace.
+ *
+ * Return 0 if the firmware image has been cached successfully
+ * Return !0 otherwise
+ *
+ */
+static int cache_firmware(const char *fw_name)
+{
+	int ret;
+	const struct firmware *fw;
+
+	pr_debug("%s: %s\n", __func__, fw_name);
+
+	ret = request_firmware(&fw, fw_name, NULL);
+	if (!ret)
+		kfree(fw);
+
+	pr_debug("%s: %s ret=%d\n", __func__, fw_name, ret);
+
+	return ret;
+}
+
+static struct firmware_buf *fw_lookup_buf(const char *fw_name)
+{
+	struct firmware_buf *tmp;
+	struct firmware_cache *fwc = &fw_cache;
+
+	spin_lock(&fwc->lock);
+	tmp = __fw_lookup_buf(fw_name);
+	spin_unlock(&fwc->lock);
+
+	return tmp;
+}
+
+/**
+ * uncache_firmware - remove one cached firmware image
+ * @fw_name: the firmware image name
+ *
+ * Uncache one firmware image which has been cached successfully
+ * before.
+ *
+ * Return 0 if the firmware cache has been removed successfully
+ * Return !0 otherwise
+ *
+ */
+static int uncache_firmware(const char *fw_name)
+{
+	struct firmware_buf *buf;
+	struct firmware fw;
+
+	pr_debug("%s: %s\n", __func__, fw_name);
+
+	if (fw_get_builtin_firmware(&fw, fw_name))
+		return 0;
+
+	buf = fw_lookup_buf(fw_name);
+	if (buf) {
+		fw_free_buf(buf);
+		return 0;
 	}
 
+	return -EINVAL;
+}
+
+static struct fw_cache_entry *alloc_fw_cache_entry(const char *name)
+{
+	struct fw_cache_entry *fce;
+
+	fce = kzalloc(sizeof(*fce) + strlen(name) + 1, GFP_ATOMIC);
+	if (!fce)
+		goto exit;
+
+	strcpy(fce->name, name);
+exit:
+	return fce;
+}
+
+static int __fw_entry_found(const char *name)
+{
+	struct firmware_cache *fwc = &fw_cache;
+	struct fw_cache_entry *fce;
+
+	list_for_each_entry(fce, &fwc->fw_names, list) {
+		if (!strcmp(fce->name, name))
+			return 1;
+	}
 	return 0;
 }
 
+static int fw_cache_piggyback_on_request(const char *name)
+{
+	struct firmware_cache *fwc = &fw_cache;
+	struct fw_cache_entry *fce;
+	int ret = 0;
+
+	spin_lock(&fwc->name_lock);
+	if (__fw_entry_found(name))
+		goto found;
+
+	fce = alloc_fw_cache_entry(name);
+	if (fce) {
+		ret = 1;
+		list_add(&fce->list, &fwc->fw_names);
+		pr_debug("%s: fw: %s\n", __func__, name);
+	}
+found:
+	spin_unlock(&fwc->name_lock);
+	return ret;
+}
+
+static void free_fw_cache_entry(struct fw_cache_entry *fce)
+{
+	kfree(fce);
+}
+
+static void __async_dev_cache_fw_image(void *fw_entry,
+				       async_cookie_t cookie)
+{
+	struct fw_cache_entry *fce = fw_entry;
+	struct firmware_cache *fwc = &fw_cache;
+	int ret;
+
+	ret = cache_firmware(fce->name);
+	if (ret) {
+		spin_lock(&fwc->name_lock);
+		list_del(&fce->list);
+		spin_unlock(&fwc->name_lock);
+
+		free_fw_cache_entry(fce);
+	}
+}
+
+/* called with dev->devres_lock held */
+static void dev_create_fw_entry(struct device *dev, void *res,
+				void *data)
+{
+	struct fw_name_devm *fwn = res;
+	const char *fw_name = fwn->name;
+	struct list_head *head = data;
+	struct fw_cache_entry *fce;
+
+	fce = alloc_fw_cache_entry(fw_name);
+	if (fce)
+		list_add(&fce->list, head);
+}
+
+static int devm_name_match(struct device *dev, void *res,
+			   void *match_data)
+{
+	struct fw_name_devm *fwn = res;
+	return (fwn->magic == (unsigned long)match_data);
+}
+
+static void dev_cache_fw_image(struct device *dev, void *data)
+{
+	LIST_HEAD(todo);
+	struct fw_cache_entry *fce;
+	struct fw_cache_entry *fce_next;
+	struct firmware_cache *fwc = &fw_cache;
+
+	devres_for_each_res(dev, fw_name_devm_release,
+			    devm_name_match, &fw_cache,
+			    dev_create_fw_entry, &todo);
+
+	list_for_each_entry_safe(fce, fce_next, &todo, list) {
+		list_del(&fce->list);
+
+		spin_lock(&fwc->name_lock);
+		/* only one cache entry for one firmware */
+		if (!__fw_entry_found(fce->name)) {
+			list_add(&fce->list, &fwc->fw_names);
+		} else {
+			free_fw_cache_entry(fce);
+			fce = NULL;
+		}
+		spin_unlock(&fwc->name_lock);
+
+		if (fce)
+			async_schedule_domain(__async_dev_cache_fw_image,
+					      (void *)fce,
+					      &fw_cache_domain);
+	}
+}
+
+static void __device_uncache_fw_images(void)
+{
+	struct firmware_cache *fwc = &fw_cache;
+	struct fw_cache_entry *fce;
+
+	spin_lock(&fwc->name_lock);
+	while (!list_empty(&fwc->fw_names)) {
+		fce = list_entry(fwc->fw_names.next,
+				struct fw_cache_entry, list);
+		list_del(&fce->list);
+		spin_unlock(&fwc->name_lock);
+
+		uncache_firmware(fce->name);
+		free_fw_cache_entry(fce);
+
+		spin_lock(&fwc->name_lock);
+	}
+	spin_unlock(&fwc->name_lock);
+}
+
+/**
+ * device_cache_fw_images - cache devices' firmware
+ *
+ * If one device called request_firmware or its nowait version
+ * successfully before, the firmware names are recorded in the
+ * device's devres linked list, so device_cache_fw_images can call
+ * cache_firmware() to cache these firmwares for the device,
+ * then the device driver can load its firmwares easily at
+ * time when system is not ready to complete loading firmware.
+ */
+static void device_cache_fw_images(void)
+{
+	struct firmware_cache *fwc = &fw_cache;
+	int old_timeout;
+	DEFINE_WAIT(wait);
+
+	pr_debug("%s\n", __func__);
+
+	/* cancel uncache work */
+	cancel_delayed_work_sync(&fwc->work);
+
+	/*
+	 * use small loading timeout for caching devices' firmware
+	 * because all these firmware images have been loaded
+	 * successfully at least once, also system is ready for
+	 * completing firmware loading now. The maximum size of
+	 * firmware in current distributions is about 2M bytes,
+	 * so 10 secs should be enough.
+	 */
+	old_timeout = loading_timeout;
+	loading_timeout = 10;
+
+	mutex_lock(&fw_lock);
+	fwc->state = FW_LOADER_START_CACHE;
+	dpm_for_each_dev(NULL, dev_cache_fw_image);
+	mutex_unlock(&fw_lock);
+
+	/* wait for completion of caching firmware for all devices */
+	async_synchronize_full_domain(&fw_cache_domain);
+
+	loading_timeout = old_timeout;
+}
+
+/**
+ * device_uncache_fw_images - uncache devices' firmware
+ *
+ * uncache all firmwares which have been cached successfully
+ * by device_cache_fw_images earlier
+ */
+static void device_uncache_fw_images(void)
+{
+	pr_debug("%s\n", __func__);
+	__device_uncache_fw_images();
+}
+
+static void device_uncache_fw_images_work(struct work_struct *work)
+{
+	device_uncache_fw_images();
+}
+
+/**
+ * device_uncache_fw_images_delay - uncache devices firmwares
+ * @delay: number of milliseconds to delay uncache device firmwares
+ *
+ * uncache all devices' firmwares which have been cached successfully
+ * by device_cache_fw_images after @delay milliseconds.
+ */
+static void device_uncache_fw_images_delay(unsigned long delay)
+{
+	queue_delayed_work(system_power_efficient_wq, &fw_cache.work,
+			   msecs_to_jiffies(delay));
+}
+
+static int fw_pm_notify(struct notifier_block *notify_block,
+			unsigned long mode, void *unused)
+{
+	switch (mode) {
+	case PM_HIBERNATION_PREPARE:
+	case PM_SUSPEND_PREPARE:
+	case PM_RESTORE_PREPARE:
+		kill_requests_without_uevent();
+		device_cache_fw_images();
+		break;
+
+	case PM_POST_SUSPEND:
+	case PM_POST_HIBERNATION:
+	case PM_POST_RESTORE:
+		/*
+		 * In case that system sleep failed and syscore_suspend is
+		 * not called.
+		 */
+		mutex_lock(&fw_lock);
+		fw_cache.state = FW_LOADER_NO_CACHE;
+		mutex_unlock(&fw_lock);
+
+		device_uncache_fw_images_delay(10 * MSEC_PER_SEC);
+		break;
+	}
+
+	return 0;
+}
+
+/* stop caching firmware once syscore_suspend is reached */
+static int fw_suspend(void)
+{
+	fw_cache.state = FW_LOADER_NO_CACHE;
+	return 0;
+}
+
+static struct syscore_ops fw_syscore_ops = {
+	.suspend = fw_suspend,
+};
+#else
+static int fw_cache_piggyback_on_request(const char *name)
+{
+	return 0;
+}
+#endif
+
+static void __init fw_cache_init(void)
+{
+	spin_lock_init(&fw_cache.lock);
+	INIT_LIST_HEAD(&fw_cache.head);
+	fw_cache.state = FW_LOADER_NO_CACHE;
+
+#ifdef CONFIG_PM_SLEEP
+	spin_lock_init(&fw_cache.name_lock);
+	INIT_LIST_HEAD(&fw_cache.fw_names);
+
+	INIT_DELAYED_WORK(&fw_cache.work,
+			  device_uncache_fw_images_work);
+
+	fw_cache.pm_notify.notifier_call = fw_pm_notify;
+	register_pm_notifier(&fw_cache.pm_notify);
+
+	register_syscore_ops(&fw_syscore_ops);
+#endif
+}
+
 static int __init firmware_class_init(void)
 {
+	fw_cache_init();
+#ifdef CONFIG_FW_LOADER_USER_HELPER
+	register_reboot_notifier(&fw_shutdown_nb);
 	return class_register(&firmware_class);
+#else
+	return 0;
+#endif
 }
 
 static void __exit firmware_class_exit(void)
 {
+#ifdef CONFIG_PM_SLEEP
+	unregister_syscore_ops(&fw_syscore_ops);
+	unregister_pm_notifier(&fw_cache.pm_notify);
+#endif
+#ifdef CONFIG_FW_LOADER_USER_HELPER
+	unregister_reboot_notifier(&fw_shutdown_nb);
 	class_unregister(&firmware_class);
+#endif
 }
 
 fs_initcall(firmware_class_init);
 module_exit(firmware_class_exit);
-
-EXPORT_SYMBOL(release_firmware);
-EXPORT_SYMBOL(request_firmware);
-EXPORT_SYMBOL(request_firmware_nowait);
diff --git a/drivers/base/hypervisor.c b/drivers/base/hypervisor.c
index 6428cba3aad..4f8b741f461 100644
--- a/drivers/base/hypervisor.c
+++ b/drivers/base/hypervisor.c
@@ -10,6 +10,7 @@
 
 #include <linux/kobject.h>
 #include <linux/device.h>
+#include <linux/export.h>
 #include "base.h"
 
 struct kobject *hypervisor_kobj;
diff --git a/drivers/base/init.c b/drivers/base/init.c
index c8a934e7942..da033d3bab3 100644
--- a/drivers/base/init.c
+++ b/drivers/base/init.c
@@ -31,7 +31,7 @@ void __init driver_init(void)
 	 * core core pieces.
 	 */
 	platform_bus_init();
-	system_bus_init();
 	cpu_dev_init();
 	memory_dev_init();
+	container_dev_init();
 }
diff --git a/drivers/base/iommu.c b/drivers/base/iommu.c
deleted file mode 100644
index 6e6b6a11b3c..00000000000
--- a/drivers/base/iommu.c
+++ /dev/null
@@ -1,124 +0,0 @@
-/*
- * Copyright (C) 2007-2008 Advanced Micro Devices, Inc.
- * Author: Joerg Roedel <joerg.roedel@amd.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
- */
-
-#include <linux/bug.h>
-#include <linux/types.h>
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/errno.h>
-#include <linux/iommu.h>
-
-static struct iommu_ops *iommu_ops;
-
-void register_iommu(struct iommu_ops *ops)
-{
-	if (iommu_ops)
-		BUG();
-
-	iommu_ops = ops;
-}
-
-bool iommu_found(void)
-{
-	return iommu_ops != NULL;
-}
-EXPORT_SYMBOL_GPL(iommu_found);
-
-struct iommu_domain *iommu_domain_alloc(void)
-{
-	struct iommu_domain *domain;
-	int ret;
-
-	domain = kmalloc(sizeof(*domain), GFP_KERNEL);
-	if (!domain)
-		return NULL;
-
-	ret = iommu_ops->domain_init(domain);
-	if (ret)
-		goto out_free;
-
-	return domain;
-
-out_free:
-	kfree(domain);
-
-	return NULL;
-}
-EXPORT_SYMBOL_GPL(iommu_domain_alloc);
-
-void iommu_domain_free(struct iommu_domain *domain)
-{
-	iommu_ops->domain_destroy(domain);
-	kfree(domain);
-}
-EXPORT_SYMBOL_GPL(iommu_domain_free);
-
-int iommu_attach_device(struct iommu_domain *domain, struct device *dev)
-{
-	return iommu_ops->attach_dev(domain, dev);
-}
-EXPORT_SYMBOL_GPL(iommu_attach_device);
-
-void iommu_detach_device(struct iommu_domain *domain, struct device *dev)
-{
-	iommu_ops->detach_dev(domain, dev);
-}
-EXPORT_SYMBOL_GPL(iommu_detach_device);
-
-phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain,
-			       unsigned long iova)
-{
-	return iommu_ops->iova_to_phys(domain, iova);
-}
-EXPORT_SYMBOL_GPL(iommu_iova_to_phys);
-
-int iommu_domain_has_cap(struct iommu_domain *domain,
-			 unsigned long cap)
-{
-	return iommu_ops->domain_has_cap(domain, cap);
-}
-EXPORT_SYMBOL_GPL(iommu_domain_has_cap);
-
-int iommu_map(struct iommu_domain *domain, unsigned long iova,
-	      phys_addr_t paddr, int gfp_order, int prot)
-{
-	unsigned long invalid_mask;
-	size_t size;
-
-	size         = 0x1000UL << gfp_order;
-	invalid_mask = size - 1;
-
-	BUG_ON((iova | paddr) & invalid_mask);
-
-	return iommu_ops->map(domain, iova, paddr, gfp_order, prot);
-}
-EXPORT_SYMBOL_GPL(iommu_map);
-
-int iommu_unmap(struct iommu_domain *domain, unsigned long iova, int gfp_order)
-{
-	unsigned long invalid_mask;
-	size_t size;
-
-	size         = 0x1000UL << gfp_order;
-	invalid_mask = size - 1;
-
-	BUG_ON(iova & invalid_mask);
-
-	return iommu_ops->unmap(domain, iova, gfp_order);
-}
-EXPORT_SYMBOL_GPL(iommu_unmap);
diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index cafeaaf0428..89f752dd846 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -1,5 +1,5 @@
 /*
- * drivers/base/memory.c - basic Memory class support
+ * Memory subsystem support
  *
  * Written by Matt Tolentino <matthew.e.tolentino@intel.com>
  *            Dave Hansen <haveblue@us.ibm.com>
@@ -10,46 +10,42 @@
  * SPARSEMEM should be contained here, or in mm/memory_hotplug.c.
  */
 
-#include <linux/sysdev.h>
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/topology.h>
 #include <linux/capability.h>
 #include <linux/device.h>
 #include <linux/memory.h>
-#include <linux/kobject.h>
 #include <linux/memory_hotplug.h>
 #include <linux/mm.h>
 #include <linux/mutex.h>
 #include <linux/stat.h>
 #include <linux/slab.h>
 
-#include <asm/atomic.h>
+#include <linux/atomic.h>
 #include <asm/uaccess.h>
 
 static DEFINE_MUTEX(mem_sysfs_mutex);
 
 #define MEMORY_CLASS_NAME	"memory"
 
-static struct sysdev_class memory_sysdev_class = {
-	.name = MEMORY_CLASS_NAME,
-};
+#define to_memory_block(dev) container_of(dev, struct memory_block, dev)
 
-static const char *memory_uevent_name(struct kset *kset, struct kobject *kobj)
-{
-	return MEMORY_CLASS_NAME;
-}
+static int sections_per_block;
 
-static int memory_uevent(struct kset *kset, struct kobject *obj, struct kobj_uevent_env *env)
+static inline int base_memory_block_id(int section_nr)
 {
-	int retval = 0;
-
-	return retval;
+	return section_nr / sections_per_block;
 }
 
-static const struct kset_uevent_ops memory_uevent_ops = {
-	.name		= memory_uevent_name,
-	.uevent		= memory_uevent,
+static int memory_subsys_online(struct device *dev);
+static int memory_subsys_offline(struct device *dev);
+
+static struct bus_type memory_subsys = {
+	.name = MEMORY_CLASS_NAME,
+	.dev_name = MEMORY_CLASS_NAME,
+	.online = memory_subsys_online,
+	.offline = memory_subsys_offline,
 };
 
 static BLOCKING_NOTIFIER_HEAD(memory_chain);
@@ -80,30 +76,31 @@ void unregister_memory_isolate_notifier(struct notifier_block *nb)
 }
 EXPORT_SYMBOL(unregister_memory_isolate_notifier);
 
-/*
- * register_memory - Setup a sysfs device for a memory block
- */
-static
-int register_memory(struct memory_block *memory, struct mem_section *section)
+static void memory_block_release(struct device *dev)
 {
-	int error;
+	struct memory_block *mem = to_memory_block(dev);
 
-	memory->sysdev.cls = &memory_sysdev_class;
-	memory->sysdev.id = __section_nr(section);
+	kfree(mem);
+}
 
-	error = sysdev_register(&memory->sysdev);
-	return error;
+unsigned long __weak memory_block_size_bytes(void)
+{
+	return MIN_MEMORY_BLOCK_SIZE;
 }
 
-static void
-unregister_memory(struct memory_block *memory, struct mem_section *section)
+static unsigned long get_memory_block_size(void)
 {
-	BUG_ON(memory->sysdev.cls != &memory_sysdev_class);
-	BUG_ON(memory->sysdev.id != __section_nr(section));
+	unsigned long block_sz;
 
-	/* drop the ref. we got in remove_memory_block() */
-	kobject_put(&memory->sysdev.kobj);
-	sysdev_unregister(&memory->sysdev);
+	block_sz = memory_block_size_bytes();
+
+	/* Validate block_sz is a power of 2 and not less than section size */
+	if ((block_sz & (block_sz - 1)) || (block_sz < MIN_MEMORY_BLOCK_SIZE)) {
+		WARN_ON(1);
+		block_sz = MIN_MEMORY_BLOCK_SIZE;
+	}
+
+	return block_sz;
 }
 
 /*
@@ -111,38 +108,43 @@ unregister_memory(struct memory_block *memory, struct mem_section *section)
  * uses.
  */
 
-static ssize_t show_mem_phys_index(struct sys_device *dev,
-			struct sysdev_attribute *attr, char *buf)
+static ssize_t show_mem_start_phys_index(struct device *dev,
+			struct device_attribute *attr, char *buf)
 {
-	struct memory_block *mem =
-		container_of(dev, struct memory_block, sysdev);
-	return sprintf(buf, "%08lx\n", mem->phys_index);
+	struct memory_block *mem = to_memory_block(dev);
+	unsigned long phys_index;
+
+	phys_index = mem->start_section_nr / sections_per_block;
+	return sprintf(buf, "%08lx\n", phys_index);
 }
 
 /*
  * Show whether the section of memory is likely to be hot-removable
  */
-static ssize_t show_mem_removable(struct sys_device *dev,
-			struct sysdev_attribute *attr, char *buf)
+static ssize_t show_mem_removable(struct device *dev,
+			struct device_attribute *attr, char *buf)
 {
-	unsigned long start_pfn;
-	int ret;
-	struct memory_block *mem =
-		container_of(dev, struct memory_block, sysdev);
+	unsigned long i, pfn;
+	int ret = 1;
+	struct memory_block *mem = to_memory_block(dev);
+
+	for (i = 0; i < sections_per_block; i++) {
+		if (!present_section_nr(mem->start_section_nr + i))
+			continue;
+		pfn = section_nr_to_pfn(mem->start_section_nr + i);
+		ret &= is_mem_section_removable(pfn, PAGES_PER_SECTION);
+	}
 
-	start_pfn = section_nr_to_pfn(mem->phys_index);
-	ret = is_mem_section_removable(start_pfn, PAGES_PER_SECTION);
 	return sprintf(buf, "%d\n", ret);
 }
 
 /*
  * online, offline, going offline, etc.
  */
-static ssize_t show_mem_state(struct sys_device *dev,
-			struct sysdev_attribute *attr, char *buf)
+static ssize_t show_mem_state(struct device *dev,
+			struct device_attribute *attr, char *buf)
 {
-	struct memory_block *mem =
-		container_of(dev, struct memory_block, sysdev);
+	struct memory_block *mem = to_memory_block(dev);
 	ssize_t len = 0;
 
 	/*
@@ -180,57 +182,68 @@ int memory_isolate_notify(unsigned long val, void *v)
 }
 
 /*
- * MEMORY_HOTPLUG depends on SPARSEMEM in mm/Kconfig, so it is
- * OK to have direct references to sparsemem variables in here.
+ * The probe routines leave the pages reserved, just as the bootmem code does.
+ * Make sure they're still that way.
  */
-static int
-memory_block_action(struct memory_block *mem, unsigned long action)
+static bool pages_correctly_reserved(unsigned long start_pfn)
 {
-	int i;
-	unsigned long psection;
-	unsigned long start_pfn, start_paddr;
-	struct page *first_page;
-	int ret;
-	int old_state = mem->state;
-
-	psection = mem->phys_index;
-	first_page = pfn_to_page(psection << PFN_SECTION_SHIFT);
+	int i, j;
+	struct page *page;
+	unsigned long pfn = start_pfn;
 
 	/*
-	 * The probe routines leave the pages reserved, just
-	 * as the bootmem code does.  Make sure they're still
-	 * that way.
+	 * memmap between sections is not contiguous except with
+	 * SPARSEMEM_VMEMMAP. We look up the page once per section
+	 * and assume memmap is contiguous within each section.
 	 */
-	if (action == MEM_ONLINE) {
-		for (i = 0; i < PAGES_PER_SECTION; i++) {
-			if (PageReserved(first_page+i))
+	for (i = 0; i < sections_per_block; i++, pfn += PAGES_PER_SECTION) {
+		if (WARN_ON_ONCE(!pfn_valid(pfn)))
+			return false;
+		page = pfn_to_page(pfn);
+
+		for (j = 0; j < PAGES_PER_SECTION; j++) {
+			if (PageReserved(page + j))
 				continue;
 
 			printk(KERN_WARNING "section number %ld page number %d "
-				"not reserved, was it already online? \n",
-				psection, i);
-			return -EBUSY;
+				"not reserved, was it already online?\n",
+				pfn_to_section_nr(pfn), j);
+
+			return false;
 		}
 	}
 
+	return true;
+}
+
+/*
+ * MEMORY_HOTPLUG depends on SPARSEMEM in mm/Kconfig, so it is
+ * OK to have direct references to sparsemem variables in here.
+ */
+static int
+memory_block_action(unsigned long phys_index, unsigned long action, int online_type)
+{
+	unsigned long start_pfn;
+	unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block;
+	struct page *first_page;
+	int ret;
+
+	first_page = pfn_to_page(phys_index << PFN_SECTION_SHIFT);
+	start_pfn = page_to_pfn(first_page);
+
 	switch (action) {
 		case MEM_ONLINE:
-			start_pfn = page_to_pfn(first_page);
-			ret = online_pages(start_pfn, PAGES_PER_SECTION);
+			if (!pages_correctly_reserved(start_pfn))
+				return -EBUSY;
+
+			ret = online_pages(start_pfn, nr_pages, online_type);
 			break;
 		case MEM_OFFLINE:
-			mem->state = MEM_GOING_OFFLINE;
-			start_paddr = page_to_pfn(first_page) << PAGE_SHIFT;
-			ret = remove_memory(start_paddr,
-					    PAGES_PER_SECTION << PAGE_SHIFT);
-			if (ret) {
-				mem->state = old_state;
-				break;
-			}
+			ret = offline_pages(start_pfn, nr_pages);
 			break;
 		default:
-			WARN(1, KERN_WARNING "%s(%p, %ld) unknown action: %ld\n",
-					__func__, mem, action, action);
+			WARN(1, KERN_WARNING "%s(%ld, %ld) unknown action: "
+			     "%ld\n", __func__, phys_index, action, action);
 			ret = -EINVAL;
 	}
 
@@ -241,41 +254,104 @@ static int memory_block_change_state(struct memory_block *mem,
 		unsigned long to_state, unsigned long from_state_req)
 {
 	int ret = 0;
-	mutex_lock(&mem->state_mutex);
 
-	if (mem->state != from_state_req) {
-		ret = -EINVAL;
-		goto out;
-	}
+	if (mem->state != from_state_req)
+		return -EINVAL;
 
-	ret = memory_block_action(mem, to_state);
-	if (!ret)
-		mem->state = to_state;
+	if (to_state == MEM_OFFLINE)
+		mem->state = MEM_GOING_OFFLINE;
+
+	ret = memory_block_action(mem->start_section_nr, to_state,
+				mem->online_type);
+
+	mem->state = ret ? from_state_req : to_state;
+
+	return ret;
+}
+
+/* The device lock serializes operations on memory_subsys_[online|offline] */
+static int memory_subsys_online(struct device *dev)
+{
+	struct memory_block *mem = to_memory_block(dev);
+	int ret;
+
+	if (mem->state == MEM_ONLINE)
+		return 0;
+
+	/*
+	 * If we are called from store_mem_state(), online_type will be
+	 * set >= 0. Otherwise we were called from the device online
+	 * attribute and need to set the online_type.
+	 */
+	if (mem->online_type < 0)
+		mem->online_type = ONLINE_KEEP;
+
+	ret = memory_block_change_state(mem, MEM_ONLINE, MEM_OFFLINE);
+
+	/* clear online_type */
+	mem->online_type = -1;
 
-out:
-	mutex_unlock(&mem->state_mutex);
 	return ret;
 }
 
+static int memory_subsys_offline(struct device *dev)
+{
+	struct memory_block *mem = to_memory_block(dev);
+
+	if (mem->state == MEM_OFFLINE)
+		return 0;
+
+	return memory_block_change_state(mem, MEM_OFFLINE, MEM_ONLINE);
+}
+
 static ssize_t
-store_mem_state(struct sys_device *dev,
-		struct sysdev_attribute *attr, const char *buf, size_t count)
+store_mem_state(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
 {
-	struct memory_block *mem;
-	unsigned int phys_section_nr;
-	int ret = -EINVAL;
+	struct memory_block *mem = to_memory_block(dev);
+	int ret, online_type;
+
+	ret = lock_device_hotplug_sysfs();
+	if (ret)
+		return ret;
 
-	mem = container_of(dev, struct memory_block, sysdev);
-	phys_section_nr = mem->phys_index;
+	if (!strncmp(buf, "online_kernel", min_t(int, count, 13)))
+		online_type = ONLINE_KERNEL;
+	else if (!strncmp(buf, "online_movable", min_t(int, count, 14)))
+		online_type = ONLINE_MOVABLE;
+	else if (!strncmp(buf, "online", min_t(int, count, 6)))
+		online_type = ONLINE_KEEP;
+	else if (!strncmp(buf, "offline", min_t(int, count, 7)))
+		online_type = -1;
+	else {
+		ret = -EINVAL;
+		goto err;
+	}
 
-	if (!present_section_nr(phys_section_nr))
-		goto out;
+	switch (online_type) {
+	case ONLINE_KERNEL:
+	case ONLINE_MOVABLE:
+	case ONLINE_KEEP:
+		/*
+		 * mem->online_type is not protected so there can be a
+		 * race here.  However, when racing online, the first
+		 * will succeed and the second will just return as the
+		 * block will already be online.  The online type
+		 * could be either one, but that is expected.
+		 */
+		mem->online_type = online_type;
+		ret = device_online(&mem->dev);
+		break;
+	case -1:
+		ret = device_offline(&mem->dev);
+		break;
+	default:
+		ret = -EINVAL; /* should never happen */
+	}
+
+err:
+	unlock_device_hotplug();
 
-	if (!strncmp(buf, "online", min((int)count, 6)))
-		ret = memory_block_change_state(mem, MEM_ONLINE, MEM_OFFLINE);
-	else if(!strncmp(buf, "offline", min((int)count, 7)))
-		ret = memory_block_change_state(mem, MEM_OFFLINE, MEM_ONLINE);
-out:
 	if (ret)
 		return ret;
 	return count;
@@ -290,41 +366,29 @@ out:
  * s.t. if I offline all of these sections I can then
  * remove the physical device?
  */
-static ssize_t show_phys_device(struct sys_device *dev,
-				struct sysdev_attribute *attr, char *buf)
+static ssize_t show_phys_device(struct device *dev,
+				struct device_attribute *attr, char *buf)
 {
-	struct memory_block *mem =
-		container_of(dev, struct memory_block, sysdev);
+	struct memory_block *mem = to_memory_block(dev);
 	return sprintf(buf, "%d\n", mem->phys_device);
 }
 
-static SYSDEV_ATTR(phys_index, 0444, show_mem_phys_index, NULL);
-static SYSDEV_ATTR(state, 0644, show_mem_state, store_mem_state);
-static SYSDEV_ATTR(phys_device, 0444, show_phys_device, NULL);
-static SYSDEV_ATTR(removable, 0444, show_mem_removable, NULL);
-
-#define mem_create_simple_file(mem, attr_name)	\
-	sysdev_create_file(&mem->sysdev, &attr_##attr_name)
-#define mem_remove_simple_file(mem, attr_name)	\
-	sysdev_remove_file(&mem->sysdev, &attr_##attr_name)
+static DEVICE_ATTR(phys_index, 0444, show_mem_start_phys_index, NULL);
+static DEVICE_ATTR(state, 0644, show_mem_state, store_mem_state);
+static DEVICE_ATTR(phys_device, 0444, show_phys_device, NULL);
+static DEVICE_ATTR(removable, 0444, show_mem_removable, NULL);
 
 /*
  * Block size attribute stuff
  */
 static ssize_t
-print_block_size(struct sysdev_class *class, struct sysdev_class_attribute *attr,
+print_block_size(struct device *dev, struct device_attribute *attr,
 		 char *buf)
 {
-	return sprintf(buf, "%lx\n", (unsigned long)PAGES_PER_SECTION * PAGE_SIZE);
+	return sprintf(buf, "%lx\n", get_memory_block_size());
 }
 
-static SYSDEV_CLASS_ATTR(block_size_bytes, 0444, print_block_size, NULL);
-
-static int block_size_init(void)
-{
-	return sysfs_create_file(&memory_sysdev_class.kset.kobj,
-				&attr_block_size_bytes.attr);
-}
+static DEVICE_ATTR(block_size_bytes, 0444, print_block_size, NULL);
 
 /*
  * Some architectures will have custom drivers to do this, and
@@ -334,35 +398,35 @@ static int block_size_init(void)
  */
 #ifdef CONFIG_ARCH_MEMORY_PROBE
 static ssize_t
-memory_probe_store(struct class *class, struct class_attribute *attr,
+memory_probe_store(struct device *dev, struct device_attribute *attr,
 		   const char *buf, size_t count)
 {
 	u64 phys_addr;
 	int nid;
-	int ret;
+	int i, ret;
+	unsigned long pages_per_block = PAGES_PER_SECTION * sections_per_block;
 
 	phys_addr = simple_strtoull(buf, NULL, 0);
 
-	nid = memory_add_physaddr_to_nid(phys_addr);
-	ret = add_memory(nid, phys_addr, PAGES_PER_SECTION << PAGE_SHIFT);
+	if (phys_addr & ((pages_per_block << PAGE_SHIFT) - 1))
+		return -EINVAL;
 
-	if (ret)
-		count = ret;
+	for (i = 0; i < sections_per_block; i++) {
+		nid = memory_add_physaddr_to_nid(phys_addr);
+		ret = add_memory(nid, phys_addr,
+				 PAGES_PER_SECTION << PAGE_SHIFT);
+		if (ret)
+			goto out;
 
-	return count;
-}
-static CLASS_ATTR(probe, S_IWUSR, NULL, memory_probe_store);
+		phys_addr += MIN_MEMORY_BLOCK_SIZE;
+	}
 
-static int memory_probe_init(void)
-{
-	return sysfs_create_file(&memory_sysdev_class.kset.kobj,
-				&class_attr_probe.attr);
-}
-#else
-static inline int memory_probe_init(void)
-{
-	return 0;
+	ret = count;
+out:
+	return ret;
 }
+
+static DEVICE_ATTR(probe, S_IWUSR, NULL, memory_probe_store);
 #endif
 
 #ifdef CONFIG_MEMORY_FAILURE
@@ -372,15 +436,15 @@ static inline int memory_probe_init(void)
 
 /* Soft offline a page */
 static ssize_t
-store_soft_offline_page(struct class *class,
-			struct class_attribute *attr,
+store_soft_offline_page(struct device *dev,
+			struct device_attribute *attr,
 			const char *buf, size_t count)
 {
 	int ret;
 	u64 pfn;
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
-	if (strict_strtoull(buf, 0, &pfn) < 0)
+	if (kstrtoull(buf, 0, &pfn) < 0)
 		return -EINVAL;
 	pfn >>= PAGE_SHIFT;
 	if (!pfn_valid(pfn))
@@ -391,40 +455,23 @@ store_soft_offline_page(struct class *class,
 
 /* Forcibly offline a page, including killing processes. */
 static ssize_t
-store_hard_offline_page(struct class *class,
-			struct class_attribute *attr,
+store_hard_offline_page(struct device *dev,
+			struct device_attribute *attr,
 			const char *buf, size_t count)
 {
 	int ret;
 	u64 pfn;
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
-	if (strict_strtoull(buf, 0, &pfn) < 0)
+	if (kstrtoull(buf, 0, &pfn) < 0)
 		return -EINVAL;
 	pfn >>= PAGE_SHIFT;
-	ret = __memory_failure(pfn, 0, 0);
+	ret = memory_failure(pfn, 0, 0);
 	return ret ? ret : count;
 }
 
-static CLASS_ATTR(soft_offline_page, 0644, NULL, store_soft_offline_page);
-static CLASS_ATTR(hard_offline_page, 0644, NULL, store_hard_offline_page);
-
-static __init int memory_fail_init(void)
-{
-	int err;
-
-	err = sysfs_create_file(&memory_sysdev_class.kset.kobj,
-				&class_attr_soft_offline_page.attr);
-	if (!err)
-		err = sysfs_create_file(&memory_sysdev_class.kset.kobj,
-				&class_attr_hard_offline_page.attr);
-	return err;
-}
-#else
-static inline int memory_fail_init(void)
-{
-	return 0;
-}
+static DEVICE_ATTR(soft_offline_page, S_IWUSR, NULL, store_soft_offline_page);
+static DEVICE_ATTR(hard_offline_page, S_IWUSR, NULL, store_hard_offline_page);
 #endif
 
 /*
@@ -437,30 +484,23 @@ int __weak arch_get_memory_phys_device(unsigned long start_pfn)
 	return 0;
 }
 
+/*
+ * A reference for the returned object is held and the reference for the
+ * hinted object is released.
+ */
 struct memory_block *find_memory_block_hinted(struct mem_section *section,
 					      struct memory_block *hint)
 {
-	struct kobject *kobj;
-	struct sys_device *sysdev;
-	struct memory_block *mem;
-	char name[sizeof(MEMORY_CLASS_NAME) + 9 + 1];
+	int block_id = base_memory_block_id(__section_nr(section));
+	struct device *hintdev = hint ? &hint->dev : NULL;
+	struct device *dev;
 
-	kobj = hint ? &hint->sysdev.kobj : NULL;
-
-	/*
-	 * This only works because we know that section == sysdev->id
-	 * slightly redundant with sysdev_register()
-	 */
-	sprintf(&name[0], "%s%d", MEMORY_CLASS_NAME, __section_nr(section));
-
-	kobj = kset_find_obj_hinted(&memory_sysdev_class.kset, name, kobj);
-	if (!kobj)
+	dev = subsys_find_device_by_id(&memory_subsys, block_id, hintdev);
+	if (hint)
+		put_device(&hint->dev);
+	if (!dev)
 		return NULL;
-
-	sysdev = container_of(kobj, struct sys_device, kobj);
-	mem = container_of(sysdev, struct memory_block, sysdev);
-
-	return mem;
+	return to_memory_block(dev);
 }
 
 /*
@@ -469,79 +509,153 @@ struct memory_block *find_memory_block_hinted(struct mem_section *section,
  * this gets to be a real problem, we can always use a radix
  * tree or something here.
  *
- * This could be made generic for all sysdev classes.
+ * This could be made generic for all device subsystems.
  */
 struct memory_block *find_memory_block(struct mem_section *section)
 {
 	return find_memory_block_hinted(section, NULL);
 }
 
-static int add_memory_block(int nid, struct mem_section *section,
-			unsigned long state, enum mem_add_context context)
+static struct attribute *memory_memblk_attrs[] = {
+	&dev_attr_phys_index.attr,
+	&dev_attr_state.attr,
+	&dev_attr_phys_device.attr,
+	&dev_attr_removable.attr,
+	NULL
+};
+
+static struct attribute_group memory_memblk_attr_group = {
+	.attrs = memory_memblk_attrs,
+};
+
+static const struct attribute_group *memory_memblk_attr_groups[] = {
+	&memory_memblk_attr_group,
+	NULL,
+};
+
+/*
+ * register_memory - Setup a sysfs device for a memory block
+ */
+static
+int register_memory(struct memory_block *memory)
+{
+	memory->dev.bus = &memory_subsys;
+	memory->dev.id = memory->start_section_nr / sections_per_block;
+	memory->dev.release = memory_block_release;
+	memory->dev.groups = memory_memblk_attr_groups;
+	memory->dev.offline = memory->state == MEM_OFFLINE;
+
+	return device_register(&memory->dev);
+}
+
+static int init_memory_block(struct memory_block **memory,
+			     struct mem_section *section, unsigned long state)
 {
-	struct memory_block *mem = kzalloc(sizeof(*mem), GFP_KERNEL);
+	struct memory_block *mem;
 	unsigned long start_pfn;
+	int scn_nr;
 	int ret = 0;
 
+	mem = kzalloc(sizeof(*mem), GFP_KERNEL);
 	if (!mem)
 		return -ENOMEM;
 
-	mutex_lock(&mem_sysfs_mutex);
-
-	mem->phys_index = __section_nr(section);
+	scn_nr = __section_nr(section);
+	mem->start_section_nr =
+			base_memory_block_id(scn_nr) * sections_per_block;
+	mem->end_section_nr = mem->start_section_nr + sections_per_block - 1;
 	mem->state = state;
 	mem->section_count++;
-	mutex_init(&mem->state_mutex);
-	start_pfn = section_nr_to_pfn(mem->phys_index);
+	start_pfn = section_nr_to_pfn(mem->start_section_nr);
 	mem->phys_device = arch_get_memory_phys_device(start_pfn);
 
-	ret = register_memory(mem, section);
-	if (!ret)
-		ret = mem_create_simple_file(mem, phys_index);
-	if (!ret)
-		ret = mem_create_simple_file(mem, state);
-	if (!ret)
-		ret = mem_create_simple_file(mem, phys_device);
-	if (!ret)
-		ret = mem_create_simple_file(mem, removable);
-	if (!ret) {
-		if (context == HOTPLUG)
-			ret = register_mem_sect_under_node(mem, nid);
-	}
+	ret = register_memory(mem);
 
-	mutex_unlock(&mem_sysfs_mutex);
+	*memory = mem;
 	return ret;
 }
 
-int remove_memory_block(unsigned long node_id, struct mem_section *section,
-		int phys_device)
+static int add_memory_block(int base_section_nr)
 {
 	struct memory_block *mem;
+	int i, ret, section_count = 0, section_nr;
 
-	mutex_lock(&mem_sysfs_mutex);
-	mem = find_memory_block(section);
-
-	mem->section_count--;
-	if (mem->section_count == 0) {
-		unregister_mem_sect_under_nodes(mem);
-		mem_remove_simple_file(mem, phys_index);
-		mem_remove_simple_file(mem, state);
-		mem_remove_simple_file(mem, phys_device);
-		mem_remove_simple_file(mem, removable);
-		unregister_memory(mem, section);
+	for (i = base_section_nr;
+	     (i < base_section_nr + sections_per_block) && i < NR_MEM_SECTIONS;
+	     i++) {
+		if (!present_section_nr(i))
+			continue;
+		if (section_count == 0)
+			section_nr = i;
+		section_count++;
 	}
 
-	mutex_unlock(&mem_sysfs_mutex);
+	if (section_count == 0)
+		return 0;
+	ret = init_memory_block(&mem, __nr_to_section(section_nr), MEM_ONLINE);
+	if (ret)
+		return ret;
+	mem->section_count = section_count;
 	return 0;
 }
 
+
 /*
  * need an interface for the VM to add new memory regions,
  * but without onlining it.
  */
 int register_new_memory(int nid, struct mem_section *section)
 {
-	return add_memory_block(nid, section, MEM_OFFLINE, HOTPLUG);
+	int ret = 0;
+	struct memory_block *mem;
+
+	mutex_lock(&mem_sysfs_mutex);
+
+	mem = find_memory_block(section);
+	if (mem) {
+		mem->section_count++;
+		put_device(&mem->dev);
+	} else {
+		ret = init_memory_block(&mem, section, MEM_OFFLINE);
+		if (ret)
+			goto out;
+	}
+
+	if (mem->section_count == sections_per_block)
+		ret = register_mem_sect_under_node(mem, nid);
+out:
+	mutex_unlock(&mem_sysfs_mutex);
+	return ret;
+}
+
+#ifdef CONFIG_MEMORY_HOTREMOVE
+static void
+unregister_memory(struct memory_block *memory)
+{
+	BUG_ON(memory->dev.bus != &memory_subsys);
+
+	/* drop the ref. we got in remove_memory_block() */
+	put_device(&memory->dev);
+	device_unregister(&memory->dev);
+}
+
+static int remove_memory_block(unsigned long node_id,
+			       struct mem_section *section, int phys_device)
+{
+	struct memory_block *mem;
+
+	mutex_lock(&mem_sysfs_mutex);
+	mem = find_memory_block(section);
+	unregister_mem_sect_under_nodes(mem, __section_nr(section));
+
+	mem->section_count--;
+	if (mem->section_count == 0)
+		unregister_memory(mem);
+	else
+		put_device(&mem->dev);
+
+	mutex_unlock(&mem_sysfs_mutex);
+	return 0;
 }
 
 int unregister_memory_section(struct mem_section *section)
@@ -551,6 +665,36 @@ int unregister_memory_section(struct mem_section *section)
 
 	return remove_memory_block(0, section, 0);
 }
+#endif /* CONFIG_MEMORY_HOTREMOVE */
+
+/* return true if the memory block is offlined, otherwise, return false */
+bool is_memblock_offlined(struct memory_block *mem)
+{
+	return mem->state == MEM_OFFLINE;
+}
+
+static struct attribute *memory_root_attrs[] = {
+#ifdef CONFIG_ARCH_MEMORY_PROBE
+	&dev_attr_probe.attr,
+#endif
+
+#ifdef CONFIG_MEMORY_FAILURE
+	&dev_attr_soft_offline_page.attr,
+	&dev_attr_hard_offline_page.attr,
+#endif
+
+	&dev_attr_block_size_bytes.attr,
+	NULL
+};
+
+static struct attribute_group memory_root_attr_group = {
+	.attrs = memory_root_attrs,
+};
+
+static const struct attribute_group *memory_root_attr_groups[] = {
+	&memory_root_attr_group,
+	NULL,
+};
 
 /*
  * Initialize the sysfs support for memory devices...
@@ -560,34 +704,27 @@ int __init memory_dev_init(void)
 	unsigned int i;
 	int ret;
 	int err;
+	unsigned long block_sz;
 
-	memory_sysdev_class.kset.uevent_ops = &memory_uevent_ops;
-	ret = sysdev_class_register(&memory_sysdev_class);
+	ret = subsys_system_register(&memory_subsys, memory_root_attr_groups);
 	if (ret)
 		goto out;
 
+	block_sz = get_memory_block_size();
+	sections_per_block = block_sz / MIN_MEMORY_BLOCK_SIZE;
+
 	/*
 	 * Create entries for memory sections that were found
 	 * during boot and have been initialized
 	 */
-	for (i = 0; i < NR_MEM_SECTIONS; i++) {
-		if (!present_section_nr(i))
-			continue;
-		err = add_memory_block(0, __nr_to_section(i), MEM_ONLINE,
-				       BOOT);
+	mutex_lock(&mem_sysfs_mutex);
+	for (i = 0; i < NR_MEM_SECTIONS; i += sections_per_block) {
+		err = add_memory_block(i);
 		if (!ret)
 			ret = err;
 	}
+	mutex_unlock(&mem_sysfs_mutex);
 
-	err = memory_probe_init();
-	if (!ret)
-		ret = err;
-	err = memory_fail_init();
-	if (!ret)
-		ret = err;
-	err = block_size_init();
-	if (!ret)
-		ret = err;
 out:
 	if (ret)
 		printk(KERN_ERR "%s() failed: %d\n", __func__, ret);
diff --git a/drivers/base/node.c b/drivers/base/node.c
index ce012a9c620..8f7ed9933a7 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -1,12 +1,13 @@
 /*
- * drivers/base/node.c - basic Node class support
+ * Basic Node interface support
  */
 
-#include <linux/sysdev.h>
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/mm.h>
 #include <linux/memory.h>
+#include <linux/vmstat.h>
+#include <linux/notifier.h>
 #include <linux/node.h>
 #include <linux/hugetlb.h>
 #include <linux/compaction.h>
@@ -18,18 +19,16 @@
 #include <linux/swap.h>
 #include <linux/slab.h>
 
-static struct sysdev_class_attribute *node_state_attrs[];
-
-static struct sysdev_class node_class = {
+static struct bus_type node_subsys = {
 	.name = "node",
-	.attrs = node_state_attrs,
+	.dev_name = "node",
 };
 
 
-static ssize_t node_read_cpumap(struct sys_device *dev, int type, char *buf)
+static ssize_t node_read_cpumap(struct device *dev, int type, char *buf)
 {
 	struct node *node_dev = to_node(dev);
-	const struct cpumask *mask = cpumask_of_node(node_dev->sysdev.id);
+	const struct cpumask *mask = cpumask_of_node(node_dev->dev.id);
 	int len;
 
 	/* 2008/04/07: buf currently PAGE_SIZE, need 9 chars per 32 bits. */
@@ -43,23 +42,23 @@ static ssize_t node_read_cpumap(struct sys_device *dev, int type, char *buf)
 	return len;
 }
 
-static inline ssize_t node_read_cpumask(struct sys_device *dev,
-				struct sysdev_attribute *attr, char *buf)
+static inline ssize_t node_read_cpumask(struct device *dev,
+				struct device_attribute *attr, char *buf)
 {
 	return node_read_cpumap(dev, 0, buf);
 }
-static inline ssize_t node_read_cpulist(struct sys_device *dev,
-				struct sysdev_attribute *attr, char *buf)
+static inline ssize_t node_read_cpulist(struct device *dev,
+				struct device_attribute *attr, char *buf)
 {
 	return node_read_cpumap(dev, 1, buf);
 }
 
-static SYSDEV_ATTR(cpumap,  S_IRUGO, node_read_cpumask, NULL);
-static SYSDEV_ATTR(cpulist, S_IRUGO, node_read_cpulist, NULL);
+static DEVICE_ATTR(cpumap,  S_IRUGO, node_read_cpumask, NULL);
+static DEVICE_ATTR(cpulist, S_IRUGO, node_read_cpulist, NULL);
 
 #define K(x) ((x) << (PAGE_SHIFT - 10))
-static ssize_t node_read_meminfo(struct sys_device * dev,
-			struct sysdev_attribute *attr, char * buf)
+static ssize_t node_read_meminfo(struct device *dev,
+			struct device_attribute *attr, char *buf)
 {
 	int n;
 	int nid = dev->id;
@@ -117,7 +116,11 @@ static ssize_t node_read_meminfo(struct sys_device * dev,
 		       "Node %d WritebackTmp:   %8lu kB\n"
 		       "Node %d Slab:           %8lu kB\n"
 		       "Node %d SReclaimable:   %8lu kB\n"
-		       "Node %d SUnreclaim:     %8lu kB\n",
+		       "Node %d SUnreclaim:     %8lu kB\n"
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+		       "Node %d AnonHugePages:  %8lu kB\n"
+#endif
+			,
 		       nid, K(node_page_state(nid, NR_FILE_DIRTY)),
 		       nid, K(node_page_state(nid, NR_WRITEBACK)),
 		       nid, K(node_page_state(nid, NR_FILE_PAGES)),
@@ -133,16 +136,23 @@ static ssize_t node_read_meminfo(struct sys_device * dev,
 		       nid, K(node_page_state(nid, NR_SLAB_RECLAIMABLE) +
 				node_page_state(nid, NR_SLAB_UNRECLAIMABLE)),
 		       nid, K(node_page_state(nid, NR_SLAB_RECLAIMABLE)),
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+		       nid, K(node_page_state(nid, NR_SLAB_UNRECLAIMABLE))
+			, nid,
+			K(node_page_state(nid, NR_ANON_TRANSPARENT_HUGEPAGES) *
+			HPAGE_PMD_NR));
+#else
 		       nid, K(node_page_state(nid, NR_SLAB_UNRECLAIMABLE)));
+#endif
 	n += hugetlb_report_node_meminfo(nid, buf + n);
 	return n;
 }
 
 #undef K
-static SYSDEV_ATTR(meminfo, S_IRUGO, node_read_meminfo, NULL);
+static DEVICE_ATTR(meminfo, S_IRUGO, node_read_meminfo, NULL);
 
-static ssize_t node_read_numastat(struct sys_device * dev,
-				struct sysdev_attribute *attr, char * buf)
+static ssize_t node_read_numastat(struct device *dev,
+				struct device_attribute *attr, char *buf)
 {
 	return sprintf(buf,
 		       "numa_hit %lu\n"
@@ -158,22 +168,25 @@ static ssize_t node_read_numastat(struct sys_device * dev,
 		       node_page_state(dev->id, NUMA_LOCAL),
 		       node_page_state(dev->id, NUMA_OTHER));
 }
-static SYSDEV_ATTR(numastat, S_IRUGO, node_read_numastat, NULL);
+static DEVICE_ATTR(numastat, S_IRUGO, node_read_numastat, NULL);
 
-static ssize_t node_read_vmstat(struct sys_device *dev,
-				struct sysdev_attribute *attr, char *buf)
+static ssize_t node_read_vmstat(struct device *dev,
+				struct device_attribute *attr, char *buf)
 {
 	int nid = dev->id;
-	return sprintf(buf,
-		"nr_written %lu\n"
-		"nr_dirtied %lu\n",
-		node_page_state(nid, NR_WRITTEN),
-		node_page_state(nid, NR_DIRTIED));
+	int i;
+	int n = 0;
+
+	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
+		n += sprintf(buf+n, "%s %lu\n", vmstat_text[i],
+			     node_page_state(nid, i));
+
+	return n;
 }
-static SYSDEV_ATTR(vmstat, S_IRUGO, node_read_vmstat, NULL);
+static DEVICE_ATTR(vmstat, S_IRUGO, node_read_vmstat, NULL);
 
-static ssize_t node_read_distance(struct sys_device * dev,
-			struct sysdev_attribute *attr, char * buf)
+static ssize_t node_read_distance(struct device *dev,
+			struct device_attribute *attr, char * buf)
 {
 	int nid = dev->id;
 	int len = 0;
@@ -191,7 +204,7 @@ static ssize_t node_read_distance(struct sys_device * dev,
 	len += sprintf(buf + len, "\n");
 	return len;
 }
-static SYSDEV_ATTR(distance, S_IRUGO, node_read_distance, NULL);
+static DEVICE_ATTR(distance, S_IRUGO, node_read_distance, NULL);
 
 #ifdef CONFIG_HUGETLBFS
 /*
@@ -209,7 +222,7 @@ static node_registration_func_t __hugetlb_unregister_node;
 static inline bool hugetlb_register_node(struct node *node)
 {
 	if (__hugetlb_register_node &&
-			node_state(node->sysdev.id, N_HIGH_MEMORY)) {
+			node_state(node->dev.id, N_MEMORY)) {
 		__hugetlb_register_node(node);
 		return true;
 	}
@@ -234,6 +247,24 @@ static inline void hugetlb_register_node(struct node *node) {}
 static inline void hugetlb_unregister_node(struct node *node) {}
 #endif
 
+static void node_device_release(struct device *dev)
+{
+	struct node *node = to_node(dev);
+
+#if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) && defined(CONFIG_HUGETLBFS)
+	/*
+	 * We schedule the work only when a memory section is
+	 * onlined/offlined on this node. When we come here,
+	 * all the memory on this node has been offlined,
+	 * so no new work will be queued on node->node_work.
+	 *
+	 * The work is using node->node_work, so we should
+	 * flush work before freeing the memory.
+	 */
+	flush_work(&node->node_work);
+#endif
+	kfree(node);
+}
 
 /*
  * register_node - Setup a sysfs device for a node.
@@ -241,21 +272,22 @@ static inline void hugetlb_unregister_node(struct node *node) {}
  *
  * Initialize and register the node device.
  */
-int register_node(struct node *node, int num, struct node *parent)
+static int register_node(struct node *node, int num, struct node *parent)
 {
 	int error;
 
-	node->sysdev.id = num;
-	node->sysdev.cls = &node_class;
-	error = sysdev_register(&node->sysdev);
+	node->dev.id = num;
+	node->dev.bus = &node_subsys;
+	node->dev.release = node_device_release;
+	error = device_register(&node->dev);
 
 	if (!error){
-		sysdev_create_file(&node->sysdev, &attr_cpumap);
-		sysdev_create_file(&node->sysdev, &attr_cpulist);
-		sysdev_create_file(&node->sysdev, &attr_meminfo);
-		sysdev_create_file(&node->sysdev, &attr_numastat);
-		sysdev_create_file(&node->sysdev, &attr_distance);
-		sysdev_create_file(&node->sysdev, &attr_vmstat);
+		device_create_file(&node->dev, &dev_attr_cpumap);
+		device_create_file(&node->dev, &dev_attr_cpulist);
+		device_create_file(&node->dev, &dev_attr_meminfo);
+		device_create_file(&node->dev, &dev_attr_numastat);
+		device_create_file(&node->dev, &dev_attr_distance);
+		device_create_file(&node->dev, &dev_attr_vmstat);
 
 		scan_unevictable_register_node(node);
 
@@ -275,20 +307,20 @@ int register_node(struct node *node, int num, struct node *parent)
  */
 void unregister_node(struct node *node)
 {
-	sysdev_remove_file(&node->sysdev, &attr_cpumap);
-	sysdev_remove_file(&node->sysdev, &attr_cpulist);
-	sysdev_remove_file(&node->sysdev, &attr_meminfo);
-	sysdev_remove_file(&node->sysdev, &attr_numastat);
-	sysdev_remove_file(&node->sysdev, &attr_distance);
-	sysdev_remove_file(&node->sysdev, &attr_vmstat);
+	device_remove_file(&node->dev, &dev_attr_cpumap);
+	device_remove_file(&node->dev, &dev_attr_cpulist);
+	device_remove_file(&node->dev, &dev_attr_meminfo);
+	device_remove_file(&node->dev, &dev_attr_numastat);
+	device_remove_file(&node->dev, &dev_attr_distance);
+	device_remove_file(&node->dev, &dev_attr_vmstat);
 
 	scan_unevictable_unregister_node(node);
 	hugetlb_unregister_node(node);		/* no-op, if memoryless node */
 
-	sysdev_unregister(&node->sysdev);
+	device_unregister(&node->dev);
 }
 
-struct node node_devices[MAX_NUMNODES];
+struct node *node_devices[MAX_NUMNODES];
 
 /*
  * register cpu under node
@@ -296,41 +328,41 @@ struct node node_devices[MAX_NUMNODES];
 int register_cpu_under_node(unsigned int cpu, unsigned int nid)
 {
 	int ret;
-	struct sys_device *obj;
+	struct device *obj;
 
 	if (!node_online(nid))
 		return 0;
 
-	obj = get_cpu_sysdev(cpu);
+	obj = get_cpu_device(cpu);
 	if (!obj)
 		return 0;
 
-	ret = sysfs_create_link(&node_devices[nid].sysdev.kobj,
+	ret = sysfs_create_link(&node_devices[nid]->dev.kobj,
 				&obj->kobj,
 				kobject_name(&obj->kobj));
 	if (ret)
 		return ret;
 
 	return sysfs_create_link(&obj->kobj,
-				 &node_devices[nid].sysdev.kobj,
-				 kobject_name(&node_devices[nid].sysdev.kobj));
+				 &node_devices[nid]->dev.kobj,
+				 kobject_name(&node_devices[nid]->dev.kobj));
 }
 
 int unregister_cpu_under_node(unsigned int cpu, unsigned int nid)
 {
-	struct sys_device *obj;
+	struct device *obj;
 
 	if (!node_online(nid))
 		return 0;
 
-	obj = get_cpu_sysdev(cpu);
+	obj = get_cpu_device(cpu);
 	if (!obj)
 		return 0;
 
-	sysfs_remove_link(&node_devices[nid].sysdev.kobj,
+	sysfs_remove_link(&node_devices[nid]->dev.kobj,
 			  kobject_name(&obj->kobj));
 	sysfs_remove_link(&obj->kobj,
-			  kobject_name(&node_devices[nid].sysdev.kobj));
+			  kobject_name(&node_devices[nid]->dev.kobj));
 
 	return 0;
 }
@@ -360,8 +392,10 @@ int register_mem_sect_under_node(struct memory_block *mem_blk, int nid)
 		return -EFAULT;
 	if (!node_online(nid))
 		return 0;
-	sect_start_pfn = section_nr_to_pfn(mem_blk->phys_index);
-	sect_end_pfn = sect_start_pfn + PAGES_PER_SECTION - 1;
+
+	sect_start_pfn = section_nr_to_pfn(mem_blk->start_section_nr);
+	sect_end_pfn = section_nr_to_pfn(mem_blk->end_section_nr);
+	sect_end_pfn += PAGES_PER_SECTION - 1;
 	for (pfn = sect_start_pfn; pfn <= sect_end_pfn; pfn++) {
 		int page_nid;
 
@@ -370,22 +404,23 @@ int register_mem_sect_under_node(struct memory_block *mem_blk, int nid)
 			continue;
 		if (page_nid != nid)
 			continue;
-		ret = sysfs_create_link_nowarn(&node_devices[nid].sysdev.kobj,
-					&mem_blk->sysdev.kobj,
-					kobject_name(&mem_blk->sysdev.kobj));
+		ret = sysfs_create_link_nowarn(&node_devices[nid]->dev.kobj,
+					&mem_blk->dev.kobj,
+					kobject_name(&mem_blk->dev.kobj));
 		if (ret)
 			return ret;
 
-		return sysfs_create_link_nowarn(&mem_blk->sysdev.kobj,
-				&node_devices[nid].sysdev.kobj,
-				kobject_name(&node_devices[nid].sysdev.kobj));
+		return sysfs_create_link_nowarn(&mem_blk->dev.kobj,
+				&node_devices[nid]->dev.kobj,
+				kobject_name(&node_devices[nid]->dev.kobj));
 	}
 	/* mem section does not span the specified node */
 	return 0;
 }
 
 /* unregister memory section under all nodes that it spans */
-int unregister_mem_sect_under_nodes(struct memory_block *mem_blk)
+int unregister_mem_sect_under_nodes(struct memory_block *mem_blk,
+				    unsigned long phys_index)
 {
 	NODEMASK_ALLOC(nodemask_t, unlinked_nodes, GFP_KERNEL);
 	unsigned long pfn, sect_start_pfn, sect_end_pfn;
@@ -397,7 +432,8 @@ int unregister_mem_sect_under_nodes(struct memory_block *mem_blk)
 	if (!unlinked_nodes)
 		return -ENOMEM;
 	nodes_clear(*unlinked_nodes);
-	sect_start_pfn = section_nr_to_pfn(mem_blk->phys_index);
+
+	sect_start_pfn = section_nr_to_pfn(phys_index);
 	sect_end_pfn = sect_start_pfn + PAGES_PER_SECTION - 1;
 	for (pfn = sect_start_pfn; pfn <= sect_end_pfn; pfn++) {
 		int nid;
@@ -409,10 +445,10 @@ int unregister_mem_sect_under_nodes(struct memory_block *mem_blk)
 			continue;
 		if (node_test_and_set(nid, *unlinked_nodes))
 			continue;
-		sysfs_remove_link(&node_devices[nid].sysdev.kobj,
-			 kobject_name(&mem_blk->sysdev.kobj));
-		sysfs_remove_link(&mem_blk->sysdev.kobj,
-			 kobject_name(&node_devices[nid].sysdev.kobj));
+		sysfs_remove_link(&node_devices[nid]->dev.kobj,
+			 kobject_name(&mem_blk->dev.kobj));
+		sysfs_remove_link(&mem_blk->dev.kobj,
+			 kobject_name(&node_devices[nid]->dev.kobj));
 	}
 	NODEMASK_FREE(unlinked_nodes);
 	return 0;
@@ -434,7 +470,15 @@ static int link_mem_sections(int nid)
 		if (!present_section_nr(section_nr))
 			continue;
 		mem_sect = __nr_to_section(section_nr);
+
+		/* same memblock ? */
+		if (mem_blk)
+			if ((section_nr >= mem_blk->start_section_nr) &&
+			    (section_nr <= mem_blk->end_section_nr))
+				continue;
+
 		mem_blk = find_memory_block_hinted(mem_sect, mem_blk);
+
 		ret = register_mem_sect_under_node(mem_blk, nid);
 		if (!err)
 			err = ret;
@@ -443,7 +487,7 @@ static int link_mem_sections(int nid)
 	}
 
 	if (mem_blk)
-		kobject_put(&mem_blk->sysdev.kobj);
+		kobject_put(&mem_blk->dev.kobj);
 	return err;
 }
 
@@ -470,7 +514,7 @@ static void node_hugetlb_work(struct work_struct *work)
 
 static void init_node_hugetlb_work(int nid)
 {
-	INIT_WORK(&node_devices[nid].node_work, node_hugetlb_work);
+	INIT_WORK(&node_devices[nid]->node_work, node_hugetlb_work);
 }
 
 static int node_memory_callback(struct notifier_block *self,
@@ -487,7 +531,7 @@ static int node_memory_callback(struct notifier_block *self,
 		 * when transitioning to/from memoryless state.
 		 */
 		if (nid != NUMA_NO_NODE)
-			schedule_work(&node_devices[nid].node_work);
+			schedule_work(&node_devices[nid]->node_work);
 		break;
 
 	case MEM_GOING_ONLINE:
@@ -528,9 +572,13 @@ int register_one_node(int nid)
 		struct node *parent = NULL;
 
 		if (p_node != nid)
-			parent = &node_devices[p_node];
+			parent = node_devices[p_node];
 
-		error = register_node(&node_devices[nid], nid, parent);
+		node_devices[nid] = kzalloc(sizeof(struct node), GFP_KERNEL);
+		if (!node_devices[nid])
+			return -ENOMEM;
+
+		error = register_node(node_devices[nid], nid, parent);
 
 		/* link cpu under this node */
 		for_each_present_cpu(cpu) {
@@ -551,7 +599,12 @@ int register_one_node(int nid)
 
 void unregister_one_node(int nid)
 {
-	unregister_node(&node_devices[nid]);
+	if (!node_devices[nid])
+		return;
+
+	unregister_node(node_devices[nid]);
+	kfree(node_devices[nid]);
+	node_devices[nid] = NULL;
 }
 
 /*
@@ -562,50 +615,63 @@ static ssize_t print_nodes_state(enum node_states state, char *buf)
 {
 	int n;
 
-	n = nodelist_scnprintf(buf, PAGE_SIZE, node_states[state]);
-	if (n > 0 && PAGE_SIZE > n + 1) {
-		*(buf + n++) = '\n';
-		*(buf + n++) = '\0';
-	}
+	n = nodelist_scnprintf(buf, PAGE_SIZE-2, node_states[state]);
+	buf[n++] = '\n';
+	buf[n] = '\0';
 	return n;
 }
 
 struct node_attr {
-	struct sysdev_class_attribute attr;
+	struct device_attribute attr;
 	enum node_states state;
 };
 
-static ssize_t show_node_state(struct sysdev_class *class,
-			       struct sysdev_class_attribute *attr, char *buf)
+static ssize_t show_node_state(struct device *dev,
+			       struct device_attribute *attr, char *buf)
 {
 	struct node_attr *na = container_of(attr, struct node_attr, attr);
 	return print_nodes_state(na->state, buf);
 }
 
 #define _NODE_ATTR(name, state) \
-	{ _SYSDEV_CLASS_ATTR(name, 0444, show_node_state, NULL), state }
+	{ __ATTR(name, 0444, show_node_state, NULL), state }
 
 static struct node_attr node_state_attr[] = {
-	_NODE_ATTR(possible, N_POSSIBLE),
-	_NODE_ATTR(online, N_ONLINE),
-	_NODE_ATTR(has_normal_memory, N_NORMAL_MEMORY),
-	_NODE_ATTR(has_cpu, N_CPU),
+	[N_POSSIBLE] = _NODE_ATTR(possible, N_POSSIBLE),
+	[N_ONLINE] = _NODE_ATTR(online, N_ONLINE),
+	[N_NORMAL_MEMORY] = _NODE_ATTR(has_normal_memory, N_NORMAL_MEMORY),
 #ifdef CONFIG_HIGHMEM
-	_NODE_ATTR(has_high_memory, N_HIGH_MEMORY),
+	[N_HIGH_MEMORY] = _NODE_ATTR(has_high_memory, N_HIGH_MEMORY),
+#endif
+#ifdef CONFIG_MOVABLE_NODE
+	[N_MEMORY] = _NODE_ATTR(has_memory, N_MEMORY),
 #endif
+	[N_CPU] = _NODE_ATTR(has_cpu, N_CPU),
 };
 
-static struct sysdev_class_attribute *node_state_attrs[] = {
-	&node_state_attr[0].attr,
-	&node_state_attr[1].attr,
-	&node_state_attr[2].attr,
-	&node_state_attr[3].attr,
+static struct attribute *node_state_attrs[] = {
+	&node_state_attr[N_POSSIBLE].attr.attr,
+	&node_state_attr[N_ONLINE].attr.attr,
+	&node_state_attr[N_NORMAL_MEMORY].attr.attr,
 #ifdef CONFIG_HIGHMEM
-	&node_state_attr[4].attr,
+	&node_state_attr[N_HIGH_MEMORY].attr.attr,
 #endif
+#ifdef CONFIG_MOVABLE_NODE
+	&node_state_attr[N_MEMORY].attr.attr,
+#endif
+	&node_state_attr[N_CPU].attr.attr,
 	NULL
 };
 
+static struct attribute_group memory_root_attr_group = {
+	.attrs = node_state_attrs,
+};
+
+static const struct attribute_group *cpu_root_attr_groups[] = {
+	&memory_root_attr_group,
+	NULL,
+};
+
 #define NODE_CALLBACK_PRI	2	/* lower than SLAB */
 static int __init register_node_type(void)
 {
@@ -614,10 +680,13 @@ static int __init register_node_type(void)
  	BUILD_BUG_ON(ARRAY_SIZE(node_state_attr) != NR_NODE_STATES);
  	BUILD_BUG_ON(ARRAY_SIZE(node_state_attrs)-1 != NR_NODE_STATES);
 
-	ret = sysdev_class_register(&node_class);
+	ret = subsys_system_register(&node_subsys, cpu_root_attr_groups);
 	if (!ret) {
-		hotplug_memory_notifier(node_memory_callback,
-					NODE_CALLBACK_PRI);
+		static struct notifier_block node_memory_callback_nb = {
+			.notifier_call = node_memory_callback,
+			.priority = NODE_CALLBACK_PRI,
+		};
+		register_hotmemory_notifier(&node_memory_callback_nb);
 	}
 
 	/*
diff --git a/drivers/base/pinctrl.c b/drivers/base/pinctrl.c
new file mode 100644
index 00000000000..5fb74b43848
--- /dev/null
+++ b/drivers/base/pinctrl.c
@@ -0,0 +1,88 @@
+/*
+ * Driver core interface to the pinctrl subsystem.
+ *
+ * Copyright (C) 2012 ST-Ericsson SA
+ * Written on behalf of Linaro for ST-Ericsson
+ * Based on bits of regulator core, gpio core and clk core
+ *
+ * Author: Linus Walleij <linus.walleij@linaro.org>
+ *
+ * License terms: GNU General Public License (GPL) version 2
+ */
+
+#include <linux/device.h>
+#include <linux/pinctrl/devinfo.h>
+#include <linux/pinctrl/consumer.h>
+#include <linux/slab.h>
+
+/**
+ * pinctrl_bind_pins() - called by the device core before probe
+ * @dev: the device that is just about to probe
+ */
+int pinctrl_bind_pins(struct device *dev)
+{
+	int ret;
+
+	dev->pins = devm_kzalloc(dev, sizeof(*(dev->pins)), GFP_KERNEL);
+	if (!dev->pins)
+		return -ENOMEM;
+
+	dev->pins->p = devm_pinctrl_get(dev);
+	if (IS_ERR(dev->pins->p)) {
+		dev_dbg(dev, "no pinctrl handle\n");
+		ret = PTR_ERR(dev->pins->p);
+		goto cleanup_alloc;
+	}
+
+	dev->pins->default_state = pinctrl_lookup_state(dev->pins->p,
+					PINCTRL_STATE_DEFAULT);
+	if (IS_ERR(dev->pins->default_state)) {
+		dev_dbg(dev, "no default pinctrl state\n");
+		ret = 0;
+		goto cleanup_get;
+	}
+
+	ret = pinctrl_select_state(dev->pins->p, dev->pins->default_state);
+	if (ret) {
+		dev_dbg(dev, "failed to activate default pinctrl state\n");
+		goto cleanup_get;
+	}
+
+#ifdef CONFIG_PM
+	/*
+	 * If power management is enabled, we also look for the optional
+	 * sleep and idle pin states, with semantics as defined in
+	 * <linux/pinctrl/pinctrl-state.h>
+	 */
+	dev->pins->sleep_state = pinctrl_lookup_state(dev->pins->p,
+					PINCTRL_STATE_SLEEP);
+	if (IS_ERR(dev->pins->sleep_state))
+		/* Not supplying this state is perfectly legal */
+		dev_dbg(dev, "no sleep pinctrl state\n");
+
+	dev->pins->idle_state = pinctrl_lookup_state(dev->pins->p,
+					PINCTRL_STATE_IDLE);
+	if (IS_ERR(dev->pins->idle_state))
+		/* Not supplying this state is perfectly legal */
+		dev_dbg(dev, "no idle pinctrl state\n");
+#endif
+
+	return 0;
+
+	/*
+	 * If no pinctrl handle or default state was found for this device,
+	 * let's explicitly free the pin container in the device, there is
+	 * no point in keeping it around.
+	 */
+cleanup_get:
+	devm_pinctrl_put(dev->pins->p);
+cleanup_alloc:
+	devm_kfree(dev, dev->pins);
+	dev->pins = NULL;
+
+	/* Only return deferrals */
+	if (ret != -EPROBE_DEFER)
+		ret = 0;
+
+	return ret;
+}
diff --git a/drivers/base/platform.c b/drivers/base/platform.c
index f051cfff18a..eee48c49f5d 100644
--- a/drivers/base/platform.c
+++ b/drivers/base/platform.c
@@ -13,6 +13,7 @@
 #include <linux/string.h>
 #include <linux/platform_device.h>
 #include <linux/of_device.h>
+#include <linux/of_irq.h>
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/dma-mapping.h>
@@ -20,11 +21,14 @@
 #include <linux/err.h>
 #include <linux/slab.h>
 #include <linux/pm_runtime.h>
+#include <linux/idr.h>
+#include <linux/acpi.h>
 
 #include "base.h"
+#include "power/power.h"
 
-#define to_platform_driver(drv)	(container_of((drv), struct platform_driver, \
-				 driver))
+/* For automatically allocated device IDs */
+static DEFINE_IDA(platform_devid_ida);
 
 struct device platform_bus = {
 	.init_name	= "platform",
@@ -32,6 +36,25 @@ struct device platform_bus = {
 EXPORT_SYMBOL_GPL(platform_bus);
 
 /**
+ * arch_setup_pdev_archdata - Allow manipulation of archdata before it's used
+ * @pdev: platform device
+ *
+ * This is called before platform_device_add() such that any pdev_archdata may
+ * be set up before the platform_notifier is called.  So if a user needs to
+ * manipulate any relevant information in the pdev_archdata they can do:
+ *
+ *	platform_device_alloc()
+ *	... manipulate ...
+ *	platform_device_add()
+ *
+ * And if they don't care they can just call platform_device_register() and
+ * everything will just work out.
+ */
+void __weak arch_setup_pdev_archdata(struct platform_device *pdev)
+{
+}
+
+/**
  * platform_get_resource - get a resource for a device
  * @dev: platform device
  * @type: resource type
@@ -59,9 +82,25 @@ EXPORT_SYMBOL_GPL(platform_get_resource);
  */
 int platform_get_irq(struct platform_device *dev, unsigned int num)
 {
-	struct resource *r = platform_get_resource(dev, IORESOURCE_IRQ, num);
+#ifdef CONFIG_SPARC
+	/* sparc does not have irqs represented as IORESOURCE_IRQ resources */
+	if (!dev || num >= dev->archdata.num_irqs)
+		return -ENXIO;
+	return dev->archdata.irqs[num];
+#else
+	struct resource *r;
+	if (IS_ENABLED(CONFIG_OF_IRQ) && dev->dev.of_node) {
+		int ret;
+
+		ret = of_irq_get(dev->dev.of_node, num);
+		if (ret >= 0 || ret == -EPROBE_DEFER)
+			return ret;
+	}
+
+	r = platform_get_resource(dev, IORESOURCE_IRQ, num);
 
 	return r ? r->start : -ENXIO;
+#endif
 }
 EXPORT_SYMBOL_GPL(platform_get_irq);
 
@@ -80,6 +119,9 @@ struct resource *platform_get_resource_byname(struct platform_device *dev,
 	for (i = 0; i < dev->num_resources; i++) {
 		struct resource *r = &dev->resource[i];
 
+		if (unlikely(!r->name))
+			continue;
+
 		if (type == resource_type(r) && !strcmp(r->name, name))
 			return r;
 	}
@@ -88,15 +130,23 @@ struct resource *platform_get_resource_byname(struct platform_device *dev,
 EXPORT_SYMBOL_GPL(platform_get_resource_byname);
 
 /**
- * platform_get_irq - get an IRQ for a device
+ * platform_get_irq_byname - get an IRQ for a device by name
  * @dev: platform device
  * @name: IRQ name
  */
 int platform_get_irq_byname(struct platform_device *dev, const char *name)
 {
-	struct resource *r = platform_get_resource_byname(dev, IORESOURCE_IRQ,
-							  name);
+	struct resource *r;
+
+	if (IS_ENABLED(CONFIG_OF_IRQ) && dev->dev.of_node) {
+		int ret;
 
+		ret = of_irq_get_byname(dev->dev.of_node, name);
+		if (ret >= 0 || ret == -EPROBE_DEFER)
+			return ret;
+	}
+
+	r = platform_get_resource_byname(dev, IORESOURCE_IRQ, name);
 	return r ? r->start : -ENXIO;
 }
 EXPORT_SYMBOL_GPL(platform_get_irq_byname);
@@ -149,6 +199,7 @@ static void platform_device_release(struct device *dev)
 
 	of_device_node_put(&pa->pdev.dev);
 	kfree(pa->pdev.dev.platform_data);
+	kfree(pa->pdev.mfd_cell);
 	kfree(pa->pdev.resource);
 	kfree(pa);
 }
@@ -172,6 +223,7 @@ struct platform_device *platform_device_alloc(const char *name, int id)
 		pa->pdev.id = id;
 		device_initialize(&pa->pdev.dev);
 		pa->pdev.dev.release = platform_device_release;
+		arch_setup_pdev_archdata(&pa->pdev);
 	}
 
 	return pa ? &pa->pdev : NULL;
@@ -191,18 +243,18 @@ EXPORT_SYMBOL_GPL(platform_device_alloc);
 int platform_device_add_resources(struct platform_device *pdev,
 				  const struct resource *res, unsigned int num)
 {
-	struct resource *r;
+	struct resource *r = NULL;
 
-	if (!res)
-		return 0;
-
-	r = kmemdup(res, sizeof(struct resource) * num, GFP_KERNEL);
-	if (r) {
-		pdev->resource = r;
-		pdev->num_resources = num;
-		return 0;
+	if (res) {
+		r = kmemdup(res, sizeof(struct resource) * num, GFP_KERNEL);
+		if (!r)
+			return -ENOMEM;
 	}
-	return -ENOMEM;
+
+	kfree(pdev->resource);
+	pdev->resource = r;
+	pdev->num_resources = num;
+	return 0;
 }
 EXPORT_SYMBOL_GPL(platform_device_add_resources);
 
@@ -219,17 +271,17 @@ EXPORT_SYMBOL_GPL(platform_device_add_resources);
 int platform_device_add_data(struct platform_device *pdev, const void *data,
 			     size_t size)
 {
-	void *d;
-
-	if (!data)
-		return 0;
+	void *d = NULL;
 
-	d = kmemdup(data, size, GFP_KERNEL);
-	if (d) {
-		pdev->dev.platform_data = d;
-		return 0;
+	if (data) {
+		d = kmemdup(data, size, GFP_KERNEL);
+		if (!d)
+			return -ENOMEM;
 	}
-	return -ENOMEM;
+
+	kfree(pdev->dev.platform_data);
+	pdev->dev.platform_data = d;
+	return 0;
 }
 EXPORT_SYMBOL_GPL(platform_device_add_data);
 
@@ -242,7 +294,7 @@ EXPORT_SYMBOL_GPL(platform_device_add_data);
  */
 int platform_device_add(struct platform_device *pdev)
 {
-	int i, ret = 0;
+	int i, ret;
 
 	if (!pdev)
 		return -EINVAL;
@@ -252,10 +304,27 @@ int platform_device_add(struct platform_device *pdev)
 
 	pdev->dev.bus = &platform_bus_type;
 
-	if (pdev->id != -1)
+	switch (pdev->id) {
+	default:
 		dev_set_name(&pdev->dev, "%s.%d", pdev->name,  pdev->id);
-	else
+		break;
+	case PLATFORM_DEVID_NONE:
 		dev_set_name(&pdev->dev, "%s", pdev->name);
+		break;
+	case PLATFORM_DEVID_AUTO:
+		/*
+		 * Automatically allocated device ID. We mark it as such so
+		 * that we remember it must be freed, and we append a suffix
+		 * to avoid namespace collision with explicit IDs.
+		 */
+		ret = ida_simple_get(&platform_devid_ida, 0, 0, GFP_KERNEL);
+		if (ret < 0)
+			goto err_out;
+		pdev->id = ret;
+		pdev->id_auto = true;
+		dev_set_name(&pdev->dev, "%s.%d.auto", pdev->name, pdev->id);
+		break;
+	}
 
 	for (i = 0; i < pdev->num_resources; i++) {
 		struct resource *p, *r = &pdev->resource[i];
@@ -272,9 +341,7 @@ int platform_device_add(struct platform_device *pdev)
 		}
 
 		if (p && insert_resource(p, r)) {
-			printk(KERN_ERR
-			       "%s: failed to claim resource %d\n",
-			       dev_name(&pdev->dev), i);
+			dev_err(&pdev->dev, "failed to claim resource %d\n", i);
 			ret = -EBUSY;
 			goto failed;
 		}
@@ -288,6 +355,11 @@ int platform_device_add(struct platform_device *pdev)
 		return ret;
 
  failed:
+	if (pdev->id_auto) {
+		ida_simple_remove(&platform_devid_ida, pdev->id);
+		pdev->id = PLATFORM_DEVID_AUTO;
+	}
+
 	while (--i >= 0) {
 		struct resource *r = &pdev->resource[i];
 		unsigned long type = resource_type(r);
@@ -296,6 +368,7 @@ int platform_device_add(struct platform_device *pdev)
 			release_resource(r);
 	}
 
+ err_out:
 	return ret;
 }
 EXPORT_SYMBOL_GPL(platform_device_add);
@@ -315,6 +388,11 @@ void platform_device_del(struct platform_device *pdev)
 	if (pdev) {
 		device_del(&pdev->dev);
 
+		if (pdev->id_auto) {
+			ida_simple_remove(&platform_devid_ida, pdev->id);
+			pdev->id = PLATFORM_DEVID_AUTO;
+		}
+
 		for (i = 0; i < pdev->num_resources; i++) {
 			struct resource *r = &pdev->resource[i];
 			unsigned long type = resource_type(r);
@@ -333,6 +411,7 @@ EXPORT_SYMBOL_GPL(platform_device_del);
 int platform_device_register(struct platform_device *pdev)
 {
 	device_initialize(&pdev->dev);
+	arch_setup_pdev_archdata(pdev);
 	return platform_device_add(pdev);
 }
 EXPORT_SYMBOL_GPL(platform_device_register);
@@ -353,59 +432,85 @@ void platform_device_unregister(struct platform_device *pdev)
 EXPORT_SYMBOL_GPL(platform_device_unregister);
 
 /**
- * platform_device_register_resndata - add a platform-level device with
+ * platform_device_register_full - add a platform-level device with
  * resources and platform-specific data
  *
- * @parent: parent device for the device we're adding
- * @name: base name of the device we're adding
- * @id: instance id
- * @res: set of resources that needs to be allocated for the device
- * @num: number of resources
- * @data: platform specific data for this platform device
- * @size: size of platform specific data
+ * @pdevinfo: data used to create device
  *
  * Returns &struct platform_device pointer on success, or ERR_PTR() on error.
  */
-struct platform_device *__init_or_module platform_device_register_resndata(
-		struct device *parent,
-		const char *name, int id,
-		const struct resource *res, unsigned int num,
-		const void *data, size_t size)
+struct platform_device *platform_device_register_full(
+		const struct platform_device_info *pdevinfo)
 {
 	int ret = -ENOMEM;
 	struct platform_device *pdev;
 
-	pdev = platform_device_alloc(name, id);
+	pdev = platform_device_alloc(pdevinfo->name, pdevinfo->id);
 	if (!pdev)
-		goto err;
-
-	pdev->dev.parent = parent;
+		goto err_alloc;
+
+	pdev->dev.parent = pdevinfo->parent;
+	ACPI_COMPANION_SET(&pdev->dev, pdevinfo->acpi_node.companion);
+
+	if (pdevinfo->dma_mask) {
+		/*
+		 * This memory isn't freed when the device is put,
+		 * I don't have a nice idea for that though.  Conceptually
+		 * dma_mask in struct device should not be a pointer.
+		 * See http://thread.gmane.org/gmane.linux.kernel.pci/9081
+		 */
+		pdev->dev.dma_mask =
+			kmalloc(sizeof(*pdev->dev.dma_mask), GFP_KERNEL);
+		if (!pdev->dev.dma_mask)
+			goto err;
+
+		*pdev->dev.dma_mask = pdevinfo->dma_mask;
+		pdev->dev.coherent_dma_mask = pdevinfo->dma_mask;
+	}
 
-	ret = platform_device_add_resources(pdev, res, num);
+	ret = platform_device_add_resources(pdev,
+			pdevinfo->res, pdevinfo->num_res);
 	if (ret)
 		goto err;
 
-	ret = platform_device_add_data(pdev, data, size);
+	ret = platform_device_add_data(pdev,
+			pdevinfo->data, pdevinfo->size_data);
 	if (ret)
 		goto err;
 
 	ret = platform_device_add(pdev);
 	if (ret) {
 err:
+		ACPI_COMPANION_SET(&pdev->dev, NULL);
+		kfree(pdev->dev.dma_mask);
+
+err_alloc:
 		platform_device_put(pdev);
 		return ERR_PTR(ret);
 	}
 
 	return pdev;
 }
-EXPORT_SYMBOL_GPL(platform_device_register_resndata);
+EXPORT_SYMBOL_GPL(platform_device_register_full);
 
 static int platform_drv_probe(struct device *_dev)
 {
 	struct platform_driver *drv = to_platform_driver(_dev->driver);
 	struct platform_device *dev = to_platform_device(_dev);
+	int ret;
 
-	return drv->probe(dev);
+	acpi_dev_pm_attach(_dev, true);
+
+	ret = drv->probe(dev);
+	if (ret)
+		acpi_dev_pm_detach(_dev, true);
+
+	if (drv->prevent_deferred_probe && ret == -EPROBE_DEFER) {
+		dev_warn(_dev, "probe deferral not supported\n");
+		ret = -ENXIO;
+	}
+
+	return ret;
 }
 
 static int platform_drv_probe_fail(struct device *_dev)
@@ -417,8 +522,12 @@ static int platform_drv_remove(struct device *_dev)
 {
 	struct platform_driver *drv = to_platform_driver(_dev->driver);
 	struct platform_device *dev = to_platform_device(_dev);
+	int ret;
 
-	return drv->remove(dev);
+	ret = drv->remove(dev);
+	acpi_dev_pm_detach(_dev, true);
+
+	return ret;
 }
 
 static void platform_drv_shutdown(struct device *_dev)
@@ -427,14 +536,18 @@ static void platform_drv_shutdown(struct device *_dev)
 	struct platform_device *dev = to_platform_device(_dev);
 
 	drv->shutdown(dev);
+	acpi_dev_pm_detach(_dev, true);
 }
 
 /**
- * platform_driver_register - register a driver for platform-level devices
+ * __platform_driver_register - register a driver for platform-level devices
  * @drv: platform driver structure
+ * @owner: owning module/driver
  */
-int platform_driver_register(struct platform_driver *drv)
+int __platform_driver_register(struct platform_driver *drv,
+				struct module *owner)
 {
+	drv->driver.owner = owner;
 	drv->driver.bus = &platform_bus_type;
 	if (drv->probe)
 		drv->driver.probe = platform_drv_probe;
@@ -445,7 +558,7 @@ int platform_driver_register(struct platform_driver *drv)
 
 	return driver_register(&drv->driver);
 }
-EXPORT_SYMBOL_GPL(platform_driver_register);
+EXPORT_SYMBOL_GPL(__platform_driver_register);
 
 /**
  * platform_driver_unregister - unregister a driver for platform-level devices
@@ -471,6 +584,8 @@ EXPORT_SYMBOL_GPL(platform_driver_unregister);
  * into system-on-chip processors, where the controller devices have been
  * configured as part of board setup.
  *
+ * Note that this is incompatible with deferred probing.
+ *
  * Returns zero if the driver registered and bound to a device, else returns
  * a negative error code and with the driver not registered.
  */
@@ -479,6 +594,12 @@ int __init_or_module platform_driver_probe(struct platform_driver *drv,
 {
 	int retval, code;
 
+	/*
+	 * Prevent driver from requesting probe deferral to avoid further
+	 * futile probe attempts.
+	 */
+	drv->prevent_deferred_probe = true;
+
 	/* make sure driver won't have bind/unbind attributes */
 	drv->driver.suppress_bind_attrs = true;
 
@@ -571,15 +692,27 @@ static ssize_t modalias_show(struct device *dev, struct device_attribute *a,
 			     char *buf)
 {
 	struct platform_device	*pdev = to_platform_device(dev);
-	int len = snprintf(buf, PAGE_SIZE, "platform:%s\n", pdev->name);
+	int len;
+
+	len = of_device_get_modalias(dev, buf, PAGE_SIZE -1);
+	if (len != -ENODEV)
+		return len;
+
+	len = acpi_device_modalias(dev, buf, PAGE_SIZE -1);
+	if (len != -ENODEV)
+		return len;
+
+	len = snprintf(buf, PAGE_SIZE, "platform:%s\n", pdev->name);
 
 	return (len >= PAGE_SIZE) ? (PAGE_SIZE - 1) : len;
 }
+static DEVICE_ATTR_RO(modalias);
 
-static struct device_attribute platform_dev_attrs[] = {
-	__ATTR_RO(modalias),
-	__ATTR_NULL,
+static struct attribute *platform_dev_attrs[] = {
+	&dev_attr_modalias.attr,
+	NULL,
 };
+ATTRIBUTE_GROUPS(platform_dev);
 
 static int platform_uevent(struct device *dev, struct kobj_uevent_env *env)
 {
@@ -587,12 +720,16 @@ static int platform_uevent(struct device *dev, struct kobj_uevent_env *env)
 	int rc;
 
 	/* Some devices have extra OF data and an OF-style MODALIAS */
-	rc = of_device_uevent(dev,env);
+	rc = of_device_uevent_modalias(dev, env);
+	if (rc != -ENODEV)
+		return rc;
+
+	rc = acpi_device_uevent_modalias(dev, env);
 	if (rc != -ENODEV)
 		return rc;
 
 	add_uevent_var(env, "MODALIAS=%s%s", PLATFORM_MODULE_PREFIX,
-		(pdev->id_entry) ? pdev->id_entry->name : pdev->name);
+			pdev->name);
 	return 0;
 }
 
@@ -632,6 +769,10 @@ static int platform_match(struct device *dev, struct device_driver *drv)
 	if (of_driver_match_device(dev, drv))
 		return 1;
 
+	/* Then try ACPI style match */
+	if (acpi_driver_match_device(dev, drv))
+		return 1;
+
 	/* Then try to match against the id table */
 	if (pdrv->id_table)
 		return platform_match_id(pdrv->id_table, pdev) != NULL;
@@ -666,35 +807,11 @@ static int platform_legacy_resume(struct device *dev)
 	return ret;
 }
 
-static int platform_pm_prepare(struct device *dev)
-{
-	struct device_driver *drv = dev->driver;
-	int ret = 0;
-
-	if (drv && drv->pm && drv->pm->prepare)
-		ret = drv->pm->prepare(dev);
-
-	return ret;
-}
-
-static void platform_pm_complete(struct device *dev)
-{
-	struct device_driver *drv = dev->driver;
-
-	if (drv && drv->pm && drv->pm->complete)
-		drv->pm->complete(dev);
-}
-
-#else /* !CONFIG_PM_SLEEP */
-
-#define platform_pm_prepare		NULL
-#define platform_pm_complete		NULL
-
-#endif /* !CONFIG_PM_SLEEP */
+#endif /* CONFIG_PM_SLEEP */
 
 #ifdef CONFIG_SUSPEND
 
-int __weak platform_pm_suspend(struct device *dev)
+int platform_pm_suspend(struct device *dev)
 {
 	struct device_driver *drv = dev->driver;
 	int ret = 0;
@@ -712,23 +829,7 @@ int __weak platform_pm_suspend(struct device *dev)
 	return ret;
 }
 
-int __weak platform_pm_suspend_noirq(struct device *dev)
-{
-	struct device_driver *drv = dev->driver;
-	int ret = 0;
-
-	if (!drv)
-		return 0;
-
-	if (drv->pm) {
-		if (drv->pm->suspend_noirq)
-			ret = drv->pm->suspend_noirq(dev);
-	}
-
-	return ret;
-}
-
-int __weak platform_pm_resume(struct device *dev)
+int platform_pm_resume(struct device *dev)
 {
 	struct device_driver *drv = dev->driver;
 	int ret = 0;
@@ -746,34 +847,11 @@ int __weak platform_pm_resume(struct device *dev)
 	return ret;
 }
 
-int __weak platform_pm_resume_noirq(struct device *dev)
-{
-	struct device_driver *drv = dev->driver;
-	int ret = 0;
-
-	if (!drv)
-		return 0;
-
-	if (drv->pm) {
-		if (drv->pm->resume_noirq)
-			ret = drv->pm->resume_noirq(dev);
-	}
-
-	return ret;
-}
+#endif /* CONFIG_SUSPEND */
 
-#else /* !CONFIG_SUSPEND */
+#ifdef CONFIG_HIBERNATE_CALLBACKS
 
-#define platform_pm_suspend		NULL
-#define platform_pm_resume		NULL
-#define platform_pm_suspend_noirq	NULL
-#define platform_pm_resume_noirq	NULL
-
-#endif /* !CONFIG_SUSPEND */
-
-#ifdef CONFIG_HIBERNATION
-
-static int platform_pm_freeze(struct device *dev)
+int platform_pm_freeze(struct device *dev)
 {
 	struct device_driver *drv = dev->driver;
 	int ret = 0;
@@ -791,23 +869,7 @@ static int platform_pm_freeze(struct device *dev)
 	return ret;
 }
 
-static int platform_pm_freeze_noirq(struct device *dev)
-{
-	struct device_driver *drv = dev->driver;
-	int ret = 0;
-
-	if (!drv)
-		return 0;
-
-	if (drv->pm) {
-		if (drv->pm->freeze_noirq)
-			ret = drv->pm->freeze_noirq(dev);
-	}
-
-	return ret;
-}
-
-static int platform_pm_thaw(struct device *dev)
+int platform_pm_thaw(struct device *dev)
 {
 	struct device_driver *drv = dev->driver;
 	int ret = 0;
@@ -825,23 +887,7 @@ static int platform_pm_thaw(struct device *dev)
 	return ret;
 }
 
-static int platform_pm_thaw_noirq(struct device *dev)
-{
-	struct device_driver *drv = dev->driver;
-	int ret = 0;
-
-	if (!drv)
-		return 0;
-
-	if (drv->pm) {
-		if (drv->pm->thaw_noirq)
-			ret = drv->pm->thaw_noirq(dev);
-	}
-
-	return ret;
-}
-
-static int platform_pm_poweroff(struct device *dev)
+int platform_pm_poweroff(struct device *dev)
 {
 	struct device_driver *drv = dev->driver;
 	int ret = 0;
@@ -859,23 +905,7 @@ static int platform_pm_poweroff(struct device *dev)
 	return ret;
 }
 
-static int platform_pm_poweroff_noirq(struct device *dev)
-{
-	struct device_driver *drv = dev->driver;
-	int ret = 0;
-
-	if (!drv)
-		return 0;
-
-	if (drv->pm) {
-		if (drv->pm->poweroff_noirq)
-			ret = drv->pm->poweroff_noirq(dev);
-	}
-
-	return ret;
-}
-
-static int platform_pm_restore(struct device *dev)
+int platform_pm_restore(struct device *dev)
 {
 	struct device_driver *drv = dev->driver;
 	int ret = 0;
@@ -893,124 +923,23 @@ static int platform_pm_restore(struct device *dev)
 	return ret;
 }
 
-static int platform_pm_restore_noirq(struct device *dev)
-{
-	struct device_driver *drv = dev->driver;
-	int ret = 0;
-
-	if (!drv)
-		return 0;
-
-	if (drv->pm) {
-		if (drv->pm->restore_noirq)
-			ret = drv->pm->restore_noirq(dev);
-	}
-
-	return ret;
-}
-
-#else /* !CONFIG_HIBERNATION */
-
-#define platform_pm_freeze		NULL
-#define platform_pm_thaw		NULL
-#define platform_pm_poweroff		NULL
-#define platform_pm_restore		NULL
-#define platform_pm_freeze_noirq	NULL
-#define platform_pm_thaw_noirq		NULL
-#define platform_pm_poweroff_noirq	NULL
-#define platform_pm_restore_noirq	NULL
-
-#endif /* !CONFIG_HIBERNATION */
-
-#ifdef CONFIG_PM_RUNTIME
-
-int __weak platform_pm_runtime_suspend(struct device *dev)
-{
-	return pm_generic_runtime_suspend(dev);
-};
-
-int __weak platform_pm_runtime_resume(struct device *dev)
-{
-	return pm_generic_runtime_resume(dev);
-};
-
-int __weak platform_pm_runtime_idle(struct device *dev)
-{
-	return pm_generic_runtime_idle(dev);
-};
-
-#else /* !CONFIG_PM_RUNTIME */
-
-#define platform_pm_runtime_suspend NULL
-#define platform_pm_runtime_resume NULL
-#define platform_pm_runtime_idle NULL
-
-#endif /* !CONFIG_PM_RUNTIME */
+#endif /* CONFIG_HIBERNATE_CALLBACKS */
 
 static const struct dev_pm_ops platform_dev_pm_ops = {
-	.prepare = platform_pm_prepare,
-	.complete = platform_pm_complete,
-	.suspend = platform_pm_suspend,
-	.resume = platform_pm_resume,
-	.freeze = platform_pm_freeze,
-	.thaw = platform_pm_thaw,
-	.poweroff = platform_pm_poweroff,
-	.restore = platform_pm_restore,
-	.suspend_noirq = platform_pm_suspend_noirq,
-	.resume_noirq = platform_pm_resume_noirq,
-	.freeze_noirq = platform_pm_freeze_noirq,
-	.thaw_noirq = platform_pm_thaw_noirq,
-	.poweroff_noirq = platform_pm_poweroff_noirq,
-	.restore_noirq = platform_pm_restore_noirq,
-	.runtime_suspend = platform_pm_runtime_suspend,
-	.runtime_resume = platform_pm_runtime_resume,
-	.runtime_idle = platform_pm_runtime_idle,
+	.runtime_suspend = pm_generic_runtime_suspend,
+	.runtime_resume = pm_generic_runtime_resume,
+	USE_PLATFORM_PM_SLEEP_OPS
 };
 
 struct bus_type platform_bus_type = {
 	.name		= "platform",
-	.dev_attrs	= platform_dev_attrs,
+	.dev_groups	= platform_dev_groups,
 	.match		= platform_match,
 	.uevent		= platform_uevent,
 	.pm		= &platform_dev_pm_ops,
 };
 EXPORT_SYMBOL_GPL(platform_bus_type);
 
-/**
- * platform_bus_get_pm_ops() - return pointer to busses dev_pm_ops
- *
- * This function can be used by platform code to get the current
- * set of dev_pm_ops functions used by the platform_bus_type.
- */
-const struct dev_pm_ops * __init platform_bus_get_pm_ops(void)
-{
-	return platform_bus_type.pm;
-}
-
-/**
- * platform_bus_set_pm_ops() - update dev_pm_ops for the platform_bus_type
- *
- * @pm: pointer to new dev_pm_ops struct to be used for platform_bus_type
- *
- * Platform code can override the dev_pm_ops methods of
- * platform_bus_type by using this function.  It is expected that
- * platform code will first do a platform_bus_get_pm_ops(), then
- * kmemdup it, then customize selected methods and pass a pointer to
- * the new struct dev_pm_ops to this function.
- *
- * Since platform-specific code is customizing methods for *all*
- * devices (not just platform-specific devices) it is expected that
- * any custom overrides of these functions will keep existing behavior
- * and simply extend it.  For example, any customization of the
- * runtime PM methods should continue to call the pm_generic_*
- * functions as the default ones do in addition to the
- * platform-specific behavior.
- */
-void __init platform_bus_set_pm_ops(const struct dev_pm_ops *pm)
-{
-	platform_bus_type.pm = pm;
-}
-
 int __init platform_bus_init(void)
 {
 	int error;
@@ -1125,6 +1054,7 @@ void __init early_platform_add_devices(struct platform_device **devs, int num)
 		dev = &devs[i]->dev;
 
 		if (!dev->devres_head.next) {
+			pm_runtime_early_init(dev);
 			INIT_LIST_HEAD(&dev->devres_head);
 			list_add_tail(&dev->devres_head,
 				      &early_platform_device_list);
@@ -1164,7 +1094,7 @@ void __init early_platform_driver_register_all(char *class_str)
  * @epdrv: early platform driver structure
  * @id: id to match against
  */
-static  __init struct platform_device *
+static struct platform_device * __init
 early_platform_match(struct early_platform_driver *epdrv, int id)
 {
 	struct platform_device *pd;
@@ -1182,7 +1112,7 @@ early_platform_match(struct early_platform_driver *epdrv, int id)
  * @epdrv: early platform driver structure
  * @id: return true if id or above exists
  */
-static  __init int early_platform_left(struct early_platform_driver *epdrv,
+static int __init early_platform_left(struct early_platform_driver *epdrv,
 				       int id)
 {
 	struct platform_device *pd;
@@ -1237,8 +1167,8 @@ static int __init early_platform_driver_probe_id(char *class_str,
 
 		switch (match_id) {
 		case EARLY_PLATFORM_ID_ERROR:
-			pr_warning("%s: unable to parse %s parameter\n",
-				   class_str, epdrv->pdrv->driver.name);
+			pr_warn("%s: unable to parse %s parameter\n",
+				class_str, epdrv->pdrv->driver.name);
 			/* fall-through */
 		case EARLY_PLATFORM_ID_UNSET:
 			match = NULL;
@@ -1269,8 +1199,8 @@ static int __init early_platform_driver_probe_id(char *class_str,
 			}
 
 			if (epdrv->pdrv->probe(match))
-				pr_warning("%s: unable to probe %s early.\n",
-					   class_str, match->name);
+				pr_warn("%s: unable to probe %s early.\n",
+					class_str, match->name);
 			else
 				n++;
 		}
diff --git a/drivers/base/power/Makefile b/drivers/base/power/Makefile
index abe46edfe5b..1cb8544598d 100644
--- a/drivers/base/power/Makefile
+++ b/drivers/base/power/Makefile
@@ -1,9 +1,8 @@
-obj-$(CONFIG_PM)	+= sysfs.o
+obj-$(CONFIG_PM)	+= sysfs.o generic_ops.o common.o qos.o runtime.o
 obj-$(CONFIG_PM_SLEEP)	+= main.o wakeup.o
-obj-$(CONFIG_PM_RUNTIME)	+= runtime.o
-obj-$(CONFIG_PM_OPS)	+= generic_ops.o
 obj-$(CONFIG_PM_TRACE_RTC)	+= trace.o
 obj-$(CONFIG_PM_OPP)	+= opp.o
+obj-$(CONFIG_PM_GENERIC_DOMAINS)	+=  domain.o domain_governor.o
+obj-$(CONFIG_HAVE_CLK)	+= clock_ops.o
 
 ccflags-$(CONFIG_DEBUG_DRIVER) := -DDEBUG
-ccflags-$(CONFIG_PM_VERBOSE)   += -DDEBUG
diff --git a/drivers/base/power/clock_ops.c b/drivers/base/power/clock_ops.c
new file mode 100644
index 00000000000..b99e6c06ee6
--- /dev/null
+++ b/drivers/base/power/clock_ops.c
@@ -0,0 +1,505 @@
+/*
+ * drivers/base/power/clock_ops.c - Generic clock manipulation PM callbacks
+ *
+ * Copyright (c) 2011 Rafael J. Wysocki <rjw@sisk.pl>, Renesas Electronics Corp.
+ *
+ * This file is released under the GPLv2.
+ */
+
+#include <linux/kernel.h>
+#include <linux/device.h>
+#include <linux/io.h>
+#include <linux/pm.h>
+#include <linux/pm_clock.h>
+#include <linux/clk.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+
+#ifdef CONFIG_PM
+
+enum pce_status {
+	PCE_STATUS_NONE = 0,
+	PCE_STATUS_ACQUIRED,
+	PCE_STATUS_ENABLED,
+	PCE_STATUS_ERROR,
+};
+
+struct pm_clock_entry {
+	struct list_head node;
+	char *con_id;
+	struct clk *clk;
+	enum pce_status status;
+};
+
+/**
+ * __pm_clk_enable - Enable a clock, reporting any errors
+ * @dev: The device for the given clock
+ * @clk: The clock being enabled.
+ */
+static inline int __pm_clk_enable(struct device *dev, struct clk *clk)
+{
+	int ret = clk_enable(clk);
+	if (ret)
+		dev_err(dev, "%s: failed to enable clk %p, error %d\n",
+			__func__, clk, ret);
+
+	return ret;
+}
+
+/**
+ * pm_clk_acquire - Acquire a device clock.
+ * @dev: Device whose clock is to be acquired.
+ * @ce: PM clock entry corresponding to the clock.
+ */
+static void pm_clk_acquire(struct device *dev, struct pm_clock_entry *ce)
+{
+	ce->clk = clk_get(dev, ce->con_id);
+	if (IS_ERR(ce->clk)) {
+		ce->status = PCE_STATUS_ERROR;
+	} else {
+		clk_prepare(ce->clk);
+		ce->status = PCE_STATUS_ACQUIRED;
+		dev_dbg(dev, "Clock %s managed by runtime PM.\n", ce->con_id);
+	}
+}
+
+/**
+ * pm_clk_add - Start using a device clock for power management.
+ * @dev: Device whose clock is going to be used for power management.
+ * @con_id: Connection ID of the clock.
+ *
+ * Add the clock represented by @con_id to the list of clocks used for
+ * the power management of @dev.
+ */
+int pm_clk_add(struct device *dev, const char *con_id)
+{
+	struct pm_subsys_data *psd = dev_to_psd(dev);
+	struct pm_clock_entry *ce;
+
+	if (!psd)
+		return -EINVAL;
+
+	ce = kzalloc(sizeof(*ce), GFP_KERNEL);
+	if (!ce) {
+		dev_err(dev, "Not enough memory for clock entry.\n");
+		return -ENOMEM;
+	}
+
+	if (con_id) {
+		ce->con_id = kstrdup(con_id, GFP_KERNEL);
+		if (!ce->con_id) {
+			dev_err(dev,
+				"Not enough memory for clock connection ID.\n");
+			kfree(ce);
+			return -ENOMEM;
+		}
+	}
+
+	pm_clk_acquire(dev, ce);
+
+	spin_lock_irq(&psd->lock);
+	list_add_tail(&ce->node, &psd->clock_list);
+	spin_unlock_irq(&psd->lock);
+	return 0;
+}
+
+/**
+ * __pm_clk_remove - Destroy PM clock entry.
+ * @ce: PM clock entry to destroy.
+ */
+static void __pm_clk_remove(struct pm_clock_entry *ce)
+{
+	if (!ce)
+		return;
+
+	if (ce->status < PCE_STATUS_ERROR) {
+		if (ce->status == PCE_STATUS_ENABLED)
+			clk_disable(ce->clk);
+
+		if (ce->status >= PCE_STATUS_ACQUIRED) {
+			clk_unprepare(ce->clk);
+			clk_put(ce->clk);
+		}
+	}
+
+	kfree(ce->con_id);
+	kfree(ce);
+}
+
+/**
+ * pm_clk_remove - Stop using a device clock for power management.
+ * @dev: Device whose clock should not be used for PM any more.
+ * @con_id: Connection ID of the clock.
+ *
+ * Remove the clock represented by @con_id from the list of clocks used for
+ * the power management of @dev.
+ */
+void pm_clk_remove(struct device *dev, const char *con_id)
+{
+	struct pm_subsys_data *psd = dev_to_psd(dev);
+	struct pm_clock_entry *ce;
+
+	if (!psd)
+		return;
+
+	spin_lock_irq(&psd->lock);
+
+	list_for_each_entry(ce, &psd->clock_list, node) {
+		if (!con_id && !ce->con_id)
+			goto remove;
+		else if (!con_id || !ce->con_id)
+			continue;
+		else if (!strcmp(con_id, ce->con_id))
+			goto remove;
+	}
+
+	spin_unlock_irq(&psd->lock);
+	return;
+
+ remove:
+	list_del(&ce->node);
+	spin_unlock_irq(&psd->lock);
+
+	__pm_clk_remove(ce);
+}
+
+/**
+ * pm_clk_init - Initialize a device's list of power management clocks.
+ * @dev: Device to initialize the list of PM clocks for.
+ *
+ * Initialize the clock_list member of the device's pm_subsys_data object, if
+ * the device has one.  The lock member is not touched here.
+ */
+void pm_clk_init(struct device *dev)
+{
+	struct pm_subsys_data *psd = dev_to_psd(dev);
+	if (psd)
+		INIT_LIST_HEAD(&psd->clock_list);
+}
+
+/**
+ * pm_clk_create - Create and initialize a device's list of PM clocks.
+ * @dev: Device to create and initialize the list of PM clocks for.
+ *
+ * Allocate a struct pm_subsys_data object, initialize its lock and clock_list
+ * members and make the @dev's power.subsys_data field point to it.
+ */
+int pm_clk_create(struct device *dev)
+{
+	return dev_pm_get_subsys_data(dev);
+}
+
+/**
+ * pm_clk_destroy - Destroy a device's list of power management clocks.
+ * @dev: Device to destroy the list of PM clocks for.
+ *
+ * Clear the @dev's power.subsys_data field, remove the list of clock entries
+ * from the struct pm_subsys_data object pointed to by it before and free
+ * that object.
+ */
+void pm_clk_destroy(struct device *dev)
+{
+	struct pm_subsys_data *psd = dev_to_psd(dev);
+	struct pm_clock_entry *ce, *c;
+	struct list_head list;
+
+	if (!psd)
+		return;
+
+	INIT_LIST_HEAD(&list);
+
+	spin_lock_irq(&psd->lock);
+
+	list_for_each_entry_safe_reverse(ce, c, &psd->clock_list, node)
+		list_move(&ce->node, &list);
+
+	spin_unlock_irq(&psd->lock);
+
+	dev_pm_put_subsys_data(dev);
+
+	list_for_each_entry_safe_reverse(ce, c, &list, node) {
+		list_del(&ce->node);
+		__pm_clk_remove(ce);
+	}
+}
+
+#endif /* CONFIG_PM */
+
+#ifdef CONFIG_PM_RUNTIME
+
+/**
+ * pm_clk_suspend - Disable clocks in a device's PM clock list.
+ * @dev: Device to disable the clocks for.
+ */
+int pm_clk_suspend(struct device *dev)
+{
+	struct pm_subsys_data *psd = dev_to_psd(dev);
+	struct pm_clock_entry *ce;
+	unsigned long flags;
+
+	dev_dbg(dev, "%s()\n", __func__);
+
+	if (!psd)
+		return 0;
+
+	spin_lock_irqsave(&psd->lock, flags);
+
+	list_for_each_entry_reverse(ce, &psd->clock_list, node) {
+		if (ce->status < PCE_STATUS_ERROR) {
+			if (ce->status == PCE_STATUS_ENABLED)
+				clk_disable(ce->clk);
+			ce->status = PCE_STATUS_ACQUIRED;
+		}
+	}
+
+	spin_unlock_irqrestore(&psd->lock, flags);
+
+	return 0;
+}
+
+/**
+ * pm_clk_resume - Enable clocks in a device's PM clock list.
+ * @dev: Device to enable the clocks for.
+ */
+int pm_clk_resume(struct device *dev)
+{
+	struct pm_subsys_data *psd = dev_to_psd(dev);
+	struct pm_clock_entry *ce;
+	unsigned long flags;
+	int ret;
+
+	dev_dbg(dev, "%s()\n", __func__);
+
+	if (!psd)
+		return 0;
+
+	spin_lock_irqsave(&psd->lock, flags);
+
+	list_for_each_entry(ce, &psd->clock_list, node) {
+		if (ce->status < PCE_STATUS_ERROR) {
+			ret = __pm_clk_enable(dev, ce->clk);
+			if (!ret)
+				ce->status = PCE_STATUS_ENABLED;
+		}
+	}
+
+	spin_unlock_irqrestore(&psd->lock, flags);
+
+	return 0;
+}
+
+/**
+ * pm_clk_notify - Notify routine for device addition and removal.
+ * @nb: Notifier block object this function is a member of.
+ * @action: Operation being carried out by the caller.
+ * @data: Device the routine is being run for.
+ *
+ * For this function to work, @nb must be a member of an object of type
+ * struct pm_clk_notifier_block containing all of the requisite data.
+ * Specifically, the pm_domain member of that object is copied to the device's
+ * pm_domain field and its con_ids member is used to populate the device's list
+ * of PM clocks, depending on @action.
+ *
+ * If the device's pm_domain field is already populated with a value different
+ * from the one stored in the struct pm_clk_notifier_block object, the function
+ * does nothing.
+ */
+static int pm_clk_notify(struct notifier_block *nb,
+				 unsigned long action, void *data)
+{
+	struct pm_clk_notifier_block *clknb;
+	struct device *dev = data;
+	char **con_id;
+	int error;
+
+	dev_dbg(dev, "%s() %ld\n", __func__, action);
+
+	clknb = container_of(nb, struct pm_clk_notifier_block, nb);
+
+	switch (action) {
+	case BUS_NOTIFY_ADD_DEVICE:
+		if (dev->pm_domain)
+			break;
+
+		error = pm_clk_create(dev);
+		if (error)
+			break;
+
+		dev->pm_domain = clknb->pm_domain;
+		if (clknb->con_ids[0]) {
+			for (con_id = clknb->con_ids; *con_id; con_id++)
+				pm_clk_add(dev, *con_id);
+		} else {
+			pm_clk_add(dev, NULL);
+		}
+
+		break;
+	case BUS_NOTIFY_DEL_DEVICE:
+		if (dev->pm_domain != clknb->pm_domain)
+			break;
+
+		dev->pm_domain = NULL;
+		pm_clk_destroy(dev);
+		break;
+	}
+
+	return 0;
+}
+
+#else /* !CONFIG_PM_RUNTIME */
+
+#ifdef CONFIG_PM
+
+/**
+ * pm_clk_suspend - Disable clocks in a device's PM clock list.
+ * @dev: Device to disable the clocks for.
+ */
+int pm_clk_suspend(struct device *dev)
+{
+	struct pm_subsys_data *psd = dev_to_psd(dev);
+	struct pm_clock_entry *ce;
+	unsigned long flags;
+
+	dev_dbg(dev, "%s()\n", __func__);
+
+	/* If there is no driver, the clocks are already disabled. */
+	if (!psd || !dev->driver)
+		return 0;
+
+	spin_lock_irqsave(&psd->lock, flags);
+
+	list_for_each_entry_reverse(ce, &psd->clock_list, node)
+		clk_disable(ce->clk);
+
+	spin_unlock_irqrestore(&psd->lock, flags);
+
+	return 0;
+}
+
+/**
+ * pm_clk_resume - Enable clocks in a device's PM clock list.
+ * @dev: Device to enable the clocks for.
+ */
+int pm_clk_resume(struct device *dev)
+{
+	struct pm_subsys_data *psd = dev_to_psd(dev);
+	struct pm_clock_entry *ce;
+	unsigned long flags;
+
+	dev_dbg(dev, "%s()\n", __func__);
+
+	/* If there is no driver, the clocks should remain disabled. */
+	if (!psd || !dev->driver)
+		return 0;
+
+	spin_lock_irqsave(&psd->lock, flags);
+
+	list_for_each_entry(ce, &psd->clock_list, node)
+		__pm_clk_enable(dev, ce->clk);
+
+	spin_unlock_irqrestore(&psd->lock, flags);
+
+	return 0;
+}
+
+#endif /* CONFIG_PM */
+
+/**
+ * enable_clock - Enable a device clock.
+ * @dev: Device whose clock is to be enabled.
+ * @con_id: Connection ID of the clock.
+ */
+static void enable_clock(struct device *dev, const char *con_id)
+{
+	struct clk *clk;
+
+	clk = clk_get(dev, con_id);
+	if (!IS_ERR(clk)) {
+		clk_prepare_enable(clk);
+		clk_put(clk);
+		dev_info(dev, "Runtime PM disabled, clock forced on.\n");
+	}
+}
+
+/**
+ * disable_clock - Disable a device clock.
+ * @dev: Device whose clock is to be disabled.
+ * @con_id: Connection ID of the clock.
+ */
+static void disable_clock(struct device *dev, const char *con_id)
+{
+	struct clk *clk;
+
+	clk = clk_get(dev, con_id);
+	if (!IS_ERR(clk)) {
+		clk_disable_unprepare(clk);
+		clk_put(clk);
+		dev_info(dev, "Runtime PM disabled, clock forced off.\n");
+	}
+}
+
+/**
+ * pm_clk_notify - Notify routine for device addition and removal.
+ * @nb: Notifier block object this function is a member of.
+ * @action: Operation being carried out by the caller.
+ * @data: Device the routine is being run for.
+ *
+ * For this function to work, @nb must be a member of an object of type
+ * struct pm_clk_notifier_block containing all of the requisite data.
+ * Specifically, the con_ids member of that object is used to enable or disable
+ * the device's clocks, depending on @action.
+ */
+static int pm_clk_notify(struct notifier_block *nb,
+				 unsigned long action, void *data)
+{
+	struct pm_clk_notifier_block *clknb;
+	struct device *dev = data;
+	char **con_id;
+
+	dev_dbg(dev, "%s() %ld\n", __func__, action);
+
+	clknb = container_of(nb, struct pm_clk_notifier_block, nb);
+
+	switch (action) {
+	case BUS_NOTIFY_BIND_DRIVER:
+		if (clknb->con_ids[0]) {
+			for (con_id = clknb->con_ids; *con_id; con_id++)
+				enable_clock(dev, *con_id);
+		} else {
+			enable_clock(dev, NULL);
+		}
+		break;
+	case BUS_NOTIFY_UNBOUND_DRIVER:
+		if (clknb->con_ids[0]) {
+			for (con_id = clknb->con_ids; *con_id; con_id++)
+				disable_clock(dev, *con_id);
+		} else {
+			disable_clock(dev, NULL);
+		}
+		break;
+	}
+
+	return 0;
+}
+
+#endif /* !CONFIG_PM_RUNTIME */
+
+/**
+ * pm_clk_add_notifier - Add bus type notifier for power management clocks.
+ * @bus: Bus type to add the notifier to.
+ * @clknb: Notifier to be added to the given bus type.
+ *
+ * The nb member of @clknb is not expected to be initialized and its
+ * notifier_call member will be replaced with pm_clk_notify().  However,
+ * the remaining members of @clknb should be populated prior to calling this
+ * routine.
+ */
+void pm_clk_add_notifier(struct bus_type *bus,
+				 struct pm_clk_notifier_block *clknb)
+{
+	if (!bus || !clknb)
+		return;
+
+	clknb->nb.notifier_call = pm_clk_notify;
+	bus_register_notifier(bus, &clknb->nb);
+}
diff --git a/drivers/base/power/common.c b/drivers/base/power/common.c
new file mode 100644
index 00000000000..df2e5eeaeb0
--- /dev/null
+++ b/drivers/base/power/common.c
@@ -0,0 +1,84 @@
+/*
+ * drivers/base/power/common.c - Common device power management code.
+ *
+ * Copyright (C) 2011 Rafael J. Wysocki <rjw@sisk.pl>, Renesas Electronics Corp.
+ *
+ * This file is released under the GPLv2.
+ */
+
+#include <linux/kernel.h>
+#include <linux/device.h>
+#include <linux/export.h>
+#include <linux/slab.h>
+#include <linux/pm_clock.h>
+
+/**
+ * dev_pm_get_subsys_data - Create or refcount power.subsys_data for device.
+ * @dev: Device to handle.
+ *
+ * If power.subsys_data is NULL, point it to a new object, otherwise increment
+ * its reference counter.  Return 0 if a new object has been created or the
+ * reference counter has been incremented, otherwise a negative error code.
+ */
+int dev_pm_get_subsys_data(struct device *dev)
+{
+	struct pm_subsys_data *psd;
+
+	psd = kzalloc(sizeof(*psd), GFP_KERNEL);
+	if (!psd)
+		return -ENOMEM;
+
+	spin_lock_irq(&dev->power.lock);
+
+	if (dev->power.subsys_data) {
+		dev->power.subsys_data->refcount++;
+	} else {
+		spin_lock_init(&psd->lock);
+		psd->refcount = 1;
+		dev->power.subsys_data = psd;
+		pm_clk_init(dev);
+		psd = NULL;
+	}
+
+	spin_unlock_irq(&dev->power.lock);
+
+	/* kfree() verifies that its argument is nonzero. */
+	kfree(psd);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(dev_pm_get_subsys_data);
+
+/**
+ * dev_pm_put_subsys_data - Drop reference to power.subsys_data.
+ * @dev: Device to handle.
+ *
+ * If the reference counter of power.subsys_data is zero after dropping the
+ * reference, power.subsys_data is removed.  Return 1 if that happens or 0
+ * otherwise.
+ */
+int dev_pm_put_subsys_data(struct device *dev)
+{
+	struct pm_subsys_data *psd;
+	int ret = 1;
+
+	spin_lock_irq(&dev->power.lock);
+
+	psd = dev_to_psd(dev);
+	if (!psd)
+		goto out;
+
+	if (--psd->refcount == 0) {
+		dev->power.subsys_data = NULL;
+	} else {
+		psd = NULL;
+		ret = 0;
+	}
+
+ out:
+	spin_unlock_irq(&dev->power.lock);
+	kfree(psd);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(dev_pm_put_subsys_data);
diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
new file mode 100644
index 00000000000..eee55c1e5fd
--- /dev/null
+++ b/drivers/base/power/domain.c
@@ -0,0 +1,2191 @@
+/*
+ * drivers/base/power/domain.c - Common code related to device power domains.
+ *
+ * Copyright (C) 2011 Rafael J. Wysocki <rjw@sisk.pl>, Renesas Electronics Corp.
+ *
+ * This file is released under the GPLv2.
+ */
+
+#include <linux/kernel.h>
+#include <linux/io.h>
+#include <linux/pm_runtime.h>
+#include <linux/pm_domain.h>
+#include <linux/pm_qos.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/sched.h>
+#include <linux/suspend.h>
+#include <linux/export.h>
+
+#define GENPD_DEV_CALLBACK(genpd, type, callback, dev)		\
+({								\
+	type (*__routine)(struct device *__d); 			\
+	type __ret = (type)0;					\
+								\
+	__routine = genpd->dev_ops.callback; 			\
+	if (__routine) {					\
+		__ret = __routine(dev); 			\
+	} else {						\
+		__routine = dev_gpd_data(dev)->ops.callback;	\
+		if (__routine) 					\
+			__ret = __routine(dev);			\
+	}							\
+	__ret;							\
+})
+
+#define GENPD_DEV_TIMED_CALLBACK(genpd, type, callback, dev, field, name)	\
+({										\
+	ktime_t __start = ktime_get();						\
+	type __retval = GENPD_DEV_CALLBACK(genpd, type, callback, dev);		\
+	s64 __elapsed = ktime_to_ns(ktime_sub(ktime_get(), __start));		\
+	struct gpd_timing_data *__td = &dev_gpd_data(dev)->td;			\
+	if (!__retval && __elapsed > __td->field) {				\
+		__td->field = __elapsed;					\
+		dev_dbg(dev, name " latency exceeded, new value %lld ns\n",	\
+			__elapsed);						\
+		genpd->max_off_time_changed = true;				\
+		__td->constraint_changed = true;				\
+	}									\
+	__retval;								\
+})
+
+static LIST_HEAD(gpd_list);
+static DEFINE_MUTEX(gpd_list_lock);
+
+static struct generic_pm_domain *pm_genpd_lookup_name(const char *domain_name)
+{
+	struct generic_pm_domain *genpd = NULL, *gpd;
+
+	if (IS_ERR_OR_NULL(domain_name))
+		return NULL;
+
+	mutex_lock(&gpd_list_lock);
+	list_for_each_entry(gpd, &gpd_list, gpd_list_node) {
+		if (!strcmp(gpd->name, domain_name)) {
+			genpd = gpd;
+			break;
+		}
+	}
+	mutex_unlock(&gpd_list_lock);
+	return genpd;
+}
+
+#ifdef CONFIG_PM
+
+struct generic_pm_domain *dev_to_genpd(struct device *dev)
+{
+	if (IS_ERR_OR_NULL(dev->pm_domain))
+		return ERR_PTR(-EINVAL);
+
+	return pd_to_genpd(dev->pm_domain);
+}
+
+static int genpd_stop_dev(struct generic_pm_domain *genpd, struct device *dev)
+{
+	return GENPD_DEV_TIMED_CALLBACK(genpd, int, stop, dev,
+					stop_latency_ns, "stop");
+}
+
+static int genpd_start_dev(struct generic_pm_domain *genpd, struct device *dev)
+{
+	return GENPD_DEV_TIMED_CALLBACK(genpd, int, start, dev,
+					start_latency_ns, "start");
+}
+
+static bool genpd_sd_counter_dec(struct generic_pm_domain *genpd)
+{
+	bool ret = false;
+
+	if (!WARN_ON(atomic_read(&genpd->sd_count) == 0))
+		ret = !!atomic_dec_and_test(&genpd->sd_count);
+
+	return ret;
+}
+
+static void genpd_sd_counter_inc(struct generic_pm_domain *genpd)
+{
+	atomic_inc(&genpd->sd_count);
+	smp_mb__after_atomic();
+}
+
+static void genpd_acquire_lock(struct generic_pm_domain *genpd)
+{
+	DEFINE_WAIT(wait);
+
+	mutex_lock(&genpd->lock);
+	/*
+	 * Wait for the domain to transition into either the active,
+	 * or the power off state.
+	 */
+	for (;;) {
+		prepare_to_wait(&genpd->status_wait_queue, &wait,
+				TASK_UNINTERRUPTIBLE);
+		if (genpd->status == GPD_STATE_ACTIVE
+		    || genpd->status == GPD_STATE_POWER_OFF)
+			break;
+		mutex_unlock(&genpd->lock);
+
+		schedule();
+
+		mutex_lock(&genpd->lock);
+	}
+	finish_wait(&genpd->status_wait_queue, &wait);
+}
+
+static void genpd_release_lock(struct generic_pm_domain *genpd)
+{
+	mutex_unlock(&genpd->lock);
+}
+
+static void genpd_set_active(struct generic_pm_domain *genpd)
+{
+	if (genpd->resume_count == 0)
+		genpd->status = GPD_STATE_ACTIVE;
+}
+
+static void genpd_recalc_cpu_exit_latency(struct generic_pm_domain *genpd)
+{
+	s64 usecs64;
+
+	if (!genpd->cpu_data)
+		return;
+
+	usecs64 = genpd->power_on_latency_ns;
+	do_div(usecs64, NSEC_PER_USEC);
+	usecs64 += genpd->cpu_data->saved_exit_latency;
+	genpd->cpu_data->idle_state->exit_latency = usecs64;
+}
+
+/**
+ * __pm_genpd_poweron - Restore power to a given PM domain and its masters.
+ * @genpd: PM domain to power up.
+ *
+ * Restore power to @genpd and all of its masters so that it is possible to
+ * resume a device belonging to it.
+ */
+static int __pm_genpd_poweron(struct generic_pm_domain *genpd)
+	__releases(&genpd->lock) __acquires(&genpd->lock)
+{
+	struct gpd_link *link;
+	DEFINE_WAIT(wait);
+	int ret = 0;
+
+	/* If the domain's master is being waited for, we have to wait too. */
+	for (;;) {
+		prepare_to_wait(&genpd->status_wait_queue, &wait,
+				TASK_UNINTERRUPTIBLE);
+		if (genpd->status != GPD_STATE_WAIT_MASTER)
+			break;
+		mutex_unlock(&genpd->lock);
+
+		schedule();
+
+		mutex_lock(&genpd->lock);
+	}
+	finish_wait(&genpd->status_wait_queue, &wait);
+
+	if (genpd->status == GPD_STATE_ACTIVE
+	    || (genpd->prepared_count > 0 && genpd->suspend_power_off))
+		return 0;
+
+	if (genpd->status != GPD_STATE_POWER_OFF) {
+		genpd_set_active(genpd);
+		return 0;
+	}
+
+	if (genpd->cpu_data) {
+		cpuidle_pause_and_lock();
+		genpd->cpu_data->idle_state->disabled = true;
+		cpuidle_resume_and_unlock();
+		goto out;
+	}
+
+	/*
+	 * The list is guaranteed not to change while the loop below is being
+	 * executed, unless one of the masters' .power_on() callbacks fiddles
+	 * with it.
+	 */
+	list_for_each_entry(link, &genpd->slave_links, slave_node) {
+		genpd_sd_counter_inc(link->master);
+		genpd->status = GPD_STATE_WAIT_MASTER;
+
+		mutex_unlock(&genpd->lock);
+
+		ret = pm_genpd_poweron(link->master);
+
+		mutex_lock(&genpd->lock);
+
+		/*
+		 * The "wait for parent" status is guaranteed not to change
+		 * while the master is powering on.
+		 */
+		genpd->status = GPD_STATE_POWER_OFF;
+		wake_up_all(&genpd->status_wait_queue);
+		if (ret) {
+			genpd_sd_counter_dec(link->master);
+			goto err;
+		}
+	}
+
+	if (genpd->power_on) {
+		ktime_t time_start = ktime_get();
+		s64 elapsed_ns;
+
+		ret = genpd->power_on(genpd);
+		if (ret)
+			goto err;
+
+		elapsed_ns = ktime_to_ns(ktime_sub(ktime_get(), time_start));
+		if (elapsed_ns > genpd->power_on_latency_ns) {
+			genpd->power_on_latency_ns = elapsed_ns;
+			genpd->max_off_time_changed = true;
+			genpd_recalc_cpu_exit_latency(genpd);
+			if (genpd->name)
+				pr_warning("%s: Power-on latency exceeded, "
+					"new value %lld ns\n", genpd->name,
+					elapsed_ns);
+		}
+	}
+
+ out:
+	genpd_set_active(genpd);
+
+	return 0;
+
+ err:
+	list_for_each_entry_continue_reverse(link, &genpd->slave_links, slave_node)
+		genpd_sd_counter_dec(link->master);
+
+	return ret;
+}
+
+/**
+ * pm_genpd_poweron - Restore power to a given PM domain and its masters.
+ * @genpd: PM domain to power up.
+ */
+int pm_genpd_poweron(struct generic_pm_domain *genpd)
+{
+	int ret;
+
+	mutex_lock(&genpd->lock);
+	ret = __pm_genpd_poweron(genpd);
+	mutex_unlock(&genpd->lock);
+	return ret;
+}
+
+/**
+ * pm_genpd_name_poweron - Restore power to a given PM domain and its masters.
+ * @domain_name: Name of the PM domain to power up.
+ */
+int pm_genpd_name_poweron(const char *domain_name)
+{
+	struct generic_pm_domain *genpd;
+
+	genpd = pm_genpd_lookup_name(domain_name);
+	return genpd ? pm_genpd_poweron(genpd) : -EINVAL;
+}
+
+#endif /* CONFIG_PM */
+
+#ifdef CONFIG_PM_RUNTIME
+
+static int genpd_start_dev_no_timing(struct generic_pm_domain *genpd,
+				     struct device *dev)
+{
+	return GENPD_DEV_CALLBACK(genpd, int, start, dev);
+}
+
+static int genpd_save_dev(struct generic_pm_domain *genpd, struct device *dev)
+{
+	return GENPD_DEV_TIMED_CALLBACK(genpd, int, save_state, dev,
+					save_state_latency_ns, "state save");
+}
+
+static int genpd_restore_dev(struct generic_pm_domain *genpd, struct device *dev)
+{
+	return GENPD_DEV_TIMED_CALLBACK(genpd, int, restore_state, dev,
+					restore_state_latency_ns,
+					"state restore");
+}
+
+static int genpd_dev_pm_qos_notifier(struct notifier_block *nb,
+				     unsigned long val, void *ptr)
+{
+	struct generic_pm_domain_data *gpd_data;
+	struct device *dev;
+
+	gpd_data = container_of(nb, struct generic_pm_domain_data, nb);
+
+	mutex_lock(&gpd_data->lock);
+	dev = gpd_data->base.dev;
+	if (!dev) {
+		mutex_unlock(&gpd_data->lock);
+		return NOTIFY_DONE;
+	}
+	mutex_unlock(&gpd_data->lock);
+
+	for (;;) {
+		struct generic_pm_domain *genpd;
+		struct pm_domain_data *pdd;
+
+		spin_lock_irq(&dev->power.lock);
+
+		pdd = dev->power.subsys_data ?
+				dev->power.subsys_data->domain_data : NULL;
+		if (pdd && pdd->dev) {
+			to_gpd_data(pdd)->td.constraint_changed = true;
+			genpd = dev_to_genpd(dev);
+		} else {
+			genpd = ERR_PTR(-ENODATA);
+		}
+
+		spin_unlock_irq(&dev->power.lock);
+
+		if (!IS_ERR(genpd)) {
+			mutex_lock(&genpd->lock);
+			genpd->max_off_time_changed = true;
+			mutex_unlock(&genpd->lock);
+		}
+
+		dev = dev->parent;
+		if (!dev || dev->power.ignore_children)
+			break;
+	}
+
+	return NOTIFY_DONE;
+}
+
+/**
+ * __pm_genpd_save_device - Save the pre-suspend state of a device.
+ * @pdd: Domain data of the device to save the state of.
+ * @genpd: PM domain the device belongs to.
+ */
+static int __pm_genpd_save_device(struct pm_domain_data *pdd,
+				  struct generic_pm_domain *genpd)
+	__releases(&genpd->lock) __acquires(&genpd->lock)
+{
+	struct generic_pm_domain_data *gpd_data = to_gpd_data(pdd);
+	struct device *dev = pdd->dev;
+	int ret = 0;
+
+	if (gpd_data->need_restore)
+		return 0;
+
+	mutex_unlock(&genpd->lock);
+
+	genpd_start_dev(genpd, dev);
+	ret = genpd_save_dev(genpd, dev);
+	genpd_stop_dev(genpd, dev);
+
+	mutex_lock(&genpd->lock);
+
+	if (!ret)
+		gpd_data->need_restore = true;
+
+	return ret;
+}
+
+/**
+ * __pm_genpd_restore_device - Restore the pre-suspend state of a device.
+ * @pdd: Domain data of the device to restore the state of.
+ * @genpd: PM domain the device belongs to.
+ */
+static void __pm_genpd_restore_device(struct pm_domain_data *pdd,
+				      struct generic_pm_domain *genpd)
+	__releases(&genpd->lock) __acquires(&genpd->lock)
+{
+	struct generic_pm_domain_data *gpd_data = to_gpd_data(pdd);
+	struct device *dev = pdd->dev;
+	bool need_restore = gpd_data->need_restore;
+
+	gpd_data->need_restore = false;
+	mutex_unlock(&genpd->lock);
+
+	genpd_start_dev(genpd, dev);
+	if (need_restore)
+		genpd_restore_dev(genpd, dev);
+
+	mutex_lock(&genpd->lock);
+}
+
+/**
+ * genpd_abort_poweroff - Check if a PM domain power off should be aborted.
+ * @genpd: PM domain to check.
+ *
+ * Return true if a PM domain's status changed to GPD_STATE_ACTIVE during
+ * a "power off" operation, which means that a "power on" has occurred in the
+ * meantime, or if its resume_count field is different from zero, which means
+ * that one of its devices has been resumed in the meantime.
+ */
+static bool genpd_abort_poweroff(struct generic_pm_domain *genpd)
+{
+	return genpd->status == GPD_STATE_WAIT_MASTER
+		|| genpd->status == GPD_STATE_ACTIVE || genpd->resume_count > 0;
+}
+
+/**
+ * genpd_queue_power_off_work - Queue up the execution of pm_genpd_poweroff().
+ * @genpd: PM domain to power off.
+ *
+ * Queue up the execution of pm_genpd_poweroff() unless it's already been done
+ * before.
+ */
+void genpd_queue_power_off_work(struct generic_pm_domain *genpd)
+{
+	queue_work(pm_wq, &genpd->power_off_work);
+}
+
+/**
+ * pm_genpd_poweroff - Remove power from a given PM domain.
+ * @genpd: PM domain to power down.
+ *
+ * If all of the @genpd's devices have been suspended and all of its subdomains
+ * have been powered down, run the runtime suspend callbacks provided by all of
+ * the @genpd's devices' drivers and remove power from @genpd.
+ */
+static int pm_genpd_poweroff(struct generic_pm_domain *genpd)
+	__releases(&genpd->lock) __acquires(&genpd->lock)
+{
+	struct pm_domain_data *pdd;
+	struct gpd_link *link;
+	unsigned int not_suspended;
+	int ret = 0;
+
+ start:
+	/*
+	 * Do not try to power off the domain in the following situations:
+	 * (1) The domain is already in the "power off" state.
+	 * (2) The domain is waiting for its master to power up.
+	 * (3) One of the domain's devices is being resumed right now.
+	 * (4) System suspend is in progress.
+	 */
+	if (genpd->status == GPD_STATE_POWER_OFF
+	    || genpd->status == GPD_STATE_WAIT_MASTER
+	    || genpd->resume_count > 0 || genpd->prepared_count > 0)
+		return 0;
+
+	if (atomic_read(&genpd->sd_count) > 0)
+		return -EBUSY;
+
+	not_suspended = 0;
+	list_for_each_entry(pdd, &genpd->dev_list, list_node) {
+		enum pm_qos_flags_status stat;
+
+		stat = dev_pm_qos_flags(pdd->dev,
+					PM_QOS_FLAG_NO_POWER_OFF
+						| PM_QOS_FLAG_REMOTE_WAKEUP);
+		if (stat > PM_QOS_FLAGS_NONE)
+			return -EBUSY;
+
+		if (pdd->dev->driver && (!pm_runtime_suspended(pdd->dev)
+		    || pdd->dev->power.irq_safe))
+			not_suspended++;
+	}
+
+	if (not_suspended > genpd->in_progress)
+		return -EBUSY;
+
+	if (genpd->poweroff_task) {
+		/*
+		 * Another instance of pm_genpd_poweroff() is executing
+		 * callbacks, so tell it to start over and return.
+		 */
+		genpd->status = GPD_STATE_REPEAT;
+		return 0;
+	}
+
+	if (genpd->gov && genpd->gov->power_down_ok) {
+		if (!genpd->gov->power_down_ok(&genpd->domain))
+			return -EAGAIN;
+	}
+
+	genpd->status = GPD_STATE_BUSY;
+	genpd->poweroff_task = current;
+
+	list_for_each_entry_reverse(pdd, &genpd->dev_list, list_node) {
+		ret = atomic_read(&genpd->sd_count) == 0 ?
+			__pm_genpd_save_device(pdd, genpd) : -EBUSY;
+
+		if (genpd_abort_poweroff(genpd))
+			goto out;
+
+		if (ret) {
+			genpd_set_active(genpd);
+			goto out;
+		}
+
+		if (genpd->status == GPD_STATE_REPEAT) {
+			genpd->poweroff_task = NULL;
+			goto start;
+		}
+	}
+
+	if (genpd->cpu_data) {
+		/*
+		 * If cpu_data is set, cpuidle should turn the domain off when
+		 * the CPU in it is idle.  In that case we don't decrement the
+		 * subdomain counts of the master domains, so that power is not
+		 * removed from the current domain prematurely as a result of
+		 * cutting off the masters' power.
+		 */
+		genpd->status = GPD_STATE_POWER_OFF;
+		cpuidle_pause_and_lock();
+		genpd->cpu_data->idle_state->disabled = false;
+		cpuidle_resume_and_unlock();
+		goto out;
+	}
+
+	if (genpd->power_off) {
+		ktime_t time_start;
+		s64 elapsed_ns;
+
+		if (atomic_read(&genpd->sd_count) > 0) {
+			ret = -EBUSY;
+			goto out;
+		}
+
+		time_start = ktime_get();
+
+		/*
+		 * If sd_count > 0 at this point, one of the subdomains hasn't
+		 * managed to call pm_genpd_poweron() for the master yet after
+		 * incrementing it.  In that case pm_genpd_poweron() will wait
+		 * for us to drop the lock, so we can call .power_off() and let
+		 * the pm_genpd_poweron() restore power for us (this shouldn't
+		 * happen very often).
+		 */
+		ret = genpd->power_off(genpd);
+		if (ret == -EBUSY) {
+			genpd_set_active(genpd);
+			goto out;
+		}
+
+		elapsed_ns = ktime_to_ns(ktime_sub(ktime_get(), time_start));
+		if (elapsed_ns > genpd->power_off_latency_ns) {
+			genpd->power_off_latency_ns = elapsed_ns;
+			genpd->max_off_time_changed = true;
+			if (genpd->name)
+				pr_warning("%s: Power-off latency exceeded, "
+					"new value %lld ns\n", genpd->name,
+					elapsed_ns);
+		}
+	}
+
+	genpd->status = GPD_STATE_POWER_OFF;
+
+	list_for_each_entry(link, &genpd->slave_links, slave_node) {
+		genpd_sd_counter_dec(link->master);
+		genpd_queue_power_off_work(link->master);
+	}
+
+ out:
+	genpd->poweroff_task = NULL;
+	wake_up_all(&genpd->status_wait_queue);
+	return ret;
+}
+
+/**
+ * genpd_power_off_work_fn - Work handler running pm_genpd_poweroff().
+ * @work: Work item embedded (as power_off_work) in the domain to power off.
+ */
+static void genpd_power_off_work_fn(struct work_struct *work)
+{
+	struct generic_pm_domain *pd = container_of(work,
+						    struct generic_pm_domain,
+						    power_off_work);
+
+	genpd_acquire_lock(pd);
+	pm_genpd_poweroff(pd);
+	genpd_release_lock(pd);
+}
+
+/**
+ * pm_genpd_runtime_suspend - Suspend a device belonging to I/O PM domain.
+ * @dev: Device to suspend.
+ *
+ * Stop @dev (unless the governor's ->stop_ok() vetoes that) and then try to
+ * power off its domain; the latter is skipped if power.irq_safe is set.
+ * Returns -EINVAL, -EBUSY (veto), the genpd_stop_dev() error, or 0.
+ */
+static int pm_genpd_runtime_suspend(struct device *dev)
+{
+	struct generic_pm_domain *genpd;
+	bool (*stop_ok)(struct device *__dev);
+	int ret;
+
+	dev_dbg(dev, "%s()\n", __func__);
+
+	genpd = dev_to_genpd(dev);
+	if (IS_ERR(genpd))
+		return -EINVAL;
+
+	might_sleep_if(!genpd->dev_irq_safe);
+
+	stop_ok = genpd->gov ? genpd->gov->stop_ok : NULL;
+	if (stop_ok && !stop_ok(dev))
+		return -EBUSY;
+
+	ret = genpd_stop_dev(genpd, dev);
+	if (ret)
+		return ret;
+
+	/*
+	 * If power.irq_safe is set, this routine will be run with interrupts
+	 * off, so it can't use mutexes.
+	 */
+	if (dev->power.irq_safe)
+		return 0;
+
+	mutex_lock(&genpd->lock);
+	genpd->in_progress++;
+	pm_genpd_poweroff(genpd);	/* return value is not checked */
+	genpd->in_progress--;
+	mutex_unlock(&genpd->lock);
+
+	return 0;
+}
+
+/**
+ * pm_genpd_runtime_resume - Resume a device belonging to I/O PM domain.
+ * @dev: Device to resume.
+ *
+ * Power on the domain with __pm_genpd_poweron(), wait until no other task is
+ * running pm_genpd_poweroff() callbacks, restore @dev and set the domain
+ * active.  With power.irq_safe set only genpd_start_dev_no_timing() is run.
+ */
+static int pm_genpd_runtime_resume(struct device *dev)
+{
+	struct generic_pm_domain *genpd;
+	DEFINE_WAIT(wait);
+	int ret;
+
+	dev_dbg(dev, "%s()\n", __func__);
+
+	genpd = dev_to_genpd(dev);
+	if (IS_ERR(genpd))
+		return -EINVAL;
+
+	might_sleep_if(!genpd->dev_irq_safe);
+
+	/* If power.irq_safe, the PM domain is never powered off. */
+	if (dev->power.irq_safe)
+		return genpd_start_dev_no_timing(genpd, dev);
+
+	mutex_lock(&genpd->lock);
+	ret = __pm_genpd_poweron(genpd);
+	if (ret) {
+		mutex_unlock(&genpd->lock);
+		return ret;
+	}
+	genpd->status = GPD_STATE_BUSY;
+	genpd->resume_count++;
+	for (;;) {
+		prepare_to_wait(&genpd->status_wait_queue, &wait,
+				TASK_UNINTERRUPTIBLE);
+		/*
+		 * If current is the powering off task, we have been called
+		 * reentrantly from one of the device callbacks, so we should
+		 * not wait.
+		 */
+		if (!genpd->poweroff_task || genpd->poweroff_task == current)
+			break;
+		mutex_unlock(&genpd->lock);
+
+		schedule();	/* sleep with the domain lock dropped */
+
+		mutex_lock(&genpd->lock);
+	}
+	finish_wait(&genpd->status_wait_queue, &wait);
+	__pm_genpd_restore_device(dev->power.subsys_data->domain_data, genpd);
+	genpd->resume_count--;
+	genpd_set_active(genpd);
+	wake_up_all(&genpd->status_wait_queue);
+	mutex_unlock(&genpd->lock);
+
+	return 0;
+}
+
+static bool pd_ignore_unused;	/* set by the __setup() handler below */
+static int __init pd_ignore_unused_setup(char *__unused)
+{
+	pd_ignore_unused = true;
+	return 1;
+}
+__setup("pd_ignore_unused", pd_ignore_unused_setup);
+
+/**
+ * pm_genpd_poweroff_unused - Queue power-off work for every registered domain.
+ */
+void pm_genpd_poweroff_unused(void)
+{
+	struct generic_pm_domain *pd;
+
+	/* The "pd_ignore_unused" setup flag vetoes the whole pass. */
+	if (pd_ignore_unused) {
+		pr_warn("genpd: Not disabling unused power domains\n");
+		return;
+	}
+
+	mutex_lock(&gpd_list_lock);
+	list_for_each_entry(pd, &gpd_list, gpd_list_node) {
+		genpd_queue_power_off_work(pd);
+	}
+	mutex_unlock(&gpd_list_lock);
+}
+
+#else
+
+static inline int genpd_dev_pm_qos_notifier(struct notifier_block *nb,
+					    unsigned long val, void *ptr)
+{
+	return NOTIFY_DONE;	/* stub for the #else branch: nothing to do */
+}
+
+static inline void genpd_power_off_work_fn(struct work_struct *work) {}
+
+#define pm_genpd_runtime_suspend	NULL	/* no runtime PM callbacks */
+#define pm_genpd_runtime_resume		NULL	/* in this configuration */
+
+#endif /* CONFIG_PM_RUNTIME */
+
+#ifdef CONFIG_PM_SLEEP
+
+/**
+ * pm_genpd_present - Check if the given PM domain is on the global list.
+ * @genpd: PM domain to look for; NULL and error pointers are never present.
+ */
+static bool pm_genpd_present(struct generic_pm_domain *genpd)
+{
+	struct generic_pm_domain *pos;
+
+	if (!IS_ERR_OR_NULL(genpd)) {
+		list_for_each_entry(pos, &gpd_list, gpd_list_node) {
+			if (pos == genpd)
+				return true;
+		}
+	}
+
+	return false;
+}
+
+static bool genpd_dev_active_wakeup(struct generic_pm_domain *genpd,
+				    struct device *dev)
+{	/* As in the wrappers below: one GENPD_DEV_CALLBACK() expansion. */
+	return GENPD_DEV_CALLBACK(genpd, bool, active_wakeup, dev);
+}
+
+static int genpd_suspend_dev(struct generic_pm_domain *genpd, struct device *dev)
+{
+	return GENPD_DEV_CALLBACK(genpd, int, suspend, dev);
+}
+
+static int genpd_suspend_late(struct generic_pm_domain *genpd, struct device *dev)
+{
+	return GENPD_DEV_CALLBACK(genpd, int, suspend_late, dev);
+}
+
+static int genpd_resume_early(struct generic_pm_domain *genpd, struct device *dev)
+{
+	return GENPD_DEV_CALLBACK(genpd, int, resume_early, dev);
+}
+
+static int genpd_resume_dev(struct generic_pm_domain *genpd, struct device *dev)
+{
+	return GENPD_DEV_CALLBACK(genpd, int, resume, dev);
+}
+
+static int genpd_freeze_dev(struct generic_pm_domain *genpd, struct device *dev)
+{
+	return GENPD_DEV_CALLBACK(genpd, int, freeze, dev);
+}
+
+static int genpd_freeze_late(struct generic_pm_domain *genpd, struct device *dev)
+{
+	return GENPD_DEV_CALLBACK(genpd, int, freeze_late, dev);
+}
+
+static int genpd_thaw_early(struct generic_pm_domain *genpd, struct device *dev)
+{
+	return GENPD_DEV_CALLBACK(genpd, int, thaw_early, dev);
+}
+
+static int genpd_thaw_dev(struct generic_pm_domain *genpd, struct device *dev)
+{
+	return GENPD_DEV_CALLBACK(genpd, int, thaw, dev);
+}
+
+/**
+ * pm_genpd_sync_poweroff - Synchronously power off a PM domain and its masters.
+ * @genpd: PM domain to power off, if possible.
+ *
+ * Nothing is done if the domain is off already, if not all of its devices have
+ * been counted as suspended, or if its subdomain counter is above zero.
+ * Otherwise ->power_off() is run (when set), the domain is marked as off and
+ * every master loses one subdomain reference and is tried recursively.
+ *
+ * No locks are taken: the callers run only in the "noirq" and "syscore" stages
+ * of system power transitions, where callbacks are executed sequentially.
+ */
+static void pm_genpd_sync_poweroff(struct generic_pm_domain *genpd)
+{
+	struct gpd_link *lnk;
+
+	if (genpd->status == GPD_STATE_POWER_OFF)
+		return;
+	if (genpd->suspended_count != genpd->device_count)
+		return;
+	if (atomic_read(&genpd->sd_count) > 0)
+		return;
+
+	if (genpd->power_off)
+		genpd->power_off(genpd);
+
+	genpd->status = GPD_STATE_POWER_OFF;
+
+	list_for_each_entry(lnk, &genpd->slave_links, slave_node) {
+		genpd_sd_counter_dec(lnk->master);
+		pm_genpd_sync_poweroff(lnk->master);
+	}
+}
+
+/**
+ * pm_genpd_sync_poweron - Synchronously power on a PM domain and its masters.
+ * @genpd: PM domain to power on.
+ *
+ * Does nothing unless the domain is marked as off.  Masters are powered on
+ * (recursively) and have their subdomain counters bumped before ->power_on()
+ * of @genpd itself is run.  Lockless: it is only called from the "noirq" and
+ * "syscore" stages, whose callbacks are executed sequentially.
+ */
+static void pm_genpd_sync_poweron(struct generic_pm_domain *genpd)
+{
+	struct gpd_link *lnk;
+
+	if (genpd->status != GPD_STATE_POWER_OFF)
+		return;
+
+	/* Bring up every master before touching this domain. */
+	list_for_each_entry(lnk, &genpd->slave_links, slave_node) {
+		pm_genpd_sync_poweron(lnk->master);
+		genpd_sd_counter_inc(lnk->master);
+	}
+	if (genpd->power_on)
+		genpd->power_on(genpd);
+
+	genpd->status = GPD_STATE_ACTIVE;
+}
+
+/**
+ * resume_needed - Check whether to resume a device before system suspend.
+ * @dev: Device to check.
+ * @genpd: PM domain the device belongs to.
+ *
+ * A wakeup-capable device should be resumed by pm_genpd_prepare() if (1) it
+ * is enabled to wake up the system and has to remain active for that purpose
+ * while the system sleeps, or (2) it is not enabled to wake up the system and
+ * does not generate wakeup signals by itself (other parts of the system do
+ * that on its behalf).  In case (2) its wakeup settings may need to be
+ * reconfigured during system suspend, as runtime PM may have set it up to
+ * signal remote wakeup from the working state.  Return 'true' in either case.
+ * Devices that cannot wake up the system at all always yield 'false'.
+ */
+static bool resume_needed(struct device *dev, struct generic_pm_domain *genpd)
+{
+	bool stays_active;
+
+	if (!device_can_wakeup(dev))
+		return false;
+
+	stays_active = genpd_dev_active_wakeup(genpd, dev);
+	if (device_may_wakeup(dev))
+		return stays_active;
+	return !stays_active;
+}
+
+/**
+ * pm_genpd_prepare - Start power transition of a device in a PM domain.
+ * @dev: Device to start the transition of.
+ *
+ * Called for a device whose pm_domain is embedded in a struct
+ * generic_pm_domain.  The first call in a transition records whether the
+ * domain was off (suspend_power_off); if so, nothing else is done.  Returns
+ * -EINVAL, -EBUSY (wakeup pending) or the pm_generic_prepare() result.
+ */
+static int pm_genpd_prepare(struct device *dev)
+{
+	struct generic_pm_domain *genpd;
+	int ret;
+
+	dev_dbg(dev, "%s()\n", __func__);
+
+	genpd = dev_to_genpd(dev);
+	if (IS_ERR(genpd))
+		return -EINVAL;
+
+	/*
+	 * If a wakeup request is pending for the device, it should be woken up
+	 * at this point and a system wakeup event should be reported if it's
+	 * set up to wake up the system from sleep states.
+	 */
+	pm_runtime_get_noresume(dev);
+	if (pm_runtime_barrier(dev) && device_may_wakeup(dev))
+		pm_wakeup_event(dev, 0);
+
+	if (pm_wakeup_pending()) {
+		pm_runtime_put(dev);
+		return -EBUSY;
+	}
+
+	if (resume_needed(dev, genpd))
+		pm_runtime_resume(dev);
+
+	genpd_acquire_lock(genpd);
+
+	if (genpd->prepared_count++ == 0) {
+		genpd->suspended_count = 0;
+		genpd->suspend_power_off = genpd->status == GPD_STATE_POWER_OFF;
+	}
+
+	genpd_release_lock(genpd);
+
+	if (genpd->suspend_power_off) {
+		pm_runtime_put_noidle(dev);
+		return 0;
+	}
+
+	/*
+	 * The PM domain must be in the GPD_STATE_ACTIVE state at this point,
+	 * so pm_genpd_poweron() will return immediately, but if the device
+	 * is suspended (e.g. it's been stopped by genpd_stop_dev()), we need
+	 * to make it operational.
+	 */
+	pm_runtime_resume(dev);
+	__pm_runtime_disable(dev, false);
+
+	ret = pm_generic_prepare(dev);
+	if (ret) {	/* undo the prepared_count accounting done above */
+		mutex_lock(&genpd->lock);
+
+		if (--genpd->prepared_count == 0)
+			genpd->suspend_power_off = false;
+
+		mutex_unlock(&genpd->lock);
+		pm_runtime_enable(dev);
+	}
+
+	pm_runtime_put(dev);
+	return ret;
+}
+
+/**
+ * pm_genpd_suspend - Suspend a device belonging to an I/O PM domain.
+ * @dev: Device to suspend.
+ *
+ * Call genpd_suspend_dev() for @dev, or just return 0 if the domain's
+ * suspend_power_off flag is set.  Returns -EINVAL if dev_to_genpd() fails.
+ */
+static int pm_genpd_suspend(struct device *dev)
+{
+	struct generic_pm_domain *pd;
+
+	dev_dbg(dev, "%s()\n", __func__);
+
+	pd = dev_to_genpd(dev);
+	if (IS_ERR(pd))
+		return -EINVAL;
+	if (pd->suspend_power_off)
+		return 0;
+	return genpd_suspend_dev(pd, dev);
+}
+
+/**
+ * pm_genpd_suspend_late - Late suspend of a device from an I/O PM domain.
+ * @dev: Device to suspend.
+ *
+ * Call genpd_suspend_late() for @dev, or just return 0 if the domain's
+ * suspend_power_off flag is set.  Returns -EINVAL if dev_to_genpd() fails.
+ */
+static int pm_genpd_suspend_late(struct device *dev)
+{
+	struct generic_pm_domain *pd;
+
+	dev_dbg(dev, "%s()\n", __func__);
+
+	pd = dev_to_genpd(dev);
+	if (IS_ERR(pd))
+		return -EINVAL;
+	if (pd->suspend_power_off)
+		return 0;
+	return genpd_suspend_late(pd, dev);
+}
+
+/**
+ * pm_genpd_suspend_noirq - Completion of suspend of device in an I/O PM domain.
+ * @dev: Device to suspend.
+ *
+ * Stop the device and power off the domain once all of its devices have been
+ * stopped, unless the domain was off already or @dev is an active wakeup one.
+ */
+static int pm_genpd_suspend_noirq(struct device *dev)
+{
+	struct generic_pm_domain *pd;
+
+	dev_dbg(dev, "%s()\n", __func__);
+
+	pd = dev_to_genpd(dev);
+	if (IS_ERR(pd))
+		return -EINVAL;
+
+	if (pd->suspend_power_off)
+		return 0;
+	if (dev->power.wakeup_path && genpd_dev_active_wakeup(pd, dev))
+		return 0;
+
+	genpd_stop_dev(pd, dev);
+
+	/*
+	 * "noirq" callbacks are executed sequentially, so this function never
+	 * runs twice in parallel for one PM domain and needs no locking.
+	 */
+	pd->suspended_count++;
+	pm_genpd_sync_poweroff(pd);
+
+	return 0;
+}
+
+/**
+ * pm_genpd_resume_noirq - Start of resume of device in an I/O PM domain.
+ * @dev: Device to resume.
+ *
+ * Restore power to the device's PM domain, if necessary, and start the device.
+ */
+static int pm_genpd_resume_noirq(struct device *dev)
+{
+	struct generic_pm_domain *pd;
+
+	dev_dbg(dev, "%s()\n", __func__);
+
+	pd = dev_to_genpd(dev);
+	if (IS_ERR(pd))
+		return -EINVAL;
+
+	if (pd->suspend_power_off)
+		return 0;
+	if (dev->power.wakeup_path && genpd_dev_active_wakeup(pd, dev))
+		return 0;
+
+	/*
+	 * "noirq" callbacks are executed sequentially, so this function never
+	 * runs twice in parallel for one PM domain and needs no locking.
+	 */
+	pm_genpd_sync_poweron(pd);
+	pd->suspended_count--;
+
+	return genpd_start_dev(pd, dev);
+}
+
+/**
+ * pm_genpd_resume_early - Early resume of a device in an I/O PM domain.
+ * @dev: Device to resume.
+ *
+ * Call genpd_resume_early() for @dev, whose pm_domain is expected to be
+ * embedded in a struct generic_pm_domain, or just return 0 if the domain's
+ * suspend_power_off flag is set.  Returns -EINVAL if dev_to_genpd() fails.
+ */
+static int pm_genpd_resume_early(struct device *dev)
+{
+	struct generic_pm_domain *pd;
+
+	dev_dbg(dev, "%s()\n", __func__);
+
+	pd = dev_to_genpd(dev);
+	if (IS_ERR(pd))
+		return -EINVAL;
+	if (pd->suspend_power_off)
+		return 0;
+	return genpd_resume_early(pd, dev);
+}
+
+/**
+ * pm_genpd_resume - Resume of device in an I/O PM domain.
+ * @dev: Device to resume.
+ *
+ * Call genpd_resume_dev() for @dev, or just return 0 if the domain's
+ * suspend_power_off flag is set.  Returns -EINVAL if dev_to_genpd() fails.
+ */
+static int pm_genpd_resume(struct device *dev)
+{
+	struct generic_pm_domain *pd;
+
+	dev_dbg(dev, "%s()\n", __func__);
+
+	pd = dev_to_genpd(dev);
+	if (IS_ERR(pd))
+		return -EINVAL;
+	if (pd->suspend_power_off)
+		return 0;
+	return genpd_resume_dev(pd, dev);
+}
+
+/**
+ * pm_genpd_freeze - Freezing a device in an I/O PM domain.
+ * @dev: Device to freeze.
+ *
+ * Call genpd_freeze_dev() for @dev, or just return 0 if the domain's
+ * suspend_power_off flag is set.  Returns -EINVAL if dev_to_genpd() fails.
+ */
+static int pm_genpd_freeze(struct device *dev)
+{
+	struct generic_pm_domain *pd;
+
+	dev_dbg(dev, "%s()\n", __func__);
+
+	pd = dev_to_genpd(dev);
+	if (IS_ERR(pd))
+		return -EINVAL;
+	if (pd->suspend_power_off)
+		return 0;
+	return genpd_freeze_dev(pd, dev);
+}
+
+/**
+ * pm_genpd_freeze_late - Late freeze of a device in an I/O PM domain.
+ * @dev: Device to freeze.
+ *
+ * Call genpd_freeze_late() for @dev, whose pm_domain is expected to be
+ * embedded in a struct generic_pm_domain, or just return 0 if the domain's
+ * suspend_power_off flag is set.  Returns -EINVAL if dev_to_genpd() fails.
+ */
+static int pm_genpd_freeze_late(struct device *dev)
+{
+	struct generic_pm_domain *pd;
+
+	dev_dbg(dev, "%s()\n", __func__);
+
+	pd = dev_to_genpd(dev);
+	if (IS_ERR(pd))
+		return -EINVAL;
+	if (pd->suspend_power_off)
+		return 0;
+	return genpd_freeze_late(pd, dev);
+}
+
+/**
+ * pm_genpd_freeze_noirq - Completion of freezing a device in an I/O PM domain.
+ * @dev: Device to freeze.
+ *
+ * Stop @dev with genpd_stop_dev(), or just return 0 if the domain's
+ * suspend_power_off flag is set; the domain itself is not powered off here.
+ * Returns -EINVAL if dev_to_genpd() fails.
+ */
+static int pm_genpd_freeze_noirq(struct device *dev)
+{
+	struct generic_pm_domain *pd;
+
+	dev_dbg(dev, "%s()\n", __func__);
+
+	pd = dev_to_genpd(dev);
+	if (IS_ERR(pd))
+		return -EINVAL;
+	if (pd->suspend_power_off)
+		return 0;
+	return genpd_stop_dev(pd, dev);
+}
+
+/**
+ * pm_genpd_thaw_noirq - Early thaw of device in an I/O PM domain.
+ * @dev: Device to thaw.
+ *
+ * Start @dev via genpd_start_dev() unless suspend_power_off is set.
+ */
+static int pm_genpd_thaw_noirq(struct device *dev)
+{
+	struct generic_pm_domain *pd;
+
+	dev_dbg(dev, "%s()\n", __func__);
+
+	pd = dev_to_genpd(dev);
+	if (IS_ERR(pd))
+		return -EINVAL;
+	if (pd->suspend_power_off)
+		return 0;
+	return genpd_start_dev(pd, dev);
+}
+
+/**
+ * pm_genpd_thaw_early - Early thaw of device in an I/O PM domain.
+ * @dev: Device to thaw.
+ *
+ * Call genpd_thaw_early() for @dev, whose pm_domain is expected to be
+ * embedded in a struct generic_pm_domain, or just return 0 if the domain's
+ * suspend_power_off flag is set.  Returns -EINVAL if dev_to_genpd() fails.
+ */
+static int pm_genpd_thaw_early(struct device *dev)
+{
+	struct generic_pm_domain *pd;
+
+	dev_dbg(dev, "%s()\n", __func__);
+
+	pd = dev_to_genpd(dev);
+	if (IS_ERR(pd))
+		return -EINVAL;
+	if (pd->suspend_power_off)
+		return 0;
+	return genpd_thaw_early(pd, dev);
+}
+
+/**
+ * pm_genpd_thaw - Thaw a device belonging to an I/O power domain.
+ * @dev: Device to thaw.
+ *
+ * Call genpd_thaw_dev() for @dev, or just return 0 if the domain's
+ * suspend_power_off flag is set.  Returns -EINVAL if dev_to_genpd() fails.
+ */
+static int pm_genpd_thaw(struct device *dev)
+{
+	struct generic_pm_domain *pd;
+
+	dev_dbg(dev, "%s()\n", __func__);
+
+	pd = dev_to_genpd(dev);
+	if (IS_ERR(pd))
+		return -EINVAL;
+	if (pd->suspend_power_off)
+		return 0;
+	return genpd_thaw_dev(pd, dev);
+}
+
+/**
+ * pm_genpd_restore_noirq - Start of restore of device in an I/O PM domain.
+ * @dev: Device to resume.
+ *
+ * Make sure the domain is in the same power state as before hibernation and
+ * start the device if the domain was on.  Returns -EINVAL for a bad domain.
+ */
+static int pm_genpd_restore_noirq(struct device *dev)
+{
+	struct generic_pm_domain *genpd;
+
+	dev_dbg(dev, "%s()\n", __func__);
+
+	genpd = dev_to_genpd(dev);
+	if (IS_ERR(genpd))
+		return -EINVAL;
+
+	/*
+	 * Since all of the "noirq" callbacks are executed sequentially, it is
+	 * guaranteed that this function will never run twice in parallel for
+	 * the same PM domain, so it is not necessary to use locking here.
+	 *
+	 * At this point suspended_count == 0 means we are being run for the
+	 * first time for the given domain in the present cycle.
+	 */
+	if (genpd->suspended_count++ == 0) {
+		/*
+		 * The boot kernel might put the domain into arbitrary state,
+		 * so make it appear as powered off to pm_genpd_sync_poweron(),
+		 * so that it tries to power it on in case it was really off.
+		 */
+		genpd->status = GPD_STATE_POWER_OFF;
+		if (genpd->suspend_power_off) {
+			/*
+			 * If the domain was off before the hibernation, make
+			 * sure it will be off going forward.
+			 */
+			if (genpd->power_off)
+				genpd->power_off(genpd);
+
+			return 0;
+		}
+	}
+
+	if (genpd->suspend_power_off)	/* later devices of an "off" domain */
+		return 0;
+
+	pm_genpd_sync_poweron(genpd);
+
+	return genpd_start_dev(genpd, dev);
+}
+
+/**
+ * pm_genpd_complete - Complete power transition of a device in a power domain.
+ * @dev: Device to complete the transition of.
+ *
+ * Drop one "prepared" reference of the domain (clearing suspend_power_off
+ * with the last one) and, unless that flag was set on entry, run
+ * pm_generic_complete() and re-enable runtime PM for @dev, which is marked
+ * as active and for which an idle notification is requested.
+ */
+static void pm_genpd_complete(struct device *dev)
+{
+	struct generic_pm_domain *pd;
+	bool skip;
+
+	dev_dbg(dev, "%s()\n", __func__);
+
+	pd = dev_to_genpd(dev);
+	if (IS_ERR(pd))
+		return;
+
+	mutex_lock(&pd->lock);
+	/* Sample the flag before the last completion clears it. */
+	skip = pd->suspend_power_off;
+	if (--pd->prepared_count == 0)
+		pd->suspend_power_off = false;
+	mutex_unlock(&pd->lock);
+
+	if (skip)
+		return;
+
+	pm_generic_complete(dev);
+	pm_runtime_set_active(dev);
+	pm_runtime_enable(dev);
+	pm_request_idle(dev);
+}
+
+/**
+ * pm_genpd_syscore_switch - Switch power during system core suspend or resume.
+ * @dev: Device that normally is marked as "always on" to switch power for.
+ * @suspend: If true, try a synchronous power-off; otherwise power on.
+ *
+ * Call only in the syscore suspend/resume phase, for "always on" devices.
+ */
+void pm_genpd_syscore_switch(struct device *dev, bool suspend)
+{
+	struct generic_pm_domain *genpd;
+
+	genpd = dev_to_genpd(dev);
+	if (!pm_genpd_present(genpd))
+		return;
+
+	if (suspend) {
+		genpd->suspended_count++;
+		pm_genpd_sync_poweroff(genpd);
+	} else {
+		pm_genpd_sync_poweron(genpd);
+		genpd->suspended_count--;
+	}
+}
+EXPORT_SYMBOL_GPL(pm_genpd_syscore_switch);
+
+#else
+
+#define pm_genpd_prepare		NULL	/* !CONFIG_PM_SLEEP stubs */
+#define pm_genpd_suspend		NULL
+#define pm_genpd_suspend_late		NULL
+#define pm_genpd_suspend_noirq		NULL
+#define pm_genpd_resume_early		NULL
+#define pm_genpd_resume_noirq		NULL
+#define pm_genpd_resume			NULL
+#define pm_genpd_freeze			NULL
+#define pm_genpd_freeze_late		NULL
+#define pm_genpd_freeze_noirq		NULL
+#define pm_genpd_thaw_early		NULL
+#define pm_genpd_thaw_noirq		NULL
+#define pm_genpd_thaw			NULL
+#define pm_genpd_restore_noirq		NULL
+#define pm_genpd_complete		NULL
+
+#endif /* CONFIG_PM_SLEEP */
+
+/* Allocate zeroed per-device domain data and hook up its PM QoS notifier. */
+static struct generic_pm_domain_data *__pm_genpd_alloc_dev_data(struct device *dev)
+{
+	struct generic_pm_domain_data *data;
+
+	data = kzalloc(sizeof(*data), GFP_KERNEL);
+	if (data) {
+		mutex_init(&data->lock);
+		data->nb.notifier_call = genpd_dev_pm_qos_notifier;
+		dev_pm_qos_add_notifier(dev, &data->nb);
+	}
+	return data;	/* NULL if the allocation failed */
+}
+
+static void __pm_genpd_free_dev_data(struct device *dev,
+				     struct generic_pm_domain_data *gpd_data)
+{
+	dev_pm_qos_remove_notifier(dev, &gpd_data->nb);
+	kfree(gpd_data);	/* counterpart of __pm_genpd_alloc_dev_data() */
+}
+
+/**
+ * __pm_genpd_add_device - Add a device to an I/O PM domain.
+ * @genpd: PM domain to add the device to.
+ * @dev: Device to be added; -EINVAL if it is on @genpd's device list already.
+ * @td: PM QoS timing parameters copied into the device data (may be NULL).
+ */
+int __pm_genpd_add_device(struct generic_pm_domain *genpd, struct device *dev,
+			  struct gpd_timing_data *td)
+{
+	struct generic_pm_domain_data *gpd_data_new, *gpd_data = NULL;
+	struct pm_domain_data *pdd;
+	int ret = 0;
+
+	dev_dbg(dev, "%s()\n", __func__);
+
+	if (IS_ERR_OR_NULL(genpd) || IS_ERR_OR_NULL(dev))
+		return -EINVAL;
+
+	gpd_data_new = __pm_genpd_alloc_dev_data(dev);
+	if (!gpd_data_new)
+		return -ENOMEM;
+
+	genpd_acquire_lock(genpd);
+
+	if (genpd->prepared_count > 0) {
+		ret = -EAGAIN;	/* devices are between prepare and complete */
+		goto out;
+	}
+
+	list_for_each_entry(pdd, &genpd->dev_list, list_node)
+		if (pdd->dev == dev) {
+			ret = -EINVAL;
+			goto out;
+		}
+
+	ret = dev_pm_get_subsys_data(dev);
+	if (ret)
+		goto out;
+
+	genpd->device_count++;
+	genpd->max_off_time_changed = true;
+
+	spin_lock_irq(&dev->power.lock);
+
+	dev->pm_domain = &genpd->domain;
+	if (dev->power.subsys_data->domain_data) {
+		gpd_data = to_gpd_data(dev->power.subsys_data->domain_data);
+	} else {
+		gpd_data = gpd_data_new;
+		dev->power.subsys_data->domain_data = &gpd_data->base;
+	}
+	gpd_data->refcount++;
+	if (td)
+		gpd_data->td = *td;
+
+	spin_unlock_irq(&dev->power.lock);
+
+	mutex_lock(&gpd_data->lock);
+	gpd_data->base.dev = dev;
+	list_add_tail(&gpd_data->base.list_node, &genpd->dev_list);
+	gpd_data->need_restore = genpd->status == GPD_STATE_POWER_OFF;
+	gpd_data->td.constraint_changed = true;
+	gpd_data->td.effective_constraint_ns = -1;
+	mutex_unlock(&gpd_data->lock);
+
+ out:
+	genpd_release_lock(genpd);
+
+	if (gpd_data != gpd_data_new)	/* preallocated data was not used */
+		__pm_genpd_free_dev_data(dev, gpd_data_new);
+
+	return ret;
+}
+
+/**
+ * __pm_genpd_of_add_device - Add a device to an I/O PM domain.
+ * @genpd_node: Device tree node pointer representing the PM domain to which
+ *   the device is added.
+ * @dev: Device to be added.
+ * @td: Set of PM QoS timing parameters to attach to the device.
+ */
+int __pm_genpd_of_add_device(struct device_node *genpd_node, struct device *dev,
+			     struct gpd_timing_data *td)
+{
+	struct generic_pm_domain *match = NULL, *pos;
+
+	dev_dbg(dev, "%s()\n", __func__);
+
+	if (IS_ERR_OR_NULL(genpd_node) || IS_ERR_OR_NULL(dev))
+		return -EINVAL;
+
+	mutex_lock(&gpd_list_lock);
+	list_for_each_entry(pos, &gpd_list, gpd_list_node) {
+		if (pos->of_node != genpd_node)
+			continue;
+		match = pos;
+		break;
+	}
+	mutex_unlock(&gpd_list_lock);
+
+	if (!match)
+		return -EINVAL;
+
+	return __pm_genpd_add_device(match, dev, td);
+}
+
+
+/**
+ * __pm_genpd_name_add_device - Find I/O PM domain and add a device to it.
+ * @domain_name: Name of the PM domain, resolved with pm_genpd_lookup_name().
+ * @dev: Device to be added.
+ * @td: Set of PM QoS timing parameters, passed on to __pm_genpd_add_device().
+ */
+int __pm_genpd_name_add_device(const char *domain_name, struct device *dev,
+			       struct gpd_timing_data *td)
+{
+	return __pm_genpd_add_device(pm_genpd_lookup_name(domain_name), dev, td);
+}
+
+/**
+ * pm_genpd_remove_device - Remove a device from an I/O PM domain.
+ * @genpd: PM domain to remove the device from (must be @dev's current domain).
+ * @dev: Device to be removed.
+ */
+int pm_genpd_remove_device(struct generic_pm_domain *genpd,
+			   struct device *dev)
+{
+	struct generic_pm_domain_data *gpd_data;
+	struct pm_domain_data *pdd;
+	bool remove = false;
+	int ret = 0;
+
+	dev_dbg(dev, "%s()\n", __func__);
+
+	if (IS_ERR_OR_NULL(genpd) || IS_ERR_OR_NULL(dev)
+	    ||  IS_ERR_OR_NULL(dev->pm_domain)
+	    ||  pd_to_genpd(dev->pm_domain) != genpd)
+		return -EINVAL;
+
+	genpd_acquire_lock(genpd);
+
+	if (genpd->prepared_count > 0) {
+		ret = -EAGAIN;	/* devices are between prepare and complete */
+		goto out;
+	}
+
+	genpd->device_count--;
+	genpd->max_off_time_changed = true;
+
+	spin_lock_irq(&dev->power.lock);
+
+	dev->pm_domain = NULL;
+	pdd = dev->power.subsys_data->domain_data;
+	list_del_init(&pdd->list_node);
+	gpd_data = to_gpd_data(pdd);
+	if (--gpd_data->refcount == 0) {
+		dev->power.subsys_data->domain_data = NULL;
+		remove = true;	/* last user: free it once the locks are dropped */
+	}
+
+	spin_unlock_irq(&dev->power.lock);
+
+	mutex_lock(&gpd_data->lock);
+	pdd->dev = NULL;
+	mutex_unlock(&gpd_data->lock);
+
+	genpd_release_lock(genpd);
+
+	dev_pm_put_subsys_data(dev);
+	if (remove)
+		__pm_genpd_free_dev_data(dev, gpd_data);
+
+	return 0;
+
+ out:
+	genpd_release_lock(genpd);
+
+	return ret;
+}
+
+/**
+ * pm_genpd_dev_need_restore - Set/unset the device's "need restore" flag.
+ * @dev: Device to set/unset the flag for.
+ * @val: The new value of the device's "need restore" flag.
+ */
+void pm_genpd_dev_need_restore(struct device *dev, bool val)
+{
+	struct pm_subsys_data *sd;
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&dev->power.lock, irqflags);
+	sd = dev_to_psd(dev);
+	if (sd && sd->domain_data) {
+		/* Only devices with domain data carry the flag. */
+		to_gpd_data(sd->domain_data)->need_restore = val;
+	}
+	spin_unlock_irqrestore(&dev->power.lock, irqflags);
+}
+EXPORT_SYMBOL_GPL(pm_genpd_dev_need_restore);
+
+/**
+ * pm_genpd_add_subdomain - Add a subdomain to an I/O PM domain.
+ * @genpd: Master PM domain to add the subdomain to.
+ * @subdomain: Subdomain to be added (must differ from @genpd).
+ */
+int pm_genpd_add_subdomain(struct generic_pm_domain *genpd,
+			   struct generic_pm_domain *subdomain)
+{
+	struct gpd_link *link;
+	int ret = 0;
+
+	if (IS_ERR_OR_NULL(genpd) || IS_ERR_OR_NULL(subdomain)
+	    || genpd == subdomain)
+		return -EINVAL;
+
+ start:
+	genpd_acquire_lock(genpd);
+	mutex_lock_nested(&subdomain->lock, SINGLE_DEPTH_NESTING);
+
+	if (subdomain->status != GPD_STATE_POWER_OFF
+	    && subdomain->status != GPD_STATE_ACTIVE) {
+		mutex_unlock(&subdomain->lock);
+		genpd_release_lock(genpd);
+		goto start;	/* retry until the subdomain is fully on or off */
+	}
+
+	if (genpd->status == GPD_STATE_POWER_OFF
+	    &&  subdomain->status != GPD_STATE_POWER_OFF) {
+		ret = -EINVAL;	/* an off master cannot take a subdomain on */
+		goto out;
+	}
+
+	list_for_each_entry(link, &genpd->master_links, master_node) {
+		if (link->slave == subdomain && link->master == genpd) {
+			ret = -EINVAL;	/* link exists already */
+			goto out;
+		}
+	}
+
+	link = kzalloc(sizeof(*link), GFP_KERNEL);
+	if (!link) {
+		ret = -ENOMEM;
+		goto out;
+	}
+	link->master = genpd;
+	list_add_tail(&link->master_node, &genpd->master_links);
+	link->slave = subdomain;
+	list_add_tail(&link->slave_node, &subdomain->slave_links);
+	if (subdomain->status != GPD_STATE_POWER_OFF)
+		genpd_sd_counter_inc(genpd);
+
+ out:
+	mutex_unlock(&subdomain->lock);
+	genpd_release_lock(genpd);
+
+	return ret;
+}
+
+/**
+ * pm_genpd_add_subdomain_names - Add a subdomain to a master, both by name.
+ * @master_name: Name of the master PM domain to add the subdomain to.
+ * @subdomain_name: Name of the subdomain to be added.
+ */
+int pm_genpd_add_subdomain_names(const char *master_name,
+				 const char *subdomain_name)
+{
+	struct generic_pm_domain *master = NULL, *subdomain = NULL, *gpd;
+
+	if (IS_ERR_OR_NULL(master_name) || IS_ERR_OR_NULL(subdomain_name))
+		return -EINVAL;
+
+	mutex_lock(&gpd_list_lock);
+	list_for_each_entry(gpd, &gpd_list, gpd_list_node) {
+		if (!gpd->name)	/* unnamed domain: strcmp(NULL, ...) would oops */
+			continue;
+		if (!master && !strcmp(gpd->name, master_name))
+			master = gpd;
+		if (!subdomain && !strcmp(gpd->name, subdomain_name))
+			subdomain = gpd;
+		if (master && subdomain)
+			break;
+	}
+	mutex_unlock(&gpd_list_lock);
+	/* A name that was not found is passed on as NULL and yields -EINVAL. */
+	return pm_genpd_add_subdomain(master, subdomain);
+}
+
+/**
+ * pm_genpd_remove_subdomain - Remove a subdomain from an I/O PM domain.
+ * @genpd: Master PM domain to remove the subdomain from.
+ * @subdomain: Subdomain to be removed.
+ */
+int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd,
+			      struct generic_pm_domain *subdomain)
+{
+	struct gpd_link *link;
+	int ret = -EINVAL;	/* reported if no link to @subdomain exists */
+
+	if (IS_ERR_OR_NULL(genpd) || IS_ERR_OR_NULL(subdomain))
+		return -EINVAL;
+
+ start:
+	genpd_acquire_lock(genpd);
+
+	list_for_each_entry(link, &genpd->master_links, master_node) {
+		if (link->slave != subdomain)
+			continue;
+
+		mutex_lock_nested(&subdomain->lock, SINGLE_DEPTH_NESTING);
+
+		if (subdomain->status != GPD_STATE_POWER_OFF
+		    && subdomain->status != GPD_STATE_ACTIVE) {
+			mutex_unlock(&subdomain->lock);
+			genpd_release_lock(genpd);
+			goto start;	/* retry until fully on or off */
+		}
+
+		list_del(&link->master_node);
+		list_del(&link->slave_node);
+		kfree(link);
+		if (subdomain->status != GPD_STATE_POWER_OFF)
+			genpd_sd_counter_dec(genpd);
+
+		mutex_unlock(&subdomain->lock);
+
+		ret = 0;
+		break;
+	}
+
+	genpd_release_lock(genpd);
+
+	return ret;
+}
+
+/**
+ * pm_genpd_add_callbacks - Add PM domain callbacks to a given device.
+ * @dev: Device to add the callbacks to.
+ * @ops: Set of callbacks to add.
+ * @td: Timing data to add to the device along with the callbacks (optional).
+ *
+ * Every call to this routine should be balanced with a call to
+ * __pm_genpd_remove_callbacks() and they must not be nested.
+ */
+int pm_genpd_add_callbacks(struct device *dev, struct gpd_dev_ops *ops,
+			   struct gpd_timing_data *td)
+{
+	struct generic_pm_domain_data *gpd_data_new, *gpd_data = NULL;
+	int ret = 0;
+
+	if (!(dev && ops))
+		return -EINVAL;
+	/* Allocated before any lock is taken; freed below if it goes unused. */
+	gpd_data_new = __pm_genpd_alloc_dev_data(dev);
+	if (!gpd_data_new)
+		return -ENOMEM;
+
+	pm_runtime_disable(dev);
+	device_pm_lock();
+
+	ret = dev_pm_get_subsys_data(dev);
+	if (ret)
+		goto out;
+
+	spin_lock_irq(&dev->power.lock);
+	/* Reuse already attached domain data, else install the new object. */
+	if (dev->power.subsys_data->domain_data) {
+		gpd_data = to_gpd_data(dev->power.subsys_data->domain_data);
+	} else {
+		gpd_data = gpd_data_new;
+		dev->power.subsys_data->domain_data = &gpd_data->base;
+	}
+	gpd_data->refcount++;
+	gpd_data->ops = *ops;
+	if (td)
+		gpd_data->td = *td;
+
+	spin_unlock_irq(&dev->power.lock);
+
+ out:
+	device_pm_unlock();
+	pm_runtime_enable(dev);
+	/* The new object was not installed (reuse or error), so free it. */
+	if (gpd_data != gpd_data_new)
+		__pm_genpd_free_dev_data(dev, gpd_data_new);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(pm_genpd_add_callbacks);
+
+/**
+ * __pm_genpd_remove_callbacks - Remove PM domain callbacks from a given device.
+ * @dev: Device to remove the callbacks from.
+ * @clear_td: If set, clear the device's timing data too.
+ *
+ * This routine can only be called after pm_genpd_add_callbacks().
+ */
+int __pm_genpd_remove_callbacks(struct device *dev, bool clear_td)
+{
+	struct generic_pm_domain_data *gpd_data = NULL;
+	bool remove = false;
+	int ret = 0;
+
+	if (!(dev && dev->power.subsys_data))
+		return -EINVAL;
+
+	pm_runtime_disable(dev);
+	device_pm_lock();
+
+	spin_lock_irq(&dev->power.lock);
+	/* Clear the callbacks; drop the domain data with its last user. */
+	if (dev->power.subsys_data->domain_data) {
+		gpd_data = to_gpd_data(dev->power.subsys_data->domain_data);
+		gpd_data->ops = (struct gpd_dev_ops){ NULL };
+		if (clear_td)
+			gpd_data->td = (struct gpd_timing_data){ 0 };
+		/* Last reference: detach now, free after unlocking. */
+		if (--gpd_data->refcount == 0) {
+			dev->power.subsys_data->domain_data = NULL;
+			remove = true;
+		}
+	} else {
+		ret = -EINVAL;
+	}
+
+	spin_unlock_irq(&dev->power.lock);
+
+	device_pm_unlock();
+	pm_runtime_enable(dev);
+
+	if (ret)
+		return ret;
+	/* Put the subsys data and free the domain data detached above. */
+	dev_pm_put_subsys_data(dev);
+	if (remove)
+		__pm_genpd_free_dev_data(dev, gpd_data);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(__pm_genpd_remove_callbacks);
+
+/**
+ * pm_genpd_attach_cpuidle - Connect the given PM domain with cpuidle.
+ * @genpd: PM domain to be connected with cpuidle.
+ * @state: cpuidle state this domain can disable/enable.
+ *
+ * Make a PM domain behave as though it contained a CPU core, that is, instead
+ * of calling its power down routine it will enable the given cpuidle state so
+ * that the cpuidle subsystem can power it down (if possible and desirable).
+ */
+int pm_genpd_attach_cpuidle(struct generic_pm_domain *genpd, int state)
+{
+	struct cpuidle_driver *cpuidle_drv;
+	struct gpd_cpu_data *cpu_data;
+	struct cpuidle_state *idle_state;
+	int ret = 0;
+
+	if (IS_ERR_OR_NULL(genpd) || state < 0)
+		return -EINVAL;
+
+	genpd_acquire_lock(genpd);
+	/* A domain can be connected to cpuidle only once. */
+	if (genpd->cpu_data) {
+		ret = -EEXIST;
+		goto out;
+	}
+	cpu_data = kzalloc(sizeof(*cpu_data), GFP_KERNEL);
+	if (!cpu_data) {
+		ret = -ENOMEM;
+		goto out;
+	}
+	cpuidle_drv = cpuidle_driver_ref();
+	if (!cpuidle_drv) {
+		ret = -ENODEV;
+		goto err_drv;
+	}
+	if (cpuidle_drv->state_count <= state) {
+		ret = -EINVAL;
+		goto err;
+	}
+	idle_state = &cpuidle_drv->states[state];
+	if (!idle_state->disabled) {
+		ret = -EAGAIN;
+		goto err;
+	}
+	cpu_data->idle_state = idle_state;
+	cpu_data->saved_exit_latency = idle_state->exit_latency;
+	genpd->cpu_data = cpu_data;
+	genpd_recalc_cpu_exit_latency(genpd);
+
+ out:
+	genpd_release_lock(genpd);
+	return ret;
+	/* Error unwinding; both paths end by jumping back to out. */
+ err:
+	cpuidle_driver_unref();
+
+ err_drv:
+	kfree(cpu_data);
+	goto out;
+}
+
+/**
+ * pm_genpd_name_attach_cpuidle - Find PM domain and connect cpuidle to it.
+ * @name: Name of the domain, resolved with pm_genpd_lookup_name().
+ * @state: cpuidle state index, passed on to pm_genpd_attach_cpuidle().
+ */
+int pm_genpd_name_attach_cpuidle(const char *name, int state)
+{
+	return pm_genpd_attach_cpuidle(pm_genpd_lookup_name(name), state);
+}
+
+/**
+ * pm_genpd_detach_cpuidle - Remove the cpuidle connection from a PM domain.
+ * @genpd: PM domain to remove the cpuidle connection from.
+ *
+ * Remove the cpuidle connection set up by pm_genpd_attach_cpuidle() from the
+ * given PM domain.
+ */
+int pm_genpd_detach_cpuidle(struct generic_pm_domain *genpd)
+{
+	struct gpd_cpu_data *cpu_data;
+	struct cpuidle_state *idle_state;
+	int ret = 0;
+
+	if (IS_ERR_OR_NULL(genpd))
+		return -EINVAL;
+
+	genpd_acquire_lock(genpd);
+	/* Without a cpuidle connection there is nothing to remove. */
+	cpu_data = genpd->cpu_data;
+	if (!cpu_data) {
+		ret = -ENODEV;
+		goto out;
+	}
+	idle_state = cpu_data->idle_state;
+	if (!idle_state->disabled) {
+		ret = -EAGAIN;
+		goto out;
+	}
+	idle_state->exit_latency = cpu_data->saved_exit_latency;
+	cpuidle_driver_unref();
+	genpd->cpu_data = NULL;
+	kfree(cpu_data);
+
+ out:
+	genpd_release_lock(genpd);
+	return ret;
+}
+
+/**
+ * pm_genpd_name_detach_cpuidle - Find PM domain and disconnect cpuidle from it.
+ * @name: Name of the domain, resolved with pm_genpd_lookup_name().
+ */
+int pm_genpd_name_detach_cpuidle(const char *name)
+{
+	return pm_genpd_detach_cpuidle(pm_genpd_lookup_name(name));
+}
+
+/* Default device callbacks for generic PM domains. */
+
+/**
+ * pm_genpd_default_save_state - Default "save device state" for PM domains.
+ * @dev: Device to handle; its own callback, if set, takes precedence.
+ */
+static int pm_genpd_default_save_state(struct device *dev)
+{
+	int (*fn)(struct device *__dev) = dev_gpd_data(dev)->ops.save_state;
+	const struct dev_pm_ops *pm = NULL;
+
+	if (fn)
+		return fn(dev);
+
+	if (dev->type && dev->type->pm)
+		pm = dev->type->pm;
+	else if (dev->class && dev->class->pm)
+		pm = dev->class->pm;
+	else if (dev->bus && dev->bus->pm)
+		pm = dev->bus->pm;
+
+	fn = pm ? pm->runtime_suspend : NULL;
+	/* Fall back to the driver's callback if none was found above. */
+	if (!fn && dev->driver && dev->driver->pm)
+		fn = dev->driver->pm->runtime_suspend;
+
+	return fn ? fn(dev) : 0;
+}
+
+/**
+ * pm_genpd_default_restore_state - Default PM domains "restore device state".
+ * @dev: Device to handle; its own callback, if set, takes precedence.
+ */
+static int pm_genpd_default_restore_state(struct device *dev)
+{
+	int (*fn)(struct device *__dev) = dev_gpd_data(dev)->ops.restore_state;
+	const struct dev_pm_ops *pm = NULL;
+
+	if (fn)
+		return fn(dev);
+
+	if (dev->type && dev->type->pm)
+		pm = dev->type->pm;
+	else if (dev->class && dev->class->pm)
+		pm = dev->class->pm;
+	else if (dev->bus && dev->bus->pm)
+		pm = dev->bus->pm;
+
+	fn = pm ? pm->runtime_resume : NULL;
+	/* Fall back to the driver's callback if none was found above. */
+	if (!fn && dev->driver && dev->driver->pm)
+		fn = dev->driver->pm->runtime_resume;
+
+	return fn ? fn(dev) : 0;
+}
+
+#ifdef CONFIG_PM_SLEEP
+
+/**
+ * pm_genpd_default_suspend - Default "device suspend" for PM domains.
+ * @dev: Device to handle; its own callback, if set, takes precedence.
+ */
+static int pm_genpd_default_suspend(struct device *dev)
+{
+	int (*fn)(struct device *__dev) = dev_gpd_data(dev)->ops.suspend;
+
+	return !fn ? pm_generic_suspend(dev) : fn(dev);
+}
+
+/**
+ * pm_genpd_default_suspend_late - Default "late device suspend" for PM domains.
+ * @dev: Device to handle; its own callback, if set, takes precedence.
+ */
+static int pm_genpd_default_suspend_late(struct device *dev)
+{
+	int (*fn)(struct device *__dev) = dev_gpd_data(dev)->ops.suspend_late;
+
+	return !fn ? pm_generic_suspend_late(dev) : fn(dev);
+}
+
+/**
+ * pm_genpd_default_resume_early - Default "early device resume" for PM domains.
+ * @dev: Device to handle; its own callback, if set, takes precedence.
+ */
+static int pm_genpd_default_resume_early(struct device *dev)
+{
+	int (*fn)(struct device *__dev) = dev_gpd_data(dev)->ops.resume_early;
+
+	return !fn ? pm_generic_resume_early(dev) : fn(dev);
+}
+
+/**
+ * pm_genpd_default_resume - Default "device resume" for PM domains.
+ * @dev: Device to handle; its own callback, if set, takes precedence.
+ */
+static int pm_genpd_default_resume(struct device *dev)
+{
+	int (*fn)(struct device *__dev) = dev_gpd_data(dev)->ops.resume;
+
+	return !fn ? pm_generic_resume(dev) : fn(dev);
+}
+
+/**
+ * pm_genpd_default_freeze - Default "device freeze" for PM domains.
+ * @dev: Device to handle; its own callback, if set, takes precedence.
+ */
+static int pm_genpd_default_freeze(struct device *dev)
+{
+	int (*fn)(struct device *__dev) = dev_gpd_data(dev)->ops.freeze;
+
+	return !fn ? pm_generic_freeze(dev) : fn(dev);
+}
+
+/**
+ * pm_genpd_default_freeze_late - Default "late device freeze" for PM domains.
+ * @dev: Device to handle; its own callback, if set, takes precedence.
+ */
+static int pm_genpd_default_freeze_late(struct device *dev)
+{
+	int (*fn)(struct device *__dev) = dev_gpd_data(dev)->ops.freeze_late;
+
+	return !fn ? pm_generic_freeze_late(dev) : fn(dev);
+}
+
+/**
+ * pm_genpd_default_thaw_early - Default "early device thaw" for PM domains.
+ * @dev: Device to handle; its own callback, if set, takes precedence.
+ */
+static int pm_genpd_default_thaw_early(struct device *dev)
+{
+	int (*fn)(struct device *__dev) = dev_gpd_data(dev)->ops.thaw_early;
+
+	return !fn ? pm_generic_thaw_early(dev) : fn(dev);
+}
+
+/**
+ * pm_genpd_default_thaw - Default "device thaw" for PM domains.
+ * @dev: Device to handle; its own callback, if set, takes precedence.
+ */
+static int pm_genpd_default_thaw(struct device *dev)
+{
+	int (*fn)(struct device *__dev) = dev_gpd_data(dev)->ops.thaw;
+
+	return !fn ? pm_generic_thaw(dev) : fn(dev);
+}
+
+#else /* !CONFIG_PM_SLEEP */
+/* No system sleep support: the default sleep callbacks are left unset. */
+#define pm_genpd_default_suspend	NULL
+#define pm_genpd_default_suspend_late	NULL
+#define pm_genpd_default_resume_early	NULL
+#define pm_genpd_default_resume		NULL
+#define pm_genpd_default_freeze		NULL
+#define pm_genpd_default_freeze_late	NULL
+#define pm_genpd_default_thaw_early	NULL
+#define pm_genpd_default_thaw		NULL
+
+#endif /* !CONFIG_PM_SLEEP */
+
+/**
+ * pm_genpd_init - Initialize a generic I/O PM domain object.
+ * @genpd: PM domain object to initialize.
+ * @gov: PM domain governor to associate with the domain (may be NULL).
+ * @is_off: If set, start in GPD_STATE_POWER_OFF, else in GPD_STATE_ACTIVE.
+ */
+void pm_genpd_init(struct generic_pm_domain *genpd,
+		   struct dev_power_governor *gov, bool is_off)
+{
+	if (IS_ERR_OR_NULL(genpd))
+		return;
+	/* Initialize all fields, then add the domain to gpd_list. */
+	INIT_LIST_HEAD(&genpd->master_links);
+	INIT_LIST_HEAD(&genpd->slave_links);
+	INIT_LIST_HEAD(&genpd->dev_list);
+	mutex_init(&genpd->lock);
+	genpd->gov = gov;
+	INIT_WORK(&genpd->power_off_work, genpd_power_off_work_fn);
+	genpd->in_progress = 0;
+	atomic_set(&genpd->sd_count, 0);
+	genpd->status = is_off ? GPD_STATE_POWER_OFF : GPD_STATE_ACTIVE;
+	init_waitqueue_head(&genpd->status_wait_queue);
+	genpd->poweroff_task = NULL;
+	genpd->resume_count = 0;
+	genpd->device_count = 0;
+	genpd->max_off_time_ns = -1;
+	genpd->max_off_time_changed = true;
+	genpd->domain.ops.runtime_suspend = pm_genpd_runtime_suspend;
+	genpd->domain.ops.runtime_resume = pm_genpd_runtime_resume;
+	genpd->domain.ops.prepare = pm_genpd_prepare;
+	genpd->domain.ops.suspend = pm_genpd_suspend;
+	genpd->domain.ops.suspend_late = pm_genpd_suspend_late;
+	genpd->domain.ops.suspend_noirq = pm_genpd_suspend_noirq;
+	genpd->domain.ops.resume_noirq = pm_genpd_resume_noirq;
+	genpd->domain.ops.resume_early = pm_genpd_resume_early;
+	genpd->domain.ops.resume = pm_genpd_resume;
+	genpd->domain.ops.freeze = pm_genpd_freeze;
+	genpd->domain.ops.freeze_late = pm_genpd_freeze_late;
+	genpd->domain.ops.freeze_noirq = pm_genpd_freeze_noirq;
+	genpd->domain.ops.thaw_noirq = pm_genpd_thaw_noirq;
+	genpd->domain.ops.thaw_early = pm_genpd_thaw_early;
+	genpd->domain.ops.thaw = pm_genpd_thaw;
+	genpd->domain.ops.poweroff = pm_genpd_suspend;
+	genpd->domain.ops.poweroff_late = pm_genpd_suspend_late;
+	genpd->domain.ops.poweroff_noirq = pm_genpd_suspend_noirq;
+	genpd->domain.ops.restore_noirq = pm_genpd_restore_noirq;
+	genpd->domain.ops.restore_early = pm_genpd_resume_early;
+	genpd->domain.ops.restore = pm_genpd_resume;
+	genpd->domain.ops.complete = pm_genpd_complete;
+	genpd->dev_ops.save_state = pm_genpd_default_save_state;
+	genpd->dev_ops.restore_state = pm_genpd_default_restore_state;
+	genpd->dev_ops.suspend = pm_genpd_default_suspend;
+	genpd->dev_ops.suspend_late = pm_genpd_default_suspend_late;
+	genpd->dev_ops.resume_early = pm_genpd_default_resume_early;
+	genpd->dev_ops.resume = pm_genpd_default_resume;
+	genpd->dev_ops.freeze = pm_genpd_default_freeze;
+	genpd->dev_ops.freeze_late = pm_genpd_default_freeze_late;
+	genpd->dev_ops.thaw_early = pm_genpd_default_thaw_early;
+	genpd->dev_ops.thaw = pm_genpd_default_thaw;
+	mutex_lock(&gpd_list_lock);
+	list_add(&genpd->gpd_list_node, &gpd_list);
+	mutex_unlock(&gpd_list_lock);
+}
diff --git a/drivers/base/power/domain_governor.c b/drivers/base/power/domain_governor.c
new file mode 100644
index 00000000000..a089e3bcdfb
--- /dev/null
+++ b/drivers/base/power/domain_governor.c
@@ -0,0 +1,253 @@
+/*
+ * drivers/base/power/domain_governor.c - Governors for device PM domains.
+ *
+ * Copyright (C) 2011 Rafael J. Wysocki <rjw@sisk.pl>, Renesas Electronics Corp.
+ *
+ * This file is released under the GPLv2.
+ */
+
+#include <linux/kernel.h>
+#include <linux/pm_domain.h>
+#include <linux/pm_qos.h>
+#include <linux/hrtimer.h>
+
+#ifdef CONFIG_PM_RUNTIME
+
+/* Fold the QoS constraint of @dev (in ns) into the minimum at *@data. */
+static int dev_update_qos_constraint(struct device *dev, void *data)
+{
+	s64 *constraint_ns_p = data;
+	/* 64-bit: the us -> ns scaling below may not fit in an s32. */
+	s64 constraint_ns = -1;
+
+	if (dev->power.subsys_data && dev->power.subsys_data->domain_data)
+		constraint_ns = dev_gpd_data(dev)->td.effective_constraint_ns;
+
+	/* No usable cached value: fall back to the device's PM QoS value. */
+	if (constraint_ns < 0) {
+		constraint_ns = dev_pm_qos_read_value(dev);
+		constraint_ns *= NSEC_PER_USEC;
+	}
+	if (constraint_ns == 0)
+		return 0;
+
+	/* Not negative here, because the device has been suspended. */
+	if (constraint_ns < *constraint_ns_p || *constraint_ns_p == 0)
+		*constraint_ns_p = constraint_ns;
+
+	return 0;
+}
+
+/**
+ * default_stop_ok - Default PM domain governor routine for stopping devices.
+ * @dev: Device to check.
+ */
+bool default_stop_ok(struct device *dev)
+{
+	struct gpd_timing_data *td = &dev_gpd_data(dev)->td;
+	unsigned long flags;
+	s64 constraint_ns;
+
+	dev_dbg(dev, "%s()\n", __func__);
+
+	spin_lock_irqsave(&dev->power.lock, flags);
+	/* Nothing changed since the last call: reuse the cached answer. */
+	if (!td->constraint_changed) {
+		bool ret = td->cached_stop_ok;
+
+		spin_unlock_irqrestore(&dev->power.lock, flags);
+		return ret;
+	}
+	td->constraint_changed = false;
+	td->cached_stop_ok = false;
+	td->effective_constraint_ns = -1;
+	constraint_ns = __dev_pm_qos_read_value(dev);
+
+	spin_unlock_irqrestore(&dev->power.lock, flags);
+	/* A negative QoS value vetoes stopping the device. */
+	if (constraint_ns < 0)
+		return false;
+
+	constraint_ns *= NSEC_PER_USEC;
+	/*
+	 * We can walk the children without any additional locking, because
+	 * they all have been suspended at this point and their
+	 * effective_constraint_ns fields won't be modified in parallel with us.
+	 */
+	if (!dev->power.ignore_children)
+		device_for_each_child(dev, &constraint_ns,
+				      dev_update_qos_constraint);
+	/* Take the device's start latency out of the remaining budget. */
+	if (constraint_ns > 0) {
+		constraint_ns -= td->start_latency_ns;
+		if (constraint_ns == 0)
+			return false;
+	}
+	td->effective_constraint_ns = constraint_ns;
+	td->cached_stop_ok = constraint_ns > td->stop_latency_ns ||
+				constraint_ns == 0;
+	/*
+	 * The children have been suspended already, so we don't need to take
+	 * their stop latencies into account here.
+	 */
+	return td->cached_stop_ok;
+}
+
+/**
+ * default_power_down_ok - Default generic PM domain power off governor routine.
+ * @pd: PM domain to check.
+ *
+ * This routine must be executed under the PM domain's lock.
+ *
+ * Returns true if the domain may be powered off; the result is cached.
+ */
+static bool default_power_down_ok(struct dev_pm_domain *pd)
+{
+	struct generic_pm_domain *genpd = pd_to_genpd(pd);
+	struct gpd_link *link;
+	struct pm_domain_data *pdd;
+	s64 min_off_time_ns;
+	s64 off_on_time_ns;
+
+	if (genpd->max_off_time_changed) {
+		/*
+		 * We have to invalidate the cached results for the masters, so
+		 * use the observation that default_power_down_ok() is not
+		 * going to be called for any master until this instance
+		 * returns.
+		 */
+		list_for_each_entry(link, &genpd->slave_links, slave_node)
+			link->master->max_off_time_changed = true;
+
+		genpd->max_off_time_changed = false;
+		genpd->cached_power_down_ok = false;
+		genpd->max_off_time_ns = -1;
+	} else {
+		return genpd->cached_power_down_ok;
+	}
+
+	off_on_time_ns = genpd->power_off_latency_ns +
+				genpd->power_on_latency_ns;
+	/*
+	 * It doesn't make sense to remove power from the domain if saving
+	 * the state of all devices in it and the power off/power on operations
+	 * take too much time.
+	 *
+	 * All devices in this domain have been stopped already at this point.
+	 */
+	list_for_each_entry(pdd, &genpd->dev_list, list_node) {
+		if (pdd->dev->driver)
+			off_on_time_ns +=
+				to_gpd_data(pdd)->td.save_state_latency_ns;
+	}
+
+	min_off_time_ns = -1;
+	/*
+	 * Check if subdomains can be off for enough time.
+	 *
+	 * All subdomains have been powered off already at this point.
+	 */
+	list_for_each_entry(link, &genpd->master_links, master_node) {
+		struct generic_pm_domain *sd = link->slave;
+		s64 sd_max_off_ns = sd->max_off_time_ns;
+
+		if (sd_max_off_ns < 0)
+			continue;
+
+		/*
+		 * Check if the subdomain is allowed to be off long enough for
+		 * the current domain to turn off and on (that's how much time
+		 * it will have to wait worst case).
+		 */
+		if (sd_max_off_ns <= off_on_time_ns)
+			return false;
+
+		if (min_off_time_ns > sd_max_off_ns || min_off_time_ns < 0)
+			min_off_time_ns = sd_max_off_ns;
+	}
+
+	/*
+	 * Check if the devices in the domain can be off enough time.
+	 */
+	list_for_each_entry(pdd, &genpd->dev_list, list_node) {
+		struct gpd_timing_data *td;
+		s64 constraint_ns;
+
+		if (!pdd->dev->driver)
+			continue;
+
+		/*
+		 * Check if the device is allowed to be off long enough for the
+		 * domain to turn off and on (that's how much time it will
+		 * have to wait worst case).
+		 */
+		td = &to_gpd_data(pdd)->td;
+		constraint_ns = td->effective_constraint_ns;
+		/* default_stop_ok() need not be called before us. */
+		if (constraint_ns < 0) {
+			constraint_ns = dev_pm_qos_read_value(pdd->dev);
+			constraint_ns *= NSEC_PER_USEC;
+		}
+		if (constraint_ns == 0)
+			continue;
+
+		/*
+		 * constraint_ns cannot be negative here, because the device has
+		 * been suspended.
+		 */
+		constraint_ns -= td->restore_state_latency_ns;
+		if (constraint_ns <= off_on_time_ns)
+			return false;
+
+		if (min_off_time_ns > constraint_ns || min_off_time_ns < 0)
+			min_off_time_ns = constraint_ns;
+	}
+
+	genpd->cached_power_down_ok = true;
+
+	/*
+	 * If the computed minimum device off time is negative, there are no
+	 * latency constraints, so the domain can spend arbitrary time in the
+	 * "off" state.
+	 */
+	if (min_off_time_ns < 0)
+		return true;
+
+	/*
+	 * The difference between the computed minimum subdomain or device off
+	 * time and the time needed to turn the domain on is the maximum
+	 * theoretical time this domain can spend in the "off" state.
+	 */
+	genpd->max_off_time_ns = min_off_time_ns - genpd->power_on_latency_ns;
+	return true;
+}
+
+static bool always_on_power_down_ok(struct dev_pm_domain *domain)
+{
+	return false;	/* power-down is never permitted */
+}
+
+#else /* !CONFIG_PM_RUNTIME */
+/* No runtime PM: never stop devices, no power-down decision hooks. */
+bool default_stop_ok(struct device *dev)
+{
+	return false;
+}
+
+#define default_power_down_ok	NULL
+#define always_on_power_down_ok	NULL
+
+#endif /* !CONFIG_PM_RUNTIME */
+
+struct dev_power_governor simple_qos_governor = {
+	.power_down_ok = default_power_down_ok,
+	.stop_ok = default_stop_ok,
+};
+
+/**
+ * pm_domain_always_on_gov - A governor implementing an always-on policy
+ */
+struct dev_power_governor pm_domain_always_on_gov = {
+	.stop_ok = default_stop_ok,
+	.power_down_ok = always_on_power_down_ok,
+};
diff --git a/drivers/base/power/generic_ops.c b/drivers/base/power/generic_ops.c
index 81f2c84697f..96a92db83ca 100644
--- a/drivers/base/power/generic_ops.c
+++ b/drivers/base/power/generic_ops.c
@@ -8,38 +8,16 @@
 
 #include <linux/pm.h>
 #include <linux/pm_runtime.h>
+#include <linux/export.h>
 
-#ifdef CONFIG_PM_RUNTIME
-/**
- * pm_generic_runtime_idle - Generic runtime idle callback for subsystems.
- * @dev: Device to handle.
- *
- * If PM operations are defined for the @dev's driver and they include
- * ->runtime_idle(), execute it and return its error code, if nonzero.
- * Otherwise, execute pm_runtime_suspend() for the device and return 0.
- */
-int pm_generic_runtime_idle(struct device *dev)
-{
-	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
-
-	if (pm && pm->runtime_idle) {
-		int ret = pm->runtime_idle(dev);
-		if (ret)
-			return ret;
-	}
-
-	pm_runtime_suspend(dev);
-	return 0;
-}
-EXPORT_SYMBOL_GPL(pm_generic_runtime_idle);
-
+#ifdef CONFIG_PM
 /**
  * pm_generic_runtime_suspend - Generic runtime suspend callback for subsystems.
  * @dev: Device to suspend.
  *
  * If PM operations are defined for the @dev's driver and they include
  * ->runtime_suspend(), execute it and return its error code.  Otherwise,
- * return -EINVAL.
+ * return 0.
  */
 int pm_generic_runtime_suspend(struct device *dev)
 {
@@ -58,7 +36,7 @@ EXPORT_SYMBOL_GPL(pm_generic_runtime_suspend);
  *
  * If PM operations are defined for the @dev's driver and they include
  * ->runtime_resume(), execute it and return its error code.  Otherwise,
- * return -EINVAL.
+ * return 0.
  */
 int pm_generic_runtime_resume(struct device *dev)
 {
@@ -70,46 +48,49 @@ int pm_generic_runtime_resume(struct device *dev)
 	return ret;
 }
 EXPORT_SYMBOL_GPL(pm_generic_runtime_resume);
-#endif /* CONFIG_PM_RUNTIME */
+#endif /* CONFIG_PM */
 
 #ifdef CONFIG_PM_SLEEP
 /**
- * __pm_generic_call - Generic suspend/freeze/poweroff/thaw subsystem callback.
- * @dev: Device to handle.
- * @event: PM transition of the system under way.
+ * pm_generic_prepare - Generic routine preparing a device for power transition.
+ * @dev: Device to prepare.
  *
- * If the device has not been suspended at run time, execute the
- * suspend/freeze/poweroff/thaw callback provided by its driver, if defined, and
- * return its error code.  Otherwise, return zero.
+ * Prepare a device for a system-wide power transition.
+ */
+int pm_generic_prepare(struct device *dev)
+{
+	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
+
+	/* Without a driver-provided ->prepare() there is nothing to do. */
+	if (!pm || !pm->prepare)
+		return 0;
+
+	return pm->prepare(dev);
+}
+
+/**
+ * pm_generic_suspend_noirq - Generic suspend_noirq callback for subsystems.
+ * @dev: Device to suspend.
  */
-static int __pm_generic_call(struct device *dev, int event)
+int pm_generic_suspend_noirq(struct device *dev)
 {
 	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
-	int (*callback)(struct device *);
 
-	if (!pm || pm_runtime_suspended(dev))
-		return 0;
+	return pm && pm->suspend_noirq ? pm->suspend_noirq(dev) : 0;
+}
+EXPORT_SYMBOL_GPL(pm_generic_suspend_noirq);
 
-	switch (event) {
-	case PM_EVENT_SUSPEND:
-		callback = pm->suspend;
-		break;
-	case PM_EVENT_FREEZE:
-		callback = pm->freeze;
-		break;
-	case PM_EVENT_HIBERNATE:
-		callback = pm->poweroff;
-		break;
-	case PM_EVENT_THAW:
-		callback = pm->thaw;
-		break;
-	default:
-		callback = NULL;
-		break;
-	}
+/**
+ * pm_generic_suspend_late - Generic suspend_late callback for subsystems.
+ * @dev: Device to suspend.
+ */
+int pm_generic_suspend_late(struct device *dev)
+{
+	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
 
-	return callback ? callback(dev) : 0;
+	return pm && pm->suspend_late ? pm->suspend_late(dev) : 0;
 }
+EXPORT_SYMBOL_GPL(pm_generic_suspend_late);
 
 /**
  * pm_generic_suspend - Generic suspend callback for subsystems.
@@ -117,82 +98,143 @@ static int __pm_generic_call(struct device *dev, int event)
  */
 int pm_generic_suspend(struct device *dev)
 {
-	return __pm_generic_call(dev, PM_EVENT_SUSPEND);
+	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
+
+	return pm && pm->suspend ? pm->suspend(dev) : 0;
 }
 EXPORT_SYMBOL_GPL(pm_generic_suspend);
 
 /**
+ * pm_generic_freeze_noirq - Generic freeze_noirq callback for subsystems.
+ * @dev: Device to freeze.
+ */
+int pm_generic_freeze_noirq(struct device *dev)
+{
+	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
+
+	return pm && pm->freeze_noirq ? pm->freeze_noirq(dev) : 0;
+}
+EXPORT_SYMBOL_GPL(pm_generic_freeze_noirq);
+
+/**
+ * pm_generic_freeze_late - Generic freeze_late callback for subsystems.
+ * @dev: Device to freeze.
+ */
+int pm_generic_freeze_late(struct device *dev)
+{
+	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
+
+	return pm && pm->freeze_late ? pm->freeze_late(dev) : 0;
+}
+EXPORT_SYMBOL_GPL(pm_generic_freeze_late);
+
+/**
  * pm_generic_freeze - Generic freeze callback for subsystems.
  * @dev: Device to freeze.
  */
 int pm_generic_freeze(struct device *dev)
 {
-	return __pm_generic_call(dev, PM_EVENT_FREEZE);
+	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
+
+	return pm && pm->freeze ? pm->freeze(dev) : 0;
 }
 EXPORT_SYMBOL_GPL(pm_generic_freeze);
 
 /**
+ * pm_generic_poweroff_noirq - Generic poweroff_noirq callback for subsystems.
+ * @dev: Device to handle.
+ */
+int pm_generic_poweroff_noirq(struct device *dev)
+{
+	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
+
+	return pm && pm->poweroff_noirq ? pm->poweroff_noirq(dev) : 0;
+}
+EXPORT_SYMBOL_GPL(pm_generic_poweroff_noirq);
+
+/**
+ * pm_generic_poweroff_late - Generic poweroff_late callback for subsystems.
+ * @dev: Device to handle.
+ */
+int pm_generic_poweroff_late(struct device *dev)
+{
+	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
+
+	return pm && pm->poweroff_late ? pm->poweroff_late(dev) : 0;
+}
+EXPORT_SYMBOL_GPL(pm_generic_poweroff_late);
+
+/**
  * pm_generic_poweroff - Generic poweroff callback for subsystems.
  * @dev: Device to handle.
  */
 int pm_generic_poweroff(struct device *dev)
 {
-	return __pm_generic_call(dev, PM_EVENT_HIBERNATE);
+	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
+
+	return pm && pm->poweroff ? pm->poweroff(dev) : 0;
 }
 EXPORT_SYMBOL_GPL(pm_generic_poweroff);
 
 /**
+ * pm_generic_thaw_noirq - Generic thaw_noirq callback for subsystems.
+ * @dev: Device to thaw.
+ */
+int pm_generic_thaw_noirq(struct device *dev)
+{
+	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
+
+	return pm && pm->thaw_noirq ? pm->thaw_noirq(dev) : 0;
+}
+EXPORT_SYMBOL_GPL(pm_generic_thaw_noirq);
+
+/**
+ * pm_generic_thaw_early - Generic thaw_early callback for subsystems.
+ * @dev: Device to thaw.
+ */
+int pm_generic_thaw_early(struct device *dev)
+{
+	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
+
+	return pm && pm->thaw_early ? pm->thaw_early(dev) : 0;
+}
+EXPORT_SYMBOL_GPL(pm_generic_thaw_early);
+
+/**
  * pm_generic_thaw - Generic thaw callback for subsystems.
  * @dev: Device to thaw.
  */
 int pm_generic_thaw(struct device *dev)
 {
-	return __pm_generic_call(dev, PM_EVENT_THAW);
+	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
+
+	return pm && pm->thaw ? pm->thaw(dev) : 0;
 }
 EXPORT_SYMBOL_GPL(pm_generic_thaw);
 
 /**
- * __pm_generic_resume - Generic resume/restore callback for subsystems.
- * @dev: Device to handle.
- * @event: PM transition of the system under way.
- *
- * Execute the resume/resotre callback provided by the @dev's driver, if
- * defined.  If it returns 0, change the device's runtime PM status to 'active'.
- * Return the callback's error code.
+ * pm_generic_resume_noirq - Generic resume_noirq callback for subsystems.
+ * @dev: Device to resume.
  */
-static int __pm_generic_resume(struct device *dev, int event)
+int pm_generic_resume_noirq(struct device *dev)
 {
 	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
-	int (*callback)(struct device *);
-	int ret;
 
-	if (!pm)
-		return 0;
-
-	switch (event) {
-	case PM_EVENT_RESUME:
-		callback = pm->resume;
-		break;
-	case PM_EVENT_RESTORE:
-		callback = pm->restore;
-		break;
-	default:
-		callback = NULL;
-		break;
-	}
-
-	if (!callback)
-		return 0;
-
-	ret = callback(dev);
-	if (!ret) {
-		pm_runtime_disable(dev);
-		pm_runtime_set_active(dev);
-		pm_runtime_enable(dev);
-	}
+	return pm && pm->resume_noirq ? pm->resume_noirq(dev) : 0;
+}
+EXPORT_SYMBOL_GPL(pm_generic_resume_noirq);
 
-	return ret;
+/**
+ * pm_generic_resume_early - Generic resume_early callback for subsystems.
+ * @dev: Device to resume.
+ */
+int pm_generic_resume_early(struct device *dev)
+{
+	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
+
+	return pm && pm->resume_early ? pm->resume_early(dev) : 0;
 }
+EXPORT_SYMBOL_GPL(pm_generic_resume_early);
 
 /**
  * pm_generic_resume - Generic resume callback for subsystems.
@@ -200,34 +242,65 @@ static int __pm_generic_resume(struct device *dev, int event)
  */
 int pm_generic_resume(struct device *dev)
 {
-	return __pm_generic_resume(dev, PM_EVENT_RESUME);
+	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
+
+	return pm && pm->resume ? pm->resume(dev) : 0;
 }
 EXPORT_SYMBOL_GPL(pm_generic_resume);
 
 /**
+ * pm_generic_restore_noirq - Generic restore_noirq callback for subsystems.
+ * @dev: Device to restore.
+ */
+int pm_generic_restore_noirq(struct device *dev)
+{
+	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
+
+	return pm && pm->restore_noirq ? pm->restore_noirq(dev) : 0;
+}
+EXPORT_SYMBOL_GPL(pm_generic_restore_noirq);
+
+/**
+ * pm_generic_restore_early - Generic restore_early callback for subsystems.
+ * @dev: Device to resume.
+ */
+int pm_generic_restore_early(struct device *dev)
+{
+	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
+
+	return pm && pm->restore_early ? pm->restore_early(dev) : 0;
+}
+EXPORT_SYMBOL_GPL(pm_generic_restore_early);
+
+/**
  * pm_generic_restore - Generic restore callback for subsystems.
  * @dev: Device to restore.
  */
 int pm_generic_restore(struct device *dev)
 {
-	return __pm_generic_resume(dev, PM_EVENT_RESTORE);
+	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
+
+	return pm && pm->restore ? pm->restore(dev) : 0;
 }
 EXPORT_SYMBOL_GPL(pm_generic_restore);
-#endif /* CONFIG_PM_SLEEP */
 
-struct dev_pm_ops generic_subsys_pm_ops = {
-#ifdef CONFIG_PM_SLEEP
-	.suspend = pm_generic_suspend,
-	.resume = pm_generic_resume,
-	.freeze = pm_generic_freeze,
-	.thaw = pm_generic_thaw,
-	.poweroff = pm_generic_poweroff,
-	.restore = pm_generic_restore,
-#endif
-#ifdef CONFIG_PM_RUNTIME
-	.runtime_suspend = pm_generic_runtime_suspend,
-	.runtime_resume = pm_generic_runtime_resume,
-	.runtime_idle = pm_generic_runtime_idle,
-#endif
-};
-EXPORT_SYMBOL_GPL(generic_subsys_pm_ops);
+/**
+ * pm_generic_complete - Generic routine completing a device power transition.
+ * @dev: Device to handle.
+ *
+ * Complete a device power transition during a system-wide power transition.
+ */
+void pm_generic_complete(struct device *dev)
+{
+	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
+
+	if (pm && pm->complete)
+		pm->complete(dev);
+
+	/*
+	 * Let runtime PM try to suspend devices that haven't been in use before
+	 * going into the system-wide sleep state we're resuming from.
+	 */
+	pm_request_idle(dev);
+}
+#endif /* CONFIG_PM_SLEEP */
diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index 31b526661ec..bf412961a93 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -8,7 +8,7 @@
  *
  *
  * The driver model core calls device_pm_add() when a device is registered.
- * This will intialize the embedded device_pm_info object in the device
+ * This will initialize the embedded device_pm_info object in the device
  * and add it to the list of power-controlled devices. sysfs entries for
  * controlling device power management will also be added.
  *
@@ -19,6 +19,7 @@
 
 #include <linux/device.h>
 #include <linux/kallsyms.h>
+#include <linux/export.h>
 #include <linux/mutex.h>
 #include <linux/pm.h>
 #include <linux/pm_runtime.h>
@@ -26,10 +27,17 @@
 #include <linux/interrupt.h>
 #include <linux/sched.h>
 #include <linux/async.h>
+#include <linux/suspend.h>
+#include <trace/events/power.h>
+#include <linux/cpufreq.h>
+#include <linux/cpuidle.h>
+#include <linux/timer.h>
 
 #include "../base.h"
 #include "power.h"
 
+typedef int (*pm_callback_t)(struct device *);
+
 /*
  * The entries in the dpm_list list are in a depth first order, simply
  * because children are guaranteed to be discovered after parents, and
@@ -41,30 +49,55 @@
  */
 
 LIST_HEAD(dpm_list);
+static LIST_HEAD(dpm_prepared_list);
+static LIST_HEAD(dpm_suspended_list);
+static LIST_HEAD(dpm_late_early_list);
+static LIST_HEAD(dpm_noirq_list);
 
+struct suspend_stats suspend_stats;
 static DEFINE_MUTEX(dpm_list_mtx);
 static pm_message_t pm_transition;
 
-/*
- * Set once the preparation of devices for a PM transition has started, reset
- * before starting to resume devices.  Protected by dpm_list_mtx.
- */
-static bool transition_started;
-
 static int async_error;
 
+static char *pm_verb(int event)
+{
+	switch (event) {
+	case PM_EVENT_SUSPEND:
+		return "suspend";
+	case PM_EVENT_RESUME:
+		return "resume";
+	case PM_EVENT_FREEZE:
+		return "freeze";
+	case PM_EVENT_QUIESCE:
+		return "quiesce";
+	case PM_EVENT_HIBERNATE:
+		return "hibernate";
+	case PM_EVENT_THAW:
+		return "thaw";
+	case PM_EVENT_RESTORE:
+		return "restore";
+	case PM_EVENT_RECOVER:
+		return "recover";
+	default:
+		return "(unknown PM event)";
+	}
+}
+
 /**
- * device_pm_init - Initialize the PM-related part of a device object.
+ * device_pm_sleep_init - Initialize system suspend-related device fields.
  * @dev: Device object being initialized.
  */
-void device_pm_init(struct device *dev)
+void device_pm_sleep_init(struct device *dev)
 {
-	dev->power.status = DPM_ON;
+	dev->power.is_prepared = false;
+	dev->power.is_suspended = false;
+	dev->power.is_noirq_suspended = false;
+	dev->power.is_late_suspended = false;
 	init_completion(&dev->power.completion);
 	complete_all(&dev->power.completion);
 	dev->power.wakeup = NULL;
-	spin_lock_init(&dev->power.lock);
-	pm_runtime_init(dev);
+	INIT_LIST_HEAD(&dev->power.entry);
 }
 
 /**
@@ -90,22 +123,11 @@ void device_pm_unlock(void)
 void device_pm_add(struct device *dev)
 {
 	pr_debug("PM: Adding info for %s:%s\n",
-		 dev->bus ? dev->bus->name : "No Bus",
-		 kobject_name(&dev->kobj));
+		 dev->bus ? dev->bus->name : "No Bus", dev_name(dev));
 	mutex_lock(&dpm_list_mtx);
-	if (dev->parent) {
-		if (dev->parent->power.status >= DPM_SUSPENDING)
-			dev_warn(dev, "parent %s should not be sleeping\n",
-				 dev_name(dev->parent));
-	} else if (transition_started) {
-		/*
-		 * We refuse to register parentless devices while a PM
-		 * transition is in progress in order to avoid leaving them
-		 * unhandled down the road
-		 */
-		dev_WARN(dev, "Parentless device registered during a PM transaction\n");
-	}
-
+	if (dev->parent && dev->parent->power.is_prepared)
+		dev_warn(dev, "parent %s should not be sleeping\n",
+			dev_name(dev->parent));
 	list_add_tail(&dev->power.entry, &dpm_list);
 	mutex_unlock(&dpm_list_mtx);
 }
@@ -117,8 +139,7 @@ void device_pm_add(struct device *dev)
 void device_pm_remove(struct device *dev)
 {
 	pr_debug("PM: Removing info for %s:%s\n",
-		 dev->bus ? dev->bus->name : "No Bus",
-		 kobject_name(&dev->kobj));
+		 dev->bus ? dev->bus->name : "No Bus", dev_name(dev));
 	complete_all(&dev->power.completion);
 	mutex_lock(&dpm_list_mtx);
 	list_del_init(&dev->power.entry);
@@ -135,10 +156,8 @@ void device_pm_remove(struct device *dev)
 void device_pm_move_before(struct device *deva, struct device *devb)
 {
 	pr_debug("PM: Moving %s:%s before %s:%s\n",
-		 deva->bus ? deva->bus->name : "No Bus",
-		 kobject_name(&deva->kobj),
-		 devb->bus ? devb->bus->name : "No Bus",
-		 kobject_name(&devb->kobj));
+		 deva->bus ? deva->bus->name : "No Bus", dev_name(deva),
+		 devb->bus ? devb->bus->name : "No Bus", dev_name(devb));
 	/* Delete deva from dpm_list and reinsert before devb. */
 	list_move_tail(&deva->power.entry, &devb->power.entry);
 }
@@ -151,10 +170,8 @@ void device_pm_move_before(struct device *deva, struct device *devb)
 void device_pm_move_after(struct device *deva, struct device *devb)
 {
 	pr_debug("PM: Moving %s:%s after %s:%s\n",
-		 deva->bus ? deva->bus->name : "No Bus",
-		 kobject_name(&deva->kobj),
-		 devb->bus ? devb->bus->name : "No Bus",
-		 kobject_name(&devb->kobj));
+		 deva->bus ? deva->bus->name : "No Bus", dev_name(deva),
+		 devb->bus ? devb->bus->name : "No Bus", dev_name(devb));
 	/* Delete deva from dpm_list and reinsert after devb. */
 	list_move(&deva->power.entry, &devb->power.entry);
 }
@@ -166,8 +183,7 @@ void device_pm_move_after(struct device *deva, struct device *devb)
 void device_pm_move_last(struct device *dev)
 {
 	pr_debug("PM: Moving %s:%s to end of list\n",
-		 dev->bus ? dev->bus->name : "No Bus",
-		 kobject_name(&dev->kobj));
+		 dev->bus ? dev->bus->name : "No Bus", dev_name(dev));
 	list_move_tail(&dev->power.entry, &dpm_list);
 }
 
@@ -175,9 +191,10 @@ static ktime_t initcall_debug_start(struct device *dev)
 {
 	ktime_t calltime = ktime_set(0, 0);
 
-	if (initcall_debug) {
-		pr_info("calling  %s+ @ %i\n",
-				dev_name(dev), task_pid_nr(current));
+	if (pm_print_times_enabled) {
+		pr_info("calling  %s+ @ %i, parent: %s\n",
+			dev_name(dev), task_pid_nr(current),
+			dev->parent ? dev_name(dev->parent) : "none");
 		calltime = ktime_get();
 	}
 
@@ -185,15 +202,17 @@ static ktime_t initcall_debug_start(struct device *dev)
 }
 
 static void initcall_debug_report(struct device *dev, ktime_t calltime,
-				  int error)
+				  int error, pm_message_t state, char *info)
 {
-	ktime_t delta, rettime;
+	ktime_t rettime;
+	s64 nsecs;
 
-	if (initcall_debug) {
-		rettime = ktime_get();
-		delta = ktime_sub(rettime, calltime);
+	rettime = ktime_get();
+	nsecs = (s64) ktime_to_ns(ktime_sub(rettime, calltime));
+
+	if (pm_print_times_enabled) {
 		pr_info("call %s+ returned %d after %Ld usecs\n", dev_name(dev),
-			error, (unsigned long long)ktime_to_ns(delta) >> 10);
+			error, (unsigned long long)nsecs >> 10);
 	}
 }
 
@@ -223,175 +242,103 @@ static void dpm_wait_for_children(struct device *dev, bool async)
 }
 
 /**
- * pm_op - Execute the PM operation appropriate for given PM event.
- * @dev: Device to handle.
+ * pm_op - Return the PM operation appropriate for given PM event.
  * @ops: PM operations to choose from.
  * @state: PM transition of the system being carried out.
  */
-static int pm_op(struct device *dev,
-		 const struct dev_pm_ops *ops,
-		 pm_message_t state)
+static pm_callback_t pm_op(const struct dev_pm_ops *ops, pm_message_t state)
 {
-	int error = 0;
-	ktime_t calltime;
-
-	calltime = initcall_debug_start(dev);
-
 	switch (state.event) {
 #ifdef CONFIG_SUSPEND
 	case PM_EVENT_SUSPEND:
-		if (ops->suspend) {
-			error = ops->suspend(dev);
-			suspend_report_result(ops->suspend, error);
-		}
-		break;
+		return ops->suspend;
 	case PM_EVENT_RESUME:
-		if (ops->resume) {
-			error = ops->resume(dev);
-			suspend_report_result(ops->resume, error);
-		}
-		break;
+		return ops->resume;
 #endif /* CONFIG_SUSPEND */
-#ifdef CONFIG_HIBERNATION
+#ifdef CONFIG_HIBERNATE_CALLBACKS
 	case PM_EVENT_FREEZE:
 	case PM_EVENT_QUIESCE:
-		if (ops->freeze) {
-			error = ops->freeze(dev);
-			suspend_report_result(ops->freeze, error);
-		}
-		break;
+		return ops->freeze;
 	case PM_EVENT_HIBERNATE:
-		if (ops->poweroff) {
-			error = ops->poweroff(dev);
-			suspend_report_result(ops->poweroff, error);
-		}
-		break;
+		return ops->poweroff;
 	case PM_EVENT_THAW:
 	case PM_EVENT_RECOVER:
-		if (ops->thaw) {
-			error = ops->thaw(dev);
-			suspend_report_result(ops->thaw, error);
-		}
+		return ops->thaw;
 		break;
 	case PM_EVENT_RESTORE:
-		if (ops->restore) {
-			error = ops->restore(dev);
-			suspend_report_result(ops->restore, error);
-		}
-		break;
-#endif /* CONFIG_HIBERNATION */
-	default:
-		error = -EINVAL;
+		return ops->restore;
+#endif /* CONFIG_HIBERNATE_CALLBACKS */
 	}
 
-	initcall_debug_report(dev, calltime, error);
-
-	return error;
+	return NULL;
 }
 
 /**
- * pm_noirq_op - Execute the PM operation appropriate for given PM event.
- * @dev: Device to handle.
+ * pm_late_early_op - Return the PM operation appropriate for given PM event.
  * @ops: PM operations to choose from.
  * @state: PM transition of the system being carried out.
  *
- * The driver of @dev will not receive interrupts while this function is being
- * executed.
+ * Runtime PM is disabled for the device while the returned callback runs.
  */
-static int pm_noirq_op(struct device *dev,
-			const struct dev_pm_ops *ops,
-			pm_message_t state)
+static pm_callback_t pm_late_early_op(const struct dev_pm_ops *ops,
+				      pm_message_t state)
 {
-	int error = 0;
-	ktime_t calltime, delta, rettime;
-
-	if (initcall_debug) {
-		pr_info("calling  %s+ @ %i, parent: %s\n",
-				dev_name(dev), task_pid_nr(current),
-				dev->parent ? dev_name(dev->parent) : "none");
-		calltime = ktime_get();
-	}
-
 	switch (state.event) {
 #ifdef CONFIG_SUSPEND
 	case PM_EVENT_SUSPEND:
-		if (ops->suspend_noirq) {
-			error = ops->suspend_noirq(dev);
-			suspend_report_result(ops->suspend_noirq, error);
-		}
-		break;
+		return ops->suspend_late;
 	case PM_EVENT_RESUME:
-		if (ops->resume_noirq) {
-			error = ops->resume_noirq(dev);
-			suspend_report_result(ops->resume_noirq, error);
-		}
-		break;
+		return ops->resume_early;
 #endif /* CONFIG_SUSPEND */
-#ifdef CONFIG_HIBERNATION
+#ifdef CONFIG_HIBERNATE_CALLBACKS
 	case PM_EVENT_FREEZE:
 	case PM_EVENT_QUIESCE:
-		if (ops->freeze_noirq) {
-			error = ops->freeze_noirq(dev);
-			suspend_report_result(ops->freeze_noirq, error);
-		}
-		break;
+		return ops->freeze_late;
 	case PM_EVENT_HIBERNATE:
-		if (ops->poweroff_noirq) {
-			error = ops->poweroff_noirq(dev);
-			suspend_report_result(ops->poweroff_noirq, error);
-		}
-		break;
+		return ops->poweroff_late;
 	case PM_EVENT_THAW:
 	case PM_EVENT_RECOVER:
-		if (ops->thaw_noirq) {
-			error = ops->thaw_noirq(dev);
-			suspend_report_result(ops->thaw_noirq, error);
-		}
-		break;
+		return ops->thaw_early;
 	case PM_EVENT_RESTORE:
-		if (ops->restore_noirq) {
-			error = ops->restore_noirq(dev);
-			suspend_report_result(ops->restore_noirq, error);
-		}
-		break;
-#endif /* CONFIG_HIBERNATION */
-	default:
-		error = -EINVAL;
+		return ops->restore_early;
+#endif /* CONFIG_HIBERNATE_CALLBACKS */
 	}
 
-	if (initcall_debug) {
-		rettime = ktime_get();
-		delta = ktime_sub(rettime, calltime);
-		printk("initcall %s_i+ returned %d after %Ld usecs\n",
-			dev_name(dev), error,
-			(unsigned long long)ktime_to_ns(delta) >> 10);
-	}
-
-	return error;
+	return NULL;
 }
 
-static char *pm_verb(int event)
+/**
+ * pm_noirq_op - Return the PM operation appropriate for given PM event.
+ * @ops: PM operations to choose from.
+ * @state: PM transition of the system being carried out.
+ *
+ * The driver of the device will not receive interrupts while the returned
+ * callback is being executed.
+ */
+static pm_callback_t pm_noirq_op(const struct dev_pm_ops *ops, pm_message_t state)
 {
-	switch (event) {
+	switch (state.event) {
+#ifdef CONFIG_SUSPEND
 	case PM_EVENT_SUSPEND:
-		return "suspend";
+		return ops->suspend_noirq;
 	case PM_EVENT_RESUME:
-		return "resume";
+		return ops->resume_noirq;
+#endif /* CONFIG_SUSPEND */
+#ifdef CONFIG_HIBERNATE_CALLBACKS
 	case PM_EVENT_FREEZE:
-		return "freeze";
 	case PM_EVENT_QUIESCE:
-		return "quiesce";
+		return ops->freeze_noirq;
 	case PM_EVENT_HIBERNATE:
-		return "hibernate";
+		return ops->poweroff_noirq;
 	case PM_EVENT_THAW:
-		return "thaw";
-	case PM_EVENT_RESTORE:
-		return "restore";
 	case PM_EVENT_RECOVER:
-		return "recover";
-	default:
-		return "(unknown PM event)";
+		return ops->thaw_noirq;
+	case PM_EVENT_RESTORE:
+		return ops->restore_noirq;
+#endif /* CONFIG_HIBERNATE_CALLBACKS */
 	}
+
+	return NULL;
 }
 
 static void pm_dev_dbg(struct device *dev, pm_message_t state, char *info)
@@ -405,7 +352,7 @@ static void pm_dev_err(struct device *dev, pm_message_t state, char *info,
 			int error)
 {
 	printk(KERN_ERR "PM: Device %s failed to %s%s: error %d\n",
-		kobject_name(&dev->kobj), pm_verb(state.event), info, error);
+		dev_name(dev), pm_verb(state.event), info, error);
 }
 
 static void dpm_show_time(ktime_t starttime, pm_message_t state, char *info)
@@ -425,6 +372,93 @@ static void dpm_show_time(ktime_t starttime, pm_message_t state, char *info)
 		usecs / USEC_PER_MSEC, usecs % USEC_PER_MSEC);
 }
 
+static int dpm_run_callback(pm_callback_t cb, struct device *dev,
+			    pm_message_t state, char *info)
+{
+	ktime_t calltime;
+	int error;
+
+	if (!cb)
+		return 0;
+
+	calltime = initcall_debug_start(dev);
+
+	pm_dev_dbg(dev, state, info);
+	trace_device_pm_callback_start(dev, info, state.event);
+	error = cb(dev);
+	trace_device_pm_callback_end(dev, error);
+	suspend_report_result(cb, error);
+
+	initcall_debug_report(dev, calltime, error, state, info);
+
+	return error;
+}
+
+#ifdef CONFIG_DPM_WATCHDOG
+struct dpm_watchdog {
+	struct device		*dev;
+	struct task_struct	*tsk;
+	struct timer_list	timer;
+};
+
+#define DECLARE_DPM_WATCHDOG_ON_STACK(wd) \
+	struct dpm_watchdog wd
+
+/**
+ * dpm_watchdog_handler - Driver suspend / resume watchdog handler.
+ * @data: Watchdog object address.
+ *
+ * Called when a driver has timed out suspending or resuming.
+ * There's not much we can do here to recover so panic() to
+ * capture a crash-dump in pstore.
+ */
+static void dpm_watchdog_handler(unsigned long data)
+{
+	struct dpm_watchdog *wd = (void *)data;
+
+	dev_emerg(wd->dev, "**** DPM device timeout ****\n");
+	show_stack(wd->tsk, NULL);
+	panic("%s %s: unrecoverable failure\n",
+		dev_driver_string(wd->dev), dev_name(wd->dev));
+}
+
+/**
+ * dpm_watchdog_set - Enable pm watchdog for given device.
+ * @wd: Watchdog. Must be allocated on the stack.
+ * @dev: Device to handle.
+ */
+static void dpm_watchdog_set(struct dpm_watchdog *wd, struct device *dev)
+{
+	struct timer_list *timer = &wd->timer;
+
+	wd->dev = dev;
+	wd->tsk = current;
+
+	init_timer_on_stack(timer);
+	/* use same timeout value for both suspend and resume */
+	timer->expires = jiffies + HZ * CONFIG_DPM_WATCHDOG_TIMEOUT;
+	timer->function = dpm_watchdog_handler;
+	timer->data = (unsigned long)wd;
+	add_timer(timer);
+}
+
+/**
+ * dpm_watchdog_clear - Disable suspend/resume watchdog.
+ * @wd: Watchdog to disable.
+ */
+static void dpm_watchdog_clear(struct dpm_watchdog *wd)
+{
+	struct timer_list *timer = &wd->timer;
+
+	del_timer_sync(timer);
+	destroy_timer_on_stack(timer);
+}
+#else
+#define DECLARE_DPM_WATCHDOG_ON_STACK(wd)
+#define dpm_watchdog_set(x, y)
+#define dpm_watchdog_clear(x)
+#endif
+
 /*------------------------- Resume routines -------------------------*/
 
 /**
@@ -435,86 +469,256 @@ static void dpm_show_time(ktime_t starttime, pm_message_t state, char *info)
  * The driver of @dev will not receive interrupts while this function is being
  * executed.
  */
-static int device_resume_noirq(struct device *dev, pm_message_t state)
+static int device_resume_noirq(struct device *dev, pm_message_t state, bool async)
 {
+	pm_callback_t callback = NULL;
+	char *info = NULL;
 	int error = 0;
 
 	TRACE_DEVICE(dev);
 	TRACE_RESUME(0);
 
-	if (dev->bus && dev->bus->pm) {
-		pm_dev_dbg(dev, state, "EARLY ");
-		error = pm_noirq_op(dev, dev->bus->pm, state);
-		if (error)
-			goto End;
-	}
+	if (dev->power.syscore || dev->power.direct_complete)
+		goto Out;
 
-	if (dev->type && dev->type->pm) {
-		pm_dev_dbg(dev, state, "EARLY type ");
-		error = pm_noirq_op(dev, dev->type->pm, state);
-		if (error)
-			goto End;
+	if (!dev->power.is_noirq_suspended)
+		goto Out;
+
+	dpm_wait(dev->parent, async);
+
+	if (dev->pm_domain) {
+		info = "noirq power domain ";
+		callback = pm_noirq_op(&dev->pm_domain->ops, state);
+	} else if (dev->type && dev->type->pm) {
+		info = "noirq type ";
+		callback = pm_noirq_op(dev->type->pm, state);
+	} else if (dev->class && dev->class->pm) {
+		info = "noirq class ";
+		callback = pm_noirq_op(dev->class->pm, state);
+	} else if (dev->bus && dev->bus->pm) {
+		info = "noirq bus ";
+		callback = pm_noirq_op(dev->bus->pm, state);
 	}
 
-	if (dev->class && dev->class->pm) {
-		pm_dev_dbg(dev, state, "EARLY class ");
-		error = pm_noirq_op(dev, dev->class->pm, state);
+	if (!callback && dev->driver && dev->driver->pm) {
+		info = "noirq driver ";
+		callback = pm_noirq_op(dev->driver->pm, state);
 	}
 
-End:
+	error = dpm_run_callback(callback, dev, state, info);
+	dev->power.is_noirq_suspended = false;
+
+ Out:
+	complete_all(&dev->power.completion);
 	TRACE_RESUME(error);
 	return error;
 }
 
+static bool is_async(struct device *dev)
+{
+	return dev->power.async_suspend && pm_async_enabled
+		&& !pm_trace_is_enabled();
+}
+
+static void async_resume_noirq(void *data, async_cookie_t cookie)
+{
+	struct device *dev = (struct device *)data;
+	int error;
+
+	error = device_resume_noirq(dev, pm_transition, true);
+	if (error)
+		pm_dev_err(dev, pm_transition, " async", error);
+
+	put_device(dev);
+}
+
 /**
- * dpm_resume_noirq - Execute "early resume" callbacks for non-sysdev devices.
+ * dpm_resume_noirq - Execute "noirq resume" callbacks for all devices.
  * @state: PM transition of the system being carried out.
  *
- * Call the "noirq" resume handlers for all devices marked as DPM_OFF_IRQ and
+ * Call the "noirq" resume handlers for all devices in dpm_noirq_list and
  * enable device drivers to receive interrupts.
  */
-void dpm_resume_noirq(pm_message_t state)
+static void dpm_resume_noirq(pm_message_t state)
 {
 	struct device *dev;
 	ktime_t starttime = ktime_get();
 
+	trace_suspend_resume(TPS("dpm_resume_noirq"), state.event, true);
 	mutex_lock(&dpm_list_mtx);
-	transition_started = false;
-	list_for_each_entry(dev, &dpm_list, power.entry)
-		if (dev->power.status > DPM_OFF) {
+	pm_transition = state;
+
+	/*
+	 * Schedule the async resume threads upfront, so that
+	 * their start is not delayed by the non-async devices
+	 * that are resumed synchronously in the loop below.
+	 */
+	list_for_each_entry(dev, &dpm_noirq_list, power.entry) {
+		reinit_completion(&dev->power.completion);
+		if (is_async(dev)) {
+			get_device(dev);
+			async_schedule(async_resume_noirq, dev);
+		}
+	}
+
+	while (!list_empty(&dpm_noirq_list)) {
+		dev = to_device(dpm_noirq_list.next);
+		get_device(dev);
+		list_move_tail(&dev->power.entry, &dpm_late_early_list);
+		mutex_unlock(&dpm_list_mtx);
+
+		if (!is_async(dev)) {
 			int error;
 
-			dev->power.status = DPM_OFF;
-			error = device_resume_noirq(dev, state);
-			if (error)
-				pm_dev_err(dev, state, " early", error);
+			error = device_resume_noirq(dev, state, false);
+			if (error) {
+				suspend_stats.failed_resume_noirq++;
+				dpm_save_failed_step(SUSPEND_RESUME_NOIRQ);
+				dpm_save_failed_dev(dev_name(dev));
+				pm_dev_err(dev, state, " noirq", error);
+			}
 		}
+
+		mutex_lock(&dpm_list_mtx);
+		put_device(dev);
+	}
 	mutex_unlock(&dpm_list_mtx);
-	dpm_show_time(starttime, state, "early");
+	async_synchronize_full();
+	dpm_show_time(starttime, state, "noirq");
 	resume_device_irqs();
+	cpuidle_resume();
+	trace_suspend_resume(TPS("dpm_resume_noirq"), state.event, false);
 }
-EXPORT_SYMBOL_GPL(dpm_resume_noirq);
 
 /**
- * legacy_resume - Execute a legacy (bus or class) resume callback for device.
- * @dev: Device to resume.
- * @cb: Resume callback to execute.
+ * device_resume_early - Execute an "early resume" callback for given device.
+ * @dev: Device to handle.
+ * @state: PM transition of the system being carried out.
+ *
+ * Runtime PM is disabled for @dev while this function is being executed.
  */
-static int legacy_resume(struct device *dev, int (*cb)(struct device *dev))
+static int device_resume_early(struct device *dev, pm_message_t state, bool async)
 {
-	int error;
-	ktime_t calltime;
+	pm_callback_t callback = NULL;
+	char *info = NULL;
+	int error = 0;
 
-	calltime = initcall_debug_start(dev);
+	TRACE_DEVICE(dev);
+	TRACE_RESUME(0);
 
-	error = cb(dev);
-	suspend_report_result(cb, error);
+	if (dev->power.syscore || dev->power.direct_complete)
+		goto Out;
 
-	initcall_debug_report(dev, calltime, error);
+	if (!dev->power.is_late_suspended)
+		goto Out;
+
+	dpm_wait(dev->parent, async);
 
+	if (dev->pm_domain) {
+		info = "early power domain ";
+		callback = pm_late_early_op(&dev->pm_domain->ops, state);
+	} else if (dev->type && dev->type->pm) {
+		info = "early type ";
+		callback = pm_late_early_op(dev->type->pm, state);
+	} else if (dev->class && dev->class->pm) {
+		info = "early class ";
+		callback = pm_late_early_op(dev->class->pm, state);
+	} else if (dev->bus && dev->bus->pm) {
+		info = "early bus ";
+		callback = pm_late_early_op(dev->bus->pm, state);
+	}
+
+	if (!callback && dev->driver && dev->driver->pm) {
+		info = "early driver ";
+		callback = pm_late_early_op(dev->driver->pm, state);
+	}
+
+	error = dpm_run_callback(callback, dev, state, info);
+	dev->power.is_late_suspended = false;
+
+ Out:
+	TRACE_RESUME(error);
+
+	pm_runtime_enable(dev);
+	complete_all(&dev->power.completion);
 	return error;
 }
 
+static void async_resume_early(void *data, async_cookie_t cookie)
+{
+	struct device *dev = (struct device *)data;
+	int error;
+
+	error = device_resume_early(dev, pm_transition, true);
+	if (error)
+		pm_dev_err(dev, pm_transition, " async", error);
+
+	put_device(dev);
+}
+
+/**
+ * dpm_resume_early - Execute "early resume" callbacks for all devices.
+ * @state: PM transition of the system being carried out.
+ */
+static void dpm_resume_early(pm_message_t state)
+{
+	struct device *dev;
+	ktime_t starttime = ktime_get();
+
+	trace_suspend_resume(TPS("dpm_resume_early"), state.event, true);
+	mutex_lock(&dpm_list_mtx);
+	pm_transition = state;
+
+	/*
+	 * Schedule the async resume threads upfront, so that
+	 * their start is not delayed by the non-async devices
+	 * that are resumed synchronously in the loop below.
+	 */
+	list_for_each_entry(dev, &dpm_late_early_list, power.entry) {
+		reinit_completion(&dev->power.completion);
+		if (is_async(dev)) {
+			get_device(dev);
+			async_schedule(async_resume_early, dev);
+		}
+	}
+
+	while (!list_empty(&dpm_late_early_list)) {
+		dev = to_device(dpm_late_early_list.next);
+		get_device(dev);
+		list_move_tail(&dev->power.entry, &dpm_suspended_list);
+		mutex_unlock(&dpm_list_mtx);
+
+		if (!is_async(dev)) {
+			int error;
+
+			error = device_resume_early(dev, state, false);
+			if (error) {
+				suspend_stats.failed_resume_early++;
+				dpm_save_failed_step(SUSPEND_RESUME_EARLY);
+				dpm_save_failed_dev(dev_name(dev));
+				pm_dev_err(dev, state, " early", error);
+			}
+		}
+		mutex_lock(&dpm_list_mtx);
+		put_device(dev);
+	}
+	mutex_unlock(&dpm_list_mtx);
+	async_synchronize_full();
+	dpm_show_time(starttime, state, "early");
+	trace_suspend_resume(TPS("dpm_resume_early"), state.event, false);
+}
+
+/**
+ * dpm_resume_start - Execute "noirq" and "early" device callbacks.
+ * @state: PM transition of the system being carried out.
+ */
+void dpm_resume_start(pm_message_t state)
+{
+	dpm_resume_noirq(state);
+	dpm_resume_early(state);
+}
+EXPORT_SYMBOL_GPL(dpm_resume_start);
+
 /**
  * device_resume - Execute "resume" callbacks for given device.
  * @dev: Device to handle.
@@ -523,51 +727,90 @@ static int legacy_resume(struct device *dev, int (*cb)(struct device *dev))
  */
 static int device_resume(struct device *dev, pm_message_t state, bool async)
 {
+	pm_callback_t callback = NULL;
+	char *info = NULL;
 	int error = 0;
+	DECLARE_DPM_WATCHDOG_ON_STACK(wd);
 
 	TRACE_DEVICE(dev);
 	TRACE_RESUME(0);
 
+	if (dev->power.syscore)
+		goto Complete;
+
+	if (dev->power.direct_complete) {
+		/* Match the pm_runtime_disable() in __device_suspend(). */
+		pm_runtime_enable(dev);
+		goto Complete;
+	}
+
 	dpm_wait(dev->parent, async);
+	dpm_watchdog_set(&wd, dev);
 	device_lock(dev);
 
-	dev->power.status = DPM_RESUMING;
+	/*
+	 * This is a fib.  But we'll allow new children to be added below
+	 * a resumed device, even if the device hasn't been completed yet.
+	 */
+	dev->power.is_prepared = false;
 
-	if (dev->bus) {
-		if (dev->bus->pm) {
-			pm_dev_dbg(dev, state, "");
-			error = pm_op(dev, dev->bus->pm, state);
-		} else if (dev->bus->resume) {
-			pm_dev_dbg(dev, state, "legacy ");
-			error = legacy_resume(dev, dev->bus->resume);
-		}
-		if (error)
-			goto End;
+	if (!dev->power.is_suspended)
+		goto Unlock;
+
+	if (dev->pm_domain) {
+		info = "power domain ";
+		callback = pm_op(&dev->pm_domain->ops, state);
+		goto Driver;
 	}
 
-	if (dev->type) {
-		if (dev->type->pm) {
-			pm_dev_dbg(dev, state, "type ");
-			error = pm_op(dev, dev->type->pm, state);
-		}
-		if (error)
-			goto End;
+	if (dev->type && dev->type->pm) {
+		info = "type ";
+		callback = pm_op(dev->type->pm, state);
+		goto Driver;
 	}
 
 	if (dev->class) {
 		if (dev->class->pm) {
-			pm_dev_dbg(dev, state, "class ");
-			error = pm_op(dev, dev->class->pm, state);
+			info = "class ";
+			callback = pm_op(dev->class->pm, state);
+			goto Driver;
 		} else if (dev->class->resume) {
-			pm_dev_dbg(dev, state, "legacy class ");
-			error = legacy_resume(dev, dev->class->resume);
+			info = "legacy class ";
+			callback = dev->class->resume;
+			goto End;
 		}
 	}
+
+	if (dev->bus) {
+		if (dev->bus->pm) {
+			info = "bus ";
+			callback = pm_op(dev->bus->pm, state);
+		} else if (dev->bus->resume) {
+			info = "legacy bus ";
+			callback = dev->bus->resume;
+			goto End;
+		}
+	}
+
+ Driver:
+	if (!callback && dev->driver && dev->driver->pm) {
+		info = "driver ";
+		callback = pm_op(dev->driver->pm, state);
+	}
+
  End:
+	error = dpm_run_callback(callback, dev, state, info);
+	dev->power.is_suspended = false;
+
+ Unlock:
 	device_unlock(dev);
+	dpm_watchdog_clear(&wd);
+
+ Complete:
 	complete_all(&dev->power.completion);
 
 	TRACE_RESUME(error);
+
 	return error;
 }
 
@@ -582,12 +825,6 @@ static void async_resume(void *data, async_cookie_t cookie)
 	put_device(dev);
 }
 
-static bool is_async(struct device *dev)
-{
-	return dev->power.async_suspend && pm_async_enabled
-		&& !pm_trace_is_enabled();
-}
-
 /**
  * dpm_resume - Execute "resume" callbacks for non-sysdev devices.
  * @state: PM transition of the system being carried out.
@@ -595,53 +832,54 @@ static bool is_async(struct device *dev)
  * Execute the appropriate "resume" callback for all devices whose status
  * indicates that they are suspended.
  */
-static void dpm_resume(pm_message_t state)
+void dpm_resume(pm_message_t state)
 {
-	struct list_head list;
 	struct device *dev;
 	ktime_t starttime = ktime_get();
 
-	INIT_LIST_HEAD(&list);
+	trace_suspend_resume(TPS("dpm_resume"), state.event, true);
+	might_sleep();
+
 	mutex_lock(&dpm_list_mtx);
 	pm_transition = state;
 	async_error = 0;
 
-	list_for_each_entry(dev, &dpm_list, power.entry) {
-		if (dev->power.status < DPM_OFF)
-			continue;
-
-		INIT_COMPLETION(dev->power.completion);
+	list_for_each_entry(dev, &dpm_suspended_list, power.entry) {
+		reinit_completion(&dev->power.completion);
 		if (is_async(dev)) {
 			get_device(dev);
 			async_schedule(async_resume, dev);
 		}
 	}
 
-	while (!list_empty(&dpm_list)) {
-		dev = to_device(dpm_list.next);
+	while (!list_empty(&dpm_suspended_list)) {
+		dev = to_device(dpm_suspended_list.next);
 		get_device(dev);
-		if (dev->power.status >= DPM_OFF && !is_async(dev)) {
+		if (!is_async(dev)) {
 			int error;
 
 			mutex_unlock(&dpm_list_mtx);
 
 			error = device_resume(dev, state, false);
+			if (error) {
+				suspend_stats.failed_resume++;
+				dpm_save_failed_step(SUSPEND_RESUME);
+				dpm_save_failed_dev(dev_name(dev));
+				pm_dev_err(dev, state, "", error);
+			}
 
 			mutex_lock(&dpm_list_mtx);
-			if (error)
-				pm_dev_err(dev, state, "", error);
-		} else if (dev->power.status == DPM_SUSPENDING) {
-			/* Allow new children of the device to be registered */
-			dev->power.status = DPM_RESUMING;
 		}
 		if (!list_empty(&dev->power.entry))
-			list_move_tail(&dev->power.entry, &list);
+			list_move_tail(&dev->power.entry, &dpm_prepared_list);
 		put_device(dev);
 	}
-	list_splice(&list, &dpm_list);
 	mutex_unlock(&dpm_list_mtx);
 	async_synchronize_full();
 	dpm_show_time(starttime, state, NULL);
+
+	cpufreq_resume();
+	trace_suspend_resume(TPS("dpm_resume"), state.event, false);
 }
 
 /**
@@ -651,24 +889,43 @@ static void dpm_resume(pm_message_t state)
  */
 static void device_complete(struct device *dev, pm_message_t state)
 {
+	void (*callback)(struct device *) = NULL;
+	char *info = NULL;
+
+	if (dev->power.syscore)
+		return;
+
 	device_lock(dev);
 
-	if (dev->class && dev->class->pm && dev->class->pm->complete) {
-		pm_dev_dbg(dev, state, "completing class ");
-		dev->class->pm->complete(dev);
+	if (dev->pm_domain) {
+		info = "completing power domain ";
+		callback = dev->pm_domain->ops.complete;
+	} else if (dev->type && dev->type->pm) {
+		info = "completing type ";
+		callback = dev->type->pm->complete;
+	} else if (dev->class && dev->class->pm) {
+		info = "completing class ";
+		callback = dev->class->pm->complete;
+	} else if (dev->bus && dev->bus->pm) {
+		info = "completing bus ";
+		callback = dev->bus->pm->complete;
 	}
 
-	if (dev->type && dev->type->pm && dev->type->pm->complete) {
-		pm_dev_dbg(dev, state, "completing type ");
-		dev->type->pm->complete(dev);
+	if (!callback && dev->driver && dev->driver->pm) {
+		info = "completing driver ";
+		callback = dev->driver->pm->complete;
 	}
 
-	if (dev->bus && dev->bus->pm && dev->bus->pm->complete) {
-		pm_dev_dbg(dev, state, "completing ");
-		dev->bus->pm->complete(dev);
+	if (callback) {
+		pm_dev_dbg(dev, state, info);
+		trace_device_pm_callback_start(dev, info, state.event);
+		callback(dev);
+		trace_device_pm_callback_end(dev, 0);
 	}
 
 	device_unlock(dev);
+
+	pm_runtime_put(dev);
 }
 
 /**
@@ -678,32 +935,31 @@ static void device_complete(struct device *dev, pm_message_t state)
  * Execute the ->complete() callbacks for all devices whose PM status is not
  * DPM_ON (this allows new devices to be registered).
  */
-static void dpm_complete(pm_message_t state)
+void dpm_complete(pm_message_t state)
 {
 	struct list_head list;
 
+	trace_suspend_resume(TPS("dpm_complete"), state.event, true);
+	might_sleep();
+
 	INIT_LIST_HEAD(&list);
 	mutex_lock(&dpm_list_mtx);
-	transition_started = false;
-	while (!list_empty(&dpm_list)) {
-		struct device *dev = to_device(dpm_list.prev);
+	while (!list_empty(&dpm_prepared_list)) {
+		struct device *dev = to_device(dpm_prepared_list.prev);
 
 		get_device(dev);
-		if (dev->power.status > DPM_ON) {
-			dev->power.status = DPM_ON;
-			mutex_unlock(&dpm_list_mtx);
+		dev->power.is_prepared = false;
+		list_move(&dev->power.entry, &list);
+		mutex_unlock(&dpm_list_mtx);
 
-			device_complete(dev, state);
-			pm_runtime_put_sync(dev);
+		device_complete(dev, state);
 
-			mutex_lock(&dpm_list_mtx);
-		}
-		if (!list_empty(&dev->power.entry))
-			list_move(&dev->power.entry, &list);
+		mutex_lock(&dpm_list_mtx);
 		put_device(dev);
 	}
 	list_splice(&list, &dpm_list);
 	mutex_unlock(&dpm_list_mtx);
+	trace_suspend_resume(TPS("dpm_complete"), state.event, false);
 }
 
 /**
@@ -715,7 +971,6 @@ static void dpm_complete(pm_message_t state)
  */
 void dpm_resume_end(pm_message_t state)
 {
-	might_sleep();
 	dpm_resume(state);
 	dpm_complete(state);
 }
@@ -753,64 +1008,290 @@ static pm_message_t resume_event(pm_message_t sleep_state)
  * The driver of @dev will not receive interrupts while this function is being
  * executed.
  */
-static int device_suspend_noirq(struct device *dev, pm_message_t state)
+static int __device_suspend_noirq(struct device *dev, pm_message_t state, bool async)
 {
+	pm_callback_t callback = NULL;
+	char *info = NULL;
 	int error = 0;
 
-	if (dev->class && dev->class->pm) {
-		pm_dev_dbg(dev, state, "LATE class ");
-		error = pm_noirq_op(dev, dev->class->pm, state);
-		if (error)
-			goto End;
+	if (async_error)
+		goto Complete;
+
+	if (pm_wakeup_pending()) {
+		async_error = -EBUSY;
+		goto Complete;
 	}
 
-	if (dev->type && dev->type->pm) {
-		pm_dev_dbg(dev, state, "LATE type ");
-		error = pm_noirq_op(dev, dev->type->pm, state);
-		if (error)
-			goto End;
+	if (dev->power.syscore || dev->power.direct_complete)
+		goto Complete;
+
+	dpm_wait_for_children(dev, async);
+
+	if (dev->pm_domain) {
+		info = "noirq power domain ";
+		callback = pm_noirq_op(&dev->pm_domain->ops, state);
+	} else if (dev->type && dev->type->pm) {
+		info = "noirq type ";
+		callback = pm_noirq_op(dev->type->pm, state);
+	} else if (dev->class && dev->class->pm) {
+		info = "noirq class ";
+		callback = pm_noirq_op(dev->class->pm, state);
+	} else if (dev->bus && dev->bus->pm) {
+		info = "noirq bus ";
+		callback = pm_noirq_op(dev->bus->pm, state);
 	}
 
-	if (dev->bus && dev->bus->pm) {
-		pm_dev_dbg(dev, state, "LATE ");
-		error = pm_noirq_op(dev, dev->bus->pm, state);
+	if (!callback && dev->driver && dev->driver->pm) {
+		info = "noirq driver ";
+		callback = pm_noirq_op(dev->driver->pm, state);
 	}
 
-End:
+	error = dpm_run_callback(callback, dev, state, info);
+	if (!error)
+		dev->power.is_noirq_suspended = true;
+	else
+		async_error = error;
+
+Complete:
+	complete_all(&dev->power.completion);
 	return error;
 }
 
+static void async_suspend_noirq(void *data, async_cookie_t cookie)
+{
+	struct device *dev = (struct device *)data;
+	int error;
+
+	error = __device_suspend_noirq(dev, pm_transition, true);
+	if (error) {
+		dpm_save_failed_dev(dev_name(dev));
+		pm_dev_err(dev, pm_transition, " async", error);
+	}
+
+	put_device(dev);
+}
+
+static int device_suspend_noirq(struct device *dev)
+{
+	reinit_completion(&dev->power.completion);
+
+	if (pm_async_enabled && dev->power.async_suspend) {
+		get_device(dev);
+		async_schedule(async_suspend_noirq, dev);
+		return 0;
+	}
+	return __device_suspend_noirq(dev, pm_transition, false);
+}
+
 /**
- * dpm_suspend_noirq - Execute "late suspend" callbacks for non-sysdev devices.
+ * dpm_suspend_noirq - Execute "noirq suspend" callbacks for all devices.
  * @state: PM transition of the system being carried out.
  *
  * Prevent device drivers from receiving interrupts and call the "noirq" suspend
  * handlers for all non-sysdev devices.
  */
-int dpm_suspend_noirq(pm_message_t state)
+static int dpm_suspend_noirq(pm_message_t state)
 {
-	struct device *dev;
 	ktime_t starttime = ktime_get();
 	int error = 0;
 
+	trace_suspend_resume(TPS("dpm_suspend_noirq"), state.event, true);
+	cpuidle_pause();
 	suspend_device_irqs();
 	mutex_lock(&dpm_list_mtx);
-	list_for_each_entry_reverse(dev, &dpm_list, power.entry) {
-		error = device_suspend_noirq(dev, state);
+	pm_transition = state;
+	async_error = 0;
+
+	while (!list_empty(&dpm_late_early_list)) {
+		struct device *dev = to_device(dpm_late_early_list.prev);
+
+		get_device(dev);
+		mutex_unlock(&dpm_list_mtx);
+
+		error = device_suspend_noirq(dev);
+
+		mutex_lock(&dpm_list_mtx);
 		if (error) {
-			pm_dev_err(dev, state, " late", error);
+			pm_dev_err(dev, state, " noirq", error);
+			dpm_save_failed_dev(dev_name(dev));
+			put_device(dev);
 			break;
 		}
-		dev->power.status = DPM_OFF_IRQ;
+		if (!list_empty(&dev->power.entry))
+			list_move(&dev->power.entry, &dpm_noirq_list);
+		put_device(dev);
+
+		if (async_error)
+			break;
 	}
 	mutex_unlock(&dpm_list_mtx);
-	if (error)
+	async_synchronize_full();
+	if (!error)
+		error = async_error;
+
+	if (error) {
+		suspend_stats.failed_suspend_noirq++;
+		dpm_save_failed_step(SUSPEND_SUSPEND_NOIRQ);
 		dpm_resume_noirq(resume_event(state));
+	} else {
+		dpm_show_time(starttime, state, "noirq");
+	}
+	trace_suspend_resume(TPS("dpm_suspend_noirq"), state.event, false);
+	return error;
+}
+
+/**
+ * __device_suspend_late - Execute a "late suspend" callback for given device.
+ * @dev: Device to handle.
+ * @state: PM transition of the system being carried out.
+ *
+ * Runtime PM is disabled for @dev while this function is being executed.
+ */
+static int __device_suspend_late(struct device *dev, pm_message_t state, bool async)
+{
+	pm_callback_t callback = NULL;
+	char *info = NULL;
+	int error = 0;
+
+	__pm_runtime_disable(dev, false);
+
+	if (async_error)
+		goto Complete;
+
+	if (pm_wakeup_pending()) {
+		async_error = -EBUSY;
+		goto Complete;
+	}
+
+	if (dev->power.syscore || dev->power.direct_complete)
+		goto Complete;
+
+	dpm_wait_for_children(dev, async);
+
+	if (dev->pm_domain) {
+		info = "late power domain ";
+		callback = pm_late_early_op(&dev->pm_domain->ops, state);
+	} else if (dev->type && dev->type->pm) {
+		info = "late type ";
+		callback = pm_late_early_op(dev->type->pm, state);
+	} else if (dev->class && dev->class->pm) {
+		info = "late class ";
+		callback = pm_late_early_op(dev->class->pm, state);
+	} else if (dev->bus && dev->bus->pm) {
+		info = "late bus ";
+		callback = pm_late_early_op(dev->bus->pm, state);
+	}
+
+	if (!callback && dev->driver && dev->driver->pm) {
+		info = "late driver ";
+		callback = pm_late_early_op(dev->driver->pm, state);
+	}
+
+	error = dpm_run_callback(callback, dev, state, info);
+	if (!error)
+		dev->power.is_late_suspended = true;
 	else
+		async_error = error;
+
+Complete:
+	complete_all(&dev->power.completion);
+	return error;
+}
+
+static void async_suspend_late(void *data, async_cookie_t cookie)
+{
+	struct device *dev = (struct device *)data;
+	int error;
+
+	error = __device_suspend_late(dev, pm_transition, true);
+	if (error) {
+		dpm_save_failed_dev(dev_name(dev));
+		pm_dev_err(dev, pm_transition, " async", error);
+	}
+	put_device(dev);
+}
+
+static int device_suspend_late(struct device *dev)
+{
+	reinit_completion(&dev->power.completion);
+
+	if (pm_async_enabled && dev->power.async_suspend) {
+		get_device(dev);
+		async_schedule(async_suspend_late, dev);
+		return 0;
+	}
+
+	return __device_suspend_late(dev, pm_transition, false);
+}
+
+/**
+ * dpm_suspend_late - Execute "late suspend" callbacks for all devices.
+ * @state: PM transition of the system being carried out.
+ */
+static int dpm_suspend_late(pm_message_t state)
+{
+	ktime_t starttime = ktime_get();
+	int error = 0;
+
+	trace_suspend_resume(TPS("dpm_suspend_late"), state.event, true);
+	mutex_lock(&dpm_list_mtx);
+	pm_transition = state;
+	async_error = 0;
+
+	while (!list_empty(&dpm_suspended_list)) {
+		struct device *dev = to_device(dpm_suspended_list.prev);
+
+		get_device(dev);
+		mutex_unlock(&dpm_list_mtx);
+
+		error = device_suspend_late(dev);
+		mutex_lock(&dpm_list_mtx);
+		if (error) {
+			pm_dev_err(dev, state, " late", error);
+			dpm_save_failed_dev(dev_name(dev));
+			put_device(dev);
+			break;
+		}
+		if (!list_empty(&dev->power.entry))
+			list_move(&dev->power.entry, &dpm_late_early_list);
+		put_device(dev);
+		if (async_error)
+			break;
+	}
+	mutex_unlock(&dpm_list_mtx);
+	async_synchronize_full();
+	if (!error)
+		error = async_error;	/* async callbacks fail only via this */
+	if (error) {
+		suspend_stats.failed_suspend_late++;
+		dpm_save_failed_step(SUSPEND_SUSPEND_LATE);
+		dpm_resume_early(resume_event(state));
+	} else {
 		dpm_show_time(starttime, state, "late");
+	}
+	trace_suspend_resume(TPS("dpm_suspend_late"), state.event, false);
 	return error;
 }
-EXPORT_SYMBOL_GPL(dpm_suspend_noirq);
+
+/**
+ * dpm_suspend_end - Execute "late" and "noirq" device suspend callbacks.
+ * @state: PM transition of the system being carried out.
+ */
+int dpm_suspend_end(pm_message_t state)
+{
+	int error = dpm_suspend_late(state);
+	if (error)
+		return error;
+
+	error = dpm_suspend_noirq(state);
+	if (error) {
+		dpm_resume_early(resume_event(state));
+		return error;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(dpm_suspend_end);
 
 /**
  * legacy_suspend - Execute a legacy (bus or class) suspend callback for device.
@@ -819,17 +1300,20 @@ EXPORT_SYMBOL_GPL(dpm_suspend_noirq);
  * @cb: Suspend callback to execute.
  */
 static int legacy_suspend(struct device *dev, pm_message_t state,
-			  int (*cb)(struct device *dev, pm_message_t state))
+			  int (*cb)(struct device *dev, pm_message_t state),
+			  char *info)
 {
 	int error;
 	ktime_t calltime;
 
 	calltime = initcall_debug_start(dev);
 
+	trace_device_pm_callback_start(dev, info, state.event);
 	error = cb(dev, state);
+	trace_device_pm_callback_end(dev, error);
 	suspend_report_result(cb, error);
 
-	initcall_debug_report(dev, calltime, error);
+	initcall_debug_report(dev, calltime, error, state, info);
 
 	return error;
 }
@@ -842,52 +1326,114 @@ static int legacy_suspend(struct device *dev, pm_message_t state,
  */
 static int __device_suspend(struct device *dev, pm_message_t state, bool async)
 {
+	pm_callback_t callback = NULL;
+	char *info = NULL;
 	int error = 0;
+	DECLARE_DPM_WATCHDOG_ON_STACK(wd);
 
 	dpm_wait_for_children(dev, async);
-	device_lock(dev);
 
 	if (async_error)
-		goto End;
+		goto Complete;
+
+	/*
+	 * If a device configured to wake up the system from sleep states
+	 * has been suspended at run time and there's a resume request pending
+	 * for it, this is equivalent to the device signaling wakeup, so the
+	 * system suspend operation should be aborted.
+	 */
+	if (pm_runtime_barrier(dev) && device_may_wakeup(dev))
+		pm_wakeup_event(dev, 0);
+
+	if (pm_wakeup_pending()) {
+		async_error = -EBUSY;
+		goto Complete;
+	}
+
+	if (dev->power.syscore)
+		goto Complete;
+
+	if (dev->power.direct_complete) {
+		if (pm_runtime_status_suspended(dev)) {
+			pm_runtime_disable(dev);
+			if (pm_runtime_suspended_if_enabled(dev))
+				goto Complete;
+
+			pm_runtime_enable(dev);
+		}
+		dev->power.direct_complete = false;
+	}
+
+	dpm_watchdog_set(&wd, dev);
+	device_lock(dev);
+
+	if (dev->pm_domain) {
+		info = "power domain ";
+		callback = pm_op(&dev->pm_domain->ops, state);
+		goto Run;
+	}
+
+	if (dev->type && dev->type->pm) {
+		info = "type ";
+		callback = pm_op(dev->type->pm, state);
+		goto Run;
+	}
 
 	if (dev->class) {
 		if (dev->class->pm) {
-			pm_dev_dbg(dev, state, "class ");
-			error = pm_op(dev, dev->class->pm, state);
+			info = "class ";
+			callback = pm_op(dev->class->pm, state);
+			goto Run;
 		} else if (dev->class->suspend) {
 			pm_dev_dbg(dev, state, "legacy class ");
-			error = legacy_suspend(dev, state, dev->class->suspend);
-		}
-		if (error)
+			error = legacy_suspend(dev, state, dev->class->suspend,
+						"legacy class ");
 			goto End;
-	}
-
-	if (dev->type) {
-		if (dev->type->pm) {
-			pm_dev_dbg(dev, state, "type ");
-			error = pm_op(dev, dev->type->pm, state);
 		}
-		if (error)
-			goto End;
 	}
 
 	if (dev->bus) {
 		if (dev->bus->pm) {
-			pm_dev_dbg(dev, state, "");
-			error = pm_op(dev, dev->bus->pm, state);
+			info = "bus ";
+			callback = pm_op(dev->bus->pm, state);
 		} else if (dev->bus->suspend) {
-			pm_dev_dbg(dev, state, "legacy ");
-			error = legacy_suspend(dev, state, dev->bus->suspend);
+			pm_dev_dbg(dev, state, "legacy bus ");
+			error = legacy_suspend(dev, state, dev->bus->suspend,
+						"legacy bus ");
+			goto End;
 		}
 	}
 
-	if (!error)
-		dev->power.status = DPM_OFF;
+ Run:
+	if (!callback && dev->driver && dev->driver->pm) {
+		info = "driver ";
+		callback = pm_op(dev->driver->pm, state);
+	}
+
+	error = dpm_run_callback(callback, dev, state, info);
 
  End:
+	if (!error) {
+		struct device *parent = dev->parent;
+
+		dev->power.is_suspended = true;
+		if (parent) {
+			spin_lock_irq(&parent->power.lock);
+
+			dev->parent->power.direct_complete = false;
+			if (dev->power.wakeup_path
+			    && !dev->parent->power.ignore_children)
+				dev->parent->power.wakeup_path = true;
+
+			spin_unlock_irq(&parent->power.lock);
+		}
+	}
+
 	device_unlock(dev);
-	complete_all(&dev->power.completion);
+	dpm_watchdog_clear(&wd);
 
+ Complete:
+	complete_all(&dev->power.completion);
 	if (error)
 		async_error = error;
 
@@ -900,15 +1446,17 @@ static void async_suspend(void *data, async_cookie_t cookie)
 	int error;
 
 	error = __device_suspend(dev, pm_transition, true);
-	if (error)
+	if (error) {
+		dpm_save_failed_dev(dev_name(dev));
 		pm_dev_err(dev, pm_transition, " async", error);
+	}
 
 	put_device(dev);
 }
 
 static int device_suspend(struct device *dev)
 {
-	INIT_COMPLETION(dev->power.completion);
+	reinit_completion(&dev->power.completion);
 
 	if (pm_async_enabled && dev->power.async_suspend) {
 		get_device(dev);
@@ -923,18 +1471,21 @@ static int device_suspend(struct device *dev)
  * dpm_suspend - Execute "suspend" callbacks for all non-sysdev devices.
  * @state: PM transition of the system being carried out.
  */
-static int dpm_suspend(pm_message_t state)
+int dpm_suspend(pm_message_t state)
 {
-	struct list_head list;
 	ktime_t starttime = ktime_get();
 	int error = 0;
 
-	INIT_LIST_HEAD(&list);
+	trace_suspend_resume(TPS("dpm_suspend"), state.event, true);
+	might_sleep();
+
+	cpufreq_suspend();
+
 	mutex_lock(&dpm_list_mtx);
 	pm_transition = state;
 	async_error = 0;
-	while (!list_empty(&dpm_list)) {
-		struct device *dev = to_device(dpm_list.prev);
+	while (!list_empty(&dpm_prepared_list)) {
+		struct device *dev = to_device(dpm_prepared_list.prev);
 
 		get_device(dev);
 		mutex_unlock(&dpm_list_mtx);
@@ -944,22 +1495,26 @@ static int dpm_suspend(pm_message_t state)
 		mutex_lock(&dpm_list_mtx);
 		if (error) {
 			pm_dev_err(dev, state, "", error);
+			dpm_save_failed_dev(dev_name(dev));
 			put_device(dev);
 			break;
 		}
 		if (!list_empty(&dev->power.entry))
-			list_move(&dev->power.entry, &list);
+			list_move(&dev->power.entry, &dpm_suspended_list);
 		put_device(dev);
 		if (async_error)
 			break;
 	}
-	list_splice(&list, dpm_list.prev);
 	mutex_unlock(&dpm_list_mtx);
 	async_synchronize_full();
 	if (!error)
 		error = async_error;
-	if (!error)
+	if (error) {
+		suspend_stats.failed_suspend++;
+		dpm_save_failed_step(SUSPEND_SUSPEND);
+	} else
 		dpm_show_time(starttime, state, NULL);
+	trace_suspend_resume(TPS("dpm_suspend"), state.event, false);
 	return error;
 }
 
@@ -973,35 +1528,68 @@ static int dpm_suspend(pm_message_t state)
  */
 static int device_prepare(struct device *dev, pm_message_t state)
 {
-	int error = 0;
+	int (*callback)(struct device *) = NULL;
+	char *info = NULL;
+	int ret = 0;
+
+	if (dev->power.syscore)
+		return 0;
+
+	/*
+	 * If a device's parent goes into runtime suspend at the wrong time,
+	 * it won't be possible to resume the device.  To prevent this we
+	 * block runtime suspend here, during the prepare phase, and allow
+	 * it again during the complete phase.
+	 */
+	pm_runtime_get_noresume(dev);
 
 	device_lock(dev);
 
-	if (dev->bus && dev->bus->pm && dev->bus->pm->prepare) {
-		pm_dev_dbg(dev, state, "preparing ");
-		error = dev->bus->pm->prepare(dev);
-		suspend_report_result(dev->bus->pm->prepare, error);
-		if (error)
-			goto End;
+	dev->power.wakeup_path = device_may_wakeup(dev);
+
+	if (dev->pm_domain) {
+		info = "preparing power domain ";
+		callback = dev->pm_domain->ops.prepare;
+	} else if (dev->type && dev->type->pm) {
+		info = "preparing type ";
+		callback = dev->type->pm->prepare;
+	} else if (dev->class && dev->class->pm) {
+		info = "preparing class ";
+		callback = dev->class->pm->prepare;
+	} else if (dev->bus && dev->bus->pm) {
+		info = "preparing bus ";
+		callback = dev->bus->pm->prepare;
 	}
 
-	if (dev->type && dev->type->pm && dev->type->pm->prepare) {
-		pm_dev_dbg(dev, state, "preparing type ");
-		error = dev->type->pm->prepare(dev);
-		suspend_report_result(dev->type->pm->prepare, error);
-		if (error)
-			goto End;
+	if (!callback && dev->driver && dev->driver->pm) {
+		info = "preparing driver ";
+		callback = dev->driver->pm->prepare;
 	}
 
-	if (dev->class && dev->class->pm && dev->class->pm->prepare) {
-		pm_dev_dbg(dev, state, "preparing class ");
-		error = dev->class->pm->prepare(dev);
-		suspend_report_result(dev->class->pm->prepare, error);
+	if (callback) {
+		trace_device_pm_callback_start(dev, info, state.event);
+		ret = callback(dev);
+		trace_device_pm_callback_end(dev, ret);
 	}
- End:
+
 	device_unlock(dev);
 
-	return error;
+	if (ret < 0) {
+		suspend_report_result(callback, ret);
+		pm_runtime_put(dev);
+		return ret;
+	}
+	/*
+	 * A positive return value from ->prepare() means "this device appears
+	 * to be runtime-suspended and its state is fine, so if it really is
+	 * runtime-suspended, you can leave it in that state provided that you
+	 * will do the same thing with all of its descendants".  This only
+	 * applies to suspend transitions, however.
+	 */
+	spin_lock_irq(&dev->power.lock);
+	dev->power.direct_complete = ret > 0 && state.event == PM_EVENT_SUSPEND;
+	spin_unlock_irq(&dev->power.lock);
+	return 0;
 }
 
 /**
@@ -1010,51 +1598,42 @@ static int device_prepare(struct device *dev, pm_message_t state)
  *
  * Execute the ->prepare() callback(s) for all devices.
  */
-static int dpm_prepare(pm_message_t state)
+int dpm_prepare(pm_message_t state)
 {
-	struct list_head list;
 	int error = 0;
 
-	INIT_LIST_HEAD(&list);
+	trace_suspend_resume(TPS("dpm_prepare"), state.event, true);
+	might_sleep();
+
 	mutex_lock(&dpm_list_mtx);
-	transition_started = true;
 	while (!list_empty(&dpm_list)) {
 		struct device *dev = to_device(dpm_list.next);
 
 		get_device(dev);
-		dev->power.status = DPM_PREPARING;
 		mutex_unlock(&dpm_list_mtx);
 
-		pm_runtime_get_noresume(dev);
-		if (pm_runtime_barrier(dev) && device_may_wakeup(dev)) {
-			/* Wake-up requested during system sleep transition. */
-			pm_runtime_put_sync(dev);
-			error = -EBUSY;
-		} else {
-			error = device_prepare(dev, state);
-		}
+		error = device_prepare(dev, state);
 
 		mutex_lock(&dpm_list_mtx);
 		if (error) {
-			dev->power.status = DPM_ON;
 			if (error == -EAGAIN) {
 				put_device(dev);
 				error = 0;
 				continue;
 			}
-			printk(KERN_ERR "PM: Failed to prepare device %s "
-				"for power transition: error %d\n",
-				kobject_name(&dev->kobj), error);
+			printk(KERN_INFO "PM: Device %s not prepared "
+				"for power transition: code %d\n",
+				dev_name(dev), error);
 			put_device(dev);
 			break;
 		}
-		dev->power.status = DPM_SUSPENDING;
+		dev->power.is_prepared = true;
 		if (!list_empty(&dev->power.entry))
-			list_move_tail(&dev->power.entry, &list);
+			list_move_tail(&dev->power.entry, &dpm_prepared_list);
 		put_device(dev);
 	}
-	list_splice(&list, &dpm_list);
 	mutex_unlock(&dpm_list_mtx);
+	trace_suspend_resume(TPS("dpm_prepare"), state.event, false);
 	return error;
 }
 
@@ -1069,9 +1648,11 @@ int dpm_suspend_start(pm_message_t state)
 {
 	int error;
 
-	might_sleep();
 	error = dpm_prepare(state);
-	if (!error)
+	if (error) {
+		suspend_stats.failed_prepare++;
+		dpm_save_failed_step(SUSPEND_PREPARE);
+	} else
 		error = dpm_suspend(state);
 	return error;
 }
@@ -1095,3 +1676,25 @@ int device_pm_wait_for_dev(struct device *subordinate, struct device *dev)
 	return async_error;
 }
 EXPORT_SYMBOL_GPL(device_pm_wait_for_dev);
+
+/**
+ * dpm_for_each_dev - device iterator.
+ * @data: data for the callback.
+ * @fn: function to be called for each device.
+ *
+ * Iterate over devices in dpm_list, and call @fn for each device,
+ * passing it @data.
+ */
+void dpm_for_each_dev(void *data, void (*fn)(struct device *, void *))
+{
+	struct device *dev;
+
+	if (!fn)
+		return;
+
+	device_pm_lock();
+	list_for_each_entry(dev, &dpm_list, power.entry)
+		fn(dev, data);
+	device_pm_unlock();
+}
+EXPORT_SYMBOL_GPL(dpm_for_each_dev);
diff --git a/drivers/base/power/opp.c b/drivers/base/power/opp.c
index 2bb9b4cf59d..89ced955faf 100644
--- a/drivers/base/power/opp.c
+++ b/drivers/base/power/opp.c
@@ -14,13 +14,14 @@
 #include <linux/kernel.h>
 #include <linux/errno.h>
 #include <linux/err.h>
-#include <linux/init.h>
 #include <linux/slab.h>
-#include <linux/cpufreq.h>
+#include <linux/device.h>
 #include <linux/list.h>
 #include <linux/rculist.h>
 #include <linux/rcupdate.h>
-#include <linux/opp.h>
+#include <linux/pm_opp.h>
+#include <linux/of.h>
+#include <linux/export.h>
 
 /*
  * Internal data structure organization with the OPP layer library is as
@@ -39,7 +40,7 @@
  */
 
 /**
- * struct opp - Generic OPP description structure
+ * struct dev_pm_opp - Generic OPP description structure
  * @node:	opp list node. The nodes are maintained throughout the lifetime
  *		of boot. It is expected only an optimal set of OPPs are
  *		added to the library by the SoC framework.
@@ -52,10 +53,11 @@
  * @rate:	Frequency in hertz
  * @u_volt:	Nominal voltage in microvolts corresponding to this OPP
  * @dev_opp:	points back to the device_opp struct this opp belongs to
+ * @head:	RCU callback head used for deferred freeing
  *
  * This structure stores the OPP information for a given device.
  */
-struct opp {
+struct dev_pm_opp {
 	struct list_head node;
 
 	bool available;
@@ -63,6 +65,7 @@ struct opp {
 	unsigned long u_volt;
 
 	struct device_opp *dev_opp;
+	struct rcu_head head;
 };
 
 /**
@@ -73,6 +76,7 @@ struct opp {
  *		RCU usage: nodes are not modified in the list of device_opp,
  *		however addition is possible and is secured by dev_opp_list_lock
  * @dev:	device pointer
+ * @head:	notifier head to notify the OPP availability changes.
  * @opp_list:	list of opps
  *
  * This is an internal data structure maintaining the link to opps attached to
@@ -83,6 +87,7 @@ struct device_opp {
 	struct list_head node;
 
 	struct device *dev;
+	struct srcu_notifier_head head;
 	struct list_head opp_list;
 };
 
@@ -129,7 +134,7 @@ static struct device_opp *find_device_opp(struct device *dev)
 }
 
 /**
- * opp_get_voltage() - Gets the voltage corresponding to an available opp
+ * dev_pm_opp_get_voltage() - Gets the voltage corresponding to an available opp
  * @opp:	opp for which voltage has to be returned for
  *
  * Return voltage in micro volt corresponding to the opp, else
@@ -143,9 +148,9 @@ static struct device_opp *find_device_opp(struct device *dev)
  * prior to unlocking with rcu_read_unlock() to maintain the integrity of the
  * pointer.
  */
-unsigned long opp_get_voltage(struct opp *opp)
+unsigned long dev_pm_opp_get_voltage(struct dev_pm_opp *opp)
 {
-	struct opp *tmp_opp;
+	struct dev_pm_opp *tmp_opp;
 	unsigned long v = 0;
 
 	tmp_opp = rcu_dereference(opp);
@@ -156,9 +161,10 @@ unsigned long opp_get_voltage(struct opp *opp)
 
 	return v;
 }
+EXPORT_SYMBOL_GPL(dev_pm_opp_get_voltage);
 
 /**
- * opp_get_freq() - Gets the frequency corresponding to an available opp
+ * dev_pm_opp_get_freq() - Gets the frequency corresponding to an available opp
  * @opp:	opp for which frequency has to be returned for
  *
  * Return frequency in hertz corresponding to the opp, else
@@ -172,9 +178,9 @@ unsigned long opp_get_voltage(struct opp *opp)
  * prior to unlocking with rcu_read_unlock() to maintain the integrity of the
  * pointer.
  */
-unsigned long opp_get_freq(struct opp *opp)
+unsigned long dev_pm_opp_get_freq(struct dev_pm_opp *opp)
 {
-	struct opp *tmp_opp;
+	struct dev_pm_opp *tmp_opp;
 	unsigned long f = 0;
 
 	tmp_opp = rcu_dereference(opp);
@@ -185,9 +191,10 @@ unsigned long opp_get_freq(struct opp *opp)
 
 	return f;
 }
+EXPORT_SYMBOL_GPL(dev_pm_opp_get_freq);
 
 /**
- * opp_get_opp_count() - Get number of opps available in the opp list
+ * dev_pm_opp_get_opp_count() - Get number of opps available in the opp list
  * @dev:	device for which we do this operation
  *
  * This function returns the number of available opps if there are any,
@@ -197,10 +204,10 @@ unsigned long opp_get_freq(struct opp *opp)
  * internally references two RCU protected structures: device_opp and opp which
  * are safe as long as we are under a common RCU locked section.
  */
-int opp_get_opp_count(struct device *dev)
+int dev_pm_opp_get_opp_count(struct device *dev)
 {
 	struct device_opp *dev_opp;
-	struct opp *temp_opp;
+	struct dev_pm_opp *temp_opp;
 	int count = 0;
 
 	dev_opp = find_device_opp(dev);
@@ -217,16 +224,20 @@ int opp_get_opp_count(struct device *dev)
 
 	return count;
 }
+EXPORT_SYMBOL_GPL(dev_pm_opp_get_opp_count);
 
 /**
- * opp_find_freq_exact() - search for an exact frequency
+ * dev_pm_opp_find_freq_exact() - search for an exact frequency
  * @dev:		device for which we do this operation
  * @freq:		frequency to search for
- * @is_available:	true/false - match for available opp
+ * @available:		true/false - match for available opp
  *
  * Searches for exact match in the opp list and returns pointer to the matching
  * opp if found, else returns ERR_PTR in case of error and should be handled
- * using IS_ERR.
+ * using IS_ERR. Error return values can be:
+ * EINVAL:	for bad pointer
+ * ERANGE:	no match found for search
+ * ENODEV:	if device not found in list of registered devices
  *
  * Note: available is a modifier for the search. if available=true, then the
  * match is for exact matching frequency and is available in the stored OPP
@@ -241,11 +252,12 @@ int opp_get_opp_count(struct device *dev)
  * under the locked area. The pointer returned must be used prior to unlocking
  * with rcu_read_unlock() to maintain the integrity of the pointer.
  */
-struct opp *opp_find_freq_exact(struct device *dev, unsigned long freq,
-				bool available)
+struct dev_pm_opp *dev_pm_opp_find_freq_exact(struct device *dev,
+					      unsigned long freq,
+					      bool available)
 {
 	struct device_opp *dev_opp;
-	struct opp *temp_opp, *opp = ERR_PTR(-ENODEV);
+	struct dev_pm_opp *temp_opp, *opp = ERR_PTR(-ERANGE);
 
 	dev_opp = find_device_opp(dev);
 	if (IS_ERR(dev_opp)) {
@@ -264,9 +276,10 @@ struct opp *opp_find_freq_exact(struct device *dev, unsigned long freq,
 
 	return opp;
 }
+EXPORT_SYMBOL_GPL(dev_pm_opp_find_freq_exact);
 
 /**
- * opp_find_freq_ceil() - Search for an rounded ceil freq
+ * dev_pm_opp_find_freq_ceil() - Search for a rounded ceil freq
  * @dev:	device for which we do this operation
  * @freq:	Start frequency
  *
@@ -274,7 +287,11 @@ struct opp *opp_find_freq_exact(struct device *dev, unsigned long freq,
  * for a device.
  *
  * Returns matching *opp and refreshes *freq accordingly, else returns
- * ERR_PTR in case of error and should be handled using IS_ERR.
+ * ERR_PTR in case of error and should be handled using IS_ERR. Error return
+ * values can be:
+ * EINVAL:	for bad pointer
+ * ERANGE:	no match found for search
+ * ENODEV:	if device not found in list of registered devices
  *
  * Locking: This function must be called under rcu_read_lock(). opp is a rcu
  * protected pointer. The reason for the same is that the opp pointer which is
@@ -282,10 +299,11 @@ struct opp *opp_find_freq_exact(struct device *dev, unsigned long freq,
  * under the locked area. The pointer returned must be used prior to unlocking
  * with rcu_read_unlock() to maintain the integrity of the pointer.
  */
-struct opp *opp_find_freq_ceil(struct device *dev, unsigned long *freq)
+struct dev_pm_opp *dev_pm_opp_find_freq_ceil(struct device *dev,
+					     unsigned long *freq)
 {
 	struct device_opp *dev_opp;
-	struct opp *temp_opp, *opp = ERR_PTR(-ENODEV);
+	struct dev_pm_opp *temp_opp, *opp = ERR_PTR(-ERANGE);
 
 	if (!dev || !freq) {
 		dev_err(dev, "%s: Invalid argument freq=%p\n", __func__, freq);
@@ -294,7 +312,7 @@ struct opp *opp_find_freq_ceil(struct device *dev, unsigned long *freq)
 
 	dev_opp = find_device_opp(dev);
 	if (IS_ERR(dev_opp))
-		return opp;
+		return ERR_CAST(dev_opp);
 
 	list_for_each_entry_rcu(temp_opp, &dev_opp->opp_list, node) {
 		if (temp_opp->available && temp_opp->rate >= *freq) {
@@ -306,9 +324,10 @@ struct opp *opp_find_freq_ceil(struct device *dev, unsigned long *freq)
 
 	return opp;
 }
+EXPORT_SYMBOL_GPL(dev_pm_opp_find_freq_ceil);
 
 /**
- * opp_find_freq_floor() - Search for a rounded floor freq
+ * dev_pm_opp_find_freq_floor() - Search for a rounded floor freq
  * @dev:	device for which we do this operation
  * @freq:	Start frequency
  *
@@ -316,7 +335,11 @@ struct opp *opp_find_freq_ceil(struct device *dev, unsigned long *freq)
  * for a device.
  *
  * Returns matching *opp and refreshes *freq accordingly, else returns
- * ERR_PTR in case of error and should be handled using IS_ERR.
+ * ERR_PTR in case of error and should be handled using IS_ERR. Error return
+ * values can be:
+ * EINVAL:	for bad pointer
+ * ERANGE:	no match found for search
+ * ENODEV:	if device not found in list of registered devices
  *
  * Locking: This function must be called under rcu_read_lock(). opp is a rcu
  * protected pointer. The reason for the same is that the opp pointer which is
@@ -324,10 +347,11 @@ struct opp *opp_find_freq_ceil(struct device *dev, unsigned long *freq)
  * under the locked area. The pointer returned must be used prior to unlocking
  * with rcu_read_unlock() to maintain the integrity of the pointer.
  */
-struct opp *opp_find_freq_floor(struct device *dev, unsigned long *freq)
+struct dev_pm_opp *dev_pm_opp_find_freq_floor(struct device *dev,
+					      unsigned long *freq)
 {
 	struct device_opp *dev_opp;
-	struct opp *temp_opp, *opp = ERR_PTR(-ENODEV);
+	struct dev_pm_opp *temp_opp, *opp = ERR_PTR(-ERANGE);
 
 	if (!dev || !freq) {
 		dev_err(dev, "%s: Invalid argument freq=%p\n", __func__, freq);
@@ -336,7 +360,7 @@ struct opp *opp_find_freq_floor(struct device *dev, unsigned long *freq)
 
 	dev_opp = find_device_opp(dev);
 	if (IS_ERR(dev_opp))
-		return opp;
+		return ERR_CAST(dev_opp);
 
 	list_for_each_entry_rcu(temp_opp, &dev_opp->opp_list, node) {
 		if (temp_opp->available) {
@@ -352,31 +376,39 @@ struct opp *opp_find_freq_floor(struct device *dev, unsigned long *freq)
 
 	return opp;
 }
+EXPORT_SYMBOL_GPL(dev_pm_opp_find_freq_floor);
 
 /**
- * opp_add()  - Add an OPP table from a table definitions
+ * dev_pm_opp_add()  - Add an OPP definition to the device's OPP list
  * @dev:	device for which we do this operation
  * @freq:	Frequency in Hz for this OPP
  * @u_volt:	Voltage in uVolts for this OPP
  *
  * This function adds an opp definition to the opp list and returns status.
  * The opp is made available by default and it can be controlled using
- * opp_enable/disable functions.
+ * dev_pm_opp_enable/disable functions.
  *
  * Locking: The internal device_opp and opp structures are RCU protected.
  * Hence this function internally uses RCU updater strategy with mutex locks
  * to keep the integrity of the internal data structures. Callers should ensure
  * that this function is *NOT* called under RCU protection or in contexts where
  * mutex cannot be locked.
+ *
+ * Return:
+ * 0:		On success OR
+ *		Duplicate OPPs (both freq and volt are same) and opp->available
+ * -EEXIST:	Freq are same and volt are different OR
+ *		Duplicate OPPs (both freq and volt are same) and !opp->available
+ * -ENOMEM:	Memory allocation failure
  */
-int opp_add(struct device *dev, unsigned long freq, unsigned long u_volt)
+int dev_pm_opp_add(struct device *dev, unsigned long freq, unsigned long u_volt)
 {
 	struct device_opp *dev_opp = NULL;
-	struct opp *opp, *new_opp;
+	struct dev_pm_opp *opp, *new_opp;
 	struct list_head *head;
 
 	/* allocate new OPP node */
-	new_opp = kzalloc(sizeof(struct opp), GFP_KERNEL);
+	new_opp = kzalloc(sizeof(*new_opp), GFP_KERNEL);
 	if (!new_opp) {
 		dev_warn(dev, "%s: Unable to create new OPP node\n", __func__);
 		return -ENOMEM;
@@ -404,6 +436,7 @@ int opp_add(struct device *dev, unsigned long freq, unsigned long u_volt)
 		}
 
 		dev_opp->dev = dev;
+		srcu_init_notifier_head(&dev_opp->head);
 		INIT_LIST_HEAD(&dev_opp->opp_list);
 
 		/* Secure the device list modification */
@@ -416,20 +449,42 @@ int opp_add(struct device *dev, unsigned long freq, unsigned long u_volt)
 	new_opp->u_volt = u_volt;
 	new_opp->available = true;
 
-	/* Insert new OPP in order of increasing frequency */
+	/*
+	 * Insert new OPP in order of increasing frequency
+	 * and discard if already present
+	 */
 	head = &dev_opp->opp_list;
 	list_for_each_entry_rcu(opp, &dev_opp->opp_list, node) {
-		if (new_opp->rate < opp->rate)
+		if (new_opp->rate <= opp->rate)
 			break;
 		else
 			head = &opp->node;
 	}
 
+	/* Duplicate OPPs? Only valid if the walk stopped on a real entry */
+	if (&opp->node != &dev_opp->opp_list && new_opp->rate == opp->rate) {
+		int ret = opp->available && new_opp->u_volt == opp->u_volt ?
+			0 : -EEXIST;
+
+		dev_warn(dev, "%s: duplicate OPPs detected. Existing: freq: %lu, volt: %lu, enabled: %d. New: freq: %lu, volt: %lu, enabled: %d\n",
+			 __func__, opp->rate, opp->u_volt, opp->available,
+			 new_opp->rate, new_opp->u_volt, new_opp->available);
+		mutex_unlock(&dev_opp_list_lock);
+		kfree(new_opp);
+		return ret;
+	}
+
 	list_add_rcu(&new_opp->node, head);
 	mutex_unlock(&dev_opp_list_lock);
 
+	/*
+	 * Notify the changes in the availability of the operable
+	 * frequency/voltage list.
+	 */
+	srcu_notifier_call_chain(&dev_opp->head, OPP_EVENT_ADD, new_opp);
 	return 0;
 }
+EXPORT_SYMBOL_GPL(dev_pm_opp_add);
 
 /**
  * opp_set_availability() - helper to set the availability of an opp
@@ -453,12 +508,12 @@ int opp_add(struct device *dev, unsigned long freq, unsigned long u_volt)
 static int opp_set_availability(struct device *dev, unsigned long freq,
 		bool availability_req)
 {
-	struct device_opp *tmp_dev_opp, *dev_opp = NULL;
-	struct opp *new_opp, *tmp_opp, *opp = ERR_PTR(-ENODEV);
+	struct device_opp *tmp_dev_opp, *dev_opp = ERR_PTR(-ENODEV);
+	struct dev_pm_opp *new_opp, *tmp_opp, *opp = ERR_PTR(-ENODEV);
 	int r = 0;
 
 	/* keep the node allocated */
-	new_opp = kmalloc(sizeof(struct opp), GFP_KERNEL);
+	new_opp = kmalloc(sizeof(*new_opp), GFP_KERNEL);
 	if (!new_opp) {
 		dev_warn(dev, "%s: Unable to create OPP\n", __func__);
 		return -ENOMEM;
@@ -502,27 +557,32 @@ static int opp_set_availability(struct device *dev, unsigned long freq,
 
 	list_replace_rcu(&opp->node, &new_opp->node);
 	mutex_unlock(&dev_opp_list_lock);
-	synchronize_rcu();
+	kfree_rcu(opp, head);
 
-	/* clean up old opp */
-	new_opp = opp;
-	goto out;
+	/* Notify the change of the OPP availability */
+	if (availability_req)
+		srcu_notifier_call_chain(&dev_opp->head, OPP_EVENT_ENABLE,
+					 new_opp);
+	else
+		srcu_notifier_call_chain(&dev_opp->head, OPP_EVENT_DISABLE,
+					 new_opp);
+
+	return 0;
 
 unlock:
 	mutex_unlock(&dev_opp_list_lock);
-out:
 	kfree(new_opp);
 	return r;
 }
 
 /**
- * opp_enable() - Enable a specific OPP
+ * dev_pm_opp_enable() - Enable a specific OPP
  * @dev:	device for which we do this operation
  * @freq:	OPP frequency to enable
  *
  * Enables a provided opp. If the operation is valid, this returns 0, else the
  * corresponding error value. It is meant to be used for users an OPP available
- * after being temporarily made unavailable with opp_disable.
+ * after being temporarily made unavailable with dev_pm_opp_disable.
  *
  * Locking: The internal device_opp and opp structures are RCU protected.
  * Hence this function indirectly uses RCU and mutex locks to keep the
@@ -530,20 +590,21 @@ out:
  * this function is *NOT* called under RCU protection or in contexts where
  * mutex locking or synchronize_rcu() blocking calls cannot be used.
  */
-int opp_enable(struct device *dev, unsigned long freq)
+int dev_pm_opp_enable(struct device *dev, unsigned long freq)
 {
 	return opp_set_availability(dev, freq, true);
 }
+EXPORT_SYMBOL_GPL(dev_pm_opp_enable);
 
 /**
- * opp_disable() - Disable a specific OPP
+ * dev_pm_opp_disable() - Disable a specific OPP
  * @dev:	device for which we do this operation
  * @freq:	OPP frequency to disable
  *
  * Disables a provided opp. If the operation is valid, this returns
  * 0, else the corresponding error value. It is meant to be a temporary
  * control by users to make this OPP not available until the circumstances are
- * right to make it available again (with a call to opp_enable).
+ * right to make it available again (with a call to dev_pm_opp_enable).
  *
  * Locking: The internal device_opp and opp structures are RCU protected.
  * Hence this function indirectly uses RCU and mutex locks to keep the
@@ -551,78 +612,67 @@ int opp_enable(struct device *dev, unsigned long freq)
  * this function is *NOT* called under RCU protection or in contexts where
  * mutex locking or synchronize_rcu() blocking calls cannot be used.
  */
-int opp_disable(struct device *dev, unsigned long freq)
+int dev_pm_opp_disable(struct device *dev, unsigned long freq)
 {
 	return opp_set_availability(dev, freq, false);
 }
+EXPORT_SYMBOL_GPL(dev_pm_opp_disable);
 
-#ifdef CONFIG_CPU_FREQ
 /**
- * opp_init_cpufreq_table() - create a cpufreq table for a device
- * @dev:	device for which we do this operation
- * @table:	Cpufreq table returned back to caller
- *
- * Generate a cpufreq table for a provided device- this assumes that the
- * opp list is already initialized and ready for usage.
- *
- * This function allocates required memory for the cpufreq table. It is
- * expected that the caller does the required maintenance such as freeing
- * the table as required.
- *
- * Returns -EINVAL for bad pointers, -ENODEV if the device is not found, -ENOMEM
- * if no memory available for the operation (table is not populated), returns 0
- * if successful and table is populated.
- *
- * WARNING: It is  important for the callers to ensure refreshing their copy of
- * the table if any of the mentioned functions have been invoked in the interim.
- *
- * Locking: The internal device_opp and opp structures are RCU protected.
- * To simplify the logic, we pretend we are updater and hold relevant mutex here
- * Callers should ensure that this function is *NOT* called under RCU protection
- * or in contexts where mutex locking cannot be used.
+ * dev_pm_opp_get_notifier() - find notifier_head of the device with opp
+ * @dev:	device pointer used to lookup device OPPs.
  */
-int opp_init_cpufreq_table(struct device *dev,
-			    struct cpufreq_frequency_table **table)
+struct srcu_notifier_head *dev_pm_opp_get_notifier(struct device *dev)
 {
-	struct device_opp *dev_opp;
-	struct opp *opp;
-	struct cpufreq_frequency_table *freq_table;
-	int i = 0;
-
-	/* Pretend as if I am an updater */
-	mutex_lock(&dev_opp_list_lock);
+	struct device_opp *dev_opp = find_device_opp(dev);
 
-	dev_opp = find_device_opp(dev);
-	if (IS_ERR(dev_opp)) {
-		int r = PTR_ERR(dev_opp);
-		mutex_unlock(&dev_opp_list_lock);
-		dev_err(dev, "%s: Device OPP not found (%d)\n", __func__, r);
-		return r;
-	}
+	if (IS_ERR(dev_opp))
+		return ERR_CAST(dev_opp); /* matching type */
 
-	freq_table = kzalloc(sizeof(struct cpufreq_frequency_table) *
-			     (opp_get_opp_count(dev) + 1), GFP_KERNEL);
-	if (!freq_table) {
-		mutex_unlock(&dev_opp_list_lock);
-		dev_warn(dev, "%s: Unable to allocate frequency table\n",
-			__func__);
-		return -ENOMEM;
-	}
+	return &dev_opp->head;
+}
 
-	list_for_each_entry(opp, &dev_opp->opp_list, node) {
-		if (opp->available) {
-			freq_table[i].index = i;
-			freq_table[i].frequency = opp->rate / 1000;
-			i++;
-		}
+#ifdef CONFIG_OF
+/**
+ * of_init_opp_table() - Initialize opp table from device tree
+ * @dev:	device pointer used to lookup device OPPs.
+ *
+ * Register the initial OPP table with the OPP library for given device.
+ */
+int of_init_opp_table(struct device *dev)
+{
+	const struct property *prop;
+	const __be32 *val;
+	int nr;
+
+	prop = of_find_property(dev->of_node, "operating-points", NULL);
+	if (!prop)
+		return -ENODEV;
+	if (!prop->value)
+		return -ENODATA;
+
+	/*
+	 * Each OPP is a set of tuples consisting of frequency and
+	 * voltage like <freq-kHz vol-uV>.
+	 */
+	nr = prop->length / sizeof(u32);
+	if (nr % 2) {
+		dev_err(dev, "%s: Invalid OPP list\n", __func__);
+		return -EINVAL;
 	}
-	mutex_unlock(&dev_opp_list_lock);
 
-	freq_table[i].index = i;
-	freq_table[i].frequency = CPUFREQ_TABLE_END;
+	val = prop->value;
+	while (nr) {
+		unsigned long freq = be32_to_cpup(val++) * 1000;
+		unsigned long volt = be32_to_cpup(val++);
 
-	*table = &freq_table[0];
+		if (dev_pm_opp_add(dev, freq, volt))
+			dev_warn(dev, "%s: Failed to add OPP %lu\n",
+				 __func__, freq);
+		nr -= 2;
+	}
 
 	return 0;
 }
-#endif		/* CONFIG_CPU_FREQ */
+EXPORT_SYMBOL_GPL(of_init_opp_table);
+#endif
diff --git a/drivers/base/power/power.h b/drivers/base/power/power.h
index 698dde74258..a21223d9592 100644
--- a/drivers/base/power/power.h
+++ b/drivers/base/power/power.h
@@ -1,10 +1,32 @@
+#include <linux/pm_qos.h>
+
+static inline void device_pm_init_common(struct device *dev)
+{
+	if (!dev->power.early_init) {
+		spin_lock_init(&dev->power.lock);
+		dev->power.qos = NULL;
+		dev->power.early_init = true;
+	}
+}
+
 #ifdef CONFIG_PM_RUNTIME
 
+static inline void pm_runtime_early_init(struct device *dev)
+{
+	dev->power.disable_depth = 1;
+	device_pm_init_common(dev);
+}
+
 extern void pm_runtime_init(struct device *dev);
 extern void pm_runtime_remove(struct device *dev);
 
 #else /* !CONFIG_PM_RUNTIME */
 
+static inline void pm_runtime_early_init(struct device *dev)
+{
+	device_pm_init_common(dev);
+}
+
 static inline void pm_runtime_init(struct device *dev) {}
 static inline void pm_runtime_remove(struct device *dev) {}
 
@@ -23,7 +45,7 @@ static inline struct device *to_device(struct list_head *entry)
 	return container_of(entry, struct device, power.entry);
 }
 
-extern void device_pm_init(struct device *dev);
+extern void device_pm_sleep_init(struct device *dev);
 extern void device_pm_add(struct device *);
 extern void device_pm_remove(struct device *);
 extern void device_pm_move_before(struct device *, struct device *);
@@ -32,18 +54,15 @@ extern void device_pm_move_last(struct device *);
 
 #else /* !CONFIG_PM_SLEEP */
 
-static inline void device_pm_init(struct device *dev)
-{
-	spin_lock_init(&dev->power.lock);
-	pm_runtime_init(dev);
-}
+static inline void device_pm_sleep_init(struct device *dev) {}
+
+static inline void device_pm_add(struct device *dev) {}
 
 static inline void device_pm_remove(struct device *dev)
 {
 	pm_runtime_remove(dev);
 }
 
-static inline void device_pm_add(struct device *dev) {}
 static inline void device_pm_move_before(struct device *deva,
 					 struct device *devb) {}
 static inline void device_pm_move_after(struct device *deva,
@@ -52,25 +71,37 @@ static inline void device_pm_move_last(struct device *dev) {}
 
 #endif /* !CONFIG_PM_SLEEP */
 
+static inline void device_pm_init(struct device *dev)
+{
+	device_pm_init_common(dev);
+	device_pm_sleep_init(dev);
+	pm_runtime_init(dev);
+}
+
 #ifdef CONFIG_PM
 
 /*
  * sysfs.c
  */
 
-extern int dpm_sysfs_add(struct device *);
-extern void dpm_sysfs_remove(struct device *);
-extern void rpm_sysfs_remove(struct device *);
+extern int dpm_sysfs_add(struct device *dev);
+extern void dpm_sysfs_remove(struct device *dev);
+extern void rpm_sysfs_remove(struct device *dev);
+extern int wakeup_sysfs_add(struct device *dev);
+extern void wakeup_sysfs_remove(struct device *dev);
+extern int pm_qos_sysfs_add_resume_latency(struct device *dev);
+extern void pm_qos_sysfs_remove_resume_latency(struct device *dev);
+extern int pm_qos_sysfs_add_flags(struct device *dev);
+extern void pm_qos_sysfs_remove_flags(struct device *dev);
 
 #else /* CONFIG_PM */
 
-static inline int dpm_sysfs_add(struct device *dev)
-{
-	return 0;
-}
-
-static inline void dpm_sysfs_remove(struct device *dev)
-{
-}
+static inline int dpm_sysfs_add(struct device *dev) { return 0; }
+static inline void dpm_sysfs_remove(struct device *dev) {}
+static inline void rpm_sysfs_remove(struct device *dev) {}
+static inline int wakeup_sysfs_add(struct device *dev) { return 0; }
+static inline void wakeup_sysfs_remove(struct device *dev) {}
+static inline int pm_qos_sysfs_add(struct device *dev) { return 0; }
+static inline void pm_qos_sysfs_remove(struct device *dev) {}
 
 #endif
diff --git a/drivers/base/power/qos.c b/drivers/base/power/qos.c
new file mode 100644
index 00000000000..36b9eb4862c
--- /dev/null
+++ b/drivers/base/power/qos.c
@@ -0,0 +1,886 @@
+/*
+ * Devices PM QoS constraints management
+ *
+ * Copyright (C) 2011 Texas Instruments, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *
+ * This module exposes the interface to kernel space for specifying
+ * per-device PM QoS dependencies. It provides infrastructure for registration
+ * of:
+ *
+ * Dependents on a QoS value : register requests
+ * Watchers of QoS value : get notified when target QoS value changes
+ *
+ * This QoS design is best effort based. Dependents register their QoS needs.
+ * Watchers register to keep track of the current QoS needs of the system.
+ * Watchers can register different types of notification callbacks:
+ *  . a per-device notification callback using the dev_pm_qos_*_notifier API.
+ *    The notification chain data is stored in the per-device constraint
+ *    data struct.
+ *  . a system-wide notification callback using the dev_pm_qos_*_global_notifier
+ *    API. The notification chain data is stored in a static variable.
+ *
+ * Note about the per-device constraint data struct allocation:
+ * . The per-device constraints data struct ptr is stored in the device
+ *    dev_pm_info.
+ * . To minimize the data usage by the per-device constraints, the data struct
+ *   is only allocated at the first call to dev_pm_qos_add_request.
+ * . The data is later free'd when the device is removed from the system.
+ *  . A global mutex protects the constraints users from the data being
+ *     allocated and free'd.
+ */
+
+#include <linux/pm_qos.h>
+#include <linux/spinlock.h>
+#include <linux/slab.h>
+#include <linux/device.h>
+#include <linux/mutex.h>
+#include <linux/export.h>
+#include <linux/pm_runtime.h>
+#include <linux/err.h>
+#include <trace/events/power.h>
+
+#include "power.h"
+
+static DEFINE_MUTEX(dev_pm_qos_mtx);
+static DEFINE_MUTEX(dev_pm_qos_sysfs_mtx);
+
+static BLOCKING_NOTIFIER_HEAD(dev_pm_notifiers);
+
+/**
+ * __dev_pm_qos_flags - Check PM QoS flags for a given device.
+ * @dev: Device to check the PM QoS flags for.
+ * @mask: Flags to check against.
+ *
+ * This routine must be called with dev->power.lock held.
+ */
+enum pm_qos_flags_status __dev_pm_qos_flags(struct device *dev, s32 mask)
+{
+	struct dev_pm_qos *qos = dev->power.qos;
+	struct pm_qos_flags *pqf;
+	s32 val;
+
+	if (IS_ERR_OR_NULL(qos))
+		return PM_QOS_FLAGS_UNDEFINED;
+
+	pqf = &qos->flags;
+	if (list_empty(&pqf->list))
+		return PM_QOS_FLAGS_UNDEFINED;
+
+	val = pqf->effective_flags & mask;
+	if (val)
+		return (val == mask) ? PM_QOS_FLAGS_ALL : PM_QOS_FLAGS_SOME;
+
+	return PM_QOS_FLAGS_NONE;
+}
+
+/**
+ * dev_pm_qos_flags - Check PM QoS flags for a given device (locked).
+ * @dev: Device to check the PM QoS flags for.
+ * @mask: Flags to check against.
+ */
+enum pm_qos_flags_status dev_pm_qos_flags(struct device *dev, s32 mask)
+{
+	unsigned long irqflags;
+	enum pm_qos_flags_status ret;
+
+	spin_lock_irqsave(&dev->power.lock, irqflags);
+	ret = __dev_pm_qos_flags(dev, mask);
+	spin_unlock_irqrestore(&dev->power.lock, irqflags);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(dev_pm_qos_flags);
+
+/**
+ * __dev_pm_qos_read_value - Get PM QoS constraint for a given device.
+ * @dev: Device to get the PM QoS constraint value for.
+ *
+ * This routine must be called with dev->power.lock held.
+ */
+s32 __dev_pm_qos_read_value(struct device *dev)
+{
+	return IS_ERR_OR_NULL(dev->power.qos) ?
+		0 : pm_qos_read_value(&dev->power.qos->resume_latency);
+}
+
+/**
+ * dev_pm_qos_read_value - Get PM QoS constraint for a given device (locked).
+ * @dev: Device to get the PM QoS constraint value for.
+ */
+s32 dev_pm_qos_read_value(struct device *dev)
+{
+	unsigned long flags;
+	s32 ret;
+
+	spin_lock_irqsave(&dev->power.lock, flags);
+	ret = __dev_pm_qos_read_value(dev);
+	spin_unlock_irqrestore(&dev->power.lock, flags);
+
+	return ret;
+}
+
+/**
+ * apply_constraint - Add/modify/remove device PM QoS request.
+ * @req: Constraint request to apply
+ * @action: Action to perform (add/update/remove).
+ * @value: Value to assign to the QoS request.
+ *
+ * Internal function to update the constraints list using the PM QoS core
+ * code and if needed call the per-device and the global notification
+ * callbacks
+ */
+static int apply_constraint(struct dev_pm_qos_request *req,
+			    enum pm_qos_req_action action, s32 value)
+{
+	struct dev_pm_qos *qos = req->dev->power.qos;
+	int ret;
+
+	switch(req->type) {
+	case DEV_PM_QOS_RESUME_LATENCY:
+		ret = pm_qos_update_target(&qos->resume_latency,
+					   &req->data.pnode, action, value);
+		if (ret) {
+			value = pm_qos_read_value(&qos->resume_latency);
+			blocking_notifier_call_chain(&dev_pm_notifiers,
+						     (unsigned long)value,
+						     req);
+		}
+		break;
+	case DEV_PM_QOS_LATENCY_TOLERANCE:
+		ret = pm_qos_update_target(&qos->latency_tolerance,
+					   &req->data.pnode, action, value);
+		if (ret) {
+			value = pm_qos_read_value(&qos->latency_tolerance);
+			req->dev->power.set_latency_tolerance(req->dev, value);
+		}
+		break;
+	case DEV_PM_QOS_FLAGS:
+		ret = pm_qos_update_flags(&qos->flags, &req->data.flr,
+					  action, value);
+		break;
+	default:
+		ret = -EINVAL;
+	}
+
+	return ret;
+}
+
+/*
+ * dev_pm_qos_constraints_allocate
+ * @dev: device to allocate data for
+ *
+ * Called at the first call to add_request, for constraint data allocation
+ * Must be called with the dev_pm_qos_mtx mutex held
+ */
+static int dev_pm_qos_constraints_allocate(struct device *dev)
+{
+	struct dev_pm_qos *qos;
+	struct pm_qos_constraints *c;
+	struct blocking_notifier_head *n;
+
+	qos = kzalloc(sizeof(*qos), GFP_KERNEL);
+	if (!qos)
+		return -ENOMEM;
+
+	n = kzalloc(sizeof(*n), GFP_KERNEL);
+	if (!n) {
+		kfree(qos);
+		return -ENOMEM;
+	}
+	BLOCKING_INIT_NOTIFIER_HEAD(n);
+
+	c = &qos->resume_latency;
+	plist_head_init(&c->list);
+	c->target_value = PM_QOS_RESUME_LATENCY_DEFAULT_VALUE;
+	c->default_value = PM_QOS_RESUME_LATENCY_DEFAULT_VALUE;
+	c->no_constraint_value = PM_QOS_RESUME_LATENCY_DEFAULT_VALUE;
+	c->type = PM_QOS_MIN;
+	c->notifiers = n;
+
+	c = &qos->latency_tolerance;
+	plist_head_init(&c->list);
+	c->target_value = PM_QOS_LATENCY_TOLERANCE_DEFAULT_VALUE;
+	c->default_value = PM_QOS_LATENCY_TOLERANCE_DEFAULT_VALUE;
+	c->no_constraint_value = PM_QOS_LATENCY_TOLERANCE_NO_CONSTRAINT;
+	c->type = PM_QOS_MIN;
+
+	INIT_LIST_HEAD(&qos->flags.list);
+
+	spin_lock_irq(&dev->power.lock);
+	dev->power.qos = qos;
+	spin_unlock_irq(&dev->power.lock);
+
+	return 0;
+}
+
+static void __dev_pm_qos_hide_latency_limit(struct device *dev);
+static void __dev_pm_qos_hide_flags(struct device *dev);
+
+/**
+ * dev_pm_qos_constraints_destroy - Flush and free a device's PM QoS data
+ * @dev: target device
+ *
+ * Called from the device PM subsystem on device removal under device_pm_lock().
+ */
+void dev_pm_qos_constraints_destroy(struct device *dev)
+{
+	struct dev_pm_qos *qos;
+	struct dev_pm_qos_request *req, *tmp;
+	struct pm_qos_constraints *c;
+	struct pm_qos_flags *f;
+
+	mutex_lock(&dev_pm_qos_sysfs_mtx);
+
+	/*
+	 * If the device's PM QoS resume latency limit or PM QoS flags have been
+	 * exposed to user space, they have to be hidden at this point.
+	 */
+	pm_qos_sysfs_remove_resume_latency(dev);
+	pm_qos_sysfs_remove_flags(dev);
+
+	mutex_lock(&dev_pm_qos_mtx);
+
+	__dev_pm_qos_hide_latency_limit(dev);
+	__dev_pm_qos_hide_flags(dev);
+
+	qos = dev->power.qos;
+	if (!qos)
+		goto out;
+
+	/* Flush the constraints lists for the device. */
+	c = &qos->resume_latency;
+	plist_for_each_entry_safe(req, tmp, &c->list, data.pnode) {
+		/*
+		 * Update constraints list and call the notification
+		 * callbacks if needed
+		 */
+		apply_constraint(req, PM_QOS_REMOVE_REQ, PM_QOS_DEFAULT_VALUE);
+		memset(req, 0, sizeof(*req));
+	}
+	c = &qos->latency_tolerance;
+	plist_for_each_entry_safe(req, tmp, &c->list, data.pnode) {
+		apply_constraint(req, PM_QOS_REMOVE_REQ, PM_QOS_DEFAULT_VALUE);
+		memset(req, 0, sizeof(*req));
+	}
+	f = &qos->flags;
+	list_for_each_entry_safe(req, tmp, &f->list, data.flr.node) {
+		apply_constraint(req, PM_QOS_REMOVE_REQ, PM_QOS_DEFAULT_VALUE);
+		memset(req, 0, sizeof(*req));
+	}
+
+	spin_lock_irq(&dev->power.lock);
+	dev->power.qos = ERR_PTR(-ENODEV);
+	spin_unlock_irq(&dev->power.lock);
+	/* c is &qos->latency_tolerance here; its notifiers is unset. */
+	kfree(qos->resume_latency.notifiers);
+	kfree(qos);
+
+ out:
+	mutex_unlock(&dev_pm_qos_mtx);
+
+	mutex_unlock(&dev_pm_qos_sysfs_mtx);
+}
+
+static bool dev_pm_qos_invalid_request(struct device *dev,
+				       struct dev_pm_qos_request *req)
+{
+	return !req || (req->type == DEV_PM_QOS_LATENCY_TOLERANCE
+			&& !dev->power.set_latency_tolerance);
+}
+
+static int __dev_pm_qos_add_request(struct device *dev,
+				    struct dev_pm_qos_request *req,
+				    enum dev_pm_qos_req_type type, s32 value)
+{
+	int ret = 0;
+
+	if (!dev || dev_pm_qos_invalid_request(dev, req))
+		return -EINVAL;
+
+	if (WARN(dev_pm_qos_request_active(req),
+		 "%s() called for already added request\n", __func__))
+		return -EINVAL;
+
+	if (IS_ERR(dev->power.qos))
+		ret = -ENODEV;
+	else if (!dev->power.qos)
+		ret = dev_pm_qos_constraints_allocate(dev);
+
+	trace_dev_pm_qos_add_request(dev_name(dev), type, value);
+	if (!ret) {
+		req->dev = dev;
+		req->type = type;
+		ret = apply_constraint(req, PM_QOS_ADD_REQ, value);
+	}
+	return ret;
+}
+
+/**
+ * dev_pm_qos_add_request - inserts new qos request into the list
+ * @dev: target device for the constraint
+ * @req: pointer to a preallocated handle
+ * @type: type of the request
+ * @value: defines the qos request
+ *
+ * This function inserts a new entry in the device constraints list of
+ * requested qos performance characteristics. It recomputes the aggregate
+ * QoS expectations of parameters and initializes the dev_pm_qos_request
+ * handle.  Caller needs to save this handle for later use in updates and
+ * removal.
+ *
+ * Returns 1 if the aggregated constraint value has changed,
+ * 0 if the aggregated constraint value has not changed,
+ * -EINVAL in case of wrong parameters, -ENOMEM if there's not enough memory
+ * to allocate for data structures, -ENODEV if the device has just been removed
+ * from the system.
+ *
+ * Callers should ensure that the target device is not RPM_SUSPENDED before
+ * using this function for requests of type DEV_PM_QOS_FLAGS.
+ */
+int dev_pm_qos_add_request(struct device *dev, struct dev_pm_qos_request *req,
+			   enum dev_pm_qos_req_type type, s32 value)
+{
+	int ret;
+
+	mutex_lock(&dev_pm_qos_mtx);
+	ret = __dev_pm_qos_add_request(dev, req, type, value);
+	mutex_unlock(&dev_pm_qos_mtx);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(dev_pm_qos_add_request);
+
+/**
+ * __dev_pm_qos_update_request - Modify an existing device PM QoS request.
+ * @req : PM QoS request to modify.
+ * @new_value: New value to request.
+ */
+static int __dev_pm_qos_update_request(struct dev_pm_qos_request *req,
+				       s32 new_value)
+{
+	s32 curr_value;
+	int ret = 0;
+
+	if (!req) /*guard against callers passing in null */
+		return -EINVAL;
+
+	if (WARN(!dev_pm_qos_request_active(req),
+		 "%s() called for unknown object\n", __func__))
+		return -EINVAL;
+
+	if (IS_ERR_OR_NULL(req->dev->power.qos))
+		return -ENODEV;
+
+	switch(req->type) {
+	case DEV_PM_QOS_RESUME_LATENCY:
+	case DEV_PM_QOS_LATENCY_TOLERANCE:
+		curr_value = req->data.pnode.prio;
+		break;
+	case DEV_PM_QOS_FLAGS:
+		curr_value = req->data.flr.flags;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	trace_dev_pm_qos_update_request(dev_name(req->dev), req->type,
+					new_value);
+	if (curr_value != new_value)
+		ret = apply_constraint(req, PM_QOS_UPDATE_REQ, new_value);
+
+	return ret;
+}
+
+/**
+ * dev_pm_qos_update_request - modifies an existing qos request
+ * @req : handle to list element holding a dev_pm_qos request to use
+ * @new_value: defines the qos request
+ *
+ * Updates an existing dev PM qos request along with updating the
+ * target value.
+ *
+ * Attempts are made to make this code callable on hot code paths.
+ *
+ * Returns 1 if the aggregated constraint value has changed,
+ * 0 if the aggregated constraint value has not changed,
+ * -EINVAL in case of wrong parameters, -ENODEV if the device has been
+ * removed from the system
+ *
+ * Callers should ensure that the target device is not RPM_SUSPENDED before
+ * using this function for requests of type DEV_PM_QOS_FLAGS.
+ */
+int dev_pm_qos_update_request(struct dev_pm_qos_request *req, s32 new_value)
+{
+	int ret;
+
+	mutex_lock(&dev_pm_qos_mtx);
+	ret = __dev_pm_qos_update_request(req, new_value);
+	mutex_unlock(&dev_pm_qos_mtx);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(dev_pm_qos_update_request);
+
+static int __dev_pm_qos_remove_request(struct dev_pm_qos_request *req)
+{
+	int ret;
+
+	if (!req) /*guard against callers passing in null */
+		return -EINVAL;
+
+	if (WARN(!dev_pm_qos_request_active(req),
+		 "%s() called for unknown object\n", __func__))
+		return -EINVAL;
+
+	if (IS_ERR_OR_NULL(req->dev->power.qos))
+		return -ENODEV;
+
+	trace_dev_pm_qos_remove_request(dev_name(req->dev), req->type,
+					PM_QOS_DEFAULT_VALUE);
+	ret = apply_constraint(req, PM_QOS_REMOVE_REQ, PM_QOS_DEFAULT_VALUE);
+	memset(req, 0, sizeof(*req));
+	return ret;
+}
+
+/**
+ * dev_pm_qos_remove_request - removes an existing qos request
+ * @req: handle to request list element
+ *
+ * Will remove pm qos request from the list of constraints and
+ * recompute the current target value. Call this on slow code paths.
+ *
+ * Returns 1 if the aggregated constraint value has changed,
+ * 0 if the aggregated constraint value has not changed,
+ * -EINVAL in case of wrong parameters, -ENODEV if the device has been
+ * removed from the system
+ *
+ * Callers should ensure that the target device is not RPM_SUSPENDED before
+ * using this function for requests of type DEV_PM_QOS_FLAGS.
+ */
+int dev_pm_qos_remove_request(struct dev_pm_qos_request *req)
+{
+	int ret;
+
+	mutex_lock(&dev_pm_qos_mtx);
+	ret = __dev_pm_qos_remove_request(req);
+	mutex_unlock(&dev_pm_qos_mtx);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(dev_pm_qos_remove_request);
+
+/**
+ * dev_pm_qos_add_notifier - sets notification entry for changes to target value
+ * of per-device PM QoS constraints
+ *
+ * @dev: target device for the constraint
+ * @notifier: notifier block managed by caller.
+ *
+ * Will register the notifier into a notification chain that gets called
+ * upon changes to the target value for the device.
+ *
+ * If the device's constraints object doesn't exist when this routine is called,
+ * it will be created (or error code will be returned if that fails).
+ */
+int dev_pm_qos_add_notifier(struct device *dev, struct notifier_block *notifier)
+{
+	int ret = 0;
+
+	mutex_lock(&dev_pm_qos_mtx);
+
+	if (IS_ERR(dev->power.qos))
+		ret = -ENODEV;
+	else if (!dev->power.qos)
+		ret = dev_pm_qos_constraints_allocate(dev);
+
+	if (!ret)
+		ret = blocking_notifier_chain_register(dev->power.qos->resume_latency.notifiers,
+						       notifier);
+
+	mutex_unlock(&dev_pm_qos_mtx);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(dev_pm_qos_add_notifier);
+
+/**
+ * dev_pm_qos_remove_notifier - deletes notification for changes to target value
+ * of per-device PM QoS constraints
+ *
+ * @dev: target device for the constraint
+ * @notifier: notifier block to be removed.
+ *
+ * Will remove the notifier from the notification chain that gets called
+ * upon changes to the target value.
+ */
+int dev_pm_qos_remove_notifier(struct device *dev,
+			       struct notifier_block *notifier)
+{
+	int retval = 0;
+
+	mutex_lock(&dev_pm_qos_mtx);
+
+	/* Silently return if the constraints object is not present. */
+	if (!IS_ERR_OR_NULL(dev->power.qos))
+		retval = blocking_notifier_chain_unregister(dev->power.qos->resume_latency.notifiers,
+							    notifier);
+
+	mutex_unlock(&dev_pm_qos_mtx);
+	return retval;
+}
+EXPORT_SYMBOL_GPL(dev_pm_qos_remove_notifier);
+
+/**
+ * dev_pm_qos_add_global_notifier - sets notification entry for changes to
+ * target value of the PM QoS constraints for any device
+ *
+ * @notifier: notifier block managed by caller.
+ *
+ * Will register the notifier into a notification chain that gets called
+ * upon changes to the target value for any device.
+ */
+int dev_pm_qos_add_global_notifier(struct notifier_block *notifier)
+{
+	return blocking_notifier_chain_register(&dev_pm_notifiers, notifier);
+}
+EXPORT_SYMBOL_GPL(dev_pm_qos_add_global_notifier);
+
+/**
+ * dev_pm_qos_remove_global_notifier - deletes notification for changes to
+ * target value of PM QoS constraints for any device
+ *
+ * @notifier: notifier block to be removed.
+ *
+ * Will remove the notifier from the notification chain that gets called
+ * upon changes to the target value for any device.
+ */
+int dev_pm_qos_remove_global_notifier(struct notifier_block *notifier)
+{
+	return blocking_notifier_chain_unregister(&dev_pm_notifiers, notifier);
+}
+EXPORT_SYMBOL_GPL(dev_pm_qos_remove_global_notifier);
+
+/**
+ * dev_pm_qos_add_ancestor_request - Add PM QoS request for device's ancestor.
+ * @dev: Device whose ancestor to add the request for.
+ * @req: Pointer to the preallocated handle.
+ * @type: Type of the request.
+ * @value: Constraint latency value.
+ */
+int dev_pm_qos_add_ancestor_request(struct device *dev,
+				    struct dev_pm_qos_request *req,
+				    enum dev_pm_qos_req_type type, s32 value)
+{
+	struct device *ancestor = dev->parent;
+	int ret = -ENODEV;
+
+	switch (type) {
+	case DEV_PM_QOS_RESUME_LATENCY:
+		while (ancestor && !ancestor->power.ignore_children)
+			ancestor = ancestor->parent;
+
+		break;
+	case DEV_PM_QOS_LATENCY_TOLERANCE:
+		while (ancestor && !ancestor->power.set_latency_tolerance)
+			ancestor = ancestor->parent;
+
+		break;
+	default:
+		ancestor = NULL;
+	}
+	if (ancestor)
+		ret = dev_pm_qos_add_request(ancestor, req, type, value);
+
+	if (ret < 0)
+		req->dev = NULL;
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(dev_pm_qos_add_ancestor_request);
+
+#ifdef CONFIG_PM_RUNTIME
+static void __dev_pm_qos_drop_user_request(struct device *dev,
+					   enum dev_pm_qos_req_type type)
+{
+	struct dev_pm_qos_request *req = NULL;
+
+	switch(type) {
+	case DEV_PM_QOS_RESUME_LATENCY:
+		req = dev->power.qos->resume_latency_req;
+		dev->power.qos->resume_latency_req = NULL;
+		break;
+	case DEV_PM_QOS_LATENCY_TOLERANCE:
+		req = dev->power.qos->latency_tolerance_req;
+		dev->power.qos->latency_tolerance_req = NULL;
+		break;
+	case DEV_PM_QOS_FLAGS:
+		req = dev->power.qos->flags_req;
+		dev->power.qos->flags_req = NULL;
+		break;
+	}
+	__dev_pm_qos_remove_request(req);
+	kfree(req);
+}
+
+static void dev_pm_qos_drop_user_request(struct device *dev,
+					 enum dev_pm_qos_req_type type)
+{
+	mutex_lock(&dev_pm_qos_mtx);
+	__dev_pm_qos_drop_user_request(dev, type);
+	mutex_unlock(&dev_pm_qos_mtx);
+}
+
+/**
+ * dev_pm_qos_expose_latency_limit - Expose PM QoS latency limit to user space.
+ * @dev: Device whose PM QoS latency limit is to be exposed to user space.
+ * @value: Initial value of the latency limit (negative values give -EINVAL).
+ */
+int dev_pm_qos_expose_latency_limit(struct device *dev, s32 value)
+{
+	struct dev_pm_qos_request *req;
+	int ret;
+
+	if (!device_is_registered(dev) || value < 0)
+		return -EINVAL;
+
+	req = kzalloc(sizeof(*req), GFP_KERNEL);
+	if (!req)
+		return -ENOMEM;
+
+	ret = dev_pm_qos_add_request(dev, req, DEV_PM_QOS_RESUME_LATENCY, value);
+	if (ret < 0) {
+		kfree(req);
+		return ret;
+	}
+
+	mutex_lock(&dev_pm_qos_sysfs_mtx);	/* outer lock, held until "out" */
+
+	mutex_lock(&dev_pm_qos_mtx);
+
+	if (IS_ERR_OR_NULL(dev->power.qos))
+		ret = -ENODEV;
+	else if (dev->power.qos->resume_latency_req)
+		ret = -EEXIST;	/* a user-space request is already installed */
+
+	if (ret < 0) {	/* undo the request added above before bailing out */
+		__dev_pm_qos_remove_request(req);
+		kfree(req);
+		mutex_unlock(&dev_pm_qos_mtx);
+		goto out;
+	}
+	dev->power.qos->resume_latency_req = req;	/* remember it as the user-space request */
+
+	mutex_unlock(&dev_pm_qos_mtx);	/* dropped before the sysfs call below */
+
+	ret = pm_qos_sysfs_add_resume_latency(dev);
+	if (ret)	/* sysfs setup failed: remove and free the request again */
+		dev_pm_qos_drop_user_request(dev, DEV_PM_QOS_RESUME_LATENCY);
+
+ out:
+	mutex_unlock(&dev_pm_qos_sysfs_mtx);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(dev_pm_qos_expose_latency_limit);
+
+static void __dev_pm_qos_hide_latency_limit(struct device *dev)
+{	/* Drop the user-space resume latency request if one is installed; takes no lock itself. */
+	if (!IS_ERR_OR_NULL(dev->power.qos) && dev->power.qos->resume_latency_req)
+		__dev_pm_qos_drop_user_request(dev, DEV_PM_QOS_RESUME_LATENCY);
+}
+
+/**
+ * dev_pm_qos_hide_latency_limit - Hide PM QoS latency limit from user space.
+ * @dev: Device whose PM QoS latency limit is to be hidden from user space.
+ */
+void dev_pm_qos_hide_latency_limit(struct device *dev)
+{
+	mutex_lock(&dev_pm_qos_sysfs_mtx);
+
+	pm_qos_sysfs_remove_resume_latency(dev);	/* sysfs side first, request second */
+
+	mutex_lock(&dev_pm_qos_mtx);	/* nested inside dev_pm_qos_sysfs_mtx */
+	__dev_pm_qos_hide_latency_limit(dev);
+	mutex_unlock(&dev_pm_qos_mtx);
+
+	mutex_unlock(&dev_pm_qos_sysfs_mtx);
+}
+EXPORT_SYMBOL_GPL(dev_pm_qos_hide_latency_limit);
+
+/**
+ * dev_pm_qos_expose_flags - Expose PM QoS flags of a device to user space.
+ * @dev: Device whose PM QoS flags are to be exposed to user space.
+ * @val: Initial values of the flags.
+ */
+int dev_pm_qos_expose_flags(struct device *dev, s32 val)
+{
+	struct dev_pm_qos_request *req;
+	int ret;
+
+	if (!device_is_registered(dev))
+		return -EINVAL;
+
+	req = kzalloc(sizeof(*req), GFP_KERNEL);
+	if (!req)
+		return -ENOMEM;
+
+	ret = dev_pm_qos_add_request(dev, req, DEV_PM_QOS_FLAGS, val);
+	if (ret < 0) {
+		kfree(req);
+		return ret;
+	}
+
+	pm_runtime_get_sync(dev);	/* balanced by pm_runtime_put() after "out" */
+	mutex_lock(&dev_pm_qos_sysfs_mtx);	/* outer lock, held until "out" */
+
+	mutex_lock(&dev_pm_qos_mtx);
+
+	if (IS_ERR_OR_NULL(dev->power.qos))
+		ret = -ENODEV;
+	else if (dev->power.qos->flags_req)
+		ret = -EEXIST;	/* a user-space flags request is already installed */
+
+	if (ret < 0) {	/* undo the request added above before bailing out */
+		__dev_pm_qos_remove_request(req);
+		kfree(req);
+		mutex_unlock(&dev_pm_qos_mtx);
+		goto out;
+	}
+	dev->power.qos->flags_req = req;	/* remember it as the user-space request */
+
+	mutex_unlock(&dev_pm_qos_mtx);	/* dropped before the sysfs call below */
+
+	ret = pm_qos_sysfs_add_flags(dev);
+	if (ret)	/* sysfs setup failed: remove and free the request again */
+		dev_pm_qos_drop_user_request(dev, DEV_PM_QOS_FLAGS);
+
+ out:
+	mutex_unlock(&dev_pm_qos_sysfs_mtx);
+	pm_runtime_put(dev);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(dev_pm_qos_expose_flags);
+
+static void __dev_pm_qos_hide_flags(struct device *dev)
+{	/* Drop the user-space flags request if one is installed; takes no lock itself. */
+	if (!IS_ERR_OR_NULL(dev->power.qos) && dev->power.qos->flags_req)
+		__dev_pm_qos_drop_user_request(dev, DEV_PM_QOS_FLAGS);
+}
+
+/**
+ * dev_pm_qos_hide_flags - Hide PM QoS flags of a device from user space.
+ * @dev: Device whose PM QoS flags are to be hidden from user space.
+ */
+void dev_pm_qos_hide_flags(struct device *dev)
+{
+	pm_runtime_get_sync(dev);	/* balanced by pm_runtime_put() at the end */
+	mutex_lock(&dev_pm_qos_sysfs_mtx);
+
+	pm_qos_sysfs_remove_flags(dev);	/* sysfs side first, request second */
+
+	mutex_lock(&dev_pm_qos_mtx);	/* nested inside dev_pm_qos_sysfs_mtx */
+	__dev_pm_qos_hide_flags(dev);
+	mutex_unlock(&dev_pm_qos_mtx);
+
+	mutex_unlock(&dev_pm_qos_sysfs_mtx);
+	pm_runtime_put(dev);
+}
+EXPORT_SYMBOL_GPL(dev_pm_qos_hide_flags);
+
+/**
+ * dev_pm_qos_update_flags - Update PM QoS flags request owned by user space.
+ * @dev: Device to update the PM QoS flags request for.
+ * @mask: Flags to set/clear.
+ * @set: Whether to set or clear the flags (true means set).
+ */
+int dev_pm_qos_update_flags(struct device *dev, s32 mask, bool set)
+{
+	s32 value;
+	int ret;
+
+	pm_runtime_get_sync(dev);	/* balanced by pm_runtime_put() after "out" */
+	mutex_lock(&dev_pm_qos_mtx);
+
+	if (IS_ERR_OR_NULL(dev->power.qos) || !dev->power.qos->flags_req) {
+		ret = -EINVAL;	/* no QoS data, or no user-space flags request installed */
+		goto out;
+	}
+
+	value = dev_pm_qos_requested_flags(dev);	/* start from the currently requested flags */
+	if (set)
+		value |= mask;
+	else
+		value &= ~mask;
+
+	ret = __dev_pm_qos_update_request(dev->power.qos->flags_req, value);
+
+ out:
+	mutex_unlock(&dev_pm_qos_mtx);
+	pm_runtime_put(dev);
+	return ret;
+}
+
+/**
+ * dev_pm_qos_get_user_latency_tolerance - Get user space latency tolerance.
+ * @dev: Device to obtain the user space latency tolerance for.
+ */
+s32 dev_pm_qos_get_user_latency_tolerance(struct device *dev)
+{
+	s32 ret;
+
+	mutex_lock(&dev_pm_qos_mtx);
+	ret = IS_ERR_OR_NULL(dev->power.qos)	/* no QoS data or no user request: "no constraint" */
+		|| !dev->power.qos->latency_tolerance_req ?
+			PM_QOS_LATENCY_TOLERANCE_NO_CONSTRAINT :
+			dev->power.qos->latency_tolerance_req->data.pnode.prio;
+	mutex_unlock(&dev_pm_qos_mtx);
+	return ret;
+}
+
+/**
+ * dev_pm_qos_update_user_latency_tolerance - Update user space latency tolerance.
+ * @dev: Device to update the user space latency tolerance for.
+ * @val: New user space latency tolerance for @dev (negative values disable).
+ */
+int dev_pm_qos_update_user_latency_tolerance(struct device *dev, s32 val)
+{
+	int ret;
+
+	mutex_lock(&dev_pm_qos_mtx);
+
+	if (IS_ERR_OR_NULL(dev->power.qos)
+	    || !dev->power.qos->latency_tolerance_req) {
+		struct dev_pm_qos_request *req;
+
+		if (val < 0) {	/* no user request installed, so there is nothing to disable */
+			ret = -EINVAL;
+			goto out;
+		}
+		req = kzalloc(sizeof(*req), GFP_KERNEL);
+		if (!req) {
+			ret = -ENOMEM;
+			goto out;
+		}
+		ret = __dev_pm_qos_add_request(dev, req, DEV_PM_QOS_LATENCY_TOLERANCE, val);
+		if (ret < 0) {
+			kfree(req);
+			goto out;
+		}
+		dev->power.qos->latency_tolerance_req = req;	/* remember it as the user-space request */
+	} else {
+		if (val < 0) {	/* negative value: remove and free the existing request */
+			__dev_pm_qos_drop_user_request(dev, DEV_PM_QOS_LATENCY_TOLERANCE);
+			ret = 0;
+		} else {
+			ret = __dev_pm_qos_update_request(dev->power.qos->latency_tolerance_req, val);
+		}
+	}
+
+ out:
+	mutex_unlock(&dev_pm_qos_mtx);
+	return ret;
+}
+#else /* !CONFIG_PM_RUNTIME */
+static void __dev_pm_qos_hide_latency_limit(struct device *dev) {}	/* no-op stub for !CONFIG_PM_RUNTIME */
+static void __dev_pm_qos_hide_flags(struct device *dev) {}	/* no-op stub for !CONFIG_PM_RUNTIME */
+#endif /* CONFIG_PM_RUNTIME */
diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
index 02c652be83e..67c7938e430 100644
--- a/drivers/base/power/runtime.c
+++ b/drivers/base/power/runtime.c
@@ -1,5 +1,5 @@
 /*
- * drivers/base/power/runtime.c - Helper functions for device run-time PM
+ * drivers/base/power/runtime.c - Helper functions for device runtime PM
  *
  * Copyright (c) 2009 Rafael J. Wysocki <rjw@sisk.pl>, Novell Inc.
  * Copyright (C) 2010 Alan Stern <stern@rowland.harvard.edu>
@@ -8,9 +8,48 @@
  */
 
 #include <linux/sched.h>
+#include <linux/export.h>
 #include <linux/pm_runtime.h>
+#include <trace/events/rpm.h>
 #include "power.h"
 
+#define RPM_GET_CALLBACK(dev, cb)				\
+({								\
+	int (*__rpm_cb)(struct device *__d);			\
+	/* First level present wins: pm_domain, type, class, bus. */	\
+	if (dev->pm_domain)					\
+		__rpm_cb = dev->pm_domain->ops.cb;		\
+	else if (dev->type && dev->type->pm)			\
+		__rpm_cb = dev->type->pm->cb;			\
+	else if (dev->class && dev->class->pm)			\
+		__rpm_cb = dev->class->pm->cb;			\
+	else if (dev->bus && dev->bus->pm)			\
+		__rpm_cb = dev->bus->pm->cb;			\
+	else							\
+		__rpm_cb = NULL;				\
+	/* Driver's callback is used only if the result is still NULL. */	\
+	if (!__rpm_cb && dev->driver && dev->driver->pm)	\
+		__rpm_cb = dev->driver->pm->cb;			\
+	/* Value of the statement expression: */		\
+	__rpm_cb;						\
+})
+
+static int (*rpm_get_suspend_cb(struct device *dev))(struct device *)
+{	/* Returns the ->runtime_suspend callback chosen by RPM_GET_CALLBACK(), or NULL. */
+	return RPM_GET_CALLBACK(dev, runtime_suspend);
+}
+
+static int (*rpm_get_resume_cb(struct device *dev))(struct device *)
+{	/* Returns the ->runtime_resume callback chosen by RPM_GET_CALLBACK(), or NULL. */
+	return RPM_GET_CALLBACK(dev, runtime_resume);
+}
+
+#ifdef CONFIG_PM_RUNTIME
+static int (*rpm_get_idle_cb(struct device *dev))(struct device *)
+{	/* Returns the ->runtime_idle callback chosen by RPM_GET_CALLBACK(), or NULL. */
+	return RPM_GET_CALLBACK(dev, runtime_idle);
+}
+
 static int rpm_resume(struct device *dev, int rpmflags);
 static int rpm_suspend(struct device *dev, int rpmflags);
 
@@ -28,13 +67,10 @@ static int rpm_suspend(struct device *dev, int rpmflags);
 void update_pm_runtime_accounting(struct device *dev)
 {
 	unsigned long now = jiffies;
-	int delta;
+	unsigned long delta;
 
 	delta = now - dev->power.accounting_timestamp;
 
-	if (delta < 0)
-		delta = 0;
-
 	dev->power.accounting_timestamp = now;
 
 	if (dev->power.disable_depth > 0)
@@ -125,6 +161,76 @@ unsigned long pm_runtime_autosuspend_expiration(struct device *dev)
 }
 EXPORT_SYMBOL_GPL(pm_runtime_autosuspend_expiration);
 
+static int dev_memalloc_noio(struct device *dev, void *data)
+{	/* device_for_each_child() callback: report @dev's memalloc_noio flag; @data is unused. */
+	return dev->power.memalloc_noio;
+}
+
+/*
+ * pm_runtime_set_memalloc_noio - Set a device's memalloc_noio flag.
+ * @dev: Device to handle.
+ * @enable: True for setting the flag and False for clearing the flag.
+ *
+ * Set the flag for all devices in the path from the device to the
+ * root device in the device tree if @enable is true, otherwise clear
+ * the flag for devices in the path whose siblings don't set the flag.
+ *
+ * The function should only be called by block device or network
+ * device drivers, to solve the deadlock problem during runtime
+ * resume/suspend:
+ *
+ *     If memory allocation with GFP_KERNEL is called inside the runtime
+ *     resume/suspend callback of any one of its ancestors (or the
+ *     block device itself), a deadlock may be triggered inside the
+ *     memory allocation since it might not complete until the block
+ *     device becomes active and the involved page I/O finishes. The
+ *     situation was first pointed out by Alan Stern. Network devices
+ *     are involved in iSCSI-like situations.
+ *
+ * dev_hotplug_mutex is held in the function to handle hotplug
+ * races, because pm_runtime_set_memalloc_noio() may be called
+ * in async probe().
+ *
+ * The function should be called between device_add() and device_del()
+ * on the affected device (block/network device).
+ */
+void pm_runtime_set_memalloc_noio(struct device *dev, bool enable)
+{
+	static DEFINE_MUTEX(dev_hotplug_mutex);
+
+	mutex_lock(&dev_hotplug_mutex);
+	for (;;) {
+		bool enabled;	/* flag value before this update */
+
+		/* hold power lock since bitfield is not SMP-safe. */
+		spin_lock_irq(&dev->power.lock);
+		enabled = dev->power.memalloc_noio;
+		dev->power.memalloc_noio = enable;
+		spin_unlock_irq(&dev->power.lock);
+
+		/*
+		 * no need to enable the ancestors any more if the flag
+		 * was already set on this device.
+		 */
+		if (enabled && enable)
+			break;
+
+		dev = dev->parent;
+
+		/*
+		 * clear the flag of the parent device only if none of its
+		 * children has the flag set, because an ancestor's
+		 * flag may have been set by any one of the descendants.
+		 */
+		if (!dev || (!enable &&
+			     device_for_each_child(dev, NULL,
+						   dev_memalloc_noio)))
+			break;
+	}
+	mutex_unlock(&dev_hotplug_mutex);
+}
+EXPORT_SYMBOL_GPL(pm_runtime_set_memalloc_noio);
+
 /**
  * rpm_check_suspend_allowed - Test whether a device may be suspended.
  * @dev: Device to test.
@@ -135,8 +241,9 @@ static int rpm_check_suspend_allowed(struct device *dev)
 
 	if (dev->power.runtime_error)
 		retval = -EINVAL;
-	else if (atomic_read(&dev->power.usage_count) > 0
-	    || dev->power.disable_depth > 0)
+	else if (dev->power.disable_depth > 0)
+		retval = -EACCES;
+	else if (atomic_read(&dev->power.usage_count) > 0)
 		retval = -EAGAIN;
 	else if (!pm_children_suspended(dev))
 		retval = -EBUSY;
@@ -147,6 +254,8 @@ static int rpm_check_suspend_allowed(struct device *dev)
 	    || (dev->power.request_pending
 			&& dev->power.request == RPM_REQ_RESUME))
 		retval = -EAGAIN;
+	else if (__dev_pm_qos_read_value(dev) < 0)
+		retval = -EPERM;
 	else if (dev->power.runtime_status == RPM_SUSPENDED)
 		retval = 1;
 
@@ -154,14 +263,40 @@ static int rpm_check_suspend_allowed(struct device *dev)
 }
 
 /**
+ * __rpm_callback - Run a given runtime PM callback for a given device.
+ * @cb: Runtime PM callback to run.
+ * @dev: Device to run the callback for.
+ */
+static int __rpm_callback(int (*cb)(struct device *), struct device *dev)
+	__releases(&dev->power.lock) __acquires(&dev->power.lock)
+{
+	int retval;
+
+	if (dev->power.irq_safe)	/* irq_safe: drop the lock without touching the IRQ state */
+		spin_unlock(&dev->power.lock);
+	else
+		spin_unlock_irq(&dev->power.lock);
+
+	retval = cb(dev);	/* runs without dev->power.lock; @cb is not NULL-checked here */
+
+	if (dev->power.irq_safe)	/* retake the lock the same way it was dropped */
+		spin_lock(&dev->power.lock);
+	else
+		spin_lock_irq(&dev->power.lock);
+
+	return retval;
+}
+
+/**
  * rpm_idle - Notify device bus type if the device can be suspended.
  * @dev: Device to notify the bus type about.
  * @rpmflags: Flag bits.
  *
- * Check if the device's run-time PM status allows it to be suspended.  If
+ * Check if the device's runtime PM status allows it to be suspended.  If
  * another idle notification has been started earlier, return immediately.  If
  * the RPM_ASYNC flag is set then queue an idle-notification request; otherwise
- * run the ->runtime_idle() callback directly.
+ * run the ->runtime_idle() callback directly. If the ->runtime_idle callback
+ * doesn't exist or if it returns 0, call rpm_suspend with the RPM_AUTO flag.
  *
  * This function must be called under dev->power.lock with interrupts disabled.
  */
@@ -170,6 +305,7 @@ static int rpm_idle(struct device *dev, int rpmflags)
 	int (*callback)(struct device *);
 	int retval;
 
+	trace_rpm_idle(dev, rpmflags);
 	retval = rpm_check_suspend_allowed(dev);
 	if (retval < 0)
 		;	/* Conditions are wrong. */
@@ -195,11 +331,8 @@ static int rpm_idle(struct device *dev, int rpmflags)
 	/* Pending requests need to be canceled. */
 	dev->power.request = RPM_REQ_NONE;
 
-	if (dev->power.no_callbacks) {
-		/* Assume ->runtime_idle() callback would have suspended. */
-		retval = rpm_suspend(dev, rpmflags);
+	if (dev->power.no_callbacks)
 		goto out;
-	}
 
 	/* Carry out an asynchronous or a synchronous idle notification. */
 	if (rpmflags & RPM_ASYNC) {
@@ -208,33 +341,23 @@ static int rpm_idle(struct device *dev, int rpmflags)
 			dev->power.request_pending = true;
 			queue_work(pm_wq, &dev->power.work);
 		}
-		goto out;
+		trace_rpm_return_int(dev, _THIS_IP_, 0);
+		return 0;
 	}
 
 	dev->power.idle_notification = true;
 
-	if (dev->bus && dev->bus->pm && dev->bus->pm->runtime_idle)
-		callback = dev->bus->pm->runtime_idle;
-	else if (dev->type && dev->type->pm && dev->type->pm->runtime_idle)
-		callback = dev->type->pm->runtime_idle;
-	else if (dev->class && dev->class->pm)
-		callback = dev->class->pm->runtime_idle;
-	else
-		callback = NULL;
+	callback = rpm_get_idle_cb(dev);
 
-	if (callback) {
-		spin_unlock_irq(&dev->power.lock);
-
-		callback(dev);
-
-		spin_lock_irq(&dev->power.lock);
-	}
+	if (callback)
+		retval = __rpm_callback(callback, dev);
 
 	dev->power.idle_notification = false;
 	wake_up_all(&dev->power.wait_queue);
 
  out:
-	return retval;
+	trace_rpm_return_int(dev, _THIS_IP_, retval);
+	return retval ? retval : rpm_suspend(dev, rpmflags | RPM_AUTO);
 }
 
 /**
@@ -243,36 +366,53 @@ static int rpm_idle(struct device *dev, int rpmflags)
  * @dev: Device to run the callback for.
  */
 static int rpm_callback(int (*cb)(struct device *), struct device *dev)
-	__releases(&dev->power.lock) __acquires(&dev->power.lock)
 {
 	int retval;
 
 	if (!cb)
 		return -ENOSYS;
 
-	spin_unlock_irq(&dev->power.lock);
+	if (dev->power.memalloc_noio) {
+		unsigned int noio_flag;
 
-	retval = cb(dev);
+		/*
+		 * Deadlock might be caused if memory allocation with
+		 * GFP_KERNEL happens inside runtime_suspend and
+		 * runtime_resume callbacks of one block device's
+		 * ancestor or the block device itself. Network
+		 * device might be thought as part of iSCSI block
+		 * device, so network device and its ancestor should
+		 * be marked as memalloc_noio too.
+		 */
+		noio_flag = memalloc_noio_save();
+		retval = __rpm_callback(cb, dev);
+		memalloc_noio_restore(noio_flag);
+	} else {
+		retval = __rpm_callback(cb, dev);
+	}
 
-	spin_lock_irq(&dev->power.lock);
 	dev->power.runtime_error = retval;
-
-	return retval;
+	return retval != -EACCES ? retval : -EIO;
 }
 
 /**
- * rpm_suspend - Carry out run-time suspend of given device.
+ * rpm_suspend - Carry out runtime suspend of given device.
  * @dev: Device to suspend.
  * @rpmflags: Flag bits.
  *
- * Check if the device's run-time PM status allows it to be suspended.  If
- * another suspend has been started earlier, either return immediately or wait
- * for it to finish, depending on the RPM_NOWAIT and RPM_ASYNC flags.  Cancel a
- * pending idle notification.  If the RPM_ASYNC flag is set then queue a
- * suspend request; otherwise run the ->runtime_suspend() callback directly.
- * If a deferred resume was requested while the callback was running then carry
- * it out; otherwise send an idle notification for the device (if the suspend
- * failed) or for its parent (if the suspend succeeded).
+ * Check if the device's runtime PM status allows it to be suspended.
+ * Cancel a pending idle notification, autosuspend or suspend. If
+ * another suspend has been started earlier, either return immediately
+ * or wait for it to finish, depending on the RPM_NOWAIT and RPM_ASYNC
+ * flags. If the RPM_ASYNC flag is set then queue a suspend request;
+ * otherwise run the ->runtime_suspend() callback directly. When
+ * ->runtime_suspend succeeded, if a deferred resume was requested while
+ * the callback was running then carry it out, otherwise send an idle
+ * notification for its parent (if the suspend succeeded and both
+ * ignore_children of parent->power and irq_safe of dev->power are not set).
+ * If ->runtime_suspend failed with -EAGAIN or -EBUSY, and if the RPM_AUTO
+ * flag is set and the next autosuspend-delay expiration time is in the
+ * future, schedule another autosuspend attempt.
  *
  * This function must be called under dev->power.lock with interrupts disabled.
  */
@@ -283,7 +423,7 @@ static int rpm_suspend(struct device *dev, int rpmflags)
 	struct device *parent = NULL;
 	int retval;
 
-	dev_dbg(dev, "%s flags 0x%x\n", __func__, rpmflags);
+	trace_rpm_suspend(dev, rpmflags);
 
  repeat:
 	retval = rpm_check_suspend_allowed(dev);
@@ -335,6 +475,15 @@ static int rpm_suspend(struct device *dev, int rpmflags)
 			goto out;
 		}
 
+		if (dev->power.irq_safe) {
+			spin_unlock(&dev->power.lock);
+
+			cpu_relax();
+
+			spin_lock(&dev->power.lock);
+			goto repeat;
+		}
+
 		/* Wait for the other suspend running in parallel with us. */
 		for (;;) {
 			prepare_to_wait(&dev->power.wait_queue, &wait,
@@ -352,7 +501,6 @@ static int rpm_suspend(struct device *dev, int rpmflags)
 		goto repeat;
 	}
 
-	dev->power.deferred_resume = false;
 	if (dev->power.no_callbacks)
 		goto no_callback;	/* Assume success. */
 
@@ -369,63 +517,76 @@ static int rpm_suspend(struct device *dev, int rpmflags)
 
 	__update_runtime_status(dev, RPM_SUSPENDING);
 
-	if (dev->bus && dev->bus->pm && dev->bus->pm->runtime_suspend)
-		callback = dev->bus->pm->runtime_suspend;
-	else if (dev->type && dev->type->pm && dev->type->pm->runtime_suspend)
-		callback = dev->type->pm->runtime_suspend;
-	else if (dev->class && dev->class->pm)
-		callback = dev->class->pm->runtime_suspend;
-	else
-		callback = NULL;
+	callback = rpm_get_suspend_cb(dev);
 
 	retval = rpm_callback(callback, dev);
-	if (retval) {
-		__update_runtime_status(dev, RPM_ACTIVE);
-		dev->power.deferred_resume = 0;
-		if (retval == -EAGAIN || retval == -EBUSY)
-			dev->power.runtime_error = 0;
-		else
-			pm_runtime_cancel_pending(dev);
-	} else {
+	if (retval)
+		goto fail;
+
  no_callback:
-		__update_runtime_status(dev, RPM_SUSPENDED);
-		pm_runtime_deactivate_timer(dev);
+	__update_runtime_status(dev, RPM_SUSPENDED);
+	pm_runtime_deactivate_timer(dev);
 
-		if (dev->parent) {
-			parent = dev->parent;
-			atomic_add_unless(&parent->power.child_count, -1, 0);
-		}
+	if (dev->parent) {
+		parent = dev->parent;
+		atomic_add_unless(&parent->power.child_count, -1, 0);
 	}
 	wake_up_all(&dev->power.wait_queue);
 
 	if (dev->power.deferred_resume) {
+		dev->power.deferred_resume = false;
 		rpm_resume(dev, 0);
 		retval = -EAGAIN;
 		goto out;
 	}
 
-	if (parent && !parent->power.ignore_children) {
-		spin_unlock_irq(&dev->power.lock);
+	/* Maybe the parent is now able to suspend. */
+	if (parent && !parent->power.ignore_children && !dev->power.irq_safe) {
+		spin_unlock(&dev->power.lock);
 
-		pm_request_idle(parent);
+		spin_lock(&parent->power.lock);
+		rpm_idle(parent, RPM_ASYNC);
+		spin_unlock(&parent->power.lock);
 
-		spin_lock_irq(&dev->power.lock);
+		spin_lock(&dev->power.lock);
 	}
 
  out:
-	dev_dbg(dev, "%s returns %d\n", __func__, retval);
+	trace_rpm_return_int(dev, _THIS_IP_, retval);
 
 	return retval;
+
+ fail:
+	__update_runtime_status(dev, RPM_ACTIVE);
+	dev->power.deferred_resume = false;
+	wake_up_all(&dev->power.wait_queue);
+
+	if (retval == -EAGAIN || retval == -EBUSY) {
+		dev->power.runtime_error = 0;
+
+		/*
+		 * If the callback routine failed an autosuspend, and
+		 * if the last_busy time has been updated so that there
+		 * is a new autosuspend expiration time, automatically
+		 * reschedule another autosuspend.
+		 */
+		if ((rpmflags & RPM_AUTO) &&
+		    pm_runtime_autosuspend_expiration(dev) != 0)
+			goto repeat;
+	} else {
+		pm_runtime_cancel_pending(dev);
+	}
+	goto out;
 }
 
 /**
- * rpm_resume - Carry out run-time resume of given device.
+ * rpm_resume - Carry out runtime resume of given device.
  * @dev: Device to resume.
  * @rpmflags: Flag bits.
  *
- * Check if the device's run-time PM status allows it to be resumed.  Cancel
+ * Check if the device's runtime PM status allows it to be resumed.  Cancel
  * any scheduled or pending requests.  If another resume has been started
- * earlier, either return imediately or wait for it to finish, depending on the
+ * earlier, either return immediately or wait for it to finish, depending on the
  * RPM_NOWAIT and RPM_ASYNC flags.  Similarly, if there's a suspend running in
  * parallel with this function, either tell the other process to resume after
  * suspending (deferred_resume) or wait for it to finish.  If the RPM_ASYNC
@@ -442,13 +603,16 @@ static int rpm_resume(struct device *dev, int rpmflags)
 	struct device *parent = NULL;
 	int retval = 0;
 
-	dev_dbg(dev, "%s flags 0x%x\n", __func__, rpmflags);
+	trace_rpm_resume(dev, rpmflags);
 
  repeat:
 	if (dev->power.runtime_error)
 		retval = -EINVAL;
+	else if (dev->power.disable_depth == 1 && dev->power.is_suspended
+	    && dev->power.runtime_status == RPM_ACTIVE)
+		retval = 1;
 	else if (dev->power.disable_depth > 0)
-		retval = -EAGAIN;
+		retval = -EACCES;
 	if (retval)
 		goto out;
 
@@ -479,6 +643,15 @@ static int rpm_resume(struct device *dev, int rpmflags)
 			goto out;
 		}
 
+		if (dev->power.irq_safe) {
+			spin_unlock(&dev->power.lock);
+
+			cpu_relax();
+
+			spin_lock(&dev->power.lock);
+			goto repeat;
+		}
+
 		/* Wait for the operation carried out in parallel with us. */
 		for (;;) {
 			prepare_to_wait(&dev->power.wait_queue, &wait,
@@ -509,6 +682,7 @@ static int rpm_resume(struct device *dev, int rpmflags)
 		    || dev->parent->power.runtime_status == RPM_ACTIVE) {
 			atomic_inc(&dev->parent->power.child_count);
 			spin_unlock(&dev->parent->power.lock);
+			retval = 1;
 			goto no_callback;	/* Assume success. */
 		}
 		spin_unlock(&dev->parent->power.lock);
@@ -527,17 +701,20 @@ static int rpm_resume(struct device *dev, int rpmflags)
 
 	if (!parent && dev->parent) {
 		/*
-		 * Increment the parent's resume counter and resume it if
-		 * necessary.
+		 * Increment the parent's usage counter and resume it if
+		 * necessary.  Not needed if dev is irq-safe; then the
+		 * parent is permanently resumed.
 		 */
 		parent = dev->parent;
+		if (dev->power.irq_safe)
+			goto skip_parent;
 		spin_unlock(&dev->power.lock);
 
 		pm_runtime_get_noresume(parent);
 
 		spin_lock(&parent->power.lock);
 		/*
-		 * We can resume if the parent's run-time PM is disabled or it
+		 * We can resume if the parent's runtime PM is disabled or it
 		 * is set to ignore children.
 		 */
 		if (!parent->power.disable_depth
@@ -553,20 +730,14 @@ static int rpm_resume(struct device *dev, int rpmflags)
 			goto out;
 		goto repeat;
 	}
+ skip_parent:
 
 	if (dev->power.no_callbacks)
 		goto no_callback;	/* Assume success. */
 
 	__update_runtime_status(dev, RPM_RESUMING);
 
-	if (dev->bus && dev->bus->pm && dev->bus->pm->runtime_resume)
-		callback = dev->bus->pm->runtime_resume;
-	else if (dev->type && dev->type->pm && dev->type->pm->runtime_resume)
-		callback = dev->type->pm->runtime_resume;
-	else if (dev->class && dev->class->pm)
-		callback = dev->class->pm->runtime_resume;
-	else
-		callback = NULL;
+	callback = rpm_get_resume_cb(dev);
 
 	retval = rpm_callback(callback, dev);
 	if (retval) {
@@ -580,11 +751,11 @@ static int rpm_resume(struct device *dev, int rpmflags)
 	}
 	wake_up_all(&dev->power.wait_queue);
 
-	if (!retval)
+	if (retval >= 0)
 		rpm_idle(dev, RPM_ASYNC);
 
  out:
-	if (parent) {
+	if (parent && !dev->power.irq_safe) {
 		spin_unlock_irq(&dev->power.lock);
 
 		pm_runtime_put(parent);
@@ -592,17 +763,17 @@ static int rpm_resume(struct device *dev, int rpmflags)
 		spin_lock_irq(&dev->power.lock);
 	}
 
-	dev_dbg(dev, "%s returns %d\n", __func__, retval);
+	trace_rpm_return_int(dev, _THIS_IP_, retval);
 
 	return retval;
 }
 
 /**
- * pm_runtime_work - Universal run-time PM work function.
+ * pm_runtime_work - Universal runtime PM work function.
  * @work: Work structure used for scheduling the execution of this function.
  *
  * Use @work to get the device object the work is to be done for, determine what
- * is to be done and execute the appropriate run-time PM function.
+ * is to be done and execute the appropriate runtime PM function.
  */
 static void pm_runtime_work(struct work_struct *work)
 {
@@ -701,7 +872,7 @@ int pm_schedule_suspend(struct device *dev, unsigned int delay)
 EXPORT_SYMBOL_GPL(pm_schedule_suspend);
 
 /**
- * __pm_runtime_idle - Entry point for run-time idle operations.
+ * __pm_runtime_idle - Entry point for runtime idle operations.
  * @dev: Device to send idle notification for.
  * @rpmflags: Flag bits.
  *
@@ -709,13 +880,16 @@ EXPORT_SYMBOL_GPL(pm_schedule_suspend);
  * return immediately if it is larger than zero.  Then carry out an idle
  * notification, either synchronous or asynchronous.
  *
- * This routine may be called in atomic context if the RPM_ASYNC flag is set.
+ * This routine may be called in atomic context if the RPM_ASYNC flag is set,
+ * or if pm_runtime_irq_safe() has been called.
  */
 int __pm_runtime_idle(struct device *dev, int rpmflags)
 {
 	unsigned long flags;
 	int retval;
 
+	might_sleep_if(!(rpmflags & RPM_ASYNC) && !dev->power.irq_safe);
+
 	if (rpmflags & RPM_GET_PUT) {
 		if (!atomic_dec_and_test(&dev->power.usage_count))
 			return 0;
@@ -730,7 +904,7 @@ int __pm_runtime_idle(struct device *dev, int rpmflags)
 EXPORT_SYMBOL_GPL(__pm_runtime_idle);
 
 /**
- * __pm_runtime_suspend - Entry point for run-time put/suspend operations.
+ * __pm_runtime_suspend - Entry point for runtime put/suspend operations.
  * @dev: Device to suspend.
  * @rpmflags: Flag bits.
  *
@@ -738,13 +912,16 @@ EXPORT_SYMBOL_GPL(__pm_runtime_idle);
  * return immediately if it is larger than zero.  Then carry out a suspend,
  * either synchronous or asynchronous.
  *
- * This routine may be called in atomic context if the RPM_ASYNC flag is set.
+ * This routine may be called in atomic context if the RPM_ASYNC flag is set,
+ * or if pm_runtime_irq_safe() has been called.
  */
 int __pm_runtime_suspend(struct device *dev, int rpmflags)
 {
 	unsigned long flags;
 	int retval;
 
+	might_sleep_if(!(rpmflags & RPM_ASYNC) && !dev->power.irq_safe);
+
 	if (rpmflags & RPM_GET_PUT) {
 		if (!atomic_dec_and_test(&dev->power.usage_count))
 			return 0;
@@ -759,20 +936,23 @@ int __pm_runtime_suspend(struct device *dev, int rpmflags)
 EXPORT_SYMBOL_GPL(__pm_runtime_suspend);
 
 /**
- * __pm_runtime_resume - Entry point for run-time resume operations.
+ * __pm_runtime_resume - Entry point for runtime resume operations.
  * @dev: Device to resume.
  * @rpmflags: Flag bits.
  *
  * If the RPM_GET_PUT flag is set, increment the device's usage count.  Then
  * carry out a resume, either synchronous or asynchronous.
  *
- * This routine may be called in atomic context if the RPM_ASYNC flag is set.
+ * This routine may be called in atomic context if the RPM_ASYNC flag is set,
+ * or if pm_runtime_irq_safe() has been called.
  */
 int __pm_runtime_resume(struct device *dev, int rpmflags)
 {
 	unsigned long flags;
 	int retval;
 
+	might_sleep_if(!(rpmflags & RPM_ASYNC) && !dev->power.irq_safe);
+
 	if (rpmflags & RPM_GET_PUT)
 		atomic_inc(&dev->power.usage_count);
 
@@ -785,11 +965,11 @@ int __pm_runtime_resume(struct device *dev, int rpmflags)
 EXPORT_SYMBOL_GPL(__pm_runtime_resume);
 
 /**
- * __pm_runtime_set_status - Set run-time PM status of a device.
+ * __pm_runtime_set_status - Set runtime PM status of a device.
  * @dev: Device to handle.
- * @status: New run-time PM status of the device.
+ * @status: New runtime PM status of the device.
  *
- * If run-time PM of the device is disabled or its power.runtime_error field is
+ * If runtime PM of the device is disabled or its power.runtime_error field is
  * different from zero, the status may be changed either to RPM_ACTIVE, or to
  * RPM_SUSPENDED, as long as that reflects the actual state of the device.
  * However, if the device has a parent and the parent is not active, and the
@@ -835,7 +1015,7 @@ int __pm_runtime_set_status(struct device *dev, unsigned int status)
 
 		/*
 		 * It is invalid to put an active child under a parent that is
-		 * not active, has run-time PM enabled and the
+		 * not active, has runtime PM enabled and the
 		 * 'power.ignore_children' flag unset.
 		 */
 		if (!parent->power.disable_depth
@@ -869,7 +1049,7 @@ EXPORT_SYMBOL_GPL(__pm_runtime_set_status);
  * @dev: Device to handle.
  *
  * Flush all pending requests for the device from pm_wq and wait for all
- * run-time PM operations involving the device in progress to complete.
+ * runtime PM operations involving the device in progress to complete.
  *
  * Should be called under dev->power.lock with interrupts disabled.
  */
@@ -917,7 +1097,7 @@ static void __pm_runtime_barrier(struct device *dev)
  * Prevent the device from being suspended by incrementing its usage counter and
  * if there's a pending resume request for the device, wake the device up.
  * Next, make sure that all pending requests for the device have been flushed
- * from pm_wq and wait for all run-time PM operations involving the device in
+ * from pm_wq and wait for all runtime PM operations involving the device in
  * progress to complete.
  *
  * Return value:
@@ -947,18 +1127,18 @@ int pm_runtime_barrier(struct device *dev)
 EXPORT_SYMBOL_GPL(pm_runtime_barrier);
 
 /**
- * __pm_runtime_disable - Disable run-time PM of a device.
+ * __pm_runtime_disable - Disable runtime PM of a device.
  * @dev: Device to handle.
  * @check_resume: If set, check if there's a resume request for the device.
  *
- * Increment power.disable_depth for the device and if was zero previously,
- * cancel all pending run-time PM requests for the device and wait for all
+ * Increment power.disable_depth for the device and if it was zero previously,
+ * cancel all pending runtime PM requests for the device and wait for all
  * operations in progress to complete.  The device can be either active or
- * suspended after its run-time PM has been disabled.
+ * suspended after its runtime PM has been disabled.
  *
  * If @check_resume is set and there's a resume request pending when
  * __pm_runtime_disable() is called and power.disable_depth is zero, the
- * function will wake up the device before disabling its run-time PM.
+ * function will wake up the device before disabling its runtime PM.
  */
 void __pm_runtime_disable(struct device *dev, bool check_resume)
 {
@@ -971,7 +1151,7 @@ void __pm_runtime_disable(struct device *dev, bool check_resume)
 
 	/*
 	 * Wake up the device if there's a resume request pending, because that
-	 * means there probably is some I/O to process and disabling run-time PM
+	 * means there probably is some I/O to process and disabling runtime PM
 	 * shouldn't prevent the device from processing the I/O.
 	 */
 	if (check_resume && dev->power.request_pending
@@ -996,7 +1176,7 @@ void __pm_runtime_disable(struct device *dev, bool check_resume)
 EXPORT_SYMBOL_GPL(__pm_runtime_disable);
 
 /**
- * pm_runtime_enable - Enable run-time PM of a device.
+ * pm_runtime_enable - Enable runtime PM of a device.
  * @dev: Device to handle.
  */
 void pm_runtime_enable(struct device *dev)
@@ -1015,7 +1195,7 @@ void pm_runtime_enable(struct device *dev)
 EXPORT_SYMBOL_GPL(pm_runtime_enable);
 
 /**
- * pm_runtime_forbid - Block run-time PM of a device.
+ * pm_runtime_forbid - Block runtime PM of a device.
  * @dev: Device to handle.
  *
  * Increase the device's usage count and clear its power.runtime_auto flag,
@@ -1038,7 +1218,7 @@ void pm_runtime_forbid(struct device *dev)
 EXPORT_SYMBOL_GPL(pm_runtime_forbid);
 
 /**
- * pm_runtime_allow - Unblock run-time PM of a device.
+ * pm_runtime_allow - Unblock runtime PM of a device.
  * @dev: Device to handle.
  *
  * Decrease the device's usage count and set its power.runtime_auto flag.
@@ -1059,13 +1239,12 @@ void pm_runtime_allow(struct device *dev)
 EXPORT_SYMBOL_GPL(pm_runtime_allow);
 
 /**
- * pm_runtime_no_callbacks - Ignore run-time PM callbacks for a device.
+ * pm_runtime_no_callbacks - Ignore runtime PM callbacks for a device.
  * @dev: Device to handle.
  *
  * Set the power.no_callbacks flag, which tells the PM core that this
- * device is power-managed through its parent and has no run-time PM
- * callbacks of its own.  The run-time sysfs attributes will be removed.
- *
+ * device is power-managed through its parent and has no runtime PM
+ * callbacks of its own.  The runtime sysfs attributes will be removed.
  */
 void pm_runtime_no_callbacks(struct device *dev)
 {
@@ -1078,6 +1257,27 @@ void pm_runtime_no_callbacks(struct device *dev)
 EXPORT_SYMBOL_GPL(pm_runtime_no_callbacks);
 
 /**
+ * pm_runtime_irq_safe - Leave interrupts disabled during callbacks.
+ * @dev: Device to handle
+ *
+ * Set the power.irq_safe flag, which tells the PM core that the
+ * ->runtime_suspend() and ->runtime_resume() callbacks for this device should
+ * always be invoked with the spinlock held and interrupts disabled.  It also
+ * causes the parent's usage counter to be permanently incremented, preventing
+ * the parent from runtime suspending -- otherwise an irq-safe child might have
+ * to wait for a non-irq-safe parent.
+ */
+void pm_runtime_irq_safe(struct device *dev)
+{
+	if (dev->parent)
+		pm_runtime_get_sync(dev->parent);
+	spin_lock_irq(&dev->power.lock);
+	dev->power.irq_safe = 1;
+	spin_unlock_irq(&dev->power.lock);
+}
+EXPORT_SYMBOL_GPL(pm_runtime_irq_safe);
+
+/**
  * update_autosuspend - Handle a change to a device's autosuspend settings.
  * @dev: Device to handle.
  * @old_delay: The former autosuspend_delay value.
@@ -1120,8 +1320,8 @@ static void update_autosuspend(struct device *dev, int old_delay, int old_use)
  * @delay: Value of the new delay in milliseconds.
  *
  * Set the device's power.autosuspend_delay value.  If it changes to negative
- * and the power.use_autosuspend flag is set, prevent run-time suspends.  If it
- * changes the other way, allow run-time suspends.
+ * and the power.use_autosuspend flag is set, prevent runtime suspends.  If it
+ * changes the other way, allow runtime suspends.
  */
 void pm_runtime_set_autosuspend_delay(struct device *dev, int delay)
 {
@@ -1141,7 +1341,7 @@ EXPORT_SYMBOL_GPL(pm_runtime_set_autosuspend_delay);
  * @dev: Device to handle.
  * @use: New value for use_autosuspend.
  *
- * Set the device's power.use_autosuspend flag, and allow or prevent run-time
+ * Set the device's power.use_autosuspend flag, and allow or prevent runtime
  * suspends as needed.
  */
 void __pm_runtime_use_autosuspend(struct device *dev, bool use)
@@ -1158,7 +1358,7 @@ void __pm_runtime_use_autosuspend(struct device *dev, bool use)
 EXPORT_SYMBOL_GPL(__pm_runtime_use_autosuspend);
 
 /**
- * pm_runtime_init - Initialize run-time PM fields in given device object.
+ * pm_runtime_init - Initialize runtime PM fields in given device object.
  * @dev: Device object to initialize.
  */
 void pm_runtime_init(struct device *dev)
@@ -1199,4 +1399,89 @@ void pm_runtime_remove(struct device *dev)
 	/* Change the status back to 'suspended' to match the initial status. */
 	if (dev->power.runtime_status == RPM_ACTIVE)
 		pm_runtime_set_suspended(dev);
+	if (dev->power.irq_safe && dev->parent)
+		pm_runtime_put(dev->parent);
+}
+#endif
+
+/**
+ * pm_runtime_force_suspend - Force a device into suspend state if needed.
+ * @dev: Device to suspend.
+ *
+ * Disable runtime PM so we can safely check the device's runtime PM status and
+ * if it is active, invoke its .runtime_suspend callback to bring it into
+ * suspend state. Keep runtime PM disabled to preserve the state unless we
+ * encounter errors.
+ *
+ * Typically this function may be invoked from a system suspend callback to make
+ * sure the device is put into low power state.
+ */
+int pm_runtime_force_suspend(struct device *dev)
+{
+	int (*callback)(struct device *);
+	int ret = 0;
+
+	pm_runtime_disable(dev);
+
+	/*
+	 * Note that pm_runtime_status_suspended() returns false while
+	 * !CONFIG_PM_RUNTIME, which means the device will be put into low
+	 * power state.
+	 */
+	if (pm_runtime_status_suspended(dev))
+		return 0;
+
+	callback = rpm_get_suspend_cb(dev);
+
+	if (!callback) {
+		ret = -ENOSYS;
+		goto err;
+	}
+
+	ret = callback(dev);
+	if (ret)
+		goto err;
+
+	pm_runtime_set_suspended(dev);
+	return 0;
+err:
+	pm_runtime_enable(dev);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(pm_runtime_force_suspend);
+
+/**
+ * pm_runtime_force_resume - Force a device into resume state.
+ * @dev: Device to resume.
+ *
+ * Prior to invoking this function we expect the user to have brought the
+ * device into low power state by a call to pm_runtime_force_suspend(). Here we
+ * reverse those actions and bring the device into full power. We update the
+ * runtime PM status and re-enable runtime PM.
+ *
+ * Typically this function may be invoked from a system resume callback to make
+ * sure the device is put into full power state.
+ */
+int pm_runtime_force_resume(struct device *dev)
+{
+	int (*callback)(struct device *);
+	int ret = 0;
+
+	callback = rpm_get_resume_cb(dev);
+
+	if (!callback) {
+		ret = -ENOSYS;
+		goto out;
+	}
+
+	ret = callback(dev);
+	if (ret)
+		goto out;
+
+	pm_runtime_set_active(dev);
+	pm_runtime_mark_last_busy(dev);
+out:
+	pm_runtime_enable(dev);
+	return ret;
 }
+EXPORT_SYMBOL_GPL(pm_runtime_force_resume);
diff --git a/drivers/base/power/sysfs.c b/drivers/base/power/sysfs.c
index 0b1e46bf3e5..95b181d1ca6 100644
--- a/drivers/base/power/sysfs.c
+++ b/drivers/base/power/sysfs.c
@@ -4,8 +4,10 @@
 
 #include <linux/device.h>
 #include <linux/string.h>
+#include <linux/export.h>
+#include <linux/pm_qos.h>
 #include <linux/pm_runtime.h>
-#include <asm/atomic.h>
+#include <linux/atomic.h>
 #include <linux/jiffies.h>
 #include "power.h"
 
@@ -116,12 +118,14 @@ static ssize_t control_store(struct device * dev, struct device_attribute *attr,
 	cp = memchr(buf, '\n', n);
 	if (cp)
 		len = cp - buf;
+	device_lock(dev);
 	if (len == sizeof ctrl_auto - 1 && strncmp(buf, ctrl_auto, len) == 0)
 		pm_runtime_allow(dev);
 	else if (len == sizeof ctrl_on - 1 && strncmp(buf, ctrl_on, len) == 0)
 		pm_runtime_forbid(dev);
 	else
-		return -EINVAL;
+		n = -EINVAL;
+	device_unlock(dev);
 	return n;
 }
 
@@ -202,18 +206,136 @@ static ssize_t autosuspend_delay_ms_store(struct device *dev,
 	if (!dev->power.use_autosuspend)
 		return -EIO;
 
-	if (strict_strtol(buf, 10, &delay) != 0 || delay != (int) delay)
+	if (kstrtol(buf, 10, &delay) != 0 || delay != (int) delay)
 		return -EINVAL;
 
+	device_lock(dev);
 	pm_runtime_set_autosuspend_delay(dev, delay);
+	device_unlock(dev);
 	return n;
 }
 
 static DEVICE_ATTR(autosuspend_delay_ms, 0644, autosuspend_delay_ms_show,
 		autosuspend_delay_ms_store);
 
-#endif
+static ssize_t pm_qos_resume_latency_show(struct device *dev,
+					  struct device_attribute *attr,
+					  char *buf)
+{
+	return sprintf(buf, "%d\n", dev_pm_qos_requested_resume_latency(dev));
+}
+
+static ssize_t pm_qos_resume_latency_store(struct device *dev,
+					   struct device_attribute *attr,
+					   const char *buf, size_t n)
+{
+	s32 value;
+	int ret;
+
+	if (kstrtos32(buf, 0, &value))
+		return -EINVAL;
+
+	if (value < 0)
+		return -EINVAL;
 
+	ret = dev_pm_qos_update_request(dev->power.qos->resume_latency_req,
+					value);
+	return ret < 0 ? ret : n;
+}
+
+static DEVICE_ATTR(pm_qos_resume_latency_us, 0644,
+		   pm_qos_resume_latency_show, pm_qos_resume_latency_store);
+
+static ssize_t pm_qos_latency_tolerance_show(struct device *dev,
+					     struct device_attribute *attr,
+					     char *buf)
+{
+	s32 value = dev_pm_qos_get_user_latency_tolerance(dev);
+
+	if (value < 0)
+		return sprintf(buf, "auto\n");
+	else if (value == PM_QOS_LATENCY_ANY)
+		return sprintf(buf, "any\n");
+
+	return sprintf(buf, "%d\n", value);
+}
+
+static ssize_t pm_qos_latency_tolerance_store(struct device *dev,
+					      struct device_attribute *attr,
+					      const char *buf, size_t n)
+{
+	s32 value;
+	int ret;
+
+	if (!strcmp(buf, "auto") || !strcmp(buf, "auto\n"))
+		value = PM_QOS_LATENCY_TOLERANCE_NO_CONSTRAINT;
+	else if (!strcmp(buf, "any") || !strcmp(buf, "any\n"))
+		value = PM_QOS_LATENCY_ANY;
+	else if (kstrtos32(buf, 0, &value))
+		return -EINVAL;	/* neither a keyword nor a number */
+	ret = dev_pm_qos_update_user_latency_tolerance(dev, value);
+	return ret < 0 ? ret : n;
+}
+
+static DEVICE_ATTR(pm_qos_latency_tolerance_us, 0644,
+		   pm_qos_latency_tolerance_show, pm_qos_latency_tolerance_store);
+
+static ssize_t pm_qos_no_power_off_show(struct device *dev,
+					struct device_attribute *attr,
+					char *buf)
+{
+	return sprintf(buf, "%d\n", !!(dev_pm_qos_requested_flags(dev)
+					& PM_QOS_FLAG_NO_POWER_OFF));
+}
+
+static ssize_t pm_qos_no_power_off_store(struct device *dev,
+					 struct device_attribute *attr,
+					 const char *buf, size_t n)
+{
+	int ret;
+
+	if (kstrtoint(buf, 0, &ret))
+		return -EINVAL;
+
+	if (ret != 0 && ret != 1)
+		return -EINVAL;
+
+	ret = dev_pm_qos_update_flags(dev, PM_QOS_FLAG_NO_POWER_OFF, ret);
+	return ret < 0 ? ret : n;
+}
+
+static DEVICE_ATTR(pm_qos_no_power_off, 0644,
+		   pm_qos_no_power_off_show, pm_qos_no_power_off_store);
+
+static ssize_t pm_qos_remote_wakeup_show(struct device *dev,
+					 struct device_attribute *attr,
+					 char *buf)
+{
+	return sprintf(buf, "%d\n", !!(dev_pm_qos_requested_flags(dev)
+					& PM_QOS_FLAG_REMOTE_WAKEUP));
+}
+
+static ssize_t pm_qos_remote_wakeup_store(struct device *dev,
+					  struct device_attribute *attr,
+					  const char *buf, size_t n)
+{
+	int ret;
+
+	if (kstrtoint(buf, 0, &ret))
+		return -EINVAL;
+
+	if (ret != 0 && ret != 1)
+		return -EINVAL;
+
+	ret = dev_pm_qos_update_flags(dev, PM_QOS_FLAG_REMOTE_WAKEUP, ret);
+	return ret < 0 ? ret : n;
+}
+
+static DEVICE_ATTR(pm_qos_remote_wakeup, 0644,
+		   pm_qos_remote_wakeup_show, pm_qos_remote_wakeup_store);
+#endif /* CONFIG_PM_RUNTIME */
+
+#ifdef CONFIG_PM_SLEEP
 static ssize_t
 wake_show(struct device * dev, struct device_attribute *attr, char * buf)
 {
@@ -248,7 +370,6 @@ wake_store(struct device * dev, struct device_attribute *attr,
 
 static DEVICE_ATTR(wakeup, 0644, wake_show, wake_store);
 
-#ifdef CONFIG_PM_SLEEP
 static ssize_t wakeup_count_show(struct device *dev,
 				struct device_attribute *attr, char *buf)
 {
@@ -283,22 +404,41 @@ static ssize_t wakeup_active_count_show(struct device *dev,
 
 static DEVICE_ATTR(wakeup_active_count, 0444, wakeup_active_count_show, NULL);
 
-static ssize_t wakeup_hit_count_show(struct device *dev,
-				struct device_attribute *attr, char *buf)
+static ssize_t wakeup_abort_count_show(struct device *dev,
+					struct device_attribute *attr,
+					char *buf)
+{
+	unsigned long count = 0;
+	bool enabled = false;
+
+	spin_lock_irq(&dev->power.lock);
+	if (dev->power.wakeup) {
+		count = dev->power.wakeup->wakeup_count;
+		enabled = true;
+	}
+	spin_unlock_irq(&dev->power.lock);
+	return enabled ? sprintf(buf, "%lu\n", count) : sprintf(buf, "\n");
+}
+
+static DEVICE_ATTR(wakeup_abort_count, 0444, wakeup_abort_count_show, NULL);
+
+static ssize_t wakeup_expire_count_show(struct device *dev,
+					struct device_attribute *attr,
+					char *buf)
 {
 	unsigned long count = 0;
 	bool enabled = false;
 
 	spin_lock_irq(&dev->power.lock);
 	if (dev->power.wakeup) {
-		count = dev->power.wakeup->hit_count;
+		count = dev->power.wakeup->expire_count;
 		enabled = true;
 	}
 	spin_unlock_irq(&dev->power.lock);
 	return enabled ? sprintf(buf, "%lu\n", count) : sprintf(buf, "\n");
 }
 
-static DEVICE_ATTR(wakeup_hit_count, 0444, wakeup_hit_count_show, NULL);
+static DEVICE_ATTR(wakeup_expire_count, 0444, wakeup_expire_count_show, NULL);
 
 static ssize_t wakeup_active_show(struct device *dev,
 				struct device_attribute *attr, char *buf)
@@ -367,6 +507,27 @@ static ssize_t wakeup_last_time_show(struct device *dev,
 }
 
 static DEVICE_ATTR(wakeup_last_time_ms, 0444, wakeup_last_time_show, NULL);
+
+#ifdef CONFIG_PM_AUTOSLEEP
+static ssize_t wakeup_prevent_sleep_time_show(struct device *dev,
+					      struct device_attribute *attr,
+					      char *buf)
+{
+	s64 msec = 0;
+	bool enabled = false;
+
+	spin_lock_irq(&dev->power.lock);
+	if (dev->power.wakeup) {
+		msec = ktime_to_ms(dev->power.wakeup->prevent_sleep_time);
+		enabled = true;
+	}
+	spin_unlock_irq(&dev->power.lock);
+	return enabled ? sprintf(buf, "%lld\n", msec) : sprintf(buf, "\n");
+}
+
+static DEVICE_ATTR(wakeup_prevent_sleep_time_ms, 0444,
+		   wakeup_prevent_sleep_time_show, NULL);
+#endif /* CONFIG_PM_AUTOSLEEP */
 #endif /* CONFIG_PM_SLEEP */
 
 #ifdef CONFIG_PM_ADVANCED_DEBUG
@@ -403,6 +564,8 @@ static DEVICE_ATTR(runtime_enabled, 0444, rtpm_enabled_show, NULL);
 
 #endif
 
+#ifdef CONFIG_PM_SLEEP
+
 static ssize_t async_show(struct device *dev, struct device_attribute *attr,
 			  char *buf)
 {
@@ -429,28 +592,22 @@ static ssize_t async_store(struct device *dev, struct device_attribute *attr,
 }
 
 static DEVICE_ATTR(async, 0644, async_show, async_store);
-#endif /* CONFIG_PM_ADVANCED_DEBUG */
 
-static struct attribute * power_attrs[] = {
-	&dev_attr_wakeup.attr,
-#ifdef CONFIG_PM_SLEEP
-	&dev_attr_wakeup_count.attr,
-	&dev_attr_wakeup_active_count.attr,
-	&dev_attr_wakeup_hit_count.attr,
-	&dev_attr_wakeup_active.attr,
-	&dev_attr_wakeup_total_time_ms.attr,
-	&dev_attr_wakeup_max_time_ms.attr,
-	&dev_attr_wakeup_last_time_ms.attr,
 #endif
+#endif /* CONFIG_PM_ADVANCED_DEBUG */
+
+static struct attribute *power_attrs[] = {
 #ifdef CONFIG_PM_ADVANCED_DEBUG
+#ifdef CONFIG_PM_SLEEP
 	&dev_attr_async.attr,
+#endif
 #ifdef CONFIG_PM_RUNTIME
 	&dev_attr_runtime_status.attr,
 	&dev_attr_runtime_usage.attr,
 	&dev_attr_runtime_active_kids.attr,
 	&dev_attr_runtime_enabled.attr,
 #endif
-#endif
+#endif /* CONFIG_PM_ADVANCED_DEBUG */
 	NULL,
 };
 static struct attribute_group pm_attr_group = {
@@ -458,9 +615,30 @@ static struct attribute_group pm_attr_group = {
 	.attrs	= power_attrs,
 };
 
-#ifdef CONFIG_PM_RUNTIME
+static struct attribute *wakeup_attrs[] = {
+#ifdef CONFIG_PM_SLEEP
+	&dev_attr_wakeup.attr,
+	&dev_attr_wakeup_count.attr,
+	&dev_attr_wakeup_active_count.attr,
+	&dev_attr_wakeup_abort_count.attr,
+	&dev_attr_wakeup_expire_count.attr,
+	&dev_attr_wakeup_active.attr,
+	&dev_attr_wakeup_total_time_ms.attr,
+	&dev_attr_wakeup_max_time_ms.attr,
+	&dev_attr_wakeup_last_time_ms.attr,
+#ifdef CONFIG_PM_AUTOSLEEP
+	&dev_attr_wakeup_prevent_sleep_time_ms.attr,
+#endif
+#endif
+	NULL,
+};
+static struct attribute_group pm_wakeup_attr_group = {
+	.name	= power_group_name,
+	.attrs	= wakeup_attrs,
+};
 
 static struct attribute *runtime_attrs[] = {
+#ifdef CONFIG_PM_RUNTIME
 #ifndef CONFIG_PM_ADVANCED_DEBUG
 	&dev_attr_runtime_status.attr,
 #endif
@@ -468,6 +646,7 @@ static struct attribute *runtime_attrs[] = {
 	&dev_attr_runtime_suspended_time.attr,
 	&dev_attr_runtime_active_time.attr,
 	&dev_attr_autosuspend_delay_ms.attr,
+#endif /* CONFIG_PM_RUNTIME */
 	NULL,
 };
 static struct attribute_group pm_runtime_attr_group = {
@@ -475,40 +654,115 @@ static struct attribute_group pm_runtime_attr_group = {
 	.attrs	= runtime_attrs,
 };
 
+static struct attribute *pm_qos_resume_latency_attrs[] = {
+#ifdef CONFIG_PM_RUNTIME
+	&dev_attr_pm_qos_resume_latency_us.attr,
+#endif /* CONFIG_PM_RUNTIME */
+	NULL,
+};
+static struct attribute_group pm_qos_resume_latency_attr_group = {
+	.name	= power_group_name,
+	.attrs	= pm_qos_resume_latency_attrs,
+};
+
+static struct attribute *pm_qos_latency_tolerance_attrs[] = {
+#ifdef CONFIG_PM_RUNTIME
+	&dev_attr_pm_qos_latency_tolerance_us.attr,
+#endif /* CONFIG_PM_RUNTIME */
+	NULL,
+};
+static struct attribute_group pm_qos_latency_tolerance_attr_group = {
+	.name	= power_group_name,
+	.attrs	= pm_qos_latency_tolerance_attrs,
+};
+
+static struct attribute *pm_qos_flags_attrs[] = {
+#ifdef CONFIG_PM_RUNTIME
+	&dev_attr_pm_qos_no_power_off.attr,
+	&dev_attr_pm_qos_remote_wakeup.attr,
+#endif /* CONFIG_PM_RUNTIME */
+	NULL,
+};
+static struct attribute_group pm_qos_flags_attr_group = {
+	.name	= power_group_name,
+	.attrs	= pm_qos_flags_attrs,
+};
+
 int dpm_sysfs_add(struct device *dev)
 {
 	int rc;
 
 	rc = sysfs_create_group(&dev->kobj, &pm_attr_group);
-	if (rc == 0 && !dev->power.no_callbacks) {
+	if (rc)
+		return rc;
+
+	if (pm_runtime_callbacks_present(dev)) {
 		rc = sysfs_merge_group(&dev->kobj, &pm_runtime_attr_group);
 		if (rc)
-			sysfs_remove_group(&dev->kobj, &pm_attr_group);
+			goto err_out;
+	}
+	if (device_can_wakeup(dev)) {
+		rc = sysfs_merge_group(&dev->kobj, &pm_wakeup_attr_group);
+		if (rc)
+			goto err_runtime;
+	}
+	if (dev->power.set_latency_tolerance) {
+		rc = sysfs_merge_group(&dev->kobj,
+				       &pm_qos_latency_tolerance_attr_group);
+		if (rc)
+			goto err_wakeup;
 	}
+	return 0;
+
+ err_wakeup:
+	sysfs_unmerge_group(&dev->kobj, &pm_wakeup_attr_group);
+ err_runtime:
+	sysfs_unmerge_group(&dev->kobj, &pm_runtime_attr_group);
+ err_out:
+	sysfs_remove_group(&dev->kobj, &pm_attr_group);
 	return rc;
 }
 
-void rpm_sysfs_remove(struct device *dev)
+int wakeup_sysfs_add(struct device *dev)
 {
-	sysfs_unmerge_group(&dev->kobj, &pm_runtime_attr_group);
+	return sysfs_merge_group(&dev->kobj, &pm_wakeup_attr_group);
 }
 
-void dpm_sysfs_remove(struct device *dev)
+void wakeup_sysfs_remove(struct device *dev)
 {
-	rpm_sysfs_remove(dev);
-	sysfs_remove_group(&dev->kobj, &pm_attr_group);
+	sysfs_unmerge_group(&dev->kobj, &pm_wakeup_attr_group);
+}
+
+int pm_qos_sysfs_add_resume_latency(struct device *dev)
+{
+	return sysfs_merge_group(&dev->kobj, &pm_qos_resume_latency_attr_group);
 }
 
-#else /* CONFIG_PM_RUNTIME */
+void pm_qos_sysfs_remove_resume_latency(struct device *dev)
+{
+	sysfs_unmerge_group(&dev->kobj, &pm_qos_resume_latency_attr_group);
+}
 
-int dpm_sysfs_add(struct device * dev)
+int pm_qos_sysfs_add_flags(struct device *dev)
 {
-	return sysfs_create_group(&dev->kobj, &pm_attr_group);
+	return sysfs_merge_group(&dev->kobj, &pm_qos_flags_attr_group);
 }
 
-void dpm_sysfs_remove(struct device * dev)
+void pm_qos_sysfs_remove_flags(struct device *dev)
 {
-	sysfs_remove_group(&dev->kobj, &pm_attr_group);
+	sysfs_unmerge_group(&dev->kobj, &pm_qos_flags_attr_group);
 }
 
-#endif
+void rpm_sysfs_remove(struct device *dev)
+{
+	sysfs_unmerge_group(&dev->kobj, &pm_runtime_attr_group);
+}
+
+void dpm_sysfs_remove(struct device *dev)
+{
+	sysfs_unmerge_group(&dev->kobj, &pm_qos_latency_tolerance_attr_group);
+	dev_pm_qos_constraints_destroy(dev);
+	rpm_sysfs_remove(dev);
+	sysfs_unmerge_group(&dev->kobj, &pm_wakeup_attr_group);
+	sysfs_remove_group(&dev->kobj, &pm_attr_group);
+}
diff --git a/drivers/base/power/trace.c b/drivers/base/power/trace.c
index 9f4258df4cf..d94a1f5121c 100644
--- a/drivers/base/power/trace.c
+++ b/drivers/base/power/trace.c
@@ -8,6 +8,7 @@
  */
 
 #include <linux/resume-trace.h>
+#include <linux/export.h>
 #include <linux/rtc.h>
 
 #include <asm/rtc.h>
@@ -112,7 +113,7 @@ static unsigned int read_magic_time(void)
 	unsigned int val;
 
 	get_rtc_time(&time);
-	printk("Time: %2d:%02d:%02d  Date: %02d/%02d/%02d\n",
+	pr_info("RTC time: %2d:%02d:%02d, date: %02d/%02d/%02d\n",
 		time.tm_hour, time.tm_min, time.tm_sec,
 		time.tm_mon + 1, time.tm_mday, time.tm_year % 100);
 	val = time.tm_year;				/* 100 years */
@@ -179,7 +180,7 @@ static int show_file_hash(unsigned int value)
 		unsigned int hash = hash_string(lineno, file, FILEHASH);
 		if (hash != value)
 			continue;
-		printk("  hash matches %s:%u\n", file, lineno);
+		pr_info("  hash matches %s:%u\n", file, lineno);
 		match++;
 	}
 	return match;
@@ -255,7 +256,7 @@ static int late_resume_init(void)
 	val = val / FILEHASH;
 	dev = val /* % DEVHASH */;
 
-	printk("  Magic number: %d:%d:%d\n", user, file, dev);
+	pr_info("  Magic number: %d:%d:%d\n", user, file, dev);
 	show_file_hash(file);
 	show_dev_hash(dev);
 	return 0;
diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c
index 71c5528e1c3..eb1bd2ecad8 100644
--- a/drivers/base/power/wakeup.c
+++ b/drivers/base/power/wakeup.c
@@ -10,26 +10,40 @@
 #include <linux/slab.h>
 #include <linux/sched.h>
 #include <linux/capability.h>
+#include <linux/export.h>
 #include <linux/suspend.h>
 #include <linux/seq_file.h>
 #include <linux/debugfs.h>
+#include <trace/events/power.h>
 
 #include "power.h"
 
-#define TIMEOUT		100
-
 /*
  * If set, the suspend/hibernate code will abort transitions to a sleep state
  * if wakeup events are registered during or immediately before the transition.
  */
-bool events_check_enabled;
+bool events_check_enabled __read_mostly;
+
+/*
+ * Combined counters of registered wakeup events and wakeup events in progress.
+ * They need to be modified together atomically, so it's better to use one
+ * atomic variable to hold them both.
+ */
+static atomic_t combined_event_count = ATOMIC_INIT(0);
+
+#define IN_PROGRESS_BITS	(sizeof(int) * 4)
+#define MAX_IN_PROGRESS		((1 << IN_PROGRESS_BITS) - 1)
+
+static void split_counters(unsigned int *cnt, unsigned int *inpr)
+{
+	unsigned int comb = atomic_read(&combined_event_count);
+
+	*cnt = (comb >> IN_PROGRESS_BITS);	/* high bits: registered */
+	*inpr = comb & MAX_IN_PROGRESS;		/* low bits: in progress */
+}
 
-/* The counter of registered wakeup events. */
-static atomic_t event_count = ATOMIC_INIT(0);
-/* A preserved old value of event_count. */
+/* A preserved old value of the events counter. */
 static unsigned int saved_count;
-/* The counter of wakeup events being processed. */
-static atomic_t events_in_progress = ATOMIC_INIT(0);
 
 static DEFINE_SPINLOCK(events_lock);
 
@@ -37,6 +51,25 @@ static void pm_wakeup_timer_fn(unsigned long data);
 
 static LIST_HEAD(wakeup_sources);
 
+static DECLARE_WAIT_QUEUE_HEAD(wakeup_count_wait_queue);
+
+/**
+ * wakeup_source_prepare - Prepare a new wakeup source for initialization.
+ * @ws: Wakeup source to prepare.
+ * @name: Pointer to the name of the new wakeup source.
+ *
+ * Callers must ensure that the @name string won't be freed when @ws is still in
+ * use.
+ */
+void wakeup_source_prepare(struct wakeup_source *ws, const char *name)
+{
+	if (ws) {
+		memset(ws, 0, sizeof(*ws));
+		ws->name = name;
+	}
+}
+EXPORT_SYMBOL_GPL(wakeup_source_prepare);
+
 /**
  * wakeup_source_create - Create a struct wakeup_source object.
  * @name: Name of the new wakeup source.
@@ -45,37 +78,44 @@ struct wakeup_source *wakeup_source_create(const char *name)
 {
 	struct wakeup_source *ws;
 
-	ws = kzalloc(sizeof(*ws), GFP_KERNEL);
+	ws = kmalloc(sizeof(*ws), GFP_KERNEL);
 	if (!ws)
 		return NULL;
 
-	spin_lock_init(&ws->lock);
-	if (name)
-		ws->name = kstrdup(name, GFP_KERNEL);
-
+	wakeup_source_prepare(ws, name ? kstrdup(name, GFP_KERNEL) : NULL);
 	return ws;
 }
 EXPORT_SYMBOL_GPL(wakeup_source_create);
 
 /**
+ * wakeup_source_drop - Prepare a struct wakeup_source object for destruction.
+ * @ws: Wakeup source to prepare for destruction.
+ *
+ * Callers must ensure that __pm_stay_awake() or __pm_wakeup_event() will never
+ * be run in parallel with this function for the same wakeup source object.
+ */
+void wakeup_source_drop(struct wakeup_source *ws)
+{
+	if (!ws)
+		return;
+
+	del_timer_sync(&ws->timer);
+	__pm_relax(ws);
+}
+EXPORT_SYMBOL_GPL(wakeup_source_drop);
+
+/**
  * wakeup_source_destroy - Destroy a struct wakeup_source object.
  * @ws: Wakeup source to destroy.
+ *
+ * Use only for wakeup source objects created with wakeup_source_create().
  */
 void wakeup_source_destroy(struct wakeup_source *ws)
 {
 	if (!ws)
 		return;
 
-	spin_lock_irq(&ws->lock);
-	while (ws->active) {
-		spin_unlock_irq(&ws->lock);
-
-		schedule_timeout_interruptible(msecs_to_jiffies(TIMEOUT));
-
-		spin_lock_irq(&ws->lock);
-	}
-	spin_unlock_irq(&ws->lock);
-
+	wakeup_source_drop(ws);
 	kfree(ws->name);
 	kfree(ws);
 }
@@ -87,16 +127,19 @@ EXPORT_SYMBOL_GPL(wakeup_source_destroy);
  */
 void wakeup_source_add(struct wakeup_source *ws)
 {
+	unsigned long flags;
+
 	if (WARN_ON(!ws))
 		return;
 
+	spin_lock_init(&ws->lock);
 	setup_timer(&ws->timer, pm_wakeup_timer_fn, (unsigned long)ws);
 	ws->active = false;
+	ws->last_time = ktime_get();
 
-	spin_lock_irq(&events_lock);
+	spin_lock_irqsave(&events_lock, flags);
 	list_add_rcu(&ws->entry, &wakeup_sources);
-	spin_unlock_irq(&events_lock);
-	synchronize_rcu();
+	spin_unlock_irqrestore(&events_lock, flags);
 }
 EXPORT_SYMBOL_GPL(wakeup_source_add);
 
@@ -106,12 +149,14 @@ EXPORT_SYMBOL_GPL(wakeup_source_add);
  */
 void wakeup_source_remove(struct wakeup_source *ws)
 {
+	unsigned long flags;
+
 	if (WARN_ON(!ws))
 		return;
 
-	spin_lock_irq(&events_lock);
+	spin_lock_irqsave(&events_lock, flags);
 	list_del_rcu(&ws->entry);
-	spin_unlock_irq(&events_lock);
+	spin_unlock_irqrestore(&events_lock, flags);
 	synchronize_rcu();
 }
 EXPORT_SYMBOL_GPL(wakeup_source_remove);
@@ -138,8 +183,10 @@ EXPORT_SYMBOL_GPL(wakeup_source_register);
  */
 void wakeup_source_unregister(struct wakeup_source *ws)
 {
-	wakeup_source_remove(ws);
-	wakeup_source_destroy(ws);
+	if (ws) {
+		wakeup_source_remove(ws);
+		wakeup_source_destroy(ws);
+	}
 }
 EXPORT_SYMBOL_GPL(wakeup_source_unregister);
 
@@ -228,22 +275,59 @@ int device_wakeup_disable(struct device *dev)
 EXPORT_SYMBOL_GPL(device_wakeup_disable);
 
 /**
+ * device_set_wakeup_capable - Set/reset device wakeup capability flag.
+ * @dev: Device to handle.
+ * @capable: Whether or not @dev is capable of waking up the system from sleep.
+ *
+ * If @capable is set, set the @dev's power.can_wakeup flag and add its
+ * wakeup-related attributes to sysfs.  Otherwise, unset the @dev's
+ * power.can_wakeup flag and remove its wakeup-related attributes from sysfs.
+ *
+ * This function may sleep and it can't be called from any context where
+ * sleeping is not allowed.
+ */
+void device_set_wakeup_capable(struct device *dev, bool capable)
+{
+	if (!!dev->power.can_wakeup == !!capable)
+		return;
+
+	if (device_is_registered(dev) && !list_empty(&dev->power.entry)) {
+		if (capable) {
+			if (wakeup_sysfs_add(dev))
+				return;
+		} else {
+			wakeup_sysfs_remove(dev);
+		}
+	}
+	dev->power.can_wakeup = capable;
+}
+EXPORT_SYMBOL_GPL(device_set_wakeup_capable);
+
+/**
  * device_init_wakeup - Device wakeup initialization.
  * @dev: Device to handle.
  * @enable: Whether or not to enable @dev as a wakeup device.
  *
  * By default, most devices should leave wakeup disabled.  The exceptions are
  * devices that everyone expects to be wakeup sources: keyboards, power buttons,
- * possibly network interfaces, etc.
+ * possibly network interfaces, etc.  Also, devices that don't generate their
+ * own wakeup requests but merely forward requests from one bus to another
+ * (like PCI bridges) should have wakeup enabled by default.
  */
 int device_init_wakeup(struct device *dev, bool enable)
 {
 	int ret = 0;
 
+	if (!dev)
+		return -EINVAL;
+
 	if (enable) {
 		device_set_wakeup_capable(dev, true);
 		ret = device_wakeup_enable(dev);
 	} else {
+		if (dev->power.can_wakeup)
+			device_wakeup_disable(dev);
+
 		device_set_wakeup_capable(dev, false);
 	}
 
@@ -302,12 +386,39 @@ EXPORT_SYMBOL_GPL(device_set_wakeup_enable);
  */
 static void wakeup_source_activate(struct wakeup_source *ws)
 {
+	unsigned int cec;
+
+	/*
+	 * active wakeup source should bring the system
+	 * out of PM_SUSPEND_FREEZE state
+	 */
+	freeze_wake();
+
 	ws->active = true;
 	ws->active_count++;
-	ws->timer_expires = jiffies;
 	ws->last_time = ktime_get();
+	if (ws->autosleep_enabled)
+		ws->start_prevent_time = ws->last_time;
 
-	atomic_inc(&events_in_progress);
+	/* Increment the counter of events in progress. */
+	cec = atomic_inc_return(&combined_event_count);
+
+	trace_wakeup_source_activate(ws->name, cec);
+}
+
+/**
+ * wakeup_source_report_event - Report wakeup event using the given source.
+ * @ws: Wakeup source to report the event for.
+ */
+static void wakeup_source_report_event(struct wakeup_source *ws)
+{
+	ws->event_count++;
+	/* This is racy, but the counter is approximate anyway. */
+	if (events_check_enabled)
+		ws->wakeup_count++;
+
+	if (!ws->active)
+		wakeup_source_activate(ws);
 }
 
 /**
@@ -324,9 +435,11 @@ void __pm_stay_awake(struct wakeup_source *ws)
 		return;
 
 	spin_lock_irqsave(&ws->lock, flags);
-	ws->event_count++;
-	if (!ws->active)
-		wakeup_source_activate(ws);
+
+	wakeup_source_report_event(ws);
+	del_timer(&ws->timer);
+	ws->timer_expires = 0;
+
 	spin_unlock_irqrestore(&ws->lock, flags);
 }
 EXPORT_SYMBOL_GPL(__pm_stay_awake);
@@ -355,6 +468,17 @@ void pm_stay_awake(struct device *dev)
 }
 EXPORT_SYMBOL_GPL(pm_stay_awake);
 
+#ifdef CONFIG_PM_AUTOSLEEP
+static void update_prevent_sleep_time(struct wakeup_source *ws, ktime_t now)
+{
+	ktime_t delta = ktime_sub(now, ws->start_prevent_time);
+	ws->prevent_sleep_time = ktime_add(ws->prevent_sleep_time, delta);
+}
+#else
+static inline void update_prevent_sleep_time(struct wakeup_source *ws,
+					     ktime_t now) {}
+#endif
+
 /**
  * wakup_source_deactivate - Mark given wakeup source as inactive.
  * @ws: Wakeup source to handle.
@@ -365,6 +489,7 @@ EXPORT_SYMBOL_GPL(pm_stay_awake);
  */
 static void wakeup_source_deactivate(struct wakeup_source *ws)
 {
+	unsigned int cnt, inpr, cec;
 	ktime_t duration;
 	ktime_t now;
 
@@ -391,17 +516,23 @@ static void wakeup_source_deactivate(struct wakeup_source *ws)
 	if (ktime_to_ns(duration) > ktime_to_ns(ws->max_time))
 		ws->max_time = duration;
 
+	ws->last_time = now;
 	del_timer(&ws->timer);
+	ws->timer_expires = 0;
+
+	if (ws->autosleep_enabled)
+		update_prevent_sleep_time(ws, now);
 
 	/*
-	 * event_count has to be incremented before events_in_progress is
-	 * modified, so that the callers of pm_check_wakeup_events() and
-	 * pm_save_wakeup_count() don't see the old value of event_count and
-	 * events_in_progress equal to zero at the same time.
+	 * Increment the counter of registered wakeup events and decrement the
+	 * counter of wakeup events in progress simultaneously.
 	 */
-	atomic_inc(&event_count);
-	smp_mb__before_atomic_dec();
-	atomic_dec(&events_in_progress);
+	cec = atomic_add_return(MAX_IN_PROGRESS, &combined_event_count);
+	trace_wakeup_source_deactivate(ws->name, cec);
+
+	split_counters(&cnt, &inpr);
+	if (!inpr && waitqueue_active(&wakeup_count_wait_queue))
+		wake_up(&wakeup_count_wait_queue);
 }
 
 /**
@@ -450,11 +581,24 @@ EXPORT_SYMBOL_GPL(pm_relax);
  * pm_wakeup_timer_fn - Delayed finalization of a wakeup event.
  * @data: Address of the wakeup source object associated with the event source.
  *
- * Call __pm_relax() for the wakeup source whose address is stored in @data.
+ * Call wakeup_source_deactivate() for the wakeup source whose address is stored
+ * in @data if it is currently active and its timer has not been canceled and
+ * the expiration time of the timer is not in the future.
  */
 static void pm_wakeup_timer_fn(unsigned long data)
 {
-	__pm_relax((struct wakeup_source *)data);
+	struct wakeup_source *ws = (struct wakeup_source *)data;
+	unsigned long flags;
+
+	spin_lock_irqsave(&ws->lock, flags);
+
+	if (ws->active && ws->timer_expires
+	    && time_after_eq(jiffies, ws->timer_expires)) {
+		wakeup_source_deactivate(ws);
+		ws->expire_count++;
+	}
+
+	spin_unlock_irqrestore(&ws->lock, flags);
 }
 
 /**
@@ -479,9 +623,7 @@ void __pm_wakeup_event(struct wakeup_source *ws, unsigned int msec)
 
 	spin_lock_irqsave(&ws->lock, flags);
 
-	ws->event_count++;
-	if (!ws->active)
-		wakeup_source_activate(ws);
+	wakeup_source_report_event(ws);
 
 	if (!msec) {
 		wakeup_source_deactivate(ws);
@@ -492,7 +634,7 @@ void __pm_wakeup_event(struct wakeup_source *ws, unsigned int msec)
 	if (!expires)
 		expires = 1;
 
-	if (time_after(expires, ws->timer_expires)) {
+	if (!ws->timer_expires || time_after(expires, ws->timer_expires)) {
 		mod_timer(&ws->timer, expires);
 		ws->timer_expires = expires;
 	}
@@ -523,75 +665,97 @@ void pm_wakeup_event(struct device *dev, unsigned int msec)
 }
 EXPORT_SYMBOL_GPL(pm_wakeup_event);
 
-/**
- * pm_wakeup_update_hit_counts - Update hit counts of all active wakeup sources.
- */
-static void pm_wakeup_update_hit_counts(void)
+void pm_print_active_wakeup_sources(void)
 {
-	unsigned long flags;
 	struct wakeup_source *ws;
+	int active = 0;
+	struct wakeup_source *last_activity_ws = NULL;
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(ws, &wakeup_sources, entry) {
-		spin_lock_irqsave(&ws->lock, flags);
-		if (ws->active)
-			ws->hit_count++;
-		spin_unlock_irqrestore(&ws->lock, flags);
+		if (ws->active) {
+			pr_info("active wakeup source: %s\n", ws->name);
+			active = 1;
+		} else if (!active &&
+			   (!last_activity_ws ||
+			    ktime_to_ns(ws->last_time) >
+			    ktime_to_ns(last_activity_ws->last_time))) {
+			last_activity_ws = ws;
+		}
 	}
+
+	if (!active && last_activity_ws)
+		pr_info("last active wakeup source: %s\n",
+			last_activity_ws->name);
 	rcu_read_unlock();
 }
+EXPORT_SYMBOL_GPL(pm_print_active_wakeup_sources);
 
 /**
- * pm_check_wakeup_events - Check for new wakeup events.
+ * pm_wakeup_pending - Check if power transition in progress should be aborted.
  *
  * Compare the current number of registered wakeup events with its preserved
- * value from the past to check if new wakeup events have been registered since
- * the old value was stored.  Check if the current number of wakeup events being
- * processed is zero.
+ * value from the past and return true if new wakeup events have been registered
+ * since the old value was stored.  Also return true if the current number of
+ * wakeup events being processed is different from zero.
  */
-bool pm_check_wakeup_events(void)
+bool pm_wakeup_pending(void)
 {
 	unsigned long flags;
-	bool ret = true;
+	bool ret = false;
 
 	spin_lock_irqsave(&events_lock, flags);
 	if (events_check_enabled) {
-		ret = ((unsigned int)atomic_read(&event_count) == saved_count)
-			&& !atomic_read(&events_in_progress);
-		events_check_enabled = ret;
+		unsigned int cnt, inpr;
+
+		split_counters(&cnt, &inpr);
+		ret = (cnt != saved_count || inpr > 0);
+		events_check_enabled = !ret;
 	}
 	spin_unlock_irqrestore(&events_lock, flags);
-	if (!ret)
-		pm_wakeup_update_hit_counts();
+
+	if (ret) {
+		pr_info("PM: Wakeup pending, aborting suspend\n");
+		pm_print_active_wakeup_sources();
+	}
+
 	return ret;
 }
 
 /**
  * pm_get_wakeup_count - Read the number of registered wakeup events.
  * @count: Address to store the value at.
+ * @block: Whether or not to block.
  *
- * Store the number of registered wakeup events at the address in @count.  Block
- * if the current number of wakeup events being processed is nonzero.
+ * Store the number of registered wakeup events at the address in @count.  If
+ * @block is set, block until the current number of wakeup events being
+ * processed is zero.
  *
- * Return false if the wait for the number of wakeup events being processed to
- * drop down to zero has been interrupted by a signal (and the current number
- * of wakeup events being processed is still nonzero).  Otherwise return true.
+ * Return 'false' if the current number of wakeup events being processed is
+ * nonzero.  Otherwise return 'true'.
  */
-bool pm_get_wakeup_count(unsigned int *count)
+bool pm_get_wakeup_count(unsigned int *count, bool block)
 {
-	bool ret;
+	unsigned int cnt, inpr;
 
-	if (capable(CAP_SYS_ADMIN))
-		events_check_enabled = false;
+	if (block) {
+		DEFINE_WAIT(wait);
 
-	while (atomic_read(&events_in_progress) && !signal_pending(current)) {
-		pm_wakeup_update_hit_counts();
-		schedule_timeout_interruptible(msecs_to_jiffies(TIMEOUT));
+		for (;;) {
+			prepare_to_wait(&wakeup_count_wait_queue, &wait,
+					TASK_INTERRUPTIBLE);
+			split_counters(&cnt, &inpr);
+			if (inpr == 0 || signal_pending(current))
+				break;
+
+			schedule();
+		}
+		finish_wait(&wakeup_count_wait_queue, &wait);
 	}
 
-	ret = !atomic_read(&events_in_progress);
-	*count = atomic_read(&event_count);
-	return ret;
+	split_counters(&cnt, &inpr);
+	*count = cnt;
+	return !inpr;
 }
 
 /**
@@ -600,25 +764,53 @@ bool pm_get_wakeup_count(unsigned int *count)
  *
  * If @count is equal to the current number of registered wakeup events and the
  * current number of wakeup events being processed is zero, store @count as the
- * old number of registered wakeup events to be used by pm_check_wakeup_events()
- * and return true.  Otherwise return false.
+ * old number of registered wakeup events for pm_wakeup_pending(), enable
+ * wakeup events detection and return 'true'.  Otherwise disable wakeup events
+ * detection and return 'false'.
  */
 bool pm_save_wakeup_count(unsigned int count)
 {
-	bool ret = false;
+	unsigned int cnt, inpr;
+	unsigned long flags;
 
-	spin_lock_irq(&events_lock);
-	if (count == (unsigned int)atomic_read(&event_count)
-	    && !atomic_read(&events_in_progress)) {
+	events_check_enabled = false;
+	spin_lock_irqsave(&events_lock, flags);
+	split_counters(&cnt, &inpr);
+	if (cnt == count && inpr == 0) {
 		saved_count = count;
 		events_check_enabled = true;
-		ret = true;
 	}
-	spin_unlock_irq(&events_lock);
-	if (!ret)
-		pm_wakeup_update_hit_counts();
-	return ret;
+	spin_unlock_irqrestore(&events_lock, flags);
+	return events_check_enabled;
+}
+
+#ifdef CONFIG_PM_AUTOSLEEP
+/**
+ * pm_wakep_autosleep_enabled - Modify autosleep_enabled for all wakeup sources.
+ * @set: Whether to set or to clear the autosleep_enabled flags.
+ */
+void pm_wakep_autosleep_enabled(bool set)
+{
+	struct wakeup_source *ws;
+	ktime_t now = ktime_get();
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(ws, &wakeup_sources, entry) {
+		spin_lock_irq(&ws->lock);
+		if (ws->autosleep_enabled != set) {
+			ws->autosleep_enabled = set;
+			if (ws->active) {
+				if (set)
+					ws->start_prevent_time = now;
+				else
+					update_prevent_sleep_time(ws, now);
+			}
+		}
+		spin_unlock_irq(&ws->lock);
+	}
+	rcu_read_unlock();
 }
+#endif /* CONFIG_PM_AUTOSLEEP */
 
 static struct dentry *wakeup_sources_stats_dentry;
 
@@ -635,27 +827,37 @@ static int print_wakeup_source_stats(struct seq_file *m,
 	ktime_t max_time;
 	unsigned long active_count;
 	ktime_t active_time;
+	ktime_t prevent_sleep_time;
 	int ret;
 
 	spin_lock_irqsave(&ws->lock, flags);
 
 	total_time = ws->total_time;
 	max_time = ws->max_time;
+	prevent_sleep_time = ws->prevent_sleep_time;
 	active_count = ws->active_count;
 	if (ws->active) {
-		active_time = ktime_sub(ktime_get(), ws->last_time);
+		ktime_t now = ktime_get();
+
+		active_time = ktime_sub(now, ws->last_time);
 		total_time = ktime_add(total_time, active_time);
 		if (active_time.tv64 > max_time.tv64)
 			max_time = active_time;
+
+		if (ws->autosleep_enabled)
+			prevent_sleep_time = ktime_add(prevent_sleep_time,
+				ktime_sub(now, ws->start_prevent_time));
 	} else {
 		active_time = ktime_set(0, 0);
 	}
 
-	ret = seq_printf(m, "%-12s\t%lu\t\t%lu\t\t%lu\t\t"
-			"%lld\t\t%lld\t\t%lld\t\t%lld\n",
-			ws->name, active_count, ws->event_count, ws->hit_count,
+	ret = seq_printf(m, "%-12s\t%lu\t\t%lu\t\t%lu\t\t%lu\t\t"
+			"%lld\t\t%lld\t\t%lld\t\t%lld\t\t%lld\n",
+			ws->name, active_count, ws->event_count,
+			ws->wakeup_count, ws->expire_count,
 			ktime_to_ms(active_time), ktime_to_ms(total_time),
-			ktime_to_ms(max_time), ktime_to_ms(ws->last_time));
+			ktime_to_ms(max_time), ktime_to_ms(ws->last_time),
+			ktime_to_ms(prevent_sleep_time));
 
 	spin_unlock_irqrestore(&ws->lock, flags);
 
@@ -670,8 +872,9 @@ static int wakeup_sources_stats_show(struct seq_file *m, void *unused)
 {
 	struct wakeup_source *ws;
 
-	seq_puts(m, "name\t\tactive_count\tevent_count\thit_count\t"
-		"active_since\ttotal_time\tmax_time\tlast_change\n");
+	seq_puts(m, "name\t\tactive_count\tevent_count\twakeup_count\t"
+		"expire_count\tactive_since\ttotal_time\tmax_time\t"
+		"last_change\tprevent_suspend_time\n");
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(ws, &wakeup_sources, entry)
diff --git a/drivers/base/regmap/Kconfig b/drivers/base/regmap/Kconfig
new file mode 100644
index 00000000000..4251570610c
--- /dev/null
+++ b/drivers/base/regmap/Kconfig
@@ -0,0 +1,25 @@
+# Generic register map support.  There are no user serviceable options here,
+# this is an API intended to be used by other kernel subsystems.  These
+# subsystems should select the appropriate symbols.
+
+config REGMAP
+	default y if (REGMAP_I2C || REGMAP_SPI || REGMAP_SPMI || REGMAP_MMIO || REGMAP_IRQ)
+	select LZO_COMPRESS
+	select LZO_DECOMPRESS
+	select IRQ_DOMAIN if REGMAP_IRQ
+	bool
+
+config REGMAP_I2C
+	tristate
+
+config REGMAP_SPI
+	tristate
+
+config REGMAP_SPMI
+	tristate
+
+config REGMAP_MMIO
+	tristate
+
+config REGMAP_IRQ
+	bool
diff --git a/drivers/base/regmap/Makefile b/drivers/base/regmap/Makefile
new file mode 100644
index 00000000000..a7c670b4123
--- /dev/null
+++ b/drivers/base/regmap/Makefile
@@ -0,0 +1,8 @@
+obj-$(CONFIG_REGMAP) += regmap.o regcache.o
+obj-$(CONFIG_REGMAP) += regcache-rbtree.o regcache-lzo.o regcache-flat.o
+obj-$(CONFIG_DEBUG_FS) += regmap-debugfs.o
+obj-$(CONFIG_REGMAP_I2C) += regmap-i2c.o
+obj-$(CONFIG_REGMAP_SPI) += regmap-spi.o
+obj-$(CONFIG_REGMAP_SPMI) += regmap-spmi.o
+obj-$(CONFIG_REGMAP_MMIO) += regmap-mmio.o
+obj-$(CONFIG_REGMAP_IRQ) += regmap-irq.o
diff --git a/drivers/base/regmap/internal.h b/drivers/base/regmap/internal.h
new file mode 100644
index 00000000000..7d1326985be
--- /dev/null
+++ b/drivers/base/regmap/internal.h
@@ -0,0 +1,235 @@
+/*
+ * Register map access API internal header
+ *
+ * Copyright 2011 Wolfson Microelectronics plc
+ *
+ * Author: Mark Brown <broonie@opensource.wolfsonmicro.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef _REGMAP_INTERNAL_H
+#define _REGMAP_INTERNAL_H
+
+#include <linux/regmap.h>
+#include <linux/fs.h>
+#include <linux/list.h>
+#include <linux/wait.h>
+
+struct regmap;
+struct regcache_ops;
+
+struct regmap_debugfs_off_cache {
+	struct list_head list;
+	off_t min;
+	off_t max;
+	unsigned int base_reg;
+	unsigned int max_reg;
+};
+
+struct regmap_format {
+	size_t buf_size;
+	size_t reg_bytes;
+	size_t pad_bytes;
+	size_t val_bytes;
+	void (*format_write)(struct regmap *map,
+			     unsigned int reg, unsigned int val);
+	void (*format_reg)(void *buf, unsigned int reg, unsigned int shift);
+	void (*format_val)(void *buf, unsigned int val, unsigned int shift);
+	unsigned int (*parse_val)(const void *buf);
+	void (*parse_inplace)(void *buf);
+};
+
+struct regmap_async {
+	struct list_head list;
+	struct regmap *map;
+	void *work_buf;
+};
+
+struct regmap {
+	struct mutex mutex;
+	spinlock_t spinlock;
+	unsigned long spinlock_flags;
+	regmap_lock lock;
+	regmap_unlock unlock;
+	void *lock_arg; /* This is passed to lock/unlock functions */
+
+	struct device *dev; /* Device we do I/O on */
+	void *work_buf;     /* Scratch buffer used to format I/O */
+	struct regmap_format format;  /* Buffer format */
+	const struct regmap_bus *bus;
+	void *bus_context;
+	const char *name;
+
+	bool async;
+	spinlock_t async_lock;
+	wait_queue_head_t async_waitq;
+	struct list_head async_list;
+	struct list_head async_free;
+	int async_ret;
+
+#ifdef CONFIG_DEBUG_FS
+	struct dentry *debugfs;
+	const char *debugfs_name;
+
+	unsigned int debugfs_reg_len;
+	unsigned int debugfs_val_len;
+	unsigned int debugfs_tot_len;
+
+	struct list_head debugfs_off_cache;
+	struct mutex cache_lock;
+#endif
+
+	unsigned int max_register;
+	bool (*writeable_reg)(struct device *dev, unsigned int reg);
+	bool (*readable_reg)(struct device *dev, unsigned int reg);
+	bool (*volatile_reg)(struct device *dev, unsigned int reg);
+	bool (*precious_reg)(struct device *dev, unsigned int reg);
+	const struct regmap_access_table *wr_table;
+	const struct regmap_access_table *rd_table;
+	const struct regmap_access_table *volatile_table;
+	const struct regmap_access_table *precious_table;
+
+	int (*reg_read)(void *context, unsigned int reg, unsigned int *val);
+	int (*reg_write)(void *context, unsigned int reg, unsigned int val);
+
+	bool defer_caching;
+
+	u8 read_flag_mask;
+	u8 write_flag_mask;
+
+	/* number of bits to (left) shift the reg value when formatting*/
+	int reg_shift;
+	int reg_stride;
+
+	/* regcache specific members */
+	const struct regcache_ops *cache_ops;
+	enum regcache_type cache_type;
+
+	/* number of bytes in reg_defaults_raw */
+	unsigned int cache_size_raw;
+	/* number of bytes per word in reg_defaults_raw */
+	unsigned int cache_word_size;
+	/* number of entries in reg_defaults */
+	unsigned int num_reg_defaults;
+	/* number of entries in reg_defaults_raw */
+	unsigned int num_reg_defaults_raw;
+
+	/* if set, only the cache is modified not the HW */
+	u32 cache_only;
+	/* if set, only the HW is modified not the cache */
+	u32 cache_bypass;
+	/* if set, remember to free reg_defaults_raw */
+	bool cache_free;
+
+	struct reg_default *reg_defaults;
+	const void *reg_defaults_raw;
+	void *cache;
+	u32 cache_dirty;
+
+	struct reg_default *patch;
+	int patch_regs;
+
+	/* if set, converts bulk rw to single rw */
+	bool use_single_rw;
+	/* if set, the device supports multi write mode */
+	bool can_multi_write;
+
+	struct rb_root range_tree;
+	void *selector_work_buf;	/* Scratch buffer used for selector */
+};
+
+struct regcache_ops {
+	const char *name;
+	enum regcache_type type;
+	int (*init)(struct regmap *map);
+	int (*exit)(struct regmap *map);
+	int (*read)(struct regmap *map, unsigned int reg, unsigned int *value);
+	int (*write)(struct regmap *map, unsigned int reg, unsigned int value);
+	int (*sync)(struct regmap *map, unsigned int min, unsigned int max);
+	int (*drop)(struct regmap *map, unsigned int min, unsigned int max);
+};
+
+bool regmap_writeable(struct regmap *map, unsigned int reg);
+bool regmap_readable(struct regmap *map, unsigned int reg);
+bool regmap_volatile(struct regmap *map, unsigned int reg);
+bool regmap_precious(struct regmap *map, unsigned int reg);
+
+int _regmap_write(struct regmap *map, unsigned int reg,
+		  unsigned int val);
+
+struct regmap_range_node {
+	struct rb_node node;
+	const char *name;
+	struct regmap *map;
+
+	unsigned int range_min;
+	unsigned int range_max;
+
+	unsigned int selector_reg;
+	unsigned int selector_mask;
+	int selector_shift;
+
+	unsigned int window_start;
+	unsigned int window_len;
+};
+
+struct regmap_field {
+	struct regmap *regmap;
+	unsigned int mask;
+	/* lsb */
+	unsigned int shift;
+	unsigned int reg;
+
+	unsigned int id_size;
+	unsigned int id_offset;
+};
+
+#ifdef CONFIG_DEBUG_FS
+extern void regmap_debugfs_initcall(void);
+extern void regmap_debugfs_init(struct regmap *map, const char *name);
+extern void regmap_debugfs_exit(struct regmap *map);
+#else
+static inline void regmap_debugfs_initcall(void) { }
+static inline void regmap_debugfs_init(struct regmap *map, const char *name) { }
+static inline void regmap_debugfs_exit(struct regmap *map) { }
+#endif
+
+/* regcache core declarations */
+int regcache_init(struct regmap *map, const struct regmap_config *config);
+void regcache_exit(struct regmap *map);
+int regcache_read(struct regmap *map,
+		       unsigned int reg, unsigned int *value);
+int regcache_write(struct regmap *map,
+			unsigned int reg, unsigned int value);
+int regcache_sync(struct regmap *map);
+int regcache_sync_block(struct regmap *map, void *block,
+			unsigned long *cache_present,
+			unsigned int block_base, unsigned int start,
+			unsigned int end);
+
+static inline const void *regcache_get_val_addr(struct regmap *map,
+						const void *base,
+						unsigned int idx)
+{
+	return base + (map->cache_word_size * idx);
+}
+
+unsigned int regcache_get_val(struct regmap *map, const void *base,
+			      unsigned int idx);
+bool regcache_set_val(struct regmap *map, void *base, unsigned int idx,
+		      unsigned int val);
+int regcache_lookup_reg(struct regmap *map, unsigned int reg);
+
+int _regmap_raw_write(struct regmap *map, unsigned int reg,
+		      const void *val, size_t val_len);
+
+void regmap_async_complete_cb(struct regmap_async *async, int ret);
+
+extern struct regcache_ops regcache_rbtree_ops;
+extern struct regcache_ops regcache_lzo_ops;
+extern struct regcache_ops regcache_flat_ops;
+
+#endif
diff --git a/drivers/base/regmap/regcache-flat.c b/drivers/base/regmap/regcache-flat.c
new file mode 100644
index 00000000000..d9762e41959
--- /dev/null
+++ b/drivers/base/regmap/regcache-flat.c
@@ -0,0 +1,72 @@
+/*
+ * Register cache access API - flat caching support
+ *
+ * Copyright 2012 Wolfson Microelectronics plc
+ *
+ * Author: Mark Brown <broonie@opensource.wolfsonmicro.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/slab.h>
+#include <linux/device.h>
+#include <linux/seq_file.h>
+
+#include "internal.h"
+
+/* Allocate a zeroed cache indexed by register number; load the defaults. */
+static int regcache_flat_init(struct regmap *map)
+{
+	unsigned int *cache;
+	unsigned int i;
+
+	/* kcalloc() checks the count * size multiplication for overflow */
+	cache = kcalloc(map->max_register + 1, sizeof(*cache), GFP_KERNEL);
+	map->cache = cache;
+	if (!cache)
+		return -ENOMEM;
+
+	for (i = 0; i < map->num_reg_defaults; i++)
+		cache[map->reg_defaults[i].reg] = map->reg_defaults[i].def;
+
+	return 0;
+}
+
+static int regcache_flat_exit(struct regmap *map)
+{
+	kfree(map->cache);
+	map->cache = NULL;
+
+	return 0;
+}
+
+static int regcache_flat_read(struct regmap *map,
+			      unsigned int reg, unsigned int *value)
+{
+	unsigned int *cache = map->cache;
+
+	*value = cache[reg];
+
+	return 0;
+}
+
+static int regcache_flat_write(struct regmap *map, unsigned int reg,
+			       unsigned int value)
+{
+	unsigned int *cache = map->cache;
+
+	cache[reg] = value;
+
+	return 0;
+}
+
+struct regcache_ops regcache_flat_ops = {
+	.type = REGCACHE_FLAT,
+	.name = "flat",
+	.init = regcache_flat_init,
+	.exit = regcache_flat_exit,
+	.read = regcache_flat_read,
+	.write = regcache_flat_write,
+};
diff --git a/drivers/base/regmap/regcache-lzo.c b/drivers/base/regmap/regcache-lzo.c
new file mode 100644
index 00000000000..e210a6d1406
--- /dev/null
+++ b/drivers/base/regmap/regcache-lzo.c
@@ -0,0 +1,378 @@
+/*
+ * Register cache access API - LZO caching support
+ *
+ * Copyright 2011 Wolfson Microelectronics plc
+ *
+ * Author: Dimitris Papastamos <dp@opensource.wolfsonmicro.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/slab.h>
+#include <linux/device.h>
+#include <linux/lzo.h>
+
+#include "internal.h"
+
+static int regcache_lzo_exit(struct regmap *map);
+
+struct regcache_lzo_ctx {
+	void *wmem;
+	void *dst;
+	const void *src;
+	size_t src_len;
+	size_t dst_len;
+	size_t decompressed_size;
+	unsigned long *sync_bmp;
+	int sync_bmp_nbits;
+};
+
+#define LZO_BLOCK_NUM 8
+static int regcache_lzo_block_count(struct regmap *map)
+{
+	return LZO_BLOCK_NUM;
+}
+
+static int regcache_lzo_prepare(struct regcache_lzo_ctx *lzo_ctx)
+{
+	lzo_ctx->wmem = kmalloc(LZO1X_MEM_COMPRESS, GFP_KERNEL);
+	if (!lzo_ctx->wmem)
+		return -ENOMEM;
+	return 0;
+}
+
+static int regcache_lzo_compress(struct regcache_lzo_ctx *lzo_ctx)
+{
+	size_t compress_size;
+	int ret;
+
+	ret = lzo1x_1_compress(lzo_ctx->src, lzo_ctx->src_len,
+			       lzo_ctx->dst, &compress_size, lzo_ctx->wmem);
+	if (ret != LZO_E_OK || compress_size > lzo_ctx->dst_len)
+		return -EINVAL;
+	lzo_ctx->dst_len = compress_size;
+	return 0;
+}
+
+static int regcache_lzo_decompress(struct regcache_lzo_ctx *lzo_ctx)
+{
+	size_t dst_len;
+	int ret;
+
+	dst_len = lzo_ctx->dst_len;
+	ret = lzo1x_decompress_safe(lzo_ctx->src, lzo_ctx->src_len,
+				    lzo_ctx->dst, &dst_len);
+	if (ret != LZO_E_OK || dst_len != lzo_ctx->dst_len)
+		return -EINVAL;
+	return 0;
+}
+
+static int regcache_lzo_compress_cache_block(struct regmap *map,
+		struct regcache_lzo_ctx *lzo_ctx)
+{
+	int ret;
+
+	lzo_ctx->dst_len = lzo1x_worst_compress(PAGE_SIZE);
+	lzo_ctx->dst = kmalloc(lzo_ctx->dst_len, GFP_KERNEL);
+	if (!lzo_ctx->dst) {
+		lzo_ctx->dst_len = 0;
+		return -ENOMEM;
+	}
+
+	ret = regcache_lzo_compress(lzo_ctx);
+	if (ret < 0)
+		return ret;
+	return 0;
+}
+
+static int regcache_lzo_decompress_cache_block(struct regmap *map,
+		struct regcache_lzo_ctx *lzo_ctx)
+{
+	int ret;
+
+	lzo_ctx->dst_len = lzo_ctx->decompressed_size;
+	lzo_ctx->dst = kmalloc(lzo_ctx->dst_len, GFP_KERNEL);
+	if (!lzo_ctx->dst) {
+		lzo_ctx->dst_len = 0;
+		return -ENOMEM;
+	}
+
+	ret = regcache_lzo_decompress(lzo_ctx);
+	if (ret < 0)
+		return ret;
+	return 0;
+}
+
+static inline int regcache_lzo_get_blkindex(struct regmap *map,
+					    unsigned int reg)
+{
+	return ((reg / map->reg_stride) * map->cache_word_size) /
+		DIV_ROUND_UP(map->cache_size_raw,
+			     regcache_lzo_block_count(map));
+}
+
+static inline int regcache_lzo_get_blkpos(struct regmap *map,
+					  unsigned int reg)
+{
+	return (reg / map->reg_stride) %
+		    (DIV_ROUND_UP(map->cache_size_raw,
+				  regcache_lzo_block_count(map)) /
+		     map->cache_word_size);
+}
+
+static inline int regcache_lzo_get_blksize(struct regmap *map)
+{
+	return DIV_ROUND_UP(map->cache_size_raw,
+			    regcache_lzo_block_count(map));
+}
+
+/*
+ * Allocate the block contexts and the shared sync bitmap, then compress
+ * reg_defaults_raw block by block.  On error regcache_lzo_exit() cleans up.
+ */
+static int regcache_lzo_init(struct regmap *map)
+{
+	struct regcache_lzo_ctx **lzo_blocks;
+	size_t bmp_size;
+	int ret, i, blksize, blkcount;
+	const char *p, *end;
+	unsigned long *sync_bmp;
+
+	blkcount = regcache_lzo_block_count(map);
+	map->cache = kzalloc(blkcount * sizeof *lzo_blocks, GFP_KERNEL);
+	if (!map->cache)
+		return -ENOMEM;
+	lzo_blocks = map->cache;
+
+	/*
+	 * allocate a bitmap to be used when syncing the cache with
+	 * the hardware.  Each time a register is modified, the corresponding
+	 * bit is set in the bitmap, so we know that we have to sync
+	 * that register.
+	 */
+	bmp_size = map->num_reg_defaults_raw;
+	sync_bmp = kmalloc(BITS_TO_LONGS(bmp_size) * sizeof(long),
+			   GFP_KERNEL);
+	if (!sync_bmp) {
+		ret = -ENOMEM;
+		goto err;
+	}
+	bitmap_zero(sync_bmp, bmp_size);
+
+	/* allocate the lzo blocks and initialize them */
+	for (i = 0; i < blkcount; i++) {
+		lzo_blocks[i] = kzalloc(sizeof **lzo_blocks, GFP_KERNEL);
+		if (!lzo_blocks[i]) {
+			/* exit() frees the bitmap via block 0 if it exists */
+			if (i == 0)
+				kfree(sync_bmp);
+			ret = -ENOMEM;
+			goto err;
+		}
+		lzo_blocks[i]->sync_bmp = sync_bmp;
+		lzo_blocks[i]->sync_bmp_nbits = bmp_size;
+		/* alloc the working space for the compressed block */
+		ret = regcache_lzo_prepare(lzo_blocks[i]);
+		if (ret < 0)
+			goto err;
+	}
+
+	blksize = regcache_lzo_get_blksize(map);
+	p = map->reg_defaults_raw;
+	end = map->reg_defaults_raw + map->cache_size_raw;
+	/* compress the register map and fill the lzo blocks */
+	for (i = 0; i < blkcount; i++, p += blksize) {
+		lzo_blocks[i]->src = p;
+		if (p + blksize > end)
+			lzo_blocks[i]->src_len = end - p;
+		else
+			lzo_blocks[i]->src_len = blksize;
+		ret = regcache_lzo_compress_cache_block(map, lzo_blocks[i]);
+		if (ret < 0)
+			goto err;
+		lzo_blocks[i]->decompressed_size = lzo_blocks[i]->src_len;
+	}
+
+	return 0;
+err:
+	regcache_lzo_exit(map);
+	return ret;
+}
+
+static int regcache_lzo_exit(struct regmap *map)
+{
+	struct regcache_lzo_ctx **lzo_blocks;
+	int i, blkcount;
+
+	lzo_blocks = map->cache;
+	if (!lzo_blocks)
+		return 0;
+
+	blkcount = regcache_lzo_block_count(map);
+	/*
+	 * the pointer to the bitmap used for syncing the cache
+	 * is shared amongst all lzo_blocks.  Ensure it is freed
+	 * only once.
+	 */
+	if (lzo_blocks[0])
+		kfree(lzo_blocks[0]->sync_bmp);
+	for (i = 0; i < blkcount; i++) {
+		if (lzo_blocks[i]) {
+			kfree(lzo_blocks[i]->wmem);
+			kfree(lzo_blocks[i]->dst);
+		}
+		/* each lzo_block is a pointer returned by kmalloc or NULL */
+		kfree(lzo_blocks[i]);
+	}
+	kfree(lzo_blocks);
+	map->cache = NULL;
+	return 0;
+}
+
+/* Decompress the block holding @reg into a scratch buffer and read @value. */
+static int regcache_lzo_read(struct regmap *map,
+			     unsigned int reg, unsigned int *value)
+{
+	struct regcache_lzo_ctx *lzo_block, **lzo_blocks;
+	int ret, blkindex, blkpos;
+	size_t tmp_dst_len;
+	void *tmp_dst;
+
+	/* index of the compressed lzo block */
+	blkindex = regcache_lzo_get_blkindex(map, reg);
+	/* register index within the decompressed block */
+	blkpos = regcache_lzo_get_blkpos(map, reg);
+	lzo_blocks = map->cache;
+	lzo_block = lzo_blocks[blkindex];
+
+	/* save the pointer and length of the compressed block */
+	tmp_dst = lzo_block->dst;
+	tmp_dst_len = lzo_block->dst_len;
+
+	/* prepare the source to be the compressed block */
+	lzo_block->src = lzo_block->dst;
+	lzo_block->src_len = lzo_block->dst_len;
+
+	/* decompress the block */
+	ret = regcache_lzo_decompress_cache_block(map, lzo_block);
+	if (ret >= 0)
+		/* fetch the value from the cache */
+		*value = regcache_get_val(map, lzo_block->dst, blkpos);
+
+	/* free the scratch copy; dst is NULL if its allocation failed */
+	kfree(lzo_block->dst);
+	/* restore the pointer and length of the compressed block */
+	lzo_block->dst = tmp_dst;
+	lzo_block->dst_len = tmp_dst_len;
+
+	return ret;
+}
+
+/* Decompress the block holding @reg, store @value and recompress the block. */
+static int regcache_lzo_write(struct regmap *map,
+			      unsigned int reg, unsigned int value)
+{
+	struct regcache_lzo_ctx *lzo_block, **lzo_blocks;
+	int ret, blkindex, blkpos;
+	size_t tmp_dst_len;
+	void *tmp_dst;
+
+	/* index of the compressed lzo block */
+	blkindex = regcache_lzo_get_blkindex(map, reg);
+	/* register index within the decompressed block */
+	blkpos = regcache_lzo_get_blkpos(map, reg);
+	lzo_blocks = map->cache;
+	lzo_block = lzo_blocks[blkindex];
+
+	/* save the pointer and length of the compressed block */
+	tmp_dst = lzo_block->dst;
+	tmp_dst_len = lzo_block->dst_len;
+
+	/* prepare the source to be the compressed block */
+	lzo_block->src = lzo_block->dst;
+	lzo_block->src_len = lzo_block->dst_len;
+
+	/* decompress the block */
+	ret = regcache_lzo_decompress_cache_block(map, lzo_block);
+	if (ret < 0) {
+		kfree(lzo_block->dst);
+		goto out;
+	}
+
+	/* write the new value to the cache */
+	if (regcache_set_val(map, lzo_block->dst, blkpos, value)) {
+		kfree(lzo_block->dst);
+		goto out;
+	}
+
+	/* prepare the source to be the decompressed block */
+	lzo_block->src = lzo_block->dst;
+	lzo_block->src_len = lzo_block->dst_len;
+
+	/* compress the block */
+	ret = regcache_lzo_compress_cache_block(map, lzo_block);
+	if (ret < 0) {
+		kfree(lzo_block->dst);
+		kfree(lzo_block->src);
+		goto out;
+	}
+
+	/* set the bit so we know we have to sync this register */
+	set_bit(reg / map->reg_stride, lzo_block->sync_bmp);
+	/* free the old compressed block and the decompressed scratch copy */
+	kfree(tmp_dst);
+	kfree(lzo_block->src);
+	return 0;
+out:
+	lzo_block->dst = tmp_dst;
+	lzo_block->dst_len = tmp_dst_len;
+	return ret;
+}
+
+/* Write registers flagged in the sync bitmap, up to @max, to the hardware. */
+static int regcache_lzo_sync(struct regmap *map, unsigned int min,
+			     unsigned int max)
+{
+	struct regcache_lzo_ctx **lzo_blocks;
+	unsigned int val;
+	int i, ret;
+
+	lzo_blocks = map->cache;
+	i = min;
+	for_each_set_bit_from(i, lzo_blocks[0]->sync_bmp,
+			      lzo_blocks[0]->sync_bmp_nbits) {
+		if (i > max)
+			continue;
+
+		ret = regcache_read(map, i, &val);
+		if (ret)
+			return ret;
+
+		/* Is this the hardware default?  If so skip. */
+		ret = regcache_lookup_reg(map, i);
+		/* ret indexes reg_defaults[], so entry 0 is a valid hit too */
+		if (ret >= 0 && val == map->reg_defaults[ret].def)
+			continue;
+
+		map->cache_bypass = 1;
+		ret = _regmap_write(map, i, val);
+		map->cache_bypass = 0;
+		if (ret)
+			return ret;
+		dev_dbg(map->dev, "Synced register %#x, value %#x\n", i, val);
+	}
+
+	return 0;
+}
+
+struct regcache_ops regcache_lzo_ops = {
+	.type = REGCACHE_COMPRESSED,
+	.name = "lzo",
+	.init = regcache_lzo_init,
+	.exit = regcache_lzo_exit,
+	.read = regcache_lzo_read,
+	.write = regcache_lzo_write,
+	.sync = regcache_lzo_sync
+};
diff --git a/drivers/base/regmap/regcache-rbtree.c b/drivers/base/regmap/regcache-rbtree.c
new file mode 100644
index 00000000000..6a7e4fa1285
--- /dev/null
+++ b/drivers/base/regmap/regcache-rbtree.c
@@ -0,0 +1,539 @@
+/*
+ * Register cache access API - rbtree caching support
+ *
+ * Copyright 2011 Wolfson Microelectronics plc
+ *
+ * Author: Dimitris Papastamos <dp@opensource.wolfsonmicro.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/slab.h>
+#include <linux/device.h>
+#include <linux/debugfs.h>
+#include <linux/rbtree.h>
+#include <linux/seq_file.h>
+
+#include "internal.h"
+
+static int regcache_rbtree_write(struct regmap *map, unsigned int reg,
+				 unsigned int value);
+static int regcache_rbtree_exit(struct regmap *map);
+
+struct regcache_rbtree_node {
+	/* block of adjacent registers */
+	void *block;
+	/* bitmap, one bit per block slot: set once that register is cached */
+	unsigned long *cache_present;
+	/* base register handled by this block */
+	unsigned int base_reg;
+	/* number of registers available in the block */
+	unsigned int blklen;
+	/* the actual rbtree node holding this block */
+	struct rb_node node;
+} __attribute__ ((packed));
+
+struct regcache_rbtree_ctx {
+	struct rb_root root;
+	struct regcache_rbtree_node *cached_rbnode;
+};
+
+/* Report the lowest (*base) and highest (*top) register held by @rbnode. */
+static inline void regcache_rbtree_get_base_top_reg(
+	struct regmap *map, struct regcache_rbtree_node *rbnode,
+	unsigned int *base, unsigned int *top)
+{
+	*base = rbnode->base_reg;
+	*top = *base + (rbnode->blklen - 1) * map->reg_stride;
+}
+
+static unsigned int regcache_rbtree_get_register(struct regmap *map,
+	struct regcache_rbtree_node *rbnode, unsigned int idx)
+{
+	return regcache_get_val(map, rbnode->block, idx);
+}
+
+/* Store @val in slot @idx of @rbnode's block and mark the slot as cached. */
+static void regcache_rbtree_set_register(struct regmap *map,
+	struct regcache_rbtree_node *rbnode, unsigned int idx, unsigned int val)
+{
+	regcache_set_val(map, rbnode->block, idx, val);
+	set_bit(idx, rbnode->cache_present);
+}
+
+/* Find the node whose range contains @reg, trying the cached node first. */
+static struct regcache_rbtree_node *regcache_rbtree_lookup(struct regmap *map,
+							   unsigned int reg)
+{
+	struct regcache_rbtree_ctx *ctx = map->cache;
+	struct regcache_rbtree_node *cur = ctx->cached_rbnode;
+	struct rb_node *walk;
+	unsigned int lo, hi;
+
+	if (cur) {
+		regcache_rbtree_get_base_top_reg(map, cur, &lo, &hi);
+		if (reg >= lo && reg <= hi)
+			return cur;
+	}
+
+	/* miss: descend the tree and remember the node that matches */
+	for (walk = ctx->root.rb_node; walk; ) {
+		cur = container_of(walk, struct regcache_rbtree_node, node);
+		regcache_rbtree_get_base_top_reg(map, cur, &lo, &hi);
+		if (reg > hi) {
+			walk = walk->rb_right;
+			continue;
+		}
+		if (reg < lo) {
+			walk = walk->rb_left;
+			continue;
+		}
+		ctx->cached_rbnode = cur;
+		return cur;
+	}
+
+	return NULL;
+}
+
+/*
+ * Link @rbnode into the tree, ordered by register range.
+ *
+ * Returns 1 when the node was linked and 0 when its base register already
+ * falls inside an existing node, in which case the tree is left untouched.
+ */
+static int regcache_rbtree_insert(struct regmap *map, struct rb_root *root,
+				  struct regcache_rbtree_node *rbnode)
+{
+	struct rb_node **link = &root->rb_node;
+	struct rb_node *parent = NULL;
+	struct regcache_rbtree_node *cur;
+	unsigned int cur_base, cur_top;
+	unsigned int base_reg = rbnode->base_reg;
+
+	while (*link) {
+		parent = *link;
+		cur = container_of(parent, struct regcache_rbtree_node, node);
+		/* base and top registers of the node being visited */
+		regcache_rbtree_get_base_top_reg(map, cur, &cur_base, &cur_top);
+
+		if (base_reg > cur_top)
+			link = &parent->rb_right;
+		else if (base_reg < cur_base)
+			link = &parent->rb_left;
+		else	/* already covered by an existing node */
+			return 0;
+	}
+
+	/* insert the node into the rbtree */
+	rb_link_node(&rbnode->node, parent, link);
+	rb_insert_color(&rbnode->node, root);
+
+	return 1;
+}
+
+#ifdef CONFIG_DEBUG_FS
+static int rbtree_show(struct seq_file *s, void *ignored)
+{
+	struct regmap *map = s->private;
+	struct regcache_rbtree_ctx *rbtree_ctx = map->cache;
+	struct regcache_rbtree_node *n;
+	struct rb_node *node;
+	unsigned int base, top;
+	size_t mem_size;
+	int nodes = 0;
+	int registers = 0;
+	int this_registers, average;
+	/* the map lock is held for the whole walk and released before return */
+	map->lock(map->lock_arg);
+
+	mem_size = sizeof(*rbtree_ctx);
+	/* one output line per node; mem_size adds node, block and bitmap sizes */
+	for (node = rb_first(&rbtree_ctx->root); node != NULL;
+	     node = rb_next(node)) {
+		n = container_of(node, struct regcache_rbtree_node, node);
+		mem_size += sizeof(*n);
+		mem_size += (n->blklen * map->cache_word_size);
+		mem_size += BITS_TO_LONGS(n->blklen) * sizeof(long);
+
+		regcache_rbtree_get_base_top_reg(map, n, &base, &top);
+		this_registers = ((top - base) / map->reg_stride) + 1;
+		seq_printf(s, "%x-%x (%d)\n", base, top, this_registers);
+
+		nodes++;
+		registers += this_registers;
+	}
+
+	if (nodes)
+		average = registers / nodes;
+	else
+		average = 0;
+
+	seq_printf(s, "%d nodes, %d registers, average %d registers, used %zu bytes\n",
+		   nodes, registers, average, mem_size);
+
+	map->unlock(map->lock_arg);
+
+	return 0;
+}
+
+static int rbtree_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, rbtree_show, inode->i_private);
+}
+
+static const struct file_operations rbtree_fops = {
+	.open		= rbtree_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static void rbtree_debugfs_init(struct regmap *map)
+{
+	debugfs_create_file("rbtree", 0400, map->debugfs, map, &rbtree_fops);
+}
+#else
+static void rbtree_debugfs_init(struct regmap *map)
+{
+}
+#endif
+
+/* Allocate the rbtree context and preload it with the register defaults. */
+static int regcache_rbtree_init(struct regmap *map)
+{
+	struct regcache_rbtree_ctx *ctx;
+	const struct reg_default *def;
+	int idx, err;
+
+	ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
+	map->cache = ctx;
+	if (!ctx)
+		return -ENOMEM;
+
+	ctx->root = RB_ROOT;
+	ctx->cached_rbnode = NULL;
+
+	/* every default goes through the normal write path */
+	for (idx = 0; idx < map->num_reg_defaults; idx++) {
+		def = &map->reg_defaults[idx];
+		err = regcache_rbtree_write(map, def->reg, def->def);
+		if (err) {
+			/* frees the nodes built so far and the context */
+			regcache_rbtree_exit(map);
+			return err;
+		}
+	}
+
+	rbtree_debugfs_init(map);
+
+	return 0;
+}
+
+/* Free every node of the tree and then the context itself. */
+static int regcache_rbtree_exit(struct regmap *map)
+{
+	struct regcache_rbtree_ctx *ctx = map->cache;
+	struct regcache_rbtree_node *victim;
+	struct rb_node *pos;
+
+	/* if we've already been called then just return */
+	if (!ctx)
+		return 0;
+
+	/* fetch the successor before the current node is erased and freed */
+	pos = rb_first(&ctx->root);
+	while (pos) {
+		victim = rb_entry(pos, struct regcache_rbtree_node, node);
+		pos = rb_next(pos);
+		rb_erase(&victim->node, &ctx->root);
+		kfree(victim->cache_present);
+		kfree(victim->block);
+		kfree(victim);
+	}
+
+	/* release the resources */
+	kfree(ctx);
+	map->cache = NULL;
+
+	return 0;
+}
+
+/* Fetch @reg from the cache; -ENOENT when no value is cached for it. */
+static int regcache_rbtree_read(struct regmap *map,
+				unsigned int reg, unsigned int *value)
+{
+	struct regcache_rbtree_node *rbnode;
+	unsigned int slot;
+
+	rbnode = regcache_rbtree_lookup(map, reg);
+	if (!rbnode)
+		return -ENOENT;
+
+	slot = (reg - rbnode->base_reg) / map->reg_stride;
+	if (!test_bit(slot, rbnode->cache_present))
+		return -ENOENT;
+
+	*value = regcache_rbtree_get_register(map, rbnode, slot);
+	return 0;
+}
+
+
+/* Grow @rbnode to span [@base_reg, @top_reg] and cache @value for @reg. */
+static int regcache_rbtree_insert_to_block(struct regmap *map,
+					   struct regcache_rbtree_node *rbnode,
+					   unsigned int base_reg,
+					   unsigned int top_reg,
+					   unsigned int reg, unsigned int value)
+{
+	unsigned int blklen = (top_reg - base_reg) / map->reg_stride + 1;
+	unsigned int pos = (reg - base_reg) / map->reg_stride;
+	unsigned int offset = (rbnode->base_reg - base_reg) / map->reg_stride;
+	unsigned int old_longs = BITS_TO_LONGS(rbnode->blklen);
+	unsigned int new_longs = BITS_TO_LONGS(blklen);
+	unsigned long *present;
+	u8 *blk;
+
+	blk = krealloc(rbnode->block, blklen * map->cache_word_size,
+		       GFP_KERNEL);
+	if (!blk)
+		return -ENOMEM;
+	/* the old pointer may be gone: commit now, never free blk on error */
+	rbnode->block = blk;
+
+	present = krealloc(rbnode->cache_present, new_longs * sizeof(*present),
+			   GFP_KERNEL);
+	if (!present)
+		return -ENOMEM;
+	/* krealloc() leaves the words it adds uninitialised */
+	if (new_longs > old_longs)
+		memset(present + old_longs, 0,
+		       (new_longs - old_longs) * sizeof(*present));
+	rbnode->cache_present = present;
+
+	/* registers are prepended: move old values and present bits up */
+	if (pos == 0) {
+		memmove(blk + offset * map->cache_word_size,
+			blk, rbnode->blklen * map->cache_word_size);
+		bitmap_shift_left(present, present, offset, blklen);
+	}
+
+	/* update the rbnode size and the base register */
+	rbnode->blklen = blklen;
+	rbnode->base_reg = base_reg;
+	regcache_rbtree_set_register(map, rbnode, pos, value);
+	return 0;
+}
+
+/*
+ * Allocate a node able to hold @reg.  The node is not linked into the tree
+ * and no register is marked present; returns NULL on allocation failure.
+ */
+static struct regcache_rbtree_node *
+regcache_rbtree_node_alloc(struct regmap *map, unsigned int reg)
+{
+	struct regcache_rbtree_node *rbnode;
+	const struct regmap_range *range;
+	int i;
+
+	rbnode = kzalloc(sizeof(*rbnode), GFP_KERNEL);
+	if (!rbnode)
+		return NULL;
+
+	/* If there is a read table then use it to guess at an allocation */
+	for (i = 0; map->rd_table && i < map->rd_table->n_yes_ranges; i++) {
+		range = &map->rd_table->yes_ranges[i];
+		if (!regmap_reg_in_range(reg, range))
+			continue;
+		rbnode->blklen = (range->range_max - range->range_min) /
+			map->reg_stride	+ 1;
+		rbnode->base_reg = range->range_min;
+		break;
+	}
+
+	/* no table, or no range matched: start with just this register */
+	if (!rbnode->blklen) {
+		rbnode->blklen = 1;
+		rbnode->base_reg = reg;
+	}
+
+	rbnode->block = kmalloc(rbnode->blklen * map->cache_word_size,
+				GFP_KERNEL);
+	if (!rbnode->block)
+		goto err_free;
+
+	rbnode->cache_present = kzalloc(BITS_TO_LONGS(rbnode->blklen) *
+		sizeof(*rbnode->cache_present), GFP_KERNEL);
+	if (!rbnode->cache_present)
+		goto err_free_block;
+
+	return rbnode;
+
+err_free_block:
+	kfree(rbnode->block);
+err_free:
+	kfree(rbnode);
+	return NULL;
+}
+
+/* Cache @value for @reg, extending or creating a block when needed. */
+static int regcache_rbtree_write(struct regmap *map, unsigned int reg,
+				 unsigned int value)
+{
+	struct regcache_rbtree_ctx *rbtree_ctx = map->cache;
+	struct regcache_rbtree_node *rbnode, *rbnode_tmp;
+	struct rb_node *node;
+	unsigned int reg_tmp;
+	int ret;
+
+	/* if we can't locate it in the cached rbnode we'll have
+	 * to traverse the rbtree looking for it.
+	 */
+	rbnode = regcache_rbtree_lookup(map, reg);
+	if (rbnode) {
+		reg_tmp = (reg - rbnode->base_reg) / map->reg_stride;
+		regcache_rbtree_set_register(map, rbnode, reg_tmp, value);
+	} else {
+		unsigned int base_reg, top_reg;
+		unsigned int new_base_reg, new_top_reg;
+		unsigned int min, max;
+		unsigned int max_dist;
+
+		max_dist = map->reg_stride * sizeof(*rbnode_tmp) /
+			map->cache_word_size;
+		if (reg < max_dist)
+			min = 0;
+		else
+			min = reg - max_dist;
+		max = reg + max_dist;
+
+		/* look for an adjacent register to the one we are about to add */
+		for (node = rb_first(&rbtree_ctx->root); node;
+		     node = rb_next(node)) {
+			rbnode_tmp = rb_entry(node, struct regcache_rbtree_node,
+					      node);
+
+			regcache_rbtree_get_base_top_reg(map, rbnode_tmp,
+				&base_reg, &top_reg);
+
+			if (base_reg <= max && top_reg >= min) {
+				new_base_reg = min(reg, base_reg);
+				new_top_reg = max(reg, top_reg);
+			} else {
+				continue;
+			}
+
+			ret = regcache_rbtree_insert_to_block(map, rbnode_tmp,
+							      new_base_reg,
+							      new_top_reg, reg,
+							      value);
+			if (ret)
+				return ret;
+			rbtree_ctx->cached_rbnode = rbnode_tmp;
+			return 0;
+		}
+
+		/* We did not manage to find a place to insert it in
+		 * an existing block so create a new rbnode.
+		 */
+		rbnode = regcache_rbtree_node_alloc(map, reg);
+		if (!rbnode)
+			return -ENOMEM;
+		/* the block is indexed by slot, not by register address */
+		reg_tmp = (reg - rbnode->base_reg) / map->reg_stride;
+		regcache_rbtree_set_register(map, rbnode, reg_tmp, value);
+		regcache_rbtree_insert(map, &rbtree_ctx->root, rbnode);
+		rbtree_ctx->cached_rbnode = rbnode;
+	}
+
+	return 0;
+}
+
+/* Hand each block overlapping [min, max] to regcache_sync_block(). */
+static int regcache_rbtree_sync(struct regmap *map, unsigned int min,
+				unsigned int max)
+{
+	struct regcache_rbtree_ctx *ctx = map->cache;
+	struct regcache_rbtree_node *blk;
+	struct rb_node *it;
+	unsigned int lo, hi;
+	unsigned int first, last;
+	int err;
+
+	for (it = rb_first(&ctx->root); it; it = rb_next(it)) {
+		blk = rb_entry(it, struct regcache_rbtree_node, node);
+		regcache_rbtree_get_base_top_reg(map, blk, &lo, &hi);
+
+		/* this node starts above the window: stop walking */
+		if (lo > max)
+			break;
+
+		/* this node ends below the window: try the next one */
+		if (hi < min)
+			continue;
+
+		/* clip the window to slot indices within this block */
+		first = 0;
+		if (min > lo)
+			first = (min - lo) / map->reg_stride;
+
+		last = blk->blklen;
+		if (max < hi)
+			last = (max - lo) / map->reg_stride + 1;
+
+		err = regcache_sync_block(map, blk->block, blk->cache_present,
+					  blk->base_reg, first, last);
+		if (err != 0)
+			return err;
+	}
+
+	return regmap_async_complete(map);
+}
+
+/* Clear the present bit of every cached register within [min, max]. */
+static int regcache_rbtree_drop(struct regmap *map, unsigned int min,
+				unsigned int max)
+{
+	struct regcache_rbtree_ctx *ctx = map->cache;
+	struct regcache_rbtree_node *blk;
+	struct rb_node *it;
+	unsigned int lo, hi;
+	unsigned int first, last;
+
+	for (it = rb_first(&ctx->root); it; it = rb_next(it)) {
+		blk = rb_entry(it, struct regcache_rbtree_node, node);
+		regcache_rbtree_get_base_top_reg(map, blk, &lo, &hi);
+
+		/* stop at the first node above the window, skip those below */
+		if (lo > max)
+			break;
+		if (hi < min)
+			continue;
+
+		/* clip the window to slot indices within this block */
+		first = 0;
+		if (min > lo)
+			first = (min - lo) / map->reg_stride;
+
+		last = blk->blklen;
+		if (max < hi)
+			last = (max - lo) / map->reg_stride + 1;
+
+		/* only the present bits are cleared, the storage is kept */
+		bitmap_clear(blk->cache_present, first, last - first);
+	}
+
+	return 0;
+}
+
+struct regcache_ops regcache_rbtree_ops = {
+	.type = REGCACHE_RBTREE,
+	.name = "rbtree",
+	.init = regcache_rbtree_init,
+	.exit = regcache_rbtree_exit,
+	.read = regcache_rbtree_read,
+	.write = regcache_rbtree_write,
+	.sync = regcache_rbtree_sync,
+	.drop = regcache_rbtree_drop,
+};
diff --git a/drivers/base/regmap/regcache.c b/drivers/base/regmap/regcache.c
new file mode 100644
index 00000000000..29b4128da0b
--- /dev/null
+++ b/drivers/base/regmap/regcache.c
@@ -0,0 +1,707 @@
+/*
+ * Register cache access API
+ *
+ * Copyright 2011 Wolfson Microelectronics plc
+ *
+ * Author: Dimitris Papastamos <dp@opensource.wolfsonmicro.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/slab.h>
+#include <linux/export.h>
+#include <linux/device.h>
+#include <trace/events/regmap.h>
+#include <linux/bsearch.h>
+#include <linux/sort.h>
+
+#include "internal.h"
+
+static const struct regcache_ops *cache_types[] = {
+	&regcache_rbtree_ops,
+	&regcache_lzo_ops,
+	&regcache_flat_ops,
+};
+
+/* Build map->reg_defaults from the raw defaults, reading the HW if needed. */
+static int regcache_hw_init(struct regmap *map)
+{
+	int i, j;
+	int ret;
+	int count;
+	unsigned int val;
+	void *tmp_buf;
+
+	if (!map->num_reg_defaults_raw)
+		return -EINVAL;
+
+	if (!map->reg_defaults_raw) {
+		u32 cache_bypass = map->cache_bypass;
+		dev_warn(map->dev, "No cache defaults, reading back from HW\n");
+
+		/* allocate first so a failure leaves cache_bypass untouched */
+		tmp_buf = kmalloc(map->cache_size_raw, GFP_KERNEL);
+		if (!tmp_buf)
+			return -ENOMEM;
+		/* Bypass the cache access till data read from HW */
+		map->cache_bypass = 1;
+		ret = regmap_raw_read(map, 0, tmp_buf, map->cache_size_raw);
+		map->cache_bypass = cache_bypass;
+		if (ret < 0) {
+			kfree(tmp_buf);
+			return ret;
+		}
+		map->reg_defaults_raw = tmp_buf;
+		map->cache_free = 1;
+	}
+
+	/* calculate the size of reg_defaults */
+	for (count = 0, i = 0; i < map->num_reg_defaults_raw; i++) {
+		if (regmap_volatile(map, i * map->reg_stride))
+			continue;
+		count++;
+	}
+
+	map->reg_defaults = kmalloc(count * sizeof(struct reg_default),
+				      GFP_KERNEL);
+	if (!map->reg_defaults) {
+		ret = -ENOMEM;
+		goto err_free;
+	}
+
+	/* fill the reg_defaults */
+	map->num_reg_defaults = count;
+	for (i = 0, j = 0; i < map->num_reg_defaults_raw; i++) {
+		val = regcache_get_val(map, map->reg_defaults_raw, i);
+		if (regmap_volatile(map, i * map->reg_stride))
+			continue;
+		map->reg_defaults[j].reg = i * map->reg_stride;
+		map->reg_defaults[j].def = val;
+		j++;
+	}
+
+	return 0;
+
+err_free:
+	if (map->cache_free)
+		kfree(map->reg_defaults_raw);
+
+	return ret;
+}
+
+int regcache_init(struct regmap *map, const struct regmap_config *config)
+{
+	int ret;
+	int i;
+	void *tmp_buf;
+	/* every default must sit on a register stride boundary */
+	for (i = 0; i < config->num_reg_defaults; i++)
+		if (config->reg_defaults[i].reg % map->reg_stride)
+			return -EINVAL;
+	/* no cache requested: flag the map as bypassing the cache and stop */
+	if (map->cache_type == REGCACHE_NONE) {
+		map->cache_bypass = true;
+		return 0;
+	}
+	/* find the backend implementing the requested cache type */
+	for (i = 0; i < ARRAY_SIZE(cache_types); i++)
+		if (cache_types[i]->type == map->cache_type)
+			break;
+
+	if (i == ARRAY_SIZE(cache_types)) {
+		dev_err(map->dev, "Could not match compress type: %d\n",
+			map->cache_type);
+		return -EINVAL;
+	}
+
+	map->num_reg_defaults = config->num_reg_defaults;
+	map->num_reg_defaults_raw = config->num_reg_defaults_raw;
+	map->reg_defaults_raw = config->reg_defaults_raw;
+	map->cache_word_size = DIV_ROUND_UP(config->val_bits, 8);
+	map->cache_size_raw = map->cache_word_size * config->num_reg_defaults_raw;
+
+	map->cache = NULL;
+	map->cache_ops = cache_types[i];
+	/* read, write and name are mandatory for a backend */
+	if (!map->cache_ops->read ||
+	    !map->cache_ops->write ||
+	    !map->cache_ops->name)
+		return -EINVAL;
+
+	/* We still need to ensure that the reg_defaults
+	 * won't vanish from under us.  We'll need to make
+	 * a copy of it.
+	 */
+	if (config->reg_defaults) {
+		if (!map->num_reg_defaults)
+			return -EINVAL;
+		tmp_buf = kmemdup(config->reg_defaults, map->num_reg_defaults *
+				  sizeof(struct reg_default), GFP_KERNEL);
+		if (!tmp_buf)
+			return -ENOMEM;
+		map->reg_defaults = tmp_buf;
+	} else if (map->num_reg_defaults_raw) {
+		/* Some devices such as PMICs don't have cache defaults,
+		 * we cope with this by reading back the HW registers and
+		 * crafting the cache defaults by hand.
+		 */
+		ret = regcache_hw_init(map);
+		if (ret < 0)
+			return ret;
+	}
+
+	if (!map->max_register)
+		map->max_register = map->num_reg_defaults_raw;
+	/* init is optional; a failure releases the defaults set up above */
+	if (map->cache_ops->init) {
+		dev_dbg(map->dev, "Initializing %s cache\n",
+			map->cache_ops->name);
+		ret = map->cache_ops->init(map);
+		if (ret)
+			goto err_free;
+	}
+	return 0;
+
+err_free:
+	kfree(map->reg_defaults);
+	if (map->cache_free)
+		kfree(map->reg_defaults_raw);
+
+	return ret;
+}
+
+void regcache_exit(struct regmap *map)
+{
+	if (map->cache_type == REGCACHE_NONE)
+		return;
+
+	BUG_ON(!map->cache_ops);
+	/* the raw defaults are only ours if regcache_hw_init() allocated them */
+	kfree(map->reg_defaults);
+	if (map->cache_free)
+		kfree(map->reg_defaults_raw);
+	/* the backend's exit hook is optional */
+	if (map->cache_ops->exit) {
+		dev_dbg(map->dev, "Destroying %s cache\n",
+			map->cache_ops->name);
+		map->cache_ops->exit(map);
+	}
+}
+
+/**
+ * regcache_read: Fetch the value of a given register from the cache.
+ *
+ * @map: map to read from.
+ * @reg: The register index.
+ * @value: Where the cached value is stored on success.
+ *
+ * Return a negative value on failure, 0 on success.
+ */
+int regcache_read(struct regmap *map,
+		  unsigned int reg, unsigned int *value)
+{
+	int ret;
+
+	if (map->cache_type == REGCACHE_NONE)
+		return -ENOSYS;
+
+	BUG_ON(!map->cache_ops);
+
+	/* volatile registers are never served from the cache */
+	if (regmap_volatile(map, reg))
+		return -EINVAL;
+
+	ret = map->cache_ops->read(map, reg, value);
+
+	if (ret == 0)
+		trace_regmap_reg_read_cache(map->dev, reg, *value);
+
+	return ret;
+}
+
+/**
+ * regcache_write: Set the value of a given register in the cache.
+ *
+ * @map: map to update.
+ * @reg: The register index.
+ * @value: The new register value.
+ *
+ * Return a negative value on failure, 0 on success.
+ */
+int regcache_write(struct regmap *map,
+		   unsigned int reg, unsigned int value)
+{
+	if (map->cache_type == REGCACHE_NONE)
+		return 0;
+
+	BUG_ON(!map->cache_ops);
+	/* volatile registers are not cached: accept the write as a no-op */
+	if (regmap_volatile(map, reg))
+		return 0;
+
+	return map->cache_ops->write(map, reg, value);
+}
+
+static int regcache_default_sync(struct regmap *map, unsigned int min,
+				 unsigned int max)
+{
+	unsigned int reg;
+	/* generic sync: visit [min, max] one register stride at a time */
+	for (reg = min; reg <= max; reg += map->reg_stride) {
+		unsigned int val;
+		int ret;
+
+		if (regmap_volatile(map, reg) ||
+		    !regmap_writeable(map, reg))
+			continue;
+
+		ret = regcache_read(map, reg, &val);
+		if (ret)
+			return ret;
+
+		/* Is this the hardware default?  If so skip. */
+		ret = regcache_lookup_reg(map, reg);
+		if (ret >= 0 && val == map->reg_defaults[ret].def)
+			continue;
+		/* NOTE: bypass is forced back to 0 here, not restored */
+		map->cache_bypass = 1;
+		ret = _regmap_write(map, reg, val);
+		map->cache_bypass = 0;
+		if (ret)
+			return ret;
+		dev_dbg(map->dev, "Synced register %#x, value %#x\n", reg, val);
+	}
+
+	return 0;
+}
+
+/**
+ * regcache_sync: Sync the register cache with the hardware.
+ *
+ * @map: map to configure.
+ *
+ * Any registers that should not be synced should be marked as
+ * volatile.  In general drivers can choose not to use the provided
+ * syncing functionality if they so require.
+ *
+ * Return a negative value on failure, 0 on success.
+ */
+int regcache_sync(struct regmap *map)
+{
+	int ret = 0;
+	unsigned int i;
+	const char *name;
+	unsigned int bypass;
+
+	BUG_ON(!map->cache_ops);
+
+	map->lock(map->lock_arg);
+	/* Remember the initial bypass state */
+	bypass = map->cache_bypass;
+	dev_dbg(map->dev, "Syncing %s cache\n",
+		map->cache_ops->name);
+	name = map->cache_ops->name;
+	trace_regcache_sync(map->dev, name, "start");
+	/* a cache that is not marked dirty needs no write-back */
+	if (!map->cache_dirty)
+		goto out;
+
+	map->async = true;
+
+	/* Apply any patch first */
+	map->cache_bypass = 1;
+	for (i = 0; i < map->patch_regs; i++) {
+		ret = _regmap_write(map, map->patch[i].reg, map->patch[i].def);
+		if (ret != 0) {
+			dev_err(map->dev, "Failed to write %x = %x: %d\n",
+				map->patch[i].reg, map->patch[i].def, ret);
+			goto out;
+		}
+	}
+	map->cache_bypass = 0;
+	/* use the backend's sync if it has one, else the generic walk */
+	if (map->cache_ops->sync)
+		ret = map->cache_ops->sync(map, 0, map->max_register);
+	else
+		ret = regcache_default_sync(map, 0, map->max_register);
+	/* only a fully successful sync clears the dirty flag */
+	if (ret == 0)
+		map->cache_dirty = false;
+
+out:
+	/* Restore the bypass state */
+	map->async = false;
+	map->cache_bypass = bypass;
+	map->unlock(map->lock_arg);
+
+	regmap_async_complete(map);
+
+	trace_regcache_sync(map->dev, name, "stop");
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(regcache_sync);
+
+/**
+ * regcache_sync_region: Sync part  of the register cache with the hardware.
+ *
+ * @map: map to sync.
+ * @min: first register to sync
+ * @max: last register to sync
+ *
+ * Write all non-default register values in the specified region to
+ * the hardware.
+ *
+ * Return a negative value on failure, 0 on success.
+ */
+int regcache_sync_region(struct regmap *map, unsigned int min,
+			 unsigned int max)
+{
+	int ret = 0;
+	const char *name;
+	unsigned int bypass;
+
+	BUG_ON(!map->cache_ops);
+
+	map->lock(map->lock_arg);
+	/* Remember the initial bypass state */
+	bypass = map->cache_bypass;
+
+	name = map->cache_ops->name;
+	/* min and max are unsigned: print them with %u */
+	dev_dbg(map->dev, "Syncing %s cache from %u-%u\n", name, min, max);
+
+	trace_regcache_sync(map->dev, name, "start region");
+
+	if (!map->cache_dirty)
+		goto out;
+
+	map->async = true;
+
+	if (map->cache_ops->sync)
+		ret = map->cache_ops->sync(map, min, max);
+	else
+		ret = regcache_default_sync(map, min, max);
+
+out:
+	/* Restore the bypass state */
+	map->cache_bypass = bypass;
+	map->async = false;
+	map->unlock(map->lock_arg);
+
+	regmap_async_complete(map);
+
+	trace_regcache_sync(map->dev, name, "stop region");
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(regcache_sync_region);
+
+/**
+ * regcache_drop_region: Discard part of the register cache
+ *
+ * @map: map to operate on
+ * @min: first register to discard
+ * @max: last register to discard
+ *
+ * Discard part of the register cache.
+ *
+ * Return a negative value on failure, 0 on success.
+ */
+int regcache_drop_region(struct regmap *map, unsigned int min,
+			 unsigned int max)
+{
+	int ret = 0;
+	/* a backend need not implement drop: refuse when it does not */
+	if (!map->cache_ops || !map->cache_ops->drop)
+		return -EINVAL;
+
+	map->lock(map->lock_arg);
+
+	trace_regcache_drop_region(map->dev, min, max);
+
+	ret = map->cache_ops->drop(map, min, max);
+
+	map->unlock(map->lock_arg);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(regcache_drop_region);
+
+/**
+ * regcache_cache_only: Put a register map into cache only mode
+ *
+ * @map: map to configure
+ * @enable: true to keep writes in the cache only, false to leave that mode
+ *
+ * When a register map is marked as cache only writes to the register
+ * map API will only update the register cache, they will not cause
+ * any hardware changes.  This is useful for allowing portions of
+ * drivers to act as though the device were functioning as normal when
+ * it is disabled for power saving reasons.
+ */
+void regcache_cache_only(struct regmap *map, bool enable)
+{
+	map->lock(map->lock_arg);
+	WARN_ON(map->cache_bypass && enable);
+	map->cache_only = enable;
+	trace_regmap_cache_only(map->dev, enable);
+	map->unlock(map->lock_arg);
+}
+EXPORT_SYMBOL_GPL(regcache_cache_only);
+
+/**
+ * regcache_mark_dirty: Mark the register cache as dirty
+ *
+ * @map: map to mark
+ *
+ * Mark the register cache as dirty, for example due to the device
+ * having been powered down for suspend.  If the cache is not marked
+ * as dirty then the cache sync will be suppressed.
+ */
+void regcache_mark_dirty(struct regmap *map)
+{
+	map->lock(map->lock_arg);
+	map->cache_dirty = true;
+	map->unlock(map->lock_arg);
+}
+EXPORT_SYMBOL_GPL(regcache_mark_dirty);
+
+/**
+ * regcache_cache_bypass: Put a register map into cache bypass mode
+ *
+ * @map: map to configure
+ * @enable: true to make writes skip the cache, false to leave that mode
+ *
+ * When a register map is marked with the cache bypass option, writes
+ * to the register map API will only update the hardware and not
+ * the cache directly.  This is useful when syncing the cache back to
+ * the hardware.
+ */
+void regcache_cache_bypass(struct regmap *map, bool enable)
+{
+	map->lock(map->lock_arg);
+	WARN_ON(map->cache_only && enable);
+	map->cache_bypass = enable;
+	trace_regmap_cache_bypass(map->dev, enable);
+	map->unlock(map->lock_arg);
+}
+EXPORT_SYMBOL_GPL(regcache_cache_bypass);
+
+bool regcache_set_val(struct regmap *map, void *base, unsigned int idx,
+		      unsigned int val)
+{
+	if (regcache_get_val(map, base, idx) == val)
+		return true;	/* slot already holds val, nothing written */
+
+	/* Use device native format if possible */
+	if (map->format.format_val) {
+		map->format.format_val(base + (map->cache_word_size * idx),
+				       val, 0);
+		return false;
+	}
+
+	switch (map->cache_word_size) {
+	case 1: {
+		u8 *cache = base;
+		cache[idx] = val;
+		break;
+	}
+	case 2: {
+		u16 *cache = base;
+		cache[idx] = val;
+		break;
+	}
+	case 4: {
+		u32 *cache = base;
+		cache[idx] = val;
+		break;
+	}
+	default:
+		BUG();
+	}
+	return false;	/* the slot was rewritten */
+}
+
+unsigned int regcache_get_val(struct regmap *map, const void *base,
+			      unsigned int idx)
+{
+	if (!base)
+		return -EINVAL;	/* NOTE: converted to a large unsigned value */
+
+	/* Use device native format if possible */
+	if (map->format.parse_val)
+		return map->format.parse_val(regcache_get_val_addr(map, base,
+								   idx));
+	/* otherwise index the buffer as an array of cache_word_size words */
+	switch (map->cache_word_size) {
+	case 1: {
+		const u8 *cache = base;
+		return cache[idx];
+	}
+	case 2: {
+		const u16 *cache = base;
+		return cache[idx];
+	}
+	case 4: {
+		const u32 *cache = base;
+		return cache[idx];
+	}
+	default:
+		BUG();
+	}
+	/* unreachable */
+	return -1;
+}
+
+static int regcache_default_cmp(const void *a, const void *b)
+{
+	const struct reg_default *_a = a;
+	const struct reg_default *_b = b;
+	/* compare instead of subtracting: the difference may not fit an int */
+	return (_a->reg > _b->reg) - (_a->reg < _b->reg);
+}
+
+/*
+ * Return the index of @reg in map->reg_defaults, or -ENOENT if absent.
+ */
+int regcache_lookup_reg(struct regmap *map, unsigned int reg)
+{
+	struct reg_default key = { .reg = reg, .def = 0 };
+	struct reg_default *hit;
+
+	/* binary search: relies on reg_defaults being ordered by register */
+	hit = bsearch(&key, map->reg_defaults, map->num_reg_defaults,
+		      sizeof(struct reg_default), regcache_default_cmp);
+	if (!hit)
+		return -ENOENT;
+
+	return hit - map->reg_defaults;
+}
+
+static bool regcache_reg_present(unsigned long *cache_present, unsigned int idx)
+{
+	if (!cache_present)
+		return true;
+
+	return test_bit(idx, cache_present);
+}
+
+static int regcache_sync_block_single(struct regmap *map, void *block,
+				      unsigned long *cache_present,
+				      unsigned int block_base,
+				      unsigned int start, unsigned int end)
+{
+	unsigned int i, regtmp, val;
+	int ret;
+	/* start and end are slot indices into block; end is exclusive */
+	for (i = start; i < end; i++) {
+		regtmp = block_base + (i * map->reg_stride);
+
+		if (!regcache_reg_present(cache_present, i))
+			continue;
+
+		val = regcache_get_val(map, block, i);
+
+		/* Is this the hardware default?  If so skip. */
+		ret = regcache_lookup_reg(map, regtmp);
+		if (ret >= 0 && val == map->reg_defaults[ret].def)
+			continue;
+		/* one register write per slot, with cache_bypass set around it */
+		map->cache_bypass = 1;
+
+		ret = _regmap_write(map, regtmp, val);
+
+		map->cache_bypass = 0;
+		if (ret != 0)
+			return ret;
+		dev_dbg(map->dev, "Synced register %#x, value %#x\n",
+			regtmp, val);
+	}
+
+	return 0;
+}
+
+static int regcache_sync_block_raw_flush(struct regmap *map, const void **data,
+					 unsigned int base, unsigned int cur)
+{
+	size_t val_bytes = map->format.val_bytes;
+	int ret, count;
+	/* *data == NULL means no run of registers is pending */
+	if (*data == NULL)
+		return 0;
+	/* the pending run covers register addresses [base, cur) */
+	count = (cur - base) / map->reg_stride;
+
+	dev_dbg(map->dev, "Writing %zu bytes for %d registers from 0x%x-0x%x\n",
+		count * val_bytes, count, base, cur - map->reg_stride);
+
+	map->cache_bypass = 1;
+
+	ret = _regmap_raw_write(map, base, *data, count * val_bytes);
+
+	map->cache_bypass = 0;
+	/* the run is consumed whether or not the write succeeded */
+	*data = NULL;
+
+	return ret;
+}
+
+static int regcache_sync_block_raw(struct regmap *map, void *block,
+			    unsigned long *cache_present,
+			    unsigned int block_base, unsigned int start,
+			    unsigned int end)
+{
+	unsigned int i, val;
+	unsigned int regtmp = 0;
+	unsigned int base = 0;
+	const void *data = NULL;
+	int ret;
+	/* batch each run of present, non-default slots into one raw write */
+	for (i = start; i < end; i++) {
+		regtmp = block_base + (i * map->reg_stride);
+
+		if (!regcache_reg_present(cache_present, i)) {
+			ret = regcache_sync_block_raw_flush(map, &data,
+							    base, regtmp);
+			if (ret != 0)
+				return ret;
+			continue;
+		}
+
+		val = regcache_get_val(map, block, i);
+
+		/* Is this the hardware default?  If so skip. */
+		ret = regcache_lookup_reg(map, regtmp);
+		if (ret >= 0 && val == map->reg_defaults[ret].def) {
+			ret = regcache_sync_block_raw_flush(map, &data,
+							    base, regtmp);
+			if (ret != 0)
+				return ret;
+			continue;
+		}
+		/* open a new run at this register if none is in progress */
+		if (!data) {
+			data = regcache_get_val_addr(map, block, i);
+			base = regtmp;
+		}
+	}
+
+	return regcache_sync_block_raw_flush(map, &data, base, regtmp +
+			map->reg_stride);
+}
+
+/* Sync slots [start, end) of @block, batched if raw writes are possible. */
+int regcache_sync_block(struct regmap *map, void *block,
+			unsigned long *cache_present,
+			unsigned int block_base, unsigned int start,
+			unsigned int end)
+{
+	if (!regmap_can_raw_write(map))
+		return regcache_sync_block_single(map, block, cache_present,
+						  block_base, start, end);
+	return regcache_sync_block_raw(map, block, cache_present,
+				       block_base, start, end);
+}
diff --git a/drivers/base/regmap/regmap-debugfs.c b/drivers/base/regmap/regmap-debugfs.c
new file mode 100644
index 00000000000..45d812c0ea7
--- /dev/null
+++ b/drivers/base/regmap/regmap-debugfs.c
@@ -0,0 +1,583 @@
+/*
+ * Register map access API - debugfs
+ *
+ * Copyright 2011 Wolfson Microelectronics plc
+ *
+ * Author: Mark Brown <broonie@opensource.wolfsonmicro.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/slab.h>
+#include <linux/mutex.h>
+#include <linux/debugfs.h>
+#include <linux/uaccess.h>
+#include <linux/device.h>
+#include <linux/list.h>
+
+#include "internal.h"
+
+struct regmap_debugfs_node {	/* a postponed regmap_debugfs_init() call */
+	struct regmap *map;	/* map whose debugfs setup was deferred */
+	const char *name;	/* name argument to replay later (may be NULL) */
+	struct list_head link;	/* entry in regmap_debugfs_early_list */
+};
+
+static struct dentry *regmap_debugfs_root;	/* set by regmap_debugfs_initcall() */
+static LIST_HEAD(regmap_debugfs_early_list);	/* maps seen before the root existed */
+static DEFINE_MUTEX(regmap_debugfs_early_lock);	/* guards the early list */
+
+/*
+ * Return the number of hex digits needed to print @max_val with "%x".
+ *
+ * @buf and @buf_size are kept so existing callers still compile, but
+ * they are no longer used.  Formatting into the caller's scratch buffer
+ * made the result depend on that buffer's size: with a short buffer
+ * (e.g. a 1-byte read()) the text was truncated and a too-small length
+ * was returned, which callers then cache.  Asking snprintf() for the
+ * would-be length with a zero-sized destination has no such dependency.
+ */
+static size_t regmap_calc_reg_len(int max_val, char *buf, size_t buf_size)
+{
+	return snprintf(NULL, 0, "%x", max_val);
+}
+
+/*
+ * read() handler for the "name" debugfs file: reports the name of the
+ * driver bound to the map's device, followed by a newline.
+ *
+ * A device does not necessarily have a driver bound at the moment the
+ * file is read, so "nodev" is reported in that case instead of
+ * dereferencing a NULL map->dev->driver.
+ *
+ * Returns the number of bytes copied to @user_buf or a negative errno.
+ */
+static ssize_t regmap_name_read_file(struct file *file,
+				     char __user *user_buf, size_t count,
+				     loff_t *ppos)
+{
+	struct regmap *map = file->private_data;
+	const char *name = "nodev";
+	int ret;
+	char *buf;
+
+	buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	if (map->dev->driver)
+		name = map->dev->driver->name;
+
+	ret = snprintf(buf, PAGE_SIZE, "%s\n", name);
+	if (ret < 0) {
+		kfree(buf);
+		return ret;
+	}
+
+	/* Honours *ppos and count, so partial and repeated reads work. */
+	ret = simple_read_from_buffer(user_buf, count, ppos, buf, ret);
+	kfree(buf);
+	return ret;
+}
+
+/* File operations of the read-only "name" debugfs file. */
+static const struct file_operations regmap_name_fops = {
+	.llseek = default_llseek,
+	.read = regmap_name_read_file,
+	.open = simple_open,
+};
+
+/*
+ * Unlink and free every entry of the map's dump offset cache, leaving
+ * map->debugfs_off_cache empty.
+ */
+static void regmap_debugfs_free_dump_cache(struct regmap *map)
+{
+	struct regmap_debugfs_off_cache *entry, *next;
+
+	list_for_each_entry_safe(entry, next, &map->debugfs_off_cache, list) {
+		list_del(&entry->list);
+		kfree(entry);
+	}
+}
+
+/*
+ * Work out where the start offset maps into register numbers, bearing
+ * in mind that we suppress hidden registers.
+ *
+ * @map:  register map being dumped
+ * @base: first register of the dump; non-zero selects a subrange
+ * @from: file offset the reader wants to start at
+ * @pos:  set to the file offset that corresponds to the returned
+ *        register, so the caller can keep counting from there
+ *
+ * Returns the register number the caller should start scanning at.
+ *
+ * The offset cache describes the whole map, so it is not used for a
+ * subrange and is unavailable when building it fails.  In both cases
+ * the caller has to scan linearly from @base, which only works if it
+ * also counts file offsets from zero, so *@pos is reset to 0.  Leaving
+ * *@pos at the caller's current offset made every read() start again
+ * at @base, so such a file never reached end-of-file.
+ */
+static unsigned int regmap_debugfs_get_dump_start(struct regmap *map,
+						  unsigned int base,
+						  loff_t from,
+						  loff_t *pos)
+{
+	struct regmap_debugfs_off_cache *c = NULL;
+	loff_t p = 0;
+	unsigned int i, ret;
+	unsigned int fpos_offset;
+	unsigned int reg_offset;
+
+	/* Suppress the cache if we're using a subrange */
+	if (base) {
+		*pos = 0;
+		return base;
+	}
+
+	/*
+	 * If we don't have a cache build one so we don't have to do a
+	 * linear scan each time.
+	 */
+	mutex_lock(&map->cache_lock);
+	i = base;
+	if (list_empty(&map->debugfs_off_cache)) {
+		for (; i <= map->max_register; i += map->reg_stride) {
+			/* Skip unprinted registers, closing off cache entry */
+			if (!regmap_readable(map, i) ||
+			    regmap_precious(map, i)) {
+				if (c) {
+					c->max = p - 1;
+					c->max_reg = i - map->reg_stride;
+					list_add_tail(&c->list,
+						      &map->debugfs_off_cache);
+					c = NULL;
+				}
+
+				continue;
+			}
+
+			/* No cache entry?  Start a new one */
+			if (!c) {
+				c = kzalloc(sizeof(*c), GFP_KERNEL);
+				if (!c) {
+					/* No cache: fall back to a linear scan */
+					regmap_debugfs_free_dump_cache(map);
+					mutex_unlock(&map->cache_lock);
+					*pos = 0;
+					return base;
+				}
+				c->min = p;
+				c->base_reg = i;
+			}
+
+			p += map->debugfs_tot_len;
+		}
+	}
+
+	/* Close the last entry off if we didn't scan beyond it */
+	if (c) {
+		c->max = p - 1;
+		c->max_reg = i - map->reg_stride;
+		list_add_tail(&c->list,
+			      &map->debugfs_off_cache);
+	}
+
+	/*
+	 * This should never happen; we return above if we fail to
+	 * allocate and we should never be in this code if there are
+	 * no registers at all.
+	 */
+	WARN_ON(list_empty(&map->debugfs_off_cache));
+	ret = base;
+
+	/* Find the relevant block:offset */
+	list_for_each_entry(c, &map->debugfs_off_cache, list) {
+		if (from >= c->min && from <= c->max) {
+			/* Round down to the start of the line holding @from */
+			fpos_offset = from - c->min;
+			reg_offset = fpos_offset / map->debugfs_tot_len;
+			*pos = c->min + (reg_offset * map->debugfs_tot_len);
+			mutex_unlock(&map->cache_lock);
+			return c->base_reg + (reg_offset * map->reg_stride);
+		}
+
+		/* Not in this block: remember its end in case none matches */
+		*pos = c->max;
+		ret = c->max_reg;
+	}
+	mutex_unlock(&map->cache_lock);
+
+	return ret;
+}
+
+/*
+ * Compute and cache the fixed width of one line of the register dump.
+ * Nothing is done once map->debugfs_tot_len is non-zero.  @buf and
+ * @count are handed to regmap_calc_reg_len() as its scratch buffer.
+ */
+static inline void regmap_calc_tot_len(struct regmap *map,
+				       void *buf, size_t count)
+{
+	if (map->debugfs_tot_len)
+		return;
+
+	map->debugfs_val_len = 2 * map->format.val_bytes;
+	map->debugfs_reg_len = regmap_calc_reg_len(map->max_register,
+						   buf, count);
+
+	/* register digits + value digits + the three bytes ':', ' ', '\n' */
+	map->debugfs_tot_len = map->debugfs_reg_len +
+		map->debugfs_val_len + 3;
+}
+
+static ssize_t regmap_read_debugfs(struct regmap *map, unsigned int from,
+				   unsigned int to, char __user *user_buf,
+				   size_t count, loff_t *ppos)
+{	/* Dump registers from..to as fixed-width "reg: value" lines for read() */
+	size_t buf_pos = 0;	/* bytes formatted into buf so far */
+	loff_t p = *ppos;	/* file offset of the register being visited */
+	ssize_t ret;
+	int i;
+	char *buf;
+	unsigned int val, start_reg;
+
+	if (*ppos < 0 || !count)
+		return -EINVAL;
+
+	buf = kmalloc(count, GFP_KERNEL);	/* sized by the caller's request */
+	if (!buf)
+		return -ENOMEM;
+
+	regmap_calc_tot_len(map, buf, count);	/* buf doubles as scratch space */
+
+	/* Work out which register we're starting at; may also update p */
+	start_reg = regmap_debugfs_get_dump_start(map, from, *ppos, &p);
+
+	for (i = start_reg; i <= to; i += map->reg_stride) {
+		if (!regmap_readable(map, i))
+			continue;	/* not shown and not counted in p */
+
+		if (regmap_precious(map, i))
+			continue;	/* likewise never read or shown */
+
+		/* If we're in the region the user is trying to read */
+		if (p >= *ppos) {
+			/* ...but not beyond it: only whole lines are emitted */
+			if (buf_pos + map->debugfs_tot_len > count)
+				break;
+
+			/* Format the register number, relative to 'from' */
+			snprintf(buf + buf_pos, count - buf_pos, "%.*x: ",
+				 map->debugfs_reg_len, i - from);
+			buf_pos += map->debugfs_reg_len + 2;	/* digits plus ": " */
+
+			/* Format the value, write all X if we can't read */
+			ret = regmap_read(map, i, &val);
+			if (ret == 0)
+				snprintf(buf + buf_pos, count - buf_pos,
+					 "%.*x", map->debugfs_val_len, val);
+			else
+				memset(buf + buf_pos, 'X',
+				       map->debugfs_val_len);
+			buf_pos += 2 * map->format.val_bytes;	/* two hex digits per byte */
+
+			buf[buf_pos++] = '\n';	/* replaces snprintf()'s terminator */
+		}
+		p += map->debugfs_tot_len;	/* every shown register is one line */
+	}
+
+	ret = buf_pos;	/* 0 here means nothing was formatted for this read */
+
+	if (copy_to_user(user_buf, buf, buf_pos)) {
+		ret = -EFAULT;
+		goto out;
+	}
+
+	*ppos += buf_pos;	/* advance only after a successful copy */
+
+out:
+	kfree(buf);
+	return ret;
+}
+
+/*
+ * read() handler for the "registers" debugfs file: dumps the whole
+ * map, from register 0 up to map->max_register.
+ */
+static ssize_t regmap_map_read_file(struct file *file, char __user *user_buf,
+				    size_t count, loff_t *ppos)
+{
+	struct regmap *whole_map = file->private_data;
+
+	return regmap_read_debugfs(whole_map, 0, whole_map->max_register,
+				   user_buf, count, ppos);
+}
+
+#undef REGMAP_ALLOW_WRITE_DEBUGFS	/* write support is compiled out by default */
+#ifdef REGMAP_ALLOW_WRITE_DEBUGFS
+/*
+ * write() handler taking "<reg> <value>" in hex.  This can be dangerous,
+ * especially with clients such as PMICs, so there is deliberately no
+ * compile time configuration option for it; people who want to use
+ * this will need to modify the source code directly.
+ */
+static ssize_t regmap_map_write_file(struct file *file,
+				     const char __user *user_buf,
+				     size_t count, loff_t *ppos)
+{
+	char buf[32];
+	size_t buf_size;
+	char *start = buf;
+	unsigned long reg, value;
+	struct regmap *map = file->private_data;
+	int ret;
+
+	buf_size = min(count, (sizeof(buf)-1));	/* leave room for the NUL below */
+	if (copy_from_user(buf, user_buf, buf_size))
+		return -EFAULT;
+	buf[buf_size] = 0;
+
+	while (*start == ' ')	/* skip leading blanks */
+		start++;
+	reg = simple_strtoul(start, &start, 16);	/* leaves start after the digits */
+	while (*start == ' ')	/* skip blanks before the value */
+		start++;
+	if (kstrtoul(start, 16, &value))
+		return -EINVAL;
+
+	/* Userspace has been fiddling around behind the kernel's back */
+	add_taint(TAINT_USER, LOCKDEP_STILL_OK);
+
+	ret = regmap_write(map, reg, value);
+	if (ret < 0)
+		return ret;
+	return buf_size;	/* bytes consumed; input beyond buf is not consumed */
+}
+#else
+#define regmap_map_write_file NULL	/* leaves .write unset in regmap_map_fops */
+#endif
+
+/*
+ * File operations of the "registers" debugfs file.  .write is NULL
+ * unless REGMAP_ALLOW_WRITE_DEBUGFS is defined in this file.
+ */
+static const struct file_operations regmap_map_fops = {
+	.llseek = default_llseek,
+	.read = regmap_map_read_file,
+	.write = regmap_map_write_file,
+	.open = simple_open,
+};
+
+/*
+ * read() handler for a per-range debugfs file: dumps the registers
+ * between the range's range_min and range_max of the owning map.
+ */
+static ssize_t regmap_range_read_file(struct file *file, char __user *user_buf,
+				      size_t count, loff_t *ppos)
+{
+	struct regmap_range_node *node = file->private_data;
+
+	return regmap_read_debugfs(node->map, node->range_min,
+				   node->range_max, user_buf, count, ppos);
+}
+
+/* File operations of the read-only per-range debugfs files. */
+static const struct file_operations regmap_range_fops = {
+	.llseek = default_llseek,
+	.read = regmap_range_read_file,
+	.open = simple_open,
+};
+
+/*
+ * read() handler for the "range" debugfs file.
+ *
+ * Emits one "<first>-<last>" line (hex register numbers) for every
+ * entry of the map's dump offset cache, building that cache first if
+ * it does not exist yet.  A line that would not fit completely into
+ * @count bytes is left for the next read().
+ *
+ * Returns the number of bytes copied to @user_buf, -EINVAL for a
+ * negative offset or a zero @count, -ENOMEM or -EFAULT.
+ */
+static ssize_t regmap_reg_ranges_read_file(struct file *file,
+					   char __user *user_buf, size_t count,
+					   loff_t *ppos)
+{
+	struct regmap *map = file->private_data;
+	struct regmap_debugfs_off_cache *blk;
+	loff_t offset = 0;
+	size_t used = 0;
+	size_t len;
+	char *out;
+	char *line;
+	int ret;
+
+	if (!count || *ppos < 0)
+		return -EINVAL;
+
+	out = kmalloc(count, GFP_KERNEL);
+	if (!out)
+		return -ENOMEM;
+
+	line = kmalloc(PAGE_SIZE, GFP_KERNEL);
+	if (!line) {
+		ret = -ENOMEM;
+		goto free_out;
+	}
+
+	/*
+	 * Make sure the register dump cache exists; reads of the
+	 * `registers' file benefit from it as well.  Only the register
+	 * blocks matter here: the fixed-width file positions of the
+	 * `registers' file do not apply to this one, so the position
+	 * reported back is discarded and counting restarts at zero.
+	 */
+	regmap_calc_tot_len(map, out, count);
+	regmap_debugfs_get_dump_start(map, 0, *ppos, &offset);
+	offset = 0;
+
+	mutex_lock(&map->cache_lock);
+	list_for_each_entry(blk, &map->debugfs_off_cache, list) {
+		snprintf(line, PAGE_SIZE, "%x-%x",
+			 blk->base_reg, blk->max_reg);
+		len = strlen(line);
+
+		if (offset >= *ppos) {
+			/* Stop at the first line that no longer fits. */
+			if (used + 1 + len > count)
+				break;
+			snprintf(out + used, count - used, "%s", line);
+			used += len;
+			out[used++] = '\n';
+		}
+		offset += len + 1;
+	}
+	mutex_unlock(&map->cache_lock);
+
+	kfree(line);
+
+	if (copy_to_user(user_buf, out, used)) {
+		ret = -EFAULT;
+	} else {
+		*ppos += used;
+		ret = used;
+	}
+
+free_out:
+	kfree(out);
+	return ret;
+}
+
+/* File operations of the read-only "range" debugfs file. */
+static const struct file_operations regmap_reg_ranges_fops = {
+	.llseek = default_llseek,
+	.read = regmap_reg_ranges_read_file,
+	.open = simple_open,
+};
+
+/*
+ * read() handler for the "access" debugfs file.
+ *
+ * Emits one fixed-width line per register that is readable or
+ * writeable: "<reg>: R W V P", where each flag is 'y' or 'n' for
+ * readable, writeable, volatile and precious respectively.
+ *
+ * The room check is written as an addition.  The previous form,
+ * "buf_pos >= count - 1 - tot_len", underflowed whenever @count was
+ * smaller than one line plus one byte; the loop then never stopped,
+ * buf_pos ran past the allocation and copy_to_user() copied more than
+ * @count bytes from beyond the buffer.
+ *
+ * Returns the number of bytes copied to @user_buf, -EINVAL for a
+ * negative offset or a zero @count, -ENOMEM or -EFAULT.
+ */
+static ssize_t regmap_access_read_file(struct file *file,
+				       char __user *user_buf, size_t count,
+				       loff_t *ppos)
+{
+	int reg_len, tot_len;
+	size_t buf_pos = 0;
+	loff_t p = 0;
+	ssize_t ret;
+	int i;
+	struct regmap *map = file->private_data;
+	char *buf;
+
+	if (*ppos < 0 || !count)
+		return -EINVAL;
+
+	buf = kmalloc(count, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	/* Calculate the length of a fixed format  */
+	reg_len = regmap_calc_reg_len(map->max_register, buf, count);
+	tot_len = reg_len + 10; /* ': R W V P\n' */
+
+	for (i = 0; i <= map->max_register; i += map->reg_stride) {
+		/* Ignore registers which are neither readable nor writable */
+		if (!regmap_readable(map, i) && !regmap_writeable(map, i))
+			continue;
+
+		/* If we're in the region the user is trying to read */
+		if (p >= *ppos) {
+			/* ...but not beyond it; keep a byte for the NUL */
+			if (buf_pos + tot_len + 1 >= count)
+				break;
+
+			/* Format the register */
+			snprintf(buf + buf_pos, count - buf_pos,
+				 "%.*x: %c %c %c %c\n",
+				 reg_len, i,
+				 regmap_readable(map, i) ? 'y' : 'n',
+				 regmap_writeable(map, i) ? 'y' : 'n',
+				 regmap_volatile(map, i) ? 'y' : 'n',
+				 regmap_precious(map, i) ? 'y' : 'n');
+
+			buf_pos += tot_len;
+		}
+		p += tot_len;
+	}
+
+	ret = buf_pos;
+
+	if (copy_to_user(user_buf, buf, buf_pos)) {
+		ret = -EFAULT;
+		goto out;
+	}
+
+	*ppos += buf_pos;
+
+out:
+	kfree(buf);
+	return ret;
+}
+
+/* File operations of the read-only "access" debugfs file. */
+static const struct file_operations regmap_access_fops = {
+	.llseek = default_llseek,
+	.read = regmap_access_read_file,
+	.open = simple_open,
+};
+
+/*
+ * Create the debugfs directory and files for @map.
+ *
+ * @map:  register map to expose
+ * @name: optional suffix; the directory is named "<dev>-<name>" when
+ *        given and "<dev>" otherwise
+ *
+ * If the "regmap" debugfs root does not exist yet the request is
+ * queued on regmap_debugfs_early_list and replayed by
+ * regmap_debugfs_initcall().
+ *
+ * Failures only produce a warning; the map simply has no debugfs
+ * entries.  Two failure paths are handled explicitly: a failed
+ * kasprintf() must not reach debugfs_create_dir() as a NULL name, and
+ * the allocated name must be released when the directory cannot be
+ * created, because regmap_debugfs_exit() frees it only when
+ * map->debugfs is set.
+ */
+void regmap_debugfs_init(struct regmap *map, const char *name)
+{
+	struct rb_node *next;
+	struct regmap_range_node *range_node;
+
+	/* If we don't have the debugfs root yet, postpone init */
+	if (!regmap_debugfs_root) {
+		struct regmap_debugfs_node *node;
+		node = kzalloc(sizeof(*node), GFP_KERNEL);
+		if (!node)
+			return;
+		node->map = map;
+		node->name = name;
+		mutex_lock(&regmap_debugfs_early_lock);
+		list_add(&node->link, &regmap_debugfs_early_list);
+		mutex_unlock(&regmap_debugfs_early_lock);
+		return;
+	}
+
+	INIT_LIST_HEAD(&map->debugfs_off_cache);
+	mutex_init(&map->cache_lock);
+
+	if (name) {
+		map->debugfs_name = kasprintf(GFP_KERNEL, "%s-%s",
+					      dev_name(map->dev), name);
+		if (!map->debugfs_name) {
+			dev_warn(map->dev,
+				 "Failed to allocate debugfs directory name\n");
+			return;
+		}
+		name = map->debugfs_name;
+	} else {
+		name = dev_name(map->dev);
+	}
+
+	map->debugfs = debugfs_create_dir(name, regmap_debugfs_root);
+	if (!map->debugfs) {
+		dev_warn(map->dev, "Failed to create debugfs directory\n");
+		/* Nobody else will free the name without a directory */
+		kfree(map->debugfs_name);
+		map->debugfs_name = NULL;
+		return;
+	}
+
+	debugfs_create_file("name", 0400, map->debugfs,
+			    map, &regmap_name_fops);
+
+	debugfs_create_file("range", 0400, map->debugfs,
+			    map, &regmap_reg_ranges_fops);
+
+	/* Register dumps only make sense if there is something to show */
+	if (map->max_register || regmap_readable(map, 0)) {
+		debugfs_create_file("registers", 0400, map->debugfs,
+				    map, &regmap_map_fops);
+		debugfs_create_file("access", 0400, map->debugfs,
+				    map, &regmap_access_fops);
+	}
+
+	/* Cache state flags are only exposed for maps that use a cache */
+	if (map->cache_type) {
+		debugfs_create_bool("cache_only", 0400, map->debugfs,
+				    &map->cache_only);
+		debugfs_create_bool("cache_dirty", 0400, map->debugfs,
+				    &map->cache_dirty);
+		debugfs_create_bool("cache_bypass", 0400, map->debugfs,
+				    &map->cache_bypass);
+	}
+
+	/* One file per named range in the map's range tree */
+	next = rb_first(&map->range_tree);
+	while (next) {
+		range_node = rb_entry(next, struct regmap_range_node, node);
+
+		if (range_node->name)
+			debugfs_create_file(range_node->name, 0400,
+					    map->debugfs, range_node,
+					    &regmap_range_fops);
+
+		next = rb_next(&range_node->node);
+	}
+}
+
+/*
+ * Undo regmap_debugfs_init() for @map.
+ *
+ * A map that has a debugfs directory gets it removed together with its
+ * dump offset cache and directory name.  A map without one may still
+ * be waiting on the early list; every queued request for it is dropped.
+ */
+void regmap_debugfs_exit(struct regmap *map)
+{
+	struct regmap_debugfs_node *pending, *next;
+
+	if (map->debugfs) {
+		debugfs_remove_recursive(map->debugfs);
+
+		mutex_lock(&map->cache_lock);
+		regmap_debugfs_free_dump_cache(map);
+		mutex_unlock(&map->cache_lock);
+
+		kfree(map->debugfs_name);
+		return;
+	}
+
+	mutex_lock(&regmap_debugfs_early_lock);
+	list_for_each_entry_safe(pending, next, &regmap_debugfs_early_list,
+				 link) {
+		if (pending->map != map)
+			continue;
+
+		list_del(&pending->link);
+		kfree(pending);
+	}
+	mutex_unlock(&regmap_debugfs_early_lock);
+}
+
+/*
+ * Create the "regmap" debugfs root and replay every initialisation
+ * request that was queued while the root did not exist.  If the root
+ * cannot be created a warning is printed and the queue is left as is.
+ */
+void regmap_debugfs_initcall(void)
+{
+	struct regmap_debugfs_node *pending;
+
+	regmap_debugfs_root = debugfs_create_dir("regmap", NULL);
+	if (!regmap_debugfs_root) {
+		pr_warn("regmap: Failed to create debugfs root\n");
+		return;
+	}
+
+	mutex_lock(&regmap_debugfs_early_lock);
+	while (!list_empty(&regmap_debugfs_early_list)) {
+		pending = list_first_entry(&regmap_debugfs_early_list,
+					   struct regmap_debugfs_node, link);
+
+		/* The root exists now, so this call does the real set-up. */
+		regmap_debugfs_init(pending->map, pending->name);
+		list_del(&pending->link);
+		kfree(pending);
+	}
+	mutex_unlock(&regmap_debugfs_early_lock);
+}
diff --git a/drivers/base/regmap/regmap-i2c.c b/drivers/base/regmap/regmap-i2c.c
new file mode 100644
index 00000000000..ca193d1ef47
--- /dev/null
+++ b/drivers/base/regmap/regmap-i2c.c
@@ -0,0 +1,233 @@
+/*
+ * Register map access API - I2C support
+ *
+ * Copyright 2011 Wolfson Microelectronics plc
+ *
+ * Author: Mark Brown <broonie@opensource.wolfsonmicro.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/regmap.h>
+#include <linux/i2c.h>
+#include <linux/module.h>
+
+
+/*
+ * Read one register through i2c_smbus_read_byte_data().
+ *
+ * Returns 0 with the value stored in *@val, -EINVAL if @reg is above
+ * 0xff, or the negative result of the SMBus transfer.
+ */
+static int regmap_smbus_byte_reg_read(void *context, unsigned int reg,
+				      unsigned int *val)
+{
+	struct device *parent = context;
+	int byte;
+
+	if (reg > 0xff)
+		return -EINVAL;
+
+	byte = i2c_smbus_read_byte_data(to_i2c_client(parent), reg);
+	if (byte < 0)
+		return byte;
+
+	*val = byte;
+	return 0;
+}
+
+/*
+ * Write one register through i2c_smbus_write_byte_data().
+ *
+ * Returns -EINVAL if @reg or @val is above 0xff, otherwise the result
+ * of the SMBus transfer.
+ */
+static int regmap_smbus_byte_reg_write(void *context, unsigned int reg,
+				       unsigned int val)
+{
+	struct device *parent = context;
+
+	if (reg > 0xff || val > 0xff)
+		return -EINVAL;
+
+	return i2c_smbus_write_byte_data(to_i2c_client(parent), reg, val);
+}
+
+/* Bus operations built on the SMBus byte-data accessors. */
+static struct regmap_bus regmap_smbus_byte = {
+	.reg_read = regmap_smbus_byte_reg_read,
+	.reg_write = regmap_smbus_byte_reg_write,
+};
+
+/*
+ * Read one register through i2c_smbus_read_word_data().
+ *
+ * Returns 0 with the value stored in *@val, -EINVAL if @reg is above
+ * 0xff, or the negative result of the SMBus transfer.
+ */
+static int regmap_smbus_word_reg_read(void *context, unsigned int reg,
+				      unsigned int *val)
+{
+	struct device *parent = context;
+	int word;
+
+	if (reg > 0xff)
+		return -EINVAL;
+
+	word = i2c_smbus_read_word_data(to_i2c_client(parent), reg);
+	if (word < 0)
+		return word;
+
+	*val = word;
+	return 0;
+}
+
+/*
+ * Write one register through i2c_smbus_write_word_data().
+ *
+ * Returns -EINVAL if @reg is above 0xff or @val is above 0xffff,
+ * otherwise the result of the SMBus transfer.
+ */
+static int regmap_smbus_word_reg_write(void *context, unsigned int reg,
+				       unsigned int val)
+{
+	struct device *parent = context;
+
+	if (reg > 0xff || val > 0xffff)
+		return -EINVAL;
+
+	return i2c_smbus_write_word_data(to_i2c_client(parent), reg, val);
+}
+
+/* Bus operations built on the SMBus word-data accessors. */
+static struct regmap_bus regmap_smbus_word = {
+	.reg_read = regmap_smbus_word_reg_read,
+	.reg_write = regmap_smbus_word_reg_write,
+};
+
+/*
+ * Send @count bytes from @data with i2c_master_send().
+ *
+ * Returns 0 if every byte was sent, the negative result of the
+ * transfer if it failed, or -EIO for a short write.
+ */
+static int regmap_i2c_write(void *context, const void *data, size_t count)
+{
+	struct device *parent = context;
+	int sent;
+
+	sent = i2c_master_send(to_i2c_client(parent), data, count);
+	if (sent == count)
+		return 0;
+
+	return sent < 0 ? sent : -EIO;
+}
+
+/*
+ * Write a register address and its value(s) from two separate buffers
+ * in one transfer: the value message carries I2C_M_NOSTART so it
+ * follows the register message directly.
+ *
+ * Returns 0 on success, -ENOTSUPP if the adapter lacks
+ * I2C_FUNC_NOSTART (the core then substitutes a linear write), the
+ * negative result of i2c_transfer(), or -EIO if not both messages were
+ * transferred.
+ */
+static int regmap_i2c_gather_write(void *context,
+				   const void *reg, size_t reg_size,
+				   const void *val, size_t val_size)
+{
+	struct device *parent = context;
+	struct i2c_client *client = to_i2c_client(parent);
+	struct i2c_msg msgs[2] = {
+		{
+			.addr = client->addr,
+			.flags = 0,
+			.len = reg_size,
+			.buf = (void *)reg,
+		},
+		{
+			.addr = client->addr,
+			.flags = I2C_M_NOSTART,
+			.len = val_size,
+			.buf = (void *)val,
+		},
+	};
+	int done;
+
+	if (!i2c_check_functionality(client->adapter, I2C_FUNC_NOSTART))
+		return -ENOTSUPP;
+
+	done = i2c_transfer(client->adapter, msgs, 2);
+	if (done < 0)
+		return done;
+
+	return done == 2 ? 0 : -EIO;
+}
+
+/*
+ * Read @val_size bytes into @val after writing the @reg_size byte
+ * register address, as one two-message transfer (write, then
+ * I2C_M_RD).
+ *
+ * Returns 0 on success, the negative result of i2c_transfer(), or
+ * -EIO if not both messages were transferred.
+ */
+static int regmap_i2c_read(void *context,
+			   const void *reg, size_t reg_size,
+			   void *val, size_t val_size)
+{
+	struct device *parent = context;
+	struct i2c_client *client = to_i2c_client(parent);
+	struct i2c_msg msgs[2] = {
+		{
+			.addr = client->addr,
+			.flags = 0,
+			.len = reg_size,
+			.buf = (void *)reg,
+		},
+		{
+			.addr = client->addr,
+			.flags = I2C_M_RD,
+			.len = val_size,
+			.buf = val,
+		},
+	};
+	int done;
+
+	done = i2c_transfer(client->adapter, msgs, 2);
+	if (done < 0)
+		return done;
+
+	return done == 2 ? 0 : -EIO;
+}
+
+/* Bus operations for adapters with plain I2C transfers. */
+static struct regmap_bus regmap_i2c = {
+	.read = regmap_i2c_read,
+	.write = regmap_i2c_write,
+	.gather_write = regmap_i2c_gather_write,
+};
+
+/*
+ * Pick the bus operations matching the adapter's functionality.
+ *
+ * Plain I2C is preferred.  Failing that, maps with 8-bit registers can
+ * use the SMBus word accessors (16-bit values) or byte accessors
+ * (8-bit values) if the adapter offers them.
+ *
+ * Returns the bus operations or ERR_PTR(-ENOTSUPP) if nothing fits.
+ */
+static const struct regmap_bus *regmap_get_i2c_bus(struct i2c_client *i2c,
+					const struct regmap_config *config)
+{
+	if (i2c_check_functionality(i2c->adapter, I2C_FUNC_I2C))
+		return &regmap_i2c;
+
+	/* Both SMBus variants need 8-bit register addresses. */
+	if (config->reg_bits != 8)
+		return ERR_PTR(-ENOTSUPP);
+
+	switch (config->val_bits) {
+	case 16:
+		if (i2c_check_functionality(i2c->adapter,
+					    I2C_FUNC_SMBUS_WORD_DATA))
+			return &regmap_smbus_word;
+		break;
+	case 8:
+		if (i2c_check_functionality(i2c->adapter,
+					    I2C_FUNC_SMBUS_BYTE_DATA))
+			return &regmap_smbus_byte;
+		break;
+	}
+
+	return ERR_PTR(-ENOTSUPP);
+}
+
+/**
+ * regmap_init_i2c(): Initialise register map
+ *
+ * @i2c: Device that will be interacted with
+ * @config: Configuration for register map
+ *
+ * Selects bus operations suited to the adapter and passes them to
+ * regmap_init().  The return value will be an ERR_PTR() on error or a
+ * valid pointer to a struct regmap.
+ */
+struct regmap *regmap_init_i2c(struct i2c_client *i2c,
+			       const struct regmap_config *config)
+{
+	const struct regmap_bus *bus;
+
+	bus = regmap_get_i2c_bus(i2c, config);
+
+	return IS_ERR(bus) ? ERR_CAST(bus)
+			   : regmap_init(&i2c->dev, bus, &i2c->dev, config);
+}
+EXPORT_SYMBOL_GPL(regmap_init_i2c);
+
+/**
+ * devm_regmap_init_i2c(): Initialise managed register map
+ *
+ * @i2c: Device that will be interacted with
+ * @config: Configuration for register map
+ *
+ * Selects bus operations suited to the adapter and passes them to
+ * devm_regmap_init().  The return value will be an ERR_PTR() on error
+ * or a valid pointer to a struct regmap.  The regmap will be
+ * automatically freed by the device management code.
+ */
+struct regmap *devm_regmap_init_i2c(struct i2c_client *i2c,
+				    const struct regmap_config *config)
+{
+	const struct regmap_bus *bus;
+
+	bus = regmap_get_i2c_bus(i2c, config);
+
+	return IS_ERR(bus) ? ERR_CAST(bus)
+			   : devm_regmap_init(&i2c->dev, bus, &i2c->dev,
+					      config);
+}
+EXPORT_SYMBOL_GPL(devm_regmap_init_i2c);
+
+MODULE_LICENSE("GPL");
diff --git a/drivers/base/regmap/regmap-irq.c b/drivers/base/regmap/regmap-irq.c
new file mode 100644
index 00000000000..6299a50a596
--- /dev/null
+++ b/drivers/base/regmap/regmap-irq.c
@@ -0,0 +1,598 @@
+/*
+ * regmap based irq_chip
+ *
+ * Copyright 2011 Wolfson Microelectronics plc
+ *
+ * Author: Mark Brown <broonie@opensource.wolfsonmicro.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/device.h>
+#include <linux/export.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/irqdomain.h>
+#include <linux/pm_runtime.h>
+#include <linux/regmap.h>
+#include <linux/slab.h>
+
+#include "internal.h"
+
+struct regmap_irq_chip_data {	/* runtime state of one regmap IRQ controller */
+	struct mutex lock;		/* held from irq_bus_lock to irq_bus_sync_unlock */
+	struct irq_chip irq_chip;	/* per-instance copy of the regmap_irq_chip template */
+
+	struct regmap *map;		/* register map the controller is accessed through */
+	const struct regmap_irq_chip *chip;	/* static description supplied by the driver */
+
+	int irq_base;			/* irq_base as stored by regmap_add_irq_chip() */
+	struct irq_domain *domain;	/* domain mapping chip IRQ index to virq */
+
+	int irq;			/* primary interrupt of the device */
+	int wake_count;			/* wake changes not yet passed on to 'irq' */
+
+	void *status_reg_buf;		/* raw buffer for the bulk status read, if used */
+	unsigned int *status_buf;	/* status value per register */
+	unsigned int *mask_buf;		/* mask bits per register; set bit = masked */
+	unsigned int *mask_buf_def;	/* all bits owned by chip->irqs, per register */
+	unsigned int *wake_buf;		/* wake bits per register; NULL without wake_base */
+
+	unsigned int irq_reg_stride;	/* chip->irq_reg_stride, or 1 if that is zero */
+};
+
+/* Return the description of chip interrupt number @irq. */
+static inline const
+struct regmap_irq *irq_to_regmap_irq(struct regmap_irq_chip_data *data,
+				     int irq)
+{
+	const struct regmap_irq *table = data->chip->irqs;
+
+	return table + irq;
+}
+
+/*
+ * irq_bus_lock callback: take the controller mutex.  It is released
+ * again by regmap_irq_sync_unlock().
+ */
+static void regmap_irq_lock(struct irq_data *data)
+{
+	struct regmap_irq_chip_data *ctrl;
+
+	ctrl = irq_data_get_irq_chip_data(data);
+	mutex_lock(&ctrl->lock);
+}
+
+static void regmap_irq_sync_unlock(struct irq_data *data)
+{	/* irq_bus_sync_unlock: write buffered mask/wake state out, then unlock */
+	struct regmap_irq_chip_data *d = irq_data_get_irq_chip_data(data);
+	struct regmap *map = d->map;
+	int i, ret;
+	u32 reg;
+
+	if (d->chip->runtime_pm) {
+		ret = pm_runtime_get_sync(map->dev);
+		if (ret < 0)	/* reported only; the sync below is still attempted */
+			dev_err(map->dev, "IRQ sync failed to resume: %d\n",
+				ret);
+	}
+
+	/*
+	 * If there's been a change in the mask write it back to the
+	 * hardware.  We rely on the use of the regmap core cache to
+	 * suppress pointless writes.
+	 */
+	for (i = 0; i < d->chip->num_regs; i++) {
+		reg = d->chip->mask_base +
+			(i * map->reg_stride * d->irq_reg_stride);
+		if (d->chip->mask_invert)	/* hardware bit has the opposite sense */
+			ret = regmap_update_bits(d->map, reg,
+					 d->mask_buf_def[i], ~d->mask_buf[i]);
+		else
+			ret = regmap_update_bits(d->map, reg,
+					 d->mask_buf_def[i], d->mask_buf[i]);
+		if (ret != 0)
+			dev_err(d->map->dev, "Failed to sync masks in %x\n",
+				reg);
+
+		reg = d->chip->wake_base +
+			(i * map->reg_stride * d->irq_reg_stride);
+		if (d->wake_buf) {	/* wake registers are optional */
+			if (d->chip->wake_invert)
+				ret = regmap_update_bits(d->map, reg,
+							 d->mask_buf_def[i],
+							 ~d->wake_buf[i]);
+			else
+				ret = regmap_update_bits(d->map, reg,
+							 d->mask_buf_def[i],
+							 d->wake_buf[i]);
+			if (ret != 0)
+				dev_err(d->map->dev,
+					"Failed to sync wakes in %x: %d\n",
+					reg, ret);
+		}
+
+		if (!d->chip->init_ack_masked)
+			continue;
+		/*
+		 * Ack all the masked interrupts unconditionally.  Otherwise
+		 * a masked interrupt which hasn't been acked is ignored by
+		 * the irq handler and may then cause an interrupt storm.
+		 */
+		if (d->mask_buf[i] && (d->chip->ack_base || d->chip->use_ack)) {
+			reg = d->chip->ack_base +
+				(i * map->reg_stride * d->irq_reg_stride);
+			ret = regmap_write(map, reg, d->mask_buf[i]);
+			if (ret != 0)
+				dev_err(d->map->dev, "Failed to ack 0x%x: %d\n",
+					reg, ret);
+		}
+	}
+
+	if (d->chip->runtime_pm)
+		pm_runtime_put(map->dev);	/* pairs with the get_sync above */
+
+	/* If we've changed our wakeup count propagate it to the parent */
+	if (d->wake_count < 0)
+		for (i = d->wake_count; i < 0; i++)
+			irq_set_irq_wake(d->irq, 0);	/* one call per net disable */
+	else if (d->wake_count > 0)
+		for (i = 0; i < d->wake_count; i++)
+			irq_set_irq_wake(d->irq, 1);	/* one call per net enable */
+
+	d->wake_count = 0;	/* everything has been propagated */
+
+	mutex_unlock(&d->lock);	/* taken in regmap_irq_lock() */
+}
+
+/*
+ * irq_enable callback: clear the interrupt's bit in the buffered mask.
+ * The hardware is updated later by regmap_irq_sync_unlock().
+ */
+static void regmap_irq_enable(struct irq_data *data)
+{
+	struct regmap_irq_chip_data *d = irq_data_get_irq_chip_data(data);
+	const struct regmap_irq *desc = irq_to_regmap_irq(d, data->hwirq);
+	unsigned int idx = desc->reg_offset / d->map->reg_stride;
+
+	d->mask_buf[idx] &= ~desc->mask;
+}
+
+/*
+ * irq_disable callback: set the interrupt's bit in the buffered mask.
+ * The hardware is updated later by regmap_irq_sync_unlock().
+ */
+static void regmap_irq_disable(struct irq_data *data)
+{
+	struct regmap_irq_chip_data *d = irq_data_get_irq_chip_data(data);
+	const struct regmap_irq *desc = irq_to_regmap_irq(d, data->hwirq);
+	unsigned int idx = desc->reg_offset / d->map->reg_stride;
+
+	d->mask_buf[idx] |= desc->mask;
+}
+
+/*
+ * irq_set_wake callback.
+ *
+ * Updates the buffered wake bits (when the chip has wake registers;
+ * a cleared bit means wake enabled) and adjusts wake_count, which
+ * regmap_irq_sync_unlock() later propagates to the primary interrupt.
+ *
+ * Always returns 0.
+ */
+static int regmap_irq_set_wake(struct irq_data *data, unsigned int on)
+{
+	struct regmap_irq_chip_data *d = irq_data_get_irq_chip_data(data);
+	const struct regmap_irq *desc = irq_to_regmap_irq(d, data->hwirq);
+	unsigned int idx = desc->reg_offset / d->map->reg_stride;
+
+	if (!on) {
+		if (d->wake_buf)
+			d->wake_buf[idx] |= desc->mask;
+		d->wake_count--;
+		return 0;
+	}
+
+	if (d->wake_buf)
+		d->wake_buf[idx] &= ~desc->mask;
+	d->wake_count++;
+
+	return 0;
+}
+
+/*
+ * Template irq_chip; regmap_add_irq_chip() copies it into each
+ * controller instance and fills in the name.
+ */
+static const struct irq_chip regmap_irq_chip = {
+	.irq_enable		= regmap_irq_enable,
+	.irq_disable		= regmap_irq_disable,
+	.irq_set_wake		= regmap_irq_set_wake,
+	.irq_bus_lock		= regmap_irq_lock,
+	.irq_bus_sync_unlock	= regmap_irq_sync_unlock,
+};
+
+/*
+ * Threaded handler for the chip's primary interrupt.
+ *
+ * Reads every status register, drops the bits that are currently
+ * masked, acknowledges what is left (when the chip has an ack
+ * register) and then runs the nested handler of each interrupt whose
+ * bit is set.
+ *
+ * When chip->runtime_pm is set the device is resumed for the duration
+ * of the handler, and every return path drops that reference again.
+ * The bulk-read failure path used to return without doing so, which
+ * left the usage count raised.
+ *
+ * Returns IRQ_HANDLED if at least one nested interrupt was dispatched,
+ * IRQ_NONE otherwise (including on resume or I/O failure).
+ */
+static irqreturn_t regmap_irq_thread(int irq, void *d)
+{
+	struct regmap_irq_chip_data *data = d;
+	const struct regmap_irq_chip *chip = data->chip;
+	struct regmap *map = data->map;
+	int ret, i;
+	bool handled = false;
+	u32 reg;
+
+	if (chip->runtime_pm) {
+		ret = pm_runtime_get_sync(map->dev);
+		if (ret < 0) {
+			dev_err(map->dev, "IRQ thread failed to resume: %d\n",
+				ret);
+			/* Balance the get even though the resume failed */
+			pm_runtime_put(map->dev);
+			return IRQ_NONE;
+		}
+	}
+
+	/*
+	 * Read in the statuses, using a single bulk read if possible
+	 * in order to reduce the I/O overheads.
+	 */
+	if (!map->use_single_rw && map->reg_stride == 1 &&
+	    data->irq_reg_stride == 1) {
+		u8 *buf8 = data->status_reg_buf;
+		u16 *buf16 = data->status_reg_buf;
+		u32 *buf32 = data->status_reg_buf;
+
+		BUG_ON(!data->status_reg_buf);
+
+		ret = regmap_bulk_read(map, chip->status_base,
+				       data->status_reg_buf,
+				       chip->num_regs);
+		if (ret != 0) {
+			dev_err(map->dev, "Failed to read IRQ status: %d\n",
+				ret);
+			if (chip->runtime_pm)
+				pm_runtime_put(map->dev);
+			return IRQ_NONE;
+		}
+
+		/* Widen the raw register values into status_buf */
+		for (i = 0; i < data->chip->num_regs; i++) {
+			switch (map->format.val_bytes) {
+			case 1:
+				data->status_buf[i] = buf8[i];
+				break;
+			case 2:
+				data->status_buf[i] = buf16[i];
+				break;
+			case 4:
+				data->status_buf[i] = buf32[i];
+				break;
+			default:
+				BUG();
+				return IRQ_NONE;
+			}
+		}
+
+	} else {
+		for (i = 0; i < data->chip->num_regs; i++) {
+			ret = regmap_read(map, chip->status_base +
+					  (i * map->reg_stride
+					   * data->irq_reg_stride),
+					  &data->status_buf[i]);
+
+			if (ret != 0) {
+				dev_err(map->dev,
+					"Failed to read IRQ status: %d\n",
+					ret);
+				if (chip->runtime_pm)
+					pm_runtime_put(map->dev);
+				return IRQ_NONE;
+			}
+		}
+	}
+
+	/*
+	 * Ignore masked IRQs and ack if we need to; we ack early so
+	 * there is no race between handling and acknowledging the
+	 * interrupt.  We assume that typically few of the interrupts
+	 * will fire simultaneously so don't worry about overhead from
+	 * doing a write per register.
+	 */
+	for (i = 0; i < data->chip->num_regs; i++) {
+		data->status_buf[i] &= ~data->mask_buf[i];
+
+		if (data->status_buf[i] && (chip->ack_base || chip->use_ack)) {
+			reg = chip->ack_base +
+				(i * map->reg_stride * data->irq_reg_stride);
+			ret = regmap_write(map, reg, data->status_buf[i]);
+			if (ret != 0)
+				dev_err(map->dev, "Failed to ack 0x%x: %d\n",
+					reg, ret);
+		}
+	}
+
+	/* Dispatch every interrupt whose status bit survived the masking */
+	for (i = 0; i < chip->num_irqs; i++) {
+		if (data->status_buf[chip->irqs[i].reg_offset /
+				     map->reg_stride] & chip->irqs[i].mask) {
+			handle_nested_irq(irq_find_mapping(data->domain, i));
+			handled = true;
+		}
+	}
+
+	if (chip->runtime_pm)
+		pm_runtime_put(map->dev);
+
+	if (handled)
+		return IRQ_HANDLED;
+	else
+		return IRQ_NONE;
+}
+
+/*
+ * irq_domain map callback: attach the controller's chip data and
+ * irq_chip to @virq and mark it as a nested threaded interrupt.
+ *
+ * Always returns 0.
+ */
+static int regmap_irq_map(struct irq_domain *h, unsigned int virq,
+			  irq_hw_number_t hw)
+{
+	struct regmap_irq_chip_data *ctrl = h->host_data;
+
+	irq_set_chip_data(virq, ctrl);
+	irq_set_chip(virq, &ctrl->irq_chip);
+	irq_set_nested_thread(virq, 1);
+
+	/*
+	 * On ARM the IRQ has to be flagged as valid explicitly, which
+	 * also sets it noprobe; elsewhere noprobe is set directly.
+	 */
+#ifdef CONFIG_ARM
+	set_irq_flags(virq, IRQF_VALID);
+#else
+	irq_set_noprobe(virq);
+#endif
+
+	return 0;
+}
+
+/* IRQ domain callbacks shared by every regmap IRQ controller. */
+static struct irq_domain_ops regmap_domain_ops = {
+	.xlate	= irq_domain_xlate_twocell,
+	.map	= regmap_irq_map,
+};
+
+/**
+ * regmap_add_irq_chip(): Use standard regmap IRQ controller handling
+ *
+ * map:       The regmap for the device.
+ * irq:       The IRQ the device uses to signal interrupts
+ * irq_flags: The IRQF_ flags to use for the primary interrupt.
+ * irq_base:  Allocate at this specific IRQ number if non-zero; if zero
+ *            a linear IRQ domain is used instead.
+ * chip:      Configuration for the interrupt controller.
+ * data:      Runtime data structure for the controller, allocated on success
+ *
+ * Returns 0 on success or an errno on failure.
+ *
+ * In order for this to be efficient the chip really should use a
+ * register cache.  The chip driver is responsible for restoring the
+ * register values used by the IRQ controller over suspend and resume.
+ */
+int regmap_add_irq_chip(struct regmap *map, int irq, int irq_flags,
+			int irq_base, const struct regmap_irq_chip *chip,
+			struct regmap_irq_chip_data **data)
+{
+	struct regmap_irq_chip_data *d;
+	int i;
+	int ret = -ENOMEM;
+	u32 reg;
+
+	if (chip->num_regs <= 0)
+		return -EINVAL;
+
+	/* Every IRQ must sit on a register stride inside the chip's range */
+	for (i = 0; i < chip->num_irqs; i++) {
+		if (chip->irqs[i].reg_offset % map->reg_stride)
+			return -EINVAL;
+		if (chip->irqs[i].reg_offset / map->reg_stride >=
+		    chip->num_regs)
+			return -EINVAL;
+	}
+
+	if (irq_base) {
+		irq_base = irq_alloc_descs(irq_base, 0, chip->num_irqs, 0);
+		if (irq_base < 0) {
+			dev_warn(map->dev, "Failed to allocate IRQs: %d\n",
+				 irq_base);
+			return irq_base;
+		}
+	}
+
+	d = kzalloc(sizeof(*d), GFP_KERNEL);
+	if (!d)
+		return -ENOMEM;
+
+	d->status_buf = kcalloc(chip->num_regs, sizeof(unsigned int),
+				GFP_KERNEL);
+	if (!d->status_buf)
+		goto err_alloc;
+
+	d->mask_buf = kcalloc(chip->num_regs, sizeof(unsigned int),
+			      GFP_KERNEL);
+	if (!d->mask_buf)
+		goto err_alloc;
+
+	d->mask_buf_def = kcalloc(chip->num_regs, sizeof(unsigned int),
+				  GFP_KERNEL);
+	if (!d->mask_buf_def)
+		goto err_alloc;
+
+	if (chip->wake_base) {
+		d->wake_buf = kcalloc(chip->num_regs, sizeof(unsigned int),
+				      GFP_KERNEL);
+		if (!d->wake_buf)
+			goto err_alloc;
+	}
+
+	d->irq_chip = regmap_irq_chip;
+	d->irq_chip.name = chip->name;
+	d->irq = irq;
+	d->map = map;
+	d->chip = chip;
+	d->irq_base = irq_base;
+
+	d->irq_reg_stride = chip->irq_reg_stride ? chip->irq_reg_stride : 1;
+
+	/* Only allocated when !use_single_rw and both strides are 1 */
+	if (!map->use_single_rw && map->reg_stride == 1 &&
+	    d->irq_reg_stride == 1) {
+		d->status_reg_buf = kmalloc(map->format.val_bytes *
+					    chip->num_regs, GFP_KERNEL);
+		if (!d->status_reg_buf)
+			goto err_alloc;
+	}
+
+	mutex_init(&d->lock);
+
+	/* Gather, per register, the mask bits of every described interrupt */
+	for (i = 0; i < chip->num_irqs; i++)
+		d->mask_buf_def[chip->irqs[i].reg_offset / map->reg_stride]
+			|= chip->irqs[i].mask;
+
+	/* Mask all the interrupts by default */
+	for (i = 0; i < chip->num_regs; i++) {
+		d->mask_buf[i] = d->mask_buf_def[i];
+		reg = chip->mask_base +
+			(i * map->reg_stride * d->irq_reg_stride);
+		if (chip->mask_invert)
+			ret = regmap_update_bits(map, reg,
+					 d->mask_buf[i], ~d->mask_buf[i]);
+		else
+			ret = regmap_update_bits(map, reg,
+					 d->mask_buf[i], d->mask_buf[i]);
+		if (ret != 0) {
+			dev_err(map->dev, "Failed to set masks in 0x%x: %d\n",
+				reg, ret);
+			goto err_alloc;
+		}
+
+		if (!chip->init_ack_masked)
+			continue;
+
+		/* Ack masked but set interrupts */
+		reg = chip->status_base +
+			(i * map->reg_stride * d->irq_reg_stride);
+		ret = regmap_read(map, reg, &d->status_buf[i]);
+		if (ret != 0) {
+			dev_err(map->dev, "Failed to read IRQ status: %d\n",
+				ret);
+			goto err_alloc;
+		}
+
+		if (d->status_buf[i] && (chip->ack_base || chip->use_ack)) {
+			reg = chip->ack_base +
+				(i * map->reg_stride * d->irq_reg_stride);
+			ret = regmap_write(map, reg,
+					d->status_buf[i] & d->mask_buf[i]);
+			if (ret != 0) {
+				dev_err(map->dev, "Failed to ack 0x%x: %d\n",
+					reg, ret);
+				goto err_alloc;
+			}
+		}
+	}
+
+	/* Wake is disabled by default */
+	if (d->wake_buf) {
+		for (i = 0; i < chip->num_regs; i++) {
+			d->wake_buf[i] = d->mask_buf_def[i];
+			reg = chip->wake_base +
+				(i * map->reg_stride * d->irq_reg_stride);
+
+			if (chip->wake_invert)
+				ret = regmap_update_bits(map, reg,
+							 d->mask_buf_def[i],
+							 0);
+			else
+				ret = regmap_update_bits(map, reg,
+							 d->mask_buf_def[i],
+							 d->wake_buf[i]);
+			if (ret != 0) {
+				dev_err(map->dev, "Failed to set masks in 0x%x: %d\n",
+					reg, ret);
+				goto err_alloc;
+			}
+		}
+	}
+
+	if (irq_base)
+		d->domain = irq_domain_add_legacy(map->dev->of_node,
+						  chip->num_irqs, irq_base, 0,
+						  &regmap_domain_ops, d);
+	else
+		d->domain = irq_domain_add_linear(map->dev->of_node,
+						  chip->num_irqs,
+						  &regmap_domain_ops, d);
+	if (!d->domain) {
+		dev_err(map->dev, "Failed to create IRQ domain\n");
+		ret = -ENOMEM;
+		goto err_alloc;
+	}
+
+	ret = request_threaded_irq(irq, NULL, regmap_irq_thread, irq_flags,
+				   chip->name, d);
+	if (ret != 0) {
+		dev_err(map->dev, "Failed to request IRQ %d for %s: %d\n",
+			irq, chip->name, ret);
+		goto err_domain;
+	}
+
+	*data = d;
+
+	return 0;
+
+err_domain:
+	irq_domain_remove(d->domain);
+err_alloc:
+	kfree(d->wake_buf);
+	kfree(d->mask_buf_def);
+	kfree(d->mask_buf);
+	kfree(d->status_buf);
+	kfree(d->status_reg_buf);
+	kfree(d);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(regmap_add_irq_chip);
+
+/**
+ * regmap_del_irq_chip(): Tear down a regmap IRQ chip
+ *
+ * @irq: Primary IRQ for the device, released here with free_irq()
+ * @d:   regmap_irq_chip_data to release; a NULL pointer is ignored
+ */
+void regmap_del_irq_chip(int irq, struct regmap_irq_chip_data *d)
+{
+	if (d == NULL)
+		return;
+
+	free_irq(irq, d);
+	irq_domain_remove(d->domain);
+	kfree(d->status_buf);
+	kfree(d->status_reg_buf);
+	kfree(d->mask_buf);
+	kfree(d->mask_buf_def);
+	kfree(d->wake_buf);
+	kfree(d);
+}
+EXPORT_SYMBOL_GPL(regmap_del_irq_chip);
+
+/**
+ * regmap_irq_chip_get_base(): Return the IRQ base of a regmap IRQ chip
+ *
+ * Useful for drivers to request their own IRQs; warns if the base is zero.
+ * @data: regmap_irq controller to operate on.
+ */
+int regmap_irq_chip_get_base(struct regmap_irq_chip_data *data)
+{
+	int base = data->irq_base;
+	WARN_ON(!base);
+	return base;
+}
+EXPORT_SYMBOL_GPL(regmap_irq_chip_get_base);
+
+/**
+ * regmap_irq_get_virq(): Map an interrupt on a chip to a virtual IRQ
+ *
+ * Useful for drivers to request their own IRQs.  Returns -EINVAL when
+ * @irq lies outside the chip's IRQ list or names a hole (zero mask).
+ * @data: regmap_irq controller to operate on.
+ * @irq: index of the interrupt requested in the chip IRQs
+ */
+int regmap_irq_get_virq(struct regmap_irq_chip_data *data, int irq)
+{
+	/* Reject out-of-range indices and holes in the IRQ list */
+	if (irq < 0 || irq >= data->chip->num_irqs ||
+	    !data->chip->irqs[irq].mask)
+		return -EINVAL;
+	return irq_create_mapping(data->domain, irq);
+}
+EXPORT_SYMBOL_GPL(regmap_irq_get_virq);
+
+/**
+ * regmap_irq_get_domain(): Retrieve the irq_domain for the chip
+ *
+ * Lets drivers request their own IRQs and hook into other subsystems.
+ * A NULL @data is accepted so callers need not check for a chip first.
+ *
+ * @data: regmap_irq controller to operate on, or NULL.
+ *
+ * Returns the chip's irq_domain, or NULL when @data is NULL.
+ */
+struct irq_domain *regmap_irq_get_domain(struct regmap_irq_chip_data *data)
+{
+	if (!data)
+		return NULL;
+
+	return data->domain;
+}
+EXPORT_SYMBOL_GPL(regmap_irq_get_domain);
diff --git a/drivers/base/regmap/regmap-mmio.c b/drivers/base/regmap/regmap-mmio.c
new file mode 100644
index 00000000000..04a329a377e
--- /dev/null
+++ b/drivers/base/regmap/regmap-mmio.c
@@ -0,0 +1,350 @@
+/*
+ * Register map access API - MMIO support
+ *
+ * Copyright (c) 2012, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/clk.h>
+#include <linux/err.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/regmap.h>
+#include <linux/slab.h>
+
+struct regmap_mmio_context {
+	void __iomem *regs;	/* base of the memory-mapped register window */
+	unsigned reg_bytes;	/* config->reg_bits / 8 */
+	unsigned val_bytes;	/* config->val_bits / 8, width of each access */
+	unsigned pad_bytes;	/* config->pad_bits / 8 */
+	struct clk *clk;	/* register clock, or an ERR_PTR() when unused */
+};
+
+/*
+ * BUG() unless reg_size is 1, 2 or 4 bytes (8 is also accepted on 64-bit).
+ */
+static inline void regmap_mmio_regsize_check(size_t reg_size)
+{
+	bool ok = reg_size == 1 || reg_size == 2 || reg_size == 4;
+
+#ifdef CONFIG_64BIT
+	if (reg_size == 8)
+		ok = true;
+#endif
+	if (!ok)
+		BUG();
+}
+
+/*
+ * Return 0 if reg_bits is 8, 16 or 32 (or 64 on 64-bit), else -EINVAL.
+ */
+static int regmap_mmio_regbits_check(size_t reg_bits)
+{
+#ifdef CONFIG_64BIT
+	if (reg_bits == 64)
+		return 0;
+#endif
+	if (reg_bits == 8 || reg_bits == 16 || reg_bits == 32)
+		return 0;
+
+	return -EINVAL;
+}
+
+static inline void regmap_mmio_count_check(size_t count, u32 offset)
+{
+	BUG_ON(offset >= count);	/* nothing would be left past @offset */
+}
+
+/* Return the register offset stored in @reg, which is @reg_size bytes wide. */
+static inline unsigned int
+regmap_mmio_get_offset(const void *reg, size_t reg_size)
+{
+	if (reg_size == 1)
+		return *(u8 *)reg;
+	if (reg_size == 2)
+		return *(u16 *)reg;
+	if (reg_size == 4)
+		return *(u32 *)reg;
+#ifdef CONFIG_64BIT
+	/* Truncated to the unsigned int return type */
+	if (reg_size == 8)
+		return *(u64 *)reg;
+#endif
+	/* Any other width is a caller bug */
+	BUG();
+}
+
+/* Write @val_size bytes from @val, starting at the offset stored in @reg. */
+static int regmap_mmio_gather_write(void *context,
+				    const void *reg, size_t reg_size,
+				    const void *val, size_t val_size)
+{
+	struct regmap_mmio_context *ctx = context;
+	unsigned int pos;
+	int ret;
+
+	regmap_mmio_regsize_check(reg_size);
+
+	if (!IS_ERR(ctx->clk)) {
+		ret = clk_enable(ctx->clk);
+		if (ret < 0)
+			return ret;
+	}
+
+	pos = regmap_mmio_get_offset(reg, reg_size);
+
+	for (; val_size; val_size -= ctx->val_bytes) {
+		switch (ctx->val_bytes) {
+		case 1:
+			writeb(*(u8 *)val, ctx->regs + pos);
+			break;
+		case 2:
+			writew(*(u16 *)val, ctx->regs + pos);
+			break;
+		case 4:
+			writel(*(u32 *)val, ctx->regs + pos);
+			break;
+#ifdef CONFIG_64BIT
+		case 8:
+			writeq(*(u64 *)val, ctx->regs + pos);
+			break;
+#endif
+		default:
+			/* regmap_mmio_gen_context() rejects other widths */
+			BUG();
+		}
+		val += ctx->val_bytes;
+		pos += ctx->val_bytes;
+	}
+
+	if (!IS_ERR(ctx->clk))
+		clk_disable(ctx->clk);
+
+	return 0;
+}
+
+/* Split a combined reg+pad+value buffer and pass it to the gather write. */
+static int regmap_mmio_write(void *context, const void *data, size_t count)
+{
+	struct regmap_mmio_context *ctx = context;
+	unsigned int hdr_len = ctx->reg_bytes + ctx->pad_bytes;
+
+	regmap_mmio_count_check(count, hdr_len);
+	return regmap_mmio_gather_write(context, data, ctx->reg_bytes,
+					data + hdr_len, count - hdr_len);
+}
+
+/* Read @val_size bytes into @val, starting at the offset stored in @reg. */
+static int regmap_mmio_read(void *context,
+			    const void *reg, size_t reg_size,
+			    void *val, size_t val_size)
+{
+	struct regmap_mmio_context *ctx = context;
+	unsigned int pos;
+	int ret;
+
+	regmap_mmio_regsize_check(reg_size);
+
+	if (!IS_ERR(ctx->clk)) {
+		ret = clk_enable(ctx->clk);
+		if (ret < 0)
+			return ret;
+	}
+
+	pos = regmap_mmio_get_offset(reg, reg_size);
+
+	for (; val_size; val_size -= ctx->val_bytes) {
+		switch (ctx->val_bytes) {
+		case 1:
+			*(u8 *)val = readb(ctx->regs + pos);
+			break;
+		case 2:
+			*(u16 *)val = readw(ctx->regs + pos);
+			break;
+		case 4:
+			*(u32 *)val = readl(ctx->regs + pos);
+			break;
+#ifdef CONFIG_64BIT
+		case 8:
+			*(u64 *)val = readq(ctx->regs + pos);
+			break;
+#endif
+		default:
+			/* regmap_mmio_gen_context() rejects other widths */
+			BUG();
+		}
+		val += ctx->val_bytes;
+		pos += ctx->val_bytes;
+	}
+
+	if (!IS_ERR(ctx->clk))
+		clk_disable(ctx->clk);
+
+	return 0;
+}
+
+static void regmap_mmio_free_context(void *context)
+{
+	struct regmap_mmio_context *ctx = context;
+	struct clk *clk = ctx->clk;	/* an ERR_PTR() when no clock is held */
+	if (!IS_ERR(clk)) {
+		clk_unprepare(clk);
+		clk_put(clk);
+	}
+	kfree(ctx);
+}
+
+static struct regmap_bus regmap_mmio = {	/* MMIO bus operations */
+	.fast_io = true,
+	.write = regmap_mmio_write,
+	.gather_write = regmap_mmio_gather_write,
+	.read = regmap_mmio_read,
+	.free_context = regmap_mmio_free_context,	/* drops clock + ctx */
+	.reg_format_endian_default = REGMAP_ENDIAN_NATIVE,
+	.val_format_endian_default = REGMAP_ENDIAN_NATIVE,
+};
+
+/* Validate @config for MMIO use and build the bus context (and clock). */
+static struct regmap_mmio_context *regmap_mmio_gen_context(struct device *dev,
+					const char *clk_id,
+					void __iomem *regs,
+					const struct regmap_config *config)
+{
+	struct regmap_mmio_context *ctx;
+	int min_stride, ret;
+
+	ret = regmap_mmio_regbits_check(config->reg_bits);
+	if (ret)
+		return ERR_PTR(ret);
+
+	if (config->pad_bits)
+		return ERR_PTR(-EINVAL);
+
+	switch (config->val_bits) {
+	case 8:
+		/* The core treats 0 as 1 */
+		min_stride = 0;
+		break;
+	case 16:
+		min_stride = 2;
+		break;
+	case 32:
+		min_stride = 4;
+		break;
+#ifdef CONFIG_64BIT
+	case 64:
+		min_stride = 8;
+		break;
+#endif
+	default:
+		return ERR_PTR(-EINVAL);
+	}
+
+	/* Registers must be at least one value width apart */
+	if (config->reg_stride < min_stride)
+		return ERR_PTR(-EINVAL);
+
+	switch (config->reg_format_endian) {
+	case REGMAP_ENDIAN_DEFAULT:
+	case REGMAP_ENDIAN_NATIVE:
+		break;
+	default:
+		return ERR_PTR(-EINVAL);
+	}
+
+	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+	if (!ctx)
+		return ERR_PTR(-ENOMEM);
+
+	ctx->regs = regs;
+	ctx->val_bytes = config->val_bits / 8;
+	ctx->reg_bytes = config->reg_bits / 8;
+	ctx->pad_bytes = config->pad_bits / 8;
+	ctx->clk = ERR_PTR(-ENODEV);
+
+	if (clk_id == NULL)
+		return ctx;
+
+	ctx->clk = clk_get(dev, clk_id);
+	if (IS_ERR(ctx->clk)) {
+		ret = PTR_ERR(ctx->clk);
+		goto err_free;
+	}
+
+	ret = clk_prepare(ctx->clk);
+	if (ret < 0) {
+		clk_put(ctx->clk);
+		goto err_free;
+	}
+
+	return ctx;
+
+err_free:
+	kfree(ctx);
+
+	return ERR_PTR(ret);
+}
+
+/**
+ * regmap_init_mmio_clk(): Initialise register map with register clock
+ *
+ * @dev: Device that will be interacted with
+ * @clk_id: register clock consumer ID, or NULL if no clock is needed
+ * @regs: Pointer to memory-mapped IO region
+ * @config: Configuration for register map
+ *
+ * Returns a valid struct regmap pointer on success, or an ERR_PTR()
+ * if the configuration is rejected or setup fails.
+ */
+struct regmap *regmap_init_mmio_clk(struct device *dev, const char *clk_id,
+				    void __iomem *regs,
+				    const struct regmap_config *config)
+{
+	struct regmap_mmio_context *mmio_ctx =
+		regmap_mmio_gen_context(dev, clk_id, regs, config);
+
+	if (IS_ERR(mmio_ctx))
+		return ERR_CAST(mmio_ctx);
+
+	return regmap_init(dev, &regmap_mmio, mmio_ctx, config);
+}
+EXPORT_SYMBOL_GPL(regmap_init_mmio_clk);
+
+/**
+ * devm_regmap_init_mmio_clk(): Initialise managed register map with clock
+ *
+ * @dev: Device that will be interacted with
+ * @clk_id: register clock consumer ID, or NULL if no clock is needed
+ * @regs: Pointer to memory-mapped IO region
+ * @config: Configuration for register map
+ *
+ * Returns a valid struct regmap pointer on success, or an ERR_PTR() if
+ * the configuration is rejected or setup fails.  The device management
+ * code frees the regmap automatically.
+ */
+struct regmap *devm_regmap_init_mmio_clk(struct device *dev, const char *clk_id,
+					 void __iomem *regs,
+					 const struct regmap_config *config)
+{
+	struct regmap_mmio_context *mmio_ctx =
+		regmap_mmio_gen_context(dev, clk_id, regs, config);
+
+	if (IS_ERR(mmio_ctx))
+		return ERR_CAST(mmio_ctx);
+
+	return devm_regmap_init(dev, &regmap_mmio, mmio_ctx, config);
+}
+EXPORT_SYMBOL_GPL(devm_regmap_init_mmio_clk);
+
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/base/regmap/regmap-spi.c b/drivers/base/regmap/regmap-spi.c
new file mode 100644
index 00000000000..0eb3097c0d7
--- /dev/null
+++ b/drivers/base/regmap/regmap-spi.c
@@ -0,0 +1,147 @@
+/*
+ * Register map access API - SPI support
+ *
+ * Copyright 2011 Wolfson Microelectronics plc
+ *
+ * Author: Mark Brown <broonie@opensource.wolfsonmicro.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/regmap.h>
+#include <linux/spi/spi.h>
+#include <linux/module.h>
+
+#include "internal.h"
+
+struct regmap_async_spi {
+	struct regmap_async core;	/* generic part returned by async_alloc */
+	struct spi_message m;		/* message submitted via spi_async() */
+	struct spi_transfer t[2];	/* [0] register bytes, [1] value bytes */
+};
+
+/* SPI completion callback: report the message status to the regmap core. */
+static void regmap_spi_complete(void *data)
+{
+	struct regmap_async_spi *done = data;
+	regmap_async_complete_cb(&done->core, done->m.status);
+}
+
+/* Send @count bytes of @data to the SPI device behind @context. */
+static int regmap_spi_write(void *context, const void *data, size_t count)
+{
+	struct spi_device *spi = to_spi_device(context);
+
+	return spi_write(spi, data, count);
+}
+
+/* Send the register and value buffers as one two-transfer SPI message. */
+static int regmap_spi_gather_write(void *context,
+				   const void *reg, size_t reg_len,
+				   const void *val, size_t val_len)
+{
+	struct spi_device *spi = to_spi_device(context);
+	struct spi_transfer xfer_reg = { .tx_buf = reg, .len = reg_len, };
+	struct spi_transfer xfer_val = { .tx_buf = val, .len = val_len, };
+	struct spi_message msg;
+
+	spi_message_init(&msg);
+	spi_message_add_tail(&xfer_reg, &msg);
+	spi_message_add_tail(&xfer_val, &msg);
+
+	return spi_sync(spi, &msg);
+}
+
+/* Queue an async write of @reg followed by @val (when @val is non-NULL). */
+static int regmap_spi_async_write(void *context,
+				  const void *reg, size_t reg_len,
+				  const void *val, size_t val_len,
+				  struct regmap_async *a)
+{
+	struct regmap_async_spi *req =
+		container_of(a, struct regmap_async_spi, core);
+	struct spi_transfer *xfer = req->t;
+
+	xfer[0].tx_buf = reg;
+	xfer[0].len = reg_len;
+	xfer[1].tx_buf = val;
+	xfer[1].len = val_len;
+
+	spi_message_init(&req->m);
+	spi_message_add_tail(&xfer[0], &req->m);
+	if (val != NULL)
+		spi_message_add_tail(&xfer[1], &req->m);
+
+	/* regmap_spi_complete() receives @req back as its argument */
+	req->m.context = req;
+	req->m.complete = regmap_spi_complete;
+
+	return spi_async(to_spi_device(context), &req->m);
+}
+
+/*
+ * Allocate a zeroed SPI async descriptor and return its embedded core
+ * part, or NULL when the allocation fails.
+ */
+static struct regmap_async *regmap_spi_async_alloc(void)
+{
+	struct regmap_async_spi *req = kzalloc(sizeof(*req), GFP_KERNEL);
+
+	return req ? &req->core : NULL;
+}
+
+/* Write the @reg bytes, then read @val_size bytes back into @val. */
+static int regmap_spi_read(void *context,
+			   const void *reg, size_t reg_size,
+			   void *val, size_t val_size)
+{
+	struct spi_device *spi = to_spi_device(context);
+
+	return spi_write_then_read(spi, reg, reg_size, val, val_size);
+}
+
+static struct regmap_bus regmap_spi = {	/* SPI bus operations */
+	.write = regmap_spi_write,
+	.gather_write = regmap_spi_gather_write,
+	.async_write = regmap_spi_async_write,
+	.async_alloc = regmap_spi_async_alloc,
+	.read = regmap_spi_read,
+	.read_flag_mask = 0x80,	/* NOTE(review): presumably bit 7 flags a read */
+};
+
+/**
+ * regmap_init_spi(): Initialise register map for an SPI device
+ *
+ * @spi: Device that will be interacted with
+ * @config: Configuration for register map
+ *
+ * Returns a valid struct regmap pointer, or an ERR_PTR() on error.
+ */
+struct regmap *regmap_init_spi(struct spi_device *spi,
+			       const struct regmap_config *config)
+{
+	struct device *dev = &spi->dev;
+	return regmap_init(dev, &regmap_spi, dev, config);
+}
+EXPORT_SYMBOL_GPL(regmap_init_spi);
+
+/**
+ * devm_regmap_init_spi(): Initialise managed register map for an SPI device
+ *
+ * @spi: Device that will be interacted with
+ * @config: Configuration for register map
+ *
+ * Returns a valid struct regmap pointer, or an ERR_PTR() on error.  The
+ * device management code frees the map automatically.
+ */
+struct regmap *devm_regmap_init_spi(struct spi_device *spi,
+				    const struct regmap_config *config)
+{
+	struct device *dev = &spi->dev;
+	return devm_regmap_init(dev, &regmap_spi, dev, config);
+}
+EXPORT_SYMBOL_GPL(devm_regmap_init_spi);
+
+MODULE_LICENSE("GPL");
diff --git a/drivers/base/regmap/regmap-spmi.c b/drivers/base/regmap/regmap-spmi.c
new file mode 100644
index 00000000000..d7026dc3338
--- /dev/null
+++ b/drivers/base/regmap/regmap-spmi.c
@@ -0,0 +1,256 @@
+/*
+ * Register map access API - SPMI support
+ *
+ * Copyright (c) 2012-2013, The Linux Foundation. All rights reserved.
+ *
+ * Based on regmap-i2c.c:
+ * Copyright 2011 Wolfson Microelectronics plc
+ * Author: Mark Brown <broonie@opensource.wolfsonmicro.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+#include <linux/regmap.h>
+#include <linux/spmi.h>
+#include <linux/module.h>
+#include <linux/init.h>
+
+/* Read @val_size bytes one register at a time, stopping at the first error. */
+static int regmap_spmi_base_read(void *context,
+				 const void *reg, size_t reg_size,
+				 void *val, size_t val_size)
+{
+	u8 addr = *(u8 *)reg;
+	u8 *dst = val;
+	int err = 0;
+
+	BUG_ON(reg_size != 1);
+	for (; val_size && !err; val_size--)
+		err = spmi_register_read(context, addr++, dst++);
+	return err;
+}
+
+/*
+ * Write @val_size bytes one register at a time; stop at the first error.
+ */
+static int regmap_spmi_base_gather_write(void *context,
+					 const void *reg, size_t reg_size,
+					 const void *val, size_t val_size)
+{
+	const u8 *src = val;
+	u8 addr = *(u8 *)reg;
+	int err;
+
+	BUG_ON(reg_size != 1);
+
+	/*
+	 * Register 0 has a dedicated, more bandwidth-efficient
+	 * 'Register 0 Write' sequence in SPMI; use it for the first byte
+	 * when the write starts there.
+	 */
+	if (addr == 0 && val_size) {
+		err = spmi_register_zero_write(context, *src);
+		if (err)
+			return err;
+
+		src++;
+		addr++;
+		val_size--;
+	}
+
+	/* Everything else goes out as ordinary single-register writes */
+	for (; val_size; val_size--, src++, addr++) {
+		err = spmi_register_write(context, addr, *src);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+static int regmap_spmi_base_write(void *context, const void *data,
+				  size_t count)
+{
+	const void *vals = data + 1;	/* values follow the address byte */
+	BUG_ON(count == 0);
+	return regmap_spmi_base_gather_write(context, data, 1, vals, count - 1);
+}
+
+static struct regmap_bus regmap_spmi_base = {	/* 8-bit address accessors */
+	.read				= regmap_spmi_base_read,
+	.write				= regmap_spmi_base_write,
+	.gather_write			= regmap_spmi_base_gather_write,
+	.reg_format_endian_default	= REGMAP_ENDIAN_NATIVE,
+	.val_format_endian_default	= REGMAP_ENDIAN_NATIVE,
+};
+
+/**
+ * regmap_init_spmi_base(): Create regmap for the Base register space
+ * @sdev:	SPMI device that will be interacted with
+ * @config:	Configuration for register map
+ *
+ * Returns a valid struct regmap pointer, or an ERR_PTR() on error.
+ */
+struct regmap *regmap_init_spmi_base(struct spmi_device *sdev,
+				     const struct regmap_config *config)
+{
+	struct device *dev = &sdev->dev;
+	return regmap_init(dev, &regmap_spmi_base, sdev, config);
+}
+EXPORT_SYMBOL_GPL(regmap_init_spmi_base);
+
+/**
+ * devm_regmap_init_spmi_base(): Create managed regmap for Base register space
+ * @sdev:	SPMI device that will be interacted with
+ * @config:	Configuration for register map
+ *
+ * Returns a valid struct regmap pointer, or an ERR_PTR() on error.  The
+ * device management code frees the regmap automatically.
+ */
+struct regmap *devm_regmap_init_spmi_base(struct spmi_device *sdev,
+					  const struct regmap_config *config)
+{
+	struct device *dev = &sdev->dev;
+	return devm_regmap_init(dev, &regmap_spmi_base, sdev, config);
+}
+EXPORT_SYMBOL_GPL(devm_regmap_init_spmi_base);
+
+/* Read @val_size bytes starting at the 16-bit register address in @reg. */
+static int regmap_spmi_ext_read(void *context,
+				const void *reg, size_t reg_size,
+				void *val, size_t val_size)
+{
+	int err = 0;
+	size_t len;
+	u16 addr;
+
+	BUG_ON(reg_size != 2);
+
+	addr = *(u16 *)reg;
+
+	/*
+	 * Split accesses into two to take advantage of the more
+	 * bandwidth-efficient 'Extended Register Read' command when possible
+	 */
+	while (addr <= 0xFF && val_size) {
+		len = min_t(size_t, val_size, 16);
+		err = spmi_ext_register_read(context, addr, val, len);
+		if (err)
+			goto err_out;
+
+		addr += len;
+		val += len;
+		val_size -= len;
+	}
+
+	/* Remaining bytes: long reads of at most 8 bytes, @len each time */
+	while (val_size) {
+		len = min_t(size_t, val_size, 8);
+		err = spmi_ext_register_readl(context, addr, val, len);
+		if (err)
+			goto err_out;
+
+		addr += len;
+		val += len;
+		val_size -= len;
+	}
+
+err_out:
+	return err;
+}
+
+/* Write @val_size bytes starting at the 16-bit register address in @reg. */
+static int regmap_spmi_ext_gather_write(void *context,
+					const void *reg, size_t reg_size,
+					const void *val, size_t val_size)
+{
+	size_t chunk;
+	u16 addr;
+	int err;
+
+	BUG_ON(reg_size != 2);
+
+	addr = *(u16 *)reg;
+
+	/* Up to 16 bytes per command while the address fits in 8 bits */
+	for (; addr <= 0xFF && val_size; val_size -= chunk) {
+		chunk = min_t(size_t, val_size, 16);
+
+		err = spmi_ext_register_write(context, addr, val, chunk);
+		if (err)
+			return err;
+
+		addr += chunk;
+		val += chunk;
+	}
+
+	/* Up to 8 bytes per command for whatever is left */
+	for (; val_size; val_size -= chunk) {
+		chunk = min_t(size_t, val_size, 8);
+
+		err = spmi_ext_register_writel(context, addr, val, chunk);
+		if (err)
+			return err;
+
+		addr += chunk;
+		val += chunk;
+	}
+
+	return 0;
+}
+
+static int regmap_spmi_ext_write(void *context, const void *data,
+				 size_t count)
+{
+	const void *vals = data + 2;	/* values follow the 16-bit address */
+	BUG_ON(count < 2);
+	return regmap_spmi_ext_gather_write(context, data, 2, vals, count - 2);
+}
+
+static struct regmap_bus regmap_spmi_ext = {	/* 16-bit address accessors */
+	.read				= regmap_spmi_ext_read,
+	.write				= regmap_spmi_ext_write,
+	.gather_write			= regmap_spmi_ext_gather_write,
+	.reg_format_endian_default	= REGMAP_ENDIAN_NATIVE,
+	.val_format_endian_default	= REGMAP_ENDIAN_NATIVE,
+};
+
+/**
+ * regmap_init_spmi_ext(): Create regmap for Ext register space
+ * @sdev:	Device that will be interacted with
+ * @config:	Configuration for register map
+ *
+ * Returns a valid struct regmap pointer, or an ERR_PTR() on error.
+ */
+struct regmap *regmap_init_spmi_ext(struct spmi_device *sdev,
+				    const struct regmap_config *config)
+{
+	struct device *dev = &sdev->dev;
+	return regmap_init(dev, &regmap_spmi_ext, sdev, config);
+}
+EXPORT_SYMBOL_GPL(regmap_init_spmi_ext);
+
+/**
+ * devm_regmap_init_spmi_ext(): Create managed regmap for Ext register space
+ * @sdev:	SPMI device that will be interacted with
+ * @config:	Configuration for register map
+ *
+ * Returns a valid struct regmap pointer, or an ERR_PTR() on error.  The
+ * device management code frees the regmap automatically.
+ */
+struct regmap *devm_regmap_init_spmi_ext(struct spmi_device *sdev,
+				     const struct regmap_config *config)
+{
+	struct device *dev = &sdev->dev;
+	return devm_regmap_init(dev, &regmap_spmi_ext, sdev, config);
+}
+EXPORT_SYMBOL_GPL(devm_regmap_init_spmi_ext);
+
+MODULE_LICENSE("GPL");
diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c
new file mode 100644
index 00000000000..74d8c0672cf
--- /dev/null
+++ b/drivers/base/regmap/regmap.c
@@ -0,0 +1,2564 @@
+/*
+ * Register map access API
+ *
+ * Copyright 2011 Wolfson Microelectronics plc
+ *
+ * Author: Mark Brown <broonie@opensource.wolfsonmicro.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/device.h>
+#include <linux/slab.h>
+#include <linux/export.h>
+#include <linux/mutex.h>
+#include <linux/err.h>
+#include <linux/rbtree.h>
+#include <linux/sched.h>
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/regmap.h>
+
+#include "internal.h"
+
+/*
+ * Sometimes for failures during very early init the trace
+ * infrastructure isn't available early enough to be used.  For this
+ * sort of problem defining LOG_DEVICE will add printks for basic
+ * register I/O on a specific device.
+ */
+#undef LOG_DEVICE
+
+static int _regmap_update_bits(struct regmap *map, unsigned int reg,
+			       unsigned int mask, unsigned int val,
+			       bool *change);
+/* Forward declarations for the _regmap_bus_* read/write helpers */
+static int _regmap_bus_reg_read(void *context, unsigned int reg,
+				unsigned int *val);
+static int _regmap_bus_read(void *context, unsigned int reg,
+			    unsigned int *val);
+static int _regmap_bus_formatted_write(void *context, unsigned int reg,
+				       unsigned int val);
+static int _regmap_bus_reg_write(void *context, unsigned int reg,
+				 unsigned int val);
+static int _regmap_bus_raw_write(void *context, unsigned int reg,
+				 unsigned int val);
+
+/* Return true if @reg lies within any of the @nranges entries of @ranges. */
+bool regmap_reg_in_ranges(unsigned int reg,
+			  const struct regmap_range *ranges,
+			  unsigned int nranges)
+{
+	int i;
+
+	for (i = 0; i < nranges; i++)
+		if (regmap_reg_in_range(reg, &ranges[i]))
+			return true;
+	return false;
+}
+EXPORT_SYMBOL_GPL(regmap_reg_in_ranges);
+
+/*
+ * Check @reg against @table: "no" ranges veto, then "yes" ranges decide.
+ */
+bool regmap_check_range_table(struct regmap *map, unsigned int reg,
+			      const struct regmap_access_table *table)
+{
+	if (regmap_reg_in_ranges(reg, table->no_ranges, table->n_no_ranges))
+		return false;
+
+	/* An empty "yes" list accepts everything that was not rejected */
+	return table->n_yes_ranges == 0 ||
+	       regmap_reg_in_ranges(reg, table->yes_ranges,
+				    table->n_yes_ranges);
+}
+EXPORT_SYMBOL_GPL(regmap_check_range_table);
+
+/* Can @reg be written?  Callback first, then access table, else yes. */
+bool regmap_writeable(struct regmap *map, unsigned int reg)
+{
+	bool past_end = map->max_register && reg > map->max_register;
+
+	if (past_end)
+		return false;
+	if (map->writeable_reg)
+		return map->writeable_reg(map->dev, reg);
+	if (map->wr_table)
+		return regmap_check_range_table(map, reg, map->wr_table);
+	return true;
+}
+
+/* Can @reg be read?  Callback first, then access table, else yes. */
+bool regmap_readable(struct regmap *map, unsigned int reg)
+{
+	bool past_end = map->max_register && reg > map->max_register;
+
+	/* A map with a format_write hook is treated as unreadable */
+	if (past_end || map->format.format_write)
+		return false;
+
+	if (map->readable_reg)
+		return map->readable_reg(map->dev, reg);
+	if (map->rd_table)
+		return regmap_check_range_table(map, reg, map->rd_table);
+
+	return true;
+}
+
+/* Is @reg volatile?  Registers that cannot be read never are. */
+bool regmap_volatile(struct regmap *map, unsigned int reg)
+{
+	if (!regmap_readable(map, reg))
+		return false;
+
+	/* The driver callback takes precedence over the table */
+	if (map->volatile_reg)
+		return map->volatile_reg(map->dev, reg);
+
+	if (map->volatile_table)
+		return regmap_check_range_table(map, reg, map->volatile_table);
+
+	/* No explicit information: volatile unless cache_ops is set */
+	return map->cache_ops == NULL;
+}
+
+/* Is @reg precious?  Registers that cannot be read never are. */
+bool regmap_precious(struct regmap *map, unsigned int reg)
+{
+	bool ret = false;
+
+	if (!regmap_readable(map, reg))
+		return false;
+	if (map->precious_reg)
+		ret = map->precious_reg(map->dev, reg);
+	else if (map->precious_table)
+		ret = regmap_check_range_table(map, reg, map->precious_table);
+	return ret;
+}
+
+/* True only if each of the @num registers starting at @reg is volatile. */
+static bool regmap_volatile_range(struct regmap *map, unsigned int reg,
+	size_t num)
+{
+	unsigned int off;
+
+	for (off = 0; off < num; off++)
+		if (!regmap_volatile(map, reg + off))
+			return false;
+	return true;
+}
+
+/* Pack a 2-bit register and 6-bit value into one byte of the work buffer. */
+static void regmap_format_2_6_write(struct regmap *map,
+				     unsigned int reg, unsigned int val)
+{
+	u8 *byte = map->work_buf;
+	byte[0] = val | (reg << 6);
+}
+
+/* 4-bit register, 12-bit value: one big-endian 16-bit word. */
+static void regmap_format_4_12_write(struct regmap *map,
+				     unsigned int reg, unsigned int val)
+{
+	*(__be16 *)map->work_buf = cpu_to_be16(val | (reg << 12));
+}
+
+/* 7-bit register, 9-bit value: one big-endian 16-bit word. */
+static void regmap_format_7_9_write(struct regmap *map,
+				    unsigned int reg, unsigned int val)
+{
+	*(__be16 *)map->work_buf = cpu_to_be16(val | (reg << 9));
+}
+
+/* 10-bit register, 14-bit value: three bytes, most significant first. */
+static void regmap_format_10_14_write(struct regmap *map,
+				    unsigned int reg, unsigned int val)
+{
+	u8 *bytes = map->work_buf;
+	bytes[0] = reg >> 2;
+	bytes[1] = (reg << 6) | (val >> 8);
+	bytes[2] = val;
+}
+
+/* Store the low byte of (val << shift) into the single byte at buf. */
+static void regmap_format_8(void *buf, unsigned int val, unsigned int shift)
+{
+	u8 *out = buf;
+	*out = val << shift;
+}
+
+/* Store (val << shift) at buf as a big-endian 16-bit word. */
+static void regmap_format_16_be(void *buf, unsigned int val, unsigned int shift)
+{
+	__be16 *word = buf;
+	*word = cpu_to_be16(val << shift);
+}
+
+/* Store (val << shift) at buf as a little-endian 16-bit word. */
+static void regmap_format_16_le(void *buf, unsigned int val, unsigned int shift)
+{
+	__le16 *word = buf;
+	*word = cpu_to_le16(val << shift);
+}
+
+static void regmap_format_16_native(void *buf, unsigned int val,
+				    unsigned int shift)
+{
+	((u16 *)buf)[0] = val << shift;	/* stored as-is, no byte swap */
+}
+
+/* Store the low 24 bits of (val << shift) at buf, most significant first. */
+static void regmap_format_24(void *buf, unsigned int val, unsigned int shift)
+{
+	const unsigned int shifted = val << shift;
+	u8 *bytes = buf;
+
+	bytes[0] = shifted >> 16;
+	bytes[1] = shifted >> 8;
+	bytes[2] = shifted;
+}
+
+/* Store (val << shift) at buf as a big-endian 32-bit word. */
+static void regmap_format_32_be(void *buf, unsigned int val, unsigned int shift)
+{
+	__be32 *word = buf;
+	*word = cpu_to_be32(val << shift);
+}
+
+/* Store (val << shift) at buf as a little-endian 32-bit word. */
+static void regmap_format_32_le(void *buf, unsigned int val, unsigned int shift)
+{
+	__le32 *word = buf;
+	*word = cpu_to_le32(val << shift);
+}
+
+static void regmap_format_32_native(void *buf, unsigned int val,
+				    unsigned int shift)
+{
+	((u32 *)buf)[0] = val << shift;	/* stored as-is, no byte swap */
+}
+
+static void regmap_parse_inplace_noop(void *buf)
+{	/* Intentionally empty: @buf is left untouched */
+}
+
+/* Return the single byte stored at buf. */
+static unsigned int regmap_parse_8(const void *buf)
+{
+	const u8 *byte = buf;
+	return *byte;
+}
+
+/* Return the big-endian 16-bit word at buf in CPU byte order. */
+static unsigned int regmap_parse_16_be(const void *buf)
+{
+	const __be16 *word = buf;
+	return be16_to_cpu(*word);
+}
+
+/* Return the little-endian 16-bit word at buf in CPU byte order. */
+static unsigned int regmap_parse_16_le(const void *buf)
+{
+	const __le16 *word = buf;
+	return le16_to_cpu(*word);
+}
+
+/* Convert the big-endian 16-bit word at buf to CPU order, in place. */
+static void regmap_parse_16_be_inplace(void *buf)
+{
+	__be16 *word = buf;
+	*word = be16_to_cpu(*word);
+}
+
+/* Convert the little-endian 16-bit word at buf to CPU order, in place. */
+static void regmap_parse_16_le_inplace(void *buf)
+{
+	__le16 *word = buf;
+	*word = le16_to_cpu(*word);
+}
+
+static unsigned int regmap_parse_16_native(const void *buf)
+{
+	return ((const u16 *)buf)[0];	/* read as-is, no byte swap */
+}
+
+/* Assemble a 24-bit value from three bytes at buf, most significant first. */
+static unsigned int regmap_parse_24(const void *buf)
+{
+	const u8 *bytes = buf;
+	unsigned int val = bytes[0];
+	val = (val << 8) | bytes[1];
+	val = (val << 8) | bytes[2];
+	return val;
+}
+
+/* Return the big-endian 32-bit word at buf in CPU byte order. */
+static unsigned int regmap_parse_32_be(const void *buf)
+{
+	const __be32 *word = buf;
+	return be32_to_cpu(*word);
+}
+
+/* Return the little-endian 32-bit word at buf in CPU byte order. */
+static unsigned int regmap_parse_32_le(const void *buf)
+{
+	const __le32 *word = buf;
+	return le32_to_cpu(*word);
+}
+
+/* Convert the big-endian 32-bit word at buf to CPU order, in place. */
+static void regmap_parse_32_be_inplace(void *buf)
+{
+	__be32 *word = buf;
+	*word = be32_to_cpu(*word);
+}
+
+/* Convert the little-endian 32-bit word at buf to CPU order, in place. */
+static void regmap_parse_32_le_inplace(void *buf)
+{
+	__le32 *word = buf;
+	*word = le32_to_cpu(*word);
+}
+
+static unsigned int regmap_parse_32_native(const void *buf)
+{
+	return ((const u32 *)buf)[0];	/* read as-is, no byte swap */
+}
+
+/* Lock callback: take the map's mutex. */
+static void regmap_lock_mutex(void *__map)
+{
+	mutex_lock(&((struct regmap *)__map)->mutex);
+}
+
+/* Unlock callback: release the map's mutex. */
+static void regmap_unlock_mutex(void *__map)
+{
+	mutex_unlock(&((struct regmap *)__map)->mutex);
+}
+
+static void regmap_lock_spinlock(void *__map)
+__acquires(&map->spinlock)
+{
+	struct regmap *map = __map;
+	unsigned long flags;
+	/* Save the IRQ flags in the map only once the lock is held */
+	spin_lock_irqsave(&map->spinlock, flags);
+	map->spinlock_flags = flags;
+}
+
+static void regmap_unlock_spinlock(void *__map)
+__releases(&map->spinlock)
+{
+	struct regmap *map = __map;	/* flags come from map->spinlock_flags */
+	spin_unlock_irqrestore(&map->spinlock, map->spinlock_flags);
+}
+
+static void dev_get_regmap_release(struct device *dev, void *res)
+{
+	/*
+	 * Nothing to release: this devres entry does not manage the
+	 * regmap, it only provides a simple way to get the regmap back
+	 * given a struct device.
+	 */
+}
+
+/* Insert @data into the map's range tree; false if it overlaps a node. */
+static bool _regmap_range_add(struct regmap *map,
+			      struct regmap_range_node *data)
+{
+	struct rb_root *root = &map->range_tree;
+	struct rb_node **link = &root->rb_node;
+	struct rb_node *parent = NULL;
+
+	while (*link) {
+		struct regmap_range_node *cur =
+			container_of(*link, struct regmap_range_node, node);
+		parent = *link;
+		if (data->range_max < cur->range_min)
+			link = &parent->rb_left;
+		else if (data->range_min > cur->range_max)
+			link = &parent->rb_right;
+		else
+			return false;
+	}
+
+	rb_link_node(&data->node, parent, link);
+	rb_insert_color(&data->node, root);
+	return true;
+}
+
+/*
+ * Find the range node whose [range_min, range_max] interval contains
+ * reg.  Returns NULL when no range in the tree covers the register.
+ */
+static struct regmap_range_node *_regmap_range_lookup(struct regmap *map,
+						      unsigned int reg)
+{
+	struct rb_node *pos = map->range_tree.rb_node;
+	struct regmap_range_node *cur;
+
+	while (pos) {
+		cur = rb_entry(pos, struct regmap_range_node, node);
+
+		if (reg >= cur->range_min && reg <= cur->range_max)
+			return cur;
+
+		/* Not inside this interval: descend towards reg */
+		pos = (reg < cur->range_min) ? pos->rb_left : pos->rb_right;
+	}
+
+	return NULL;
+}
+
+/*
+ * Remove and free every node in the map's range tree, then free the
+ * work buffer that is used while switching pages.
+ */
+static void regmap_range_exit(struct regmap *map)
+{
+	struct rb_node *pos, *following;
+
+	for (pos = rb_first(&map->range_tree); pos; pos = following) {
+		struct regmap_range_node *victim =
+			rb_entry(pos, struct regmap_range_node, node);
+
+		/* Fetch the successor before the node is unlinked */
+		following = rb_next(pos);
+		rb_erase(pos, &map->range_tree);
+		kfree(victim);
+	}
+
+	kfree(map->selector_work_buf);
+}
+
+/*
+ * Associate a regmap with a device: record the device in the map, set
+ * up debugfs for it and register a devres entry so that
+ * dev_get_regmap() can find the map again.  Returns 0 on success or
+ * -ENOMEM if the devres entry cannot be allocated, in which case the
+ * debugfs setup is undone.
+ */
+int regmap_attach_dev(struct device *dev, struct regmap *map,
+		      const struct regmap_config *config)
+{
+	struct regmap **slot;
+
+	map->dev = dev;
+	regmap_debugfs_init(map, config->name);
+
+	/* The devres payload is just a pointer back to the map */
+	slot = devres_alloc(dev_get_regmap_release, sizeof(*slot),
+			    GFP_KERNEL);
+	if (slot == NULL) {
+		regmap_debugfs_exit(map);
+		return -ENOMEM;
+	}
+
+	*slot = map;
+	devres_add(dev, slot);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(regmap_attach_dev);
+
+/**
+ * regmap_init(): Initialise register map
+ *
+ * @dev: Device that will be interacted with
+ * @bus: Bus-specific callbacks to use with device
+ * @bus_context: Data passed to bus-specific callbacks
+ * @config: Configuration for register map
+ *
+ * The return value will be an ERR_PTR() on error or a valid pointer to
+ * a struct regmap.  This function should generally not be called
+ * directly, it should be called by bus-specific init functions.
+ *
+ * A NULL @config yields ERR_PTR(-EINVAL).  @bus may be NULL, in which
+ * case the reg_read/reg_write callbacks from @config are used and no
+ * wire-format setup is done.  When @dev is NULL the map is not attached
+ * to a device by this function.
+ */
+struct regmap *regmap_init(struct device *dev,
+			   const struct regmap_bus *bus,
+			   void *bus_context,
+			   const struct regmap_config *config)
+{
+	struct regmap *map;
+	/* Default error; only replaced by -ENOMEM or a callee's result */
+	int ret = -EINVAL;
+	enum regmap_endian reg_endian, val_endian;
+	int i, j;
+
+	if (!config)
+		goto err;
+
+	map = kzalloc(sizeof(*map), GFP_KERNEL);
+	if (map == NULL) {
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	/*
+	 * Locking: caller-supplied callbacks are used only when both lock
+	 * and unlock are given.  Otherwise a spinlock is chosen when the
+	 * bus or the config asks for fast_io, and a mutex in all other
+	 * cases, with the map itself as the lock argument.
+	 */
+	if (config->lock && config->unlock) {
+		map->lock = config->lock;
+		map->unlock = config->unlock;
+		map->lock_arg = config->lock_arg;
+	} else {
+		if ((bus && bus->fast_io) ||
+		    config->fast_io) {
+			spin_lock_init(&map->spinlock);
+			map->lock = regmap_lock_spinlock;
+			map->unlock = regmap_unlock_spinlock;
+		} else {
+			mutex_init(&map->mutex);
+			map->lock = regmap_lock_mutex;
+			map->unlock = regmap_unlock_mutex;
+		}
+		map->lock_arg = map;
+	}
+	/*
+	 * Byte sizes of the register, padding and value parts of a
+	 * transfer.  Padding is counted in whole bytes only; the
+	 * remaining pad bits (pad_bits % 8) are kept in reg_shift.
+	 */
+	map->format.reg_bytes = DIV_ROUND_UP(config->reg_bits, 8);
+	map->format.pad_bytes = config->pad_bits / 8;
+	map->format.val_bytes = DIV_ROUND_UP(config->val_bits, 8);
+	map->format.buf_size = DIV_ROUND_UP(config->reg_bits +
+			config->val_bits + config->pad_bits, 8);
+	map->reg_shift = config->pad_bits % 8;
+	/* A stride of 0 in the config means the default of 1 */
+	if (config->reg_stride)
+		map->reg_stride = config->reg_stride;
+	else
+		map->reg_stride = 1;
+	map->use_single_rw = config->use_single_rw;
+	map->can_multi_write = config->can_multi_write;
+	map->dev = dev;
+	map->bus = bus;
+	map->bus_context = bus_context;
+	map->max_register = config->max_register;
+	map->wr_table = config->wr_table;
+	map->rd_table = config->rd_table;
+	map->volatile_table = config->volatile_table;
+	map->precious_table = config->precious_table;
+	map->writeable_reg = config->writeable_reg;
+	map->readable_reg = config->readable_reg;
+	map->volatile_reg = config->volatile_reg;
+	map->precious_reg = config->precious_reg;
+	map->cache_type = config->cache_type;
+	/* The name pointer is stored as-is, not copied */
+	map->name = config->name;
+
+	spin_lock_init(&map->async_lock);
+	INIT_LIST_HEAD(&map->async_list);
+	INIT_LIST_HEAD(&map->async_free);
+	init_waitqueue_head(&map->async_waitq);
+
+	/*
+	 * Flag masks from the config win if either one is set; otherwise
+	 * only the read flag mask is inherited from the bus.
+	 */
+	if (config->read_flag_mask || config->write_flag_mask) {
+		map->read_flag_mask = config->read_flag_mask;
+		map->write_flag_mask = config->write_flag_mask;
+	} else if (bus) {
+		map->read_flag_mask = bus->read_flag_mask;
+	}
+
+	/*
+	 * Pick the register accessors.  Without a bus the config's own
+	 * callbacks are used; a bus lacking raw read or write goes through
+	 * the per-register bus wrappers.  Neither case needs the format
+	 * setup below.  reg_write for the remaining case is chosen once
+	 * the format is known.
+	 */
+	if (!bus) {
+		map->reg_read  = config->reg_read;
+		map->reg_write = config->reg_write;
+
+		map->defer_caching = false;
+		goto skip_format_initialization;
+	} else if (!bus->read || !bus->write) {
+		map->reg_read = _regmap_bus_reg_read;
+		map->reg_write = _regmap_bus_reg_write;
+
+		map->defer_caching = false;
+		goto skip_format_initialization;
+	} else {
+		map->reg_read  = _regmap_bus_read;
+	}
+
+	/* Endianness resolution order: config, then bus default, then big */
+	reg_endian = config->reg_format_endian;
+	if (reg_endian == REGMAP_ENDIAN_DEFAULT)
+		reg_endian = bus->reg_format_endian_default;
+	if (reg_endian == REGMAP_ENDIAN_DEFAULT)
+		reg_endian = REGMAP_ENDIAN_BIG;
+
+	val_endian = config->val_format_endian;
+	if (val_endian == REGMAP_ENDIAN_DEFAULT)
+		val_endian = bus->val_format_endian_default;
+	if (val_endian == REGMAP_ENDIAN_DEFAULT)
+		val_endian = REGMAP_ENDIAN_BIG;
+
+	/*
+	 * Select the register formatter.  The 2, 4, 7 and 10 bit cases
+	 * are each only accepted with one specific value width and
+	 * install a combined format_write; the byte-multiple cases
+	 * install a format_reg.  Anything unsupported fails with the
+	 * default -EINVAL.
+	 */
+	switch (config->reg_bits + map->reg_shift) {
+	case 2:
+		switch (config->val_bits) {
+		case 6:
+			map->format.format_write = regmap_format_2_6_write;
+			break;
+		default:
+			goto err_map;
+		}
+		break;
+
+	case 4:
+		switch (config->val_bits) {
+		case 12:
+			map->format.format_write = regmap_format_4_12_write;
+			break;
+		default:
+			goto err_map;
+		}
+		break;
+
+	case 7:
+		switch (config->val_bits) {
+		case 9:
+			map->format.format_write = regmap_format_7_9_write;
+			break;
+		default:
+			goto err_map;
+		}
+		break;
+
+	case 10:
+		switch (config->val_bits) {
+		case 14:
+			map->format.format_write = regmap_format_10_14_write;
+			break;
+		default:
+			goto err_map;
+		}
+		break;
+
+	case 8:
+		map->format.format_reg = regmap_format_8;
+		break;
+
+	case 16:
+		switch (reg_endian) {
+		case REGMAP_ENDIAN_BIG:
+			map->format.format_reg = regmap_format_16_be;
+			break;
+		case REGMAP_ENDIAN_NATIVE:
+			map->format.format_reg = regmap_format_16_native;
+			break;
+		default:
+			goto err_map;
+		}
+		break;
+
+	case 24:
+		if (reg_endian != REGMAP_ENDIAN_BIG)
+			goto err_map;
+		map->format.format_reg = regmap_format_24;
+		break;
+
+	case 32:
+		switch (reg_endian) {
+		case REGMAP_ENDIAN_BIG:
+			map->format.format_reg = regmap_format_32_be;
+			break;
+		case REGMAP_ENDIAN_NATIVE:
+			map->format.format_reg = regmap_format_32_native;
+			break;
+		default:
+			goto err_map;
+		}
+		break;
+
+	default:
+		goto err_map;
+	}
+
+	/*
+	 * Native-endian values need no conversion.  This is set ahead of
+	 * the switch below because its native cases do not assign
+	 * parse_inplace themselves.
+	 */
+	if (val_endian == REGMAP_ENDIAN_NATIVE)
+		map->format.parse_inplace = regmap_parse_inplace_noop;
+
+	/*
+	 * Select the value formatter and parsers.  There is no default
+	 * case: other widths leave format_val unset and are accepted or
+	 * rejected by the checks that follow the switch.
+	 */
+	switch (config->val_bits) {
+	case 8:
+		map->format.format_val = regmap_format_8;
+		map->format.parse_val = regmap_parse_8;
+		map->format.parse_inplace = regmap_parse_inplace_noop;
+		break;
+	case 16:
+		switch (val_endian) {
+		case REGMAP_ENDIAN_BIG:
+			map->format.format_val = regmap_format_16_be;
+			map->format.parse_val = regmap_parse_16_be;
+			map->format.parse_inplace = regmap_parse_16_be_inplace;
+			break;
+		case REGMAP_ENDIAN_LITTLE:
+			map->format.format_val = regmap_format_16_le;
+			map->format.parse_val = regmap_parse_16_le;
+			map->format.parse_inplace = regmap_parse_16_le_inplace;
+			break;
+		case REGMAP_ENDIAN_NATIVE:
+			map->format.format_val = regmap_format_16_native;
+			map->format.parse_val = regmap_parse_16_native;
+			break;
+		default:
+			goto err_map;
+		}
+		break;
+	case 24:
+		/* 24-bit values are big endian only; no parse_inplace is set */
+		if (val_endian != REGMAP_ENDIAN_BIG)
+			goto err_map;
+		map->format.format_val = regmap_format_24;
+		map->format.parse_val = regmap_parse_24;
+		break;
+	case 32:
+		switch (val_endian) {
+		case REGMAP_ENDIAN_BIG:
+			map->format.format_val = regmap_format_32_be;
+			map->format.parse_val = regmap_parse_32_be;
+			map->format.parse_inplace = regmap_parse_32_be_inplace;
+			break;
+		case REGMAP_ENDIAN_LITTLE:
+			map->format.format_val = regmap_format_32_le;
+			map->format.parse_val = regmap_parse_32_le;
+			map->format.parse_inplace = regmap_parse_32_le_inplace;
+			break;
+		case REGMAP_ENDIAN_NATIVE:
+			map->format.format_val = regmap_format_32_native;
+			map->format.parse_val = regmap_parse_32_native;
+			break;
+		default:
+			goto err_map;
+		}
+		break;
+	}
+
+	/*
+	 * The combined format_write formats are big endian only and are
+	 * always written one register at a time.
+	 */
+	if (map->format.format_write) {
+		if ((reg_endian != REGMAP_ENDIAN_BIG) ||
+		    (val_endian != REGMAP_ENDIAN_BIG))
+			goto err_map;
+		map->use_single_rw = true;
+	}
+
+	/* Need either a combined formatter or both halves */
+	if (!map->format.format_write &&
+	    !(map->format.format_reg && map->format.format_val))
+		goto err_map;
+
+	map->work_buf = kzalloc(map->format.buf_size, GFP_KERNEL);
+	if (map->work_buf == NULL) {
+		ret = -ENOMEM;
+		goto err_map;
+	}
+
+	/*
+	 * Choose the write path.  With raw writes caching is deferred
+	 * (defer_caching) instead of being done in _regmap_write().
+	 */
+	if (map->format.format_write) {
+		map->defer_caching = false;
+		map->reg_write = _regmap_bus_formatted_write;
+	} else if (map->format.format_val) {
+		map->defer_caching = true;
+		map->reg_write = _regmap_bus_raw_write;
+	}
+
+skip_format_initialization:
+
+	/*
+	 * Build the tree of indirectly accessed ranges.  ret still holds
+	 * -EINVAL here, so the sanity-check failures below return that.
+	 */
+	map->range_tree = RB_ROOT;
+	for (i = 0; i < config->num_ranges; i++) {
+		const struct regmap_range_cfg *range_cfg = &config->ranges[i];
+		struct regmap_range_node *new;
+
+		/* Sanity check */
+		if (range_cfg->range_max < range_cfg->range_min) {
+			dev_err(map->dev, "Invalid range %d: %d < %d\n", i,
+				range_cfg->range_max, range_cfg->range_min);
+			goto err_range;
+		}
+
+		if (range_cfg->range_max > map->max_register) {
+			dev_err(map->dev, "Invalid range %d: %d > %d\n", i,
+				range_cfg->range_max, map->max_register);
+			goto err_range;
+		}
+
+		if (range_cfg->selector_reg > map->max_register) {
+			dev_err(map->dev,
+				"Invalid range %d: selector out of map\n", i);
+			goto err_range;
+		}
+
+		if (range_cfg->window_len == 0) {
+			dev_err(map->dev, "Invalid range %d: window_len 0\n",
+				i);
+			goto err_range;
+		}
+
+		/* Make sure, that this register range has no selector
+		   or data window within its boundary */
+		for (j = 0; j < config->num_ranges; j++) {
+			unsigned sel_reg = config->ranges[j].selector_reg;
+			unsigned win_min = config->ranges[j].window_start;
+			unsigned win_max = win_min +
+					   config->ranges[j].window_len - 1;
+
+			/* Allow data window inside its own virtual range */
+			if (j == i)
+				continue;
+
+			if (range_cfg->range_min <= sel_reg &&
+			    sel_reg <= range_cfg->range_max) {
+				dev_err(map->dev,
+					"Range %d: selector for %d in window\n",
+					i, j);
+				goto err_range;
+			}
+
+			/* Reject any overlap between window j and range i */
+			if (!(win_max < range_cfg->range_min ||
+			      win_min > range_cfg->range_max)) {
+				dev_err(map->dev,
+					"Range %d: window for %d in window\n",
+					i, j);
+				goto err_range;
+			}
+		}
+
+		new = kzalloc(sizeof(*new), GFP_KERNEL);
+		if (new == NULL) {
+			ret = -ENOMEM;
+			goto err_range;
+		}
+
+		new->map = map;
+		new->name = range_cfg->name;
+		new->range_min = range_cfg->range_min;
+		new->range_max = range_cfg->range_max;
+		new->selector_reg = range_cfg->selector_reg;
+		new->selector_mask = range_cfg->selector_mask;
+		new->selector_shift = range_cfg->selector_shift;
+		new->window_start = range_cfg->window_start;
+		new->window_len = range_cfg->window_len;
+
+		/* Fails if the range overlaps one that was already added */
+		if (!_regmap_range_add(map, new)) {
+			dev_err(map->dev, "Failed to add range %d\n", i);
+			/* Not in the tree, so it must be freed here */
+			kfree(new);
+			goto err_range;
+		}
+
+		/* One selector buffer is shared by all ranges */
+		if (map->selector_work_buf == NULL) {
+			map->selector_work_buf =
+				kzalloc(map->format.buf_size, GFP_KERNEL);
+			if (map->selector_work_buf == NULL) {
+				ret = -ENOMEM;
+				goto err_range;
+			}
+		}
+	}
+
+	ret = regcache_init(map, config);
+	if (ret != 0)
+		goto err_range;
+
+	if (dev) {
+		ret = regmap_attach_dev(dev, map, config);
+		if (ret != 0)
+			goto err_regcache;
+	}
+
+	return map;
+
+	/* Unwind in reverse order of setup; each label falls through */
+err_regcache:
+	regcache_exit(map);
+err_range:
+	/* Frees all range nodes in the tree and selector_work_buf */
+	regmap_range_exit(map);
+	kfree(map->work_buf);
+err_map:
+	kfree(map);
+err:
+	return ERR_PTR(ret);
+}
+EXPORT_SYMBOL_GPL(regmap_init);
+
+/* devres release callback: res holds a pointer to the regmap to destroy. */
+static void devm_regmap_release(struct device *dev, void *res)
+{
+	struct regmap **held = res;
+
+	regmap_exit(*held);
+}
+
+/**
+ * devm_regmap_init(): Initialise managed register map
+ *
+ * @dev: Device that will be interacted with
+ * @bus: Bus-specific callbacks to use with device
+ * @bus_context: Data passed to bus-specific callbacks
+ * @config: Configuration for register map
+ *
+ * Creates a register map with regmap_init() and ties its lifetime to
+ * @dev, so that the device management code frees it automatically.
+ * Returns a valid struct regmap pointer, or an ERR_PTR() on failure.
+ * Like regmap_init() this is normally called by bus-specific init
+ * functions rather than directly.
+ */
+struct regmap *devm_regmap_init(struct device *dev,
+				const struct regmap_bus *bus,
+				void *bus_context,
+				const struct regmap_config *config)
+{
+	struct regmap **holder;
+	struct regmap *map;
+
+	/* Allocate the devres slot first so a late failure can't leak the map */
+	holder = devres_alloc(devm_regmap_release, sizeof(*holder),
+			      GFP_KERNEL);
+	if (holder == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	map = regmap_init(dev, bus, bus_context, config);
+	if (IS_ERR(map)) {
+		devres_free(holder);
+		return map;
+	}
+
+	*holder = map;
+	devres_add(dev, holder);
+
+	return map;
+}
+EXPORT_SYMBOL_GPL(devm_regmap_init);
+
+/*
+ * Fill in a regmap_field from its reg_field description.
+ *
+ * The mask covers bits lsb..msb (inclusive) of the register.  It is
+ * built with explicit width checks rather than BIT(msb - lsb + 1):
+ * BIT() shifts an unsigned long, so a field spanning the whole 32-bit
+ * register would shift by the full type width where long is 32 bits,
+ * which is undefined behaviour.  A description with msb < lsb gets an
+ * empty mask.
+ */
+static void regmap_field_init(struct regmap_field *rm_field,
+	struct regmap *regmap, struct reg_field reg_field)
+{
+	const int mask_width = 8 * sizeof(rm_field->mask);
+	int field_bits = reg_field.msb - reg_field.lsb + 1;
+	unsigned int field_mask;
+
+	if (field_bits <= 0)
+		field_mask = 0;
+	else if (field_bits >= mask_width)
+		field_mask = ~0U;
+	else
+		field_mask = (1U << field_bits) - 1;
+
+	rm_field->regmap = regmap;
+	rm_field->reg = reg_field.reg;
+	rm_field->shift = reg_field.lsb;
+	rm_field->mask = field_mask << reg_field.lsb;
+	rm_field->id_size = reg_field.id_size;
+	rm_field->id_offset = reg_field.id_offset;
+}
+
+/**
+ * devm_regmap_field_alloc(): Allocate and initialise a device-managed
+ * register field in a register map.
+ *
+ * @dev: Device that will be interacted with
+ * @regmap: regmap bank in which this register field is located.
+ * @reg_field: Register field within the bank.
+ *
+ * Returns a valid pointer to a struct regmap_field, or ERR_PTR(-ENOMEM)
+ * if the allocation fails.  The field is allocated with devm_kzalloc()
+ * and is therefore freed automatically by the device management code.
+ */
+struct regmap_field *devm_regmap_field_alloc(struct device *dev,
+		struct regmap *regmap, struct reg_field reg_field)
+{
+	struct regmap_field *rm_field;
+
+	rm_field = devm_kzalloc(dev, sizeof(*rm_field), GFP_KERNEL);
+	if (rm_field == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	regmap_field_init(rm_field, regmap, reg_field);
+
+	return rm_field;
+}
+EXPORT_SYMBOL_GPL(devm_regmap_field_alloc);
+
+/**
+ * devm_regmap_field_free(): Free a register field allocated using
+ * devm_regmap_field_alloc().  Usually drivers need not call this
+ * function, as memory allocated via devm is freed as part of the
+ * device-driver life-cycle.
+ *
+ * @dev: Device that will be interacted with
+ * @field: regmap field which should be freed.
+ */
+void devm_regmap_field_free(struct device *dev,
+	struct regmap_field *field)
+{
+	/* Releases the field early instead of waiting for device teardown */
+	devm_kfree(dev, field);
+}
+EXPORT_SYMBOL_GPL(devm_regmap_field_free);
+
+/**
+ * regmap_field_alloc(): Allocate and initialise a register field
+ * in a register map.
+ *
+ * @regmap: regmap bank in which this register field is located.
+ * @reg_field: Register field within the bank.
+ *
+ * Returns a valid pointer to a struct regmap_field, or ERR_PTR(-ENOMEM)
+ * if the allocation fails.  The caller owns the result and should
+ * release it with regmap_field_free() once it has finished with it.
+ */
+struct regmap_field *regmap_field_alloc(struct regmap *regmap,
+		struct reg_field reg_field)
+{
+	struct regmap_field *rm_field;
+
+	rm_field = kzalloc(sizeof(*rm_field), GFP_KERNEL);
+	if (rm_field == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	regmap_field_init(rm_field, regmap, reg_field);
+
+	return rm_field;
+}
+EXPORT_SYMBOL_GPL(regmap_field_alloc);
+
+/**
+ * regmap_field_free(): Release a register field obtained from
+ * regmap_field_alloc()
+ *
+ * @field: regmap field which should be freed.
+ */
+void regmap_field_free(struct regmap_field *field)
+{
+	/* The field is a single kzalloc() allocation with nothing else to undo */
+	kfree(field);
+}
+EXPORT_SYMBOL_GPL(regmap_field_free);
+
+/**
+ * regmap_reinit_cache(): Reinitialise the current register cache
+ *
+ * @map: Register map to operate on.
+ * @config: New configuration.  Only the cache data will be used.
+ *
+ * Throws away the existing register cache of the map and sets up a
+ * fresh one from @config.  Useful for restoring the cache defaults or
+ * for adjusting the cache configuration after the hardware has been
+ * probed at runtime.
+ *
+ * This function takes no locks; the caller must make sure it cannot
+ * race with any other regmap call on the same map.
+ *
+ * Returns the result of regcache_init().
+ */
+int regmap_reinit_cache(struct regmap *map, const struct regmap_config *config)
+{
+	/* Tear down the old cache and its debugfs view */
+	regcache_exit(map);
+	regmap_debugfs_exit(map);
+
+	/* Take over the cache-related settings from the new config */
+	map->cache_type = config->cache_type;
+	map->max_register = config->max_register;
+	map->precious_reg = config->precious_reg;
+	map->volatile_reg = config->volatile_reg;
+	map->readable_reg = config->readable_reg;
+	map->writeable_reg = config->writeable_reg;
+
+	regmap_debugfs_init(map, config->name);
+
+	/* Start again with normal (non-bypass, non-cache-only) operation */
+	map->cache_only = false;
+	map->cache_bypass = false;
+
+	return regcache_init(map, config);
+}
+EXPORT_SYMBOL_GPL(regmap_reinit_cache);
+
+/**
+ * regmap_exit(): Free a previously allocated register map
+ *
+ * @map: Register map to destroy
+ *
+ * Releases the cache, debugfs entries, range tree, bus context (when
+ * the bus provides free_context), work buffer and any idle async
+ * descriptors, then frees the map itself.
+ */
+void regmap_exit(struct regmap *map)
+{
+	struct regmap_async *async, *tmp;
+
+	regcache_exit(map);
+	regmap_debugfs_exit(map);
+	regmap_range_exit(map);
+
+	if (map->bus && map->bus->free_context)
+		map->bus->free_context(map->bus_context);
+
+	kfree(map->work_buf);
+
+	/* Drain the pool of idle async descriptors */
+	list_for_each_entry_safe(async, tmp, &map->async_free, list) {
+		list_del(&async->list);
+		kfree(async->work_buf);
+		kfree(async);
+	}
+
+	kfree(map);
+}
+EXPORT_SYMBOL_GPL(regmap_exit);
+
+/*
+ * devres match callback for dev_get_regmap().
+ *
+ * @res is the devres payload (a pointer to the regmap) and @data is
+ * the name being looked up, or NULL to accept any map.  Returns
+ * non-zero on a match.
+ *
+ * Names are compared by content, not by address, so a caller passing
+ * an equal string from a different buffer still finds its map.  A map
+ * registered without a name never matches a named lookup.
+ */
+static int dev_get_regmap_match(struct device *dev, void *res, void *data)
+{
+	struct regmap **r = res;
+	if (!r || !*r) {
+		WARN_ON(!r || !*r);
+		return 0;
+	}
+
+	/* If the user didn't specify a name match any */
+	if (!data)
+		return 1;
+
+	return (*r)->name && strcmp((*r)->name, data) == 0;
+}
+
+/**
+ * dev_get_regmap(): Obtain the regmap (if any) for a device
+ *
+ * @dev: Device to retrieve the map for
+ * @name: Optional name for the register map, usually NULL.
+ *
+ * Returns the regmap registered for the device, or NULL if there is
+ * none.  When @name is given it must match the name the map was
+ * registered with; when it is NULL the first regmap found is
+ * returned.  Devices with more than one register map are very rare,
+ * so generic code should normally not need to pass a name.
+ */
+struct regmap *dev_get_regmap(struct device *dev, const char *name)
+{
+	struct regmap **found;
+
+	found = devres_find(dev, dev_get_regmap_release,
+			    dev_get_regmap_match, (void *)name);
+
+	return found ? *found : NULL;
+}
+EXPORT_SYMBOL_GPL(dev_get_regmap);
+
+/*
+ * Select the page of an indirectly accessed range that contains *reg
+ * and rewrite *reg to the matching address inside the data window.
+ * val_num is the number of registers about to be accessed; an access
+ * of more than one register must stay within the range and within a
+ * single page, otherwise -EINVAL is returned.  Errors from updating
+ * the selector register are passed through.
+ */
+static int _regmap_select_page(struct regmap *map, unsigned int *reg,
+			       struct regmap_range_node *range,
+			       unsigned int val_num)
+{
+	unsigned int range_offset = *reg - range->range_min;
+	unsigned int win_offset = range_offset % range->window_len;
+	unsigned int win_page = range_offset / range->window_len;
+	void *saved_work_buf;
+	bool selector_only;
+	bool page_chg;
+	int ret;
+
+	/* A bulk access may cross neither the range end nor a page end */
+	if (val_num > 1 &&
+	    (*reg + val_num - 1 > range->range_max ||
+	     val_num > range->window_len - win_offset))
+		return -EINVAL;
+
+	/*
+	 * The selector register may sit inside the data window.  It is
+	 * then present on every page, so a lone access to it needs no
+	 * page switch.
+	 */
+	selector_only = val_num <= 1 &&
+		range->window_start + win_offset == range->selector_reg;
+
+	if (!selector_only) {
+		/* Page switching works on its own buffer, not work_buf */
+		saved_work_buf = map->work_buf;
+		map->work_buf = map->selector_work_buf;
+
+		ret = _regmap_update_bits(map, range->selector_reg,
+					  range->selector_mask,
+					  win_page << range->selector_shift,
+					  &page_chg);
+
+		map->work_buf = saved_work_buf;
+
+		if (ret != 0)
+			return ret;
+	}
+
+	*reg = range->window_start + win_offset;
+
+	return 0;
+}
+
+/*
+ * _regmap_raw_write(): Write a block of raw register data to the device
+ *
+ * @map: Register map to write to
+ * @reg: First register to write
+ * @val: Data laid out for direct transmission to the device
+ * @val_len: Length of @val in bytes
+ *
+ * No lock is taken here; regmap_raw_write() takes the map lock around
+ * this call.  Unless the cache is bypassed the values are parsed and
+ * stored in the register cache first, and in cache-only mode nothing
+ * is sent to the hardware.  A write that starts in a paged range is
+ * split at window boundaries and the page is selected before the data
+ * goes out.  In async mode the write is queued via the bus async_write
+ * callback instead of being performed synchronously.
+ *
+ * Returns zero on success or a negative errno.
+ */
+int _regmap_raw_write(struct regmap *map, unsigned int reg,
+		      const void *val, size_t val_len)
+{
+	struct regmap_range_node *range;
+	unsigned long flags;
+	u8 *u8 = map->work_buf;
+	/* Where the value part starts inside work_buf */
+	void *work_val = map->work_buf + map->format.reg_bytes +
+		map->format.pad_bytes;
+	void *buf;
+	int ret = -ENOTSUPP;
+	size_t len;
+	int i;
+
+	WARN_ON(!map->bus);
+
+	/* Check for unwritable registers before we start */
+	if (map->writeable_reg)
+		for (i = 0; i < val_len / map->format.val_bytes; i++)
+			if (!map->writeable_reg(map->dev,
+						reg + (i * map->reg_stride)))
+				return -EINVAL;
+
+	if (!map->cache_bypass && map->format.parse_val) {
+		unsigned int ival;
+		int val_bytes = map->format.val_bytes;
+		for (i = 0; i < val_len / val_bytes; i++) {
+			ival = map->format.parse_val(val + (i * val_bytes));
+			ret = regcache_write(map, reg + (i * map->reg_stride),
+					     ival);
+			if (ret) {
+				/* Report the register that was actually cached */
+				dev_err(map->dev,
+					"Error in caching of register: %x ret: %d\n",
+					reg + (i * map->reg_stride), ret);
+				return ret;
+			}
+		}
+		if (map->cache_only) {
+			map->cache_dirty = true;
+			return 0;
+		}
+	}
+
+	range = _regmap_range_lookup(map, reg);
+	if (range) {
+		int val_num = val_len / map->format.val_bytes;
+		int win_offset = (reg - range->range_min) % range->window_len;
+		int win_residue = range->window_len - win_offset;
+
+		/* If the write goes beyond the end of the window split it */
+		while (val_num > win_residue) {
+			dev_dbg(map->dev, "Writing window %d/%zu\n",
+				win_residue, val_len / map->format.val_bytes);
+			ret = _regmap_raw_write(map, reg, val, win_residue *
+						map->format.val_bytes);
+			if (ret != 0)
+				return ret;
+
+			reg += win_residue;
+			val_num -= win_residue;
+			val += win_residue * map->format.val_bytes;
+			val_len -= win_residue * map->format.val_bytes;
+
+			win_offset = (reg - range->range_min) %
+				range->window_len;
+			win_residue = range->window_len - win_offset;
+		}
+
+		ret = _regmap_select_page(map, &reg, range, val_num);
+		if (ret != 0)
+			return ret;
+	}
+
+	map->format.format_reg(map->work_buf, reg, map->reg_shift);
+
+	u8[0] |= map->write_flag_mask;
+
+	/*
+	 * Essentially all I/O mechanisms will be faster with a single
+	 * buffer to write.  Since register syncs often generate raw
+	 * writes of single registers optimise that case.
+	 */
+	if (val != work_val && val_len == map->format.val_bytes) {
+		memcpy(work_val, val, map->format.val_bytes);
+		val = work_val;
+	}
+
+	if (map->async && map->bus->async_write) {
+		struct regmap_async *async;
+
+		trace_regmap_async_write_start(map->dev, reg, val_len);
+
+		/* Reuse an idle descriptor if one is available */
+		spin_lock_irqsave(&map->async_lock, flags);
+		async = list_first_entry_or_null(&map->async_free,
+						 struct regmap_async,
+						 list);
+		if (async)
+			list_del(&async->list);
+		spin_unlock_irqrestore(&map->async_lock, flags);
+
+		if (!async) {
+			async = map->bus->async_alloc();
+			if (!async)
+				return -ENOMEM;
+
+			async->work_buf = kzalloc(map->format.buf_size,
+						  GFP_KERNEL | GFP_DMA);
+			if (!async->work_buf) {
+				kfree(async);
+				return -ENOMEM;
+			}
+		}
+
+		async->map = map;
+
+		/* If the caller supplied the value we can use it safely. */
+		memcpy(async->work_buf, map->work_buf, map->format.pad_bytes +
+		       map->format.reg_bytes + map->format.val_bytes);
+
+		spin_lock_irqsave(&map->async_lock, flags);
+		list_add_tail(&async->list, &map->async_list);
+		spin_unlock_irqrestore(&map->async_lock, flags);
+
+		if (val != work_val)
+			ret = map->bus->async_write(map->bus_context,
+						    async->work_buf,
+						    map->format.reg_bytes +
+						    map->format.pad_bytes,
+						    val, val_len, async);
+		else
+			ret = map->bus->async_write(map->bus_context,
+						    async->work_buf,
+						    map->format.reg_bytes +
+						    map->format.pad_bytes +
+						    val_len, NULL, 0, async);
+
+		if (ret != 0) {
+			dev_err(map->dev, "Failed to schedule write: %d\n",
+				ret);
+
+			/* Not queued after all: return it to the idle pool */
+			spin_lock_irqsave(&map->async_lock, flags);
+			list_move(&async->list, &map->async_free);
+			spin_unlock_irqrestore(&map->async_lock, flags);
+		}
+
+		return ret;
+	}
+
+	trace_regmap_hw_write_start(map->dev, reg,
+				    val_len / map->format.val_bytes);
+
+	/* If we're doing a single register write we can probably just
+	 * send the work_buf directly, otherwise try to do a gather
+	 * write.
+	 */
+	if (val == work_val)
+		ret = map->bus->write(map->bus_context, map->work_buf,
+				      map->format.reg_bytes +
+				      map->format.pad_bytes +
+				      val_len);
+	else if (map->bus->gather_write)
+		ret = map->bus->gather_write(map->bus_context, map->work_buf,
+					     map->format.reg_bytes +
+					     map->format.pad_bytes,
+					     val, val_len);
+	else
+		/*
+		 * ret may hold 0 from the cache update or page select
+		 * above.  Force the fallback below, otherwise the data
+		 * would never be written and success would be reported.
+		 */
+		ret = -ENOTSUPP;
+
+	/* If that didn't work fall back on linearising by hand. */
+	if (ret == -ENOTSUPP) {
+		len = map->format.reg_bytes + map->format.pad_bytes + val_len;
+		buf = kzalloc(len, GFP_KERNEL);
+		if (!buf)
+			return -ENOMEM;
+
+		/* Padding bytes stay zero courtesy of kzalloc() */
+		memcpy(buf, map->work_buf, map->format.reg_bytes);
+		memcpy(buf + map->format.reg_bytes + map->format.pad_bytes,
+		       val, val_len);
+		ret = map->bus->write(map->bus_context, buf, len);
+
+		kfree(buf);
+	}
+
+	trace_regmap_hw_write_done(map->dev, reg,
+				   val_len / map->format.val_bytes);
+
+	return ret;
+}
+
+/**
+ * regmap_can_raw_write - Test if regmap_raw_write() is supported
+ *
+ * @map: Map to check.
+ *
+ * Raw writes need a bus together with both a value formatter and a
+ * register formatter.
+ */
+bool regmap_can_raw_write(struct regmap *map)
+{
+	if (!map->bus)
+		return false;
+
+	return map->format.format_val && map->format.format_reg;
+}
+EXPORT_SYMBOL_GPL(regmap_can_raw_write);
+
+/*
+ * reg_write implementation for maps that use a combined format_write
+ * formatter: selects the page if reg lies in a paged range, formats
+ * register and value into work_buf and sends the whole buffer in a
+ * single bus write.  context is the regmap.
+ */
+static int _regmap_bus_formatted_write(void *context, unsigned int reg,
+				       unsigned int val)
+{
+	struct regmap *map = context;
+	struct regmap_range_node *range;
+	int ret;
+
+	WARN_ON(!map->bus || !map->format.format_write);
+
+	range = _regmap_range_lookup(map, reg);
+	if (range != NULL) {
+		ret = _regmap_select_page(map, &reg, range, 1);
+		if (ret != 0)
+			return ret;
+	}
+
+	map->format.format_write(map, reg, val);
+
+	trace_regmap_hw_write_start(map->dev, reg, 1);
+	ret = map->bus->write(map->bus_context, map->work_buf,
+			      map->format.buf_size);
+	trace_regmap_hw_write_done(map->dev, reg, 1);
+
+	return ret;
+}
+
+/*
+ * reg_write implementation that forwards straight to the bus's
+ * per-register reg_write callback.  context is the regmap.
+ */
+static int _regmap_bus_reg_write(void *context, unsigned int reg,
+				 unsigned int val)
+{
+	struct regmap *map = context;
+	const struct regmap_bus *bus = map->bus;
+
+	return bus->reg_write(map->bus_context, reg, val);
+}
+
+/*
+ * reg_write implementation for maps with separate register and value
+ * formatters: formats val into the value part of work_buf and hands
+ * that single value to _regmap_raw_write().  context is the regmap.
+ */
+static int _regmap_bus_raw_write(void *context, unsigned int reg,
+				 unsigned int val)
+{
+	struct regmap *map = context;
+	void *val_buf;
+
+	WARN_ON(!map->bus || !map->format.format_val);
+
+	/* The value lives after the register and padding bytes */
+	val_buf = map->work_buf + map->format.reg_bytes
+		  + map->format.pad_bytes;
+
+	map->format.format_val(val_buf, val, 0);
+
+	return _regmap_raw_write(map, reg, val_buf, map->format.val_bytes);
+}
+
+/*
+ * Context argument for map->reg_write: the map itself when a bus is
+ * present, otherwise the caller-supplied bus_context.
+ */
+static inline void *_regmap_map_get_context(struct regmap *map)
+{
+	if (map->bus)
+		return map;
+
+	return map->bus_context;
+}
+
+/*
+ * Write one register without taking the map lock.  Returns -EIO if
+ * the register is not writeable.  Unless the cache is bypassed or
+ * caching is deferred to the raw write path, the value is stored in
+ * the cache first; in cache-only mode the map is marked dirty and the
+ * hardware is not touched.
+ */
+int _regmap_write(struct regmap *map, unsigned int reg,
+		  unsigned int val)
+{
+	void *context = _regmap_map_get_context(map);
+	bool use_cache;
+	int ret;
+
+	if (!regmap_writeable(map, reg))
+		return -EIO;
+
+	use_cache = !map->cache_bypass && !map->defer_caching;
+	if (use_cache) {
+		ret = regcache_write(map, reg, val);
+		if (ret != 0)
+			return ret;
+
+		if (map->cache_only) {
+			map->cache_dirty = true;
+			return 0;
+		}
+	}
+
+#ifdef LOG_DEVICE
+	if (strcmp(dev_name(map->dev), LOG_DEVICE) == 0)
+		dev_info(map->dev, "%x <= %x\n", reg, val);
+#endif
+
+	trace_regmap_reg_write(map->dev, reg, val);
+
+	return map->reg_write(context, reg, val);
+}
+
+/**
+ * regmap_write(): Write a value to a single register
+ *
+ * @map: Register map to write to
+ * @reg: Register to write to
+ * @val: Value to be written
+ *
+ * Returns zero on success and a negative errno on failure; a register
+ * address that is not a multiple of the map's stride gives -EINVAL.
+ */
+int regmap_write(struct regmap *map, unsigned int reg, unsigned int val)
+{
+	int ret;
+
+	/* Registers must be addressed on a stride boundary */
+	if (reg % map->reg_stride != 0)
+		return -EINVAL;
+
+	map->lock(map->lock_arg);
+	ret = _regmap_write(map, reg, val);
+	map->unlock(map->lock_arg);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(regmap_write);
+
+/**
+ * regmap_write_async(): Write a value to a single register asynchronously
+ *
+ * @map: Register map to write to
+ * @reg: Register to write to
+ * @val: Value to be written
+ *
+ * Returns zero on success and a negative errno on failure; a register
+ * address that is not a multiple of the map's stride gives -EINVAL.
+ */
+int regmap_write_async(struct regmap *map, unsigned int reg, unsigned int val)
+{
+	int ret;
+
+	if (reg % map->reg_stride != 0)
+		return -EINVAL;
+
+	map->lock(map->lock_arg);
+
+	/* The async flag is only set for the duration of this write */
+	map->async = true;
+	ret = _regmap_write(map, reg, val);
+	map->async = false;
+
+	map->unlock(map->lock_arg);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(regmap_write_async);
+
+/**
+ * regmap_raw_write(): Write raw values to one or more registers
+ *
+ * @map: Register map to write to
+ * @reg: Initial register to write to
+ * @val: Block of data to be written, laid out for direct transmission to the
+ *       device
+ * @val_len: Length of data pointed to by val.
+ *
+ * Intended for cases such as firmware download, where a large block
+ * of data has to be transferred to the device.  The data is sent as
+ * provided, without any formatting.
+ *
+ * Returns zero on success and a negative errno on failure.  -EINVAL is
+ * returned if the map does not support raw writes or if @val_len is
+ * not a whole number of register values.
+ */
+int regmap_raw_write(struct regmap *map, unsigned int reg,
+		     const void *val, size_t val_len)
+{
+	int ret;
+
+	if (!regmap_can_raw_write(map) ||
+	    val_len % map->format.val_bytes != 0)
+		return -EINVAL;
+
+	map->lock(map->lock_arg);
+	ret = _regmap_raw_write(map, reg, val, val_len);
+	map->unlock(map->lock_arg);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(regmap_raw_write);
+
+/**
+ * regmap_field_write(): Write a value to a single register field
+ *
+ * @field: Register field to write to
+ * @val: Value to be written
+ *
+ * The value is shifted into the field's position and written with a
+ * read/modify/write of the field's bits only.
+ *
+ * Returns zero on success and a negative errno on failure.
+ */
+int regmap_field_write(struct regmap_field *field, unsigned int val)
+{
+	unsigned int shifted = val << field->shift;
+
+	return regmap_update_bits(field->regmap, field->reg,
+				  field->mask, shifted);
+}
+EXPORT_SYMBOL_GPL(regmap_field_write);
+
+/**
+ * regmap_field_update_bits():	Perform a read/modify/write cycle
+ *                              on the register field
+ *
+ * @field: Register field to write to
+ * @mask: Bitmask to change
+ * @val: Value to be written
+ *
+ * @mask and @val are relative to the field; both are shifted into the
+ * field's position, and the mask is limited to the field's own bits.
+ *
+ * Returns zero on success and a negative errno on failure.
+ */
+int regmap_field_update_bits(struct regmap_field *field, unsigned int mask, unsigned int val)
+{
+	unsigned int reg_mask = (mask << field->shift) & field->mask;
+	unsigned int reg_val = val << field->shift;
+
+	return regmap_update_bits(field->regmap, field->reg,
+				  reg_mask, reg_val);
+}
+EXPORT_SYMBOL_GPL(regmap_field_update_bits);
+
+/**
+ * regmap_fields_write(): Write a value to a single register field with port ID
+ *
+ * @field: Register field to write to
+ * @id: port ID
+ * @val: Value to be written
+ *
+ * The register accessed is the field's base register plus @id times
+ * the field's id_offset.
+ *
+ * Returns zero on success and a negative errno on failure; -EINVAL if
+ * @id is not below the field's id_size.
+ */
+int regmap_fields_write(struct regmap_field *field, unsigned int id,
+			unsigned int val)
+{
+	unsigned int target;
+
+	if (id >= field->id_size)
+		return -EINVAL;
+
+	target = field->reg + (field->id_offset * id);
+
+	return regmap_update_bits(field->regmap, target,
+				  field->mask, val << field->shift);
+}
+EXPORT_SYMBOL_GPL(regmap_fields_write);
+
+/**
+ * regmap_fields_update_bits():	Perform a read/modify/write cycle
+ *                              on the register field
+ *
+ * @field: Register field to write to
+ * @id: port ID
+ * @mask: Bitmask to change
+ * @val: Value to be written
+ *
+ * The register accessed is the field's base register plus @id times
+ * the field's id_offset.  @mask and @val are relative to the field.
+ *
+ * Returns zero on success and a negative errno on failure; -EINVAL if
+ * @id is not below the field's id_size.
+ */
+int regmap_fields_update_bits(struct regmap_field *field,  unsigned int id,
+			      unsigned int mask, unsigned int val)
+{
+	unsigned int target;
+	unsigned int reg_mask;
+
+	if (id >= field->id_size)
+		return -EINVAL;
+
+	target = field->reg + (field->id_offset * id);
+	reg_mask = (mask << field->shift) & field->mask;
+
+	return regmap_update_bits(field->regmap, target,
+				  reg_mask, val << field->shift);
+}
+EXPORT_SYMBOL_GPL(regmap_fields_update_bits);
+
+/**
+ * regmap_bulk_write(): Write multiple registers to the device
+ *
+ * @map: Register map to write to
+ * @reg: First register to be written
+ * @val: Block of data to be written, in native register size for device
+ * @val_count: Number of registers to write
+ *
+ * This function is intended to be used for writing a large block of
+ * data to the device either in single transfer or multiple transfer.
+ *
+ * A value of zero will be returned on success, a negative errno will
+ * be returned in error cases.
+ */
+int regmap_bulk_write(struct regmap *map, unsigned int reg, const void *val,
+		     size_t val_count)
+{
+	int ret = 0, i;
+	size_t val_bytes = map->format.val_bytes;
+
+	if (map->bus && !map->format.parse_inplace)
+		return -EINVAL;
+	if (reg % map->reg_stride)
+		return -EINVAL;
+
+	/*
+	 * Some devices don't support bulk write (and some maps have no
+	 * bus at all), for them we have a series of single write operations.
+	 */
+	if (!map->bus || map->use_single_rw) {
+		map->lock(map->lock_arg);
+		for (i = 0; i < val_count; i++) {
+			unsigned int ival;
+
+			switch (val_bytes) {	/* fetch one native-size value */
+			case 1:
+				ival = *(u8 *)(val + (i * val_bytes));
+				break;
+			case 2:
+				ival = *(u16 *)(val + (i * val_bytes));
+				break;
+			case 4:
+				ival = *(u32 *)(val + (i * val_bytes));
+				break;
+#ifdef CONFIG_64BIT
+			case 8:	/* NOTE(review): narrowed to unsigned int */
+				ival = *(u64 *)(val + (i * val_bytes));
+				break;
+#endif
+			default:
+				ret = -EINVAL;
+				goto out;
+			}
+
+			ret = _regmap_write(map, reg + (i * map->reg_stride),
+					ival);
+			if (ret != 0)
+				goto out;	/* stop at the first failure */
+		}
+out:
+		map->unlock(map->lock_arg);
+	} else {
+		void *wval;
+		/* val is const, so parse_inplace() runs on a private copy */
+		wval = kmemdup(val, val_count * val_bytes, GFP_KERNEL);
+		if (!wval) {
+			dev_err(map->dev, "Error in memory allocation\n");
+			return -ENOMEM;
+		}
+		for (i = 0; i < val_count * val_bytes; i += val_bytes)
+			map->format.parse_inplace(wval + i);
+
+		map->lock(map->lock_arg);
+		ret = _regmap_raw_write(map, reg, wval, val_bytes * val_count);
+		map->unlock(map->lock_arg);
+
+		kfree(wval);
+	}
+	return ret;
+}
+EXPORT_SYMBOL_GPL(regmap_bulk_write);
+
+/*
+ * _regmap_raw_multi_reg_write(): write reg/value pairs in one bus transfer
+ *
+ * The (register, new value) pairs in regs have not been formatted, but
+ * they are all in the same page and have been changed to being page
+ * relative. The page register has been written if that was necessary.
+ */
+static int _regmap_raw_multi_reg_write(struct regmap *map,
+				       const struct reg_default *regs,
+				       size_t num_regs)
+{
+	int ret;
+	void *buf;
+	int i;
+	u8 *u8;
+	size_t val_bytes = map->format.val_bytes;
+	size_t reg_bytes = map->format.reg_bytes;
+	size_t pad_bytes = map->format.pad_bytes;
+	size_t pair_size = reg_bytes + pad_bytes + val_bytes;
+	size_t len = pair_size * num_regs;
+
+	if (!len)	/* no pairs, or pairs of zero size */
+		return -EINVAL;
+
+	buf = kzalloc(len, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	/* We have to linearise by hand: each pair is written to buf as */
+	/* formatted register, zeroed padding, then formatted value. */
+	u8 = buf;
+
+	for (i = 0; i < num_regs; i++) {
+		int reg = regs[i].reg;
+		int val = regs[i].def;
+		trace_regmap_hw_write_start(map->dev, reg, 1);
+		map->format.format_reg(u8, reg, map->reg_shift);
+		u8 += reg_bytes + pad_bytes;
+		map->format.format_val(u8, val, 0);
+		u8 += val_bytes;
+	}
+	u8 = buf;
+	*u8 |= map->write_flag_mask;	/* applied to the first byte only */
+
+	ret = map->bus->write(map->bus_context, buf, len);
+
+	kfree(buf);
+
+	for (i = 0; i < num_regs; i++) {	/* traced on failure as well */
+		int reg = regs[i].reg;
+		trace_regmap_hw_write_done(map->dev, reg, 1);
+	}
+	return ret;
+}
+
+static unsigned int _regmap_register_page(struct regmap *map, unsigned int reg,
+					  struct regmap_range_node *range)
+{
+	/* Offset of reg inside the range, counted in whole windows */
+	unsigned int offset = reg - range->range_min;
+
+	return offset / range->window_len;
+}
+
+static int _regmap_range_multi_paged_reg_write(struct regmap *map,
+					       struct reg_default *regs,
+					       size_t num_regs)
+{
+	int ret;
+	int i, n;	/* n: pairs gathered since base */
+	struct reg_default *base;
+	unsigned int this_page = 0;
+	/*
+	 * The set of registers is not necessarily in order, but since
+	 * the order of writes must be preserved this algorithm chops the
+	 * set, flushing the pairs gathered so far, whenever the page changes.
+	 */
+	base = regs;
+	for (i = 0, n = 0; i < num_regs; i++, n++) {
+		unsigned int reg = regs[i].reg;
+		struct regmap_range_node *range;
+
+		range = _regmap_range_lookup(map, reg);
+		if (range) {	/* entries outside any range just join the run */
+			unsigned int win_page = _regmap_register_page(map, reg,
+								      range);
+
+			if (i == 0)
+				this_page = win_page;
+			if (win_page != this_page) {
+				this_page = win_page;
+				ret = _regmap_raw_multi_reg_write(map, base, n);
+				if (ret != 0)
+					return ret;
+				base += n;	/* next run starts at regs[i] */
+				n = 0;
+			}
+			ret = _regmap_select_page(map, &base[n].reg, range, 1);
+			if (ret != 0)
+				return ret;
+		}
+	}
+	if (n > 0)	/* flush the final run */
+		return _regmap_raw_multi_reg_write(map, base, n);
+	return 0;
+}
+
+static int _regmap_multi_reg_write(struct regmap *map,
+				   const struct reg_default *regs,
+				   size_t num_regs)
+{
+	int i;
+	int ret;
+
+	if (!map->can_multi_write) {	/* fall back to one write per pair */
+		for (i = 0; i < num_regs; i++) {
+			ret = _regmap_write(map, regs[i].reg, regs[i].def);
+			if (ret != 0)
+				return ret;
+		}
+		return 0;
+	}
+
+	if (!map->format.parse_inplace)
+		return -EINVAL;
+
+	if (map->writeable_reg)	/* vets access and stride before any write */
+		for (i = 0; i < num_regs; i++) {
+			int reg = regs[i].reg;
+			if (!map->writeable_reg(map->dev, reg))
+				return -EINVAL;
+			if (reg % map->reg_stride)
+				return -EINVAL;
+		}
+
+	if (!map->cache_bypass) {	/* the cache is updated first */
+		for (i = 0; i < num_regs; i++) {
+			unsigned int val = regs[i].def;
+			unsigned int reg = regs[i].reg;
+			ret = regcache_write(map, reg, val);
+			if (ret) {
+				dev_err(map->dev,
+				"Error in caching of register: %x ret: %d\n",
+								reg, ret);
+				return ret;
+			}
+		}
+		if (map->cache_only) {	/* no bus traffic in this mode */
+			map->cache_dirty = true;
+			return 0;
+		}
+	}
+
+	WARN_ON(!map->bus);
+
+	for (i = 0; i < num_regs; i++) {
+		unsigned int reg = regs[i].reg;
+		struct regmap_range_node *range;
+		range = _regmap_range_lookup(map, reg);
+		if (range) {	/* one paged register diverts the whole set */
+			size_t len = sizeof(struct reg_default)*num_regs;
+			struct reg_default *base = kmemdup(regs, len,
+							   GFP_KERNEL);
+			if (!base)
+				return -ENOMEM;
+			ret = _regmap_range_multi_paged_reg_write(map, base,
+								  num_regs);
+			kfree(base);
+
+			return ret;
+		}
+	}
+	return _regmap_raw_multi_reg_write(map, regs, num_regs);
+}
+
+/*
+ * regmap_multi_reg_write(): Write multiple registers to the device
+ *
+ * where the set of register,value pairs are supplied in any order,
+ * possibly not all in a single range.
+ *
+ * @map: Register map to write to
+ * @regs: Array of structures containing register,value to be written
+ * @num_regs: Number of registers to write
+ *
+ * The 'normal' block write mode will ultimately send data on the
+ * target bus as R,V1,V2,V3,..,Vn where successively higher registers are
+ * addressed. However, this alternative block multi write mode will send
+ * the data as R1,V1,R2,V2,..,Rn,Vn on the target bus. The target device
+ * must of course support the mode.
+ *
+ * A value of zero will be returned on success, a negative errno will be
+ * returned in error cases.
+ */
+int regmap_multi_reg_write(struct regmap *map, const struct reg_default *regs,
+			   int num_regs)
+{
+	int err;
+
+	/* Every pair is written under one hold of the map lock; the
+	 * helper's result is handed back unchanged. */
+	map->lock(map->lock_arg);
+	err = _regmap_multi_reg_write(map, regs, num_regs);
+	map->unlock(map->lock_arg);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(regmap_multi_reg_write);
+
+/*
+ * regmap_multi_reg_write_bypassed(): Write multiple registers to the
+ *                                    device but not the cache
+ *
+ * where the set of register are supplied in any order
+ *
+ * @map: Register map to write to
+ * @regs: Array of structures containing register,value to be written
+ * @num_regs: Number of registers to write
+ *
+ * This function is intended to be used for writing a large block of data
+ * atomically to the device in single transfer for those I2C client devices
+ * that implement this alternative block write mode.
+ *
+ * A value of zero will be returned on success, a negative errno will
+ * be returned in error cases.
+ */
+int regmap_multi_reg_write_bypassed(struct regmap *map,
+				    const struct reg_default *regs,
+				    int num_regs)
+{
+	bool saved_bypass;
+	int err;
+
+	map->lock(map->lock_arg);
+
+	/* Force cache bypass for this write only, then put it back. */
+	saved_bypass = map->cache_bypass;
+	map->cache_bypass = true;
+	err = _regmap_multi_reg_write(map, regs, num_regs);
+	map->cache_bypass = saved_bypass;
+
+	map->unlock(map->lock_arg);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(regmap_multi_reg_write_bypassed);
+
+/**
+ * regmap_raw_write_async(): Write raw values to one or more registers
+ *                           asynchronously
+ *
+ * @map: Register map to write to
+ * @reg: Initial register to write to
+ * @val: Block of data to be written, laid out for direct transmission to the
+ *       device.  Must be valid until regmap_async_complete() is called.
+ * @val_len: Length of data pointed to by val.
+ *
+ * This function is intended to be used for things like firmware
+ * download where a large block of data needs to be transferred to the
+ * device.  No formatting will be done on the data provided.
+ *
+ * If supported by the underlying bus the write will be scheduled
+ * asynchronously, helping maximise I/O speed on higher speed buses
+ * like SPI.  regmap_async_complete() can be called to ensure that all
+ * asynchronous writes have been completed.
+ *
+ * A value of zero will be returned on success, a negative errno will
+ * be returned in error cases.
+ */
+int regmap_raw_write_async(struct regmap *map, unsigned int reg,
+			   const void *val, size_t val_len)
+{
+	int err;
+
+	/* Only whole values, starting on a register the stride allows. */
+	if ((val_len % map->format.val_bytes) || (reg % map->reg_stride))
+		return -EINVAL;
+
+	map->lock(map->lock_arg);
+
+	/* Async mode is raised only for the duration of this call and
+	 * only while the map lock is held.
+	 */
+	map->async = true;
+	err = _regmap_raw_write(map, reg, val, val_len);
+	map->async = false;
+
+	map->unlock(map->lock_arg);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(regmap_raw_write_async);
+
+static int _regmap_raw_read(struct regmap *map, unsigned int reg, void *val,
+			    unsigned int val_len)
+{
+	struct regmap_range_node *range;
+	u8 *u8 = map->work_buf;	/* byte view of the work buffer */
+	int ret;
+
+	WARN_ON(!map->bus);
+
+	range = _regmap_range_lookup(map, reg);
+	if (range) {	/* reg is passed by pointer to the page selection */
+		ret = _regmap_select_page(map, &reg, range,
+					  val_len / map->format.val_bytes);
+		if (ret != 0)
+			return ret;
+	}
+
+	map->format.format_reg(map->work_buf, reg, map->reg_shift);
+
+	/*
+	 * Some buses or devices flag reads by setting the high bits in the
+	 * register address; since it's always the high bits for all
+	 * current formats we can do this here rather than in
+	 * formatting.  This may break if we get interesting formats.
+	 */
+	u8[0] |= map->read_flag_mask;
+
+	trace_regmap_hw_read_start(map->dev, reg,
+				   val_len / map->format.val_bytes);
+
+	ret = map->bus->read(map->bus_context, map->work_buf,
+			     map->format.reg_bytes + map->format.pad_bytes,
+			     val, val_len);
+
+	trace_regmap_hw_read_done(map->dev, reg,
+				  val_len / map->format.val_bytes);
+
+	return ret;
+}
+
+static int _regmap_bus_reg_read(void *context, unsigned int reg,
+				unsigned int *val)
+{
+	struct regmap *m = context;	/* the context is the map itself */
+
+	return m->bus->reg_read(m->bus_context, reg, val);
+}
+
+static int _regmap_bus_read(void *context, unsigned int reg,
+			    unsigned int *val)
+{
+	struct regmap *m = context;
+	int err;
+
+	if (!m->format.parse_val)
+		return -EINVAL;
+	/* Read one raw value into the work buffer, then decode it. */
+	err = _regmap_raw_read(m, reg, m->work_buf, m->format.val_bytes);
+	if (err)
+		return err;
+	*val = m->format.parse_val(m->work_buf);
+	return 0;
+}
+
+static int _regmap_read(struct regmap *map, unsigned int reg,
+			unsigned int *val)
+{
+	void *context = _regmap_map_get_context(map);
+	int err;
+
+	WARN_ON(!map->reg_read);
+
+	/* A cache hit needs no hardware access at all. */
+	if (!map->cache_bypass && regcache_read(map, reg, val) == 0)
+		return 0;
+
+	if (map->cache_only)
+		return -EBUSY;
+
+	if (!regmap_readable(map, reg))
+		return -EIO;
+
+	err = map->reg_read(context, reg, val);
+	if (err)
+		return err;
+
+#ifdef LOG_DEVICE
+	if (strcmp(dev_name(map->dev), LOG_DEVICE) == 0)
+		dev_info(map->dev, "%x => %x\n", reg, *val);
+#endif
+
+	trace_regmap_reg_read(map->dev, reg, *val);
+
+	/* Store what the hardware returned in the cache as well. */
+	if (!map->cache_bypass)
+		regcache_write(map, reg, *val);
+
+	return 0;
+}
+
+/**
+ * regmap_read(): Read a value from a single register
+ *
+ * @map: Register map to read from
+ * @reg: Register to be read from
+ * @val: Pointer to store read value
+ *
+ * A value of zero will be returned on success, a negative errno will
+ * be returned in error cases.
+ */
+int regmap_read(struct regmap *map, unsigned int reg, unsigned int *val)
+{
+	int err;
+
+	/* Only multiples of the map's register stride are accepted. */
+	if (reg % map->reg_stride != 0)
+		return -EINVAL;
+
+	/* _regmap_read() does not lock; take the map lock around it. */
+	map->lock(map->lock_arg);
+	err = _regmap_read(map, reg, val);
+	map->unlock(map->lock_arg);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(regmap_read);
+
+/**
+ * regmap_raw_read(): Read raw data from the device
+ *
+ * @map: Register map to read from
+ * @reg: First register to be read from
+ * @val: Pointer to store read value
+ * @val_len: Size of data to read
+ *
+ * A value of zero will be returned on success, a negative errno will
+ * be returned in error cases.
+ */
+int regmap_raw_read(struct regmap *map, unsigned int reg, void *val,
+		    size_t val_len)
+{
+	size_t val_bytes = map->format.val_bytes;
+	size_t val_count = val_len / val_bytes;
+	unsigned int v;
+	int ret, i;
+
+	if (!map->bus)	/* raw access is refused for maps without a bus */
+		return -EINVAL;
+	if (val_len % map->format.val_bytes)
+		return -EINVAL;
+	if (reg % map->reg_stride)
+		return -EINVAL;
+
+	map->lock(map->lock_arg);
+
+	if (regmap_volatile_range(map, reg, val_count) || map->cache_bypass ||
+	    map->cache_type == REGCACHE_NONE) {
+		/* Physical block read if there's no cache involved */
+		ret = _regmap_raw_read(map, reg, val, val_len);
+
+	} else {
+		/* Otherwise go word by word for the cache; should be low
+		 * cost as we expect to hit the cache.
+		 */
+		for (i = 0; i < val_count; i++) {
+			ret = _regmap_read(map, reg + (i * map->reg_stride),
+					   &v);
+			if (ret != 0)
+				goto out;	/* unlock before returning */
+
+			map->format.format_val(val + (i * val_bytes), v, 0);
+		}
+	}
+
+ out:
+	map->unlock(map->lock_arg);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(regmap_raw_read);
+
+/**
+ * regmap_field_read(): Read a value from a single register field
+ *
+ * @field: Register field to read from
+ * @val: Pointer to store read value
+ *
+ * A value of zero will be returned on success, a negative errno will
+ * be returned in error cases.
+ */
+int regmap_field_read(struct regmap_field *field, unsigned int *val)
+{
+	unsigned int raw;
+	int err;
+
+	err = regmap_read(field->regmap, field->reg, &raw);
+	if (err)
+		return err;
+
+	/* Isolate the field's bits and right-justify them. */
+	*val = (raw & field->mask) >> field->shift;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(regmap_field_read);
+
+/**
+ * regmap_fields_read(): Read a value from a single register field with port ID
+ *
+ * @field: Register field to read from
+ * @id: port ID
+ * @val: Pointer to store read value
+ *
+ * A value of zero will be returned on success, a negative errno will
+ * be returned in error cases.
+ */
+int regmap_fields_read(struct regmap_field *field, unsigned int id,
+		       unsigned int *val)
+{
+	unsigned int port_reg;
+	unsigned int raw;
+	int err;
+
+	/* Valid port IDs are 0 .. id_size - 1. */
+	if (id >= field->id_size)
+		return -EINVAL;
+
+	port_reg = field->reg + (field->id_offset * id);
+	err = regmap_read(field->regmap, port_reg, &raw);
+	if (err)
+		return err;
+
+	/* Isolate the field's bits and right-justify them. */
+	*val = (raw & field->mask) >> field->shift;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(regmap_fields_read);
+
+/**
+ * regmap_bulk_read(): Read multiple registers from the device
+ *
+ * @map: Register map to read from
+ * @reg: First register to be read from
+ * @val: Pointer to store read value, in native register size for device
+ * @val_count: Number of registers to read
+ *
+ * A value of zero will be returned on success, a negative errno will
+ * be returned in error cases.
+ */
+int regmap_bulk_read(struct regmap *map, unsigned int reg, void *val,
+		     size_t val_count)
+{
+	int ret, i;
+	size_t val_bytes = map->format.val_bytes;
+	bool vol = regmap_volatile_range(map, reg, val_count);
+
+	if (reg % map->reg_stride)
+		return -EINVAL;
+
+	if (map->bus && map->format.parse_inplace && (vol || map->cache_type == REGCACHE_NONE)) {
+		/*
+		 * Some devices do not support bulk read, for
+		 * them we have a series of single read operations.
+		 */
+		if (map->use_single_rw) {
+			for (i = 0; i < val_count; i++) {
+				ret = regmap_raw_read(map,
+						reg + (i * map->reg_stride),
+						val + (i * val_bytes),
+						val_bytes);
+				if (ret != 0)
+					return ret;
+			}
+		} else {
+			ret = regmap_raw_read(map, reg, val,
+					      val_bytes * val_count);
+			if (ret != 0)
+				return ret;
+		}
+
+		for (i = 0; i < val_count * val_bytes; i += val_bytes)
+			map->format.parse_inplace(val + i);
+	} else {	/* NOTE(review): the memcpy() below is host-endian dependent */
+		for (i = 0; i < val_count; i++) {
+			unsigned int ival;
+			ret = regmap_read(map, reg + (i * map->reg_stride),
+					  &ival);
+			if (ret != 0)
+				return ret;
+			memcpy(val + (i * val_bytes), &ival, val_bytes);
+		}
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(regmap_bulk_read);
+
+static int _regmap_update_bits(struct regmap *map, unsigned int reg,
+			       unsigned int mask, unsigned int val,
+			       bool *change)
+{
+	unsigned int old, new;
+	bool differs;
+	int err;
+
+	err = _regmap_read(map, reg, &old);
+	if (err)
+		return err;
+
+	/* Replace only the masked bits; everything else keeps its value. */
+	new = (old & ~mask) | (val & mask);
+	differs = new != old;
+	/* The register is only written when the update alters it. */
+	if (differs)
+		err = _regmap_write(map, reg, new);
+
+	/* Reported even if the write itself failed. */
+	if (change)
+		*change = differs;
+
+	return err;
+}
+
+/**
+ * regmap_update_bits: Perform a read/modify/write cycle on the register map
+ *
+ * @map: Register map to update
+ * @reg: Register to update
+ * @mask: Bitmask to change
+ * @val: New value for bitmask
+ *
+ * Returns zero for success, a negative number on error.
+ */
+int regmap_update_bits(struct regmap *map, unsigned int reg,
+		       unsigned int mask, unsigned int val)
+{
+	int err;
+
+	map->lock(map->lock_arg);
+	/* NULL: this caller does not ask whether anything changed */
+	err = _regmap_update_bits(map, reg, mask, val, NULL);
+	map->unlock(map->lock_arg);
+	return err;
+}
+EXPORT_SYMBOL_GPL(regmap_update_bits);
+
+/**
+ * regmap_update_bits_async: Perform a read/modify/write cycle on the register
+ *                           map asynchronously
+ *
+ * @map: Register map to update
+ * @reg: Register to update
+ * @mask: Bitmask to change
+ * @val: New value for bitmask
+ *
+ * With most buses the read must be done synchronously so this is most
+ * useful for devices with a cache which do not need to interact with
+ * the hardware to determine the current register value.
+ *
+ * Returns zero for success, a negative number on error.
+ */
+int regmap_update_bits_async(struct regmap *map, unsigned int reg,
+			     unsigned int mask, unsigned int val)
+{
+	int err;
+
+	map->lock(map->lock_arg);
+
+	/* Async mode is raised only around this one update and dropped
+	 * again before the map lock is released. */
+	map->async = true;
+	err = _regmap_update_bits(map, reg, mask, val, NULL);
+	map->async = false;
+
+	map->unlock(map->lock_arg);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(regmap_update_bits_async);
+
+/**
+ * regmap_update_bits_check: Perform a read/modify/write cycle on the
+ *                           register map and report if updated
+ *
+ * @map: Register map to update
+ * @reg: Register to update
+ * @mask: Bitmask to change
+ * @val: New value for bitmask
+ * @change: Boolean indicating if a write was done
+ *
+ * Returns zero for success, a negative number on error.
+ */
+int regmap_update_bits_check(struct regmap *map, unsigned int reg,
+			     unsigned int mask, unsigned int val, bool *change)
+{
+	int err;
+
+	/* *change is filled in by the helper while the lock is held. */
+	map->lock(map->lock_arg);
+	err = _regmap_update_bits(map, reg, mask, val, change);
+	map->unlock(map->lock_arg);
+	return err;
+}
+EXPORT_SYMBOL_GPL(regmap_update_bits_check);
+
+/**
+ * regmap_update_bits_check_async: Perform a read/modify/write cycle on the
+ *                                 register map asynchronously and report if
+ *                                 updated
+ *
+ * @map: Register map to update
+ * @reg: Register to update
+ * @mask: Bitmask to change
+ * @val: New value for bitmask
+ * @change: Boolean indicating if a write was done
+ *
+ * With most buses the read must be done synchronously so this is most
+ * useful for devices with a cache which do not need to interact with
+ * the hardware to determine the current register value.
+ *
+ * Returns zero for success, a negative number on error.
+ */
+int regmap_update_bits_check_async(struct regmap *map, unsigned int reg,
+				   unsigned int mask, unsigned int val,
+				   bool *change)
+{
+	int err;
+
+	map->lock(map->lock_arg);
+
+	/* The async flag covers just this update; it is lowered again
+	 * before the lock is given up. */
+	map->async = true;
+	err = _regmap_update_bits(map, reg, mask, val, change);
+	map->async = false;
+
+	map->unlock(map->lock_arg);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(regmap_update_bits_check_async);
+
+void regmap_async_complete_cb(struct regmap_async *async, int ret)
+{
+	struct regmap *map = async->map;
+	bool last;
+
+	trace_regmap_async_io_complete(map->dev);
+
+	spin_lock(&map->async_lock);
+	list_move(&async->list, &map->async_free);
+	last = list_empty(&map->async_list);
+	/* Latch a failure so that regmap_async_complete() can report it. */
+	if (ret)
+		map->async_ret = ret;
+	spin_unlock(&map->async_lock);
+
+	/* Waiters are only woken once nothing is pending any more. */
+	if (last)
+		wake_up(&map->async_waitq);
+}
+EXPORT_SYMBOL_GPL(regmap_async_complete_cb);
+
+static int regmap_async_is_done(struct regmap *map)
+{
+	unsigned long irq_flags;
+	int idle;
+
+	/* Sample the pending list under its lock; non-zero means empty. */
+	spin_lock_irqsave(&map->async_lock, irq_flags);
+	idle = list_empty(&map->async_list);
+	spin_unlock_irqrestore(&map->async_lock, irq_flags);
+	return idle;
+}
+
+/**
+ * regmap_async_complete: Ensure all asynchronous I/O has completed.
+ *
+ * @map: Map to operate on.
+ *
+ * Blocks until any pending asynchronous I/O has completed.  Returns
+ * an error code for any failed I/O operations.
+ */
+int regmap_async_complete(struct regmap *map)
+{
+	unsigned long irq_flags;
+	int status;
+
+	/* Without async support there is never anything to wait for */
+	if (!(map->bus && map->bus->async_write))
+		return 0;
+
+	trace_regmap_async_complete_start(map->dev);
+
+	wait_event(map->async_waitq, regmap_async_is_done(map));
+
+	/* Hand the recorded error to the caller and reset it for next time */
+	spin_lock_irqsave(&map->async_lock, irq_flags);
+	status = map->async_ret;
+	map->async_ret = 0;
+	spin_unlock_irqrestore(&map->async_lock, irq_flags);
+	trace_regmap_async_complete_done(map->dev);
+
+	return status;
+}
+EXPORT_SYMBOL_GPL(regmap_async_complete);
+
+/**
+ * regmap_register_patch: Register and apply register updates to be applied
+ *                        on device initialisation
+ *
+ * @map: Register map to apply updates to.
+ * @regs: Values to update.
+ * @num_regs: Number of entries in regs.
+ *
+ * Register a set of register updates to be applied to the device
+ * whenever the device registers are synchronised with the cache and
+ * apply them immediately.  Typically this is used to apply
+ * corrections to be applied to the device defaults on startup, such
+ * as the updates some vendors provide to undocumented registers.
+ *
+ * The caller must ensure that this function cannot be called
+ * concurrently with either itself or regcache_sync().
+ */
+int regmap_register_patch(struct regmap *map, const struct reg_default *regs,
+			  int num_regs)
+{
+	struct reg_default *p;
+	int ret;
+	bool bypass;
+
+	if (WARN_ONCE(num_regs <= 0, "invalid registers number (%d)\n",
+	    num_regs))
+		return 0;
+
+	/* map->patch is only replaced once the larger array was allocated. */
+	p = krealloc(map->patch,
+		     sizeof(*p) * (map->patch_regs + num_regs),
+		     GFP_KERNEL);
+	if (!p)
+		return -ENOMEM;
+
+	memcpy(p + map->patch_regs, regs, num_regs * sizeof(*regs));
+	map->patch = p;
+	map->patch_regs += num_regs;
+
+	map->lock(map->lock_arg);
+
+	/* Apply the new entries now, bypassing the cache, in async mode. */
+	bypass = map->cache_bypass;
+	map->cache_bypass = true;
+	map->async = true;
+
+	ret = _regmap_multi_reg_write(map, regs, num_regs);
+
+	/* Success or failure, the temporary settings are undone here. */
+	map->async = false;
+	map->cache_bypass = bypass;
+
+	map->unlock(map->lock_arg);
+
+	/* Wait for the scheduled writes; the value returned is still the
+	 * one from _regmap_multi_reg_write(), not the completion result. */
+	regmap_async_complete(map);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(regmap_register_patch);
+
+/*
+ * regmap_get_val_bytes(): Report the size of a register value
+ *
+ * Report the size of a register value, mainly intended for use by
+ * generic infrastructure built on top of regmap.
+ */
+int regmap_get_val_bytes(struct regmap *map)
+{
+	/* Maps that use a format_write() callback report no value size */
+	if (map->format.format_write != NULL)
+		return -EINVAL;
+	return map->format.val_bytes;
+}
+EXPORT_SYMBOL_GPL(regmap_get_val_bytes);
+
+int regmap_parse_val(struct regmap *map, const void *buf,
+			unsigned int *val)
+{
+	/* Decoding needs the map's parse_val() hook; without one, refuse. */
+	if (map->format.parse_val == NULL)
+		return -EINVAL;
+
+	*val = map->format.parse_val(buf);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(regmap_parse_val);
+
+static int __init regmap_initcall(void)
+{
+	regmap_debugfs_initcall();
+	/* Always reports success to the initcall machinery. */
+	return 0;
+}
+postcore_initcall(regmap_initcall);
diff --git a/drivers/base/reservation.c b/drivers/base/reservation.c
new file mode 100644
index 00000000000..a73fbf3b8e5
--- /dev/null
+++ b/drivers/base/reservation.c
@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 2012-2013 Canonical Ltd
+ *
+ * Based on bo.c which bears the following copyright notice,
+ * but is dual licensed:
+ *
+ * Copyright (c) 2006-2009 VMware, Inc., Palo Alto, CA., USA
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+/*
+ * Authors: Thomas Hellstrom <thellstrom-at-vmware-dot-com>
+ */
+
+#include <linux/reservation.h>
+#include <linux/export.h>
+
+DEFINE_WW_CLASS(reservation_ww_class);	/* the ww class this file provides */
+EXPORT_SYMBOL(reservation_ww_class);
diff --git a/drivers/base/soc.c b/drivers/base/soc.c
new file mode 100644
index 00000000000..72b5e7280d1
--- /dev/null
+++ b/drivers/base/soc.c
@@ -0,0 +1,181 @@
+/*
+ * Copyright (C) ST-Ericsson SA 2011
+ *
+ * Author: Lee Jones <lee.jones@linaro.org> for ST-Ericsson.
+ * License terms:  GNU General Public License (GPL), version 2
+ */
+
+#include <linux/sysfs.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/stat.h>
+#include <linux/slab.h>
+#include <linux/idr.h>
+#include <linux/spinlock.h>
+#include <linux/sys_soc.h>
+#include <linux/err.h>
+
+static DEFINE_IDA(soc_ida);	/* hands out the soc_dev_num values */
+static DEFINE_SPINLOCK(soc_lock);	/* held around ida_get_new() */
+
+static ssize_t soc_info_get(struct device *dev,
+			    struct device_attribute *attr,
+			    char *buf);
+
+struct soc_device {
+	struct device dev;	/* embedded; container_of() recovers the parent */
+	struct soc_device_attribute *attr;	/* caller's pointer, not a copy */
+	int soc_dev_num;	/* ID from soc_ida, used in the "soc%d" name */
+};
+
+static struct bus_type soc_bus_type = {
+	.name  = "soc",
+};
+/* Read-only attributes; soc_info_get() is the show routine for all four. */
+static DEVICE_ATTR(machine,  S_IRUGO, soc_info_get,  NULL);
+static DEVICE_ATTR(family,   S_IRUGO, soc_info_get,  NULL);
+static DEVICE_ATTR(soc_id,   S_IRUGO, soc_info_get,  NULL);
+static DEVICE_ATTR(revision, S_IRUGO, soc_info_get,  NULL);
+
+struct device *soc_device_to_device(struct soc_device *soc_dev)
+{
+	return &soc_dev->dev;	/* the struct device embedded in soc_dev */
+}
+
+static umode_t soc_attribute_mode(struct kobject *kobj,
+				  struct attribute *attr,
+				  int index)
+{
+	struct device *dev = container_of(kobj, struct device, kobj);
+	struct soc_device *soc_dev = container_of(dev, struct soc_device, dev);
+	const struct soc_device_attribute *info = soc_dev->attr;
+	const char *text = NULL;
+
+	/* Pick the string behind the queried attribute; text stays NULL
+	 * for an attribute this file does not define. */
+	if (attr == &dev_attr_machine.attr)
+		text = info->machine;
+	else if (attr == &dev_attr_family.attr)
+		text = info->family;
+	else if (attr == &dev_attr_revision.attr)
+		text = info->revision;
+	else if (attr == &dev_attr_soc_id.attr)
+		text = info->soc_id;
+
+	/* Unknown or unfilled attribute. */
+	return text ? attr->mode : 0;
+}
+
+static ssize_t soc_info_get(struct device *dev, struct device_attribute *attr,
+			    char *buf)
+{
+	struct soc_device *soc_dev = container_of(dev, struct soc_device, dev);
+	const char *text;
+
+	if (attr == &dev_attr_machine)
+		text = soc_dev->attr->machine;
+	else if (attr == &dev_attr_family)
+		text = soc_dev->attr->family;
+	else if (attr == &dev_attr_revision)
+		text = soc_dev->attr->revision;
+	else if (attr == &dev_attr_soc_id)
+		text = soc_dev->attr->soc_id;
+	else
+		return -EINVAL;
+	return sprintf(buf, "%s\n", text);
+}
+
+static struct attribute *soc_attr[] = {
+	&dev_attr_machine.attr,
+	&dev_attr_family.attr,
+	&dev_attr_soc_id.attr,
+	&dev_attr_revision.attr,
+	NULL,
+};
+
+static const struct attribute_group soc_attr_group = {
+	.attrs = soc_attr,
+	.is_visible = soc_attribute_mode,	/* yields 0 for unset strings */
+};
+
+static const struct attribute_group *soc_attr_groups[] = {
+	&soc_attr_group,
+	NULL,
+};
+
+static void soc_release(struct device *dev)
+{
+	/* dev is embedded in a soc_device, so that allocation is what is
+	 * freed here; soc_dev->attr is not touched. */
+	kfree(container_of(dev, struct soc_device, dev));
+}
+
+struct soc_device *soc_device_register(struct soc_device_attribute *soc_dev_attr)
+{
+	struct soc_device *soc_dev;
+	int ret;
+
+	soc_dev = kzalloc(sizeof(*soc_dev), GFP_KERNEL);
+	if (!soc_dev)
+		return ERR_PTR(-ENOMEM);
+
+	/* Fetch a unique (reclaimable) SOC ID. */
+	do {
+		if (!ida_pre_get(&soc_ida, GFP_KERNEL)) {
+			ret = -ENOMEM;
+			goto out_free;
+		}
+		spin_lock(&soc_lock);
+		ret = ida_get_new(&soc_ida, &soc_dev->soc_dev_num);
+		spin_unlock(&soc_lock);
+	} while (ret == -EAGAIN);
+
+	if (ret)
+		goto out_free;
+
+	soc_dev->attr = soc_dev_attr;
+	soc_dev->dev.bus = &soc_bus_type;
+	soc_dev->dev.groups = soc_attr_groups;
+	soc_dev->dev.release = soc_release;
+	dev_set_name(&soc_dev->dev, "soc%d", soc_dev->soc_dev_num);
+
+	ret = device_register(&soc_dev->dev);
+	if (ret)
+		goto out_put;
+
+	return soc_dev;
+
+out_put:
+	ida_remove(&soc_ida, soc_dev->soc_dev_num);
+	/*
+	 * After device_register() the embedded device owns soc_dev, even on
+	 * failure: drop the reference so soc_release() does the freeing.
+	 */
+	put_device(&soc_dev->dev);
+	return ERR_PTR(ret);
+out_free:
+	kfree(soc_dev);
+	return ERR_PTR(ret);
+}
+
+/* Ensure soc_dev->attr is freed prior to calling soc_device_unregister. */
+void soc_device_unregister(struct soc_device *soc_dev)
+{
+	ida_remove(&soc_ida, soc_dev->soc_dev_num);
+	/* NOTE(review): unlike ida_get_new(), the call above skips soc_lock */
+	device_unregister(&soc_dev->dev);	/* may end in soc_release() */
+}
+
+static int __init soc_bus_register(void)
+{
+	return bus_register(&soc_bus_type);	/* result passed straight on */
+}
+core_initcall(soc_bus_register);
+
+static void __exit soc_bus_unregister(void)
+{
+	ida_destroy(&soc_ida);
+	/* The ID allocator is torn down first, then the bus is removed. */
+	bus_unregister(&soc_bus_type);
+}
+module_exit(soc_bus_unregister);
diff --git a/drivers/base/sys.c b/drivers/base/sys.c
deleted file mode 100644
index 1667aaf4fde..00000000000
--- a/drivers/base/sys.c
+++ /dev/null
@@ -1,561 +0,0 @@
-/*
- * sys.c - pseudo-bus for system 'devices' (cpus, PICs, timers, etc)
- *
- * Copyright (c) 2002-3 Patrick Mochel
- *               2002-3 Open Source Development Lab
- *
- * This file is released under the GPLv2
- *
- * This exports a 'system' bus type.
- * By default, a 'sys' bus gets added to the root of the system. There will
- * always be core system devices. Devices can use sysdev_register() to
- * add themselves as children of the system bus.
- */
-
-#include <linux/sysdev.h>
-#include <linux/err.h>
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/string.h>
-#include <linux/pm.h>
-#include <linux/device.h>
-#include <linux/mutex.h>
-#include <linux/interrupt.h>
-
-#include "base.h"
-
-#define to_sysdev(k) container_of(k, struct sys_device, kobj)
-#define to_sysdev_attr(a) container_of(a, struct sysdev_attribute, attr)
-
-
-static ssize_t
-sysdev_show(struct kobject *kobj, struct attribute *attr, char *buffer)
-{
-	struct sys_device *sysdev = to_sysdev(kobj);
-	struct sysdev_attribute *sysdev_attr = to_sysdev_attr(attr);
-
-	if (sysdev_attr->show)
-		return sysdev_attr->show(sysdev, sysdev_attr, buffer);
-	return -EIO;
-}
-
-
-static ssize_t
-sysdev_store(struct kobject *kobj, struct attribute *attr,
-	     const char *buffer, size_t count)
-{
-	struct sys_device *sysdev = to_sysdev(kobj);
-	struct sysdev_attribute *sysdev_attr = to_sysdev_attr(attr);
-
-	if (sysdev_attr->store)
-		return sysdev_attr->store(sysdev, sysdev_attr, buffer, count);
-	return -EIO;
-}
-
-static const struct sysfs_ops sysfs_ops = {
-	.show	= sysdev_show,
-	.store	= sysdev_store,
-};
-
-static struct kobj_type ktype_sysdev = {
-	.sysfs_ops	= &sysfs_ops,
-};
-
-
-int sysdev_create_file(struct sys_device *s, struct sysdev_attribute *a)
-{
-	return sysfs_create_file(&s->kobj, &a->attr);
-}
-
-
-void sysdev_remove_file(struct sys_device *s, struct sysdev_attribute *a)
-{
-	sysfs_remove_file(&s->kobj, &a->attr);
-}
-
-EXPORT_SYMBOL_GPL(sysdev_create_file);
-EXPORT_SYMBOL_GPL(sysdev_remove_file);
-
-#define to_sysdev_class(k) container_of(k, struct sysdev_class, kset.kobj)
-#define to_sysdev_class_attr(a) container_of(a, \
-	struct sysdev_class_attribute, attr)
-
-static ssize_t sysdev_class_show(struct kobject *kobj, struct attribute *attr,
-				 char *buffer)
-{
-	struct sysdev_class *class = to_sysdev_class(kobj);
-	struct sysdev_class_attribute *class_attr = to_sysdev_class_attr(attr);
-
-	if (class_attr->show)
-		return class_attr->show(class, class_attr, buffer);
-	return -EIO;
-}
-
-static ssize_t sysdev_class_store(struct kobject *kobj, struct attribute *attr,
-				  const char *buffer, size_t count)
-{
-	struct sysdev_class *class = to_sysdev_class(kobj);
-	struct sysdev_class_attribute *class_attr = to_sysdev_class_attr(attr);
-
-	if (class_attr->store)
-		return class_attr->store(class, class_attr, buffer, count);
-	return -EIO;
-}
-
-static const struct sysfs_ops sysfs_class_ops = {
-	.show	= sysdev_class_show,
-	.store	= sysdev_class_store,
-};
-
-static struct kobj_type ktype_sysdev_class = {
-	.sysfs_ops	= &sysfs_class_ops,
-};
-
-int sysdev_class_create_file(struct sysdev_class *c,
-			     struct sysdev_class_attribute *a)
-{
-	return sysfs_create_file(&c->kset.kobj, &a->attr);
-}
-EXPORT_SYMBOL_GPL(sysdev_class_create_file);
-
-void sysdev_class_remove_file(struct sysdev_class *c,
-			      struct sysdev_class_attribute *a)
-{
-	sysfs_remove_file(&c->kset.kobj, &a->attr);
-}
-EXPORT_SYMBOL_GPL(sysdev_class_remove_file);
-
-static struct kset *system_kset;
-
-int sysdev_class_register(struct sysdev_class *cls)
-{
-	int retval;
-
-	pr_debug("Registering sysdev class '%s'\n", cls->name);
-
-	INIT_LIST_HEAD(&cls->drivers);
-	memset(&cls->kset.kobj, 0x00, sizeof(struct kobject));
-	cls->kset.kobj.parent = &system_kset->kobj;
-	cls->kset.kobj.ktype = &ktype_sysdev_class;
-	cls->kset.kobj.kset = system_kset;
-
-	retval = kobject_set_name(&cls->kset.kobj, "%s", cls->name);
-	if (retval)
-		return retval;
-
-	retval = kset_register(&cls->kset);
-	if (!retval && cls->attrs)
-		retval = sysfs_create_files(&cls->kset.kobj,
-					    (const struct attribute **)cls->attrs);
-	return retval;
-}
-
-void sysdev_class_unregister(struct sysdev_class *cls)
-{
-	pr_debug("Unregistering sysdev class '%s'\n",
-		 kobject_name(&cls->kset.kobj));
-	if (cls->attrs)
-		sysfs_remove_files(&cls->kset.kobj,
-				   (const struct attribute **)cls->attrs);
-	kset_unregister(&cls->kset);
-}
-
-EXPORT_SYMBOL_GPL(sysdev_class_register);
-EXPORT_SYMBOL_GPL(sysdev_class_unregister);
-
-static DEFINE_MUTEX(sysdev_drivers_lock);
-
-/**
- *	sysdev_driver_register - Register auxillary driver
- *	@cls:	Device class driver belongs to.
- *	@drv:	Driver.
- *
- *	@drv is inserted into @cls->drivers to be
- *	called on each operation on devices of that class. The refcount
- *	of @cls is incremented.
- */
-
-int sysdev_driver_register(struct sysdev_class *cls, struct sysdev_driver *drv)
-{
-	int err = 0;
-
-	if (!cls) {
-		WARN(1, KERN_WARNING "sysdev: invalid class passed to "
-			"sysdev_driver_register!\n");
-		return -EINVAL;
-	}
-
-	/* Check whether this driver has already been added to a class. */
-	if (drv->entry.next && !list_empty(&drv->entry))
-		WARN(1, KERN_WARNING "sysdev: class %s: driver (%p) has already"
-			" been registered to a class, something is wrong, but "
-			"will forge on!\n", cls->name, drv);
-
-	mutex_lock(&sysdev_drivers_lock);
-	if (cls && kset_get(&cls->kset)) {
-		list_add_tail(&drv->entry, &cls->drivers);
-
-		/* If devices of this class already exist, tell the driver */
-		if (drv->add) {
-			struct sys_device *dev;
-			list_for_each_entry(dev, &cls->kset.list, kobj.entry)
-				drv->add(dev);
-		}
-	} else {
-		err = -EINVAL;
-		WARN(1, KERN_ERR "%s: invalid device class\n", __func__);
-	}
-	mutex_unlock(&sysdev_drivers_lock);
-	return err;
-}
-
-
-/**
- *	sysdev_driver_unregister - Remove an auxillary driver.
- *	@cls:	Class driver belongs to.
- *	@drv:	Driver.
- */
-void sysdev_driver_unregister(struct sysdev_class *cls,
-			      struct sysdev_driver *drv)
-{
-	mutex_lock(&sysdev_drivers_lock);
-	list_del_init(&drv->entry);
-	if (cls) {
-		if (drv->remove) {
-			struct sys_device *dev;
-			list_for_each_entry(dev, &cls->kset.list, kobj.entry)
-				drv->remove(dev);
-		}
-		kset_put(&cls->kset);
-	}
-	mutex_unlock(&sysdev_drivers_lock);
-}
-
-EXPORT_SYMBOL_GPL(sysdev_driver_register);
-EXPORT_SYMBOL_GPL(sysdev_driver_unregister);
-
-
-
-/**
- *	sysdev_register - add a system device to the tree
- *	@sysdev:	device in question
- *
- */
-int sysdev_register(struct sys_device *sysdev)
-{
-	int error;
-	struct sysdev_class *cls = sysdev->cls;
-
-	if (!cls)
-		return -EINVAL;
-
-	pr_debug("Registering sys device of class '%s'\n",
-		 kobject_name(&cls->kset.kobj));
-
-	/* initialize the kobject to 0, in case it had previously been used */
-	memset(&sysdev->kobj, 0x00, sizeof(struct kobject));
-
-	/* Make sure the kset is set */
-	sysdev->kobj.kset = &cls->kset;
-
-	/* Register the object */
-	error = kobject_init_and_add(&sysdev->kobj, &ktype_sysdev, NULL,
-				     "%s%d", kobject_name(&cls->kset.kobj),
-				     sysdev->id);
-
-	if (!error) {
-		struct sysdev_driver *drv;
-
-		pr_debug("Registering sys device '%s'\n",
-			 kobject_name(&sysdev->kobj));
-
-		mutex_lock(&sysdev_drivers_lock);
-		/* Generic notification is implicit, because it's that
-		 * code that should have called us.
-		 */
-
-		/* Notify class auxillary drivers */
-		list_for_each_entry(drv, &cls->drivers, entry) {
-			if (drv->add)
-				drv->add(sysdev);
-		}
-		mutex_unlock(&sysdev_drivers_lock);
-		kobject_uevent(&sysdev->kobj, KOBJ_ADD);
-	}
-
-	return error;
-}
-
-void sysdev_unregister(struct sys_device *sysdev)
-{
-	struct sysdev_driver *drv;
-
-	mutex_lock(&sysdev_drivers_lock);
-	list_for_each_entry(drv, &sysdev->cls->drivers, entry) {
-		if (drv->remove)
-			drv->remove(sysdev);
-	}
-	mutex_unlock(&sysdev_drivers_lock);
-
-	kobject_put(&sysdev->kobj);
-}
-
-
-
-/**
- *	sysdev_shutdown - Shut down all system devices.
- *
- *	Loop over each class of system devices, and the devices in each
- *	of those classes. For each device, we call the shutdown method for
- *	each driver registered for the device - the auxillaries,
- *	and the class driver.
- *
- *	Note: The list is iterated in reverse order, so that we shut down
- *	child devices before we shut down their parents. The list ordering
- *	is guaranteed by virtue of the fact that child devices are registered
- *	after their parents.
- */
-void sysdev_shutdown(void)
-{
-	struct sysdev_class *cls;
-
-	pr_debug("Shutting Down System Devices\n");
-
-	mutex_lock(&sysdev_drivers_lock);
-	list_for_each_entry_reverse(cls, &system_kset->list, kset.kobj.entry) {
-		struct sys_device *sysdev;
-
-		pr_debug("Shutting down type '%s':\n",
-			 kobject_name(&cls->kset.kobj));
-
-		list_for_each_entry(sysdev, &cls->kset.list, kobj.entry) {
-			struct sysdev_driver *drv;
-			pr_debug(" %s\n", kobject_name(&sysdev->kobj));
-
-			/* Call auxillary drivers first */
-			list_for_each_entry(drv, &cls->drivers, entry) {
-				if (drv->shutdown)
-					drv->shutdown(sysdev);
-			}
-
-			/* Now call the generic one */
-			if (cls->shutdown)
-				cls->shutdown(sysdev);
-		}
-	}
-	mutex_unlock(&sysdev_drivers_lock);
-}
-
-static void __sysdev_resume(struct sys_device *dev)
-{
-	struct sysdev_class *cls = dev->cls;
-	struct sysdev_driver *drv;
-
-	/* First, call the class-specific one */
-	if (cls->resume)
-		cls->resume(dev);
-	WARN_ONCE(!irqs_disabled(),
-		"Interrupts enabled after %pF\n", cls->resume);
-
-	/* Call auxillary drivers next. */
-	list_for_each_entry(drv, &cls->drivers, entry) {
-		if (drv->resume)
-			drv->resume(dev);
-		WARN_ONCE(!irqs_disabled(),
-			"Interrupts enabled after %pF\n", drv->resume);
-	}
-}
-
-/**
- *	sysdev_suspend - Suspend all system devices.
- *	@state:		Power state to enter.
- *
- *	We perform an almost identical operation as sysdev_shutdown()
- *	above, though calling ->suspend() instead. Interrupts are disabled
- *	when this called. Devices are responsible for both saving state and
- *	quiescing or powering down the device.
- *
- *	This is only called by the device PM core, so we let them handle
- *	all synchronization.
- */
-int sysdev_suspend(pm_message_t state)
-{
-	struct sysdev_class *cls;
-	struct sys_device *sysdev, *err_dev;
-	struct sysdev_driver *drv, *err_drv;
-	int ret;
-
-	pr_debug("Checking wake-up interrupts\n");
-
-	/* Return error code if there are any wake-up interrupts pending */
-	ret = check_wakeup_irqs();
-	if (ret)
-		return ret;
-
-	WARN_ONCE(!irqs_disabled(),
-		"Interrupts enabled while suspending system devices\n");
-
-	pr_debug("Suspending System Devices\n");
-
-	list_for_each_entry_reverse(cls, &system_kset->list, kset.kobj.entry) {
-		pr_debug("Suspending type '%s':\n",
-			 kobject_name(&cls->kset.kobj));
-
-		list_for_each_entry(sysdev, &cls->kset.list, kobj.entry) {
-			pr_debug(" %s\n", kobject_name(&sysdev->kobj));
-
-			/* Call auxillary drivers first */
-			list_for_each_entry(drv, &cls->drivers, entry) {
-				if (drv->suspend) {
-					ret = drv->suspend(sysdev, state);
-					if (ret)
-						goto aux_driver;
-				}
-				WARN_ONCE(!irqs_disabled(),
-					"Interrupts enabled after %pF\n",
-					drv->suspend);
-			}
-
-			/* Now call the generic one */
-			if (cls->suspend) {
-				ret = cls->suspend(sysdev, state);
-				if (ret)
-					goto cls_driver;
-				WARN_ONCE(!irqs_disabled(),
-					"Interrupts enabled after %pF\n",
-					cls->suspend);
-			}
-		}
-	}
-	return 0;
-	/* resume current sysdev */
-cls_driver:
-	drv = NULL;
-	printk(KERN_ERR "Class suspend failed for %s: %d\n",
-		kobject_name(&sysdev->kobj), ret);
-
-aux_driver:
-	if (drv)
-		printk(KERN_ERR "Class driver suspend failed for %s: %d\n",
-				kobject_name(&sysdev->kobj), ret);
-	list_for_each_entry(err_drv, &cls->drivers, entry) {
-		if (err_drv == drv)
-			break;
-		if (err_drv->resume)
-			err_drv->resume(sysdev);
-	}
-
-	/* resume other sysdevs in current class */
-	list_for_each_entry(err_dev, &cls->kset.list, kobj.entry) {
-		if (err_dev == sysdev)
-			break;
-		pr_debug(" %s\n", kobject_name(&err_dev->kobj));
-		__sysdev_resume(err_dev);
-	}
-
-	/* resume other classes */
-	list_for_each_entry_continue(cls, &system_kset->list, kset.kobj.entry) {
-		list_for_each_entry(err_dev, &cls->kset.list, kobj.entry) {
-			pr_debug(" %s\n", kobject_name(&err_dev->kobj));
-			__sysdev_resume(err_dev);
-		}
-	}
-	return ret;
-}
-EXPORT_SYMBOL_GPL(sysdev_suspend);
-
-/**
- *	sysdev_resume - Bring system devices back to life.
- *
- *	Similar to sysdev_suspend(), but we iterate the list forwards
- *	to guarantee that parent devices are resumed before their children.
- *
- *	Note: Interrupts are disabled when called.
- */
-int sysdev_resume(void)
-{
-	struct sysdev_class *cls;
-
-	WARN_ONCE(!irqs_disabled(),
-		"Interrupts enabled while resuming system devices\n");
-
-	pr_debug("Resuming System Devices\n");
-
-	list_for_each_entry(cls, &system_kset->list, kset.kobj.entry) {
-		struct sys_device *sysdev;
-
-		pr_debug("Resuming type '%s':\n",
-			 kobject_name(&cls->kset.kobj));
-
-		list_for_each_entry(sysdev, &cls->kset.list, kobj.entry) {
-			pr_debug(" %s\n", kobject_name(&sysdev->kobj));
-
-			__sysdev_resume(sysdev);
-		}
-	}
-	return 0;
-}
-EXPORT_SYMBOL_GPL(sysdev_resume);
-
-int __init system_bus_init(void)
-{
-	system_kset = kset_create_and_add("system", NULL, &devices_kset->kobj);
-	if (!system_kset)
-		return -ENOMEM;
-	return 0;
-}
-
-EXPORT_SYMBOL_GPL(sysdev_register);
-EXPORT_SYMBOL_GPL(sysdev_unregister);
-
-#define to_ext_attr(x) container_of(x, struct sysdev_ext_attribute, attr)
-
-ssize_t sysdev_store_ulong(struct sys_device *sysdev,
-			   struct sysdev_attribute *attr,
-			   const char *buf, size_t size)
-{
-	struct sysdev_ext_attribute *ea = to_ext_attr(attr);
-	char *end;
-	unsigned long new = simple_strtoul(buf, &end, 0);
-	if (end == buf)
-		return -EINVAL;
-	*(unsigned long *)(ea->var) = new;
-	/* Always return full write size even if we didn't consume all */
-	return size;
-}
-EXPORT_SYMBOL_GPL(sysdev_store_ulong);
-
-ssize_t sysdev_show_ulong(struct sys_device *sysdev,
-			  struct sysdev_attribute *attr,
-			  char *buf)
-{
-	struct sysdev_ext_attribute *ea = to_ext_attr(attr);
-	return snprintf(buf, PAGE_SIZE, "%lx\n", *(unsigned long *)(ea->var));
-}
-EXPORT_SYMBOL_GPL(sysdev_show_ulong);
-
-ssize_t sysdev_store_int(struct sys_device *sysdev,
-			   struct sysdev_attribute *attr,
-			   const char *buf, size_t size)
-{
-	struct sysdev_ext_attribute *ea = to_ext_attr(attr);
-	char *end;
-	long new = simple_strtol(buf, &end, 0);
-	if (end == buf || new > INT_MAX || new < INT_MIN)
-		return -EINVAL;
-	*(int *)(ea->var) = new;
-	/* Always return full write size even if we didn't consume all */
-	return size;
-}
-EXPORT_SYMBOL_GPL(sysdev_store_int);
-
-ssize_t sysdev_show_int(struct sys_device *sysdev,
-			  struct sysdev_attribute *attr,
-			  char *buf)
-{
-	struct sysdev_ext_attribute *ea = to_ext_attr(attr);
-	return snprintf(buf, PAGE_SIZE, "%d\n", *(int *)(ea->var));
-}
-EXPORT_SYMBOL_GPL(sysdev_show_int);
-
diff --git a/drivers/base/syscore.c b/drivers/base/syscore.c
new file mode 100644
index 00000000000..dbb8350ea8d
--- /dev/null
+++ b/drivers/base/syscore.c
@@ -0,0 +1,132 @@
+/*
+ *  syscore.c - Execution of system core operations.
+ *
+ *  Copyright (C) 2011 Rafael J. Wysocki <rjw@sisk.pl>, Novell Inc.
+ *
+ *  This file is released under the GPLv2.
+ */
+
+#include <linux/syscore_ops.h>
+#include <linux/mutex.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <trace/events/power.h>
+
+static LIST_HEAD(syscore_ops_list);
+static DEFINE_MUTEX(syscore_ops_lock);
+
+/**
+ * register_syscore_ops - Append system core operations to the global list.
+ * @ops: Operations to link at the tail of syscore_ops_list (mutex-protected).
+ */
+void register_syscore_ops(struct syscore_ops *ops)
+{
+	mutex_lock(&syscore_ops_lock);
+	list_add_tail(&ops->node, &syscore_ops_list);
+	mutex_unlock(&syscore_ops_lock);
+}
+EXPORT_SYMBOL_GPL(register_syscore_ops);
+
+/**
+ * unregister_syscore_ops - Unlink system core operations from the global list.
+ * @ops: Operations to remove; list_del() runs under syscore_ops_lock.
+ */
+void unregister_syscore_ops(struct syscore_ops *ops)
+{
+	mutex_lock(&syscore_ops_lock);
+	list_del(&ops->node);
+	mutex_unlock(&syscore_ops_lock);
+}
+EXPORT_SYMBOL_GPL(unregister_syscore_ops);
+
+#ifdef CONFIG_PM_SLEEP
+/**
+ * syscore_suspend - Run the suspend callback of every registered syscore_ops.
+ *
+ * This function is executed with one CPU on-line and disabled interrupts.
+ */
+int syscore_suspend(void)
+{
+	struct syscore_ops *cur;
+	int err;
+
+	trace_suspend_resume(TPS("syscore_suspend"), 0, true);
+	pr_debug("Checking wakeup interrupts\n");
+
+	/* A non-zero result from the wakeup IRQ check aborts right away. */
+	err = check_wakeup_irqs();
+	if (err)
+		return err;
+
+	WARN_ONCE(!irqs_disabled(),
+		"Interrupts enabled before system core suspend.\n");
+
+	list_for_each_entry_reverse(cur, &syscore_ops_list, node) {
+		if (!cur->suspend)
+			continue;
+		if (initcall_debug)
+			pr_info("PM: Calling %pF\n", cur->suspend);
+		err = cur->suspend();
+		if (err)
+			goto err_out;
+		WARN_ONCE(!irqs_disabled(),
+			"Interrupts enabled after %pF\n", cur->suspend);
+	}
+
+	trace_suspend_resume(TPS("syscore_suspend"), 0, false);
+	return 0;
+
+ err_out:
+	pr_err("PM: System core suspend callback %pF failed.\n", cur->suspend);
+	/* Walk forward again, resuming the entries visited before the failure. */
+	list_for_each_entry_continue(cur, &syscore_ops_list, node)
+		if (cur->resume)
+			cur->resume();
+	return err;
+}
+EXPORT_SYMBOL_GPL(syscore_suspend);
+
+/**
+ * syscore_resume - Run the resume callback of every registered syscore_ops.
+ *
+ * This function is executed with one CPU on-line and disabled interrupts.
+ */
+void syscore_resume(void)
+{
+	struct syscore_ops *cur;
+
+	trace_suspend_resume(TPS("syscore_resume"), 0, true);
+	WARN_ONCE(!irqs_disabled(),
+		"Interrupts enabled before system core resume.\n");
+	list_for_each_entry(cur, &syscore_ops_list, node) {
+		if (!cur->resume)
+			continue;
+		if (initcall_debug)
+			pr_info("PM: Calling %pF\n", cur->resume);
+		cur->resume();
+		WARN_ONCE(!irqs_disabled(),
+			"Interrupts enabled after %pF\n", cur->resume);
+	}
+	trace_suspend_resume(TPS("syscore_resume"), 0, false);
+}
+EXPORT_SYMBOL_GPL(syscore_resume);
+#endif /* CONFIG_PM_SLEEP */
+
+/**
+ * syscore_shutdown - Run the shutdown callback of every registered syscore_ops.
+ * The list is walked in reverse while syscore_ops_lock is held.
+ */
+void syscore_shutdown(void)
+{
+	struct syscore_ops *cur;
+
+	mutex_lock(&syscore_ops_lock);
+	list_for_each_entry_reverse(cur, &syscore_ops_list, node) {
+		if (!cur->shutdown)
+			continue;
+		if (initcall_debug)
+			pr_info("PM: Calling %pF\n", cur->shutdown);
+		cur->shutdown();
+	}
+	mutex_unlock(&syscore_ops_lock);
+}
diff --git a/drivers/base/topology.c b/drivers/base/topology.c
index f6f37a05a0c..be7c1fb7c0c 100644
--- a/drivers/base/topology.c
+++ b/drivers/base/topology.c
@@ -23,8 +23,6 @@
  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  *
  */
-#include <linux/sysdev.h>
-#include <linux/init.h>
 #include <linux/mm.h>
 #include <linux/cpu.h>
 #include <linux/module.h>
@@ -32,17 +30,16 @@
 #include <linux/topology.h>
 
 #define define_one_ro_named(_name, _func)				\
-static SYSDEV_ATTR(_name, 0444, _func, NULL)
+	static DEVICE_ATTR(_name, 0444, _func, NULL)
 
 #define define_one_ro(_name)				\
-static SYSDEV_ATTR(_name, 0444, show_##_name, NULL)
+	static DEVICE_ATTR(_name, 0444, show_##_name, NULL)
 
 #define define_id_show_func(name)				\
-static ssize_t show_##name(struct sys_device *dev,		\
-		struct sysdev_attribute *attr, char *buf)	\
+static ssize_t show_##name(struct device *dev,			\
+		struct device_attribute *attr, char *buf)	\
 {								\
-	unsigned int cpu = dev->id;				\
-	return sprintf(buf, "%d\n", topology_##name(cpu));	\
+	return sprintf(buf, "%d\n", topology_##name(dev->id));	\
 }
 
 #if defined(topology_thread_cpumask) || defined(topology_core_cpumask) || \
@@ -63,40 +60,20 @@ static ssize_t show_cpumap(int type, const struct cpumask *mask, char *buf)
 }
 #endif
 
-#ifdef arch_provides_topology_pointers
 #define define_siblings_show_map(name)					\
-static ssize_t show_##name(struct sys_device *dev,			\
-			   struct sysdev_attribute *attr, char *buf)	\
-{									\
-	unsigned int cpu = dev->id;					\
-	return show_cpumap(0, topology_##name(cpu), buf);		\
-}
-
-#define define_siblings_show_list(name)					\
-static ssize_t show_##name##_list(struct sys_device *dev,		\
-				  struct sysdev_attribute *attr,	\
-				  char *buf)				\
-{									\
-	unsigned int cpu = dev->id;					\
-	return show_cpumap(1, topology_##name(cpu), buf);		\
-}
-
-#else
-#define define_siblings_show_map(name)					\
-static ssize_t show_##name(struct sys_device *dev,			\
-			   struct sysdev_attribute *attr, char *buf)	\
+static ssize_t show_##name(struct device *dev,				\
+			   struct device_attribute *attr, char *buf)	\
 {									\
 	return show_cpumap(0, topology_##name(dev->id), buf);		\
 }
 
 #define define_siblings_show_list(name)					\
-static ssize_t show_##name##_list(struct sys_device *dev,		\
-				  struct sysdev_attribute *attr,	\
+static ssize_t show_##name##_list(struct device *dev,			\
+				  struct device_attribute *attr,	\
 				  char *buf)				\
 {									\
 	return show_cpumap(1, topology_##name(dev->id), buf);		\
 }
-#endif
 
 #define define_siblings_show_func(name)		\
 	define_siblings_show_map(name); define_siblings_show_list(name)
@@ -124,16 +101,16 @@ define_one_ro_named(book_siblings_list, show_book_cpumask_list);
 #endif
 
 static struct attribute *default_attrs[] = {
-	&attr_physical_package_id.attr,
-	&attr_core_id.attr,
-	&attr_thread_siblings.attr,
-	&attr_thread_siblings_list.attr,
-	&attr_core_siblings.attr,
-	&attr_core_siblings_list.attr,
+	&dev_attr_physical_package_id.attr,
+	&dev_attr_core_id.attr,
+	&dev_attr_thread_siblings.attr,
+	&dev_attr_thread_siblings_list.attr,
+	&dev_attr_core_siblings.attr,
+	&dev_attr_core_siblings_list.attr,
 #ifdef CONFIG_SCHED_BOOK
-	&attr_book_id.attr,
-	&attr_book_siblings.attr,
-	&attr_book_siblings_list.attr,
+	&dev_attr_book_id.attr,
+	&dev_attr_book_siblings.attr,
+	&dev_attr_book_siblings_list.attr,
 #endif
 	NULL
 };
@@ -144,22 +121,22 @@ static struct attribute_group topology_attr_group = {
 };
 
 /* Add/Remove cpu_topology interface for CPU device */
-static int __cpuinit topology_add_dev(unsigned int cpu)
+static int topology_add_dev(unsigned int cpu)
 {
-	struct sys_device *sys_dev = get_cpu_sysdev(cpu);
+	struct device *dev = get_cpu_device(cpu);
 
-	return sysfs_create_group(&sys_dev->kobj, &topology_attr_group);
+	return sysfs_create_group(&dev->kobj, &topology_attr_group);
 }
 
-static void __cpuinit topology_remove_dev(unsigned int cpu)
+static void topology_remove_dev(unsigned int cpu)
 {
-	struct sys_device *sys_dev = get_cpu_sysdev(cpu);
+	struct device *dev = get_cpu_device(cpu);
 
-	sysfs_remove_group(&sys_dev->kobj, &topology_attr_group);
+	sysfs_remove_group(&dev->kobj, &topology_attr_group);
 }
 
-static int __cpuinit topology_cpu_callback(struct notifier_block *nfb,
-					   unsigned long action, void *hcpu)
+static int topology_cpu_callback(struct notifier_block *nfb,
+				 unsigned long action, void *hcpu)
 {
 	unsigned int cpu = (unsigned long)hcpu;
 	int rc = 0;
@@ -179,19 +156,23 @@ static int __cpuinit topology_cpu_callback(struct notifier_block *nfb,
 	return notifier_from_errno(rc);
 }
 
-static int __cpuinit topology_sysfs_init(void)
+static int topology_sysfs_init(void)
 {
 	int cpu;
-	int rc;
+	int rc = 0;
+
+	cpu_notifier_register_begin();
 
 	for_each_online_cpu(cpu) {
 		rc = topology_add_dev(cpu);
 		if (rc)
-			return rc;
+			goto out;
 	}
-	hotcpu_notifier(topology_cpu_callback, 0);
+	__hotcpu_notifier(topology_cpu_callback, 0);
 
-	return 0;
+out:
+	cpu_notifier_register_done();
+	return rc;
 }
 
 device_initcall(topology_sysfs_init);
diff --git a/drivers/base/transport_class.c b/drivers/base/transport_class.c
index 84997efdb23..f6c453c3816 100644
--- a/drivers/base/transport_class.c
+++ b/drivers/base/transport_class.c
@@ -27,6 +27,7 @@
  * transport class is framed entirely in terms of generic devices to
  * allow it to be used by any physical HBA in the system.
  */
+#include <linux/export.h>
 #include <linux/attribute_container.h>
 #include <linux/transport_class.h>