Diffstat (limited to 'drivers/cpuidle')
24 files changed, 2306 insertions, 623 deletions
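The central framework change in this batch (see the cpuidle.c hunk near the end of the diff) splits the monolithic cpuidle_idle_call() into three entry points that the architecture idle loop drives itself: cpuidle_select(), cpuidle_enter() and cpuidle_reflect(). The sketch below shows how a caller of the split API fits together; it is illustrative only (the function name is hypothetical) and omits the need_resched() checks, broadcast-timer notifications and tracepoints that the real idle loop keeps.

#include <linux/cpu.h>
#include <linux/cpuidle.h>
#include <linux/percpu.h>

/* Sketch of a caller of the split cpuidle API -- not code from this diff */
static void cpuidle_idle_loop_sketch(void)
{
	struct cpuidle_device *dev = __this_cpu_read(cpuidle_devices);
	struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev);
	int next_state, entered_state;

	/* ask the governor (or the "deepest state" override) for a state */
	next_state = cpuidle_select(drv, dev);
	if (next_state < 0) {
		arch_cpu_idle();	/* cpuidle unusable: plain WFI/hlt fallback */
		return;
	}

	/* enter the state, taking the coupled path when required */
	entered_state = cpuidle_enter(drv, dev, next_state);

	/* tell the governor which state was actually entered */
	cpuidle_reflect(dev, entered_state);
}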
diff --git a/drivers/cpuidle/Kconfig b/drivers/cpuidle/Kconfig
index c4cc27e5c8a..1b96fb91d32 100644
--- a/drivers/cpuidle/Kconfig
+++ b/drivers/cpuidle/Kconfig
@@ -1,7 +1,10 @@
+menu "CPU Idle"
 config CPU_IDLE
 	bool "CPU idle PM support"
 	default y if ACPI || PPC_PSERIES
+	select CPU_IDLE_GOV_LADDER if (!NO_HZ && !NO_HZ_IDLE)
+	select CPU_IDLE_GOV_MENU if (NO_HZ || NO_HZ_IDLE)
 	help
 	  CPU idle is a generic framework for supporting software-controlled
 	  idle processor power management. It includes modular cross-platform
@@ -9,9 +12,10 @@ config CPU_IDLE
 	  If you're using an ACPI-enabled platform, you should say Y here.

+if CPU_IDLE
+
 config CPU_IDLE_MULTIPLE_DRIVERS
 	bool "Support multiple cpuidle drivers"
-	depends on CPU_IDLE
 	default n
 	help
 	 Allows the cpuidle framework to use different drivers for each CPU.
@@ -19,24 +23,30 @@ config CPU_IDLE_MULTIPLE_DRIVERS
 	 states. If unsure say N.

 config CPU_IDLE_GOV_LADDER
-	bool
-	depends on CPU_IDLE
+	bool "Ladder governor (for periodic timer tick)"
 	default y

 config CPU_IDLE_GOV_MENU
-	bool
-	depends on CPU_IDLE && NO_HZ
+	bool "Menu governor (for tickless system)"
 	default y

-config ARCH_NEEDS_CPU_IDLE_COUPLED
-	def_bool n
+menu "ARM CPU Idle Drivers"
+depends on ARM
+source "drivers/cpuidle/Kconfig.arm"
+endmenu

-if CPU_IDLE
+menu "MIPS CPU Idle Drivers"
+depends on MIPS
+source "drivers/cpuidle/Kconfig.mips"
+endmenu

-config CPU_IDLE_CALXEDA
-	bool "CPU Idle Driver for Calxeda processors"
-	depends on ARCH_HIGHBANK
-	help
-	  Select this to enable cpuidle on Calxeda processors.
+menu "POWERPC CPU Idle Drivers"
+depends on PPC
+source "drivers/cpuidle/Kconfig.powerpc"
+endmenu

 endif
+
+config ARCH_NEEDS_CPU_IDLE_COUPLED
+	def_bool n
+endmenu
diff --git a/drivers/cpuidle/Kconfig.arm b/drivers/cpuidle/Kconfig.arm
new file mode 100644
index 00000000000..b6d69e899f5
--- /dev/null
+++ b/drivers/cpuidle/Kconfig.arm
@@ -0,0 +1,63 @@
+#
+# ARM CPU Idle drivers
+#
+config ARM_ARMADA_370_XP_CPUIDLE
+	bool "CPU Idle Driver for Armada 370/XP family processors"
+	depends on ARCH_MVEBU
+	help
+	  Select this to enable cpuidle on Armada 370/XP processors.
+
+config ARM_BIG_LITTLE_CPUIDLE
+	bool "Support for ARM big.LITTLE processors"
+	depends on ARCH_VEXPRESS_TC2_PM
+	select ARM_CPU_SUSPEND
+	select CPU_IDLE_MULTIPLE_DRIVERS
+	help
+	  Select this option to enable CPU idle driver for big.LITTLE based
+	  ARM systems. Driver manages CPUs coordination through MCPM and
+	  define different C-states for little and big cores through the
+	  multiple CPU idle drivers infrastructure.
+
+config ARM_CLPS711X_CPUIDLE
+	bool "CPU Idle Driver for CLPS711X processors"
+	depends on ARCH_CLPS711X || COMPILE_TEST
+	help
+	  Select this to enable cpuidle on Cirrus Logic CLPS711X SOCs.
+
+config ARM_HIGHBANK_CPUIDLE
+	bool "CPU Idle Driver for Calxeda processors"
+	depends on ARM_PSCI
+	select ARM_CPU_SUSPEND
+	help
+	  Select this to enable cpuidle on Calxeda processors.
+
+config ARM_KIRKWOOD_CPUIDLE
+	bool "CPU Idle Driver for Marvell Kirkwood SoCs"
+	depends on ARCH_KIRKWOOD || MACH_KIRKWOOD
+	help
+	  This adds the CPU Idle driver for Marvell Kirkwood SoCs.
+
+config ARM_ZYNQ_CPUIDLE
+	bool "CPU Idle Driver for Xilinx Zynq processors"
+	depends on ARCH_ZYNQ
+	help
+	  Select this to enable cpuidle on Xilinx Zynq processors.
+
+config ARM_U8500_CPUIDLE
+	bool "Cpu Idle Driver for the ST-E u8500 processors"
+	depends on ARCH_U8500
+	help
+	  Select this to enable cpuidle for ST-E u8500 processors
+
+config ARM_AT91_CPUIDLE
+	bool "Cpu Idle Driver for the AT91 processors"
+	default y
+	depends on ARCH_AT91
+	help
+	  Select this to enable cpuidle for AT91 processors
+
+config ARM_EXYNOS_CPUIDLE
+	bool "Cpu Idle Driver for the Exynos processors"
+	depends on ARCH_EXYNOS
+	help
+	  Select this to enable cpuidle for Exynos processors
diff --git a/drivers/cpuidle/Kconfig.mips b/drivers/cpuidle/Kconfig.mips
new file mode 100644
index 00000000000..0e70ee28a5c
--- /dev/null
+++ b/drivers/cpuidle/Kconfig.mips
@@ -0,0 +1,17 @@
+#
+# MIPS CPU Idle Drivers
+#
+config MIPS_CPS_CPUIDLE
+	bool "CPU Idle driver for MIPS CPS platforms"
+	depends on CPU_IDLE
+	depends on SYS_SUPPORTS_MIPS_CPS
+	select ARCH_NEEDS_CPU_IDLE_COUPLED if MIPS_MT
+	select GENERIC_CLOCKEVENTS_BROADCAST if SMP
+	select MIPS_CPS_PM
+	default y
+	help
+	  Select this option to enable processor idle state management
+	  through cpuidle for systems built around the MIPS Coherent
+	  Processing System (CPS) architecture. In order to make use of
+	  the deepest idle states you will need to ensure that you are
+	  also using the CONFIG_MIPS_CPS SMP implementation.
diff --git a/drivers/cpuidle/Kconfig.powerpc b/drivers/cpuidle/Kconfig.powerpc
new file mode 100644
index 00000000000..66c3a09574e
--- /dev/null
+++ b/drivers/cpuidle/Kconfig.powerpc
@@ -0,0 +1,20 @@
+#
+# POWERPC CPU Idle Drivers
+#
+config PSERIES_CPUIDLE
+	bool "Cpuidle driver for pSeries platforms"
+	depends on CPU_IDLE
+	depends on PPC_PSERIES
+	default y
+	help
+	  Select this option to enable processor idle state management
+	  through cpuidle subsystem.
+
+config POWERNV_CPUIDLE
+	bool "Cpuidle driver for powernv platforms"
+	depends on CPU_IDLE
+	depends on PPC_POWERNV
+	default y
+	help
+	  Select this option to enable processor idle state management
+	  through cpuidle subsystem.
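Most of the new ARM SoC entries above are backed by small platform drivers that share one shape: probe() pulls a SoC-specific standby callback out of platform_data and registers a two-state cpuidle driver (WFI plus one deeper state). The sketch below condenses that pattern with hypothetical "foo" names; it is modelled on the at91/armada/exynos drivers added later in this diff, not a driver being merged here.

#include <linux/cpuidle.h>
#include <linux/module.h>
#include <linux/platform_device.h>
#include <asm/cpuidle.h>

/* SoC-specific standby hook, handed over via platform_data (hypothetical) */
static void (*foo_standby)(void);

static int foo_enter_idle(struct cpuidle_device *dev,
			  struct cpuidle_driver *drv, int index)
{
	foo_standby();		/* put the SoC into its low-power state */
	return index;		/* report the state actually entered */
}

static struct cpuidle_driver foo_idle_driver = {
	.name		= "foo_idle",
	.owner		= THIS_MODULE,
	.states[0]	= ARM_CPUIDLE_WFI_STATE,	/* plain WFI */
	.states[1]	= {
		.enter			= foo_enter_idle,
		.exit_latency		= 10,
		.target_residency	= 10000,
		.flags			= CPUIDLE_FLAG_TIME_VALID,
		.name			= "SOC_SR",
		.desc			= "WFI and SoC low-power state",
	},
	.state_count	= 2,
};

static int foo_cpuidle_probe(struct platform_device *pdev)
{
	foo_standby = (void *)(pdev->dev.platform_data);
	return cpuidle_register(&foo_idle_driver, NULL);
}

static struct platform_driver foo_cpuidle_driver = {
	.driver	= {
		.name	= "cpuidle-foo",
		.owner	= THIS_MODULE,
	},
	.probe	= foo_cpuidle_probe,
};
module_platform_driver(foo_cpuidle_driver);

Registering through a platform device (rather than an of_machine_is_compatible() check in an initcall) is what lets the machine code simply create the device and pass the callback, so the drivers can live under drivers/cpuidle/ instead of arch/arm.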
diff --git a/drivers/cpuidle/Makefile b/drivers/cpuidle/Makefile index 0d8bd55e776..d8bb1ff7256 100644 --- a/drivers/cpuidle/Makefile +++ b/drivers/cpuidle/Makefile @@ -5,5 +5,23 @@ obj-y += cpuidle.o driver.o governor.o sysfs.o governors/ obj-$(CONFIG_ARCH_NEEDS_CPU_IDLE_COUPLED) += coupled.o -obj-$(CONFIG_CPU_IDLE_CALXEDA) += cpuidle-calxeda.o -obj-$(CONFIG_ARCH_KIRKWOOD) += cpuidle-kirkwood.o +################################################################################## +# ARM SoC drivers +obj-$(CONFIG_ARM_ARMADA_370_XP_CPUIDLE) += cpuidle-armada-370-xp.o +obj-$(CONFIG_ARM_BIG_LITTLE_CPUIDLE) += cpuidle-big_little.o +obj-$(CONFIG_ARM_CLPS711X_CPUIDLE) += cpuidle-clps711x.o +obj-$(CONFIG_ARM_HIGHBANK_CPUIDLE) += cpuidle-calxeda.o +obj-$(CONFIG_ARM_KIRKWOOD_CPUIDLE) += cpuidle-kirkwood.o +obj-$(CONFIG_ARM_ZYNQ_CPUIDLE) += cpuidle-zynq.o +obj-$(CONFIG_ARM_U8500_CPUIDLE) += cpuidle-ux500.o +obj-$(CONFIG_ARM_AT91_CPUIDLE) += cpuidle-at91.o +obj-$(CONFIG_ARM_EXYNOS_CPUIDLE) += cpuidle-exynos.o + +############################################################################### +# MIPS drivers +obj-$(CONFIG_MIPS_CPS_CPUIDLE) += cpuidle-cps.o + +############################################################################### +# POWERPC drivers +obj-$(CONFIG_PSERIES_CPUIDLE) += cpuidle-pseries.o +obj-$(CONFIG_POWERNV_CPUIDLE) += cpuidle-powernv.o diff --git a/drivers/cpuidle/coupled.c b/drivers/cpuidle/coupled.c index 2a297f86dba..73fe2f8d7f9 100644 --- a/drivers/cpuidle/coupled.c +++ b/drivers/cpuidle/coupled.c @@ -106,6 +106,7 @@ struct cpuidle_coupled { cpumask_t coupled_cpus; int requested_state[NR_CPUS]; atomic_t ready_waiting_counts; + atomic_t abort_barrier; int online_count; int refcnt; int prevent; @@ -122,12 +123,19 @@ static DEFINE_MUTEX(cpuidle_coupled_lock); static DEFINE_PER_CPU(struct call_single_data, cpuidle_coupled_poke_cb); /* - * The cpuidle_coupled_poked_mask mask is used to avoid calling + * The cpuidle_coupled_poke_pending mask is used to avoid calling * __smp_call_function_single with the per cpu call_single_data struct already * in use. This prevents a deadlock where two cpus are waiting for each others * call_single_data struct to be available */ -static cpumask_t cpuidle_coupled_poked_mask; +static cpumask_t cpuidle_coupled_poke_pending; + +/* + * The cpuidle_coupled_poked mask is used to ensure that each cpu has been poked + * once to minimize entering the ready loop with a poke pending, which would + * require aborting and retrying. + */ +static cpumask_t cpuidle_coupled_poked; /** * cpuidle_coupled_parallel_barrier - synchronize all online coupled cpus @@ -139,7 +147,7 @@ static cpumask_t cpuidle_coupled_poked_mask; * has returned from this function, the barrier is immediately available for * reuse. * - * The atomic variable a must be initialized to 0 before any cpu calls + * The atomic variable must be initialized to 0 before any cpu calls * this function, will be reset to 0 before any cpu returns from this function. 
* * Must only be called from within a coupled idle state handler @@ -151,7 +159,7 @@ void cpuidle_coupled_parallel_barrier(struct cpuidle_device *dev, atomic_t *a) { int n = dev->coupled->online_count; - smp_mb__before_atomic_inc(); + smp_mb__before_atomic(); atomic_inc(a); while (atomic_read(a) < n) @@ -291,10 +299,11 @@ static inline int cpuidle_coupled_get_state(struct cpuidle_device *dev, return state; } -static void cpuidle_coupled_poked(void *info) +static void cpuidle_coupled_handle_poke(void *info) { int cpu = (unsigned long)info; - cpumask_clear_cpu(cpu, &cpuidle_coupled_poked_mask); + cpumask_set_cpu(cpu, &cpuidle_coupled_poked); + cpumask_clear_cpu(cpu, &cpuidle_coupled_poke_pending); } /** @@ -313,8 +322,8 @@ static void cpuidle_coupled_poke(int cpu) { struct call_single_data *csd = &per_cpu(cpuidle_coupled_poke_cb, cpu); - if (!cpumask_test_and_set_cpu(cpu, &cpuidle_coupled_poked_mask)) - __smp_call_function_single(cpu, csd, 0); + if (!cpumask_test_and_set_cpu(cpu, &cpuidle_coupled_poke_pending)) + smp_call_function_single_async(cpu, csd); } /** @@ -340,30 +349,19 @@ static void cpuidle_coupled_poke_others(int this_cpu, * @coupled: the struct coupled that contains the current cpu * @next_state: the index in drv->states of the requested state for this cpu * - * Updates the requested idle state for the specified cpuidle device, - * poking all coupled cpus out of idle if necessary to let them see the new - * state. + * Updates the requested idle state for the specified cpuidle device. + * Returns the number of waiting cpus. */ -static void cpuidle_coupled_set_waiting(int cpu, +static int cpuidle_coupled_set_waiting(int cpu, struct cpuidle_coupled *coupled, int next_state) { - int w; - coupled->requested_state[cpu] = next_state; /* - * If this is the last cpu to enter the waiting state, poke - * all the other cpus out of their waiting state so they can - * enter a deeper state. This can race with one of the cpus - * exiting the waiting state due to an interrupt and - * decrementing waiting_count, see comment below. - * * The atomic_inc_return provides a write barrier to order the write * to requested_state with the later write that increments ready_count. */ - w = atomic_inc_return(&coupled->ready_waiting_counts) & WAITING_MASK; - if (w == coupled->online_count) - cpuidle_coupled_poke_others(cpu, coupled); + return atomic_inc_return(&coupled->ready_waiting_counts) & WAITING_MASK; } /** @@ -410,19 +408,33 @@ static void cpuidle_coupled_set_done(int cpu, struct cpuidle_coupled *coupled) * been processed and the poke bit has been cleared. * * Other interrupts may also be processed while interrupts are enabled, so - * need_resched() must be tested after turning interrupts off again to make sure + * need_resched() must be tested after this function returns to make sure * the interrupt didn't schedule work that should take the cpu out of idle. * - * Returns 0 if need_resched was false, -EINTR if need_resched was true. + * Returns 0 if no poke was pending, 1 if a poke was cleared. */ static int cpuidle_coupled_clear_pokes(int cpu) { + if (!cpumask_test_cpu(cpu, &cpuidle_coupled_poke_pending)) + return 0; + local_irq_enable(); - while (cpumask_test_cpu(cpu, &cpuidle_coupled_poked_mask)) + while (cpumask_test_cpu(cpu, &cpuidle_coupled_poke_pending)) cpu_relax(); local_irq_disable(); - return need_resched() ? 
-EINTR : 0; + return 1; +} + +static bool cpuidle_coupled_any_pokes_pending(struct cpuidle_coupled *coupled) +{ + cpumask_t cpus; + int ret; + + cpumask_and(&cpus, cpu_online_mask, &coupled->coupled_cpus); + ret = cpumask_and(&cpus, &cpuidle_coupled_poke_pending, &cpus); + + return ret; } /** @@ -449,31 +461,56 @@ int cpuidle_enter_state_coupled(struct cpuidle_device *dev, { int entered_state = -1; struct cpuidle_coupled *coupled = dev->coupled; + int w; if (!coupled) return -EINVAL; while (coupled->prevent) { - if (cpuidle_coupled_clear_pokes(dev->cpu)) { + cpuidle_coupled_clear_pokes(dev->cpu); + if (need_resched()) { local_irq_enable(); return entered_state; } entered_state = cpuidle_enter_state(dev, drv, dev->safe_state_index); + local_irq_disable(); } /* Read barrier ensures online_count is read after prevent is cleared */ smp_rmb(); - cpuidle_coupled_set_waiting(dev->cpu, coupled, next_state); +reset: + cpumask_clear_cpu(dev->cpu, &cpuidle_coupled_poked); + + w = cpuidle_coupled_set_waiting(dev->cpu, coupled, next_state); + /* + * If this is the last cpu to enter the waiting state, poke + * all the other cpus out of their waiting state so they can + * enter a deeper state. This can race with one of the cpus + * exiting the waiting state due to an interrupt and + * decrementing waiting_count, see comment below. + */ + if (w == coupled->online_count) { + cpumask_set_cpu(dev->cpu, &cpuidle_coupled_poked); + cpuidle_coupled_poke_others(dev->cpu, coupled); + } retry: /* * Wait for all coupled cpus to be idle, using the deepest state - * allowed for a single cpu. + * allowed for a single cpu. If this was not the poking cpu, wait + * for at least one poke before leaving to avoid a race where + * two cpus could arrive at the waiting loop at the same time, + * but the first of the two to arrive could skip the loop without + * processing the pokes from the last to arrive. */ - while (!cpuidle_coupled_cpus_waiting(coupled)) { - if (cpuidle_coupled_clear_pokes(dev->cpu)) { + while (!cpuidle_coupled_cpus_waiting(coupled) || + !cpumask_test_cpu(dev->cpu, &cpuidle_coupled_poked)) { + if (cpuidle_coupled_clear_pokes(dev->cpu)) + continue; + + if (need_resched()) { cpuidle_coupled_set_not_waiting(dev->cpu, coupled); goto out; } @@ -485,14 +522,22 @@ retry: entered_state = cpuidle_enter_state(dev, drv, dev->safe_state_index); + local_irq_disable(); } - if (cpuidle_coupled_clear_pokes(dev->cpu)) { + cpuidle_coupled_clear_pokes(dev->cpu); + if (need_resched()) { cpuidle_coupled_set_not_waiting(dev->cpu, coupled); goto out; } /* + * Make sure final poke status for this cpu is visible before setting + * cpu as ready. + */ + smp_wmb(); + + /* * All coupled cpus are probably idle. There is a small chance that * one of the other cpus just became active. Increment the ready count, * and spin until all coupled cpus have incremented the counter. Once a @@ -511,6 +556,28 @@ retry: cpu_relax(); } + /* + * Make sure read of all cpus ready is done before reading pending pokes + */ + smp_rmb(); + + /* + * There is a small chance that a cpu left and reentered idle after this + * cpu saw that all cpus were waiting. The cpu that reentered idle will + * have sent this cpu a poke, which will still be pending after the + * ready loop. The pending interrupt may be lost by the interrupt + * controller when entering the deep idle state. 
It's not possible to + * clear a pending interrupt without turning interrupts on and handling + * it, and it's too late to turn on interrupts here, so reset the + * coupled idle state of all cpus and retry. + */ + if (cpuidle_coupled_any_pokes_pending(coupled)) { + cpuidle_coupled_set_done(dev->cpu, coupled); + /* Wait for all cpus to see the pending pokes */ + cpuidle_coupled_parallel_barrier(dev, &coupled->abort_barrier); + goto reset; + } + /* all cpus have acked the coupled state */ next_state = cpuidle_coupled_get_state(dev, coupled); @@ -596,7 +663,7 @@ have_coupled: coupled->refcnt++; csd = &per_cpu(cpuidle_coupled_poke_cb, dev->cpu); - csd->func = cpuidle_coupled_poked; + csd->func = cpuidle_coupled_handle_poke; csd->info = (void *)(unsigned long)dev->cpu; return 0; diff --git a/drivers/cpuidle/cpuidle-armada-370-xp.c b/drivers/cpuidle/cpuidle-armada-370-xp.c new file mode 100644 index 00000000000..a5fba0287bf --- /dev/null +++ b/drivers/cpuidle/cpuidle-armada-370-xp.c @@ -0,0 +1,93 @@ +/* + * Marvell Armada 370 and Armada XP SoC cpuidle driver + * + * Copyright (C) 2014 Marvell + * + * Nadav Haklai <nadavh@marvell.com> + * Gregory CLEMENT <gregory.clement@free-electrons.com> + * + * This file is licensed under the terms of the GNU General Public + * License version 2. This program is licensed "as is" without any + * warranty of any kind, whether express or implied. + * + * Maintainer: Gregory CLEMENT <gregory.clement@free-electrons.com> + */ + +#include <linux/cpu_pm.h> +#include <linux/cpuidle.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/suspend.h> +#include <linux/platform_device.h> +#include <asm/cpuidle.h> + +#define ARMADA_370_XP_MAX_STATES 3 +#define ARMADA_370_XP_FLAG_DEEP_IDLE 0x10000 + +static int (*armada_370_xp_cpu_suspend)(int); + +static int armada_370_xp_enter_idle(struct cpuidle_device *dev, + struct cpuidle_driver *drv, + int index) +{ + int ret; + bool deepidle = false; + cpu_pm_enter(); + + if (drv->states[index].flags & ARMADA_370_XP_FLAG_DEEP_IDLE) + deepidle = true; + + ret = armada_370_xp_cpu_suspend(deepidle); + if (ret) + return ret; + + cpu_pm_exit(); + + return index; +} + +static struct cpuidle_driver armada_370_xp_idle_driver = { + .name = "armada_370_xp_idle", + .states[0] = ARM_CPUIDLE_WFI_STATE, + .states[1] = { + .enter = armada_370_xp_enter_idle, + .exit_latency = 10, + .power_usage = 50, + .target_residency = 100, + .flags = CPUIDLE_FLAG_TIME_VALID, + .name = "Idle", + .desc = "CPU power down", + }, + .states[2] = { + .enter = armada_370_xp_enter_idle, + .exit_latency = 100, + .power_usage = 5, + .target_residency = 1000, + .flags = CPUIDLE_FLAG_TIME_VALID | + ARMADA_370_XP_FLAG_DEEP_IDLE, + .name = "Deep idle", + .desc = "CPU and L2 Fabric power down", + }, + .state_count = ARMADA_370_XP_MAX_STATES, +}; + +static int armada_370_xp_cpuidle_probe(struct platform_device *pdev) +{ + + armada_370_xp_cpu_suspend = (void *)(pdev->dev.platform_data); + return cpuidle_register(&armada_370_xp_idle_driver, NULL); +} + +static struct platform_driver armada_370_xp_cpuidle_plat_driver = { + .driver = { + .name = "cpuidle-armada-370-xp", + .owner = THIS_MODULE, + }, + .probe = armada_370_xp_cpuidle_probe, +}; + +module_platform_driver(armada_370_xp_cpuidle_plat_driver); + +MODULE_AUTHOR("Gregory CLEMENT <gregory.clement@free-electrons.com>"); +MODULE_DESCRIPTION("Armada 370/XP cpu idle driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/cpuidle/cpuidle-at91.c b/drivers/cpuidle/cpuidle-at91.c new file mode 100644 index 
00000000000..a0774370c6b --- /dev/null +++ b/drivers/cpuidle/cpuidle-at91.c @@ -0,0 +1,69 @@ +/* + * based on arch/arm/mach-kirkwood/cpuidle.c + * + * CPU idle support for AT91 SoC + * + * This file is licensed under the terms of the GNU General Public + * License version 2. This program is licensed "as is" without any + * warranty of any kind, whether express or implied. + * + * The cpu idle uses wait-for-interrupt and RAM self refresh in order + * to implement two idle states - + * #1 wait-for-interrupt + * #2 wait-for-interrupt and RAM self refresh + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/platform_device.h> +#include <linux/cpuidle.h> +#include <linux/io.h> +#include <linux/export.h> +#include <asm/proc-fns.h> +#include <asm/cpuidle.h> + +#define AT91_MAX_STATES 2 + +static void (*at91_standby)(void); + +/* Actual code that puts the SoC in different idle states */ +static int at91_enter_idle(struct cpuidle_device *dev, + struct cpuidle_driver *drv, + int index) +{ + at91_standby(); + return index; +} + +static struct cpuidle_driver at91_idle_driver = { + .name = "at91_idle", + .owner = THIS_MODULE, + .states[0] = ARM_CPUIDLE_WFI_STATE, + .states[1] = { + .enter = at91_enter_idle, + .exit_latency = 10, + .target_residency = 10000, + .flags = CPUIDLE_FLAG_TIME_VALID, + .name = "RAM_SR", + .desc = "WFI and DDR Self Refresh", + }, + .state_count = AT91_MAX_STATES, +}; + +/* Initialize CPU idle by registering the idle states */ +static int at91_cpuidle_probe(struct platform_device *dev) +{ + at91_standby = (void *)(dev->dev.platform_data); + + return cpuidle_register(&at91_idle_driver, NULL); +} + +static struct platform_driver at91_cpuidle_driver = { + .driver = { + .name = "cpuidle-at91", + .owner = THIS_MODULE, + }, + .probe = at91_cpuidle_probe, +}; + +module_platform_driver(at91_cpuidle_driver); diff --git a/drivers/cpuidle/cpuidle-big_little.c b/drivers/cpuidle/cpuidle-big_little.c new file mode 100644 index 00000000000..b45fc624904 --- /dev/null +++ b/drivers/cpuidle/cpuidle-big_little.c @@ -0,0 +1,209 @@ +/* + * Copyright (c) 2013 ARM/Linaro + * + * Authors: Daniel Lezcano <daniel.lezcano@linaro.org> + * Lorenzo Pieralisi <lorenzo.pieralisi@arm.com> + * Nicolas Pitre <nicolas.pitre@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Maintainer: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com> + * Maintainer: Daniel Lezcano <daniel.lezcano@linaro.org> + */ +#include <linux/cpuidle.h> +#include <linux/cpu_pm.h> +#include <linux/slab.h> +#include <linux/of.h> + +#include <asm/cpu.h> +#include <asm/cputype.h> +#include <asm/cpuidle.h> +#include <asm/mcpm.h> +#include <asm/smp_plat.h> +#include <asm/suspend.h> + +static int bl_enter_powerdown(struct cpuidle_device *dev, + struct cpuidle_driver *drv, int idx); + +/* + * NB: Owing to current menu governor behaviour big and LITTLE + * index 1 states have to define exit_latency and target_residency for + * cluster state since, when all CPUs in a cluster hit it, the cluster + * can be shutdown. This means that when a single CPU enters this state + * the exit_latency and target_residency values are somewhat overkill. + * There is no notion of cluster states in the menu governor, so CPUs + * have to define CPU states where possibly the cluster will be shutdown + * depending on the state of other CPUs. 
idle states entry and exit happen + * at random times; however the cluster state provides target_residency + * values as if all CPUs in a cluster enter the state at once; this is + * somewhat optimistic and behaviour should be fixed either in the governor + * or in the MCPM back-ends. + * To make this driver 100% generic the number of states and the exit_latency + * target_residency values must be obtained from device tree bindings. + * + * exit_latency: refers to the TC2 vexpress test chip and depends on the + * current cluster operating point. It is the time it takes to get the CPU + * up and running when the CPU is powered up on cluster wake-up from shutdown. + * Current values for big and LITTLE clusters are provided for clusters + * running at default operating points. + * + * target_residency: it is the minimum amount of time the cluster has + * to be down to break even in terms of power consumption. cluster + * shutdown has inherent dynamic power costs (L2 writebacks to DRAM + * being the main factor) that depend on the current operating points. + * The current values for both clusters are provided for a CPU whose half + * of L2 lines are dirty and require cleaning to DRAM, and takes into + * account leakage static power values related to the vexpress TC2 testchip. + */ +static struct cpuidle_driver bl_idle_little_driver = { + .name = "little_idle", + .owner = THIS_MODULE, + .states[0] = ARM_CPUIDLE_WFI_STATE, + .states[1] = { + .enter = bl_enter_powerdown, + .exit_latency = 700, + .target_residency = 2500, + .flags = CPUIDLE_FLAG_TIME_VALID | + CPUIDLE_FLAG_TIMER_STOP, + .name = "C1", + .desc = "ARM little-cluster power down", + }, + .state_count = 2, +}; + +static struct cpuidle_driver bl_idle_big_driver = { + .name = "big_idle", + .owner = THIS_MODULE, + .states[0] = ARM_CPUIDLE_WFI_STATE, + .states[1] = { + .enter = bl_enter_powerdown, + .exit_latency = 500, + .target_residency = 2000, + .flags = CPUIDLE_FLAG_TIME_VALID | + CPUIDLE_FLAG_TIMER_STOP, + .name = "C1", + .desc = "ARM big-cluster power down", + }, + .state_count = 2, +}; + +/* + * notrace prevents trace shims from getting inserted where they + * should not. Global jumps and ldrex/strex must not be inserted + * in power down sequences where caches and MMU may be turned off. + */ +static int notrace bl_powerdown_finisher(unsigned long arg) +{ + /* MCPM works with HW CPU identifiers */ + unsigned int mpidr = read_cpuid_mpidr(); + unsigned int cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1); + unsigned int cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0); + + mcpm_set_entry_vector(cpu, cluster, cpu_resume); + + /* + * Residency value passed to mcpm_cpu_suspend back-end + * has to be given clear semantics. Set to 0 as a + * temporary value. + */ + mcpm_cpu_suspend(0); + + /* return value != 0 means failure */ + return 1; +} + +/** + * bl_enter_powerdown - Programs CPU to enter the specified state + * @dev: cpuidle device + * @drv: The target state to be programmed + * @idx: state index + * + * Called from the CPUidle framework to program the device to the + * specified target state selected by the governor. 
+ */ +static int bl_enter_powerdown(struct cpuidle_device *dev, + struct cpuidle_driver *drv, int idx) +{ + cpu_pm_enter(); + + cpu_suspend(0, bl_powerdown_finisher); + + /* signals the MCPM core that CPU is out of low power state */ + mcpm_cpu_powered_up(); + + cpu_pm_exit(); + + return idx; +} + +static int __init bl_idle_driver_init(struct cpuidle_driver *drv, int cpu_id) +{ + struct cpuinfo_arm *cpu_info; + struct cpumask *cpumask; + unsigned long cpuid; + int cpu; + + cpumask = kzalloc(cpumask_size(), GFP_KERNEL); + if (!cpumask) + return -ENOMEM; + + for_each_possible_cpu(cpu) { + cpu_info = &per_cpu(cpu_data, cpu); + cpuid = is_smp() ? cpu_info->cpuid : read_cpuid_id(); + + /* read cpu id part number */ + if ((cpuid & 0xFFF0) == cpu_id) + cpumask_set_cpu(cpu, cpumask); + } + + drv->cpumask = cpumask; + + return 0; +} + +static int __init bl_idle_init(void) +{ + int ret; + + /* + * Initialize the driver just for a compliant set of machines + */ + if (!of_machine_is_compatible("arm,vexpress,v2p-ca15_a7")) + return -ENODEV; + /* + * For now the differentiation between little and big cores + * is based on the part number. A7 cores are considered little + * cores, A15 are considered big cores. This distinction may + * evolve in the future with a more generic matching approach. + */ + ret = bl_idle_driver_init(&bl_idle_little_driver, + ARM_CPU_PART_CORTEX_A7); + if (ret) + return ret; + + ret = bl_idle_driver_init(&bl_idle_big_driver, ARM_CPU_PART_CORTEX_A15); + if (ret) + goto out_uninit_little; + + ret = cpuidle_register(&bl_idle_little_driver, NULL); + if (ret) + goto out_uninit_big; + + ret = cpuidle_register(&bl_idle_big_driver, NULL); + if (ret) + goto out_unregister_little; + + return 0; + +out_unregister_little: + cpuidle_unregister(&bl_idle_little_driver); +out_uninit_big: + kfree(bl_idle_big_driver.cpumask); +out_uninit_little: + kfree(bl_idle_little_driver.cpumask); + + return ret; +} +device_initcall(bl_idle_init); diff --git a/drivers/cpuidle/cpuidle-calxeda.c b/drivers/cpuidle/cpuidle-calxeda.c index 223379169cb..6e51114057d 100644 --- a/drivers/cpuidle/cpuidle-calxeda.c +++ b/drivers/cpuidle/cpuidle-calxeda.c @@ -21,67 +21,30 @@ */ #include <linux/cpuidle.h> +#include <linux/cpu_pm.h> #include <linux/init.h> -#include <linux/io.h> -#include <linux/of.h> -#include <linux/time.h> -#include <linux/delay.h> -#include <linux/suspend.h> +#include <linux/mm.h> +#include <linux/platform_device.h> #include <asm/cpuidle.h> -#include <asm/proc-fns.h> -#include <asm/smp_scu.h> #include <asm/suspend.h> -#include <asm/cacheflush.h> -#include <asm/cp15.h> - -extern void highbank_set_cpu_jump(int cpu, void *jump_addr); -extern void *scu_base_addr; - -static inline unsigned int get_auxcr(void) -{ - unsigned int val; - asm("mrc p15, 0, %0, c1, c0, 1 @ get AUXCR" : "=r" (val) : : "cc"); - return val; -} - -static inline void set_auxcr(unsigned int val) -{ - asm volatile("mcr p15, 0, %0, c1, c0, 1 @ set AUXCR" - : : "r" (val) : "cc"); - isb(); -} - -static noinline void calxeda_idle_restore(void) -{ - set_cr(get_cr() | CR_C); - set_auxcr(get_auxcr() | 0x40); - scu_power_mode(scu_base_addr, SCU_PM_NORMAL); -} +#include <asm/psci.h> static int calxeda_idle_finish(unsigned long val) { - /* Already flushed cache, but do it again as the outer cache functions - * dirty the cache with spinlocks */ - flush_cache_all(); - - set_auxcr(get_auxcr() & ~0x40); - set_cr(get_cr() & ~CR_C); - - scu_power_mode(scu_base_addr, SCU_PM_DORMANT); - - cpu_do_idle(); - - /* Restore things if we didn't enter 
power-gating */ - calxeda_idle_restore(); - return 1; + const struct psci_power_state ps = { + .type = PSCI_POWER_STATE_TYPE_POWER_DOWN, + }; + return psci_ops.cpu_suspend(ps, __pa(cpu_resume)); } static int calxeda_pwrdown_idle(struct cpuidle_device *dev, struct cpuidle_driver *drv, int index) { - highbank_set_cpu_jump(smp_processor_id(), cpu_resume); + cpu_pm_enter(); cpu_suspend(0, calxeda_idle_finish); + cpu_pm_exit(); + return index; } @@ -102,11 +65,17 @@ static struct cpuidle_driver calxeda_idle_driver = { .state_count = 2, }; -static int __init calxeda_cpuidle_init(void) +static int calxeda_cpuidle_probe(struct platform_device *pdev) { - if (!of_machine_is_compatible("calxeda,highbank")) - return -ENODEV; - return cpuidle_register(&calxeda_idle_driver, NULL); } -module_init(calxeda_cpuidle_init); + +static struct platform_driver calxeda_cpuidle_plat_driver = { + .driver = { + .name = "cpuidle-calxeda", + .owner = THIS_MODULE, + }, + .probe = calxeda_cpuidle_probe, +}; + +module_platform_driver(calxeda_cpuidle_plat_driver); diff --git a/drivers/cpuidle/cpuidle-clps711x.c b/drivers/cpuidle/cpuidle-clps711x.c new file mode 100644 index 00000000000..5243811daa6 --- /dev/null +++ b/drivers/cpuidle/cpuidle-clps711x.c @@ -0,0 +1,64 @@ +/* + * CLPS711X CPU idle driver + * + * Copyright (C) 2014 Alexander Shiyan <shc_work@mail.ru> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include <linux/cpuidle.h> +#include <linux/err.h> +#include <linux/io.h> +#include <linux/module.h> +#include <linux/platform_device.h> + +#define CLPS711X_CPUIDLE_NAME "clps711x-cpuidle" + +static void __iomem *clps711x_halt; + +static int clps711x_cpuidle_halt(struct cpuidle_device *dev, + struct cpuidle_driver *drv, int index) +{ + writel(0xaa, clps711x_halt); + + return index; +} + +static struct cpuidle_driver clps711x_idle_driver = { + .name = CLPS711X_CPUIDLE_NAME, + .owner = THIS_MODULE, + .states[0] = { + .name = "HALT", + .desc = "CLPS711X HALT", + .enter = clps711x_cpuidle_halt, + .exit_latency = 1, + }, + .state_count = 1, +}; + +static int __init clps711x_cpuidle_probe(struct platform_device *pdev) +{ + struct resource *res; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + clps711x_halt = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(clps711x_halt)) + return PTR_ERR(clps711x_halt); + + return cpuidle_register(&clps711x_idle_driver, NULL); +} + +static struct platform_driver clps711x_cpuidle_driver = { + .driver = { + .name = CLPS711X_CPUIDLE_NAME, + .owner = THIS_MODULE, + }, +}; +module_platform_driver_probe(clps711x_cpuidle_driver, clps711x_cpuidle_probe); + +MODULE_AUTHOR("Alexander Shiyan <shc_work@mail.ru>"); +MODULE_DESCRIPTION("CLPS711X CPU idle driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/cpuidle/cpuidle-cps.c b/drivers/cpuidle/cpuidle-cps.c new file mode 100644 index 00000000000..fc7b62720de --- /dev/null +++ b/drivers/cpuidle/cpuidle-cps.c @@ -0,0 +1,186 @@ +/* + * Copyright (C) 2014 Imagination Technologies + * Author: Paul Burton <paul.burton@imgtec.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. 
+ */ + +#include <linux/cpu_pm.h> +#include <linux/cpuidle.h> +#include <linux/init.h> + +#include <asm/idle.h> +#include <asm/pm-cps.h> + +/* Enumeration of the various idle states this driver may enter */ +enum cps_idle_state { + STATE_WAIT = 0, /* MIPS wait instruction, coherent */ + STATE_NC_WAIT, /* MIPS wait instruction, non-coherent */ + STATE_CLOCK_GATED, /* Core clock gated */ + STATE_POWER_GATED, /* Core power gated */ + STATE_COUNT +}; + +static int cps_nc_enter(struct cpuidle_device *dev, + struct cpuidle_driver *drv, int index) +{ + enum cps_pm_state pm_state; + int err; + + /* + * At least one core must remain powered up & clocked in order for the + * system to have any hope of functioning. + * + * TODO: don't treat core 0 specially, just prevent the final core + * TODO: remap interrupt affinity temporarily + */ + if (!cpu_data[dev->cpu].core && (index > STATE_NC_WAIT)) + index = STATE_NC_WAIT; + + /* Select the appropriate cps_pm_state */ + switch (index) { + case STATE_NC_WAIT: + pm_state = CPS_PM_NC_WAIT; + break; + case STATE_CLOCK_GATED: + pm_state = CPS_PM_CLOCK_GATED; + break; + case STATE_POWER_GATED: + pm_state = CPS_PM_POWER_GATED; + break; + default: + BUG(); + return -EINVAL; + } + + /* Notify listeners the CPU is about to power down */ + if ((pm_state == CPS_PM_POWER_GATED) && cpu_pm_enter()) + return -EINTR; + + /* Enter that state */ + err = cps_pm_enter_state(pm_state); + + /* Notify listeners the CPU is back up */ + if (pm_state == CPS_PM_POWER_GATED) + cpu_pm_exit(); + + return err ?: index; +} + +static struct cpuidle_driver cps_driver = { + .name = "cpc_cpuidle", + .owner = THIS_MODULE, + .states = { + [STATE_WAIT] = MIPS_CPUIDLE_WAIT_STATE, + [STATE_NC_WAIT] = { + .enter = cps_nc_enter, + .exit_latency = 200, + .target_residency = 450, + .flags = CPUIDLE_FLAG_TIME_VALID, + .name = "nc-wait", + .desc = "non-coherent MIPS wait", + }, + [STATE_CLOCK_GATED] = { + .enter = cps_nc_enter, + .exit_latency = 300, + .target_residency = 700, + .flags = CPUIDLE_FLAG_TIME_VALID | + CPUIDLE_FLAG_TIMER_STOP, + .name = "clock-gated", + .desc = "core clock gated", + }, + [STATE_POWER_GATED] = { + .enter = cps_nc_enter, + .exit_latency = 600, + .target_residency = 1000, + .flags = CPUIDLE_FLAG_TIME_VALID | + CPUIDLE_FLAG_TIMER_STOP, + .name = "power-gated", + .desc = "core power gated", + }, + }, + .state_count = STATE_COUNT, + .safe_state_index = 0, +}; + +static void __init cps_cpuidle_unregister(void) +{ + int cpu; + struct cpuidle_device *device; + + for_each_possible_cpu(cpu) { + device = &per_cpu(cpuidle_dev, cpu); + cpuidle_unregister_device(device); + } + + cpuidle_unregister_driver(&cps_driver); +} + +static int __init cps_cpuidle_init(void) +{ + int err, cpu, core, i; + struct cpuidle_device *device; + + /* Detect supported states */ + if (!cps_pm_support_state(CPS_PM_POWER_GATED)) + cps_driver.state_count = STATE_CLOCK_GATED + 1; + if (!cps_pm_support_state(CPS_PM_CLOCK_GATED)) + cps_driver.state_count = STATE_NC_WAIT + 1; + if (!cps_pm_support_state(CPS_PM_NC_WAIT)) + cps_driver.state_count = STATE_WAIT + 1; + + /* Inform the user if some states are unavailable */ + if (cps_driver.state_count < STATE_COUNT) { + pr_info("cpuidle-cps: limited to "); + switch (cps_driver.state_count - 1) { + case STATE_WAIT: + pr_cont("coherent wait\n"); + break; + case STATE_NC_WAIT: + pr_cont("non-coherent wait\n"); + break; + case STATE_CLOCK_GATED: + pr_cont("clock gating\n"); + break; + } + } + + /* + * Set the coupled flag on the appropriate states if this system + * 
requires it. + */ + if (coupled_coherence) + for (i = STATE_NC_WAIT; i < cps_driver.state_count; i++) + cps_driver.states[i].flags |= CPUIDLE_FLAG_COUPLED; + + err = cpuidle_register_driver(&cps_driver); + if (err) { + pr_err("Failed to register CPS cpuidle driver\n"); + return err; + } + + for_each_possible_cpu(cpu) { + core = cpu_data[cpu].core; + device = &per_cpu(cpuidle_dev, cpu); + device->cpu = cpu; +#ifdef CONFIG_MIPS_MT + cpumask_copy(&device->coupled_cpus, &cpu_sibling_map[cpu]); +#endif + + err = cpuidle_register_device(device); + if (err) { + pr_err("Failed to register CPU%d cpuidle device\n", + cpu); + goto err_out; + } + } + + return 0; +err_out: + cps_cpuidle_unregister(); + return err; +} +device_initcall(cps_cpuidle_init); diff --git a/drivers/cpuidle/cpuidle-exynos.c b/drivers/cpuidle/cpuidle-exynos.c new file mode 100644 index 00000000000..7c015126382 --- /dev/null +++ b/drivers/cpuidle/cpuidle-exynos.c @@ -0,0 +1,99 @@ +/* linux/arch/arm/mach-exynos/cpuidle.c + * + * Copyright (c) 2011 Samsung Electronics Co., Ltd. + * http://www.samsung.com + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. +*/ + +#include <linux/cpuidle.h> +#include <linux/cpu_pm.h> +#include <linux/export.h> +#include <linux/module.h> +#include <linux/platform_device.h> + +#include <asm/proc-fns.h> +#include <asm/suspend.h> +#include <asm/cpuidle.h> + +static void (*exynos_enter_aftr)(void); + +static int idle_finisher(unsigned long flags) +{ + exynos_enter_aftr(); + cpu_do_idle(); + + return 1; +} + +static int exynos_enter_core0_aftr(struct cpuidle_device *dev, + struct cpuidle_driver *drv, + int index) +{ + cpu_pm_enter(); + cpu_suspend(0, idle_finisher); + cpu_pm_exit(); + + return index; +} + +static int exynos_enter_lowpower(struct cpuidle_device *dev, + struct cpuidle_driver *drv, + int index) +{ + int new_index = index; + + /* AFTR can only be entered when cores other than CPU0 are offline */ + if (num_online_cpus() > 1 || dev->cpu != 0) + new_index = drv->safe_state_index; + + if (new_index == 0) + return arm_cpuidle_simple_enter(dev, drv, new_index); + else + return exynos_enter_core0_aftr(dev, drv, new_index); +} + +static struct cpuidle_driver exynos_idle_driver = { + .name = "exynos_idle", + .owner = THIS_MODULE, + .states = { + [0] = ARM_CPUIDLE_WFI_STATE, + [1] = { + .enter = exynos_enter_lowpower, + .exit_latency = 300, + .target_residency = 100000, + .flags = CPUIDLE_FLAG_TIME_VALID, + .name = "C1", + .desc = "ARM power down", + }, + }, + .state_count = 2, + .safe_state_index = 0, +}; + +static int exynos_cpuidle_probe(struct platform_device *pdev) +{ + int ret; + + exynos_enter_aftr = (void *)(pdev->dev.platform_data); + + ret = cpuidle_register(&exynos_idle_driver, NULL); + if (ret) { + dev_err(&pdev->dev, "failed to register cpuidle driver\n"); + return ret; + } + + return 0; +} + +static struct platform_driver exynos_cpuidle_driver = { + .probe = exynos_cpuidle_probe, + .driver = { + .name = "exynos_cpuidle", + .owner = THIS_MODULE, + }, +}; + +module_platform_driver(exynos_cpuidle_driver); diff --git a/drivers/cpuidle/cpuidle-kirkwood.c b/drivers/cpuidle/cpuidle-kirkwood.c index 521b0a7fdd8..41ba843251b 100644 --- a/drivers/cpuidle/cpuidle-kirkwood.c +++ b/drivers/cpuidle/cpuidle-kirkwood.c @@ -60,9 +60,6 @@ static int kirkwood_cpuidle_probe(struct platform_device *pdev) struct resource *res; res = platform_get_resource(pdev, 
IORESOURCE_MEM, 0); - if (res == NULL) - return -EINVAL; - ddr_operation_base = devm_ioremap_resource(&pdev->dev, res); if (IS_ERR(ddr_operation_base)) return PTR_ERR(ddr_operation_base); @@ -70,7 +67,7 @@ static int kirkwood_cpuidle_probe(struct platform_device *pdev) return cpuidle_register(&kirkwood_idle_driver, NULL); } -int kirkwood_cpuidle_remove(struct platform_device *pdev) +static int kirkwood_cpuidle_remove(struct platform_device *pdev) { cpuidle_unregister(&kirkwood_idle_driver); return 0; diff --git a/drivers/cpuidle/cpuidle-powernv.c b/drivers/cpuidle/cpuidle-powernv.c new file mode 100644 index 00000000000..74f5788d50b --- /dev/null +++ b/drivers/cpuidle/cpuidle-powernv.c @@ -0,0 +1,254 @@ +/* + * cpuidle-powernv - idle state cpuidle driver. + * Adapted from drivers/cpuidle/cpuidle-pseries + * + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/moduleparam.h> +#include <linux/cpuidle.h> +#include <linux/cpu.h> +#include <linux/notifier.h> +#include <linux/clockchips.h> +#include <linux/of.h> + +#include <asm/machdep.h> +#include <asm/firmware.h> +#include <asm/runlatch.h> + +/* Flags and constants used in PowerNV platform */ + +#define MAX_POWERNV_IDLE_STATES 8 +#define IDLE_USE_INST_NAP 0x00010000 /* Use nap instruction */ +#define IDLE_USE_INST_SLEEP 0x00020000 /* Use sleep instruction */ + +struct cpuidle_driver powernv_idle_driver = { + .name = "powernv_idle", + .owner = THIS_MODULE, +}; + +static int max_idle_state; +static struct cpuidle_state *cpuidle_state_table; + +static int snooze_loop(struct cpuidle_device *dev, + struct cpuidle_driver *drv, + int index) +{ + local_irq_enable(); + set_thread_flag(TIF_POLLING_NRFLAG); + + ppc64_runlatch_off(); + while (!need_resched()) { + HMT_low(); + HMT_very_low(); + } + + HMT_medium(); + ppc64_runlatch_on(); + clear_thread_flag(TIF_POLLING_NRFLAG); + smp_mb(); + return index; +} + +static int nap_loop(struct cpuidle_device *dev, + struct cpuidle_driver *drv, + int index) +{ + ppc64_runlatch_off(); + power7_idle(); + ppc64_runlatch_on(); + return index; +} + +static int fastsleep_loop(struct cpuidle_device *dev, + struct cpuidle_driver *drv, + int index) +{ + unsigned long old_lpcr = mfspr(SPRN_LPCR); + unsigned long new_lpcr; + + if (unlikely(system_state < SYSTEM_RUNNING)) + return index; + + new_lpcr = old_lpcr; + /* Do not exit powersave upon decrementer as we've setup the timer + * offload. + */ + new_lpcr &= ~LPCR_PECE1; + + mtspr(SPRN_LPCR, new_lpcr); + power7_sleep(); + + mtspr(SPRN_LPCR, old_lpcr); + + return index; +} + +/* + * States for dedicated partition case. 
+ */ +static struct cpuidle_state powernv_states[MAX_POWERNV_IDLE_STATES] = { + { /* Snooze */ + .name = "snooze", + .desc = "snooze", + .flags = CPUIDLE_FLAG_TIME_VALID, + .exit_latency = 0, + .target_residency = 0, + .enter = &snooze_loop }, +}; + +static int powernv_cpuidle_add_cpu_notifier(struct notifier_block *n, + unsigned long action, void *hcpu) +{ + int hotcpu = (unsigned long)hcpu; + struct cpuidle_device *dev = + per_cpu(cpuidle_devices, hotcpu); + + if (dev && cpuidle_get_driver()) { + switch (action) { + case CPU_ONLINE: + case CPU_ONLINE_FROZEN: + cpuidle_pause_and_lock(); + cpuidle_enable_device(dev); + cpuidle_resume_and_unlock(); + break; + + case CPU_DEAD: + case CPU_DEAD_FROZEN: + cpuidle_pause_and_lock(); + cpuidle_disable_device(dev); + cpuidle_resume_and_unlock(); + break; + + default: + return NOTIFY_DONE; + } + } + return NOTIFY_OK; +} + +static struct notifier_block setup_hotplug_notifier = { + .notifier_call = powernv_cpuidle_add_cpu_notifier, +}; + +/* + * powernv_cpuidle_driver_init() + */ +static int powernv_cpuidle_driver_init(void) +{ + int idle_state; + struct cpuidle_driver *drv = &powernv_idle_driver; + + drv->state_count = 0; + + for (idle_state = 0; idle_state < max_idle_state; ++idle_state) { + /* Is the state not enabled? */ + if (cpuidle_state_table[idle_state].enter == NULL) + continue; + + drv->states[drv->state_count] = /* structure copy */ + cpuidle_state_table[idle_state]; + + drv->state_count += 1; + } + + return 0; +} + +static int powernv_add_idle_states(void) +{ + struct device_node *power_mgt; + struct property *prop; + int nr_idle_states = 1; /* Snooze */ + int dt_idle_states; + u32 *flags; + int i; + + /* Currently we have snooze statically defined */ + + power_mgt = of_find_node_by_path("/ibm,opal/power-mgt"); + if (!power_mgt) { + pr_warn("opal: PowerMgmt Node not found\n"); + return nr_idle_states; + } + + prop = of_find_property(power_mgt, "ibm,cpu-idle-state-flags", NULL); + if (!prop) { + pr_warn("DT-PowerMgmt: missing ibm,cpu-idle-state-flags\n"); + return nr_idle_states; + } + + dt_idle_states = prop->length / sizeof(u32); + flags = (u32 *) prop->value; + + for (i = 0; i < dt_idle_states; i++) { + + if (flags[i] & IDLE_USE_INST_NAP) { + /* Add NAP state */ + strcpy(powernv_states[nr_idle_states].name, "Nap"); + strcpy(powernv_states[nr_idle_states].desc, "Nap"); + powernv_states[nr_idle_states].flags = CPUIDLE_FLAG_TIME_VALID; + powernv_states[nr_idle_states].exit_latency = 10; + powernv_states[nr_idle_states].target_residency = 100; + powernv_states[nr_idle_states].enter = &nap_loop; + nr_idle_states++; + } + + if (flags[i] & IDLE_USE_INST_SLEEP) { + /* Add FASTSLEEP state */ + strcpy(powernv_states[nr_idle_states].name, "FastSleep"); + strcpy(powernv_states[nr_idle_states].desc, "FastSleep"); + powernv_states[nr_idle_states].flags = + CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TIMER_STOP; + powernv_states[nr_idle_states].exit_latency = 300; + powernv_states[nr_idle_states].target_residency = 1000000; + powernv_states[nr_idle_states].enter = &fastsleep_loop; + nr_idle_states++; + } + } + + return nr_idle_states; +} + +/* + * powernv_idle_probe() + * Choose state table for shared versus dedicated partition + */ +static int powernv_idle_probe(void) +{ + if (cpuidle_disable != IDLE_NO_OVERRIDE) + return -ENODEV; + + if (firmware_has_feature(FW_FEATURE_OPALv3)) { + cpuidle_state_table = powernv_states; + /* Device tree can indicate more idle states */ + max_idle_state = powernv_add_idle_states(); + } else + return -ENODEV; + + return 0; 
+} + +static int __init powernv_processor_idle_init(void) +{ + int retval; + + retval = powernv_idle_probe(); + if (retval) + return retval; + + powernv_cpuidle_driver_init(); + retval = cpuidle_register(&powernv_idle_driver, NULL); + if (retval) { + printk(KERN_DEBUG "Registration of powernv driver failed.\n"); + return retval; + } + + register_cpu_notifier(&setup_hotplug_notifier); + printk(KERN_DEBUG "powernv_idle_driver registered\n"); + return 0; +} + +device_initcall(powernv_processor_idle_init); diff --git a/drivers/cpuidle/cpuidle-pseries.c b/drivers/cpuidle/cpuidle-pseries.c new file mode 100644 index 00000000000..6f7b0195688 --- /dev/null +++ b/drivers/cpuidle/cpuidle-pseries.c @@ -0,0 +1,273 @@ +/* + * cpuidle-pseries - idle state cpuidle driver. + * Adapted from drivers/idle/intel_idle.c and + * drivers/acpi/processor_idle.c + * + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/moduleparam.h> +#include <linux/cpuidle.h> +#include <linux/cpu.h> +#include <linux/notifier.h> + +#include <asm/paca.h> +#include <asm/reg.h> +#include <asm/machdep.h> +#include <asm/firmware.h> +#include <asm/runlatch.h> +#include <asm/plpar_wrappers.h> + +struct cpuidle_driver pseries_idle_driver = { + .name = "pseries_idle", + .owner = THIS_MODULE, +}; + +static int max_idle_state; +static struct cpuidle_state *cpuidle_state_table; + +static inline void idle_loop_prolog(unsigned long *in_purr) +{ + ppc64_runlatch_off(); + *in_purr = mfspr(SPRN_PURR); + /* + * Indicate to the HV that we are idle. Now would be + * a good time to find other work to dispatch. + */ + get_lppaca()->idle = 1; +} + +static inline void idle_loop_epilog(unsigned long in_purr) +{ + u64 wait_cycles; + + wait_cycles = be64_to_cpu(get_lppaca()->wait_state_cycles); + wait_cycles += mfspr(SPRN_PURR) - in_purr; + get_lppaca()->wait_state_cycles = cpu_to_be64(wait_cycles); + get_lppaca()->idle = 0; + + if (irqs_disabled()) + local_irq_enable(); + ppc64_runlatch_on(); +} + +static int snooze_loop(struct cpuidle_device *dev, + struct cpuidle_driver *drv, + int index) +{ + unsigned long in_purr; + + idle_loop_prolog(&in_purr); + local_irq_enable(); + set_thread_flag(TIF_POLLING_NRFLAG); + + while (!need_resched()) { + HMT_low(); + HMT_very_low(); + } + + HMT_medium(); + clear_thread_flag(TIF_POLLING_NRFLAG); + smp_mb(); + + idle_loop_epilog(in_purr); + + return index; +} + +static void check_and_cede_processor(void) +{ + /* + * Ensure our interrupt state is properly tracked, + * also checks if no interrupt has occurred while we + * were soft-disabled + */ + if (prep_irq_for_idle()) { + cede_processor(); +#ifdef CONFIG_TRACE_IRQFLAGS + /* Ensure that H_CEDE returns with IRQs on */ + if (WARN_ON(!(mfmsr() & MSR_EE))) + __hard_irq_enable(); +#endif + } +} + +static int dedicated_cede_loop(struct cpuidle_device *dev, + struct cpuidle_driver *drv, + int index) +{ + unsigned long in_purr; + + idle_loop_prolog(&in_purr); + get_lppaca()->donate_dedicated_cpu = 1; + + HMT_medium(); + check_and_cede_processor(); + + get_lppaca()->donate_dedicated_cpu = 0; + + idle_loop_epilog(in_purr); + + return index; +} + +static int shared_cede_loop(struct cpuidle_device *dev, + struct cpuidle_driver *drv, + int index) +{ + unsigned long in_purr; + + idle_loop_prolog(&in_purr); + + /* + * Yield the processor to the hypervisor. We return if + * an external interrupt occurs (which are driven prior + * to returning here) or if a prod occurs from another + * processor. 
When returning here, external interrupts + * are enabled. + */ + check_and_cede_processor(); + + idle_loop_epilog(in_purr); + + return index; +} + +/* + * States for dedicated partition case. + */ +static struct cpuidle_state dedicated_states[] = { + { /* Snooze */ + .name = "snooze", + .desc = "snooze", + .flags = CPUIDLE_FLAG_TIME_VALID, + .exit_latency = 0, + .target_residency = 0, + .enter = &snooze_loop }, + { /* CEDE */ + .name = "CEDE", + .desc = "CEDE", + .flags = CPUIDLE_FLAG_TIME_VALID, + .exit_latency = 10, + .target_residency = 100, + .enter = &dedicated_cede_loop }, +}; + +/* + * States for shared partition case. + */ +static struct cpuidle_state shared_states[] = { + { /* Shared Cede */ + .name = "Shared Cede", + .desc = "Shared Cede", + .flags = CPUIDLE_FLAG_TIME_VALID, + .exit_latency = 0, + .target_residency = 0, + .enter = &shared_cede_loop }, +}; + +static int pseries_cpuidle_add_cpu_notifier(struct notifier_block *n, + unsigned long action, void *hcpu) +{ + int hotcpu = (unsigned long)hcpu; + struct cpuidle_device *dev = + per_cpu(cpuidle_devices, hotcpu); + + if (dev && cpuidle_get_driver()) { + switch (action) { + case CPU_ONLINE: + case CPU_ONLINE_FROZEN: + cpuidle_pause_and_lock(); + cpuidle_enable_device(dev); + cpuidle_resume_and_unlock(); + break; + + case CPU_DEAD: + case CPU_DEAD_FROZEN: + cpuidle_pause_and_lock(); + cpuidle_disable_device(dev); + cpuidle_resume_and_unlock(); + break; + + default: + return NOTIFY_DONE; + } + } + return NOTIFY_OK; +} + +static struct notifier_block setup_hotplug_notifier = { + .notifier_call = pseries_cpuidle_add_cpu_notifier, +}; + +/* + * pseries_cpuidle_driver_init() + */ +static int pseries_cpuidle_driver_init(void) +{ + int idle_state; + struct cpuidle_driver *drv = &pseries_idle_driver; + + drv->state_count = 0; + + for (idle_state = 0; idle_state < max_idle_state; ++idle_state) { + /* Is the state not enabled? 
*/ + if (cpuidle_state_table[idle_state].enter == NULL) + continue; + + drv->states[drv->state_count] = /* structure copy */ + cpuidle_state_table[idle_state]; + + drv->state_count += 1; + } + + return 0; +} + +/* + * pseries_idle_probe() + * Choose state table for shared versus dedicated partition + */ +static int pseries_idle_probe(void) +{ + + if (cpuidle_disable != IDLE_NO_OVERRIDE) + return -ENODEV; + + if (firmware_has_feature(FW_FEATURE_SPLPAR)) { + if (lppaca_shared_proc(get_lppaca())) { + cpuidle_state_table = shared_states; + max_idle_state = ARRAY_SIZE(shared_states); + } else { + cpuidle_state_table = dedicated_states; + max_idle_state = ARRAY_SIZE(dedicated_states); + } + } else + return -ENODEV; + + return 0; +} + +static int __init pseries_processor_idle_init(void) +{ + int retval; + + retval = pseries_idle_probe(); + if (retval) + return retval; + + pseries_cpuidle_driver_init(); + retval = cpuidle_register(&pseries_idle_driver, NULL); + if (retval) { + printk(KERN_DEBUG "Registration of pseries driver failed.\n"); + return retval; + } + + register_cpu_notifier(&setup_hotplug_notifier); + printk(KERN_DEBUG "pseries_idle_driver registered\n"); + return 0; +} + +device_initcall(pseries_processor_idle_init); diff --git a/drivers/cpuidle/cpuidle-ux500.c b/drivers/cpuidle/cpuidle-ux500.c new file mode 100644 index 00000000000..5e35804b1a9 --- /dev/null +++ b/drivers/cpuidle/cpuidle-ux500.c @@ -0,0 +1,131 @@ +/* + * Copyright (c) 2012 Linaro : Daniel Lezcano <daniel.lezcano@linaro.org> (IBM) + * + * Based on the work of Rickard Andersson <rickard.andersson@stericsson.com> + * and Jonas Aaberg <jonas.aberg@stericsson.com>. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/module.h> +#include <linux/cpuidle.h> +#include <linux/spinlock.h> +#include <linux/atomic.h> +#include <linux/smp.h> +#include <linux/mfd/dbx500-prcmu.h> +#include <linux/platform_data/arm-ux500-pm.h> +#include <linux/platform_device.h> + +#include <asm/cpuidle.h> +#include <asm/proc-fns.h> + +static atomic_t master = ATOMIC_INIT(0); +static DEFINE_SPINLOCK(master_lock); + +static inline int ux500_enter_idle(struct cpuidle_device *dev, + struct cpuidle_driver *drv, int index) +{ + int this_cpu = smp_processor_id(); + bool recouple = false; + + if (atomic_inc_return(&master) == num_online_cpus()) { + + /* With this lock, we prevent the other cpu to exit and enter + * this function again and become the master */ + if (!spin_trylock(&master_lock)) + goto wfi; + + /* decouple the gic from the A9 cores */ + if (prcmu_gic_decouple()) { + spin_unlock(&master_lock); + goto out; + } + + /* If an error occur, we will have to recouple the gic + * manually */ + recouple = true; + + /* At this state, as the gic is decoupled, if the other + * cpu is in WFI, we have the guarantee it won't be wake + * up, so we can safely go to retention */ + if (!prcmu_is_cpu_in_wfi(this_cpu ? 0 : 1)) + goto out; + + /* The prcmu will be in charge of watching the interrupts + * and wake up the cpus */ + if (prcmu_copy_gic_settings()) + goto out; + + /* Check in the meantime an interrupt did + * not occur on the gic ... */ + if (prcmu_gic_pending_irq()) + goto out; + + /* ... 
and the prcmu */ + if (prcmu_pending_irq()) + goto out; + + /* Go to the retention state, the prcmu will wait for the + * cpu to go WFI and this is what happens after exiting this + * 'master' critical section */ + if (prcmu_set_power_state(PRCMU_AP_IDLE, true, true)) + goto out; + + /* When we switch to retention, the prcmu is in charge + * of recoupling the gic automatically */ + recouple = false; + + spin_unlock(&master_lock); + } +wfi: + cpu_do_idle(); +out: + atomic_dec(&master); + + if (recouple) { + prcmu_gic_recouple(); + spin_unlock(&master_lock); + } + + return index; +} + +static struct cpuidle_driver ux500_idle_driver = { + .name = "ux500_idle", + .owner = THIS_MODULE, + .states = { + ARM_CPUIDLE_WFI_STATE, + { + .enter = ux500_enter_idle, + .exit_latency = 70, + .target_residency = 260, + .flags = CPUIDLE_FLAG_TIME_VALID | + CPUIDLE_FLAG_TIMER_STOP, + .name = "ApIdle", + .desc = "ARM Retention", + }, + }, + .safe_state_index = 0, + .state_count = 2, +}; + +static int dbx500_cpuidle_probe(struct platform_device *pdev) +{ + /* Configure wake up reasons */ + prcmu_enable_wakeups(PRCMU_WAKEUP(ARM) | PRCMU_WAKEUP(RTC) | + PRCMU_WAKEUP(ABB)); + + return cpuidle_register(&ux500_idle_driver, NULL); +} + +static struct platform_driver dbx500_cpuidle_plat_driver = { + .driver = { + .name = "cpuidle-dbx500", + .owner = THIS_MODULE, + }, + .probe = dbx500_cpuidle_probe, +}; + +module_platform_driver(dbx500_cpuidle_plat_driver); diff --git a/drivers/cpuidle/cpuidle-zynq.c b/drivers/cpuidle/cpuidle-zynq.c new file mode 100644 index 00000000000..aded7592802 --- /dev/null +++ b/drivers/cpuidle/cpuidle-zynq.c @@ -0,0 +1,88 @@ +/* + * Copyright (C) 2012-2013 Xilinx + * + * CPU idle support for Xilinx Zynq + * + * based on arch/arm/mach-at91/cpuidle.c + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. 
+ * + * The cpu idle uses wait-for-interrupt and RAM self refresh in order + * to implement two idle states - + * #1 wait-for-interrupt + * #2 wait-for-interrupt and RAM self refresh + * + * Maintainer: Michal Simek <michal.simek@xilinx.com> + */ + +#include <linux/init.h> +#include <linux/cpu_pm.h> +#include <linux/cpuidle.h> +#include <linux/platform_device.h> +#include <asm/proc-fns.h> +#include <asm/cpuidle.h> + +#define ZYNQ_MAX_STATES 2 + +/* Actual code that puts the SoC in different idle states */ +static int zynq_enter_idle(struct cpuidle_device *dev, + struct cpuidle_driver *drv, int index) +{ + /* Devices must be stopped here */ + cpu_pm_enter(); + + /* Add code for DDR self refresh start */ + cpu_do_idle(); + + /* Add code for DDR self refresh stop */ + cpu_pm_exit(); + + return index; +} + +static struct cpuidle_driver zynq_idle_driver = { + .name = "zynq_idle", + .owner = THIS_MODULE, + .states = { + ARM_CPUIDLE_WFI_STATE, + { + .enter = zynq_enter_idle, + .exit_latency = 10, + .target_residency = 10000, + .flags = CPUIDLE_FLAG_TIME_VALID | + CPUIDLE_FLAG_TIMER_STOP, + .name = "RAM_SR", + .desc = "WFI and RAM Self Refresh", + }, + }, + .safe_state_index = 0, + .state_count = ZYNQ_MAX_STATES, +}; + +/* Initialize CPU idle by registering the idle states */ +static int zynq_cpuidle_probe(struct platform_device *pdev) +{ + pr_info("Xilinx Zynq CpuIdle Driver started\n"); + + return cpuidle_register(&zynq_idle_driver, NULL); +} + +static struct platform_driver zynq_cpuidle_driver = { + .driver = { + .name = "cpuidle-zynq", + .owner = THIS_MODULE, + }, + .probe = zynq_cpuidle_probe, +}; + +module_platform_driver(zynq_cpuidle_driver); diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index c3a93fece81..cb7019977c5 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -32,6 +32,7 @@ LIST_HEAD(cpuidle_detected_devices); static int enabled_devices; static int off __read_mostly; static int initialized __read_mostly; +static bool use_deepest_state __read_mostly; int cpuidle_disabled(void) { @@ -42,8 +43,6 @@ void disable_cpuidle(void) off = 1; } -static int __cpuidle_register_device(struct cpuidle_device *dev); - /** * cpuidle_play_dead - cpu off-lining * @@ -67,6 +66,45 @@ int cpuidle_play_dead(void) } /** + * cpuidle_use_deepest_state - Enable/disable the "deepest idle" mode. + * @enable: Whether enable or disable the feature. + * + * If the "deepest idle" mode is enabled, cpuidle will ignore the governor and + * always use the state with the greatest exit latency (out of the states that + * are not disabled). + * + * This function can only be called after cpuidle_pause() to avoid races. + */ +void cpuidle_use_deepest_state(bool enable) +{ + use_deepest_state = enable; +} + +/** + * cpuidle_find_deepest_state - Find the state of the greatest exit latency. + * @drv: cpuidle driver for a given CPU. + * @dev: cpuidle device for a given CPU. 
+ */ +static int cpuidle_find_deepest_state(struct cpuidle_driver *drv, + struct cpuidle_device *dev) +{ + unsigned int latency_req = 0; + int i, ret = CPUIDLE_DRIVER_STATE_START - 1; + + for (i = CPUIDLE_DRIVER_STATE_START; i < drv->state_count; i++) { + struct cpuidle_state *s = &drv->states[i]; + struct cpuidle_state_usage *su = &dev->states_usage[i]; + + if (s->disabled || su->disable || s->exit_latency <= latency_req) + continue; + + latency_req = s->exit_latency; + ret = i; + } + return ret; +} + +/** * cpuidle_enter_state - enter the state and update stats * @dev: cpuidle device for this cpu * @drv: cpuidle driver for this cpu @@ -87,7 +125,8 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv, time_end = ktime_get(); - local_irq_enable(); + if (!cpuidle_state_is_coupled(dev, drv, entered_state)) + local_irq_enable(); diff = ktime_to_us(ktime_sub(time_end, time_start)); if (diff > INT_MAX) @@ -110,63 +149,57 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv, } /** - * cpuidle_idle_call - the main idle loop + * cpuidle_select - ask the cpuidle framework to choose an idle state + * + * @drv: the cpuidle driver + * @dev: the cpuidle device * - * NOTE: no locks or semaphores should be used here - * return non-zero on failure + * Returns the index of the idle state. */ -int cpuidle_idle_call(void) +int cpuidle_select(struct cpuidle_driver *drv, struct cpuidle_device *dev) { - struct cpuidle_device *dev = __this_cpu_read(cpuidle_devices); - struct cpuidle_driver *drv; - int next_state, entered_state; - - if (off) - return -ENODEV; - - if (!initialized) + if (off || !initialized) return -ENODEV; - /* check if the device is ready */ - if (!dev || !dev->enabled) + if (!drv || !dev || !dev->enabled) return -EBUSY; - drv = cpuidle_get_cpu_driver(dev); - - /* ask the governor for the next state */ - next_state = cpuidle_curr_governor->select(drv, dev); - if (need_resched()) { - dev->last_residency = 0; - /* give the governor an opportunity to reflect on the outcome */ - if (cpuidle_curr_governor->reflect) - cpuidle_curr_governor->reflect(dev, next_state); - local_irq_enable(); - return 0; - } - - trace_cpu_idle_rcuidle(next_state, dev->cpu); - - if (drv->states[next_state].flags & CPUIDLE_FLAG_TIMER_STOP) - clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, - &dev->cpu); - - if (cpuidle_state_is_coupled(dev, drv, next_state)) - entered_state = cpuidle_enter_state_coupled(dev, drv, - next_state); - else - entered_state = cpuidle_enter_state(dev, drv, next_state); + if (unlikely(use_deepest_state)) + return cpuidle_find_deepest_state(drv, dev); - if (drv->states[next_state].flags & CPUIDLE_FLAG_TIMER_STOP) - clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, - &dev->cpu); - - trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, dev->cpu); + return cpuidle_curr_governor->select(drv, dev); +} - /* give the governor an opportunity to reflect on the outcome */ - if (cpuidle_curr_governor->reflect) - cpuidle_curr_governor->reflect(dev, entered_state); +/** + * cpuidle_enter - enter into the specified idle state + * + * @drv: the cpuidle driver tied with the cpu + * @dev: the cpuidle device + * @index: the index in the idle state table + * + * Returns the index in the idle state, < 0 in case of error. 
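With cpuidle_idle_call() removed, its work is split across cpuidle_select(), cpuidle_enter() and cpuidle_reflect(), and the architecture/scheduler idle loop is expected to chain them itself. A minimal sketch of the intended calling sequence (illustrative only; obtaining dev/drv, need_resched() handling and broadcast-timer notification are now the caller's job):

	/* dev and drv obtained by the caller, e.g. via cpuidle_get_cpu_driver() */
	int next_state, entered_state;

	next_state = cpuidle_select(drv, dev);
	if (next_state < 0) {
		arch_cpu_idle();		/* no usable state: plain WFI/hlt */
	} else {
		entered_state = cpuidle_enter(drv, dev, next_state);
		cpuidle_reflect(dev, entered_state);	/* let the governor learn */
	}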
+ * The error code depends on the backend driver + */ +int cpuidle_enter(struct cpuidle_driver *drv, struct cpuidle_device *dev, + int index) +{ + if (cpuidle_state_is_coupled(dev, drv, index)) + return cpuidle_enter_state_coupled(dev, drv, index); + return cpuidle_enter_state(dev, drv, index); +} - return 0; +/** + * cpuidle_reflect - tell the underlying governor what was the state + * we were in + * + * @dev : the cpuidle device + * @index: the index in the idle state table + * + */ +void cpuidle_reflect(struct cpuidle_device *dev, int index) +{ + if (cpuidle_curr_governor->reflect && !unlikely(use_deepest_state)) + cpuidle_curr_governor->reflect(dev, index); } /** @@ -230,45 +263,6 @@ void cpuidle_resume(void) mutex_unlock(&cpuidle_lock); } -#ifdef CONFIG_ARCH_HAS_CPU_RELAX -static int poll_idle(struct cpuidle_device *dev, - struct cpuidle_driver *drv, int index) -{ - ktime_t t1, t2; - s64 diff; - - t1 = ktime_get(); - local_irq_enable(); - while (!need_resched()) - cpu_relax(); - - t2 = ktime_get(); - diff = ktime_to_us(ktime_sub(t2, t1)); - if (diff > INT_MAX) - diff = INT_MAX; - - dev->last_residency = (int) diff; - - return index; -} - -static void poll_idle_init(struct cpuidle_driver *drv) -{ - struct cpuidle_state *state = &drv->states[0]; - - snprintf(state->name, CPUIDLE_NAME_LEN, "POLL"); - snprintf(state->desc, CPUIDLE_DESC_LEN, "CPUIDLE CORE POLL IDLE"); - state->exit_latency = 0; - state->target_residency = 0; - state->power_usage = -1; - state->flags = 0; - state->enter = poll_idle; - state->disabled = false; -} -#else -static void poll_idle_init(struct cpuidle_driver *drv) {} -#endif /* CONFIG_ARCH_HAS_CPU_RELAX */ - /** * cpuidle_enable_device - enables idle PM for a CPU * @dev: the CPU @@ -278,7 +272,7 @@ static void poll_idle_init(struct cpuidle_driver *drv) {} */ int cpuidle_enable_device(struct cpuidle_device *dev) { - int ret, i; + int ret; struct cpuidle_driver *drv; if (!dev) @@ -292,17 +286,12 @@ int cpuidle_enable_device(struct cpuidle_device *dev) if (!drv || !cpuidle_curr_governor) return -EIO; + if (!dev->registered) + return -EINVAL; + if (!dev->state_count) dev->state_count = drv->state_count; - if (dev->registered == 0) { - ret = __cpuidle_register_device(dev); - if (ret) - return ret; - } - - poll_idle_init(drv); - ret = cpuidle_add_device_sysfs(dev); if (ret) return ret; @@ -311,12 +300,6 @@ int cpuidle_enable_device(struct cpuidle_device *dev) (ret = cpuidle_curr_governor->enable(drv, dev))) goto fail_sysfs; - for (i = 0; i < dev->state_count; i++) { - dev->states_usage[i].usage = 0; - dev->states_usage[i].time = 0; - } - dev->last_residency = 0; - smp_wmb(); dev->enabled = 1; @@ -360,6 +343,21 @@ void cpuidle_disable_device(struct cpuidle_device *dev) EXPORT_SYMBOL_GPL(cpuidle_disable_device); +static void __cpuidle_unregister_device(struct cpuidle_device *dev) +{ + struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev); + + list_del(&dev->device_list); + per_cpu(cpuidle_devices, dev->cpu) = NULL; + module_put(drv->owner); +} + +static void __cpuidle_device_init(struct cpuidle_device *dev) +{ + memset(dev->states_usage, 0, sizeof(dev->states_usage)); + dev->last_residency = 0; +} + /** * __cpuidle_register_device - internal register function called before register * and enable routines @@ -377,23 +375,13 @@ static int __cpuidle_register_device(struct cpuidle_device *dev) per_cpu(cpuidle_devices, dev->cpu) = dev; list_add(&dev->device_list, &cpuidle_detected_devices); - ret = cpuidle_add_sysfs(dev); - if (ret) - goto err_sysfs; ret = 
cpuidle_coupled_register_device(dev); if (ret) - goto err_coupled; - - dev->registered = 1; - return 0; + __cpuidle_unregister_device(dev); + else + dev->registered = 1; -err_coupled: - cpuidle_remove_sysfs(dev); -err_sysfs: - list_del(&dev->device_list); - per_cpu(cpuidle_devices, dev->cpu) = NULL; - module_put(drv->owner); return ret; } @@ -403,25 +391,42 @@ err_sysfs: */ int cpuidle_register_device(struct cpuidle_device *dev) { - int ret; + int ret = -EBUSY; if (!dev) return -EINVAL; mutex_lock(&cpuidle_lock); - if ((ret = __cpuidle_register_device(dev))) { - mutex_unlock(&cpuidle_lock); - return ret; - } + if (dev->registered) + goto out_unlock; + + __cpuidle_device_init(dev); + + ret = __cpuidle_register_device(dev); + if (ret) + goto out_unlock; + + ret = cpuidle_add_sysfs(dev); + if (ret) + goto out_unregister; + + ret = cpuidle_enable_device(dev); + if (ret) + goto out_sysfs; - cpuidle_enable_device(dev); cpuidle_install_idle_handler(); +out_unlock: mutex_unlock(&cpuidle_lock); - return 0; + return ret; +out_sysfs: + cpuidle_remove_sysfs(dev); +out_unregister: + __cpuidle_unregister_device(dev); + goto out_unlock; } EXPORT_SYMBOL_GPL(cpuidle_register_device); @@ -432,9 +437,7 @@ EXPORT_SYMBOL_GPL(cpuidle_register_device); */ void cpuidle_unregister_device(struct cpuidle_device *dev) { - struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev); - - if (dev->registered == 0) + if (!dev || dev->registered == 0) return; cpuidle_pause_and_lock(); @@ -442,14 +445,12 @@ void cpuidle_unregister_device(struct cpuidle_device *dev) cpuidle_disable_device(dev); cpuidle_remove_sysfs(dev); - list_del(&dev->device_list); - per_cpu(cpuidle_devices, dev->cpu) = NULL; + + __cpuidle_unregister_device(dev); cpuidle_coupled_unregister_device(dev); cpuidle_resume_and_unlock(); - - module_put(drv->owner); } EXPORT_SYMBOL_GPL(cpuidle_unregister_device); @@ -466,7 +467,7 @@ void cpuidle_unregister(struct cpuidle_driver *drv) int cpu; struct cpuidle_device *device; - for_each_possible_cpu(cpu) { + for_each_cpu(cpu, drv->cpumask) { device = &per_cpu(cpuidle_dev, cpu); cpuidle_unregister_device(device); } @@ -498,13 +499,13 @@ int cpuidle_register(struct cpuidle_driver *drv, return ret; } - for_each_possible_cpu(cpu) { + for_each_cpu(cpu, drv->cpumask) { device = &per_cpu(cpuidle_dev, cpu); device->cpu = cpu; #ifdef CONFIG_ARCH_NEEDS_CPU_IDLE_COUPLED /* - * On multiplatform for ARM, the coupled idle states could + * On multiplatform for ARM, the coupled idle states could be * enabled in the kernel even if the cpuidle driver does not * use it. Note, coupled_cpus is a struct copy. */ diff --git a/drivers/cpuidle/driver.c b/drivers/cpuidle/driver.c index 8dfaaae9444..9634f20e392 100644 --- a/drivers/cpuidle/driver.c +++ b/drivers/cpuidle/driver.c @@ -10,6 +10,7 @@ #include <linux/mutex.h> #include <linux/module.h> +#include <linux/sched.h> #include <linux/cpuidle.h> #include <linux/cpumask.h> #include <linux/clockchips.h> @@ -18,182 +19,285 @@ DEFINE_SPINLOCK(cpuidle_driver_lock); -static void __cpuidle_set_cpu_driver(struct cpuidle_driver *drv, int cpu); -static struct cpuidle_driver * __cpuidle_get_cpu_driver(int cpu); +#ifdef CONFIG_CPU_IDLE_MULTIPLE_DRIVERS -static void cpuidle_setup_broadcast_timer(void *arg) +static DEFINE_PER_CPU(struct cpuidle_driver *, cpuidle_drivers); + +/** + * __cpuidle_get_cpu_driver - return the cpuidle driver tied to a CPU. + * @cpu: the CPU handled by the driver + * + * Returns a pointer to struct cpuidle_driver or NULL if no driver has been + * registered for @cpu. 
+ */ +static struct cpuidle_driver *__cpuidle_get_cpu_driver(int cpu) { - int cpu = smp_processor_id(); - clockevents_notify((long)(arg), &cpu); + return per_cpu(cpuidle_drivers, cpu); } -static void __cpuidle_driver_init(struct cpuidle_driver *drv, int cpu) +/** + * __cpuidle_unset_driver - unset per CPU driver variables. + * @drv: a valid pointer to a struct cpuidle_driver + * + * For each CPU in the driver's CPU mask, unset the registered driver per CPU + * variable. If @drv is different from the registered driver, the corresponding + * variable is not cleared. + */ +static inline void __cpuidle_unset_driver(struct cpuidle_driver *drv) { - int i; + int cpu; - drv->refcnt = 0; + for_each_cpu(cpu, drv->cpumask) { - for (i = drv->state_count - 1; i >= 0 ; i--) { - - if (!(drv->states[i].flags & CPUIDLE_FLAG_TIMER_STOP)) + if (drv != __cpuidle_get_cpu_driver(cpu)) continue; - drv->bctimer = 1; - on_each_cpu_mask(get_cpu_mask(cpu), cpuidle_setup_broadcast_timer, - (void *)CLOCK_EVT_NOTIFY_BROADCAST_ON, 1); - break; + per_cpu(cpuidle_drivers, cpu) = NULL; } } -static int __cpuidle_register_driver(struct cpuidle_driver *drv, int cpu) +/** + * __cpuidle_set_driver - set per CPU driver variables for the given driver. + * @drv: a valid pointer to a struct cpuidle_driver + * + * For each CPU in the driver's cpumask, unset the registered driver per CPU + * to @drv. + * + * Returns 0 on success, -EBUSY if the CPUs have driver(s) already. + */ +static inline int __cpuidle_set_driver(struct cpuidle_driver *drv) { - if (!drv || !drv->state_count) - return -EINVAL; - - if (cpuidle_disabled()) - return -ENODEV; + int cpu; - if (__cpuidle_get_cpu_driver(cpu)) - return -EBUSY; + for_each_cpu(cpu, drv->cpumask) { - __cpuidle_driver_init(drv, cpu); + if (__cpuidle_get_cpu_driver(cpu)) { + __cpuidle_unset_driver(drv); + return -EBUSY; + } - __cpuidle_set_cpu_driver(drv, cpu); + per_cpu(cpuidle_drivers, cpu) = drv; + } return 0; } -static void __cpuidle_unregister_driver(struct cpuidle_driver *drv, int cpu) -{ - if (drv != __cpuidle_get_cpu_driver(cpu)) - return; +#else - if (!WARN_ON(drv->refcnt > 0)) - __cpuidle_set_cpu_driver(NULL, cpu); +static struct cpuidle_driver *cpuidle_curr_driver; - if (drv->bctimer) { - drv->bctimer = 0; - on_each_cpu_mask(get_cpu_mask(cpu), cpuidle_setup_broadcast_timer, - (void *)CLOCK_EVT_NOTIFY_BROADCAST_OFF, 1); - } +/** + * __cpuidle_get_cpu_driver - return the global cpuidle driver pointer. + * @cpu: ignored without the multiple driver support + * + * Return a pointer to a struct cpuidle_driver object or NULL if no driver was + * previously registered. + */ +static inline struct cpuidle_driver *__cpuidle_get_cpu_driver(int cpu) +{ + return cpuidle_curr_driver; } -#ifdef CONFIG_CPU_IDLE_MULTIPLE_DRIVERS +/** + * __cpuidle_set_driver - assign the global cpuidle driver variable. + * @drv: pointer to a struct cpuidle_driver object + * + * Returns 0 on success, -EBUSY if the driver is already registered. + */ +static inline int __cpuidle_set_driver(struct cpuidle_driver *drv) +{ + if (cpuidle_curr_driver) + return -EBUSY; -static DEFINE_PER_CPU(struct cpuidle_driver *, cpuidle_drivers); + cpuidle_curr_driver = drv; -static void __cpuidle_set_cpu_driver(struct cpuidle_driver *drv, int cpu) -{ - per_cpu(cpuidle_drivers, cpu) = drv; + return 0; } -static struct cpuidle_driver *__cpuidle_get_cpu_driver(int cpu) +/** + * __cpuidle_unset_driver - unset the global cpuidle driver variable. + * @drv: a pointer to a struct cpuidle_driver + * + * Reset the global cpuidle variable to NULL. 
If @drv does not match the + * registered driver, do nothing. + */ +static inline void __cpuidle_unset_driver(struct cpuidle_driver *drv) { - return per_cpu(cpuidle_drivers, cpu); + if (drv == cpuidle_curr_driver) + cpuidle_curr_driver = NULL; } -static void __cpuidle_unregister_all_cpu_driver(struct cpuidle_driver *drv) +#endif + +/** + * cpuidle_setup_broadcast_timer - enable/disable the broadcast timer + * @arg: a void pointer used to match the SMP cross call API + * + * @arg is used as a value of type 'long' with one of the two values: + * - CLOCK_EVT_NOTIFY_BROADCAST_ON + * - CLOCK_EVT_NOTIFY_BROADCAST_OFF + * + * Set the broadcast timer notification for the current CPU. This function + * is executed per CPU by an SMP cross call. It not supposed to be called + * directly. + */ +static void cpuidle_setup_broadcast_timer(void *arg) { - int cpu; - for_each_present_cpu(cpu) - __cpuidle_unregister_driver(drv, cpu); + int cpu = smp_processor_id(); + clockevents_notify((long)(arg), &cpu); } -static int __cpuidle_register_all_cpu_driver(struct cpuidle_driver *drv) +/** + * __cpuidle_driver_init - initialize the driver's internal data + * @drv: a valid pointer to a struct cpuidle_driver + */ +static void __cpuidle_driver_init(struct cpuidle_driver *drv) { - int ret = 0; - int i, cpu; + int i; - for_each_present_cpu(cpu) { - ret = __cpuidle_register_driver(drv, cpu); - if (ret) - break; - } + drv->refcnt = 0; - if (ret) - for_each_present_cpu(i) { - if (i == cpu) - break; - __cpuidle_unregister_driver(drv, i); + /* + * Use all possible CPUs as the default, because if the kernel boots + * with some CPUs offline and then we online one of them, the CPU + * notifier has to know which driver to assign. + */ + if (!drv->cpumask) + drv->cpumask = (struct cpumask *)cpu_possible_mask; + + /* + * Look for the timer stop flag in the different states, so that we know + * if the broadcast timer has to be set up. The loop is in the reverse + * order, because usually one of the deeper states have this flag set. 
+ */ + for (i = drv->state_count - 1; i >= 0 ; i--) { + if (drv->states[i].flags & CPUIDLE_FLAG_TIMER_STOP) { + drv->bctimer = 1; + break; } - - - return ret; + } } -int cpuidle_register_cpu_driver(struct cpuidle_driver *drv, int cpu) +#ifdef CONFIG_ARCH_HAS_CPU_RELAX +static int poll_idle(struct cpuidle_device *dev, + struct cpuidle_driver *drv, int index) { - int ret; + ktime_t t1, t2; + s64 diff; + + t1 = ktime_get(); + local_irq_enable(); + if (!current_set_polling_and_test()) { + while (!need_resched()) + cpu_relax(); + } + current_clr_polling(); - spin_lock(&cpuidle_driver_lock); - ret = __cpuidle_register_driver(drv, cpu); - spin_unlock(&cpuidle_driver_lock); + t2 = ktime_get(); + diff = ktime_to_us(ktime_sub(t2, t1)); + if (diff > INT_MAX) + diff = INT_MAX; - return ret; + dev->last_residency = (int) diff; + + return index; } -void cpuidle_unregister_cpu_driver(struct cpuidle_driver *drv, int cpu) +static void poll_idle_init(struct cpuidle_driver *drv) { - spin_lock(&cpuidle_driver_lock); - __cpuidle_unregister_driver(drv, cpu); - spin_unlock(&cpuidle_driver_lock); + struct cpuidle_state *state = &drv->states[0]; + + snprintf(state->name, CPUIDLE_NAME_LEN, "POLL"); + snprintf(state->desc, CPUIDLE_DESC_LEN, "CPUIDLE CORE POLL IDLE"); + state->exit_latency = 0; + state->target_residency = 0; + state->power_usage = -1; + state->flags = CPUIDLE_FLAG_TIME_VALID; + state->enter = poll_idle; + state->disabled = false; } +#else +static void poll_idle_init(struct cpuidle_driver *drv) {} +#endif /* !CONFIG_ARCH_HAS_CPU_RELAX */ /** - * cpuidle_register_driver - registers a driver - * @drv: the driver + * __cpuidle_register_driver: register the driver + * @drv: a valid pointer to a struct cpuidle_driver + * + * Do some sanity checks, initialize the driver, assign the driver to the + * global cpuidle driver variable(s) and set up the broadcast timer if the + * cpuidle driver has some states that shut down the local timer. 
+ * + * Returns 0 on success, a negative error code otherwise: + * * -EINVAL if the driver pointer is NULL or no idle states are available + * * -ENODEV if the cpuidle framework is disabled + * * -EBUSY if the driver is already assigned to the global variable(s) */ -int cpuidle_register_driver(struct cpuidle_driver *drv) +static int __cpuidle_register_driver(struct cpuidle_driver *drv) { int ret; - spin_lock(&cpuidle_driver_lock); - ret = __cpuidle_register_all_cpu_driver(drv); - spin_unlock(&cpuidle_driver_lock); + if (!drv || !drv->state_count) + return -EINVAL; - return ret; -} -EXPORT_SYMBOL_GPL(cpuidle_register_driver); + if (cpuidle_disabled()) + return -ENODEV; -/** - * cpuidle_unregister_driver - unregisters a driver - * @drv: the driver - */ -void cpuidle_unregister_driver(struct cpuidle_driver *drv) -{ - spin_lock(&cpuidle_driver_lock); - __cpuidle_unregister_all_cpu_driver(drv); - spin_unlock(&cpuidle_driver_lock); -} -EXPORT_SYMBOL_GPL(cpuidle_unregister_driver); + __cpuidle_driver_init(drv); -#else + ret = __cpuidle_set_driver(drv); + if (ret) + return ret; -static struct cpuidle_driver *cpuidle_curr_driver; + if (drv->bctimer) + on_each_cpu_mask(drv->cpumask, cpuidle_setup_broadcast_timer, + (void *)CLOCK_EVT_NOTIFY_BROADCAST_ON, 1); -static inline void __cpuidle_set_cpu_driver(struct cpuidle_driver *drv, int cpu) -{ - cpuidle_curr_driver = drv; + poll_idle_init(drv); + + return 0; } -static inline struct cpuidle_driver *__cpuidle_get_cpu_driver(int cpu) +/** + * __cpuidle_unregister_driver - unregister the driver + * @drv: a valid pointer to a struct cpuidle_driver + * + * Check if the driver is no longer in use, reset the global cpuidle driver + * variable(s) and disable the timer broadcast notification mechanism if it was + * in use. + * + */ +static void __cpuidle_unregister_driver(struct cpuidle_driver *drv) { - return cpuidle_curr_driver; + if (WARN_ON(drv->refcnt > 0)) + return; + + if (drv->bctimer) { + drv->bctimer = 0; + on_each_cpu_mask(drv->cpumask, cpuidle_setup_broadcast_timer, + (void *)CLOCK_EVT_NOTIFY_BROADCAST_OFF, 1); + } + + __cpuidle_unset_driver(drv); } /** * cpuidle_register_driver - registers a driver - * @drv: the driver + * @drv: a pointer to a valid struct cpuidle_driver + * + * Register the driver under a lock to prevent concurrent attempts to + * [un]register the driver from occuring at the same time. + * + * Returns 0 on success, a negative error code (returned by + * __cpuidle_register_driver()) otherwise. */ int cpuidle_register_driver(struct cpuidle_driver *drv) { - int ret, cpu; + int ret; - cpu = get_cpu(); spin_lock(&cpuidle_driver_lock); - ret = __cpuidle_register_driver(drv, cpu); + ret = __cpuidle_register_driver(drv); spin_unlock(&cpuidle_driver_lock); - put_cpu(); return ret; } @@ -201,23 +305,24 @@ EXPORT_SYMBOL_GPL(cpuidle_register_driver); /** * cpuidle_unregister_driver - unregisters a driver - * @drv: the driver + * @drv: a pointer to a valid struct cpuidle_driver + * + * Unregisters the cpuidle driver under a lock to prevent concurrent attempts + * to [un]register the driver from occuring at the same time. @drv has to + * match the currently registered driver. 
*/ void cpuidle_unregister_driver(struct cpuidle_driver *drv) { - int cpu; - - cpu = get_cpu(); spin_lock(&cpuidle_driver_lock); - __cpuidle_unregister_driver(drv, cpu); + __cpuidle_unregister_driver(drv); spin_unlock(&cpuidle_driver_lock); - put_cpu(); } EXPORT_SYMBOL_GPL(cpuidle_unregister_driver); -#endif /** - * cpuidle_get_driver - return the current driver + * cpuidle_get_driver - return the driver tied to the current CPU. + * + * Returns a struct cpuidle_driver pointer, or NULL if no driver is registered. */ struct cpuidle_driver *cpuidle_get_driver(void) { @@ -233,7 +338,11 @@ struct cpuidle_driver *cpuidle_get_driver(void) EXPORT_SYMBOL_GPL(cpuidle_get_driver); /** - * cpuidle_get_cpu_driver - return the driver tied with a cpu + * cpuidle_get_cpu_driver - return the driver registered for a CPU. + * @dev: a valid pointer to a struct cpuidle_device + * + * Returns a struct cpuidle_driver pointer, or NULL if no driver is registered + * for the CPU associated with @dev. */ struct cpuidle_driver *cpuidle_get_cpu_driver(struct cpuidle_device *dev) { @@ -244,6 +353,14 @@ struct cpuidle_driver *cpuidle_get_cpu_driver(struct cpuidle_device *dev) } EXPORT_SYMBOL_GPL(cpuidle_get_cpu_driver); +/** + * cpuidle_driver_ref - get a reference to the driver. + * + * Increment the reference counter of the cpuidle driver associated with + * the current CPU. + * + * Returns a pointer to the driver, or NULL if the current CPU has no driver. + */ struct cpuidle_driver *cpuidle_driver_ref(void) { struct cpuidle_driver *drv; @@ -251,18 +368,26 @@ struct cpuidle_driver *cpuidle_driver_ref(void) spin_lock(&cpuidle_driver_lock); drv = cpuidle_get_driver(); - drv->refcnt++; + if (drv) + drv->refcnt++; spin_unlock(&cpuidle_driver_lock); return drv; } +/** + * cpuidle_driver_unref - puts down the refcount for the driver + * + * Decrement the reference counter of the cpuidle driver associated with + * the current CPU. + */ void cpuidle_driver_unref(void) { - struct cpuidle_driver *drv = cpuidle_get_driver(); + struct cpuidle_driver *drv; spin_lock(&cpuidle_driver_lock); + drv = cpuidle_get_driver(); if (drv && !WARN_ON(drv->refcnt <= 0)) drv->refcnt--; diff --git a/drivers/cpuidle/governor.c b/drivers/cpuidle/governor.c index ea2f8e7aa24..ca89412f512 100644 --- a/drivers/cpuidle/governor.c +++ b/drivers/cpuidle/governor.c @@ -96,46 +96,3 @@ int cpuidle_register_governor(struct cpuidle_governor *gov) return ret; } - -/** - * cpuidle_replace_governor - find a replacement governor - * @exclude_rating: the rating that will be skipped while looking for - * new governor. 
- */ -static struct cpuidle_governor *cpuidle_replace_governor(int exclude_rating) -{ - struct cpuidle_governor *gov; - struct cpuidle_governor *ret_gov = NULL; - unsigned int max_rating = 0; - - list_for_each_entry(gov, &cpuidle_governors, governor_list) { - if (gov->rating == exclude_rating) - continue; - if (gov->rating > max_rating) { - max_rating = gov->rating; - ret_gov = gov; - } - } - - return ret_gov; -} - -/** - * cpuidle_unregister_governor - unregisters a governor - * @gov: the governor - */ -void cpuidle_unregister_governor(struct cpuidle_governor *gov) -{ - if (!gov) - return; - - mutex_lock(&cpuidle_lock); - if (gov == cpuidle_curr_governor) { - struct cpuidle_governor *new_gov; - new_gov = cpuidle_replace_governor(gov->rating); - cpuidle_switch_governor(new_gov); - } - list_del(&gov->governor_list); - mutex_unlock(&cpuidle_lock); -} - diff --git a/drivers/cpuidle/governors/ladder.c b/drivers/cpuidle/governors/ladder.c index 9b784051ec1..9f08e8cce1a 100644 --- a/drivers/cpuidle/governors/ladder.c +++ b/drivers/cpuidle/governors/ladder.c @@ -192,14 +192,4 @@ static int __init init_ladder(void) return cpuidle_register_governor(&ladder_governor); } -/** - * exit_ladder - exits the governor - */ -static void __exit exit_ladder(void) -{ - cpuidle_unregister_governor(&ladder_governor); -} - -MODULE_LICENSE("GPL"); -module_init(init_ladder); -module_exit(exit_ladder); +postcore_initcall(init_ladder); diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c index fe343a06b7d..c4f80c15a48 100644 --- a/drivers/cpuidle/governors/menu.c +++ b/drivers/cpuidle/governors/menu.c @@ -21,6 +21,15 @@ #include <linux/math64.h> #include <linux/module.h> +/* + * Please note when changing the tuning values: + * If (MAX_INTERESTING-1) * RESOLUTION > UINT_MAX, the result of + * a scaling operation multiplication may overflow on 32 bit platforms. + * In that case, #define RESOLUTION as ULL to get 64 bit result: + * #define RESOLUTION 1024ULL + * + * The default values do not overflow. + */ #define BUCKETS 12 #define INTERVALS 8 #define RESOLUTION 1024 @@ -28,13 +37,6 @@ #define MAX_INTERESTING 50000 #define STDDEV_THRESH 400 -/* 60 * 60 > STDDEV_THRESH * INTERVALS = 400 * 8 */ -#define MAX_DEVIATION 60 - -static DEFINE_PER_CPU(struct hrtimer, menu_hrtimer); -static DEFINE_PER_CPU(int, hrtimer_status); -/* menu hrtimer mode */ -enum {MENU_HRTIMER_STOP, MENU_HRTIMER_REPEAT, MENU_HRTIMER_GENERAL}; /* * Concepts and ideas behind the menu governor @@ -116,23 +118,15 @@ enum {MENU_HRTIMER_STOP, MENU_HRTIMER_REPEAT, MENU_HRTIMER_GENERAL}; * */ -/* - * The C-state residency is so long that is is worthwhile to exit - * from the shallow C-state and re-enter into a deeper C-state. 
- */ -static unsigned int perfect_cstate_ms __read_mostly = 30; -module_param(perfect_cstate_ms, uint, 0000); - struct menu_device { int last_state_idx; int needs_update; - unsigned int expected_us; - u64 predicted_us; - unsigned int exit_us; + unsigned int next_timer_us; + unsigned int predicted_us; unsigned int bucket; - u64 correction_factor[BUCKETS]; - u32 intervals[INTERVALS]; + unsigned int correction_factor[BUCKETS]; + unsigned int intervals[INTERVALS]; int interval_ptr; }; @@ -205,59 +199,28 @@ static u64 div_round64(u64 dividend, u32 divisor) return div_u64(dividend + (divisor / 2), divisor); } -/* Cancel the hrtimer if it is not triggered yet */ -void menu_hrtimer_cancel(void) -{ - int cpu = smp_processor_id(); - struct hrtimer *hrtmr = &per_cpu(menu_hrtimer, cpu); - - /* The timer is still not time out*/ - if (per_cpu(hrtimer_status, cpu)) { - hrtimer_cancel(hrtmr); - per_cpu(hrtimer_status, cpu) = MENU_HRTIMER_STOP; - } -} -EXPORT_SYMBOL_GPL(menu_hrtimer_cancel); - -/* Call back for hrtimer is triggered */ -static enum hrtimer_restart menu_hrtimer_notify(struct hrtimer *hrtimer) -{ - int cpu = smp_processor_id(); - struct menu_device *data = &per_cpu(menu_devices, cpu); - - /* In general case, the expected residency is much larger than - * deepest C-state target residency, but prediction logic still - * predicts a small predicted residency, so the prediction - * history is totally broken if the timer is triggered. - * So reset the correction factor. - */ - if (per_cpu(hrtimer_status, cpu) == MENU_HRTIMER_GENERAL) - data->correction_factor[data->bucket] = RESOLUTION * DECAY; - - per_cpu(hrtimer_status, cpu) = MENU_HRTIMER_STOP; - - return HRTIMER_NORESTART; -} - /* * Try detecting repeating patterns by keeping track of the last 8 * intervals, and checking if the standard deviation of that set * of points is below a threshold. If it is... then use the * average of these 8 points as the estimated value. */ -static u32 get_typical_interval(struct menu_device *data) +static void get_typical_interval(struct menu_device *data) { - int i = 0, divisor = 0; - uint64_t max = 0, avg = 0, stddev = 0; - int64_t thresh = LLONG_MAX; /* Discard outliers above this value. */ - unsigned int ret = 0; + int i, divisor; + unsigned int max, thresh; + uint64_t avg, stddev; + + thresh = UINT_MAX; /* Discard outliers above this value */ again: - /* first calculate average and standard deviation of the past */ - max = avg = divisor = stddev = 0; + /* First calculate the average of past intervals */ + max = 0; + avg = 0; + divisor = 0; for (i = 0; i < INTERVALS; i++) { - int64_t value = data->intervals[i]; + unsigned int value = data->intervals[i]; if (value <= thresh) { avg += value; divisor++; @@ -267,15 +230,38 @@ again: } do_div(avg, divisor); + /* Then try to determine standard deviation */ + stddev = 0; for (i = 0; i < INTERVALS; i++) { - int64_t value = data->intervals[i]; + unsigned int value = data->intervals[i]; if (value <= thresh) { int64_t diff = value - avg; stddev += diff * diff; } } do_div(stddev, divisor); - stddev = int_sqrt(stddev); + /* + * The typical interval is obtained when standard deviation is small + * or standard deviation is small compared to the average interval. + * + * int_sqrt() formal parameter type is unsigned long. When the + * greatest difference to an outlier exceeds ~65 ms * sqrt(divisor) + * the resulting squared standard deviation exceeds the input domain + * of int_sqrt on platforms where unsigned long is 32 bits in size. 
+ * In such case reject the candidate average. + * + * Use this result only if there is no timer to wake us up sooner. + */ + if (likely(stddev <= ULONG_MAX)) { + stddev = int_sqrt(stddev); + if (((avg > stddev * 6) && (divisor * 4 >= INTERVALS * 3)) + || stddev <= 20) { + if (data->next_timer_us > avg) + data->predicted_us = avg; + return; + } + } + /* * If we have outliers to the upside in our distribution, discard * those by setting the threshold to exclude these outliers, then @@ -284,23 +270,12 @@ again: * * This can deal with workloads that have long pauses interspersed * with sporadic activity with a bunch of short pauses. - * - * The typical interval is obtained when standard deviation is small - * or standard deviation is small compared to the average interval. */ - if (((avg > stddev * 6) && (divisor * 4 >= INTERVALS * 3)) - || stddev <= 20) { - data->predicted_us = avg; - ret = 1; - return ret; - - } else if ((divisor * 4) > INTERVALS * 3) { - /* Exclude the max interval */ - thresh = max - 1; - goto again; - } + if ((divisor * 4) <= INTERVALS * 3) + return; - return ret; + thresh = max - 1; + goto again; } /** @@ -313,19 +288,15 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev) struct menu_device *data = &__get_cpu_var(menu_devices); int latency_req = pm_qos_request(PM_QOS_CPU_DMA_LATENCY); int i; - int multiplier; + unsigned int interactivity_req; struct timespec t; - int repeat = 0, low_predicted = 0; - int cpu = smp_processor_id(); - struct hrtimer *hrtmr = &per_cpu(menu_hrtimer, cpu); if (data->needs_update) { menu_update(drv, dev); data->needs_update = 0; } - data->last_state_idx = 0; - data->exit_us = 0; + data->last_state_idx = CPUIDLE_DRIVER_STATE_START - 1; /* Special case when user has set very strict latency requirement */ if (unlikely(latency_req == 0)) @@ -333,32 +304,37 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev) /* determine the expected residency time, round up */ t = ktime_to_timespec(tick_nohz_get_sleep_length()); - data->expected_us = + data->next_timer_us = t.tv_sec * USEC_PER_SEC + t.tv_nsec / NSEC_PER_USEC; - data->bucket = which_bucket(data->expected_us); - - multiplier = performance_multiplier(); + data->bucket = which_bucket(data->next_timer_us); /* - * if the correction factor is 0 (eg first time init or cpu hotplug - * etc), we actually want to start out with a unity factor. + * Force the result of multiplication to be 64 bits even if both + * operands are 32 bits. + * Make sure to round up for half microseconds. */ - if (data->correction_factor[data->bucket] == 0) - data->correction_factor[data->bucket] = RESOLUTION * DECAY; - - /* Make sure to round up for half microseconds */ - data->predicted_us = div_round64(data->expected_us * data->correction_factor[data->bucket], + data->predicted_us = div_round64((uint64_t)data->next_timer_us * + data->correction_factor[data->bucket], RESOLUTION * DECAY); - repeat = get_typical_interval(data); + get_typical_interval(data); + + /* + * Performance multiplier defines a minimum predicted idle + * duration / latency ratio. Adjust the latency limit if + * necessary. + */ + interactivity_req = data->predicted_us / performance_multiplier(); + if (latency_req > interactivity_req) + latency_req = interactivity_req; /* * We want to default to C1 (hlt), not to busy polling * unless the timer is happening really really soon. 
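Worked example with illustrative numbers (RESOLUTION = 1024, DECAY = 8 as defined above): if the next timer is 10000 us away and the bucket's correction factor has decayed to 6144, then predicted_us = (10000 * 6144 + 4096) / 8192 = 7500 us; get_typical_interval() may pull that down further if the last eight intervals show a stable, shorter pattern. With performance_multiplier() returning, say, 10, interactivity_req = 7500 / 10 = 750 us, so even if PM QoS allows 900 us of exit latency the effective latency_req is clamped to 750 us and a state with exit_latency of 900 us is skipped.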
*/ - if (data->expected_us > 5 && + if (data->next_timer_us > 5 && !drv->states[CPUIDLE_DRIVER_STATE_START].disabled && dev->states_usage[CPUIDLE_DRIVER_STATE_START].disable == 0) data->last_state_idx = CPUIDLE_DRIVER_STATE_START; @@ -373,55 +349,12 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev) if (s->disabled || su->disable) continue; - if (s->target_residency > data->predicted_us) { - low_predicted = 1; + if (s->target_residency > data->predicted_us) continue; - } if (s->exit_latency > latency_req) continue; - if (s->exit_latency * multiplier > data->predicted_us) - continue; data->last_state_idx = i; - data->exit_us = s->exit_latency; - } - - /* not deepest C-state chosen for low predicted residency */ - if (low_predicted) { - unsigned int timer_us = 0; - unsigned int perfect_us = 0; - - /* - * Set a timer to detect whether this sleep is much - * longer than repeat mode predicted. If the timer - * triggers, the code will evaluate whether to put - * the CPU into a deeper C-state. - * The timer is cancelled on CPU wakeup. - */ - timer_us = 2 * (data->predicted_us + MAX_DEVIATION); - - perfect_us = perfect_cstate_ms * 1000; - - if (repeat && (4 * timer_us < data->expected_us)) { - RCU_NONIDLE(hrtimer_start(hrtmr, - ns_to_ktime(1000 * timer_us), - HRTIMER_MODE_REL_PINNED)); - /* In repeat case, menu hrtimer is started */ - per_cpu(hrtimer_status, cpu) = MENU_HRTIMER_REPEAT; - } else if (perfect_us < data->expected_us) { - /* - * The next timer is long. This could be because - * we did not make a useful prediction. - * In that case, it makes sense to re-enter - * into a deeper C-state after some time. - */ - RCU_NONIDLE(hrtimer_start(hrtmr, - ns_to_ktime(1000 * timer_us), - HRTIMER_MODE_REL_PINNED)); - /* In general case, menu hrtimer is started */ - per_cpu(hrtimer_status, cpu) = MENU_HRTIMER_GENERAL; - } - } return data->last_state_idx; @@ -452,37 +385,47 @@ static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev) { struct menu_device *data = &__get_cpu_var(menu_devices); int last_idx = data->last_state_idx; - unsigned int last_idle_us = cpuidle_get_last_residency(dev); struct cpuidle_state *target = &drv->states[last_idx]; unsigned int measured_us; - u64 new_factor; + unsigned int new_factor; /* - * Ugh, this idle state doesn't support residency measurements, so we - * are basically lost in the dark. As a compromise, assume we slept - * for the whole expected time. + * Try to figure out how much time passed between entry to low + * power state and occurrence of the wakeup event. + * + * If the entered idle state didn't support residency measurements, + * we are basically lost in the dark how much time passed. + * As a compromise, assume we slept for the whole expected time. + * + * Any measured amount of time will include the exit latency. + * Since we are interested in when the wakeup begun, not when it + * was completed, we must substract the exit latency. However, if + * the measured amount of time is less than the exit latency, + * assume the state was never reached and the exit latency is 0. */ - if (unlikely(!(target->flags & CPUIDLE_FLAG_TIME_VALID))) - last_idle_us = data->expected_us; - - - measured_us = last_idle_us; + if (unlikely(!(target->flags & CPUIDLE_FLAG_TIME_VALID))) { + /* Use timer value as is */ + measured_us = data->next_timer_us; - /* - * We correct for the exit latency; we are assuming here that the - * exit latency happens after the event that we're interested in. 
- */ - if (measured_us > data->exit_us) - measured_us -= data->exit_us; + } else { + /* Use measured value */ + measured_us = cpuidle_get_last_residency(dev); + /* Deduct exit latency */ + if (measured_us > target->exit_latency) + measured_us -= target->exit_latency; - /* update our correction ratio */ + /* Make sure our coefficients do not exceed unity */ + if (measured_us > data->next_timer_us) + measured_us = data->next_timer_us; + } - new_factor = data->correction_factor[data->bucket] - * (DECAY - 1) / DECAY; + /* Update our correction ratio */ + new_factor = data->correction_factor[data->bucket]; + new_factor -= new_factor / DECAY; - if (data->expected_us > 0 && measured_us < MAX_INTERESTING) - new_factor += RESOLUTION * measured_us / data->expected_us; + if (data->next_timer_us > 0 && measured_us < MAX_INTERESTING) + new_factor += RESOLUTION * measured_us / data->next_timer_us; else /* * we were idle so long that we count it as a perfect @@ -492,15 +435,17 @@ static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev) /* * We don't want 0 as factor; we always want at least - * a tiny bit of estimated time. + * a tiny bit of estimated time. Fortunately, due to rounding, + * new_factor will stay nonzero regardless of measured_us values + * and the compiler can eliminate this test as long as DECAY > 1. */ - if (new_factor == 0) + if (DECAY == 1 && unlikely(new_factor == 0)) new_factor = 1; data->correction_factor[data->bucket] = new_factor; /* update the repeating-pattern data */ - data->intervals[data->interval_ptr++] = last_idle_us; + data->intervals[data->interval_ptr++] = measured_us; if (data->interval_ptr >= INTERVALS) data->interval_ptr = 0; } @@ -514,12 +459,17 @@ static int menu_enable_device(struct cpuidle_driver *drv, struct cpuidle_device *dev) { struct menu_device *data = &per_cpu(menu_devices, dev->cpu); - struct hrtimer *t = &per_cpu(menu_hrtimer, dev->cpu); - hrtimer_init(t, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - t->function = menu_hrtimer_notify; + int i; memset(data, 0, sizeof(struct menu_device)); + /* + * if the correction factor is 0 (eg first time init or cpu hotplug + * etc), we actually want to start out with a unity factor. 
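The correction factor is a fixed-point exponential moving average of measured idle time relative to the predicted timer distance. A small standalone user-space sketch (not kernel code; same RESOLUTION/DECAY constants, made-up inputs) shows how the factor and the resulting prediction converge when the CPU keeps waking after half of the timer distance:

	#include <stdio.h>

	#define RESOLUTION	1024
	#define DECAY		8

	int main(void)
	{
		unsigned int factor = RESOLUTION * DECAY;	/* "unity", as initialised above */
		unsigned int next_timer_us = 10000;
		unsigned int measured_us = 5000;		/* we always wake up early */

		for (int i = 0; i < 16; i++) {
			/* same scaling as menu_select(), rounded to nearest */
			unsigned long long predicted_us =
				((unsigned long long)next_timer_us * factor +
				 (RESOLUTION * DECAY) / 2) / (RESOLUTION * DECAY);

			printf("iteration %2d: factor %5u -> predicted %llu us\n",
			       i, factor, predicted_us);

			/* same update as menu_update(): decay the old value, add the sample */
			factor -= factor / DECAY;
			factor += RESOLUTION * measured_us / next_timer_us;
		}
		return 0;	/* factor settles near 4096, predictions near 5000 us */
	}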
+ */ + for(i = 0; i < BUCKETS; i++) + data->correction_factor[i] = RESOLUTION * DECAY; + return 0; } @@ -540,14 +490,4 @@ static int __init init_menu(void) return cpuidle_register_governor(&menu_governor); } -/** - * exit_menu - exits the governor - */ -static void __exit exit_menu(void) -{ - cpuidle_unregister_governor(&menu_governor); -} - -MODULE_LICENSE("GPL"); -module_init(init_menu); -module_exit(exit_menu); +postcore_initcall(init_menu); diff --git a/drivers/cpuidle/sysfs.c b/drivers/cpuidle/sysfs.c index 428754af623..efe2f175168 100644 --- a/drivers/cpuidle/sysfs.c +++ b/drivers/cpuidle/sysfs.c @@ -11,8 +11,10 @@ #include <linux/sysfs.h> #include <linux/slab.h> #include <linux/cpu.h> +#include <linux/completion.h> #include <linux/capability.h> #include <linux/device.h> +#include <linux/kobject.h> #include "cpuidle.h" @@ -33,7 +35,8 @@ static ssize_t show_available_governors(struct device *dev, mutex_lock(&cpuidle_lock); list_for_each_entry(tmp, &cpuidle_governors, governor_list) { - if (i >= (ssize_t) ((PAGE_SIZE/sizeof(char)) - CPUIDLE_NAME_LEN - 2)) + if (i >= (ssize_t) ((PAGE_SIZE/sizeof(char)) - + CPUIDLE_NAME_LEN - 2)) goto out; i += scnprintf(&buf[i], CPUIDLE_NAME_LEN, "%s ", tmp->name); } @@ -49,11 +52,12 @@ static ssize_t show_current_driver(struct device *dev, char *buf) { ssize_t ret; - struct cpuidle_driver *cpuidle_driver = cpuidle_get_driver(); + struct cpuidle_driver *drv; spin_lock(&cpuidle_driver_lock); - if (cpuidle_driver) - ret = sprintf(buf, "%s\n", cpuidle_driver->name); + drv = cpuidle_get_driver(); + if (drv) + ret = sprintf(buf, "%s\n", drv->name); else ret = sprintf(buf, "none\n"); spin_unlock(&cpuidle_driver_lock); @@ -166,13 +170,28 @@ struct cpuidle_attr { #define define_one_rw(_name, show, store) \ static struct cpuidle_attr attr_##_name = __ATTR(_name, 0644, show, store) -#define kobj_to_cpuidledev(k) container_of(k, struct cpuidle_device, kobj) #define attr_to_cpuidleattr(a) container_of(a, struct cpuidle_attr, attr) -static ssize_t cpuidle_show(struct kobject * kobj, struct attribute * attr ,char * buf) + +struct cpuidle_device_kobj { + struct cpuidle_device *dev; + struct completion kobj_unregister; + struct kobject kobj; +}; + +static inline struct cpuidle_device *to_cpuidle_device(struct kobject *kobj) +{ + struct cpuidle_device_kobj *kdev = + container_of(kobj, struct cpuidle_device_kobj, kobj); + + return kdev->dev; +} + +static ssize_t cpuidle_show(struct kobject *kobj, struct attribute *attr, + char *buf) { int ret = -EIO; - struct cpuidle_device *dev = kobj_to_cpuidledev(kobj); - struct cpuidle_attr * cattr = attr_to_cpuidleattr(attr); + struct cpuidle_device *dev = to_cpuidle_device(kobj); + struct cpuidle_attr *cattr = attr_to_cpuidleattr(attr); if (cattr->show) { mutex_lock(&cpuidle_lock); @@ -182,12 +201,12 @@ static ssize_t cpuidle_show(struct kobject * kobj, struct attribute * attr ,char return ret; } -static ssize_t cpuidle_store(struct kobject * kobj, struct attribute * attr, - const char * buf, size_t count) +static ssize_t cpuidle_store(struct kobject *kobj, struct attribute *attr, + const char *buf, size_t count) { int ret = -EIO; - struct cpuidle_device *dev = kobj_to_cpuidledev(kobj); - struct cpuidle_attr * cattr = attr_to_cpuidleattr(attr); + struct cpuidle_device *dev = to_cpuidle_device(kobj); + struct cpuidle_attr *cattr = attr_to_cpuidleattr(attr); if (cattr->store) { mutex_lock(&cpuidle_lock); @@ -204,9 +223,10 @@ static const struct sysfs_ops cpuidle_sysfs_ops = { static void cpuidle_sysfs_release(struct kobject *kobj) { 
- struct cpuidle_device *dev = kobj_to_cpuidledev(kobj); + struct cpuidle_device_kobj *kdev = + container_of(kobj, struct cpuidle_device_kobj, kobj); - complete(&dev->kobj_unregister); + complete(&kdev->kobj_unregister); } static struct kobj_type ktype_cpuidle = { @@ -237,8 +257,8 @@ static ssize_t show_state_##_name(struct cpuidle_state *state, \ #define define_store_state_ull_function(_name) \ static ssize_t store_state_##_name(struct cpuidle_state *state, \ - struct cpuidle_state_usage *state_usage, \ - const char *buf, size_t size) \ + struct cpuidle_state_usage *state_usage, \ + const char *buf, size_t size) \ { \ unsigned long long value; \ int err; \ @@ -256,14 +276,16 @@ static ssize_t store_state_##_name(struct cpuidle_state *state, \ #define define_show_state_ull_function(_name) \ static ssize_t show_state_##_name(struct cpuidle_state *state, \ - struct cpuidle_state_usage *state_usage, char *buf) \ + struct cpuidle_state_usage *state_usage, \ + char *buf) \ { \ return sprintf(buf, "%llu\n", state_usage->_name);\ } #define define_show_state_str_function(_name) \ static ssize_t show_state_##_name(struct cpuidle_state *state, \ - struct cpuidle_state_usage *state_usage, char *buf) \ + struct cpuidle_state_usage *state_usage, \ + char *buf) \ { \ if (state->_name[0] == '\0')\ return sprintf(buf, "<null>\n");\ @@ -271,6 +293,7 @@ static ssize_t show_state_##_name(struct cpuidle_state *state, \ } define_show_state_function(exit_latency) +define_show_state_function(target_residency) define_show_state_function(power_usage) define_show_state_ull_function(usage) define_show_state_ull_function(time) @@ -282,6 +305,7 @@ define_store_state_ull_function(disable) define_one_state_ro(name, show_state_name); define_one_state_ro(desc, show_state_desc); define_one_state_ro(latency, show_state_exit_latency); +define_one_state_ro(residency, show_state_target_residency); define_one_state_ro(power, show_state_power_usage); define_one_state_ro(usage, show_state_usage); define_one_state_ro(time, show_state_time); @@ -291,6 +315,7 @@ static struct attribute *cpuidle_state_default_attrs[] = { &attr_name.attr, &attr_desc.attr, &attr_latency.attr, + &attr_residency.attr, &attr_power.attr, &attr_usage.attr, &attr_time.attr, @@ -309,8 +334,9 @@ struct cpuidle_state_kobj { #define kobj_to_state(k) (kobj_to_state_obj(k)->state) #define kobj_to_state_usage(k) (kobj_to_state_obj(k)->state_usage) #define attr_to_stateattr(a) container_of(a, struct cpuidle_state_attr, attr) -static ssize_t cpuidle_state_show(struct kobject * kobj, - struct attribute * attr ,char * buf) + +static ssize_t cpuidle_state_show(struct kobject *kobj, struct attribute *attr, + char * buf) { int ret = -EIO; struct cpuidle_state *state = kobj_to_state(kobj); @@ -323,8 +349,8 @@ static ssize_t cpuidle_state_show(struct kobject * kobj, return ret; } -static ssize_t cpuidle_state_store(struct kobject *kobj, - struct attribute *attr, const char *buf, size_t size) +static ssize_t cpuidle_state_store(struct kobject *kobj, struct attribute *attr, + const char *buf, size_t size) { int ret = -EIO; struct cpuidle_state *state = kobj_to_state(kobj); @@ -371,6 +397,7 @@ static int cpuidle_add_state_sysfs(struct cpuidle_device *device) { int i, ret = -ENOMEM; struct cpuidle_state_kobj *kobj; + struct cpuidle_device_kobj *kdev = device->kobj_dev; struct cpuidle_driver *drv = cpuidle_get_cpu_driver(device); /* state statistics */ @@ -383,7 +410,7 @@ static int cpuidle_add_state_sysfs(struct cpuidle_device *device) init_completion(&kobj->kobj_unregister); 
ret = kobject_init_and_add(&kobj->kobj, &ktype_state_cpuidle, - &device->kobj, "state%d", i); + &kdev->kobj, "state%d", i); if (ret) { kfree(kobj); goto error_state; @@ -449,8 +476,8 @@ static void cpuidle_driver_sysfs_release(struct kobject *kobj) complete(&driver_kobj->kobj_unregister); } -static ssize_t cpuidle_driver_show(struct kobject *kobj, struct attribute * attr, - char * buf) +static ssize_t cpuidle_driver_show(struct kobject *kobj, struct attribute *attr, + char *buf) { int ret = -EIO; struct cpuidle_driver_kobj *driver_kobj = kobj_to_driver_kobj(kobj); @@ -500,6 +527,7 @@ static struct kobj_type ktype_driver_cpuidle = { static int cpuidle_add_driver_sysfs(struct cpuidle_device *dev) { struct cpuidle_driver_kobj *kdrv; + struct cpuidle_device_kobj *kdev = dev->kobj_dev; struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev); int ret; @@ -511,7 +539,7 @@ static int cpuidle_add_driver_sysfs(struct cpuidle_device *dev) init_completion(&kdrv->kobj_unregister); ret = kobject_init_and_add(&kdrv->kobj, &ktype_driver_cpuidle, - &dev->kobj, "driver"); + &kdev->kobj, "driver"); if (ret) { kfree(kdrv); return ret; @@ -580,16 +608,28 @@ void cpuidle_remove_device_sysfs(struct cpuidle_device *device) */ int cpuidle_add_sysfs(struct cpuidle_device *dev) { + struct cpuidle_device_kobj *kdev; struct device *cpu_dev = get_cpu_device((unsigned long)dev->cpu); int error; - init_completion(&dev->kobj_unregister); + kdev = kzalloc(sizeof(*kdev), GFP_KERNEL); + if (!kdev) + return -ENOMEM; + kdev->dev = dev; + dev->kobj_dev = kdev; - error = kobject_init_and_add(&dev->kobj, &ktype_cpuidle, &cpu_dev->kobj, - "cpuidle"); - if (!error) - kobject_uevent(&dev->kobj, KOBJ_ADD); - return error; + init_completion(&kdev->kobj_unregister); + + error = kobject_init_and_add(&kdev->kobj, &ktype_cpuidle, &cpu_dev->kobj, + "cpuidle"); + if (error) { + kfree(kdev); + return error; + } + + kobject_uevent(&kdev->kobj, KOBJ_ADD); + + return 0; } /** @@ -598,6 +638,9 @@ int cpuidle_add_sysfs(struct cpuidle_device *dev) */ void cpuidle_remove_sysfs(struct cpuidle_device *dev) { - kobject_put(&dev->kobj); - wait_for_completion(&dev->kobj_unregister); + struct cpuidle_device_kobj *kdev = dev->kobj_dev; + + kobject_put(&kdev->kobj); + wait_for_completion(&kdev->kobj_unregister); + kfree(kdev); } |
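The sysfs rework above stops embedding the kobject and its completion directly in struct cpuidle_device and instead wraps them in a separately allocated struct cpuidle_device_kobj; to_cpuidle_device() then recovers the device with container_of(). A minimal user-space illustration of that embedding pattern (not kernel code, simplified stand-in types):

	#include <stddef.h>
	#include <stdio.h>

	#define container_of(ptr, type, member) \
		((type *)((char *)(ptr) - offsetof(type, member)))

	struct fake_kobject {		/* stands in for struct kobject */
		int refcount;
	};

	struct device_kobj {		/* stands in for struct cpuidle_device_kobj */
		const char *name;
		struct fake_kobject kobj;
	};

	/* The "core" only ever sees the embedded member... */
	static void release(struct fake_kobject *kobj)
	{
		/* ...and the owner gets its wrapper back by subtracting the offset. */
		struct device_kobj *kdev = container_of(kobj, struct device_kobj, kobj);

		printf("releasing %s\n", kdev->name);
	}

	int main(void)
	{
		struct device_kobj kdev = { .name = "cpu0" };

		release(&kdev.kobj);
		return 0;
	}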
