Diffstat (limited to 'drivers/cpuidle')
24 files changed, 2306 insertions, 623 deletions
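The central framework change in this batch (see the cpuidle.c hunk near the end of the diff) splits the monolithic cpuidle_idle_call() into three entry points that the architecture idle loop drives itself: cpuidle_select(), cpuidle_enter() and cpuidle_reflect(). The sketch below shows how a caller of the split API fits together; it is illustrative only (the function name is hypothetical) and omits the need_resched() checks, broadcast-timer notifications and tracepoints that the real idle loop keeps.

#include <linux/cpu.h>
#include <linux/cpuidle.h>
#include <linux/percpu.h>

/* Sketch of a caller of the split cpuidle API -- not code from this diff */
static void cpuidle_idle_loop_sketch(void)
{
	struct cpuidle_device *dev = __this_cpu_read(cpuidle_devices);
	struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev);
	int next_state, entered_state;

	/* ask the governor (or the "deepest state" override) for a state */
	next_state = cpuidle_select(drv, dev);
	if (next_state < 0) {
		arch_cpu_idle();	/* cpuidle unusable: plain WFI/hlt fallback */
		return;
	}

	/* enter the state, taking the coupled path when required */
	entered_state = cpuidle_enter(drv, dev, next_state);

	/* tell the governor which state was actually entered */
	cpuidle_reflect(dev, entered_state);
}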
diff --git a/drivers/cpuidle/Kconfig b/drivers/cpuidle/Kconfig
index c4cc27e5c8a..1b96fb91d32 100644
--- a/drivers/cpuidle/Kconfig
+++ b/drivers/cpuidle/Kconfig
@@ -1,7 +1,10 @@
+menu "CPU Idle"
 config CPU_IDLE
 	bool "CPU idle PM support"
 	default y if ACPI || PPC_PSERIES
+	select CPU_IDLE_GOV_LADDER if (!NO_HZ && !NO_HZ_IDLE)
+	select CPU_IDLE_GOV_MENU if (NO_HZ || NO_HZ_IDLE)
 	help
 	  CPU idle is a generic framework for supporting software-controlled
 	  idle processor power management. It includes modular cross-platform
@@ -9,9 +12,10 @@ config CPU_IDLE
 	  If you're using an ACPI-enabled platform, you should say Y here.

+if CPU_IDLE
+
 config CPU_IDLE_MULTIPLE_DRIVERS
 	bool "Support multiple cpuidle drivers"
-	depends on CPU_IDLE
 	default n
 	help
 	 Allows the cpuidle framework to use different drivers for each CPU.
@@ -19,24 +23,30 @@ config CPU_IDLE_MULTIPLE_DRIVERS
 	 states. If unsure say N.

 config CPU_IDLE_GOV_LADDER
-	bool
-	depends on CPU_IDLE
+	bool "Ladder governor (for periodic timer tick)"
 	default y

 config CPU_IDLE_GOV_MENU
-	bool
-	depends on CPU_IDLE && NO_HZ
+	bool "Menu governor (for tickless system)"
 	default y

-config ARCH_NEEDS_CPU_IDLE_COUPLED
-	def_bool n
+menu "ARM CPU Idle Drivers"
+depends on ARM
+source "drivers/cpuidle/Kconfig.arm"
+endmenu

-if CPU_IDLE
+menu "MIPS CPU Idle Drivers"
+depends on MIPS
+source "drivers/cpuidle/Kconfig.mips"
+endmenu

-config CPU_IDLE_CALXEDA
-	bool "CPU Idle Driver for Calxeda processors"
-	depends on ARCH_HIGHBANK
-	help
-	  Select this to enable cpuidle on Calxeda processors.
+menu "POWERPC CPU Idle Drivers"
+depends on PPC
+source "drivers/cpuidle/Kconfig.powerpc"
+endmenu

 endif
+
+config ARCH_NEEDS_CPU_IDLE_COUPLED
+	def_bool n
+endmenu
diff --git a/drivers/cpuidle/Kconfig.arm b/drivers/cpuidle/Kconfig.arm
new file mode 100644
index 00000000000..b6d69e899f5
--- /dev/null
+++ b/drivers/cpuidle/Kconfig.arm
@@ -0,0 +1,63 @@
+#
+# ARM CPU Idle drivers
+#
+config ARM_ARMADA_370_XP_CPUIDLE
+	bool "CPU Idle Driver for Armada 370/XP family processors"
+	depends on ARCH_MVEBU
+	help
+	  Select this to enable cpuidle on Armada 370/XP processors.
+
+config ARM_BIG_LITTLE_CPUIDLE
+	bool "Support for ARM big.LITTLE processors"
+	depends on ARCH_VEXPRESS_TC2_PM
+	select ARM_CPU_SUSPEND
+	select CPU_IDLE_MULTIPLE_DRIVERS
+	help
+	  Select this option to enable CPU idle driver for big.LITTLE based
+	  ARM systems. Driver manages CPUs coordination through MCPM and
+	  define different C-states for little and big cores through the
+	  multiple CPU idle drivers infrastructure.
+
+config ARM_CLPS711X_CPUIDLE
+	bool "CPU Idle Driver for CLPS711X processors"
+	depends on ARCH_CLPS711X || COMPILE_TEST
+	help
+	  Select this to enable cpuidle on Cirrus Logic CLPS711X SOCs.
+
+config ARM_HIGHBANK_CPUIDLE
+	bool "CPU Idle Driver for Calxeda processors"
+	depends on ARM_PSCI
+	select ARM_CPU_SUSPEND
+	help
+	  Select this to enable cpuidle on Calxeda processors.
+
+config ARM_KIRKWOOD_CPUIDLE
+	bool "CPU Idle Driver for Marvell Kirkwood SoCs"
+	depends on ARCH_KIRKWOOD || MACH_KIRKWOOD
+	help
+	  This adds the CPU Idle driver for Marvell Kirkwood SoCs.
+
+config ARM_ZYNQ_CPUIDLE
+	bool "CPU Idle Driver for Xilinx Zynq processors"
+	depends on ARCH_ZYNQ
+	help
+	  Select this to enable cpuidle on Xilinx Zynq processors.
+
+config ARM_U8500_CPUIDLE
+	bool "Cpu Idle Driver for the ST-E u8500 processors"
+	depends on ARCH_U8500
+	help
+	  Select this to enable cpuidle for ST-E u8500 processors
+
+config ARM_AT91_CPUIDLE
+	bool "Cpu Idle Driver for the AT91 processors"
+	default y
+	depends on ARCH_AT91
+	help
+	  Select this to enable cpuidle for AT91 processors
+
+config ARM_EXYNOS_CPUIDLE
+	bool "Cpu Idle Driver for the Exynos processors"
+	depends on ARCH_EXYNOS
+	help
+	  Select this to enable cpuidle for Exynos processors
diff --git a/drivers/cpuidle/Kconfig.mips b/drivers/cpuidle/Kconfig.mips
new file mode 100644
index 00000000000..0e70ee28a5c
--- /dev/null
+++ b/drivers/cpuidle/Kconfig.mips
@@ -0,0 +1,17 @@
+#
+# MIPS CPU Idle Drivers
+#
+config MIPS_CPS_CPUIDLE
+	bool "CPU Idle driver for MIPS CPS platforms"
+	depends on CPU_IDLE
+	depends on SYS_SUPPORTS_MIPS_CPS
+	select ARCH_NEEDS_CPU_IDLE_COUPLED if MIPS_MT
+	select GENERIC_CLOCKEVENTS_BROADCAST if SMP
+	select MIPS_CPS_PM
+	default y
+	help
+	  Select this option to enable processor idle state management
+	  through cpuidle for systems built around the MIPS Coherent
+	  Processing System (CPS) architecture. In order to make use of
+	  the deepest idle states you will need to ensure that you are
+	  also using the CONFIG_MIPS_CPS SMP implementation.
diff --git a/drivers/cpuidle/Kconfig.powerpc b/drivers/cpuidle/Kconfig.powerpc
new file mode 100644
index 00000000000..66c3a09574e
--- /dev/null
+++ b/drivers/cpuidle/Kconfig.powerpc
@@ -0,0 +1,20 @@
+#
+# POWERPC CPU Idle Drivers
+#
+config PSERIES_CPUIDLE
+	bool "Cpuidle driver for pSeries platforms"
+	depends on CPU_IDLE
+	depends on PPC_PSERIES
+	default y
+	help
+	  Select this option to enable processor idle state management
+	  through cpuidle subsystem.
+
+config POWERNV_CPUIDLE
+	bool "Cpuidle driver for powernv platforms"
+	depends on CPU_IDLE
+	depends on PPC_POWERNV
+	default y
+	help
+	  Select this option to enable processor idle state management
+	  through cpuidle subsystem.
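Most of the new ARM SoC entries above are backed by small platform drivers that share one shape: probe() pulls a SoC-specific standby callback out of platform_data and registers a two-state cpuidle driver (WFI plus one deeper state). The sketch below condenses that pattern with hypothetical "foo" names; it is modelled on the at91/armada/exynos drivers added later in this diff, not a driver being merged here.

#include <linux/cpuidle.h>
#include <linux/module.h>
#include <linux/platform_device.h>
#include <asm/cpuidle.h>

/* SoC-specific standby hook, handed over via platform_data (hypothetical) */
static void (*foo_standby)(void);

static int foo_enter_idle(struct cpuidle_device *dev,
			  struct cpuidle_driver *drv, int index)
{
	foo_standby();		/* put the SoC into its low-power state */
	return index;		/* report the state actually entered */
}

static struct cpuidle_driver foo_idle_driver = {
	.name		= "foo_idle",
	.owner		= THIS_MODULE,
	.states[0]	= ARM_CPUIDLE_WFI_STATE,	/* plain WFI */
	.states[1]	= {
		.enter			= foo_enter_idle,
		.exit_latency		= 10,
		.target_residency	= 10000,
		.flags			= CPUIDLE_FLAG_TIME_VALID,
		.name			= "SOC_SR",
		.desc			= "WFI and SoC low-power state",
	},
	.state_count	= 2,
};

static int foo_cpuidle_probe(struct platform_device *pdev)
{
	foo_standby = (void *)(pdev->dev.platform_data);
	return cpuidle_register(&foo_idle_driver, NULL);
}

static struct platform_driver foo_cpuidle_driver = {
	.driver	= {
		.name	= "cpuidle-foo",
		.owner	= THIS_MODULE,
	},
	.probe	= foo_cpuidle_probe,
};
module_platform_driver(foo_cpuidle_driver);

Registering through a platform device (rather than an of_machine_is_compatible() check in an initcall) is what lets the machine code simply create the device and pass the callback, so the drivers can live under drivers/cpuidle/ instead of arch/arm.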
diff --git a/drivers/cpuidle/Makefile b/drivers/cpuidle/Makefile index 0d8bd55e776..d8bb1ff7256 100644 --- a/drivers/cpuidle/Makefile +++ b/drivers/cpuidle/Makefile @@ -5,5 +5,23 @@ obj-y += cpuidle.o driver.o governor.o sysfs.o governors/ obj-$(CONFIG_ARCH_NEEDS_CPU_IDLE_COUPLED) += coupled.o -obj-$(CONFIG_CPU_IDLE_CALXEDA) += cpuidle-calxeda.o -obj-$(CONFIG_ARCH_KIRKWOOD) += cpuidle-kirkwood.o +################################################################################## +# ARM SoC drivers +obj-$(CONFIG_ARM_ARMADA_370_XP_CPUIDLE) += cpuidle-armada-370-xp.o +obj-$(CONFIG_ARM_BIG_LITTLE_CPUIDLE) += cpuidle-big_little.o +obj-$(CONFIG_ARM_CLPS711X_CPUIDLE) += cpuidle-clps711x.o +obj-$(CONFIG_ARM_HIGHBANK_CPUIDLE) += cpuidle-calxeda.o +obj-$(CONFIG_ARM_KIRKWOOD_CPUIDLE) += cpuidle-kirkwood.o +obj-$(CONFIG_ARM_ZYNQ_CPUIDLE) += cpuidle-zynq.o +obj-$(CONFIG_ARM_U8500_CPUIDLE) += cpuidle-ux500.o +obj-$(CONFIG_ARM_AT91_CPUIDLE) += cpuidle-at91.o +obj-$(CONFIG_ARM_EXYNOS_CPUIDLE) += cpuidle-exynos.o + +############################################################################### +# MIPS drivers +obj-$(CONFIG_MIPS_CPS_CPUIDLE) += cpuidle-cps.o + +############################################################################### +# POWERPC drivers +obj-$(CONFIG_PSERIES_CPUIDLE) += cpuidle-pseries.o +obj-$(CONFIG_POWERNV_CPUIDLE) += cpuidle-powernv.o diff --git a/drivers/cpuidle/coupled.c b/drivers/cpuidle/coupled.c index 2a297f86dba..73fe2f8d7f9 100644 --- a/drivers/cpuidle/coupled.c +++ b/drivers/cpuidle/coupled.c @@ -106,6 +106,7 @@ struct cpuidle_coupled { cpumask_t coupled_cpus; int requested_state[NR_CPUS]; atomic_t ready_waiting_counts; + atomic_t abort_barrier; int online_count; int refcnt; int prevent; @@ -122,12 +123,19 @@ static DEFINE_MUTEX(cpuidle_coupled_lock); static DEFINE_PER_CPU(struct call_single_data, cpuidle_coupled_poke_cb); /* - * The cpuidle_coupled_poked_mask mask is used to avoid calling + * The cpuidle_coupled_poke_pending mask is used to avoid calling * __smp_call_function_single with the per cpu call_single_data struct already * in use. This prevents a deadlock where two cpus are waiting for each others * call_single_data struct to be available */ -static cpumask_t cpuidle_coupled_poked_mask; +static cpumask_t cpuidle_coupled_poke_pending; + +/* + * The cpuidle_coupled_poked mask is used to ensure that each cpu has been poked + * once to minimize entering the ready loop with a poke pending, which would + * require aborting and retrying. + */ +static cpumask_t cpuidle_coupled_poked; /** * cpuidle_coupled_parallel_barrier - synchronize all online coupled cpus @@ -139,7 +147,7 @@ static cpumask_t cpuidle_coupled_poked_mask; * has returned from this function, the barrier is immediately available for * reuse. * - * The atomic variable a must be initialized to 0 before any cpu calls + * The atomic variable must be initialized to 0 before any cpu calls * this function, will be reset to 0 before any cpu returns from this function. 
* * Must only be called from within a coupled idle state handler @@ -151,7 +159,7 @@ void cpuidle_coupled_parallel_barrier(struct cpuidle_device *dev, atomic_t *a) { int n = dev->coupled->online_count; - smp_mb__before_atomic_inc(); + smp_mb__before_atomic(); atomic_inc(a); while (atomic_read(a) < n) @@ -291,10 +299,11 @@ static inline int cpuidle_coupled_get_state(struct cpuidle_device *dev, return state; } -static void cpuidle_coupled_poked(void *info) +static void cpuidle_coupled_handle_poke(void *info) { int cpu = (unsigned long)info; - cpumask_clear_cpu(cpu, &cpuidle_coupled_poked_mask); + cpumask_set_cpu(cpu, &cpuidle_coupled_poked); + cpumask_clear_cpu(cpu, &cpuidle_coupled_poke_pending); } /** @@ -313,8 +322,8 @@ static void cpuidle_coupled_poke(int cpu) { struct call_single_data *csd = &per_cpu(cpuidle_coupled_poke_cb, cpu); - if (!cpumask_test_and_set_cpu(cpu, &cpuidle_coupled_poked_mask)) - __smp_call_function_single(cpu, csd, 0); + if (!cpumask_test_and_set_cpu(cpu, &cpuidle_coupled_poke_pending)) + smp_call_function_single_async(cpu, csd); } /** @@ -340,30 +349,19 @@ static void cpuidle_coupled_poke_others(int this_cpu, * @coupled: the struct coupled that contains the current cpu * @next_state: the index in drv->states of the requested state for this cpu * - * Updates the requested idle state for the specified cpuidle device, - * poking all coupled cpus out of idle if necessary to let them see the new - * state. + * Updates the requested idle state for the specified cpuidle device. + * Returns the number of waiting cpus. */ -static void cpuidle_coupled_set_waiting(int cpu, +static int cpuidle_coupled_set_waiting(int cpu, struct cpuidle_coupled *coupled, int next_state) { - int w; - coupled->requested_state[cpu] = next_state; /* - * If this is the last cpu to enter the waiting state, poke - * all the other cpus out of their waiting state so they can - * enter a deeper state. This can race with one of the cpus - * exiting the waiting state due to an interrupt and - * decrementing waiting_count, see comment below. - * * The atomic_inc_return provides a write barrier to order the write * to requested_state with the later write that increments ready_count. */ - w = atomic_inc_return(&coupled->ready_waiting_counts) & WAITING_MASK; - if (w == coupled->online_count) - cpuidle_coupled_poke_others(cpu, coupled); + return atomic_inc_return(&coupled->ready_waiting_counts) & WAITING_MASK; } /** @@ -410,19 +408,33 @@ static void cpuidle_coupled_set_done(int cpu, struct cpuidle_coupled *coupled) * been processed and the poke bit has been cleared. * * Other interrupts may also be processed while interrupts are enabled, so - * need_resched() must be tested after turning interrupts off again to make sure + * need_resched() must be tested after this function returns to make sure * the interrupt didn't schedule work that should take the cpu out of idle. * - * Returns 0 if need_resched was false, -EINTR if need_resched was true. + * Returns 0 if no poke was pending, 1 if a poke was cleared. */ static int cpuidle_coupled_clear_pokes(int cpu) { + if (!cpumask_test_cpu(cpu, &cpuidle_coupled_poke_pending)) + return 0; + local_irq_enable(); - while (cpumask_test_cpu(cpu, &cpuidle_coupled_poked_mask)) + while (cpumask_test_cpu(cpu, &cpuidle_coupled_poke_pending)) cpu_relax(); local_irq_disable(); - return need_resched() ? 
-EINTR : 0; + return 1; +} + +static bool cpuidle_coupled_any_pokes_pending(struct cpuidle_coupled *coupled) +{ + cpumask_t cpus; + int ret; + + cpumask_and(&cpus, cpu_online_mask, &coupled->coupled_cpus); + ret = cpumask_and(&cpus, &cpuidle_coupled_poke_pending, &cpus); + + return ret; } /** @@ -449,31 +461,56 @@ int cpuidle_enter_state_coupled(struct cpuidle_device *dev, { int entered_state = -1; struct cpuidle_coupled *coupled = dev->coupled; + int w; if (!coupled) return -EINVAL; while (coupled->prevent) { - if (cpuidle_coupled_clear_pokes(dev->cpu)) { + cpuidle_coupled_clear_pokes(dev->cpu); + if (need_resched()) { local_irq_enable(); return entered_state; } entered_state = cpuidle_enter_state(dev, drv, dev->safe_state_index); + local_irq_disable(); } /* Read barrier ensures online_count is read after prevent is cleared */ smp_rmb(); - cpuidle_coupled_set_waiting(dev->cpu, coupled, next_state); +reset: + cpumask_clear_cpu(dev->cpu, &cpuidle_coupled_poked); + + w = cpuidle_coupled_set_waiting(dev->cpu, coupled, next_state); + /* + * If this is the last cpu to enter the waiting state, poke + * all the other cpus out of their waiting state so they can + * enter a deeper state. This can race with one of the cpus + * exiting the waiting state due to an interrupt and + * decrementing waiting_count, see comment below. + */ + if (w == coupled->online_count) { + cpumask_set_cpu(dev->cpu, &cpuidle_coupled_poked); + cpuidle_coupled_poke_others(dev->cpu, coupled); + } retry: /* * Wait for all coupled cpus to be idle, using the deepest state - * allowed for a single cpu. + * allowed for a single cpu. If this was not the poking cpu, wait + * for at least one poke before leaving to avoid a race where + * two cpus could arrive at the waiting loop at the same time, + * but the first of the two to arrive could skip the loop without + * processing the pokes from the last to arrive. */ - while (!cpuidle_coupled_cpus_waiting(coupled)) { - if (cpuidle_coupled_clear_pokes(dev->cpu)) { + while (!cpuidle_coupled_cpus_waiting(coupled) || + !cpumask_test_cpu(dev->cpu, &cpuidle_coupled_poked)) { + if (cpuidle_coupled_clear_pokes(dev->cpu)) + continue; + + if (need_resched()) { cpuidle_coupled_set_not_waiting(dev->cpu, coupled); goto out; } @@ -485,14 +522,22 @@ retry: entered_state = cpuidle_enter_state(dev, drv, dev->safe_state_index); + local_irq_disable(); } - if (cpuidle_coupled_clear_pokes(dev->cpu)) { + cpuidle_coupled_clear_pokes(dev->cpu); + if (need_resched()) { cpuidle_coupled_set_not_waiting(dev->cpu, coupled); goto out; } /* + * Make sure final poke status for this cpu is visible before setting + * cpu as ready. + */ + smp_wmb(); + + /* * All coupled cpus are probably idle. There is a small chance that * one of the other cpus just became active. Increment the ready count, * and spin until all coupled cpus have incremented the counter. Once a @@ -511,6 +556,28 @@ retry: cpu_relax(); } + /* + * Make sure read of all cpus ready is done before reading pending pokes + */ + smp_rmb(); + + /* + * There is a small chance that a cpu left and reentered idle after this + * cpu saw that all cpus were waiting. The cpu that reentered idle will + * have sent this cpu a poke, which will still be pending after the + * ready loop. The pending interrupt may be lost by the interrupt + * controller when entering the deep idle state. 
It's not possible to + * clear a pending interrupt without turning interrupts on and handling + * it, and it's too late to turn on interrupts here, so reset the + * coupled idle state of all cpus and retry. + */ + if (cpuidle_coupled_any_pokes_pending(coupled)) { + cpuidle_coupled_set_done(dev->cpu, coupled); + /* Wait for all cpus to see the pending pokes */ + cpuidle_coupled_parallel_barrier(dev, &coupled->abort_barrier); + goto reset; + } + /* all cpus have acked the coupled state */ next_state = cpuidle_coupled_get_state(dev, coupled); @@ -596,7 +663,7 @@ have_coupled: coupled->refcnt++; csd = &per_cpu(cpuidle_coupled_poke_cb, dev->cpu); - csd->func = cpuidle_coupled_poked; + csd->func = cpuidle_coupled_handle_poke; csd->info = (void *)(unsigned long)dev->cpu; return 0; diff --git a/drivers/cpuidle/cpuidle-armada-370-xp.c b/drivers/cpuidle/cpuidle-armada-370-xp.c new file mode 100644 index 00000000000..a5fba0287bf --- /dev/null +++ b/drivers/cpuidle/cpuidle-armada-370-xp.c @@ -0,0 +1,93 @@ +/* + * Marvell Armada 370 and Armada XP SoC cpuidle driver + * + * Copyright (C) 2014 Marvell + * + * Nadav Haklai <nadavh@marvell.com> + * Gregory CLEMENT <gregory.clement@free-electrons.com> + * + * This file is licensed under the terms of the GNU General Public + * License version 2. This program is licensed "as is" without any + * warranty of any kind, whether express or implied. + * + * Maintainer: Gregory CLEMENT <gregory.clement@free-electrons.com> + */ + +#include <linux/cpu_pm.h> +#include <linux/cpuidle.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/suspend.h> +#include <linux/platform_device.h> +#include <asm/cpuidle.h> + +#define ARMADA_370_XP_MAX_STATES 3 +#define ARMADA_370_XP_FLAG_DEEP_IDLE 0x10000 + +static int (*armada_370_xp_cpu_suspend)(int); + +static int armada_370_xp_enter_idle(struct cpuidle_device *dev, + struct cpuidle_driver *drv, + int index) +{ + int ret; + bool deepidle = false; + cpu_pm_enter(); + + if (drv->states[index].flags & ARMADA_370_XP_FLAG_DEEP_IDLE) + deepidle = true; + + ret = armada_370_xp_cpu_suspend(deepidle); + if (ret) + return ret; + + cpu_pm_exit(); + + return index; +} + +static struct cpuidle_driver armada_370_xp_idle_driver = { + .name = "armada_370_xp_idle", + .states[0] = ARM_CPUIDLE_WFI_STATE, + .states[1] = { + .enter = armada_370_xp_enter_idle, + .exit_latency = 10, + .power_usage = 50, + .target_residency = 100, + .flags = CPUIDLE_FLAG_TIME_VALID, + .name = "Idle", + .desc = "CPU power down", + }, + .states[2] = { + .enter = armada_370_xp_enter_idle, + .exit_latency = 100, + .power_usage = 5, + .target_residency = 1000, + .flags = CPUIDLE_FLAG_TIME_VALID | + ARMADA_370_XP_FLAG_DEEP_IDLE, + .name = "Deep idle", + .desc = "CPU and L2 Fabric power down", + }, + .state_count = ARMADA_370_XP_MAX_STATES, +}; + +static int armada_370_xp_cpuidle_probe(struct platform_device *pdev) +{ + + armada_370_xp_cpu_suspend = (void *)(pdev->dev.platform_data); + return cpuidle_register(&armada_370_xp_idle_driver, NULL); +} + +static struct platform_driver armada_370_xp_cpuidle_plat_driver = { + .driver = { + .name = "cpuidle-armada-370-xp", + .owner = THIS_MODULE, + }, + .probe = armada_370_xp_cpuidle_probe, +}; + +module_platform_driver(armada_370_xp_cpuidle_plat_driver); + +MODULE_AUTHOR("Gregory CLEMENT <gregory.clement@free-electrons.com>"); +MODULE_DESCRIPTION("Armada 370/XP cpu idle driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/cpuidle/cpuidle-at91.c b/drivers/cpuidle/cpuidle-at91.c new file mode 100644 index 
00000000000..a0774370c6b --- /dev/null +++ b/drivers/cpuidle/cpuidle-at91.c @@ -0,0 +1,69 @@ +/* + * based on arch/arm/mach-kirkwood/cpuidle.c + * + * CPU idle support for AT91 SoC + * + * This file is licensed under the terms of the GNU General Public + * License version 2. This program is licensed "as is" without any + * warranty of any kind, whether express or implied. + * + * The cpu idle uses wait-for-interrupt and RAM self refresh in order + * to implement two idle states - + * #1 wait-for-interrupt + * #2 wait-for-interrupt and RAM self refresh + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/platform_device.h> +#include <linux/cpuidle.h> +#include <linux/io.h> +#include <linux/export.h> +#include <asm/proc-fns.h> +#include <asm/cpuidle.h> + +#define AT91_MAX_STATES 2 + +static void (*at91_standby)(void); + +/* Actual code that puts the SoC in different idle states */ +static int at91_enter_idle(struct cpuidle_device *dev, + struct cpuidle_driver *drv, + int index) +{ + at91_standby(); + return index; +} + +static struct cpuidle_driver at91_idle_driver = { + .name = "at91_idle", + .owner = THIS_MODULE, + .states[0] = ARM_CPUIDLE_WFI_STATE, + .states[1] = { + .enter = at91_enter_idle, + .exit_latency = 10, + .target_residency = 10000, + .flags = CPUIDLE_FLAG_TIME_VALID, + .name = "RAM_SR", + .desc = "WFI and DDR Self Refresh", + }, + .state_count = AT91_MAX_STATES, +}; + +/* Initialize CPU idle by registering the idle states */ +static int at91_cpuidle_probe(struct platform_device *dev) +{ + at91_standby = (void *)(dev->dev.platform_data); + + return cpuidle_register(&at91_idle_driver, NULL); +} + +static struct platform_driver at91_cpuidle_driver = { + .driver = { + .name = "cpuidle-at91", + .owner = THIS_MODULE, + }, + .probe = at91_cpuidle_probe, +}; + +module_platform_driver(at91_cpuidle_driver); diff --git a/drivers/cpuidle/cpuidle-big_little.c b/drivers/cpuidle/cpuidle-big_little.c new file mode 100644 index 00000000000..b45fc624904 --- /dev/null +++ b/drivers/cpuidle/cpuidle-big_little.c @@ -0,0 +1,209 @@ +/* + * Copyright (c) 2013 ARM/Linaro + * + * Authors: Daniel Lezcano <daniel.lezcano@linaro.org> + * Lorenzo Pieralisi <lorenzo.pieralisi@arm.com> + * Nicolas Pitre <nicolas.pitre@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Maintainer: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com> + * Maintainer: Daniel Lezcano <daniel.lezcano@linaro.org> + */ +#include <linux/cpuidle.h> +#include <linux/cpu_pm.h> +#include <linux/slab.h> +#include <linux/of.h> + +#include <asm/cpu.h> +#include <asm/cputype.h> +#include <asm/cpuidle.h> +#include <asm/mcpm.h> +#include <asm/smp_plat.h> +#include <asm/suspend.h> + +static int bl_enter_powerdown(struct cpuidle_device *dev, + struct cpuidle_driver *drv, int idx); + +/* + * NB: Owing to current menu governor behaviour big and LITTLE + * index 1 states have to define exit_latency and target_residency for + * cluster state since, when all CPUs in a cluster hit it, the cluster + * can be shutdown. This means that when a single CPU enters this state + * the exit_latency and target_residency values are somewhat overkill. + * There is no notion of cluster states in the menu governor, so CPUs + * have to define CPU states where possibly the cluster will be shutdown + * depending on the state of other CPUs. 
idle states entry and exit happen + * at random times; however the cluster state provides target_residency + * values as if all CPUs in a cluster enter the state at once; this is + * somewhat optimistic and behaviour should be fixed either in the governor + * or in the MCPM back-ends. + * To make this driver 100% generic the number of states and the exit_latency + * target_residency values must be obtained from device tree bindings. + * + * exit_latency: refers to the TC2 vexpress test chip and depends on the + * current cluster operating point. It is the time it takes to get the CPU + * up and running when the CPU is powered up on cluster wake-up from shutdown. + * Current values for big and LITTLE clusters are provided for clusters + * running at default operating points. + * + * target_residency: it is the minimum amount of time the cluster has + * to be down to break even in terms of power consumption. cluster + * shutdown has inherent dynamic power costs (L2 writebacks to DRAM + * being the main factor) that depend on the current operating points. + * The current values for both clusters are provided for a CPU whose half + * of L2 lines are dirty and require cleaning to DRAM, and takes into + * account leakage static power values related to the vexpress TC2 testchip. + */ +static struct cpuidle_driver bl_idle_little_driver = { + .name = "little_idle", + .owner = THIS_MODULE, + .states[0] = ARM_CPUIDLE_WFI_STATE, + .states[1] = { + .enter = bl_enter_powerdown, + .exit_latency = 700, + .target_residency = 2500, + .flags = CPUIDLE_FLAG_TIME_VALID | + CPUIDLE_FLAG_TIMER_STOP, + .name = "C1", + .desc = "ARM little-cluster power down", + }, + .state_count = 2, +}; + +static struct cpuidle_driver bl_idle_big_driver = { + .name = "big_idle", + .owner = THIS_MODULE, + .states[0] = ARM_CPUIDLE_WFI_STATE, + .states[1] = { + .enter = bl_enter_powerdown, + .exit_latency = 500, + .target_residency = 2000, + .flags = CPUIDLE_FLAG_TIME_VALID | + CPUIDLE_FLAG_TIMER_STOP, + .name = "C1", + .desc = "ARM big-cluster power down", + }, + .state_count = 2, +}; + +/* + * notrace prevents trace shims from getting inserted where they + * should not. Global jumps and ldrex/strex must not be inserted + * in power down sequences where caches and MMU may be turned off. + */ +static int notrace bl_powerdown_finisher(unsigned long arg) +{ + /* MCPM works with HW CPU identifiers */ + unsigned int mpidr = read_cpuid_mpidr(); + unsigned int cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1); + unsigned int cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0); + + mcpm_set_entry_vector(cpu, cluster, cpu_resume); + + /* + * Residency value passed to mcpm_cpu_suspend back-end + * has to be given clear semantics. Set to 0 as a + * temporary value. + */ + mcpm_cpu_suspend(0); + + /* return value != 0 means failure */ + return 1; +} + +/** + * bl_enter_powerdown - Programs CPU to enter the specified state + * @dev: cpuidle device + * @drv: The target state to be programmed + * @idx: state index + * + * Called from the CPUidle framework to program the device to the + * specified target state selected by the governor. 
+ */ +static int bl_enter_powerdown(struct cpuidle_device *dev, + struct cpuidle_driver *drv, int idx) +{ + cpu_pm_enter(); + + cpu_suspend(0, bl_powerdown_finisher); + + /* signals the MCPM core that CPU is out of low power state */ + mcpm_cpu_powered_up(); + + cpu_pm_exit(); + + return idx; +} + +static int __init bl_idle_driver_init(struct cpuidle_driver *drv, int cpu_id) +{ + struct cpuinfo_arm *cpu_info; + struct cpumask *cpumask; + unsigned long cpuid; + int cpu; + + cpumask = kzalloc(cpumask_size(), GFP_KERNEL); + if (!cpumask) + return -ENOMEM; + + for_each_possible_cpu(cpu) { + cpu_info = &per_cpu(cpu_data, cpu); + cpuid = is_smp() ? cpu_info->cpuid : read_cpuid_id(); + + /* read cpu id part number */ + if ((cpuid & 0xFFF0) == cpu_id) + cpumask_set_cpu(cpu, cpumask); + } + + drv->cpumask = cpumask; + + return 0; +} + +static int __init bl_idle_init(void) +{ + int ret; + + /* + * Initialize the driver just for a compliant set of machines + */ + if (!of_machine_is_compatible("arm,vexpress,v2p-ca15_a7")) + return -ENODEV; + /* + * For now the differentiation between little and big cores + * is based on the part number. A7 cores are considered little + * cores, A15 are considered big cores. This distinction may + * evolve in the future with a more generic matching approach. + */ + ret = bl_idle_driver_init(&bl_idle_little_driver, + ARM_CPU_PART_CORTEX_A7); + if (ret) + return ret; + + ret = bl_idle_driver_init(&bl_idle_big_driver, ARM_CPU_PART_CORTEX_A15); + if (ret) + goto out_uninit_little; + + ret = cpuidle_register(&bl_idle_little_driver, NULL); + if (ret) + goto out_uninit_big; + + ret = cpuidle_register(&bl_idle_big_driver, NULL); + if (ret) + goto out_unregister_little; + + return 0; + +out_unregister_little: + cpuidle_unregister(&bl_idle_little_driver); +out_uninit_big: + kfree(bl_idle_big_driver.cpumask); +out_uninit_little: + kfree(bl_idle_little_driver.cpumask); + + return ret; +} +device_initcall(bl_idle_init); diff --git a/drivers/cpuidle/cpuidle-calxeda.c b/drivers/cpuidle/cpuidle-calxeda.c index 223379169cb..6e51114057d 100644 --- a/drivers/cpuidle/cpuidle-calxeda.c +++ b/drivers/cpuidle/cpuidle-calxeda.c @@ -21,67 +21,30 @@ */ #include <linux/cpuidle.h> +#include <linux/cpu_pm.h> #include <linux/init.h> -#include <linux/io.h> -#include <linux/of.h> -#include <linux/time.h> -#include <linux/delay.h> -#include <linux/suspend.h> +#include <linux/mm.h> +#include <linux/platform_device.h> #include <asm/cpuidle.h> -#include <asm/proc-fns.h> -#include <asm/smp_scu.h> #include <asm/suspend.h> -#include <asm/cacheflush.h> -#include <asm/cp15.h> - -extern void highbank_set_cpu_jump(int cpu, void *jump_addr); -extern void *scu_base_addr; - -static inline unsigned int get_auxcr(void) -{ - unsigned int val; - asm("mrc p15, 0, %0, c1, c0, 1 @ get AUXCR" : "=r" (val) : : "cc"); - return val; -} - -static inline void set_auxcr(unsigned int val) -{ - asm volatile("mcr p15, 0, %0, c1, c0, 1 @ set AUXCR" - : : "r" (val) : "cc"); - isb(); -} - -static noinline void calxeda_idle_restore(void) -{ - set_cr(get_cr() | CR_C); - set_auxcr(get_auxcr() | 0x40); - scu_power_mode(scu_base_addr, SCU_PM_NORMAL); -} +#include <asm/psci.h> static int calxeda_idle_finish(unsigned long val) { - /* Already flushed cache, but do it again as the outer cache functions - * dirty the cache with spinlocks */ - flush_cache_all(); - - set_auxcr(get_auxcr() & ~0x40); - set_cr(get_cr() & ~CR_C); - - scu_power_mode(scu_base_addr, SCU_PM_DORMANT); - - cpu_do_idle(); - - /* Restore things if we didn't enter 
power-gating */ - calxeda_idle_restore(); - return 1; + const struct psci_power_state ps = { + .type = PSCI_POWER_STATE_TYPE_POWER_DOWN, + }; + return psci_ops.cpu_suspend(ps, __pa(cpu_resume)); } static int calxeda_pwrdown_idle(struct cpuidle_device *dev, struct cpuidle_driver *drv, int index) { - highbank_set_cpu_jump(smp_processor_id(), cpu_resume); + cpu_pm_enter(); cpu_suspend(0, calxeda_idle_finish); + cpu_pm_exit(); + return index; } @@ -102,11 +65,17 @@ static struct cpuidle_driver calxeda_idle_driver = { .state_count = 2, }; -static int __init calxeda_cpuidle_init(void) +static int calxeda_cpuidle_probe(struct platform_device *pdev) { - if (!of_machine_is_compatible("calxeda,highbank")) - return -ENODEV; - return cpuidle_register(&calxeda_idle_driver, NULL); } -module_init(calxeda_cpuidle_init); + +static struct platform_driver calxeda_cpuidle_plat_driver = { + .driver = { + .name = "cpuidle-calxeda", + .owner = THIS_MODULE, + }, + .probe = calxeda_cpuidle_probe, +}; + +module_platform_driver(calxeda_cpuidle_plat_driver); diff --git a/drivers/cpuidle/cpuidle-clps711x.c b/drivers/cpuidle/cpuidle-clps711x.c new file mode 100644 index 00000000000..5243811daa6 --- /dev/null +++ b/drivers/cpuidle/cpuidle-clps711x.c @@ -0,0 +1,64 @@ +/* + * CLPS711X CPU idle driver + * + * Copyright (C) 2014 Alexander Shiyan <shc_work@mail.ru> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include <linux/cpuidle.h> +#include <linux/err.h> +#include <linux/io.h> +#include <linux/module.h> +#include <linux/platform_device.h> + +#define CLPS711X_CPUIDLE_NAME "clps711x-cpuidle" + +static void __iomem *clps711x_halt; + +static int clps711x_cpuidle_halt(struct cpuidle_device *dev, + struct cpuidle_driver *drv, int index) +{ + writel(0xaa, clps711x_halt); + + return index; +} + +static struct cpuidle_driver clps711x_idle_driver = { + .name = CLPS711X_CPUIDLE_NAME, + .owner = THIS_MODULE, + .states[0] = { + .name = "HALT", + .desc = "CLPS711X HALT", + .enter = clps711x_cpuidle_halt, + .exit_latency = 1, + }, + .state_count = 1, +}; + +static int __init clps711x_cpuidle_probe(struct platform_device *pdev) +{ + struct resource *res; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + clps711x_halt = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(clps711x_halt)) + return PTR_ERR(clps711x_halt); + + return cpuidle_register(&clps711x_idle_driver, NULL); +} + +static struct platform_driver clps711x_cpuidle_driver = { + .driver = { + .name = CLPS711X_CPUIDLE_NAME, + .owner = THIS_MODULE, + }, +}; +module_platform_driver_probe(clps711x_cpuidle_driver, clps711x_cpuidle_probe); + +MODULE_AUTHOR("Alexander Shiyan <shc_work@mail.ru>"); +MODULE_DESCRIPTION("CLPS711X CPU idle driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/cpuidle/cpuidle-cps.c b/drivers/cpuidle/cpuidle-cps.c new file mode 100644 index 00000000000..fc7b62720de --- /dev/null +++ b/drivers/cpuidle/cpuidle-cps.c @@ -0,0 +1,186 @@ +/* + * Copyright (C) 2014 Imagination Technologies + * Author: Paul Burton <paul.burton@imgtec.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. 
+ */ + +#include <linux/cpu_pm.h> +#include <linux/cpuidle.h> +#include <linux/init.h> + +#include <asm/idle.h> +#include <asm/pm-cps.h> + +/* Enumeration of the various idle states this driver may enter */ +enum cps_idle_state { + STATE_WAIT = 0, /* MIPS wait instruction, coherent */ + STATE_NC_WAIT, /* MIPS wait instruction, non-coherent */ + STATE_CLOCK_GATED, /* Core clock gated */ + STATE_POWER_GATED, /* Core power gated */ + STATE_COUNT +}; + +static int cps_nc_enter(struct cpuidle_device *dev, + struct cpuidle_driver *drv, int index) +{ + enum cps_pm_state pm_state; + int err; + + /* + * At least one core must remain powered up & clocked in order for the + * system to have any hope of functioning. + * + * TODO: don't treat core 0 specially, just prevent the final core + * TODO: remap interrupt affinity temporarily + */ + if (!cpu_data[dev->cpu].core && (index > STATE_NC_WAIT)) + index = STATE_NC_WAIT; + + /* Select the appropriate cps_pm_state */ + switch (index) { + case STATE_NC_WAIT: + pm_state = CPS_PM_NC_WAIT; + break; + case STATE_CLOCK_GATED: + pm_state = CPS_PM_CLOCK_GATED; + break; + case STATE_POWER_GATED: + pm_state = CPS_PM_POWER_GATED; + break; + default: + BUG(); + return -EINVAL; + } + + /* Notify listeners the CPU is about to power down */ + if ((pm_state == CPS_PM_POWER_GATED) && cpu_pm_enter()) + return -EINTR; + + /* Enter that state */ + err = cps_pm_enter_state(pm_state); + + /* Notify listeners the CPU is back up */ + if (pm_state == CPS_PM_POWER_GATED) + cpu_pm_exit(); + + return err ?: index; +} + +static struct cpuidle_driver cps_driver = { + .name = "cpc_cpuidle", + .owner = THIS_MODULE, + .states = { + [STATE_WAIT] = MIPS_CPUIDLE_WAIT_STATE, + [STATE_NC_WAIT] = { + .enter = cps_nc_enter, + .exit_latency = 200, + .target_residency = 450, + .flags = CPUIDLE_FLAG_TIME_VALID, + .name = "nc-wait", + .desc = "non-coherent MIPS wait", + }, + [STATE_CLOCK_GATED] = { + .enter = cps_nc_enter, + .exit_latency = 300, + .target_residency = 700, + .flags = CPUIDLE_FLAG_TIME_VALID | + CPUIDLE_FLAG_TIMER_STOP, + .name = "clock-gated", + .desc = "core clock gated", + }, + [STATE_POWER_GATED] = { + .enter = cps_nc_enter, + .exit_latency = 600, + .target_residency = 1000, + .flags = CPUIDLE_FLAG_TIME_VALID | + CPUIDLE_FLAG_TIMER_STOP, + .name = "power-gated", + .desc = "core power gated", + }, + }, + .state_count = STATE_COUNT, + .safe_state_index = 0, +}; + +static void __init cps_cpuidle_unregister(void) +{ + int cpu; + struct cpuidle_device *device; + + for_each_possible_cpu(cpu) { + device = &per_cpu(cpuidle_dev, cpu); + cpuidle_unregister_device(device); + } + + cpuidle_unregister_driver(&cps_driver); +} + +static int __init cps_cpuidle_init(void) +{ + int err, cpu, core, i; + struct cpuidle_device *device; + + /* Detect supported states */ + if (!cps_pm_support_state(CPS_PM_POWER_GATED)) + cps_driver.state_count = STATE_CLOCK_GATED + 1; + if (!cps_pm_support_state(CPS_PM_CLOCK_GATED)) + cps_driver.state_count = STATE_NC_WAIT + 1; + if (!cps_pm_support_state(CPS_PM_NC_WAIT)) + cps_driver.state_count = STATE_WAIT + 1; + + /* Inform the user if some states are unavailable */ + if (cps_driver.state_count < STATE_COUNT) { + pr_info("cpuidle-cps: limited to "); + switch (cps_driver.state_count - 1) { + case STATE_WAIT: + pr_cont("coherent wait\n"); + break; + case STATE_NC_WAIT: + pr_cont("non-coherent wait\n"); + break; + case STATE_CLOCK_GATED: + pr_cont("clock gating\n"); + break; + } + } + + /* + * Set the coupled flag on the appropriate states if this system + * 
requires it. + */ + if (coupled_coherence) + for (i = STATE_NC_WAIT; i < cps_driver.state_count; i++) + cps_driver.states[i].flags |= CPUIDLE_FLAG_COUPLED; + + err = cpuidle_register_driver(&cps_driver); + if (err) { + pr_err("Failed to register CPS cpuidle driver\n"); + return err; + } + + for_each_possible_cpu(cpu) { + core = cpu_data[cpu].core; + device = &per_cpu(cpuidle_dev, cpu); + device->cpu = cpu; +#ifdef CONFIG_MIPS_MT + cpumask_copy(&device->coupled_cpus, &cpu_sibling_map[cpu]); +#endif + + err = cpuidle_register_device(device); + if (err) { + pr_err("Failed to register CPU%d cpuidle device\n", + cpu); + goto err_out; + } + } + + return 0; +err_out: + cps_cpuidle_unregister(); + return err; +} +device_initcall(cps_cpuidle_init); diff --git a/drivers/cpuidle/cpuidle-exynos.c b/drivers/cpuidle/cpuidle-exynos.c new file mode 100644 index 00000000000..7c015126382 --- /dev/null +++ b/drivers/cpuidle/cpuidle-exynos.c @@ -0,0 +1,99 @@ +/* linux/arch/arm/mach-exynos/cpuidle.c + * + * Copyright (c) 2011 Samsung Electronics Co., Ltd. + * http://www.samsung.com + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. +*/ + +#include <linux/cpuidle.h> +#include <linux/cpu_pm.h> +#include <linux/export.h> +#include <linux/module.h> +#include <linux/platform_device.h> + +#include <asm/proc-fns.h> +#include <asm/suspend.h> +#include <asm/cpuidle.h> + +static void (*exynos_enter_aftr)(void); + +static int idle_finisher(unsigned long flags) +{ + exynos_enter_aftr(); + cpu_do_idle(); + + return 1; +} + +static int exynos_enter_core0_aftr(struct cpuidle_device *dev, + struct cpuidle_driver *drv, + int index) +{ + cpu_pm_enter(); + cpu_suspend(0, idle_finisher); + cpu_pm_exit(); + + return index; +} + +static int exynos_enter_lowpower(struct cpuidle_device *dev, + struct cpuidle_driver *drv, + int index) +{ + int new_index = index; + + /* AFTR can only be entered when cores other than CPU0 are offline */ + if (num_online_cpus() > 1 || dev->cpu != 0) + new_index = drv->safe_state_index; + + if (new_index == 0) + return arm_cpuidle_simple_enter(dev, drv, new_index); + else + return exynos_enter_core0_aftr(dev, drv, new_index); +} + +static struct cpuidle_driver exynos_idle_driver = { + .name = "exynos_idle", + .owner = THIS_MODULE, + .states = { + [0] = ARM_CPUIDLE_WFI_STATE, + [1] = { + .enter = exynos_enter_lowpower, + .exit_latency = 300, + .target_residency = 100000, + .flags = CPUIDLE_FLAG_TIME_VALID, + .name = "C1", + .desc = "ARM power down", + }, + }, + .state_count = 2, + .safe_state_index = 0, +}; + +static int exynos_cpuidle_probe(struct platform_device *pdev) +{ + int ret; + + exynos_enter_aftr = (void *)(pdev->dev.platform_data); + + ret = cpuidle_register(&exynos_idle_driver, NULL); + if (ret) { + dev_err(&pdev->dev, "failed to register cpuidle driver\n"); + return ret; + } + + return 0; +} + +static struct platform_driver exynos_cpuidle_driver = { + .probe = exynos_cpuidle_probe, + .driver = { + .name = "exynos_cpuidle", + .owner = THIS_MODULE, + }, +}; + +module_platform_driver(exynos_cpuidle_driver); diff --git a/drivers/cpuidle/cpuidle-kirkwood.c b/drivers/cpuidle/cpuidle-kirkwood.c index 521b0a7fdd8..41ba843251b 100644 --- a/drivers/cpuidle/cpuidle-kirkwood.c +++ b/drivers/cpuidle/cpuidle-kirkwood.c @@ -60,9 +60,6 @@ static int kirkwood_cpuidle_probe(struct platform_device *pdev) struct resource *res; res = platform_get_resource(pdev, 
IORESOURCE_MEM, 0); - if (res == NULL) - return -EINVAL; - ddr_operation_base = devm_ioremap_resource(&pdev->dev, res); if (IS_ERR(ddr_operation_base)) return PTR_ERR(ddr_operation_base); @@ -70,7 +67,7 @@ static int kirkwood_cpuidle_probe(struct platform_device *pdev) return cpuidle_register(&kirkwood_idle_driver, NULL); } -int kirkwood_cpuidle_remove(struct platform_device *pdev) +static int kirkwood_cpuidle_remove(struct platform_device *pdev) { cpuidle_unregister(&kirkwood_idle_driver); return 0; diff --git a/drivers/cpuidle/cpuidle-powernv.c b/drivers/cpuidle/cpuidle-powernv.c new file mode 100644 index 00000000000..74f5788d50b --- /dev/null +++ b/drivers/cpuidle/cpuidle-powernv.c @@ -0,0 +1,254 @@ +/* + * cpuidle-powernv - idle state cpuidle driver. + * Adapted from drivers/cpuidle/cpuidle-pseries + * + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/moduleparam.h> +#include <linux/cpuidle.h> +#include <linux/cpu.h> +#include <linux/notifier.h> +#include <linux/clockchips.h> +#include <linux/of.h> + +#include <asm/machdep.h> +#include <asm/firmware.h> +#include <asm/runlatch.h> + +/* Flags and constants used in PowerNV platform */ + +#define MAX_POWERNV_IDLE_STATES 8 +#define IDLE_USE_INST_NAP 0x00010000 /* Use nap instruction */ +#define IDLE_USE_INST_SLEEP 0x00020000 /* Use sleep instruction */ + +struct cpuidle_driver powernv_idle_driver = { + .name = "powernv_idle", + .owner = THIS_MODULE, +}; + +static int max_idle_state; +static struct cpuidle_state *cpuidle_state_table; + +static int snooze_loop(struct cpuidle_device *dev, + struct cpuidle_driver *drv, + int index) +{ + local_irq_enable(); + set_thread_flag(TIF_POLLING_NRFLAG); + + ppc64_runlatch_off(); + while (!need_resched()) { + HMT_low(); + HMT_very_low(); + } + + HMT_medium(); + ppc64_runlatch_on(); + clear_thread_flag(TIF_POLLING_NRFLAG); + smp_mb(); + return index; +} + +static int nap_loop(struct cpuidle_device *dev, + struct cpuidle_driver *drv, + int index) +{ + ppc64_runlatch_off(); + power7_idle(); + ppc64_runlatch_on(); + return index; +} + +static int fastsleep_loop(struct cpuidle_device *dev, + struct cpuidle_driver *drv, + int index) +{ + unsigned long old_lpcr = mfspr(SPRN_LPCR); + unsigned long new_lpcr; + + if (unlikely(system_state < SYSTEM_RUNNING)) + return index; + + new_lpcr = old_lpcr; + /* Do not exit powersave upon decrementer as we've setup the timer + * offload. + */ + new_lpcr &= ~LPCR_PECE1; + + mtspr(SPRN_LPCR, new_lpcr); + power7_sleep(); + + mtspr(SPRN_LPCR, old_lpcr); + + return index; +} + +/* + * States for dedicated partition case. 
+ */ +static struct cpuidle_state powernv_states[MAX_POWERNV_IDLE_STATES] = { + { /* Snooze */ + .name = "snooze", + .desc = "snooze", + .flags = CPUIDLE_FLAG_TIME_VALID, + .exit_latency = 0, + .target_residency = 0, + .enter = &snooze_loop }, +}; + +static int powernv_cpuidle_add_cpu_notifier(struct notifier_block *n, + unsigned long action, void *hcpu) +{ + int hotcpu = (unsigned long)hcpu; + struct cpuidle_device *dev = + per_cpu(cpuidle_devices, hotcpu); + + if (dev && cpuidle_get_driver()) { + switch (action) { + case CPU_ONLINE: + case CPU_ONLINE_FROZEN: + cpuidle_pause_and_lock(); + cpuidle_enable_device(dev); + cpuidle_resume_and_unlock(); + break; + + case CPU_DEAD: + case CPU_DEAD_FROZEN: + cpuidle_pause_and_lock(); + cpuidle_disable_device(dev); + cpuidle_resume_and_unlock(); + break; + + default: + return NOTIFY_DONE; + } + } + return NOTIFY_OK; +} + +static struct notifier_block setup_hotplug_notifier = { + .notifier_call = powernv_cpuidle_add_cpu_notifier, +}; + +/* + * powernv_cpuidle_driver_init() + */ +static int powernv_cpuidle_driver_init(void) +{ + int idle_state; + struct cpuidle_driver *drv = &powernv_idle_driver; + + drv->state_count = 0; + + for (idle_state = 0; idle_state < max_idle_state; ++idle_state) { + /* Is the state not enabled? */ + if (cpuidle_state_table[idle_state].enter == NULL) + continue; + + drv->states[drv->state_count] = /* structure copy */ + cpuidle_state_table[idle_state]; + + drv->state_count += 1; + } + + return 0; +} + +static int powernv_add_idle_states(void) +{ + struct device_node *power_mgt; + struct property *prop; + int nr_idle_states = 1; /* Snooze */ + int dt_idle_states; + u32 *flags; + int i; + + /* Currently we have snooze statically defined */ + + power_mgt = of_find_node_by_path("/ibm,opal/power-mgt"); + if (!power_mgt) { + pr_warn("opal: PowerMgmt Node not found\n"); + return nr_idle_states; + } + + prop = of_find_property(power_mgt, "ibm,cpu-idle-state-flags", NULL); + if (!prop) { + pr_warn("DT-PowerMgmt: missing ibm,cpu-idle-state-flags\n"); + return nr_idle_states; + } + + dt_idle_states = prop->length / sizeof(u32); + flags = (u32 *) prop->value; + + for (i = 0; i < dt_idle_states; i++) { + + if (flags[i] & IDLE_USE_INST_NAP) { + /* Add NAP state */ + strcpy(powernv_states[nr_idle_states].name, "Nap"); + strcpy(powernv_states[nr_idle_states].desc, "Nap"); + powernv_states[nr_idle_states].flags = CPUIDLE_FLAG_TIME_VALID; + powernv_states[nr_idle_states].exit_latency = 10; + powernv_states[nr_idle_states].target_residency = 100; + powernv_states[nr_idle_states].enter = &nap_loop; + nr_idle_states++; + } + + if (flags[i] & IDLE_USE_INST_SLEEP) { + /* Add FASTSLEEP state */ + strcpy(powernv_states[nr_idle_states].name, "FastSleep"); + strcpy(powernv_states[nr_idle_states].desc, "FastSleep"); + powernv_states[nr_idle_states].flags = + CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TIMER_STOP; + powernv_states[nr_idle_states].exit_latency = 300; + powernv_states[nr_idle_states].target_residency = 1000000; + powernv_states[nr_idle_states].enter = &fastsleep_loop; + nr_idle_states++; + } + } + + return nr_idle_states; +} + +/* + * powernv_idle_probe() + * Choose state table for shared versus dedicated partition + */ +static int powernv_idle_probe(void) +{ + if (cpuidle_disable != IDLE_NO_OVERRIDE) + return -ENODEV; + + if (firmware_has_feature(FW_FEATURE_OPALv3)) { + cpuidle_state_table = powernv_states; + /* Device tree can indicate more idle states */ + max_idle_state = powernv_add_idle_states(); + } else + return -ENODEV; + + return 0; 
+} + +static int __init powernv_processor_idle_init(void) +{ + int retval; + + retval = powernv_idle_probe(); + if (retval) + return retval; + + powernv_cpuidle_driver_init(); + retval = cpuidle_register(&powernv_idle_driver, NULL); + if (retval) { + printk(KERN_DEBUG "Registration of powernv driver failed.\n"); + return retval; + } + + register_cpu_notifier(&setup_hotplug_notifier); + printk(KERN_DEBUG "powernv_idle_driver registered\n"); + return 0; +} + +device_initcall(powernv_processor_idle_init); diff --git a/drivers/cpuidle/cpuidle-pseries.c b/drivers/cpuidle/cpuidle-pseries.c new file mode 100644 index 00000000000..6f7b0195688 --- /dev/null +++ b/drivers/cpuidle/cpuidle-pseries.c @@ -0,0 +1,273 @@ +/* + * cpuidle-pseries - idle state cpuidle driver. + * Adapted from drivers/idle/intel_idle.c and + * drivers/acpi/processor_idle.c + * + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/moduleparam.h> +#include <linux/cpuidle.h> +#include <linux/cpu.h> +#include <linux/notifier.h> + +#include <asm/paca.h> +#include <asm/reg.h> +#include <asm/machdep.h> +#include <asm/firmware.h> +#include <asm/runlatch.h> +#include <asm/plpar_wrappers.h> + +struct cpuidle_driver pseries_idle_driver = { + .name = "pseries_idle", + .owner = THIS_MODULE, +}; + +static int max_idle_state; +static struct cpuidle_state *cpuidle_state_table; + +static inline void idle_loop_prolog(unsigned long *in_purr) +{ + ppc64_runlatch_off(); + *in_purr = mfspr(SPRN_PURR); + /* + * Indicate to the HV that we are idle. Now would be + * a good time to find other work to dispatch. + */ + get_lppaca()->idle = 1; +} + +static inline void idle_loop_epilog(unsigned long in_purr) +{ + u64 wait_cycles; + + wait_cycles = be64_to_cpu(get_lppaca()->wait_state_cycles); + wait_cycles += mfspr(SPRN_PURR) - in_purr; + get_lppaca()->wait_state_cycles = cpu_to_be64(wait_cycles); + get_lppaca()->idle = 0; + + if (irqs_disabled()) + local_irq_enable(); + ppc64_runlatch_on(); +} + +static int snooze_loop(struct cpuidle_device *dev, + struct cpuidle_driver *drv, + int index) +{ + unsigned long in_purr; + + idle_loop_prolog(&in_purr); + local_irq_enable(); + set_thread_flag(TIF_POLLING_NRFLAG); + + while (!need_resched()) { + HMT_low(); + HMT_very_low(); + } + + HMT_medium(); + clear_thread_flag(TIF_POLLING_NRFLAG); + smp_mb(); + + idle_loop_epilog(in_purr); + + return index; +} + +static void check_and_cede_processor(void) +{ + /* + * Ensure our interrupt state is properly tracked, + * also checks if no interrupt has occurred while we + * were soft-disabled + */ + if (prep_irq_for_idle()) { + cede_processor(); +#ifdef CONFIG_TRACE_IRQFLAGS + /* Ensure that H_CEDE returns with IRQs on */ + if (WARN_ON(!(mfmsr() & MSR_EE))) + __hard_irq_enable(); +#endif + } +} + +static int dedicated_cede_loop(struct cpuidle_device *dev, + struct cpuidle_driver *drv, + int index) +{ + unsigned long in_purr; + + idle_loop_prolog(&in_purr); + get_lppaca()->donate_dedicated_cpu = 1; + + HMT_medium(); + check_and_cede_processor(); + + get_lppaca()->donate_dedicated_cpu = 0; + + idle_loop_epilog(in_purr); + + return index; +} + +static int shared_cede_loop(struct cpuidle_device *dev, + struct cpuidle_driver *drv, + int index) +{ + unsigned long in_purr; + + idle_loop_prolog(&in_purr); + + /* + * Yield the processor to the hypervisor. We return if + * an external interrupt occurs (which are driven prior + * to returning here) or if a prod occurs from another + * processor. 
When returning here, external interrupts + * are enabled. + */ + check_and_cede_processor(); + + idle_loop_epilog(in_purr); + + return index; +} + +/* + * States for dedicated partition case. + */ +static struct cpuidle_state dedicated_states[] = { + { /* Snooze */ + .name = "snooze", + .desc = "snooze", + .flags = CPUIDLE_FLAG_TIME_VALID, + .exit_latency = 0, + .target_residency = 0, + .enter = &snooze_loop }, + { /* CEDE */ + .name = "CEDE", + .desc = "CEDE", + .flags = CPUIDLE_FLAG_TIME_VALID, + .exit_latency = 10, + .target_residency = 100, + .enter = &dedicated_cede_loop }, +}; + +/* + * States for shared partition case. + */ +static struct cpuidle_state shared_states[] = { + { /* Shared Cede */ + .name = "Shared Cede", + .desc = "Shared Cede", + .flags = CPUIDLE_FLAG_TIME_VALID, + .exit_latency = 0, + .target_residency = 0, + .enter = &shared_cede_loop }, +}; + +static int pseries_cpuidle_add_cpu_notifier(struct notifier_block *n, + unsigned long action, void *hcpu) +{ + int hotcpu = (unsigned long)hcpu; + struct cpuidle_device *dev = + per_cpu(cpuidle_devices, hotcpu); + + if (dev && cpuidle_get_driver()) { + switch (action) { + case CPU_ONLINE: + case CPU_ONLINE_FROZEN: + cpuidle_pause_and_lock(); + cpuidle_enable_device(dev); + cpuidle_resume_and_unlock(); + break; + + case CPU_DEAD: + case CPU_DEAD_FROZEN: + cpuidle_pause_and_lock(); + cpuidle_disable_device(dev); + cpuidle_resume_and_unlock(); + break; + + default: + return NOTIFY_DONE; + } + } + return NOTIFY_OK; +} + +static struct notifier_block setup_hotplug_notifier = { + .notifier_call = pseries_cpuidle_add_cpu_notifier, +}; + +/* + * pseries_cpuidle_driver_init() + */ +static int pseries_cpuidle_driver_init(void) +{ + int idle_state; + struct cpuidle_driver *drv = &pseries_idle_driver; + + drv->state_count = 0; + + for (idle_state = 0; idle_state < max_idle_state; ++idle_state) { + /* Is the state not enabled? 
*/ + if (cpuidle_state_table[idle_state].enter == NULL) + continue; + + drv->states[drv->state_count] = /* structure copy */ + cpuidle_state_table[idle_state]; + + drv->state_count += 1; + } + + return 0; +} + +/* + * pseries_idle_probe() + * Choose state table for shared versus dedicated partition + */ +static int pseries_idle_probe(void) +{ + + if (cpuidle_disable != IDLE_NO_OVERRIDE) + return -ENODEV; + + if (firmware_has_feature(FW_FEATURE_SPLPAR)) { + if (lppaca_shared_proc(get_lppaca())) { + cpuidle_state_table = shared_states; + max_idle_state = ARRAY_SIZE(shared_states); + } else { + cpuidle_state_table = dedicated_states; + max_idle_state = ARRAY_SIZE(dedicated_states); + } + } else + return -ENODEV; + + return 0; +} + +static int __init pseries_processor_idle_init(void) +{ + int retval; + + retval = pseries_idle_probe(); + if (retval) + return retval; + + pseries_cpuidle_driver_init(); + retval = cpuidle_register(&pseries_idle_driver, NULL); + if (retval) { + printk(KERN_DEBUG "Registration of pseries driver failed.\n"); + return retval; + } + + register_cpu_notifier(&setup_hotplug_notifier); + printk(KERN_DEBUG "pseries_idle_driver registered\n"); + return 0; +} + +device_initcall(pseries_processor_idle_init); diff --git a/drivers/cpuidle/cpuidle-ux500.c b/drivers/cpuidle/cpuidle-ux500.c new file mode 100644 index 00000000000..5e35804b1a9 --- /dev/null +++ b/drivers/cpuidle/cpuidle-ux500.c @@ -0,0 +1,131 @@ +/* + * Copyright (c) 2012 Linaro : Daniel Lezcano <daniel.lezcano@linaro.org> (IBM) + * + * Based on the work of Rickard Andersson <rickard.andersson@stericsson.com> + * and Jonas Aaberg <jonas.aberg@stericsson.com>. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/module.h> +#include <linux/cpuidle.h> +#include <linux/spinlock.h> +#include <linux/atomic.h> +#include <linux/smp.h> +#include <linux/mfd/dbx500-prcmu.h> +#include <linux/platform_data/arm-ux500-pm.h> +#include <linux/platform_device.h> + +#include <asm/cpuidle.h> +#include <asm/proc-fns.h> + +static atomic_t master = ATOMIC_INIT(0); +static DEFINE_SPINLOCK(master_lock); + +static inline int ux500_enter_idle(struct cpuidle_device *dev, + struct cpuidle_driver *drv, int index) +{ + int this_cpu = smp_processor_id(); + bool recouple = false; + + if (atomic_inc_return(&master) == num_online_cpus()) { + + /* With this lock, we prevent the other cpu to exit and enter + * this function again and become the master */ + if (!spin_trylock(&master_lock)) + goto wfi; + + /* decouple the gic from the A9 cores */ + if (prcmu_gic_decouple()) { + spin_unlock(&master_lock); + goto out; + } + + /* If an error occur, we will have to recouple the gic + * manually */ + recouple = true; + + /* At this state, as the gic is decoupled, if the other + * cpu is in WFI, we have the guarantee it won't be wake + * up, so we can safely go to retention */ + if (!prcmu_is_cpu_in_wfi(this_cpu ? 0 : 1)) + goto out; + + /* The prcmu will be in charge of watching the interrupts + * and wake up the cpus */ + if (prcmu_copy_gic_settings()) + goto out; + + /* Check in the meantime an interrupt did + * not occur on the gic ... */ + if (prcmu_gic_pending_irq()) + goto out; + + /* ... 
and the prcmu */ + if (prcmu_pending_irq()) + goto out; + + /* Go to the retention state, the prcmu will wait for the + * cpu to go WFI and this is what happens after exiting this + * 'master' critical section */ + if (prcmu_set_power_state(PRCMU_AP_IDLE, true, true)) + goto out; + + /* When we switch to retention, the prcmu is in charge + * of recoupling the gic automatically */ + recouple = false; + + spin_unlock(&master_lock); + } +wfi: + cpu_do_idle(); +out: + atomic_dec(&master); + + if (recouple) { + prcmu_gic_recouple(); + spin_unlock(&master_lock); + } + + return index; +} + +static struct cpuidle_driver ux500_idle_driver = { + .name = "ux500_idle", + .owner = THIS_MODULE, + .states = { + ARM_CPUIDLE_WFI_STATE, + { + .enter = ux500_enter_idle, + .exit_latency = 70, + .target_residency = 260, + .flags = CPUIDLE_FLAG_TIME_VALID | + CPUIDLE_FLAG_TIMER_STOP, + .name = "ApIdle", + .desc = "ARM Retention", + }, + }, + .safe_state_index = 0, + .state_count = 2, +}; + +static int dbx500_cpuidle_probe(struct platform_device *pdev) +{ + /* Configure wake up reasons */ + prcmu_enable_wakeups(PRCMU_WAKEUP(ARM) | PRCMU_WAKEUP(RTC) | + PRCMU_WAKEUP(ABB)); + + return cpuidle_register(&ux500_idle_driver, NULL); +} + +static struct platform_driver dbx500_cpuidle_plat_driver = { + .driver = { + .name = "cpuidle-dbx500", + .owner = THIS_MODULE, + }, + .probe = dbx500_cpuidle_probe, +}; + +module_platform_driver(dbx500_cpuidle_plat_driver); diff --git a/drivers/cpuidle/cpuidle-zynq.c b/drivers/cpuidle/cpuidle-zynq.c new file mode 100644 index 00000000000..aded7592802 --- /dev/null +++ b/drivers/cpuidle/cpuidle-zynq.c @@ -0,0 +1,88 @@ +/* + * Copyright (C) 2012-2013 Xilinx + * + * CPU idle support for Xilinx Zynq + * + * based on arch/arm/mach-at91/cpuidle.c + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. 
+ * + * The cpu idle uses wait-for-interrupt and RAM self refresh in order + * to implement two idle states - + * #1 wait-for-interrupt + * #2 wait-for-interrupt and RAM self refresh + * + * Maintainer: Michal Simek <michal.simek@xilinx.com> + */ + +#include <linux/init.h> +#include <linux/cpu_pm.h> +#include <linux/cpuidle.h> +#include <linux/platform_device.h> +#include <asm/proc-fns.h> +#include <asm/cpuidle.h> + +#define ZYNQ_MAX_STATES 2 + +/* Actual code that puts the SoC in different idle states */ +static int zynq_enter_idle(struct cpuidle_device *dev, + struct cpuidle_driver *drv, int index) +{ + /* Devices must be stopped here */ + cpu_pm_enter(); + + /* Add code for DDR self refresh start */ + cpu_do_idle(); + + /* Add code for DDR self refresh stop */ + cpu_pm_exit(); + + return index; +} + +static struct cpuidle_driver zynq_idle_driver = { + .name = "zynq_idle", + .owner = THIS_MODULE, + .states = { + ARM_CPUIDLE_WFI_STATE, + { + .enter = zynq_enter_idle, + .exit_latency = 10, + .target_residency = 10000, + .flags = CPUIDLE_FLAG_TIME_VALID | + CPUIDLE_FLAG_TIMER_STOP, + .name = "RAM_SR", + .desc = "WFI and RAM Self Refresh", + }, + }, + .safe_state_index = 0, + .state_count = ZYNQ_MAX_STATES, +}; + +/* Initialize CPU idle by registering the idle states */ +static int zynq_cpuidle_probe(struct platform_device *pdev) +{ + pr_info("Xilinx Zynq CpuIdle Driver started\n"); + + return cpuidle_register(&zynq_idle_driver, NULL); +} + +static struct platform_driver zynq_cpuidle_driver = { + .driver = { + .name = "cpuidle-zynq", + .owner = THIS_MODULE, + }, + .probe = zynq_cpuidle_probe, +}; + +module_platform_driver(zynq_cpuidle_driver); diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index c3a93fece81..cb7019977c5 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -32,6 +32,7 @@ LIST_HEAD(cpuidle_detected_devices); static int enabled_devices; static int off __read_mostly; static int initialized __read_mostly; +static bool use_deepest_state __read_mostly; int cpuidle_disabled(void) { @@ -42,8 +43,6 @@ void disable_cpuidle(void) off = 1; } -static int __cpuidle_register_device(struct cpuidle_device *dev); - /** * cpuidle_play_dead - cpu off-lining * @@ -67,6 +66,45 @@ int cpuidle_play_dead(void) } /** + * cpuidle_use_deepest_state - Enable/disable the "deepest idle" mode. + * @enable: Whether enable or disable the feature. + * + * If the "deepest idle" mode is enabled, cpuidle will ignore the governor and + * always use the state with the greatest exit latency (out of the states that + * are not disabled). + * + * This function can only be called after cpuidle_pause() to avoid races. + */ +void cpuidle_use_deepest_state(bool enable) +{ + use_deepest_state = enable; +} + +/** + * cpuidle_find_deepest_state - Find the state of the greatest exit latency. + * @drv: cpuidle driver for a given CPU. + * @dev: cpuidle device for a given CPU. 
+ */ +static int cpuidle_find_deepest_state(struct cpuidle_driver *drv, + struct cpuidle_device *dev) +{ + unsigned int latency_req = 0; + int i, ret = CPUIDLE_DRIVER_STATE_START - 1; + + for (i = CPUIDLE_DRIVER_STATE_START; i < drv->state_count; i++) { + struct cpuidle_state *s = &drv->states[i]; + struct cpuidle_state_usage *su = &dev->states_usage[i]; + + if (s->disabled || su->disable || s->exit_latency <= latency_req) + continue; + + latency_req = s->exit_latency; + ret = i; + } + return ret; +} + +/** * cpuidle_enter_state - enter the state and update stats * @dev: cpuidle device for this cpu * @drv: cpuidle driver for this cpu @@ -87,7 +125,8 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv, time_end = ktime_get(); - local_irq_enable(); + if (!cpuidle_state_is_coupled(dev, drv, entered_state)) + local_irq_enable(); diff = ktime_to_us(ktime_sub(time_end, time_start)); if (diff > INT_MAX) @@ -110,63 +149,57 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv, } /** - * cpuidle_idle_call - the main idle loop + * cpuidle_select - ask the cpuidle framework to choose an idle state + * + * @drv: the cpuidle driver + * @dev: the cpuidle device * - * NOTE: no locks or semaphores should be used here - * return non-zero on failure + * Returns the index of the idle state. */ -int cpuidle_idle_call(void) +int cpuidle_select(struct cpuidle_driver *drv, struct cpuidle_device *dev) { - struct cpuidle_device *dev = __this_cpu_read(cpuidle_devices); - struct cpuidle_driver *drv; - int next_state, entered_state; - - if (off) - return -ENODEV; - - if (!initialized) + if (off || !initialized) return -ENODEV; - /* check if the device is ready */ - if (!dev || !dev->enabled) + if (!drv || !dev || !dev->enabled) return -EBUSY; - drv = cpuidle_get_cpu_driver(dev); - - /* ask the governor for the next state */ - next_state = cpuidle_curr_governor->select(drv, dev); - if (need_resched()) { - dev->last_residency = 0; - /* give the governor an opportunity to reflect on the outcome */ - if (cpuidle_curr_governor->reflect) - cpuidle_curr_governor->reflect(dev, next_state); - local_irq_enable(); - return 0; - } - - trace_cpu_idle_rcuidle(next_state, dev->cpu); - - if (drv->states[next_state].flags & CPUIDLE_FLAG_TIMER_STOP) - clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, - &dev->cpu); - - if (cpuidle_state_is_coupled(dev, drv, next_state)) - entered_state = cpuidle_enter_state_coupled(dev, drv, - next_state); - else - entered_state = cpuidle_enter_state(dev, drv, next_state); + if (unlikely(use_deepest_state)) + return cpuidle_find_deepest_state(drv, dev); - if (drv->states[next_state].flags & CPUIDLE_FLAG_TIMER_STOP) - clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, - &dev->cpu); - - trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, dev->cpu); + return cpuidle_curr_governor->select(drv, dev); +} - /* give the governor an opportunity to reflect on the outcome */ - if (cpuidle_curr_governor->reflect) - cpuidle_curr_governor->reflect(dev, entered_state); +/** + * cpuidle_enter - enter into the specified idle state + * + * @drv: the cpuidle driver tied with the cpu + * @dev: the cpuidle device + * @index: the index in the idle state table + * + * Returns the index in the idle state, < 0 in case of error. 
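With cpuidle_idle_call() removed, its work is split across cpuidle_select(), cpuidle_enter() and cpuidle_reflect(), and the architecture/scheduler idle loop is expected to chain them itself. A minimal sketch of the intended calling sequence (illustrative only; obtaining dev/drv, need_resched() handling and broadcast-timer notification are now the caller's job):

	/* dev and drv obtained by the caller, e.g. via cpuidle_get_cpu_driver() */
	int next_state, entered_state;

	next_state = cpuidle_select(drv, dev);
	if (next_state < 0) {
		arch_cpu_idle();		/* no usable state: plain WFI/hlt */
	} else {
		entered_state = cpuidle_enter(drv, dev, next_state);
		cpuidle_reflect(dev, entered_state);	/* let the governor learn */
	}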
+ * The error code depends on the backend driver + */ +int cpuidle_enter(struct cpuidle_driver *drv, struct cpuidle_device *dev, + int index) +{ + if (cpuidle_state_is_coupled(dev, drv, index)) + return cpuidle_enter_state_coupled(dev, drv, index); + return cpuidle_enter_state(dev, drv, index); +} - return 0; +/** + * cpuidle_reflect - tell the underlying governor what was the state + * we were in + * + * @dev : the cpuidle device + * @index: the index in the idle state table + * + */ +void cpuidle_reflect(struct cpuidle_device *dev, int index) +{ + if (cpuidle_curr_governor->reflect && !unlikely(use_deepest_state)) + cpuidle_curr_governor->reflect(dev, index); } /** @@ -230,45 +263,6 @@ void cpuidle_resume(void) mutex_unlock(&cpuidle_lock); } -#ifdef CONFIG_ARCH_HAS_CPU_RELAX -static int poll_idle(struct cpuidle_device *dev, - struct cpuidle_driver *drv, int index) -{ - ktime_t t1, t2; - s64 diff; - - t1 = ktime_get(); - local_irq_enable(); - while (!need_resched()) - cpu_relax(); - - t2 = ktime_get(); - diff = ktime_to_us(ktime_sub(t2, t1)); - if (diff > INT_MAX) - diff = INT_MAX; - - dev->last_residency = (int) diff; - - return index; -} - -static void poll_idle_init(struct cpuidle_driver *drv) -{ - struct cpuidle_state *state = &drv->states[0]; - - snprintf(state->name, CPUIDLE_NAME_LEN, "POLL"); - snprintf(state->desc, CPUIDLE_DESC_LEN, "CPUIDLE CORE POLL IDLE"); - state->exit_latency = 0; - state->target_residency = 0; - state->power_usage = -1; - state->flags = 0; - state->enter = poll_idle; - state->disabled = false; -} -#else -static void poll_idle_init(struct cpuidle_driver *drv) {} -#endif /* CONFIG_ARCH_HAS_CPU_RELAX */ - /** * cpuidle_enable_device - enables idle PM for a CPU * @dev: the CPU @@ -278,7 +272,7 @@ static void poll_idle_init(struct cpuidle_driver *drv) {} */ int cpuidle_enable_device(struct cpuidle_device *dev) { - int ret, i; + int ret; struct cpuidle_driver *drv; if (!dev) @@ -292,17 +286,12 @@ int cpuidle_enable_device(struct cpuidle_device *dev) if (!drv || !cpuidle_curr_governor) return -EIO; + if (!dev->registered) + return -EINVAL; + if (!dev->state_count) dev->state_count = drv->state_count; - if (dev->registered == 0) { - ret = __cpuidle_register_device(dev); - if (ret) - return ret; - } - - poll_idle_init(drv); - ret = cpuidle_add_device_sysfs(dev); if (ret) return ret; @@ -311,12 +300,6 @@ int cpuidle_enable_device(struct cpuidle_device *dev) (ret = cpuidle_curr_governor->enable(drv, dev))) goto fail_sysfs; - for (i = 0; i < dev->state_count; i++) { - dev->states_usage[i].usage = 0; - dev->states_usage[i].time = 0; - } - dev->last_residency = 0; - smp_wmb(); dev->enabled = 1; @@ -360,6 +343,21 @@ void cpuidle_disable_device(struct cpuidle_device *dev) EXPORT_SYMBOL_GPL(cpuidle_disable_device); +static void __cpuidle_unregister_device(struct cpuidle_device *dev) +{ + struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev); + + list_del(&dev->device_list); + per_cpu(cpuidle_devices, dev->cpu) = NULL; + module_put(drv->owner); +} + +static void __cpuidle_device_init(struct cpuidle_device *dev) +{ + memset(dev->states_usage, 0, sizeof(dev->states_usage)); + dev->last_residency = 0; +} + /** * __cpuidle_register_device - internal register function called before register * and enable routines @@ -377,23 +375,13 @@ static int __cpuidle_register_device(struct cpuidle_device *dev) per_cpu(cpuidle_devices, dev->cpu) = dev; list_add(&dev->device_list, &cpuidle_detected_devices); - ret = cpuidle_add_sysfs(dev); - if (ret) - goto err_sysfs; ret = 
cpuidle_coupled_register_device(dev); if (ret) - goto err_coupled; - - dev->registered = 1; - return 0; + __cpuidle_unregister_device(dev); + else + dev->registered = 1; -err_coupled: - cpuidle_remove_sysfs(dev); -err_sysfs: - list_del(&dev->device_list); - per_cpu(cpuidle_devices, dev->cpu) = NULL; - module_put(drv->owner); return ret; } @@ -403,25 +391,42 @@ err_sysfs: */ int cpuidle_register_device(struct cpuidle_device *dev) { - int ret; + int ret = -EBUSY; if (!dev) return -EINVAL; mutex_lock(&cpuidle_lock); - if ((ret = __cpuidle_register_device(dev))) { - mutex_unlock(&cpuidle_lock); - return ret; - } + if (dev->registered) + goto out_unlock; + + __cpuidle_device_init(dev); + + ret = __cpuidle_register_device(dev); + if (ret) + goto out_unlock; + + ret = cpuidle_add_sysfs(dev); + if (ret) + goto out_unregister; + + ret = cpuidle_enable_device(dev); + if (ret) + goto out_sysfs; - cpuidle_enable_device(dev); cpuidle_install_idle_handler(); +out_unlock: mutex_unlock(&cpuidle_lock); - return 0; + return ret; +out_sysfs: + cpuidle_remove_sysfs(dev); +out_unregister: + __cpuidle_unregister_device(dev); + goto out_unlock; } EXPORT_SYMBOL_GPL(cpuidle_register_device); @@ -432,9 +437,7 @@ EXPORT_SYMBOL_GPL(cpuidle_register_device); */ void cpuidle_unregister_device(struct cpuidle_device *dev) { - struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev); - - if (dev->registered == 0) + if (!dev || dev->registered == 0) return; cpuidle_pause_and_lock(); @@ -442,14 +445,12 @@ void cpuidle_unregister_device(struct cpuidle_device *dev) cpuidle_disable_device(dev); cpuidle_remove_sysfs(dev); - list_del(&dev->device_list); - per_cpu(cpuidle_devices, dev->cpu) = NULL; + + __cpuidle_unregister_device(dev); cpuidle_coupled_unregister_device(dev); cpuidle_resume_and_unlock(); - - module_put(drv->owner); } EXPORT_SYMBOL_GPL(cpuidle_unregister_device); @@ -466,7 +467,7 @@ void cpuidle_unregister(struct cpuidle_driver *drv) int cpu; struct cpuidle_device *device; - for_each_possible_cpu(cpu) { + for_each_cpu(cpu, drv->cpumask) { device = &per_cpu(cpuidle_dev, cpu); cpuidle_unregister_device(device); } @@ -498,13 +499,13 @@ int cpuidle_register(struct cpuidle_driver *drv, return ret; } - for_each_possible_cpu(cpu) { + for_each_cpu(cpu, drv->cpumask) { device = &per_cpu(cpuidle_dev, cpu); device->cpu = cpu; #ifdef CONFIG_ARCH_NEEDS_CPU_IDLE_COUPLED /* - * On multiplatform for ARM, the coupled idle states could + * On multiplatform for ARM, the coupled idle states could be * enabled in the kernel even if the cpuidle driver does not * use it. Note, coupled_cpus is a struct copy. */ diff --git a/drivers/cpuidle/driver.c b/drivers/cpuidle/driver.c index 8dfaaae9444..9634f20e392 100644 --- a/drivers/cpuidle/driver.c +++ b/drivers/cpuidle/driver.c @@ -10,6 +10,7 @@ #include <linux/mutex.h> #include <linux/module.h> +#include <linux/sched.h> #include <linux/cpuidle.h> #include <linux/cpumask.h> #include <linux/clockchips.h> @@ -18,182 +19,285 @@ DEFINE_SPINLOCK(cpuidle_driver_lock); -static void __cpuidle_set_cpu_driver(struct cpuidle_driver *drv, int cpu); -static struct cpuidle_driver * __cpuidle_get_cpu_driver(int cpu); +#ifdef CONFIG_CPU_IDLE_MULTIPLE_DRIVERS -static void cpuidle_setup_broadcast_timer(void *arg) +static DEFINE_PER_CPU(struct cpuidle_driver *, cpuidle_drivers); + +/** + * __cpuidle_get_cpu_driver - return the cpuidle driver tied to a CPU. + * @cpu: the CPU handled by the driver + * + * Returns a pointer to struct cpuidle_driver or NULL if no driver has been + * registered for @cpu. 
+ */ +static struct cpuidle_driver *__cpuidle_get_cpu_driver(int cpu) { - int cpu = smp_processor_id(); - clockevents_notify((long)(arg), &cpu); + return per_cpu(cpuidle_drivers, cpu); } -static void __cpuidle_driver_init(struct cpuidle_driver *drv, int cpu) +/** + * __cpuidle_unset_driver - unset per CPU driver variables. + * @drv: a valid pointer to a struct cpuidle_driver + * + * For each CPU in the driver's CPU mask, unset the registered driver per CPU + * variable. If @drv is different from the registered driver, the corresponding + * variable is not cleared. + */ +static inline void __cpuidle_unset_driver(struct cpuidle_driver *drv) { - int i; + int cpu; - drv->refcnt = 0; + for_each_cpu(cpu, drv->cpumask) { - for (i = drv->state_count - 1; i >= 0 ; i--) { - - if (!(drv->states[i].flags & CPUIDLE_FLAG_TIMER_STOP)) + if (drv != __cpuidle_get_cpu_driver(cpu)) continue; - drv->bctimer = 1; - on_each_cpu_mask(get_cpu_mask(cpu), cpuidle_setup_broadcast_timer, - (void *)CLOCK_EVT_NOTIFY_BROADCAST_ON, 1); - break; + per_cpu(cpuidle_drivers, cpu) = NULL; } } -static int __cpuidle_register_driver(struct cpuidle_driver *drv, int cpu) +/** + * __cpuidle_set_driver - set per CPU driver variables for the given driver. + * @drv: a valid pointer to a struct cpuidle_driver + * + * For each CPU in the driver's cpumask, unset the registered driver per CPU + * to @drv. + * + * Returns 0 on success, -EBUSY if the CPUs have driver(s) already. + */ +static inline int __cpuidle_set_driver(struct cpuidle_driver *drv) { - if (!drv || !drv->state_count) - return -EINVAL; - - if (cpuidle_disabled()) - return -ENODEV; + int cpu; - if (__cpuidle_get_cpu_driver(cpu)) - return -EBUSY; + for_each_cpu(cpu, drv->cpumask) { - __cpuidle_driver_init(drv, cpu); + if (__cpuidle_get_cpu_driver(cpu)) { + __cpuidle_unset_driver(drv); + return -EBUSY; + } - __cpuidle_set_cpu_driver(drv, cpu); + per_cpu(cpuidle_drivers, cpu) = drv; + } return 0; } -static void __cpuidle_unregister_driver(struct cpuidle_driver *drv, int cpu) -{ - if (drv != __cpuidle_get_cpu_driver(cpu)) - return; +#else - if (!WARN_ON(drv->refcnt > 0)) - __cpuidle_set_cpu_driver(NULL, cpu); +static struct cpuidle_driver *cpuidle_curr_driver; - if (drv->bctimer) { - drv->bctimer = 0; - on_each_cpu_mask(get_cpu_mask(cpu), cpuidle_setup_broadcast_timer, - (void *)CLOCK_EVT_NOTIFY_BROADCAST_OFF, 1); - } +/** + * __cpuidle_get_cpu_driver - return the global cpuidle driver pointer. + * @cpu: ignored without the multiple driver support + * + * Return a pointer to a struct cpuidle_driver object or NULL if no driver was + * previously registered. + */ +static inline struct cpuidle_driver *__cpuidle_get_cpu_driver(int cpu) +{ + return cpuidle_curr_driver; } -#ifdef CONFIG_CPU_IDLE_MULTIPLE_DRIVERS +/** + * __cpuidle_set_driver - assign the global cpuidle driver variable. + * @drv: pointer to a struct cpuidle_driver object + * + * Returns 0 on success, -EBUSY if the driver is already registered. + */ +static inline int __cpuidle_set_driver(struct cpuidle_driver *drv) +{ + if (cpuidle_curr_driver) + return -EBUSY; -static DEFINE_PER_CPU(struct cpuidle_driver *, cpuidle_drivers); + cpuidle_curr_driver = drv; -static void __cpuidle_set_cpu_driver(struct cpuidle_driver *drv, int cpu) -{ - per_cpu(cpuidle_drivers, cpu) = drv; + return 0; } -static struct cpuidle_driver *__cpuidle_get_cpu_driver(int cpu) +/** + * __cpuidle_unset_driver - unset the global cpuidle driver variable. + * @drv: a pointer to a struct cpuidle_driver + * + * Reset the global cpuidle variable to NULL. 
If @drv does not match the + * registered driver, do nothing. + */ +static inline void __cpuidle_unset_driver(struct cpuidle_driver *drv) { - return per_cpu(cpuidle_drivers, cpu); + if (drv == cpuidle_curr_driver) + cpuidle_curr_driver = NULL; } -static void __cpuidle_unregister_all_cpu_driver(struct cpuidle_driver *drv) +#endif + +/** + * cpuidle_setup_broadcast_timer - enable/disable the broadcast timer + * @arg: a void pointer used to match the SMP cross call API + * + * @arg is used as a value of type 'long' with one of the two values: + * - CLOCK_EVT_NOTIFY_BROADCAST_ON + * - CLOCK_EVT_NOTIFY_BROADCAST_OFF + * + * Set the broadcast timer notification for the current CPU. This function + * is executed per CPU by an SMP cross call. It not supposed to be called + * directly. + */ +static void cpuidle_setup_broadcast_timer(void *arg) { - int cpu; - for_each_present_cpu(cpu) - __cpuidle_unregister_driver(drv, cpu); + int cpu = smp_processor_id(); + clockevents_notify((long)(arg), &cpu); } -static int __cpuidle_register_all_cpu_driver(struct cpuidle_driver *drv) +/** + * __cpuidle_driver_init - initialize the driver's internal data + * @drv: a valid pointer to a struct cpuidle_driver + */ +static void __cpuidle_driver_init(struct cpuidle_driver *drv) { - int ret = 0; - int i, cpu; + int i; - for_each_present_cpu(cpu) { - ret = __cpuidle_register_driver(drv, cpu); - if (ret) - break; - } + drv->refcnt = 0; - if (ret) - for_each_present_cpu(i) { - if (i == cpu) - break; - __cpuidle_unregister_driver(drv, i); + /* + * Use all possible CPUs as the default, because if the kernel boots + * with some CPUs offline and then we online one of them, the CPU + * notifier has to know which driver to assign. + */ + if (!drv->cpumask) + drv->cpumask = (struct cpumask *)cpu_possible_mask; + + /* + * Look for the timer stop flag in the different states, so that we know + * if the broadcast timer has to be set up. The loop is in the reverse + * order, because usually one of the deeper states have this flag set. 
+ */ + for (i = drv->state_count - 1; i >= 0 ; i--) { + if (drv->states[i].flags & CPUIDLE_FLAG_TIMER_STOP) { + drv->bctimer = 1; + break; } - - - return ret; + } } -int cpuidle_register_cpu_driver(struct cpuidle_driver *drv, int cpu) +#ifdef CONFIG_ARCH_HAS_CPU_RELAX +static int poll_idle(struct cpuidle_device *dev, + struct cpuidle_driver *drv, int index) { - int ret; + ktime_t t1, t2; + s64 diff; + + t1 = ktime_get(); + local_irq_enable(); + if (!current_set_polling_and_test()) { + while (!need_resched()) + cpu_relax(); + } + current_clr_polling(); - spin_lock(&cpuidle_driver_lock); - ret = __cpuidle_register_driver(drv, cpu); - spin_unlock(&cpuidle_driver_lock); + t2 = ktime_get(); + diff = ktime_to_us(ktime_sub(t2, t1)); + if (diff > INT_MAX) + diff = INT_MAX; - return ret; + dev->last_residency = (int) diff; + + return index; } -void cpuidle_unregister_cpu_driver(struct cpuidle_driver *drv, int cpu) +static void poll_idle_init(struct cpuidle_driver *drv) { - spin_lock(&cpuidle_driver_lock); - __cpuidle_unregister_driver(drv, cpu); - spin_unlock(&cpuidle_driver_lock); + struct cpuidle_state *state = &drv->states[0]; + + snprintf(state->name, CPUIDLE_NAME_LEN, "POLL"); + snprintf(state->desc, CPUIDLE_DESC_LEN, "CPUIDLE CORE POLL IDLE"); + state->exit_latency = 0; + state->target_residency = 0; + state->power_usage = -1; + state->flags = CPUIDLE_FLAG_TIME_VALID; + state->enter = poll_idle; + state->disabled = false; } +#else +static void poll_idle_init(struct cpuidle_driver *drv) {} +#endif /* !CONFIG_ARCH_HAS_CPU_RELAX */ /** - * cpuidle_register_driver - registers a driver - * @drv: the driver + * __cpuidle_register_driver: register the driver + * @drv: a valid pointer to a struct cpuidle_driver + * + * Do some sanity checks, initialize the driver, assign the driver to the + * global cpuidle driver variable(s) and set up the broadcast timer if the + * cpuidle driver has some states that shut down the local timer. 
+ * + * Returns 0 on success, a negative error code otherwise: + * * -EINVAL if the driver pointer is NULL or no idle states are available + * * -ENODEV if the cpuidle framework is disabled + * * -EBUSY if the driver is already assigned to the global variable(s) */ -int cpuidle_register_driver(struct cpuidle_driver *drv) +static int __cpuidle_register_driver(struct cpuidle_driver *drv) { int ret; - spin_lock(&cpuidle_driver_lock); - ret = __cpuidle_register_all_cpu_driver(drv); - spin_unlock(&cpuidle_driver_lock); + if (!drv || !drv->state_count) + return -EINVAL; - return ret; -} -EXPORT_SYMBOL_GPL(cpuidle_register_driver); + if (cpuidle_disabled()) + return -ENODEV; -/** - * cpuidle_unregister_driver - unregisters a driver - * @drv: the driver - */ -void cpuidle_unregister_driver(struct cpuidle_driver *drv) -{ - spin_lock(&cpuidle_driver_lock); - __cpuidle_unregister_all_cpu_driver(drv); - spin_unlock(&cpuidle_driver_lock); -} -EXPORT_SYMBOL_GPL(cpuidle_unregister_driver); + __cpuidle_driver_init(drv); -#else + ret = __cpuidle_set_driver(drv); + if (ret) + return ret; -static struct cpuidle_driver *cpuidle_curr_driver; + if (drv->bctimer) + on_each_cpu_mask(drv->cpumask, cpuidle_setup_broadcast_timer, + (void *)CLOCK_EVT_NOTIFY_BROADCAST_ON, 1); -static inline void __cpuidle_set_cpu_driver(struct cpuidle_driver *drv, int cpu) -{ - cpuidle_curr_driver = drv; + poll_idle_init(drv); + + return 0; } -static inline struct cpuidle_driver *__cpuidle_get_cpu_driver(int cpu) +/** + * __cpuidle_unregister_driver - unregister the driver + * @drv: a valid pointer to a struct cpuidle_driver + * + * Check if the driver is no longer in use, reset the global cpuidle driver + * variable(s) and disable the timer broadcast notification mechanism if it was + * in use. + * + */ +static void __cpuidle_unregister_driver(struct cpuidle_driver *drv) { - return cpuidle_curr_driver; + if (WARN_ON(drv->refcnt > 0)) + return; + + if (drv->bctimer) { + drv->bctimer = 0; + on_each_cpu_mask(drv->cpumask, cpuidle_setup_broadcast_timer, + (void *)CLOCK_EVT_NOTIFY_BROADCAST_OFF, 1); + } + + __cpuidle_unset_driver(drv); } /** * cpuidle_register_driver - registers a driver - * @drv: the driver + * @drv: a pointer to a valid struct cpuidle_driver + * + * Register the driver under a lock to prevent concurrent attempts to + * [un]register the driver from occuring at the same time. + * + * Returns 0 on success, a negative error code (returned by + * __cpuidle_register_driver()) otherwise. */ int cpuidle_register_driver(struct cpuidle_driver *drv) { - int ret, cpu; + int ret; - cpu = get_cpu(); spin_lock(&cpuidle_driver_lock); - ret = __cpuidle_register_driver(drv, cpu); + ret = __cpuidle_register_driver(drv); spin_unlock(&cpuidle_driver_lock); - put_cpu(); return ret; } @@ -201,23 +305,24 @@ EXPORT_SYMBOL_GPL(cpuidle_register_driver); /** * cpuidle_unregister_driver - unregisters a driver - * @drv: the driver + * @drv: a pointer to a valid struct cpuidle_driver + * + * Unregisters the cpuidle driver under a lock to prevent concurrent attempts + * to [un]register the driver from occuring at the same time. @drv has to + * match the currently registered driver. 
*/ void cpuidle_unregister_driver(struct cpuidle_driver *drv) { - int cpu; - - cpu = get_cpu(); spin_lock(&cpuidle_driver_lock); - __cpuidle_unregister_driver(drv, cpu); + __cpuidle_unregister_driver(drv); spin_unlock(&cpuidle_driver_lock); - put_cpu(); } EXPORT_SYMBOL_GPL(cpuidle_unregister_driver); -#endif /** - * cpuidle_get_driver - return the current driver + * cpuidle_get_driver - return the driver tied to the current CPU. + * + * Returns a struct cpuidle_driver pointer, or NULL if no driver is registered. */ struct cpuidle_driver *cpuidle_get_driver(void) { @@ -233,7 +338,11 @@ struct cpuidle_driver *cpuidle_get_driver(void) EXPORT_SYMBOL_GPL(cpuidle_get_driver); /** - * cpuidle_get_cpu_driver - return the driver tied with a cpu + * cpuidle_get_cpu_driver - return the driver registered for a CPU. + * @dev: a valid pointer to a struct cpuidle_device + * + * Returns a struct cpuidle_driver pointer, or NULL if no driver is registered + * for the CPU associated with @dev. */ struct cpuidle_driver *cpuidle_get_cpu_driver(struct cpuidle_device *dev) { @@ -244,6 +353,14 @@ struct cpuidle_driver *cpuidle_get_cpu_driver(struct cpuidle_device *dev) } EXPORT_SYMBOL_GPL(cpuidle_get_cpu_driver); +/** + * cpuidle_driver_ref - get a reference to the driver. + * + * Increment the reference counter of the cpuidle driver associated with + * the current CPU. + * + * Returns a pointer to the driver, or NULL if the current CPU has no driver. + */ struct cpuidle_driver *cpuidle_driver_ref(void) { struct cpuidle_driver *drv; @@ -251,18 +368,26 @@ struct cpuidle_driver *cpuidle_driver_ref(void) spin_lock(&cpuidle_driver_lock); drv = cpuidle_get_driver(); - drv->refcnt++; + if (drv) + drv->refcnt++; spin_unlock(&cpuidle_driver_lock); return drv; } +/** + * cpuidle_driver_unref - puts down the refcount for the driver + * + * Decrement the reference counter of the cpuidle driver associated with + * the current CPU. + */ void cpuidle_driver_unref(void) { - struct cpuidle_driver *drv = cpuidle_get_driver(); + struct cpuidle_driver *drv; spin_lock(&cpuidle_driver_lock); + drv = cpuidle_get_driver(); if (drv && !WARN_ON(drv->refcnt <= 0)) drv->refcnt--; diff --git a/drivers/cpuidle/governor.c b/drivers/cpuidle/governor.c index ea2f8e7aa24..ca89412f512 100644 --- a/drivers/cpuidle/governor.c +++ b/drivers/cpuidle/governor.c @@ -96,46 +96,3 @@ int cpuidle_register_governor(struct cpuidle_governor *gov) return ret; } - -/** - * cpuidle_replace_governor - find a replacement governor - * @exclude_rating: the rating that will be skipped while looking for - * new governor. 
- */ -static struct cpuidle_governor *cpuidle_replace_governor(int exclude_rating) -{ - struct cpuidle_governor *gov; - struct cpuidle_governor *ret_gov = NULL; - unsigned int max_rating = 0; - - list_for_each_entry(gov, &cpuidle_governors, governor_list) { - if (gov->rating == exclude_rating) - continue; - if (gov->rating > max_rating) { - max_rating = gov->rating; - ret_gov = gov; - } - } - - return ret_gov; -} - -/** - * cpuidle_unregister_governor - unregisters a governor - * @gov: the governor - */ -void cpuidle_unregister_governor(struct cpuidle_governor *gov) -{ - if (!gov) - return; - - mutex_lock(&cpuidle_lock); - if (gov == cpuidle_curr_governor) { - struct cpuidle_governor *new_gov; - new_gov = cpuidle_replace_governor(gov->rating); - cpuidle_switch_governor(new_gov); - } - list_del(&gov->governor_list); - mutex_unlock(&cpuidle_lock); -} - diff --git a/drivers/cpuidle/governors/ladder.c b/drivers/cpuidle/governors/ladder.c index 9b784051ec1..9f08e8cce1a 100644 --- a/drivers/cpuidle/governors/ladder.c +++ b/drivers/cpuidle/governors/ladder.c @@ -192,14 +192,4 @@ static int __init init_ladder(void) return cpuidle_register_governor(&ladder_governor); } -/** - * exit_ladder - exits the governor - */ -static void __exit exit_ladder(void) -{ - cpuidle_unregister_governor(&ladder_governor); -} - -MODULE_LICENSE("GPL"); -module_init(init_ladder); -module_exit(exit_ladder); +postcore_initcall(init_ladder); diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c index fe343a06b7d..c4f80c15a48 100644 --- a/drivers/cpuidle/governors/menu.c +++ b/drivers/cpuidle/governors/menu.c @@ -21,6 +21,15 @@ #include <linux/math64.h> #include <linux/module.h> +/* + * Please note when changing the tuning values: + * If (MAX_INTERESTING-1) * RESOLUTION > UINT_MAX, the result of + * a scaling operation multiplication may overflow on 32 bit platforms. + * In that case, #define RESOLUTION as ULL to get 64 bit result: + * #define RESOLUTION 1024ULL + * + * The default values do not overflow. + */ #define BUCKETS 12 #define INTERVALS 8 #define RESOLUTION 1024 @@ -28,13 +37,6 @@ #define MAX_INTERESTING 50000 #define STDDEV_THRESH 400 -/* 60 * 60 > STDDEV_THRESH * INTERVALS = 400 * 8 */ -#define MAX_DEVIATION 60 - -static DEFINE_PER_CPU(struct hrtimer, menu_hrtimer); -static DEFINE_PER_CPU(int, hrtimer_status); -/* menu hrtimer mode */ -enum {MENU_HRTIMER_STOP, MENU_HRTIMER_REPEAT, MENU_HRTIMER_GENERAL}; /* * Concepts and ideas behind the menu governor @@ -116,23 +118,15 @@ enum {MENU_HRTIMER_STOP, MENU_HRTIMER_REPEAT, MENU_HRTIMER_GENERAL}; * */ -/* - * The C-state residency is so long that is is worthwhile to exit - * from the shallow C-state and re-enter into a deeper C-state. 
- */ -static unsigned int perfect_cstate_ms __read_mostly = 30; -module_param(perfect_cstate_ms, uint, 0000); - struct menu_device { int last_state_idx; int needs_update; - unsigned int expected_us; - u64 predicted_us; - unsigned int exit_us; + unsigned int next_timer_us; + unsigned int predicted_us; unsigned int bucket; - u64 correction_factor[BUCKETS]; - u32 intervals[INTERVALS]; + unsigned int correction_factor[BUCKETS]; + unsigned int intervals[INTERVALS]; int interval_ptr; }; @@ -205,59 +199,28 @@ static u64 div_round64(u64 dividend, u32 divisor) return div_u64(dividend + (divisor / 2), divisor); } -/* Cancel the hrtimer if it is not triggered yet */ -void menu_hrtimer_cancel(void) -{ - int cpu = smp_processor_id(); - struct hrtimer *hrtmr = &per_cpu(menu_hrtimer, cpu); - - /* The timer is still not time out*/ - if (per_cpu(hrtimer_status, cpu)) { - hrtimer_cancel(hrtmr); - per_cpu(hrtimer_status, cpu) = MENU_HRTIMER_STOP; - } -} -EXPORT_SYMBOL_GPL(menu_hrtimer_cancel); - -/* Call back for hrtimer is triggered */ -static enum hrtimer_restart menu_hrtimer_notify(struct hrtimer *hrtimer) -{ - int cpu = smp_processor_id(); - struct menu_device *data = &per_cpu(menu_devices, cpu); - - /* In general case, the expected residency is much larger than - * deepest C-state target residency, but prediction logic still - * predicts a small predicted residency, so the prediction - * history is totally broken if the timer is triggered. - * So reset the correction factor. - */ - if (per_cpu(hrtimer_status, cpu) == MENU_HRTIMER_GENERAL) - data->correction_factor[data->bucket] = RESOLUTION * DECAY; - - per_cpu(hrtimer_status, cpu) = MENU_HRTIMER_STOP; - - return HRTIMER_NORESTART; -} - /* * Try detecting repeating patterns by keeping track of the last 8 * intervals, and checking if the standard deviation of that set * of points is below a threshold. If it is... then use the * average of these 8 points as the estimated value. */ -static u32 get_typical_interval(struct menu_device *data) +static void get_typical_interval(struct menu_device *data) { - int i = 0, divisor = 0; - uint64_t max = 0, avg = 0, stddev = 0; - int64_t thresh = LLONG_MAX; /* Discard outliers above this value. */ - unsigned int ret = 0; + int i, divisor; + unsigned int max, thresh; + uint64_t avg, stddev; + + thresh = UINT_MAX; /* Discard outliers above this value */ again: - /* first calculate average and standard deviation of the past */ - max = avg = divisor = stddev = 0; + /* First calculate the average of past intervals */ + max = 0; + avg = 0; + divisor = 0; for (i = 0; i < INTERVALS; i++) { - int64_t value = data->intervals[i]; + unsigned int value = data->intervals[i]; if (value <= thresh) { avg += value; divisor++; @@ -267,15 +230,38 @@ again: } do_div(avg, divisor); + /* Then try to determine standard deviation */ + stddev = 0; for (i = 0; i < INTERVALS; i++) { - int64_t value = data->intervals[i]; + unsigned int value = data->intervals[i]; if (value <= thresh) { int64_t diff = value - avg; stddev += diff * diff; } } do_div(stddev, divisor); - stddev = int_sqrt(stddev); + /* + * The typical interval is obtained when standard deviation is small + * or standard deviation is small compared to the average interval. + * + * int_sqrt() formal parameter type is unsigned long. When the + * greatest difference to an outlier exceeds ~65 ms * sqrt(divisor) + * the resulting squared standard deviation exceeds the input domain + * of int_sqrt on platforms where unsigned long is 32 bits in size. 
+ * In such case reject the candidate average. + * + * Use this result only if there is no timer to wake us up sooner. + */ + if (likely(stddev <= ULONG_MAX)) { + stddev = int_sqrt(stddev); + if (((avg > stddev * 6) && (divisor * 4 >= INTERVALS * 3)) + || stddev <= 20) { + if (data->next_timer_us > avg) + data->predicted_us = avg; + return; + } + } + /* * If we have outliers to the upside in our distribution, discard * those by setting the threshold to exclude these outliers, then @@ -284,23 +270,12 @@ again: * * This can deal with workloads that have long pauses interspersed * with sporadic activity with a bunch of short pauses. - * - * The typical interval is obtained when standard deviation is small - * or standard deviation is small compared to the average interval. */ - if (((avg > stddev * 6) && (divisor * 4 >= INTERVALS * 3)) - || stddev <= 20) { - data->predicted_us = avg; - ret = 1; - return ret; - - } else if ((divisor * 4) > INTERVALS * 3) { - /* Exclude the max interval */ - thresh = max - 1; - goto again; - } + if ((divisor * 4) <= INTERVALS * 3) + return; - return ret; + thresh = max - 1; + goto again; } /** @@ -313,19 +288,15 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev) struct menu_device *data = &__get_cpu_var(menu_devices); int latency_req = pm_qos_request(PM_QOS_CPU_DMA_LATENCY); int i; - int multiplier; + unsigned int interactivity_req; struct timespec t; - int repeat = 0, low_predicted = 0; - int cpu = smp_processor_id(); - struct hrtimer *hrtmr = &per_cpu(menu_hrtimer, cpu); if (data->needs_update) { menu_update(drv, dev); data->needs_update = 0; } - data->last_state_idx = 0; - data->exit_us = 0; + data->last_state_idx = CPUIDLE_DRIVER_STATE_START - 1; /* Special case when user has set very strict latency requirement */ if (unlikely(latency_req == 0)) @@ -333,32 +304,37 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev) /* determine the expected residency time, round up */ t = ktime_to_timespec(tick_nohz_get_sleep_length()); - data->expected_us = + data->next_timer_us = t.tv_sec * USEC_PER_SEC + t.tv_nsec / NSEC_PER_USEC; - data->bucket = which_bucket(data->expected_us); - - multiplier = performance_multiplier(); + data->bucket = which_bucket(data->next_timer_us); /* - * if the correction factor is 0 (eg first time init or cpu hotplug - * etc), we actually want to start out with a unity factor. + * Force the result of multiplication to be 64 bits even if both + * operands are 32 bits. + * Make sure to round up for half microseconds. */ - if (data->correction_factor[data->bucket] == 0) - data->correction_factor[data->bucket] = RESOLUTION * DECAY; - - /* Make sure to round up for half microseconds */ - data->predicted_us = div_round64(data->expected_us * data->correction_factor[data->bucket], + data->predicted_us = div_round64((uint64_t)data->next_timer_us * + data->correction_factor[data->bucket], RESOLUTION * DECAY); - repeat = get_typical_interval(data); + get_typical_interval(data); + + /* + * Performance multiplier defines a minimum predicted idle + * duration / latency ratio. Adjust the latency limit if + * necessary. + */ + interactivity_req = data->predicted_us / performance_multiplier(); + if (latency_req > interactivity_req) + latency_req = interactivity_req; /* * We want to default to C1 (hlt), not to busy polling * unless the timer is happening really really soon. 
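Worked example with illustrative numbers (RESOLUTION = 1024, DECAY = 8 as defined above): if the next timer is 10000 us away and the bucket's correction factor has decayed to 6144, then predicted_us = (10000 * 6144 + 4096) / 8192 = 7500 us; get_typical_interval() may pull that down further if the last eight intervals show a stable, shorter pattern. With performance_multiplier() returning, say, 10, interactivity_req = 7500 / 10 = 750 us, so even if PM QoS allows 900 us of exit latency the effective latency_req is clamped to 750 us and a state with exit_latency of 900 us is skipped.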
*/ - if (data->expected_us > 5 && + if (data->next_timer_us > 5 && !drv->states[CPUIDLE_DRIVER_STATE_START].disabled && dev->states_usage[CPUIDLE_DRIVER_STATE_START].disable == 0) data->last_state_idx = CPUIDLE_DRIVER_STATE_START; @@ -373,55 +349,12 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev) if (s->disabled || su->disable) continue; - if (s->target_residency > data->predicted_us) { - low_predicted = 1; + if (s->target_residency > data->predicted_us) continue; - } if (s->exit_latency > latency_req) continue; - if (s->exit_latency * multiplier > data->predicted_us) - continue; data->last_state_idx = i; - data->exit_us = s->exit_latency; - } - - /* not deepest C-state chosen for low predicted residency */ - if (low_predicted) { - unsigned int timer_us = 0; - unsigned int perfect_us = 0; - - /* - * Set a timer to detect whether this sleep is much - * longer than repeat mode predicted. If the timer - * triggers, the code will evaluate whether to put - * the CPU into a deeper C-state. - * The timer is cancelled on CPU wakeup. - */ - timer_us = 2 * (data->predicted_us + MAX_DEVIATION); - - perfect_us = perfect_cstate_ms * 1000; - - if (repeat && (4 * timer_us < data->expected_us)) { - RCU_NONIDLE(hrtimer_start(hrtmr, - ns_to_ktime(1000 * timer_us), - HRTIMER_MODE_REL_PINNED)); - /* In repeat case, menu hrtimer is started */ - per_cpu(hrtimer_status, cpu) = MENU_HRTIMER_REPEAT; - } else if (perfect_us < data->expected_us) { - /* - * The next timer is long. This could be because - * we did not make a useful prediction. - * In that case, it makes sense to re-enter - * into a deeper C-state after some time. - */ - RCU_NONIDLE(hrtimer_start(hrtmr, - ns_to_ktime(1000 * timer_us), - HRTIMER_MODE_REL_PINNED)); - /* In general case, menu hrtimer is started */ - per_cpu(hrtimer_status, cpu) = MENU_HRTIMER_GENERAL; - } - } return data->last_state_idx; @@ -452,37 +385,47 @@ static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev) { struct menu_device *data = &__get_cpu_var(menu_devices); int last_idx = data->last_state_idx; - unsigned int last_idle_us = cpuidle_get_last_residency(dev); struct cpuidle_state *target = &drv->states[last_idx]; unsigned int measured_us; - u64 new_factor; + unsigned int new_factor; /* - * Ugh, this idle state doesn't support residency measurements, so we - * are basically lost in the dark. As a compromise, assume we slept - * for the whole expected time. + * Try to figure out how much time passed between entry to low + * power state and occurrence of the wakeup event. + * + * If the entered idle state didn't support residency measurements, + * we are basically lost in the dark how much time passed. + * As a compromise, assume we slept for the whole expected time. + * + * Any measured amount of time will include the exit latency. + * Since we are interested in when the wakeup begun, not when it + * was completed, we must substract the exit latency. However, if + * the measured amount of time is less than the exit latency, + * assume the state was never reached and the exit latency is 0. */ - if (unlikely(!(target->flags & CPUIDLE_FLAG_TIME_VALID))) - last_idle_us = data->expected_us; - - - measured_us = last_idle_us; + if (unlikely(!(target->flags & CPUIDLE_FLAG_TIME_VALID))) { + /* Use timer value as is */ + measured_us = data->next_timer_us; - /* - * We correct for the exit latency; we are assuming here that the - * exit latency happens after the event that we're interested in. 
- */ - if (measured_us > data->exit_us) - measured_us -= data->exit_us; + } else { + /* Use measured value */ + measured_us = cpuidle_get_last_residency(dev); + /* Deduct exit latency */ + if (measured_us > target->exit_latency) + measured_us -= target->exit_latency; - /* update our correction ratio */ + /* Make sure our coefficients do not exceed unity */ + if (measured_us > data->next_timer_us) + measured_us = data->next_timer_us; + } - new_factor = data->correction_factor[data->bucket] - * (DECAY - 1) / DECAY; + /* Update our correction ratio */ + new_factor = data->correction_factor[data->bucket]; + new_factor -= new_factor / DECAY; - if (data->expected_us > 0 && measured_us < MAX_INTERESTING) - new_factor += RESOLUTION * measured_us / data->expected_us; + if (data->next_timer_us > 0 && measured_us < MAX_INTERESTING) + new_factor += RESOLUTION * measured_us / data->next_timer_us; else /* * we were idle so long that we count it as a perfect @@ -492,15 +435,17 @@ static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev) /* * We don't want 0 as factor; we always want at least - * a tiny bit of estimated time. + * a tiny bit of estimated time. Fortunately, due to rounding, + * new_factor will stay nonzero regardless of measured_us values + * and the compiler can eliminate this test as long as DECAY > 1. */ - if (new_factor == 0) + if (DECAY == 1 && unlikely(new_factor == 0)) new_factor = 1; data->correction_factor[data->bucket] = new_factor; /* update the repeating-pattern data */ - data->intervals[data->interval_ptr++] = last_idle_us; + data->intervals[data->interval_ptr++] = measured_us; if (data->interval_ptr >= INTERVALS) data->interval_ptr = 0; } @@ -514,12 +459,17 @@ static int menu_enable_device(struct cpuidle_driver *drv, struct cpuidle_device *dev) { struct menu_device *data = &per_cpu(menu_devices, dev->cpu); - struct hrtimer *t = &per_cpu(menu_hrtimer, dev->cpu); - hrtimer_init(t, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - t->function = menu_hrtimer_notify; + int i; memset(data, 0, sizeof(struct menu_device)); + /* + * if the correction factor is 0 (eg first time init or cpu hotplug + * etc), we actually want to start out with a unity factor. 
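The correction factor is a fixed-point exponential moving average of measured idle time relative to the predicted timer distance. A small standalone user-space sketch (not kernel code; same RESOLUTION/DECAY constants, made-up inputs) shows how the factor and the resulting prediction converge when the CPU keeps waking after half of the timer distance:

	#include <stdio.h>

	#define RESOLUTION	1024
	#define DECAY		8

	int main(void)
	{
		unsigned int factor = RESOLUTION * DECAY;	/* "unity", as initialised above */
		unsigned int next_timer_us = 10000;
		unsigned int measured_us = 5000;		/* we always wake up early */

		for (int i = 0; i < 16; i++) {
			/* same scaling as menu_select(), rounded to nearest */
			unsigned long long predicted_us =
				((unsigned long long)next_timer_us * factor +
				 (RESOLUTION * DECAY) / 2) / (RESOLUTION * DECAY);

			printf("iteration %2d: factor %5u -> predicted %llu us\n",
			       i, factor, predicted_us);

			/* same update as menu_update(): decay the old value, add the sample */
			factor -= factor / DECAY;
			factor += RESOLUTION * measured_us / next_timer_us;
		}
		return 0;	/* factor settles near 4096, predictions near 5000 us */
	}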
+ */ + for(i = 0; i < BUCKETS; i++) + data->correction_factor[i] = RESOLUTION * DECAY; + return 0; } @@ -540,14 +490,4 @@ static int __init init_menu(void) return cpuidle_register_governor(&menu_governor); } -/** - * exit_menu - exits the governor - */ -static void __exit exit_menu(void) -{ - cpuidle_unregister_governor(&menu_governor); -} - -MODULE_LICENSE("GPL"); -module_init(init_menu); -module_exit(exit_menu); +postcore_initcall(init_menu); diff --git a/drivers/cpuidle/sysfs.c b/drivers/cpuidle/sysfs.c index 428754af623..efe2f175168 100644 --- a/drivers/cpuidle/sysfs.c +++ b/drivers/cpuidle/sysfs.c @@ -11,8 +11,10 @@ #include <linux/sysfs.h> #include <linux/slab.h> #include <linux/cpu.h> +#include <linux/completion.h> #include <linux/capability.h> #include <linux/device.h> +#include <linux/kobject.h> #include "cpuidle.h" @@ -33,7 +35,8 @@ static ssize_t show_available_governors(struct device *dev, mutex_lock(&cpuidle_lock); list_for_each_entry(tmp, &cpuidle_governors, governor_list) { - if (i >= (ssize_t) ((PAGE_SIZE/sizeof(char)) - CPUIDLE_NAME_LEN - 2)) + if (i >= (ssize_t) ((PAGE_SIZE/sizeof(char)) - + CPUIDLE_NAME_LEN - 2)) goto out; i += scnprintf(&buf[i], CPUIDLE_NAME_LEN, "%s ", tmp->name); } @@ -49,11 +52,12 @@ static ssize_t show_current_driver(struct device *dev, char *buf) { ssize_t ret; - struct cpuidle_driver *cpuidle_driver = cpuidle_get_driver(); + struct cpuidle_driver *drv; spin_lock(&cpuidle_driver_lock); - if (cpuidle_driver) - ret = sprintf(buf, "%s\n", cpuidle_driver->name); + drv = cpuidle_get_driver(); + if (drv) + ret = sprintf(buf, "%s\n", drv->name); else ret = sprintf(buf, "none\n"); spin_unlock(&cpuidle_driver_lock); @@ -166,13 +170,28 @@ struct cpuidle_attr { #define define_one_rw(_name, show, store) \ static struct cpuidle_attr attr_##_name = __ATTR(_name, 0644, show, store) -#define kobj_to_cpuidledev(k) container_of(k, struct cpuidle_device, kobj) #define attr_to_cpuidleattr(a) container_of(a, struct cpuidle_attr, attr) -static ssize_t cpuidle_show(struct kobject * kobj, struct attribute * attr ,char * buf) + +struct cpuidle_device_kobj { + struct cpuidle_device *dev; + struct completion kobj_unregister; + struct kobject kobj; +}; + +static inline struct cpuidle_device *to_cpuidle_device(struct kobject *kobj) +{ + struct cpuidle_device_kobj *kdev = + container_of(kobj, struct cpuidle_device_kobj, kobj); + + return kdev->dev; +} + +static ssize_t cpuidle_show(struct kobject *kobj, struct attribute *attr, + char *buf) { int ret = -EIO; - struct cpuidle_device *dev = kobj_to_cpuidledev(kobj); - struct cpuidle_attr * cattr = attr_to_cpuidleattr(attr); + struct cpuidle_device *dev = to_cpuidle_device(kobj); + struct cpuidle_attr *cattr = attr_to_cpuidleattr(attr); if (cattr->show) { mutex_lock(&cpuidle_lock); @@ -182,12 +201,12 @@ static ssize_t cpuidle_show(struct kobject * kobj, struct attribute * attr ,char return ret; } -static ssize_t cpuidle_store(struct kobject * kobj, struct attribute * attr, - const char * buf, size_t count) +static ssize_t cpuidle_store(struct kobject *kobj, struct attribute *attr, + const char *buf, size_t count) { int ret = -EIO; - struct cpuidle_device *dev = kobj_to_cpuidledev(kobj); - struct cpuidle_attr * cattr = attr_to_cpuidleattr(attr); + struct cpuidle_device *dev = to_cpuidle_device(kobj); + struct cpuidle_attr *cattr = attr_to_cpuidleattr(attr); if (cattr->store) { mutex_lock(&cpuidle_lock); @@ -204,9 +223,10 @@ static const struct sysfs_ops cpuidle_sysfs_ops = { static void cpuidle_sysfs_release(struct kobject *kobj) { 
- struct cpuidle_device *dev = kobj_to_cpuidledev(kobj); + struct cpuidle_device_kobj *kdev = + container_of(kobj, struct cpuidle_device_kobj, kobj); - complete(&dev->kobj_unregister); + complete(&kdev->kobj_unregister); } static struct kobj_type ktype_cpuidle = { @@ -237,8 +257,8 @@ static ssize_t show_state_##_name(struct cpuidle_state *state, \ #define define_store_state_ull_function(_name) \ static ssize_t store_state_##_name(struct cpuidle_state *state, \ - struct cpuidle_state_usage *state_usage, \ - const char *buf, size_t size) \ + struct cpuidle_state_usage *state_usage, \ + const char *buf, size_t size) \ { \ unsigned long long value; \ int err; \ @@ -256,14 +276,16 @@ static ssize_t store_state_##_name(struct cpuidle_state *state, \ #define define_show_state_ull_function(_name) \ static ssize_t show_state_##_name(struct cpuidle_state *state, \ - struct cpuidle_state_usage *state_usage, char *buf) \ + struct cpuidle_state_usage *state_usage, \ + char *buf) \ { \ return sprintf(buf, "%llu\n", state_usage->_name);\ } #define define_show_state_str_function(_name) \ static ssize_t show_state_##_name(struct cpuidle_state *state, \ - struct cpuidle_state_usage *state_usage, char *buf) \ + struct cpuidle_state_usage *state_usage, \ + char *buf) \ { \ if (state->_name[0] == '\0')\ return sprintf(buf, "<null>\n");\ @@ -271,6 +293,7 @@ static ssize_t show_state_##_name(struct cpuidle_state *state, \ } define_show_state_function(exit_latency) +define_show_state_function(target_residency) define_show_state_function(power_usage) define_show_state_ull_function(usage) define_show_state_ull_function(time) @@ -282,6 +305,7 @@ define_store_state_ull_function(disable) define_one_state_ro(name, show_state_name); define_one_state_ro(desc, show_state_desc); define_one_state_ro(latency, show_state_exit_latency); +define_one_state_ro(residency, show_state_target_residency); define_one_state_ro(power, show_state_power_usage); define_one_state_ro(usage, show_state_usage); define_one_state_ro(time, show_state_time); @@ -291,6 +315,7 @@ static struct attribute *cpuidle_state_default_attrs[] = { &attr_name.attr, &attr_desc.attr, &attr_latency.attr, + &attr_residency.attr, &attr_power.attr, &attr_usage.attr, &attr_time.attr, @@ -309,8 +334,9 @@ struct cpuidle_state_kobj { #define kobj_to_state(k) (kobj_to_state_obj(k)->state) #define kobj_to_state_usage(k) (kobj_to_state_obj(k)->state_usage) #define attr_to_stateattr(a) container_of(a, struct cpuidle_state_attr, attr) -static ssize_t cpuidle_state_show(struct kobject * kobj, - struct attribute * attr ,char * buf) + +static ssize_t cpuidle_state_show(struct kobject *kobj, struct attribute *attr, + char * buf) { int ret = -EIO; struct cpuidle_state *state = kobj_to_state(kobj); @@ -323,8 +349,8 @@ static ssize_t cpuidle_state_show(struct kobject * kobj, return ret; } -static ssize_t cpuidle_state_store(struct kobject *kobj, - struct attribute *attr, const char *buf, size_t size) +static ssize_t cpuidle_state_store(struct kobject *kobj, struct attribute *attr, + const char *buf, size_t size) { int ret = -EIO; struct cpuidle_state *state = kobj_to_state(kobj); @@ -371,6 +397,7 @@ static int cpuidle_add_state_sysfs(struct cpuidle_device *device) { int i, ret = -ENOMEM; struct cpuidle_state_kobj *kobj; + struct cpuidle_device_kobj *kdev = device->kobj_dev; struct cpuidle_driver *drv = cpuidle_get_cpu_driver(device); /* state statistics */ @@ -383,7 +410,7 @@ static int cpuidle_add_state_sysfs(struct cpuidle_device *device) init_completion(&kobj->kobj_unregister); 
ret = kobject_init_and_add(&kobj->kobj, &ktype_state_cpuidle, - &device->kobj, "state%d", i); + &kdev->kobj, "state%d", i); if (ret) { kfree(kobj); goto error_state; @@ -449,8 +476,8 @@ static void cpuidle_driver_sysfs_release(struct kobject *kobj) complete(&driver_kobj->kobj_unregister); } -static ssize_t cpuidle_driver_show(struct kobject *kobj, struct attribute * attr, - char * buf) +static ssize_t cpuidle_driver_show(struct kobject *kobj, struct attribute *attr, + char *buf) { int ret = -EIO; struct cpuidle_driver_kobj *driver_kobj = kobj_to_driver_kobj(kobj); @@ -500,6 +527,7 @@ static struct kobj_type ktype_driver_cpuidle = { static int cpuidle_add_driver_sysfs(struct cpuidle_device *dev) { struct cpuidle_driver_kobj *kdrv; + struct cpuidle_device_kobj *kdev = dev->kobj_dev; struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev); int ret; @@ -511,7 +539,7 @@ static int cpuidle_add_driver_sysfs(struct cpuidle_device *dev) init_completion(&kdrv->kobj_unregister); ret = kobject_init_and_add(&kdrv->kobj, &ktype_driver_cpuidle, - &dev->kobj, "driver"); + &kdev->kobj, "driver"); if (ret) { kfree(kdrv); return ret; @@ -580,16 +608,28 @@ void cpuidle_remove_device_sysfs(struct cpuidle_device *device) */ int cpuidle_add_sysfs(struct cpuidle_device *dev) { + struct cpuidle_device_kobj *kdev; struct device *cpu_dev = get_cpu_device((unsigned long)dev->cpu); int error; - init_completion(&dev->kobj_unregister); + kdev = kzalloc(sizeof(*kdev), GFP_KERNEL); + if (!kdev) + return -ENOMEM; + kdev->dev = dev; + dev->kobj_dev = kdev; - error = kobject_init_and_add(&dev->kobj, &ktype_cpuidle, &cpu_dev->kobj, - "cpuidle"); - if (!error) - kobject_uevent(&dev->kobj, KOBJ_ADD); - return error; + init_completion(&kdev->kobj_unregister); + + error = kobject_init_and_add(&kdev->kobj, &ktype_cpuidle, &cpu_dev->kobj, + "cpuidle"); + if (error) { + kfree(kdev); + return error; + } + + kobject_uevent(&kdev->kobj, KOBJ_ADD); + + return 0; } /** @@ -598,6 +638,9 @@ int cpuidle_add_sysfs(struct cpuidle_device *dev) */ void cpuidle_remove_sysfs(struct cpuidle_device *dev) { - kobject_put(&dev->kobj); - wait_for_completion(&dev->kobj_unregister); + struct cpuidle_device_kobj *kdev = dev->kobj_dev; + + kobject_put(&kdev->kobj); + wait_for_completion(&kdev->kobj_unregister); + kfree(kdev); } |
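The sysfs rework above stops embedding the kobject and its completion directly in struct cpuidle_device and instead wraps them in a separately allocated struct cpuidle_device_kobj; to_cpuidle_device() then recovers the device with container_of(). A minimal user-space illustration of that embedding pattern (not kernel code, simplified stand-in types):

	#include <stddef.h>
	#include <stdio.h>

	#define container_of(ptr, type, member) \
		((type *)((char *)(ptr) - offsetof(type, member)))

	struct fake_kobject {		/* stands in for struct kobject */
		int refcount;
	};

	struct device_kobj {		/* stands in for struct cpuidle_device_kobj */
		const char *name;
		struct fake_kobject kobj;
	};

	/* The "core" only ever sees the embedded member... */
	static void release(struct fake_kobject *kobj)
	{
		/* ...and the owner gets its wrapper back by subtracting the offset. */
		struct device_kobj *kdev = container_of(kobj, struct device_kobj, kobj);

		printf("releasing %s\n", kdev->name);
	}

	int main(void)
	{
		struct device_kobj kdev = { .name = "cpu0" };

		release(&kdev.kobj);
		return 0;
	}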
