diff options
54 files changed, 1124 insertions, 656 deletions
diff --git a/arch/arm/mach-vexpress/Kconfig b/arch/arm/mach-vexpress/Kconfig index 36579544780..c77170c04fd 100644 --- a/arch/arm/mach-vexpress/Kconfig +++ b/arch/arm/mach-vexpress/Kconfig @@ -66,10 +66,22 @@ config ARCH_VEXPRESS_DCSCB This is needed to provide CPU and cluster power management on RTSM implementing big.LITTLE. +config ARCH_VEXPRESS_SPC + bool "Versatile Express Serial Power Controller (SPC)" + select ARCH_HAS_CPUFREQ + select ARCH_HAS_OPP + select PM_OPP + help + The TC2 (A15x2 A7x3) versatile express core tile integrates a logic + block called Serial Power Controller (SPC) that provides the interface + between the dual cluster test-chip and the M3 microcontroller that + carries out power management. + config ARCH_VEXPRESS_TC2_PM bool "Versatile Express TC2 power management" depends on MCPM select ARM_CCI + select ARCH_VEXPRESS_SPC help Support for CPU and cluster power management on Versatile Express with a TC2 (A15x2 A7x3) big.LITTLE core tile. diff --git a/arch/arm/mach-vexpress/Makefile b/arch/arm/mach-vexpress/Makefile index 505e64ab3ea..0997e0b7494 100644 --- a/arch/arm/mach-vexpress/Makefile +++ b/arch/arm/mach-vexpress/Makefile @@ -8,7 +8,8 @@ obj-y := v2m.o obj-$(CONFIG_ARCH_VEXPRESS_CA9X4) += ct-ca9x4.o obj-$(CONFIG_ARCH_VEXPRESS_DCSCB) += dcscb.o dcscb_setup.o CFLAGS_dcscb.o += -march=armv7-a -obj-$(CONFIG_ARCH_VEXPRESS_TC2_PM) += tc2_pm.o spc.o +obj-$(CONFIG_ARCH_VEXPRESS_SPC) += spc.o +obj-$(CONFIG_ARCH_VEXPRESS_TC2_PM) += tc2_pm.o CFLAGS_tc2_pm.o += -march=armv7-a obj-$(CONFIG_SMP) += platsmp.o obj-$(CONFIG_HOTPLUG_CPU) += hotplug.o diff --git a/arch/arm/mach-vexpress/spc.c b/arch/arm/mach-vexpress/spc.c index eefb029197c..033d34dcbd3 100644 --- a/arch/arm/mach-vexpress/spc.c +++ b/arch/arm/mach-vexpress/spc.c @@ -17,14 +17,31 @@ * GNU General Public License for more details. */ +#include <linux/clk-provider.h> +#include <linux/clkdev.h> +#include <linux/cpu.h> +#include <linux/delay.h> #include <linux/err.h> +#include <linux/interrupt.h> #include <linux/io.h> +#include <linux/platform_device.h> +#include <linux/pm_opp.h> #include <linux/slab.h> +#include <linux/semaphore.h> #include <asm/cacheflush.h> #define SPCLOG "vexpress-spc: " +#define PERF_LVL_A15 0x00 +#define PERF_REQ_A15 0x04 +#define PERF_LVL_A7 0x08 +#define PERF_REQ_A7 0x0c +#define COMMS 0x10 +#define COMMS_REQ 0x14 +#define PWC_STATUS 0x18 +#define PWC_FLAG 0x1c + /* SPC wake-up IRQs status and mask */ #define WAKE_INT_MASK 0x24 #define WAKE_INT_RAW 0x28 @@ -36,12 +53,45 @@ #define A15_BX_ADDR0 0x68 #define A7_BX_ADDR0 0x78 +/* SPC system config interface registers */ +#define SYSCFG_WDATA 0x70 +#define SYSCFG_RDATA 0x74 + +/* A15/A7 OPP virtual register base */ +#define A15_PERFVAL_BASE 0xC10 +#define A7_PERFVAL_BASE 0xC30 + +/* Config interface control bits */ +#define SYSCFG_START (1 << 31) +#define SYSCFG_SCC (6 << 20) +#define SYSCFG_STAT (14 << 20) + /* wake-up interrupt masks */ #define GBL_WAKEUP_INT_MSK (0x3 << 10) /* TC2 static dual-cluster configuration */ #define MAX_CLUSTERS 2 +/* + * Even though the SPC takes max 3-5 ms to complete any OPP/COMMS + * operation, the operation could start just before jiffie is about + * to be incremented. So setting timeout value of 20ms = 2jiffies@100Hz + */ +#define TIMEOUT_US 20000 + +#define MAX_OPPS 8 +#define CA15_DVFS 0 +#define CA7_DVFS 1 +#define SPC_SYS_CFG 2 +#define STAT_COMPLETE(type) ((1 << 0) << (type << 2)) +#define STAT_ERR(type) ((1 << 1) << (type << 2)) +#define RESPONSE_MASK(type) (STAT_COMPLETE(type) | STAT_ERR(type)) + +struct ve_spc_opp { + unsigned long freq; + unsigned long u_volt; +}; + struct ve_spc_drvdata { void __iomem *baseaddr; /* @@ -49,6 +99,12 @@ struct ve_spc_drvdata { * It corresponds to A15 processors MPIDR[15:8] bitfield */ u32 a15_clusid; + uint32_t cur_rsp_mask; + uint32_t cur_rsp_stat; + struct semaphore sem; + struct completion done; + struct ve_spc_opp *opps[MAX_CLUSTERS]; + int num_opps[MAX_CLUSTERS]; }; static struct ve_spc_drvdata *info; @@ -157,8 +213,197 @@ void ve_spc_powerdown(u32 cluster, bool enable) writel_relaxed(enable, info->baseaddr + pwdrn_reg); } -int __init ve_spc_init(void __iomem *baseaddr, u32 a15_clusid) +static int ve_spc_get_performance(int cluster, u32 *freq) +{ + struct ve_spc_opp *opps = info->opps[cluster]; + u32 perf_cfg_reg = 0; + u32 perf; + + perf_cfg_reg = cluster_is_a15(cluster) ? PERF_LVL_A15 : PERF_LVL_A7; + + perf = readl_relaxed(info->baseaddr + perf_cfg_reg); + if (perf >= info->num_opps[cluster]) + return -EINVAL; + + opps += perf; + *freq = opps->freq; + + return 0; +} + +/* find closest match to given frequency in OPP table */ +static int ve_spc_round_performance(int cluster, u32 freq) +{ + int idx, max_opp = info->num_opps[cluster]; + struct ve_spc_opp *opps = info->opps[cluster]; + u32 fmin = 0, fmax = ~0, ftmp; + + freq /= 1000; /* OPP entries in kHz */ + for (idx = 0; idx < max_opp; idx++, opps++) { + ftmp = opps->freq; + if (ftmp >= freq) { + if (ftmp <= fmax) + fmax = ftmp; + } else { + if (ftmp >= fmin) + fmin = ftmp; + } + } + if (fmax != ~0) + return fmax * 1000; + else + return fmin * 1000; +} + +static int ve_spc_find_performance_index(int cluster, u32 freq) +{ + int idx, max_opp = info->num_opps[cluster]; + struct ve_spc_opp *opps = info->opps[cluster]; + + for (idx = 0; idx < max_opp; idx++, opps++) + if (opps->freq == freq) + break; + return (idx == max_opp) ? -EINVAL : idx; +} + +static int ve_spc_waitforcompletion(int req_type) +{ + int ret = wait_for_completion_interruptible_timeout( + &info->done, usecs_to_jiffies(TIMEOUT_US)); + if (ret == 0) + ret = -ETIMEDOUT; + else if (ret > 0) + ret = info->cur_rsp_stat & STAT_COMPLETE(req_type) ? 0 : -EIO; + return ret; +} + +static int ve_spc_set_performance(int cluster, u32 freq) +{ + u32 perf_cfg_reg, perf_stat_reg; + int ret, perf, req_type; + + if (cluster_is_a15(cluster)) { + req_type = CA15_DVFS; + perf_cfg_reg = PERF_LVL_A15; + perf_stat_reg = PERF_REQ_A15; + } else { + req_type = CA7_DVFS; + perf_cfg_reg = PERF_LVL_A7; + perf_stat_reg = PERF_REQ_A7; + } + + perf = ve_spc_find_performance_index(cluster, freq); + + if (perf < 0) + return perf; + + if (down_timeout(&info->sem, usecs_to_jiffies(TIMEOUT_US))) + return -ETIME; + + init_completion(&info->done); + info->cur_rsp_mask = RESPONSE_MASK(req_type); + + writel(perf, info->baseaddr + perf_cfg_reg); + ret = ve_spc_waitforcompletion(req_type); + + info->cur_rsp_mask = 0; + up(&info->sem); + + return ret; +} + +static int ve_spc_read_sys_cfg(int func, int offset, uint32_t *data) +{ + int ret; + + if (down_timeout(&info->sem, usecs_to_jiffies(TIMEOUT_US))) + return -ETIME; + + init_completion(&info->done); + info->cur_rsp_mask = RESPONSE_MASK(SPC_SYS_CFG); + + /* Set the control value */ + writel(SYSCFG_START | func | offset >> 2, info->baseaddr + COMMS); + ret = ve_spc_waitforcompletion(SPC_SYS_CFG); + + if (ret == 0) + *data = readl(info->baseaddr + SYSCFG_RDATA); + + info->cur_rsp_mask = 0; + up(&info->sem); + + return ret; +} + +static irqreturn_t ve_spc_irq_handler(int irq, void *data) +{ + struct ve_spc_drvdata *drv_data = data; + uint32_t status = readl_relaxed(drv_data->baseaddr + PWC_STATUS); + + if (info->cur_rsp_mask & status) { + info->cur_rsp_stat = status; + complete(&drv_data->done); + } + + return IRQ_HANDLED; +} + +/* + * +--------------------------+ + * | 31 20 | 19 0 | + * +--------------------------+ + * | u_volt | freq(kHz) | + * +--------------------------+ + */ +#define MULT_FACTOR 20 +#define VOLT_SHIFT 20 +#define FREQ_MASK (0xFFFFF) +static int ve_spc_populate_opps(uint32_t cluster) { + uint32_t data = 0, off, ret, idx; + struct ve_spc_opp *opps; + + opps = kzalloc(sizeof(*opps) * MAX_OPPS, GFP_KERNEL); + if (!opps) + return -ENOMEM; + + info->opps[cluster] = opps; + + off = cluster_is_a15(cluster) ? A15_PERFVAL_BASE : A7_PERFVAL_BASE; + for (idx = 0; idx < MAX_OPPS; idx++, off += 4, opps++) { + ret = ve_spc_read_sys_cfg(SYSCFG_SCC, off, &data); + if (!ret) { + opps->freq = (data & FREQ_MASK) * MULT_FACTOR; + opps->u_volt = data >> VOLT_SHIFT; + } else { + break; + } + } + info->num_opps[cluster] = idx; + + return ret; +} + +static int ve_init_opp_table(struct device *cpu_dev) +{ + int cluster = topology_physical_package_id(cpu_dev->id); + int idx, ret = 0, max_opp = info->num_opps[cluster]; + struct ve_spc_opp *opps = info->opps[cluster]; + + for (idx = 0; idx < max_opp; idx++, opps++) { + ret = dev_pm_opp_add(cpu_dev, opps->freq * 1000, opps->u_volt); + if (ret) { + dev_warn(cpu_dev, "failed to add opp %lu %lu\n", + opps->freq, opps->u_volt); + return ret; + } + } + return ret; +} + +int __init ve_spc_init(void __iomem *baseaddr, u32 a15_clusid, int irq) +{ + int ret; info = kzalloc(sizeof(*info), GFP_KERNEL); if (!info) { pr_err(SPCLOG "unable to allocate mem\n"); @@ -168,6 +413,25 @@ int __init ve_spc_init(void __iomem *baseaddr, u32 a15_clusid) info->baseaddr = baseaddr; info->a15_clusid = a15_clusid; + if (irq <= 0) { + pr_err(SPCLOG "Invalid IRQ %d\n", irq); + kfree(info); + return -EINVAL; + } + + init_completion(&info->done); + + readl_relaxed(info->baseaddr + PWC_STATUS); + + ret = request_irq(irq, ve_spc_irq_handler, IRQF_TRIGGER_HIGH + | IRQF_ONESHOT, "vexpress-spc", info); + if (ret) { + pr_err(SPCLOG "IRQ %d request failed\n", irq); + kfree(info); + return -ENODEV; + } + + sema_init(&info->sem, 1); /* * Multi-cluster systems may need this data when non-coherent, during * cluster power-up/power-down. Make sure driver info reaches main @@ -178,3 +442,103 @@ int __init ve_spc_init(void __iomem *baseaddr, u32 a15_clusid) return 0; } + +struct clk_spc { + struct clk_hw hw; + int cluster; +}; + +#define to_clk_spc(spc) container_of(spc, struct clk_spc, hw) +static unsigned long spc_recalc_rate(struct clk_hw *hw, + unsigned long parent_rate) +{ + struct clk_spc *spc = to_clk_spc(hw); + u32 freq; + + if (ve_spc_get_performance(spc->cluster, &freq)) + return -EIO; + + return freq * 1000; +} + +static long spc_round_rate(struct clk_hw *hw, unsigned long drate, + unsigned long *parent_rate) +{ + struct clk_spc *spc = to_clk_spc(hw); + + return ve_spc_round_performance(spc->cluster, drate); +} + +static int spc_set_rate(struct clk_hw *hw, unsigned long rate, + unsigned long parent_rate) +{ + struct clk_spc *spc = to_clk_spc(hw); + + return ve_spc_set_performance(spc->cluster, rate / 1000); +} + +static struct clk_ops clk_spc_ops = { + .recalc_rate = spc_recalc_rate, + .round_rate = spc_round_rate, + .set_rate = spc_set_rate, +}; + +static struct clk *ve_spc_clk_register(struct device *cpu_dev) +{ + struct clk_init_data init; + struct clk_spc *spc; + + spc = kzalloc(sizeof(*spc), GFP_KERNEL); + if (!spc) { + pr_err("could not allocate spc clk\n"); + return ERR_PTR(-ENOMEM); + } + + spc->hw.init = &init; + spc->cluster = topology_physical_package_id(cpu_dev->id); + + init.name = dev_name(cpu_dev); + init.ops = &clk_spc_ops; + init.flags = CLK_IS_ROOT | CLK_GET_RATE_NOCACHE; + init.num_parents = 0; + + return devm_clk_register(cpu_dev, &spc->hw); +} + +static int __init ve_spc_clk_init(void) +{ + int cpu; + struct clk *clk; + + if (!info) + return 0; /* Continue only if SPC is initialised */ + + if (ve_spc_populate_opps(0) || ve_spc_populate_opps(1)) { + pr_err("failed to build OPP table\n"); + return -ENODEV; + } + + for_each_possible_cpu(cpu) { + struct device *cpu_dev = get_cpu_device(cpu); + if (!cpu_dev) { + pr_warn("failed to get cpu%d device\n", cpu); + continue; + } + clk = ve_spc_clk_register(cpu_dev); + if (IS_ERR(clk)) { + pr_warn("failed to register cpu%d clock\n", cpu); + continue; + } + if (clk_register_clkdev(clk, NULL, dev_name(cpu_dev))) { + pr_warn("failed to register cpu%d clock lookup\n", cpu); + continue; + } + + if (ve_init_opp_table(cpu_dev)) + pr_warn("failed to initialise cpu%d opp table\n", cpu); + } + + platform_device_register_simple("vexpress-spc-cpufreq", -1, NULL, 0); + return 0; +} +module_init(ve_spc_clk_init); diff --git a/arch/arm/mach-vexpress/spc.h b/arch/arm/mach-vexpress/spc.h index 5f7e4a446a1..dbd44c3720f 100644 --- a/arch/arm/mach-vexpress/spc.h +++ b/arch/arm/mach-vexpress/spc.h @@ -15,7 +15,7 @@ #ifndef __SPC_H_ #define __SPC_H_ -int __init ve_spc_init(void __iomem *base, u32 a15_clusid); +int __init ve_spc_init(void __iomem *base, u32 a15_clusid, int irq); void ve_spc_global_wakeup_irq(bool set); void ve_spc_cpu_wakeup_irq(u32 cluster, u32 cpu, bool set); void ve_spc_set_resume_addr(u32 cluster, u32 cpu, u32 addr); diff --git a/arch/arm/mach-vexpress/tc2_pm.c b/arch/arm/mach-vexpress/tc2_pm.c index e6eb4819291..d38130aba46 100644 --- a/arch/arm/mach-vexpress/tc2_pm.c +++ b/arch/arm/mach-vexpress/tc2_pm.c @@ -16,6 +16,7 @@ #include <linux/io.h> #include <linux/kernel.h> #include <linux/of_address.h> +#include <linux/of_irq.h> #include <linux/spinlock.h> #include <linux/errno.h> #include <linux/irqchip/arm-gic.h> @@ -311,7 +312,7 @@ static void __naked tc2_pm_power_up_setup(unsigned int affinity_level) static int __init tc2_pm_init(void) { - int ret; + int ret, irq; void __iomem *scc; u32 a15_cluster_id, a7_cluster_id, sys_info; struct device_node *np; @@ -336,13 +337,15 @@ static int __init tc2_pm_init(void) tc2_nr_cpus[a15_cluster_id] = (sys_info >> 16) & 0xf; tc2_nr_cpus[a7_cluster_id] = (sys_info >> 20) & 0xf; + irq = irq_of_parse_and_map(np, 0); + /* * A subset of the SCC registers is also used to communicate * with the SPC (power controller). We need to be able to * drive it very early in the boot process to power up * processors, so we initialize the SPC driver here. */ - ret = ve_spc_init(scc + SPC_BASE, a15_cluster_id); + ret = ve_spc_init(scc + SPC_BASE, a15_cluster_id, irq); if (ret) return ret; diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm index 701ec95ce95..ce52ed94924 100644 --- a/drivers/cpufreq/Kconfig.arm +++ b/drivers/cpufreq/Kconfig.arm @@ -224,3 +224,11 @@ config ARM_TEGRA_CPUFREQ default y help This adds the CPUFreq driver support for TEGRA SOCs. + +config ARM_VEXPRESS_SPC_CPUFREQ + tristate "Versatile Express SPC based CPUfreq driver" + select ARM_BIG_LITTLE_CPUFREQ + depends on ARCH_VEXPRESS_SPC + help + This add the CPUfreq driver support for Versatile Express + big.LITTLE platforms using SPC for power management. diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile index b7948bbbbf1..74945652dd7 100644 --- a/drivers/cpufreq/Makefile +++ b/drivers/cpufreq/Makefile @@ -74,6 +74,7 @@ obj-$(CONFIG_ARM_SA1100_CPUFREQ) += sa1100-cpufreq.o obj-$(CONFIG_ARM_SA1110_CPUFREQ) += sa1110-cpufreq.o obj-$(CONFIG_ARM_SPEAR_CPUFREQ) += spear-cpufreq.o obj-$(CONFIG_ARM_TEGRA_CPUFREQ) += tegra-cpufreq.o +obj-$(CONFIG_ARM_VEXPRESS_SPC_CPUFREQ) += vexpress-spc-cpufreq.o ################################################################################## # PowerPC platform drivers diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c index e4bc19552d2..caf41ebea18 100644 --- a/drivers/cpufreq/acpi-cpufreq.c +++ b/drivers/cpufreq/acpi-cpufreq.c @@ -428,14 +428,10 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, { struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu); struct acpi_processor_performance *perf; - struct cpufreq_freqs freqs; struct drv_cmd cmd; unsigned int next_perf_state = 0; /* Index into perf table */ int result = 0; - pr_debug("acpi_cpufreq_target %d (%d)\n", - data->freq_table[index].frequency, policy->cpu); - if (unlikely(data == NULL || data->acpi_data == NULL || data->freq_table == NULL)) { return -ENODEV; @@ -483,23 +479,17 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, else cmd.mask = cpumask_of(policy->cpu); - freqs.old = perf->states[perf->state].core_frequency * 1000; - freqs.new = data->freq_table[index].frequency; - cpufreq_notify_transition(policy, &freqs, CPUFREQ_PRECHANGE); - drv_write(&cmd); if (acpi_pstate_strict) { - if (!check_freqs(cmd.mask, freqs.new, data)) { + if (!check_freqs(cmd.mask, data->freq_table[index].frequency, + data)) { pr_debug("acpi_cpufreq_target failed (%d)\n", policy->cpu); result = -EAGAIN; - freqs.new = freqs.old; } } - cpufreq_notify_transition(policy, &freqs, CPUFREQ_POSTCHANGE); - if (!result) perf->state = next_perf_state; diff --git a/drivers/cpufreq/arm_big_little.c b/drivers/cpufreq/arm_big_little.c index 163e3378fe1..5519933813e 100644 --- a/drivers/cpufreq/arm_big_little.c +++ b/drivers/cpufreq/arm_big_little.c @@ -24,91 +24,319 @@ #include <linux/cpufreq.h> #include <linux/cpumask.h> #include <linux/export.h> +#include <linux/mutex.h> #include <linux/of_platform.h> #include <linux/pm_opp.h> #include <linux/slab.h> #include <linux/topology.h> #include <linux/types.h> +#include <asm/bL_switcher.h> #include "arm_big_little.h" /* Currently we support only two clusters */ +#define A15_CLUSTER 0 +#define A7_CLUSTER 1 #define MAX_CLUSTERS 2 +#ifdef CONFIG_BL_SWITCHER +static bool bL_switching_enabled; +#define is_bL_switching_enabled() bL_switching_enabled +#define set_switching_enabled(x) (bL_switching_enabled = (x)) +#else +#define is_bL_switching_enabled() false +#define set_switching_enabled(x) do { } while (0) +#endif + +#define ACTUAL_FREQ(cluster, freq) ((cluster == A7_CLUSTER) ? freq << 1 : freq) +#define VIRT_FREQ(cluster, freq) ((cluster == A7_CLUSTER) ? freq >> 1 : freq) + static struct cpufreq_arm_bL_ops *arm_bL_ops; static struct clk *clk[MAX_CLUSTERS]; -static struct cpufreq_frequency_table *freq_table[MAX_CLUSTERS]; -static atomic_t cluster_usage[MAX_CLUSTERS] = {ATOMIC_INIT(0), ATOMIC_INIT(0)}; +static struct cpufreq_frequency_table *freq_table[MAX_CLUSTERS + 1]; +static atomic_t cluster_usage[MAX_CLUSTERS + 1]; + +static unsigned int clk_big_min; /* (Big) clock frequencies */ +static unsigned int clk_little_max; /* Maximum clock frequency (Little) */ + +static DEFINE_PER_CPU(unsigned int, physical_cluster); +static DEFINE_PER_CPU(unsigned int, cpu_last_req_freq); + +static struct mutex cluster_lock[MAX_CLUSTERS]; + +static inline int raw_cpu_to_cluster(int cpu) +{ + return topology_physical_package_id(cpu); +} + +static inline int cpu_to_cluster(int cpu) +{ + return is_bL_switching_enabled() ? + MAX_CLUSTERS : raw_cpu_to_cluster(cpu); +} + +static unsigned int find_cluster_maxfreq(int cluster) +{ + int j; + u32 max_freq = 0, cpu_freq; + + for_each_online_cpu(j) { + cpu_freq = per_cpu(cpu_last_req_freq, j); + + if ((cluster == per_cpu(physical_cluster, j)) && + (max_freq < cpu_freq)) + max_freq = cpu_freq; + } + + pr_debug("%s: cluster: %d, max freq: %d\n", __func__, cluster, + max_freq); + + return max_freq; +} + +static unsigned int clk_ge |