diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2009-03-26 11:04:08 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-03-26 11:04:08 -0700 |
commit | ada19a31a90b4f46c040c25ef4ef8ffc203c7fc6 (patch) | |
tree | 7d523d2d90dbaa973c5843d6219ec149b5949243 /drivers/cpufreq | |
parent | 8d80ce80e1d58ba9cd3e3972b112cccd6b4008f4 (diff) | |
parent | 36e8abf3edcd2d207193ec5741d1a2a645d470a5 (diff) |
Merge branch 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/davej/cpufreq
* 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/davej/cpufreq: (35 commits)
[CPUFREQ] Prevent p4-clockmod from auto-binding to the ondemand governor.
[CPUFREQ] Make cpufreq-nforce2 less obnoxious
[CPUFREQ] p4-clockmod reports wrong frequency.
[CPUFREQ] powernow-k8: Use a common exit path.
[CPUFREQ] Change link order of x86 cpufreq modules
[CPUFREQ] conservative: remove 10x from def_sampling_rate
[CPUFREQ] conservative: fixup governor to function more like ondemand logic
[CPUFREQ] conservative: fix dbs_cpufreq_notifier so freq is not locked
[CPUFREQ] conservative: amend author's email address
[CPUFREQ] Use swap() in longhaul.c
[CPUFREQ] checkpatch cleanups for acpi-cpufreq
[CPUFREQ] powernow-k8: Only print error message once, not per core.
[CPUFREQ] ondemand/conservative: sanitize sampling_rate restrictions
[CPUFREQ] ondemand/conservative: deprecate sampling_rate{min,max}
[CPUFREQ] powernow-k8: Always compile powernow-k8 driver with ACPI support
[CPUFREQ] Introduce /sys/devices/system/cpu/cpu*/cpufreq/cpuinfo_transition_latency
[CPUFREQ] checkpatch cleanups for powernow-k8
[CPUFREQ] checkpatch cleanups for ondemand governor.
[CPUFREQ] checkpatch cleanups for powernow-k7
[CPUFREQ] checkpatch cleanups for speedstep related drivers.
...
Diffstat (limited to 'drivers/cpufreq')
-rw-r--r-- | drivers/cpufreq/cpufreq.c | 55 | ||||
-rw-r--r-- | drivers/cpufreq/cpufreq_conservative.c | 404 | ||||
-rw-r--r-- | drivers/cpufreq/cpufreq_ondemand.c | 74 | ||||
-rw-r--r-- | drivers/cpufreq/cpufreq_stats.c | 74 | ||||
-rw-r--r-- | drivers/cpufreq/cpufreq_userspace.c | 27 | ||||
-rw-r--r-- | drivers/cpufreq/freq_table.c | 18 |
6 files changed, 379 insertions, 273 deletions
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index d6daf3c507d..d270e8eb3e6 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -104,7 +104,8 @@ EXPORT_SYMBOL_GPL(unlock_policy_rwsem_write); /* internal prototypes */ -static int __cpufreq_governor(struct cpufreq_policy *policy, unsigned int event); +static int __cpufreq_governor(struct cpufreq_policy *policy, + unsigned int event); static unsigned int __cpufreq_get(unsigned int cpu); static void handle_update(struct work_struct *work); @@ -128,7 +129,7 @@ static int __init init_cpufreq_transition_notifier_list(void) pure_initcall(init_cpufreq_transition_notifier_list); static LIST_HEAD(cpufreq_governor_list); -static DEFINE_MUTEX (cpufreq_governor_mutex); +static DEFINE_MUTEX(cpufreq_governor_mutex); struct cpufreq_policy *cpufreq_cpu_get(unsigned int cpu) { @@ -371,7 +372,7 @@ static struct cpufreq_governor *__find_governor(const char *str_governor) struct cpufreq_governor *t; list_for_each_entry(t, &cpufreq_governor_list, governor_list) - if (!strnicmp(str_governor,t->name,CPUFREQ_NAME_LEN)) + if (!strnicmp(str_governor, t->name, CPUFREQ_NAME_LEN)) return t; return NULL; @@ -429,15 +430,11 @@ static int cpufreq_parse_governor(char *str_governor, unsigned int *policy, mutex_unlock(&cpufreq_governor_mutex); } - out: +out: return err; } -/* drivers/base/cpu.c */ -extern struct sysdev_class cpu_sysdev_class; - - /** * cpufreq_per_cpu_attr_read() / show_##file_name() - * print out cpufreq information @@ -450,11 +447,12 @@ extern struct sysdev_class cpu_sysdev_class; static ssize_t show_##file_name \ (struct cpufreq_policy *policy, char *buf) \ { \ - return sprintf (buf, "%u\n", policy->object); \ + return sprintf(buf, "%u\n", policy->object); \ } show_one(cpuinfo_min_freq, cpuinfo.min_freq); show_one(cpuinfo_max_freq, cpuinfo.max_freq); +show_one(cpuinfo_transition_latency, cpuinfo.transition_latency); show_one(scaling_min_freq, min); show_one(scaling_max_freq, max); show_one(scaling_cur_freq, cur); @@ -476,7 +474,7 @@ static ssize_t store_##file_name \ if (ret) \ return -EINVAL; \ \ - ret = sscanf (buf, "%u", &new_policy.object); \ + ret = sscanf(buf, "%u", &new_policy.object); \ if (ret != 1) \ return -EINVAL; \ \ @@ -486,8 +484,8 @@ static ssize_t store_##file_name \ return ret ? ret : count; \ } -store_one(scaling_min_freq,min); -store_one(scaling_max_freq,max); +store_one(scaling_min_freq, min); +store_one(scaling_max_freq, max); /** * show_cpuinfo_cur_freq - current CPU frequency as detected by hardware @@ -507,12 +505,13 @@ static ssize_t show_cpuinfo_cur_freq(struct cpufreq_policy *policy, */ static ssize_t show_scaling_governor(struct cpufreq_policy *policy, char *buf) { - if(policy->policy == CPUFREQ_POLICY_POWERSAVE) + if (policy->policy == CPUFREQ_POLICY_POWERSAVE) return sprintf(buf, "powersave\n"); else if (policy->policy == CPUFREQ_POLICY_PERFORMANCE) return sprintf(buf, "performance\n"); else if (policy->governor) - return scnprintf(buf, CPUFREQ_NAME_LEN, "%s\n", policy->governor->name); + return scnprintf(buf, CPUFREQ_NAME_LEN, "%s\n", + policy->governor->name); return -EINVAL; } @@ -531,7 +530,7 @@ static ssize_t store_scaling_governor(struct cpufreq_policy *policy, if (ret) return ret; - ret = sscanf (buf, "%15s", str_governor); + ret = sscanf(buf, "%15s", str_governor); if (ret != 1) return -EINVAL; @@ -575,7 +574,8 @@ static ssize_t show_scaling_available_governors(struct cpufreq_policy *policy, } list_for_each_entry(t, &cpufreq_governor_list, governor_list) { - if (i >= (ssize_t) ((PAGE_SIZE / sizeof(char)) - (CPUFREQ_NAME_LEN + 2))) + if (i >= (ssize_t) ((PAGE_SIZE / sizeof(char)) + - (CPUFREQ_NAME_LEN + 2))) goto out; i += scnprintf(&buf[i], CPUFREQ_NAME_LEN, "%s ", t->name); } @@ -594,7 +594,7 @@ static ssize_t show_cpus(const struct cpumask *mask, char *buf) i += scnprintf(&buf[i], (PAGE_SIZE - i - 2), " "); i += scnprintf(&buf[i], (PAGE_SIZE - i - 2), "%u", cpu); if (i >= (PAGE_SIZE - 5)) - break; + break; } i += sprintf(&buf[i], "\n"); return i; @@ -660,6 +660,7 @@ __ATTR(_name, 0644, show_##_name, store_##_name) define_one_ro0400(cpuinfo_cur_freq); define_one_ro(cpuinfo_min_freq); define_one_ro(cpuinfo_max_freq); +define_one_ro(cpuinfo_transition_latency); define_one_ro(scaling_available_governors); define_one_ro(scaling_driver); define_one_ro(scaling_cur_freq); @@ -673,6 +674,7 @@ define_one_rw(scaling_setspeed); static struct attribute *default_attrs[] = { &cpuinfo_min_freq.attr, &cpuinfo_max_freq.attr, + &cpuinfo_transition_latency.attr, &scaling_min_freq.attr, &scaling_max_freq.attr, &affected_cpus.attr, @@ -684,10 +686,10 @@ static struct attribute *default_attrs[] = { NULL }; -#define to_policy(k) container_of(k,struct cpufreq_policy,kobj) -#define to_attr(a) container_of(a,struct freq_attr,attr) +#define to_policy(k) container_of(k, struct cpufreq_policy, kobj) +#define to_attr(a) container_of(a, struct freq_attr, attr) -static ssize_t show(struct kobject *kobj, struct attribute *attr ,char *buf) +static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf) { struct cpufreq_policy *policy = to_policy(kobj); struct freq_attr *fattr = to_attr(attr); @@ -853,10 +855,10 @@ static int cpufreq_add_dev(struct sys_device *sys_dev) if (cpu == j) continue; - /* check for existing affected CPUs. They may not be aware - * of it due to CPU Hotplug. + /* Check for existing affected CPUs. + * They may not be aware of it due to CPU Hotplug. */ - managed_policy = cpufreq_cpu_get(j); // FIXME: Where is this released? What about error paths? + managed_policy = cpufreq_cpu_get(j); /* FIXME: Where is this released? What about error paths? */ if (unlikely(managed_policy)) { /* Set proper policy_cpu */ @@ -1127,8 +1129,8 @@ static void handle_update(struct work_struct *work) * @old_freq: CPU frequency the kernel thinks the CPU runs at * @new_freq: CPU frequency the CPU actually runs at * - * We adjust to current frequency first, and need to clean up later. So either call - * to cpufreq_update_policy() or schedule handle_update()). + * We adjust to current frequency first, and need to clean up later. + * So either call to cpufreq_update_policy() or schedule handle_update()). */ static void cpufreq_out_of_sync(unsigned int cpu, unsigned int old_freq, unsigned int new_freq) @@ -1610,7 +1612,8 @@ EXPORT_SYMBOL_GPL(cpufreq_unregister_governor); /** * cpufreq_get_policy - get the current cpufreq_policy - * @policy: struct cpufreq_policy into which the current cpufreq_policy is written + * @policy: struct cpufreq_policy into which the current cpufreq_policy + * is written * * Reads the current cpufreq policy. */ diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index 0320962c4ec..2ecd95e4ab1 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -4,7 +4,7 @@ * Copyright (C) 2001 Russell King * (C) 2003 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>. * Jun Nakajima <jun.nakajima@intel.com> - * (C) 2004 Alexander Clouter <alex-kernel@digriz.org.uk> + * (C) 2009 Alexander Clouter <alex@digriz.org.uk> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -13,22 +13,17 @@ #include <linux/kernel.h> #include <linux/module.h> -#include <linux/smp.h> #include <linux/init.h> -#include <linux/interrupt.h> -#include <linux/ctype.h> #include <linux/cpufreq.h> -#include <linux/sysctl.h> -#include <linux/types.h> -#include <linux/fs.h> -#include <linux/sysfs.h> #include <linux/cpu.h> -#include <linux/kmod.h> -#include <linux/workqueue.h> #include <linux/jiffies.h> #include <linux/kernel_stat.h> -#include <linux/percpu.h> #include <linux/mutex.h> +#include <linux/hrtimer.h> +#include <linux/tick.h> +#include <linux/ktime.h> +#include <linux/sched.h> + /* * dbs is used in this file as a shortform for demandbased switching * It helps to keep variable names smaller, simpler @@ -43,19 +38,31 @@ * latency of the processor. The governor will work on any processor with * transition latency <= 10mS, using appropriate sampling * rate. - * For CPUs with transition latency > 10mS (mostly drivers - * with CPUFREQ_ETERNAL), this governor will not work. + * For CPUs with transition latency > 10mS (mostly drivers with CPUFREQ_ETERNAL) + * this governor will not work. * All times here are in uS. */ static unsigned int def_sampling_rate; #define MIN_SAMPLING_RATE_RATIO (2) /* for correct statistics, we need at least 10 ticks between each measure */ -#define MIN_STAT_SAMPLING_RATE \ +#define MIN_STAT_SAMPLING_RATE \ (MIN_SAMPLING_RATE_RATIO * jiffies_to_usecs(10)) #define MIN_SAMPLING_RATE \ (def_sampling_rate / MIN_SAMPLING_RATE_RATIO) +/* Above MIN_SAMPLING_RATE will vanish with its sysfs file soon + * Define the minimal settable sampling rate to the greater of: + * - "HW transition latency" * 100 (same as default sampling / 10) + * - MIN_STAT_SAMPLING_RATE + * To avoid that userspace shoots itself. +*/ +static unsigned int minimum_sampling_rate(void) +{ + return max(def_sampling_rate / 10, MIN_STAT_SAMPLING_RATE); +} + +/* This will also vanish soon with removing sampling_rate_max */ #define MAX_SAMPLING_RATE (500 * def_sampling_rate) -#define DEF_SAMPLING_RATE_LATENCY_MULTIPLIER (1000) +#define LATENCY_MULTIPLIER (1000) #define DEF_SAMPLING_DOWN_FACTOR (1) #define MAX_SAMPLING_DOWN_FACTOR (10) #define TRANSITION_LATENCY_LIMIT (10 * 1000 * 1000) @@ -63,12 +70,15 @@ static unsigned int def_sampling_rate; static void do_dbs_timer(struct work_struct *work); struct cpu_dbs_info_s { + cputime64_t prev_cpu_idle; + cputime64_t prev_cpu_wall; + cputime64_t prev_cpu_nice; struct cpufreq_policy *cur_policy; - unsigned int prev_cpu_idle_up; - unsigned int prev_cpu_idle_down; - unsigned int enable; + struct delayed_work work; unsigned int down_skip; unsigned int requested_freq; + int cpu; + unsigned int enable:1; }; static DEFINE_PER_CPU(struct cpu_dbs_info_s, cpu_dbs_info); @@ -82,19 +92,18 @@ static unsigned int dbs_enable; /* number of CPUs using this policy */ * cpu_hotplug lock should be taken before that. Note that cpu_hotplug lock * is recursive for the same process. -Venki */ -static DEFINE_MUTEX (dbs_mutex); -static DECLARE_DELAYED_WORK(dbs_work, do_dbs_timer); +static DEFINE_MUTEX(dbs_mutex); -struct dbs_tuners { +static struct workqueue_struct *kconservative_wq; + +static struct dbs_tuners { unsigned int sampling_rate; unsigned int sampling_down_factor; unsigned int up_threshold; unsigned int down_threshold; unsigned int ignore_nice; unsigned int freq_step; -}; - -static struct dbs_tuners dbs_tuners_ins = { +} dbs_tuners_ins = { .up_threshold = DEF_FREQUENCY_UP_THRESHOLD, .down_threshold = DEF_FREQUENCY_DOWN_THRESHOLD, .sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR, @@ -102,18 +111,37 @@ static struct dbs_tuners dbs_tuners_ins = { .freq_step = 5, }; -static inline unsigned int get_cpu_idle_time(unsigned int cpu) +static inline cputime64_t get_cpu_idle_time_jiffy(unsigned int cpu, + cputime64_t *wall) { - unsigned int add_nice = 0, ret; + cputime64_t idle_time; + cputime64_t cur_wall_time; + cputime64_t busy_time; - if (dbs_tuners_ins.ignore_nice) - add_nice = kstat_cpu(cpu).cpustat.nice; + cur_wall_time = jiffies64_to_cputime64(get_jiffies_64()); + busy_time = cputime64_add(kstat_cpu(cpu).cpustat.user, + kstat_cpu(cpu).cpustat.system); + + busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.irq); + busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.softirq); + busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.steal); + busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.nice); + + idle_time = cputime64_sub(cur_wall_time, busy_time); + if (wall) + *wall = cur_wall_time; + + return idle_time; +} + +static inline cputime64_t get_cpu_idle_time(unsigned int cpu, cputime64_t *wall) +{ + u64 idle_time = get_cpu_idle_time_us(cpu, wall); - ret = kstat_cpu(cpu).cpustat.idle + - kstat_cpu(cpu).cpustat.iowait + - add_nice; + if (idle_time == -1ULL) + return get_cpu_idle_time_jiffy(cpu, wall); - return ret; + return idle_time; } /* keep track of frequency transitions */ @@ -125,10 +153,21 @@ dbs_cpufreq_notifier(struct notifier_block *nb, unsigned long val, struct cpu_dbs_info_s *this_dbs_info = &per_cpu(cpu_dbs_info, freq->cpu); + struct cpufreq_policy *policy; + if (!this_dbs_info->enable) return 0; - this_dbs_info->requested_freq = freq->new; + policy = this_dbs_info->cur_policy; + + /* + * we only care if our internally tracked freq moves outside + * the 'valid' ranges of freqency available to us otherwise + * we do not change it + */ + if (this_dbs_info->requested_freq > policy->max + || this_dbs_info->requested_freq < policy->min) + this_dbs_info->requested_freq = freq->new; return 0; } @@ -140,16 +179,31 @@ static struct notifier_block dbs_cpufreq_notifier_block = { /************************** sysfs interface ************************/ static ssize_t show_sampling_rate_max(struct cpufreq_policy *policy, char *buf) { - return sprintf (buf, "%u\n", MAX_SAMPLING_RATE); + static int print_once; + + if (!print_once) { + printk(KERN_INFO "CPUFREQ: conservative sampling_rate_max " + "sysfs file is deprecated - used by: %s\n", + current->comm); + print_once = 1; + } + return sprintf(buf, "%u\n", MAX_SAMPLING_RATE); } static ssize_t show_sampling_rate_min(struct cpufreq_policy *policy, char *buf) { - return sprintf (buf, "%u\n", MIN_SAMPLING_RATE); + static int print_once; + + if (!print_once) { + printk(KERN_INFO "CPUFREQ: conservative sampling_rate_max " + "sysfs file is deprecated - used by: %s\n", current->comm); + print_once = 1; + } + return sprintf(buf, "%u\n", MIN_SAMPLING_RATE); } -#define define_one_ro(_name) \ -static struct freq_attr _name = \ +#define define_one_ro(_name) \ +static struct freq_attr _name = \ __ATTR(_name, 0444, show_##_name, NULL) define_one_ro(sampling_rate_max); @@ -174,7 +228,8 @@ static ssize_t store_sampling_down_factor(struct cpufreq_policy *unused, { unsigned int input; int ret; - ret = sscanf (buf, "%u", &input); + ret = sscanf(buf, "%u", &input); + if (ret != 1 || input > MAX_SAMPLING_DOWN_FACTOR || input < 1) return -EINVAL; @@ -190,15 +245,13 @@ static ssize_t store_sampling_rate(struct cpufreq_policy *unused, { unsigned int input; int ret; - ret = sscanf (buf, "%u", &input); + ret = sscanf(buf, "%u", &input); - mutex_lock(&dbs_mutex); - if (ret != 1 || input > MAX_SAMPLING_RATE || input < MIN_SAMPLING_RATE) { - mutex_unlock(&dbs_mutex); + if (ret != 1) return -EINVAL; - } - dbs_tuners_ins.sampling_rate = input; + mutex_lock(&dbs_mutex); + dbs_tuners_ins.sampling_rate = max(input, minimum_sampling_rate()); mutex_unlock(&dbs_mutex); return count; @@ -209,10 +262,11 @@ static ssize_t store_up_threshold(struct cpufreq_policy *unused, { unsigned int input; int ret; - ret = sscanf (buf, "%u", &input); + ret = sscanf(buf, "%u", &input); mutex_lock(&dbs_mutex); - if (ret != 1 || input > 100 || input <= dbs_tuners_ins.down_threshold) { + if (ret != 1 || input > 100 || + input <= dbs_tuners_ins.down_threshold) { mutex_unlock(&dbs_mutex); return -EINVAL; } @@ -228,10 +282,12 @@ static ssize_t store_down_threshold(struct cpufreq_policy *unused, { unsigned int input; int ret; - ret = sscanf (buf, "%u", &input); + ret = sscanf(buf, "%u", &input); mutex_lock(&dbs_mutex); - if (ret != 1 || input > 100 || input >= dbs_tuners_ins.up_threshold) { + /* cannot be lower than 11 otherwise freq will not fall */ + if (ret != 1 || input < 11 || input > 100 || + input >= dbs_tuners_ins.up_threshold) { mutex_unlock(&dbs_mutex); return -EINVAL; } @@ -264,12 +320,14 @@ static ssize_t store_ignore_nice_load(struct cpufreq_policy *policy, } dbs_tuners_ins.ignore_nice = input; - /* we need to re-evaluate prev_cpu_idle_up and prev_cpu_idle_down */ + /* we need to re-evaluate prev_cpu_idle */ for_each_online_cpu(j) { - struct cpu_dbs_info_s *j_dbs_info; - j_dbs_info = &per_cpu(cpu_dbs_info, j); - j_dbs_info->prev_cpu_idle_up = get_cpu_idle_time(j); - j_dbs_info->prev_cpu_idle_down = j_dbs_info->prev_cpu_idle_up; + struct cpu_dbs_info_s *dbs_info; + dbs_info = &per_cpu(cpu_dbs_info, j); + dbs_info->prev_cpu_idle = get_cpu_idle_time(j, + &dbs_info->prev_cpu_wall); + if (dbs_tuners_ins.ignore_nice) + dbs_info->prev_cpu_nice = kstat_cpu(j).cpustat.nice; } mutex_unlock(&dbs_mutex); @@ -281,7 +339,6 @@ static ssize_t store_freq_step(struct cpufreq_policy *policy, { unsigned int input; int ret; - ret = sscanf(buf, "%u", &input); if (ret != 1) @@ -310,7 +367,7 @@ define_one_rw(down_threshold); define_one_rw(ignore_nice_load); define_one_rw(freq_step); -static struct attribute * dbs_attributes[] = { +static struct attribute *dbs_attributes[] = { &sampling_rate_max.attr, &sampling_rate_min.attr, &sampling_rate.attr, @@ -329,55 +386,78 @@ static struct attribute_group dbs_attr_group = { /************************** sysfs end ************************/ -static void dbs_check_cpu(int cpu) +static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info) { - unsigned int idle_ticks, up_idle_ticks, down_idle_ticks; - unsigned int tmp_idle_ticks, total_idle_ticks; + unsigned int load = 0; unsigned int freq_target; - unsigned int freq_down_sampling_rate; - struct cpu_dbs_info_s *this_dbs_info = &per_cpu(cpu_dbs_info, cpu); - struct cpufreq_policy *policy; - if (!this_dbs_info->enable) - return; + struct cpufreq_policy *policy; + unsigned int j; policy = this_dbs_info->cur_policy; /* - * The default safe range is 20% to 80% - * Every sampling_rate, we check - * - If current idle time is less than 20%, then we try to - * increase frequency - * Every sampling_rate*sampling_down_factor, we check - * - If current idle time is more than 80%, then we try to - * decrease frequency + * Every sampling_rate, we check, if current idle time is less + * than 20% (default), then we try to increase frequency + * Every sampling_rate*sampling_down_factor, we check, if current + * idle time is more than 80%, then we try to decrease frequency * * Any frequency increase takes it to the maximum frequency. * Frequency reduction happens at minimum steps of - * 5% (default) of max_frequency + * 5% (default) of maximum frequency */ - /* Check for frequency increase */ - idle_ticks = UINT_MAX; + /* Get Absolute Load */ + for_each_cpu(j, policy->cpus) { + struct cpu_dbs_info_s *j_dbs_info; + cputime64_t cur_wall_time, cur_idle_time; + unsigned int idle_time, wall_time; - /* Check for frequency increase */ - total_idle_ticks = get_cpu_idle_time(cpu); - tmp_idle_ticks = total_idle_ticks - - this_dbs_info->prev_cpu_idle_up; - this_dbs_info->prev_cpu_idle_up = total_idle_ticks; + j_dbs_info = &per_cpu(cpu_dbs_info, j); + + cur_idle_time = get_cpu_idle_time(j, &cur_wall_time); + + wall_time = (unsigned int) cputime64_sub(cur_wall_time, + j_dbs_info->prev_cpu_wall); + j_dbs_info->prev_cpu_wall = cur_wall_time; + + idle_time = (unsigned int) cputime64_sub(cur_idle_time, + j_dbs_info->prev_cpu_idle); + j_dbs_info->prev_cpu_idle = cur_idle_time; + + if (dbs_tuners_ins.ignore_nice) { + cputime64_t cur_nice; + unsigned long cur_nice_jiffies; + + cur_nice = cputime64_sub(kstat_cpu(j).cpustat.nice, + j_dbs_info->prev_cpu_nice); + /* + * Assumption: nice time between sampling periods will + * be less than 2^32 jiffies for 32 bit sys + */ + cur_nice_jiffies = (unsigned long) + cputime64_to_jiffies64(cur_nice); - if (tmp_idle_ticks < idle_ticks) - idle_ticks = tmp_idle_ticks; + j_dbs_info->prev_cpu_nice = kstat_cpu(j).cpustat.nice; + idle_time += jiffies_to_usecs(cur_nice_jiffies); + } + + if (unlikely(!wall_time || wall_time < idle_time)) + continue; + + load = 100 * (wall_time - idle_time) / wall_time; + } - /* Scale idle ticks by 100 and compare with up and down ticks */ - idle_ticks *= 100; - up_idle_ticks = (100 - dbs_tuners_ins.up_threshold) * - usecs_to_jiffies(dbs_tuners_ins.sampling_rate); + /* + * break out if we 'cannot' reduce the speed as the user might + * want freq_step to be zero + */ + if (dbs_tuners_ins.freq_step == 0) + return; - if (idle_ticks < up_idle_ticks) { + /* Check for frequency increase */ + if (load > dbs_tuners_ins.up_threshold) { this_dbs_info->down_skip = 0; - this_dbs_info->prev_cpu_idle_down = - this_dbs_info->prev_cpu_idle_up; /* if we are already at full speed then break out early */ if (this_dbs_info->requested_freq == policy->max) @@ -398,49 +478,24 @@ static void dbs_check_cpu(int cpu) return; } - /* Check for frequency decrease */ - this_dbs_info->down_skip++; - if (this_dbs_info->down_skip < dbs_tuners_ins.sampling_down_factor) - return; - - /* Check for frequency decrease */ - total_idle_ticks = this_dbs_info->prev_cpu_idle_up; - tmp_idle_ticks = total_idle_ticks - - this_dbs_info->prev_cpu_idle_down; - this_dbs_info->prev_cpu_idle_down = total_idle_ticks; - - if (tmp_idle_ticks < idle_ticks) - idle_ticks = tmp_idle_ticks; - - /* Scale idle ticks by 100 and compare with up and down ticks */ - idle_ticks *= 100; - this_dbs_info->down_skip = 0; - - freq_down_sampling_rate = dbs_tuners_ins.sampling_rate * - dbs_tuners_ins.sampling_down_factor; - down_idle_ticks = (100 - dbs_tuners_ins.down_threshold) * - usecs_to_jiffies(freq_down_sampling_rate); - - if (idle_ticks > down_idle_ticks) { - /* - * if we are already at the lowest speed then break out early - * or if we 'cannot' reduce the speed as the user might want - * freq_target to be zero - */ - if (this_dbs_info->requested_freq == policy->min - || dbs_tuners_ins.freq_step == 0) - return; - + /* + * The optimal frequency is the frequency that is the lowest that + * can support the current CPU usage without triggering the up + * policy. To be safe, we focus 10 points under the threshold. + */ + if (load < (dbs_tuners_ins.down_threshold - 10)) { freq_target = (dbs_tuners_ins.freq_step * policy->max) / 100; - /* max freq cannot be less than 100. But who knows.... */ - if (unlikely(freq_target == 0)) - freq_target = 5; - this_dbs_info->requested_freq -= freq_target; if (this_dbs_info->requested_freq < policy->min) this_dbs_info->requested_freq = policy->min; + /* + * if we cannot reduce the frequency anymore, break out early + */ + if (policy->cur == policy->min) + return; + __cpufreq_driver_target(policy, this_dbs_info->requested_freq, CPUFREQ_RELATION_H); return; @@ -449,27 +504,45 @@ static void dbs_check_cpu(int cpu) static void do_dbs_timer(struct work_struct *work) { - int i; - mutex_lock(&dbs_mutex); - for_each_online_cpu(i) - dbs_check_cpu(i); - schedule_delayed_work(&dbs_work, - usecs_to_jiffies(dbs_tuners_ins.sampling_rate)); - mutex_unlock(&dbs_mutex); + struct cpu_dbs_info_s *dbs_info = + container_of(work, struct cpu_dbs_info_s, work.work); + unsigned int cpu = dbs_info->cpu; + + /* We want all CPUs to do sampling nearly on same jiffy */ + int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate); + + delay -= jiffies % delay; + + if (lock_policy_rwsem_write(cpu) < 0) + return; + + if (!dbs_info->enable) { + unlock_policy_rwsem_write(cpu); + return; + } + + dbs_check_cpu(dbs_info); + + queue_delayed_work_on(cpu, kconservative_wq, &dbs_info->work, delay); + unlock_policy_rwsem_write(cpu); } -static inline void dbs_timer_init(void) +static inline void dbs_timer_init(struct cpu_dbs_info_s *dbs_info) { - init_timer_deferrable(&dbs_work.timer); - schedule_delayed_work(&dbs_work, - usecs_to_jiffies(dbs_tuners_ins.sampling_rate)); - return; + /* We want all CPUs to do sampling nearly on same jiffy */ + int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate); + delay -= jiffies % delay; + + dbs_info->enable = 1; + INIT_DELAYED_WORK_DEFERRABLE(&dbs_info->work, do_dbs_timer); + queue_delayed_work_on(dbs_info->cpu, kconservative_wq, &dbs_info->work, + delay); } -static inline void dbs_timer_exit(void) +static inline void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info) { - cancel_delayed_work(&dbs_work); - return; + dbs_info->enable = 0; + cancel_delayed_work(&dbs_info->work); } static int cpufreq_governor_dbs(struct cpufreq_policy *policy, @@ -503,11 +576,13 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy, j_dbs_info = &per_cpu(cpu_dbs_info, j); j_dbs_info->cur_policy = policy; - j_dbs_info->prev_cpu_idle_up = get_cpu_idle_time(cpu); - j_dbs_info->prev_cpu_idle_down - = j_dbs_info->prev_cpu_idle_up; + j_dbs_info->prev_cpu_idle = get_cpu_idle_time(j, + &j_dbs_info->prev_cpu_wall); + if (dbs_tuners_ins.ignore_nice) { + j_dbs_info->prev_cpu_nice = + kstat_cpu(j).cpustat.nice; + } } - this_dbs_info->enable = 1; this_dbs_info->down_skip = 0; this_dbs_info->requested_freq = policy->cur; @@ -523,38 +598,36 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy, if (latency == 0) latency = 1; - def_sampling_rate = 10 * latency * - DEF_SAMPLING_RATE_LATENCY_MULTIPLIER; - - if (def_sampling_rate < MIN_STAT_SAMPLING_RATE) - def_sampling_rate = MIN_STAT_SAMPLING_RATE; + def_sampling_rate = + max(latency * LATENCY_MULTIPLIER, + MIN_STAT_SAMPLING_RATE); dbs_tuners_ins.sampling_rate = def_sampling_rate; - dbs_timer_init(); cpufreq_register_notifier( &dbs_cpufreq_notifier_block, CPUFREQ_TRANSITION_NOTIFIER); } + dbs_timer_init(this_dbs_info); mutex_unlock(&dbs_mutex); + break; case CPUFREQ_GOV_STOP: mutex_lock(&dbs_mutex); - this_dbs_info->enable = 0; + dbs_timer_exit(this_dbs_info); sysfs_remove_group(&policy->kobj, &dbs_attr_group); dbs_enable--; + /* * Stop the timerschedule work, when this governor * is used for first time */ - if (dbs_enable == 0) { - dbs_timer_exit(); + if (dbs_enable == 0) cpufreq_unregister_notifier( &dbs_cpufreq_notifier_block, CPUFREQ_TRANSITION_NOTIFIER); - } mutex_unlock(&dbs_mutex); @@ -571,6 +644,7 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy, this_dbs_info->cur_policy, policy->min, CPUFREQ_RELATION_L); mutex_unlock(&dbs_mutex); + break; } return 0; @@ -588,23 +662,33 @@ struct cpufreq_governor cpufreq_gov_conservative = { static int __init cpufreq_gov_dbs_init(void) { - return cpufreq_register_governor(&cpufreq_gov_conservative); + int err; + + kconservative_wq = create_workqueue("kconservative"); + if (!kconservative_wq) { + printk(KERN_ERR "Creation of kconservative failed\n"); + return -EFAULT; + } + + err = cpufreq_register_governor(&cpufreq_gov_conservative); + if (err) + destroy_workqueue(kconservative_wq); + + return err; } static void __exit cpufreq_gov_dbs_exit(void) { - /* Make sure that the scheduled work is indeed not running */ - flush_scheduled_work(); - cpufreq_unregister_governor(&cpufreq_gov_conservative); + destroy_workqueue(kconservative_wq); } -MODULE_AUTHOR ("Alexander Clouter <alex-kernel@digriz.org.uk>"); -MODULE_DESCRIPTION ("'cpufreq_conservative' - A dynamic cpufreq governor for " +MODULE_AUTHOR("Alexander Clouter <alex@digriz.org.uk>"); +MODULE_DESCRIPTION("'cpufreq_conservative' - A dynamic cpufreq governor for " "Low Latency Frequency Transition capable processors " "optimised for use in a battery environment"); -MODULE_LICENSE ("GPL"); +MODULE_LICENSE("GPL"); #ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE fs_initcall(cpufreq_gov_dbs_init); diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 6f45b1658a6..338f428a15b 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -21,6 +21,7 @@ #include <linux/hrtimer.h> #include <linux/tick.h> #include <linux/ktime.h> +#include <linux/sched.h> /* * dbs is used in this file as a shortform for demandbased switching @@ -51,8 +52,20 @@ static unsigned int def_sampling_rate; (MIN_SAMPLING_RATE_RATIO * jiffies_to_usecs(10)) #define MIN_SAMPLING_RATE \ (def_sampling_rate / MIN_SAMPLING_RATE_RATIO) +/* Above MIN_SAMPLING_RATE will vanish with its sysfs file soon + * Define the minimal settable sampling rate to the greater of: + * - "HW transition latency" * 100 (same as default sampling / 10) + * - MIN_STAT_SAMPLING_RATE + * To avoid that userspace shoots itself. +*/ +static unsigned int minimum_sampling_rate(void) +{ + return max(def_sampling_rate / 10, MIN_STAT_SAMPLING_RATE); +} + +/* This will also vanish soon with removing sampling_rate_max */ #define MAX_SAMPLING_RATE (500 * def_sampling_rate) -#define DEF_SAMPLING_RATE_LATENCY_MULTIPLIER (1000) +#define LATENCY_MULTIPLIER (1000) #define TRANSITION_LATENCY_LIMIT (10 * 1000 * 1000) static void do_dbs_timer(struct work_struct *work); @@ -65,14 +78,14 @@ struct cpu_dbs_info_s { cputime64_t prev_cpu_wall; cputime64_t prev_cpu_nice; struct cpufreq_policy *cur_policy; - struct delayed_work work; + struct delayed_work work; struct cpufreq_frequency_table *freq_table; unsigned int freq_lo; unsigned int freq_lo_jiffies; unsigned int freq_hi_jiffies; int cpu; unsigned int enable:1, - sample_type:1; + sample_type:1; }; static DEFINE_PER_CPU(struct cpu_dbs_info_s, cpu_dbs_info); @@ -203,12 +216,28 @@ static void ondemand_powersave_bias_init(void) /************************** sysfs interface ************************/ static ssize_t show_sampling_rate_max(struct cpufreq_policy *policy, char *buf) { - return sprintf (buf, "%u\n", MAX_SAMPLING_RATE); + static int print_once; + + if (!print_once) { + printk(KERN_INFO "CPUFREQ: ondemand sampling_rate_max " + "sysfs file is deprecated - used by: %s\n", + current->comm); + print_once = 1; + } + return sprintf(buf, "%u\n", MAX_SAMPLING_RATE); } static ssize_t show_sampling_rate_min(struct cpufreq_policy *policy, char *buf) { - return sprintf (buf, "%u\n", MIN_SAMPLING_RATE); + static int print_once; + + if (!print_once) { + printk(KERN_INFO "CPUFREQ: ondemand sampling_rate_min " + "sysfs file is deprecated - used by: %s\n", + current->comm); + print_once = 1; + } + return sprintf(buf, "%u\n", MIN_SAMPLING_RATE); } #define define_one_ro(_name) \ @@ -238,13 +267,11 @@ static ssize_t store_sampling_rate(struct cpufreq_policy *unused, ret = sscanf(buf, "%u", &input); mutex_lock(&dbs_mutex); - if (ret != 1 || input > MAX_SAMPLING_RATE - || input < MIN_SAMPLING_RATE) { + if (ret != 1) { mutex_unlock(&dbs_mutex); return -EINVAL; } - - dbs_tuners_ins.sampling_rate = input; + dbs_tuners_ins.sampling_rate = max(input, minimum_sampling_rate()); mutex_unlock(&dbs_mutex); return count; @@ -279,14 +306,14 @@ static ssize_t store_ignore_nice_load(struct cpufreq_policy *policy, unsigned int j; ret = sscanf(buf, "%u", &input); - if ( ret != 1 ) + if (ret != 1) return -EINVAL; - if ( input > 1 ) + if (input > 1) input = 1; mutex_lock(&dbs_mutex); - if ( input == dbs_tuners_ins.ignore_nice ) { /* nothing to do */ + if (input == dbs_tuners_ins.ignore_nice) { /* nothing to do */ mutex_unlock(&dbs_mutex); return count; } @@ -337,7 +364,7 @@ define_one_rw(up_threshold); define_one_rw(ignore_nice_load); define_one_rw(powersave_bias); -static struct attribute * dbs_attributes[] = { +static struct attr |