aboutsummaryrefslogtreecommitdiff
path: root/drivers/cpufreq/cpufreq_governor.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/cpufreq/cpufreq_governor.c')
-rw-r--r--drivers/cpufreq/cpufreq_governor.c115
1 files changed, 89 insertions, 26 deletions
diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c
index 7b839a8db2a..1b44496b2d2 100644
--- a/drivers/cpufreq/cpufreq_governor.c
+++ b/drivers/cpufreq/cpufreq_governor.c
@@ -16,15 +16,9 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-#include <asm/cputime.h>
-#include <linux/cpufreq.h>
-#include <linux/cpumask.h>
#include <linux/export.h>
#include <linux/kernel_stat.h>
-#include <linux/mutex.h>
#include <linux/slab.h>
-#include <linux/types.h>
-#include <linux/workqueue.h>
#include "cpufreq_governor.h"
@@ -42,18 +36,33 @@ void dbs_check_cpu(struct dbs_data *dbs_data, int cpu)
struct od_dbs_tuners *od_tuners = dbs_data->tuners;
struct cs_dbs_tuners *cs_tuners = dbs_data->tuners;
struct cpufreq_policy *policy;
+ unsigned int sampling_rate;
unsigned int max_load = 0;
unsigned int ignore_nice;
unsigned int j;
- if (dbs_data->cdata->governor == GOV_ONDEMAND)
- ignore_nice = od_tuners->ignore_nice;
- else
- ignore_nice = cs_tuners->ignore_nice;
+ if (dbs_data->cdata->governor == GOV_ONDEMAND) {
+ struct od_cpu_dbs_info_s *od_dbs_info =
+ dbs_data->cdata->get_cpu_dbs_info_s(cpu);
+
+ /*
+ * Sometimes, the ondemand governor uses an additional
+ * multiplier to give long delays. So apply this multiplier to
+ * the 'sampling_rate', so as to keep the wake-up-from-idle
+ * detection logic a bit conservative.
+ */
+ sampling_rate = od_tuners->sampling_rate;
+ sampling_rate *= od_dbs_info->rate_mult;
+
+ ignore_nice = od_tuners->ignore_nice_load;
+ } else {
+ sampling_rate = cs_tuners->sampling_rate;
+ ignore_nice = cs_tuners->ignore_nice_load;
+ }
policy = cdbs->cur_policy;
- /* Get Absolute Load (in terms of freq for ondemand gov) */
+ /* Get Absolute Load */
for_each_cpu(j, policy->cpus) {
struct cpu_dbs_common_info *j_cdbs;
u64 cur_wall_time, cur_idle_time;
@@ -102,14 +111,45 @@ void dbs_check_cpu(struct dbs_data *dbs_data, int cpu)
if (unlikely(!wall_time || wall_time < idle_time))
continue;
- load = 100 * (wall_time - idle_time) / wall_time;
-
- if (dbs_data->cdata->governor == GOV_ONDEMAND) {
- int freq_avg = __cpufreq_driver_getavg(policy, j);
- if (freq_avg <= 0)
- freq_avg = policy->cur;
+ /*
+ * If the CPU had gone completely idle, and a task just woke up
+ * on this CPU now, it would be unfair to calculate 'load' the
+ * usual way for this elapsed time-window, because it will show
+ * near-zero load, irrespective of how CPU intensive that task
+ * actually is. This is undesirable for latency-sensitive bursty
+ * workloads.
+ *
+ * To avoid this, we reuse the 'load' from the previous
+ * time-window and give this task a chance to start with a
+ * reasonably high CPU frequency. (However, we shouldn't over-do
+ * this copy, lest we get stuck at a high load (high frequency)
+ * for too long, even when the current system load has actually
+ * dropped down. So we perform the copy only once, upon the
+ * first wake-up from idle.)
+ *
+ * Detecting this situation is easy: the governor's deferrable
+ * timer would not have fired during CPU-idle periods. Hence
+ * an unusually large 'wall_time' (as compared to the sampling
+ * rate) indicates this scenario.
+ *
+ * prev_load can be zero in two cases and we must recalculate it
+ * for both cases:
+ * - during long idle intervals
+ * - explicitly set to zero
+ */
+ if (unlikely(wall_time > (2 * sampling_rate) &&
+ j_cdbs->prev_load)) {
+ load = j_cdbs->prev_load;
- load *= freq_avg;
+ /*
+ * Perform a destructive copy, to ensure that we copy
+ * the previous load only once, upon the first wake-up
+ * from idle.
+ */
+ j_cdbs->prev_load = 0;
+ } else {
+ load = 100 * (wall_time - idle_time) / wall_time;
+ j_cdbs->prev_load = load;
}
if (load > max_load)
@@ -133,12 +173,26 @@ void gov_queue_work(struct dbs_data *dbs_data, struct cpufreq_policy *policy,
{
int i;
+ mutex_lock(&cpufreq_governor_lock);
+ if (!policy->governor_enabled)
+ goto out_unlock;
+
if (!all_cpus) {
- __gov_queue_work(smp_processor_id(), dbs_data, delay);
+ /*
+ * Use raw_smp_processor_id() to avoid preemptible warnings.
+ * We know that this is only called with all_cpus == false from
+ * works that have been queued with *_work_on() functions and
+ * those works are canceled during CPU_DOWN_PREPARE so they
+ * can't possibly run on any other CPU.
+ */
+ __gov_queue_work(raw_smp_processor_id(), dbs_data, delay);
} else {
for_each_cpu(i, policy->cpus)
__gov_queue_work(i, dbs_data, delay);
}
+
+out_unlock:
+ mutex_unlock(&cpufreq_governor_lock);
}
EXPORT_SYMBOL_GPL(gov_queue_work);
@@ -244,7 +298,7 @@ int cpufreq_governor_dbs(struct cpufreq_policy *policy,
policy->governor_data = dbs_data;
- /* policy latency is in nS. Convert it to uS first */
+ /* policy latency is in ns. Convert it to us first */
latency = policy->cpuinfo.transition_latency / 1000;
if (latency == 0)
latency = 1;
@@ -298,12 +352,12 @@ int cpufreq_governor_dbs(struct cpufreq_policy *policy,
cs_tuners = dbs_data->tuners;
cs_dbs_info = dbs_data->cdata->get_cpu_dbs_info_s(cpu);
sampling_rate = cs_tuners->sampling_rate;
- ignore_nice = cs_tuners->ignore_nice;
+ ignore_nice = cs_tuners->ignore_nice_load;
} else {
od_tuners = dbs_data->tuners;
od_dbs_info = dbs_data->cdata->get_cpu_dbs_info_s(cpu);
sampling_rate = od_tuners->sampling_rate;
- ignore_nice = od_tuners->ignore_nice;
+ ignore_nice = od_tuners->ignore_nice_load;
od_ops = dbs_data->cdata->gov_ops;
io_busy = od_tuners->io_is_busy;
}
@@ -318,11 +372,18 @@ int cpufreq_governor_dbs(struct cpufreq_policy *policy,
for_each_cpu(j, policy->cpus) {
struct cpu_dbs_common_info *j_cdbs =
dbs_data->cdata->get_cpu_cdbs(j);
+ unsigned int prev_load;
j_cdbs->cpu = j;
j_cdbs->cur_policy = policy;
j_cdbs->prev_cpu_idle = get_cpu_idle_time(j,
&j_cdbs->prev_cpu_wall, io_busy);
+
+ prev_load = (unsigned int)
+ (j_cdbs->prev_cpu_wall - j_cdbs->prev_cpu_idle);
+ j_cdbs->prev_load = 100 * prev_load /
+ (unsigned int) j_cdbs->prev_cpu_wall;
+
if (ignore_nice)
j_cdbs->prev_cpu_nice =
kcpustat_cpu(j).cpustat[CPUTIME_NICE];
@@ -332,10 +393,6 @@ int cpufreq_governor_dbs(struct cpufreq_policy *policy,
dbs_data->cdata->gov_dbs_timer);
}
- /*
- * conservative does not implement micro like ondemand
- * governor, thus we are bound to jiffes/HZ
- */
if (dbs_data->cdata->governor == GOV_CONSERVATIVE) {
cs_dbs_info->down_skip = 0;
cs_dbs_info->enable = 1;
@@ -370,6 +427,11 @@ int cpufreq_governor_dbs(struct cpufreq_policy *policy,
break;
case CPUFREQ_GOV_LIMITS:
+ mutex_lock(&dbs_data->mutex);
+ if (!cpu_cdbs->cur_policy) {
+ mutex_unlock(&dbs_data->mutex);
+ break;
+ }
mutex_lock(&cpu_cdbs->timer_mutex);
if (policy->max < cpu_cdbs->cur_policy->cur)
__cpufreq_driver_target(cpu_cdbs->cur_policy,
@@ -379,6 +441,7 @@ int cpufreq_governor_dbs(struct cpufreq_policy *policy,
policy->min, CPUFREQ_RELATION_L);
dbs_check_cpu(dbs_data, cpu);
mutex_unlock(&cpu_cdbs->timer_mutex);
+ mutex_unlock(&dbs_data->mutex);
break;
}
return 0;