aboutsummaryrefslogtreecommitdiff
path: root/drivers/cpuidle/governors/menu.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/cpuidle/governors/menu.c')
-rw-r--r--drivers/cpuidle/governors/menu.c271
1 files changed, 171 insertions, 100 deletions
diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c
index f508690eb95..c4f80c15a48 100644
--- a/drivers/cpuidle/governors/menu.c
+++ b/drivers/cpuidle/governors/menu.c
@@ -12,14 +12,24 @@
#include <linux/kernel.h>
#include <linux/cpuidle.h>
-#include <linux/pm_qos_params.h>
+#include <linux/pm_qos.h>
#include <linux/time.h>
#include <linux/ktime.h>
#include <linux/hrtimer.h>
#include <linux/tick.h>
#include <linux/sched.h>
#include <linux/math64.h>
+#include <linux/module.h>
+/*
+ * Please note when changing the tuning values:
+ * If (MAX_INTERESTING-1) * RESOLUTION > UINT_MAX, the result of
+ * the scaling multiplication may overflow on 32-bit platforms.
+ * In that case, #define RESOLUTION as ULL to get a 64-bit result:
+ * #define RESOLUTION 1024ULL
+ *
+ * The default values do not overflow.
+ */
#define BUCKETS 12
#define INTERVALS 8
#define RESOLUTION 1024
@@ -112,12 +122,11 @@ struct menu_device {
int last_state_idx;
int needs_update;
- unsigned int expected_us;
- u64 predicted_us;
- unsigned int exit_us;
+ unsigned int next_timer_us;
+ unsigned int predicted_us;
unsigned int bucket;
- u64 correction_factor[BUCKETS];
- u32 intervals[INTERVALS];
+ unsigned int correction_factor[BUCKETS];
+ unsigned int intervals[INTERVALS];
int interval_ptr;
};
@@ -182,7 +191,7 @@ static inline int performance_multiplier(void)
static DEFINE_PER_CPU(struct menu_device, menu_devices);
-static void menu_update(struct cpuidle_device *dev);
+static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev);
/* This implements DIV_ROUND_CLOSEST but avoids 64 bit division */
static u64 div_round64(u64 dividend, u32 divisor)
@@ -196,110 +205,156 @@ static u64 div_round64(u64 dividend, u32 divisor)
* of points is below a threshold. If it is... then use the
* average of these 8 points as the estimated value.
*/
-static void detect_repeating_patterns(struct menu_device *data)
+static void get_typical_interval(struct menu_device *data)
{
- int i;
- uint64_t avg = 0;
- uint64_t stddev = 0; /* contains the square of the std deviation */
-
- /* first calculate average and standard deviation of the past */
- for (i = 0; i < INTERVALS; i++)
- avg += data->intervals[i];
- avg = avg / INTERVALS;
-
- /* if the avg is beyond the known next tick, it's worthless */
- if (avg > data->expected_us)
- return;
-
- for (i = 0; i < INTERVALS; i++)
- stddev += (data->intervals[i] - avg) *
- (data->intervals[i] - avg);
-
- stddev = stddev / INTERVALS;
+ int i, divisor;
+ unsigned int max, thresh;
+ uint64_t avg, stddev;
+
+ thresh = UINT_MAX; /* Discard outliers above this value */
+
+again:
+
+ /* First calculate the average of past intervals */
+ max = 0;
+ avg = 0;
+ divisor = 0;
+ for (i = 0; i < INTERVALS; i++) {
+ unsigned int value = data->intervals[i];
+ if (value <= thresh) {
+ avg += value;
+ divisor++;
+ if (value > max)
+ max = value;
+ }
+ }
+ do_div(avg, divisor);
+
+ /* Then try to determine standard deviation */
+ stddev = 0;
+ for (i = 0; i < INTERVALS; i++) {
+ unsigned int value = data->intervals[i];
+ if (value <= thresh) {
+ int64_t diff = value - avg;
+ stddev += diff * diff;
+ }
+ }
+ do_div(stddev, divisor);
+ /*
+ * The typical interval is accepted when the standard deviation is small
+ * in absolute terms or small compared to the average interval.
+ *
+ * int_sqrt() formal parameter type is unsigned long. When the
+ * greatest difference to an outlier exceeds ~65 ms * sqrt(divisor)
+ * the resulting squared standard deviation exceeds the input domain
+ * of int_sqrt on platforms where unsigned long is 32 bits in size.
+ * In such a case, reject the candidate average.
+ *
+ * Use this result only if there is no timer to wake us up sooner.
+ */
+ if (likely(stddev <= ULONG_MAX)) {
+ stddev = int_sqrt(stddev);
+ if (((avg > stddev * 6) && (divisor * 4 >= INTERVALS * 3))
+ || stddev <= 20) {
+ if (data->next_timer_us > avg)
+ data->predicted_us = avg;
+ return;
+ }
+ }
/*
- * now.. if stddev is small.. then assume we have a
- * repeating pattern and predict we keep doing this.
+ * If we have outliers to the upside in our distribution, discard
+ * those by setting the threshold to exclude these outliers, then
+ * calculate the average and standard deviation again. Once we get
+ * down to the bottom 3/4 of our samples, stop excluding samples.
+ *
+ * This can deal with workloads that have long pauses interspersed
+ * with sporadic activity with a bunch of short pauses.
*/
+ if ((divisor * 4) <= INTERVALS * 3)
+ return;
- if (avg && stddev < STDDEV_THRESH)
- data->predicted_us = avg;
+ thresh = max - 1;
+ goto again;
}
/**
* menu_select - selects the next idle state to enter
+ * @drv: cpuidle driver containing state data
* @dev: the CPU
*/
-static int menu_select(struct cpuidle_device *dev)
+static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
{
struct menu_device *data = &__get_cpu_var(menu_devices);
int latency_req = pm_qos_request(PM_QOS_CPU_DMA_LATENCY);
- unsigned int power_usage = -1;
int i;
- int multiplier;
+ unsigned int interactivity_req;
+ struct timespec t;
if (data->needs_update) {
- menu_update(dev);
+ menu_update(drv, dev);
data->needs_update = 0;
}
- data->last_state_idx = 0;
- data->exit_us = 0;
+ data->last_state_idx = CPUIDLE_DRIVER_STATE_START - 1;
/* Special case when user has set very strict latency requirement */
if (unlikely(latency_req == 0))
return 0;
/* determine the expected residency time, round up */
- data->expected_us =
- DIV_ROUND_UP((u32)ktime_to_ns(tick_nohz_get_sleep_length()), 1000);
-
+ t = ktime_to_timespec(tick_nohz_get_sleep_length());
+ data->next_timer_us =
+ t.tv_sec * USEC_PER_SEC + t.tv_nsec / NSEC_PER_USEC;
- data->bucket = which_bucket(data->expected_us);
- multiplier = performance_multiplier();
+ data->bucket = which_bucket(data->next_timer_us);
/*
- * if the correction factor is 0 (eg first time init or cpu hotplug
- * etc), we actually want to start out with a unity factor.
+ * Force the result of multiplication to be 64 bits even if both
+ * operands are 32 bits.
+ * Make sure to round up for half microseconds.
*/
- if (data->correction_factor[data->bucket] == 0)
- data->correction_factor[data->bucket] = RESOLUTION * DECAY;
-
- /* Make sure to round up for half microseconds */
- data->predicted_us = div_round64(data->expected_us * data->correction_factor[data->bucket],
+ data->predicted_us = div_round64((uint64_t)data->next_timer_us *
+ data->correction_factor[data->bucket],
RESOLUTION * DECAY);
- detect_repeating_patterns(data);
+ get_typical_interval(data);
+
+ /*
+ * Performance multiplier defines a minimum predicted idle
+ * duration / latency ratio. Adjust the latency limit if
+ * necessary.
+ */
+ interactivity_req = data->predicted_us / performance_multiplier();
+ if (latency_req > interactivity_req)
+ latency_req = interactivity_req;
/*
* We want to default to C1 (hlt), not to busy polling
* unless the timer is happening really really soon.
*/
- if (data->expected_us > 5)
+ if (data->next_timer_us > 5 &&
+ !drv->states[CPUIDLE_DRIVER_STATE_START].disabled &&
+ dev->states_usage[CPUIDLE_DRIVER_STATE_START].disable == 0)
data->last_state_idx = CPUIDLE_DRIVER_STATE_START;
/*
* Find the idle state with the lowest power while satisfying
* our constraints.
*/
- for (i = CPUIDLE_DRIVER_STATE_START; i < dev->state_count; i++) {
- struct cpuidle_state *s = &dev->states[i];
+ for (i = CPUIDLE_DRIVER_STATE_START; i < drv->state_count; i++) {
+ struct cpuidle_state *s = &drv->states[i];
+ struct cpuidle_state_usage *su = &dev->states_usage[i];
- if (s->flags & CPUIDLE_FLAG_IGNORE)
+ if (s->disabled || su->disable)
continue;
if (s->target_residency > data->predicted_us)
continue;
if (s->exit_latency > latency_req)
continue;
- if (s->exit_latency * multiplier > data->predicted_us)
- continue;
- if (s->power_usage < power_usage) {
- power_usage = s->power_usage;
- data->last_state_idx = i;
- data->exit_us = s->exit_latency;
- }
+ data->last_state_idx = i;
}
return data->last_state_idx;
@@ -308,55 +363,69 @@ static int menu_select(struct cpuidle_device *dev)
/**
* menu_reflect - records that data structures need update
* @dev: the CPU
+ * @index: the index of actual entered state
*
* NOTE: it's important to be fast here because this operation will add to
* the overall exit latency.
*/
-static void menu_reflect(struct cpuidle_device *dev)
+static void menu_reflect(struct cpuidle_device *dev, int index)
{
struct menu_device *data = &__get_cpu_var(menu_devices);
- data->needs_update = 1;
+ data->last_state_idx = index;
+ if (index >= 0)
+ data->needs_update = 1;
}
/**
* menu_update - attempts to guess what happened after entry
+ * @drv: cpuidle driver containing state data
* @dev: the CPU
*/
-static void menu_update(struct cpuidle_device *dev)
+static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
{
struct menu_device *data = &__get_cpu_var(menu_devices);
int last_idx = data->last_state_idx;
- unsigned int last_idle_us = cpuidle_get_last_residency(dev);
- struct cpuidle_state *target = &dev->states[last_idx];
+ struct cpuidle_state *target = &drv->states[last_idx];
unsigned int measured_us;
- u64 new_factor;
+ unsigned int new_factor;
/*
- * Ugh, this idle state doesn't support residency measurements, so we
- * are basically lost in the dark. As a compromise, assume we slept
- * for the whole expected time.
+ * Try to figure out how much time passed between entry to low
+ * power state and occurrence of the wakeup event.
+ *
+ * If the entered idle state didn't support residency measurements,
+ * we are basically in the dark about how much time passed.
+ * As a compromise, assume we slept for the whole expected time.
+ *
+ * Any measured amount of time will include the exit latency.
+ * Since we are interested in when the wakeup began, not when it
+ * was completed, we must subtract the exit latency. However, if
+ * the measured amount of time is less than the exit latency,
+ * assume the state was never reached and the exit latency is 0.
*/
- if (unlikely(!(target->flags & CPUIDLE_FLAG_TIME_VALID)))
- last_idle_us = data->expected_us;
+ if (unlikely(!(target->flags & CPUIDLE_FLAG_TIME_VALID))) {
+ /* Use timer value as is */
+ measured_us = data->next_timer_us;
+ } else {
+ /* Use measured value */
+ measured_us = cpuidle_get_last_residency(dev);
- measured_us = last_idle_us;
+ /* Deduct exit latency */
+ if (measured_us > target->exit_latency)
+ measured_us -= target->exit_latency;
- /*
- * We correct for the exit latency; we are assuming here that the
- * exit latency happens after the event that we're interested in.
- */
- if (measured_us > data->exit_us)
- measured_us -= data->exit_us;
-
-
- /* update our correction ratio */
+ /* Make sure our coefficients do not exceed unity */
+ if (measured_us > data->next_timer_us)
+ measured_us = data->next_timer_us;
+ }
- new_factor = data->correction_factor[data->bucket]
- * (DECAY - 1) / DECAY;
+ /* Update our correction ratio */
+ new_factor = data->correction_factor[data->bucket];
+ new_factor -= new_factor / DECAY;
- if (data->expected_us > 0 && measured_us < MAX_INTERESTING)
- new_factor += RESOLUTION * measured_us / data->expected_us;
+ if (data->next_timer_us > 0 && measured_us < MAX_INTERESTING)
+ new_factor += RESOLUTION * measured_us / data->next_timer_us;
else
/*
* we were idle so long that we count it as a perfect
@@ -366,29 +435,41 @@ static void menu_update(struct cpuidle_device *dev)
/*
* We don't want 0 as factor; we always want at least
- * a tiny bit of estimated time.
+ * a tiny bit of estimated time. Fortunately, due to rounding,
+ * new_factor will stay nonzero regardless of measured_us values
+ * and the compiler can eliminate this test as long as DECAY > 1.
*/
- if (new_factor == 0)
+ if (DECAY == 1 && unlikely(new_factor == 0))
new_factor = 1;
data->correction_factor[data->bucket] = new_factor;
/* update the repeating-pattern data */
- data->intervals[data->interval_ptr++] = last_idle_us;
+ data->intervals[data->interval_ptr++] = measured_us;
if (data->interval_ptr >= INTERVALS)
data->interval_ptr = 0;
}
/**
* menu_enable_device - scans a CPU's states and does setup
+ * @drv: cpuidle driver
* @dev: the CPU
*/
-static int menu_enable_device(struct cpuidle_device *dev)
+static int menu_enable_device(struct cpuidle_driver *drv,
+ struct cpuidle_device *dev)
{
struct menu_device *data = &per_cpu(menu_devices, dev->cpu);
+ int i;
memset(data, 0, sizeof(struct menu_device));
+ /*
+ * If the correction factor is 0 (e.g. first-time init or CPU hotplug),
+ * we actually want to start out with a unity factor.
+ */
+ for(i = 0; i < BUCKETS; i++)
+ data->correction_factor[i] = RESOLUTION * DECAY;
+
return 0;
}
@@ -409,14 +490,4 @@ static int __init init_menu(void)
return cpuidle_register_governor(&menu_governor);
}
-/**
- * exit_menu - exits the governor
- */
-static void __exit exit_menu(void)
-{
- cpuidle_unregister_governor(&menu_governor);
-}
-
-MODULE_LICENSE("GPL");
-module_init(init_menu);
-module_exit(exit_menu);
+postcore_initcall(init_menu);