From 1abc4b20b85b42e8573957e54b193385cf48b0d6 Mon Sep 17 00:00:00 2001 From: Dirk Brandewie Date: Tue, 7 May 2013 08:20:25 -0700 Subject: cpufreq / intel_pstate: remove idle time and duration from sample and calculations Idle time is taken into account in the APERF/MPERF ratio calculation; there is no reason for the driver to track it separately. This reduces the work in the driver and makes the code more readable. Removal of the tracking of sample duration removes the possibility of the divide by zero exception when the duration is sub 1us. References: https://bugzilla.kernel.org/show_bug.cgi?id=56691 Reported-by: Mike Lothian Cc: 3.9+ Signed-off-by: Dirk Brandewie Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 45 ++++++++---------------------------------- 1 file changed, 8 insertions(+), 37 deletions(-) (limited to 'drivers/cpufreq/intel_pstate.c') diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index cc3a8e6c92b..c6e10d02b79 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -48,12 +48,7 @@ static inline int32_t div_fp(int32_t x, int32_t y) } struct sample { - ktime_t start_time; - ktime_t end_time; int core_pct_busy; - int pstate_pct_busy; - u64 duration_us; - u64 idletime_us; u64 aperf; u64 mperf; int freq; @@ -91,8 +86,6 @@ struct cpudata { int min_pstate_count; int idle_mode; - ktime_t prev_sample; - u64 prev_idle_time_us; u64 prev_aperf; u64 prev_mperf; int sample_ptr; @@ -450,48 +443,26 @@ static inline void intel_pstate_calc_busy(struct cpudata *cpu, struct sample *sample) { u64 core_pct; - sample->pstate_pct_busy = 100 - div64_u64( - sample->idletime_us * 100, - sample->duration_us); core_pct = div64_u64(sample->aperf * 100, sample->mperf); sample->freq = cpu->pstate.max_pstate * core_pct * 1000; - sample->core_pct_busy = div_s64((sample->pstate_pct_busy * core_pct), - 100); + sample->core_pct_busy = core_pct; } static inline void intel_pstate_sample(struct cpudata *cpu) { 
- ktime_t now; - u64 idle_time_us; u64 aperf, mperf; - now = ktime_get(); - idle_time_us = get_cpu_idle_time_us(cpu->cpu, NULL); - rdmsrl(MSR_IA32_APERF, aperf); rdmsrl(MSR_IA32_MPERF, mperf); - /* for the first sample, don't actually record a sample, just - * set the baseline */ - if (cpu->prev_idle_time_us > 0) { - cpu->sample_ptr = (cpu->sample_ptr + 1) % SAMPLE_COUNT; - cpu->samples[cpu->sample_ptr].start_time = cpu->prev_sample; - cpu->samples[cpu->sample_ptr].end_time = now; - cpu->samples[cpu->sample_ptr].duration_us = - ktime_us_delta(now, cpu->prev_sample); - cpu->samples[cpu->sample_ptr].idletime_us = - idle_time_us - cpu->prev_idle_time_us; - - cpu->samples[cpu->sample_ptr].aperf = aperf; - cpu->samples[cpu->sample_ptr].mperf = mperf; - cpu->samples[cpu->sample_ptr].aperf -= cpu->prev_aperf; - cpu->samples[cpu->sample_ptr].mperf -= cpu->prev_mperf; - - intel_pstate_calc_busy(cpu, &cpu->samples[cpu->sample_ptr]); - } + cpu->sample_ptr = (cpu->sample_ptr + 1) % SAMPLE_COUNT; + cpu->samples[cpu->sample_ptr].aperf = aperf; + cpu->samples[cpu->sample_ptr].mperf = mperf; + cpu->samples[cpu->sample_ptr].aperf -= cpu->prev_aperf; + cpu->samples[cpu->sample_ptr].mperf -= cpu->prev_mperf; + + intel_pstate_calc_busy(cpu, &cpu->samples[cpu->sample_ptr]); - cpu->prev_sample = now; - cpu->prev_idle_time_us = idle_time_us; cpu->prev_aperf = aperf; cpu->prev_mperf = mperf; } -- cgit v1.2.3-18-g5258 From d8f469e9cff3bc4a6317d923e9506be046aa7bdc Mon Sep 17 00:00:00 2001 From: Dirk Brandewie Date: Tue, 7 May 2013 08:20:26 -0700 Subject: cpufreq / intel_pstate: use lowest requested max performance There are two ways that the maximum p-state can be clamped, via a policy change and via the sysfs file. The acpi-thermal driver adjusts the p-state policy in response to thermal events. These changes override the user's settings at the moment. Use the lowest of the two requested values; this ensures that we will not exceed the requested pstate from either mechanism. 
Reported-by: Srinivas Pandruvada Cc: 3.9+ Signed-off-by: Dirk Brandewie Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'drivers/cpufreq/intel_pstate.c') diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index c6e10d02b79..4a437ffc518 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -117,6 +117,8 @@ struct perf_limits { int min_perf_pct; int32_t max_perf; int32_t min_perf; + int max_policy_pct; + int max_sysfs_pct; }; static struct perf_limits limits = { @@ -125,6 +127,8 @@ static struct perf_limits limits = { .max_perf = int_tofp(1), .min_perf_pct = 0, .min_perf = 0, + .max_policy_pct = 100, + .max_sysfs_pct = 100, }; static inline void pid_reset(struct _pid *pid, int setpoint, int busy, @@ -295,7 +299,8 @@ static ssize_t store_max_perf_pct(struct kobject *a, struct attribute *b, if (ret != 1) return -EINVAL; - limits.max_perf_pct = clamp_t(int, input, 0 , 100); + limits.max_sysfs_pct = clamp_t(int, input, 0 , 100); + limits.max_perf_pct = min(limits.max_policy_pct, limits.max_sysfs_pct); limits.max_perf = div_fp(int_tofp(limits.max_perf_pct), int_tofp(100)); return count; } @@ -646,8 +651,9 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy) limits.min_perf_pct = clamp_t(int, limits.min_perf_pct, 0 , 100); limits.min_perf = div_fp(int_tofp(limits.min_perf_pct), int_tofp(100)); - limits.max_perf_pct = policy->max * 100 / policy->cpuinfo.max_freq; - limits.max_perf_pct = clamp_t(int, limits.max_perf_pct, 0 , 100); + limits.max_policy_pct = policy->max * 100 / policy->cpuinfo.max_freq; + limits.max_policy_pct = clamp_t(int, limits.max_policy_pct, 0 , 100); + limits.max_perf_pct = min(limits.max_policy_pct, limits.max_sysfs_pct); limits.max_perf = div_fp(int_tofp(limits.max_perf_pct), int_tofp(100)); return 0; -- cgit v1.2.3-18-g5258 From ca182aee389f8026401510f4c63841cb02c820e8 Mon Sep 17 
00:00:00 2001 From: Dirk Brandewie Date: Tue, 7 May 2013 08:20:27 -0700 Subject: cpufreq / intel_pstate: fix ffmpeg regression The ffmpeg benchmark in the phoronix test suite has threads on multiple cores that rely on the progress of threads on other cores and ping pong back and forth fast enough to make the core appear less busy than it "should" be. If the core has been at minimum p-state for a while, bump the pstate up to kick the core to see if it is in this ping pong state. If the core is truly idle the p-state will be reduced at the next sample time. If the core makes more progress it will send more work to the thread bringing both threads out of the ping pong scenario and the p-state will be selected normally. This fixes a performance regression of approximately 30%. Cc: 3.9+ Signed-off-by: Dirk Brandewie Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'drivers/cpufreq/intel_pstate.c') diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 4a437ffc518..a7f1946b345 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -551,22 +551,16 @@ static void intel_pstate_timer_func(unsigned long __data) struct cpudata *cpu = (struct cpudata *) __data; intel_pstate_sample(cpu); + intel_pstate_adjust_busy_pstate(cpu); - if (!cpu->idle_mode) - intel_pstate_adjust_busy_pstate(cpu); - else - intel_pstate_adjust_idle_pstate(cpu); - -#if defined(XPERF_FIX) if (cpu->pstate.current_pstate == cpu->pstate.min_pstate) { cpu->min_pstate_count++; if (!(cpu->min_pstate_count % 5)) { intel_pstate_set_pstate(cpu, cpu->pstate.max_pstate); - intel_pstate_idle_mode(cpu); } } else cpu->min_pstate_count = 0; -#endif + intel_pstate_set_sample_time(cpu); } -- cgit v1.2.3-18-g5258 From a73108d578559c83e35fa386a4058142a019b8d4 Mon Sep 17 00:00:00 2001 From: Dirk Brandewie Date: Tue, 7 May 2013 08:20:28 -0700 Subject: cpufreq / intel_pstate: 
Remove idle mode PID Remove dead code from the driver. Signed-off-by: Dirk Brandewie Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 49 ------------------------------------------ 1 file changed, 49 deletions(-) (limited to 'drivers/cpufreq/intel_pstate.c') diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index a7f1946b345..b93e3851b5d 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -81,10 +81,8 @@ struct cpudata { struct pstate_adjust_policy *pstate_policy; struct pstate_data pstate; struct _pid pid; - struct _pid idle_pid; int min_pstate_count; - int idle_mode; u64 prev_aperf; u64 prev_mperf; @@ -199,19 +197,6 @@ static inline void intel_pstate_busy_pid_reset(struct cpudata *cpu) 0); } -static inline void intel_pstate_idle_pid_reset(struct cpudata *cpu) -{ - pid_p_gain_set(&cpu->idle_pid, cpu->pstate_policy->p_gain_pct); - pid_d_gain_set(&cpu->idle_pid, cpu->pstate_policy->d_gain_pct); - pid_i_gain_set(&cpu->idle_pid, cpu->pstate_policy->i_gain_pct); - - pid_reset(&cpu->idle_pid, - 75, - 50, - cpu->pstate_policy->deadband, - 0); -} - static inline void intel_pstate_reset_all_pid(void) { unsigned int cpu; @@ -481,16 +466,6 @@ static inline void intel_pstate_set_sample_time(struct cpudata *cpu) mod_timer_pinned(&cpu->timer, jiffies + delay); } -static inline void intel_pstate_idle_mode(struct cpudata *cpu) -{ - cpu->idle_mode = 1; -} - -static inline void intel_pstate_normal_mode(struct cpudata *cpu) -{ - cpu->idle_mode = 0; -} - static inline int intel_pstate_get_scaled_busy(struct cpudata *cpu) { int32_t busy_scaled; @@ -523,29 +498,6 @@ static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu) intel_pstate_pstate_decrease(cpu, steps); } -static inline void intel_pstate_adjust_idle_pstate(struct cpudata *cpu) -{ - int busy_scaled; - struct _pid *pid; - int ctl = 0; - int steps; - - pid = &cpu->idle_pid; - - busy_scaled = intel_pstate_get_scaled_busy(cpu); - - ctl = 
pid_calc(pid, 100 - busy_scaled); - - steps = abs(ctl); - if (ctl < 0) - intel_pstate_pstate_decrease(cpu, steps); - else - intel_pstate_pstate_increase(cpu, steps); - - if (cpu->pstate.current_pstate == cpu->pstate.min_pstate) - intel_pstate_normal_mode(cpu); -} - static void intel_pstate_timer_func(unsigned long __data) { struct cpudata *cpu = (struct cpudata *) __data; @@ -601,7 +553,6 @@ static int intel_pstate_init_cpu(unsigned int cpunum) (unsigned long)cpu; cpu->timer.expires = jiffies + HZ/100; intel_pstate_busy_pid_reset(cpu); - intel_pstate_idle_pid_reset(cpu); intel_pstate_sample(cpu); intel_pstate_set_pstate(cpu, cpu->pstate.max_pstate); -- cgit v1.2.3-18-g5258 From 35363e943f2b0aa503e1dd55f894f736563e85a3 Mon Sep 17 00:00:00 2001 From: Dirk Brandewie Date: Tue, 7 May 2013 08:20:30 -0700 Subject: cpufreq / intel_pstate: remove #ifdef MODULE compile fence The driver can no longer be built as a module remove the compile fence around cpufreq tracing call. Signed-off-by: Dirk Brandewie Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/cpufreq/intel_pstate.c') diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index b93e3851b5d..0cc7d60525a 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -391,9 +391,8 @@ static void intel_pstate_set_pstate(struct cpudata *cpu, int pstate) if (pstate == cpu->pstate.current_pstate) return; -#ifndef MODULE trace_cpu_frequency(pstate * 100000, cpu->cpu); -#endif + cpu->pstate.current_pstate = pstate; wrmsrl(MSR_IA32_PERF_CTL, pstate << 8); -- cgit v1.2.3-18-g5258 From b57ffac5e57bff33dde3cff35dff5c41876a6d12 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Mon, 13 May 2013 08:03:43 +0000 Subject: cpufreq / intel_pstate: use vzalloc() instead of vmalloc()/memset(0) Use vzalloc() instead of vmalloc() and memset(0). 
Signed-off-by: Wei Yongjun Acked-by: Viresh Kumar Acked-by: Dirk Brandewie Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/cpufreq/intel_pstate.c') diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 0cc7d60525a..9c36ace92a3 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -709,10 +709,9 @@ static int __init intel_pstate_init(void) pr_info("Intel P-state driver initializing.\n"); - all_cpu_data = vmalloc(sizeof(void *) * num_possible_cpus()); + all_cpu_data = vzalloc(sizeof(void *) * num_possible_cpus()); if (!all_cpu_data) return -ENOMEM; - memset(all_cpu_data, 0, sizeof(void *) * num_possible_cpus()); rc = cpufreq_register_driver(&intel_pstate_driver); if (rc) -- cgit v1.2.3-18-g5258