@@ -919,6 +919,12 @@ static inline int32_t get_avg_frequency(struct cpudata *cpu)
cpu->pstate.scaling, cpu->sample.mperf);
}
+static inline int32_t get_avg_pstate(struct cpudata *cpu)
+{ /* Average P-state for the sample: max_pstate_physical * aperf / mperf. */
+ return div64_u64(cpu->pstate.max_pstate_physical * cpu->sample.aperf,
+ cpu->sample.mperf); /* NOTE(review): mperf == 0 is not checked here - confirm callers rule it out */
+}
+
static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu)
{
struct sample *sample = &cpu->sample;
@@ -951,7 +957,7 @@ static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu)
cpu_load = div64_u64(int_tofp(100) * mperf, sample->tsc);
cpu->sample.busy_scaled = cpu_load;
- return cpu->pstate.current_pstate - pid_calc(&cpu->pid, cpu_load);
+ return get_avg_pstate(cpu) - pid_calc(&cpu->pid, cpu_load);
}
static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu)
The result returned by pid_calc() is subtracted from current_pstate
(which is the pstate requested during the last period) in order to
obtain the target pstate for the current iteration.

However, current_pstate may not reflect the real current P-state of
the CPU. In particular, that P-state may be higher because of the
frequency sharing per module.

The theory is:
  - The load is the percentage of time spent in C0 and is related to
    the average frequency during the same period (we will not have the
    same load at 1GHz or at 2GHz for the same task running).
  - The current frequency can be completely different from the average
    frequency (because of frequency sharing or throttling).
=> The frequency shift computed by pid_calc() is based on the load, so
   it must be applied to the frequency with which the load was measured.

Using the average pstate instead of the current pstate solves some
migration issues (e.g. when a task migrates from one core to another in
the same package/module and all of the cores in there except for that
particular one are basically idle).

Performance and power comparison with this patch on Android:

IPLoad+Avg-Pstate vs IP Load:

Benchmark                 ΔPerf     ΔPower
FishTank                  10.45%    3.1%
SmartBench-Gaming         -0.1%     -10.4%
SmartBench-Productivity   -0.8%     -10.4%
CandyCrush                n/a       -17.4%
AngryBirds                n/a       -5.9%
videoPlayback             n/a       -13.9%
audioPlayback             n/a       -4.9%
IcyRocks-20-50            0.0%      -38.4%
iozone RR                 -0.16%    -1.3%
iozone RW                 0.74%     -1.3%

Comparison with the perf algorithm (this patch in cpu_load vs Core
algorithm):

Benchmark                 ΔPerf     ΔPower
SmartBench-Gaming         -0.58%    -22.8%
SmartBench-Productivity   0.82%
CandyCrush                n/a       -20.8%
AngryBirds                n/a       -37.0%
videoPlayback             n/a       -53.4%
audioPlayback             n/a       -2.1%
iozone RR                 -0.55%    -13.29%
iozone RW                 2.22%

=> No regression > 1% observed and a huge power improvement!

Signed-off-by: Philippe Longepe <philippe.longepe@linux.intel.com>
---
 drivers/cpufreq/intel_pstate.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)