@@ -317,6 +317,14 @@ static int powernv_cpufreq_cpu_init(struct cpufreq_policy *policy)
return cpufreq_table_validate_and_show(policy, powernv_freqs);
}
+/* stop_cpu hook: run set_pstate on policy->cpu with the minimum pstate. */
+static void powernv_cpufreq_stop_cpu(struct cpufreq_policy *policy)
+{
+	struct powernv_smp_call_data freq_data = { .pstate_id = powernv_pstate_info.min };
+
+	smp_call_function_single(policy->cpu, set_pstate, &freq_data, 1);
+}
+
static struct cpufreq_driver powernv_cpufreq_driver = {
.name = "powernv-cpufreq",
.flags = CPUFREQ_CONST_LOOPS,
@@ -324,6 +332,7 @@ static struct cpufreq_driver powernv_cpufreq_driver = {
.verify = cpufreq_generic_frequency_table_verify,
.target_index = powernv_cpufreq_target_index,
.get = powernv_cpufreq_get,
+ .stop_cpu = powernv_cpufreq_stop_cpu,
.attr = powernv_cpu_freq_attr,
};
It is possible today for the pstate of a core to be held at a high value even after the entire core has been hotplugged out, if a load had just run on the hotplugged cpu. This is fair, since it is assumed that the pstate does not matter to a cpu in a deep idle state, which is the expected state of a hotplugged core on powerpc. However, on powerpc the pstate at the socket level is held at the maximum of the pstates of each core. Even if the pstates of the active cores on that socket are low, the socket pstate is held high because of the pstate of the hotplugged core in the above-mentioned scenario. This can waste a significant amount of power for no benefit. Besides, since it is an inactive core, nothing can be done from the kernel's end to correct the frequency of the core afterwards. Hence make use of the stop_cpu callback to explicitly set the pstate of the core to the minimum when the last cpu of the core gets hotplugged out. Signed-off-by: Preeti U Murthy <preeti@linux.vnet.ibm.com> --- drivers/cpufreq/powernv-cpufreq.c | 9 +++++++++ 1 file changed, 9 insertions(+) -- To unsubscribe from this list: send the line "unsubscribe linux-pm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html