diff mbox

cpuidle/powernv: Read target_residency value of idle states from DT if available

Message ID 54BE3B79.9070004@linux.vnet.ibm.com (mailing list archive)
State Not Applicable, archived
Headers show

Commit Message

preeti Jan. 20, 2015, 11:26 a.m. UTC
On 01/20/2015 11:15 AM, Michael Ellerman wrote:
> On Mon, 2015-19-01 at 11:32:51 UTC, Preeti U Murthy wrote:
>> The device tree now exposes the residency values for different idle states. Read
>> these values instead of calculating residency from the latency values. The values
>> exposed in the DT are validated for optimal power efficiency. However to maintain
>> compatibility with the older firmware code which does not expose residency
>> values, use default values as a fallback mechanism. While at it, clump
>> the common parts of device tree parsing into one chunk.
>>
>> Signed-off-by: Preeti U Murthy <preeti@linux.vnet.ibm.com>
>> ---
>>
>>  drivers/cpuidle/cpuidle-powernv.c |   39 ++++++++++++++++++++++++-------------
>>  1 file changed, 25 insertions(+), 14 deletions(-)
>>
>> diff --git a/drivers/cpuidle/cpuidle-powernv.c b/drivers/cpuidle/cpuidle-powernv.c
>> index 2726663..9e732e1 100644
>> --- a/drivers/cpuidle/cpuidle-powernv.c
>> +++ b/drivers/cpuidle/cpuidle-powernv.c
>> @@ -162,7 +162,8 @@ static int powernv_add_idle_states(void)
>>  	int dt_idle_states;
>>  	const __be32 *idle_state_flags;
>>  	const __be32 *idle_state_latency;
>> -	u32 len_flags, flags, latency_ns;
>> +	const __be32 *idle_state_residency;
>> +	u32 len_flags, flags, latency_ns, residency_ns;
>>  	int i;
>>  
>>  	/* Currently we have snooze statically defined */
>> @@ -186,14 +187,21 @@ static int powernv_add_idle_states(void)
>>  		return nr_idle_states;
>>  	}
>>  
>> +	idle_state_residency = of_get_property(power_mgt,
>> +			"ibm,cpu-idle-state-residency-ns", NULL);
>> +	if (!idle_state_residency) {
>> +		pr_warn("DT-PowerMgmt: missing ibm,cpu-idle-state-residency-ns\n");
>> +		pr_warn("Falling back to default values\n");
>> +	}
> 
> This would be better done with something like:
> 
> 	rc = of_read_property_u32(power_mgt, "ibm,cpu-idle-state-residency-ns", &residency_ns);
> 	if (!rc) {
> 		pr_info("cpuidle-powernv: missing ibm,cpu-idle-state-residency-ns\n");
> 		residency_ns = 300000;
> 	}

This looks like a better API, but the default residency values are different
for each idle state. So perhaps a patch like the below ?

--------------------------Start Patch------------------------------------------

From: Preeti U Murthy <preeti@linux.vnet.ibm.com>


---
 drivers/cpuidle/cpuidle-powernv.c |   54 +++++++++++++++++++++++--------------
 1 file changed, 33 insertions(+), 21 deletions(-)


Regards
Preeti U Murthy

> 
> cheers
> _______________________________________________
> Linuxppc-dev mailing list
> Linuxppc-dev@lists.ozlabs.org
> https://lists.ozlabs.org/listinfo/linuxppc-dev
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-pm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Michael Ellerman Jan. 23, 2015, 4:59 a.m. UTC | #1
On Tue, 2015-20-01 at 11:26:49 UTC, Preeti U Murthy wrote:
> @@ -177,34 +178,39 @@ static int powernv_add_idle_states(void)
>  		return nr_idle_states;
>  	}
>  
> -	idle_state_latency = of_get_property(power_mgt,
> -			"ibm,cpu-idle-state-latencies-ns", NULL);
> -	if (!idle_state_latency) {
> +	dt_idle_states = len_flags / sizeof(u32);
> +
> +	latency_ns = kzalloc(sizeof(*latency_ns) * dt_idle_states, GFP_KERNEL);
> +	rc = of_property_read_u32(power_mgt,
> +			"ibm,cpu-idle-state-latencies-ns", latency_ns);

That's only reading the first value.

If you want to read the full property you need the _array version.

> +	if (rc) {
>  		pr_warn("DT-PowerMgmt: missing ibm,cpu-idle-state-latencies-ns\n");

You missed my hint that "DT-PowerMgmt" is a weird and ugly prefix. Can you use
"cpuidle-powernv:" instead?

>  
> -	dt_idle_states = len_flags / sizeof(u32);
> +	residency_ns = kzalloc(sizeof(*residency_ns) * dt_idle_states, GFP_KERNEL);
> +	rc = of_property_read_u32(power_mgt,
> +			"ibm,cpu-idle-state-residency-ns", residency_ns);
> +	if (rc) {
> +		pr_warn("DT-PowerMgmt: missing ibm,cpu-idle-state-residency-ns\n");
> +		pr_warn("Falling back to default values\n");

I don't think this is worth a warning seeing as we know there are firmwares out
there which do not have the property.

>  	for (i = 0; i < dt_idle_states; i++) {
>  
>  		flags = be32_to_cpu(idle_state_flags[i]);
> -
> -		/* Cpuidle accepts exit_latency in us and we estimate
> -		 * target residency to be 10x exit_latency
> +		/*
> +		 * Cpuidle accepts exit_latency and target_residency in us.
> +		 * Use default target_residency values if f/w does not expose it.
>  		 */
> -		latency_ns = be32_to_cpu(idle_state_latency[i]);
>  		if (flags & OPAL_PM_NAP_ENABLED) {
>  			/* Add NAP state */
>  			strcpy(powernv_states[nr_idle_states].name, "Nap");
>  			strcpy(powernv_states[nr_idle_states].desc, "Nap");
>  			powernv_states[nr_idle_states].flags = 0;
> -			powernv_states[nr_idle_states].exit_latency =
> -					((unsigned int)latency_ns) / 1000;
> -			powernv_states[nr_idle_states].target_residency =
> -					((unsigned int)latency_ns / 100);
> +			powernv_states[nr_idle_states].target_residency = 100;
>  			powernv_states[nr_idle_states].enter = &nap_loop;
> -			nr_idle_states++;

That looks wrong? Or do you mean to do that?

cheers
--
To unsubscribe from this list: send the line "unsubscribe linux-pm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
preeti Jan. 27, 2015, 4:02 a.m. UTC | #2
On 01/23/2015 10:29 AM, Michael Ellerman wrote:
> On Tue, 2015-20-01 at 11:26:49 UTC, Preeti U Murthy wrote:
>> @@ -177,34 +178,39 @@ static int powernv_add_idle_states(void)
>>  		return nr_idle_states;
>>  	}
>>  
>> -	idle_state_latency = of_get_property(power_mgt,
>> -			"ibm,cpu-idle-state-latencies-ns", NULL);
>> -	if (!idle_state_latency) {
>> +	dt_idle_states = len_flags / sizeof(u32);
>> +
>> +	latency_ns = kzalloc(sizeof(*latency_ns) * dt_idle_states, GFP_KERNEL);
>> +	rc = of_property_read_u32(power_mgt,
>> +			"ibm,cpu-idle-state-latencies-ns", latency_ns);
> 
> That's only reading the first value.
> 
> If you want to read the full property you need the _array version.

Right, thanks for pointing this out.
> 
>> +	if (rc) {
>>  		pr_warn("DT-PowerMgmt: missing ibm,cpu-idle-state-latencies-ns\n");
> 
> You missed my hint that "DT-PowerMgmt" is a weird and ugly prefix. Can you use
> "cpuidle-powernv:" instead?

Yes will change this.
> 
>>  
>> -	dt_idle_states = len_flags / sizeof(u32);
>> +	residency_ns = kzalloc(sizeof(*residency_ns) * dt_idle_states, GFP_KERNEL);
>> +	rc = of_property_read_u32(power_mgt,
>> +			"ibm,cpu-idle-state-residency-ns", residency_ns);
>> +	if (rc) {
>> +		pr_warn("DT-PowerMgmt: missing ibm,cpu-idle-state-residency-ns\n");
>> +		pr_warn("Falling back to default values\n");
> 
> I don't think this is worth a warning seeing as we know there are firmwares out
> there which do not have the property.
> 
>>  	for (i = 0; i < dt_idle_states; i++) {
>>  
>>  		flags = be32_to_cpu(idle_state_flags[i]);
>> -
>> -		/* Cpuidle accepts exit_latency in us and we estimate
>> -		 * target residency to be 10x exit_latency
>> +		/*
>> +		 * Cpuidle accepts exit_latency and target_residency in us.
>> +		 * Use default target_residency values if f/w does not expose it.
>>  		 */
>> -		latency_ns = be32_to_cpu(idle_state_latency[i]);
>>  		if (flags & OPAL_PM_NAP_ENABLED) {
>>  			/* Add NAP state */
>>  			strcpy(powernv_states[nr_idle_states].name, "Nap");
>>  			strcpy(powernv_states[nr_idle_states].desc, "Nap");
>>  			powernv_states[nr_idle_states].flags = 0;
>> -			powernv_states[nr_idle_states].exit_latency =
>> -					((unsigned int)latency_ns) / 1000;
>> -			powernv_states[nr_idle_states].target_residency =
>> -					((unsigned int)latency_ns / 100);
>> +			powernv_states[nr_idle_states].target_residency = 100;
>>  			powernv_states[nr_idle_states].enter = &nap_loop;
>> -			nr_idle_states++;
> 
> That looks wrong? Or do you mean to do that?

Are you pointing at removing the lines that populate the exit_latency ?
I read the latency value from the DT outside of these conditions because
it is guaranteed to be present at this point. We return back from this
function early on if we do not find it in the DT.

If you are pointing at the lines that remove increment of
nr_idle_states, it has to be done so towards the end of the iteration
because we are yet to populate the exit_latency and target_residency
outside of these conditions using this index.

Regards
Preeti U Murthy
> 
> cheers
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-pm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Michael Ellerman Jan. 27, 2015, 6:28 a.m. UTC | #3
On Tue, 2015-01-27 at 09:32 +0530, Preeti U Murthy wrote:
> On 01/23/2015 10:29 AM, Michael Ellerman wrote:
> > On Tue, 2015-20-01 at 11:26:49 UTC, Preeti U Murthy wrote:
> >> @@ -177,34 +178,39 @@ static int powernv_add_idle_states(void)
> > 
> >>  	for (i = 0; i < dt_idle_states; i++) {
> >>  
> >>  		flags = be32_to_cpu(idle_state_flags[i]);
> >> -
> >> -		/* Cpuidle accepts exit_latency in us and we estimate
> >> -		 * target residency to be 10x exit_latency
> >> +		/*
> >> +		 * Cpuidle accepts exit_latency and target_residency in us.
> >> +		 * Use default target_residency values if f/w does not expose it.
> >>  		 */
> >> -		latency_ns = be32_to_cpu(idle_state_latency[i]);
> >>  		if (flags & OPAL_PM_NAP_ENABLED) {
> >>  			/* Add NAP state */
> >>  			strcpy(powernv_states[nr_idle_states].name, "Nap");
> >>  			strcpy(powernv_states[nr_idle_states].desc, "Nap");
> >>  			powernv_states[nr_idle_states].flags = 0;
> >> -			powernv_states[nr_idle_states].exit_latency =
> >> -					((unsigned int)latency_ns) / 1000;
> >> -			powernv_states[nr_idle_states].target_residency =
> >> -					((unsigned int)latency_ns / 100);
> >> +			powernv_states[nr_idle_states].target_residency = 100;
> >>  			powernv_states[nr_idle_states].enter = &nap_loop;
> >> -			nr_idle_states++;
> > 
> > That looks wrong? Or do you mean to do that?
> 
> If you are pointing at the lines that remove increment of
> nr_idle_states, it has to be done so towards the end of the iteration
> because we are yet to populate the exit_latency and target_residency
> outside of these conditions using this index.

Oh OK I think I get it, it's not clear at all.

The resulting code is:

	for (i = 0; i < dt_idle_states; i++) {

		flags = be32_to_cpu(idle_state_flags[i]);
		/*
		 * Cpuidle accepts exit_latency and target_residency in us.
		 * Use default target_residency values if f/w does not expose it.
		 */
		if (flags & OPAL_PM_NAP_ENABLED) {
			/* Add NAP state */
			strcpy(powernv_states[nr_idle_states].name, "Nap");
			strcpy(powernv_states[nr_idle_states].desc, "Nap");
			powernv_states[nr_idle_states].flags = 0;
			powernv_states[nr_idle_states].target_residency = 100;
			powernv_states[nr_idle_states].enter = &nap_loop;
		}

		if (flags & OPAL_PM_SLEEP_ENABLED ||
			flags & OPAL_PM_SLEEP_ENABLED_ER1) {
			/* Add FASTSLEEP state */
			strcpy(powernv_states[nr_idle_states].name, "FastSleep");
			strcpy(powernv_states[nr_idle_states].desc, "FastSleep");
			powernv_states[nr_idle_states].flags = CPUIDLE_FLAG_TIMER_STOP;
			powernv_states[nr_idle_states].target_residency = 300000;
			powernv_states[nr_idle_states].enter = &fastsleep_loop;
		}

		powernv_states[nr_idle_states].exit_latency =
				((unsigned int)latency_ns[i]) / 1000;

		if (!rc) {
			powernv_states[nr_idle_states].target_residency =
				((unsigned int)residency_ns[i]) / 1000;
		}

		nr_idle_states++;
	}


Which looks like you could potentially overwrite the first set of values, "Nap"
with "FastSleep". But I think what you're going to tell me is that flags can
never satisfy both conditions, so we will only ever execute either the "Nap"
case OR the "FastSleep".

If so please change the code to make that obvious, ie. by using else if.

cheers


--
To unsubscribe from this list: send the line "unsubscribe linux-pm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/cpuidle/cpuidle-powernv.c b/drivers/cpuidle/cpuidle-powernv.c
index aedec09..2031560 100644
--- a/drivers/cpuidle/cpuidle-powernv.c
+++ b/drivers/cpuidle/cpuidle-powernv.c
@@ -13,6 +13,7 @@ 
 #include <linux/notifier.h>
 #include <linux/clockchips.h>
 #include <linux/of.h>
+#include <linux/slab.h>
 
 #include <asm/machdep.h>
 #include <asm/firmware.h>
@@ -159,9 +160,9 @@  static int powernv_add_idle_states(void)
 	int nr_idle_states = 1; /* Snooze */
 	int dt_idle_states;
 	const __be32 *idle_state_flags;
-	const __be32 *idle_state_latency;
-	u32 len_flags, flags, latency_ns;
-	int i;
+	u32 len_flags, flags;
+	u32 *latency_ns, *residency_ns;
+	int i, rc;
 
 	/* Currently we have snooze statically defined */
 
@@ -177,34 +178,39 @@  static int powernv_add_idle_states(void)
 		return nr_idle_states;
 	}
 
-	idle_state_latency = of_get_property(power_mgt,
-			"ibm,cpu-idle-state-latencies-ns", NULL);
-	if (!idle_state_latency) {
+	dt_idle_states = len_flags / sizeof(u32);
+
+	latency_ns = kzalloc(sizeof(*latency_ns) * dt_idle_states, GFP_KERNEL);
+	rc = of_property_read_u32(power_mgt,
+			"ibm,cpu-idle-state-latencies-ns", latency_ns);
+	if (rc) {
 		pr_warn("DT-PowerMgmt: missing ibm,cpu-idle-state-latencies-ns\n");
 		return nr_idle_states;
 	}
 
-	dt_idle_states = len_flags / sizeof(u32);
+	residency_ns = kzalloc(sizeof(*residency_ns) * dt_idle_states, GFP_KERNEL);
+	rc = of_property_read_u32(power_mgt,
+			"ibm,cpu-idle-state-residency-ns", residency_ns);
+	if (rc) {
+		pr_warn("DT-PowerMgmt: missing ibm,cpu-idle-state-residency-ns\n");
+		pr_warn("Falling back to default values\n");
+	}
+
 
 	for (i = 0; i < dt_idle_states; i++) {
 
 		flags = be32_to_cpu(idle_state_flags[i]);
-
-		/* Cpuidle accepts exit_latency in us and we estimate
-		 * target residency to be 10x exit_latency
+		/*
+		 * Cpuidle accepts exit_latency and target_residency in us.
+		 * Use default target_residency values if f/w does not expose it.
 		 */
-		latency_ns = be32_to_cpu(idle_state_latency[i]);
 		if (flags & OPAL_PM_NAP_ENABLED) {
 			/* Add NAP state */
 			strcpy(powernv_states[nr_idle_states].name, "Nap");
 			strcpy(powernv_states[nr_idle_states].desc, "Nap");
 			powernv_states[nr_idle_states].flags = 0;
-			powernv_states[nr_idle_states].exit_latency =
-					((unsigned int)latency_ns) / 1000;
-			powernv_states[nr_idle_states].target_residency =
-					((unsigned int)latency_ns / 100);
+			powernv_states[nr_idle_states].target_residency = 100;
 			powernv_states[nr_idle_states].enter = &nap_loop;
-			nr_idle_states++;
 		}
 
 		if (flags & OPAL_PM_SLEEP_ENABLED ||
@@ -213,13 +219,19 @@  static int powernv_add_idle_states(void)
 			strcpy(powernv_states[nr_idle_states].name, "FastSleep");
 			strcpy(powernv_states[nr_idle_states].desc, "FastSleep");
 			powernv_states[nr_idle_states].flags = CPUIDLE_FLAG_TIMER_STOP;
-			powernv_states[nr_idle_states].exit_latency =
-					((unsigned int)latency_ns) / 1000;
-			powernv_states[nr_idle_states].target_residency =
-					((unsigned int)latency_ns / 100);
+			powernv_states[nr_idle_states].target_residency = 300000;
 			powernv_states[nr_idle_states].enter = &fastsleep_loop;
-			nr_idle_states++;
 		}
+
+		powernv_states[nr_idle_states].exit_latency =
+				((unsigned int)latency_ns[i]) / 1000;
+
+		if (!rc) {
+			powernv_states[nr_idle_states].target_residency =
+				((unsigned int)residency_ns[i]) / 1000;
+		}
+
+		nr_idle_states++;
 	}
 
 	return nr_idle_states;