[RFC,v2,1/8] sched/cpufreq_schedutil: make use of DEADLINE utilization signal

Message ID 20171204102325.5110-2-juri.lelli@redhat.com (mailing list archive)
State RFC, archived

Commit Message

Juri Lelli Dec. 4, 2017, 10:23 a.m. UTC
From: Juri Lelli <juri.lelli@arm.com>

SCHED_DEADLINE tracks the active utilization signal with a per dl_rq
variable named running_bw.

Make use of that to drive CPU frequency selection: add up the FAIR and
DEADLINE contributions to get the required CPU capacity to handle both
requirements (while RT still selects the maximum frequency).

Co-authored-by: Claudio Scordino <claudio@evidence.eu.com>
Signed-off-by: Juri Lelli <juri.lelli@arm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: Viresh Kumar <viresh.kumar@linaro.org>
Cc: Luca Abeni <luca.abeni@santannapisa.it>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 include/linux/sched/cpufreq.h    |  2 --
 kernel/sched/cpufreq_schedutil.c | 25 +++++++++++++++----------
 2 files changed, 15 insertions(+), 12 deletions(-)

Comments

Patrick Bellasi Dec. 5, 2017, 3:09 p.m. UTC | #1
Hi Juri,

On 04-Dec 11:23, Juri Lelli wrote:
> From: Juri Lelli <juri.lelli@arm.com>
> 
> SCHED_DEADLINE tracks the active utilization signal with a per dl_rq
> variable named running_bw.
> 
> Make use of that to drive CPU frequency selection: add up the FAIR and
> DEADLINE contributions to get the required CPU capacity to handle both
> requirements (while RT still selects the maximum frequency).
> 
> Co-authored-by: Claudio Scordino <claudio@evidence.eu.com>
> Signed-off-by: Juri Lelli <juri.lelli@arm.com>
> Cc: Peter Zijlstra <peterz@infradead.org>
> Cc: Ingo Molnar <mingo@kernel.org>
> Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
> Cc: Viresh Kumar <viresh.kumar@linaro.org>
> Cc: Luca Abeni <luca.abeni@santannapisa.it>
> Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
> ---
>  include/linux/sched/cpufreq.h    |  2 --
>  kernel/sched/cpufreq_schedutil.c | 25 +++++++++++++++----------
>  2 files changed, 15 insertions(+), 12 deletions(-)
> 
> diff --git a/include/linux/sched/cpufreq.h b/include/linux/sched/cpufreq.h
> index d1ad3d825561..0b55834efd46 100644
> --- a/include/linux/sched/cpufreq.h
> +++ b/include/linux/sched/cpufreq.h
> @@ -12,8 +12,6 @@
>  #define SCHED_CPUFREQ_DL	(1U << 1)
>  #define SCHED_CPUFREQ_IOWAIT	(1U << 2)
>  
> -#define SCHED_CPUFREQ_RT_DL	(SCHED_CPUFREQ_RT | SCHED_CPUFREQ_DL)
> -
>  #ifdef CONFIG_CPU_FREQ
>  struct update_util_data {
>         void (*func)(struct update_util_data *data, u64 time, unsigned int flags);
> diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
> index 2f52ec0f1539..de1ad1fffbdc 100644
> --- a/kernel/sched/cpufreq_schedutil.c
> +++ b/kernel/sched/cpufreq_schedutil.c
> @@ -179,12 +179,17 @@ static unsigned int get_next_freq(struct sugov_policy *sg_policy,
>  static void sugov_get_util(unsigned long *util, unsigned long *max, int cpu)
>  {
>  	struct rq *rq = cpu_rq(cpu);
> -	unsigned long cfs_max;
> +	unsigned long dl_util = (rq->dl.running_bw * SCHED_CAPACITY_SCALE)
> +				>> BW_SHIFT;

What about using a pair of getter methods (e.g. cpu_util_{cfs,dl}) to
be defined in kernel/sched/sched.h?

This would help hide class-specific signal mangling from cpufreq.
And here we can have something "more abstract" like:

       unsigned long util_cfs = cpu_util_cfs(rq);
       unsigned long util_dl = cpu_util_dl(rq);
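
A minimal sketch of how those getters could look, assuming only the
signals this patch already reads (hypothetical, just to illustrate the
idea):

       static inline unsigned long cpu_util_dl(struct rq *rq)
       {
               /* running_bw is BW_SHIFT fixed-point; rescale to capacity units. */
               return (rq->dl.running_bw * SCHED_CAPACITY_SCALE) >> BW_SHIFT;
       }

       static inline unsigned long cpu_util_cfs(struct rq *rq)
       {
               return rq->cfs.avg.util_avg;
       }

As a worked example: a DL task with 10ms runtime every 100ms contributes
roughly 0.1 << BW_SHIFT to running_bw, which cpu_util_dl() rescales to
~102 out of SCHED_CAPACITY_SCALE (1024).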

>  
> -	cfs_max = arch_scale_cpu_capacity(NULL, cpu);
> +	*max = arch_scale_cpu_capacity(NULL, cpu);
>  
> -	*util = min(rq->cfs.avg.util_avg, cfs_max);
> -	*max = cfs_max;
> +	/*
> +	 * Ideally we would like to set util_dl as min/guaranteed freq and
> +	 * util_cfs + util_dl as requested freq. However, cpufreq is not yet
> +	 * ready for such an interface. So, we only do the latter for now.
> +	 */

Maybe I don't completely get the above comment, but to me it is not
really required.

When you say that "util_dl" should be set to a min/guaranteed freq,
are you not actually talking about a DL implementation detail?

From the cpufreq standpoint instead, we should always set a capacity
which can accommodate util_dl + util_cfs.

We don't care about the meaning of util_dl and we should always assume
(by default) that the signal is properly updated by the scheduling
class... which unfortunately does not always happen for CFS.


> +	*util = min(rq->cfs.avg.util_avg + dl_util, *max);

With the above proposal, here also we will have:

	*util = min(util_cfs + util_dl, *max);

>  }
>  
>  static void sugov_set_iowait_boost(struct sugov_cpu *sg_cpu, u64 time,
> @@ -272,7 +277,7 @@ static void sugov_update_single(struct update_util_data *hook, u64 time,
>  
>  	busy = sugov_cpu_is_busy(sg_cpu);
>  
> -	if (flags & SCHED_CPUFREQ_RT_DL) {
> +	if (flags & SCHED_CPUFREQ_RT) {
>  		next_f = policy->cpuinfo.max_freq;
>  	} else {
>  		sugov_get_util(&util, &max, sg_cpu->cpu);
> @@ -317,7 +322,7 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time)
>  			j_sg_cpu->iowait_boost_pending = false;
>  			continue;
>  		}
> -		if (j_sg_cpu->flags & SCHED_CPUFREQ_RT_DL)
> +		if (j_sg_cpu->flags & SCHED_CPUFREQ_RT)
>  			return policy->cpuinfo.max_freq;
>  
>  		j_util = j_sg_cpu->util;
> @@ -353,7 +358,7 @@ static void sugov_update_shared(struct update_util_data *hook, u64 time,
>  	sg_cpu->last_update = time;
>  
>  	if (sugov_should_update_freq(sg_policy, time)) {
> -		if (flags & SCHED_CPUFREQ_RT_DL)
> +		if (flags & SCHED_CPUFREQ_RT)
>  			next_f = sg_policy->policy->cpuinfo.max_freq;
>  		else
>  			next_f = sugov_next_freq_shared(sg_cpu, time);
> @@ -383,9 +388,9 @@ static void sugov_irq_work(struct irq_work *irq_work)
>  	sg_policy = container_of(irq_work, struct sugov_policy, irq_work);
>  
>  	/*
> -	 * For RT and deadline tasks, the schedutil governor shoots the
> -	 * frequency to maximum. Special care must be taken to ensure that this
> -	 * kthread doesn't result in the same behavior.
> +	 * For RT tasks, the schedutil governor shoots the frequency to maximum.
> +	 * Special care must be taken to ensure that this kthread doesn't result
> +	 * in the same behavior.
>  	 *
>  	 * This is (mostly) guaranteed by the work_in_progress flag. The flag is
>  	 * updated only at the end of the sugov_work() function and before that
> -- 
> 2.14.3
>
Juri Lelli Dec. 5, 2017, 3:24 p.m. UTC | #2
Hi,

On 05/12/17 15:09, Patrick Bellasi wrote:
> Hi Juri,
> 

[...]

> >  static void sugov_get_util(unsigned long *util, unsigned long *max, int cpu)
> >  {
> >  	struct rq *rq = cpu_rq(cpu);
> > -	unsigned long cfs_max;
> > +	unsigned long dl_util = (rq->dl.running_bw * SCHED_CAPACITY_SCALE)
> > +				>> BW_SHIFT;
> 
> What about using a pair of getter methods (e.g. cpu_util_{cfs,dl}) to
> be defined in kernel/sched/sched.h?
> 
> This would help hide class-specific signal mangling from cpufreq.
> And here we can have something "more abstract" like:
> 
>        unsigned long util_cfs = cpu_util_cfs(rq);
>        unsigned long util_dl = cpu_util_dl(rq);

LGTM. I'll cook something for next spin.

> 
> >  
> > -	cfs_max = arch_scale_cpu_capacity(NULL, cpu);
> > +	*max = arch_scale_cpu_capacity(NULL, cpu);
> >  
> > -	*util = min(rq->cfs.avg.util_avg, cfs_max);
> > -	*max = cfs_max;
> > +	/*
> > +	 * Ideally we would like to set util_dl as min/guaranteed freq and
> > +	 * util_cfs + util_dl as requested freq. However, cpufreq is not yet
> > +	 * ready for such an interface. So, we only do the latter for now.
> > +	 */
> 
> Maybe I don't completely get the above comment, but to me it is not
> really required.
> 
> When you say that "util_dl" should be set to a min/guaranteed freq,
> are you not actually talking about a DL implementation detail?
> 
> From the cpufreq standpoint instead, we should always set a capacity
> which can accommodate util_dl + util_cfs.

It's more for platforms which support such a combination of values for
frequency requests (CPPC-like, AFAIU). The idea being that util_dl is
what the system has to always guarantee, but it could go up to the sum
if feasible.

> 
> We don't care about the meaning of util_dl and we should always assume
> (by default) that the signal is properly updated by the scheduling
> class... which unfortunately does not always happen for CFS.
> 
> 
> > +	*util = min(rq->cfs.avg.util_avg + dl_util, *max);
> 
> With the above proposal, here also we will have:
> 
> 	*util = min(util_cfs + util_dl, *max);

Looks cleaner.

Thanks,

- Juri
Patrick Bellasi Dec. 5, 2017, 4:34 p.m. UTC | #3
On 05-Dec 16:24, Juri Lelli wrote:
> On 05/12/17 15:09, Patrick Bellasi wrote:
> > > +	/*
> > > +	 * Ideally we would like to set util_dl as min/guaranteed freq and
> > > +	 * util_cfs + util_dl as requested freq. However, cpufreq is not yet
> > > +	 * ready for such an interface. So, we only do the latter for now.
> > > +	 */
> > 
> > Maybe I don't completely get the above comment, but to me it is not
> > really required.
> > 
> > When you say that "util_dl" should be set to a min/guaranteed freq,
> > are you not actually talking about a DL implementation detail?
> > 
> > From the cpufreq standpoint instead, we should always set a capacity
> > which can accommodate util_dl + util_cfs.
> 
> It's more for platforms which support such a combination of values for
> frequency requests (CPPC-like, AFAIU). The idea being that util_dl is
> what the system has to always guarantee, but it could go up to the sum
> if feasible.

I see, you mean for systems where you can specify both values at the
same time, i.e.
- please give me util_dl...
- ... but if you have more beer, I would like util_dl + util_cfs

However, I'm not an expert: on those systems, can we really set a
minimum guaranteed performance level?

I was more under the impression that the "minimum guaranteed" is
something we can only read from "firmware", while we can only ask for
something which is never "guaranteed".

> > We don't care about the meaning of util_dl and we should always assume
> > (by default) that the signal is properly updated by the scheduling
> > class... which unfortunately does not always happen for CFS.
> >
Juri Lelli Dec. 5, 2017, 4:40 p.m. UTC | #4
On 05/12/17 16:34, Patrick Bellasi wrote:
> On 05-Dec 16:24, Juri Lelli wrote:
> > On 05/12/17 15:09, Patrick Bellasi wrote:
> > > > +	/*
> > > > +	 * Ideally we would like to set util_dl as min/guaranteed freq and
> > > > +	 * util_cfs + util_dl as requested freq. However, cpufreq is not yet
> > > > +	 * ready for such an interface. So, we only do the latter for now.
> > > > +	 */
> > > 
> > > Maybe I don't completely get the above comment, but to me it is not
> > > really required.
> > > 
> > > When you say that "util_dl" should be set to a min/guaranteed freq,
> > > are you not actually talking about a DL implementation detail?
> > > 
> > > From the cpufreq standpoint instead, we should always set a capacity
> > > which can accommodate util_dl + util_cfs.
> > 
> > It's more for platforms which support such a combination of values for
> > frequency requests (CPPC-like, AFAIU). The idea being that util_dl is
> > what the system has to always guarantee, but it could go up to the sum
> > if feasible.
> 
> I see, you mean for systems where you can specify both values at the
> same time, i.e.
> - please give me util_dl...
> - ... but if you have more beer, I would like util_dl + util_cfs
> 
> However, I'm not an expert: on those systems, can we really set a
> minimum guaranteed performance level?

This is my current understanding. Never played with such platforms
myself. :)

Best,

- Juri
Peter Zijlstra Dec. 20, 2017, 12:51 p.m. UTC | #5
On Tue, Dec 05, 2017 at 04:34:10PM +0000, Patrick Bellasi wrote:
> On 05-Dec 16:24, Juri Lelli wrote:
> However, I'm not an expert: on those systems, can we really set a
> minimum guaranteed performance level?

Have a look at the Intel SDM, Volume 3, Section 14.4, Hardware-Controlled
Performance States (HWP), which is the Intel implementation of ACPI
CPPC.

You'll see that IA32_HWP_CAPABILITIES has a Guaranteed_Performance
field, and that upon changes to this frequency we will receive
notifications (interrupts).

If you then look at IA32_HWP_REQUEST, you'll see a Minimum_Performance
field, which we can raise up to the guaranteed level, and which
would/should contain the DEADLINE stuff.

HWP_REQUEST also includes a Desired_Performance field, which is where we
want to be for DL+CFS.

Trouble is that cpufreq doesn't yet support the various CPPC fields. So
we have this comment here at the input side stating what we'd want to do
once cpufreq itself grows the interface bits.
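
To make that concrete, the input side could then look something like
the sketch below. Purely illustrative: cpufreq has no such two-level
request today, and cpufreq_set_perf_minmax() is a made-up name standing
in for whatever interface eventually grows here.

       static void sugov_cppc_request(struct rq *rq, int cpu)
       {
               unsigned long max = arch_scale_cpu_capacity(NULL, cpu);
               unsigned long util_dl = (rq->dl.running_bw * SCHED_CAPACITY_SCALE)
                                       >> BW_SHIFT;
               unsigned long util_cfs = rq->cfs.avg.util_avg;

               /*
                * Minimum_Performance: what DEADLINE must always be able
                * to get. Desired_Performance: DL + CFS, when the
                * platform can deliver it.
                */
               cpufreq_set_perf_minmax(cpu, min(util_dl, max),
                                       min(util_dl + util_cfs, max));
       }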

Patch

diff --git a/include/linux/sched/cpufreq.h b/include/linux/sched/cpufreq.h
index d1ad3d825561..0b55834efd46 100644
--- a/include/linux/sched/cpufreq.h
+++ b/include/linux/sched/cpufreq.h
@@ -12,8 +12,6 @@ 
 #define SCHED_CPUFREQ_DL	(1U << 1)
 #define SCHED_CPUFREQ_IOWAIT	(1U << 2)
 
-#define SCHED_CPUFREQ_RT_DL	(SCHED_CPUFREQ_RT | SCHED_CPUFREQ_DL)
-
 #ifdef CONFIG_CPU_FREQ
 struct update_util_data {
        void (*func)(struct update_util_data *data, u64 time, unsigned int flags);
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index 2f52ec0f1539..de1ad1fffbdc 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -179,12 +179,17 @@  static unsigned int get_next_freq(struct sugov_policy *sg_policy,
 static void sugov_get_util(unsigned long *util, unsigned long *max, int cpu)
 {
 	struct rq *rq = cpu_rq(cpu);
-	unsigned long cfs_max;
+	unsigned long dl_util = (rq->dl.running_bw * SCHED_CAPACITY_SCALE)
+				>> BW_SHIFT;
 
-	cfs_max = arch_scale_cpu_capacity(NULL, cpu);
+	*max = arch_scale_cpu_capacity(NULL, cpu);
 
-	*util = min(rq->cfs.avg.util_avg, cfs_max);
-	*max = cfs_max;
+	/*
+	 * Ideally we would like to set util_dl as min/guaranteed freq and
+	 * util_cfs + util_dl as requested freq. However, cpufreq is not yet
+	 * ready for such an interface. So, we only do the latter for now.
+	 */
+	*util = min(rq->cfs.avg.util_avg + dl_util, *max);
 }
 
 static void sugov_set_iowait_boost(struct sugov_cpu *sg_cpu, u64 time,
@@ -272,7 +277,7 @@  static void sugov_update_single(struct update_util_data *hook, u64 time,
 
 	busy = sugov_cpu_is_busy(sg_cpu);
 
-	if (flags & SCHED_CPUFREQ_RT_DL) {
+	if (flags & SCHED_CPUFREQ_RT) {
 		next_f = policy->cpuinfo.max_freq;
 	} else {
 		sugov_get_util(&util, &max, sg_cpu->cpu);
@@ -317,7 +322,7 @@  static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time)
 			j_sg_cpu->iowait_boost_pending = false;
 			continue;
 		}
-		if (j_sg_cpu->flags & SCHED_CPUFREQ_RT_DL)
+		if (j_sg_cpu->flags & SCHED_CPUFREQ_RT)
 			return policy->cpuinfo.max_freq;
 
 		j_util = j_sg_cpu->util;
@@ -353,7 +358,7 @@  static void sugov_update_shared(struct update_util_data *hook, u64 time,
 	sg_cpu->last_update = time;
 
 	if (sugov_should_update_freq(sg_policy, time)) {
-		if (flags & SCHED_CPUFREQ_RT_DL)
+		if (flags & SCHED_CPUFREQ_RT)
 			next_f = sg_policy->policy->cpuinfo.max_freq;
 		else
 			next_f = sugov_next_freq_shared(sg_cpu, time);
@@ -383,9 +388,9 @@  static void sugov_irq_work(struct irq_work *irq_work)
 	sg_policy = container_of(irq_work, struct sugov_policy, irq_work);
 
 	/*
-	 * For RT and deadline tasks, the schedutil governor shoots the
-	 * frequency to maximum. Special care must be taken to ensure that this
-	 * kthread doesn't result in the same behavior.
+	 * For RT tasks, the schedutil governor shoots the frequency to maximum.
+	 * Special care must be taken to ensure that this kthread doesn't result
+	 * in the same behavior.
 	 *
 	 * This is (mostly) guaranteed by the work_in_progress flag. The flag is
 	 * updated only at the end of the sugov_work() function and before that