diff mbox series

[04/39] sched: Add sched_class->reweight_task()

Message ID 20240501151312.635565-5-tj@kernel.org (mailing list archive)
State Not Applicable
Headers show
Series [01/39] cgroup: Implement cgroup_show_cftypes() | expand

Checks

Context Check Description
netdev/tree_selection success Not a local patch, async

Commit Message

Tejun Heo May 1, 2024, 3:09 p.m. UTC
Currently, during a task weight change, sched core directly calls
reweight_task() defined in fair.c if @p is on CFS. Let's make it a proper
sched_class operation instead. CFS's reweight_task() is renamed to
reweight_task_fair() and now called through sched_class.

While it turns a direct call into an indirect one, set_load_weight() isn't
called from a hot path and this change shouldn't cause any noticeable
difference. This will be used to implement reweight_task for a new BPF
extensible sched_class so that it can keep its cached task weight
up-to-date.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: David Vernet <dvernet@meta.com>
Acked-by: Josh Don <joshdon@google.com>
Acked-by: Hao Luo <haoluo@google.com>
Acked-by: Barret Rhoden <brho@google.com>
---
 kernel/sched/core.c  | 4 ++--
 kernel/sched/fair.c  | 3 ++-
 kernel/sched/sched.h | 4 ++--
 3 files changed, 6 insertions(+), 5 deletions(-)

Comments

Peter Zijlstra June 24, 2024, 10:23 a.m. UTC | #1
On Wed, May 01, 2024 at 05:09:39AM -1000, Tejun Heo wrote:
> Currently, during a task weight change, sched core directly calls
> reweight_task() defined in fair.c if @p is on CFS. Let's make it a proper
> sched_class operation instead. CFS's reweight_task() is renamed to
> reweight_task_fair() and now called through sched_class.
> 
> While it turns a direct call into an indirect one, set_load_weight() isn't
> called from a hot path and this change shouldn't cause any noticeable
> difference. This will be used to implement reweight_task for a new BPF
> extensible sched_class so that it can keep its cached task weight
> up-to-date.
> 
> Signed-off-by: Tejun Heo <tj@kernel.org>
> Reviewed-by: David Vernet <dvernet@meta.com>
> Acked-by: Josh Don <joshdon@google.com>
> Acked-by: Hao Luo <haoluo@google.com>
> Acked-by: Barret Rhoden <brho@google.com>
> ---
>  kernel/sched/core.c  | 4 ++--
>  kernel/sched/fair.c  | 3 ++-
>  kernel/sched/sched.h | 4 ++--
>  3 files changed, 6 insertions(+), 5 deletions(-)
> 
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index b12b1b7405fd..4b9cb2228b04 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -1342,8 +1342,8 @@ static void set_load_weight(struct task_struct *p, bool update_load)
>  	 * SCHED_OTHER tasks have to update their load when changing their
>  	 * weight
>  	 */
> -	if (update_load && p->sched_class == &fair_sched_class) {
> -		reweight_task(p, prio);
> +	if (update_load && p->sched_class->reweight_task) {
> +		p->sched_class->reweight_task(task_rq(p), p, prio);
>  	} else {
>  		load->weight = scale_load(sched_prio_to_weight[prio]);
>  		load->inv_weight = sched_prio_to_wmult[prio];

This reminds me, I think we have a bug here...

  https://lkml.kernel.org/r/20240422094157.GA34453@noisy.programming.kicks-ass.net

I *think* we want something like the below, hmm?


diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 0935f9d4bb7b..32a40d85c0b1 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1328,15 +1328,15 @@ int tg_nop(struct task_group *tg, void *data)
 void set_load_weight(struct task_struct *p, bool update_load)
 {
 	int prio = p->static_prio - MAX_RT_PRIO;
-	struct load_weight *load = &p->se.load;
+	unsigned long weight;
+	u32 inv_weight;
 
-	/*
-	 * SCHED_IDLE tasks get minimal weight:
-	 */
 	if (task_has_idle_policy(p)) {
-		load->weight = scale_load(WEIGHT_IDLEPRIO);
-		load->inv_weight = WMULT_IDLEPRIO;
-		return;
+		weight = scale_load(WEIGHT_IDLEPRIO);
+		inv_weight = WMULT_IDLEPRIO;
+	} else {
+		weight = scale_load(sched_prio_to_weight[prio]);
+		inv_weight = sched_prio_to_wmult[prio];
 	}
 
 	/*
@@ -1344,10 +1344,11 @@ void set_load_weight(struct task_struct *p, bool update_load)
 	 * weight
 	 */
 	if (update_load && p->sched_class == &fair_sched_class) {
-		reweight_task(p, prio);
+		reweight_task(p, weight, inv_weight);
 	} else {
-		load->weight = scale_load(sched_prio_to_weight[prio]);
-		load->inv_weight = sched_prio_to_wmult[prio];
+		struct load_weight *lw = &p->se.load;
+		lw->weight = weight;
+		lw->inv_weight = inv_weight;
 	}
 }
 
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 41b58387023d..07398042e342 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3835,7 +3835,7 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
 	}
 }
 
-void reweight_task(struct task_struct *p, int prio)
+void reweight_task(struct task_struct *p, unsigned long weight, u32 inv_weight)
 {
 	struct sched_entity *se = &p->se;
 	struct cfs_rq *cfs_rq = cfs_rq_of(se);
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 62fd8bc6fd08..c1d07957e38a 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2509,7 +2509,7 @@ extern void init_sched_dl_class(void);
 extern void init_sched_rt_class(void);
 extern void init_sched_fair_class(void);
 
-extern void reweight_task(struct task_struct *p, int prio);
+extern void reweight_task(struct task_struct *p, unsigned long weight, u32 inv_weight);
 
 extern void resched_curr(struct rq *rq);
 extern void resched_cpu(int cpu);
Peter Zijlstra June 24, 2024, 10:31 a.m. UTC | #2
On Mon, Jun 24, 2024 at 12:23:31PM +0200, Peter Zijlstra wrote:
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index 41b58387023d..07398042e342 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -3835,7 +3835,7 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
>  	}
>  }
>  
> -void reweight_task(struct task_struct *p, int prio)
> +void reweight_task(struct task_struct *p, unsigned long weight, u32 inv_weight)
>  {
>  	struct sched_entity *se = &p->se;
>  	struct cfs_rq *cfs_rq = cfs_rq_of(se);

Lost something in transition...

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 41b58387023d..cd9b89e9e944 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3835,15 +3835,14 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
 	}
 }
 
-void reweight_task(struct task_struct *p, int prio)
+void reweight_task(struct task_struct *p, unsigned long weight, u32 inv_weight)
 {
 	struct sched_entity *se = &p->se;
 	struct cfs_rq *cfs_rq = cfs_rq_of(se);
 	struct load_weight *load = &se->load;
-	unsigned long weight = scale_load(sched_prio_to_weight[prio]);
 
 	reweight_entity(cfs_rq, se, weight);
-	load->inv_weight = sched_prio_to_wmult[prio];
+	load->inv_weight = inv_weight;
 }
 
 static inline int throttled_hierarchy(struct cfs_rq *cfs_rq);
Tejun Heo June 24, 2024, 11:59 p.m. UTC | #3
Hello, Peter.

On Mon, Jun 24, 2024 at 12:23:31PM +0200, Peter Zijlstra wrote:
> This reminds me, I think we have a bug here...
> 
>   https://lkml.kernel.org/r/20240422094157.GA34453@noisy.programming.kicks-ass.net
> 
> I *think* we want something like the below, hmm?
> 
> 
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index 0935f9d4bb7b..32a40d85c0b1 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -1328,15 +1328,15 @@ int tg_nop(struct task_group *tg, void *data)
>  void set_load_weight(struct task_struct *p, bool update_load)
>  {
>  	int prio = p->static_prio - MAX_RT_PRIO;
> -	struct load_weight *load = &p->se.load;
> +	unsigned long weight;
> +	u32 inv_weight;
>  
> -	/*
> -	 * SCHED_IDLE tasks get minimal weight:
> -	 */
>  	if (task_has_idle_policy(p)) {
> -		load->weight = scale_load(WEIGHT_IDLEPRIO);
> -		load->inv_weight = WMULT_IDLEPRIO;
> -		return;
> +		weight = scale_load(WEIGHT_IDLEPRIO);
> +		inv_weight = WMULT_IDLEPRIO;
> +	} else {
> +		weight = scale_load(sched_prio_to_weight[prio]);
> +		inv_weight = sched_prio_to_wmult[prio];

Hmmm... sorry but I'm a bit confused again. Isn't the code doing the same
thing before and after?

Before, if @p is SCHED_IDLE, @p->se.load is set to idle values and the
function returns. If @update_load && fair, calls reweight_task(). Otherwise,
update @p->se.load is updated according to @prio. After the patch, @weight
and @inv_weight calcuations are moved to set_load_weight() but it ends up
with the same result for all three cases.

Were you trying to say that if the idle policy were to implement
->reweight_task(), it wouldn't be called?

Thanks.
Peter Zijlstra June 25, 2024, 7:29 a.m. UTC | #4
On Mon, Jun 24, 2024 at 01:59:02PM -1000, Tejun Heo wrote:

> Were you trying to say that if the idle policy were to implement
> ->reweight_task(), it wouldn't be called?

This. IDLE really is FAIR but with a really small weight.
Tejun Heo June 25, 2024, 11:57 p.m. UTC | #5
On Tue, Jun 25, 2024 at 09:29:54AM +0200, Peter Zijlstra wrote:
> On Mon, Jun 24, 2024 at 01:59:02PM -1000, Tejun Heo wrote:
> 
> > Were you trying to say that if the idle policy were to implement
> > ->reweight_task(), it wouldn't be called?
> 
> This. IDLE really is FAIR but with a really small weight.

Oh, right, I got confused (again) by the idle class and SCHED_IDLE. Will
fix.

Thanks.
diff mbox series

Patch

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index b12b1b7405fd..4b9cb2228b04 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1342,8 +1342,8 @@  static void set_load_weight(struct task_struct *p, bool update_load)
 	 * SCHED_OTHER tasks have to update their load when changing their
 	 * weight
 	 */
-	if (update_load && p->sched_class == &fair_sched_class) {
-		reweight_task(p, prio);
+	if (update_load && p->sched_class->reweight_task) {
+		p->sched_class->reweight_task(task_rq(p), p, prio);
 	} else {
 		load->weight = scale_load(sched_prio_to_weight[prio]);
 		load->inv_weight = sched_prio_to_wmult[prio];
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 03be0d1330a6..5d7cffee1a4e 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3835,7 +3835,7 @@  static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
 	}
 }
 
-void reweight_task(struct task_struct *p, int prio)
+static void reweight_task_fair(struct rq *rq, struct task_struct *p, int prio)
 {
 	struct sched_entity *se = &p->se;
 	struct cfs_rq *cfs_rq = cfs_rq_of(se);
@@ -13135,6 +13135,7 @@  DEFINE_SCHED_CLASS(fair) = {
 	.task_tick		= task_tick_fair,
 	.task_fork		= task_fork_fair,
 
+	.reweight_task		= reweight_task_fair,
 	.prio_changed		= prio_changed_fair,
 	.switched_from		= switched_from_fair,
 	.switched_to		= switched_to_fair,
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index d2242679239e..8e23f19e8096 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2303,6 +2303,8 @@  struct sched_class {
 	 */
 	void (*switched_from)(struct rq *this_rq, struct task_struct *task);
 	void (*switched_to)  (struct rq *this_rq, struct task_struct *task);
+	void (*reweight_task)(struct rq *this_rq, struct task_struct *task,
+			      int newprio);
 	void (*prio_changed) (struct rq *this_rq, struct task_struct *task,
 			      int oldprio);
 
@@ -2460,8 +2462,6 @@  extern void init_sched_dl_class(void);
 extern void init_sched_rt_class(void);
 extern void init_sched_fair_class(void);
 
-extern void reweight_task(struct task_struct *p, int prio);
-
 extern void resched_curr(struct rq *rq);
 extern void resched_cpu(int cpu);