diff mbox series

[v9,09/15] sched: Introduce sched_energy_present static key

Message ID 20181119141857.8625-10-quentin.perret@arm.com (mailing list archive)
State Superseded, archived
Headers show
Series Energy Aware Scheduling | expand

Commit Message

Quentin Perret Nov. 19, 2018, 2:18 p.m. UTC
In order to make sure Energy Aware Scheduling (EAS) will not impact
systems where no Energy Model is available, introduce a static key
guarding the access to EAS code. Since EAS is enabled on a
per-root-domain basis, the static key is enabled when at least one root
domain meets all conditions for EAS.

Cc: Ingo Molnar <mingo@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Quentin Perret <quentin.perret@arm.com>
---
 kernel/sched/sched.h    |  4 ++++
 kernel/sched/topology.c | 31 +++++++++++++++++++++++++++++++
 2 files changed, 35 insertions(+)

Comments

Peter Zijlstra Nov. 21, 2018, 1:08 p.m. UTC | #1
On Mon, Nov 19, 2018 at 02:18:51PM +0000, Quentin Perret wrote:

> +static void sched_energy_start(int ndoms_new, cpumask_var_t doms_new[])
> +{
> +	/*
> +	 * The conditions for EAS to start are checked during the creation of
> +	 * root domains. If one of them meets all conditions, it will have a
> +	 * non-null list of performance domains.
> +	 */
> +	while (ndoms_new) {
> +		if (cpu_rq(cpumask_first(doms_new[ndoms_new - 1]))->rd->pd)
> +			goto enable;
> +		ndoms_new--;
> +	}

That seems quite ugly; can't you simply return a bool from
build_perf_domains() ?

Something like the below, but less fugly ;-)

--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -299,7 +299,7 @@ static void destroy_perf_domain_rcu(stru
 #define EM_MAX_COMPLEXITY 2048
 
 extern struct cpufreq_governor schedutil_gov;
-static void build_perf_domains(const struct cpumask *cpu_map)
+static bool build_perf_domains(const struct cpumask *cpu_map)
 {
 	int i, nr_pd = 0, nr_cs = 0, nr_cpus = cpumask_weight(cpu_map);
 	struct perf_domain *pd = NULL, *tmp;
@@ -365,7 +365,7 @@ static void build_perf_domains(const str
 	if (tmp)
 		call_rcu(&tmp->rcu, destroy_perf_domain_rcu);
 
-	return;
+	return !!pd;
 
 free:
 	free_pd(pd);
@@ -373,6 +373,8 @@ static void build_perf_domains(const str
 	rcu_assign_pointer(rd->pd, NULL);
 	if (tmp)
 		call_rcu(&tmp->rcu, destroy_perf_domain_rcu);
+
+	return false;
 }
 #else
 static void free_pd(struct perf_domain *pd) { }
@@ -2173,6 +2175,9 @@ void partition_sched_domains(int ndoms_n
 	}
 
 #if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL)
+	{
+	bool has_eas = false;
+
 	/* Build perf. domains: */
 	for (i = 0; i < ndoms_new; i++) {
 		for (j = 0; j < n && !sched_energy_update; j++) {
@@ -2181,10 +2186,13 @@ void partition_sched_domains(int ndoms_n
 				goto match3;
 		}
 		/* No match - add perf. domains for a new rd */
-		build_perf_domains(doms_new[i]);
+		has_eas |= build_perf_domains(doms_new[i]);
 match3:
 		;
 	}
+
+	sched_energy_set(has_eas);
+	}
 #endif
 
 	/* Remember the new sched domains: */
Quentin Perret Nov. 21, 2018, 3:14 p.m. UTC | #2
Hi Peter,

On Wednesday 21 Nov 2018 at 14:08:22 (+0100), Peter Zijlstra wrote:
> On Mon, Nov 19, 2018 at 02:18:51PM +0000, Quentin Perret wrote:
> 
> > +static void sched_energy_start(int ndoms_new, cpumask_var_t doms_new[])
> > +{
> > +	/*
> > +	 * The conditions for EAS to start are checked during the creation of
> > +	 * root domains. If one of them meets all conditions, it will have a
> > +	 * non-null list of performance domains.
> > +	 */
> > +	while (ndoms_new) {
> > +		if (cpu_rq(cpumask_first(doms_new[ndoms_new - 1]))->rd->pd)
> > +			goto enable;
> > +		ndoms_new--;
> > +	}
> 
> That seems quite ugly; can't you simply return a bool from
> build_perf_domains() ?
> 
> Something like the below, but less fugly ;-)
> 
> --- a/kernel/sched/topology.c
> +++ b/kernel/sched/topology.c
> @@ -299,7 +299,7 @@ static void destroy_perf_domain_rcu(stru
>  #define EM_MAX_COMPLEXITY 2048
>  
>  extern struct cpufreq_governor schedutil_gov;
> -static void build_perf_domains(const struct cpumask *cpu_map)
> +static bool build_perf_domains(const struct cpumask *cpu_map)
>  {
>  	int i, nr_pd = 0, nr_cs = 0, nr_cpus = cpumask_weight(cpu_map);
>  	struct perf_domain *pd = NULL, *tmp;
> @@ -365,7 +365,7 @@ static void build_perf_domains(const str
>  	if (tmp)
>  		call_rcu(&tmp->rcu, destroy_perf_domain_rcu);
>  
> -	return;
> +	return !!pd;
>  
>  free:
>  	free_pd(pd);
> @@ -373,6 +373,8 @@ static void build_perf_domains(const str
>  	rcu_assign_pointer(rd->pd, NULL);
>  	if (tmp)
>  		call_rcu(&tmp->rcu, destroy_perf_domain_rcu);
> +
> +	return false;
>  }
>  #else
>  static void free_pd(struct perf_domain *pd) { }
> @@ -2173,6 +2175,9 @@ void partition_sched_domains(int ndoms_n
>  	}
>  
>  #if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL)
> +	{
> +	bool has_eas = false;

That one wants to be declared '__maybe_unused' at the top of
partition_sched_domains() perhaps ? Just to avoid the { } section.
Hopefully the compiler will be smart enough to not use stack space for
it if not needed.

> +
>  	/* Build perf. domains: */
>  	for (i = 0; i < ndoms_new; i++) {
>  		for (j = 0; j < n && !sched_energy_update; j++) {
> @@ -2181,10 +2186,13 @@ void partition_sched_domains(int ndoms_n
>  				goto match3;

And we want to do 'has_eas = true' just before 'goto match3' here.
Otherwise we'll end up disabling EAS if we hit 'goto match3' for all
root domains.

>  		}
>  		/* No match - add perf. domains for a new rd */
> -		build_perf_domains(doms_new[i]);
> +		has_eas |= build_perf_domains(doms_new[i]);
>  match3:
>  		;
>  	}
> +
> +	sched_energy_set(has_eas);
> +	}
>  #endif
>  
>  	/* Remember the new sched domains: */

Other than that I guess that should work OK :-)

Thanks,
Quentin
Quentin Perret Nov. 22, 2018, 9:17 a.m. UTC | #3
On Wednesday 21 Nov 2018 at 15:14:45 (+0000), Quentin Perret wrote:
> Hi Peter,
> 
> On Wednesday 21 Nov 2018 at 14:08:22 (+0100), Peter Zijlstra wrote:
> > On Mon, Nov 19, 2018 at 02:18:51PM +0000, Quentin Perret wrote:
> > 
> > > +static void sched_energy_start(int ndoms_new, cpumask_var_t doms_new[])
> > > +{
> > > +	/*
> > > +	 * The conditions for EAS to start are checked during the creation of
> > > +	 * root domains. If one of them meets all conditions, it will have a
> > > +	 * non-null list of performance domains.
> > > +	 */
> > > +	while (ndoms_new) {
> > > +		if (cpu_rq(cpumask_first(doms_new[ndoms_new - 1]))->rd->pd)
> > > +			goto enable;
> > > +		ndoms_new--;
> > > +	}
> > 
> > That seems quite ugly; can't you simply return a bool from
> > build_perf_domains() ?
> > 
> > Something like the below, but less fugly ;-)
> > 
> > --- a/kernel/sched/topology.c
> > +++ b/kernel/sched/topology.c
> > @@ -299,7 +299,7 @@ static void destroy_perf_domain_rcu(stru
> >  #define EM_MAX_COMPLEXITY 2048
> >  
> >  extern struct cpufreq_governor schedutil_gov;
> > -static void build_perf_domains(const struct cpumask *cpu_map)
> > +static bool build_perf_domains(const struct cpumask *cpu_map)
> >  {
> >  	int i, nr_pd = 0, nr_cs = 0, nr_cpus = cpumask_weight(cpu_map);
> >  	struct perf_domain *pd = NULL, *tmp;
> > @@ -365,7 +365,7 @@ static void build_perf_domains(const str
> >  	if (tmp)
> >  		call_rcu(&tmp->rcu, destroy_perf_domain_rcu);
> >  
> > -	return;
> > +	return !!pd;
> >  
> >  free:
> >  	free_pd(pd);
> > @@ -373,6 +373,8 @@ static void build_perf_domains(const str
> >  	rcu_assign_pointer(rd->pd, NULL);
> >  	if (tmp)
> >  		call_rcu(&tmp->rcu, destroy_perf_domain_rcu);
> > +
> > +	return false;
> >  }
> >  #else
> >  static void free_pd(struct perf_domain *pd) { }
> > @@ -2173,6 +2175,9 @@ void partition_sched_domains(int ndoms_n
> >  	}
> >  
> >  #if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL)
> > +	{
> > +	bool has_eas = false;
> 
> That one wants to be declared '__maybe_unused' at the top of
> partition_sched_domains() perhaps ? Just to avoid the { } section.
> Hopefully the compiler will be smart enough to not use stack space for
> it if not needed.
> 
> > +
> >  	/* Build perf. domains: */
> >  	for (i = 0; i < ndoms_new; i++) {
> >  		for (j = 0; j < n && !sched_energy_update; j++) {
> > @@ -2181,10 +2186,13 @@ void partition_sched_domains(int ndoms_n
> >  				goto match3;
> 
> And we want to do 'has_eas = true' just before 'goto match3' here.
> Otherwise we'll end up disabling EAS if we hit 'goto match3' for all
> root domains.
> 
> >  		}
> >  		/* No match - add perf. domains for a new rd */
> > -		build_perf_domains(doms_new[i]);
> > +		has_eas |= build_perf_domains(doms_new[i]);
> >  match3:
> >  		;
> >  	}
> > +
> > +	sched_energy_set(has_eas);
> > +	}
> >  #endif
> >  
> >  	/* Remember the new sched domains: */
> 
> Other than that I guess that should work OK :-)

So I actually just came across your patch that does
static_branch_{inc,dec} on sched_smt_present. Maybe I could do something
similar here ?

How about something like the below (totally untested):

diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index 8f9dfea60344..10ac32a0dc2e 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -298,6 +298,7 @@ static void destroy_perf_domain_rcu(struct rcu_head *rp)
 
        pd = container_of(rp, struct perf_domain, rcu);
        free_pd(pd);
+       static_branch_dec_cpuslocked(&sched_energy_present);
 }
 
 static void sched_energy_start(int ndoms_new, cpumask_var_t doms_new[])
@@ -421,6 +422,8 @@ static void build_perf_domains(const struct cpumask *cpu_map)
        /* Attach the new list of performance domains to the root domain. */
        tmp = rd->pd;
        rcu_assign_pointer(rd->pd, pd);
+       static_branch_inc_cpuslocked(&sched_energy_present);
+
        if (tmp)
                call_rcu(&tmp->rcu, destroy_perf_domain_rcu);
 
@@ -2246,7 +2249,6 @@ void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[],
 match3:
                ;
        }
-       sched_energy_start(ndoms_new, doms_new);
 #endif
 
        /* Remember the new sched domains: */
Quentin Perret Nov. 22, 2018, 9:32 a.m. UTC | #4
On Thursday 22 Nov 2018 at 09:17:16 (+0000), Quentin Perret wrote:
> On Wednesday 21 Nov 2018 at 15:14:45 (+0000), Quentin Perret wrote:
> > Hi Peter,
> > 
> > On Wednesday 21 Nov 2018 at 14:08:22 (+0100), Peter Zijlstra wrote:
> > > On Mon, Nov 19, 2018 at 02:18:51PM +0000, Quentin Perret wrote:
> > > 
> > > > +static void sched_energy_start(int ndoms_new, cpumask_var_t doms_new[])
> > > > +{
> > > > +	/*
> > > > +	 * The conditions for EAS to start are checked during the creation of
> > > > +	 * root domains. If one of them meets all conditions, it will have a
> > > > +	 * non-null list of performance domains.
> > > > +	 */
> > > > +	while (ndoms_new) {
> > > > +		if (cpu_rq(cpumask_first(doms_new[ndoms_new - 1]))->rd->pd)
> > > > +			goto enable;
> > > > +		ndoms_new--;
> > > > +	}
> > > 
> > > That seems quite ugly; can't you simply return a bool from
> > > build_perf_domains() ?
> > > 
> > > Something like the below, but less fugly ;-)
> > > 
> > > --- a/kernel/sched/topology.c
> > > +++ b/kernel/sched/topology.c
> > > @@ -299,7 +299,7 @@ static void destroy_perf_domain_rcu(stru
> > >  #define EM_MAX_COMPLEXITY 2048
> > >  
> > >  extern struct cpufreq_governor schedutil_gov;
> > > -static void build_perf_domains(const struct cpumask *cpu_map)
> > > +static bool build_perf_domains(const struct cpumask *cpu_map)
> > >  {
> > >  	int i, nr_pd = 0, nr_cs = 0, nr_cpus = cpumask_weight(cpu_map);
> > >  	struct perf_domain *pd = NULL, *tmp;
> > > @@ -365,7 +365,7 @@ static void build_perf_domains(const str
> > >  	if (tmp)
> > >  		call_rcu(&tmp->rcu, destroy_perf_domain_rcu);
> > >  
> > > -	return;
> > > +	return !!pd;
> > >  
> > >  free:
> > >  	free_pd(pd);
> > > @@ -373,6 +373,8 @@ static void build_perf_domains(const str
> > >  	rcu_assign_pointer(rd->pd, NULL);
> > >  	if (tmp)
> > >  		call_rcu(&tmp->rcu, destroy_perf_domain_rcu);
> > > +
> > > +	return false;
> > >  }
> > >  #else
> > >  static void free_pd(struct perf_domain *pd) { }
> > > @@ -2173,6 +2175,9 @@ void partition_sched_domains(int ndoms_n
> > >  	}
> > >  
> > >  #if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL)
> > > +	{
> > > +	bool has_eas = false;
> > 
> > That one wants to be declared '__maybe_unused' at the top of
> > partition_sched_domains() perhaps ? Just to avoid the { } section.
> > Hopefully the compiler will be smart enough to not use stack space for
> > it if not needed.
> > 
> > > +
> > >  	/* Build perf. domains: */
> > >  	for (i = 0; i < ndoms_new; i++) {
> > >  		for (j = 0; j < n && !sched_energy_update; j++) {
> > > @@ -2181,10 +2186,13 @@ void partition_sched_domains(int ndoms_n
> > >  				goto match3;
> > 
> > And we want to do 'has_eas = true' just before 'goto match3' here.
> > Otherwise we'll end up disabling EAS if we hit 'goto match3' for all
> > root domains.
> > 
> > >  		}
> > >  		/* No match - add perf. domains for a new rd */
> > > -		build_perf_domains(doms_new[i]);
> > > +		has_eas |= build_perf_domains(doms_new[i]);
> > >  match3:
> > >  		;
> > >  	}
> > > +
> > > +	sched_energy_set(has_eas);
> > > +	}
> > >  #endif
> > >  
> > >  	/* Remember the new sched domains: */
> > 
> > Other than that I guess that should work OK :-)
> 
> So I actually just came across your patch that does
> static_branch_{inc,dec} on sched_smt_present. Maybe I could do something
> similar here ?
> 
> How about something like the below (totally untested):
> 
> diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
> index 8f9dfea60344..10ac32a0dc2e 100644
> --- a/kernel/sched/topology.c
> +++ b/kernel/sched/topology.c
> @@ -298,6 +298,7 @@ static void destroy_perf_domain_rcu(struct rcu_head *rp)
>  
>         pd = container_of(rp, struct perf_domain, rcu);
>         free_pd(pd);
> +       static_branch_dec_cpuslocked(&sched_energy_present);
>  }
>  
>  static void sched_energy_start(int ndoms_new, cpumask_var_t doms_new[])
> @@ -421,6 +422,8 @@ static void build_perf_domains(const struct cpumask *cpu_map)
>         /* Attach the new list of performance domains to the root domain. */
>         tmp = rd->pd;
>         rcu_assign_pointer(rd->pd, pd);
> +       static_branch_inc_cpuslocked(&sched_energy_present);
> +
>         if (tmp)
>                 call_rcu(&tmp->rcu, destroy_perf_domain_rcu);
>  
> @@ -2246,7 +2249,6 @@ void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[],
>  match3:
>                 ;
>         }
> -       sched_energy_start(ndoms_new, doms_new);
>  #endif
>  
>         /* Remember the new sched domains: */
> 

Hmm, I went too fast, that's totally broken. But there's still something
we can do with static_branch_{inc,dec} I think. I'll come back later
with a better solution.

Sorry for the noise :/
Peter Zijlstra Nov. 22, 2018, 10:25 a.m. UTC | #5
On Thu, Nov 22, 2018 at 09:32:39AM +0000, Quentin Perret wrote:
> Hmm, I went too fast, that's totally broken. But there's still something
> we can do with static_branch_{inc,dec} I think. I'll come back later
> with a better solution.

Right; if you count the rd's that have pd set, it should work-ish. Yes,
much cleaner if you can get it to work.
Quentin Perret Nov. 22, 2018, 3:25 p.m. UTC | #6
On Thursday 22 Nov 2018 at 11:25:45 (+0100), Peter Zijlstra wrote:
> On Thu, Nov 22, 2018 at 09:32:39AM +0000, Quentin Perret wrote:
> > Hmm, I went too fast, that's totally broken. But there's still something
> > we can do with static_branch_{inc,dec} I think. I'll come back later
> > with a better solution.
> 
> Right; if you count the rd's that have pd set, it should work-ish. Yes,
> much cleaner if you can get it to work.

So, I came up with the following code which seems to work OK. It's not
as I clean as I'd like, though. The fact that free_pd() can be called in
softirq context is annoying to manipulate the static key ...

An alternative to this work item workaround is to do static_branch_dec()
from build_perf_domains() and next to the three call sites of
free_rootdomain() in order to avoid the call_rcu() context. Not very
pretty either.

Or we can just stick with your original suggestion to carry a boolean
around.

Any preference ?

Thanks,
Quentin

--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -232,7 +232,7 @@ int sched_energy_aware_handler(struct ctl_table *table, int write,
 }
 #endif

-static void free_pd(struct perf_domain *pd)
+static void _free_pd(struct perf_domain *pd)
 {
        struct perf_domain *tmp;

@@ -243,6 +243,24 @@ static void free_pd(struct perf_domain *pd)
        }
 }

+static void dec_sched_energy_workfn(struct work_struct *work)
+{
+       static_branch_dec(&sched_energy_present);
+}
+static DECLARE_WORK(dec_sched_energy_work, dec_sched_energy_workfn);
+
+static void free_pd(struct perf_domain *pd)
+{
+       if (pd) {
+               /*
+                * XXX: The static key can't be modified from SOFTIRQ context,
+                * so do it using a work item.
+                */
+               schedule_work(&dec_sched_energy_work);
+               _free_pd(pd);
+       }
+}
+
 static struct perf_domain *find_pd(struct perf_domain *pd, int cpu)
 {
        while (pd) {
@@ -421,13 +410,14 @@ static void build_perf_domains(const struct cpumask *cpu_map)
        /* Attach the new list of performance domains to the root domain. */
        tmp = rd->pd;
        rcu_assign_pointer(rd->pd, pd);
+       static_branch_inc_cpuslocked(&sched_energy_present);
        if (tmp)
                call_rcu(&tmp->rcu, destroy_perf_domain_rcu);

        return;

 free:
-       free_pd(pd);
+       _free_pd(pd);
        tmp = rd->pd;
        rcu_assign_pointer(rd->pd, NULL);
        if (tmp)
@@ -2246,7 +2236,6 @@ void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[],
 match3:
                ;
        }
-       sched_energy_start(ndoms_new, doms_new);
 #endif

        /* Remember the new sched domains: */
Rafael J. Wysocki Nov. 22, 2018, 3:51 p.m. UTC | #7
On Thu, Nov 22, 2018 at 4:25 PM Quentin Perret <quentin.perret@arm.com> wrote:
>
> On Thursday 22 Nov 2018 at 11:25:45 (+0100), Peter Zijlstra wrote:
> > On Thu, Nov 22, 2018 at 09:32:39AM +0000, Quentin Perret wrote:
> > > Hmm, I went too fast, that's totally broken. But there's still something
> > > we can do with static_branch_{inc,dec} I think. I'll come back later
> > > with a better solution.
> >
> > Right; if you count the rd's that have pd set, it should work-ish. Yes,
> > much cleaner if you can get it to work.
>
> So, I came up with the following code which seems to work OK. It's not
> as I clean as I'd like, though. The fact that free_pd() can be called in
> softirq context is annoying to manipulate the static key ...
>
> An alternative to this work item workaround is to do static_branch_dec()
> from build_perf_domains() and next to the three call sites of
> free_rootdomain() in order to avoid the call_rcu() context. Not very
> pretty either.
>
> Or we can just stick with your original suggestion to carry a boolean
> around.

What's problematic with carrying a boolean around?
Quentin Perret Nov. 22, 2018, 3:55 p.m. UTC | #8
On Thursday 22 Nov 2018 at 16:51:41 (+0100), Rafael J. Wysocki wrote:
> On Thu, Nov 22, 2018 at 4:25 PM Quentin Perret <quentin.perret@arm.com> wrote:
> >
> > On Thursday 22 Nov 2018 at 11:25:45 (+0100), Peter Zijlstra wrote:
> > > On Thu, Nov 22, 2018 at 09:32:39AM +0000, Quentin Perret wrote:
> > > > Hmm, I went too fast, that's totally broken. But there's still something
> > > > we can do with static_branch_{inc,dec} I think. I'll come back later
> > > > with a better solution.
> > >
> > > Right; if you count the rd's that have pd set, it should work-ish. Yes,
> > > much cleaner if you can get it to work.
> >
> > So, I came up with the following code which seems to work OK. It's not
> > as I clean as I'd like, though. The fact that free_pd() can be called in
> > softirq context is annoying to manipulate the static key ...
> >
> > An alternative to this work item workaround is to do static_branch_dec()
> > from build_perf_domains() and next to the three call sites of
> > free_rootdomain() in order to avoid the call_rcu() context. Not very
> > pretty either.
> >
> > Or we can just stick with your original suggestion to carry a boolean
> > around.
> 
> What's problematic with carrying a boolean around?

Nothing, I was just trying to see if I could find a more elegant way to
increment/decrement the static key as we create/destroy the perf
domains.

I would not describe the work item thing I came up with as particularly
elegant, though :-)

So, all in all, the boolean is fine by me if we all agree it is nicer.

Thanks,
Quentin
diff mbox series

Patch

diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 0278b251c379..5b636a8aa900 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2298,3 +2298,7 @@  unsigned long scale_irq_capacity(unsigned long util, unsigned long irq, unsigned
 #else
 #define perf_domain_span(pd) NULL
 #endif
+
+#ifdef CONFIG_SMP
+extern struct static_key_false sched_energy_present;
+#endif
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index 70492d4bfed6..1457ef80ee94 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -201,6 +201,7 @@  sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent)
 	return 1;
 }
 
+DEFINE_STATIC_KEY_FALSE(sched_energy_present);
 #if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL)
 DEFINE_MUTEX(sched_energy_mutex);
 bool sched_energy_update;
@@ -273,6 +274,35 @@  static void destroy_perf_domain_rcu(struct rcu_head *rp)
 	free_pd(pd);
 }
 
+static void sched_energy_start(int ndoms_new, cpumask_var_t doms_new[])
+{
+	/*
+	 * The conditions for EAS to start are checked during the creation of
+	 * root domains. If one of them meets all conditions, it will have a
+	 * non-null list of performance domains.
+	 */
+	while (ndoms_new) {
+		if (cpu_rq(cpumask_first(doms_new[ndoms_new - 1]))->rd->pd)
+			goto enable;
+		ndoms_new--;
+	}
+
+	if (static_branch_unlikely(&sched_energy_present)) {
+		if (sched_debug())
+			pr_info("%s: stopping EAS\n", __func__);
+		static_branch_disable_cpuslocked(&sched_energy_present);
+	}
+
+	return;
+
+enable:
+	if (!static_branch_unlikely(&sched_energy_present)) {
+		if (sched_debug())
+			pr_info("%s: starting EAS\n", __func__);
+		static_branch_enable_cpuslocked(&sched_energy_present);
+	}
+}
+
 /*
  * EAS can be used on a root domain if it meets all the following conditions:
  *    1. an Energy Model (EM) is available;
@@ -2187,6 +2217,7 @@  void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[],
 match3:
 		;
 	}
+	sched_energy_start(ndoms_new, doms_new);
 #endif
 
 	/* Remember the new sched domains: */