Message ID: 1436293469-25707-2-git-send-email-morten.rasmussen@arm.com (mailing list archive)
State: RFC
Hi Morten,

On Tue, Jul 07, 2015 at 07:23:44PM +0100, Morten Rasmussen wrote:
> From: Morten Rasmussen <Morten.Rasmussen@arm.com>
>
> Implements arch-specific function to provide the scheduler with a
> frequency scaling correction factor for more accurate load-tracking.
> The factor is:
>
>	current_freq(cpu) << SCHED_CAPACITY_SHIFT / max_freq(cpu)
>
> This implementation only provides frequency invariance. No cpu
> invariance yet.
>
> Cc: Russell King <linux@arm.linux.org.uk>
>
> Signed-off-by: Morten Rasmussen <morten.rasmussen@arm.com>
>
> ---
>  arch/arm/include/asm/topology.h |  7 +++++
>  arch/arm/kernel/smp.c           | 57 +++++++++++++++++++++++++++++++++++++++--
>  arch/arm/kernel/topology.c      | 17 ++++++++++++
>  3 files changed, 79 insertions(+), 2 deletions(-)
>
> [snip]
>
>  static int __init register_cpufreq_notifier(void)
>  {
> -	return cpufreq_register_notifier(&cpufreq_notifier,
> +	int ret;
> +
> +	ret = cpufreq_register_notifier(&cpufreq_notifier,
>  					CPUFREQ_TRANSITION_NOTIFIER);
> +	if (ret)
> +		return ret;
> +
> +	return cpufreq_register_notifier(&cpufreq_policy_notifier,
> +					 CPUFREQ_POLICY_NOTIFIER);
>  }
>  core_initcall(register_cpufreq_notifier);

Could the "cpu_freq_capacity" per-cpu variable be moved into drivers/cpufreq
so that it can be shared by all architectures? Otherwise, every
architecture's smp.c needs to register the notifiers itself.

Thanks,
Leo Yan

> [snip]
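To make the suggestion above concrete, a minimal sketch of what a shared
helper inside drivers/cpufreq could look like follows. This is only an
illustration of the idea; the names cpufreq_set_freq_scale(),
cpufreq_get_freq_scale() and cpufreq_freq_scale are hypothetical and do not
exist in the cpufreq framework discussed here.

#include <linux/percpu.h>
#include <linux/sched.h>	/* SCHED_CAPACITY_SHIFT, SCHED_CAPACITY_SCALE */

/* One scaling factor per cpu, defaulting to "running at max frequency". */
static DEFINE_PER_CPU(unsigned long, cpufreq_freq_scale) = SCHED_CAPACITY_SCALE;

/* Updated from cpufreq's own transition/policy paths instead of a per-arch notifier. */
void cpufreq_set_freq_scale(int cpu, unsigned long curr, unsigned long max)
{
	if (!max)
		return;

	per_cpu(cpufreq_freq_scale, cpu) = (curr << SCHED_CAPACITY_SHIFT) / max;
}

/* Non-sleeping getter that any architecture could call. */
unsigned long cpufreq_get_freq_scale(int cpu)
{
	return per_cpu(cpufreq_freq_scale, cpu);
}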
On Tue, Jul 21, 2015 at 11:41:45PM +0800, Leo Yan wrote:
> Hi Morten,
>
> On Tue, Jul 07, 2015 at 07:23:44PM +0100, Morten Rasmussen wrote:
> > From: Morten Rasmussen <Morten.Rasmussen@arm.com>
> >
> > Implements arch-specific function to provide the scheduler with a
> > frequency scaling correction factor for more accurate load-tracking.
> >
> > [snip]
>
> Could the "cpu_freq_capacity" per-cpu variable be moved into drivers/cpufreq
> so that it can be shared by all architectures? Otherwise, every
> architecture's smp.c needs to register the notifiers itself.

We could, but I put it in arch/arm/* as not all architectures might want
this notifier. The frequency scaling factor could be provided based on
architecture-specific performance counters instead. AFAIK, the Intel
P-state driver does not even fire the notifiers, so the notifier
solution would be redundant code for those platforms.

That said, the above solution does not handle changes to policy->max
very well. Basically, we don't inform the scheduler when it has changed,
which means that the OPP represented by "100%" might change. We need
cpufreq to keep track of the true max frequency when policy->max is
changed so that we can work out the correct scaling factor, instead of
having it relative to policy->max.

Morten
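To make the policy->max problem concrete, here is a small worked example.
The frequencies below are made up purely for illustration: when the factor
is computed against policy->max, capping the policy silently moves the
point that "100%" refers to.

#define SCHED_CAPACITY_SHIFT	10

static unsigned long freq_factor(unsigned long curr, unsigned long max)
{
	return (curr << SCHED_CAPACITY_SHIFT) / max;
}

/*
 * Uncapped hardware max 2000 MHz, currently running at 1000 MHz:
 *	freq_factor(1000, 2000) == 512		50%, as expected
 *
 * Thermal caps policy->max to 1000 MHz, still running at 1000 MHz:
 *	freq_factor(1000, 1000) == 1024		reported as 100%, although the
 *						cpu still only delivers half of
 *						its hardware maximum
 */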
On Wed, Jul 22, 2015 at 02:31:04PM +0100, Morten Rasmussen wrote:
> On Tue, Jul 21, 2015 at 11:41:45PM +0800, Leo Yan wrote:
> > [snip]
> >
> > Could the "cpu_freq_capacity" per-cpu variable be moved into drivers/cpufreq
> > so that it can be shared by all architectures? Otherwise, every
> > architecture's smp.c needs to register the notifiers itself.
>
> We could, but I put it in arch/arm/* as not all architectures might want
> this notifier. The frequency scaling factor could be provided based on
> architecture-specific performance counters instead. AFAIK, the Intel
> P-state driver does not even fire the notifiers, so the notifier
> solution would be redundant code for those platforms.

When I tried to enable EAS on HiKey, I found the related code is missing
for arm64; this code section could also be reused by arm64, which is why
I brought up this question.

I just had a quick look through "drivers/cpufreq/intel_pstate.c"; it is
true that it has a different implementation compared to the usual ARM
SoCs. So let me ask the question another way: should the cpufreq framework
provide helper functions for getting the related cpu frequency scaling
info? If the architecture has specific performance counters, it can ignore
these helper functions.

> That said, the above solution does not handle changes to policy->max
> very well. Basically, we don't inform the scheduler when it has changed,
> which means that the OPP represented by "100%" might change. We need
> cpufreq to keep track of the true max frequency when policy->max is
> changed so that we can work out the correct scaling factor, instead of
> having it relative to policy->max.

I'm not sure I understand correctly here. For example, when the thermal
framework limits the cpu frequency, it will update the value of
policy->max, so the scheduler will get the correct scaling factor, right?
So I don't see what the issue is here.

Furthermore, I noticed that in the later patches arch_scale_cpu_capacity()
calculates the cpu capacity from the property passed in DT, so it is a
static value. In some cases the system may constrain the maximum frequency
of the CPUs, so will the scheduler get wrong information from
arch_scale_cpu_capacity() after the system has imposed a constraint on the
maximum frequency?

Sorry if these questions have been discussed before :)

Thanks,
Leo Yan
On Wed, Jul 22, 2015 at 10:59:04PM +0800, Leo Yan wrote:
> On Wed, Jul 22, 2015 at 02:31:04PM +0100, Morten Rasmussen wrote:
> > [snip]
> >
> > We could, but I put it in arch/arm/* as not all architectures might want
> > this notifier. The frequency scaling factor could be provided based on
> > architecture-specific performance counters instead. AFAIK, the Intel
> > P-state driver does not even fire the notifiers, so the notifier
> > solution would be redundant code for those platforms.
>
> When I tried to enable EAS on HiKey, I found the related code is missing
> for arm64; this code section could also be reused by arm64, which is why
> I brought up this question.

Yes. We have patches for arm64 if you are interested. We are using them
for the Juno platforms.

> I just had a quick look through "drivers/cpufreq/intel_pstate.c"; it is
> true that it has a different implementation compared to the usual ARM
> SoCs. So let me ask the question another way: should the cpufreq framework
> provide helper functions for getting the related cpu frequency scaling
> info? If the architecture has specific performance counters, it can ignore
> these helper functions.

That is the idea with the notifiers. If the architecture code for a
specific architecture wants to be poked by cpufreq when the frequency
changes, it should have a way to subscribe to those notifications. Another
way of implementing it is to let the architecture code call a helper
function in cpufreq every time the scheduler calls into the architecture
code to get the scaling factor (arch_scale_freq_capacity()). We actually
did it that way a couple of versions back using weak functions. It wasn't
as clean as using the notifiers, but if we make the necessary changes to
cpufreq to let the architecture code call into cpufreq, that could be even
better.

> > That said, the above solution does not handle changes to policy->max
> > very well. Basically, we don't inform the scheduler when it has changed,
> > which means that the OPP represented by "100%" might change. We need
> > cpufreq to keep track of the true max frequency when policy->max is
> > changed so that we can work out the correct scaling factor, instead of
> > having it relative to policy->max.
>
> I'm not sure I understand correctly here. For example, when the thermal
> framework limits the cpu frequency, it will update the value of
> policy->max, so the scheduler will get the correct scaling factor, right?
> So I don't see what the issue is here.
>
> Furthermore, I noticed that in the later patches arch_scale_cpu_capacity()
> calculates the cpu capacity from the property passed in DT, so it is a
> static value. In some cases the system may constrain the maximum frequency
> of the CPUs, so will the scheduler get wrong information from
> arch_scale_cpu_capacity() after the system has imposed a constraint on the
> maximum frequency?

The issue is first of all to define what 100% means. Is it
policy->cur/policy->max or policy->cur/uncapped_max, where uncapped_max is
the max frequency supported by the hardware when not capped in any way by
governors or the thermal framework?

If we choose the first definition, then we have to recalculate the cpu
capacity scaling factor (arch_scale_cpu_capacity()) too whenever
policy->max changes, such that capacity_orig is updated appropriately.

The scale-invariance code in the scheduler assumes:

	arch_scale_cpu_capacity() * arch_scale_freq_capacity() = current capacity

...and that capacity_orig = arch_scale_cpu_capacity() is the max available
capacity. If we cap the frequency to, say, 50% by setting policy->max, then
we have to reduce arch_scale_cpu_capacity() to 50% to still get the right
current capacity using the expression above.

Using the second definition, arch_scale_cpu_capacity() can be a static
value and arch_scale_freq_capacity() is always relative to uncapped_max. It
seems simpler, but capacity_orig could then be an unavailable capacity and
hence we would need to introduce a third capacity to track the current max
capacity and use that for scheduling decisions.

As you have already discovered, the current code is a combination of both,
which is broken when policy->max is reduced.

Thinking more about it, I would suggest going with the first definition.
The scheduler doesn't need to know about currently unavailable compute
capacity; it should balance based on the current situation, so it seems to
make sense to let capacity_orig reflect the current max capacity.

I would suggest that we fix arch_scale_cpu_capacity() to take policy->max
changes into account. We need to know the uncapped max frequency somehow to
do that. I haven't looked into whether we can get that from cpufreq. Also,
we need to make sure that no load-balance code assumes that cpus have a
capacity of 1024.

> Sorry if these questions have been discussed before :)

No problem. I don't think we have discussed it in this much detail before,
and these are very valid points.

Thanks,
Morten
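A numeric sketch of the two definitions discussed above, with made-up
values. The helper below only mirrors, roughly, how the scheduler combines
the two factors (multiply and shift by SCHED_CAPACITY_SHIFT); it is an
illustration, not code from this patch set.

#define SCHED_CAPACITY_SHIFT	10

/* How the cpu and frequency factors are combined into a current capacity. */
static unsigned long cap_scale(unsigned long cap, unsigned long scale)
{
	return (cap * scale) >> SCHED_CAPACITY_SHIFT;
}

/*
 * CPU with static capacity 1024, hardware max 2000 MHz, policy->max capped
 * to 1000 MHz by thermal, currently running at 1000 MHz.
 *
 * Definition 1 (100% == policy->max):
 *	arch_scale_freq_capacity() = 1024, arch_scale_cpu_capacity() rescaled to 512
 *	cap_scale(512, 1024) == 512	current capacity; capacity_orig == 512
 *
 * Definition 2 (100% == uncapped hardware max):
 *	arch_scale_freq_capacity() = 512, arch_scale_cpu_capacity() stays 1024
 *	cap_scale(1024, 512) == 512	same current capacity, but
 *					capacity_orig == 1024 is unreachable
 */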
On Thu, Jul 23, 2015 at 12:06:26PM +0100, Morten Rasmussen wrote:
> On Wed, Jul 22, 2015 at 10:59:04PM +0800, Leo Yan wrote:
> > [snip]
>
> Yes. We have patches for arm64 if you are interested. We are using them
> for the Juno platforms.

If convenient, please share the related patches with me, so I can apply
them directly and do some profiling work.

> [snip]
>
> The scale-invariance code in the scheduler assumes:
>
>	arch_scale_cpu_capacity() * arch_scale_freq_capacity() = current capacity

This is an important concept, thanks for explaining it.

> ...and that capacity_orig = arch_scale_cpu_capacity() is the max available
> capacity. If we cap the frequency to, say, 50% by setting policy->max, then
> we have to reduce arch_scale_cpu_capacity() to 50% to still get the right
> current capacity using the expression above.
>
> Using the second definition, arch_scale_cpu_capacity() can be a static
> value and arch_scale_freq_capacity() is always relative to uncapped_max.
> It seems simpler, but capacity_orig could then be an unavailable capacity
> and hence we would need to introduce a third capacity to track the current
> max capacity and use that for scheduling decisions.
>
> As you have already discovered, the current code is a combination of both,
> which is broken when policy->max is reduced.
>
> Thinking more about it, I would suggest going with the first definition.
> The scheduler doesn't need to know about currently unavailable compute
> capacity; it should balance based on the current situation, so it seems to
> make sense to let capacity_orig reflect the current max capacity.

Agree.

> I would suggest that we fix arch_scale_cpu_capacity() to take policy->max
> changes into account. We need to know the uncapped max frequency somehow to
> do that. I haven't looked into whether we can get that from cpufreq. Also,
> we need to make sure that no load-balance code assumes that cpus have a
> capacity of 1024.

The cpufreq framework provides the APIs *cpufreq_quick_get_max()* and
*cpufreq_quick_get()* for querying the max and current frequency, but I'm
curious whether these two functions can be called directly by the
scheduler, since they acquire and release locks internally.

Thanks,
Leo Yan
On Thu, Jul 23, 2015 at 10:22:16PM +0800, Leo Yan wrote:
> On Thu, Jul 23, 2015 at 12:06:26PM +0100, Morten Rasmussen wrote:
> > Yes. We have patches for arm64 if you are interested. We are using them
> > for the Juno platforms.
>
> If convenient, please share the related patches with me, so I can apply
> them directly and do some profiling work.

Will do.

> > [snip]
> >
> > The scale-invariance code in the scheduler assumes:
> >
> >	arch_scale_cpu_capacity() * arch_scale_freq_capacity() = current capacity
>
> This is an important concept, thanks for explaining it.

No problem, thanks for reviewing the patches.

> > [snip]
> >
> > I would suggest that we fix arch_scale_cpu_capacity() to take policy->max
> > changes into account. We need to know the uncapped max frequency somehow
> > to do that. I haven't looked into whether we can get that from cpufreq.
> > Also, we need to make sure that no load-balance code assumes that cpus
> > have a capacity of 1024.
>
> The cpufreq framework provides the APIs *cpufreq_quick_get_max()* and
> *cpufreq_quick_get()* for querying the max and current frequency, but I'm
> curious whether these two functions can be called directly by the
> scheduler, since they acquire and release locks internally.

The arch_scale_{cpu,freq}_capacity() functions are called from contexts
where blocking/sleeping is not allowed, so that rules out calling functions
that take locks. We currently avoid that by using atomics.

However, even if we had non-sleeping functions to call into cpufreq, we
would still need some code in arch/* to make that call, so it is only the
variables storing the current frequencies that we could move into cpufreq.
But they would naturally belong there, so I guess it is worth it.
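Building on the hypothetical cpufreq_get_freq_scale() sketch earlier in
this thread, the remaining arch/* glue described above could stay as thin
as the following. Again, the cpufreq helper name is an assumption used only
for illustration.

struct sched_domain;
extern unsigned long cpufreq_get_freq_scale(int cpu);	/* hypothetical helper */

unsigned long arm_arch_scale_freq_capacity(struct sched_domain *sd, int cpu)
{
	/* Called from non-sleeping context: the helper must not take locks. */
	return cpufreq_get_freq_scale(cpu);
}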
Hi Morten,

On 7 July 2015 at 20:23, Morten Rasmussen <morten.rasmussen@arm.com> wrote:
> From: Morten Rasmussen <Morten.Rasmussen@arm.com>
>
> [snip]
>
> diff --git a/arch/arm/kernel/topology.c b/arch/arm/kernel/topology.c
> index 08b7847..9c09e6e 100644
> --- a/arch/arm/kernel/topology.c
> +++ b/arch/arm/kernel/topology.c
> @@ -169,6 +169,23 @@ static void update_cpu_capacity(unsigned int cpu)
>                 cpu, arch_scale_cpu_capacity(NULL, cpu));
>  }
>
> +/*
> + * Scheduler load-tracking scale-invariance
> + *
> + * Provides the scheduler with a scale-invariance correction factor that
> + * compensates for frequency scaling (arch_scale_freq_capacity()). The scaling
> + * factor is updated in smp.c
> + */
> +unsigned long arm_arch_scale_freq_capacity(struct sched_domain *sd, int cpu)
> +{
> +       unsigned long curr = atomic_long_read(&per_cpu(cpu_freq_capacity, cpu));

The access to cpu_freq_capacity should be put under #ifdef CONFIG_CPU_FREQ.

Why haven't you moved arm_arch_scale_freq_capacity() into smp.c, as
everything else for frequency invariance is already in that file? That
would also enable you to remove
DECLARE_PER_CPU(atomic_long_t, cpu_freq_capacity); from topology.h.

Vincent

> +
> +       if (!curr)
> +               return SCHED_CAPACITY_SCALE;
> +
> +       return curr;
> +}
> +
>  #else
>  static inline void parse_dt_topology(void) {}
>  static inline void update_cpu_capacity(unsigned int cpuid) {}
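For reference, a sketch of the rearrangement Vincent suggests, keeping
everything in arch/arm/kernel/smp.c. This only illustrates the shape of the
change and is not a tested patch.

/* arch/arm/kernel/smp.c */
#ifdef CONFIG_CPU_FREQ

/* Can now be static: no longer referenced from topology.c or topology.h. */
static DEFINE_PER_CPU(atomic_long_t, cpu_freq_capacity);

unsigned long arm_arch_scale_freq_capacity(struct sched_domain *sd, int cpu)
{
	unsigned long curr = atomic_long_read(&per_cpu(cpu_freq_capacity, cpu));

	if (!curr)
		return SCHED_CAPACITY_SCALE;

	return curr;
}

#endif /* CONFIG_CPU_FREQ */

The DECLARE_PER_CPU(atomic_long_t, cpu_freq_capacity); line in topology.h
could then be dropped, though !CONFIG_CPU_FREQ builds would still need a
fallback that returns SCHED_CAPACITY_SCALE, since topology.h defines
arch_scale_freq_capacity unconditionally for SMP.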
On Tue, Jul 07, 2015 at 07:23:44PM +0100, Morten Rasmussen wrote:
> +static DEFINE_PER_CPU(atomic_long_t, cpu_max_freq);
> +DEFINE_PER_CPU(atomic_long_t, cpu_freq_capacity);

> +	atomic_long_set(&per_cpu(cpu_freq_capacity, cpu), capacity);

> +	unsigned long max = atomic_long_read(&per_cpu(cpu_max_freq, cpu));

> +		atomic_long_set(&per_cpu(cpu_max_freq, i), policy->max);

> +	unsigned long curr = atomic_long_read(&per_cpu(cpu_freq_capacity, cpu));

The use of atomic_long_t here is entirely pointless.

In fact (and someone needs to go fix this), it's worse than
WRITE_ONCE()/READ_ONCE().
On Tue, Aug 11, 2015 at 11:27:54AM +0200, Peter Zijlstra wrote:
> On Tue, Jul 07, 2015 at 07:23:44PM +0100, Morten Rasmussen wrote:
> > +static DEFINE_PER_CPU(atomic_long_t, cpu_max_freq);
> > +DEFINE_PER_CPU(atomic_long_t, cpu_freq_capacity);
>
> [snip]
>
> The use of atomic_long_t here is entirely pointless.

Yes, I will get rid of those.
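For reference, the WRITE_ONCE()/READ_ONCE() variant Peter points at could
look roughly like this. A sketch only: it assumes a plain per-cpu unsigned
long is sufficient because the scaling factor is a single word written by
one path and read by the scheduler.

static DEFINE_PER_CPU(unsigned long, cpu_freq_capacity);

static inline
void scale_freq_capacity(int cpu, unsigned long curr, unsigned long max)
{
	if (!max)
		return;

	/* Single-word store; no atomic_long_t needed. */
	WRITE_ONCE(per_cpu(cpu_freq_capacity, cpu),
		   (curr << SCHED_CAPACITY_SHIFT) / max);
}

unsigned long arm_arch_scale_freq_capacity(struct sched_domain *sd, int cpu)
{
	unsigned long curr = READ_ONCE(per_cpu(cpu_freq_capacity, cpu));

	return curr ? curr : SCHED_CAPACITY_SCALE;
}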
diff --git a/arch/arm/include/asm/topology.h b/arch/arm/include/asm/topology.h
index 370f7a7..c31096f 100644
--- a/arch/arm/include/asm/topology.h
+++ b/arch/arm/include/asm/topology.h
@@ -24,6 +24,13 @@ void init_cpu_topology(void);
 void store_cpu_topology(unsigned int cpuid);
 const struct cpumask *cpu_coregroup_mask(int cpu);
 
+#define arch_scale_freq_capacity arm_arch_scale_freq_capacity
+struct sched_domain;
+extern
+unsigned long arm_arch_scale_freq_capacity(struct sched_domain *sd, int cpu);
+
+DECLARE_PER_CPU(atomic_long_t, cpu_freq_capacity);
+
 #else
 
 static inline void init_cpu_topology(void) { }
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index cca5b87..a32539c 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -677,12 +677,34 @@ static DEFINE_PER_CPU(unsigned long, l_p_j_ref);
 static DEFINE_PER_CPU(unsigned long, l_p_j_ref_freq);
 static unsigned long global_l_p_j_ref;
 static unsigned long global_l_p_j_ref_freq;
+static DEFINE_PER_CPU(atomic_long_t, cpu_max_freq);
+DEFINE_PER_CPU(atomic_long_t, cpu_freq_capacity);
+
+/*
+ * Scheduler load-tracking scale-invariance
+ *
+ * Provides the scheduler with a scale-invariance correction factor that
+ * compensates for frequency scaling through arch_scale_freq_capacity()
+ * (implemented in topology.c).
+ */
+static inline
+void scale_freq_capacity(int cpu, unsigned long curr, unsigned long max)
+{
+	unsigned long capacity;
+
+	if (!max)
+		return;
+
+	capacity = (curr << SCHED_CAPACITY_SHIFT) / max;
+	atomic_long_set(&per_cpu(cpu_freq_capacity, cpu), capacity);
+}
 
 static int cpufreq_callback(struct notifier_block *nb,
 					unsigned long val, void *data)
 {
 	struct cpufreq_freqs *freq = data;
 	int cpu = freq->cpu;
+	unsigned long max = atomic_long_read(&per_cpu(cpu_max_freq, cpu));
 
 	if (freq->flags & CPUFREQ_CONST_LOOPS)
 		return NOTIFY_OK;
@@ -707,6 +729,10 @@ static int cpufreq_callback(struct notifier_block *nb,
 			per_cpu(l_p_j_ref_freq, cpu),
 			freq->new);
 	}
+
+	if (val == CPUFREQ_PRECHANGE)
+		scale_freq_capacity(cpu, freq->new, max);
+
 	return NOTIFY_OK;
 }
 
@@ -714,11 +740,38 @@ static struct notifier_block cpufreq_notifier = {
 	.notifier_call  = cpufreq_callback,
 };
 
+static int cpufreq_policy_callback(struct notifier_block *nb,
+				   unsigned long val, void *data)
+{
+	struct cpufreq_policy *policy = data;
+	int i;
+
+	if (val != CPUFREQ_NOTIFY)
+		return NOTIFY_OK;
+
+	for_each_cpu(i, policy->cpus) {
+		scale_freq_capacity(i, policy->cur, policy->max);
+		atomic_long_set(&per_cpu(cpu_max_freq, i), policy->max);
+	}
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block cpufreq_policy_notifier = {
+	.notifier_call = cpufreq_policy_callback,
+};
+
 static int __init register_cpufreq_notifier(void)
 {
-	return cpufreq_register_notifier(&cpufreq_notifier,
+	int ret;
+
+	ret = cpufreq_register_notifier(&cpufreq_notifier,
 						CPUFREQ_TRANSITION_NOTIFIER);
+	if (ret)
+		return ret;
+
+	return cpufreq_register_notifier(&cpufreq_policy_notifier,
+					 CPUFREQ_POLICY_NOTIFIER);
 }
 core_initcall(register_cpufreq_notifier);
-
 #endif
diff --git a/arch/arm/kernel/topology.c b/arch/arm/kernel/topology.c
index 08b7847..9c09e6e 100644
--- a/arch/arm/kernel/topology.c
+++ b/arch/arm/kernel/topology.c
@@ -169,6 +169,23 @@ static void update_cpu_capacity(unsigned int cpu)
 		cpu, arch_scale_cpu_capacity(NULL, cpu));
 }
 
+/*
+ * Scheduler load-tracking scale-invariance
+ *
+ * Provides the scheduler with a scale-invariance correction factor that
+ * compensates for frequency scaling (arch_scale_freq_capacity()). The scaling
+ * factor is updated in smp.c
+ */
+unsigned long arm_arch_scale_freq_capacity(struct sched_domain *sd, int cpu)
+{
+	unsigned long curr = atomic_long_read(&per_cpu(cpu_freq_capacity, cpu));
+
+	if (!curr)
+		return SCHED_CAPACITY_SCALE;
+
+	return curr;
+}
+
 #else
 static inline void parse_dt_topology(void) {}
 static inline void update_cpu_capacity(unsigned int cpuid) {}