Message ID: e2b56be446eeb67f1905d2ac6f8d82dd4389d0c5.1552640968.git.viresh.kumar@linaro.org (mailing list archive)
State: New, archived
Series: [V2] cpufreq: Call transition notifier only once for each policy
On Fri, Mar 15, 2019 at 02:43:07PM +0530, Viresh Kumar wrote: > diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c > index 3fae23834069..cff8779fc0d2 100644 > --- a/arch/x86/kernel/tsc.c > +++ b/arch/x86/kernel/tsc.c > @@ -956,28 +956,38 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, > void *data) > { > struct cpufreq_freqs *freq = data; > - unsigned long *lpj; > - > - lpj = &boot_cpu_data.loops_per_jiffy; > -#ifdef CONFIG_SMP > - if (!(freq->flags & CPUFREQ_CONST_LOOPS)) > - lpj = &cpu_data(freq->cpu).loops_per_jiffy; > -#endif > + struct cpumask *cpus = freq->policy->cpus; > + bool boot_cpu = !IS_ENABLED(CONFIG_SMP) || freq->flags & CPUFREQ_CONST_LOOPS; > + unsigned long lpj; > + int cpu; > > if (!ref_freq) { > ref_freq = freq->old; > - loops_per_jiffy_ref = *lpj; > tsc_khz_ref = tsc_khz; > + > + if (boot_cpu) > + loops_per_jiffy_ref = boot_cpu_data.loops_per_jiffy; > + else > + loops_per_jiffy_ref = cpu_data(cpumask_first(cpus)).loops_per_jiffy; > } > + > if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) || > (val == CPUFREQ_POSTCHANGE && freq->old > freq->new)) { > - *lpj = cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new); > - > + lpj = cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new); > tsc_khz = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new); > + > if (!(freq->flags & CPUFREQ_CONST_LOOPS)) > mark_tsc_unstable("cpufreq changes"); > > - set_cyc2ns_scale(tsc_khz, freq->cpu, rdtsc()); > + if (boot_cpu) { > + boot_cpu_data.loops_per_jiffy = lpj; > + } else { > + for_each_cpu(cpu, cpus) > + cpu_data(cpu).loops_per_jiffy = lpj; > + } > + > + for_each_cpu(cpu, cpus) > + set_cyc2ns_scale(tsc_khz, cpu, rdtsc()); This code doesn't make sense, the rdtsc() _must_ be called on the CPU in question. That's part of the whole problem here, TSC isn't sync'ed when it's subject to CPUFREQ. > } > > return 0;
On 15-03-19, 13:29, Peter Zijlstra wrote: > On Fri, Mar 15, 2019 at 02:43:07PM +0530, Viresh Kumar wrote: > > diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c > > index 3fae23834069..cff8779fc0d2 100644 > > --- a/arch/x86/kernel/tsc.c > > +++ b/arch/x86/kernel/tsc.c > > @@ -956,28 +956,38 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, > > void *data) > > { > > struct cpufreq_freqs *freq = data; > > - unsigned long *lpj; > > - > > - lpj = &boot_cpu_data.loops_per_jiffy; > > -#ifdef CONFIG_SMP > > - if (!(freq->flags & CPUFREQ_CONST_LOOPS)) > > - lpj = &cpu_data(freq->cpu).loops_per_jiffy; > > -#endif > > + struct cpumask *cpus = freq->policy->cpus; > > + bool boot_cpu = !IS_ENABLED(CONFIG_SMP) || freq->flags & CPUFREQ_CONST_LOOPS; > > + unsigned long lpj; > > + int cpu; > > > > if (!ref_freq) { > > ref_freq = freq->old; > > - loops_per_jiffy_ref = *lpj; > > tsc_khz_ref = tsc_khz; > > + > > + if (boot_cpu) > > + loops_per_jiffy_ref = boot_cpu_data.loops_per_jiffy; > > + else > > + loops_per_jiffy_ref = cpu_data(cpumask_first(cpus)).loops_per_jiffy; > > } > > + > > if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) || > > (val == CPUFREQ_POSTCHANGE && freq->old > freq->new)) { > > - *lpj = cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new); > > - > > + lpj = cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new); > > tsc_khz = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new); > > + > > if (!(freq->flags & CPUFREQ_CONST_LOOPS)) > > mark_tsc_unstable("cpufreq changes"); > > > > - set_cyc2ns_scale(tsc_khz, freq->cpu, rdtsc()); > > + if (boot_cpu) { > > + boot_cpu_data.loops_per_jiffy = lpj; > > + } else { > > + for_each_cpu(cpu, cpus) > > + cpu_data(cpu).loops_per_jiffy = lpj; > > + } > > + > > + for_each_cpu(cpu, cpus) > > + set_cyc2ns_scale(tsc_khz, cpu, rdtsc()); > > This code doesn't make sense, the rdtsc() _must_ be called on the CPU in > question. You mean rdtsc() must be locally on that CPU? The cpufreq core never guaranteed that and it was left for the notifier to do. This patch doesn't change the behavior at all, just that it moves the for-loop to the notifier instead of the cpufreq core. > That's part of the whole problem here, TSC isn't sync'ed when > it's subject to CPUFREQ. > > > } > > > > return 0;
On Fri, Mar 15, 2019 at 1:30 PM Peter Zijlstra <peterz@infradead.org> wrote: > > On Fri, Mar 15, 2019 at 02:43:07PM +0530, Viresh Kumar wrote: > > diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c > > index 3fae23834069..cff8779fc0d2 100644 > > --- a/arch/x86/kernel/tsc.c > > +++ b/arch/x86/kernel/tsc.c > > @@ -956,28 +956,38 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, > > void *data) > > { > > struct cpufreq_freqs *freq = data; > > - unsigned long *lpj; > > - > > - lpj = &boot_cpu_data.loops_per_jiffy; > > -#ifdef CONFIG_SMP > > - if (!(freq->flags & CPUFREQ_CONST_LOOPS)) > > - lpj = &cpu_data(freq->cpu).loops_per_jiffy; > > -#endif > > + struct cpumask *cpus = freq->policy->cpus; > > + bool boot_cpu = !IS_ENABLED(CONFIG_SMP) || freq->flags & CPUFREQ_CONST_LOOPS; > > + unsigned long lpj; > > + int cpu; > > > > if (!ref_freq) { > > ref_freq = freq->old; > > - loops_per_jiffy_ref = *lpj; > > tsc_khz_ref = tsc_khz; > > + > > + if (boot_cpu) > > + loops_per_jiffy_ref = boot_cpu_data.loops_per_jiffy; > > + else > > + loops_per_jiffy_ref = cpu_data(cpumask_first(cpus)).loops_per_jiffy; > > } > > + > > if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) || > > (val == CPUFREQ_POSTCHANGE && freq->old > freq->new)) { > > - *lpj = cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new); > > - > > + lpj = cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new); > > tsc_khz = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new); > > + > > if (!(freq->flags & CPUFREQ_CONST_LOOPS)) > > mark_tsc_unstable("cpufreq changes"); > > > > - set_cyc2ns_scale(tsc_khz, freq->cpu, rdtsc()); > > + if (boot_cpu) { > > + boot_cpu_data.loops_per_jiffy = lpj; > > + } else { > > + for_each_cpu(cpu, cpus) > > + cpu_data(cpu).loops_per_jiffy = lpj; > > + } > > + > > + for_each_cpu(cpu, cpus) > > + set_cyc2ns_scale(tsc_khz, cpu, rdtsc()); > > This code doesn't make sense, the rdtsc() _must_ be called on the CPU in > question. Well, strictly speaking the TSC value here comes from the CPU running the code. The original code has this problem too, though (as Viresh said), so the patch really doesn't make it worse in that respect. :-) I'm not going to defend the original code (I ldidn't invent it anyway), but it clearly assumes that different CPUs cannot run at different frequencies and that kind of explains what happens in it. > That's part of the whole problem here, TSC isn't sync'ed when > it's subject to CPUFREQ. So what would you recommend us to do here? Obviously, this won't run on any new hardware. Frankly, I'm not even sure what the most recent HW where this hack would make a difference is (the comment talking about Opterons suggests early 2000s), so this clearly falls into the "legacy" bucket to me. Does it make sense to try to preserve it, or can we simply make cpufreq init fail on the systems where the TSC rate depends on the frequency?
On Mon, Mar 18, 2019 at 08:05:14AM +0530, Viresh Kumar wrote: > On 15-03-19, 13:29, Peter Zijlstra wrote: > > On Fri, Mar 15, 2019 at 02:43:07PM +0530, Viresh Kumar wrote: > > > diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c > > > index 3fae23834069..cff8779fc0d2 100644 > > > --- a/arch/x86/kernel/tsc.c > > > +++ b/arch/x86/kernel/tsc.c > > > @@ -956,28 +956,38 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, > > > void *data) > > > { > > > struct cpufreq_freqs *freq = data; > > > - unsigned long *lpj; > > > - > > > - lpj = &boot_cpu_data.loops_per_jiffy; > > > -#ifdef CONFIG_SMP > > > - if (!(freq->flags & CPUFREQ_CONST_LOOPS)) > > > - lpj = &cpu_data(freq->cpu).loops_per_jiffy; > > > -#endif > > > + struct cpumask *cpus = freq->policy->cpus; > > > + bool boot_cpu = !IS_ENABLED(CONFIG_SMP) || freq->flags & CPUFREQ_CONST_LOOPS; > > > + unsigned long lpj; > > > + int cpu; > > > > > > if (!ref_freq) { > > > ref_freq = freq->old; > > > - loops_per_jiffy_ref = *lpj; > > > tsc_khz_ref = tsc_khz; > > > + > > > + if (boot_cpu) > > > + loops_per_jiffy_ref = boot_cpu_data.loops_per_jiffy; > > > + else > > > + loops_per_jiffy_ref = cpu_data(cpumask_first(cpus)).loops_per_jiffy; > > > } > > > + > > > if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) || > > > (val == CPUFREQ_POSTCHANGE && freq->old > freq->new)) { > > > - *lpj = cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new); > > > - > > > + lpj = cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new); > > > tsc_khz = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new); > > > + > > > if (!(freq->flags & CPUFREQ_CONST_LOOPS)) > > > mark_tsc_unstable("cpufreq changes"); > > > > > > - set_cyc2ns_scale(tsc_khz, freq->cpu, rdtsc()); > > > + if (boot_cpu) { > > > + boot_cpu_data.loops_per_jiffy = lpj; > > > + } else { > > > + for_each_cpu(cpu, cpus) > > > + cpu_data(cpu).loops_per_jiffy = lpj; > > > + } > > > + > > > + for_each_cpu(cpu, cpus) > > > + set_cyc2ns_scale(tsc_khz, cpu, rdtsc()); > > > > This code doesn't make sense, the rdtsc() _must_ be called on the CPU in > > question. > > You mean rdtsc() must be locally on that CPU? The cpufreq core never guaranteed > that and it was left for the notifier to do. This patch doesn't change the > behavior at all, just that it moves the for-loop to the notifier instead of the > cpufreq core. Yuck.. Rafael; how does this work in practise? Earlier you said that on x86 the policies typically have a single cpu in them anyway. Is the freq change also notified from _that_ cpu? I don't think I have old enough hardware around anymore to test any of this. This was truly ancient p6 era stuff IIRC. Because in that case, I'm all for not doing the changes to this notifier Viresh is proposing but simply adding something like: WARN_ON_ONCE(cpumask_weight(cpuc) != 1); WARN_ON_ONCE(cpumask_first(cpuc) != smp_processor_id()); And leave it at that.
On Mon, Mar 18, 2019 at 11:45:00AM +0100, Rafael J. Wysocki wrote:
> On Fri, Mar 15, 2019 at 1:30 PM Peter Zijlstra <peterz@infradead.org> wrote:
> > On Fri, Mar 15, 2019 at 02:43:07PM +0530, Viresh Kumar wrote:
> > > +	for_each_cpu(cpu, cpus)
> > > +		set_cyc2ns_scale(tsc_khz, cpu, rdtsc());
> >
> > This code doesn't make sense, the rdtsc() _must_ be called on the CPU in
> > question.
>
> Well, strictly speaking the TSC value here comes from the CPU running the code.
>
> The original code has this problem too, though (as Viresh said), so
> the patch really doesn't make it worse in that respect. :-)
>
> I'm not going to defend the original code (I ldidn't invent it
> anyway), but it clearly assumes that different CPUs cannot run at
> different frequencies and that kind of explains what happens in it.

The assumption was always that if CPUs ran at different frequencies, the
notifier would run on the affected CPU. After all, only that CPU would
know its frequency changed.

> > That's part of the whole problem here, TSC isn't sync'ed when
> > it's subject to CPUFREQ.
>
> So what would you recommend us to do here?
>
> Obviously, this won't run on any new hardware. Frankly, I'm not even
> sure what the most recent HW where this hack would make a difference
> is (the comment talking about Opterons suggests early 2000s), so this
> clearly falls into the "legacy" bucket to me.
>
> Does it make sense to try to preserve it, or can we simply make
> cpufreq init fail on the systems where the TSC rate depends on the
> frequency?

I'm all for deleting this and basically dropping support for anything
that needs this, but I suspect some people digging their legacy systems
(*cough* Pavel *cough*) might object to that.

Heck, I'm even ok with just calling panic() when TSC goes wobbly :-)
I'm fed up with all that broken crap. And yes, I know, that's systems
sold today :-(
On Mon, Mar 18, 2019 at 11:54 AM Peter Zijlstra <peterz@infradead.org> wrote: > > On Mon, Mar 18, 2019 at 08:05:14AM +0530, Viresh Kumar wrote: > > On 15-03-19, 13:29, Peter Zijlstra wrote: > > > On Fri, Mar 15, 2019 at 02:43:07PM +0530, Viresh Kumar wrote: > > > > diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c > > > > index 3fae23834069..cff8779fc0d2 100644 > > > > --- a/arch/x86/kernel/tsc.c > > > > +++ b/arch/x86/kernel/tsc.c > > > > @@ -956,28 +956,38 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, > > > > void *data) > > > > { > > > > struct cpufreq_freqs *freq = data; > > > > - unsigned long *lpj; > > > > - > > > > - lpj = &boot_cpu_data.loops_per_jiffy; > > > > -#ifdef CONFIG_SMP > > > > - if (!(freq->flags & CPUFREQ_CONST_LOOPS)) > > > > - lpj = &cpu_data(freq->cpu).loops_per_jiffy; > > > > -#endif > > > > + struct cpumask *cpus = freq->policy->cpus; > > > > + bool boot_cpu = !IS_ENABLED(CONFIG_SMP) || freq->flags & CPUFREQ_CONST_LOOPS; > > > > + unsigned long lpj; > > > > + int cpu; > > > > > > > > if (!ref_freq) { > > > > ref_freq = freq->old; > > > > - loops_per_jiffy_ref = *lpj; > > > > tsc_khz_ref = tsc_khz; > > > > + > > > > + if (boot_cpu) > > > > + loops_per_jiffy_ref = boot_cpu_data.loops_per_jiffy; > > > > + else > > > > + loops_per_jiffy_ref = cpu_data(cpumask_first(cpus)).loops_per_jiffy; > > > > } > > > > + > > > > if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) || > > > > (val == CPUFREQ_POSTCHANGE && freq->old > freq->new)) { > > > > - *lpj = cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new); > > > > - > > > > + lpj = cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new); > > > > tsc_khz = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new); > > > > + > > > > if (!(freq->flags & CPUFREQ_CONST_LOOPS)) > > > > mark_tsc_unstable("cpufreq changes"); > > > > > > > > - set_cyc2ns_scale(tsc_khz, freq->cpu, rdtsc()); > > > > + if (boot_cpu) { > > > > + boot_cpu_data.loops_per_jiffy = lpj; > > > > + } else { > > > > + for_each_cpu(cpu, cpus) > > > > + cpu_data(cpu).loops_per_jiffy = lpj; > > > > + } > > > > + > > > > + for_each_cpu(cpu, cpus) > > > > + set_cyc2ns_scale(tsc_khz, cpu, rdtsc()); > > > > > > This code doesn't make sense, the rdtsc() _must_ be called on the CPU in > > > question. > > > > You mean rdtsc() must be locally on that CPU? The cpufreq core never guaranteed > > that and it was left for the notifier to do. This patch doesn't change the > > behavior at all, just that it moves the for-loop to the notifier instead of the > > cpufreq core. > > Yuck.. > > Rafael; how does this work in practise? Earlier you said that on x86 the > policies typically have a single cpu in them anyway. Yes. > Is the freq change also notified from _that_ cpu? May not be, depending on what CPU runs the work item/thread changing the freq. It generally is not guaranteed to always be the same as the target CPU. > I don't think I have old enough hardware around anymore to test any of > this. This was truly ancient p6 era stuff IIRC. > > Because in that case, I'm all for not doing the changes to this notifier > Viresh is proposing but simply adding something like: > > > WARN_ON_ONCE(cpumask_weight(cpuc) != 1); > WARN_ON_ONCE(cpumask_first(cpuc) != smp_processor_id()); > > And leave it at that. That may not work I'm afraid.
On Mon, Mar 18, 2019 at 12:09 PM Rafael J. Wysocki <rafael@kernel.org> wrote: > > On Mon, Mar 18, 2019 at 11:54 AM Peter Zijlstra <peterz@infradead.org> wrote: > > > > On Mon, Mar 18, 2019 at 08:05:14AM +0530, Viresh Kumar wrote: > > > On 15-03-19, 13:29, Peter Zijlstra wrote: > > > > On Fri, Mar 15, 2019 at 02:43:07PM +0530, Viresh Kumar wrote: > > > > > diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c > > > > > index 3fae23834069..cff8779fc0d2 100644 > > > > > --- a/arch/x86/kernel/tsc.c > > > > > +++ b/arch/x86/kernel/tsc.c > > > > > @@ -956,28 +956,38 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, > > > > > void *data) > > > > > { > > > > > struct cpufreq_freqs *freq = data; > > > > > - unsigned long *lpj; > > > > > - > > > > > - lpj = &boot_cpu_data.loops_per_jiffy; > > > > > -#ifdef CONFIG_SMP > > > > > - if (!(freq->flags & CPUFREQ_CONST_LOOPS)) > > > > > - lpj = &cpu_data(freq->cpu).loops_per_jiffy; > > > > > -#endif > > > > > + struct cpumask *cpus = freq->policy->cpus; > > > > > + bool boot_cpu = !IS_ENABLED(CONFIG_SMP) || freq->flags & CPUFREQ_CONST_LOOPS; > > > > > + unsigned long lpj; > > > > > + int cpu; > > > > > > > > > > if (!ref_freq) { > > > > > ref_freq = freq->old; > > > > > - loops_per_jiffy_ref = *lpj; > > > > > tsc_khz_ref = tsc_khz; > > > > > + > > > > > + if (boot_cpu) > > > > > + loops_per_jiffy_ref = boot_cpu_data.loops_per_jiffy; > > > > > + else > > > > > + loops_per_jiffy_ref = cpu_data(cpumask_first(cpus)).loops_per_jiffy; > > > > > } > > > > > + > > > > > if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) || > > > > > (val == CPUFREQ_POSTCHANGE && freq->old > freq->new)) { > > > > > - *lpj = cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new); > > > > > - > > > > > + lpj = cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new); > > > > > tsc_khz = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new); > > > > > + > > > > > if (!(freq->flags & CPUFREQ_CONST_LOOPS)) > > > > > mark_tsc_unstable("cpufreq changes"); > > > > > > > > > > - set_cyc2ns_scale(tsc_khz, freq->cpu, rdtsc()); > > > > > + if (boot_cpu) { > > > > > + boot_cpu_data.loops_per_jiffy = lpj; > > > > > + } else { > > > > > + for_each_cpu(cpu, cpus) > > > > > + cpu_data(cpu).loops_per_jiffy = lpj; > > > > > + } > > > > > + > > > > > + for_each_cpu(cpu, cpus) > > > > > + set_cyc2ns_scale(tsc_khz, cpu, rdtsc()); > > > > > > > > This code doesn't make sense, the rdtsc() _must_ be called on the CPU in > > > > question. > > > > > > You mean rdtsc() must be locally on that CPU? The cpufreq core never guaranteed > > > that and it was left for the notifier to do. This patch doesn't change the > > > behavior at all, just that it moves the for-loop to the notifier instead of the > > > cpufreq core. > > > > Yuck.. > > > > Rafael; how does this work in practise? Earlier you said that on x86 the > > policies typically have a single cpu in them anyway. > > Yes. > > > Is the freq change also notified from _that_ cpu? > > May not be, depending on what CPU runs the work item/thread changing > the freq. It generally is not guaranteed to always be the same as the > target CPU. Actually, scratch that. On x86, with one CPU per cpufreq policy, that will always be the target CPU. > > I don't think I have old enough hardware around anymore to test any of > > this. This was truly ancient p6 era stuff IIRC. 
> >
> > Because in that case, I'm all for not doing the changes to this notifier
> > Viresh is proposing but simply adding something like:
> >
> >
> > WARN_ON_ONCE(cpumask_weight(cpuc) != 1);
> > WARN_ON_ONCE(cpumask_first(cpuc) != smp_processor_id());
> >
> > And leave it at that.
>
> That may not work I'm afraid.

So something like that could work.
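For reference, a minimal sketch of where the suggested sanity checks could sit in the x86 notifier. `cpuc` in the snippet above is assumed to mean the policy's CPU mask, written here as freq->policy->cpus, so this is illustrative rather than an actual posted patch:

static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
				 void *data)
{
	struct cpufreq_freqs *freq = data;

	/* One CPU per policy, and the notifier runs on that very CPU. */
	WARN_ON_ONCE(cpumask_weight(freq->policy->cpus) != 1);
	WARN_ON_ONCE(cpumask_first(freq->policy->cpus) != smp_processor_id());

	/* ... existing loops_per_jiffy / tsc_khz / cyc2ns handling unchanged ... */
	return 0;
}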
On Fri, Mar 15, 2019 at 10:13 AM Viresh Kumar <viresh.kumar@linaro.org> wrote: > > Currently we call these notifiers once for each CPU of the policy->cpus > cpumask. It would be more optimal if the notifier can be called only > once and all the relevant information be provided to it. Out of the 24 > drivers that register for the transition notifiers today, only 5 of them > do per-cpu updates and the callback for the rest can be called only once > for the policy without any impact. > > This would also avoid multiple function calls to the notifier callbacks > and reduce multiple iterations of notifier core's code (which does > locking as well). > > This patch adds pointer to the cpufreq policy to the struct > cpufreq_freqs, so the notifier callback has all the information > available to it with a single call. The five drivers which perform > per-cpu updates are updated to use the cpufreq policy. The freqs->cpu > field is redundant now and is removed. > > Acked-by: David S. Miller <davem@davemloft.net> (sparc) > Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org> > --- > V1->V2: > - Add cpufreq policy instead of cpus in struct cpufreq_freqs. > - Use policy->cpus instead of related_cpus everywhere in order not to > change the existing behavior. > - Merged all 7 patches into a single patch. > - Updated changlog and included Ack from David. > > arch/arm/kernel/smp.c | 24 +++++++++++++++--------- > arch/arm/kernel/smp_twd.c | 9 ++++++--- > arch/sparc/kernel/time_64.c | 28 ++++++++++++++++------------ > arch/x86/kernel/tsc.c | 32 +++++++++++++++++++++----------- > arch/x86/kvm/x86.c | 31 ++++++++++++++++++++----------- > drivers/cpufreq/cpufreq.c | 19 ++++++++++--------- > include/linux/cpufreq.h | 14 +++++++------- > 7 files changed, 95 insertions(+), 62 deletions(-) > > diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c > index 1d6f5ea522f4..6f6b981fecda 100644 > --- a/arch/arm/kernel/smp.c > +++ b/arch/arm/kernel/smp.c > @@ -758,15 +758,20 @@ static int cpufreq_callback(struct notifier_block *nb, > unsigned long val, void *data) > { > struct cpufreq_freqs *freq = data; > - int cpu = freq->cpu; > + struct cpumask *cpus = freq->policy->cpus; > + int cpu, first = cpumask_first(cpus); > + unsigned int lpj; > > if (freq->flags & CPUFREQ_CONST_LOOPS) > return NOTIFY_OK; > > - if (!per_cpu(l_p_j_ref, cpu)) { > - per_cpu(l_p_j_ref, cpu) = > - per_cpu(cpu_data, cpu).loops_per_jiffy; > - per_cpu(l_p_j_ref_freq, cpu) = freq->old; > + if (!per_cpu(l_p_j_ref, first)) { > + for_each_cpu(cpu, cpus) { > + per_cpu(l_p_j_ref, cpu) = > + per_cpu(cpu_data, cpu).loops_per_jiffy; > + per_cpu(l_p_j_ref_freq, cpu) = freq->old; > + } > + > if (!global_l_p_j_ref) { > global_l_p_j_ref = loops_per_jiffy; > global_l_p_j_ref_freq = freq->old; > @@ -778,10 +783,11 @@ static int cpufreq_callback(struct notifier_block *nb, > loops_per_jiffy = cpufreq_scale(global_l_p_j_ref, > global_l_p_j_ref_freq, > freq->new); > - per_cpu(cpu_data, cpu).loops_per_jiffy = > - cpufreq_scale(per_cpu(l_p_j_ref, cpu), > - per_cpu(l_p_j_ref_freq, cpu), > - freq->new); > + > + lpj = cpufreq_scale(per_cpu(l_p_j_ref, first), > + per_cpu(l_p_j_ref_freq, first), freq->new); > + for_each_cpu(cpu, cpus) > + per_cpu(cpu_data, cpu).loops_per_jiffy = lpj; > } > return NOTIFY_OK; > } > diff --git a/arch/arm/kernel/smp_twd.c b/arch/arm/kernel/smp_twd.c > index b30eafeef096..495cc7282096 100644 > --- a/arch/arm/kernel/smp_twd.c > +++ b/arch/arm/kernel/smp_twd.c > @@ -162,15 +162,18 @@ static int twd_cpufreq_transition(struct notifier_block *nb, > 
unsigned long state, void *data) > { > struct cpufreq_freqs *freqs = data; > + int cpu; > > /* > * The twd clock events must be reprogrammed to account for the new > * frequency. The timer is local to a cpu, so cross-call to the > * changing cpu. > */ > - if (state == CPUFREQ_POSTCHANGE) > - smp_call_function_single(freqs->cpu, twd_update_frequency, > - NULL, 1); > + if (state != CPUFREQ_POSTCHANGE) > + return NOTIFY_OK; > + > + for_each_cpu(cpu, freqs->policy->cpus) > + smp_call_function_single(cpu, twd_update_frequency, NULL, 1); > > return NOTIFY_OK; > } > diff --git a/arch/sparc/kernel/time_64.c b/arch/sparc/kernel/time_64.c > index 3eb77943ce12..89fb05f90609 100644 > --- a/arch/sparc/kernel/time_64.c > +++ b/arch/sparc/kernel/time_64.c > @@ -653,19 +653,23 @@ static int sparc64_cpufreq_notifier(struct notifier_block *nb, unsigned long val > void *data) > { > struct cpufreq_freqs *freq = data; > - unsigned int cpu = freq->cpu; > - struct freq_table *ft = &per_cpu(sparc64_freq_table, cpu); > + unsigned int cpu; > + struct freq_table *ft; > > - if (!ft->ref_freq) { > - ft->ref_freq = freq->old; > - ft->clock_tick_ref = cpu_data(cpu).clock_tick; > - } > - if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) || > - (val == CPUFREQ_POSTCHANGE && freq->old > freq->new)) { > - cpu_data(cpu).clock_tick = > - cpufreq_scale(ft->clock_tick_ref, > - ft->ref_freq, > - freq->new); > + for_each_cpu(cpu, freq->policy->cpus) { > + ft = &per_cpu(sparc64_freq_table, cpu); > + > + if (!ft->ref_freq) { > + ft->ref_freq = freq->old; > + ft->clock_tick_ref = cpu_data(cpu).clock_tick; > + } > + > + if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) || > + (val == CPUFREQ_POSTCHANGE && freq->old > freq->new)) { > + cpu_data(cpu).clock_tick = > + cpufreq_scale(ft->clock_tick_ref, ft->ref_freq, > + freq->new); > + } > } > > return 0; > diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c > index 3fae23834069..cff8779fc0d2 100644 > --- a/arch/x86/kernel/tsc.c > +++ b/arch/x86/kernel/tsc.c > @@ -956,28 +956,38 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, > void *data) > { > struct cpufreq_freqs *freq = data; > - unsigned long *lpj; > - > - lpj = &boot_cpu_data.loops_per_jiffy; > -#ifdef CONFIG_SMP > - if (!(freq->flags & CPUFREQ_CONST_LOOPS)) > - lpj = &cpu_data(freq->cpu).loops_per_jiffy; > -#endif > + struct cpumask *cpus = freq->policy->cpus; > + bool boot_cpu = !IS_ENABLED(CONFIG_SMP) || freq->flags & CPUFREQ_CONST_LOOPS; > + unsigned long lpj; > + int cpu; > > if (!ref_freq) { > ref_freq = freq->old; > - loops_per_jiffy_ref = *lpj; > tsc_khz_ref = tsc_khz; > + > + if (boot_cpu) > + loops_per_jiffy_ref = boot_cpu_data.loops_per_jiffy; > + else > + loops_per_jiffy_ref = cpu_data(cpumask_first(cpus)).loops_per_jiffy; > } > + > if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) || > (val == CPUFREQ_POSTCHANGE && freq->old > freq->new)) { > - *lpj = cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new); > - > + lpj = cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new); > tsc_khz = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new); > + > if (!(freq->flags & CPUFREQ_CONST_LOOPS)) > mark_tsc_unstable("cpufreq changes"); > > - set_cyc2ns_scale(tsc_khz, freq->cpu, rdtsc()); > + if (boot_cpu) { > + boot_cpu_data.loops_per_jiffy = lpj; > + } else { > + for_each_cpu(cpu, cpus) > + cpu_data(cpu).loops_per_jiffy = lpj; > + } > + > + for_each_cpu(cpu, cpus) > + set_cyc2ns_scale(tsc_khz, cpu, rdtsc()); To summarize, I think that it would be sufficient to do this just 
for policy->cpu and, as Peter said, warn once if there are more CPUs in the policy or policy->cpu is not the CPU running this code. And mark the TSC as unstable in both of these cases.
On 18-03-19, 12:49, Rafael J. Wysocki wrote:
> To summarize, I think that it would be sufficient to do this just for
> policy->cpu and, as Peter said, warn once if there are more CPUs in
> the policy or policy->cpu is not the CPU running this code. And mark
> the TSC as unstable in both of these cases.

How about this ?

diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 3fae23834069..4d3681cfb6e0 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -958,10 +958,13 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
 	struct cpufreq_freqs *freq = data;
 	unsigned long *lpj;
 
+	if (WARN_ON_ONCE(cpumask_weight(freq->policy->related_cpus) != 1))
+		mark_tsc_unstable("cpufreq policy has more than CPU");
+
 	lpj = &boot_cpu_data.loops_per_jiffy;
 #ifdef CONFIG_SMP
 	if (!(freq->flags & CPUFREQ_CONST_LOOPS))
-		lpj = &cpu_data(freq->cpu).loops_per_jiffy;
+		lpj = &cpu_data(freq->policy->cpu).loops_per_jiffy;
 #endif
 
 	if (!ref_freq) {
@@ -977,7 +980,7 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
 		if (!(freq->flags & CPUFREQ_CONST_LOOPS))
 			mark_tsc_unstable("cpufreq changes");
 
-		set_cyc2ns_scale(tsc_khz, freq->cpu, rdtsc());
+		set_cyc2ns_scale(tsc_khz, freq->policy->cpu, rdtsc());
 	}
 
 	return 0;
On Tue, Mar 19, 2019 at 6:50 AM Viresh Kumar <viresh.kumar@linaro.org> wrote: > > On 18-03-19, 12:49, Rafael J. Wysocki wrote: > > To summarize, I think that it would be sufficient to do this just for > > policy->cpu and, as Peter said, warn once if there are more CPUs in > > the policy or policy->cpu is not the CPU running this code. And mark > > the TSC as unstable in both of these cases. > > How about this ? We guarantee that this will always run on policy->cpu IIRC, so it LGTM overall, but the new message is missing "one". > diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c > index 3fae23834069..4d3681cfb6e0 100644 > --- a/arch/x86/kernel/tsc.c > +++ b/arch/x86/kernel/tsc.c > @@ -958,10 +958,13 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, > struct cpufreq_freqs *freq = data; > unsigned long *lpj; > > + if (WARN_ON_ONCE(cpumask_weight(freq->policy->related_cpus) != 1)) > + mark_tsc_unstable("cpufreq policy has more than CPU"); Also I would check policy->cpus here. After all, we don't care about CPUs that are never online. And the message could be something like "cpufreq changes: related CPUs affected". > + > lpj = &boot_cpu_data.loops_per_jiffy; > #ifdef CONFIG_SMP > if (!(freq->flags & CPUFREQ_CONST_LOOPS)) > - lpj = &cpu_data(freq->cpu).loops_per_jiffy; > + lpj = &cpu_data(freq->policy->cpu).loops_per_jiffy; > #endif > > if (!ref_freq) { > @@ -977,7 +980,7 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, > if (!(freq->flags & CPUFREQ_CONST_LOOPS)) > mark_tsc_unstable("cpufreq changes"); > > - set_cyc2ns_scale(tsc_khz, freq->cpu, rdtsc()); > + set_cyc2ns_scale(tsc_khz, freq->policy->cpu, rdtsc()); > } > > return 0;
On 19-03-19, 10:41, Rafael J. Wysocki wrote: > On Tue, Mar 19, 2019 at 6:50 AM Viresh Kumar <viresh.kumar@linaro.org> wrote: > > > > On 18-03-19, 12:49, Rafael J. Wysocki wrote: > > > To summarize, I think that it would be sufficient to do this just for > > > policy->cpu and, as Peter said, warn once if there are more CPUs in > > > the policy or policy->cpu is not the CPU running this code. And mark > > > the TSC as unstable in both of these cases. > > > > How about this ? > > We guarantee that this will always run on policy->cpu IIRC, so it LGTM Yeah, the governor guarantees that unless dvfs_possible_from_any_cpu is set for the policy. But there are few direct invocations to cpufreq_driver_target() and __cpufreq_driver_target() which don't take that into account. First one is done from cpufreq_online(), which can get called on any CPU I believe. Other one is from cpufreq_generic_suspend(). But I think none of them gets called for x86 and so below code should be safe. > overall, but the new message is missing "one". Talking about print message ? > > diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c > > index 3fae23834069..4d3681cfb6e0 100644 > > --- a/arch/x86/kernel/tsc.c > > +++ b/arch/x86/kernel/tsc.c > > @@ -958,10 +958,13 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, > > struct cpufreq_freqs *freq = data; > > unsigned long *lpj; > > > > + if (WARN_ON_ONCE(cpumask_weight(freq->policy->related_cpus) != 1)) > > + mark_tsc_unstable("cpufreq policy has more than CPU"); > > Also I would check policy->cpus here. After all, we don't care about > CPUs that are never online. Because the CPU isn't in the policy->cpus mask, we can't say it was *never* online. Just that it isn't online at that moment of time. I used related_cpus as the code here should be robust against any corner cases and shouldn't have different behavior based on value of policy->cpus. If the cpufreq driver is probed after all but one CPUs of a policy are offlined, then you won't see the warning if policy->cpus is used. But the warning will appear if any other CPU is onlined. For me that is wrong, we should have got the warning earlier as well as it was just wrong to not warn earlier. > And the message could be something like "cpufreq changes: related CPUs > affected". Sure. I also forgot to add a "return" statement here. We shouldn't continue in this case, right ? > > + > > lpj = &boot_cpu_data.loops_per_jiffy; > > #ifdef CONFIG_SMP > > if (!(freq->flags & CPUFREQ_CONST_LOOPS)) > > - lpj = &cpu_data(freq->cpu).loops_per_jiffy; > > + lpj = &cpu_data(freq->policy->cpu).loops_per_jiffy; > > #endif > > > > if (!ref_freq) { > > @@ -977,7 +980,7 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, > > if (!(freq->flags & CPUFREQ_CONST_LOOPS)) > > mark_tsc_unstable("cpufreq changes"); > > > > - set_cyc2ns_scale(tsc_khz, freq->cpu, rdtsc()); > > + set_cyc2ns_scale(tsc_khz, freq->policy->cpu, rdtsc()); > > } > > > > return 0;
On Tue, Mar 19, 2019 at 11:49 AM Viresh Kumar <viresh.kumar@linaro.org> wrote: > > On 19-03-19, 10:41, Rafael J. Wysocki wrote: > > On Tue, Mar 19, 2019 at 6:50 AM Viresh Kumar <viresh.kumar@linaro.org> wrote: > > > > > > On 18-03-19, 12:49, Rafael J. Wysocki wrote: > > > > To summarize, I think that it would be sufficient to do this just for > > > > policy->cpu and, as Peter said, warn once if there are more CPUs in > > > > the policy or policy->cpu is not the CPU running this code. And mark > > > > the TSC as unstable in both of these cases. > > > > > > How about this ? > > > > We guarantee that this will always run on policy->cpu IIRC, so it LGTM > > Yeah, the governor guarantees that unless dvfs_possible_from_any_cpu is set for > the policy. But there are few direct invocations to cpufreq_driver_target() and > __cpufreq_driver_target() which don't take that into account. First one is done > from cpufreq_online(), which can get called on any CPU I believe. Other one is > from cpufreq_generic_suspend(). But I think none of them gets called for x86 and > so below code should be safe. I meant on x86, sorry. > > overall, but the new message is missing "one". > > Talking about print message ? Yes. > > > diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c > > > index 3fae23834069..4d3681cfb6e0 100644 > > > --- a/arch/x86/kernel/tsc.c > > > +++ b/arch/x86/kernel/tsc.c > > > @@ -958,10 +958,13 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, > > > struct cpufreq_freqs *freq = data; > > > unsigned long *lpj; > > > > > > + if (WARN_ON_ONCE(cpumask_weight(freq->policy->related_cpus) != 1)) > > > + mark_tsc_unstable("cpufreq policy has more than CPU"); > > > > Also I would check policy->cpus here. After all, we don't care about > > CPUs that are never online. > > Because the CPU isn't in the policy->cpus mask, we can't say it was *never* > online. Just that it isn't online at that moment of time. I used related_cpus as > the code here should be robust against any corner cases and shouldn't have > different behavior based on value of policy->cpus. > > If the cpufreq driver is probed after all but one CPUs of a policy are offlined, > then you won't see the warning if policy->cpus is used. But the warning will > appear if any other CPU is onlined. For me that is wrong, we should have got the > warning earlier as well as it was just wrong to not warn earlier. Fair enough. > > And the message could be something like "cpufreq changes: related CPUs > > affected". > > Sure. > > I also forgot to add a "return" statement here. We shouldn't continue in this > case, right ? It makes a little sense to continue then, so it's better to return immediately in that case IMO. > > > + > > > lpj = &boot_cpu_data.loops_per_jiffy; > > > #ifdef CONFIG_SMP > > > if (!(freq->flags & CPUFREQ_CONST_LOOPS)) > > > - lpj = &cpu_data(freq->cpu).loops_per_jiffy; > > > + lpj = &cpu_data(freq->policy->cpu).loops_per_jiffy; > > > #endif > > > > > > if (!ref_freq) { > > > @@ -977,7 +980,7 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, > > > if (!(freq->flags & CPUFREQ_CONST_LOOPS)) > > > mark_tsc_unstable("cpufreq changes"); > > > > > > - set_cyc2ns_scale(tsc_khz, freq->cpu, rdtsc()); > > > + set_cyc2ns_scale(tsc_khz, freq->policy->cpu, rdtsc()); > > > } > > > > > > return 0;
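Putting the points agreed above together (check related_cpus, reword the message, return early), the start of the notifier might end up looking roughly like this; the exact wording and placement are assumptions based on this exchange, not a quoted final patch:

static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
				 void *data)
{
	struct cpufreq_freqs *freq = data;

	/* Multi-CPU policies can't be handled here: flag the TSC and bail out. */
	if (WARN_ON_ONCE(cpumask_weight(freq->policy->related_cpus) != 1)) {
		mark_tsc_unstable("cpufreq changes: related CPUs affected");
		return 0;
	}

	/*
	 * Rest as in the diff above: scale loops_per_jiffy and tsc_khz, and
	 * call set_cyc2ns_scale() for freq->policy->cpu only.
	 */
	return 0;
}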
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 1d6f5ea522f4..6f6b981fecda 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -758,15 +758,20 @@ static int cpufreq_callback(struct notifier_block *nb, unsigned long val, void *data) { struct cpufreq_freqs *freq = data; - int cpu = freq->cpu; + struct cpumask *cpus = freq->policy->cpus; + int cpu, first = cpumask_first(cpus); + unsigned int lpj; if (freq->flags & CPUFREQ_CONST_LOOPS) return NOTIFY_OK; - if (!per_cpu(l_p_j_ref, cpu)) { - per_cpu(l_p_j_ref, cpu) = - per_cpu(cpu_data, cpu).loops_per_jiffy; - per_cpu(l_p_j_ref_freq, cpu) = freq->old; + if (!per_cpu(l_p_j_ref, first)) { + for_each_cpu(cpu, cpus) { + per_cpu(l_p_j_ref, cpu) = + per_cpu(cpu_data, cpu).loops_per_jiffy; + per_cpu(l_p_j_ref_freq, cpu) = freq->old; + } + if (!global_l_p_j_ref) { global_l_p_j_ref = loops_per_jiffy; global_l_p_j_ref_freq = freq->old; @@ -778,10 +783,11 @@ static int cpufreq_callback(struct notifier_block *nb, loops_per_jiffy = cpufreq_scale(global_l_p_j_ref, global_l_p_j_ref_freq, freq->new); - per_cpu(cpu_data, cpu).loops_per_jiffy = - cpufreq_scale(per_cpu(l_p_j_ref, cpu), - per_cpu(l_p_j_ref_freq, cpu), - freq->new); + + lpj = cpufreq_scale(per_cpu(l_p_j_ref, first), + per_cpu(l_p_j_ref_freq, first), freq->new); + for_each_cpu(cpu, cpus) + per_cpu(cpu_data, cpu).loops_per_jiffy = lpj; } return NOTIFY_OK; } diff --git a/arch/arm/kernel/smp_twd.c b/arch/arm/kernel/smp_twd.c index b30eafeef096..495cc7282096 100644 --- a/arch/arm/kernel/smp_twd.c +++ b/arch/arm/kernel/smp_twd.c @@ -162,15 +162,18 @@ static int twd_cpufreq_transition(struct notifier_block *nb, unsigned long state, void *data) { struct cpufreq_freqs *freqs = data; + int cpu; /* * The twd clock events must be reprogrammed to account for the new * frequency. The timer is local to a cpu, so cross-call to the * changing cpu. 
*/ - if (state == CPUFREQ_POSTCHANGE) - smp_call_function_single(freqs->cpu, twd_update_frequency, - NULL, 1); + if (state != CPUFREQ_POSTCHANGE) + return NOTIFY_OK; + + for_each_cpu(cpu, freqs->policy->cpus) + smp_call_function_single(cpu, twd_update_frequency, NULL, 1); return NOTIFY_OK; } diff --git a/arch/sparc/kernel/time_64.c b/arch/sparc/kernel/time_64.c index 3eb77943ce12..89fb05f90609 100644 --- a/arch/sparc/kernel/time_64.c +++ b/arch/sparc/kernel/time_64.c @@ -653,19 +653,23 @@ static int sparc64_cpufreq_notifier(struct notifier_block *nb, unsigned long val void *data) { struct cpufreq_freqs *freq = data; - unsigned int cpu = freq->cpu; - struct freq_table *ft = &per_cpu(sparc64_freq_table, cpu); + unsigned int cpu; + struct freq_table *ft; - if (!ft->ref_freq) { - ft->ref_freq = freq->old; - ft->clock_tick_ref = cpu_data(cpu).clock_tick; - } - if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) || - (val == CPUFREQ_POSTCHANGE && freq->old > freq->new)) { - cpu_data(cpu).clock_tick = - cpufreq_scale(ft->clock_tick_ref, - ft->ref_freq, - freq->new); + for_each_cpu(cpu, freq->policy->cpus) { + ft = &per_cpu(sparc64_freq_table, cpu); + + if (!ft->ref_freq) { + ft->ref_freq = freq->old; + ft->clock_tick_ref = cpu_data(cpu).clock_tick; + } + + if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) || + (val == CPUFREQ_POSTCHANGE && freq->old > freq->new)) { + cpu_data(cpu).clock_tick = + cpufreq_scale(ft->clock_tick_ref, ft->ref_freq, + freq->new); + } } return 0; diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 3fae23834069..cff8779fc0d2 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -956,28 +956,38 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, void *data) { struct cpufreq_freqs *freq = data; - unsigned long *lpj; - - lpj = &boot_cpu_data.loops_per_jiffy; -#ifdef CONFIG_SMP - if (!(freq->flags & CPUFREQ_CONST_LOOPS)) - lpj = &cpu_data(freq->cpu).loops_per_jiffy; -#endif + struct cpumask *cpus = freq->policy->cpus; + bool boot_cpu = !IS_ENABLED(CONFIG_SMP) || freq->flags & CPUFREQ_CONST_LOOPS; + unsigned long lpj; + int cpu; if (!ref_freq) { ref_freq = freq->old; - loops_per_jiffy_ref = *lpj; tsc_khz_ref = tsc_khz; + + if (boot_cpu) + loops_per_jiffy_ref = boot_cpu_data.loops_per_jiffy; + else + loops_per_jiffy_ref = cpu_data(cpumask_first(cpus)).loops_per_jiffy; } + if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) || (val == CPUFREQ_POSTCHANGE && freq->old > freq->new)) { - *lpj = cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new); - + lpj = cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new); tsc_khz = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new); + if (!(freq->flags & CPUFREQ_CONST_LOOPS)) mark_tsc_unstable("cpufreq changes"); - set_cyc2ns_scale(tsc_khz, freq->cpu, rdtsc()); + if (boot_cpu) { + boot_cpu_data.loops_per_jiffy = lpj; + } else { + for_each_cpu(cpu, cpus) + cpu_data(cpu).loops_per_jiffy = lpj; + } + + for_each_cpu(cpu, cpus) + set_cyc2ns_scale(tsc_khz, cpu, rdtsc()); } return 0; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 941f932373d0..653c7da11647 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6648,10 +6648,8 @@ static void kvm_hyperv_tsc_notifier(void) } #endif -static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val, - void *data) +static void __kvmclock_cpufreq_notifier(struct cpufreq_freqs *freq, int cpu) { - struct cpufreq_freqs *freq = data; struct kvm *kvm; struct kvm_vcpu *vcpu; int i, send_ipi = 0; @@ -6695,17 
+6693,12 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va * */ - if (val == CPUFREQ_PRECHANGE && freq->old > freq->new) - return 0; - if (val == CPUFREQ_POSTCHANGE && freq->old < freq->new) - return 0; - - smp_call_function_single(freq->cpu, tsc_khz_changed, freq, 1); + smp_call_function_single(cpu, tsc_khz_changed, freq, 1); spin_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) { kvm_for_each_vcpu(i, vcpu, kvm) { - if (vcpu->cpu != freq->cpu) + if (vcpu->cpu != cpu) continue; kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); if (vcpu->cpu != smp_processor_id()) @@ -6727,8 +6720,24 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va * guest context is entered kvmclock will be updated, * so the guest will not see stale values. */ - smp_call_function_single(freq->cpu, tsc_khz_changed, freq, 1); + smp_call_function_single(cpu, tsc_khz_changed, freq, 1); } +} + +static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val, + void *data) +{ + struct cpufreq_freqs *freq = data; + int cpu; + + if (val == CPUFREQ_PRECHANGE && freq->old > freq->new) + return 0; + if (val == CPUFREQ_POSTCHANGE && freq->old < freq->new) + return 0; + + for_each_cpu(cpu, freq->policy->cpus) + __kvmclock_cpufreq_notifier(freq, cpu); + return 0; } diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index e10922709d13..fba38bf27d26 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -300,11 +300,14 @@ static void cpufreq_notify_transition(struct cpufreq_policy *policy, struct cpufreq_freqs *freqs, unsigned int state) { + int cpu; + BUG_ON(irqs_disabled()); if (cpufreq_disabled()) return; + freqs->policy = policy; freqs->flags = cpufreq_driver->flags; pr_debug("notification %u of frequency transition to %u kHz\n", state, freqs->new); @@ -324,10 +327,8 @@ static void cpufreq_notify_transition(struct cpufreq_policy *policy, } } - for_each_cpu(freqs->cpu, policy->cpus) { - srcu_notifier_call_chain(&cpufreq_transition_notifier_list, - CPUFREQ_PRECHANGE, freqs); - } + srcu_notifier_call_chain(&cpufreq_transition_notifier_list, + CPUFREQ_PRECHANGE, freqs); adjust_jiffies(CPUFREQ_PRECHANGE, freqs); break; @@ -337,11 +338,11 @@ static void cpufreq_notify_transition(struct cpufreq_policy *policy, pr_debug("FREQ: %u - CPUs: %*pbl\n", freqs->new, cpumask_pr_args(policy->cpus)); - for_each_cpu(freqs->cpu, policy->cpus) { - trace_cpu_frequency(freqs->new, freqs->cpu); - srcu_notifier_call_chain(&cpufreq_transition_notifier_list, - CPUFREQ_POSTCHANGE, freqs); - } + for_each_cpu(cpu, policy->cpus) + trace_cpu_frequency(freqs->new, cpu); + + srcu_notifier_call_chain(&cpufreq_transition_notifier_list, + CPUFREQ_POSTCHANGE, freqs); cpufreq_stats_record_transition(policy, freqs->new); policy->cur = freqs->new; diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index b160e98076e3..e327523ddff2 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -42,13 +42,6 @@ enum cpufreq_table_sorting { CPUFREQ_TABLE_SORTED_DESCENDING }; -struct cpufreq_freqs { - unsigned int cpu; /* cpu nr */ - unsigned int old; - unsigned int new; - u8 flags; /* flags of cpufreq_driver, see below. */ -}; - struct cpufreq_cpuinfo { unsigned int max_freq; unsigned int min_freq; @@ -156,6 +149,13 @@ struct cpufreq_policy { struct thermal_cooling_device *cdev; }; +struct cpufreq_freqs { + struct cpufreq_policy *policy; + unsigned int old; + unsigned int new; + u8 flags; /* flags of cpufreq_driver, see below. 
*/ +}; + /* Only for ACPI */ #define CPUFREQ_SHARED_TYPE_NONE (0) /* None */ #define CPUFREQ_SHARED_TYPE_HW (1) /* HW does needed coordination */