Message ID | 20200612002205.174295-5-volodymyr_babchuk@epam.com |
---|---|
State | New, archived |
Series | Fair scheduling |
On 12.06.20 02:22, Volodymyr Babchuk wrote: > As scheduler code now collects time spent in IRQ handlers and in > do_softirq(), we can present those values to userspace tools like > xentop, so system administrator can see how system behaves. > > We are updating counters only in sched_get_time_correction() function > to minimize number of taken spinlocks. As atomic_t is 32 bit wide, it > is not enough to store time with nanosecond precision. So we need to > use 64 bit variables and protect them with spinlock. > > Signed-off-by: Volodymyr Babchuk <volodymyr_babchuk@epam.com> > --- > xen/common/sched/core.c | 17 +++++++++++++++++ > xen/common/sysctl.c | 1 + > xen/include/public/sysctl.h | 4 +++- > xen/include/xen/sched.h | 2 ++ > 4 files changed, 23 insertions(+), 1 deletion(-) > > diff --git a/xen/common/sched/core.c b/xen/common/sched/core.c > index a7294ff5c3..ee6b1d9161 100644 > --- a/xen/common/sched/core.c > +++ b/xen/common/sched/core.c > @@ -95,6 +95,10 @@ static struct scheduler __read_mostly ops; > > static bool scheduler_active; > > +static DEFINE_SPINLOCK(sched_stat_lock); > +s_time_t sched_stat_irq_time; > +s_time_t sched_stat_hyp_time; > + > static void sched_set_affinity( > struct sched_unit *unit, const cpumask_t *hard, const cpumask_t *soft); > > @@ -994,9 +998,22 @@ s_time_t sched_get_time_correction(struct sched_unit *u) > break; > } > > + spin_lock_irqsave(&sched_stat_lock, flags); > + sched_stat_irq_time += irq; > + sched_stat_hyp_time += hyp; > + spin_unlock_irqrestore(&sched_stat_lock, flags); Please don't use a lock. Just use add_sized() instead which will add atomically. > + > return irq + hyp; > } > > +void sched_get_time_stats(uint64_t *irq_time, uint64_t *hyp_time) > +{ > + unsigned long flags; > + > + spin_lock_irqsave(&sched_stat_lock, flags); > + *irq_time = sched_stat_irq_time; > + *hyp_time = sched_stat_hyp_time; > + spin_unlock_irqrestore(&sched_stat_lock, flags); read_atomic() will do the job without lock. > } > > /* > diff --git a/xen/common/sysctl.c b/xen/common/sysctl.c > index 1c6a817476..00683bc93f 100644 > --- a/xen/common/sysctl.c > +++ b/xen/common/sysctl.c > @@ -270,6 +270,7 @@ long do_sysctl(XEN_GUEST_HANDLE_PARAM(xen_sysctl_t) u_sysctl) > pi->scrub_pages = 0; > pi->cpu_khz = cpu_khz; > pi->max_mfn = get_upper_mfn_bound(); > + sched_get_time_stats(&pi->irq_time, &pi->hyp_time); > arch_do_physinfo(pi); > if ( iommu_enabled ) > { > diff --git a/xen/include/public/sysctl.h b/xen/include/public/sysctl.h > index 3a08c512e8..f320144d40 100644 > --- a/xen/include/public/sysctl.h > +++ b/xen/include/public/sysctl.h > @@ -35,7 +35,7 @@ > #include "domctl.h" > #include "physdev.h" > > -#define XEN_SYSCTL_INTERFACE_VERSION 0x00000013 > +#define XEN_SYSCTL_INTERFACE_VERSION 0x00000014 > > /* > * Read console content from Xen buffer ring. > @@ -118,6 +118,8 @@ struct xen_sysctl_physinfo { > uint64_aligned_t scrub_pages; > uint64_aligned_t outstanding_pages; > uint64_aligned_t max_mfn; /* Largest possible MFN on this host */ > + uint64_aligned_t irq_time; > + uint64_aligned_t hyp_time; Would hypfs work, too? This would avoid the need for extending another hypercall. Juergen
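For reference, a minimal sketch of the lock-free variant suggested here, with add_sized() on the update path and read_atomic() on the fetch path. The sched_stat_account() helper is hypothetical and used only for illustration; as later replies in this thread point out, add_sized() only prevents compiler tearing (it is not an atomic read-modify-write across CPUs), and the 64-bit forms are not yet plumbed on Arm at this point:

static s_time_t sched_stat_irq_time;
static s_time_t sched_stat_hyp_time;

/* Update path, called from sched_get_time_correction(). */
static void sched_stat_account(s_time_t irq, s_time_t hyp)
{
    /* Single update per counter, no lock taken. */
    add_sized(&sched_stat_irq_time, irq);
    add_sized(&sched_stat_hyp_time, hyp);
}

/* Fetch path, called from the sysctl handler. */
void sched_get_time_stats(uint64_t *irq_time, uint64_t *hyp_time)
{
    *irq_time = read_atomic(&sched_stat_irq_time);
    *hyp_time = read_atomic(&sched_stat_hyp_time);
}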
On Fri, 2020-06-12 at 06:57 +0200, Jürgen Groß wrote: > On 12.06.20 02:22, Volodymyr Babchuk wrote: > > As scheduler code now collects time spent in IRQ handlers and in > > do_softirq(), we can present those values to userspace tools like > > xentop, so system administrator can see how system behaves. > > > > We are updating counters only in sched_get_time_correction() function > > to minimize number of taken spinlocks. As atomic_t is 32 bit wide, it > > is not enough to store time with nanosecond precision. So we need to > > use 64 bit variables and protect them with spinlock. > > > > Signed-off-by: Volodymyr Babchuk <volodymyr_babchuk@epam.com> > > --- > > xen/common/sched/core.c | 17 +++++++++++++++++ > > xen/common/sysctl.c | 1 + > > xen/include/public/sysctl.h | 4 +++- > > xen/include/xen/sched.h | 2 ++ > > 4 files changed, 23 insertions(+), 1 deletion(-) > > > > diff --git a/xen/common/sched/core.c b/xen/common/sched/core.c > > index a7294ff5c3..ee6b1d9161 100644 > > --- a/xen/common/sched/core.c > > +++ b/xen/common/sched/core.c > > @@ -95,6 +95,10 @@ static struct scheduler __read_mostly ops; > > > > static bool scheduler_active; > > > > +static DEFINE_SPINLOCK(sched_stat_lock); > > +s_time_t sched_stat_irq_time; > > +s_time_t sched_stat_hyp_time; > > + > > static void sched_set_affinity( > > struct sched_unit *unit, const cpumask_t *hard, const cpumask_t *soft); > > > > @@ -994,9 +998,22 @@ s_time_t sched_get_time_correction(struct sched_unit *u) > > break; > > } > > > > + spin_lock_irqsave(&sched_stat_lock, flags); > > + sched_stat_irq_time += irq; > > + sched_stat_hyp_time += hyp; > > + spin_unlock_irqrestore(&sched_stat_lock, flags); > > Please don't use a lock. Just use add_sized() instead which will add > atomically. Looks like arm does not support 64 bit variables. Julien, I believe, this is armv7 limitation? Should armv8 work with 64- bit atomics? > > + > > return irq + hyp; > > } > > > > +void sched_get_time_stats(uint64_t *irq_time, uint64_t *hyp_time) > > +{ > > + unsigned long flags; > > + > > + spin_lock_irqsave(&sched_stat_lock, flags); > > + *irq_time = sched_stat_irq_time; > > + *hyp_time = sched_stat_hyp_time; > > + spin_unlock_irqrestore(&sched_stat_lock, flags); > > read_atomic() will do the job without lock. Yes, I really want to use atomics there. Just need to clarify 64 bit support on ARM. > > } > > > > /* > > diff --git a/xen/common/sysctl.c b/xen/common/sysctl.c > > index 1c6a817476..00683bc93f 100644 > > --- a/xen/common/sysctl.c > > +++ b/xen/common/sysctl.c > > @@ -270,6 +270,7 @@ long do_sysctl(XEN_GUEST_HANDLE_PARAM(xen_sysctl_t) u_sysctl) > > pi->scrub_pages = 0; > > pi->cpu_khz = cpu_khz; > > pi->max_mfn = get_upper_mfn_bound(); > > + sched_get_time_stats(&pi->irq_time, &pi->hyp_time); > > arch_do_physinfo(pi); > > if ( iommu_enabled ) > > { > > diff --git a/xen/include/public/sysctl.h b/xen/include/public/sysctl.h > > index 3a08c512e8..f320144d40 100644 > > --- a/xen/include/public/sysctl.h > > +++ b/xen/include/public/sysctl.h > > @@ -35,7 +35,7 @@ > > #include "domctl.h" > > #include "physdev.h" > > > > -#define XEN_SYSCTL_INTERFACE_VERSION 0x00000013 > > +#define XEN_SYSCTL_INTERFACE_VERSION 0x00000014 > > > > /* > > * Read console content from Xen buffer ring. > > @@ -118,6 +118,8 @@ struct xen_sysctl_physinfo { > > uint64_aligned_t scrub_pages; > > uint64_aligned_t outstanding_pages; > > uint64_aligned_t max_mfn; /* Largest possible MFN on this host */ > > + uint64_aligned_t irq_time; > > + uint64_aligned_t hyp_time; > > Would hypfs work, too? 
> This would avoid the need for extending another hypercall. Good point. I'll take a look at this from the toolstack side. I didn't see any hypfs calls in xentop, but this is a good time to begin using it.
Hi Juergen, On 12/06/2020 05:57, Jürgen Groß wrote: > On 12.06.20 02:22, Volodymyr Babchuk wrote: >> As scheduler code now collects time spent in IRQ handlers and in >> do_softirq(), we can present those values to userspace tools like >> xentop, so system administrator can see how system behaves. >> >> We are updating counters only in sched_get_time_correction() function >> to minimize number of taken spinlocks. As atomic_t is 32 bit wide, it >> is not enough to store time with nanosecond precision. So we need to >> use 64 bit variables and protect them with spinlock. >> >> Signed-off-by: Volodymyr Babchuk <volodymyr_babchuk@epam.com> >> --- >> xen/common/sched/core.c | 17 +++++++++++++++++ >> xen/common/sysctl.c | 1 + >> xen/include/public/sysctl.h | 4 +++- >> xen/include/xen/sched.h | 2 ++ >> 4 files changed, 23 insertions(+), 1 deletion(-) >> >> diff --git a/xen/common/sched/core.c b/xen/common/sched/core.c >> index a7294ff5c3..ee6b1d9161 100644 >> --- a/xen/common/sched/core.c >> +++ b/xen/common/sched/core.c >> @@ -95,6 +95,10 @@ static struct scheduler __read_mostly ops; >> static bool scheduler_active; >> +static DEFINE_SPINLOCK(sched_stat_lock); >> +s_time_t sched_stat_irq_time; >> +s_time_t sched_stat_hyp_time; >> + >> static void sched_set_affinity( >> struct sched_unit *unit, const cpumask_t *hard, const cpumask_t >> *soft); >> @@ -994,9 +998,22 @@ s_time_t sched_get_time_correction(struct >> sched_unit *u) >> break; >> } >> + spin_lock_irqsave(&sched_stat_lock, flags); >> + sched_stat_irq_time += irq; >> + sched_stat_hyp_time += hyp; >> + spin_unlock_irqrestore(&sched_stat_lock, flags); > > Please don't use a lock. Just use add_sized() instead which will add > atomically. add_sized() is definitely not atomic. It will only prevent the compiler to read/write multiple time the variable. If we expect sched_get_time_correction to be called concurrently then we would need to introduce atomic64_t or a spin lock. Cheers,
On 12.06.20 14:29, Julien Grall wrote: > Hi Juergen, > > On 12/06/2020 05:57, Jürgen Groß wrote: >> On 12.06.20 02:22, Volodymyr Babchuk wrote: >>> As scheduler code now collects time spent in IRQ handlers and in >>> do_softirq(), we can present those values to userspace tools like >>> xentop, so system administrator can see how system behaves. >>> >>> We are updating counters only in sched_get_time_correction() function >>> to minimize number of taken spinlocks. As atomic_t is 32 bit wide, it >>> is not enough to store time with nanosecond precision. So we need to >>> use 64 bit variables and protect them with spinlock. >>> >>> Signed-off-by: Volodymyr Babchuk <volodymyr_babchuk@epam.com> >>> --- >>> xen/common/sched/core.c | 17 +++++++++++++++++ >>> xen/common/sysctl.c | 1 + >>> xen/include/public/sysctl.h | 4 +++- >>> xen/include/xen/sched.h | 2 ++ >>> 4 files changed, 23 insertions(+), 1 deletion(-) >>> >>> diff --git a/xen/common/sched/core.c b/xen/common/sched/core.c >>> index a7294ff5c3..ee6b1d9161 100644 >>> --- a/xen/common/sched/core.c >>> +++ b/xen/common/sched/core.c >>> @@ -95,6 +95,10 @@ static struct scheduler __read_mostly ops; >>> static bool scheduler_active; >>> +static DEFINE_SPINLOCK(sched_stat_lock); >>> +s_time_t sched_stat_irq_time; >>> +s_time_t sched_stat_hyp_time; >>> + >>> static void sched_set_affinity( >>> struct sched_unit *unit, const cpumask_t *hard, const cpumask_t >>> *soft); >>> @@ -994,9 +998,22 @@ s_time_t sched_get_time_correction(struct >>> sched_unit *u) >>> break; >>> } >>> + spin_lock_irqsave(&sched_stat_lock, flags); >>> + sched_stat_irq_time += irq; >>> + sched_stat_hyp_time += hyp; >>> + spin_unlock_irqrestore(&sched_stat_lock, flags); >> >> Please don't use a lock. Just use add_sized() instead which will add >> atomically. > > add_sized() is definitely not atomic. It will only prevent the compiler > to read/write multiple time the variable. Oh, my bad, I let myself fool by it being defined in atomic.h. > > If we expect sched_get_time_correction to be called concurrently then we > would need to introduce atomic64_t or a spin lock. Or we could use percpu variables and add the cpu values up when fetching the values. Juergen
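A minimal sketch of the per-CPU counter idea described above, using Xen's existing per-CPU primitives. The sched_stat_account() helper name is hypothetical; whether the 64-bit reads in the summing loop can tear on 32-bit architectures is exactly what the rest of the thread goes on to discuss:

static DEFINE_PER_CPU(s_time_t, sched_stat_irq_time);
static DEFINE_PER_CPU(s_time_t, sched_stat_hyp_time);

/* Hot path: each pCPU only ever touches its own counters, so no lock
 * is taken and no cacheline bounces between CPUs. */
static void sched_stat_account(s_time_t irq, s_time_t hyp)
{
    this_cpu(sched_stat_irq_time) += irq;
    this_cpu(sched_stat_hyp_time) += hyp;
}

/* Slow path: add the per-CPU values up when fetching the totals. */
void sched_get_time_stats(uint64_t *irq_time, uint64_t *hyp_time)
{
    unsigned int cpu;

    *irq_time = *hyp_time = 0;
    for_each_online_cpu ( cpu )
    {
        *irq_time += per_cpu(sched_stat_irq_time, cpu);
        *hyp_time += per_cpu(sched_stat_hyp_time, cpu);
    }
}

The trade-off is that the hot path touches only a local cacheline, while fetching the totals becomes a loop over online CPUs, which is cheap for an occasional sysctl.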
Hi Volodymyr, On 12/06/2020 12:44, Volodymyr Babchuk wrote: > > On Fri, 2020-06-12 at 06:57 +0200, Jürgen Groß wrote: >> On 12.06.20 02:22, Volodymyr Babchuk wrote: >>> As scheduler code now collects time spent in IRQ handlers and in >>> do_softirq(), we can present those values to userspace tools like >>> xentop, so system administrator can see how system behaves. >>> >>> We are updating counters only in sched_get_time_correction() function >>> to minimize number of taken spinlocks. As atomic_t is 32 bit wide, it >>> is not enough to store time with nanosecond precision. So we need to >>> use 64 bit variables and protect them with spinlock. >>> >>> Signed-off-by: Volodymyr Babchuk <volodymyr_babchuk@epam.com> >>> --- >>> xen/common/sched/core.c | 17 +++++++++++++++++ >>> xen/common/sysctl.c | 1 + >>> xen/include/public/sysctl.h | 4 +++- >>> xen/include/xen/sched.h | 2 ++ >>> 4 files changed, 23 insertions(+), 1 deletion(-) >>> >>> diff --git a/xen/common/sched/core.c b/xen/common/sched/core.c >>> index a7294ff5c3..ee6b1d9161 100644 >>> --- a/xen/common/sched/core.c >>> +++ b/xen/common/sched/core.c >>> @@ -95,6 +95,10 @@ static struct scheduler __read_mostly ops; >>> >>> static bool scheduler_active; >>> >>> +static DEFINE_SPINLOCK(sched_stat_lock); >>> +s_time_t sched_stat_irq_time; >>> +s_time_t sched_stat_hyp_time; >>> + >>> static void sched_set_affinity( >>> struct sched_unit *unit, const cpumask_t *hard, const cpumask_t *soft); >>> >>> @@ -994,9 +998,22 @@ s_time_t sched_get_time_correction(struct sched_unit *u) >>> break; >>> } >>> >>> + spin_lock_irqsave(&sched_stat_lock, flags); >>> + sched_stat_irq_time += irq; >>> + sched_stat_hyp_time += hyp; >>> + spin_unlock_irqrestore(&sched_stat_lock, flags); >> >> Please don't use a lock. Just use add_sized() instead which will add >> atomically. > > Looks like arm does not support 64 bit variables. > > Julien, I believe, this is armv7 limitation? Should armv8 work with 64- > bit atomics? 64-bit atomics can work on both Armv7 and Armv8 :). It just haven't been plumbed yet. I am happy to write a patch if you need atomic64_t or even a 64-bit add_sized(). Cheers,
On Fri, 2020-06-12 at 14:41 +0200, Jürgen Groß wrote: > On 12.06.20 14:29, Julien Grall wrote: > > On 12/06/2020 05:57, Jürgen Groß wrote: > > > On 12.06.20 02:22, Volodymyr Babchuk wrote: > > > > > > > > @@ -994,9 +998,22 @@ s_time_t sched_get_time_correction(struct > > > > sched_unit *u) > > > > break; > > > > } > > > > + spin_lock_irqsave(&sched_stat_lock, flags); > > > > + sched_stat_irq_time += irq; > > > > + sched_stat_hyp_time += hyp; > > > > + spin_unlock_irqrestore(&sched_stat_lock, flags); > > > > > > Please don't use a lock. Just use add_sized() instead which will > > > add > > > atomically. > > > > If we expect sched_get_time_correction to be called concurrently > > then we > > would need to introduce atomic64_t or a spin lock. > > Or we could use percpu variables and add the cpu values up when > fetching the values. > Yes, either percpu or atomic looks much better than locking, to me, for this. Regards
Hi Julien, On Fri, 2020-06-12 at 13:45 +0100, Julien Grall wrote: > Hi Volodymyr, > > On 12/06/2020 12:44, Volodymyr Babchuk wrote: > > On Fri, 2020-06-12 at 06:57 +0200, Jürgen Groß wrote: > > > On 12.06.20 02:22, Volodymyr Babchuk wrote: > > > > As scheduler code now collects time spent in IRQ handlers and in > > > > do_softirq(), we can present those values to userspace tools like > > > > xentop, so system administrator can see how system behaves. > > > > > > > > We are updating counters only in sched_get_time_correction() function > > > > to minimize number of taken spinlocks. As atomic_t is 32 bit wide, it > > > > is not enough to store time with nanosecond precision. So we need to > > > > use 64 bit variables and protect them with spinlock. > > > > > > > > Signed-off-by: Volodymyr Babchuk <volodymyr_babchuk@epam.com> > > > > --- > > > > xen/common/sched/core.c | 17 +++++++++++++++++ > > > > xen/common/sysctl.c | 1 + > > > > xen/include/public/sysctl.h | 4 +++- > > > > xen/include/xen/sched.h | 2 ++ > > > > 4 files changed, 23 insertions(+), 1 deletion(-) > > > > > > > > diff --git a/xen/common/sched/core.c b/xen/common/sched/core.c > > > > index a7294ff5c3..ee6b1d9161 100644 > > > > --- a/xen/common/sched/core.c > > > > +++ b/xen/common/sched/core.c > > > > @@ -95,6 +95,10 @@ static struct scheduler __read_mostly ops; > > > > > > > > static bool scheduler_active; > > > > > > > > +static DEFINE_SPINLOCK(sched_stat_lock); > > > > +s_time_t sched_stat_irq_time; > > > > +s_time_t sched_stat_hyp_time; > > > > + > > > > static void sched_set_affinity( > > > > struct sched_unit *unit, const cpumask_t *hard, const cpumask_t *soft); > > > > > > > > @@ -994,9 +998,22 @@ s_time_t sched_get_time_correction(struct sched_unit *u) > > > > break; > > > > } > > > > > > > > + spin_lock_irqsave(&sched_stat_lock, flags); > > > > + sched_stat_irq_time += irq; > > > > + sched_stat_hyp_time += hyp; > > > > + spin_unlock_irqrestore(&sched_stat_lock, flags); > > > > > > Please don't use a lock. Just use add_sized() instead which will add > > > atomically. > > > > Looks like arm does not support 64 bit variables. > > > Julien, I believe, this is armv7 limitation? Should armv8 work with 64- > > bit atomics? > > 64-bit atomics can work on both Armv7 and Armv8 :). It just haven't been > plumbed yet. Wow, didn't know that armv7 is capable of that. > I am happy to write a patch if you need atomic64_t or even a 64-bit > add_sized(). That would be cool. Certainly. But looks like x86 code does not have implementation for atomic64_t as well. So, there would be lots of changes just for one use case. I don't know if it is worth it. Let's finish discussion of other parts of the series. If it will appear that atomic64_t is absolutely necessay, I'll return back to you. Thanks for offer anyways.
On Fri, 2020-06-12 at 17:29 +0200, Dario Faggioli wrote: > On Fri, 2020-06-12 at 14:41 +0200, Jürgen Groß wrote: > > On 12.06.20 14:29, Julien Grall wrote: > > > On 12/06/2020 05:57, Jürgen Groß wrote: > > > > On 12.06.20 02:22, Volodymyr Babchuk wrote: > > > > > @@ -994,9 +998,22 @@ s_time_t sched_get_time_correction(struct > > > > > sched_unit *u) > > > > > break; > > > > > } > > > > > + spin_lock_irqsave(&sched_stat_lock, flags); > > > > > + sched_stat_irq_time += irq; > > > > > + sched_stat_hyp_time += hyp; > > > > > + spin_unlock_irqrestore(&sched_stat_lock, flags); > > > > > > > > Please don't use a lock. Just use add_sized() instead which will > > > > add > > > > atomically. > > > > > > If we expect sched_get_time_correction to be called concurrently > > > then we > > > would need to introduce atomic64_t or a spin lock. > > > > Or we could use percpu variables and add the cpu values up when > > fetching the values. > > > Yes, either percpu or atomic looks much better than locking, to me, for > this. Looks like we going to have atomic64_t after all. So, I'll prefer to to use atomics there.
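For illustration, with a 64-bit atomic type the counters would reduce to something like the sketch below. Xen had no atomic64_t at the time of this thread (Julien offers to add one above), so the type and the atomic64_add()/atomic64_read() names are assumptions modelled on the existing 32-bit atomic_t:

static atomic64_t sched_stat_irq_time;
static atomic64_t sched_stat_hyp_time;

/* Update path: one atomic add per counter, no lock. */
static void sched_stat_account(s_time_t irq, s_time_t hyp)
{
    atomic64_add(irq, &sched_stat_irq_time);
    atomic64_add(hyp, &sched_stat_hyp_time);
}

/* Fetch path: each counter is read atomically; the pair is not one
 * consistent snapshot, which is acceptable for statistics. */
void sched_get_time_stats(uint64_t *irq_time, uint64_t *hyp_time)
{
    *irq_time = atomic64_read(&sched_stat_irq_time);
    *hyp_time = atomic64_read(&sched_stat_hyp_time);
}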
On 13.06.20 00:27, Volodymyr Babchuk wrote: > On Fri, 2020-06-12 at 17:29 +0200, Dario Faggioli wrote: >> On Fri, 2020-06-12 at 14:41 +0200, Jürgen Groß wrote: >>> On 12.06.20 14:29, Julien Grall wrote: >>>> On 12/06/2020 05:57, Jürgen Groß wrote: >>>>> On 12.06.20 02:22, Volodymyr Babchuk wrote: >>>>>> @@ -994,9 +998,22 @@ s_time_t sched_get_time_correction(struct >>>>>> sched_unit *u) >>>>>> break; >>>>>> } >>>>>> + spin_lock_irqsave(&sched_stat_lock, flags); >>>>>> + sched_stat_irq_time += irq; >>>>>> + sched_stat_hyp_time += hyp; >>>>>> + spin_unlock_irqrestore(&sched_stat_lock, flags); >>>>> >>>>> Please don't use a lock. Just use add_sized() instead which will >>>>> add >>>>> atomically. >>>> >>>> If we expect sched_get_time_correction to be called concurrently >>>> then we >>>> would need to introduce atomic64_t or a spin lock. >>> >>> Or we could use percpu variables and add the cpu values up when >>> fetching the values. >>> >> Yes, either percpu or atomic looks much better than locking, to me, for >> this. > > Looks like we going to have atomic64_t after all. So, I'll prefer to to > use atomics there. Performance would be better using percpu variables, as those would avoid the cacheline moved between cpus a lot. Juergen
Hi Jürgen, Jürgen Groß writes: > On 13.06.20 00:27, Volodymyr Babchuk wrote: >> On Fri, 2020-06-12 at 17:29 +0200, Dario Faggioli wrote: >>> On Fri, 2020-06-12 at 14:41 +0200, Jürgen Groß wrote: >>>> On 12.06.20 14:29, Julien Grall wrote: >>>>> On 12/06/2020 05:57, Jürgen Groß wrote: >>>>>> On 12.06.20 02:22, Volodymyr Babchuk wrote: >>>>>>> @@ -994,9 +998,22 @@ s_time_t sched_get_time_correction(struct >>>>>>> sched_unit *u) >>>>>>> break; >>>>>>> } >>>>>>> + spin_lock_irqsave(&sched_stat_lock, flags); >>>>>>> + sched_stat_irq_time += irq; >>>>>>> + sched_stat_hyp_time += hyp; >>>>>>> + spin_unlock_irqrestore(&sched_stat_lock, flags); >>>>>> >>>>>> Please don't use a lock. Just use add_sized() instead which will >>>>>> add >>>>>> atomically. >>>>> >>>>> If we expect sched_get_time_correction to be called concurrently >>>>> then we >>>>> would need to introduce atomic64_t or a spin lock. >>>> >>>> Or we could use percpu variables and add the cpu values up when >>>> fetching the values. >>>> >>> Yes, either percpu or atomic looks much better than locking, to me, for >>> this. >> >> Looks like we going to have atomic64_t after all. So, I'll prefer to to >> use atomics there. > > Performance would be better using percpu variables, as those would avoid > the cacheline moved between cpus a lot. I see. But don't we need locking in this case? I can see scenario, when one pCPU updates own counters while another pCPU is reading them. IIRC, ARMv8 guarantees that 64 bit read of aligned data would be consistent. "Consistent" in the sense that, for example, we would not see lower 32 bits of the new value and upper 32 bits of the old value. I can't say for sure about ARMv7 and about x86.
On 18/06/2020 03:58, Volodymyr Babchuk wrote: > > Hi Jürgen, > > Jürgen Groß writes: > >> On 13.06.20 00:27, Volodymyr Babchuk wrote: >>> On Fri, 2020-06-12 at 17:29 +0200, Dario Faggioli wrote: >>>> On Fri, 2020-06-12 at 14:41 +0200, Jürgen Groß wrote: >>>>> On 12.06.20 14:29, Julien Grall wrote: >>>>>> On 12/06/2020 05:57, Jürgen Groß wrote: >>>>>>> On 12.06.20 02:22, Volodymyr Babchuk wrote: >>>>>>>> @@ -994,9 +998,22 @@ s_time_t sched_get_time_correction(struct >>>>>>>> sched_unit *u) >>>>>>>> break; >>>>>>>> } >>>>>>>> + spin_lock_irqsave(&sched_stat_lock, flags); >>>>>>>> + sched_stat_irq_time += irq; >>>>>>>> + sched_stat_hyp_time += hyp; >>>>>>>> + spin_unlock_irqrestore(&sched_stat_lock, flags); >>>>>>> >>>>>>> Please don't use a lock. Just use add_sized() instead which will >>>>>>> add >>>>>>> atomically. >>>>>> >>>>>> If we expect sched_get_time_correction to be called concurrently >>>>>> then we >>>>>> would need to introduce atomic64_t or a spin lock. >>>>> >>>>> Or we could use percpu variables and add the cpu values up when >>>>> fetching the values. >>>>> >>>> Yes, either percpu or atomic looks much better than locking, to me, for >>>> this. >>> >>> Looks like we going to have atomic64_t after all. So, I'll prefer to to >>> use atomics there. >> >> Performance would be better using percpu variables, as those would avoid >> the cacheline moved between cpus a lot. > > I see. But don't we need locking in this case? I can see scenario, when > one pCPU updates own counters while another pCPU is reading them. > > IIRC, ARMv8 guarantees that 64 bit read of aligned data would be > consistent. "Consistent" in the sense that, for example, we would not > see lower 32 bits of the new value and upper 32 bits of the old value. That's right. Although this would be valid so long you use {read, write}_atomic(). > > I can't say for sure about ARMv7 and about x86. ARMv7 with LPAE support will guarantee 64-bit atomicity when using strd/ldrd as long as the alignment is correct. LPAE is mandatory when supporting HYP mode, so you can safely assume this will work. 64-bit on x86 is also guaranteed to be atomic when using write_atomic().
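Putting the two suggestions together, a sketch of per-CPU counters whose 64-bit values are stored with write_atomic() and summed with read_atomic(), so a remote reader never observes a torn value. The helper names are hypothetical, the sketch assumes 64-bit {read,write}_atomic() are plumbed on 32-bit Arm, and, per the discussion here, it relies on the LPAE strd/ldrd guarantee on Armv7 and on natural alignment everywhere:

static DEFINE_PER_CPU(s_time_t, sched_stat_irq_time);

/* Only the local pCPU ever writes its counter, so a plain read of the
 * old value is fine; the store must not tear for remote readers. */
static void sched_stat_add_irq(s_time_t irq)
{
    s_time_t *cnt = &this_cpu(sched_stat_irq_time);

    write_atomic(cnt, *cnt + irq);
}

/* Reader side: sum the per-CPU counters with tear-free 64-bit loads. */
static uint64_t sched_stat_total_irq(void)
{
    unsigned int cpu;
    uint64_t sum = 0;

    for_each_online_cpu ( cpu )
        sum += read_atomic(&per_cpu(sched_stat_irq_time, cpu));

    return sum;
}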
On 18.06.2020 17:17, Julien Grall wrote: > > > On 18/06/2020 03:58, Volodymyr Babchuk wrote: >> >> Hi Jürgen, >> >> Jürgen Groß writes: >> >>> On 13.06.20 00:27, Volodymyr Babchuk wrote: >>>> On Fri, 2020-06-12 at 17:29 +0200, Dario Faggioli wrote: >>>>> On Fri, 2020-06-12 at 14:41 +0200, Jürgen Groß wrote: >>>>>> On 12.06.20 14:29, Julien Grall wrote: >>>>>>> On 12/06/2020 05:57, Jürgen Groß wrote: >>>>>>>> On 12.06.20 02:22, Volodymyr Babchuk wrote: >>>>>>>>> @@ -994,9 +998,22 @@ s_time_t sched_get_time_correction(struct >>>>>>>>> sched_unit *u) >>>>>>>>> break; >>>>>>>>> } >>>>>>>>> + spin_lock_irqsave(&sched_stat_lock, flags); >>>>>>>>> + sched_stat_irq_time += irq; >>>>>>>>> + sched_stat_hyp_time += hyp; >>>>>>>>> + spin_unlock_irqrestore(&sched_stat_lock, flags); >>>>>>>> >>>>>>>> Please don't use a lock. Just use add_sized() instead which will >>>>>>>> add >>>>>>>> atomically. >>>>>>> >>>>>>> If we expect sched_get_time_correction to be called concurrently >>>>>>> then we >>>>>>> would need to introduce atomic64_t or a spin lock. >>>>>> >>>>>> Or we could use percpu variables and add the cpu values up when >>>>>> fetching the values. >>>>>> >>>>> Yes, either percpu or atomic looks much better than locking, to me, for >>>>> this. >>>> >>>> Looks like we going to have atomic64_t after all. So, I'll prefer to to >>>> use atomics there. >>> >>> Performance would be better using percpu variables, as those would avoid >>> the cacheline moved between cpus a lot. >> >> I see. But don't we need locking in this case? I can see scenario, when >> one pCPU updates own counters while another pCPU is reading them. >> >> IIRC, ARMv8 guarantees that 64 bit read of aligned data would be >> consistent. "Consistent" in the sense that, for example, we would not >> see lower 32 bits of the new value and upper 32 bits of the old value. > > That's right. Although this would be valid so long you use {read, > write}_atomic(). > >> >> I can't say for sure about ARMv7 and about x86. > ARMv7 with LPAE support will guarantee 64-bit atomicity when using > strd/ldrd as long as the alignment is correct. LPAE is mandatory when > supporting HYP mode, so you can safely assume this will work. > > 64-bit on x86 is also guaranteed to be atomic when using write_atomic(). ... and when again the data is suitably aligned, or at the very least (for WB RAM) doesn't cross certain boundaries. Jan
Hi Julien, Julien Grall writes: > Hi Volodymyr, > > On 12/06/2020 12:44, Volodymyr Babchuk wrote: >> >> On Fri, 2020-06-12 at 06:57 +0200, Jürgen Groß wrote: >>> On 12.06.20 02:22, Volodymyr Babchuk wrote: >>>> As scheduler code now collects time spent in IRQ handlers and in >>>> do_softirq(), we can present those values to userspace tools like >>>> xentop, so system administrator can see how system behaves. >>>> >>>> We are updating counters only in sched_get_time_correction() function >>>> to minimize number of taken spinlocks. As atomic_t is 32 bit wide, it >>>> is not enough to store time with nanosecond precision. So we need to >>>> use 64 bit variables and protect them with spinlock. >>>> >>>> Signed-off-by: Volodymyr Babchuk <volodymyr_babchuk@epam.com> >>>> --- >>>> xen/common/sched/core.c | 17 +++++++++++++++++ >>>> xen/common/sysctl.c | 1 + >>>> xen/include/public/sysctl.h | 4 +++- >>>> xen/include/xen/sched.h | 2 ++ >>>> 4 files changed, 23 insertions(+), 1 deletion(-) >>>> >>>> diff --git a/xen/common/sched/core.c b/xen/common/sched/core.c >>>> index a7294ff5c3..ee6b1d9161 100644 >>>> --- a/xen/common/sched/core.c >>>> +++ b/xen/common/sched/core.c >>>> @@ -95,6 +95,10 @@ static struct scheduler __read_mostly ops; >>>> static bool scheduler_active; >>>> +static DEFINE_SPINLOCK(sched_stat_lock); >>>> +s_time_t sched_stat_irq_time; >>>> +s_time_t sched_stat_hyp_time; >>>> + >>>> static void sched_set_affinity( >>>> struct sched_unit *unit, const cpumask_t *hard, const cpumask_t *soft); >>>> @@ -994,9 +998,22 @@ s_time_t sched_get_time_correction(struct >>>> sched_unit *u) >>>> break; >>>> } >>>> + spin_lock_irqsave(&sched_stat_lock, flags); >>>> + sched_stat_irq_time += irq; >>>> + sched_stat_hyp_time += hyp; >>>> + spin_unlock_irqrestore(&sched_stat_lock, flags); >>> >>> Please don't use a lock. Just use add_sized() instead which will add >>> atomically. >> >> Looks like arm does not support 64 bit variables. > >> Julien, I believe, this is armv7 limitation? Should armv8 work with 64- >> bit atomics? > > 64-bit atomics can work on both Armv7 and Armv8 :). It just haven't > been plumbed yet. > > I am happy to write a patch if you need atomic64_t or even a 64-bit > add_sized(). Looks like I'll need this patch. So, if you still have time, it will be great, if you'll write it.
On Thu, 18 Jun 2020 at 21:24, Volodymyr Babchuk <Volodymyr_Babchuk@epam.com> wrote: > > > Hi Julien, > > Julien Grall writes: > > > Hi Volodymyr, > > > > On 12/06/2020 12:44, Volodymyr Babchuk wrote: > >> > >> On Fri, 2020-06-12 at 06:57 +0200, Jürgen Groß wrote: > >>> On 12.06.20 02:22, Volodymyr Babchuk wrote: > >>>> As scheduler code now collects time spent in IRQ handlers and in > >>>> do_softirq(), we can present those values to userspace tools like > >>>> xentop, so system administrator can see how system behaves. > >>>> > >>>> We are updating counters only in sched_get_time_correction() function > >>>> to minimize number of taken spinlocks. As atomic_t is 32 bit wide, it > >>>> is not enough to store time with nanosecond precision. So we need to > >>>> use 64 bit variables and protect them with spinlock. > >>>> > >>>> Signed-off-by: Volodymyr Babchuk <volodymyr_babchuk@epam.com> > >>>> --- > >>>> xen/common/sched/core.c | 17 +++++++++++++++++ > >>>> xen/common/sysctl.c | 1 + > >>>> xen/include/public/sysctl.h | 4 +++- > >>>> xen/include/xen/sched.h | 2 ++ > >>>> 4 files changed, 23 insertions(+), 1 deletion(-) > >>>> > >>>> diff --git a/xen/common/sched/core.c b/xen/common/sched/core.c > >>>> index a7294ff5c3..ee6b1d9161 100644 > >>>> --- a/xen/common/sched/core.c > >>>> +++ b/xen/common/sched/core.c > >>>> @@ -95,6 +95,10 @@ static struct scheduler __read_mostly ops; > >>>> static bool scheduler_active; > >>>> +static DEFINE_SPINLOCK(sched_stat_lock); > >>>> +s_time_t sched_stat_irq_time; > >>>> +s_time_t sched_stat_hyp_time; > >>>> + > >>>> static void sched_set_affinity( > >>>> struct sched_unit *unit, const cpumask_t *hard, const cpumask_t *soft); > >>>> @@ -994,9 +998,22 @@ s_time_t sched_get_time_correction(struct > >>>> sched_unit *u) > >>>> break; > >>>> } > >>>> + spin_lock_irqsave(&sched_stat_lock, flags); > >>>> + sched_stat_irq_time += irq; > >>>> + sched_stat_hyp_time += hyp; > >>>> + spin_unlock_irqrestore(&sched_stat_lock, flags); > >>> > >>> Please don't use a lock. Just use add_sized() instead which will add > >>> atomically. > >> > >> Looks like arm does not support 64 bit variables. > > >> Julien, I believe, this is armv7 limitation? Should armv8 work with 64- > >> bit atomics? > > > > 64-bit atomics can work on both Armv7 and Armv8 :). It just haven't > > been plumbed yet. > > > > I am happy to write a patch if you need atomic64_t or even a 64-bit > > add_sized(). > > Looks like I'll need this patch. So, if you still have time, it will be > great, if you'll write it. I offered help for either the atomic64_t or the add_sized(). Can you confirm which one you need? Cheers,
Hi Julien, Julien Grall writes: > On Thu, 18 Jun 2020 at 21:24, Volodymyr Babchuk > <Volodymyr_Babchuk@epam.com> wrote: >> >> >> Hi Julien, >> >> Julien Grall writes: >> >> > Hi Volodymyr, >> > >> > On 12/06/2020 12:44, Volodymyr Babchuk wrote: >> >> >> >> On Fri, 2020-06-12 at 06:57 +0200, Jürgen Groß wrote: >> >>> On 12.06.20 02:22, Volodymyr Babchuk wrote: >> >>>> As scheduler code now collects time spent in IRQ handlers and in >> >>>> do_softirq(), we can present those values to userspace tools like >> >>>> xentop, so system administrator can see how system behaves. >> >>>> >> >>>> We are updating counters only in sched_get_time_correction() function >> >>>> to minimize number of taken spinlocks. As atomic_t is 32 bit wide, it >> >>>> is not enough to store time with nanosecond precision. So we need to >> >>>> use 64 bit variables and protect them with spinlock. >> >>>> >> >>>> Signed-off-by: Volodymyr Babchuk <volodymyr_babchuk@epam.com> >> >>>> --- >> >>>> xen/common/sched/core.c | 17 +++++++++++++++++ >> >>>> xen/common/sysctl.c | 1 + >> >>>> xen/include/public/sysctl.h | 4 +++- >> >>>> xen/include/xen/sched.h | 2 ++ >> >>>> 4 files changed, 23 insertions(+), 1 deletion(-) >> >>>> >> >>>> diff --git a/xen/common/sched/core.c b/xen/common/sched/core.c >> >>>> index a7294ff5c3..ee6b1d9161 100644 >> >>>> --- a/xen/common/sched/core.c >> >>>> +++ b/xen/common/sched/core.c >> >>>> @@ -95,6 +95,10 @@ static struct scheduler __read_mostly ops; >> >>>> static bool scheduler_active; >> >>>> +static DEFINE_SPINLOCK(sched_stat_lock); >> >>>> +s_time_t sched_stat_irq_time; >> >>>> +s_time_t sched_stat_hyp_time; >> >>>> + >> >>>> static void sched_set_affinity( >> >>>> struct sched_unit *unit, const cpumask_t *hard, const cpumask_t *soft); >> >>>> @@ -994,9 +998,22 @@ s_time_t sched_get_time_correction(struct >> >>>> sched_unit *u) >> >>>> break; >> >>>> } >> >>>> + spin_lock_irqsave(&sched_stat_lock, flags); >> >>>> + sched_stat_irq_time += irq; >> >>>> + sched_stat_hyp_time += hyp; >> >>>> + spin_unlock_irqrestore(&sched_stat_lock, flags); >> >>> >> >>> Please don't use a lock. Just use add_sized() instead which will add >> >>> atomically. >> >> >> >> Looks like arm does not support 64 bit variables. > >> >> Julien, I believe, this is armv7 limitation? Should armv8 work with 64- >> >> bit atomics? >> > >> > 64-bit atomics can work on both Armv7 and Armv8 :). It just haven't >> > been plumbed yet. >> > >> > I am happy to write a patch if you need atomic64_t or even a 64-bit >> > add_sized(). >> >> Looks like I'll need this patch. So, if you still have time, it will be >> great, if you'll write it. > > I offered help for either the atomic64_t or the add_sized(). Can you > confirm which one you need? Yes, sorry. I had atomic64_t in mind.
diff --git a/xen/common/sched/core.c b/xen/common/sched/core.c
index a7294ff5c3..ee6b1d9161 100644
--- a/xen/common/sched/core.c
+++ b/xen/common/sched/core.c
@@ -95,6 +95,10 @@ static struct scheduler __read_mostly ops;
 
 static bool scheduler_active;
 
+static DEFINE_SPINLOCK(sched_stat_lock);
+s_time_t sched_stat_irq_time;
+s_time_t sched_stat_hyp_time;
+
 static void sched_set_affinity(
     struct sched_unit *unit, const cpumask_t *hard, const cpumask_t *soft);
 
@@ -994,9 +998,22 @@ s_time_t sched_get_time_correction(struct sched_unit *u)
         break;
     }
 
+    spin_lock_irqsave(&sched_stat_lock, flags);
+    sched_stat_irq_time += irq;
+    sched_stat_hyp_time += hyp;
+    spin_unlock_irqrestore(&sched_stat_lock, flags);
+
     return irq + hyp;
 }
 
+void sched_get_time_stats(uint64_t *irq_time, uint64_t *hyp_time)
+{
+    unsigned long flags;
+
+    spin_lock_irqsave(&sched_stat_lock, flags);
+    *irq_time = sched_stat_irq_time;
+    *hyp_time = sched_stat_hyp_time;
+    spin_unlock_irqrestore(&sched_stat_lock, flags);
 }
 
 /*
diff --git a/xen/common/sysctl.c b/xen/common/sysctl.c
index 1c6a817476..00683bc93f 100644
--- a/xen/common/sysctl.c
+++ b/xen/common/sysctl.c
@@ -270,6 +270,7 @@ long do_sysctl(XEN_GUEST_HANDLE_PARAM(xen_sysctl_t) u_sysctl)
         pi->scrub_pages = 0;
         pi->cpu_khz = cpu_khz;
         pi->max_mfn = get_upper_mfn_bound();
+        sched_get_time_stats(&pi->irq_time, &pi->hyp_time);
         arch_do_physinfo(pi);
         if ( iommu_enabled )
         {
diff --git a/xen/include/public/sysctl.h b/xen/include/public/sysctl.h
index 3a08c512e8..f320144d40 100644
--- a/xen/include/public/sysctl.h
+++ b/xen/include/public/sysctl.h
@@ -35,7 +35,7 @@
 #include "domctl.h"
 #include "physdev.h"
 
-#define XEN_SYSCTL_INTERFACE_VERSION 0x00000013
+#define XEN_SYSCTL_INTERFACE_VERSION 0x00000014
 
 /*
  * Read console content from Xen buffer ring.
@@ -118,6 +118,8 @@ struct xen_sysctl_physinfo {
     uint64_aligned_t scrub_pages;
     uint64_aligned_t outstanding_pages;
     uint64_aligned_t max_mfn;    /* Largest possible MFN on this host */
+    uint64_aligned_t irq_time;
+    uint64_aligned_t hyp_time;
     uint32_t hw_cap[8];
 };
 
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index 51dc7c4551..869d4efbd6 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -717,6 +717,8 @@ void vcpu_end_irq_handler(void);
 void vcpu_begin_hyp_task(struct vcpu *v);
 void vcpu_end_hyp_task(struct vcpu *v);
 
+void sched_get_time_stats(uint64_t *irq_time, uint64_t *hyp_time);
+
 /*
  * Force synchronisation of given VCPU's state. If it is currently descheduled,
  * this call will ensure that all its state is committed to memory and that
As scheduler code now collects time spent in IRQ handlers and in
do_softirq(), we can present those values to userspace tools like
xentop, so system administrator can see how system behaves.

We are updating counters only in sched_get_time_correction() function
to minimize number of taken spinlocks. As atomic_t is 32 bit wide, it
is not enough to store time with nanosecond precision. So we need to
use 64 bit variables and protect them with spinlock.

Signed-off-by: Volodymyr Babchuk <volodymyr_babchuk@epam.com>
---
 xen/common/sched/core.c     | 17 +++++++++++++++++
 xen/common/sysctl.c         |  1 +
 xen/include/public/sysctl.h |  4 +++-
 xen/include/xen/sched.h     |  2 ++
 4 files changed, 23 insertions(+), 1 deletion(-)