Message ID | 20201111200721.30551-6-paul@xen.org (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | viridian: add support for ExProcessorMasks | expand |
On 11.11.2020 21:07, Paul Durrant wrote: > From: Paul Durrant <pdurrant@amazon.com> > > vlapic_ipi() uses a softirq batching mechanism to improve the efficiency of > sending a IPIs to large number of processors. This patch modifies send_ipi() > (the worker function called by hvcall_ipi()) to also make use of the > mechanism when there multiple bits set the hypercall_vpmask. Hence a `nr` > field is added to the structure to track the number of set bits. This is kind of unusual, i.e. we don't do so elsewhere. I take it the assumption is that using bitmap_weight() is too much overhead? > @@ -509,6 +510,7 @@ void viridian_domain_deinit(struct domain *d) > > struct hypercall_vpmask { > DECLARE_BITMAP(mask, HVM_MAX_VCPUS); > + unsigned int nr; > }; > > static DEFINE_PER_CPU(struct hypercall_vpmask, hypercall_vpmask); > @@ -516,21 +518,24 @@ static DEFINE_PER_CPU(struct hypercall_vpmask, hypercall_vpmask); > static void vpmask_empty(struct hypercall_vpmask *vpmask) > { > bitmap_zero(vpmask->mask, HVM_MAX_VCPUS); > + vpmask->nr = 0; > } > > static void vpmask_set(struct hypercall_vpmask *vpmask, unsigned int vp) > { > - __set_bit(vp, vpmask->mask); > + if ( !test_and_set_bit(vp, vpmask->mask) ) > + vpmask->nr++; If test_and_set_bit() is the correct thing to use here (rather than __test_and_set_bit()), the counter also needs updating atomically. > } > > static void vpmask_fill(struct hypercall_vpmask *vpmask) > { > bitmap_fill(vpmask->mask, HVM_MAX_VCPUS); > + vpmask->nr = HVM_MAX_VCPUS; > } > > static bool vpmask_test(struct hypercall_vpmask *vpmask, unsigned int vp) > { > - return test_bit(vp, vpmask->mask); > + return vpmask->nr && test_bit(vp, vpmask->mask); Is this in fact an improvement? Jan
> -----Original Message----- > From: Jan Beulich <jbeulich@suse.com> > Sent: 12 November 2020 08:53 > To: Paul Durrant <paul@xen.org> > Cc: Paul Durrant <pdurrant@amazon.com>; Wei Liu <wl@xen.org>; Andrew Cooper > <andrew.cooper3@citrix.com>; Roger Pau Monné <roger.pau@citrix.com>; xen-devel@lists.xenproject.org > Subject: Re: [PATCH 05/10] viridian: use softirq batching in hvcall_ipi() > > On 11.11.2020 21:07, Paul Durrant wrote: > > From: Paul Durrant <pdurrant@amazon.com> > > > > vlapic_ipi() uses a softirq batching mechanism to improve the efficiency of > > sending a IPIs to large number of processors. This patch modifies send_ipi() > > (the worker function called by hvcall_ipi()) to also make use of the > > mechanism when there multiple bits set the hypercall_vpmask. Hence a `nr` > > field is added to the structure to track the number of set bits. > > This is kind of unusual, i.e. we don't do so elsewhere. I take it the > assumption is that using bitmap_weight() is too much overhead? It just seemed wasteful in the circumstances. If I move to bitmap copy OTOH then I'll have to use bitmap_weight(). > > > @@ -509,6 +510,7 @@ void viridian_domain_deinit(struct domain *d) > > > > struct hypercall_vpmask { > > DECLARE_BITMAP(mask, HVM_MAX_VCPUS); > > + unsigned int nr; > > }; > > > > static DEFINE_PER_CPU(struct hypercall_vpmask, hypercall_vpmask); > > @@ -516,21 +518,24 @@ static DEFINE_PER_CPU(struct hypercall_vpmask, hypercall_vpmask); > > static void vpmask_empty(struct hypercall_vpmask *vpmask) > > { > > bitmap_zero(vpmask->mask, HVM_MAX_VCPUS); > > + vpmask->nr = 0; > > } > > > > static void vpmask_set(struct hypercall_vpmask *vpmask, unsigned int vp) > > { > > - __set_bit(vp, vpmask->mask); > > + if ( !test_and_set_bit(vp, vpmask->mask) ) > > + vpmask->nr++; > > If test_and_set_bit() is the correct thing to use here (rather > than __test_and_set_bit()), the counter also needs updating > atomically. > It doesn't need to be atomic, but I'll probably drop it. > > } > > > > static void vpmask_fill(struct hypercall_vpmask *vpmask) > > { > > bitmap_fill(vpmask->mask, HVM_MAX_VCPUS); > > + vpmask->nr = HVM_MAX_VCPUS; > > } > > > > static bool vpmask_test(struct hypercall_vpmask *vpmask, unsigned int vp) > > { > > - return test_bit(vp, vpmask->mask); > > + return vpmask->nr && test_bit(vp, vpmask->mask); > > Is this in fact an improvement? > I think so, but it's a moot point if I drop 'nr'. Paul > Jan
diff --git a/xen/arch/x86/hvm/viridian/viridian.c b/xen/arch/x86/hvm/viridian/viridian.c index 63f63093a513..765d53016c02 100644 --- a/xen/arch/x86/hvm/viridian/viridian.c +++ b/xen/arch/x86/hvm/viridian/viridian.c @@ -11,6 +11,7 @@ #include <xen/hypercall.h> #include <xen/domain_page.h> #include <xen/param.h> +#include <xen/softirq.h> #include <asm/guest/hyperv-tlfs.h> #include <asm/paging.h> #include <asm/p2m.h> @@ -509,6 +510,7 @@ void viridian_domain_deinit(struct domain *d) struct hypercall_vpmask { DECLARE_BITMAP(mask, HVM_MAX_VCPUS); + unsigned int nr; }; static DEFINE_PER_CPU(struct hypercall_vpmask, hypercall_vpmask); @@ -516,21 +518,24 @@ static DEFINE_PER_CPU(struct hypercall_vpmask, hypercall_vpmask); static void vpmask_empty(struct hypercall_vpmask *vpmask) { bitmap_zero(vpmask->mask, HVM_MAX_VCPUS); + vpmask->nr = 0; } static void vpmask_set(struct hypercall_vpmask *vpmask, unsigned int vp) { - __set_bit(vp, vpmask->mask); + if ( !test_and_set_bit(vp, vpmask->mask) ) + vpmask->nr++; } static void vpmask_fill(struct hypercall_vpmask *vpmask) { bitmap_fill(vpmask->mask, HVM_MAX_VCPUS); + vpmask->nr = HVM_MAX_VCPUS; } static bool vpmask_test(struct hypercall_vpmask *vpmask, unsigned int vp) { - return test_bit(vp, vpmask->mask); + return vpmask->nr && test_bit(vp, vpmask->mask); } static unsigned int vpmask_first(struct hypercall_vpmask *vpmask) @@ -644,8 +649,17 @@ static void send_ipi(struct hypercall_vpmask *vpmask, uint8_t vector) struct domain *currd = current->domain; unsigned int vp; + if ( !vpmask->nr ) + return; + + if ( vpmask->nr > 1 ) + cpu_raise_softirq_batch_begin(); + for_each_vp ( vpmask, vp ) vlapic_set_irq(vcpu_vlapic(currd->vcpu[vp]), vector, 0); + + if ( vpmask->nr > 1 ) + cpu_raise_softirq_batch_finish(); } static int hvcall_ipi(union hypercall_input *input,