diff mbox series

[05/10] viridian: use softirq batching in hvcall_ipi()

Message ID 20201111200721.30551-6-paul@xen.org (mailing list archive)
State Superseded
Headers show
Series viridian: add support for ExProcessorMasks | expand

Commit Message

Paul Durrant Nov. 11, 2020, 8:07 p.m. UTC
From: Paul Durrant <pdurrant@amazon.com>

vlapic_ipi() uses a softirq batching mechanism to improve the efficiency of
sending a IPIs to large number of processors. This patch modifies send_ipi()
(the worker function called by hvcall_ipi()) to also make use of the
mechanism when there multiple bits set the hypercall_vpmask. Hence a `nr`
field is added to the structure to track the number of set bits.

Signed-off-by: Paul Durrant <pdurrant@amazon.com>
---
Cc: Wei Liu <wl@xen.org>
Cc: Jan Beulich <jbeulich@suse.com>
Cc: Andrew Cooper <andrew.cooper3@citrix.com>
Cc: "Roger Pau Monné" <roger.pau@citrix.com>
---
 xen/arch/x86/hvm/viridian/viridian.c | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

Comments

Jan Beulich Nov. 12, 2020, 8:52 a.m. UTC | #1
On 11.11.2020 21:07, Paul Durrant wrote:
> From: Paul Durrant <pdurrant@amazon.com>
> 
> vlapic_ipi() uses a softirq batching mechanism to improve the efficiency of
> sending a IPIs to large number of processors. This patch modifies send_ipi()
> (the worker function called by hvcall_ipi()) to also make use of the
> mechanism when there multiple bits set the hypercall_vpmask. Hence a `nr`
> field is added to the structure to track the number of set bits.

This is kind of unusual, i.e. we don't do so elsewhere. I take it the
assumption is that using bitmap_weight() is too much overhead?

> @@ -509,6 +510,7 @@ void viridian_domain_deinit(struct domain *d)
>  
>  struct hypercall_vpmask {
>      DECLARE_BITMAP(mask, HVM_MAX_VCPUS);
> +    unsigned int nr;
>  };
>  
>  static DEFINE_PER_CPU(struct hypercall_vpmask, hypercall_vpmask);
> @@ -516,21 +518,24 @@ static DEFINE_PER_CPU(struct hypercall_vpmask, hypercall_vpmask);
>  static void vpmask_empty(struct hypercall_vpmask *vpmask)
>  {
>      bitmap_zero(vpmask->mask, HVM_MAX_VCPUS);
> +    vpmask->nr = 0;
>  }
>  
>  static void vpmask_set(struct hypercall_vpmask *vpmask, unsigned int vp)
>  {
> -    __set_bit(vp, vpmask->mask);
> +    if ( !test_and_set_bit(vp, vpmask->mask) )
> +        vpmask->nr++;

If test_and_set_bit() is the correct thing to use here (rather
than __test_and_set_bit()), the counter also needs updating
atomically.

>  }
>  
>  static void vpmask_fill(struct hypercall_vpmask *vpmask)
>  {
>      bitmap_fill(vpmask->mask, HVM_MAX_VCPUS);
> +    vpmask->nr = HVM_MAX_VCPUS;
>  }
>  
>  static bool vpmask_test(struct hypercall_vpmask *vpmask, unsigned int vp)
>  {
> -    return test_bit(vp, vpmask->mask);
> +    return vpmask->nr && test_bit(vp, vpmask->mask);

Is this in fact an improvement?

Jan
Paul Durrant Nov. 19, 2020, 4:08 p.m. UTC | #2
> -----Original Message-----
> From: Jan Beulich <jbeulich@suse.com>
> Sent: 12 November 2020 08:53
> To: Paul Durrant <paul@xen.org>
> Cc: Paul Durrant <pdurrant@amazon.com>; Wei Liu <wl@xen.org>; Andrew Cooper
> <andrew.cooper3@citrix.com>; Roger Pau Monné <roger.pau@citrix.com>; xen-devel@lists.xenproject.org
> Subject: Re: [PATCH 05/10] viridian: use softirq batching in hvcall_ipi()
> 
> On 11.11.2020 21:07, Paul Durrant wrote:
> > From: Paul Durrant <pdurrant@amazon.com>
> >
> > vlapic_ipi() uses a softirq batching mechanism to improve the efficiency of
> > sending a IPIs to large number of processors. This patch modifies send_ipi()
> > (the worker function called by hvcall_ipi()) to also make use of the
> > mechanism when there multiple bits set the hypercall_vpmask. Hence a `nr`
> > field is added to the structure to track the number of set bits.
> 
> This is kind of unusual, i.e. we don't do so elsewhere. I take it the
> assumption is that using bitmap_weight() is too much overhead?

It just seemed wasteful in the circumstances. If I move to bitmap copy OTOH then I'll have to use bitmap_weight().

> 
> > @@ -509,6 +510,7 @@ void viridian_domain_deinit(struct domain *d)
> >
> >  struct hypercall_vpmask {
> >      DECLARE_BITMAP(mask, HVM_MAX_VCPUS);
> > +    unsigned int nr;
> >  };
> >
> >  static DEFINE_PER_CPU(struct hypercall_vpmask, hypercall_vpmask);
> > @@ -516,21 +518,24 @@ static DEFINE_PER_CPU(struct hypercall_vpmask, hypercall_vpmask);
> >  static void vpmask_empty(struct hypercall_vpmask *vpmask)
> >  {
> >      bitmap_zero(vpmask->mask, HVM_MAX_VCPUS);
> > +    vpmask->nr = 0;
> >  }
> >
> >  static void vpmask_set(struct hypercall_vpmask *vpmask, unsigned int vp)
> >  {
> > -    __set_bit(vp, vpmask->mask);
> > +    if ( !test_and_set_bit(vp, vpmask->mask) )
> > +        vpmask->nr++;
> 
> If test_and_set_bit() is the correct thing to use here (rather
> than __test_and_set_bit()), the counter also needs updating
> atomically.
> 

It doesn't need to be atomic, but I'll probably drop it.

> >  }
> >
> >  static void vpmask_fill(struct hypercall_vpmask *vpmask)
> >  {
> >      bitmap_fill(vpmask->mask, HVM_MAX_VCPUS);
> > +    vpmask->nr = HVM_MAX_VCPUS;
> >  }
> >
> >  static bool vpmask_test(struct hypercall_vpmask *vpmask, unsigned int vp)
> >  {
> > -    return test_bit(vp, vpmask->mask);
> > +    return vpmask->nr && test_bit(vp, vpmask->mask);
> 
> Is this in fact an improvement?
> 

I think so, but it's a moot point if I drop 'nr'.

  Paul

> Jan
diff mbox series

Patch

diff --git a/xen/arch/x86/hvm/viridian/viridian.c b/xen/arch/x86/hvm/viridian/viridian.c
index 63f63093a513..765d53016c02 100644
--- a/xen/arch/x86/hvm/viridian/viridian.c
+++ b/xen/arch/x86/hvm/viridian/viridian.c
@@ -11,6 +11,7 @@ 
 #include <xen/hypercall.h>
 #include <xen/domain_page.h>
 #include <xen/param.h>
+#include <xen/softirq.h>
 #include <asm/guest/hyperv-tlfs.h>
 #include <asm/paging.h>
 #include <asm/p2m.h>
@@ -509,6 +510,7 @@  void viridian_domain_deinit(struct domain *d)
 
 struct hypercall_vpmask {
     DECLARE_BITMAP(mask, HVM_MAX_VCPUS);
+    unsigned int nr;
 };
 
 static DEFINE_PER_CPU(struct hypercall_vpmask, hypercall_vpmask);
@@ -516,21 +518,24 @@  static DEFINE_PER_CPU(struct hypercall_vpmask, hypercall_vpmask);
 static void vpmask_empty(struct hypercall_vpmask *vpmask)
 {
     bitmap_zero(vpmask->mask, HVM_MAX_VCPUS);
+    vpmask->nr = 0;
 }
 
 static void vpmask_set(struct hypercall_vpmask *vpmask, unsigned int vp)
 {
-    __set_bit(vp, vpmask->mask);
+    if ( !test_and_set_bit(vp, vpmask->mask) )
+        vpmask->nr++;
 }
 
 static void vpmask_fill(struct hypercall_vpmask *vpmask)
 {
     bitmap_fill(vpmask->mask, HVM_MAX_VCPUS);
+    vpmask->nr = HVM_MAX_VCPUS;
 }
 
 static bool vpmask_test(struct hypercall_vpmask *vpmask, unsigned int vp)
 {
-    return test_bit(vp, vpmask->mask);
+    return vpmask->nr && test_bit(vp, vpmask->mask);
 }
 
 static unsigned int vpmask_first(struct hypercall_vpmask *vpmask)
@@ -644,8 +649,17 @@  static void send_ipi(struct hypercall_vpmask *vpmask, uint8_t vector)
     struct domain *currd = current->domain;
     unsigned int vp;
 
+    if ( !vpmask->nr )
+        return;
+
+    if ( vpmask->nr > 1 )
+        cpu_raise_softirq_batch_begin();
+
     for_each_vp ( vpmask, vp )
         vlapic_set_irq(vcpu_vlapic(currd->vcpu[vp]), vector, 0);
+
+    if ( vpmask->nr > 1 )
+        cpu_raise_softirq_batch_finish();
 }
 
 static int hvcall_ipi(union hypercall_input *input,