diff mbox series

memory: avoid unnecessary iteration when updating ioeventfds

Message ID 20230228142514.2582-1-longpeng2@huawei.com (mailing list archive)
State New, archived
Headers show
Series memory: avoid unnecessary iteration when updating ioeventfds | expand

Commit Message

Zhijian Li (Fujitsu)" via Feb. 28, 2023, 2:25 p.m. UTC
From: Longpeng <longpeng2@huawei.com>

When updating ioeventfds, we need to iterate all address spaces and
iterate all flat ranges of each address space. There is so much
redundant process that a FlatView would be iterated for so many times
during one commit (memory_region_transaction_commit).

We can mark a FlatView as UPDATED and then skip it in the next iteration
and clear the UPDATED flag at the end of the commit. The overhead can
be significantly reduced.

For example, a VM with 16 vdpa net devices and each one has 65 vectors,
can reduce the time spent on memory_region_transaction_commit by 95%.

Signed-off-by: Longpeng <longpeng2@huawei.com>
---
 include/exec/memory.h |  2 ++
 softmmu/memory.c      | 28 +++++++++++++++++++++++++++-
 2 files changed, 29 insertions(+), 1 deletion(-)

Comments

David Hildenbrand Feb. 28, 2023, 5:16 p.m. UTC | #1
On 28.02.23 15:25, Longpeng(Mike) via wrote:
> From: Longpeng <longpeng2@huawei.com>
> 
> When updating ioeventfds, we need to iterate all address spaces and
> iterate all flat ranges of each address space. There is so much
> redundant process that a FlatView would be iterated for so many times
> during one commit (memory_region_transaction_commit).
> 
> We can mark a FlatView as UPDATED and then skip it in the next iteration
> and clear the UPDATED flag at the end of the commit. The overhead can
> be significantly reduced.
> 
> For example, a VM with 16 vdpa net devices and each one has 65 vectors,
> can reduce the time spent on memory_region_transaction_commit by 95%.
> 
> Signed-off-by: Longpeng <longpeng2@huawei.com>
> ---
>   include/exec/memory.h |  2 ++
>   softmmu/memory.c      | 28 +++++++++++++++++++++++++++-
>   2 files changed, 29 insertions(+), 1 deletion(-)
> 
> diff --git a/include/exec/memory.h b/include/exec/memory.h
> index 2e602a2fad..974eabf765 100644
> --- a/include/exec/memory.h
> +++ b/include/exec/memory.h
> @@ -1093,6 +1093,8 @@ struct FlatView {
>       unsigned nr_allocated;
>       struct AddressSpaceDispatch *dispatch;
>       MemoryRegion *root;
> +#define FLATVIEW_FLAG_IOEVENTFD_UPDATED (1 << 0)
> +    unsigned flags;
>   };
>   
>   static inline FlatView *address_space_to_flatview(AddressSpace *as)
> diff --git a/softmmu/memory.c b/softmmu/memory.c
> index 9d64efca26..71ff996712 100644
> --- a/softmmu/memory.c
> +++ b/softmmu/memory.c
> @@ -815,6 +815,15 @@ FlatView *address_space_get_flatview(AddressSpace *as)
>       return view;
>   }
>   
> +static void address_space_reset_view_flags(AddressSpace *as, unsigned mask)
> +{
> +    FlatView *view = address_space_get_flatview(as);
> +
> +    if (view->flags & mask) {
> +        view->flags &= ~mask;
> +    }

Just do it unconditionally.

Unfortunately, I cannot comment on the bigger picture, but it does look 
good to me.
Jason Wang March 1, 2023, 8:36 a.m. UTC | #2
On Tue, Feb 28, 2023 at 10:25 PM Longpeng(Mike) <longpeng2@huawei.com> wrote:
>
> From: Longpeng <longpeng2@huawei.com>
>
> When updating ioeventfds, we need to iterate all address spaces and
> iterate all flat ranges of each address space. There is so much
> redundant process that a FlatView would be iterated for so many times
> during one commit (memory_region_transaction_commit).
>
> We can mark a FlatView as UPDATED and then skip it in the next iteration
> and clear the UPDATED flag at the end of the commit. The overhead can
> be significantly reduced.
>
> For example, a VM with 16 vdpa net devices and each one has 65 vectors,
> can reduce the time spent on memory_region_transaction_commit by 95%.
>
> Signed-off-by: Longpeng <longpeng2@huawei.com>
> ---
>  include/exec/memory.h |  2 ++
>  softmmu/memory.c      | 28 +++++++++++++++++++++++++++-
>  2 files changed, 29 insertions(+), 1 deletion(-)
>
> diff --git a/include/exec/memory.h b/include/exec/memory.h
> index 2e602a2fad..974eabf765 100644
> --- a/include/exec/memory.h
> +++ b/include/exec/memory.h
> @@ -1093,6 +1093,8 @@ struct FlatView {
>      unsigned nr_allocated;
>      struct AddressSpaceDispatch *dispatch;
>      MemoryRegion *root;
> +#define FLATVIEW_FLAG_IOEVENTFD_UPDATED (1 << 0)
> +    unsigned flags;
>  };
>
>  static inline FlatView *address_space_to_flatview(AddressSpace *as)
> diff --git a/softmmu/memory.c b/softmmu/memory.c
> index 9d64efca26..71ff996712 100644
> --- a/softmmu/memory.c
> +++ b/softmmu/memory.c
> @@ -815,6 +815,15 @@ FlatView *address_space_get_flatview(AddressSpace *as)
>      return view;
>  }
>
> +static void address_space_reset_view_flags(AddressSpace *as, unsigned mask)
> +{
> +    FlatView *view = address_space_get_flatview(as);
> +
> +    if (view->flags & mask) {
> +        view->flags &= ~mask;
> +    }
> +}
> +
>  static void address_space_update_ioeventfds(AddressSpace *as)
>  {
>      FlatView *view;
> @@ -825,6 +834,12 @@ static void address_space_update_ioeventfds(AddressSpace *as)
>      AddrRange tmp;
>      unsigned i;
>
> +    view = address_space_get_flatview(as);
> +    if (view->flags & FLATVIEW_FLAG_IOEVENTFD_UPDATED) {
> +        return;
> +    }
> +    view->flags |= FLATVIEW_FLAG_IOEVENTFD_UPDATED;
> +

Won't we lose the listener calls if multiple address spaces have the
same flatview?

Thanks

>      /*
>       * It is likely that the number of ioeventfds hasn't changed much, so use
>       * the previous size as the starting value, with some headroom to avoid
> @@ -833,7 +848,6 @@ static void address_space_update_ioeventfds(AddressSpace *as)
>      ioeventfd_max = QEMU_ALIGN_UP(as->ioeventfd_nb, 4);
>      ioeventfds = g_new(MemoryRegionIoeventfd, ioeventfd_max);
>
> -    view = address_space_get_flatview(as);
>      FOR_EACH_FLAT_RANGE(fr, view) {
>          for (i = 0; i < fr->mr->ioeventfd_nb; ++i) {
>              tmp = addrrange_shift(fr->mr->ioeventfds[i].addr,
> @@ -1086,6 +1100,15 @@ void memory_region_transaction_begin(void)
>      ++memory_region_transaction_depth;
>  }
>
> +static inline void address_space_update_ioeventfds_finish(void)
> +{
> +    AddressSpace *as;
> +
> +    QTAILQ_FOREACH(as, &address_spaces, address_spaces_link) {
> +        address_space_reset_view_flags(as, FLATVIEW_FLAG_IOEVENTFD_UPDATED);
> +    }
> +}
> +
>  void memory_region_transaction_commit(void)
>  {
>      AddressSpace *as;
> @@ -1106,12 +1129,14 @@ void memory_region_transaction_commit(void)
>              }
>              memory_region_update_pending = false;
>              ioeventfd_update_pending = false;
> +            address_space_update_ioeventfds_finish();
>              MEMORY_LISTENER_CALL_GLOBAL(commit, Forward);
>          } else if (ioeventfd_update_pending) {
>              QTAILQ_FOREACH(as, &address_spaces, address_spaces_link) {
>                  address_space_update_ioeventfds(as);
>              }
>              ioeventfd_update_pending = false;
> +            address_space_update_ioeventfds_finish();
>          }
>     }
>  }
> @@ -3076,6 +3101,7 @@ void address_space_init(AddressSpace *as, MemoryRegion *root, const char *name)
>      as->name = g_strdup(name ? name : "anonymous");
>      address_space_update_topology(as);
>      address_space_update_ioeventfds(as);
> +    address_space_reset_view_flags(as, FLATVIEW_FLAG_IOEVENTFD_UPDATED);
>  }
>
>  static void do_address_space_destroy(AddressSpace *as)
> --
> 2.23.0
>
Peter Xu March 5, 2023, 9:27 p.m. UTC | #3
On Wed, Mar 01, 2023 at 04:36:20PM +0800, Jason Wang wrote:
> On Tue, Feb 28, 2023 at 10:25 PM Longpeng(Mike) <longpeng2@huawei.com> wrote:
> >
> > From: Longpeng <longpeng2@huawei.com>
> >
> > When updating ioeventfds, we need to iterate all address spaces and
> > iterate all flat ranges of each address space. There is so much
> > redundant process that a FlatView would be iterated for so many times
> > during one commit (memory_region_transaction_commit).
> >
> > We can mark a FlatView as UPDATED and then skip it in the next iteration
> > and clear the UPDATED flag at the end of the commit. The overhead can
> > be significantly reduced.
> >
> > For example, a VM with 16 vdpa net devices and each one has 65 vectors,
> > can reduce the time spent on memory_region_transaction_commit by 95%.
> >
> > Signed-off-by: Longpeng <longpeng2@huawei.com>
> > ---
> >  include/exec/memory.h |  2 ++
> >  softmmu/memory.c      | 28 +++++++++++++++++++++++++++-
> >  2 files changed, 29 insertions(+), 1 deletion(-)
> >
> > diff --git a/include/exec/memory.h b/include/exec/memory.h
> > index 2e602a2fad..974eabf765 100644
> > --- a/include/exec/memory.h
> > +++ b/include/exec/memory.h
> > @@ -1093,6 +1093,8 @@ struct FlatView {
> >      unsigned nr_allocated;
> >      struct AddressSpaceDispatch *dispatch;
> >      MemoryRegion *root;
> > +#define FLATVIEW_FLAG_IOEVENTFD_UPDATED (1 << 0)
> > +    unsigned flags;
> >  };
> >
> >  static inline FlatView *address_space_to_flatview(AddressSpace *as)
> > diff --git a/softmmu/memory.c b/softmmu/memory.c
> > index 9d64efca26..71ff996712 100644
> > --- a/softmmu/memory.c
> > +++ b/softmmu/memory.c
> > @@ -815,6 +815,15 @@ FlatView *address_space_get_flatview(AddressSpace *as)
> >      return view;
> >  }
> >
> > +static void address_space_reset_view_flags(AddressSpace *as, unsigned mask)
> > +{
> > +    FlatView *view = address_space_get_flatview(as);
> > +
> > +    if (view->flags & mask) {
> > +        view->flags &= ~mask;
> > +    }
> > +}
> > +
> >  static void address_space_update_ioeventfds(AddressSpace *as)
> >  {
> >      FlatView *view;
> > @@ -825,6 +834,12 @@ static void address_space_update_ioeventfds(AddressSpace *as)
> >      AddrRange tmp;
> >      unsigned i;
> >
> > +    view = address_space_get_flatview(as);
> > +    if (view->flags & FLATVIEW_FLAG_IOEVENTFD_UPDATED) {
> > +        return;
> > +    }
> > +    view->flags |= FLATVIEW_FLAG_IOEVENTFD_UPDATED;
> > +
> 
> Won't we lose the listener calls if multiple address spaces have the
> same flatview?

I have the same concern with Jason.  I don't think it matters in reality,
since only address_space_io uses it so far. but it doesn't really look
reasonable and clean.

One other idea of optimizing ioeventfd update is we can add a per-AS
counter (ioeventfd_notifiers), increase if any eventfd_add|del is
registered in memory_listener_register(), and decrease when unregister.
Then address_space_update_ioeventfds() can be skipped completely if
ioeventfd_notifiers==0.

Side note: Jason, do you think we should drop vhost_eventfd_add|del?
They're all no-ops right now.

Thanks,
Jason Wang March 6, 2023, 3:46 a.m. UTC | #4
On Mon, Mar 6, 2023 at 5:27 AM Peter Xu <peterx@redhat.com> wrote:
>
> On Wed, Mar 01, 2023 at 04:36:20PM +0800, Jason Wang wrote:
> > On Tue, Feb 28, 2023 at 10:25 PM Longpeng(Mike) <longpeng2@huawei.com> wrote:
> > >
> > > From: Longpeng <longpeng2@huawei.com>
> > >
> > > When updating ioeventfds, we need to iterate all address spaces and
> > > iterate all flat ranges of each address space. There is so much
> > > redundant process that a FlatView would be iterated for so many times
> > > during one commit (memory_region_transaction_commit).
> > >
> > > We can mark a FlatView as UPDATED and then skip it in the next iteration
> > > and clear the UPDATED flag at the end of the commit. The overhead can
> > > be significantly reduced.
> > >
> > > For example, a VM with 16 vdpa net devices and each one has 65 vectors,
> > > can reduce the time spent on memory_region_transaction_commit by 95%.
> > >
> > > Signed-off-by: Longpeng <longpeng2@huawei.com>
> > > ---
> > >  include/exec/memory.h |  2 ++
> > >  softmmu/memory.c      | 28 +++++++++++++++++++++++++++-
> > >  2 files changed, 29 insertions(+), 1 deletion(-)
> > >
> > > diff --git a/include/exec/memory.h b/include/exec/memory.h
> > > index 2e602a2fad..974eabf765 100644
> > > --- a/include/exec/memory.h
> > > +++ b/include/exec/memory.h
> > > @@ -1093,6 +1093,8 @@ struct FlatView {
> > >      unsigned nr_allocated;
> > >      struct AddressSpaceDispatch *dispatch;
> > >      MemoryRegion *root;
> > > +#define FLATVIEW_FLAG_IOEVENTFD_UPDATED (1 << 0)
> > > +    unsigned flags;
> > >  };
> > >
> > >  static inline FlatView *address_space_to_flatview(AddressSpace *as)
> > > diff --git a/softmmu/memory.c b/softmmu/memory.c
> > > index 9d64efca26..71ff996712 100644
> > > --- a/softmmu/memory.c
> > > +++ b/softmmu/memory.c
> > > @@ -815,6 +815,15 @@ FlatView *address_space_get_flatview(AddressSpace *as)
> > >      return view;
> > >  }
> > >
> > > +static void address_space_reset_view_flags(AddressSpace *as, unsigned mask)
> > > +{
> > > +    FlatView *view = address_space_get_flatview(as);
> > > +
> > > +    if (view->flags & mask) {
> > > +        view->flags &= ~mask;
> > > +    }
> > > +}
> > > +
> > >  static void address_space_update_ioeventfds(AddressSpace *as)
> > >  {
> > >      FlatView *view;
> > > @@ -825,6 +834,12 @@ static void address_space_update_ioeventfds(AddressSpace *as)
> > >      AddrRange tmp;
> > >      unsigned i;
> > >
> > > +    view = address_space_get_flatview(as);
> > > +    if (view->flags & FLATVIEW_FLAG_IOEVENTFD_UPDATED) {
> > > +        return;
> > > +    }
> > > +    view->flags |= FLATVIEW_FLAG_IOEVENTFD_UPDATED;
> > > +
> >
> > Won't we lose the listener calls if multiple address spaces have the
> > same flatview?
>
> I have the same concern with Jason.  I don't think it matters in reality,
> since only address_space_io uses it so far. but it doesn't really look
> reasonable and clean.

Yes, I think in the memory core it should not assume how the eventfds are used.

>
> One other idea of optimizing ioeventfd update is we can add a per-AS
> counter (ioeventfd_notifiers), increase if any eventfd_add|del is
> registered in memory_listener_register(), and decrease when unregister.
> Then address_space_update_ioeventfds() can be skipped completely if
> ioeventfd_notifiers==0.
>
> Side note: Jason, do you think we should drop vhost_eventfd_add|del?
> They're all no-ops right now.

I think so.

Thanks

>
> Thanks,
>
> --
> Peter Xu
>
diff mbox series

Patch

diff --git a/include/exec/memory.h b/include/exec/memory.h
index 2e602a2fad..974eabf765 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -1093,6 +1093,8 @@  struct FlatView {
     unsigned nr_allocated;
     struct AddressSpaceDispatch *dispatch;
     MemoryRegion *root;
+#define FLATVIEW_FLAG_IOEVENTFD_UPDATED (1 << 0)
+    unsigned flags;
 };
 
 static inline FlatView *address_space_to_flatview(AddressSpace *as)
diff --git a/softmmu/memory.c b/softmmu/memory.c
index 9d64efca26..71ff996712 100644
--- a/softmmu/memory.c
+++ b/softmmu/memory.c
@@ -815,6 +815,15 @@  FlatView *address_space_get_flatview(AddressSpace *as)
     return view;
 }
 
+static void address_space_reset_view_flags(AddressSpace *as, unsigned mask)
+{
+    FlatView *view = address_space_get_flatview(as);
+
+    if (view->flags & mask) {
+        view->flags &= ~mask;
+    }
+}
+
 static void address_space_update_ioeventfds(AddressSpace *as)
 {
     FlatView *view;
@@ -825,6 +834,12 @@  static void address_space_update_ioeventfds(AddressSpace *as)
     AddrRange tmp;
     unsigned i;
 
+    view = address_space_get_flatview(as);
+    if (view->flags & FLATVIEW_FLAG_IOEVENTFD_UPDATED) {
+        return;
+    }
+    view->flags |= FLATVIEW_FLAG_IOEVENTFD_UPDATED;
+
     /*
      * It is likely that the number of ioeventfds hasn't changed much, so use
      * the previous size as the starting value, with some headroom to avoid
@@ -833,7 +848,6 @@  static void address_space_update_ioeventfds(AddressSpace *as)
     ioeventfd_max = QEMU_ALIGN_UP(as->ioeventfd_nb, 4);
     ioeventfds = g_new(MemoryRegionIoeventfd, ioeventfd_max);
 
-    view = address_space_get_flatview(as);
     FOR_EACH_FLAT_RANGE(fr, view) {
         for (i = 0; i < fr->mr->ioeventfd_nb; ++i) {
             tmp = addrrange_shift(fr->mr->ioeventfds[i].addr,
@@ -1086,6 +1100,15 @@  void memory_region_transaction_begin(void)
     ++memory_region_transaction_depth;
 }
 
+static inline void address_space_update_ioeventfds_finish(void)
+{
+    AddressSpace *as;
+
+    QTAILQ_FOREACH(as, &address_spaces, address_spaces_link) {
+        address_space_reset_view_flags(as, FLATVIEW_FLAG_IOEVENTFD_UPDATED);
+    }
+}
+
 void memory_region_transaction_commit(void)
 {
     AddressSpace *as;
@@ -1106,12 +1129,14 @@  void memory_region_transaction_commit(void)
             }
             memory_region_update_pending = false;
             ioeventfd_update_pending = false;
+            address_space_update_ioeventfds_finish();
             MEMORY_LISTENER_CALL_GLOBAL(commit, Forward);
         } else if (ioeventfd_update_pending) {
             QTAILQ_FOREACH(as, &address_spaces, address_spaces_link) {
                 address_space_update_ioeventfds(as);
             }
             ioeventfd_update_pending = false;
+            address_space_update_ioeventfds_finish();
         }
    }
 }
@@ -3076,6 +3101,7 @@  void address_space_init(AddressSpace *as, MemoryRegion *root, const char *name)
     as->name = g_strdup(name ? name : "anonymous");
     address_space_update_topology(as);
     address_space_update_ioeventfds(as);
+    address_space_reset_view_flags(as, FLATVIEW_FLAG_IOEVENTFD_UPDATED);
 }
 
 static void do_address_space_destroy(AddressSpace *as)