[v2,2/2] net: move backend cleanup to NIC cleanup

Message ID 20240912165408.234447-3-eperezma@redhat.com (mailing list archive)
State New, archived
Series Move net backend cleanup to NIC cleanup

Commit Message

Eugenio Perez Martin Sept. 12, 2024, 4:54 p.m. UTC
Commit a0d7215e33 ("vhost-vdpa: do not cleanup the vdpa/vhost-net
structures if peer nic is present") effectively delayed the backend
cleanup, allowing the frontend or the guest to access its resources as
long as the frontend is still visible to the guest.

However, it does not clean up the resources until the QEMU process
exits.  This causes an effective leak if the device is deleted with
device_del, as there is no way to close the vdpa device.  This makes it
impossible to re-add that device to this or another QEMU instance until
the first instance of QEMU has finished.

Move the cleanup from qemu_cleanup to the NIC deletion and to
net_cleanup.

Fixes: a0d7215e33 ("vhost-vdpa: do not cleanup the vdpa/vhost-net structures if peer nic is present")
Reported-by: Lei Yang <leiyang@redhat.com>
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
---
v2: Also remove the NIC peer at net_cleanup. vhost-user trusts that all
the backends are cleaned up before QEMU removes the char devices.

This is not a requirement introduced by this commit, as
system/runstate.c:qemu_cleanup shows.
---
 net/net.c        | 33 +++++++++++++++++++++++++++------
 net/vhost-vdpa.c |  8 --------
 2 files changed, 27 insertions(+), 14 deletions(-)

--
2.46.0

Comments

Si-Wei Liu Sept. 18, 2024, 3:57 a.m. UTC | #1
On 9/12/2024 9:54 AM, Eugenio Pérez wrote:
> Commit a0d7215e33 ("vhost-vdpa: do not cleanup the vdpa/vhost-net
> structures if peer nic is present") effectively delayed the backend
> cleanup, allowing the frontend or the guest to access its resources as
> long as the frontend is still visible to the guest.
>
> However, it does not clean up the resources until the QEMU process
> exits.  This causes an effective leak if the device is deleted with
> device_del, as there is no way to close the vdpa device.  This makes it
> impossible to re-add that device to this or another QEMU instance until
> the first instance of QEMU has finished.
>
> Move the cleanup from qemu_cleanup to the NIC deletion and to
> net_cleanup.
>
> Fixes: a0d7215e33 ("vhost-vdpa: do not cleanup the vdpa/vhost-net structures if peer nic is present")
> Reported-by: Lei Yang <leiyang@redhat.com>
> Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
Reviewed-by: Si-Wei Liu <si-wei.liu@oracle.com>
> ---
> v2: Also remove the NIC peer at net_cleanup. vhost-user trusts that all
> the backends are cleaned up before QEMU removes the char devices.
>
> This is not a requirement introduced by this commit, as
> system/runstate.c:qemu_cleanup shows.
> ---
>   net/net.c        | 33 +++++++++++++++++++++++++++------
>   net/vhost-vdpa.c |  8 --------
>   2 files changed, 27 insertions(+), 14 deletions(-)
>
> diff --git a/net/net.c b/net/net.c
> index 9f96509b2a..d097ac3e74 100644
> --- a/net/net.c
> +++ b/net/net.c
> @@ -428,7 +428,13 @@ void qemu_del_net_client(NetClientState *nc)
>           object_unparent(OBJECT(nf));
>       }
>
> -    /* If there is a peer NIC, delete and cleanup client, but do not free. */
> +    /*
> +     * If there is a peer NIC, transfer ownership to it.  Delete the client
> +     * from the net_clients list but do not clean it up or free it.  This
> +     * way the NIC can still access the members of the backend.
> +     *
> +     * The cleanup and free will be done when the NIC is freed.
> +     */
>       if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_NIC) {
>           NICState *nic = qemu_get_nic(nc->peer);
>           if (nic->peer_deleted) {
> @@ -438,16 +444,13 @@ void qemu_del_net_client(NetClientState *nc)
>
>           for (i = 0; i < queues; i++) {
>               ncs[i]->peer->link_down = true;
> +            QTAILQ_REMOVE(&net_clients, ncs[i], next);
>           }
>
>           if (nc->peer->info->link_status_changed) {
>               nc->peer->info->link_status_changed(nc->peer);
>           }
>
> -        for (i = 0; i < queues; i++) {
> -            qemu_cleanup_net_client(ncs[i], true);
> -        }
> -
>           return;
>       }
>
> @@ -465,8 +468,12 @@ void qemu_del_nic(NICState *nic)
>
>       for (i = 0; i < queues; i++) {
>           NetClientState *nc = qemu_get_subqueue(nic, i);
> -        /* If this is a peer NIC and peer has already been deleted, free it now. */
> +        /*
> +         * If this is a peer NIC and the peer has already been deleted,
> +         * clean it up and free it now.
> +         */
>           if (nic->peer_deleted) {
> +            qemu_cleanup_net_client(nc->peer, false);
>               qemu_free_net_client(nc->peer);
>           } else if (nc->peer) {
>               /* if there are RX packets pending, complete them */
> @@ -1686,6 +1693,9 @@ void net_cleanup(void)
>        * of the latest NET_CLIENT_DRIVER_NIC, and operate on *p as we walk
>        * the list.
>        *
> +     * However, the NIC may have peers that expect to have been cleaned up
> +     * by this point, for example if they have been removed with device_del.
> +     *
>        * The 'nc' variable isn't part of the list traversal; it's purely
>        * for convenience as too much '(*p)->' has a tendency to make the
>        * readers' eyes bleed.
> @@ -1693,6 +1703,17 @@ void net_cleanup(void)
>       while (*p) {
>           nc = *p;
>           if (nc->info->type == NET_CLIENT_DRIVER_NIC) {
> +            NICState *nic = qemu_get_nic(nc);
> +
> +            if (nic->peer_deleted) {
> +                int queues = MAX(nic->conf->peers.queues, 1);
> +
> +                for (int i = 0; i < queues; i++) {
> +                    NetClientState *nc = qemu_get_subqueue(nic, i);
> +                    qemu_cleanup_net_client(nc->peer, false);
> +                }
> +            }
> +
>               /* Skip NET_CLIENT_DRIVER_NIC entries */
>               p = &QTAILQ_NEXT(nc, next);
>           } else {
> diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
> index 46b02c50be..e1a1c91e48 100644
> --- a/net/vhost-vdpa.c
> +++ b/net/vhost-vdpa.c
> @@ -224,14 +224,6 @@ static void vhost_vdpa_cleanup(NetClientState *nc)
>   {
>       VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
>
> -    /*
> -     * If a peer NIC is attached, do not cleanup anything.
> -     * Cleanup will happen as a part of qemu_cleanup() -> net_cleanup()
> -     * when the guest is shutting down.
> -     */
> -    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_NIC) {
> -        return;
> -    }
>       munmap(s->cvq_cmd_out_buffer, vhost_vdpa_net_cvq_cmd_page_len());
>       munmap(s->status, vhost_vdpa_net_cvq_cmd_page_len());
>       if (s->vhost_net) {
> --
> 2.46.0
>
Jason Wang Oct. 9, 2024, 8:34 a.m. UTC | #2
On Wed, Sep 18, 2024 at 11:57 AM Si-Wei Liu <si-wei.liu@oracle.com> wrote:
>
>
>
> On 9/12/2024 9:54 AM, Eugenio Pérez wrote:
> > Commit a0d7215e33 ("vhost-vdpa: do not cleanup the vdpa/vhost-net
> > structures if peer nic is present") effectively delayed the backend
> > cleanup, allowing the frontend or the guest to access its resources as
> > long as the frontend is still visible to the guest.
> >
> > However, it does not clean up the resources until the QEMU process
> > exits.  This causes an effective leak if the device is deleted with
> > device_del, as there is no way to close the vdpa device.  This makes it
> > impossible to re-add that device to this or another QEMU instance until
> > the first instance of QEMU has finished.
> >
> > Move the cleanup from qemu_cleanup to the NIC deletion and to
> > net_cleanup.
> >
> > Fixes: a0d7215e33 ("vhost-vdpa: do not cleanup the vdpa/vhost-net structures if peer nic is present")
> > Reported-by: Lei Yang <leiyang@redhat.com>
> > Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
> Reviewed-by: Si-Wei Liu <si-wei.liu@oracle.com>

Queued.

Thanks
Jason Wang Oct. 23, 2024, 4:04 a.m. UTC | #3
On Fri, Sep 13, 2024 at 12:54 AM Eugenio Pérez <eperezma@redhat.com> wrote:
>
> Commit a0d7215e33 ("vhost-vdpa: do not cleanup the vdpa/vhost-net
> structures if peer nic is present") effectively delayed the backend
> cleanup, allowing the frontend or the guest to access its resources as
> long as the frontend is still visible to the guest.
>
> However, it does not clean up the resources until the QEMU process
> exits.  This causes an effective leak if the device is deleted with
> device_del, as there is no way to close the vdpa device.  This makes it
> impossible to re-add that device to this or another QEMU instance until
> the first instance of QEMU has finished.
>
> Move the cleanup from qemu_cleanup to the NIC deletion and to
> net_cleanup.
>
> Fixes: a0d7215e33 ("vhost-vdpa: do not cleanup the vdpa/vhost-net structures if peer nic is present")
> Reported-by: Lei Yang <leiyang@redhat.com>
> Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
> ---
> v2: Also remove the NIC peer at net_cleanup. vhost-user trusts that all
> the backends are cleaned up before QEMU removes the char devices.
>
> This is not a requirement introduced by this commit, as
> system/runstate.c:qemu_cleanup shows.
> ---

Unfortunately, this fails the build:

https://gitlab.com/jasowang/qemu/-/jobs/8138832559

Thanks

Patch

diff --git a/net/net.c b/net/net.c
index 9f96509b2a..d097ac3e74 100644
--- a/net/net.c
+++ b/net/net.c
@@ -428,7 +428,13 @@  void qemu_del_net_client(NetClientState *nc)
         object_unparent(OBJECT(nf));
     }

-    /* If there is a peer NIC, delete and cleanup client, but do not free. */
+    /*
+     * If there is a peer NIC, transfer ownership to it.  Delete the client
+     * from the net_clients list but do not clean it up or free it.  This
+     * way the NIC can still access the members of the backend.
+     *
+     * The cleanup and free will be done when the NIC is freed.
+     */
     if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_NIC) {
         NICState *nic = qemu_get_nic(nc->peer);
         if (nic->peer_deleted) {
@@ -438,16 +444,13 @@  void qemu_del_net_client(NetClientState *nc)

         for (i = 0; i < queues; i++) {
             ncs[i]->peer->link_down = true;
+            QTAILQ_REMOVE(&net_clients, ncs[i], next);
         }

         if (nc->peer->info->link_status_changed) {
             nc->peer->info->link_status_changed(nc->peer);
         }

-        for (i = 0; i < queues; i++) {
-            qemu_cleanup_net_client(ncs[i], true);
-        }
-
         return;
     }

@@ -465,8 +468,12 @@  void qemu_del_nic(NICState *nic)

     for (i = 0; i < queues; i++) {
         NetClientState *nc = qemu_get_subqueue(nic, i);
-        /* If this is a peer NIC and peer has already been deleted, free it now. */
+        /*
+         * If this is a peer NIC and the peer has already been deleted,
+         * clean it up and free it now.
+         */
         if (nic->peer_deleted) {
+            qemu_cleanup_net_client(nc->peer, false);
             qemu_free_net_client(nc->peer);
         } else if (nc->peer) {
             /* if there are RX packets pending, complete them */
@@ -1686,6 +1693,9 @@  void net_cleanup(void)
      * of the latest NET_CLIENT_DRIVER_NIC, and operate on *p as we walk
      * the list.
      *
+     * However, the NIC may have peers that expect to have been cleaned up
+     * by this point, for example if they have been removed with device_del.
+     *
      * The 'nc' variable isn't part of the list traversal; it's purely
      * for convenience as too much '(*p)->' has a tendency to make the
      * readers' eyes bleed.
@@ -1693,6 +1703,17 @@  void net_cleanup(void)
     while (*p) {
         nc = *p;
         if (nc->info->type == NET_CLIENT_DRIVER_NIC) {
+            NICState *nic = qemu_get_nic(nc);
+
+            if (nic->peer_deleted) {
+                int queues = MAX(nic->conf->peers.queues, 1);
+
+                for (int i = 0; i < queues; i++) {
+                    NetClientState *nc = qemu_get_subqueue(nic, i);
+                    qemu_cleanup_net_client(nc->peer, false);
+                }
+            }
+
             /* Skip NET_CLIENT_DRIVER_NIC entries */
             p = &QTAILQ_NEXT(nc, next);
         } else {
diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
index 46b02c50be..e1a1c91e48 100644
--- a/net/vhost-vdpa.c
+++ b/net/vhost-vdpa.c
@@ -224,14 +224,6 @@  static void vhost_vdpa_cleanup(NetClientState *nc)
 {
     VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);

-    /*
-     * If a peer NIC is attached, do not cleanup anything.
-     * Cleanup will happen as a part of qemu_cleanup() -> net_cleanup()
-     * when the guest is shutting down.
-     */
-    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_NIC) {
-        return;
-    }
     munmap(s->cvq_cmd_out_buffer, vhost_vdpa_net_cvq_cmd_page_len());
     munmap(s->status, vhost_vdpa_net_cvq_cmd_page_len());
     if (s->vhost_net) {