diff mbox series

[v1,1/2] virtio: struct VirtQueue introduce reset

Message ID 20230129025150.119972-2-xuanzhuo@linux.alibaba.com (mailing list archive)
State New, archived
Headers show
Series virtio: fix for assertion failure: virtio_net_get_subqueue(nc)->async_tx.elem failed | expand

Commit Message

Xuan Zhuo Jan. 29, 2023, 2:51 a.m. UTC
In the current design, we stop the device from operating on the vring
during per-queue reset by resetting the structure VirtQueue.

But before the reset operation, when recycling some resources, we should
stop referencing new vring resources. For example, when recycling
virtio-net's asynchronous sending resources, virtio-net should be able
to perceive that the current queue is in the per-queue reset state, and
stop sending new packets from the tx queue.

Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
---
 hw/virtio/virtio.c         | 15 +++++++++++++++
 include/hw/virtio/virtio.h |  1 +
 2 files changed, 16 insertions(+)

Comments

Michael S. Tsirkin Jan. 29, 2023, 7:12 a.m. UTC | #1
subject seems wrong.

On Sun, Jan 29, 2023 at 10:51:49AM +0800, Xuan Zhuo wrote:
> In the current design, we stop the device from operating on the vring
> during per-queue reset by resetting the structure VirtQueue.
> 
> But before the reset operation, when recycling some resources, we should
> stop referencing new vring resources. For example, when recycling
> virtio-net's asynchronous sending resources, virtio-net should be able
> to perceive that the current queue is in the per-queue reset state, and
> stop sending new packets from the tx queue.
> 
> Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
> ---
>  hw/virtio/virtio.c         | 15 +++++++++++++++
>  include/hw/virtio/virtio.h |  1 +
>  2 files changed, 16 insertions(+)
> 
> diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
> index f35178f5fc..c954f2a2b3 100644
> --- a/hw/virtio/virtio.c
> +++ b/hw/virtio/virtio.c
> @@ -142,6 +142,8 @@ struct VirtQueue
>      /* Notification enabled? */
>      bool notification;
>  
> +    bool disabled_by_reset;
> +
>      uint16_t queue_index;
>  
>      unsigned int inuse;
> @@ -2079,6 +2081,12 @@ void virtio_queue_reset(VirtIODevice *vdev, uint32_t queue_index)
>  {
>      VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
>  
> +    /*
> +     * Mark this queue is per-queue reset status. The device should release the
> +     * references of the vring, and not refer more new vring item.

items

> +     */
> +    vdev->vq[queue_index].disabled_by_reset = true;
> +
>      if (k->queue_reset) {
>          k->queue_reset(vdev, queue_index);
>      }

can we set this after calling queue_reset? For symmetry with enable.

> @@ -2102,11 +2110,18 @@ void virtio_queue_enable(VirtIODevice *vdev, uint32_t queue_index)
>      }
>      */
>  
> +    vdev->vq[queue_index].disabled_by_reset = false;
> +
>      if (k->queue_enable) {
>          k->queue_enable(vdev, queue_index);
>      }
>  }
>  
> +bool virtio_queue_reset_state(VirtQueue *vq)
> +{
> +    return vq->disabled_by_reset;
> +}
> +
>  void virtio_reset(void *opaque)
>  {
>      VirtIODevice *vdev = opaque;
> diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
> index 77c6c55929..00e91af7c4 100644
> --- a/include/hw/virtio/virtio.h
> +++ b/include/hw/virtio/virtio.h
> @@ -319,6 +319,7 @@ int virtio_set_status(VirtIODevice *vdev, uint8_t val);
>  void virtio_reset(void *opaque);
>  void virtio_queue_reset(VirtIODevice *vdev, uint32_t queue_index);
>  void virtio_queue_enable(VirtIODevice *vdev, uint32_t queue_index);
> +bool virtio_queue_reset_state(VirtQueue *vq);
>  void virtio_update_irq(VirtIODevice *vdev);
>  int virtio_set_features(VirtIODevice *vdev, uint64_t val);

OK I guess ... what about migration. This state won't be
set correctly will it?


>  
> -- 
> 2.32.0.3.g01195cf9f
Xuan Zhuo Jan. 29, 2023, 7:15 a.m. UTC | #2
On Sun, 29 Jan 2023 02:12:36 -0500, "Michael S. Tsirkin" <mst@redhat.com> wrote:
>
> subject seems wrong.


Will fix.


>
> On Sun, Jan 29, 2023 at 10:51:49AM +0800, Xuan Zhuo wrote:
> > In the current design, we stop the device from operating on the vring
> > during per-queue reset by resetting the structure VirtQueue.
> >
> > But before the reset operation, when recycling some resources, we should
> > stop referencing new vring resources. For example, when recycling
> > virtio-net's asynchronous sending resources, virtio-net should be able
> > to perceive that the current queue is in the per-queue reset state, and
> > stop sending new packets from the tx queue.
> >
> > Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
> > ---
> >  hw/virtio/virtio.c         | 15 +++++++++++++++
> >  include/hw/virtio/virtio.h |  1 +
> >  2 files changed, 16 insertions(+)
> >
> > diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
> > index f35178f5fc..c954f2a2b3 100644
> > --- a/hw/virtio/virtio.c
> > +++ b/hw/virtio/virtio.c
> > @@ -142,6 +142,8 @@ struct VirtQueue
> >      /* Notification enabled? */
> >      bool notification;
> >
> > +    bool disabled_by_reset;
> > +
> >      uint16_t queue_index;
> >
> >      unsigned int inuse;
> > @@ -2079,6 +2081,12 @@ void virtio_queue_reset(VirtIODevice *vdev, uint32_t queue_index)
> >  {
> >      VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
> >
> > +    /*
> > +     * Mark this queue is per-queue reset status. The device should release the
> > +     * references of the vring, and not refer more new vring item.
>
> items


Will fix.

>
> > +     */
> > +    vdev->vq[queue_index].disabled_by_reset = true;
> > +
> >      if (k->queue_reset) {
> >          k->queue_reset(vdev, queue_index);
> >      }
>
> can we set this after calling queue_reset? For symmetry with enable.


In fact,  queue_reset() will check it.


>
> > @@ -2102,11 +2110,18 @@ void virtio_queue_enable(VirtIODevice *vdev, uint32_t queue_index)
> >      }
> >      */
> >
> > +    vdev->vq[queue_index].disabled_by_reset = false;
> > +
> >      if (k->queue_enable) {
> >          k->queue_enable(vdev, queue_index);
> >      }
> >  }
> >
> > +bool virtio_queue_reset_state(VirtQueue *vq)
> > +{
> > +    return vq->disabled_by_reset;
> > +}
> > +
> >  void virtio_reset(void *opaque)
> >  {
> >      VirtIODevice *vdev = opaque;
> > diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
> > index 77c6c55929..00e91af7c4 100644
> > --- a/include/hw/virtio/virtio.h
> > +++ b/include/hw/virtio/virtio.h
> > @@ -319,6 +319,7 @@ int virtio_set_status(VirtIODevice *vdev, uint8_t val);
> >  void virtio_reset(void *opaque);
> >  void virtio_queue_reset(VirtIODevice *vdev, uint32_t queue_index);
> >  void virtio_queue_enable(VirtIODevice *vdev, uint32_t queue_index);
> > +bool virtio_queue_reset_state(VirtQueue *vq);
> >  void virtio_update_irq(VirtIODevice *vdev);
> >  int virtio_set_features(VirtIODevice *vdev, uint64_t val);
>
> OK I guess ... what about migration. This state won't be
> set correctly will it?

I think it has no effect. After the reset, there is actually no state. We can
migrate.

The current variable is only used by ->queue_reset().

Thanks.


>
>
> >
> > --
> > 2.32.0.3.g01195cf9f
>
Michael S. Tsirkin Jan. 29, 2023, 7:37 a.m. UTC | #3
On Sun, Jan 29, 2023 at 03:15:16PM +0800, Xuan Zhuo wrote:
> On Sun, 29 Jan 2023 02:12:36 -0500, "Michael S. Tsirkin" <mst@redhat.com> wrote:
> >
> > subject seems wrong.
> 
> 
> Will fix.
> 
> 
> >
> > On Sun, Jan 29, 2023 at 10:51:49AM +0800, Xuan Zhuo wrote:
> > > In the current design, we stop the device from operating on the vring
> > > during per-queue reset by resetting the structure VirtQueue.
> > >
> > > But before the reset operation, when recycling some resources, we should
> > > stop referencing new vring resources. For example, when recycling
> > > virtio-net's asynchronous sending resources, virtio-net should be able
> > > to perceive that the current queue is in the per-queue reset state, and
> > > stop sending new packets from the tx queue.
> > >
> > > Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
> > > ---
> > >  hw/virtio/virtio.c         | 15 +++++++++++++++
> > >  include/hw/virtio/virtio.h |  1 +
> > >  2 files changed, 16 insertions(+)
> > >
> > > diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
> > > index f35178f5fc..c954f2a2b3 100644
> > > --- a/hw/virtio/virtio.c
> > > +++ b/hw/virtio/virtio.c
> > > @@ -142,6 +142,8 @@ struct VirtQueue
> > >      /* Notification enabled? */
> > >      bool notification;
> > >
> > > +    bool disabled_by_reset;
> > > +
> > >      uint16_t queue_index;
> > >
> > >      unsigned int inuse;
> > > @@ -2079,6 +2081,12 @@ void virtio_queue_reset(VirtIODevice *vdev, uint32_t queue_index)
> > >  {
> > >      VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
> > >
> > > +    /*
> > > +     * Mark this queue is per-queue reset status. The device should release the
> > > +     * references of the vring, and not refer more new vring item.
> >
> > items
> 
> 
> Will fix.
> 
> >
> > > +     */
> > > +    vdev->vq[queue_index].disabled_by_reset = true;
> > > +
> > >      if (k->queue_reset) {
> > >          k->queue_reset(vdev, queue_index);
> > >      }
> >
> > can we set this after calling queue_reset? For symmetry with enable.
> 
> 
> In fact,  queue_reset() will check it.
> 

when you disable you first set it then disable.
so when we are not 100% ready it's already set.
when you enable you first clear it then enable.
so we are not 100% ready but it's no longer set.
inconsistent.


> >
> > > @@ -2102,11 +2110,18 @@ void virtio_queue_enable(VirtIODevice *vdev, uint32_t queue_index)
> > >      }
> > >      */
> > >
> > > +    vdev->vq[queue_index].disabled_by_reset = false;
> > > +
> > >      if (k->queue_enable) {
> > >          k->queue_enable(vdev, queue_index);
> > >      }
> > >  }
> > >
> > > +bool virtio_queue_reset_state(VirtQueue *vq)
> > > +{
> > > +    return vq->disabled_by_reset;
> > > +}
> > > +
> > >  void virtio_reset(void *opaque)
> > >  {
> > >      VirtIODevice *vdev = opaque;
> > > diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
> > > index 77c6c55929..00e91af7c4 100644
> > > --- a/include/hw/virtio/virtio.h
> > > +++ b/include/hw/virtio/virtio.h
> > > @@ -319,6 +319,7 @@ int virtio_set_status(VirtIODevice *vdev, uint8_t val);
> > >  void virtio_reset(void *opaque);
> > >  void virtio_queue_reset(VirtIODevice *vdev, uint32_t queue_index);
> > >  void virtio_queue_enable(VirtIODevice *vdev, uint32_t queue_index);
> > > +bool virtio_queue_reset_state(VirtQueue *vq);
> > >  void virtio_update_irq(VirtIODevice *vdev);
> > >  int virtio_set_features(VirtIODevice *vdev, uint64_t val);
> >
> > OK I guess ... what about migration. This state won't be
> > set correctly will it?
> 
> I think it has no effect. After the reset, there is actually no state. We can
> migrate.
> 
> The current variable is only used by ->queue_reset().
> 
> Thanks.
> 

Yea maybe it works for this bug but ... yack. This means the state has
no logic consistency.  It's just there because you found a bug and
wanted to fix it.
An ultra specific
	bool this_weird_state_fuzzer_gets_in_issue_1451;
is hard to maintain, not happy :(


> >
> >
> > >
> > > --
> > > 2.32.0.3.g01195cf9f
> >
Xuan Zhuo Jan. 29, 2023, 7:39 a.m. UTC | #4
On Sun, 29 Jan 2023 02:37:22 -0500, "Michael S. Tsirkin" <mst@redhat.com> wrote:
> On Sun, Jan 29, 2023 at 03:15:16PM +0800, Xuan Zhuo wrote:
> > On Sun, 29 Jan 2023 02:12:36 -0500, "Michael S. Tsirkin" <mst@redhat.com> wrote:
> > >
> > > subject seems wrong.
> >
> >
> > Will fix.
> >
> >
> > >
> > > On Sun, Jan 29, 2023 at 10:51:49AM +0800, Xuan Zhuo wrote:
> > > > In the current design, we stop the device from operating on the vring
> > > > during per-queue reset by resetting the structure VirtQueue.
> > > >
> > > > But before the reset operation, when recycling some resources, we should
> > > > stop referencing new vring resources. For example, when recycling
> > > > virtio-net's asynchronous sending resources, virtio-net should be able
> > > > to perceive that the current queue is in the per-queue reset state, and
> > > > stop sending new packets from the tx queue.
> > > >
> > > > Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
> > > > ---
> > > >  hw/virtio/virtio.c         | 15 +++++++++++++++
> > > >  include/hw/virtio/virtio.h |  1 +
> > > >  2 files changed, 16 insertions(+)
> > > >
> > > > diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
> > > > index f35178f5fc..c954f2a2b3 100644
> > > > --- a/hw/virtio/virtio.c
> > > > +++ b/hw/virtio/virtio.c
> > > > @@ -142,6 +142,8 @@ struct VirtQueue
> > > >      /* Notification enabled? */
> > > >      bool notification;
> > > >
> > > > +    bool disabled_by_reset;
> > > > +
> > > >      uint16_t queue_index;
> > > >
> > > >      unsigned int inuse;
> > > > @@ -2079,6 +2081,12 @@ void virtio_queue_reset(VirtIODevice *vdev, uint32_t queue_index)
> > > >  {
> > > >      VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
> > > >
> > > > +    /*
> > > > +     * Mark this queue is per-queue reset status. The device should release the
> > > > +     * references of the vring, and not refer more new vring item.
> > >
> > > items
> >
> >
> > Will fix.
> >
> > >
> > > > +     */
> > > > +    vdev->vq[queue_index].disabled_by_reset = true;
> > > > +
> > > >      if (k->queue_reset) {
> > > >          k->queue_reset(vdev, queue_index);
> > > >      }
> > >
> > > can we set this after calling queue_reset? For symmetry with enable.
> >
> >
> > In fact,  queue_reset() will check it.
> >
>
> when you disable you first set it then disable.
> so when we are not 100% ready it's already set.
> when you enable you first clear it then enable.
> so we are not 100% ready but it's no longer set.
> inconsistent.
>
>
> > >
> > > > @@ -2102,11 +2110,18 @@ void virtio_queue_enable(VirtIODevice *vdev, uint32_t queue_index)
> > > >      }
> > > >      */
> > > >
> > > > +    vdev->vq[queue_index].disabled_by_reset = false;
> > > > +
> > > >      if (k->queue_enable) {
> > > >          k->queue_enable(vdev, queue_index);
> > > >      }
> > > >  }
> > > >
> > > > +bool virtio_queue_reset_state(VirtQueue *vq)
> > > > +{
> > > > +    return vq->disabled_by_reset;
> > > > +}
> > > > +
> > > >  void virtio_reset(void *opaque)
> > > >  {
> > > >      VirtIODevice *vdev = opaque;
> > > > diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
> > > > index 77c6c55929..00e91af7c4 100644
> > > > --- a/include/hw/virtio/virtio.h
> > > > +++ b/include/hw/virtio/virtio.h
> > > > @@ -319,6 +319,7 @@ int virtio_set_status(VirtIODevice *vdev, uint8_t val);
> > > >  void virtio_reset(void *opaque);
> > > >  void virtio_queue_reset(VirtIODevice *vdev, uint32_t queue_index);
> > > >  void virtio_queue_enable(VirtIODevice *vdev, uint32_t queue_index);
> > > > +bool virtio_queue_reset_state(VirtQueue *vq);
> > > >  void virtio_update_irq(VirtIODevice *vdev);
> > > >  int virtio_set_features(VirtIODevice *vdev, uint64_t val);
> > >
> > > OK I guess ... what about migration. This state won't be
> > > set correctly will it?
> >
> > I think it has no effect. After the reset, there is actually no state. We can
> > migrate.
> >
> > The current variable is only used by ->queue_reset().
> >
> > Thanks.
> >
>
> Yea maybe it works for this bug but ... yack. This means the state has
> no logic consistency.  It's just there because you found a bug and
> wanted to fix it.
> An ultra specific
> 	bool this_weird_state_fuzzer_gets_in_issue_1451;
> is hard to maintain, not happy :(


I agree.


Thanks.


>
>
> > >
> > >
> > > >
> > > > --
> > > > 2.32.0.3.g01195cf9f
> > >
>
diff mbox series

Patch

diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index f35178f5fc..c954f2a2b3 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -142,6 +142,8 @@  struct VirtQueue
     /* Notification enabled? */
     bool notification;
 
+    bool disabled_by_reset;
+
     uint16_t queue_index;
 
     unsigned int inuse;
@@ -2079,6 +2081,12 @@  void virtio_queue_reset(VirtIODevice *vdev, uint32_t queue_index)
 {
     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
 
+    /*
+     * Mark this queue is per-queue reset status. The device should release the
+     * references of the vring, and not refer more new vring item.
+     */
+    vdev->vq[queue_index].disabled_by_reset = true;
+
     if (k->queue_reset) {
         k->queue_reset(vdev, queue_index);
     }
@@ -2102,11 +2110,18 @@  void virtio_queue_enable(VirtIODevice *vdev, uint32_t queue_index)
     }
     */
 
+    vdev->vq[queue_index].disabled_by_reset = false;
+
     if (k->queue_enable) {
         k->queue_enable(vdev, queue_index);
     }
 }
 
+bool virtio_queue_reset_state(VirtQueue *vq)
+{
+    return vq->disabled_by_reset;
+}
+
 void virtio_reset(void *opaque)
 {
     VirtIODevice *vdev = opaque;
diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
index 77c6c55929..00e91af7c4 100644
--- a/include/hw/virtio/virtio.h
+++ b/include/hw/virtio/virtio.h
@@ -319,6 +319,7 @@  int virtio_set_status(VirtIODevice *vdev, uint8_t val);
 void virtio_reset(void *opaque);
 void virtio_queue_reset(VirtIODevice *vdev, uint32_t queue_index);
 void virtio_queue_enable(VirtIODevice *vdev, uint32_t queue_index);
+bool virtio_queue_reset_state(VirtQueue *vq);
 void virtio_update_irq(VirtIODevice *vdev);
 int virtio_set_features(VirtIODevice *vdev, uint64_t val);