diff mbox series

[v4,17/20] virtio-blk: implement BlockDevOps->drained_begin()

Message ID 20230425172716.1033562-18-stefanha@redhat.com (mailing list archive)
State Superseded
Headers show
Series block: remove aio_disable_external() API | expand

Commit Message

Stefan Hajnoczi April 25, 2023, 5:27 p.m. UTC
Detach ioeventfds during drained sections to stop I/O submission from
the guest. virtio-blk is no longer reliant on aio_disable_external()
after this patch. This will allow us to remove the
aio_disable_external() API once all other code that relies on it is
converted.

Take extra care to avoid attaching/detaching ioeventfds if the data
plane is started/stopped during a drained section. This should be rare,
but maybe the mirror block job can trigger it.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 hw/block/dataplane/virtio-blk.c | 17 +++++++++------
 hw/block/virtio-blk.c           | 38 ++++++++++++++++++++++++++++++++-
 2 files changed, 48 insertions(+), 7 deletions(-)

Comments

Kevin Wolf May 4, 2023, 9:13 p.m. UTC | #1
Am 25.04.2023 um 19:27 hat Stefan Hajnoczi geschrieben:
> Detach ioeventfds during drained sections to stop I/O submission from
> the guest. virtio-blk is no longer reliant on aio_disable_external()
> after this patch. This will allow us to remove the
> aio_disable_external() API once all other code that relies on it is
> converted.
> 
> Take extra care to avoid attaching/detaching ioeventfds if the data
> plane is started/stopped during a drained section. This should be rare,
> but maybe the mirror block job can trigger it.
> 
> Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
> ---
>  hw/block/dataplane/virtio-blk.c | 17 +++++++++------
>  hw/block/virtio-blk.c           | 38 ++++++++++++++++++++++++++++++++-
>  2 files changed, 48 insertions(+), 7 deletions(-)
> 
> diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
> index bd7cc6e76b..d77fc6028c 100644
> --- a/hw/block/dataplane/virtio-blk.c
> +++ b/hw/block/dataplane/virtio-blk.c
> @@ -245,13 +245,15 @@ int virtio_blk_data_plane_start(VirtIODevice *vdev)
>      }
>  
>      /* Get this show started by hooking up our callbacks */
> -    aio_context_acquire(s->ctx);
> -    for (i = 0; i < nvqs; i++) {
> -        VirtQueue *vq = virtio_get_queue(s->vdev, i);
> +    if (!blk_in_drain(s->conf->conf.blk)) {
> +        aio_context_acquire(s->ctx);
> +        for (i = 0; i < nvqs; i++) {
> +            VirtQueue *vq = virtio_get_queue(s->vdev, i);
>  
> -        virtio_queue_aio_attach_host_notifier(vq, s->ctx);
> +            virtio_queue_aio_attach_host_notifier(vq, s->ctx);
> +        }
> +        aio_context_release(s->ctx);
>      }
> -    aio_context_release(s->ctx);
>      return 0;
>  
>    fail_aio_context:
> @@ -317,7 +319,10 @@ void virtio_blk_data_plane_stop(VirtIODevice *vdev)
>      trace_virtio_blk_data_plane_stop(s);
>  
>      aio_context_acquire(s->ctx);
> -    aio_wait_bh_oneshot(s->ctx, virtio_blk_data_plane_stop_bh, s);
> +
> +    if (!blk_in_drain(s->conf->conf.blk)) {
> +        aio_wait_bh_oneshot(s->ctx, virtio_blk_data_plane_stop_bh, s);
> +    }

So here we actually get a semantic change: What you described as the
second part in the previous patch, processing the virtqueue one last
time, isn't done any more if the device is drained.

If it's okay to just skip this during drain, why do we need to do it
outside of drain?

Kevin
Stefan Hajnoczi May 11, 2023, 8:54 p.m. UTC | #2
On Thu, May 04, 2023 at 11:13:42PM +0200, Kevin Wolf wrote:
> Am 25.04.2023 um 19:27 hat Stefan Hajnoczi geschrieben:
> > Detach ioeventfds during drained sections to stop I/O submission from
> > the guest. virtio-blk is no longer reliant on aio_disable_external()
> > after this patch. This will allow us to remove the
> > aio_disable_external() API once all other code that relies on it is
> > converted.
> > 
> > Take extra care to avoid attaching/detaching ioeventfds if the data
> > plane is started/stopped during a drained section. This should be rare,
> > but maybe the mirror block job can trigger it.
> > 
> > Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
> > ---
> >  hw/block/dataplane/virtio-blk.c | 17 +++++++++------
> >  hw/block/virtio-blk.c           | 38 ++++++++++++++++++++++++++++++++-
> >  2 files changed, 48 insertions(+), 7 deletions(-)
> > 
> > diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
> > index bd7cc6e76b..d77fc6028c 100644
> > --- a/hw/block/dataplane/virtio-blk.c
> > +++ b/hw/block/dataplane/virtio-blk.c
> > @@ -245,13 +245,15 @@ int virtio_blk_data_plane_start(VirtIODevice *vdev)
> >      }
> >  
> >      /* Get this show started by hooking up our callbacks */
> > -    aio_context_acquire(s->ctx);
> > -    for (i = 0; i < nvqs; i++) {
> > -        VirtQueue *vq = virtio_get_queue(s->vdev, i);
> > +    if (!blk_in_drain(s->conf->conf.blk)) {
> > +        aio_context_acquire(s->ctx);
> > +        for (i = 0; i < nvqs; i++) {
> > +            VirtQueue *vq = virtio_get_queue(s->vdev, i);
> >  
> > -        virtio_queue_aio_attach_host_notifier(vq, s->ctx);
> > +            virtio_queue_aio_attach_host_notifier(vq, s->ctx);
> > +        }
> > +        aio_context_release(s->ctx);
> >      }
> > -    aio_context_release(s->ctx);
> >      return 0;
> >  
> >    fail_aio_context:
> > @@ -317,7 +319,10 @@ void virtio_blk_data_plane_stop(VirtIODevice *vdev)
> >      trace_virtio_blk_data_plane_stop(s);
> >  
> >      aio_context_acquire(s->ctx);
> > -    aio_wait_bh_oneshot(s->ctx, virtio_blk_data_plane_stop_bh, s);
> > +
> > +    if (!blk_in_drain(s->conf->conf.blk)) {
> > +        aio_wait_bh_oneshot(s->ctx, virtio_blk_data_plane_stop_bh, s);
> > +    }
> 
> So here we actually get a semantic change: What you described as the
> second part in the previous patch, processing the virtqueue one last
> time, isn't done any more if the device is drained.
> 
> If it's okay to just skip this during drain, why do we need to do it
> outside of drain?

Yes, it's safe because virtio_blk_data_plane_stop() has two cases:
1. The device is being reset. It is not necessary to process new
   requests.
2. 'stop'/'cont'. 'cont' will call virtio_blk_data_plane_start() ->
   event_notifier_set() so new requests will be processed when the guest
   resumes exection.

That's why I think this is safe and the right thing to do.

However, your question led me to a pre-existing drain bug when a vCPU
resets the device during a drained section (e.g. when a mirror block job
has started a drained section and the main loop runs until the block job
exits). New requests must not be processed by
virtio_blk_data_plane_stop() because that would violate drain semantics.

It turns out requests are still processed because of
virtio_blk_data_plane_stop() -> virtio_bus_cleanup_host_notifier() ->
virtio_queue_host_notifier_read().

I think that should be handled in a separate patch series. It's not
related to aio_disable_external().

Stefan
Stefan Hajnoczi May 11, 2023, 9:22 p.m. UTC | #3
On Thu, May 04, 2023 at 11:13:42PM +0200, Kevin Wolf wrote:
> Am 25.04.2023 um 19:27 hat Stefan Hajnoczi geschrieben:
> > Detach ioeventfds during drained sections to stop I/O submission from
> > the guest. virtio-blk is no longer reliant on aio_disable_external()
> > after this patch. This will allow us to remove the
> > aio_disable_external() API once all other code that relies on it is
> > converted.
> > 
> > Take extra care to avoid attaching/detaching ioeventfds if the data
> > plane is started/stopped during a drained section. This should be rare,
> > but maybe the mirror block job can trigger it.
> > 
> > Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
> > ---
> >  hw/block/dataplane/virtio-blk.c | 17 +++++++++------
> >  hw/block/virtio-blk.c           | 38 ++++++++++++++++++++++++++++++++-
> >  2 files changed, 48 insertions(+), 7 deletions(-)
> > 
> > diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
> > index bd7cc6e76b..d77fc6028c 100644
> > --- a/hw/block/dataplane/virtio-blk.c
> > +++ b/hw/block/dataplane/virtio-blk.c
> > @@ -245,13 +245,15 @@ int virtio_blk_data_plane_start(VirtIODevice *vdev)
> >      }
> >  
> >      /* Get this show started by hooking up our callbacks */
> > -    aio_context_acquire(s->ctx);
> > -    for (i = 0; i < nvqs; i++) {
> > -        VirtQueue *vq = virtio_get_queue(s->vdev, i);
> > +    if (!blk_in_drain(s->conf->conf.blk)) {
> > +        aio_context_acquire(s->ctx);
> > +        for (i = 0; i < nvqs; i++) {
> > +            VirtQueue *vq = virtio_get_queue(s->vdev, i);
> >  
> > -        virtio_queue_aio_attach_host_notifier(vq, s->ctx);
> > +            virtio_queue_aio_attach_host_notifier(vq, s->ctx);
> > +        }
> > +        aio_context_release(s->ctx);
> >      }
> > -    aio_context_release(s->ctx);
> >      return 0;
> >  
> >    fail_aio_context:
> > @@ -317,7 +319,10 @@ void virtio_blk_data_plane_stop(VirtIODevice *vdev)
> >      trace_virtio_blk_data_plane_stop(s);
> >  
> >      aio_context_acquire(s->ctx);
> > -    aio_wait_bh_oneshot(s->ctx, virtio_blk_data_plane_stop_bh, s);
> > +
> > +    if (!blk_in_drain(s->conf->conf.blk)) {
> > +        aio_wait_bh_oneshot(s->ctx, virtio_blk_data_plane_stop_bh, s);
> > +    }
> 
> So here we actually get a semantic change: What you described as the
> second part in the previous patch, processing the virtqueue one last
> time, isn't done any more if the device is drained.
> 
> If it's okay to just skip this during drain, why do we need to do it
> outside of drain?

I forgot to answer why we need to process requests one last time outside
drain.

This approach comes from how vhost uses ioeventfd. When switching from
vhost back to QEMU emulation, there's a chance that a final virtqueue
kick snuck in while ioeventfd was being disabled.

This is not the case with dataplane nowadays (it may have in the past).
The only state dataplane transitions are device reset and 'stop'/'cont'.
Neither of these require QEMU to process new requests in while stopping
dataplane.

My confidence is not 100%, but still pretty high that the
virtio_queue_host_notifier_read() call could be dropped from dataplane
code. Since I'm not 100% sure I didn't attempt that.

Stefan
diff mbox series

Patch

diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
index bd7cc6e76b..d77fc6028c 100644
--- a/hw/block/dataplane/virtio-blk.c
+++ b/hw/block/dataplane/virtio-blk.c
@@ -245,13 +245,15 @@  int virtio_blk_data_plane_start(VirtIODevice *vdev)
     }
 
     /* Get this show started by hooking up our callbacks */
-    aio_context_acquire(s->ctx);
-    for (i = 0; i < nvqs; i++) {
-        VirtQueue *vq = virtio_get_queue(s->vdev, i);
+    if (!blk_in_drain(s->conf->conf.blk)) {
+        aio_context_acquire(s->ctx);
+        for (i = 0; i < nvqs; i++) {
+            VirtQueue *vq = virtio_get_queue(s->vdev, i);
 
-        virtio_queue_aio_attach_host_notifier(vq, s->ctx);
+            virtio_queue_aio_attach_host_notifier(vq, s->ctx);
+        }
+        aio_context_release(s->ctx);
     }
-    aio_context_release(s->ctx);
     return 0;
 
   fail_aio_context:
@@ -317,7 +319,10 @@  void virtio_blk_data_plane_stop(VirtIODevice *vdev)
     trace_virtio_blk_data_plane_stop(s);
 
     aio_context_acquire(s->ctx);
-    aio_wait_bh_oneshot(s->ctx, virtio_blk_data_plane_stop_bh, s);
+
+    if (!blk_in_drain(s->conf->conf.blk)) {
+        aio_wait_bh_oneshot(s->ctx, virtio_blk_data_plane_stop_bh, s);
+    }
 
     /* Wait for virtio_blk_dma_restart_bh() and in flight I/O to complete */
     blk_drain(s->conf->conf.blk);
diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index cefca93b31..d8dedc575c 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -1109,8 +1109,44 @@  static void virtio_blk_resize(void *opaque)
     aio_bh_schedule_oneshot(qemu_get_aio_context(), virtio_resize_cb, vdev);
 }
 
+/* Suspend virtqueue ioeventfd processing during drain */
+static void virtio_blk_drained_begin(void *opaque)
+{
+    VirtIOBlock *s = opaque;
+    VirtIODevice *vdev = VIRTIO_DEVICE(opaque);
+    AioContext *ctx = blk_get_aio_context(s->conf.conf.blk);
+
+    if (!s->dataplane || !s->dataplane_started) {
+        return;
+    }
+
+    for (uint16_t i = 0; i < s->conf.num_queues; i++) {
+        VirtQueue *vq = virtio_get_queue(vdev, i);
+        virtio_queue_aio_detach_host_notifier(vq, ctx);
+    }
+}
+
+/* Resume virtqueue ioeventfd processing after drain */
+static void virtio_blk_drained_end(void *opaque)
+{
+    VirtIOBlock *s = opaque;
+    VirtIODevice *vdev = VIRTIO_DEVICE(opaque);
+    AioContext *ctx = blk_get_aio_context(s->conf.conf.blk);
+
+    if (!s->dataplane || !s->dataplane_started) {
+        return;
+    }
+
+    for (uint16_t i = 0; i < s->conf.num_queues; i++) {
+        VirtQueue *vq = virtio_get_queue(vdev, i);
+        virtio_queue_aio_attach_host_notifier(vq, ctx);
+    }
+}
+
 static const BlockDevOps virtio_block_ops = {
-    .resize_cb = virtio_blk_resize,
+    .resize_cb     = virtio_blk_resize,
+    .drained_begin = virtio_blk_drained_begin,
+    .drained_end   = virtio_blk_drained_end,
 };
 
 static void virtio_blk_device_realize(DeviceState *dev, Error **errp)