diff mbox series

[v7,6/7] vhost-user-blk: Add support to reconnect backend

Message ID 20190228085355.9614-7-xieyongji@baidu.com (mailing list archive)
State New, archived
Headers show
Series vhost-user-blk: Add support for backend reconnecting | expand

Commit Message

Yongji Xie Feb. 28, 2019, 8:53 a.m. UTC
From: Xie Yongji <xieyongji@baidu.com>

Since we now support the messages VHOST_USER_GET_INFLIGHT_FD
and VHOST_USER_SET_INFLIGHT_FD, the backend is able to restart
safely because it can track inflight I/O in shared memory.
This patch allows QEMU to reconnect to the backend after the
connection is closed.

Signed-off-by: Xie Yongji <xieyongji@baidu.com>
Signed-off-by: Ni Xun <nixun@baidu.com>
Signed-off-by: Zhang Yu <zhangyu31@baidu.com>
---
 hw/block/vhost-user-blk.c          | 205 +++++++++++++++++++++++------
 include/hw/virtio/vhost-user-blk.h |   4 +
 2 files changed, 167 insertions(+), 42 deletions(-)

Comments

Michael S. Tsirkin March 12, 2019, 4:49 p.m. UTC | #1
On Thu, Feb 28, 2019 at 04:53:54PM +0800, elohimes@gmail.com wrote:
> From: Xie Yongji <xieyongji@baidu.com>
> 
> Since we now support the message VHOST_USER_GET_INFLIGHT_FD
> and VHOST_USER_SET_INFLIGHT_FD. The backend is able to restart
> safely because it can track inflight I/O in shared memory.
> This patch allows qemu to reconnect the backend after
> connection closed.
> 
> Signed-off-by: Xie Yongji <xieyongji@baidu.com>
> Signed-off-by: Ni Xun <nixun@baidu.com>
> Signed-off-by: Zhang Yu <zhangyu31@baidu.com>
> ---
>  hw/block/vhost-user-blk.c          | 205 +++++++++++++++++++++++------
>  include/hw/virtio/vhost-user-blk.h |   4 +
>  2 files changed, 167 insertions(+), 42 deletions(-)
> 
> diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c
> index 9682df1a7b..539ea2e571 100644
> --- a/hw/block/vhost-user-blk.c
> +++ b/hw/block/vhost-user-blk.c
> @@ -103,7 +103,7 @@ const VhostDevConfigOps blk_ops = {
>      .vhost_dev_config_notifier = vhost_user_blk_handle_config_change,
>  };
>  
> -static void vhost_user_blk_start(VirtIODevice *vdev)
> +static int vhost_user_blk_start(VirtIODevice *vdev)
>  {
>      VHostUserBlk *s = VHOST_USER_BLK(vdev);
>      BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
> @@ -112,13 +112,13 @@ static void vhost_user_blk_start(VirtIODevice *vdev)
>  
>      if (!k->set_guest_notifiers) {
>          error_report("binding does not support guest notifiers");
> -        return;
> +        return -ENOSYS;
>      }
>  
>      ret = vhost_dev_enable_notifiers(&s->dev, vdev);
>      if (ret < 0) {
>          error_report("Error enabling host notifiers: %d", -ret);
> -        return;
> +        return ret;
>      }
>  
>      ret = k->set_guest_notifiers(qbus->parent, s->dev.nvqs, true);
> @@ -157,12 +157,13 @@ static void vhost_user_blk_start(VirtIODevice *vdev)
>          vhost_virtqueue_mask(&s->dev, vdev, i, false);
>      }
>  
> -    return;
> +    return ret;
>  
>  err_guest_notifiers:
>      k->set_guest_notifiers(qbus->parent, s->dev.nvqs, false);
>  err_host_notifiers:
>      vhost_dev_disable_notifiers(&s->dev, vdev);
> +    return ret;
>  }
>  
>  static void vhost_user_blk_stop(VirtIODevice *vdev)
> @@ -181,7 +182,6 @@ static void vhost_user_blk_stop(VirtIODevice *vdev)
>      ret = k->set_guest_notifiers(qbus->parent, s->dev.nvqs, false);
>      if (ret < 0) {
>          error_report("vhost guest notifier cleanup failed: %d", ret);
> -        return;
>      }
>  
>      vhost_dev_disable_notifiers(&s->dev, vdev);
> @@ -191,21 +191,43 @@ static void vhost_user_blk_set_status(VirtIODevice *vdev, uint8_t status)
>  {
>      VHostUserBlk *s = VHOST_USER_BLK(vdev);
>      bool should_start = status & VIRTIO_CONFIG_S_DRIVER_OK;
> +    int ret;
>  
>      if (!vdev->vm_running) {
>          should_start = false;
>      }
>  
> -    if (s->dev.started == should_start) {
> +    if (s->should_start == should_start) {
> +        return;
> +    }
> +
> +    if (!s->connected || s->dev.started == should_start) {
> +        s->should_start = should_start;
>          return;
>      }
>  
>      if (should_start) {
> -        vhost_user_blk_start(vdev);
> +        s->should_start = true;
> +        /*
> +         * make sure vhost_user_blk_handle_output() ignores fake
> +         * guest kick by vhost_dev_enable_notifiers()
> +         */
> +        barrier();
> +        ret = vhost_user_blk_start(vdev);
> +        if (ret < 0) {
> +            error_report("vhost-user-blk: vhost start failed: %s",
> +                         strerror(-ret));
> +            qemu_chr_fe_disconnect(&s->chardev);
> +        }
>      } else {
>          vhost_user_blk_stop(vdev);
> +        /*
> +         * make sure vhost_user_blk_handle_output() ignore fake
> +         * guest kick by vhost_dev_disable_notifiers()
> +         */
> +        barrier();
> +        s->should_start = false;
>      }
> -
>  }
>  
>  static uint64_t vhost_user_blk_get_features(VirtIODevice *vdev,
> @@ -237,13 +259,22 @@ static uint64_t vhost_user_blk_get_features(VirtIODevice *vdev,
>  static void vhost_user_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq)
>  {
>      VHostUserBlk *s = VHOST_USER_BLK(vdev);
> -    int i;
> +    int i, ret;
>  
>      if (!(virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1) &&
>          !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1))) {
>          return;
>      }
>  
> +    if (s->should_start) {
> +        return;
> +    }
> +    s->should_start = true;
> +
> +    if (!s->connected) {
> +        return;
> +    }
> +
>      if (s->dev.started) {
>          return;
>      }
> @@ -251,7 +282,13 @@ static void vhost_user_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq)
>      /* Some guests kick before setting VIRTIO_CONFIG_S_DRIVER_OK so start
>       * vhost here instead of waiting for .set_status().
>       */
> -    vhost_user_blk_start(vdev);
> +    ret = vhost_user_blk_start(vdev);
> +    if (ret < 0) {
> +        error_report("vhost-user-blk: vhost start failed: %s",
> +                     strerror(-ret));
> +        qemu_chr_fe_disconnect(&s->chardev);
> +        return;
> +    }
>  
>      /* Kick right away to begin processing requests already in vring */
>      for (i = 0; i < s->dev.nvqs; i++) {
> @@ -271,13 +308,106 @@ static void vhost_user_blk_reset(VirtIODevice *vdev)
>      vhost_dev_free_inflight(s->inflight);
>  }
>  
> +static int vhost_user_blk_connect(DeviceState *dev)
> +{
> +    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> +    VHostUserBlk *s = VHOST_USER_BLK(vdev);
> +    int ret = 0;
> +
> +    if (s->connected) {
> +        return 0;
> +    }
> +    s->connected = true;
> +
> +    s->dev.nvqs = s->num_queues;
> +    s->dev.vqs = s->vqs;
> +    s->dev.vq_index = 0;
> +    s->dev.backend_features = 0;
> +
> +    vhost_dev_set_config_notifier(&s->dev, &blk_ops);
> +
> +    ret = vhost_dev_init(&s->dev, s->vhost_user, VHOST_BACKEND_TYPE_USER, 0);
> +    if (ret < 0) {
> +        error_report("vhost-user-blk: vhost initialization failed: %s",
> +                     strerror(-ret));
> +        return ret;
> +    }
> +
> +    /* restore vhost state */
> +    if (s->should_start) {
> +        ret = vhost_user_blk_start(vdev);
> +        if (ret < 0) {
> +            error_report("vhost-user-blk: vhost start failed: %s",
> +                         strerror(-ret));
> +            return ret;
> +        }
> +    }
> +
> +    return 0;
> +}
> +
> +static void vhost_user_blk_disconnect(DeviceState *dev)
> +{
> +    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> +    VHostUserBlk *s = VHOST_USER_BLK(vdev);
> +
> +    if (!s->connected) {
> +        return;
> +    }
> +    s->connected = false;
> +
> +    if (s->dev.started) {
> +        vhost_user_blk_stop(vdev);
> +    }
> +
> +    vhost_dev_cleanup(&s->dev);
> +}
> +
> +static gboolean vhost_user_blk_watch(GIOChannel *chan, GIOCondition cond,
> +                                     void *opaque)
> +{
> +    DeviceState *dev = opaque;
> +    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> +    VHostUserBlk *s = VHOST_USER_BLK(vdev);
> +
> +    qemu_chr_fe_disconnect(&s->chardev);
> +
> +    return true;
> +}
> +
> +static void vhost_user_blk_event(void *opaque, int event)
> +{
> +    DeviceState *dev = opaque;
> +    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> +    VHostUserBlk *s = VHOST_USER_BLK(vdev);
> +
> +    switch (event) {
> +    case CHR_EVENT_OPENED:
> +        if (vhost_user_blk_connect(dev) < 0) {
> +            qemu_chr_fe_disconnect(&s->chardev);
> +            return;
> +        }
> +        s->watch = qemu_chr_fe_add_watch(&s->chardev, G_IO_HUP,
> +                                         vhost_user_blk_watch, dev);
> +        break;
> +    case CHR_EVENT_CLOSED:
> +        vhost_user_blk_disconnect(dev);
> +        if (s->watch) {
> +            g_source_remove(s->watch);
> +            s->watch = 0;
> +        }
> +        break;
> +    }
> +}
> +
> +
>  static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp)
>  {
>      VirtIODevice *vdev = VIRTIO_DEVICE(dev);
>      VHostUserBlk *s = VHOST_USER_BLK(vdev);
>      VhostUserState *user;
> -    struct vhost_virtqueue *vqs = NULL;
>      int i, ret;
> +    Error *err = NULL;
>  
>      if (!s->chardev.chr) {
>          error_setg(errp, "vhost-user-blk: chardev is mandatory");
> @@ -312,27 +442,28 @@ static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp)
>      }
>  
>      s->inflight = g_new0(struct vhost_inflight, 1);
> -
> -    s->dev.nvqs = s->num_queues;
> -    s->dev.vqs = g_new(struct vhost_virtqueue, s->dev.nvqs);
> -    s->dev.vq_index = 0;
> -    s->dev.backend_features = 0;
> -    vqs = s->dev.vqs;
> -
> -    vhost_dev_set_config_notifier(&s->dev, &blk_ops);
> -
> -    ret = vhost_dev_init(&s->dev, s->vhost_user, VHOST_BACKEND_TYPE_USER, 0);
> -    if (ret < 0) {
> -        error_setg(errp, "vhost-user-blk: vhost initialization failed: %s",
> -                   strerror(-ret));
> -        goto virtio_err;
> -    }
> +    s->vqs = g_new(struct vhost_virtqueue, s->num_queues);
> +    s->watch = 0;
> +    s->should_start = false;
> +    s->connected = false;
> +
> +    qemu_chr_fe_set_handlers(&s->chardev,  NULL, NULL, vhost_user_blk_event,
> +                             NULL, (void *)dev, NULL, true);
> +
> +reconnect:
> +    do {
> +        if (qemu_chr_fe_wait_connected(&s->chardev, &err) < 0) {
> +            error_report_err(err);
> +            err = NULL;
> +            sleep(1);

Seems arbitrary. Is this basically waiting until backend will reconnect?
Why not block until event on the fd triggers?

Also, it looks like this will just block forever with no monitor input
and no way for user to figure out what is going on short of
crashing QEMU.

> +        }
> +    } while (!s->connected);
>  
>      ret = vhost_dev_get_config(&s->dev, (uint8_t *)&s->blkcfg,
> -                              sizeof(struct virtio_blk_config));
> +                               sizeof(struct virtio_blk_config));
>      if (ret < 0) {
> -        error_setg(errp, "vhost-user-blk: get block config failed");
> -        goto vhost_err;
> +        error_report("vhost-user-blk: get block config failed");
> +        goto reconnect;
>      }
>  
>      if (s->blkcfg.num_queues != s->num_queues) {
> @@ -340,29 +471,19 @@ static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp)
>      }
>  
>      return;
> -
> -vhost_err:
> -    vhost_dev_cleanup(&s->dev);
> -virtio_err:
> -    g_free(vqs);
> -    g_free(s->inflight);
> -    virtio_cleanup(vdev);
> -
> -    vhost_user_cleanup(user);
> -    g_free(user);
> -    s->vhost_user = NULL;
>  }
>  
>  static void vhost_user_blk_device_unrealize(DeviceState *dev, Error **errp)
>  {
>      VirtIODevice *vdev = VIRTIO_DEVICE(dev);
>      VHostUserBlk *s = VHOST_USER_BLK(dev);
> -    struct vhost_virtqueue *vqs = s->dev.vqs;
>  
>      vhost_user_blk_set_status(vdev, 0);
> +    qemu_chr_fe_set_handlers(&s->chardev,  NULL, NULL, NULL,
> +                             NULL, NULL, NULL, false);
>      vhost_dev_cleanup(&s->dev);
>      vhost_dev_free_inflight(s->inflight);
> -    g_free(vqs);
> +    g_free(s->vqs);
>      g_free(s->inflight);
>      virtio_cleanup(vdev);
>  
> diff --git a/include/hw/virtio/vhost-user-blk.h b/include/hw/virtio/vhost-user-blk.h
> index 445516604a..4849aa5eb5 100644
> --- a/include/hw/virtio/vhost-user-blk.h
> +++ b/include/hw/virtio/vhost-user-blk.h
> @@ -38,6 +38,10 @@ typedef struct VHostUserBlk {
>      struct vhost_dev dev;
>      struct vhost_inflight *inflight;
>      VhostUserState *vhost_user;
> +    struct vhost_virtqueue *vqs;
> +    guint watch;
> +    bool should_start;
> +    bool connected;
>  } VHostUserBlk;
>  
>  #endif
> -- 
> 2.17.1
Michael S. Tsirkin March 13, 2019, 1:16 a.m. UTC | #2
On Thu, Feb 28, 2019 at 04:53:54PM +0800, elohimes@gmail.com wrote:
> From: Xie Yongji <xieyongji@baidu.com>
> 
> Since we now support the message VHOST_USER_GET_INFLIGHT_FD
> and VHOST_USER_SET_INFLIGHT_FD. The backend is able to restart
> safely because it can track inflight I/O in shared memory.
> This patch allows qemu to reconnect the backend after
> connection closed.
> 
> Signed-off-by: Xie Yongji <xieyongji@baidu.com>
> Signed-off-by: Ni Xun <nixun@baidu.com>
> Signed-off-by: Zhang Yu <zhangyu31@baidu.com>
> ---
>  hw/block/vhost-user-blk.c          | 205 +++++++++++++++++++++++------
>  include/hw/virtio/vhost-user-blk.h |   4 +
>  2 files changed, 167 insertions(+), 42 deletions(-)
> 
> diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c
> index 9682df1a7b..539ea2e571 100644
> --- a/hw/block/vhost-user-blk.c
> +++ b/hw/block/vhost-user-blk.c
> @@ -103,7 +103,7 @@ const VhostDevConfigOps blk_ops = {
>      .vhost_dev_config_notifier = vhost_user_blk_handle_config_change,
>  };
>  
> -static void vhost_user_blk_start(VirtIODevice *vdev)
> +static int vhost_user_blk_start(VirtIODevice *vdev)
>  {
>      VHostUserBlk *s = VHOST_USER_BLK(vdev);
>      BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
> @@ -112,13 +112,13 @@ static void vhost_user_blk_start(VirtIODevice *vdev)
>  
>      if (!k->set_guest_notifiers) {
>          error_report("binding does not support guest notifiers");
> -        return;
> +        return -ENOSYS;
>      }
>  
>      ret = vhost_dev_enable_notifiers(&s->dev, vdev);
>      if (ret < 0) {
>          error_report("Error enabling host notifiers: %d", -ret);
> -        return;
> +        return ret;
>      }
>  
>      ret = k->set_guest_notifiers(qbus->parent, s->dev.nvqs, true);
> @@ -157,12 +157,13 @@ static void vhost_user_blk_start(VirtIODevice *vdev)
>          vhost_virtqueue_mask(&s->dev, vdev, i, false);
>      }
>  
> -    return;
> +    return ret;
>  
>  err_guest_notifiers:
>      k->set_guest_notifiers(qbus->parent, s->dev.nvqs, false);
>  err_host_notifiers:
>      vhost_dev_disable_notifiers(&s->dev, vdev);
> +    return ret;
>  }
>  
>  static void vhost_user_blk_stop(VirtIODevice *vdev)
> @@ -181,7 +182,6 @@ static void vhost_user_blk_stop(VirtIODevice *vdev)
>      ret = k->set_guest_notifiers(qbus->parent, s->dev.nvqs, false);
>      if (ret < 0) {
>          error_report("vhost guest notifier cleanup failed: %d", ret);
> -        return;
>      }
>  
>      vhost_dev_disable_notifiers(&s->dev, vdev);
> @@ -191,21 +191,43 @@ static void vhost_user_blk_set_status(VirtIODevice *vdev, uint8_t status)
>  {
>      VHostUserBlk *s = VHOST_USER_BLK(vdev);
>      bool should_start = status & VIRTIO_CONFIG_S_DRIVER_OK;
> +    int ret;
>  
>      if (!vdev->vm_running) {
>          should_start = false;
>      }
>  
> -    if (s->dev.started == should_start) {
> +    if (s->should_start == should_start) {
> +        return;
> +    }
> +
> +    if (!s->connected || s->dev.started == should_start) {
> +        s->should_start = should_start;
>          return;
>      }
>  
>      if (should_start) {
> -        vhost_user_blk_start(vdev);
> +        s->should_start = true;
> +        /*
> +         * make sure vhost_user_blk_handle_output() ignores fake
> +         * guest kick by vhost_dev_enable_notifiers()
> +         */
> +        barrier();
> +        ret = vhost_user_blk_start(vdev);
> +        if (ret < 0) {
> +            error_report("vhost-user-blk: vhost start failed: %s",
> +                         strerror(-ret));
> +            qemu_chr_fe_disconnect(&s->chardev);
> +        }
>      } else {
>          vhost_user_blk_stop(vdev);
> +        /*
> +         * make sure vhost_user_blk_handle_output() ignore fake
> +         * guest kick by vhost_dev_disable_notifiers()
> +         */
> +        barrier();
> +        s->should_start = false;
>      }

I don't understand the comment about ignoring fake guest kicks.
Please add an explanation of what the barrier does here.
And maybe add a detailed explanation in the commit log of
what a "fake kick" is.


> -
>  }
>  
>  static uint64_t vhost_user_blk_get_features(VirtIODevice *vdev,
> @@ -237,13 +259,22 @@ static uint64_t vhost_user_blk_get_features(VirtIODevice *vdev,
>  static void vhost_user_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq)
>  {
>      VHostUserBlk *s = VHOST_USER_BLK(vdev);
> -    int i;
> +    int i, ret;
>  
>      if (!(virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1) &&
>          !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1))) {
>          return;
>      }
>  
> +    if (s->should_start) {
> +        return;
> +    }
> +    s->should_start = true;
> +
> +    if (!s->connected) {
> +        return;
> +    }
> +
>      if (s->dev.started) {
>          return;
>      }
> @@ -251,7 +282,13 @@ static void vhost_user_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq)
>      /* Some guests kick before setting VIRTIO_CONFIG_S_DRIVER_OK so start
>       * vhost here instead of waiting for .set_status().
>       */
> -    vhost_user_blk_start(vdev);
> +    ret = vhost_user_blk_start(vdev);
> +    if (ret < 0) {
> +        error_report("vhost-user-blk: vhost start failed: %s",
> +                     strerror(-ret));
> +        qemu_chr_fe_disconnect(&s->chardev);
> +        return;
> +    }
>  
>      /* Kick right away to begin processing requests already in vring */
>      for (i = 0; i < s->dev.nvqs; i++) {
> @@ -271,13 +308,106 @@ static void vhost_user_blk_reset(VirtIODevice *vdev)
>      vhost_dev_free_inflight(s->inflight);
>  }
>  
> +static int vhost_user_blk_connect(DeviceState *dev)
> +{
> +    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> +    VHostUserBlk *s = VHOST_USER_BLK(vdev);
> +    int ret = 0;
> +
> +    if (s->connected) {
> +        return 0;
> +    }
> +    s->connected = true;
> +
> +    s->dev.nvqs = s->num_queues;
> +    s->dev.vqs = s->vqs;
> +    s->dev.vq_index = 0;
> +    s->dev.backend_features = 0;
> +
> +    vhost_dev_set_config_notifier(&s->dev, &blk_ops);
> +
> +    ret = vhost_dev_init(&s->dev, s->vhost_user, VHOST_BACKEND_TYPE_USER, 0);
> +    if (ret < 0) {
> +        error_report("vhost-user-blk: vhost initialization failed: %s",
> +                     strerror(-ret));
> +        return ret;
> +    }
> +
> +    /* restore vhost state */
> +    if (s->should_start) {
> +        ret = vhost_user_blk_start(vdev);
> +        if (ret < 0) {
> +            error_report("vhost-user-blk: vhost start failed: %s",
> +                         strerror(-ret));
> +            return ret;
> +        }
> +    }
> +
> +    return 0;
> +}
> +
> +static void vhost_user_blk_disconnect(DeviceState *dev)
> +{
> +    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> +    VHostUserBlk *s = VHOST_USER_BLK(vdev);
> +
> +    if (!s->connected) {
> +        return;
> +    }
> +    s->connected = false;
> +
> +    if (s->dev.started) {
> +        vhost_user_blk_stop(vdev);
> +    }
> +
> +    vhost_dev_cleanup(&s->dev);
> +}
> +
> +static gboolean vhost_user_blk_watch(GIOChannel *chan, GIOCondition cond,
> +                                     void *opaque)
> +{
> +    DeviceState *dev = opaque;
> +    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> +    VHostUserBlk *s = VHOST_USER_BLK(vdev);
> +
> +    qemu_chr_fe_disconnect(&s->chardev);
> +
> +    return true;
> +}
> +
> +static void vhost_user_blk_event(void *opaque, int event)
> +{
> +    DeviceState *dev = opaque;
> +    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> +    VHostUserBlk *s = VHOST_USER_BLK(vdev);
> +
> +    switch (event) {
> +    case CHR_EVENT_OPENED:
> +        if (vhost_user_blk_connect(dev) < 0) {
> +            qemu_chr_fe_disconnect(&s->chardev);
> +            return;
> +        }
> +        s->watch = qemu_chr_fe_add_watch(&s->chardev, G_IO_HUP,
> +                                         vhost_user_blk_watch, dev);
> +        break;
> +    case CHR_EVENT_CLOSED:
> +        vhost_user_blk_disconnect(dev);
> +        if (s->watch) {
> +            g_source_remove(s->watch);
> +            s->watch = 0;
> +        }
> +        break;
> +    }
> +}
> +
> +
>  static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp)
>  {
>      VirtIODevice *vdev = VIRTIO_DEVICE(dev);
>      VHostUserBlk *s = VHOST_USER_BLK(vdev);
>      VhostUserState *user;
> -    struct vhost_virtqueue *vqs = NULL;
>      int i, ret;
> +    Error *err = NULL;
>  
>      if (!s->chardev.chr) {
>          error_setg(errp, "vhost-user-blk: chardev is mandatory");
> @@ -312,27 +442,28 @@ static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp)
>      }
>  
>      s->inflight = g_new0(struct vhost_inflight, 1);
> -
> -    s->dev.nvqs = s->num_queues;
> -    s->dev.vqs = g_new(struct vhost_virtqueue, s->dev.nvqs);
> -    s->dev.vq_index = 0;
> -    s->dev.backend_features = 0;
> -    vqs = s->dev.vqs;
> -
> -    vhost_dev_set_config_notifier(&s->dev, &blk_ops);
> -
> -    ret = vhost_dev_init(&s->dev, s->vhost_user, VHOST_BACKEND_TYPE_USER, 0);
> -    if (ret < 0) {
> -        error_setg(errp, "vhost-user-blk: vhost initialization failed: %s",
> -                   strerror(-ret));
> -        goto virtio_err;
> -    }
> +    s->vqs = g_new(struct vhost_virtqueue, s->num_queues);
> +    s->watch = 0;
> +    s->should_start = false;
> +    s->connected = false;
> +
> +    qemu_chr_fe_set_handlers(&s->chardev,  NULL, NULL, vhost_user_blk_event,
> +                             NULL, (void *)dev, NULL, true);
> +
> +reconnect:
> +    do {
> +        if (qemu_chr_fe_wait_connected(&s->chardev, &err) < 0) {
> +            error_report_err(err);
> +            err = NULL;
> +            sleep(1);
> +        }
> +    } while (!s->connected);
>  
>      ret = vhost_dev_get_config(&s->dev, (uint8_t *)&s->blkcfg,
> -                              sizeof(struct virtio_blk_config));
> +                               sizeof(struct virtio_blk_config));
>      if (ret < 0) {
> -        error_setg(errp, "vhost-user-blk: get block config failed");
> -        goto vhost_err;
> +        error_report("vhost-user-blk: get block config failed");
> +        goto reconnect;
>      }
>  
>      if (s->blkcfg.num_queues != s->num_queues) {
> @@ -340,29 +471,19 @@ static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp)
>      }
>  
>      return;

With this we end up with a return at the end of the function,
which makes no sense.

> -
> -vhost_err:
> -    vhost_dev_cleanup(&s->dev);
> -virtio_err:
> -    g_free(vqs);
> -    g_free(s->inflight);
> -    virtio_cleanup(vdev);
> -
> -    vhost_user_cleanup(user);
> -    g_free(user);
> -    s->vhost_user = NULL;
>  }
>  
>  static void vhost_user_blk_device_unrealize(DeviceState *dev, Error **errp)
>  {
>      VirtIODevice *vdev = VIRTIO_DEVICE(dev);
>      VHostUserBlk *s = VHOST_USER_BLK(dev);
> -    struct vhost_virtqueue *vqs = s->dev.vqs;
>  
>      vhost_user_blk_set_status(vdev, 0);
> +    qemu_chr_fe_set_handlers(&s->chardev,  NULL, NULL, NULL,
> +                             NULL, NULL, NULL, false);
>      vhost_dev_cleanup(&s->dev);
>      vhost_dev_free_inflight(s->inflight);
> -    g_free(vqs);
> +    g_free(s->vqs);
>      g_free(s->inflight);
>      virtio_cleanup(vdev);
>  
> diff --git a/include/hw/virtio/vhost-user-blk.h b/include/hw/virtio/vhost-user-blk.h
> index 445516604a..4849aa5eb5 100644
> --- a/include/hw/virtio/vhost-user-blk.h
> +++ b/include/hw/virtio/vhost-user-blk.h
> @@ -38,6 +38,10 @@ typedef struct VHostUserBlk {
>      struct vhost_dev dev;
>      struct vhost_inflight *inflight;
>      VhostUserState *vhost_user;
> +    struct vhost_virtqueue *vqs;
> +    guint watch;
> +    bool should_start;

Please add comments about fields.
Also should_start seems to be set on guest activity.
Does it need to be migrated?
It would be better not to track guest state like this.
Why do we need it?
Pls add comments with an explanation.

> +    bool connected;
>  } VHostUserBlk;
>  
>  #endif
> -- 
> 2.17.1
Yongji Xie March 13, 2019, 2:05 a.m. UTC | #3
On Wed, 13 Mar 2019 at 00:49, Michael S. Tsirkin <mst@redhat.com> wrote:
>
> On Thu, Feb 28, 2019 at 04:53:54PM +0800, elohimes@gmail.com wrote:
> > From: Xie Yongji <xieyongji@baidu.com>
> >
> > Since we now support the message VHOST_USER_GET_INFLIGHT_FD
> > and VHOST_USER_SET_INFLIGHT_FD. The backend is able to restart
> > safely because it can track inflight I/O in shared memory.
> > This patch allows qemu to reconnect the backend after
> > connection closed.
> >
> > Signed-off-by: Xie Yongji <xieyongji@baidu.com>
> > Signed-off-by: Ni Xun <nixun@baidu.com>
> > Signed-off-by: Zhang Yu <zhangyu31@baidu.com>
> > ---
> >  hw/block/vhost-user-blk.c          | 205 +++++++++++++++++++++++------
> >  include/hw/virtio/vhost-user-blk.h |   4 +
> >  2 files changed, 167 insertions(+), 42 deletions(-)
> >
> > diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c
> > index 9682df1a7b..539ea2e571 100644
> > --- a/hw/block/vhost-user-blk.c
> > +++ b/hw/block/vhost-user-blk.c
> > @@ -103,7 +103,7 @@ const VhostDevConfigOps blk_ops = {
> >      .vhost_dev_config_notifier = vhost_user_blk_handle_config_change,
> >  };
> >
> > -static void vhost_user_blk_start(VirtIODevice *vdev)
> > +static int vhost_user_blk_start(VirtIODevice *vdev)
> >  {
> >      VHostUserBlk *s = VHOST_USER_BLK(vdev);
> >      BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
> > @@ -112,13 +112,13 @@ static void vhost_user_blk_start(VirtIODevice *vdev)
> >
> >      if (!k->set_guest_notifiers) {
> >          error_report("binding does not support guest notifiers");
> > -        return;
> > +        return -ENOSYS;
> >      }
> >
> >      ret = vhost_dev_enable_notifiers(&s->dev, vdev);
> >      if (ret < 0) {
> >          error_report("Error enabling host notifiers: %d", -ret);
> > -        return;
> > +        return ret;
> >      }
> >
> >      ret = k->set_guest_notifiers(qbus->parent, s->dev.nvqs, true);
> > @@ -157,12 +157,13 @@ static void vhost_user_blk_start(VirtIODevice *vdev)
> >          vhost_virtqueue_mask(&s->dev, vdev, i, false);
> >      }
> >
> > -    return;
> > +    return ret;
> >
> >  err_guest_notifiers:
> >      k->set_guest_notifiers(qbus->parent, s->dev.nvqs, false);
> >  err_host_notifiers:
> >      vhost_dev_disable_notifiers(&s->dev, vdev);
> > +    return ret;
> >  }
> >
> >  static void vhost_user_blk_stop(VirtIODevice *vdev)
> > @@ -181,7 +182,6 @@ static void vhost_user_blk_stop(VirtIODevice *vdev)
> >      ret = k->set_guest_notifiers(qbus->parent, s->dev.nvqs, false);
> >      if (ret < 0) {
> >          error_report("vhost guest notifier cleanup failed: %d", ret);
> > -        return;
> >      }
> >
> >      vhost_dev_disable_notifiers(&s->dev, vdev);
> > @@ -191,21 +191,43 @@ static void vhost_user_blk_set_status(VirtIODevice *vdev, uint8_t status)
> >  {
> >      VHostUserBlk *s = VHOST_USER_BLK(vdev);
> >      bool should_start = status & VIRTIO_CONFIG_S_DRIVER_OK;
> > +    int ret;
> >
> >      if (!vdev->vm_running) {
> >          should_start = false;
> >      }
> >
> > -    if (s->dev.started == should_start) {
> > +    if (s->should_start == should_start) {
> > +        return;
> > +    }
> > +
> > +    if (!s->connected || s->dev.started == should_start) {
> > +        s->should_start = should_start;
> >          return;
> >      }
> >
> >      if (should_start) {
> > -        vhost_user_blk_start(vdev);
> > +        s->should_start = true;
> > +        /*
> > +         * make sure vhost_user_blk_handle_output() ignores fake
> > +         * guest kick by vhost_dev_enable_notifiers()
> > +         */
> > +        barrier();
> > +        ret = vhost_user_blk_start(vdev);
> > +        if (ret < 0) {
> > +            error_report("vhost-user-blk: vhost start failed: %s",
> > +                         strerror(-ret));
> > +            qemu_chr_fe_disconnect(&s->chardev);
> > +        }
> >      } else {
> >          vhost_user_blk_stop(vdev);
> > +        /*
> > +         * make sure vhost_user_blk_handle_output() ignore fake
> > +         * guest kick by vhost_dev_disable_notifiers()
> > +         */
> > +        barrier();
> > +        s->should_start = false;
> >      }
> > -
> >  }
> >
> >  static uint64_t vhost_user_blk_get_features(VirtIODevice *vdev,
> > @@ -237,13 +259,22 @@ static uint64_t vhost_user_blk_get_features(VirtIODevice *vdev,
> >  static void vhost_user_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq)
> >  {
> >      VHostUserBlk *s = VHOST_USER_BLK(vdev);
> > -    int i;
> > +    int i, ret;
> >
> >      if (!(virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1) &&
> >          !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1))) {
> >          return;
> >      }
> >
> > +    if (s->should_start) {
> > +        return;
> > +    }
> > +    s->should_start = true;
> > +
> > +    if (!s->connected) {
> > +        return;
> > +    }
> > +
> >      if (s->dev.started) {
> >          return;
> >      }
> > @@ -251,7 +282,13 @@ static void vhost_user_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq)
> >      /* Some guests kick before setting VIRTIO_CONFIG_S_DRIVER_OK so start
> >       * vhost here instead of waiting for .set_status().
> >       */
> > -    vhost_user_blk_start(vdev);
> > +    ret = vhost_user_blk_start(vdev);
> > +    if (ret < 0) {
> > +        error_report("vhost-user-blk: vhost start failed: %s",
> > +                     strerror(-ret));
> > +        qemu_chr_fe_disconnect(&s->chardev);
> > +        return;
> > +    }
> >
> >      /* Kick right away to begin processing requests already in vring */
> >      for (i = 0; i < s->dev.nvqs; i++) {
> > @@ -271,13 +308,106 @@ static void vhost_user_blk_reset(VirtIODevice *vdev)
> >      vhost_dev_free_inflight(s->inflight);
> >  }
> >
> > +static int vhost_user_blk_connect(DeviceState *dev)
> > +{
> > +    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> > +    VHostUserBlk *s = VHOST_USER_BLK(vdev);
> > +    int ret = 0;
> > +
> > +    if (s->connected) {
> > +        return 0;
> > +    }
> > +    s->connected = true;
> > +
> > +    s->dev.nvqs = s->num_queues;
> > +    s->dev.vqs = s->vqs;
> > +    s->dev.vq_index = 0;
> > +    s->dev.backend_features = 0;
> > +
> > +    vhost_dev_set_config_notifier(&s->dev, &blk_ops);
> > +
> > +    ret = vhost_dev_init(&s->dev, s->vhost_user, VHOST_BACKEND_TYPE_USER, 0);
> > +    if (ret < 0) {
> > +        error_report("vhost-user-blk: vhost initialization failed: %s",
> > +                     strerror(-ret));
> > +        return ret;
> > +    }
> > +
> > +    /* restore vhost state */
> > +    if (s->should_start) {
> > +        ret = vhost_user_blk_start(vdev);
> > +        if (ret < 0) {
> > +            error_report("vhost-user-blk: vhost start failed: %s",
> > +                         strerror(-ret));
> > +            return ret;
> > +        }
> > +    }
> > +
> > +    return 0;
> > +}
> > +
> > +static void vhost_user_blk_disconnect(DeviceState *dev)
> > +{
> > +    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> > +    VHostUserBlk *s = VHOST_USER_BLK(vdev);
> > +
> > +    if (!s->connected) {
> > +        return;
> > +    }
> > +    s->connected = false;
> > +
> > +    if (s->dev.started) {
> > +        vhost_user_blk_stop(vdev);
> > +    }
> > +
> > +    vhost_dev_cleanup(&s->dev);
> > +}
> > +
> > +static gboolean vhost_user_blk_watch(GIOChannel *chan, GIOCondition cond,
> > +                                     void *opaque)
> > +{
> > +    DeviceState *dev = opaque;
> > +    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> > +    VHostUserBlk *s = VHOST_USER_BLK(vdev);
> > +
> > +    qemu_chr_fe_disconnect(&s->chardev);
> > +
> > +    return true;
> > +}
> > +
> > +static void vhost_user_blk_event(void *opaque, int event)
> > +{
> > +    DeviceState *dev = opaque;
> > +    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> > +    VHostUserBlk *s = VHOST_USER_BLK(vdev);
> > +
> > +    switch (event) {
> > +    case CHR_EVENT_OPENED:
> > +        if (vhost_user_blk_connect(dev) < 0) {
> > +            qemu_chr_fe_disconnect(&s->chardev);
> > +            return;
> > +        }
> > +        s->watch = qemu_chr_fe_add_watch(&s->chardev, G_IO_HUP,
> > +                                         vhost_user_blk_watch, dev);
> > +        break;
> > +    case CHR_EVENT_CLOSED:
> > +        vhost_user_blk_disconnect(dev);
> > +        if (s->watch) {
> > +            g_source_remove(s->watch);
> > +            s->watch = 0;
> > +        }
> > +        break;
> > +    }
> > +}
> > +
> > +
> >  static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp)
> >  {
> >      VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> >      VHostUserBlk *s = VHOST_USER_BLK(vdev);
> >      VhostUserState *user;
> > -    struct vhost_virtqueue *vqs = NULL;
> >      int i, ret;
> > +    Error *err = NULL;
> >
> >      if (!s->chardev.chr) {
> >          error_setg(errp, "vhost-user-blk: chardev is mandatory");
> > @@ -312,27 +442,28 @@ static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp)
> >      }
> >
> >      s->inflight = g_new0(struct vhost_inflight, 1);
> > -
> > -    s->dev.nvqs = s->num_queues;
> > -    s->dev.vqs = g_new(struct vhost_virtqueue, s->dev.nvqs);
> > -    s->dev.vq_index = 0;
> > -    s->dev.backend_features = 0;
> > -    vqs = s->dev.vqs;
> > -
> > -    vhost_dev_set_config_notifier(&s->dev, &blk_ops);
> > -
> > -    ret = vhost_dev_init(&s->dev, s->vhost_user, VHOST_BACKEND_TYPE_USER, 0);
> > -    if (ret < 0) {
> > -        error_setg(errp, "vhost-user-blk: vhost initialization failed: %s",
> > -                   strerror(-ret));
> > -        goto virtio_err;
> > -    }
> > +    s->vqs = g_new(struct vhost_virtqueue, s->num_queues);
> > +    s->watch = 0;
> > +    s->should_start = false;
> > +    s->connected = false;
> > +
> > +    qemu_chr_fe_set_handlers(&s->chardev,  NULL, NULL, vhost_user_blk_event,
> > +                             NULL, (void *)dev, NULL, true);
> > +
> > +reconnect:
> > +    do {
> > +        if (qemu_chr_fe_wait_connected(&s->chardev, &err) < 0) {
> > +            error_report_err(err);
> > +            err = NULL;
> > +            sleep(1);
>
> Seems arbitrary. Is this basically waiting until backend will reconnect?
> Why not block until event on the fd triggers?
>

QEMU is the client in the current vhost-user-blk implementation. It seems
the backend will not try to connect.

> Also, it looks like this will just block forever with no monitor input
> and no way for user to figure out what is going on short of
> crashing QEMU.
>

Yes, because I want to support the case where the backend starts after
QEMU. I can print a warning and add a timeout for this.

Thanks,
Yongji
Yongji Xie March 13, 2019, 2:47 a.m. UTC | #4
On Wed, 13 Mar 2019 at 09:16, Michael S. Tsirkin <mst@redhat.com> wrote:
>
> On Thu, Feb 28, 2019 at 04:53:54PM +0800, elohimes@gmail.com wrote:
> > From: Xie Yongji <xieyongji@baidu.com>
> >
> > Since we now support the message VHOST_USER_GET_INFLIGHT_FD
> > and VHOST_USER_SET_INFLIGHT_FD. The backend is able to restart
> > safely because it can track inflight I/O in shared memory.
> > This patch allows qemu to reconnect the backend after
> > connection closed.
> >
> > Signed-off-by: Xie Yongji <xieyongji@baidu.com>
> > Signed-off-by: Ni Xun <nixun@baidu.com>
> > Signed-off-by: Zhang Yu <zhangyu31@baidu.com>
> > ---
> >  hw/block/vhost-user-blk.c          | 205 +++++++++++++++++++++++------
> >  include/hw/virtio/vhost-user-blk.h |   4 +
> >  2 files changed, 167 insertions(+), 42 deletions(-)
> >
> > diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c
> > index 9682df1a7b..539ea2e571 100644
> > --- a/hw/block/vhost-user-blk.c
> > +++ b/hw/block/vhost-user-blk.c
> > @@ -103,7 +103,7 @@ const VhostDevConfigOps blk_ops = {
> >      .vhost_dev_config_notifier = vhost_user_blk_handle_config_change,
> >  };
> >
> > -static void vhost_user_blk_start(VirtIODevice *vdev)
> > +static int vhost_user_blk_start(VirtIODevice *vdev)
> >  {
> >      VHostUserBlk *s = VHOST_USER_BLK(vdev);
> >      BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
> > @@ -112,13 +112,13 @@ static void vhost_user_blk_start(VirtIODevice *vdev)
> >
> >      if (!k->set_guest_notifiers) {
> >          error_report("binding does not support guest notifiers");
> > -        return;
> > +        return -ENOSYS;
> >      }
> >
> >      ret = vhost_dev_enable_notifiers(&s->dev, vdev);
> >      if (ret < 0) {
> >          error_report("Error enabling host notifiers: %d", -ret);
> > -        return;
> > +        return ret;
> >      }
> >
> >      ret = k->set_guest_notifiers(qbus->parent, s->dev.nvqs, true);
> > @@ -157,12 +157,13 @@ static void vhost_user_blk_start(VirtIODevice *vdev)
> >          vhost_virtqueue_mask(&s->dev, vdev, i, false);
> >      }
> >
> > -    return;
> > +    return ret;
> >
> >  err_guest_notifiers:
> >      k->set_guest_notifiers(qbus->parent, s->dev.nvqs, false);
> >  err_host_notifiers:
> >      vhost_dev_disable_notifiers(&s->dev, vdev);
> > +    return ret;
> >  }
> >
> >  static void vhost_user_blk_stop(VirtIODevice *vdev)
> > @@ -181,7 +182,6 @@ static void vhost_user_blk_stop(VirtIODevice *vdev)
> >      ret = k->set_guest_notifiers(qbus->parent, s->dev.nvqs, false);
> >      if (ret < 0) {
> >          error_report("vhost guest notifier cleanup failed: %d", ret);
> > -        return;
> >      }
> >
> >      vhost_dev_disable_notifiers(&s->dev, vdev);
> > @@ -191,21 +191,43 @@ static void vhost_user_blk_set_status(VirtIODevice *vdev, uint8_t status)
> >  {
> >      VHostUserBlk *s = VHOST_USER_BLK(vdev);
> >      bool should_start = status & VIRTIO_CONFIG_S_DRIVER_OK;
> > +    int ret;
> >
> >      if (!vdev->vm_running) {
> >          should_start = false;
> >      }
> >
> > -    if (s->dev.started == should_start) {
> > +    if (s->should_start == should_start) {
> > +        return;
> > +    }
> > +
> > +    if (!s->connected || s->dev.started == should_start) {
> > +        s->should_start = should_start;
> >          return;
> >      }
> >
> >      if (should_start) {
> > -        vhost_user_blk_start(vdev);
> > +        s->should_start = true;
> > +        /*
> > +         * make sure vhost_user_blk_handle_output() ignores fake
> > +         * guest kick by vhost_dev_enable_notifiers()
> > +         */
> > +        barrier();
> > +        ret = vhost_user_blk_start(vdev);
> > +        if (ret < 0) {
> > +            error_report("vhost-user-blk: vhost start failed: %s",
> > +                         strerror(-ret));
> > +            qemu_chr_fe_disconnect(&s->chardev);
> > +        }
> >      } else {
> >          vhost_user_blk_stop(vdev);
> > +        /*
> > +         * make sure vhost_user_blk_handle_output() ignore fake
> > +         * guest kick by vhost_dev_disable_notifiers()
> > +         */
> > +        barrier();
> > +        s->should_start = false;
> >      }
>
> I don't understand the comment about ignoring fake guest kicks.
> Please add an explanation about what does barrier do here.

We may get a call stack like this:

vhost_user_blk_stop()
  vhost_dev_disable_notifiers()
    virtio_bus_cleanup_host_notifier()
      virtio_queue_host_notifier_read()
        virtio_queue_notify_vq()  /* fake guest kick */
          vhost_user_blk_handle_output()

vhost_user_blk_handle_output() will re-start vhost-user if
should_start is false. This is not what we expect. The same applies to
vhost_dev_enable_notifiers().

> And maybe a detailed explanation about what "fake kick"
> is in the commit log.
>

Sure.
>
> > -
> >  }
> >
> >  static uint64_t vhost_user_blk_get_features(VirtIODevice *vdev,
> > @@ -237,13 +259,22 @@ static uint64_t vhost_user_blk_get_features(VirtIODevice *vdev,
> >  static void vhost_user_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq)
> >  {
> >      VHostUserBlk *s = VHOST_USER_BLK(vdev);
> > -    int i;
> > +    int i, ret;
> >
> >      if (!(virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1) &&
> >          !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1))) {
> >          return;
> >      }
> >
> > +    if (s->should_start) {
> > +        return;
> > +    }
> > +    s->should_start = true;
> > +
> > +    if (!s->connected) {
> > +        return;
> > +    }
> > +
> >      if (s->dev.started) {
> >          return;
> >      }
> > @@ -251,7 +282,13 @@ static void vhost_user_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq)
> >      /* Some guests kick before setting VIRTIO_CONFIG_S_DRIVER_OK so start
> >       * vhost here instead of waiting for .set_status().
> >       */
> > -    vhost_user_blk_start(vdev);
> > +    ret = vhost_user_blk_start(vdev);
> > +    if (ret < 0) {
> > +        error_report("vhost-user-blk: vhost start failed: %s",
> > +                     strerror(-ret));
> > +        qemu_chr_fe_disconnect(&s->chardev);
> > +        return;
> > +    }
> >
> >      /* Kick right away to begin processing requests already in vring */
> >      for (i = 0; i < s->dev.nvqs; i++) {
> > @@ -271,13 +308,106 @@ static void vhost_user_blk_reset(VirtIODevice *vdev)
> >      vhost_dev_free_inflight(s->inflight);
> >  }
> >
> > +static int vhost_user_blk_connect(DeviceState *dev)
> > +{
> > +    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> > +    VHostUserBlk *s = VHOST_USER_BLK(vdev);
> > +    int ret = 0;
> > +
> > +    if (s->connected) {
> > +        return 0;
> > +    }
> > +    s->connected = true;
> > +
> > +    s->dev.nvqs = s->num_queues;
> > +    s->dev.vqs = s->vqs;
> > +    s->dev.vq_index = 0;
> > +    s->dev.backend_features = 0;
> > +
> > +    vhost_dev_set_config_notifier(&s->dev, &blk_ops);
> > +
> > +    ret = vhost_dev_init(&s->dev, s->vhost_user, VHOST_BACKEND_TYPE_USER, 0);
> > +    if (ret < 0) {
> > +        error_report("vhost-user-blk: vhost initialization failed: %s",
> > +                     strerror(-ret));
> > +        return ret;
> > +    }
> > +
> > +    /* restore vhost state */
> > +    if (s->should_start) {
> > +        ret = vhost_user_blk_start(vdev);
> > +        if (ret < 0) {
> > +            error_report("vhost-user-blk: vhost start failed: %s",
> > +                         strerror(-ret));
> > +            return ret;
> > +        }
> > +    }
> > +
> > +    return 0;
> > +}
> > +
> > +static void vhost_user_blk_disconnect(DeviceState *dev)
> > +{
> > +    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> > +    VHostUserBlk *s = VHOST_USER_BLK(vdev);
> > +
> > +    if (!s->connected) {
> > +        return;
> > +    }
> > +    s->connected = false;
> > +
> > +    if (s->dev.started) {
> > +        vhost_user_blk_stop(vdev);
> > +    }
> > +
> > +    vhost_dev_cleanup(&s->dev);
> > +}
> > +
> > +static gboolean vhost_user_blk_watch(GIOChannel *chan, GIOCondition cond,
> > +                                     void *opaque)
> > +{
> > +    DeviceState *dev = opaque;
> > +    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> > +    VHostUserBlk *s = VHOST_USER_BLK(vdev);
> > +
> > +    qemu_chr_fe_disconnect(&s->chardev);
> > +
> > +    return true;
> > +}
> > +
> > +static void vhost_user_blk_event(void *opaque, int event)
> > +{
> > +    DeviceState *dev = opaque;
> > +    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> > +    VHostUserBlk *s = VHOST_USER_BLK(vdev);
> > +
> > +    switch (event) {
> > +    case CHR_EVENT_OPENED:
> > +        if (vhost_user_blk_connect(dev) < 0) {
> > +            qemu_chr_fe_disconnect(&s->chardev);
> > +            return;
> > +        }
> > +        s->watch = qemu_chr_fe_add_watch(&s->chardev, G_IO_HUP,
> > +                                         vhost_user_blk_watch, dev);
> > +        break;
> > +    case CHR_EVENT_CLOSED:
> > +        vhost_user_blk_disconnect(dev);
> > +        if (s->watch) {
> > +            g_source_remove(s->watch);
> > +            s->watch = 0;
> > +        }
> > +        break;
> > +    }
> > +}
> > +
> > +
> >  static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp)
> >  {
> >      VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> >      VHostUserBlk *s = VHOST_USER_BLK(vdev);
> >      VhostUserState *user;
> > -    struct vhost_virtqueue *vqs = NULL;
> >      int i, ret;
> > +    Error *err = NULL;
> >
> >      if (!s->chardev.chr) {
> >          error_setg(errp, "vhost-user-blk: chardev is mandatory");
> > @@ -312,27 +442,28 @@ static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp)
> >      }
> >
> >      s->inflight = g_new0(struct vhost_inflight, 1);
> > -
> > -    s->dev.nvqs = s->num_queues;
> > -    s->dev.vqs = g_new(struct vhost_virtqueue, s->dev.nvqs);
> > -    s->dev.vq_index = 0;
> > -    s->dev.backend_features = 0;
> > -    vqs = s->dev.vqs;
> > -
> > -    vhost_dev_set_config_notifier(&s->dev, &blk_ops);
> > -
> > -    ret = vhost_dev_init(&s->dev, s->vhost_user, VHOST_BACKEND_TYPE_USER, 0);
> > -    if (ret < 0) {
> > -        error_setg(errp, "vhost-user-blk: vhost initialization failed: %s",
> > -                   strerror(-ret));
> > -        goto virtio_err;
> > -    }
> > +    s->vqs = g_new(struct vhost_virtqueue, s->num_queues);
> > +    s->watch = 0;
> > +    s->should_start = false;
> > +    s->connected = false;
> > +
> > +    qemu_chr_fe_set_handlers(&s->chardev,  NULL, NULL, vhost_user_blk_event,
> > +                             NULL, (void *)dev, NULL, true);
> > +
> > +reconnect:
> > +    do {
> > +        if (qemu_chr_fe_wait_connected(&s->chardev, &err) < 0) {
> > +            error_report_err(err);
> > +            err = NULL;
> > +            sleep(1);
> > +        }
> > +    } while (!s->connected);
> >
> >      ret = vhost_dev_get_config(&s->dev, (uint8_t *)&s->blkcfg,
> > -                              sizeof(struct virtio_blk_config));
> > +                               sizeof(struct virtio_blk_config));
> >      if (ret < 0) {
> > -        error_setg(errp, "vhost-user-blk: get block config failed");
> > -        goto vhost_err;
> > +        error_report("vhost-user-blk: get block config failed");
> > +        goto reconnect;
> >      }
> >
> >      if (s->blkcfg.num_queues != s->num_queues) {
> > @@ -340,29 +471,19 @@ static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp)
> >      }
> >
> >      return;
>
> With this we end up with return at end of function
> which makes no sense.
>

Oh, yes. I will remove it.

> > -
> > -vhost_err:
> > -    vhost_dev_cleanup(&s->dev);
> > -virtio_err:
> > -    g_free(vqs);
> > -    g_free(s->inflight);
> > -    virtio_cleanup(vdev);
> > -
> > -    vhost_user_cleanup(user);
> > -    g_free(user);
> > -    s->vhost_user = NULL;
> >  }
> >
> >  static void vhost_user_blk_device_unrealize(DeviceState *dev, Error **errp)
> >  {
> >      VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> >      VHostUserBlk *s = VHOST_USER_BLK(dev);
> > -    struct vhost_virtqueue *vqs = s->dev.vqs;
> >
> >      vhost_user_blk_set_status(vdev, 0);
> > +    qemu_chr_fe_set_handlers(&s->chardev,  NULL, NULL, NULL,
> > +                             NULL, NULL, NULL, false);
> >      vhost_dev_cleanup(&s->dev);
> >      vhost_dev_free_inflight(s->inflight);
> > -    g_free(vqs);
> > +    g_free(s->vqs);
> >      g_free(s->inflight);
> >      virtio_cleanup(vdev);
> >
> > diff --git a/include/hw/virtio/vhost-user-blk.h b/include/hw/virtio/vhost-user-blk.h
> > index 445516604a..4849aa5eb5 100644
> > --- a/include/hw/virtio/vhost-user-blk.h
> > +++ b/include/hw/virtio/vhost-user-blk.h
> > @@ -38,6 +38,10 @@ typedef struct VHostUserBlk {
> >      struct vhost_dev dev;
> >      struct vhost_inflight *inflight;
> >      VhostUserState *vhost_user;
> > +    struct vhost_virtqueue *vqs;
> > +    guint watch;
> > +    bool should_start;
>
> Please add comments about fields.
> Also should_start seems to be set on guest activity.
> Does it need to be migrated?
> It would be better not to track guest state like this.
> Why do we need it?
> Pls add comments with an explanation.
>

We can't rely on vhost_dev.started to track the guest's state in
vhost_user_blk_set_status() when we enable reconnecting. For example,
QEMU would lose the guest's state if the guest stops the device while
the backend is disconnected. So in this patch vhost_dev.started is used
to track the backend's state, and should_start is used to track the
guest's state. And should_start is derived from VirtIODevice.status, so
I think it is not necessary to migrate it. What do you think?

Thanks,
Yongji
Michael S. Tsirkin March 14, 2019, 11:14 a.m. UTC | #5
On Wed, Mar 13, 2019 at 10:05:51AM +0800, Yongji Xie wrote:
> On Wed, 13 Mar 2019 at 00:49, Michael S. Tsirkin <mst@redhat.com> wrote:
> >
> > On Thu, Feb 28, 2019 at 04:53:54PM +0800, elohimes@gmail.com wrote:
> > > From: Xie Yongji <xieyongji@baidu.com>
> > >
> > > Since we now support the message VHOST_USER_GET_INFLIGHT_FD
> > > and VHOST_USER_SET_INFLIGHT_FD. The backend is able to restart
> > > safely because it can track inflight I/O in shared memory.
> > > This patch allows qemu to reconnect the backend after
> > > connection closed.
> > >
> > > Signed-off-by: Xie Yongji <xieyongji@baidu.com>
> > > Signed-off-by: Ni Xun <nixun@baidu.com>
> > > Signed-off-by: Zhang Yu <zhangyu31@baidu.com>
> > > ---
> > >  hw/block/vhost-user-blk.c          | 205 +++++++++++++++++++++++------
> > >  include/hw/virtio/vhost-user-blk.h |   4 +
> > >  2 files changed, 167 insertions(+), 42 deletions(-)
> > >
> > > diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c
> > > index 9682df1a7b..539ea2e571 100644
> > > --- a/hw/block/vhost-user-blk.c
> > > +++ b/hw/block/vhost-user-blk.c
> > > @@ -103,7 +103,7 @@ const VhostDevConfigOps blk_ops = {
> > >      .vhost_dev_config_notifier = vhost_user_blk_handle_config_change,
> > >  };
> > >
> > > -static void vhost_user_blk_start(VirtIODevice *vdev)
> > > +static int vhost_user_blk_start(VirtIODevice *vdev)
> > >  {
> > >      VHostUserBlk *s = VHOST_USER_BLK(vdev);
> > >      BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
> > > @@ -112,13 +112,13 @@ static void vhost_user_blk_start(VirtIODevice *vdev)
> > >
> > >      if (!k->set_guest_notifiers) {
> > >          error_report("binding does not support guest notifiers");
> > > -        return;
> > > +        return -ENOSYS;
> > >      }
> > >
> > >      ret = vhost_dev_enable_notifiers(&s->dev, vdev);
> > >      if (ret < 0) {
> > >          error_report("Error enabling host notifiers: %d", -ret);
> > > -        return;
> > > +        return ret;
> > >      }
> > >
> > >      ret = k->set_guest_notifiers(qbus->parent, s->dev.nvqs, true);
> > > @@ -157,12 +157,13 @@ static void vhost_user_blk_start(VirtIODevice *vdev)
> > >          vhost_virtqueue_mask(&s->dev, vdev, i, false);
> > >      }
> > >
> > > -    return;
> > > +    return ret;
> > >
> > >  err_guest_notifiers:
> > >      k->set_guest_notifiers(qbus->parent, s->dev.nvqs, false);
> > >  err_host_notifiers:
> > >      vhost_dev_disable_notifiers(&s->dev, vdev);
> > > +    return ret;
> > >  }
> > >
> > >  static void vhost_user_blk_stop(VirtIODevice *vdev)
> > > @@ -181,7 +182,6 @@ static void vhost_user_blk_stop(VirtIODevice *vdev)
> > >      ret = k->set_guest_notifiers(qbus->parent, s->dev.nvqs, false);
> > >      if (ret < 0) {
> > >          error_report("vhost guest notifier cleanup failed: %d", ret);
> > > -        return;
> > >      }
> > >
> > >      vhost_dev_disable_notifiers(&s->dev, vdev);
> > > @@ -191,21 +191,43 @@ static void vhost_user_blk_set_status(VirtIODevice *vdev, uint8_t status)
> > >  {
> > >      VHostUserBlk *s = VHOST_USER_BLK(vdev);
> > >      bool should_start = status & VIRTIO_CONFIG_S_DRIVER_OK;
> > > +    int ret;
> > >
> > >      if (!vdev->vm_running) {
> > >          should_start = false;
> > >      }
> > >
> > > -    if (s->dev.started == should_start) {
> > > +    if (s->should_start == should_start) {
> > > +        return;
> > > +    }
> > > +
> > > +    if (!s->connected || s->dev.started == should_start) {
> > > +        s->should_start = should_start;
> > >          return;
> > >      }
> > >
> > >      if (should_start) {
> > > -        vhost_user_blk_start(vdev);
> > > +        s->should_start = true;
> > > +        /*
> > > +         * make sure vhost_user_blk_handle_output() ignores fake
> > > +         * guest kick by vhost_dev_enable_notifiers()
> > > +         */
> > > +        barrier();
> > > +        ret = vhost_user_blk_start(vdev);
> > > +        if (ret < 0) {
> > > +            error_report("vhost-user-blk: vhost start failed: %s",
> > > +                         strerror(-ret));
> > > +            qemu_chr_fe_disconnect(&s->chardev);
> > > +        }
> > >      } else {
> > >          vhost_user_blk_stop(vdev);
> > > +        /*
> > > +         * make sure vhost_user_blk_handle_output() ignore fake
> > > +         * guest kick by vhost_dev_disable_notifiers()
> > > +         */
> > > +        barrier();
> > > +        s->should_start = false;
> > >      }
> > > -
> > >  }
> > >
> > >  static uint64_t vhost_user_blk_get_features(VirtIODevice *vdev,
> > > @@ -237,13 +259,22 @@ static uint64_t vhost_user_blk_get_features(VirtIODevice *vdev,
> > >  static void vhost_user_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq)
> > >  {
> > >      VHostUserBlk *s = VHOST_USER_BLK(vdev);
> > > -    int i;
> > > +    int i, ret;
> > >
> > >      if (!(virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1) &&
> > >          !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1))) {
> > >          return;
> > >      }
> > >
> > > +    if (s->should_start) {
> > > +        return;
> > > +    }
> > > +    s->should_start = true;
> > > +
> > > +    if (!s->connected) {
> > > +        return;
> > > +    }
> > > +
> > >      if (s->dev.started) {
> > >          return;
> > >      }
> > > @@ -251,7 +282,13 @@ static void vhost_user_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq)
> > >      /* Some guests kick before setting VIRTIO_CONFIG_S_DRIVER_OK so start
> > >       * vhost here instead of waiting for .set_status().
> > >       */
> > > -    vhost_user_blk_start(vdev);
> > > +    ret = vhost_user_blk_start(vdev);
> > > +    if (ret < 0) {
> > > +        error_report("vhost-user-blk: vhost start failed: %s",
> > > +                     strerror(-ret));
> > > +        qemu_chr_fe_disconnect(&s->chardev);
> > > +        return;
> > > +    }
> > >
> > >      /* Kick right away to begin processing requests already in vring */
> > >      for (i = 0; i < s->dev.nvqs; i++) {
> > > @@ -271,13 +308,106 @@ static void vhost_user_blk_reset(VirtIODevice *vdev)
> > >      vhost_dev_free_inflight(s->inflight);
> > >  }
> > >
> > > +static int vhost_user_blk_connect(DeviceState *dev)
> > > +{
> > > +    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> > > +    VHostUserBlk *s = VHOST_USER_BLK(vdev);
> > > +    int ret = 0;
> > > +
> > > +    if (s->connected) {
> > > +        return 0;
> > > +    }
> > > +    s->connected = true;
> > > +
> > > +    s->dev.nvqs = s->num_queues;
> > > +    s->dev.vqs = s->vqs;
> > > +    s->dev.vq_index = 0;
> > > +    s->dev.backend_features = 0;
> > > +
> > > +    vhost_dev_set_config_notifier(&s->dev, &blk_ops);
> > > +
> > > +    ret = vhost_dev_init(&s->dev, s->vhost_user, VHOST_BACKEND_TYPE_USER, 0);
> > > +    if (ret < 0) {
> > > +        error_report("vhost-user-blk: vhost initialization failed: %s",
> > > +                     strerror(-ret));
> > > +        return ret;
> > > +    }
> > > +
> > > +    /* restore vhost state */
> > > +    if (s->should_start) {
> > > +        ret = vhost_user_blk_start(vdev);
> > > +        if (ret < 0) {
> > > +            error_report("vhost-user-blk: vhost start failed: %s",
> > > +                         strerror(-ret));
> > > +            return ret;
> > > +        }
> > > +    }
> > > +
> > > +    return 0;
> > > +}
> > > +
> > > +static void vhost_user_blk_disconnect(DeviceState *dev)
> > > +{
> > > +    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> > > +    VHostUserBlk *s = VHOST_USER_BLK(vdev);
> > > +
> > > +    if (!s->connected) {
> > > +        return;
> > > +    }
> > > +    s->connected = false;
> > > +
> > > +    if (s->dev.started) {
> > > +        vhost_user_blk_stop(vdev);
> > > +    }
> > > +
> > > +    vhost_dev_cleanup(&s->dev);
> > > +}
> > > +
> > > +static gboolean vhost_user_blk_watch(GIOChannel *chan, GIOCondition cond,
> > > +                                     void *opaque)
> > > +{
> > > +    DeviceState *dev = opaque;
> > > +    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> > > +    VHostUserBlk *s = VHOST_USER_BLK(vdev);
> > > +
> > > +    qemu_chr_fe_disconnect(&s->chardev);
> > > +
> > > +    return true;
> > > +}
> > > +
> > > +static void vhost_user_blk_event(void *opaque, int event)
> > > +{
> > > +    DeviceState *dev = opaque;
> > > +    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> > > +    VHostUserBlk *s = VHOST_USER_BLK(vdev);
> > > +
> > > +    switch (event) {
> > > +    case CHR_EVENT_OPENED:
> > > +        if (vhost_user_blk_connect(dev) < 0) {
> > > +            qemu_chr_fe_disconnect(&s->chardev);
> > > +            return;
> > > +        }
> > > +        s->watch = qemu_chr_fe_add_watch(&s->chardev, G_IO_HUP,
> > > +                                         vhost_user_blk_watch, dev);
> > > +        break;
> > > +    case CHR_EVENT_CLOSED:
> > > +        vhost_user_blk_disconnect(dev);
> > > +        if (s->watch) {
> > > +            g_source_remove(s->watch);
> > > +            s->watch = 0;
> > > +        }
> > > +        break;
> > > +    }
> > > +}
> > > +
> > > +
> > >  static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp)
> > >  {
> > >      VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> > >      VHostUserBlk *s = VHOST_USER_BLK(vdev);
> > >      VhostUserState *user;
> > > -    struct vhost_virtqueue *vqs = NULL;
> > >      int i, ret;
> > > +    Error *err = NULL;
> > >
> > >      if (!s->chardev.chr) {
> > >          error_setg(errp, "vhost-user-blk: chardev is mandatory");
> > > @@ -312,27 +442,28 @@ static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp)
> > >      }
> > >
> > >      s->inflight = g_new0(struct vhost_inflight, 1);
> > > -
> > > -    s->dev.nvqs = s->num_queues;
> > > -    s->dev.vqs = g_new(struct vhost_virtqueue, s->dev.nvqs);
> > > -    s->dev.vq_index = 0;
> > > -    s->dev.backend_features = 0;
> > > -    vqs = s->dev.vqs;
> > > -
> > > -    vhost_dev_set_config_notifier(&s->dev, &blk_ops);
> > > -
> > > -    ret = vhost_dev_init(&s->dev, s->vhost_user, VHOST_BACKEND_TYPE_USER, 0);
> > > -    if (ret < 0) {
> > > -        error_setg(errp, "vhost-user-blk: vhost initialization failed: %s",
> > > -                   strerror(-ret));
> > > -        goto virtio_err;
> > > -    }
> > > +    s->vqs = g_new(struct vhost_virtqueue, s->num_queues);
> > > +    s->watch = 0;
> > > +    s->should_start = false;
> > > +    s->connected = false;
> > > +
> > > +    qemu_chr_fe_set_handlers(&s->chardev,  NULL, NULL, vhost_user_blk_event,
> > > +                             NULL, (void *)dev, NULL, true);
> > > +
> > > +reconnect:
> > > +    do {
> > > +        if (qemu_chr_fe_wait_connected(&s->chardev, &err) < 0) {
> > > +            error_report_err(err);
> > > +            err = NULL;
> > > +            sleep(1);
> >
> > Seems arbitrary. Is this basically waiting until backend will reconnect?
> > Why not block until event on the fd triggers?
> >
> 
> Qemu is client in current vhost-user-blk implenment. Seems like
> backend will not try to connect.

Or just fail realize and make management retry?

> > Also, it looks like this will just block forever with no monitor input
> > and no way for user to figure out what is going on short of
> > crashing QEMU.
> >
> 
> Yes, because I want to support the case that backend starts after
> qemu. I can print some warning and add timeout on this.
> 
> Thanks,
> Yongji

Well a chardev already has a reconnect option.
Can you use that machinery?
IIUC it works without blocking QEMU.
Michael S. Tsirkin March 14, 2019, 11:18 a.m. UTC | #6
On Wed, Mar 13, 2019 at 10:47:08AM +0800, Yongji Xie wrote:
> On Wed, 13 Mar 2019 at 09:16, Michael S. Tsirkin <mst@redhat.com> wrote:
> >
> > On Thu, Feb 28, 2019 at 04:53:54PM +0800, elohimes@gmail.com wrote:
> > > From: Xie Yongji <xieyongji@baidu.com>
> > >
> > > Since we now support the message VHOST_USER_GET_INFLIGHT_FD
> > > and VHOST_USER_SET_INFLIGHT_FD. The backend is able to restart
> > > safely because it can track inflight I/O in shared memory.
> > > This patch allows qemu to reconnect the backend after
> > > connection closed.
> > >
> > > Signed-off-by: Xie Yongji <xieyongji@baidu.com>
> > > Signed-off-by: Ni Xun <nixun@baidu.com>
> > > Signed-off-by: Zhang Yu <zhangyu31@baidu.com>
> > > ---
> > >  hw/block/vhost-user-blk.c          | 205 +++++++++++++++++++++++------
> > >  include/hw/virtio/vhost-user-blk.h |   4 +
> > >  2 files changed, 167 insertions(+), 42 deletions(-)
> > >
> > > diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c
> > > index 9682df1a7b..539ea2e571 100644
> > > --- a/hw/block/vhost-user-blk.c
> > > +++ b/hw/block/vhost-user-blk.c
> > > @@ -103,7 +103,7 @@ const VhostDevConfigOps blk_ops = {
> > >      .vhost_dev_config_notifier = vhost_user_blk_handle_config_change,
> > >  };
> > >
> > > -static void vhost_user_blk_start(VirtIODevice *vdev)
> > > +static int vhost_user_blk_start(VirtIODevice *vdev)
> > >  {
> > >      VHostUserBlk *s = VHOST_USER_BLK(vdev);
> > >      BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
> > > @@ -112,13 +112,13 @@ static void vhost_user_blk_start(VirtIODevice *vdev)
> > >
> > >      if (!k->set_guest_notifiers) {
> > >          error_report("binding does not support guest notifiers");
> > > -        return;
> > > +        return -ENOSYS;
> > >      }
> > >
> > >      ret = vhost_dev_enable_notifiers(&s->dev, vdev);
> > >      if (ret < 0) {
> > >          error_report("Error enabling host notifiers: %d", -ret);
> > > -        return;
> > > +        return ret;
> > >      }
> > >
> > >      ret = k->set_guest_notifiers(qbus->parent, s->dev.nvqs, true);
> > > @@ -157,12 +157,13 @@ static void vhost_user_blk_start(VirtIODevice *vdev)
> > >          vhost_virtqueue_mask(&s->dev, vdev, i, false);
> > >      }
> > >
> > > -    return;
> > > +    return ret;
> > >
> > >  err_guest_notifiers:
> > >      k->set_guest_notifiers(qbus->parent, s->dev.nvqs, false);
> > >  err_host_notifiers:
> > >      vhost_dev_disable_notifiers(&s->dev, vdev);
> > > +    return ret;
> > >  }
> > >
> > >  static void vhost_user_blk_stop(VirtIODevice *vdev)
> > > @@ -181,7 +182,6 @@ static void vhost_user_blk_stop(VirtIODevice *vdev)
> > >      ret = k->set_guest_notifiers(qbus->parent, s->dev.nvqs, false);
> > >      if (ret < 0) {
> > >          error_report("vhost guest notifier cleanup failed: %d", ret);
> > > -        return;
> > >      }
> > >
> > >      vhost_dev_disable_notifiers(&s->dev, vdev);
> > > @@ -191,21 +191,43 @@ static void vhost_user_blk_set_status(VirtIODevice *vdev, uint8_t status)
> > >  {
> > >      VHostUserBlk *s = VHOST_USER_BLK(vdev);
> > >      bool should_start = status & VIRTIO_CONFIG_S_DRIVER_OK;
> > > +    int ret;
> > >
> > >      if (!vdev->vm_running) {
> > >          should_start = false;
> > >      }
> > >
> > > -    if (s->dev.started == should_start) {
> > > +    if (s->should_start == should_start) {
> > > +        return;
> > > +    }
> > > +
> > > +    if (!s->connected || s->dev.started == should_start) {
> > > +        s->should_start = should_start;
> > >          return;
> > >      }
> > >
> > >      if (should_start) {
> > > -        vhost_user_blk_start(vdev);
> > > +        s->should_start = true;
> > > +        /*
> > > +         * make sure vhost_user_blk_handle_output() ignores fake
> > > +         * guest kick by vhost_dev_enable_notifiers()
> > > +         */
> > > +        barrier();
> > > +        ret = vhost_user_blk_start(vdev);
> > > +        if (ret < 0) {
> > > +            error_report("vhost-user-blk: vhost start failed: %s",
> > > +                         strerror(-ret));
> > > +            qemu_chr_fe_disconnect(&s->chardev);
> > > +        }
> > >      } else {
> > >          vhost_user_blk_stop(vdev);
> > > +        /*
> > > +         * make sure vhost_user_blk_handle_output() ignore fake
> > > +         * guest kick by vhost_dev_disable_notifiers()
> > > +         */
> > > +        barrier();
> > > +        s->should_start = false;
> > >      }
> >
> > I don't understand the comment about ignoring fake guest kicks.
> > Please add an explanation of what the barrier does here.
> 
> We may get stack like this:
> 
> vhost_user_blk_stop()
>   vhost_dev_disable_notifiers()
>     virtio_bus_cleanup_host_notifier()
>       virtio_queue_host_notifier_read()
>         virtio_queue_notify_vq()  /* fake guest kick */
>           vhost_user_blk_handle_output()
> 
> vhost_user_blk_handle_output() will re-start vhost-user if
> should_start is false. This is not what we expect. The same applies
> to vhost_dev_enable_notifiers().

Well what does a compiler barrier have to do with it?

Anyway, it's there to avoid losing kicks e.g. across migration. If you
have something else that prevents losing kicks (e.g. migrate a "started"
flag) then add code to skip the notify - don't work around it.


> > And maybe a detailed explanation about what "fake kick"
> > is in the commit log.
> >
> 
> Sure.
> >
> > > -
> > >  }
> > >
> > >  static uint64_t vhost_user_blk_get_features(VirtIODevice *vdev,
> > > @@ -237,13 +259,22 @@ static uint64_t vhost_user_blk_get_features(VirtIODevice *vdev,
> > >  static void vhost_user_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq)
> > >  {
> > >      VHostUserBlk *s = VHOST_USER_BLK(vdev);
> > > -    int i;
> > > +    int i, ret;
> > >
> > >      if (!(virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1) &&
> > >          !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1))) {
> > >          return;
> > >      }
> > >
> > > +    if (s->should_start) {
> > > +        return;
> > > +    }
> > > +    s->should_start = true;
> > > +
> > > +    if (!s->connected) {
> > > +        return;
> > > +    }
> > > +
> > >      if (s->dev.started) {
> > >          return;
> > >      }
> > > @@ -251,7 +282,13 @@ static void vhost_user_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq)
> > >      /* Some guests kick before setting VIRTIO_CONFIG_S_DRIVER_OK so start
> > >       * vhost here instead of waiting for .set_status().
> > >       */
> > > -    vhost_user_blk_start(vdev);
> > > +    ret = vhost_user_blk_start(vdev);
> > > +    if (ret < 0) {
> > > +        error_report("vhost-user-blk: vhost start failed: %s",
> > > +                     strerror(-ret));
> > > +        qemu_chr_fe_disconnect(&s->chardev);
> > > +        return;
> > > +    }
> > >
> > >      /* Kick right away to begin processing requests already in vring */
> > >      for (i = 0; i < s->dev.nvqs; i++) {
> > > @@ -271,13 +308,106 @@ static void vhost_user_blk_reset(VirtIODevice *vdev)
> > >      vhost_dev_free_inflight(s->inflight);
> > >  }
> > >
> > > +static int vhost_user_blk_connect(DeviceState *dev)
> > > +{
> > > +    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> > > +    VHostUserBlk *s = VHOST_USER_BLK(vdev);
> > > +    int ret = 0;
> > > +
> > > +    if (s->connected) {
> > > +        return 0;
> > > +    }
> > > +    s->connected = true;
> > > +
> > > +    s->dev.nvqs = s->num_queues;
> > > +    s->dev.vqs = s->vqs;
> > > +    s->dev.vq_index = 0;
> > > +    s->dev.backend_features = 0;
> > > +
> > > +    vhost_dev_set_config_notifier(&s->dev, &blk_ops);
> > > +
> > > +    ret = vhost_dev_init(&s->dev, s->vhost_user, VHOST_BACKEND_TYPE_USER, 0);
> > > +    if (ret < 0) {
> > > +        error_report("vhost-user-blk: vhost initialization failed: %s",
> > > +                     strerror(-ret));
> > > +        return ret;
> > > +    }
> > > +
> > > +    /* restore vhost state */
> > > +    if (s->should_start) {
> > > +        ret = vhost_user_blk_start(vdev);
> > > +        if (ret < 0) {
> > > +            error_report("vhost-user-blk: vhost start failed: %s",
> > > +                         strerror(-ret));
> > > +            return ret;
> > > +        }
> > > +    }
> > > +
> > > +    return 0;
> > > +}
> > > +
> > > +static void vhost_user_blk_disconnect(DeviceState *dev)
> > > +{
> > > +    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> > > +    VHostUserBlk *s = VHOST_USER_BLK(vdev);
> > > +
> > > +    if (!s->connected) {
> > > +        return;
> > > +    }
> > > +    s->connected = false;
> > > +
> > > +    if (s->dev.started) {
> > > +        vhost_user_blk_stop(vdev);
> > > +    }
> > > +
> > > +    vhost_dev_cleanup(&s->dev);
> > > +}
> > > +
> > > +static gboolean vhost_user_blk_watch(GIOChannel *chan, GIOCondition cond,
> > > +                                     void *opaque)
> > > +{
> > > +    DeviceState *dev = opaque;
> > > +    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> > > +    VHostUserBlk *s = VHOST_USER_BLK(vdev);
> > > +
> > > +    qemu_chr_fe_disconnect(&s->chardev);
> > > +
> > > +    return true;
> > > +}
> > > +
> > > +static void vhost_user_blk_event(void *opaque, int event)
> > > +{
> > > +    DeviceState *dev = opaque;
> > > +    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> > > +    VHostUserBlk *s = VHOST_USER_BLK(vdev);
> > > +
> > > +    switch (event) {
> > > +    case CHR_EVENT_OPENED:
> > > +        if (vhost_user_blk_connect(dev) < 0) {
> > > +            qemu_chr_fe_disconnect(&s->chardev);
> > > +            return;
> > > +        }
> > > +        s->watch = qemu_chr_fe_add_watch(&s->chardev, G_IO_HUP,
> > > +                                         vhost_user_blk_watch, dev);
> > > +        break;
> > > +    case CHR_EVENT_CLOSED:
> > > +        vhost_user_blk_disconnect(dev);
> > > +        if (s->watch) {
> > > +            g_source_remove(s->watch);
> > > +            s->watch = 0;
> > > +        }
> > > +        break;
> > > +    }
> > > +}
> > > +
> > > +
> > >  static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp)
> > >  {
> > >      VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> > >      VHostUserBlk *s = VHOST_USER_BLK(vdev);
> > >      VhostUserState *user;
> > > -    struct vhost_virtqueue *vqs = NULL;
> > >      int i, ret;
> > > +    Error *err = NULL;
> > >
> > >      if (!s->chardev.chr) {
> > >          error_setg(errp, "vhost-user-blk: chardev is mandatory");
> > > @@ -312,27 +442,28 @@ static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp)
> > >      }
> > >
> > >      s->inflight = g_new0(struct vhost_inflight, 1);
> > > -
> > > -    s->dev.nvqs = s->num_queues;
> > > -    s->dev.vqs = g_new(struct vhost_virtqueue, s->dev.nvqs);
> > > -    s->dev.vq_index = 0;
> > > -    s->dev.backend_features = 0;
> > > -    vqs = s->dev.vqs;
> > > -
> > > -    vhost_dev_set_config_notifier(&s->dev, &blk_ops);
> > > -
> > > -    ret = vhost_dev_init(&s->dev, s->vhost_user, VHOST_BACKEND_TYPE_USER, 0);
> > > -    if (ret < 0) {
> > > -        error_setg(errp, "vhost-user-blk: vhost initialization failed: %s",
> > > -                   strerror(-ret));
> > > -        goto virtio_err;
> > > -    }
> > > +    s->vqs = g_new(struct vhost_virtqueue, s->num_queues);
> > > +    s->watch = 0;
> > > +    s->should_start = false;
> > > +    s->connected = false;
> > > +
> > > +    qemu_chr_fe_set_handlers(&s->chardev,  NULL, NULL, vhost_user_blk_event,
> > > +                             NULL, (void *)dev, NULL, true);
> > > +
> > > +reconnect:
> > > +    do {
> > > +        if (qemu_chr_fe_wait_connected(&s->chardev, &err) < 0) {
> > > +            error_report_err(err);
> > > +            err = NULL;
> > > +            sleep(1);
> > > +        }
> > > +    } while (!s->connected);
> > >
> > >      ret = vhost_dev_get_config(&s->dev, (uint8_t *)&s->blkcfg,
> > > -                              sizeof(struct virtio_blk_config));
> > > +                               sizeof(struct virtio_blk_config));
> > >      if (ret < 0) {
> > > -        error_setg(errp, "vhost-user-blk: get block config failed");
> > > -        goto vhost_err;
> > > +        error_report("vhost-user-blk: get block config failed");
> > > +        goto reconnect;
> > >      }
> > >
> > >      if (s->blkcfg.num_queues != s->num_queues) {
> > > @@ -340,29 +471,19 @@ static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp)
> > >      }
> > >
> > >      return;
> >
> > With this we end up with return at end of function
> > which makes no sense.
> >
> 
> Oh, yes. I will remove it.
> 
> > > -
> > > -vhost_err:
> > > -    vhost_dev_cleanup(&s->dev);
> > > -virtio_err:
> > > -    g_free(vqs);
> > > -    g_free(s->inflight);
> > > -    virtio_cleanup(vdev);
> > > -
> > > -    vhost_user_cleanup(user);
> > > -    g_free(user);
> > > -    s->vhost_user = NULL;
> > >  }
> > >
> > >  static void vhost_user_blk_device_unrealize(DeviceState *dev, Error **errp)
> > >  {
> > >      VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> > >      VHostUserBlk *s = VHOST_USER_BLK(dev);
> > > -    struct vhost_virtqueue *vqs = s->dev.vqs;
> > >
> > >      vhost_user_blk_set_status(vdev, 0);
> > > +    qemu_chr_fe_set_handlers(&s->chardev,  NULL, NULL, NULL,
> > > +                             NULL, NULL, NULL, false);
> > >      vhost_dev_cleanup(&s->dev);
> > >      vhost_dev_free_inflight(s->inflight);
> > > -    g_free(vqs);
> > > +    g_free(s->vqs);
> > >      g_free(s->inflight);
> > >      virtio_cleanup(vdev);
> > >
> > > diff --git a/include/hw/virtio/vhost-user-blk.h b/include/hw/virtio/vhost-user-blk.h
> > > index 445516604a..4849aa5eb5 100644
> > > --- a/include/hw/virtio/vhost-user-blk.h
> > > +++ b/include/hw/virtio/vhost-user-blk.h
> > > @@ -38,6 +38,10 @@ typedef struct VHostUserBlk {
> > >      struct vhost_dev dev;
> > >      struct vhost_inflight *inflight;
> > >      VhostUserState *vhost_user;
> > > +    struct vhost_virtqueue *vqs;
> > > +    guint watch;
> > > +    bool should_start;
> >
> > Please add comments about fields.
> > Also should_start seems to be set on guest activity.
> > Does it need to be migrated?
> > It would be better not to track guest state like this.
> > Why do we need it?
> > Pls add comments with an explanation.
> >
> 
> We can't rely on vhost_dev.started to track the guest's state in
> vhost_user_blk_set_status() when we enable reconnecting. For example,
> qemu will lose the guest's state if the guest stops the device while
> the backend is disconnected. So vhost_dev.started is used to track
> the backend's state and should_start is used to track the guest's
> state in this patch. And this variable is based on VirtIODevice.status,
> so I think it is not necessary to migrate it. What do you think about it?

If it's based on VirtIODevice.status then how about not keeping
it around at all and just calculating from VirtIODevice.status
when necessary?


> Thanks,
> Yongji
Daniel P. Berrangé March 14, 2019, 11:24 a.m. UTC | #7
On Tue, Mar 12, 2019 at 12:49:35PM -0400, Michael S. Tsirkin wrote:
> On Thu, Feb 28, 2019 at 04:53:54PM +0800, elohimes@gmail.com wrote:
> > From: Xie Yongji <xieyongji@baidu.com>
> > 
> > Since we now support the message VHOST_USER_GET_INFLIGHT_FD
> > and VHOST_USER_SET_INFLIGHT_FD. The backend is able to restart
> > safely because it can track inflight I/O in shared memory.
> > This patch allows qemu to reconnect the backend after
> > connection closed.
> > 
> > Signed-off-by: Xie Yongji <xieyongji@baidu.com>
> > Signed-off-by: Ni Xun <nixun@baidu.com>
> > Signed-off-by: Zhang Yu <zhangyu31@baidu.com>
> > ---
> >  hw/block/vhost-user-blk.c          | 205 +++++++++++++++++++++++------
> >  include/hw/virtio/vhost-user-blk.h |   4 +
> >  2 files changed, 167 insertions(+), 42 deletions(-)


> >  static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp)
> >  {
> >      VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> >      VHostUserBlk *s = VHOST_USER_BLK(vdev);
> >      VhostUserState *user;
> > -    struct vhost_virtqueue *vqs = NULL;
> >      int i, ret;
> > +    Error *err = NULL;
> >  
> >      if (!s->chardev.chr) {
> >          error_setg(errp, "vhost-user-blk: chardev is mandatory");
> > @@ -312,27 +442,28 @@ static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp)
> >      }
> >  
> >      s->inflight = g_new0(struct vhost_inflight, 1);
> > -
> > -    s->dev.nvqs = s->num_queues;
> > -    s->dev.vqs = g_new(struct vhost_virtqueue, s->dev.nvqs);
> > -    s->dev.vq_index = 0;
> > -    s->dev.backend_features = 0;
> > -    vqs = s->dev.vqs;
> > -
> > -    vhost_dev_set_config_notifier(&s->dev, &blk_ops);
> > -
> > -    ret = vhost_dev_init(&s->dev, s->vhost_user, VHOST_BACKEND_TYPE_USER, 0);
> > -    if (ret < 0) {
> > -        error_setg(errp, "vhost-user-blk: vhost initialization failed: %s",
> > -                   strerror(-ret));
> > -        goto virtio_err;
> > -    }
> > +    s->vqs = g_new(struct vhost_virtqueue, s->num_queues);
> > +    s->watch = 0;
> > +    s->should_start = false;
> > +    s->connected = false;
> > +
> > +    qemu_chr_fe_set_handlers(&s->chardev,  NULL, NULL, vhost_user_blk_event,
> > +                             NULL, (void *)dev, NULL, true);
> > +
> > +reconnect:
> > +    do {
> > +        if (qemu_chr_fe_wait_connected(&s->chardev, &err) < 0) {
> > +            error_report_err(err);
> > +            err = NULL;
> > +            sleep(1);
> 
> Seems arbitrary. Is this basically waiting until backend will reconnect?
> Why not block until event on the fd triggers?
> 
> Also, it looks like this will just block forever with no monitor input
> and no way for user to figure out what is going on short of
> crashing QEMU.

FWIW, the current vhost-user-net device does exactly the same thing
with calling qemu_chr_fe_wait_connected during its realize() function.

Can't say I'm thrilled about the existing usage either, but I don't
see a particularly nice alternative if it isn't possible to realize
the device without having a backend available.

I don't think this is an especially serious problem during cold startup
though, since one doesn't expect the monitor to become responsive until
the device model is fully realized & all backends connected.

The real concern is if you need to hotplug this kind of device
at runtime. In that case blocking the main loop / monitor is a
serious problem that will actively harm both the guest OS and
mgmt applications.

The vhost-user-net device will already suffer from that.

To solve the problem with hotplug, the mgmt app would need to
plug the chardev backend, then wait for it to become connected,
and only then plug the device frontend. In that case we would not
want this loop. We'd expect it to use the already connected
chardev, and fail the realize() function if the chardev has
lost its connection.

Regards,
Daniel
Michael S. Tsirkin March 14, 2019, 11:34 a.m. UTC | #8
On Thu, Mar 14, 2019 at 11:24:22AM +0000, Daniel P. Berrangé wrote:
> On Tue, Mar 12, 2019 at 12:49:35PM -0400, Michael S. Tsirkin wrote:
> > On Thu, Feb 28, 2019 at 04:53:54PM +0800, elohimes@gmail.com wrote:
> > > From: Xie Yongji <xieyongji@baidu.com>
> > > 
> > > Since we now support the message VHOST_USER_GET_INFLIGHT_FD
> > > and VHOST_USER_SET_INFLIGHT_FD. The backend is able to restart
> > > safely because it can track inflight I/O in shared memory.
> > > This patch allows qemu to reconnect the backend after
> > > connection closed.
> > > 
> > > Signed-off-by: Xie Yongji <xieyongji@baidu.com>
> > > Signed-off-by: Ni Xun <nixun@baidu.com>
> > > Signed-off-by: Zhang Yu <zhangyu31@baidu.com>
> > > ---
> > >  hw/block/vhost-user-blk.c          | 205 +++++++++++++++++++++++------
> > >  include/hw/virtio/vhost-user-blk.h |   4 +
> > >  2 files changed, 167 insertions(+), 42 deletions(-)
> 
> 
> > >  static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp)
> > >  {
> > >      VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> > >      VHostUserBlk *s = VHOST_USER_BLK(vdev);
> > >      VhostUserState *user;
> > > -    struct vhost_virtqueue *vqs = NULL;
> > >      int i, ret;
> > > +    Error *err = NULL;
> > >  
> > >      if (!s->chardev.chr) {
> > >          error_setg(errp, "vhost-user-blk: chardev is mandatory");
> > > @@ -312,27 +442,28 @@ static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp)
> > >      }
> > >  
> > >      s->inflight = g_new0(struct vhost_inflight, 1);
> > > -
> > > -    s->dev.nvqs = s->num_queues;
> > > -    s->dev.vqs = g_new(struct vhost_virtqueue, s->dev.nvqs);
> > > -    s->dev.vq_index = 0;
> > > -    s->dev.backend_features = 0;
> > > -    vqs = s->dev.vqs;
> > > -
> > > -    vhost_dev_set_config_notifier(&s->dev, &blk_ops);
> > > -
> > > -    ret = vhost_dev_init(&s->dev, s->vhost_user, VHOST_BACKEND_TYPE_USER, 0);
> > > -    if (ret < 0) {
> > > -        error_setg(errp, "vhost-user-blk: vhost initialization failed: %s",
> > > -                   strerror(-ret));
> > > -        goto virtio_err;
> > > -    }
> > > +    s->vqs = g_new(struct vhost_virtqueue, s->num_queues);
> > > +    s->watch = 0;
> > > +    s->should_start = false;
> > > +    s->connected = false;
> > > +
> > > +    qemu_chr_fe_set_handlers(&s->chardev,  NULL, NULL, vhost_user_blk_event,
> > > +                             NULL, (void *)dev, NULL, true);
> > > +
> > > +reconnect:
> > > +    do {
> > > +        if (qemu_chr_fe_wait_connected(&s->chardev, &err) < 0) {
> > > +            error_report_err(err);
> > > +            err = NULL;
> > > +            sleep(1);
> > 
> > Seems arbitrary. Is this basically waiting until backend will reconnect?
> > Why not block until event on the fd triggers?
> > 
> > Also, it looks like this will just block forever with no monitor input
> > and no way for user to figure out what is going on short of
> > crashing QEMU.
> 
> FWIW, the current vhost-user-net device does exactly the same thing
> with calling qemu_chr_fe_wait_connected during its realize() function.

Hmm yes. It doesn't sleep for an arbitrary 1 sec so less of an eyesore :)

> Can't say I'm thrilled about the existing usage either, but I don't
> see a particularly nice alternative if it isn't possible to realize
> the device without having a backend available.
> 
> I don't think this an especially serious problem during cold startup
> though since doesn't expect the monitor to become responsive until
> the device model is fully realized & all backends connected.
> 
> The real concern is if you need to hotplug this kind of device
> at runtime. In that case blocking the main loop / monitor is a
> serious problem that will actively harm both the guest OS and
> mgmt applications.
> 
> The vhost-user-net device will already suffer from that.

Right.

> To solve the problem with hotplug, the mgmt app would need to
> plug the chardev backend, then wait for it to become connected,
> and only then plug the device frontend. In that case we would not
> want this loop. We'd expect it to use the already connected
> chardev, and fail the realize() function if the chardev has
> lost its connection.
> 
> Regards,
> Daniel

I think as step one we should maybe just busy-wait with no sleep(1).
But respecting reconnect would be nice too.

And I think if we can't respect nowait we should
detect it and fail, in both net and block ...


> -- 
> |: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
> |: https://libvirt.org         -o-            https://fstop138.berrange.com :|
> |: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|
Daniel P. Berrangé March 14, 2019, 11:43 a.m. UTC | #9
On Thu, Mar 14, 2019 at 07:34:03AM -0400, Michael S. Tsirkin wrote:
> On Thu, Mar 14, 2019 at 11:24:22AM +0000, Daniel P. Berrangé wrote:
> > On Tue, Mar 12, 2019 at 12:49:35PM -0400, Michael S. Tsirkin wrote:
> > > On Thu, Feb 28, 2019 at 04:53:54PM +0800, elohimes@gmail.com wrote:
> > > > From: Xie Yongji <xieyongji@baidu.com>
> > > > 
> > > > Since we now support the message VHOST_USER_GET_INFLIGHT_FD
> > > > and VHOST_USER_SET_INFLIGHT_FD. The backend is able to restart
> > > > safely because it can track inflight I/O in shared memory.
> > > > This patch allows qemu to reconnect the backend after
> > > > connection closed.
> > > > 
> > > > Signed-off-by: Xie Yongji <xieyongji@baidu.com>
> > > > Signed-off-by: Ni Xun <nixun@baidu.com>
> > > > Signed-off-by: Zhang Yu <zhangyu31@baidu.com>
> > > > ---
> > > >  hw/block/vhost-user-blk.c          | 205 +++++++++++++++++++++++------
> > > >  include/hw/virtio/vhost-user-blk.h |   4 +
> > > >  2 files changed, 167 insertions(+), 42 deletions(-)
> > 
> > 
> > > >  static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp)
> > > >  {
> > > >      VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> > > >      VHostUserBlk *s = VHOST_USER_BLK(vdev);
> > > >      VhostUserState *user;
> > > > -    struct vhost_virtqueue *vqs = NULL;
> > > >      int i, ret;
> > > > +    Error *err = NULL;
> > > >  
> > > >      if (!s->chardev.chr) {
> > > >          error_setg(errp, "vhost-user-blk: chardev is mandatory");
> > > > @@ -312,27 +442,28 @@ static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp)
> > > >      }
> > > >  
> > > >      s->inflight = g_new0(struct vhost_inflight, 1);
> > > > -
> > > > -    s->dev.nvqs = s->num_queues;
> > > > -    s->dev.vqs = g_new(struct vhost_virtqueue, s->dev.nvqs);
> > > > -    s->dev.vq_index = 0;
> > > > -    s->dev.backend_features = 0;
> > > > -    vqs = s->dev.vqs;
> > > > -
> > > > -    vhost_dev_set_config_notifier(&s->dev, &blk_ops);
> > > > -
> > > > -    ret = vhost_dev_init(&s->dev, s->vhost_user, VHOST_BACKEND_TYPE_USER, 0);
> > > > -    if (ret < 0) {
> > > > -        error_setg(errp, "vhost-user-blk: vhost initialization failed: %s",
> > > > -                   strerror(-ret));
> > > > -        goto virtio_err;
> > > > -    }
> > > > +    s->vqs = g_new(struct vhost_virtqueue, s->num_queues);
> > > > +    s->watch = 0;
> > > > +    s->should_start = false;
> > > > +    s->connected = false;
> > > > +
> > > > +    qemu_chr_fe_set_handlers(&s->chardev,  NULL, NULL, vhost_user_blk_event,
> > > > +                             NULL, (void *)dev, NULL, true);
> > > > +
> > > > +reconnect:
> > > > +    do {
> > > > +        if (qemu_chr_fe_wait_connected(&s->chardev, &err) < 0) {
> > > > +            error_report_err(err);
> > > > +            err = NULL;
> > > > +            sleep(1);
> > > 
> > > Seems arbitrary. Is this basically waiting until backend will reconnect?
> > > Why not block until event on the fd triggers?
> > > 
> > > Also, it looks like this will just block forever with no monitor input
> > > and no way for user to figure out what is going on short of
> > > crashing QEMU.
> > 
> > FWIW, the current vhost-user-net device does exactly the same thing
> > with calling qemu_chr_fe_wait_connected during its realize() function.
> 
> Hmm yes. It doesn't sleep for an arbitrary 1 sec so less of an eyesore :)

The sleep(1) in this patch simply needs to be removed. I think that
probably dates from when it was written against the earlier broken
version of qemu_chr_fe_wait_connected(). That would not correctly
deal with the "reconnect" flag, and so needing this loop with a sleep
in it.

In fact the while loop can be removed as well in this code. It just
needs to call qemu_chr_fe_wait_connected() once. It is guaranteed
to have a connected peer once that returns 0.

qemu_chr_fe_wait_connected() only returns -1 if operating in
client mode, and it failed to connect and reconnect is *not*
requested. In such a case the caller should honour the failure and
quit, not loop to retry.


The reason vhost-user-net does a loop is because once it has a
connection it tries to do a protocol handshake, and if that
handshake fails it closes the chardev and tries to connect
again. That's not the case in this blk code so the loop is
not needed.

> > Can't say I'm thrilled about the existing usage either, but I don't
> > see a particularly nice alternative if it isn't possible to realize
> > the device without having a backend available.
> > 
> > I don't think this an especially serious problem during cold startup
> > though since doesn't expect the monitor to become responsive until
> > the device model is fully realized & all backends connected.
> > 
> > The real concern is if you need to hotplug this kind of device
> > at runtime. In that case blocking the main loop / monitor is a
> > serious problem that will actively harm both the guest OS and
> > mgmt applications.
> > 
> > The vhost-user-net device will already suffer from that.
> 
> Right.
> 
> > To solve the problem with hotplug, the mgmt app would need to
> > plug the chardev backend, then wait for it to become connected,
> > and only then plug the device frontend. In that case we would not
> > want this loop. We'd expect it to use the already connected
> > chardev, and fail the realize() function if the chardev has
> > lost its connection.
> 
> I think as step one we should maybe just busy-wait with no sleep(1).
> But respecting reconnect would be nice too.

As above I don't see a need to loop at all.

> And I think if we can't respect nowait we should
> detect it and fail, in both net and block ...

If the chardev is in server mode with "wait" set, or in client mode
with reconnect not set, then there's actually no need to call
qemu_chr_fe_wait_connected at all. The qemu_chr_fe_wait_connected is
only needed if the chardev is a server with "nowait" set or a client
with "reconnect" set, or if the caller explicitly closes the existing
connection & wants to establish a new one.

Regards,
Daniel
Michael S. Tsirkin March 14, 2019, 12:11 p.m. UTC | #10
On Thu, Mar 14, 2019 at 11:43:26AM +0000, Daniel P. Berrangé wrote:
> On Thu, Mar 14, 2019 at 07:34:03AM -0400, Michael S. Tsirkin wrote:
> > On Thu, Mar 14, 2019 at 11:24:22AM +0000, Daniel P. Berrangé wrote:
> > > On Tue, Mar 12, 2019 at 12:49:35PM -0400, Michael S. Tsirkin wrote:
> > > > On Thu, Feb 28, 2019 at 04:53:54PM +0800, elohimes@gmail.com wrote:
> > > > > From: Xie Yongji <xieyongji@baidu.com>
> > > > > 
> > > > > Since we now support the message VHOST_USER_GET_INFLIGHT_FD
> > > > > and VHOST_USER_SET_INFLIGHT_FD. The backend is able to restart
> > > > > safely because it can track inflight I/O in shared memory.
> > > > > This patch allows qemu to reconnect the backend after
> > > > > connection closed.
> > > > > 
> > > > > Signed-off-by: Xie Yongji <xieyongji@baidu.com>
> > > > > Signed-off-by: Ni Xun <nixun@baidu.com>
> > > > > Signed-off-by: Zhang Yu <zhangyu31@baidu.com>
> > > > > ---
> > > > >  hw/block/vhost-user-blk.c          | 205 +++++++++++++++++++++++------
> > > > >  include/hw/virtio/vhost-user-blk.h |   4 +
> > > > >  2 files changed, 167 insertions(+), 42 deletions(-)
> > > 
> > > 
> > > > >  static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp)
> > > > >  {
> > > > >      VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> > > > >      VHostUserBlk *s = VHOST_USER_BLK(vdev);
> > > > >      VhostUserState *user;
> > > > > -    struct vhost_virtqueue *vqs = NULL;
> > > > >      int i, ret;
> > > > > +    Error *err = NULL;
> > > > >  
> > > > >      if (!s->chardev.chr) {
> > > > >          error_setg(errp, "vhost-user-blk: chardev is mandatory");
> > > > > @@ -312,27 +442,28 @@ static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp)
> > > > >      }
> > > > >  
> > > > >      s->inflight = g_new0(struct vhost_inflight, 1);
> > > > > -
> > > > > -    s->dev.nvqs = s->num_queues;
> > > > > -    s->dev.vqs = g_new(struct vhost_virtqueue, s->dev.nvqs);
> > > > > -    s->dev.vq_index = 0;
> > > > > -    s->dev.backend_features = 0;
> > > > > -    vqs = s->dev.vqs;
> > > > > -
> > > > > -    vhost_dev_set_config_notifier(&s->dev, &blk_ops);
> > > > > -
> > > > > -    ret = vhost_dev_init(&s->dev, s->vhost_user, VHOST_BACKEND_TYPE_USER, 0);
> > > > > -    if (ret < 0) {
> > > > > -        error_setg(errp, "vhost-user-blk: vhost initialization failed: %s",
> > > > > -                   strerror(-ret));
> > > > > -        goto virtio_err;
> > > > > -    }
> > > > > +    s->vqs = g_new(struct vhost_virtqueue, s->num_queues);
> > > > > +    s->watch = 0;
> > > > > +    s->should_start = false;
> > > > > +    s->connected = false;
> > > > > +
> > > > > +    qemu_chr_fe_set_handlers(&s->chardev,  NULL, NULL, vhost_user_blk_event,
> > > > > +                             NULL, (void *)dev, NULL, true);
> > > > > +
> > > > > +reconnect:
> > > > > +    do {
> > > > > +        if (qemu_chr_fe_wait_connected(&s->chardev, &err) < 0) {
> > > > > +            error_report_err(err);
> > > > > +            err = NULL;
> > > > > +            sleep(1);
> > > > 
> > > > Seems arbitrary. Is this basically waiting until backend will reconnect?
> > > > Why not block until event on the fd triggers?
> > > > 
> > > > Also, it looks like this will just block forever with no monitor input
> > > > and no way for user to figure out what is going on short of
> > > > crashing QEMU.
> > > 
> > > FWIW, the current vhost-user-net device does exactly the same thing
> > > with calling qemu_chr_fe_wait_connected during its realize() function.
> > 
> > Hmm yes. It doesn't sleep for an arbitrary 1 sec so less of an eyesore :)
> 
> The sleep(1) in this patch simply needs to be removed. I think that
> probably dates from when it was written against the earlier broken
> version of qemu_chr_fe_wait_connected(). That would not correctly
> deal with the "reconnect" flag, and so needing this loop with a sleep
> in it.
> 
> In fact the while loop can be removed as well in this code. It just
> needs to call qemu_chr_fe_wait_connected() once. It is guaranteed
> to have a connected peer once that returns 0.
> 
> qemu_chr_fe_wait_connected() only returns -1 if the operating in
> client mode, and it failed to connect and reconnect is *not*
> requested. In such case the caller should honour the failure and
> quit, not loop to retry.
> 
> 
> The reason vhost-user-net does a loop is because once it has a
> connection it tries todo a protocol handshake, and if that
> handshake fails it closes the chardev and tries to connect
> again. That's not the case in this blk code os the loop is
> not needed.
> 
> > > Can't say I'm thrilled about the existing usage either, but I don't
> > > see a particularly nice alternative if it isn't possible to realize
> > > the device without having a backend available.
> > > 
> > > I don't think this an especially serious problem during cold startup
> > > though since doesn't expect the monitor to become responsive until
> > > the device model is fully realized & all backends connected.
> > > 
> > > The real concern is if you need to hotplug this kind of device
> > > at runtime. In that case blocking the main loop / monitor is a
> > > serious problem that will actively harm both the guest OS and
> > > mgmt applications.
> > > 
> > > The vhost-user-net device will already suffer from that.
> > 
> > Right.
> > 
> > > To solve the problem with hotplug, the mgmt app would need to
> > > plug the chardev backend, then wait for it to become connected,
> > > and only then plug the device frontend. In that case we would not
> > > want this loop. We'd expect it to use the already connected
> > > chardev, and fail the realize() function if the chardev has
> > > lost its connection.
> > 
> > I think as step one we should maybe just busy-wait with no sleep(1).
> > But respecting reconnect would be nice too.
> 
> As above I don't see a need to loop at all.
> 
> > And I think if we can't respect nowait we should
> > detect it and fail, in both net and block ...
> 
> If the chardev is in server mode with "wait" set, or in client mode
> with reconnect not set, then there's actually no need to call
> qemu_chr_fe_wait_connected at all. The qemu_chr_fe_wait_connected is
> only needed if the chardev is a server with "nowait" set or a client
> with "reconnect" set, or if the caller explicitly closes the existing
> connection & wants to establish a new one.
> 
> Regards,
> Daniel

Thanks for the comments Daniel!

> -- 
> |: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
> |: https://libvirt.org         -o-            https://fstop138.berrange.com :|
> |: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|
Yury Kotov March 14, 2019, 12:31 p.m. UTC | #11
Hi,

14.03.2019, 14:44, "Daniel P. Berrangé" <berrange@redhat.com>:
> On Thu, Mar 14, 2019 at 07:34:03AM -0400, Michael S. Tsirkin wrote:
>>  On Thu, Mar 14, 2019 at 11:24:22AM +0000, Daniel P. Berrangé wrote:
>>  > On Tue, Mar 12, 2019 at 12:49:35PM -0400, Michael S. Tsirkin wrote:
>>  > > On Thu, Feb 28, 2019 at 04:53:54PM +0800, elohimes@gmail.com wrote:
>>  > > > From: Xie Yongji <xieyongji@baidu.com>
>>  > > >
>>  > > > Since we now support the message VHOST_USER_GET_INFLIGHT_FD
>>  > > > and VHOST_USER_SET_INFLIGHT_FD. The backend is able to restart
>>  > > > safely because it can track inflight I/O in shared memory.
>>  > > > This patch allows qemu to reconnect the backend after
>>  > > > connection closed.
>>  > > >
>>  > > > Signed-off-by: Xie Yongji <xieyongji@baidu.com>
>>  > > > Signed-off-by: Ni Xun <nixun@baidu.com>
>>  > > > Signed-off-by: Zhang Yu <zhangyu31@baidu.com>
>>  > > > ---
>>  > > > hw/block/vhost-user-blk.c | 205 +++++++++++++++++++++++------
>>  > > > include/hw/virtio/vhost-user-blk.h | 4 +
>>  > > > 2 files changed, 167 insertions(+), 42 deletions(-)
>>  >
>>  >
>>  > > > static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp)
>>  > > > {
>>  > > > VirtIODevice *vdev = VIRTIO_DEVICE(dev);
>>  > > > VHostUserBlk *s = VHOST_USER_BLK(vdev);
>>  > > > VhostUserState *user;
>>  > > > - struct vhost_virtqueue *vqs = NULL;
>>  > > > int i, ret;
>>  > > > + Error *err = NULL;
>>  > > >
>>  > > > if (!s->chardev.chr) {
>>  > > > error_setg(errp, "vhost-user-blk: chardev is mandatory");
>>  > > > @@ -312,27 +442,28 @@ static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp)
>>  > > > }
>>  > > >
>>  > > > s->inflight = g_new0(struct vhost_inflight, 1);
>>  > > > -
>>  > > > - s->dev.nvqs = s->num_queues;
>>  > > > - s->dev.vqs = g_new(struct vhost_virtqueue, s->dev.nvqs);
>>  > > > - s->dev.vq_index = 0;
>>  > > > - s->dev.backend_features = 0;
>>  > > > - vqs = s->dev.vqs;
>>  > > > -
>>  > > > - vhost_dev_set_config_notifier(&s->dev, &blk_ops);
>>  > > > -
>>  > > > - ret = vhost_dev_init(&s->dev, s->vhost_user, VHOST_BACKEND_TYPE_USER, 0);
>>  > > > - if (ret < 0) {
>>  > > > - error_setg(errp, "vhost-user-blk: vhost initialization failed: %s",
>>  > > > - strerror(-ret));
>>  > > > - goto virtio_err;
>>  > > > - }
>>  > > > + s->vqs = g_new(struct vhost_virtqueue, s->num_queues);
>>  > > > + s->watch = 0;
>>  > > > + s->should_start = false;
>>  > > > + s->connected = false;
>>  > > > +
>>  > > > + qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL, vhost_user_blk_event,
>>  > > > + NULL, (void *)dev, NULL, true);
>>  > > > +
>>  > > > +reconnect:
>>  > > > + do {
>>  > > > + if (qemu_chr_fe_wait_connected(&s->chardev, &err) < 0) {
>>  > > > + error_report_err(err);
>>  > > > + err = NULL;
>>  > > > + sleep(1);
>>  > >
>>  > > Seems arbitrary. Is this basically waiting until backend will reconnect?
>>  > > Why not block until event on the fd triggers?
>>  > >
>>  > > Also, it looks like this will just block forever with no monitor input
>>  > > and no way for user to figure out what is going on short of
>>  > > crashing QEMU.
>>  >
>>  > FWIW, the current vhost-user-net device does exactly the same thing
>>  > with calling qemu_chr_fe_wait_connected during its realize() function.
>>
>>  Hmm yes. It doesn't sleep for an arbitrary 1 sec so less of an eyesore :)
>
> The sleep(1) in this patch simply needs to be removed. I think that
> probably dates from when it was written against the earlier broken
> version of qemu_chr_fe_wait_connected(). That would not correctly
> deal with the "reconnect" flag, and so needing this loop with a sleep
> in it.
>
> In fact the while loop can be removed as well in this code. It just
> needs to call qemu_chr_fe_wait_connected() once. It is guaranteed
> to have a connected peer once that returns 0.
>
> qemu_chr_fe_wait_connected() only returns -1 if the operating in
> client mode, and it failed to connect and reconnect is *not*
> requested. In such case the caller should honour the failure and
> quit, not loop to retry.
>
> The reason vhost-user-net does a loop is because once it has a
> connection it tries to do a protocol handshake, and if that
> handshake fails it closes the chardev and tries to connect
> again. That's not the case in this blk code so the loop is
> not needed.
>

But vhost-user-blk also has a handshake in device realize. What happens if the
connection is broken during realization? IIUC we have to retry the handshake in
such a case, just like vhost-user-net.

>>  > Can't say I'm thrilled about the existing usage either, but I don't
>>  > see a particularly nice alternative if it isn't possible to realize
>>  > the device without having a backend available.
>>  >
>>  > I don't think this is an especially serious problem during cold startup
>>  > though, since one doesn't expect the monitor to become responsive until
>>  > the device model is fully realized & all backends connected.
>>  >
>>  > The real concern is if you need to hotplug this kind of device
>>  > at runtime. In that case blocking the main loop / monitor is a
>>  > serious problem that will actively harm both the guest OS and
>>  > mgmt applications.
>>  >
>>  > The vhost-user-net device will already suffer from that.
>>
>>  Right.
>>
>>  > To solve the problem with hotplug, the mgmt app would need to
>>  > plug the chardev backend, then wait for it to become connected,
>>  > and only then plug the device frontend. In that case we would not
>>  > want this loop. We'd expect it to use the already connected
>>  > chardev, and fail the realize() function if the chardev has
>>  > lost its connection.
>>
>>  I think as step one we should maybe just busy-wait with no sleep(1).
>>  But respecting reconnect would be nice too.
>
> As above I don't see a need to loop at all.
>
>>  And I think if we can't respect nowait we should
>>  detect it and fail, in both net and block ...
>
> If the chardev is in server mode with "wait" set, or in client mode
> with reconnect not set, then there's actually no need to call
> qemu_chr_fe_wait_connected at all. The qemu_chr_fe_wait_connected is
> only needed if the chardev is a server with "nowait" set or a client
> with "reconnect" set, or if the caller explicitly closes the existing
> connection & wants to establish a new one.
>
> Regards,
> Daniel
> --
> |: https://berrange.com -o- https://www.flickr.com/photos/dberrange :|
> |: https://libvirt.org -o- https://fstop138.berrange.com :|
> |: https://entangle-photo.org -o- https://www.instagram.com/dberrange :|

Regards,
Yury
Yongji Xie March 15, 2019, 2:46 a.m. UTC | #12
On Thu, 14 Mar 2019 at 19:18, Michael S. Tsirkin <mst@redhat.com> wrote:
>
> On Wed, Mar 13, 2019 at 10:47:08AM +0800, Yongji Xie wrote:
> > On Wed, 13 Mar 2019 at 09:16, Michael S. Tsirkin <mst@redhat.com> wrote:
> > >
> > > On Thu, Feb 28, 2019 at 04:53:54PM +0800, elohimes@gmail.com wrote:
> > > > From: Xie Yongji <xieyongji@baidu.com>
> > > >
> > > > Since we now support the message VHOST_USER_GET_INFLIGHT_FD
> > > > and VHOST_USER_SET_INFLIGHT_FD. The backend is able to restart
> > > > safely because it can track inflight I/O in shared memory.
> > > > This patch allows qemu to reconnect the backend after
> > > > connection closed.
> > > >
> > > > Signed-off-by: Xie Yongji <xieyongji@baidu.com>
> > > > Signed-off-by: Ni Xun <nixun@baidu.com>
> > > > Signed-off-by: Zhang Yu <zhangyu31@baidu.com>
> > > > ---
> > > >  hw/block/vhost-user-blk.c          | 205 +++++++++++++++++++++++------
> > > >  include/hw/virtio/vhost-user-blk.h |   4 +
> > > >  2 files changed, 167 insertions(+), 42 deletions(-)
> > > >
> > > > diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c
> > > > index 9682df1a7b..539ea2e571 100644
> > > > --- a/hw/block/vhost-user-blk.c
> > > > +++ b/hw/block/vhost-user-blk.c
> > > > @@ -103,7 +103,7 @@ const VhostDevConfigOps blk_ops = {
> > > >      .vhost_dev_config_notifier = vhost_user_blk_handle_config_change,
> > > >  };
> > > >
> > > > -static void vhost_user_blk_start(VirtIODevice *vdev)
> > > > +static int vhost_user_blk_start(VirtIODevice *vdev)
> > > >  {
> > > >      VHostUserBlk *s = VHOST_USER_BLK(vdev);
> > > >      BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
> > > > @@ -112,13 +112,13 @@ static void vhost_user_blk_start(VirtIODevice *vdev)
> > > >
> > > >      if (!k->set_guest_notifiers) {
> > > >          error_report("binding does not support guest notifiers");
> > > > -        return;
> > > > +        return -ENOSYS;
> > > >      }
> > > >
> > > >      ret = vhost_dev_enable_notifiers(&s->dev, vdev);
> > > >      if (ret < 0) {
> > > >          error_report("Error enabling host notifiers: %d", -ret);
> > > > -        return;
> > > > +        return ret;
> > > >      }
> > > >
> > > >      ret = k->set_guest_notifiers(qbus->parent, s->dev.nvqs, true);
> > > > @@ -157,12 +157,13 @@ static void vhost_user_blk_start(VirtIODevice *vdev)
> > > >          vhost_virtqueue_mask(&s->dev, vdev, i, false);
> > > >      }
> > > >
> > > > -    return;
> > > > +    return ret;
> > > >
> > > >  err_guest_notifiers:
> > > >      k->set_guest_notifiers(qbus->parent, s->dev.nvqs, false);
> > > >  err_host_notifiers:
> > > >      vhost_dev_disable_notifiers(&s->dev, vdev);
> > > > +    return ret;
> > > >  }
> > > >
> > > >  static void vhost_user_blk_stop(VirtIODevice *vdev)
> > > > @@ -181,7 +182,6 @@ static void vhost_user_blk_stop(VirtIODevice *vdev)
> > > >      ret = k->set_guest_notifiers(qbus->parent, s->dev.nvqs, false);
> > > >      if (ret < 0) {
> > > >          error_report("vhost guest notifier cleanup failed: %d", ret);
> > > > -        return;
> > > >      }
> > > >
> > > >      vhost_dev_disable_notifiers(&s->dev, vdev);
> > > > @@ -191,21 +191,43 @@ static void vhost_user_blk_set_status(VirtIODevice *vdev, uint8_t status)
> > > >  {
> > > >      VHostUserBlk *s = VHOST_USER_BLK(vdev);
> > > >      bool should_start = status & VIRTIO_CONFIG_S_DRIVER_OK;
> > > > +    int ret;
> > > >
> > > >      if (!vdev->vm_running) {
> > > >          should_start = false;
> > > >      }
> > > >
> > > > -    if (s->dev.started == should_start) {
> > > > +    if (s->should_start == should_start) {
> > > > +        return;
> > > > +    }
> > > > +
> > > > +    if (!s->connected || s->dev.started == should_start) {
> > > > +        s->should_start = should_start;
> > > >          return;
> > > >      }
> > > >
> > > >      if (should_start) {
> > > > -        vhost_user_blk_start(vdev);
> > > > +        s->should_start = true;
> > > > +        /*
> > > > +         * make sure vhost_user_blk_handle_output() ignores fake
> > > > +         * guest kick by vhost_dev_enable_notifiers()
> > > > +         */
> > > > +        barrier();
> > > > +        ret = vhost_user_blk_start(vdev);
> > > > +        if (ret < 0) {
> > > > +            error_report("vhost-user-blk: vhost start failed: %s",
> > > > +                         strerror(-ret));
> > > > +            qemu_chr_fe_disconnect(&s->chardev);
> > > > +        }
> > > >      } else {
> > > >          vhost_user_blk_stop(vdev);
> > > > +        /*
> > > > +         * make sure vhost_user_blk_handle_output() ignore fake
> > > > +         * guest kick by vhost_dev_disable_notifiers()
> > > > +         */
> > > > +        barrier();
> > > > +        s->should_start = false;
> > > >      }
> > >
> > > I don't understand the comment about ignoring fake guest kicks.
> > > Please add an explanation about what does barrier do here.
> >
> > We may get stack like this:
> >
> > vhost_user_blk_stop()
> >   vhost_dev_disable_notifiers()
> >     virtio_bus_cleanup_host_notifier()
> >       virtio_queue_host_notifier_read()
> >         virtio_queue_notify_vq()  /* fake guest kick */
> >           vhost_user_blk_handle_output()
> >
> > vhost_user_blk_handle_output() will re-start vhost-user if
> > should_start is false. This is not what we expect. The same to
> > vhost_dev_enable_notifiers().
>
> Well what does a compiler barrier have to do with it?
>
> Anyway, it's there to avoid losing kicks e.g. across migration. If you
> have something else that prevents losing kicks (e.g. migrate a "started"
> flag) then add code to skip the notify - don't work around it.
>

OK, I see. Thanks.

IIUC, the vhost-user-blk device actually does not need this kind of kick
because the dataplane is not on the qemu side, right?

>
> > > And maybe a detailed explanation about what "fake kick"
> > > is in the commit log.
> > >
> >
> > Sure.
> > >
> > > > -
> > > >  }
> > > >
> > > >  static uint64_t vhost_user_blk_get_features(VirtIODevice *vdev,
> > > > @@ -237,13 +259,22 @@ static uint64_t vhost_user_blk_get_features(VirtIODevice *vdev,
> > > >  static void vhost_user_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq)
> > > >  {
> > > >      VHostUserBlk *s = VHOST_USER_BLK(vdev);
> > > > -    int i;
> > > > +    int i, ret;
> > > >
> > > >      if (!(virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1) &&
> > > >          !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1))) {
> > > >          return;
> > > >      }
> > > >
> > > > +    if (s->should_start) {
> > > > +        return;
> > > > +    }
> > > > +    s->should_start = true;
> > > > +
> > > > +    if (!s->connected) {
> > > > +        return;
> > > > +    }
> > > > +
> > > >      if (s->dev.started) {
> > > >          return;
> > > >      }
> > > > @@ -251,7 +282,13 @@ static void vhost_user_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq)
> > > >      /* Some guests kick before setting VIRTIO_CONFIG_S_DRIVER_OK so start
> > > >       * vhost here instead of waiting for .set_status().
> > > >       */
> > > > -    vhost_user_blk_start(vdev);
> > > > +    ret = vhost_user_blk_start(vdev);
> > > > +    if (ret < 0) {
> > > > +        error_report("vhost-user-blk: vhost start failed: %s",
> > > > +                     strerror(-ret));
> > > > +        qemu_chr_fe_disconnect(&s->chardev);
> > > > +        return;
> > > > +    }
> > > >
> > > >      /* Kick right away to begin processing requests already in vring */
> > > >      for (i = 0; i < s->dev.nvqs; i++) {
> > > > @@ -271,13 +308,106 @@ static void vhost_user_blk_reset(VirtIODevice *vdev)
> > > >      vhost_dev_free_inflight(s->inflight);
> > > >  }
> > > >
> > > > +static int vhost_user_blk_connect(DeviceState *dev)
> > > > +{
> > > > +    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> > > > +    VHostUserBlk *s = VHOST_USER_BLK(vdev);
> > > > +    int ret = 0;
> > > > +
> > > > +    if (s->connected) {
> > > > +        return 0;
> > > > +    }
> > > > +    s->connected = true;
> > > > +
> > > > +    s->dev.nvqs = s->num_queues;
> > > > +    s->dev.vqs = s->vqs;
> > > > +    s->dev.vq_index = 0;
> > > > +    s->dev.backend_features = 0;
> > > > +
> > > > +    vhost_dev_set_config_notifier(&s->dev, &blk_ops);
> > > > +
> > > > +    ret = vhost_dev_init(&s->dev, s->vhost_user, VHOST_BACKEND_TYPE_USER, 0);
> > > > +    if (ret < 0) {
> > > > +        error_report("vhost-user-blk: vhost initialization failed: %s",
> > > > +                     strerror(-ret));
> > > > +        return ret;
> > > > +    }
> > > > +
> > > > +    /* restore vhost state */
> > > > +    if (s->should_start) {
> > > > +        ret = vhost_user_blk_start(vdev);
> > > > +        if (ret < 0) {
> > > > +            error_report("vhost-user-blk: vhost start failed: %s",
> > > > +                         strerror(-ret));
> > > > +            return ret;
> > > > +        }
> > > > +    }
> > > > +
> > > > +    return 0;
> > > > +}
> > > > +
> > > > +static void vhost_user_blk_disconnect(DeviceState *dev)
> > > > +{
> > > > +    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> > > > +    VHostUserBlk *s = VHOST_USER_BLK(vdev);
> > > > +
> > > > +    if (!s->connected) {
> > > > +        return;
> > > > +    }
> > > > +    s->connected = false;
> > > > +
> > > > +    if (s->dev.started) {
> > > > +        vhost_user_blk_stop(vdev);
> > > > +    }
> > > > +
> > > > +    vhost_dev_cleanup(&s->dev);
> > > > +}
> > > > +
> > > > +static gboolean vhost_user_blk_watch(GIOChannel *chan, GIOCondition cond,
> > > > +                                     void *opaque)
> > > > +{
> > > > +    DeviceState *dev = opaque;
> > > > +    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> > > > +    VHostUserBlk *s = VHOST_USER_BLK(vdev);
> > > > +
> > > > +    qemu_chr_fe_disconnect(&s->chardev);
> > > > +
> > > > +    return true;
> > > > +}
> > > > +
> > > > +static void vhost_user_blk_event(void *opaque, int event)
> > > > +{
> > > > +    DeviceState *dev = opaque;
> > > > +    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> > > > +    VHostUserBlk *s = VHOST_USER_BLK(vdev);
> > > > +
> > > > +    switch (event) {
> > > > +    case CHR_EVENT_OPENED:
> > > > +        if (vhost_user_blk_connect(dev) < 0) {
> > > > +            qemu_chr_fe_disconnect(&s->chardev);
> > > > +            return;
> > > > +        }
> > > > +        s->watch = qemu_chr_fe_add_watch(&s->chardev, G_IO_HUP,
> > > > +                                         vhost_user_blk_watch, dev);
> > > > +        break;
> > > > +    case CHR_EVENT_CLOSED:
> > > > +        vhost_user_blk_disconnect(dev);
> > > > +        if (s->watch) {
> > > > +            g_source_remove(s->watch);
> > > > +            s->watch = 0;
> > > > +        }
> > > > +        break;
> > > > +    }
> > > > +}
> > > > +
> > > > +
> > > >  static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp)
> > > >  {
> > > >      VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> > > >      VHostUserBlk *s = VHOST_USER_BLK(vdev);
> > > >      VhostUserState *user;
> > > > -    struct vhost_virtqueue *vqs = NULL;
> > > >      int i, ret;
> > > > +    Error *err = NULL;
> > > >
> > > >      if (!s->chardev.chr) {
> > > >          error_setg(errp, "vhost-user-blk: chardev is mandatory");
> > > > @@ -312,27 +442,28 @@ static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp)
> > > >      }
> > > >
> > > >      s->inflight = g_new0(struct vhost_inflight, 1);
> > > > -
> > > > -    s->dev.nvqs = s->num_queues;
> > > > -    s->dev.vqs = g_new(struct vhost_virtqueue, s->dev.nvqs);
> > > > -    s->dev.vq_index = 0;
> > > > -    s->dev.backend_features = 0;
> > > > -    vqs = s->dev.vqs;
> > > > -
> > > > -    vhost_dev_set_config_notifier(&s->dev, &blk_ops);
> > > > -
> > > > -    ret = vhost_dev_init(&s->dev, s->vhost_user, VHOST_BACKEND_TYPE_USER, 0);
> > > > -    if (ret < 0) {
> > > > -        error_setg(errp, "vhost-user-blk: vhost initialization failed: %s",
> > > > -                   strerror(-ret));
> > > > -        goto virtio_err;
> > > > -    }
> > > > +    s->vqs = g_new(struct vhost_virtqueue, s->num_queues);
> > > > +    s->watch = 0;
> > > > +    s->should_start = false;
> > > > +    s->connected = false;
> > > > +
> > > > +    qemu_chr_fe_set_handlers(&s->chardev,  NULL, NULL, vhost_user_blk_event,
> > > > +                             NULL, (void *)dev, NULL, true);
> > > > +
> > > > +reconnect:
> > > > +    do {
> > > > +        if (qemu_chr_fe_wait_connected(&s->chardev, &err) < 0) {
> > > > +            error_report_err(err);
> > > > +            err = NULL;
> > > > +            sleep(1);
> > > > +        }
> > > > +    } while (!s->connected);
> > > >
> > > >      ret = vhost_dev_get_config(&s->dev, (uint8_t *)&s->blkcfg,
> > > > -                              sizeof(struct virtio_blk_config));
> > > > +                               sizeof(struct virtio_blk_config));
> > > >      if (ret < 0) {
> > > > -        error_setg(errp, "vhost-user-blk: get block config failed");
> > > > -        goto vhost_err;
> > > > +        error_report("vhost-user-blk: get block config failed");
> > > > +        goto reconnect;
> > > >      }
> > > >
> > > >      if (s->blkcfg.num_queues != s->num_queues) {
> > > > @@ -340,29 +471,19 @@ static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp)
> > > >      }
> > > >
> > > >      return;
> > >
> > > With this we end up with return at end of function
> > > which makes no sense.
> > >
> >
> > Oh, yes. I will remove it.
> >
> > > > -
> > > > -vhost_err:
> > > > -    vhost_dev_cleanup(&s->dev);
> > > > -virtio_err:
> > > > -    g_free(vqs);
> > > > -    g_free(s->inflight);
> > > > -    virtio_cleanup(vdev);
> > > > -
> > > > -    vhost_user_cleanup(user);
> > > > -    g_free(user);
> > > > -    s->vhost_user = NULL;
> > > >  }
> > > >
> > > >  static void vhost_user_blk_device_unrealize(DeviceState *dev, Error **errp)
> > > >  {
> > > >      VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> > > >      VHostUserBlk *s = VHOST_USER_BLK(dev);
> > > > -    struct vhost_virtqueue *vqs = s->dev.vqs;
> > > >
> > > >      vhost_user_blk_set_status(vdev, 0);
> > > > +    qemu_chr_fe_set_handlers(&s->chardev,  NULL, NULL, NULL,
> > > > +                             NULL, NULL, NULL, false);
> > > >      vhost_dev_cleanup(&s->dev);
> > > >      vhost_dev_free_inflight(s->inflight);
> > > > -    g_free(vqs);
> > > > +    g_free(s->vqs);
> > > >      g_free(s->inflight);
> > > >      virtio_cleanup(vdev);
> > > >
> > > > diff --git a/include/hw/virtio/vhost-user-blk.h b/include/hw/virtio/vhost-user-blk.h
> > > > index 445516604a..4849aa5eb5 100644
> > > > --- a/include/hw/virtio/vhost-user-blk.h
> > > > +++ b/include/hw/virtio/vhost-user-blk.h
> > > > @@ -38,6 +38,10 @@ typedef struct VHostUserBlk {
> > > >      struct vhost_dev dev;
> > > >      struct vhost_inflight *inflight;
> > > >      VhostUserState *vhost_user;
> > > > +    struct vhost_virtqueue *vqs;
> > > > +    guint watch;
> > > > +    bool should_start;
> > >
> > > Please add comments about fields.
> > > Also should_start seems to be set on guest activity.
> > > Does it need to be migrated?
> > > It would be better not to track guest state like this.
> > > Why do we need it?
> > > Pls add comments with an explanation.
> > >
> >
> > We can't rely on vhost_dev.started to track guest's state in
> > vhost_user_blk_set_status() when we enable reconnecting. For example,
> > qemu will lose guest's state if guest stop device during backend is
> > disconnected. So vhost_dev.started is used to track backend's state
> > and should_start is used to track guest's state in this patch. And
> > this variable is based on VirtIODevice.status, so I think it is not
> > necessary to migrate it. What do you think about it?
>
> If it's based on VirtIODevice.status then how about not keeping
> it around at all and just calculating from VirtIODevice.status
> when necessary?
>

Sorry, I made a mistake before. Normally, should_start is based on
VirtIODevice.status. But that's not the case for virtio 1.0 transitional
devices. Then I realized there may be a bug. If migration completes
between kicking the virtqueue and setting VIRTIO_CONFIG_S_DRIVER_OK, the
guest may hang. So I think should_start should be migrated in this case.

Thanks,
Yongji
Michael S. Tsirkin March 15, 2019, 3:08 a.m. UTC | #13
On Fri, Mar 15, 2019 at 10:46:34AM +0800, Yongji Xie wrote:
> On Thu, 14 Mar 2019 at 19:18, Michael S. Tsirkin <mst@redhat.com> wrote:
> >
> > On Wed, Mar 13, 2019 at 10:47:08AM +0800, Yongji Xie wrote:
> > > On Wed, 13 Mar 2019 at 09:16, Michael S. Tsirkin <mst@redhat.com> wrote:
> > > >
> > > > On Thu, Feb 28, 2019 at 04:53:54PM +0800, elohimes@gmail.com wrote:
> > > > > From: Xie Yongji <xieyongji@baidu.com>
> > > > >
> > > > > Since we now support the message VHOST_USER_GET_INFLIGHT_FD
> > > > > and VHOST_USER_SET_INFLIGHT_FD. The backend is able to restart
> > > > > safely because it can track inflight I/O in shared memory.
> > > > > This patch allows qemu to reconnect the backend after
> > > > > connection closed.
> > > > >
> > > > > Signed-off-by: Xie Yongji <xieyongji@baidu.com>
> > > > > Signed-off-by: Ni Xun <nixun@baidu.com>
> > > > > Signed-off-by: Zhang Yu <zhangyu31@baidu.com>
> > > > > ---
> > > > >  hw/block/vhost-user-blk.c          | 205 +++++++++++++++++++++++------
> > > > >  include/hw/virtio/vhost-user-blk.h |   4 +
> > > > >  2 files changed, 167 insertions(+), 42 deletions(-)
> > > > >
> > > > > diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c
> > > > > index 9682df1a7b..539ea2e571 100644
> > > > > --- a/hw/block/vhost-user-blk.c
> > > > > +++ b/hw/block/vhost-user-blk.c
> > > > > @@ -103,7 +103,7 @@ const VhostDevConfigOps blk_ops = {
> > > > >      .vhost_dev_config_notifier = vhost_user_blk_handle_config_change,
> > > > >  };
> > > > >
> > > > > -static void vhost_user_blk_start(VirtIODevice *vdev)
> > > > > +static int vhost_user_blk_start(VirtIODevice *vdev)
> > > > >  {
> > > > >      VHostUserBlk *s = VHOST_USER_BLK(vdev);
> > > > >      BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
> > > > > @@ -112,13 +112,13 @@ static void vhost_user_blk_start(VirtIODevice *vdev)
> > > > >
> > > > >      if (!k->set_guest_notifiers) {
> > > > >          error_report("binding does not support guest notifiers");
> > > > > -        return;
> > > > > +        return -ENOSYS;
> > > > >      }
> > > > >
> > > > >      ret = vhost_dev_enable_notifiers(&s->dev, vdev);
> > > > >      if (ret < 0) {
> > > > >          error_report("Error enabling host notifiers: %d", -ret);
> > > > > -        return;
> > > > > +        return ret;
> > > > >      }
> > > > >
> > > > >      ret = k->set_guest_notifiers(qbus->parent, s->dev.nvqs, true);
> > > > > @@ -157,12 +157,13 @@ static void vhost_user_blk_start(VirtIODevice *vdev)
> > > > >          vhost_virtqueue_mask(&s->dev, vdev, i, false);
> > > > >      }
> > > > >
> > > > > -    return;
> > > > > +    return ret;
> > > > >
> > > > >  err_guest_notifiers:
> > > > >      k->set_guest_notifiers(qbus->parent, s->dev.nvqs, false);
> > > > >  err_host_notifiers:
> > > > >      vhost_dev_disable_notifiers(&s->dev, vdev);
> > > > > +    return ret;
> > > > >  }
> > > > >
> > > > >  static void vhost_user_blk_stop(VirtIODevice *vdev)
> > > > > @@ -181,7 +182,6 @@ static void vhost_user_blk_stop(VirtIODevice *vdev)
> > > > >      ret = k->set_guest_notifiers(qbus->parent, s->dev.nvqs, false);
> > > > >      if (ret < 0) {
> > > > >          error_report("vhost guest notifier cleanup failed: %d", ret);
> > > > > -        return;
> > > > >      }
> > > > >
> > > > >      vhost_dev_disable_notifiers(&s->dev, vdev);
> > > > > @@ -191,21 +191,43 @@ static void vhost_user_blk_set_status(VirtIODevice *vdev, uint8_t status)
> > > > >  {
> > > > >      VHostUserBlk *s = VHOST_USER_BLK(vdev);
> > > > >      bool should_start = status & VIRTIO_CONFIG_S_DRIVER_OK;
> > > > > +    int ret;
> > > > >
> > > > >      if (!vdev->vm_running) {
> > > > >          should_start = false;
> > > > >      }
> > > > >
> > > > > -    if (s->dev.started == should_start) {
> > > > > +    if (s->should_start == should_start) {
> > > > > +        return;
> > > > > +    }
> > > > > +
> > > > > +    if (!s->connected || s->dev.started == should_start) {
> > > > > +        s->should_start = should_start;
> > > > >          return;
> > > > >      }
> > > > >
> > > > >      if (should_start) {
> > > > > -        vhost_user_blk_start(vdev);
> > > > > +        s->should_start = true;
> > > > > +        /*
> > > > > +         * make sure vhost_user_blk_handle_output() ignores fake
> > > > > +         * guest kick by vhost_dev_enable_notifiers()
> > > > > +         */
> > > > > +        barrier();
> > > > > +        ret = vhost_user_blk_start(vdev);
> > > > > +        if (ret < 0) {
> > > > > +            error_report("vhost-user-blk: vhost start failed: %s",
> > > > > +                         strerror(-ret));
> > > > > +            qemu_chr_fe_disconnect(&s->chardev);
> > > > > +        }
> > > > >      } else {
> > > > >          vhost_user_blk_stop(vdev);
> > > > > +        /*
> > > > > +         * make sure vhost_user_blk_handle_output() ignore fake
> > > > > +         * guest kick by vhost_dev_disable_notifiers()
> > > > > +         */
> > > > > +        barrier();
> > > > > +        s->should_start = false;
> > > > >      }
> > > >
> > > > I don't understand the comment about ignoring fake guest kicks.
> > > > Please add an explanation about what does barrier do here.
> > >
> > > We may get stack like this:
> > >
> > > vhost_user_blk_stop()
> > >   vhost_dev_disable_notifiers()
> > >     virtio_bus_cleanup_host_notifier()
> > >       virtio_queue_host_notifier_read()
> > >         virtio_queue_notify_vq()  /* fake guest kick */
> > >           vhost_user_blk_handle_output()
> > >
> > > vhost_user_blk_handle_output() will re-start vhost-user if
> > > should_start is false. This is not what we expect. The same to
> > > vhost_dev_enable_notifiers().
> >
> > Well what does a compiler barrier have to do with it?
> >
> > Anyway, it's there to avoid losing kicks e.g. across migration. If you
> > have something else that prevents losing kicks (e.g. migrate a "started"
> > flag) then add code to skip the notify - don't work around it.
> >
> 
> OK, I see. Thanks.
> 
> IIUC, the vhost-user-blk device actually does not need this kind of kick
> because the dataplane is not on the qemu side, right?
> 
> >
> > > > And maybe a detailed explanation about what "fake kick"
> > > > is in the commit log.
> > > >
> > >
> > > Sure.
> > > >
> > > > > -
> > > > >  }
> > > > >
> > > > >  static uint64_t vhost_user_blk_get_features(VirtIODevice *vdev,
> > > > > @@ -237,13 +259,22 @@ static uint64_t vhost_user_blk_get_features(VirtIODevice *vdev,
> > > > >  static void vhost_user_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq)
> > > > >  {
> > > > >      VHostUserBlk *s = VHOST_USER_BLK(vdev);
> > > > > -    int i;
> > > > > +    int i, ret;
> > > > >
> > > > >      if (!(virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1) &&
> > > > >          !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1))) {
> > > > >          return;
> > > > >      }
> > > > >
> > > > > +    if (s->should_start) {
> > > > > +        return;
> > > > > +    }
> > > > > +    s->should_start = true;
> > > > > +
> > > > > +    if (!s->connected) {
> > > > > +        return;
> > > > > +    }
> > > > > +
> > > > >      if (s->dev.started) {
> > > > >          return;
> > > > >      }
> > > > > @@ -251,7 +282,13 @@ static void vhost_user_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq)
> > > > >      /* Some guests kick before setting VIRTIO_CONFIG_S_DRIVER_OK so start
> > > > >       * vhost here instead of waiting for .set_status().
> > > > >       */
> > > > > -    vhost_user_blk_start(vdev);
> > > > > +    ret = vhost_user_blk_start(vdev);
> > > > > +    if (ret < 0) {
> > > > > +        error_report("vhost-user-blk: vhost start failed: %s",
> > > > > +                     strerror(-ret));
> > > > > +        qemu_chr_fe_disconnect(&s->chardev);
> > > > > +        return;
> > > > > +    }
> > > > >
> > > > >      /* Kick right away to begin processing requests already in vring */
> > > > >      for (i = 0; i < s->dev.nvqs; i++) {
> > > > > @@ -271,13 +308,106 @@ static void vhost_user_blk_reset(VirtIODevice *vdev)
> > > > >      vhost_dev_free_inflight(s->inflight);
> > > > >  }
> > > > >
> > > > > +static int vhost_user_blk_connect(DeviceState *dev)
> > > > > +{
> > > > > +    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> > > > > +    VHostUserBlk *s = VHOST_USER_BLK(vdev);
> > > > > +    int ret = 0;
> > > > > +
> > > > > +    if (s->connected) {
> > > > > +        return 0;
> > > > > +    }
> > > > > +    s->connected = true;
> > > > > +
> > > > > +    s->dev.nvqs = s->num_queues;
> > > > > +    s->dev.vqs = s->vqs;
> > > > > +    s->dev.vq_index = 0;
> > > > > +    s->dev.backend_features = 0;
> > > > > +
> > > > > +    vhost_dev_set_config_notifier(&s->dev, &blk_ops);
> > > > > +
> > > > > +    ret = vhost_dev_init(&s->dev, s->vhost_user, VHOST_BACKEND_TYPE_USER, 0);
> > > > > +    if (ret < 0) {
> > > > > +        error_report("vhost-user-blk: vhost initialization failed: %s",
> > > > > +                     strerror(-ret));
> > > > > +        return ret;
> > > > > +    }
> > > > > +
> > > > > +    /* restore vhost state */
> > > > > +    if (s->should_start) {
> > > > > +        ret = vhost_user_blk_start(vdev);
> > > > > +        if (ret < 0) {
> > > > > +            error_report("vhost-user-blk: vhost start failed: %s",
> > > > > +                         strerror(-ret));
> > > > > +            return ret;
> > > > > +        }
> > > > > +    }
> > > > > +
> > > > > +    return 0;
> > > > > +}
> > > > > +
> > > > > +static void vhost_user_blk_disconnect(DeviceState *dev)
> > > > > +{
> > > > > +    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> > > > > +    VHostUserBlk *s = VHOST_USER_BLK(vdev);
> > > > > +
> > > > > +    if (!s->connected) {
> > > > > +        return;
> > > > > +    }
> > > > > +    s->connected = false;
> > > > > +
> > > > > +    if (s->dev.started) {
> > > > > +        vhost_user_blk_stop(vdev);
> > > > > +    }
> > > > > +
> > > > > +    vhost_dev_cleanup(&s->dev);
> > > > > +}
> > > > > +
> > > > > +static gboolean vhost_user_blk_watch(GIOChannel *chan, GIOCondition cond,
> > > > > +                                     void *opaque)
> > > > > +{
> > > > > +    DeviceState *dev = opaque;
> > > > > +    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> > > > > +    VHostUserBlk *s = VHOST_USER_BLK(vdev);
> > > > > +
> > > > > +    qemu_chr_fe_disconnect(&s->chardev);
> > > > > +
> > > > > +    return true;
> > > > > +}
> > > > > +
> > > > > +static void vhost_user_blk_event(void *opaque, int event)
> > > > > +{
> > > > > +    DeviceState *dev = opaque;
> > > > > +    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> > > > > +    VHostUserBlk *s = VHOST_USER_BLK(vdev);
> > > > > +
> > > > > +    switch (event) {
> > > > > +    case CHR_EVENT_OPENED:
> > > > > +        if (vhost_user_blk_connect(dev) < 0) {
> > > > > +            qemu_chr_fe_disconnect(&s->chardev);
> > > > > +            return;
> > > > > +        }
> > > > > +        s->watch = qemu_chr_fe_add_watch(&s->chardev, G_IO_HUP,
> > > > > +                                         vhost_user_blk_watch, dev);
> > > > > +        break;
> > > > > +    case CHR_EVENT_CLOSED:
> > > > > +        vhost_user_blk_disconnect(dev);
> > > > > +        if (s->watch) {
> > > > > +            g_source_remove(s->watch);
> > > > > +            s->watch = 0;
> > > > > +        }
> > > > > +        break;
> > > > > +    }
> > > > > +}
> > > > > +
> > > > > +
> > > > >  static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp)
> > > > >  {
> > > > >      VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> > > > >      VHostUserBlk *s = VHOST_USER_BLK(vdev);
> > > > >      VhostUserState *user;
> > > > > -    struct vhost_virtqueue *vqs = NULL;
> > > > >      int i, ret;
> > > > > +    Error *err = NULL;
> > > > >
> > > > >      if (!s->chardev.chr) {
> > > > >          error_setg(errp, "vhost-user-blk: chardev is mandatory");
> > > > > @@ -312,27 +442,28 @@ static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp)
> > > > >      }
> > > > >
> > > > >      s->inflight = g_new0(struct vhost_inflight, 1);
> > > > > -
> > > > > -    s->dev.nvqs = s->num_queues;
> > > > > -    s->dev.vqs = g_new(struct vhost_virtqueue, s->dev.nvqs);
> > > > > -    s->dev.vq_index = 0;
> > > > > -    s->dev.backend_features = 0;
> > > > > -    vqs = s->dev.vqs;
> > > > > -
> > > > > -    vhost_dev_set_config_notifier(&s->dev, &blk_ops);
> > > > > -
> > > > > -    ret = vhost_dev_init(&s->dev, s->vhost_user, VHOST_BACKEND_TYPE_USER, 0);
> > > > > -    if (ret < 0) {
> > > > > -        error_setg(errp, "vhost-user-blk: vhost initialization failed: %s",
> > > > > -                   strerror(-ret));
> > > > > -        goto virtio_err;
> > > > > -    }
> > > > > +    s->vqs = g_new(struct vhost_virtqueue, s->num_queues);
> > > > > +    s->watch = 0;
> > > > > +    s->should_start = false;
> > > > > +    s->connected = false;
> > > > > +
> > > > > +    qemu_chr_fe_set_handlers(&s->chardev,  NULL, NULL, vhost_user_blk_event,
> > > > > +                             NULL, (void *)dev, NULL, true);
> > > > > +
> > > > > +reconnect:
> > > > > +    do {
> > > > > +        if (qemu_chr_fe_wait_connected(&s->chardev, &err) < 0) {
> > > > > +            error_report_err(err);
> > > > > +            err = NULL;
> > > > > +            sleep(1);
> > > > > +        }
> > > > > +    } while (!s->connected);
> > > > >
> > > > >      ret = vhost_dev_get_config(&s->dev, (uint8_t *)&s->blkcfg,
> > > > > -                              sizeof(struct virtio_blk_config));
> > > > > +                               sizeof(struct virtio_blk_config));
> > > > >      if (ret < 0) {
> > > > > -        error_setg(errp, "vhost-user-blk: get block config failed");
> > > > > -        goto vhost_err;
> > > > > +        error_report("vhost-user-blk: get block config failed");
> > > > > +        goto reconnect;
> > > > >      }
> > > > >
> > > > >      if (s->blkcfg.num_queues != s->num_queues) {
> > > > > @@ -340,29 +471,19 @@ static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp)
> > > > >      }
> > > > >
> > > > >      return;
> > > >
> > > > With this we end up with return at end of function
> > > > which makes no sense.
> > > >
> > >
> > > Oh, yes. I will remove it.
> > >
> > > > > -
> > > > > -vhost_err:
> > > > > -    vhost_dev_cleanup(&s->dev);
> > > > > -virtio_err:
> > > > > -    g_free(vqs);
> > > > > -    g_free(s->inflight);
> > > > > -    virtio_cleanup(vdev);
> > > > > -
> > > > > -    vhost_user_cleanup(user);
> > > > > -    g_free(user);
> > > > > -    s->vhost_user = NULL;
> > > > >  }
> > > > >
> > > > >  static void vhost_user_blk_device_unrealize(DeviceState *dev, Error **errp)
> > > > >  {
> > > > >      VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> > > > >      VHostUserBlk *s = VHOST_USER_BLK(dev);
> > > > > -    struct vhost_virtqueue *vqs = s->dev.vqs;
> > > > >
> > > > >      vhost_user_blk_set_status(vdev, 0);
> > > > > +    qemu_chr_fe_set_handlers(&s->chardev,  NULL, NULL, NULL,
> > > > > +                             NULL, NULL, NULL, false);
> > > > >      vhost_dev_cleanup(&s->dev);
> > > > >      vhost_dev_free_inflight(s->inflight);
> > > > > -    g_free(vqs);
> > > > > +    g_free(s->vqs);
> > > > >      g_free(s->inflight);
> > > > >      virtio_cleanup(vdev);
> > > > >
> > > > > diff --git a/include/hw/virtio/vhost-user-blk.h b/include/hw/virtio/vhost-user-blk.h
> > > > > index 445516604a..4849aa5eb5 100644
> > > > > --- a/include/hw/virtio/vhost-user-blk.h
> > > > > +++ b/include/hw/virtio/vhost-user-blk.h
> > > > > @@ -38,6 +38,10 @@ typedef struct VHostUserBlk {
> > > > >      struct vhost_dev dev;
> > > > >      struct vhost_inflight *inflight;
> > > > >      VhostUserState *vhost_user;
> > > > > +    struct vhost_virtqueue *vqs;
> > > > > +    guint watch;
> > > > > +    bool should_start;
> > > >
> > > > Please add comments about fields.
> > > > Also should_start seems to be set on guest activity.
> > > > Does it need to be migrated?
> > > > It would be better not to track guest state like this.
> > > > Why do we need it?
> > > > Pls add comments with an explanation.
> > > >
> > >
> > > We can't relay on vhost_dev.started to track guest's state in
> > > vhost_user_blk_set_status() when we enable reconnecting. For example,
> > > qemu will lose guest's state if guest stop device during backend is
> > > disconnected. So vhost_dev.started is used to track backend's state
> > > and should_start is used to track guest's state in this patch. And
> > > this variable is based on VirtIODevice.status, so I think it is not
> > > necessary to migrate it. What do you think about it?
> >
> > If it's based on VirtIODevice.status then how about not keeping
> > it around at all and just calculating from VirtIODevice.status
> > when necessary?
> >
> 
> Sorry, I made a mistake before. Normally, should_start is based on
> VirtIODevice.status. But it's not the case for virtio 1.0 transitional
> devices. Then I realize there may be a bug. If migration completes
> between kicking virtqueue and setting VIRTIO_CONFIG_S_DRIVER_OK, guest
> may be hung. So I think should_start should be migrated in this case.
> 
> Thanks,
> Yongji

This is exactly the case that the fake kick is intended to fix.

We can migrate this state, but it's really a virtio thing,
not vhost specific. Migrate it and skip the fake kick, sure.
Yongji Xie March 15, 2019, 3:17 a.m. UTC | #14
On Thu, 14 Mar 2019 at 20:31, Yury Kotov <yury-kotov@yandex-team.ru> wrote:
>
> Hi,
>
> 14.03.2019, 14:44, "Daniel P. Berrangé" <berrange@redhat.com>:
> > On Thu, Mar 14, 2019 at 07:34:03AM -0400, Michael S. Tsirkin wrote:
> >>  On Thu, Mar 14, 2019 at 11:24:22AM +0000, Daniel P. Berrangé wrote:
> >>  > On Tue, Mar 12, 2019 at 12:49:35PM -0400, Michael S. Tsirkin wrote:
> >>  > > On Thu, Feb 28, 2019 at 04:53:54PM +0800, elohimes@gmail.com wrote:
> >>  > > > From: Xie Yongji <xieyongji@baidu.com>
> >>  > > >
> >>  > > > Since we now support the message VHOST_USER_GET_INFLIGHT_FD
> >>  > > > and VHOST_USER_SET_INFLIGHT_FD. The backend is able to restart
> >>  > > > safely because it can track inflight I/O in shared memory.
> >>  > > > This patch allows qemu to reconnect the backend after
> >>  > > > connection closed.
> >>  > > >
> >>  > > > Signed-off-by: Xie Yongji <xieyongji@baidu.com>
> >>  > > > Signed-off-by: Ni Xun <nixun@baidu.com>
> >>  > > > Signed-off-by: Zhang Yu <zhangyu31@baidu.com>
> >>  > > > ---
> >>  > > > hw/block/vhost-user-blk.c | 205 +++++++++++++++++++++++------
> >>  > > > include/hw/virtio/vhost-user-blk.h | 4 +
> >>  > > > 2 files changed, 167 insertions(+), 42 deletions(-)
> >>  >
> >>  >
> >>  > > > static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp)
> >>  > > > {
> >>  > > > VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> >>  > > > VHostUserBlk *s = VHOST_USER_BLK(vdev);
> >>  > > > VhostUserState *user;
> >>  > > > - struct vhost_virtqueue *vqs = NULL;
> >>  > > > int i, ret;
> >>  > > > + Error *err = NULL;
> >>  > > >
> >>  > > > if (!s->chardev.chr) {
> >>  > > > error_setg(errp, "vhost-user-blk: chardev is mandatory");
> >>  > > > @@ -312,27 +442,28 @@ static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp)
> >>  > > > }
> >>  > > >
> >>  > > > s->inflight = g_new0(struct vhost_inflight, 1);
> >>  > > > -
> >>  > > > - s->dev.nvqs = s->num_queues;
> >>  > > > - s->dev.vqs = g_new(struct vhost_virtqueue, s->dev.nvqs);
> >>  > > > - s->dev.vq_index = 0;
> >>  > > > - s->dev.backend_features = 0;
> >>  > > > - vqs = s->dev.vqs;
> >>  > > > -
> >>  > > > - vhost_dev_set_config_notifier(&s->dev, &blk_ops);
> >>  > > > -
> >>  > > > - ret = vhost_dev_init(&s->dev, s->vhost_user, VHOST_BACKEND_TYPE_USER, 0);
> >>  > > > - if (ret < 0) {
> >>  > > > - error_setg(errp, "vhost-user-blk: vhost initialization failed: %s",
> >>  > > > - strerror(-ret));
> >>  > > > - goto virtio_err;
> >>  > > > - }
> >>  > > > + s->vqs = g_new(struct vhost_virtqueue, s->num_queues);
> >>  > > > + s->watch = 0;
> >>  > > > + s->should_start = false;
> >>  > > > + s->connected = false;
> >>  > > > +
> >>  > > > + qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL, vhost_user_blk_event,
> >>  > > > + NULL, (void *)dev, NULL, true);
> >>  > > > +
> >>  > > > +reconnect:
> >>  > > > + do {
> >>  > > > + if (qemu_chr_fe_wait_connected(&s->chardev, &err) < 0) {
> >>  > > > + error_report_err(err);
> >>  > > > + err = NULL;
> >>  > > > + sleep(1);
> >>  > >
> >>  > > Seems arbitrary. Is this basically waiting until backend will reconnect?
> >>  > > Why not block until event on the fd triggers?
> >>  > >
> >>  > > Also, it looks like this will just block forever with no monitor input
> >>  > > and no way for user to figure out what is going on short of
> >>  > > crashing QEMU.
> >>  >
> >>  > FWIW, the current vhost-user-net device does exactly the same thing
> >>  > with calling qemu_chr_fe_wait_connected during its realize() function.
> >>
> >>  Hmm yes. It doesn't sleep for an arbitrary 1 sec so less of an eyesore :)
> >
> > The sleep(1) in this patch simply needs to be removed. I think that
> > probably dates from when it was written against the earlier broken
> > version of qemu_chr_fe_wait_connected(). That would not correctly
> > deal with the "reconnect" flag, and so needing this loop with a sleep
> > in it.
> >
> > In fact the while loop can be removed as well in this code. It just
> > needs to call qemu_chr_fe_wait_connected() once. It is guaranteed
> > to have a connected peer once that returns 0.
> >
> > qemu_chr_fe_wait_connected() only returns -1 if the operating in
> > client mode, and it failed to connect and reconnect is *not*
> > requested. In such case the caller should honour the failure and
> > quit, not loop to retry.
> >
> > The reason vhost-user-net does a loop is because once it has a
> > connection it tries todo a protocol handshake, and if that
> > handshake fails it closes the chardev and tries to connect
> > again. That's not the case in this blk code os the loop is
> > not needed.
> >
>
> But vhost-user-blk also has a handshake in device realize. What happens if the
> connection is broken during realization? IIUC we have to retry a handshake in
> such case just like vhost-user-net.
>

Actually that's what I want to ask too.

Thanks,
Yongji
Yongji Xie March 15, 2019, 3:20 a.m. UTC | #15
On Thu, 14 Mar 2019 at 19:43, Daniel P. Berrangé <berrange@redhat.com> wrote:
>
> On Thu, Mar 14, 2019 at 07:34:03AM -0400, Michael S. Tsirkin wrote:
> > On Thu, Mar 14, 2019 at 11:24:22AM +0000, Daniel P. Berrangé wrote:
> > > On Tue, Mar 12, 2019 at 12:49:35PM -0400, Michael S. Tsirkin wrote:
> > > > On Thu, Feb 28, 2019 at 04:53:54PM +0800, elohimes@gmail.com wrote:
> > > > > From: Xie Yongji <xieyongji@baidu.com>
> > > > >
> > > > > Since we now support the message VHOST_USER_GET_INFLIGHT_FD
> > > > > and VHOST_USER_SET_INFLIGHT_FD. The backend is able to restart
> > > > > safely because it can track inflight I/O in shared memory.
> > > > > This patch allows qemu to reconnect the backend after
> > > > > connection closed.
> > > > >
> > > > > Signed-off-by: Xie Yongji <xieyongji@baidu.com>
> > > > > Signed-off-by: Ni Xun <nixun@baidu.com>
> > > > > Signed-off-by: Zhang Yu <zhangyu31@baidu.com>
> > > > > ---
> > > > >  hw/block/vhost-user-blk.c          | 205 +++++++++++++++++++++++------
> > > > >  include/hw/virtio/vhost-user-blk.h |   4 +
> > > > >  2 files changed, 167 insertions(+), 42 deletions(-)
> > >
> > >
> > > > >  static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp)
> > > > >  {
> > > > >      VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> > > > >      VHostUserBlk *s = VHOST_USER_BLK(vdev);
> > > > >      VhostUserState *user;
> > > > > -    struct vhost_virtqueue *vqs = NULL;
> > > > >      int i, ret;
> > > > > +    Error *err = NULL;
> > > > >
> > > > >      if (!s->chardev.chr) {
> > > > >          error_setg(errp, "vhost-user-blk: chardev is mandatory");
> > > > > @@ -312,27 +442,28 @@ static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp)
> > > > >      }
> > > > >
> > > > >      s->inflight = g_new0(struct vhost_inflight, 1);
> > > > > -
> > > > > -    s->dev.nvqs = s->num_queues;
> > > > > -    s->dev.vqs = g_new(struct vhost_virtqueue, s->dev.nvqs);
> > > > > -    s->dev.vq_index = 0;
> > > > > -    s->dev.backend_features = 0;
> > > > > -    vqs = s->dev.vqs;
> > > > > -
> > > > > -    vhost_dev_set_config_notifier(&s->dev, &blk_ops);
> > > > > -
> > > > > -    ret = vhost_dev_init(&s->dev, s->vhost_user, VHOST_BACKEND_TYPE_USER, 0);
> > > > > -    if (ret < 0) {
> > > > > -        error_setg(errp, "vhost-user-blk: vhost initialization failed: %s",
> > > > > -                   strerror(-ret));
> > > > > -        goto virtio_err;
> > > > > -    }
> > > > > +    s->vqs = g_new(struct vhost_virtqueue, s->num_queues);
> > > > > +    s->watch = 0;
> > > > > +    s->should_start = false;
> > > > > +    s->connected = false;
> > > > > +
> > > > > +    qemu_chr_fe_set_handlers(&s->chardev,  NULL, NULL, vhost_user_blk_event,
> > > > > +                             NULL, (void *)dev, NULL, true);
> > > > > +
> > > > > +reconnect:
> > > > > +    do {
> > > > > +        if (qemu_chr_fe_wait_connected(&s->chardev, &err) < 0) {
> > > > > +            error_report_err(err);
> > > > > +            err = NULL;
> > > > > +            sleep(1);
> > > >
> > > > Seems arbitrary. Is this basically waiting until backend will reconnect?
> > > > Why not block until event on the fd triggers?
> > > >
> > > > Also, it looks like this will just block forever with no monitor input
> > > > and no way for user to figure out what is going on short of
> > > > crashing QEMU.
> > >
> > > FWIW, the current vhost-user-net device does exactly the same thing
> > > with calling qemu_chr_fe_wait_connected during its realize() function.
> >
> > Hmm yes. It doesn't sleep for an arbitrary 1 sec so less of an eyesore :)
>
> The sleep(1) in this patch simply needs to be removed. I think that
> probably dates from when it was written against the earlier broken
> version of qemu_chr_fe_wait_connected(). That would not correctly
> deal with the "reconnect" flag, and so needing this loop with a sleep
> in it.
>
> In fact the while loop can be removed as well in this code. It just
> needs to call qemu_chr_fe_wait_connected() once. It is guaranteed
> to have a connected peer once that returns 0.
>
> qemu_chr_fe_wait_connected() only returns -1 if the operating in
> client mode, and it failed to connect and reconnect is *not*
> requested. In such case the caller should honour the failure and
> quit, not loop to retry.
>

Thanks for your comments. Will do that in the next patchset.

>
> The reason vhost-user-net does a loop is because once it has a
> connection it tries todo a protocol handshake, and if that
> handshake fails it closes the chardev and tries to connect
> again. That's not the case in this blk code os the loop is
> not needed.
>

Seems like the vhost-user-blk device needs to do that too.

Thanks,
Yongji
Yongji Xie March 15, 2019, 6:45 a.m. UTC | #16
On Fri, 15 Mar 2019 at 11:09, Michael S. Tsirkin <mst@redhat.com> wrote:
>
> On Fri, Mar 15, 2019 at 10:46:34AM +0800, Yongji Xie wrote:
> > On Thu, 14 Mar 2019 at 19:18, Michael S. Tsirkin <mst@redhat.com> wrote:
> > >
> > > On Wed, Mar 13, 2019 at 10:47:08AM +0800, Yongji Xie wrote:
> > > > On Wed, 13 Mar 2019 at 09:16, Michael S. Tsirkin <mst@redhat.com> wrote:
> > > > >
> > > > > On Thu, Feb 28, 2019 at 04:53:54PM +0800, elohimes@gmail.com wrote:
> > > > > > From: Xie Yongji <xieyongji@baidu.com>
> > > > > >
> > > > > > Since we now support the message VHOST_USER_GET_INFLIGHT_FD
> > > > > > and VHOST_USER_SET_INFLIGHT_FD. The backend is able to restart
> > > > > > safely because it can track inflight I/O in shared memory.
> > > > > > This patch allows qemu to reconnect the backend after
> > > > > > connection closed.
> > > > > >
> > > > > > Signed-off-by: Xie Yongji <xieyongji@baidu.com>
> > > > > > Signed-off-by: Ni Xun <nixun@baidu.com>
> > > > > > Signed-off-by: Zhang Yu <zhangyu31@baidu.com>
> > > > > > ---
> > > > > >  hw/block/vhost-user-blk.c          | 205 +++++++++++++++++++++++------
> > > > > >  include/hw/virtio/vhost-user-blk.h |   4 +
> > > > > >  2 files changed, 167 insertions(+), 42 deletions(-)
> > > > > >
> > > > > > diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c
> > > > > > index 9682df1a7b..539ea2e571 100644
> > > > > > --- a/hw/block/vhost-user-blk.c
> > > > > > +++ b/hw/block/vhost-user-blk.c
> > > > > > @@ -103,7 +103,7 @@ const VhostDevConfigOps blk_ops = {
> > > > > >      .vhost_dev_config_notifier = vhost_user_blk_handle_config_change,
> > > > > >  };
> > > > > >
> > > > > > -static void vhost_user_blk_start(VirtIODevice *vdev)
> > > > > > +static int vhost_user_blk_start(VirtIODevice *vdev)
> > > > > >  {
> > > > > >      VHostUserBlk *s = VHOST_USER_BLK(vdev);
> > > > > >      BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
> > > > > > @@ -112,13 +112,13 @@ static void vhost_user_blk_start(VirtIODevice *vdev)
> > > > > >
> > > > > >      if (!k->set_guest_notifiers) {
> > > > > >          error_report("binding does not support guest notifiers");
> > > > > > -        return;
> > > > > > +        return -ENOSYS;
> > > > > >      }
> > > > > >
> > > > > >      ret = vhost_dev_enable_notifiers(&s->dev, vdev);
> > > > > >      if (ret < 0) {
> > > > > >          error_report("Error enabling host notifiers: %d", -ret);
> > > > > > -        return;
> > > > > > +        return ret;
> > > > > >      }
> > > > > >
> > > > > >      ret = k->set_guest_notifiers(qbus->parent, s->dev.nvqs, true);
> > > > > > @@ -157,12 +157,13 @@ static void vhost_user_blk_start(VirtIODevice *vdev)
> > > > > >          vhost_virtqueue_mask(&s->dev, vdev, i, false);
> > > > > >      }
> > > > > >
> > > > > > -    return;
> > > > > > +    return ret;
> > > > > >
> > > > > >  err_guest_notifiers:
> > > > > >      k->set_guest_notifiers(qbus->parent, s->dev.nvqs, false);
> > > > > >  err_host_notifiers:
> > > > > >      vhost_dev_disable_notifiers(&s->dev, vdev);
> > > > > > +    return ret;
> > > > > >  }
> > > > > >
> > > > > >  static void vhost_user_blk_stop(VirtIODevice *vdev)
> > > > > > @@ -181,7 +182,6 @@ static void vhost_user_blk_stop(VirtIODevice *vdev)
> > > > > >      ret = k->set_guest_notifiers(qbus->parent, s->dev.nvqs, false);
> > > > > >      if (ret < 0) {
> > > > > >          error_report("vhost guest notifier cleanup failed: %d", ret);
> > > > > > -        return;
> > > > > >      }
> > > > > >
> > > > > >      vhost_dev_disable_notifiers(&s->dev, vdev);
> > > > > > @@ -191,21 +191,43 @@ static void vhost_user_blk_set_status(VirtIODevice *vdev, uint8_t status)
> > > > > >  {
> > > > > >      VHostUserBlk *s = VHOST_USER_BLK(vdev);
> > > > > >      bool should_start = status & VIRTIO_CONFIG_S_DRIVER_OK;
> > > > > > +    int ret;
> > > > > >
> > > > > >      if (!vdev->vm_running) {
> > > > > >          should_start = false;
> > > > > >      }
> > > > > >
> > > > > > -    if (s->dev.started == should_start) {
> > > > > > +    if (s->should_start == should_start) {
> > > > > > +        return;
> > > > > > +    }
> > > > > > +
> > > > > > +    if (!s->connected || s->dev.started == should_start) {
> > > > > > +        s->should_start = should_start;
> > > > > >          return;
> > > > > >      }
> > > > > >
> > > > > >      if (should_start) {
> > > > > > -        vhost_user_blk_start(vdev);
> > > > > > +        s->should_start = true;
> > > > > > +        /*
> > > > > > +         * make sure vhost_user_blk_handle_output() ignores fake
> > > > > > +         * guest kick by vhost_dev_enable_notifiers()
> > > > > > +         */
> > > > > > +        barrier();
> > > > > > +        ret = vhost_user_blk_start(vdev);
> > > > > > +        if (ret < 0) {
> > > > > > +            error_report("vhost-user-blk: vhost start failed: %s",
> > > > > > +                         strerror(-ret));
> > > > > > +            qemu_chr_fe_disconnect(&s->chardev);
> > > > > > +        }
> > > > > >      } else {
> > > > > >          vhost_user_blk_stop(vdev);
> > > > > > +        /*
> > > > > > +         * make sure vhost_user_blk_handle_output() ignore fake
> > > > > > +         * guest kick by vhost_dev_disable_notifiers()
> > > > > > +         */
> > > > > > +        barrier();
> > > > > > +        s->should_start = false;
> > > > > >      }
> > > > >
> > > > > I don't understand the comment about ignoring fake guest kicks.
> > > > > Please add an explanation about what does barrier do here.
> > > >
> > > > We may get stack like this:
> > > >
> > > > vhost_user_blk_stop()
> > > >   vhost_dev_disable_notifiers()
> > > >     virtio_bus_cleanup_host_notifier()
> > > >       virtio_queue_host_notifier_read()
> > > >         virtio_queue_notify_vq()  /* fake guest kick */
> > > >           vhost_user_blk_handle_output()
> > > >
> > > > vhost_user_blk_handle_output() will re-start vhost-user if
> > > > should_start is false. This is not what we expect. The same to
> > > > vhost_dev_enable_notifiers().
> > >
> > > Well what does a compiler barrier have to do with it?
> > >
> > > Anyway, it's there to avoid losing kicks e.g. across migration. If you
> > > have something else that prevents losing kicks (e.g. migrate a "started"
> > > flag) then add code to skip the notify - don't work around it.
> > >
> >
> > OK, I see. Thanks.
> >
> > IIUC, vhost-user-blk device actually does not need this kind of kick
> > because dataplane is not in qemu side, right?
> >
> > >
> > > > > And maybe a detailed explanation about what "fake kick"
> > > > > is in the commit log.
> > > > >
> > > >
> > > > Sure.
> > > > >
> > > > > > -
> > > > > >  }
> > > > > >
> > > > > >  static uint64_t vhost_user_blk_get_features(VirtIODevice *vdev,
> > > > > > @@ -237,13 +259,22 @@ static uint64_t vhost_user_blk_get_features(VirtIODevice *vdev,
> > > > > >  static void vhost_user_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq)
> > > > > >  {
> > > > > >      VHostUserBlk *s = VHOST_USER_BLK(vdev);
> > > > > > -    int i;
> > > > > > +    int i, ret;
> > > > > >
> > > > > >      if (!(virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1) &&
> > > > > >          !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1))) {
> > > > > >          return;
> > > > > >      }
> > > > > >
> > > > > > +    if (s->should_start) {
> > > > > > +        return;
> > > > > > +    }
> > > > > > +    s->should_start = true;
> > > > > > +
> > > > > > +    if (!s->connected) {
> > > > > > +        return;
> > > > > > +    }
> > > > > > +
> > > > > >      if (s->dev.started) {
> > > > > >          return;
> > > > > >      }
> > > > > > @@ -251,7 +282,13 @@ static void vhost_user_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq)
> > > > > >      /* Some guests kick before setting VIRTIO_CONFIG_S_DRIVER_OK so start
> > > > > >       * vhost here instead of waiting for .set_status().
> > > > > >       */
> > > > > > -    vhost_user_blk_start(vdev);
> > > > > > +    ret = vhost_user_blk_start(vdev);
> > > > > > +    if (ret < 0) {
> > > > > > +        error_report("vhost-user-blk: vhost start failed: %s",
> > > > > > +                     strerror(-ret));
> > > > > > +        qemu_chr_fe_disconnect(&s->chardev);
> > > > > > +        return;
> > > > > > +    }
> > > > > >
> > > > > >      /* Kick right away to begin processing requests already in vring */
> > > > > >      for (i = 0; i < s->dev.nvqs; i++) {
> > > > > > @@ -271,13 +308,106 @@ static void vhost_user_blk_reset(VirtIODevice *vdev)
> > > > > >      vhost_dev_free_inflight(s->inflight);
> > > > > >  }
> > > > > >
> > > > > > +static int vhost_user_blk_connect(DeviceState *dev)
> > > > > > +{
> > > > > > +    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> > > > > > +    VHostUserBlk *s = VHOST_USER_BLK(vdev);
> > > > > > +    int ret = 0;
> > > > > > +
> > > > > > +    if (s->connected) {
> > > > > > +        return 0;
> > > > > > +    }
> > > > > > +    s->connected = true;
> > > > > > +
> > > > > > +    s->dev.nvqs = s->num_queues;
> > > > > > +    s->dev.vqs = s->vqs;
> > > > > > +    s->dev.vq_index = 0;
> > > > > > +    s->dev.backend_features = 0;
> > > > > > +
> > > > > > +    vhost_dev_set_config_notifier(&s->dev, &blk_ops);
> > > > > > +
> > > > > > +    ret = vhost_dev_init(&s->dev, s->vhost_user, VHOST_BACKEND_TYPE_USER, 0);
> > > > > > +    if (ret < 0) {
> > > > > > +        error_report("vhost-user-blk: vhost initialization failed: %s",
> > > > > > +                     strerror(-ret));
> > > > > > +        return ret;
> > > > > > +    }
> > > > > > +
> > > > > > +    /* restore vhost state */
> > > > > > +    if (s->should_start) {
> > > > > > +        ret = vhost_user_blk_start(vdev);
> > > > > > +        if (ret < 0) {
> > > > > > +            error_report("vhost-user-blk: vhost start failed: %s",
> > > > > > +                         strerror(-ret));
> > > > > > +            return ret;
> > > > > > +        }
> > > > > > +    }
> > > > > > +
> > > > > > +    return 0;
> > > > > > +}
> > > > > > +
> > > > > > +static void vhost_user_blk_disconnect(DeviceState *dev)
> > > > > > +{
> > > > > > +    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> > > > > > +    VHostUserBlk *s = VHOST_USER_BLK(vdev);
> > > > > > +
> > > > > > +    if (!s->connected) {
> > > > > > +        return;
> > > > > > +    }
> > > > > > +    s->connected = false;
> > > > > > +
> > > > > > +    if (s->dev.started) {
> > > > > > +        vhost_user_blk_stop(vdev);
> > > > > > +    }
> > > > > > +
> > > > > > +    vhost_dev_cleanup(&s->dev);
> > > > > > +}
> > > > > > +
> > > > > > +static gboolean vhost_user_blk_watch(GIOChannel *chan, GIOCondition cond,
> > > > > > +                                     void *opaque)
> > > > > > +{
> > > > > > +    DeviceState *dev = opaque;
> > > > > > +    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> > > > > > +    VHostUserBlk *s = VHOST_USER_BLK(vdev);
> > > > > > +
> > > > > > +    qemu_chr_fe_disconnect(&s->chardev);
> > > > > > +
> > > > > > +    return true;
> > > > > > +}
> > > > > > +
> > > > > > +static void vhost_user_blk_event(void *opaque, int event)
> > > > > > +{
> > > > > > +    DeviceState *dev = opaque;
> > > > > > +    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> > > > > > +    VHostUserBlk *s = VHOST_USER_BLK(vdev);
> > > > > > +
> > > > > > +    switch (event) {
> > > > > > +    case CHR_EVENT_OPENED:
> > > > > > +        if (vhost_user_blk_connect(dev) < 0) {
> > > > > > +            qemu_chr_fe_disconnect(&s->chardev);
> > > > > > +            return;
> > > > > > +        }
> > > > > > +        s->watch = qemu_chr_fe_add_watch(&s->chardev, G_IO_HUP,
> > > > > > +                                         vhost_user_blk_watch, dev);
> > > > > > +        break;
> > > > > > +    case CHR_EVENT_CLOSED:
> > > > > > +        vhost_user_blk_disconnect(dev);
> > > > > > +        if (s->watch) {
> > > > > > +            g_source_remove(s->watch);
> > > > > > +            s->watch = 0;
> > > > > > +        }
> > > > > > +        break;
> > > > > > +    }
> > > > > > +}
> > > > > > +
> > > > > > +
> > > > > >  static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp)
> > > > > >  {
> > > > > >      VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> > > > > >      VHostUserBlk *s = VHOST_USER_BLK(vdev);
> > > > > >      VhostUserState *user;
> > > > > > -    struct vhost_virtqueue *vqs = NULL;
> > > > > >      int i, ret;
> > > > > > +    Error *err = NULL;
> > > > > >
> > > > > >      if (!s->chardev.chr) {
> > > > > >          error_setg(errp, "vhost-user-blk: chardev is mandatory");
> > > > > > @@ -312,27 +442,28 @@ static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp)
> > > > > >      }
> > > > > >
> > > > > >      s->inflight = g_new0(struct vhost_inflight, 1);
> > > > > > -
> > > > > > -    s->dev.nvqs = s->num_queues;
> > > > > > -    s->dev.vqs = g_new(struct vhost_virtqueue, s->dev.nvqs);
> > > > > > -    s->dev.vq_index = 0;
> > > > > > -    s->dev.backend_features = 0;
> > > > > > -    vqs = s->dev.vqs;
> > > > > > -
> > > > > > -    vhost_dev_set_config_notifier(&s->dev, &blk_ops);
> > > > > > -
> > > > > > -    ret = vhost_dev_init(&s->dev, s->vhost_user, VHOST_BACKEND_TYPE_USER, 0);
> > > > > > -    if (ret < 0) {
> > > > > > -        error_setg(errp, "vhost-user-blk: vhost initialization failed: %s",
> > > > > > -                   strerror(-ret));
> > > > > > -        goto virtio_err;
> > > > > > -    }
> > > > > > +    s->vqs = g_new(struct vhost_virtqueue, s->num_queues);
> > > > > > +    s->watch = 0;
> > > > > > +    s->should_start = false;
> > > > > > +    s->connected = false;
> > > > > > +
> > > > > > +    qemu_chr_fe_set_handlers(&s->chardev,  NULL, NULL, vhost_user_blk_event,
> > > > > > +                             NULL, (void *)dev, NULL, true);
> > > > > > +
> > > > > > +reconnect:
> > > > > > +    do {
> > > > > > +        if (qemu_chr_fe_wait_connected(&s->chardev, &err) < 0) {
> > > > > > +            error_report_err(err);
> > > > > > +            err = NULL;
> > > > > > +            sleep(1);
> > > > > > +        }
> > > > > > +    } while (!s->connected);
> > > > > >
> > > > > >      ret = vhost_dev_get_config(&s->dev, (uint8_t *)&s->blkcfg,
> > > > > > -                              sizeof(struct virtio_blk_config));
> > > > > > +                               sizeof(struct virtio_blk_config));
> > > > > >      if (ret < 0) {
> > > > > > -        error_setg(errp, "vhost-user-blk: get block config failed");
> > > > > > -        goto vhost_err;
> > > > > > +        error_report("vhost-user-blk: get block config failed");
> > > > > > +        goto reconnect;
> > > > > >      }
> > > > > >
> > > > > >      if (s->blkcfg.num_queues != s->num_queues) {
> > > > > > @@ -340,29 +471,19 @@ static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp)
> > > > > >      }
> > > > > >
> > > > > >      return;
> > > > >
> > > > > With this we end up with return at end of function
> > > > > which makes no sense.
> > > > >
> > > >
> > > > Oh, yes. I will remove it.
> > > >
> > > > > > -
> > > > > > -vhost_err:
> > > > > > -    vhost_dev_cleanup(&s->dev);
> > > > > > -virtio_err:
> > > > > > -    g_free(vqs);
> > > > > > -    g_free(s->inflight);
> > > > > > -    virtio_cleanup(vdev);
> > > > > > -
> > > > > > -    vhost_user_cleanup(user);
> > > > > > -    g_free(user);
> > > > > > -    s->vhost_user = NULL;
> > > > > >  }
> > > > > >
> > > > > >  static void vhost_user_blk_device_unrealize(DeviceState *dev, Error **errp)
> > > > > >  {
> > > > > >      VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> > > > > >      VHostUserBlk *s = VHOST_USER_BLK(dev);
> > > > > > -    struct vhost_virtqueue *vqs = s->dev.vqs;
> > > > > >
> > > > > >      vhost_user_blk_set_status(vdev, 0);
> > > > > > +    qemu_chr_fe_set_handlers(&s->chardev,  NULL, NULL, NULL,
> > > > > > +                             NULL, NULL, NULL, false);
> > > > > >      vhost_dev_cleanup(&s->dev);
> > > > > >      vhost_dev_free_inflight(s->inflight);
> > > > > > -    g_free(vqs);
> > > > > > +    g_free(s->vqs);
> > > > > >      g_free(s->inflight);
> > > > > >      virtio_cleanup(vdev);
> > > > > >
> > > > > > diff --git a/include/hw/virtio/vhost-user-blk.h b/include/hw/virtio/vhost-user-blk.h
> > > > > > index 445516604a..4849aa5eb5 100644
> > > > > > --- a/include/hw/virtio/vhost-user-blk.h
> > > > > > +++ b/include/hw/virtio/vhost-user-blk.h
> > > > > > @@ -38,6 +38,10 @@ typedef struct VHostUserBlk {
> > > > > >      struct vhost_dev dev;
> > > > > >      struct vhost_inflight *inflight;
> > > > > >      VhostUserState *vhost_user;
> > > > > > +    struct vhost_virtqueue *vqs;
> > > > > > +    guint watch;
> > > > > > +    bool should_start;
> > > > >
> > > > > Please add comments about fields.
> > > > > Also should_start seems to be set on guest activity.
> > > > > Does it need to be migrated?
> > > > > It would be better not to track guest state like this.
> > > > > Why do we need it?
> > > > > Pls add comments with an explanation.
> > > > >
> > > >
> > > > We can't relay on vhost_dev.started to track guest's state in
> > > > vhost_user_blk_set_status() when we enable reconnecting. For example,
> > > > qemu will lose guest's state if guest stop device during backend is
> > > > disconnected. So vhost_dev.started is used to track backend's state
> > > > and should_start is used to track guest's state in this patch. And
> > > > this variable is based on VirtIODevice.status, so I think it is not
> > > > necessary to migrate it. What do you think about it?
> > >
> > > If it's based on VirtIODevice.status then how about not keeping
> > > it around at all and just calculating from VirtIODevice.status
> > > when necessary?
> > >
> >
> > Sorry, I made a mistake before. Normally, should_start is based on
> > VirtIODevice.status. But it's not the case for virtio 1.0 transitional
> > devices. Then I realize there may be a bug. If migration completes
> > between kicking virtqueue and setting VIRTIO_CONFIG_S_DRIVER_OK, guest
> > may be hung. So I think should_start should be migrated in this case.
> >
> > Thanks,
> > Yongji
>
> This is exactly the case that the fake kick is intended to fix.
>

Sorry, I do not understand how this case can be fixed by the fake
kick. In this case, it seems we have no chance to trigger the fake
kick. After migration, vhost_user_blk_set_status() will find that
VirtIODevice.status is !VIRTIO_CONFIG_S_DRIVER_OK and then skip starting
the vhost-user backend (causing the guest to hang).

> We can migrate this state but it's really a virtio thing,
> not vhost specific. migrate it and skip fake kick, sure.
>

Could you explain a bit more how to skip the fake kick? Thank you.

Thanks,
Yongji
Daniel P. Berrangé March 15, 2019, 9:46 a.m. UTC | #17
On Thu, Mar 14, 2019 at 03:31:47PM +0300, Yury Kotov wrote:
> Hi,
> 
> 14.03.2019, 14:44, "Daniel P. Berrangé" <berrange@redhat.com>:
> > On Thu, Mar 14, 2019 at 07:34:03AM -0400, Michael S. Tsirkin wrote:
> >>  On Thu, Mar 14, 2019 at 11:24:22AM +0000, Daniel P. Berrangé wrote:
> >>  > On Tue, Mar 12, 2019 at 12:49:35PM -0400, Michael S. Tsirkin wrote:
> >>  > > On Thu, Feb 28, 2019 at 04:53:54PM +0800, elohimes@gmail.com wrote:
> >>  > > > From: Xie Yongji <xieyongji@baidu.com>
> >>  > > >
> >>  > > > Since we now support the message VHOST_USER_GET_INFLIGHT_FD
> >>  > > > and VHOST_USER_SET_INFLIGHT_FD. The backend is able to restart
> >>  > > > safely because it can track inflight I/O in shared memory.
> >>  > > > This patch allows qemu to reconnect the backend after
> >>  > > > connection closed.
> >>  > > >
> >>  > > > Signed-off-by: Xie Yongji <xieyongji@baidu.com>
> >>  > > > Signed-off-by: Ni Xun <nixun@baidu.com>
> >>  > > > Signed-off-by: Zhang Yu <zhangyu31@baidu.com>
> >>  > > > ---
> >>  > > > hw/block/vhost-user-blk.c | 205 +++++++++++++++++++++++------
> >>  > > > include/hw/virtio/vhost-user-blk.h | 4 +
> >>  > > > 2 files changed, 167 insertions(+), 42 deletions(-)
> >>  >
> >>  >
> >>  > > > static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp)
> >>  > > > {
> >>  > > > VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> >>  > > > VHostUserBlk *s = VHOST_USER_BLK(vdev);
> >>  > > > VhostUserState *user;
> >>  > > > - struct vhost_virtqueue *vqs = NULL;
> >>  > > > int i, ret;
> >>  > > > + Error *err = NULL;
> >>  > > >
> >>  > > > if (!s->chardev.chr) {
> >>  > > > error_setg(errp, "vhost-user-blk: chardev is mandatory");
> >>  > > > @@ -312,27 +442,28 @@ static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp)
> >>  > > > }
> >>  > > >
> >>  > > > s->inflight = g_new0(struct vhost_inflight, 1);
> >>  > > > -
> >>  > > > - s->dev.nvqs = s->num_queues;
> >>  > > > - s->dev.vqs = g_new(struct vhost_virtqueue, s->dev.nvqs);
> >>  > > > - s->dev.vq_index = 0;
> >>  > > > - s->dev.backend_features = 0;
> >>  > > > - vqs = s->dev.vqs;
> >>  > > > -
> >>  > > > - vhost_dev_set_config_notifier(&s->dev, &blk_ops);
> >>  > > > -
> >>  > > > - ret = vhost_dev_init(&s->dev, s->vhost_user, VHOST_BACKEND_TYPE_USER, 0);
> >>  > > > - if (ret < 0) {
> >>  > > > - error_setg(errp, "vhost-user-blk: vhost initialization failed: %s",
> >>  > > > - strerror(-ret));
> >>  > > > - goto virtio_err;
> >>  > > > - }
> >>  > > > + s->vqs = g_new(struct vhost_virtqueue, s->num_queues);
> >>  > > > + s->watch = 0;
> >>  > > > + s->should_start = false;
> >>  > > > + s->connected = false;
> >>  > > > +
> >>  > > > + qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL, vhost_user_blk_event,
> >>  > > > + NULL, (void *)dev, NULL, true);
> >>  > > > +
> >>  > > > +reconnect:
> >>  > > > + do {
> >>  > > > + if (qemu_chr_fe_wait_connected(&s->chardev, &err) < 0) {
> >>  > > > + error_report_err(err);
> >>  > > > + err = NULL;
> >>  > > > + sleep(1);
> >>  > >
> >>  > > Seems arbitrary. Is this basically waiting until backend will reconnect?
> >>  > > Why not block until event on the fd triggers?
> >>  > >
> >>  > > Also, it looks like this will just block forever with no monitor input
> >>  > > and no way for user to figure out what is going on short of
> >>  > > crashing QEMU.
> >>  >
> >>  > FWIW, the current vhost-user-net device does exactly the same thing
> >>  > with calling qemu_chr_fe_wait_connected during its realize() function.
> >>
> >>  Hmm yes. It doesn't sleep for an arbitrary 1 sec so less of an eyesore :)
> >
> > The sleep(1) in this patch simply needs to be removed. I think that
> > probably dates from when it was written against the earlier broken
> > version of qemu_chr_fe_wait_connected(). That would not correctly
> > deal with the "reconnect" flag, and so needing this loop with a sleep
> > in it.
> >
> > In fact the while loop can be removed as well in this code. It just
> > needs to call qemu_chr_fe_wait_connected() once. It is guaranteed
> > to have a connected peer once that returns 0.
> >
> > qemu_chr_fe_wait_connected() only returns -1 if the operating in
> > client mode, and it failed to connect and reconnect is *not*
> > requested. In such case the caller should honour the failure and
> > quit, not loop to retry.
> >
> > The reason vhost-user-net does a loop is because once it has a
> > connection it tries todo a protocol handshake, and if that
> > handshake fails it closes the chardev and tries to connect
> > again. That's not the case in this blk code os the loop is
> > not needed.
> >
> 
> But vhost-user-blk also has a handshake in device realize. What happens if the
> connection is broken during realization? IIUC we have to retry a handshake in
> such case just like vhost-user-net.

I'm just commenting on the current code which does not do that
handshake in the loop afaict. If it needs to do that then the
patch should be updated...


Regards,
Daniel
Yury Kotov March 15, 2019, 10:41 a.m. UTC | #18
15.03.2019, 12:46, "Daniel P. Berrangé" <berrange@redhat.com>:
> On Thu, Mar 14, 2019 at 03:31:47PM +0300, Yury Kotov wrote:
>>  Hi,
>>
>>  14.03.2019, 14:44, "Daniel P. Berrangé" <berrange@redhat.com>:
>>  > On Thu, Mar 14, 2019 at 07:34:03AM -0400, Michael S. Tsirkin wrote:
>>  >>  On Thu, Mar 14, 2019 at 11:24:22AM +0000, Daniel P. Berrangé wrote:
>>  >>  > On Tue, Mar 12, 2019 at 12:49:35PM -0400, Michael S. Tsirkin wrote:
>>  >>  > > On Thu, Feb 28, 2019 at 04:53:54PM +0800, elohimes@gmail.com wrote:
>>  >>  > > > From: Xie Yongji <xieyongji@baidu.com>
>>  >>  > > >
>>  >>  > > > Since we now support the message VHOST_USER_GET_INFLIGHT_FD
>>  >>  > > > and VHOST_USER_SET_INFLIGHT_FD. The backend is able to restart
>>  >>  > > > safely because it can track inflight I/O in shared memory.
>>  >>  > > > This patch allows qemu to reconnect the backend after
>>  >>  > > > connection closed.
>>  >>  > > >
>>  >>  > > > Signed-off-by: Xie Yongji <xieyongji@baidu.com>
>>  >>  > > > Signed-off-by: Ni Xun <nixun@baidu.com>
>>  >>  > > > Signed-off-by: Zhang Yu <zhangyu31@baidu.com>
>>  >>  > > > ---
>>  >>  > > > hw/block/vhost-user-blk.c | 205 +++++++++++++++++++++++------
>>  >>  > > > include/hw/virtio/vhost-user-blk.h | 4 +
>>  >>  > > > 2 files changed, 167 insertions(+), 42 deletions(-)
>>  >>  >
>>  >>  >
>>  >>  > > > static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp)
>>  >>  > > > {
>>  >>  > > > VirtIODevice *vdev = VIRTIO_DEVICE(dev);
>>  >>  > > > VHostUserBlk *s = VHOST_USER_BLK(vdev);
>>  >>  > > > VhostUserState *user;
>>  >>  > > > - struct vhost_virtqueue *vqs = NULL;
>>  >>  > > > int i, ret;
>>  >>  > > > + Error *err = NULL;
>>  >>  > > >
>>  >>  > > > if (!s->chardev.chr) {
>>  >>  > > > error_setg(errp, "vhost-user-blk: chardev is mandatory");
>>  >>  > > > @@ -312,27 +442,28 @@ static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp)
>>  >>  > > > }
>>  >>  > > >
>>  >>  > > > s->inflight = g_new0(struct vhost_inflight, 1);
>>  >>  > > > -
>>  >>  > > > - s->dev.nvqs = s->num_queues;
>>  >>  > > > - s->dev.vqs = g_new(struct vhost_virtqueue, s->dev.nvqs);
>>  >>  > > > - s->dev.vq_index = 0;
>>  >>  > > > - s->dev.backend_features = 0;
>>  >>  > > > - vqs = s->dev.vqs;
>>  >>  > > > -
>>  >>  > > > - vhost_dev_set_config_notifier(&s->dev, &blk_ops);
>>  >>  > > > -
>>  >>  > > > - ret = vhost_dev_init(&s->dev, s->vhost_user, VHOST_BACKEND_TYPE_USER, 0);
>>  >>  > > > - if (ret < 0) {
>>  >>  > > > - error_setg(errp, "vhost-user-blk: vhost initialization failed: %s",
>>  >>  > > > - strerror(-ret));
>>  >>  > > > - goto virtio_err;
>>  >>  > > > - }
>>  >>  > > > + s->vqs = g_new(struct vhost_virtqueue, s->num_queues);
>>  >>  > > > + s->watch = 0;
>>  >>  > > > + s->should_start = false;
>>  >>  > > > + s->connected = false;
>>  >>  > > > +
>>  >>  > > > + qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL, vhost_user_blk_event,
>>  >>  > > > + NULL, (void *)dev, NULL, true);
>>  >>  > > > +
>>  >>  > > > +reconnect:
>>  >>  > > > + do {
>>  >>  > > > + if (qemu_chr_fe_wait_connected(&s->chardev, &err) < 0) {
>>  >>  > > > + error_report_err(err);
>>  >>  > > > + err = NULL;
>>  >>  > > > + sleep(1);
>>  >>  > >
>>  >>  > > Seems arbitrary. Is this basically waiting until backend will reconnect?
>>  >>  > > Why not block until event on the fd triggers?
>>  >>  > >
>>  >>  > > Also, it looks like this will just block forever with no monitor input
>>  >>  > > and no way for user to figure out what is going on short of
>>  >>  > > crashing QEMU.
>>  >>  >
>>  >>  > FWIW, the current vhost-user-net device does exactly the same thing
>>  >>  > with calling qemu_chr_fe_wait_connected during its realize() function.
>>  >>
>>  >>  Hmm yes. It doesn't sleep for an arbitrary 1 sec so less of an eyesore :)
>>  >
>>  > The sleep(1) in this patch simply needs to be removed. I think that
>>  > probably dates from when it was written against the earlier broken
>>  > version of qemu_chr_fe_wait_connected(). That would not correctly
>>  > deal with the "reconnect" flag, and so needing this loop with a sleep
>>  > in it.
>>  >
>>  > In fact the while loop can be removed as well in this code. It just
>>  > needs to call qemu_chr_fe_wait_connected() once. It is guaranteed
>>  > to have a connected peer once that returns 0.
>>  >
>>  > qemu_chr_fe_wait_connected() only returns -1 if the operating in
>>  > client mode, and it failed to connect and reconnect is *not*
>>  > requested. In such case the caller should honour the failure and
>>  > quit, not loop to retry.
>>  >
>>  > The reason vhost-user-net does a loop is because once it has a
>>  > connection it tries todo a protocol handshake, and if that
>>  > handshake fails it closes the chardev and tries to connect
>>  > again. That's not the case in this blk code os the loop is
>>  > not needed.
>>  >
>>
>>  But vhost-user-blk also has a handshake in device realize. What happens if the
>>  connection is broken during realization? IIUC we have to retry a handshake in
>>  such case just like vhost-user-net.
>
> I'm just commenting on the current code which does not do that
> handshake in the loop afaict. If it needs to do that then the
> patch should be updated...
>

Oh, yes... This loop doesn't do a handshake; the handshake is after the loop.
But now it jumps back to the reconnect label on failure. So maybe it makes sense
to rewrite the handshake, since we don't need two nested loops to get
reconnection without gotos.

Regards,
Yury
Yongji Xie March 15, 2019, 12:37 p.m. UTC | #19
On Fri, 15 Mar 2019 at 18:41, Yury Kotov <yury-kotov@yandex-team.ru> wrote:
>
> 15.03.2019, 12:46, "Daniel P. Berrangé" <berrange@redhat.com>:
> > On Thu, Mar 14, 2019 at 03:31:47PM +0300, Yury Kotov wrote:
> >>  Hi,
> >>
> >>  14.03.2019, 14:44, "Daniel P. Berrangé" <berrange@redhat.com>:
> >>  > On Thu, Mar 14, 2019 at 07:34:03AM -0400, Michael S. Tsirkin wrote:
> >>  >>  On Thu, Mar 14, 2019 at 11:24:22AM +0000, Daniel P. Berrangé wrote:
> >>  >>  > On Tue, Mar 12, 2019 at 12:49:35PM -0400, Michael S. Tsirkin wrote:
> >>  >>  > > On Thu, Feb 28, 2019 at 04:53:54PM +0800, elohimes@gmail.com wrote:
> >>  >>  > > > From: Xie Yongji <xieyongji@baidu.com>
> >>  >>  > > >
> >>  >>  > > > Since we now support the message VHOST_USER_GET_INFLIGHT_FD
> >>  >>  > > > and VHOST_USER_SET_INFLIGHT_FD. The backend is able to restart
> >>  >>  > > > safely because it can track inflight I/O in shared memory.
> >>  >>  > > > This patch allows qemu to reconnect the backend after
> >>  >>  > > > connection closed.
> >>  >>  > > >
> >>  >>  > > > Signed-off-by: Xie Yongji <xieyongji@baidu.com>
> >>  >>  > > > Signed-off-by: Ni Xun <nixun@baidu.com>
> >>  >>  > > > Signed-off-by: Zhang Yu <zhangyu31@baidu.com>
> >>  >>  > > > ---
> >>  >>  > > > hw/block/vhost-user-blk.c | 205 +++++++++++++++++++++++------
> >>  >>  > > > include/hw/virtio/vhost-user-blk.h | 4 +
> >>  >>  > > > 2 files changed, 167 insertions(+), 42 deletions(-)
> >>  >>  >
> >>  >>  >
> >>  >>  > > > static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp)
> >>  >>  > > > {
> >>  >>  > > > VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> >>  >>  > > > VHostUserBlk *s = VHOST_USER_BLK(vdev);
> >>  >>  > > > VhostUserState *user;
> >>  >>  > > > - struct vhost_virtqueue *vqs = NULL;
> >>  >>  > > > int i, ret;
> >>  >>  > > > + Error *err = NULL;
> >>  >>  > > >
> >>  >>  > > > if (!s->chardev.chr) {
> >>  >>  > > > error_setg(errp, "vhost-user-blk: chardev is mandatory");
> >>  >>  > > > @@ -312,27 +442,28 @@ static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp)
> >>  >>  > > > }
> >>  >>  > > >
> >>  >>  > > > s->inflight = g_new0(struct vhost_inflight, 1);
> >>  >>  > > > -
> >>  >>  > > > - s->dev.nvqs = s->num_queues;
> >>  >>  > > > - s->dev.vqs = g_new(struct vhost_virtqueue, s->dev.nvqs);
> >>  >>  > > > - s->dev.vq_index = 0;
> >>  >>  > > > - s->dev.backend_features = 0;
> >>  >>  > > > - vqs = s->dev.vqs;
> >>  >>  > > > -
> >>  >>  > > > - vhost_dev_set_config_notifier(&s->dev, &blk_ops);
> >>  >>  > > > -
> >>  >>  > > > - ret = vhost_dev_init(&s->dev, s->vhost_user, VHOST_BACKEND_TYPE_USER, 0);
> >>  >>  > > > - if (ret < 0) {
> >>  >>  > > > - error_setg(errp, "vhost-user-blk: vhost initialization failed: %s",
> >>  >>  > > > - strerror(-ret));
> >>  >>  > > > - goto virtio_err;
> >>  >>  > > > - }
> >>  >>  > > > + s->vqs = g_new(struct vhost_virtqueue, s->num_queues);
> >>  >>  > > > + s->watch = 0;
> >>  >>  > > > + s->should_start = false;
> >>  >>  > > > + s->connected = false;
> >>  >>  > > > +
> >>  >>  > > > + qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL, vhost_user_blk_event,
> >>  >>  > > > + NULL, (void *)dev, NULL, true);
> >>  >>  > > > +
> >>  >>  > > > +reconnect:
> >>  >>  > > > + do {
> >>  >>  > > > + if (qemu_chr_fe_wait_connected(&s->chardev, &err) < 0) {
> >>  >>  > > > + error_report_err(err);
> >>  >>  > > > + err = NULL;
> >>  >>  > > > + sleep(1);
> >>  >>  > >
> >>  >>  > > Seems arbitrary. Is this basically waiting until backend will reconnect?
> >>  >>  > > Why not block until event on the fd triggers?
> >>  >>  > >
> >>  >>  > > Also, it looks like this will just block forever with no monitor input
> >>  >>  > > and no way for user to figure out what is going on short of
> >>  >>  > > crashing QEMU.
> >>  >>  >
> >>  >>  > FWIW, the current vhost-user-net device does exactly the same thing
> >>  >>  > with calling qemu_chr_fe_wait_connected during its realize() function.
> >>  >>
> >>  >>  Hmm yes. It doesn't sleep for an arbitrary 1 sec so less of an eyesore :)
> >>  >
> >>  > The sleep(1) in this patch simply needs to be removed. I think that
> >>  > probably dates from when it was written against the earlier broken
> >>  > version of qemu_chr_fe_wait_connected(). That would not correctly
> >>  > deal with the "reconnect" flag, and so needing this loop with a sleep
> >>  > in it.
> >>  >
> >>  > In fact the while loop can be removed as well in this code. It just
> >>  > needs to call qemu_chr_fe_wait_connected() once. It is guaranteed
> >>  > to have a connected peer once that returns 0.
> >>  >
> >>  > qemu_chr_fe_wait_connected() only returns -1 if the operating in
> >>  > client mode, and it failed to connect and reconnect is *not*
> >>  > requested. In such case the caller should honour the failure and
> >>  > quit, not loop to retry.
> >>  >
> >>  > The reason vhost-user-net does a loop is because once it has a
> >>  > connection it tries todo a protocol handshake, and if that
> >>  > handshake fails it closes the chardev and tries to connect
> >>  > again. That's not the case in this blk code os the loop is
> >>  > not needed.
> >>  >
> >>
> >>  But vhost-user-blk also has a handshake in device realize. What happens if the
> >>  connection is broken during realization? IIUC we have to retry a handshake in
> >>  such case just like vhost-user-net.
> >
> > I'm just commenting on the current code which does not do that
> > handshake in the loop afaict. If it needs to do that then the
> > patch should be updated...
> >
>
> Oh, yes... This loop doesn't do a handshake. Handshake is after the loop.
> But now it gotos to reconnect. So may be it makes sense to rewrite a handshake
> since we don't need two nested loops to get reconnection without gotos.
>

Actually we do a handshake in the loop, like this:

qemu_chr_fe_wait_connected()
  tcp_chr_wait_connected()
    tcp_chr_connect_client_sync()
      tcp_chr_new_client()
        qemu_chr_be_event(chr, CHR_EVENT_OPENED);
          vhost_user_blk_event()
            vhost_user_blk_connect()
              vhost_dev_init()

Then I use s->connected to check the result of vhost_dev_init().

Thanks,
Yongji
diff mbox series

Patch

diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c
index 9682df1a7b..539ea2e571 100644
--- a/hw/block/vhost-user-blk.c
+++ b/hw/block/vhost-user-blk.c
@@ -103,7 +103,7 @@  const VhostDevConfigOps blk_ops = {
     .vhost_dev_config_notifier = vhost_user_blk_handle_config_change,
 };
 
-static void vhost_user_blk_start(VirtIODevice *vdev)
+static int vhost_user_blk_start(VirtIODevice *vdev)
 {
     VHostUserBlk *s = VHOST_USER_BLK(vdev);
     BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
@@ -112,13 +112,13 @@  static void vhost_user_blk_start(VirtIODevice *vdev)
 
     if (!k->set_guest_notifiers) {
         error_report("binding does not support guest notifiers");
-        return;
+        return -ENOSYS;
     }
 
     ret = vhost_dev_enable_notifiers(&s->dev, vdev);
     if (ret < 0) {
         error_report("Error enabling host notifiers: %d", -ret);
-        return;
+        return ret;
     }
 
     ret = k->set_guest_notifiers(qbus->parent, s->dev.nvqs, true);
@@ -157,12 +157,13 @@  static void vhost_user_blk_start(VirtIODevice *vdev)
         vhost_virtqueue_mask(&s->dev, vdev, i, false);
     }
 
-    return;
+    return ret;
 
 err_guest_notifiers:
     k->set_guest_notifiers(qbus->parent, s->dev.nvqs, false);
 err_host_notifiers:
     vhost_dev_disable_notifiers(&s->dev, vdev);
+    return ret;
 }
 
 static void vhost_user_blk_stop(VirtIODevice *vdev)
@@ -181,7 +182,6 @@  static void vhost_user_blk_stop(VirtIODevice *vdev)
     ret = k->set_guest_notifiers(qbus->parent, s->dev.nvqs, false);
     if (ret < 0) {
         error_report("vhost guest notifier cleanup failed: %d", ret);
-        return;
     }
 
     vhost_dev_disable_notifiers(&s->dev, vdev);
@@ -191,21 +191,43 @@  static void vhost_user_blk_set_status(VirtIODevice *vdev, uint8_t status)
 {
     VHostUserBlk *s = VHOST_USER_BLK(vdev);
     bool should_start = status & VIRTIO_CONFIG_S_DRIVER_OK;
+    int ret;
 
     if (!vdev->vm_running) {
         should_start = false;
     }
 
-    if (s->dev.started == should_start) {
+    if (s->should_start == should_start) {
+        return;
+    }
+
+    if (!s->connected || s->dev.started == should_start) {
+        s->should_start = should_start;
         return;
     }
 
     if (should_start) {
-        vhost_user_blk_start(vdev);
+        s->should_start = true;
+        /*
+         * make sure vhost_user_blk_handle_output() ignores fake
+         * guest kick by vhost_dev_enable_notifiers()
+         */
+        barrier();
+        ret = vhost_user_blk_start(vdev);
+        if (ret < 0) {
+            error_report("vhost-user-blk: vhost start failed: %s",
+                         strerror(-ret));
+            qemu_chr_fe_disconnect(&s->chardev);
+        }
     } else {
         vhost_user_blk_stop(vdev);
+        /*
+         * make sure vhost_user_blk_handle_output() ignore fake
+         * guest kick by vhost_dev_disable_notifiers()
+         */
+        barrier();
+        s->should_start = false;
     }
-
 }
 
 static uint64_t vhost_user_blk_get_features(VirtIODevice *vdev,
@@ -237,13 +259,22 @@  static uint64_t vhost_user_blk_get_features(VirtIODevice *vdev,
 static void vhost_user_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq)
 {
     VHostUserBlk *s = VHOST_USER_BLK(vdev);
-    int i;
+    int i, ret;
 
     if (!(virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1) &&
         !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1))) {
         return;
     }
 
+    if (s->should_start) {
+        return;
+    }
+    s->should_start = true;
+
+    if (!s->connected) {
+        return;
+    }
+
     if (s->dev.started) {
         return;
     }
@@ -251,7 +282,13 @@  static void vhost_user_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq)
     /* Some guests kick before setting VIRTIO_CONFIG_S_DRIVER_OK so start
      * vhost here instead of waiting for .set_status().
      */
-    vhost_user_blk_start(vdev);
+    ret = vhost_user_blk_start(vdev);
+    if (ret < 0) {
+        error_report("vhost-user-blk: vhost start failed: %s",
+                     strerror(-ret));
+        qemu_chr_fe_disconnect(&s->chardev);
+        return;
+    }
 
     /* Kick right away to begin processing requests already in vring */
     for (i = 0; i < s->dev.nvqs; i++) {
@@ -271,13 +308,106 @@  static void vhost_user_blk_reset(VirtIODevice *vdev)
     vhost_dev_free_inflight(s->inflight);
 }
 
+static int vhost_user_blk_connect(DeviceState *dev)
+{
+    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
+    VHostUserBlk *s = VHOST_USER_BLK(vdev);
+    int ret = 0;
+
+    if (s->connected) {
+        return 0;
+    }
+    s->connected = true;
+
+    s->dev.nvqs = s->num_queues;
+    s->dev.vqs = s->vqs;
+    s->dev.vq_index = 0;
+    s->dev.backend_features = 0;
+
+    vhost_dev_set_config_notifier(&s->dev, &blk_ops);
+
+    ret = vhost_dev_init(&s->dev, s->vhost_user, VHOST_BACKEND_TYPE_USER, 0);
+    if (ret < 0) {
+        error_report("vhost-user-blk: vhost initialization failed: %s",
+                     strerror(-ret));
+        return ret;
+    }
+
+    /* restore vhost state */
+    if (s->should_start) {
+        ret = vhost_user_blk_start(vdev);
+        if (ret < 0) {
+            error_report("vhost-user-blk: vhost start failed: %s",
+                         strerror(-ret));
+            return ret;
+        }
+    }
+
+    return 0;
+}
+
+static void vhost_user_blk_disconnect(DeviceState *dev)
+{
+    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
+    VHostUserBlk *s = VHOST_USER_BLK(vdev);
+
+    if (!s->connected) {
+        return;
+    }
+    s->connected = false;
+
+    if (s->dev.started) {
+        vhost_user_blk_stop(vdev);
+    }
+
+    vhost_dev_cleanup(&s->dev);
+}
+
+static gboolean vhost_user_blk_watch(GIOChannel *chan, GIOCondition cond,
+                                     void *opaque)
+{
+    DeviceState *dev = opaque;
+    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
+    VHostUserBlk *s = VHOST_USER_BLK(vdev);
+
+    qemu_chr_fe_disconnect(&s->chardev);
+
+    return true;
+}
+
+static void vhost_user_blk_event(void *opaque, int event)
+{
+    DeviceState *dev = opaque;
+    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
+    VHostUserBlk *s = VHOST_USER_BLK(vdev);
+
+    switch (event) {
+    case CHR_EVENT_OPENED:
+        if (vhost_user_blk_connect(dev) < 0) {
+            qemu_chr_fe_disconnect(&s->chardev);
+            return;
+        }
+        s->watch = qemu_chr_fe_add_watch(&s->chardev, G_IO_HUP,
+                                         vhost_user_blk_watch, dev);
+        break;
+    case CHR_EVENT_CLOSED:
+        vhost_user_blk_disconnect(dev);
+        if (s->watch) {
+            g_source_remove(s->watch);
+            s->watch = 0;
+        }
+        break;
+    }
+}
+
+
 static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp)
 {
     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
     VHostUserBlk *s = VHOST_USER_BLK(vdev);
     VhostUserState *user;
-    struct vhost_virtqueue *vqs = NULL;
     int i, ret;
+    Error *err = NULL;
 
     if (!s->chardev.chr) {
         error_setg(errp, "vhost-user-blk: chardev is mandatory");
@@ -312,27 +442,28 @@  static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp)
     }
 
     s->inflight = g_new0(struct vhost_inflight, 1);
-
-    s->dev.nvqs = s->num_queues;
-    s->dev.vqs = g_new(struct vhost_virtqueue, s->dev.nvqs);
-    s->dev.vq_index = 0;
-    s->dev.backend_features = 0;
-    vqs = s->dev.vqs;
-
-    vhost_dev_set_config_notifier(&s->dev, &blk_ops);
-
-    ret = vhost_dev_init(&s->dev, s->vhost_user, VHOST_BACKEND_TYPE_USER, 0);
-    if (ret < 0) {
-        error_setg(errp, "vhost-user-blk: vhost initialization failed: %s",
-                   strerror(-ret));
-        goto virtio_err;
-    }
+    s->vqs = g_new(struct vhost_virtqueue, s->num_queues);
+    s->watch = 0;
+    s->should_start = false;
+    s->connected = false;
+
+    qemu_chr_fe_set_handlers(&s->chardev,  NULL, NULL, vhost_user_blk_event,
+                             NULL, (void *)dev, NULL, true);
+
+reconnect:
+    do {
+        if (qemu_chr_fe_wait_connected(&s->chardev, &err) < 0) {
+            error_report_err(err);
+            err = NULL;
+            sleep(1);
+        }
+    } while (!s->connected);
 
     ret = vhost_dev_get_config(&s->dev, (uint8_t *)&s->blkcfg,
-                              sizeof(struct virtio_blk_config));
+                               sizeof(struct virtio_blk_config));
     if (ret < 0) {
-        error_setg(errp, "vhost-user-blk: get block config failed");
-        goto vhost_err;
+        error_report("vhost-user-blk: get block config failed");
+        goto reconnect;
     }
 
     if (s->blkcfg.num_queues != s->num_queues) {
@@ -340,29 +471,19 @@  static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp)
     }
 
     return;
-
-vhost_err:
-    vhost_dev_cleanup(&s->dev);
-virtio_err:
-    g_free(vqs);
-    g_free(s->inflight);
-    virtio_cleanup(vdev);
-
-    vhost_user_cleanup(user);
-    g_free(user);
-    s->vhost_user = NULL;
 }
 
 static void vhost_user_blk_device_unrealize(DeviceState *dev, Error **errp)
 {
     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
     VHostUserBlk *s = VHOST_USER_BLK(dev);
-    struct vhost_virtqueue *vqs = s->dev.vqs;
 
     vhost_user_blk_set_status(vdev, 0);
+    qemu_chr_fe_set_handlers(&s->chardev,  NULL, NULL, NULL,
+                             NULL, NULL, NULL, false);
     vhost_dev_cleanup(&s->dev);
     vhost_dev_free_inflight(s->inflight);
-    g_free(vqs);
+    g_free(s->vqs);
     g_free(s->inflight);
     virtio_cleanup(vdev);
 
diff --git a/include/hw/virtio/vhost-user-blk.h b/include/hw/virtio/vhost-user-blk.h
index 445516604a..4849aa5eb5 100644
--- a/include/hw/virtio/vhost-user-blk.h
+++ b/include/hw/virtio/vhost-user-blk.h
@@ -38,6 +38,10 @@  typedef struct VHostUserBlk {
     struct vhost_dev dev;
     struct vhost_inflight *inflight;
     VhostUserState *vhost_user;
+    struct vhost_virtqueue *vqs;
+    guint watch;
+    bool should_start;
+    bool connected;
 } VHostUserBlk;
 
 #endif