Message ID | 20190228085355.9614-7-xieyongji@baidu.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | vhost-user-blk: Add support for backend reconnecting | expand |
On Thu, Feb 28, 2019 at 04:53:54PM +0800, elohimes@gmail.com wrote: > From: Xie Yongji <xieyongji@baidu.com> > > Since we now support the message VHOST_USER_GET_INFLIGHT_FD > and VHOST_USER_SET_INFLIGHT_FD. The backend is able to restart > safely because it can track inflight I/O in shared memory. > This patch allows qemu to reconnect the backend after > connection closed. > > Signed-off-by: Xie Yongji <xieyongji@baidu.com> > Signed-off-by: Ni Xun <nixun@baidu.com> > Signed-off-by: Zhang Yu <zhangyu31@baidu.com> > --- > hw/block/vhost-user-blk.c | 205 +++++++++++++++++++++++------ > include/hw/virtio/vhost-user-blk.h | 4 + > 2 files changed, 167 insertions(+), 42 deletions(-) > > diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c > index 9682df1a7b..539ea2e571 100644 > --- a/hw/block/vhost-user-blk.c > +++ b/hw/block/vhost-user-blk.c > @@ -103,7 +103,7 @@ const VhostDevConfigOps blk_ops = { > .vhost_dev_config_notifier = vhost_user_blk_handle_config_change, > }; > > -static void vhost_user_blk_start(VirtIODevice *vdev) > +static int vhost_user_blk_start(VirtIODevice *vdev) > { > VHostUserBlk *s = VHOST_USER_BLK(vdev); > BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); > @@ -112,13 +112,13 @@ static void vhost_user_blk_start(VirtIODevice *vdev) > > if (!k->set_guest_notifiers) { > error_report("binding does not support guest notifiers"); > - return; > + return -ENOSYS; > } > > ret = vhost_dev_enable_notifiers(&s->dev, vdev); > if (ret < 0) { > error_report("Error enabling host notifiers: %d", -ret); > - return; > + return ret; > } > > ret = k->set_guest_notifiers(qbus->parent, s->dev.nvqs, true); > @@ -157,12 +157,13 @@ static void vhost_user_blk_start(VirtIODevice *vdev) > vhost_virtqueue_mask(&s->dev, vdev, i, false); > } > > - return; > + return ret; > > err_guest_notifiers: > k->set_guest_notifiers(qbus->parent, s->dev.nvqs, false); > err_host_notifiers: > vhost_dev_disable_notifiers(&s->dev, vdev); > + return ret; > } > > static void 
vhost_user_blk_stop(VirtIODevice *vdev) > @@ -181,7 +182,6 @@ static void vhost_user_blk_stop(VirtIODevice *vdev) > ret = k->set_guest_notifiers(qbus->parent, s->dev.nvqs, false); > if (ret < 0) { > error_report("vhost guest notifier cleanup failed: %d", ret); > - return; > } > > vhost_dev_disable_notifiers(&s->dev, vdev); > @@ -191,21 +191,43 @@ static void vhost_user_blk_set_status(VirtIODevice *vdev, uint8_t status) > { > VHostUserBlk *s = VHOST_USER_BLK(vdev); > bool should_start = status & VIRTIO_CONFIG_S_DRIVER_OK; > + int ret; > > if (!vdev->vm_running) { > should_start = false; > } > > - if (s->dev.started == should_start) { > + if (s->should_start == should_start) { > + return; > + } > + > + if (!s->connected || s->dev.started == should_start) { > + s->should_start = should_start; > return; > } > > if (should_start) { > - vhost_user_blk_start(vdev); > + s->should_start = true; > + /* > + * make sure vhost_user_blk_handle_output() ignores fake > + * guest kick by vhost_dev_enable_notifiers() > + */ > + barrier(); > + ret = vhost_user_blk_start(vdev); > + if (ret < 0) { > + error_report("vhost-user-blk: vhost start failed: %s", > + strerror(-ret)); > + qemu_chr_fe_disconnect(&s->chardev); > + } > } else { > vhost_user_blk_stop(vdev); > + /* > + * make sure vhost_user_blk_handle_output() ignore fake > + * guest kick by vhost_dev_disable_notifiers() > + */ > + barrier(); > + s->should_start = false; > } > - > } > > static uint64_t vhost_user_blk_get_features(VirtIODevice *vdev, > @@ -237,13 +259,22 @@ static uint64_t vhost_user_blk_get_features(VirtIODevice *vdev, > static void vhost_user_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq) > { > VHostUserBlk *s = VHOST_USER_BLK(vdev); > - int i; > + int i, ret; > > if (!(virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1) && > !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1))) { > return; > } > > + if (s->should_start) { > + return; > + } > + s->should_start = true; > + > + if (!s->connected) { > + return; > 
+ } > + > if (s->dev.started) { > return; > } > @@ -251,7 +282,13 @@ static void vhost_user_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq) > /* Some guests kick before setting VIRTIO_CONFIG_S_DRIVER_OK so start > * vhost here instead of waiting for .set_status(). > */ > - vhost_user_blk_start(vdev); > + ret = vhost_user_blk_start(vdev); > + if (ret < 0) { > + error_report("vhost-user-blk: vhost start failed: %s", > + strerror(-ret)); > + qemu_chr_fe_disconnect(&s->chardev); > + return; > + } > > /* Kick right away to begin processing requests already in vring */ > for (i = 0; i < s->dev.nvqs; i++) { > @@ -271,13 +308,106 @@ static void vhost_user_blk_reset(VirtIODevice *vdev) > vhost_dev_free_inflight(s->inflight); > } > > +static int vhost_user_blk_connect(DeviceState *dev) > +{ > + VirtIODevice *vdev = VIRTIO_DEVICE(dev); > + VHostUserBlk *s = VHOST_USER_BLK(vdev); > + int ret = 0; > + > + if (s->connected) { > + return 0; > + } > + s->connected = true; > + > + s->dev.nvqs = s->num_queues; > + s->dev.vqs = s->vqs; > + s->dev.vq_index = 0; > + s->dev.backend_features = 0; > + > + vhost_dev_set_config_notifier(&s->dev, &blk_ops); > + > + ret = vhost_dev_init(&s->dev, s->vhost_user, VHOST_BACKEND_TYPE_USER, 0); > + if (ret < 0) { > + error_report("vhost-user-blk: vhost initialization failed: %s", > + strerror(-ret)); > + return ret; > + } > + > + /* restore vhost state */ > + if (s->should_start) { > + ret = vhost_user_blk_start(vdev); > + if (ret < 0) { > + error_report("vhost-user-blk: vhost start failed: %s", > + strerror(-ret)); > + return ret; > + } > + } > + > + return 0; > +} > + > +static void vhost_user_blk_disconnect(DeviceState *dev) > +{ > + VirtIODevice *vdev = VIRTIO_DEVICE(dev); > + VHostUserBlk *s = VHOST_USER_BLK(vdev); > + > + if (!s->connected) { > + return; > + } > + s->connected = false; > + > + if (s->dev.started) { > + vhost_user_blk_stop(vdev); > + } > + > + vhost_dev_cleanup(&s->dev); > +} > + > +static gboolean 
vhost_user_blk_watch(GIOChannel *chan, GIOCondition cond, > + void *opaque) > +{ > + DeviceState *dev = opaque; > + VirtIODevice *vdev = VIRTIO_DEVICE(dev); > + VHostUserBlk *s = VHOST_USER_BLK(vdev); > + > + qemu_chr_fe_disconnect(&s->chardev); > + > + return true; > +} > + > +static void vhost_user_blk_event(void *opaque, int event) > +{ > + DeviceState *dev = opaque; > + VirtIODevice *vdev = VIRTIO_DEVICE(dev); > + VHostUserBlk *s = VHOST_USER_BLK(vdev); > + > + switch (event) { > + case CHR_EVENT_OPENED: > + if (vhost_user_blk_connect(dev) < 0) { > + qemu_chr_fe_disconnect(&s->chardev); > + return; > + } > + s->watch = qemu_chr_fe_add_watch(&s->chardev, G_IO_HUP, > + vhost_user_blk_watch, dev); > + break; > + case CHR_EVENT_CLOSED: > + vhost_user_blk_disconnect(dev); > + if (s->watch) { > + g_source_remove(s->watch); > + s->watch = 0; > + } > + break; > + } > +} > + > + > static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp) > { > VirtIODevice *vdev = VIRTIO_DEVICE(dev); > VHostUserBlk *s = VHOST_USER_BLK(vdev); > VhostUserState *user; > - struct vhost_virtqueue *vqs = NULL; > int i, ret; > + Error *err = NULL; > > if (!s->chardev.chr) { > error_setg(errp, "vhost-user-blk: chardev is mandatory"); > @@ -312,27 +442,28 @@ static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp) > } > > s->inflight = g_new0(struct vhost_inflight, 1); > - > - s->dev.nvqs = s->num_queues; > - s->dev.vqs = g_new(struct vhost_virtqueue, s->dev.nvqs); > - s->dev.vq_index = 0; > - s->dev.backend_features = 0; > - vqs = s->dev.vqs; > - > - vhost_dev_set_config_notifier(&s->dev, &blk_ops); > - > - ret = vhost_dev_init(&s->dev, s->vhost_user, VHOST_BACKEND_TYPE_USER, 0); > - if (ret < 0) { > - error_setg(errp, "vhost-user-blk: vhost initialization failed: %s", > - strerror(-ret)); > - goto virtio_err; > - } > + s->vqs = g_new(struct vhost_virtqueue, s->num_queues); > + s->watch = 0; > + s->should_start = false; > + s->connected = false; > + > + 
qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL, vhost_user_blk_event, > + NULL, (void *)dev, NULL, true); > + > +reconnect: > + do { > + if (qemu_chr_fe_wait_connected(&s->chardev, &err) < 0) { > + error_report_err(err); > + err = NULL; > + sleep(1); Seems arbitrary. Is this basically waiting until backend will reconnect? Why not block until event on the fd triggers? Also, it looks like this will just block forever with no monitor input and no way for user to figure out what is going on short of crashing QEMU. > + } > + } while (!s->connected); > > ret = vhost_dev_get_config(&s->dev, (uint8_t *)&s->blkcfg, > - sizeof(struct virtio_blk_config)); > + sizeof(struct virtio_blk_config)); > if (ret < 0) { > - error_setg(errp, "vhost-user-blk: get block config failed"); > - goto vhost_err; > + error_report("vhost-user-blk: get block config failed"); > + goto reconnect; > } > > if (s->blkcfg.num_queues != s->num_queues) { > @@ -340,29 +471,19 @@ static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp) > } > > return; > - > -vhost_err: > - vhost_dev_cleanup(&s->dev); > -virtio_err: > - g_free(vqs); > - g_free(s->inflight); > - virtio_cleanup(vdev); > - > - vhost_user_cleanup(user); > - g_free(user); > - s->vhost_user = NULL; > } > > static void vhost_user_blk_device_unrealize(DeviceState *dev, Error **errp) > { > VirtIODevice *vdev = VIRTIO_DEVICE(dev); > VHostUserBlk *s = VHOST_USER_BLK(dev); > - struct vhost_virtqueue *vqs = s->dev.vqs; > > vhost_user_blk_set_status(vdev, 0); > + qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL, NULL, > + NULL, NULL, NULL, false); > vhost_dev_cleanup(&s->dev); > vhost_dev_free_inflight(s->inflight); > - g_free(vqs); > + g_free(s->vqs); > g_free(s->inflight); > virtio_cleanup(vdev); > > diff --git a/include/hw/virtio/vhost-user-blk.h b/include/hw/virtio/vhost-user-blk.h > index 445516604a..4849aa5eb5 100644 > --- a/include/hw/virtio/vhost-user-blk.h > +++ b/include/hw/virtio/vhost-user-blk.h > @@ -38,6 +38,10 @@ typedef 
struct VHostUserBlk { > struct vhost_dev dev; > struct vhost_inflight *inflight; > VhostUserState *vhost_user; > + struct vhost_virtqueue *vqs; > + guint watch; > + bool should_start; > + bool connected; > } VHostUserBlk; > > #endif > -- > 2.17.1
On Thu, Feb 28, 2019 at 04:53:54PM +0800, elohimes@gmail.com wrote: > From: Xie Yongji <xieyongji@baidu.com> > > Since we now support the message VHOST_USER_GET_INFLIGHT_FD > and VHOST_USER_SET_INFLIGHT_FD. The backend is able to restart > safely because it can track inflight I/O in shared memory. > This patch allows qemu to reconnect the backend after > connection closed. > > Signed-off-by: Xie Yongji <xieyongji@baidu.com> > Signed-off-by: Ni Xun <nixun@baidu.com> > Signed-off-by: Zhang Yu <zhangyu31@baidu.com> > --- > hw/block/vhost-user-blk.c | 205 +++++++++++++++++++++++------ > include/hw/virtio/vhost-user-blk.h | 4 + > 2 files changed, 167 insertions(+), 42 deletions(-) > > diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c > index 9682df1a7b..539ea2e571 100644 > --- a/hw/block/vhost-user-blk.c > +++ b/hw/block/vhost-user-blk.c > @@ -103,7 +103,7 @@ const VhostDevConfigOps blk_ops = { > .vhost_dev_config_notifier = vhost_user_blk_handle_config_change, > }; > > -static void vhost_user_blk_start(VirtIODevice *vdev) > +static int vhost_user_blk_start(VirtIODevice *vdev) > { > VHostUserBlk *s = VHOST_USER_BLK(vdev); > BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); > @@ -112,13 +112,13 @@ static void vhost_user_blk_start(VirtIODevice *vdev) > > if (!k->set_guest_notifiers) { > error_report("binding does not support guest notifiers"); > - return; > + return -ENOSYS; > } > > ret = vhost_dev_enable_notifiers(&s->dev, vdev); > if (ret < 0) { > error_report("Error enabling host notifiers: %d", -ret); > - return; > + return ret; > } > > ret = k->set_guest_notifiers(qbus->parent, s->dev.nvqs, true); > @@ -157,12 +157,13 @@ static void vhost_user_blk_start(VirtIODevice *vdev) > vhost_virtqueue_mask(&s->dev, vdev, i, false); > } > > - return; > + return ret; > > err_guest_notifiers: > k->set_guest_notifiers(qbus->parent, s->dev.nvqs, false); > err_host_notifiers: > vhost_dev_disable_notifiers(&s->dev, vdev); > + return ret; > } > > static void 
vhost_user_blk_stop(VirtIODevice *vdev) > @@ -181,7 +182,6 @@ static void vhost_user_blk_stop(VirtIODevice *vdev) > ret = k->set_guest_notifiers(qbus->parent, s->dev.nvqs, false); > if (ret < 0) { > error_report("vhost guest notifier cleanup failed: %d", ret); > - return; > } > > vhost_dev_disable_notifiers(&s->dev, vdev); > @@ -191,21 +191,43 @@ static void vhost_user_blk_set_status(VirtIODevice *vdev, uint8_t status) > { > VHostUserBlk *s = VHOST_USER_BLK(vdev); > bool should_start = status & VIRTIO_CONFIG_S_DRIVER_OK; > + int ret; > > if (!vdev->vm_running) { > should_start = false; > } > > - if (s->dev.started == should_start) { > + if (s->should_start == should_start) { > + return; > + } > + > + if (!s->connected || s->dev.started == should_start) { > + s->should_start = should_start; > return; > } > > if (should_start) { > - vhost_user_blk_start(vdev); > + s->should_start = true; > + /* > + * make sure vhost_user_blk_handle_output() ignores fake > + * guest kick by vhost_dev_enable_notifiers() > + */ > + barrier(); > + ret = vhost_user_blk_start(vdev); > + if (ret < 0) { > + error_report("vhost-user-blk: vhost start failed: %s", > + strerror(-ret)); > + qemu_chr_fe_disconnect(&s->chardev); > + } > } else { > vhost_user_blk_stop(vdev); > + /* > + * make sure vhost_user_blk_handle_output() ignore fake > + * guest kick by vhost_dev_disable_notifiers() > + */ > + barrier(); > + s->should_start = false; > } I don't understand the comment about ignoring fake guest kicks. Please add an explanation about what does barrier do here. And maybe a detailed explanation about what "fake kick" is in the commit log. 
> - > } > > static uint64_t vhost_user_blk_get_features(VirtIODevice *vdev, > @@ -237,13 +259,22 @@ static uint64_t vhost_user_blk_get_features(VirtIODevice *vdev, > static void vhost_user_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq) > { > VHostUserBlk *s = VHOST_USER_BLK(vdev); > - int i; > + int i, ret; > > if (!(virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1) && > !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1))) { > return; > } > > + if (s->should_start) { > + return; > + } > + s->should_start = true; > + > + if (!s->connected) { > + return; > + } > + > if (s->dev.started) { > return; > } > @@ -251,7 +282,13 @@ static void vhost_user_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq) > /* Some guests kick before setting VIRTIO_CONFIG_S_DRIVER_OK so start > * vhost here instead of waiting for .set_status(). > */ > - vhost_user_blk_start(vdev); > + ret = vhost_user_blk_start(vdev); > + if (ret < 0) { > + error_report("vhost-user-blk: vhost start failed: %s", > + strerror(-ret)); > + qemu_chr_fe_disconnect(&s->chardev); > + return; > + } > > /* Kick right away to begin processing requests already in vring */ > for (i = 0; i < s->dev.nvqs; i++) { > @@ -271,13 +308,106 @@ static void vhost_user_blk_reset(VirtIODevice *vdev) > vhost_dev_free_inflight(s->inflight); > } > > +static int vhost_user_blk_connect(DeviceState *dev) > +{ > + VirtIODevice *vdev = VIRTIO_DEVICE(dev); > + VHostUserBlk *s = VHOST_USER_BLK(vdev); > + int ret = 0; > + > + if (s->connected) { > + return 0; > + } > + s->connected = true; > + > + s->dev.nvqs = s->num_queues; > + s->dev.vqs = s->vqs; > + s->dev.vq_index = 0; > + s->dev.backend_features = 0; > + > + vhost_dev_set_config_notifier(&s->dev, &blk_ops); > + > + ret = vhost_dev_init(&s->dev, s->vhost_user, VHOST_BACKEND_TYPE_USER, 0); > + if (ret < 0) { > + error_report("vhost-user-blk: vhost initialization failed: %s", > + strerror(-ret)); > + return ret; > + } > + > + /* restore vhost state */ > + if (s->should_start) { > + 
ret = vhost_user_blk_start(vdev); > + if (ret < 0) { > + error_report("vhost-user-blk: vhost start failed: %s", > + strerror(-ret)); > + return ret; > + } > + } > + > + return 0; > +} > + > +static void vhost_user_blk_disconnect(DeviceState *dev) > +{ > + VirtIODevice *vdev = VIRTIO_DEVICE(dev); > + VHostUserBlk *s = VHOST_USER_BLK(vdev); > + > + if (!s->connected) { > + return; > + } > + s->connected = false; > + > + if (s->dev.started) { > + vhost_user_blk_stop(vdev); > + } > + > + vhost_dev_cleanup(&s->dev); > +} > + > +static gboolean vhost_user_blk_watch(GIOChannel *chan, GIOCondition cond, > + void *opaque) > +{ > + DeviceState *dev = opaque; > + VirtIODevice *vdev = VIRTIO_DEVICE(dev); > + VHostUserBlk *s = VHOST_USER_BLK(vdev); > + > + qemu_chr_fe_disconnect(&s->chardev); > + > + return true; > +} > + > +static void vhost_user_blk_event(void *opaque, int event) > +{ > + DeviceState *dev = opaque; > + VirtIODevice *vdev = VIRTIO_DEVICE(dev); > + VHostUserBlk *s = VHOST_USER_BLK(vdev); > + > + switch (event) { > + case CHR_EVENT_OPENED: > + if (vhost_user_blk_connect(dev) < 0) { > + qemu_chr_fe_disconnect(&s->chardev); > + return; > + } > + s->watch = qemu_chr_fe_add_watch(&s->chardev, G_IO_HUP, > + vhost_user_blk_watch, dev); > + break; > + case CHR_EVENT_CLOSED: > + vhost_user_blk_disconnect(dev); > + if (s->watch) { > + g_source_remove(s->watch); > + s->watch = 0; > + } > + break; > + } > +} > + > + > static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp) > { > VirtIODevice *vdev = VIRTIO_DEVICE(dev); > VHostUserBlk *s = VHOST_USER_BLK(vdev); > VhostUserState *user; > - struct vhost_virtqueue *vqs = NULL; > int i, ret; > + Error *err = NULL; > > if (!s->chardev.chr) { > error_setg(errp, "vhost-user-blk: chardev is mandatory"); > @@ -312,27 +442,28 @@ static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp) > } > > s->inflight = g_new0(struct vhost_inflight, 1); > - > - s->dev.nvqs = s->num_queues; > - s->dev.vqs = 
g_new(struct vhost_virtqueue, s->dev.nvqs); > - s->dev.vq_index = 0; > - s->dev.backend_features = 0; > - vqs = s->dev.vqs; > - > - vhost_dev_set_config_notifier(&s->dev, &blk_ops); > - > - ret = vhost_dev_init(&s->dev, s->vhost_user, VHOST_BACKEND_TYPE_USER, 0); > - if (ret < 0) { > - error_setg(errp, "vhost-user-blk: vhost initialization failed: %s", > - strerror(-ret)); > - goto virtio_err; > - } > + s->vqs = g_new(struct vhost_virtqueue, s->num_queues); > + s->watch = 0; > + s->should_start = false; > + s->connected = false; > + > + qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL, vhost_user_blk_event, > + NULL, (void *)dev, NULL, true); > + > +reconnect: > + do { > + if (qemu_chr_fe_wait_connected(&s->chardev, &err) < 0) { > + error_report_err(err); > + err = NULL; > + sleep(1); > + } > + } while (!s->connected); > > ret = vhost_dev_get_config(&s->dev, (uint8_t *)&s->blkcfg, > - sizeof(struct virtio_blk_config)); > + sizeof(struct virtio_blk_config)); > if (ret < 0) { > - error_setg(errp, "vhost-user-blk: get block config failed"); > - goto vhost_err; > + error_report("vhost-user-blk: get block config failed"); > + goto reconnect; > } > > if (s->blkcfg.num_queues != s->num_queues) { > @@ -340,29 +471,19 @@ static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp) > } > > return; With this we end up with a return at the end of the function, which makes no sense.
> - > -vhost_err: > - vhost_dev_cleanup(&s->dev); > -virtio_err: > - g_free(vqs); > - g_free(s->inflight); > - virtio_cleanup(vdev); > - > - vhost_user_cleanup(user); > - g_free(user); > - s->vhost_user = NULL; > } > > static void vhost_user_blk_device_unrealize(DeviceState *dev, Error **errp) > { > VirtIODevice *vdev = VIRTIO_DEVICE(dev); > VHostUserBlk *s = VHOST_USER_BLK(dev); > - struct vhost_virtqueue *vqs = s->dev.vqs; > > vhost_user_blk_set_status(vdev, 0); > + qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL, NULL, > + NULL, NULL, NULL, false); > vhost_dev_cleanup(&s->dev); > vhost_dev_free_inflight(s->inflight); > - g_free(vqs); > + g_free(s->vqs); > g_free(s->inflight); > virtio_cleanup(vdev); > > diff --git a/include/hw/virtio/vhost-user-blk.h b/include/hw/virtio/vhost-user-blk.h > index 445516604a..4849aa5eb5 100644 > --- a/include/hw/virtio/vhost-user-blk.h > +++ b/include/hw/virtio/vhost-user-blk.h > @@ -38,6 +38,10 @@ typedef struct VHostUserBlk { > struct vhost_dev dev; > struct vhost_inflight *inflight; > VhostUserState *vhost_user; > + struct vhost_virtqueue *vqs; > + guint watch; > + bool should_start; Please add comments about fields. Also should_start seems to be set on guest activity. Does it need to be migrated? It would be better not to track guest state like this. Why do we need it? Pls add comments with an explanation. > + bool connected; > } VHostUserBlk; > > #endif > -- > 2.17.1
On Wed, 13 Mar 2019 at 00:49, Michael S. Tsirkin <mst@redhat.com> wrote: > > On Thu, Feb 28, 2019 at 04:53:54PM +0800, elohimes@gmail.com wrote: > > From: Xie Yongji <xieyongji@baidu.com> > > > > Since we now support the message VHOST_USER_GET_INFLIGHT_FD > > and VHOST_USER_SET_INFLIGHT_FD. The backend is able to restart > > safely because it can track inflight I/O in shared memory. > > This patch allows qemu to reconnect the backend after > > connection closed. > > > > Signed-off-by: Xie Yongji <xieyongji@baidu.com> > > Signed-off-by: Ni Xun <nixun@baidu.com> > > Signed-off-by: Zhang Yu <zhangyu31@baidu.com> > > --- > > hw/block/vhost-user-blk.c | 205 +++++++++++++++++++++++------ > > include/hw/virtio/vhost-user-blk.h | 4 + > > 2 files changed, 167 insertions(+), 42 deletions(-) > > > > diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c > > index 9682df1a7b..539ea2e571 100644 > > --- a/hw/block/vhost-user-blk.c > > +++ b/hw/block/vhost-user-blk.c > > @@ -103,7 +103,7 @@ const VhostDevConfigOps blk_ops = { > > .vhost_dev_config_notifier = vhost_user_blk_handle_config_change, > > }; > > > > -static void vhost_user_blk_start(VirtIODevice *vdev) > > +static int vhost_user_blk_start(VirtIODevice *vdev) > > { > > VHostUserBlk *s = VHOST_USER_BLK(vdev); > > BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); > > @@ -112,13 +112,13 @@ static void vhost_user_blk_start(VirtIODevice *vdev) > > > > if (!k->set_guest_notifiers) { > > error_report("binding does not support guest notifiers"); > > - return; > > + return -ENOSYS; > > } > > > > ret = vhost_dev_enable_notifiers(&s->dev, vdev); > > if (ret < 0) { > > error_report("Error enabling host notifiers: %d", -ret); > > - return; > > + return ret; > > } > > > > ret = k->set_guest_notifiers(qbus->parent, s->dev.nvqs, true); > > @@ -157,12 +157,13 @@ static void vhost_user_blk_start(VirtIODevice *vdev) > > vhost_virtqueue_mask(&s->dev, vdev, i, false); > > } > > > > - return; > > + return ret; > > > > 
err_guest_notifiers: > > k->set_guest_notifiers(qbus->parent, s->dev.nvqs, false); > > err_host_notifiers: > > vhost_dev_disable_notifiers(&s->dev, vdev); > > + return ret; > > } > > > > static void vhost_user_blk_stop(VirtIODevice *vdev) > > @@ -181,7 +182,6 @@ static void vhost_user_blk_stop(VirtIODevice *vdev) > > ret = k->set_guest_notifiers(qbus->parent, s->dev.nvqs, false); > > if (ret < 0) { > > error_report("vhost guest notifier cleanup failed: %d", ret); > > - return; > > } > > > > vhost_dev_disable_notifiers(&s->dev, vdev); > > @@ -191,21 +191,43 @@ static void vhost_user_blk_set_status(VirtIODevice *vdev, uint8_t status) > > { > > VHostUserBlk *s = VHOST_USER_BLK(vdev); > > bool should_start = status & VIRTIO_CONFIG_S_DRIVER_OK; > > + int ret; > > > > if (!vdev->vm_running) { > > should_start = false; > > } > > > > - if (s->dev.started == should_start) { > > + if (s->should_start == should_start) { > > + return; > > + } > > + > > + if (!s->connected || s->dev.started == should_start) { > > + s->should_start = should_start; > > return; > > } > > > > if (should_start) { > > - vhost_user_blk_start(vdev); > > + s->should_start = true; > > + /* > > + * make sure vhost_user_blk_handle_output() ignores fake > > + * guest kick by vhost_dev_enable_notifiers() > > + */ > > + barrier(); > > + ret = vhost_user_blk_start(vdev); > > + if (ret < 0) { > > + error_report("vhost-user-blk: vhost start failed: %s", > > + strerror(-ret)); > > + qemu_chr_fe_disconnect(&s->chardev); > > + } > > } else { > > vhost_user_blk_stop(vdev); > > + /* > > + * make sure vhost_user_blk_handle_output() ignore fake > > + * guest kick by vhost_dev_disable_notifiers() > > + */ > > + barrier(); > > + s->should_start = false; > > } > > - > > } > > > > static uint64_t vhost_user_blk_get_features(VirtIODevice *vdev, > > @@ -237,13 +259,22 @@ static uint64_t vhost_user_blk_get_features(VirtIODevice *vdev, > > static void vhost_user_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq) > > { > > 
VHostUserBlk *s = VHOST_USER_BLK(vdev); > > - int i; > > + int i, ret; > > > > if (!(virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1) && > > !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1))) { > > return; > > } > > > > + if (s->should_start) { > > + return; > > + } > > + s->should_start = true; > > + > > + if (!s->connected) { > > + return; > > + } > > + > > if (s->dev.started) { > > return; > > } > > @@ -251,7 +282,13 @@ static void vhost_user_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq) > > /* Some guests kick before setting VIRTIO_CONFIG_S_DRIVER_OK so start > > * vhost here instead of waiting for .set_status(). > > */ > > - vhost_user_blk_start(vdev); > > + ret = vhost_user_blk_start(vdev); > > + if (ret < 0) { > > + error_report("vhost-user-blk: vhost start failed: %s", > > + strerror(-ret)); > > + qemu_chr_fe_disconnect(&s->chardev); > > + return; > > + } > > > > /* Kick right away to begin processing requests already in vring */ > > for (i = 0; i < s->dev.nvqs; i++) { > > @@ -271,13 +308,106 @@ static void vhost_user_blk_reset(VirtIODevice *vdev) > > vhost_dev_free_inflight(s->inflight); > > } > > > > +static int vhost_user_blk_connect(DeviceState *dev) > > +{ > > + VirtIODevice *vdev = VIRTIO_DEVICE(dev); > > + VHostUserBlk *s = VHOST_USER_BLK(vdev); > > + int ret = 0; > > + > > + if (s->connected) { > > + return 0; > > + } > > + s->connected = true; > > + > > + s->dev.nvqs = s->num_queues; > > + s->dev.vqs = s->vqs; > > + s->dev.vq_index = 0; > > + s->dev.backend_features = 0; > > + > > + vhost_dev_set_config_notifier(&s->dev, &blk_ops); > > + > > + ret = vhost_dev_init(&s->dev, s->vhost_user, VHOST_BACKEND_TYPE_USER, 0); > > + if (ret < 0) { > > + error_report("vhost-user-blk: vhost initialization failed: %s", > > + strerror(-ret)); > > + return ret; > > + } > > + > > + /* restore vhost state */ > > + if (s->should_start) { > > + ret = vhost_user_blk_start(vdev); > > + if (ret < 0) { > > + error_report("vhost-user-blk: vhost start failed: %s", 
> > + strerror(-ret)); > > + return ret; > > + } > > + } > > + > > + return 0; > > +} > > + > > +static void vhost_user_blk_disconnect(DeviceState *dev) > > +{ > > + VirtIODevice *vdev = VIRTIO_DEVICE(dev); > > + VHostUserBlk *s = VHOST_USER_BLK(vdev); > > + > > + if (!s->connected) { > > + return; > > + } > > + s->connected = false; > > + > > + if (s->dev.started) { > > + vhost_user_blk_stop(vdev); > > + } > > + > > + vhost_dev_cleanup(&s->dev); > > +} > > + > > +static gboolean vhost_user_blk_watch(GIOChannel *chan, GIOCondition cond, > > + void *opaque) > > +{ > > + DeviceState *dev = opaque; > > + VirtIODevice *vdev = VIRTIO_DEVICE(dev); > > + VHostUserBlk *s = VHOST_USER_BLK(vdev); > > + > > + qemu_chr_fe_disconnect(&s->chardev); > > + > > + return true; > > +} > > + > > +static void vhost_user_blk_event(void *opaque, int event) > > +{ > > + DeviceState *dev = opaque; > > + VirtIODevice *vdev = VIRTIO_DEVICE(dev); > > + VHostUserBlk *s = VHOST_USER_BLK(vdev); > > + > > + switch (event) { > > + case CHR_EVENT_OPENED: > > + if (vhost_user_blk_connect(dev) < 0) { > > + qemu_chr_fe_disconnect(&s->chardev); > > + return; > > + } > > + s->watch = qemu_chr_fe_add_watch(&s->chardev, G_IO_HUP, > > + vhost_user_blk_watch, dev); > > + break; > > + case CHR_EVENT_CLOSED: > > + vhost_user_blk_disconnect(dev); > > + if (s->watch) { > > + g_source_remove(s->watch); > > + s->watch = 0; > > + } > > + break; > > + } > > +} > > + > > + > > static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp) > > { > > VirtIODevice *vdev = VIRTIO_DEVICE(dev); > > VHostUserBlk *s = VHOST_USER_BLK(vdev); > > VhostUserState *user; > > - struct vhost_virtqueue *vqs = NULL; > > int i, ret; > > + Error *err = NULL; > > > > if (!s->chardev.chr) { > > error_setg(errp, "vhost-user-blk: chardev is mandatory"); > > @@ -312,27 +442,28 @@ static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp) > > } > > > > s->inflight = g_new0(struct vhost_inflight, 1); > > - > > - 
s->dev.nvqs = s->num_queues; > > - s->dev.vqs = g_new(struct vhost_virtqueue, s->dev.nvqs); > > - s->dev.vq_index = 0; > > - s->dev.backend_features = 0; > > - vqs = s->dev.vqs; > > - > > - vhost_dev_set_config_notifier(&s->dev, &blk_ops); > > - > > - ret = vhost_dev_init(&s->dev, s->vhost_user, VHOST_BACKEND_TYPE_USER, 0); > > - if (ret < 0) { > > - error_setg(errp, "vhost-user-blk: vhost initialization failed: %s", > > - strerror(-ret)); > > - goto virtio_err; > > - } > > + s->vqs = g_new(struct vhost_virtqueue, s->num_queues); > > + s->watch = 0; > > + s->should_start = false; > > + s->connected = false; > > + > > + qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL, vhost_user_blk_event, > > + NULL, (void *)dev, NULL, true); > > + > > +reconnect: > > + do { > > + if (qemu_chr_fe_wait_connected(&s->chardev, &err) < 0) { > > + error_report_err(err); > > + err = NULL; > > + sleep(1); > > Seems arbitrary. Is this basically waiting until backend will reconnect? > Why not block until event on the fd triggers? > QEMU is the client in the current vhost-user-blk implementation. It seems the backend will not try to connect. > Also, it looks like this will just block forever with no monitor input > and no way for user to figure out what is going on short of > crashing QEMU. > Yes, because I want to support the case where the backend starts after QEMU. I can print a warning and add a timeout for this. Thanks, Yongji
On Wed, 13 Mar 2019 at 09:16, Michael S. Tsirkin <mst@redhat.com> wrote: > > On Thu, Feb 28, 2019 at 04:53:54PM +0800, elohimes@gmail.com wrote: > > From: Xie Yongji <xieyongji@baidu.com> > > > > Since we now support the message VHOST_USER_GET_INFLIGHT_FD > > and VHOST_USER_SET_INFLIGHT_FD. The backend is able to restart > > safely because it can track inflight I/O in shared memory. > > This patch allows qemu to reconnect the backend after > > connection closed. > > > > Signed-off-by: Xie Yongji <xieyongji@baidu.com> > > Signed-off-by: Ni Xun <nixun@baidu.com> > > Signed-off-by: Zhang Yu <zhangyu31@baidu.com> > > --- > > hw/block/vhost-user-blk.c | 205 +++++++++++++++++++++++------ > > include/hw/virtio/vhost-user-blk.h | 4 + > > 2 files changed, 167 insertions(+), 42 deletions(-) > > > > diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c > > index 9682df1a7b..539ea2e571 100644 > > --- a/hw/block/vhost-user-blk.c > > +++ b/hw/block/vhost-user-blk.c > > @@ -103,7 +103,7 @@ const VhostDevConfigOps blk_ops = { > > .vhost_dev_config_notifier = vhost_user_blk_handle_config_change, > > }; > > > > -static void vhost_user_blk_start(VirtIODevice *vdev) > > +static int vhost_user_blk_start(VirtIODevice *vdev) > > { > > VHostUserBlk *s = VHOST_USER_BLK(vdev); > > BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); > > @@ -112,13 +112,13 @@ static void vhost_user_blk_start(VirtIODevice *vdev) > > > > if (!k->set_guest_notifiers) { > > error_report("binding does not support guest notifiers"); > > - return; > > + return -ENOSYS; > > } > > > > ret = vhost_dev_enable_notifiers(&s->dev, vdev); > > if (ret < 0) { > > error_report("Error enabling host notifiers: %d", -ret); > > - return; > > + return ret; > > } > > > > ret = k->set_guest_notifiers(qbus->parent, s->dev.nvqs, true); > > @@ -157,12 +157,13 @@ static void vhost_user_blk_start(VirtIODevice *vdev) > > vhost_virtqueue_mask(&s->dev, vdev, i, false); > > } > > > > - return; > > + return ret; > > > > 
err_guest_notifiers: > > k->set_guest_notifiers(qbus->parent, s->dev.nvqs, false); > > err_host_notifiers: > > vhost_dev_disable_notifiers(&s->dev, vdev); > > + return ret; > > } > > > > static void vhost_user_blk_stop(VirtIODevice *vdev) > > @@ -181,7 +182,6 @@ static void vhost_user_blk_stop(VirtIODevice *vdev) > > ret = k->set_guest_notifiers(qbus->parent, s->dev.nvqs, false); > > if (ret < 0) { > > error_report("vhost guest notifier cleanup failed: %d", ret); > > - return; > > } > > > > vhost_dev_disable_notifiers(&s->dev, vdev); > > @@ -191,21 +191,43 @@ static void vhost_user_blk_set_status(VirtIODevice *vdev, uint8_t status) > > { > > VHostUserBlk *s = VHOST_USER_BLK(vdev); > > bool should_start = status & VIRTIO_CONFIG_S_DRIVER_OK; > > + int ret; > > > > if (!vdev->vm_running) { > > should_start = false; > > } > > > > - if (s->dev.started == should_start) { > > + if (s->should_start == should_start) { > > + return; > > + } > > + > > + if (!s->connected || s->dev.started == should_start) { > > + s->should_start = should_start; > > return; > > } > > > > if (should_start) { > > - vhost_user_blk_start(vdev); > > + s->should_start = true; > > + /* > > + * make sure vhost_user_blk_handle_output() ignores fake > > + * guest kick by vhost_dev_enable_notifiers() > > + */ > > + barrier(); > > + ret = vhost_user_blk_start(vdev); > > + if (ret < 0) { > > + error_report("vhost-user-blk: vhost start failed: %s", > > + strerror(-ret)); > > + qemu_chr_fe_disconnect(&s->chardev); > > + } > > } else { > > vhost_user_blk_stop(vdev); > > + /* > > + * make sure vhost_user_blk_handle_output() ignore fake > > + * guest kick by vhost_dev_disable_notifiers() > > + */ > > + barrier(); > > + s->should_start = false; > > } > > I don't understand the comment about ignoring fake guest kicks. > Please add an explanation about what does barrier do here. 
We may get stack like this: vhost_user_blk_stop() vhost_dev_disable_notifiers() virtio_bus_cleanup_host_notifier() virtio_queue_host_notifier_read() virtio_queue_notify_vq() /* fake guest kick */ vhost_user_blk_handle_output() vhost_user_blk_handle_output() will re-start vhost-user if should_start is false. This is not what we expect. The same to vhost_dev_enable_notifiers(). > And maybe a detailed explanation about what "fake kick" > is in the commit log. > Sure. > > > - > > } > > > > static uint64_t vhost_user_blk_get_features(VirtIODevice *vdev, > > @@ -237,13 +259,22 @@ static uint64_t vhost_user_blk_get_features(VirtIODevice *vdev, > > static void vhost_user_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq) > > { > > VHostUserBlk *s = VHOST_USER_BLK(vdev); > > - int i; > > + int i, ret; > > > > if (!(virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1) && > > !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1))) { > > return; > > } > > > > + if (s->should_start) { > > + return; > > + } > > + s->should_start = true; > > + > > + if (!s->connected) { > > + return; > > + } > > + > > if (s->dev.started) { > > return; > > } > > @@ -251,7 +282,13 @@ static void vhost_user_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq) > > /* Some guests kick before setting VIRTIO_CONFIG_S_DRIVER_OK so start > > * vhost here instead of waiting for .set_status(). 
> > */ > > - vhost_user_blk_start(vdev); > > + ret = vhost_user_blk_start(vdev); > > + if (ret < 0) { > > + error_report("vhost-user-blk: vhost start failed: %s", > > + strerror(-ret)); > > + qemu_chr_fe_disconnect(&s->chardev); > > + return; > > + } > > > > /* Kick right away to begin processing requests already in vring */ > > for (i = 0; i < s->dev.nvqs; i++) { > > @@ -271,13 +308,106 @@ static void vhost_user_blk_reset(VirtIODevice *vdev) > > vhost_dev_free_inflight(s->inflight); > > } > > > > +static int vhost_user_blk_connect(DeviceState *dev) > > +{ > > + VirtIODevice *vdev = VIRTIO_DEVICE(dev); > > + VHostUserBlk *s = VHOST_USER_BLK(vdev); > > + int ret = 0; > > + > > + if (s->connected) { > > + return 0; > > + } > > + s->connected = true; > > + > > + s->dev.nvqs = s->num_queues; > > + s->dev.vqs = s->vqs; > > + s->dev.vq_index = 0; > > + s->dev.backend_features = 0; > > + > > + vhost_dev_set_config_notifier(&s->dev, &blk_ops); > > + > > + ret = vhost_dev_init(&s->dev, s->vhost_user, VHOST_BACKEND_TYPE_USER, 0); > > + if (ret < 0) { > > + error_report("vhost-user-blk: vhost initialization failed: %s", > > + strerror(-ret)); > > + return ret; > > + } > > + > > + /* restore vhost state */ > > + if (s->should_start) { > > + ret = vhost_user_blk_start(vdev); > > + if (ret < 0) { > > + error_report("vhost-user-blk: vhost start failed: %s", > > + strerror(-ret)); > > + return ret; > > + } > > + } > > + > > + return 0; > > +} > > + > > +static void vhost_user_blk_disconnect(DeviceState *dev) > > +{ > > + VirtIODevice *vdev = VIRTIO_DEVICE(dev); > > + VHostUserBlk *s = VHOST_USER_BLK(vdev); > > + > > + if (!s->connected) { > > + return; > > + } > > + s->connected = false; > > + > > + if (s->dev.started) { > > + vhost_user_blk_stop(vdev); > > + } > > + > > + vhost_dev_cleanup(&s->dev); > > +} > > + > > +static gboolean vhost_user_blk_watch(GIOChannel *chan, GIOCondition cond, > > + void *opaque) > > +{ > > + DeviceState *dev = opaque; > > + VirtIODevice *vdev = 
VIRTIO_DEVICE(dev); > > + VHostUserBlk *s = VHOST_USER_BLK(vdev); > > + > > + qemu_chr_fe_disconnect(&s->chardev); > > + > > + return true; > > +} > > + > > +static void vhost_user_blk_event(void *opaque, int event) > > +{ > > + DeviceState *dev = opaque; > > + VirtIODevice *vdev = VIRTIO_DEVICE(dev); > > + VHostUserBlk *s = VHOST_USER_BLK(vdev); > > + > > + switch (event) { > > + case CHR_EVENT_OPENED: > > + if (vhost_user_blk_connect(dev) < 0) { > > + qemu_chr_fe_disconnect(&s->chardev); > > + return; > > + } > > + s->watch = qemu_chr_fe_add_watch(&s->chardev, G_IO_HUP, > > + vhost_user_blk_watch, dev); > > + break; > > + case CHR_EVENT_CLOSED: > > + vhost_user_blk_disconnect(dev); > > + if (s->watch) { > > + g_source_remove(s->watch); > > + s->watch = 0; > > + } > > + break; > > + } > > +} > > + > > + > > static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp) > > { > > VirtIODevice *vdev = VIRTIO_DEVICE(dev); > > VHostUserBlk *s = VHOST_USER_BLK(vdev); > > VhostUserState *user; > > - struct vhost_virtqueue *vqs = NULL; > > int i, ret; > > + Error *err = NULL; > > > > if (!s->chardev.chr) { > > error_setg(errp, "vhost-user-blk: chardev is mandatory"); > > @@ -312,27 +442,28 @@ static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp) > > } > > > > s->inflight = g_new0(struct vhost_inflight, 1); > > - > > - s->dev.nvqs = s->num_queues; > > - s->dev.vqs = g_new(struct vhost_virtqueue, s->dev.nvqs); > > - s->dev.vq_index = 0; > > - s->dev.backend_features = 0; > > - vqs = s->dev.vqs; > > - > > - vhost_dev_set_config_notifier(&s->dev, &blk_ops); > > - > > - ret = vhost_dev_init(&s->dev, s->vhost_user, VHOST_BACKEND_TYPE_USER, 0); > > - if (ret < 0) { > > - error_setg(errp, "vhost-user-blk: vhost initialization failed: %s", > > - strerror(-ret)); > > - goto virtio_err; > > - } > > + s->vqs = g_new(struct vhost_virtqueue, s->num_queues); > > + s->watch = 0; > > + s->should_start = false; > > + s->connected = false; > > + > > + 
qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL, vhost_user_blk_event, > > + NULL, (void *)dev, NULL, true); > > + > > +reconnect: > > + do { > > + if (qemu_chr_fe_wait_connected(&s->chardev, &err) < 0) { > > + error_report_err(err); > > + err = NULL; > > + sleep(1); > > + } > > + } while (!s->connected); > > > > ret = vhost_dev_get_config(&s->dev, (uint8_t *)&s->blkcfg, > > - sizeof(struct virtio_blk_config)); > > + sizeof(struct virtio_blk_config)); > > if (ret < 0) { > > - error_setg(errp, "vhost-user-blk: get block config failed"); > > - goto vhost_err; > > + error_report("vhost-user-blk: get block config failed"); > > + goto reconnect; > > } > > > > if (s->blkcfg.num_queues != s->num_queues) { > > @@ -340,29 +471,19 @@ static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp) > > } > > > > return; > > With this we end up with return at end of function > which makes no sense. > Oh, yes. I will remove it. > > - > > -vhost_err: > > - vhost_dev_cleanup(&s->dev); > > -virtio_err: > > - g_free(vqs); > > - g_free(s->inflight); > > - virtio_cleanup(vdev); > > - > > - vhost_user_cleanup(user); > > - g_free(user); > > - s->vhost_user = NULL; > > } > > > > static void vhost_user_blk_device_unrealize(DeviceState *dev, Error **errp) > > { > > VirtIODevice *vdev = VIRTIO_DEVICE(dev); > > VHostUserBlk *s = VHOST_USER_BLK(dev); > > - struct vhost_virtqueue *vqs = s->dev.vqs; > > > > vhost_user_blk_set_status(vdev, 0); > > + qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL, NULL, > > + NULL, NULL, NULL, false); > > vhost_dev_cleanup(&s->dev); > > vhost_dev_free_inflight(s->inflight); > > - g_free(vqs); > > + g_free(s->vqs); > > g_free(s->inflight); > > virtio_cleanup(vdev); > > > > diff --git a/include/hw/virtio/vhost-user-blk.h b/include/hw/virtio/vhost-user-blk.h > > index 445516604a..4849aa5eb5 100644 > > --- a/include/hw/virtio/vhost-user-blk.h > > +++ b/include/hw/virtio/vhost-user-blk.h > > @@ -38,6 +38,10 @@ typedef struct VHostUserBlk { > > struct 
vhost_dev dev; > > struct vhost_inflight *inflight; > > VhostUserState *vhost_user; > > + struct vhost_virtqueue *vqs; > > + guint watch; > > + bool should_start; > > Please add comments about fields. > Also should_start seems to be set on guest activity. > Does it need to be migrated? > It would be better not to track guest state like this. > Why do we need it? > Pls add comments with an explanation. > We can't rely on vhost_dev.started to track the guest's state in vhost_user_blk_set_status() when we enable reconnecting. For example, qemu will lose the guest's state if the guest stops the device while the backend is disconnected. So vhost_dev.started is used to track the backend's state and should_start is used to track the guest's state in this patch. And this variable is based on VirtIODevice.status, so I think it is not necessary to migrate it. What do you think about it? Thanks, Yongji
On Wed, Mar 13, 2019 at 10:05:51AM +0800, Yongji Xie wrote: > On Wed, 13 Mar 2019 at 00:49, Michael S. Tsirkin <mst@redhat.com> wrote: > > > > On Thu, Feb 28, 2019 at 04:53:54PM +0800, elohimes@gmail.com wrote: > > > From: Xie Yongji <xieyongji@baidu.com> > > > > > > Since we now support the message VHOST_USER_GET_INFLIGHT_FD > > > and VHOST_USER_SET_INFLIGHT_FD. The backend is able to restart > > > safely because it can track inflight I/O in shared memory. > > > This patch allows qemu to reconnect the backend after > > > connection closed. > > > > > > Signed-off-by: Xie Yongji <xieyongji@baidu.com> > > > Signed-off-by: Ni Xun <nixun@baidu.com> > > > Signed-off-by: Zhang Yu <zhangyu31@baidu.com> > > > --- > > > hw/block/vhost-user-blk.c | 205 +++++++++++++++++++++++------ > > > include/hw/virtio/vhost-user-blk.h | 4 + > > > 2 files changed, 167 insertions(+), 42 deletions(-) > > > > > > diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c > > > index 9682df1a7b..539ea2e571 100644 > > > --- a/hw/block/vhost-user-blk.c > > > +++ b/hw/block/vhost-user-blk.c > > > @@ -103,7 +103,7 @@ const VhostDevConfigOps blk_ops = { > > > .vhost_dev_config_notifier = vhost_user_blk_handle_config_change, > > > }; > > > > > > -static void vhost_user_blk_start(VirtIODevice *vdev) > > > +static int vhost_user_blk_start(VirtIODevice *vdev) > > > { > > > VHostUserBlk *s = VHOST_USER_BLK(vdev); > > > BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); > > > @@ -112,13 +112,13 @@ static void vhost_user_blk_start(VirtIODevice *vdev) > > > > > > if (!k->set_guest_notifiers) { > > > error_report("binding does not support guest notifiers"); > > > - return; > > > + return -ENOSYS; > > > } > > > > > > ret = vhost_dev_enable_notifiers(&s->dev, vdev); > > > if (ret < 0) { > > > error_report("Error enabling host notifiers: %d", -ret); > > > - return; > > > + return ret; > > > } > > > > > > ret = k->set_guest_notifiers(qbus->parent, s->dev.nvqs, true); > > > @@ -157,12 +157,13 @@ 
static void vhost_user_blk_start(VirtIODevice *vdev) > > > vhost_virtqueue_mask(&s->dev, vdev, i, false); > > > } > > > > > > - return; > > > + return ret; > > > > > > err_guest_notifiers: > > > k->set_guest_notifiers(qbus->parent, s->dev.nvqs, false); > > > err_host_notifiers: > > > vhost_dev_disable_notifiers(&s->dev, vdev); > > > + return ret; > > > } > > > > > > static void vhost_user_blk_stop(VirtIODevice *vdev) > > > @@ -181,7 +182,6 @@ static void vhost_user_blk_stop(VirtIODevice *vdev) > > > ret = k->set_guest_notifiers(qbus->parent, s->dev.nvqs, false); > > > if (ret < 0) { > > > error_report("vhost guest notifier cleanup failed: %d", ret); > > > - return; > > > } > > > > > > vhost_dev_disable_notifiers(&s->dev, vdev); > > > @@ -191,21 +191,43 @@ static void vhost_user_blk_set_status(VirtIODevice *vdev, uint8_t status) > > > { > > > VHostUserBlk *s = VHOST_USER_BLK(vdev); > > > bool should_start = status & VIRTIO_CONFIG_S_DRIVER_OK; > > > + int ret; > > > > > > if (!vdev->vm_running) { > > > should_start = false; > > > } > > > > > > - if (s->dev.started == should_start) { > > > + if (s->should_start == should_start) { > > > + return; > > > + } > > > + > > > + if (!s->connected || s->dev.started == should_start) { > > > + s->should_start = should_start; > > > return; > > > } > > > > > > if (should_start) { > > > - vhost_user_blk_start(vdev); > > > + s->should_start = true; > > > + /* > > > + * make sure vhost_user_blk_handle_output() ignores fake > > > + * guest kick by vhost_dev_enable_notifiers() > > > + */ > > > + barrier(); > > > + ret = vhost_user_blk_start(vdev); > > > + if (ret < 0) { > > > + error_report("vhost-user-blk: vhost start failed: %s", > > > + strerror(-ret)); > > > + qemu_chr_fe_disconnect(&s->chardev); > > > + } > > > } else { > > > vhost_user_blk_stop(vdev); > > > + /* > > > + * make sure vhost_user_blk_handle_output() ignore fake > > > + * guest kick by vhost_dev_disable_notifiers() > > > + */ > > > + barrier(); > > > + s->should_start 
= false; > > > } > > > - > > > } > > > > > > static uint64_t vhost_user_blk_get_features(VirtIODevice *vdev, > > > @@ -237,13 +259,22 @@ static uint64_t vhost_user_blk_get_features(VirtIODevice *vdev, > > > static void vhost_user_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq) > > > { > > > VHostUserBlk *s = VHOST_USER_BLK(vdev); > > > - int i; > > > + int i, ret; > > > > > > if (!(virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1) && > > > !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1))) { > > > return; > > > } > > > > > > + if (s->should_start) { > > > + return; > > > + } > > > + s->should_start = true; > > > + > > > + if (!s->connected) { > > > + return; > > > + } > > > + > > > if (s->dev.started) { > > > return; > > > } > > > @@ -251,7 +282,13 @@ static void vhost_user_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq) > > > /* Some guests kick before setting VIRTIO_CONFIG_S_DRIVER_OK so start > > > * vhost here instead of waiting for .set_status(). > > > */ > > > - vhost_user_blk_start(vdev); > > > + ret = vhost_user_blk_start(vdev); > > > + if (ret < 0) { > > > + error_report("vhost-user-blk: vhost start failed: %s", > > > + strerror(-ret)); > > > + qemu_chr_fe_disconnect(&s->chardev); > > > + return; > > > + } > > > > > > /* Kick right away to begin processing requests already in vring */ > > > for (i = 0; i < s->dev.nvqs; i++) { > > > @@ -271,13 +308,106 @@ static void vhost_user_blk_reset(VirtIODevice *vdev) > > > vhost_dev_free_inflight(s->inflight); > > > } > > > > > > +static int vhost_user_blk_connect(DeviceState *dev) > > > +{ > > > + VirtIODevice *vdev = VIRTIO_DEVICE(dev); > > > + VHostUserBlk *s = VHOST_USER_BLK(vdev); > > > + int ret = 0; > > > + > > > + if (s->connected) { > > > + return 0; > > > + } > > > + s->connected = true; > > > + > > > + s->dev.nvqs = s->num_queues; > > > + s->dev.vqs = s->vqs; > > > + s->dev.vq_index = 0; > > > + s->dev.backend_features = 0; > > > + > > > + vhost_dev_set_config_notifier(&s->dev, &blk_ops); > > > 
+ > > > + ret = vhost_dev_init(&s->dev, s->vhost_user, VHOST_BACKEND_TYPE_USER, 0); > > > + if (ret < 0) { > > > + error_report("vhost-user-blk: vhost initialization failed: %s", > > > + strerror(-ret)); > > > + return ret; > > > + } > > > + > > > + /* restore vhost state */ > > > + if (s->should_start) { > > > + ret = vhost_user_blk_start(vdev); > > > + if (ret < 0) { > > > + error_report("vhost-user-blk: vhost start failed: %s", > > > + strerror(-ret)); > > > + return ret; > > > + } > > > + } > > > + > > > + return 0; > > > +} > > > + > > > +static void vhost_user_blk_disconnect(DeviceState *dev) > > > +{ > > > + VirtIODevice *vdev = VIRTIO_DEVICE(dev); > > > + VHostUserBlk *s = VHOST_USER_BLK(vdev); > > > + > > > + if (!s->connected) { > > > + return; > > > + } > > > + s->connected = false; > > > + > > > + if (s->dev.started) { > > > + vhost_user_blk_stop(vdev); > > > + } > > > + > > > + vhost_dev_cleanup(&s->dev); > > > +} > > > + > > > +static gboolean vhost_user_blk_watch(GIOChannel *chan, GIOCondition cond, > > > + void *opaque) > > > +{ > > > + DeviceState *dev = opaque; > > > + VirtIODevice *vdev = VIRTIO_DEVICE(dev); > > > + VHostUserBlk *s = VHOST_USER_BLK(vdev); > > > + > > > + qemu_chr_fe_disconnect(&s->chardev); > > > + > > > + return true; > > > +} > > > + > > > +static void vhost_user_blk_event(void *opaque, int event) > > > +{ > > > + DeviceState *dev = opaque; > > > + VirtIODevice *vdev = VIRTIO_DEVICE(dev); > > > + VHostUserBlk *s = VHOST_USER_BLK(vdev); > > > + > > > + switch (event) { > > > + case CHR_EVENT_OPENED: > > > + if (vhost_user_blk_connect(dev) < 0) { > > > + qemu_chr_fe_disconnect(&s->chardev); > > > + return; > > > + } > > > + s->watch = qemu_chr_fe_add_watch(&s->chardev, G_IO_HUP, > > > + vhost_user_blk_watch, dev); > > > + break; > > > + case CHR_EVENT_CLOSED: > > > + vhost_user_blk_disconnect(dev); > > > + if (s->watch) { > > > + g_source_remove(s->watch); > > > + s->watch = 0; > > > + } > > > + break; > > > + } > > > +} > > > + 
> > > + > > > static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp) > > > { > > > VirtIODevice *vdev = VIRTIO_DEVICE(dev); > > > VHostUserBlk *s = VHOST_USER_BLK(vdev); > > > VhostUserState *user; > > > - struct vhost_virtqueue *vqs = NULL; > > > int i, ret; > > > + Error *err = NULL; > > > > > > if (!s->chardev.chr) { > > > error_setg(errp, "vhost-user-blk: chardev is mandatory"); > > > @@ -312,27 +442,28 @@ static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp) > > > } > > > > > > s->inflight = g_new0(struct vhost_inflight, 1); > > > - > > > - s->dev.nvqs = s->num_queues; > > > - s->dev.vqs = g_new(struct vhost_virtqueue, s->dev.nvqs); > > > - s->dev.vq_index = 0; > > > - s->dev.backend_features = 0; > > > - vqs = s->dev.vqs; > > > - > > > - vhost_dev_set_config_notifier(&s->dev, &blk_ops); > > > - > > > - ret = vhost_dev_init(&s->dev, s->vhost_user, VHOST_BACKEND_TYPE_USER, 0); > > > - if (ret < 0) { > > > - error_setg(errp, "vhost-user-blk: vhost initialization failed: %s", > > > - strerror(-ret)); > > > - goto virtio_err; > > > - } > > > + s->vqs = g_new(struct vhost_virtqueue, s->num_queues); > > > + s->watch = 0; > > > + s->should_start = false; > > > + s->connected = false; > > > + > > > + qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL, vhost_user_blk_event, > > > + NULL, (void *)dev, NULL, true); > > > + > > > +reconnect: > > > + do { > > > + if (qemu_chr_fe_wait_connected(&s->chardev, &err) < 0) { > > > + error_report_err(err); > > > + err = NULL; > > > + sleep(1); > > > > Seems arbitrary. Is this basically waiting until backend will reconnect? > > Why not block until event on the fd triggers? > > > > Qemu is client in current vhost-user-blk implenment. Seems like > backend will not try to connect. Or just fail realize and make management retry? > > Also, it looks like this will just block forever with no monitor input > > and no way for user to figure out what is going on short of > > crashing QEMU. 
> > > > Yes, because I want to support the case that backend starts after > qemu. I can print some warning and add timeout on this. > > Thanks, > Yongji Well a chardev already has a reconnect option. Can you use that machinery? IIUC it works without blocking QEMU.
On Wed, Mar 13, 2019 at 10:47:08AM +0800, Yongji Xie wrote: > On Wed, 13 Mar 2019 at 09:16, Michael S. Tsirkin <mst@redhat.com> wrote: > > > > On Thu, Feb 28, 2019 at 04:53:54PM +0800, elohimes@gmail.com wrote: > > > From: Xie Yongji <xieyongji@baidu.com> > > > > > > Since we now support the message VHOST_USER_GET_INFLIGHT_FD > > > and VHOST_USER_SET_INFLIGHT_FD. The backend is able to restart > > > safely because it can track inflight I/O in shared memory. > > > This patch allows qemu to reconnect the backend after > > > connection closed. > > > > > > Signed-off-by: Xie Yongji <xieyongji@baidu.com> > > > Signed-off-by: Ni Xun <nixun@baidu.com> > > > Signed-off-by: Zhang Yu <zhangyu31@baidu.com> > > > --- > > > hw/block/vhost-user-blk.c | 205 +++++++++++++++++++++++------ > > > include/hw/virtio/vhost-user-blk.h | 4 + > > > 2 files changed, 167 insertions(+), 42 deletions(-) > > > > > > diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c > > > index 9682df1a7b..539ea2e571 100644 > > > --- a/hw/block/vhost-user-blk.c > > > +++ b/hw/block/vhost-user-blk.c > > > @@ -103,7 +103,7 @@ const VhostDevConfigOps blk_ops = { > > > .vhost_dev_config_notifier = vhost_user_blk_handle_config_change, > > > }; > > > > > > -static void vhost_user_blk_start(VirtIODevice *vdev) > > > +static int vhost_user_blk_start(VirtIODevice *vdev) > > > { > > > VHostUserBlk *s = VHOST_USER_BLK(vdev); > > > BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); > > > @@ -112,13 +112,13 @@ static void vhost_user_blk_start(VirtIODevice *vdev) > > > > > > if (!k->set_guest_notifiers) { > > > error_report("binding does not support guest notifiers"); > > > - return; > > > + return -ENOSYS; > > > } > > > > > > ret = vhost_dev_enable_notifiers(&s->dev, vdev); > > > if (ret < 0) { > > > error_report("Error enabling host notifiers: %d", -ret); > > > - return; > > > + return ret; > > > } > > > > > > ret = k->set_guest_notifiers(qbus->parent, s->dev.nvqs, true); > > > @@ -157,12 +157,13 @@ 
static void vhost_user_blk_start(VirtIODevice *vdev) > > > vhost_virtqueue_mask(&s->dev, vdev, i, false); > > > } > > > > > > - return; > > > + return ret; > > > > > > err_guest_notifiers: > > > k->set_guest_notifiers(qbus->parent, s->dev.nvqs, false); > > > err_host_notifiers: > > > vhost_dev_disable_notifiers(&s->dev, vdev); > > > + return ret; > > > } > > > > > > static void vhost_user_blk_stop(VirtIODevice *vdev) > > > @@ -181,7 +182,6 @@ static void vhost_user_blk_stop(VirtIODevice *vdev) > > > ret = k->set_guest_notifiers(qbus->parent, s->dev.nvqs, false); > > > if (ret < 0) { > > > error_report("vhost guest notifier cleanup failed: %d", ret); > > > - return; > > > } > > > > > > vhost_dev_disable_notifiers(&s->dev, vdev); > > > @@ -191,21 +191,43 @@ static void vhost_user_blk_set_status(VirtIODevice *vdev, uint8_t status) > > > { > > > VHostUserBlk *s = VHOST_USER_BLK(vdev); > > > bool should_start = status & VIRTIO_CONFIG_S_DRIVER_OK; > > > + int ret; > > > > > > if (!vdev->vm_running) { > > > should_start = false; > > > } > > > > > > - if (s->dev.started == should_start) { > > > + if (s->should_start == should_start) { > > > + return; > > > + } > > > + > > > + if (!s->connected || s->dev.started == should_start) { > > > + s->should_start = should_start; > > > return; > > > } > > > > > > if (should_start) { > > > - vhost_user_blk_start(vdev); > > > + s->should_start = true; > > > + /* > > > + * make sure vhost_user_blk_handle_output() ignores fake > > > + * guest kick by vhost_dev_enable_notifiers() > > > + */ > > > + barrier(); > > > + ret = vhost_user_blk_start(vdev); > > > + if (ret < 0) { > > > + error_report("vhost-user-blk: vhost start failed: %s", > > > + strerror(-ret)); > > > + qemu_chr_fe_disconnect(&s->chardev); > > > + } > > > } else { > > > vhost_user_blk_stop(vdev); > > > + /* > > > + * make sure vhost_user_blk_handle_output() ignore fake > > > + * guest kick by vhost_dev_disable_notifiers() > > > + */ > > > + barrier(); > > > + s->should_start 
= false; > > > } > > > > I don't understand the comment about ignoring fake guest kicks. > > Please add an explanation about what does barrier do here. > > We may get stack like this: > > vhost_user_blk_stop() > vhost_dev_disable_notifiers() > virtio_bus_cleanup_host_notifier() > virtio_queue_host_notifier_read() > virtio_queue_notify_vq() /* fake guest kick */ > vhost_user_blk_handle_output() > > vhost_user_blk_handle_output() will re-start vhost-user if > should_start is false. This is not what we expect. The same to > vhost_dev_enable_notifiers(). Well what does a compiler barrier have to do with it? Anyway, it's there to avoid losing kicks e.g. across migration. If you have something else that prevents losing kicks (e.g. migrate a "started" flag) then add code to skip the notify - don't work around it. > > And maybe a detailed explanation about what "fake kick" > > is in the commit log. > > > > Sure. > > > > > - > > > } > > > > > > static uint64_t vhost_user_blk_get_features(VirtIODevice *vdev, > > > @@ -237,13 +259,22 @@ static uint64_t vhost_user_blk_get_features(VirtIODevice *vdev, > > > static void vhost_user_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq) > > > { > > > VHostUserBlk *s = VHOST_USER_BLK(vdev); > > > - int i; > > > + int i, ret; > > > > > > if (!(virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1) && > > > !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1))) { > > > return; > > > } > > > > > > + if (s->should_start) { > > > + return; > > > + } > > > + s->should_start = true; > > > + > > > + if (!s->connected) { > > > + return; > > > + } > > > + > > > if (s->dev.started) { > > > return; > > > } > > > @@ -251,7 +282,13 @@ static void vhost_user_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq) > > > /* Some guests kick before setting VIRTIO_CONFIG_S_DRIVER_OK so start > > > * vhost here instead of waiting for .set_status(). 
> > > */ > > > - vhost_user_blk_start(vdev); > > > + ret = vhost_user_blk_start(vdev); > > > + if (ret < 0) { > > > + error_report("vhost-user-blk: vhost start failed: %s", > > > + strerror(-ret)); > > > + qemu_chr_fe_disconnect(&s->chardev); > > > + return; > > > + } > > > > > > /* Kick right away to begin processing requests already in vring */ > > > for (i = 0; i < s->dev.nvqs; i++) { > > > @@ -271,13 +308,106 @@ static void vhost_user_blk_reset(VirtIODevice *vdev) > > > vhost_dev_free_inflight(s->inflight); > > > } > > > > > > +static int vhost_user_blk_connect(DeviceState *dev) > > > +{ > > > + VirtIODevice *vdev = VIRTIO_DEVICE(dev); > > > + VHostUserBlk *s = VHOST_USER_BLK(vdev); > > > + int ret = 0; > > > + > > > + if (s->connected) { > > > + return 0; > > > + } > > > + s->connected = true; > > > + > > > + s->dev.nvqs = s->num_queues; > > > + s->dev.vqs = s->vqs; > > > + s->dev.vq_index = 0; > > > + s->dev.backend_features = 0; > > > + > > > + vhost_dev_set_config_notifier(&s->dev, &blk_ops); > > > + > > > + ret = vhost_dev_init(&s->dev, s->vhost_user, VHOST_BACKEND_TYPE_USER, 0); > > > + if (ret < 0) { > > > + error_report("vhost-user-blk: vhost initialization failed: %s", > > > + strerror(-ret)); > > > + return ret; > > > + } > > > + > > > + /* restore vhost state */ > > > + if (s->should_start) { > > > + ret = vhost_user_blk_start(vdev); > > > + if (ret < 0) { > > > + error_report("vhost-user-blk: vhost start failed: %s", > > > + strerror(-ret)); > > > + return ret; > > > + } > > > + } > > > + > > > + return 0; > > > +} > > > + > > > +static void vhost_user_blk_disconnect(DeviceState *dev) > > > +{ > > > + VirtIODevice *vdev = VIRTIO_DEVICE(dev); > > > + VHostUserBlk *s = VHOST_USER_BLK(vdev); > > > + > > > + if (!s->connected) { > > > + return; > > > + } > > > + s->connected = false; > > > + > > > + if (s->dev.started) { > > > + vhost_user_blk_stop(vdev); > > > + } > > > + > > > + vhost_dev_cleanup(&s->dev); > > > +} > > > + > > > +static gboolean 
vhost_user_blk_watch(GIOChannel *chan, GIOCondition cond, > > > + void *opaque) > > > +{ > > > + DeviceState *dev = opaque; > > > + VirtIODevice *vdev = VIRTIO_DEVICE(dev); > > > + VHostUserBlk *s = VHOST_USER_BLK(vdev); > > > + > > > + qemu_chr_fe_disconnect(&s->chardev); > > > + > > > + return true; > > > +} > > > + > > > +static void vhost_user_blk_event(void *opaque, int event) > > > +{ > > > + DeviceState *dev = opaque; > > > + VirtIODevice *vdev = VIRTIO_DEVICE(dev); > > > + VHostUserBlk *s = VHOST_USER_BLK(vdev); > > > + > > > + switch (event) { > > > + case CHR_EVENT_OPENED: > > > + if (vhost_user_blk_connect(dev) < 0) { > > > + qemu_chr_fe_disconnect(&s->chardev); > > > + return; > > > + } > > > + s->watch = qemu_chr_fe_add_watch(&s->chardev, G_IO_HUP, > > > + vhost_user_blk_watch, dev); > > > + break; > > > + case CHR_EVENT_CLOSED: > > > + vhost_user_blk_disconnect(dev); > > > + if (s->watch) { > > > + g_source_remove(s->watch); > > > + s->watch = 0; > > > + } > > > + break; > > > + } > > > +} > > > + > > > + > > > static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp) > > > { > > > VirtIODevice *vdev = VIRTIO_DEVICE(dev); > > > VHostUserBlk *s = VHOST_USER_BLK(vdev); > > > VhostUserState *user; > > > - struct vhost_virtqueue *vqs = NULL; > > > int i, ret; > > > + Error *err = NULL; > > > > > > if (!s->chardev.chr) { > > > error_setg(errp, "vhost-user-blk: chardev is mandatory"); > > > @@ -312,27 +442,28 @@ static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp) > > > } > > > > > > s->inflight = g_new0(struct vhost_inflight, 1); > > > - > > > - s->dev.nvqs = s->num_queues; > > > - s->dev.vqs = g_new(struct vhost_virtqueue, s->dev.nvqs); > > > - s->dev.vq_index = 0; > > > - s->dev.backend_features = 0; > > > - vqs = s->dev.vqs; > > > - > > > - vhost_dev_set_config_notifier(&s->dev, &blk_ops); > > > - > > > - ret = vhost_dev_init(&s->dev, s->vhost_user, VHOST_BACKEND_TYPE_USER, 0); > > > - if (ret < 0) { > > > - 
error_setg(errp, "vhost-user-blk: vhost initialization failed: %s", > > > - strerror(-ret)); > > > - goto virtio_err; > > > - } > > > + s->vqs = g_new(struct vhost_virtqueue, s->num_queues); > > > + s->watch = 0; > > > + s->should_start = false; > > > + s->connected = false; > > > + > > > + qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL, vhost_user_blk_event, > > > + NULL, (void *)dev, NULL, true); > > > + > > > +reconnect: > > > + do { > > > + if (qemu_chr_fe_wait_connected(&s->chardev, &err) < 0) { > > > + error_report_err(err); > > > + err = NULL; > > > + sleep(1); > > > + } > > > + } while (!s->connected); > > > > > > ret = vhost_dev_get_config(&s->dev, (uint8_t *)&s->blkcfg, > > > - sizeof(struct virtio_blk_config)); > > > + sizeof(struct virtio_blk_config)); > > > if (ret < 0) { > > > - error_setg(errp, "vhost-user-blk: get block config failed"); > > > - goto vhost_err; > > > + error_report("vhost-user-blk: get block config failed"); > > > + goto reconnect; > > > } > > > > > > if (s->blkcfg.num_queues != s->num_queues) { > > > @@ -340,29 +471,19 @@ static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp) > > > } > > > > > > return; > > > > With this we end up with return at end of function > > which makes no sense. > > > > Oh, yes. I will remove it. 
> > > > - > > > -vhost_err: > > > - vhost_dev_cleanup(&s->dev); > > > -virtio_err: > > > - g_free(vqs); > > > - g_free(s->inflight); > > > - virtio_cleanup(vdev); > > > - > > > - vhost_user_cleanup(user); > > > - g_free(user); > > > - s->vhost_user = NULL; > > > } > > > > > > static void vhost_user_blk_device_unrealize(DeviceState *dev, Error **errp) > > > { > > > VirtIODevice *vdev = VIRTIO_DEVICE(dev); > > > VHostUserBlk *s = VHOST_USER_BLK(dev); > > > - struct vhost_virtqueue *vqs = s->dev.vqs; > > > > > > vhost_user_blk_set_status(vdev, 0); > > > + qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL, NULL, > > > + NULL, NULL, NULL, false); > > > vhost_dev_cleanup(&s->dev); > > > vhost_dev_free_inflight(s->inflight); > > > - g_free(vqs); > > > + g_free(s->vqs); > > > g_free(s->inflight); > > > virtio_cleanup(vdev); > > > > > > diff --git a/include/hw/virtio/vhost-user-blk.h b/include/hw/virtio/vhost-user-blk.h > > > index 445516604a..4849aa5eb5 100644 > > > --- a/include/hw/virtio/vhost-user-blk.h > > > +++ b/include/hw/virtio/vhost-user-blk.h > > > @@ -38,6 +38,10 @@ typedef struct VHostUserBlk { > > > struct vhost_dev dev; > > > struct vhost_inflight *inflight; > > > VhostUserState *vhost_user; > > > + struct vhost_virtqueue *vqs; > > > + guint watch; > > > + bool should_start; > > > > Please add comments about fields. > > Also should_start seems to be set on guest activity. > > Does it need to be migrated? > > It would be better not to track guest state like this. > > Why do we need it? > > Pls add comments with an explanation. > > > > We can't relay on vhost_dev.started to track guest's state in > vhost_user_blk_set_status() when we enable reconnecting. For example, > qemu will lose guest's state if guest stop device during backend is > disconnected. So vhost_dev.started is used to track backend's state > and should_start is used to track guest's state in this patch. 
And > this variable is based on VirtIODevice.status, so I think it is not > necessary to migrate it. What do you think about it? If it's based on VirtIODevice.status then how about not keeping it around at all and just calculating from VirtIODevice.status when necessary? > Thanks, > Yongji
On Tue, Mar 12, 2019 at 12:49:35PM -0400, Michael S. Tsirkin wrote: > On Thu, Feb 28, 2019 at 04:53:54PM +0800, elohimes@gmail.com wrote: > > From: Xie Yongji <xieyongji@baidu.com> > > > > Since we now support the message VHOST_USER_GET_INFLIGHT_FD > > and VHOST_USER_SET_INFLIGHT_FD. The backend is able to restart > > safely because it can track inflight I/O in shared memory. > > This patch allows qemu to reconnect the backend after > > connection closed. > > > > Signed-off-by: Xie Yongji <xieyongji@baidu.com> > > Signed-off-by: Ni Xun <nixun@baidu.com> > > Signed-off-by: Zhang Yu <zhangyu31@baidu.com> > > --- > > hw/block/vhost-user-blk.c | 205 +++++++++++++++++++++++------ > > include/hw/virtio/vhost-user-blk.h | 4 + > > 2 files changed, 167 insertions(+), 42 deletions(-) > > static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp) > > { > > VirtIODevice *vdev = VIRTIO_DEVICE(dev); > > VHostUserBlk *s = VHOST_USER_BLK(vdev); > > VhostUserState *user; > > - struct vhost_virtqueue *vqs = NULL; > > int i, ret; > > + Error *err = NULL; > > > > if (!s->chardev.chr) { > > error_setg(errp, "vhost-user-blk: chardev is mandatory"); > > @@ -312,27 +442,28 @@ static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp) > > } > > > > s->inflight = g_new0(struct vhost_inflight, 1); > > - > > - s->dev.nvqs = s->num_queues; > > - s->dev.vqs = g_new(struct vhost_virtqueue, s->dev.nvqs); > > - s->dev.vq_index = 0; > > - s->dev.backend_features = 0; > > - vqs = s->dev.vqs; > > - > > - vhost_dev_set_config_notifier(&s->dev, &blk_ops); > > - > > - ret = vhost_dev_init(&s->dev, s->vhost_user, VHOST_BACKEND_TYPE_USER, 0); > > - if (ret < 0) { > > - error_setg(errp, "vhost-user-blk: vhost initialization failed: %s", > > - strerror(-ret)); > > - goto virtio_err; > > - } > > + s->vqs = g_new(struct vhost_virtqueue, s->num_queues); > > + s->watch = 0; > > + s->should_start = false; > > + s->connected = false; > > + > > + qemu_chr_fe_set_handlers(&s->chardev, 
NULL, NULL, vhost_user_blk_event, > > + NULL, (void *)dev, NULL, true); > > + > > +reconnect: > > + do { > > + if (qemu_chr_fe_wait_connected(&s->chardev, &err) < 0) { > > + error_report_err(err); > > + err = NULL; > > + sleep(1); > > Seems arbitrary. Is this basically waiting until backend will reconnect? > Why not block until event on the fd triggers? > > Also, it looks like this will just block forever with no monitor input > and no way for user to figure out what is going on short of > crashing QEMU. FWIW, the current vhost-user-net device does exactly the same thing with calling qemu_chr_fe_wait_connected during its realize() function. Can't say I'm thrilled about the existing usage either, but I don't see a particularly nice alternative if it isn't possible to realize the device without having a backend available. I don't think this is an especially serious problem during cold startup though since one doesn't expect the monitor to become responsive until the device model is fully realized & all backends connected. The real concern is if you need to hotplug this kind of device at runtime. In that case blocking the main loop / monitor is a serious problem that will actively harm both the guest OS and mgmt applications. The vhost-user-net device will already suffer from that. To solve the problem with hotplug, the mgmt app would need to plug the chardev backend, then wait for it to become connected, and only then plug the device frontend. In that case we would not want this loop. We'd expect it to use the already connected chardev, and fail the realize() function if the chardev has lost its connection. Regards, Daniel
On Thu, Mar 14, 2019 at 11:24:22AM +0000, Daniel P. Berrangé wrote: > On Tue, Mar 12, 2019 at 12:49:35PM -0400, Michael S. Tsirkin wrote: > > On Thu, Feb 28, 2019 at 04:53:54PM +0800, elohimes@gmail.com wrote: > > > From: Xie Yongji <xieyongji@baidu.com> > > > > > > Since we now support the message VHOST_USER_GET_INFLIGHT_FD > > > and VHOST_USER_SET_INFLIGHT_FD. The backend is able to restart > > > safely because it can track inflight I/O in shared memory. > > > This patch allows qemu to reconnect the backend after > > > connection closed. > > > > > > Signed-off-by: Xie Yongji <xieyongji@baidu.com> > > > Signed-off-by: Ni Xun <nixun@baidu.com> > > > Signed-off-by: Zhang Yu <zhangyu31@baidu.com> > > > --- > > > hw/block/vhost-user-blk.c | 205 +++++++++++++++++++++++------ > > > include/hw/virtio/vhost-user-blk.h | 4 + > > > 2 files changed, 167 insertions(+), 42 deletions(-) > > > > > static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp) > > > { > > > VirtIODevice *vdev = VIRTIO_DEVICE(dev); > > > VHostUserBlk *s = VHOST_USER_BLK(vdev); > > > VhostUserState *user; > > > - struct vhost_virtqueue *vqs = NULL; > > > int i, ret; > > > + Error *err = NULL; > > > > > > if (!s->chardev.chr) { > > > error_setg(errp, "vhost-user-blk: chardev is mandatory"); > > > @@ -312,27 +442,28 @@ static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp) > > > } > > > > > > s->inflight = g_new0(struct vhost_inflight, 1); > > > - > > > - s->dev.nvqs = s->num_queues; > > > - s->dev.vqs = g_new(struct vhost_virtqueue, s->dev.nvqs); > > > - s->dev.vq_index = 0; > > > - s->dev.backend_features = 0; > > > - vqs = s->dev.vqs; > > > - > > > - vhost_dev_set_config_notifier(&s->dev, &blk_ops); > > > - > > > - ret = vhost_dev_init(&s->dev, s->vhost_user, VHOST_BACKEND_TYPE_USER, 0); > > > - if (ret < 0) { > > > - error_setg(errp, "vhost-user-blk: vhost initialization failed: %s", > > > - strerror(-ret)); > > > - goto virtio_err; > > > - } > > > + s->vqs = 
g_new(struct vhost_virtqueue, s->num_queues); > > > + s->watch = 0; > > > + s->should_start = false; > > > + s->connected = false; > > > + > > > + qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL, vhost_user_blk_event, > > > + NULL, (void *)dev, NULL, true); > > > + > > > +reconnect: > > > + do { > > > + if (qemu_chr_fe_wait_connected(&s->chardev, &err) < 0) { > > > + error_report_err(err); > > > + err = NULL; > > > + sleep(1); > > > > Seems arbitrary. Is this basically waiting until backend will reconnect? > > Why not block until event on the fd triggers? > > > > Also, it looks like this will just block forever with no monitor input > > and no way for user to figure out what is going on short of > > crashing QEMU. > > FWIW, the current vhost-user-net device does exactly the same thing > with calling qemu_chr_fe_wait_connected during its realize() function. Hmm yes. It doesn't sleep for an arbitrary 1 sec so less of an eyesore :) > Can't say I'm thrilled about the existing usage either, but I don't > see a particularly nice alternative if it isn't possible to realize > the device without having a backend available. > > I don't think this an especially serious problem during cold startup > though since doesn't expect the monitor to become responsive until > the device model is fully realized & all backends connected. > > The real concern is if you need to hotplug this kind of device > at runtime. In that case blocking the main loop / monitor is a > serious problem that will actively harm both the guest OS and > mgmt applications. > > The vhost-user-net device will already suffer from that. Right. > To solve the problem with hotplug, the mgmt app would need to > plug the chardev backend, then wait for it to become connected, > and only then plug the device frontend. In that case we would not > want this loop. We'd expect it to use the already connected > chardev, and fail the realize() function if the chardev has > lost its connection. 
> > Regards, > Daniel I think as step one we should maybe just busy-wait with no sleep(1). But respecting reconnect would be nice too. And I think if we can't respect nowait we should detect it and fail, in both net and block ... > -- > |: https://berrange.com -o- https://www.flickr.com/photos/dberrange :| > |: https://libvirt.org -o- https://fstop138.berrange.com :| > |: https://entangle-photo.org -o- https://www.instagram.com/dberrange :|
On Thu, Mar 14, 2019 at 07:34:03AM -0400, Michael S. Tsirkin wrote: > On Thu, Mar 14, 2019 at 11:24:22AM +0000, Daniel P. Berrangé wrote: > > On Tue, Mar 12, 2019 at 12:49:35PM -0400, Michael S. Tsirkin wrote: > > > On Thu, Feb 28, 2019 at 04:53:54PM +0800, elohimes@gmail.com wrote: > > > > From: Xie Yongji <xieyongji@baidu.com> > > > > > > > > Since we now support the message VHOST_USER_GET_INFLIGHT_FD > > > > and VHOST_USER_SET_INFLIGHT_FD. The backend is able to restart > > > > safely because it can track inflight I/O in shared memory. > > > > This patch allows qemu to reconnect the backend after > > > > connection closed. > > > > > > > > Signed-off-by: Xie Yongji <xieyongji@baidu.com> > > > > Signed-off-by: Ni Xun <nixun@baidu.com> > > > > Signed-off-by: Zhang Yu <zhangyu31@baidu.com> > > > > --- > > > > hw/block/vhost-user-blk.c | 205 +++++++++++++++++++++++------ > > > > include/hw/virtio/vhost-user-blk.h | 4 + > > > > 2 files changed, 167 insertions(+), 42 deletions(-) > > > > > > > > static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp) > > > > { > > > > VirtIODevice *vdev = VIRTIO_DEVICE(dev); > > > > VHostUserBlk *s = VHOST_USER_BLK(vdev); > > > > VhostUserState *user; > > > > - struct vhost_virtqueue *vqs = NULL; > > > > int i, ret; > > > > + Error *err = NULL; > > > > > > > > if (!s->chardev.chr) { > > > > error_setg(errp, "vhost-user-blk: chardev is mandatory"); > > > > @@ -312,27 +442,28 @@ static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp) > > > > } > > > > > > > > s->inflight = g_new0(struct vhost_inflight, 1); > > > > - > > > > - s->dev.nvqs = s->num_queues; > > > > - s->dev.vqs = g_new(struct vhost_virtqueue, s->dev.nvqs); > > > > - s->dev.vq_index = 0; > > > > - s->dev.backend_features = 0; > > > > - vqs = s->dev.vqs; > > > > - > > > > - vhost_dev_set_config_notifier(&s->dev, &blk_ops); > > > > - > > > > - ret = vhost_dev_init(&s->dev, s->vhost_user, VHOST_BACKEND_TYPE_USER, 0); > > > > - if (ret < 0) { 
> > > > - error_setg(errp, "vhost-user-blk: vhost initialization failed: %s", > > > > - strerror(-ret)); > > > > - goto virtio_err; > > > > - } > > > > + s->vqs = g_new(struct vhost_virtqueue, s->num_queues); > > > > + s->watch = 0; > > > > + s->should_start = false; > > > > + s->connected = false; > > > > + > > > > + qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL, vhost_user_blk_event, > > > > + NULL, (void *)dev, NULL, true); > > > > + > > > > +reconnect: > > > > + do { > > > > + if (qemu_chr_fe_wait_connected(&s->chardev, &err) < 0) { > > > > + error_report_err(err); > > > > + err = NULL; > > > > + sleep(1); > > > > > > Seems arbitrary. Is this basically waiting until backend will reconnect? > > > Why not block until event on the fd triggers? > > > > > > Also, it looks like this will just block forever with no monitor input > > > and no way for user to figure out what is going on short of > > > crashing QEMU. > > > > FWIW, the current vhost-user-net device does exactly the same thing > > with calling qemu_chr_fe_wait_connected during its realize() function. > > Hmm yes. It doesn't sleep for an arbitrary 1 sec so less of an eyesore :) The sleep(1) in this patch simply needs to be removed. I think that probably dates from when it was written against the earlier broken version of qemu_chr_fe_wait_connected(). That would not correctly deal with the "reconnect" flag, and so needing this loop with a sleep in it. In fact the while loop can be removed as well in this code. It just needs to call qemu_chr_fe_wait_connected() once. It is guaranteed to have a connected peer once that returns 0. qemu_chr_fe_wait_connected() only returns -1 if the operating in client mode, and it failed to connect and reconnect is *not* requested. In such case the caller should honour the failure and quit, not loop to retry. 
The reason vhost-user-net does a loop is because once it has a connection it tries to do a protocol handshake, and if that handshake fails it closes the chardev and tries to connect again. That's not the case in this blk code so the loop is not needed. > > Can't say I'm thrilled about the existing usage either, but I don't > > see a particularly nice alternative if it isn't possible to realize > > the device without having a backend available. > > > > I don't think this an especially serious problem during cold startup > > though since doesn't expect the monitor to become responsive until > > the device model is fully realized & all backends connected. > > > > The real concern is if you need to hotplug this kind of device > > at runtime. In that case blocking the main loop / monitor is a > > serious problem that will actively harm both the guest OS and > > mgmt applications. > > > > The vhost-user-net device will already suffer from that. > > Right. > > > To solve the problem with hotplug, the mgmt app would need to > > plug the chardev backend, then wait for it to become connected, > > and only then plug the device frontend. In that case we would not > > want this loop. We'd expect it to use the already connected > > chardev, and fail the realize() function if the chardev has > > lost its connection. > > I think as step one we should maybe just busy-wait with no sleep(1). > But respecting reconnect would be nice too. As above I don't see a need to loop at all. > And I think if we can't respect nowait we should > detect it and fail, in both net and block ... 
On Thu, Mar 14, 2019 at 11:43:26AM +0000, Daniel P. Berrangé wrote: > On Thu, Mar 14, 2019 at 07:34:03AM -0400, Michael S. Tsirkin wrote: > > On Thu, Mar 14, 2019 at 11:24:22AM +0000, Daniel P. Berrangé wrote: > > > On Tue, Mar 12, 2019 at 12:49:35PM -0400, Michael S. Tsirkin wrote: > > > > On Thu, Feb 28, 2019 at 04:53:54PM +0800, elohimes@gmail.com wrote: > > > > > From: Xie Yongji <xieyongji@baidu.com> > > > > > > > > > > Since we now support the message VHOST_USER_GET_INFLIGHT_FD > > > > > and VHOST_USER_SET_INFLIGHT_FD. The backend is able to restart > > > > > safely because it can track inflight I/O in shared memory. > > > > > This patch allows qemu to reconnect the backend after > > > > > connection closed. > > > > > > > > > > Signed-off-by: Xie Yongji <xieyongji@baidu.com> > > > > > Signed-off-by: Ni Xun <nixun@baidu.com> > > > > > Signed-off-by: Zhang Yu <zhangyu31@baidu.com> > > > > > --- > > > > > hw/block/vhost-user-blk.c | 205 +++++++++++++++++++++++------ > > > > > include/hw/virtio/vhost-user-blk.h | 4 + > > > > > 2 files changed, 167 insertions(+), 42 deletions(-) > > > > > > > > > > > static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp) > > > > > { > > > > > VirtIODevice *vdev = VIRTIO_DEVICE(dev); > > > > > VHostUserBlk *s = VHOST_USER_BLK(vdev); > > > > > VhostUserState *user; > > > > > - struct vhost_virtqueue *vqs = NULL; > > > > > int i, ret; > > > > > + Error *err = NULL; > > > > > > > > > > if (!s->chardev.chr) { > > > > > error_setg(errp, "vhost-user-blk: chardev is mandatory"); > > > > > @@ -312,27 +442,28 @@ static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp) > > > > > } > > > > > > > > > > s->inflight = g_new0(struct vhost_inflight, 1); > > > > > - > > > > > - s->dev.nvqs = s->num_queues; > > > > > - s->dev.vqs = g_new(struct vhost_virtqueue, s->dev.nvqs); > > > > > - s->dev.vq_index = 0; > > > > > - s->dev.backend_features = 0; > > > > > - vqs = s->dev.vqs; > > > > > - > > > > > - 
vhost_dev_set_config_notifier(&s->dev, &blk_ops); > > > > > - > > > > > - ret = vhost_dev_init(&s->dev, s->vhost_user, VHOST_BACKEND_TYPE_USER, 0); > > > > > - if (ret < 0) { > > > > > - error_setg(errp, "vhost-user-blk: vhost initialization failed: %s", > > > > > - strerror(-ret)); > > > > > - goto virtio_err; > > > > > - } > > > > > + s->vqs = g_new(struct vhost_virtqueue, s->num_queues); > > > > > + s->watch = 0; > > > > > + s->should_start = false; > > > > > + s->connected = false; > > > > > + > > > > > + qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL, vhost_user_blk_event, > > > > > + NULL, (void *)dev, NULL, true); > > > > > + > > > > > +reconnect: > > > > > + do { > > > > > + if (qemu_chr_fe_wait_connected(&s->chardev, &err) < 0) { > > > > > + error_report_err(err); > > > > > + err = NULL; > > > > > + sleep(1); > > > > > > > > Seems arbitrary. Is this basically waiting until backend will reconnect? > > > > Why not block until event on the fd triggers? > > > > > > > > Also, it looks like this will just block forever with no monitor input > > > > and no way for user to figure out what is going on short of > > > > crashing QEMU. > > > > > > FWIW, the current vhost-user-net device does exactly the same thing > > > with calling qemu_chr_fe_wait_connected during its realize() function. > > > > Hmm yes. It doesn't sleep for an arbitrary 1 sec so less of an eyesore :) > > The sleep(1) in this patch simply needs to be removed. I think that > probably dates from when it was written against the earlier broken > version of qemu_chr_fe_wait_connected(). That would not correctly > deal with the "reconnect" flag, and so needing this loop with a sleep > in it. > > In fact the while loop can be removed as well in this code. It just > needs to call qemu_chr_fe_wait_connected() once. It is guaranteed > to have a connected peer once that returns 0. 
> > qemu_chr_fe_wait_connected() only returns -1 if the operating in > client mode, and it failed to connect and reconnect is *not* > requested. In such case the caller should honour the failure and > quit, not loop to retry. > > > The reason vhost-user-net does a loop is because once it has a > connection it tries todo a protocol handshake, and if that > handshake fails it closes the chardev and tries to connect > again. That's not the case in this blk code os the loop is > not needed. > > > > Can't say I'm thrilled about the existing usage either, but I don't > > > see a particularly nice alternative if it isn't possible to realize > > > the device without having a backend available. > > > > > > I don't think this an especially serious problem during cold startup > > > though since doesn't expect the monitor to become responsive until > > > the device model is fully realized & all backends connected. > > > > > > The real concern is if you need to hotplug this kind of device > > > at runtime. In that case blocking the main loop / monitor is a > > > serious problem that will actively harm both the guest OS and > > > mgmt applications. > > > > > > The vhost-user-net device will already suffer from that. > > > > Right. > > > > > To solve the problem with hotplug, the mgmt app would need to > > > plug the chardev backend, then wait for it to become connected, > > > and only then plug the device frontend. In that case we would not > > > want this loop. We'd expect it to use the already connected > > > chardev, and fail the realize() function if the chardev has > > > lost its connection. > > > > I think as step one we should maybe just busy-wait with no sleep(1). > > But respecting reconnect would be nice too. > > As above I don't see a need to loop at all. > > > And I think if we can't respect nowait we should > > detect it and fail, in both net and block ... 
> > If the chardev is in server mode with "wait" set, or in client mode > with reconnect not set, then there's actually no need to call > qemu_chr_fe_wait_connected at all. The qemu_chr_fe_wait_connected is > only needed if the chardev is a server with "nowait" set or a client > with "reconnect" set, or if the caller explicitly closes the existing > connection & wants to establish a new one. > > Regards, > Daniel Thanks for the comments Daniel! > -- > |: https://berrange.com -o- https://www.flickr.com/photos/dberrange :| > |: https://libvirt.org -o- https://fstop138.berrange.com :| > |: https://entangle-photo.org -o- https://www.instagram.com/dberrange :|
Hi, 14.03.2019, 14:44, "Daniel P. Berrangé" <berrange@redhat.com>: > On Thu, Mar 14, 2019 at 07:34:03AM -0400, Michael S. Tsirkin wrote: >> On Thu, Mar 14, 2019 at 11:24:22AM +0000, Daniel P. Berrangé wrote: >> > On Tue, Mar 12, 2019 at 12:49:35PM -0400, Michael S. Tsirkin wrote: >> > > On Thu, Feb 28, 2019 at 04:53:54PM +0800, elohimes@gmail.com wrote: >> > > > From: Xie Yongji <xieyongji@baidu.com> >> > > > >> > > > Since we now support the message VHOST_USER_GET_INFLIGHT_FD >> > > > and VHOST_USER_SET_INFLIGHT_FD. The backend is able to restart >> > > > safely because it can track inflight I/O in shared memory. >> > > > This patch allows qemu to reconnect the backend after >> > > > connection closed. >> > > > >> > > > Signed-off-by: Xie Yongji <xieyongji@baidu.com> >> > > > Signed-off-by: Ni Xun <nixun@baidu.com> >> > > > Signed-off-by: Zhang Yu <zhangyu31@baidu.com> >> > > > --- >> > > > hw/block/vhost-user-blk.c | 205 +++++++++++++++++++++++------ >> > > > include/hw/virtio/vhost-user-blk.h | 4 + >> > > > 2 files changed, 167 insertions(+), 42 deletions(-) >> > >> > >> > > > static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp) >> > > > { >> > > > VirtIODevice *vdev = VIRTIO_DEVICE(dev); >> > > > VHostUserBlk *s = VHOST_USER_BLK(vdev); >> > > > VhostUserState *user; >> > > > - struct vhost_virtqueue *vqs = NULL; >> > > > int i, ret; >> > > > + Error *err = NULL; >> > > > >> > > > if (!s->chardev.chr) { >> > > > error_setg(errp, "vhost-user-blk: chardev is mandatory"); >> > > > @@ -312,27 +442,28 @@ static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp) >> > > > } >> > > > >> > > > s->inflight = g_new0(struct vhost_inflight, 1); >> > > > - >> > > > - s->dev.nvqs = s->num_queues; >> > > > - s->dev.vqs = g_new(struct vhost_virtqueue, s->dev.nvqs); >> > > > - s->dev.vq_index = 0; >> > > > - s->dev.backend_features = 0; >> > > > - vqs = s->dev.vqs; >> > > > - >> > > > - vhost_dev_set_config_notifier(&s->dev, &blk_ops); >> > > 
> - >> > > > - ret = vhost_dev_init(&s->dev, s->vhost_user, VHOST_BACKEND_TYPE_USER, 0); >> > > > - if (ret < 0) { >> > > > - error_setg(errp, "vhost-user-blk: vhost initialization failed: %s", >> > > > - strerror(-ret)); >> > > > - goto virtio_err; >> > > > - } >> > > > + s->vqs = g_new(struct vhost_virtqueue, s->num_queues); >> > > > + s->watch = 0; >> > > > + s->should_start = false; >> > > > + s->connected = false; >> > > > + >> > > > + qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL, vhost_user_blk_event, >> > > > + NULL, (void *)dev, NULL, true); >> > > > + >> > > > +reconnect: >> > > > + do { >> > > > + if (qemu_chr_fe_wait_connected(&s->chardev, &err) < 0) { >> > > > + error_report_err(err); >> > > > + err = NULL; >> > > > + sleep(1); >> > > >> > > Seems arbitrary. Is this basically waiting until backend will reconnect? >> > > Why not block until event on the fd triggers? >> > > >> > > Also, it looks like this will just block forever with no monitor input >> > > and no way for user to figure out what is going on short of >> > > crashing QEMU. >> > >> > FWIW, the current vhost-user-net device does exactly the same thing >> > with calling qemu_chr_fe_wait_connected during its realize() function. >> >> Hmm yes. It doesn't sleep for an arbitrary 1 sec so less of an eyesore :) > > The sleep(1) in this patch simply needs to be removed. I think that > probably dates from when it was written against the earlier broken > version of qemu_chr_fe_wait_connected(). That would not correctly > deal with the "reconnect" flag, and so needing this loop with a sleep > in it. > > In fact the while loop can be removed as well in this code. It just > needs to call qemu_chr_fe_wait_connected() once. It is guaranteed > to have a connected peer once that returns 0. > > qemu_chr_fe_wait_connected() only returns -1 if the operating in > client mode, and it failed to connect and reconnect is *not* > requested. 
In such case the caller should honour the failure and > quit, not loop to retry. > > The reason vhost-user-net does a loop is because once it has a > connection it tries todo a protocol handshake, and if that > handshake fails it closes the chardev and tries to connect > again. That's not the case in this blk code os the loop is > not needed. > But vhost-user-blk also has a handshake in device realize. What happens if the connection is broken during realization? IIUC we have to retry a handshake in such case just like vhost-user-net. >> > Can't say I'm thrilled about the existing usage either, but I don't >> > see a particularly nice alternative if it isn't possible to realize >> > the device without having a backend available. >> > >> > I don't think this an especially serious problem during cold startup >> > though since doesn't expect the monitor to become responsive until >> > the device model is fully realized & all backends connected. >> > >> > The real concern is if you need to hotplug this kind of device >> > at runtime. In that case blocking the main loop / monitor is a >> > serious problem that will actively harm both the guest OS and >> > mgmt applications. >> > >> > The vhost-user-net device will already suffer from that. >> >> Right. >> >> > To solve the problem with hotplug, the mgmt app would need to >> > plug the chardev backend, then wait for it to become connected, >> > and only then plug the device frontend. In that case we would not >> > want this loop. We'd expect it to use the already connected >> > chardev, and fail the realize() function if the chardev has >> > lost its connection. >> >> I think as step one we should maybe just busy-wait with no sleep(1). >> But respecting reconnect would be nice too. > > As above I don't see a need to loop at all. > >> And I think if we can't respect nowait we should >> detect it and fail, in both net and block ... 
> > If the chardev is in server mode with "wait" set, or in client mode > with reconnect not set, then there's actually no need to call > qemu_chr_fe_wait_connected at all. The qemu_chr_fe_wait_connected is > only needed if the chardev is a server with "nowait" set or a client > with "reconnect" set, or if the caller explicitly closes the existing > connection & wants to establish a new one. > > Regards, > Daniel > -- > |: https://berrange.com -o- https://www.flickr.com/photos/dberrange :| > |: https://libvirt.org -o- https://fstop138.berrange.com :| > |: https://entangle-photo.org -o- https://www.instagram.com/dberrange :| Regards, Yury
On Thu, 14 Mar 2019 at 19:18, Michael S. Tsirkin <mst@redhat.com> wrote: > > On Wed, Mar 13, 2019 at 10:47:08AM +0800, Yongji Xie wrote: > > On Wed, 13 Mar 2019 at 09:16, Michael S. Tsirkin <mst@redhat.com> wrote: > > > > > > On Thu, Feb 28, 2019 at 04:53:54PM +0800, elohimes@gmail.com wrote: > > > > From: Xie Yongji <xieyongji@baidu.com> > > > > > > > > Since we now support the message VHOST_USER_GET_INFLIGHT_FD > > > > and VHOST_USER_SET_INFLIGHT_FD. The backend is able to restart > > > > safely because it can track inflight I/O in shared memory. > > > > This patch allows qemu to reconnect the backend after > > > > connection closed. > > > > > > > > Signed-off-by: Xie Yongji <xieyongji@baidu.com> > > > > Signed-off-by: Ni Xun <nixun@baidu.com> > > > > Signed-off-by: Zhang Yu <zhangyu31@baidu.com> > > > > --- > > > > hw/block/vhost-user-blk.c | 205 +++++++++++++++++++++++------ > > > > include/hw/virtio/vhost-user-blk.h | 4 + > > > > 2 files changed, 167 insertions(+), 42 deletions(-) > > > > > > > > diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c > > > > index 9682df1a7b..539ea2e571 100644 > > > > --- a/hw/block/vhost-user-blk.c > > > > +++ b/hw/block/vhost-user-blk.c > > > > @@ -103,7 +103,7 @@ const VhostDevConfigOps blk_ops = { > > > > .vhost_dev_config_notifier = vhost_user_blk_handle_config_change, > > > > }; > > > > > > > > -static void vhost_user_blk_start(VirtIODevice *vdev) > > > > +static int vhost_user_blk_start(VirtIODevice *vdev) > > > > { > > > > VHostUserBlk *s = VHOST_USER_BLK(vdev); > > > > BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); > > > > @@ -112,13 +112,13 @@ static void vhost_user_blk_start(VirtIODevice *vdev) > > > > > > > > if (!k->set_guest_notifiers) { > > > > error_report("binding does not support guest notifiers"); > > > > - return; > > > > + return -ENOSYS; > > > > } > > > > > > > > ret = vhost_dev_enable_notifiers(&s->dev, vdev); > > > > if (ret < 0) { > > > > error_report("Error enabling host 
notifiers: %d", -ret); > > > > - return; > > > > + return ret; > > > > } > > > > > > > > ret = k->set_guest_notifiers(qbus->parent, s->dev.nvqs, true); > > > > @@ -157,12 +157,13 @@ static void vhost_user_blk_start(VirtIODevice *vdev) > > > > vhost_virtqueue_mask(&s->dev, vdev, i, false); > > > > } > > > > > > > > - return; > > > > + return ret; > > > > > > > > err_guest_notifiers: > > > > k->set_guest_notifiers(qbus->parent, s->dev.nvqs, false); > > > > err_host_notifiers: > > > > vhost_dev_disable_notifiers(&s->dev, vdev); > > > > + return ret; > > > > } > > > > > > > > static void vhost_user_blk_stop(VirtIODevice *vdev) > > > > @@ -181,7 +182,6 @@ static void vhost_user_blk_stop(VirtIODevice *vdev) > > > > ret = k->set_guest_notifiers(qbus->parent, s->dev.nvqs, false); > > > > if (ret < 0) { > > > > error_report("vhost guest notifier cleanup failed: %d", ret); > > > > - return; > > > > } > > > > > > > > vhost_dev_disable_notifiers(&s->dev, vdev); > > > > @@ -191,21 +191,43 @@ static void vhost_user_blk_set_status(VirtIODevice *vdev, uint8_t status) > > > > { > > > > VHostUserBlk *s = VHOST_USER_BLK(vdev); > > > > bool should_start = status & VIRTIO_CONFIG_S_DRIVER_OK; > > > > + int ret; > > > > > > > > if (!vdev->vm_running) { > > > > should_start = false; > > > > } > > > > > > > > - if (s->dev.started == should_start) { > > > > + if (s->should_start == should_start) { > > > > + return; > > > > + } > > > > + > > > > + if (!s->connected || s->dev.started == should_start) { > > > > + s->should_start = should_start; > > > > return; > > > > } > > > > > > > > if (should_start) { > > > > - vhost_user_blk_start(vdev); > > > > + s->should_start = true; > > > > + /* > > > > + * make sure vhost_user_blk_handle_output() ignores fake > > > > + * guest kick by vhost_dev_enable_notifiers() > > > > + */ > > > > + barrier(); > > > > + ret = vhost_user_blk_start(vdev); > > > > + if (ret < 0) { > > > > + error_report("vhost-user-blk: vhost start failed: %s", > > > > + 
strerror(-ret)); > > > > + qemu_chr_fe_disconnect(&s->chardev); > > > > + } > > > > } else { > > > > vhost_user_blk_stop(vdev); > > > > + /* > > > > + * make sure vhost_user_blk_handle_output() ignore fake > > > > + * guest kick by vhost_dev_disable_notifiers() > > > > + */ > > > > + barrier(); > > > > + s->should_start = false; > > > > } > > > > > > I don't understand the comment about ignoring fake guest kicks. > > > Please add an explanation about what does barrier do here. > > > > We may get stack like this: > > > > vhost_user_blk_stop() > > vhost_dev_disable_notifiers() > > virtio_bus_cleanup_host_notifier() > > virtio_queue_host_notifier_read() > > virtio_queue_notify_vq() /* fake guest kick */ > > vhost_user_blk_handle_output() > > > > vhost_user_blk_handle_output() will re-start vhost-user if > > should_start is false. This is not what we expect. The same to > > vhost_dev_enable_notifiers(). > > Well what does a compiler barrier have to do with it? > > Anyway, it's there to avoid losing kicks e.g. across migration. If you > have something else that prevents losing kicks (e.g. migrate a "started" > flag) then add code to skip the notify - don't work around it. > OK, I see. Thanks. IIUC, vhost-user-blk device actually does not need this kind of kick because dataplane is not in qemu side, right? > > > > And maybe a detailed explanation about what "fake kick" > > > is in the commit log. > > > > > > > Sure. 
> > > > > > > - > > > > } > > > > > > > > static uint64_t vhost_user_blk_get_features(VirtIODevice *vdev, > > > > @@ -237,13 +259,22 @@ static uint64_t vhost_user_blk_get_features(VirtIODevice *vdev, > > > > static void vhost_user_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq) > > > > { > > > > VHostUserBlk *s = VHOST_USER_BLK(vdev); > > > > - int i; > > > > + int i, ret; > > > > > > > > if (!(virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1) && > > > > !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1))) { > > > > return; > > > > } > > > > > > > > + if (s->should_start) { > > > > + return; > > > > + } > > > > + s->should_start = true; > > > > + > > > > + if (!s->connected) { > > > > + return; > > > > + } > > > > + > > > > if (s->dev.started) { > > > > return; > > > > } > > > > @@ -251,7 +282,13 @@ static void vhost_user_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq) > > > > /* Some guests kick before setting VIRTIO_CONFIG_S_DRIVER_OK so start > > > > * vhost here instead of waiting for .set_status(). 
> > > > */ > > > > - vhost_user_blk_start(vdev); > > > > + ret = vhost_user_blk_start(vdev); > > > > + if (ret < 0) { > > > > + error_report("vhost-user-blk: vhost start failed: %s", > > > > + strerror(-ret)); > > > > + qemu_chr_fe_disconnect(&s->chardev); > > > > + return; > > > > + } > > > > > > > > /* Kick right away to begin processing requests already in vring */ > > > > for (i = 0; i < s->dev.nvqs; i++) { > > > > @@ -271,13 +308,106 @@ static void vhost_user_blk_reset(VirtIODevice *vdev) > > > > vhost_dev_free_inflight(s->inflight); > > > > } > > > > > > > > +static int vhost_user_blk_connect(DeviceState *dev) > > > > +{ > > > > + VirtIODevice *vdev = VIRTIO_DEVICE(dev); > > > > + VHostUserBlk *s = VHOST_USER_BLK(vdev); > > > > + int ret = 0; > > > > + > > > > + if (s->connected) { > > > > + return 0; > > > > + } > > > > + s->connected = true; > > > > + > > > > + s->dev.nvqs = s->num_queues; > > > > + s->dev.vqs = s->vqs; > > > > + s->dev.vq_index = 0; > > > > + s->dev.backend_features = 0; > > > > + > > > > + vhost_dev_set_config_notifier(&s->dev, &blk_ops); > > > > + > > > > + ret = vhost_dev_init(&s->dev, s->vhost_user, VHOST_BACKEND_TYPE_USER, 0); > > > > + if (ret < 0) { > > > > + error_report("vhost-user-blk: vhost initialization failed: %s", > > > > + strerror(-ret)); > > > > + return ret; > > > > + } > > > > + > > > > + /* restore vhost state */ > > > > + if (s->should_start) { > > > > + ret = vhost_user_blk_start(vdev); > > > > + if (ret < 0) { > > > > + error_report("vhost-user-blk: vhost start failed: %s", > > > > + strerror(-ret)); > > > > + return ret; > > > > + } > > > > + } > > > > + > > > > + return 0; > > > > +} > > > > + > > > > +static void vhost_user_blk_disconnect(DeviceState *dev) > > > > +{ > > > > + VirtIODevice *vdev = VIRTIO_DEVICE(dev); > > > > + VHostUserBlk *s = VHOST_USER_BLK(vdev); > > > > + > > > > + if (!s->connected) { > > > > + return; > > > > + } > > > > + s->connected = false; > > > > + > > > > + if (s->dev.started) { > > 
> > + vhost_user_blk_stop(vdev); > > > > + } > > > > + > > > > + vhost_dev_cleanup(&s->dev); > > > > +} > > > > + > > > > +static gboolean vhost_user_blk_watch(GIOChannel *chan, GIOCondition cond, > > > > + void *opaque) > > > > +{ > > > > + DeviceState *dev = opaque; > > > > + VirtIODevice *vdev = VIRTIO_DEVICE(dev); > > > > + VHostUserBlk *s = VHOST_USER_BLK(vdev); > > > > + > > > > + qemu_chr_fe_disconnect(&s->chardev); > > > > + > > > > + return true; > > > > +} > > > > + > > > > +static void vhost_user_blk_event(void *opaque, int event) > > > > +{ > > > > + DeviceState *dev = opaque; > > > > + VirtIODevice *vdev = VIRTIO_DEVICE(dev); > > > > + VHostUserBlk *s = VHOST_USER_BLK(vdev); > > > > + > > > > + switch (event) { > > > > + case CHR_EVENT_OPENED: > > > > + if (vhost_user_blk_connect(dev) < 0) { > > > > + qemu_chr_fe_disconnect(&s->chardev); > > > > + return; > > > > + } > > > > + s->watch = qemu_chr_fe_add_watch(&s->chardev, G_IO_HUP, > > > > + vhost_user_blk_watch, dev); > > > > + break; > > > > + case CHR_EVENT_CLOSED: > > > > + vhost_user_blk_disconnect(dev); > > > > + if (s->watch) { > > > > + g_source_remove(s->watch); > > > > + s->watch = 0; > > > > + } > > > > + break; > > > > + } > > > > +} > > > > + > > > > + > > > > static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp) > > > > { > > > > VirtIODevice *vdev = VIRTIO_DEVICE(dev); > > > > VHostUserBlk *s = VHOST_USER_BLK(vdev); > > > > VhostUserState *user; > > > > - struct vhost_virtqueue *vqs = NULL; > > > > int i, ret; > > > > + Error *err = NULL; > > > > > > > > if (!s->chardev.chr) { > > > > error_setg(errp, "vhost-user-blk: chardev is mandatory"); > > > > @@ -312,27 +442,28 @@ static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp) > > > > } > > > > > > > > s->inflight = g_new0(struct vhost_inflight, 1); > > > > - > > > > - s->dev.nvqs = s->num_queues; > > > > - s->dev.vqs = g_new(struct vhost_virtqueue, s->dev.nvqs); > > > > - s->dev.vq_index = 0; > > > > 
- s->dev.backend_features = 0; > > > > - vqs = s->dev.vqs; > > > > - > > > > - vhost_dev_set_config_notifier(&s->dev, &blk_ops); > > > > - > > > > - ret = vhost_dev_init(&s->dev, s->vhost_user, VHOST_BACKEND_TYPE_USER, 0); > > > > - if (ret < 0) { > > > > - error_setg(errp, "vhost-user-blk: vhost initialization failed: %s", > > > > - strerror(-ret)); > > > > - goto virtio_err; > > > > - } > > > > + s->vqs = g_new(struct vhost_virtqueue, s->num_queues); > > > > + s->watch = 0; > > > > + s->should_start = false; > > > > + s->connected = false; > > > > + > > > > + qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL, vhost_user_blk_event, > > > > + NULL, (void *)dev, NULL, true); > > > > + > > > > +reconnect: > > > > + do { > > > > + if (qemu_chr_fe_wait_connected(&s->chardev, &err) < 0) { > > > > + error_report_err(err); > > > > + err = NULL; > > > > + sleep(1); > > > > + } > > > > + } while (!s->connected); > > > > > > > > ret = vhost_dev_get_config(&s->dev, (uint8_t *)&s->blkcfg, > > > > - sizeof(struct virtio_blk_config)); > > > > + sizeof(struct virtio_blk_config)); > > > > if (ret < 0) { > > > > - error_setg(errp, "vhost-user-blk: get block config failed"); > > > > - goto vhost_err; > > > > + error_report("vhost-user-blk: get block config failed"); > > > > + goto reconnect; > > > > } > > > > > > > > if (s->blkcfg.num_queues != s->num_queues) { > > > > @@ -340,29 +471,19 @@ static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp) > > > > } > > > > > > > > return; > > > > > > With this we end up with return at end of function > > > which makes no sense. > > > > > > > Oh, yes. I will remove it. 
> > > > > > - > > > > -vhost_err: > > > > - vhost_dev_cleanup(&s->dev); > > > > -virtio_err: > > > > - g_free(vqs); > > > > - g_free(s->inflight); > > > > - virtio_cleanup(vdev); > > > > - > > > > - vhost_user_cleanup(user); > > > > - g_free(user); > > > > - s->vhost_user = NULL; > > > > } > > > > > > > > static void vhost_user_blk_device_unrealize(DeviceState *dev, Error **errp) > > > > { > > > > VirtIODevice *vdev = VIRTIO_DEVICE(dev); > > > > VHostUserBlk *s = VHOST_USER_BLK(dev); > > > > - struct vhost_virtqueue *vqs = s->dev.vqs; > > > > > > > > vhost_user_blk_set_status(vdev, 0); > > > > + qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL, NULL, > > > > + NULL, NULL, NULL, false); > > > > vhost_dev_cleanup(&s->dev); > > > > vhost_dev_free_inflight(s->inflight); > > > > - g_free(vqs); > > > > + g_free(s->vqs); > > > > g_free(s->inflight); > > > > virtio_cleanup(vdev); > > > > > > > > diff --git a/include/hw/virtio/vhost-user-blk.h b/include/hw/virtio/vhost-user-blk.h > > > > index 445516604a..4849aa5eb5 100644 > > > > --- a/include/hw/virtio/vhost-user-blk.h > > > > +++ b/include/hw/virtio/vhost-user-blk.h > > > > @@ -38,6 +38,10 @@ typedef struct VHostUserBlk { > > > > struct vhost_dev dev; > > > > struct vhost_inflight *inflight; > > > > VhostUserState *vhost_user; > > > > + struct vhost_virtqueue *vqs; > > > > + guint watch; > > > > + bool should_start; > > > > > > Please add comments about fields. > > > Also should_start seems to be set on guest activity. > > > Does it need to be migrated? > > > It would be better not to track guest state like this. > > > Why do we need it? > > > Pls add comments with an explanation. > > > > > > > We can't relay on vhost_dev.started to track guest's state in > > vhost_user_blk_set_status() when we enable reconnecting. For example, > > qemu will lose guest's state if guest stop device during backend is > > disconnected. 
So vhost_dev.started is used to track backend's state > > and should_start is used to track guest's state in this patch. And > > this variable is based on VirtIODevice.status, so I think it is not > > necessary to migrate it. What do you think about it? > > If it's based on VirtIODevice.status then how about not keeping > it around at all and just calculating from VirtIODevice.status > when necessary? > Sorry, I made a mistake before. Normally, should_start is based on VirtIODevice.status. But it's not the case for virtio 1.0 transitional devices. Then I realize there may be a bug. If migration completes between kicking virtqueue and setting VIRTIO_CONFIG_S_DRIVER_OK, guest may be hung. So I think should_start should be migrated in this case. Thanks, Yongji
On Fri, Mar 15, 2019 at 10:46:34AM +0800, Yongji Xie wrote: > On Thu, 14 Mar 2019 at 19:18, Michael S. Tsirkin <mst@redhat.com> wrote: > > > > On Wed, Mar 13, 2019 at 10:47:08AM +0800, Yongji Xie wrote: > > > On Wed, 13 Mar 2019 at 09:16, Michael S. Tsirkin <mst@redhat.com> wrote: > > > > > > > > On Thu, Feb 28, 2019 at 04:53:54PM +0800, elohimes@gmail.com wrote: > > > > > From: Xie Yongji <xieyongji@baidu.com> > > > > > > > > > > Since we now support the message VHOST_USER_GET_INFLIGHT_FD > > > > > and VHOST_USER_SET_INFLIGHT_FD. The backend is able to restart > > > > > safely because it can track inflight I/O in shared memory. > > > > > This patch allows qemu to reconnect the backend after > > > > > connection closed. > > > > > > > > > > Signed-off-by: Xie Yongji <xieyongji@baidu.com> > > > > > Signed-off-by: Ni Xun <nixun@baidu.com> > > > > > Signed-off-by: Zhang Yu <zhangyu31@baidu.com> > > > > > --- > > > > > hw/block/vhost-user-blk.c | 205 +++++++++++++++++++++++------ > > > > > include/hw/virtio/vhost-user-blk.h | 4 + > > > > > 2 files changed, 167 insertions(+), 42 deletions(-) > > > > > > > > > > diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c > > > > > index 9682df1a7b..539ea2e571 100644 > > > > > --- a/hw/block/vhost-user-blk.c > > > > > +++ b/hw/block/vhost-user-blk.c > > > > > @@ -103,7 +103,7 @@ const VhostDevConfigOps blk_ops = { > > > > > .vhost_dev_config_notifier = vhost_user_blk_handle_config_change, > > > > > }; > > > > > > > > > > -static void vhost_user_blk_start(VirtIODevice *vdev) > > > > > +static int vhost_user_blk_start(VirtIODevice *vdev) > > > > > { > > > > > VHostUserBlk *s = VHOST_USER_BLK(vdev); > > > > > BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); > > > > > @@ -112,13 +112,13 @@ static void vhost_user_blk_start(VirtIODevice *vdev) > > > > > > > > > > if (!k->set_guest_notifiers) { > > > > > error_report("binding does not support guest notifiers"); > > > > > - return; > > > > > + return -ENOSYS; > > > 
> > } > > > > > > > > > > ret = vhost_dev_enable_notifiers(&s->dev, vdev); > > > > > if (ret < 0) { > > > > > error_report("Error enabling host notifiers: %d", -ret); > > > > > - return; > > > > > + return ret; > > > > > } > > > > > > > > > > ret = k->set_guest_notifiers(qbus->parent, s->dev.nvqs, true); > > > > > @@ -157,12 +157,13 @@ static void vhost_user_blk_start(VirtIODevice *vdev) > > > > > vhost_virtqueue_mask(&s->dev, vdev, i, false); > > > > > } > > > > > > > > > > - return; > > > > > + return ret; > > > > > > > > > > err_guest_notifiers: > > > > > k->set_guest_notifiers(qbus->parent, s->dev.nvqs, false); > > > > > err_host_notifiers: > > > > > vhost_dev_disable_notifiers(&s->dev, vdev); > > > > > + return ret; > > > > > } > > > > > > > > > > static void vhost_user_blk_stop(VirtIODevice *vdev) > > > > > @@ -181,7 +182,6 @@ static void vhost_user_blk_stop(VirtIODevice *vdev) > > > > > ret = k->set_guest_notifiers(qbus->parent, s->dev.nvqs, false); > > > > > if (ret < 0) { > > > > > error_report("vhost guest notifier cleanup failed: %d", ret); > > > > > - return; > > > > > } > > > > > > > > > > vhost_dev_disable_notifiers(&s->dev, vdev); > > > > > @@ -191,21 +191,43 @@ static void vhost_user_blk_set_status(VirtIODevice *vdev, uint8_t status) > > > > > { > > > > > VHostUserBlk *s = VHOST_USER_BLK(vdev); > > > > > bool should_start = status & VIRTIO_CONFIG_S_DRIVER_OK; > > > > > + int ret; > > > > > > > > > > if (!vdev->vm_running) { > > > > > should_start = false; > > > > > } > > > > > > > > > > - if (s->dev.started == should_start) { > > > > > + if (s->should_start == should_start) { > > > > > + return; > > > > > + } > > > > > + > > > > > + if (!s->connected || s->dev.started == should_start) { > > > > > + s->should_start = should_start; > > > > > return; > > > > > } > > > > > > > > > > if (should_start) { > > > > > - vhost_user_blk_start(vdev); > > > > > + s->should_start = true; > > > > > + /* > > > > > + * make sure vhost_user_blk_handle_output() ignores 
fake > > > > > + * guest kick by vhost_dev_enable_notifiers() > > > > > + */ > > > > > + barrier(); > > > > > + ret = vhost_user_blk_start(vdev); > > > > > + if (ret < 0) { > > > > > + error_report("vhost-user-blk: vhost start failed: %s", > > > > > + strerror(-ret)); > > > > > + qemu_chr_fe_disconnect(&s->chardev); > > > > > + } > > > > > } else { > > > > > vhost_user_blk_stop(vdev); > > > > > + /* > > > > > + * make sure vhost_user_blk_handle_output() ignore fake > > > > > + * guest kick by vhost_dev_disable_notifiers() > > > > > + */ > > > > > + barrier(); > > > > > + s->should_start = false; > > > > > } > > > > > > > > I don't understand the comment about ignoring fake guest kicks. > > > > Please add an explanation about what does barrier do here. > > > > > > We may get stack like this: > > > > > > vhost_user_blk_stop() > > > vhost_dev_disable_notifiers() > > > virtio_bus_cleanup_host_notifier() > > > virtio_queue_host_notifier_read() > > > virtio_queue_notify_vq() /* fake guest kick */ > > > vhost_user_blk_handle_output() > > > > > > vhost_user_blk_handle_output() will re-start vhost-user if > > > should_start is false. This is not what we expect. The same to > > > vhost_dev_enable_notifiers(). > > > > Well what does a compiler barrier have to do with it? > > > > Anyway, it's there to avoid losing kicks e.g. across migration. If you > > have something else that prevents losing kicks (e.g. migrate a "started" > > flag) then add code to skip the notify - don't work around it. > > > > OK, I see. Thanks. > > IIUC, vhost-user-blk device actually does not need this kind of kick > because dataplane is not in qemu side, right? > > > > > > > And maybe a detailed explanation about what "fake kick" > > > > is in the commit log. > > > > > > > > > > Sure. 
> > > > > > > > > - > > > > > } > > > > > > > > > > static uint64_t vhost_user_blk_get_features(VirtIODevice *vdev, > > > > > @@ -237,13 +259,22 @@ static uint64_t vhost_user_blk_get_features(VirtIODevice *vdev, > > > > > static void vhost_user_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq) > > > > > { > > > > > VHostUserBlk *s = VHOST_USER_BLK(vdev); > > > > > - int i; > > > > > + int i, ret; > > > > > > > > > > if (!(virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1) && > > > > > !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1))) { > > > > > return; > > > > > } > > > > > > > > > > + if (s->should_start) { > > > > > + return; > > > > > + } > > > > > + s->should_start = true; > > > > > + > > > > > + if (!s->connected) { > > > > > + return; > > > > > + } > > > > > + > > > > > if (s->dev.started) { > > > > > return; > > > > > } > > > > > @@ -251,7 +282,13 @@ static void vhost_user_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq) > > > > > /* Some guests kick before setting VIRTIO_CONFIG_S_DRIVER_OK so start > > > > > * vhost here instead of waiting for .set_status(). 
> > > > > */ > > > > > - vhost_user_blk_start(vdev); > > > > > + ret = vhost_user_blk_start(vdev); > > > > > + if (ret < 0) { > > > > > + error_report("vhost-user-blk: vhost start failed: %s", > > > > > + strerror(-ret)); > > > > > + qemu_chr_fe_disconnect(&s->chardev); > > > > > + return; > > > > > + } > > > > > > > > > > /* Kick right away to begin processing requests already in vring */ > > > > > for (i = 0; i < s->dev.nvqs; i++) { > > > > > @@ -271,13 +308,106 @@ static void vhost_user_blk_reset(VirtIODevice *vdev) > > > > > vhost_dev_free_inflight(s->inflight); > > > > > } > > > > > > > > > > +static int vhost_user_blk_connect(DeviceState *dev) > > > > > +{ > > > > > + VirtIODevice *vdev = VIRTIO_DEVICE(dev); > > > > > + VHostUserBlk *s = VHOST_USER_BLK(vdev); > > > > > + int ret = 0; > > > > > + > > > > > + if (s->connected) { > > > > > + return 0; > > > > > + } > > > > > + s->connected = true; > > > > > + > > > > > + s->dev.nvqs = s->num_queues; > > > > > + s->dev.vqs = s->vqs; > > > > > + s->dev.vq_index = 0; > > > > > + s->dev.backend_features = 0; > > > > > + > > > > > + vhost_dev_set_config_notifier(&s->dev, &blk_ops); > > > > > + > > > > > + ret = vhost_dev_init(&s->dev, s->vhost_user, VHOST_BACKEND_TYPE_USER, 0); > > > > > + if (ret < 0) { > > > > > + error_report("vhost-user-blk: vhost initialization failed: %s", > > > > > + strerror(-ret)); > > > > > + return ret; > > > > > + } > > > > > + > > > > > + /* restore vhost state */ > > > > > + if (s->should_start) { > > > > > + ret = vhost_user_blk_start(vdev); > > > > > + if (ret < 0) { > > > > > + error_report("vhost-user-blk: vhost start failed: %s", > > > > > + strerror(-ret)); > > > > > + return ret; > > > > > + } > > > > > + } > > > > > + > > > > > + return 0; > > > > > +} > > > > > + > > > > > +static void vhost_user_blk_disconnect(DeviceState *dev) > > > > > +{ > > > > > + VirtIODevice *vdev = VIRTIO_DEVICE(dev); > > > > > + VHostUserBlk *s = VHOST_USER_BLK(vdev); > > > > > + > > > > > + if 
(!s->connected) { > > > > > + return; > > > > > + } > > > > > + s->connected = false; > > > > > + > > > > > + if (s->dev.started) { > > > > > + vhost_user_blk_stop(vdev); > > > > > + } > > > > > + > > > > > + vhost_dev_cleanup(&s->dev); > > > > > +} > > > > > + > > > > > +static gboolean vhost_user_blk_watch(GIOChannel *chan, GIOCondition cond, > > > > > + void *opaque) > > > > > +{ > > > > > + DeviceState *dev = opaque; > > > > > + VirtIODevice *vdev = VIRTIO_DEVICE(dev); > > > > > + VHostUserBlk *s = VHOST_USER_BLK(vdev); > > > > > + > > > > > + qemu_chr_fe_disconnect(&s->chardev); > > > > > + > > > > > + return true; > > > > > +} > > > > > + > > > > > +static void vhost_user_blk_event(void *opaque, int event) > > > > > +{ > > > > > + DeviceState *dev = opaque; > > > > > + VirtIODevice *vdev = VIRTIO_DEVICE(dev); > > > > > + VHostUserBlk *s = VHOST_USER_BLK(vdev); > > > > > + > > > > > + switch (event) { > > > > > + case CHR_EVENT_OPENED: > > > > > + if (vhost_user_blk_connect(dev) < 0) { > > > > > + qemu_chr_fe_disconnect(&s->chardev); > > > > > + return; > > > > > + } > > > > > + s->watch = qemu_chr_fe_add_watch(&s->chardev, G_IO_HUP, > > > > > + vhost_user_blk_watch, dev); > > > > > + break; > > > > > + case CHR_EVENT_CLOSED: > > > > > + vhost_user_blk_disconnect(dev); > > > > > + if (s->watch) { > > > > > + g_source_remove(s->watch); > > > > > + s->watch = 0; > > > > > + } > > > > > + break; > > > > > + } > > > > > +} > > > > > + > > > > > + > > > > > static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp) > > > > > { > > > > > VirtIODevice *vdev = VIRTIO_DEVICE(dev); > > > > > VHostUserBlk *s = VHOST_USER_BLK(vdev); > > > > > VhostUserState *user; > > > > > - struct vhost_virtqueue *vqs = NULL; > > > > > int i, ret; > > > > > + Error *err = NULL; > > > > > > > > > > if (!s->chardev.chr) { > > > > > error_setg(errp, "vhost-user-blk: chardev is mandatory"); > > > > > @@ -312,27 +442,28 @@ static void vhost_user_blk_device_realize(DeviceState 
*dev, Error **errp) > > > > > } > > > > > > > > > > s->inflight = g_new0(struct vhost_inflight, 1); > > > > > - > > > > > - s->dev.nvqs = s->num_queues; > > > > > - s->dev.vqs = g_new(struct vhost_virtqueue, s->dev.nvqs); > > > > > - s->dev.vq_index = 0; > > > > > - s->dev.backend_features = 0; > > > > > - vqs = s->dev.vqs; > > > > > - > > > > > - vhost_dev_set_config_notifier(&s->dev, &blk_ops); > > > > > - > > > > > - ret = vhost_dev_init(&s->dev, s->vhost_user, VHOST_BACKEND_TYPE_USER, 0); > > > > > - if (ret < 0) { > > > > > - error_setg(errp, "vhost-user-blk: vhost initialization failed: %s", > > > > > - strerror(-ret)); > > > > > - goto virtio_err; > > > > > - } > > > > > + s->vqs = g_new(struct vhost_virtqueue, s->num_queues); > > > > > + s->watch = 0; > > > > > + s->should_start = false; > > > > > + s->connected = false; > > > > > + > > > > > + qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL, vhost_user_blk_event, > > > > > + NULL, (void *)dev, NULL, true); > > > > > + > > > > > +reconnect: > > > > > + do { > > > > > + if (qemu_chr_fe_wait_connected(&s->chardev, &err) < 0) { > > > > > + error_report_err(err); > > > > > + err = NULL; > > > > > + sleep(1); > > > > > + } > > > > > + } while (!s->connected); > > > > > > > > > > ret = vhost_dev_get_config(&s->dev, (uint8_t *)&s->blkcfg, > > > > > - sizeof(struct virtio_blk_config)); > > > > > + sizeof(struct virtio_blk_config)); > > > > > if (ret < 0) { > > > > > - error_setg(errp, "vhost-user-blk: get block config failed"); > > > > > - goto vhost_err; > > > > > + error_report("vhost-user-blk: get block config failed"); > > > > > + goto reconnect; > > > > > } > > > > > > > > > > if (s->blkcfg.num_queues != s->num_queues) { > > > > > @@ -340,29 +471,19 @@ static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp) > > > > > } > > > > > > > > > > return; > > > > > > > > With this we end up with return at end of function > > > > which makes no sense. > > > > > > > > > > Oh, yes. I will remove it. 
> > > > > > > > - > > > > > -vhost_err: > > > > > - vhost_dev_cleanup(&s->dev); > > > > > -virtio_err: > > > > > - g_free(vqs); > > > > > - g_free(s->inflight); > > > > > - virtio_cleanup(vdev); > > > > > - > > > > > - vhost_user_cleanup(user); > > > > > - g_free(user); > > > > > - s->vhost_user = NULL; > > > > > } > > > > > > > > > > static void vhost_user_blk_device_unrealize(DeviceState *dev, Error **errp) > > > > > { > > > > > VirtIODevice *vdev = VIRTIO_DEVICE(dev); > > > > > VHostUserBlk *s = VHOST_USER_BLK(dev); > > > > > - struct vhost_virtqueue *vqs = s->dev.vqs; > > > > > > > > > > vhost_user_blk_set_status(vdev, 0); > > > > > + qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL, NULL, > > > > > + NULL, NULL, NULL, false); > > > > > vhost_dev_cleanup(&s->dev); > > > > > vhost_dev_free_inflight(s->inflight); > > > > > - g_free(vqs); > > > > > + g_free(s->vqs); > > > > > g_free(s->inflight); > > > > > virtio_cleanup(vdev); > > > > > > > > > > diff --git a/include/hw/virtio/vhost-user-blk.h b/include/hw/virtio/vhost-user-blk.h > > > > > index 445516604a..4849aa5eb5 100644 > > > > > --- a/include/hw/virtio/vhost-user-blk.h > > > > > +++ b/include/hw/virtio/vhost-user-blk.h > > > > > @@ -38,6 +38,10 @@ typedef struct VHostUserBlk { > > > > > struct vhost_dev dev; > > > > > struct vhost_inflight *inflight; > > > > > VhostUserState *vhost_user; > > > > > + struct vhost_virtqueue *vqs; > > > > > + guint watch; > > > > > + bool should_start; > > > > > > > > Please add comments about fields. > > > > Also should_start seems to be set on guest activity. > > > > Does it need to be migrated? > > > > It would be better not to track guest state like this. > > > > Why do we need it? > > > > Pls add comments with an explanation. > > > > > > > > > > We can't relay on vhost_dev.started to track guest's state in > > > vhost_user_blk_set_status() when we enable reconnecting. 
For example, > > > qemu will lose guest's state if guest stop device during backend is > > > disconnected. So vhost_dev.started is used to track backend's state > > > and should_start is used to track guest's state in this patch. And > > > this variable is based on VirtIODevice.status, so I think it is not > > > necessary to migrate it. What do you think about it? > > > > If it's based on VirtIODevice.status then how about not keeping > > it around at all and just calculating from VirtIODevice.status > > when necessary? > > > > Sorry, I made a mistake before. Normally, should_start is based on > VirtIODevice.status. But it's not the case for virtio 1.0 transitional > devices. Then I realize there may be a bug. If migration completes > between kicking virtqueue and setting VIRTIO_CONFIG_S_DRIVER_OK, guest > may be hung. So I think should_start should be migrated in this case. > > Thanks, > Yongji This is exactly the case that the fake kick is intended to fix. We can migrate this state but it's really a virtio thing, not vhost specific. migrate it and skip fake kick, sure.
On Thu, 14 Mar 2019 at 20:31, Yury Kotov <yury-kotov@yandex-team.ru> wrote: > > Hi, > > 14.03.2019, 14:44, "Daniel P. Berrangé" <berrange@redhat.com>: > > On Thu, Mar 14, 2019 at 07:34:03AM -0400, Michael S. Tsirkin wrote: > >> On Thu, Mar 14, 2019 at 11:24:22AM +0000, Daniel P. Berrangé wrote: > >> > On Tue, Mar 12, 2019 at 12:49:35PM -0400, Michael S. Tsirkin wrote: > >> > > On Thu, Feb 28, 2019 at 04:53:54PM +0800, elohimes@gmail.com wrote: > >> > > > From: Xie Yongji <xieyongji@baidu.com> > >> > > > > >> > > > Since we now support the message VHOST_USER_GET_INFLIGHT_FD > >> > > > and VHOST_USER_SET_INFLIGHT_FD. The backend is able to restart > >> > > > safely because it can track inflight I/O in shared memory. > >> > > > This patch allows qemu to reconnect the backend after > >> > > > connection closed. > >> > > > > >> > > > Signed-off-by: Xie Yongji <xieyongji@baidu.com> > >> > > > Signed-off-by: Ni Xun <nixun@baidu.com> > >> > > > Signed-off-by: Zhang Yu <zhangyu31@baidu.com> > >> > > > --- > >> > > > hw/block/vhost-user-blk.c | 205 +++++++++++++++++++++++------ > >> > > > include/hw/virtio/vhost-user-blk.h | 4 + > >> > > > 2 files changed, 167 insertions(+), 42 deletions(-) > >> > > >> > > >> > > > static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp) > >> > > > { > >> > > > VirtIODevice *vdev = VIRTIO_DEVICE(dev); > >> > > > VHostUserBlk *s = VHOST_USER_BLK(vdev); > >> > > > VhostUserState *user; > >> > > > - struct vhost_virtqueue *vqs = NULL; > >> > > > int i, ret; > >> > > > + Error *err = NULL; > >> > > > > >> > > > if (!s->chardev.chr) { > >> > > > error_setg(errp, "vhost-user-blk: chardev is mandatory"); > >> > > > @@ -312,27 +442,28 @@ static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp) > >> > > > } > >> > > > > >> > > > s->inflight = g_new0(struct vhost_inflight, 1); > >> > > > - > >> > > > - s->dev.nvqs = s->num_queues; > >> > > > - s->dev.vqs = g_new(struct vhost_virtqueue, s->dev.nvqs); > >> > > > - 
s->dev.vq_index = 0; > >> > > > - s->dev.backend_features = 0; > >> > > > - vqs = s->dev.vqs; > >> > > > - > >> > > > - vhost_dev_set_config_notifier(&s->dev, &blk_ops); > >> > > > - > >> > > > - ret = vhost_dev_init(&s->dev, s->vhost_user, VHOST_BACKEND_TYPE_USER, 0); > >> > > > - if (ret < 0) { > >> > > > - error_setg(errp, "vhost-user-blk: vhost initialization failed: %s", > >> > > > - strerror(-ret)); > >> > > > - goto virtio_err; > >> > > > - } > >> > > > + s->vqs = g_new(struct vhost_virtqueue, s->num_queues); > >> > > > + s->watch = 0; > >> > > > + s->should_start = false; > >> > > > + s->connected = false; > >> > > > + > >> > > > + qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL, vhost_user_blk_event, > >> > > > + NULL, (void *)dev, NULL, true); > >> > > > + > >> > > > +reconnect: > >> > > > + do { > >> > > > + if (qemu_chr_fe_wait_connected(&s->chardev, &err) < 0) { > >> > > > + error_report_err(err); > >> > > > + err = NULL; > >> > > > + sleep(1); > >> > > > >> > > Seems arbitrary. Is this basically waiting until backend will reconnect? > >> > > Why not block until event on the fd triggers? > >> > > > >> > > Also, it looks like this will just block forever with no monitor input > >> > > and no way for user to figure out what is going on short of > >> > > crashing QEMU. > >> > > >> > FWIW, the current vhost-user-net device does exactly the same thing > >> > with calling qemu_chr_fe_wait_connected during its realize() function. > >> > >> Hmm yes. It doesn't sleep for an arbitrary 1 sec so less of an eyesore :) > > > > The sleep(1) in this patch simply needs to be removed. I think that > > probably dates from when it was written against the earlier broken > > version of qemu_chr_fe_wait_connected(). That would not correctly > > deal with the "reconnect" flag, and so needing this loop with a sleep > > in it. > > > > In fact the while loop can be removed as well in this code. It just > > needs to call qemu_chr_fe_wait_connected() once. 
It is guaranteed > > to have a connected peer once that returns 0. > > > > qemu_chr_fe_wait_connected() only returns -1 if operating in > > client mode, and it failed to connect and reconnect is *not* > > requested. In such a case the caller should honour the failure and > > quit, not loop to retry. > > > > The reason vhost-user-net does a loop is because once it has a > > connection it tries to do a protocol handshake, and if that > > handshake fails it closes the chardev and tries to connect > > again. That's not the case in this blk code, so the loop is > > not needed. > > > > But vhost-user-blk also has a handshake in device realize. What happens if the > connection is broken during realization? IIUC we have to retry a handshake in > such case just like vhost-user-net. > Actually that's what I want to ask too. Thanks, Yongji
On Thu, 14 Mar 2019 at 19:43, Daniel P. Berrangé <berrange@redhat.com> wrote: > > On Thu, Mar 14, 2019 at 07:34:03AM -0400, Michael S. Tsirkin wrote: > > On Thu, Mar 14, 2019 at 11:24:22AM +0000, Daniel P. Berrangé wrote: > > > On Tue, Mar 12, 2019 at 12:49:35PM -0400, Michael S. Tsirkin wrote: > > > > On Thu, Feb 28, 2019 at 04:53:54PM +0800, elohimes@gmail.com wrote: > > > > > From: Xie Yongji <xieyongji@baidu.com> > > > > > > > > > > Since we now support the message VHOST_USER_GET_INFLIGHT_FD > > > > > and VHOST_USER_SET_INFLIGHT_FD. The backend is able to restart > > > > > safely because it can track inflight I/O in shared memory. > > > > > This patch allows qemu to reconnect the backend after > > > > > connection closed. > > > > > > > > > > Signed-off-by: Xie Yongji <xieyongji@baidu.com> > > > > > Signed-off-by: Ni Xun <nixun@baidu.com> > > > > > Signed-off-by: Zhang Yu <zhangyu31@baidu.com> > > > > > --- > > > > > hw/block/vhost-user-blk.c | 205 +++++++++++++++++++++++------ > > > > > include/hw/virtio/vhost-user-blk.h | 4 + > > > > > 2 files changed, 167 insertions(+), 42 deletions(-) > > > > > > > > > > > static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp) > > > > > { > > > > > VirtIODevice *vdev = VIRTIO_DEVICE(dev); > > > > > VHostUserBlk *s = VHOST_USER_BLK(vdev); > > > > > VhostUserState *user; > > > > > - struct vhost_virtqueue *vqs = NULL; > > > > > int i, ret; > > > > > + Error *err = NULL; > > > > > > > > > > if (!s->chardev.chr) { > > > > > error_setg(errp, "vhost-user-blk: chardev is mandatory"); > > > > > @@ -312,27 +442,28 @@ static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp) > > > > > } > > > > > > > > > > s->inflight = g_new0(struct vhost_inflight, 1); > > > > > - > > > > > - s->dev.nvqs = s->num_queues; > > > > > - s->dev.vqs = g_new(struct vhost_virtqueue, s->dev.nvqs); > > > > > - s->dev.vq_index = 0; > > > > > - s->dev.backend_features = 0; > > > > > - vqs = s->dev.vqs; > > > > > - > > > > > - 
vhost_dev_set_config_notifier(&s->dev, &blk_ops); > > > > > - > > > > > - ret = vhost_dev_init(&s->dev, s->vhost_user, VHOST_BACKEND_TYPE_USER, 0); > > > > > - if (ret < 0) { > > > > > - error_setg(errp, "vhost-user-blk: vhost initialization failed: %s", > > > > > - strerror(-ret)); > > > > > - goto virtio_err; > > > > > - } > > > > > + s->vqs = g_new(struct vhost_virtqueue, s->num_queues); > > > > > + s->watch = 0; > > > > > + s->should_start = false; > > > > > + s->connected = false; > > > > > + > > > > > + qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL, vhost_user_blk_event, > > > > > + NULL, (void *)dev, NULL, true); > > > > > + > > > > > +reconnect: > > > > > + do { > > > > > + if (qemu_chr_fe_wait_connected(&s->chardev, &err) < 0) { > > > > > + error_report_err(err); > > > > > + err = NULL; > > > > > + sleep(1); > > > > > > > > Seems arbitrary. Is this basically waiting until backend will reconnect? > > > > Why not block until event on the fd triggers? > > > > > > > > Also, it looks like this will just block forever with no monitor input > > > > and no way for user to figure out what is going on short of > > > > crashing QEMU. > > > > > > FWIW, the current vhost-user-net device does exactly the same thing > > > with calling qemu_chr_fe_wait_connected during its realize() function. > > > > Hmm yes. It doesn't sleep for an arbitrary 1 sec so less of an eyesore :) > > The sleep(1) in this patch simply needs to be removed. I think that > probably dates from when it was written against the earlier broken > version of qemu_chr_fe_wait_connected(). That would not correctly > deal with the "reconnect" flag, and so needing this loop with a sleep > in it. > > In fact the while loop can be removed as well in this code. It just > needs to call qemu_chr_fe_wait_connected() once. It is guaranteed > to have a connected peer once that returns 0. 
> > qemu_chr_fe_wait_connected() only returns -1 if operating in > client mode, and it failed to connect and reconnect is *not* > requested. In such case the caller should honour the failure and > quit, not loop to retry. > Thanks for your comments. Will do that in next patchset. > > The reason vhost-user-net does a loop is because once it has a > connection it tries to do a protocol handshake, and if that > handshake fails it closes the chardev and tries to connect > again. That's not the case in this blk code so the loop is > not needed. > Seems like vhost-user-blk device needs to do that too. Thanks, Yongji
On Fri, 15 Mar 2019 at 11:09, Michael S. Tsirkin <mst@redhat.com> wrote: > > On Fri, Mar 15, 2019 at 10:46:34AM +0800, Yongji Xie wrote: > > On Thu, 14 Mar 2019 at 19:18, Michael S. Tsirkin <mst@redhat.com> wrote: > > > > > > On Wed, Mar 13, 2019 at 10:47:08AM +0800, Yongji Xie wrote: > > > > On Wed, 13 Mar 2019 at 09:16, Michael S. Tsirkin <mst@redhat.com> wrote: > > > > > > > > > > On Thu, Feb 28, 2019 at 04:53:54PM +0800, elohimes@gmail.com wrote: > > > > > > From: Xie Yongji <xieyongji@baidu.com> > > > > > > > > > > > > Since we now support the message VHOST_USER_GET_INFLIGHT_FD > > > > > > and VHOST_USER_SET_INFLIGHT_FD. The backend is able to restart > > > > > > safely because it can track inflight I/O in shared memory. > > > > > > This patch allows qemu to reconnect the backend after > > > > > > connection closed. > > > > > > > > > > > > Signed-off-by: Xie Yongji <xieyongji@baidu.com> > > > > > > Signed-off-by: Ni Xun <nixun@baidu.com> > > > > > > Signed-off-by: Zhang Yu <zhangyu31@baidu.com> > > > > > > --- > > > > > > hw/block/vhost-user-blk.c | 205 +++++++++++++++++++++++------ > > > > > > include/hw/virtio/vhost-user-blk.h | 4 + > > > > > > 2 files changed, 167 insertions(+), 42 deletions(-) > > > > > > > > > > > > diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c > > > > > > index 9682df1a7b..539ea2e571 100644 > > > > > > --- a/hw/block/vhost-user-blk.c > > > > > > +++ b/hw/block/vhost-user-blk.c > > > > > > @@ -103,7 +103,7 @@ const VhostDevConfigOps blk_ops = { > > > > > > .vhost_dev_config_notifier = vhost_user_blk_handle_config_change, > > > > > > }; > > > > > > > > > > > > -static void vhost_user_blk_start(VirtIODevice *vdev) > > > > > > +static int vhost_user_blk_start(VirtIODevice *vdev) > > > > > > { > > > > > > VHostUserBlk *s = VHOST_USER_BLK(vdev); > > > > > > BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); > > > > > > @@ -112,13 +112,13 @@ static void vhost_user_blk_start(VirtIODevice *vdev) > > > > > > > > > > > > 
if (!k->set_guest_notifiers) { > > > > > > error_report("binding does not support guest notifiers"); > > > > > > - return; > > > > > > + return -ENOSYS; > > > > > > } > > > > > > > > > > > > ret = vhost_dev_enable_notifiers(&s->dev, vdev); > > > > > > if (ret < 0) { > > > > > > error_report("Error enabling host notifiers: %d", -ret); > > > > > > - return; > > > > > > + return ret; > > > > > > } > > > > > > > > > > > > ret = k->set_guest_notifiers(qbus->parent, s->dev.nvqs, true); > > > > > > @@ -157,12 +157,13 @@ static void vhost_user_blk_start(VirtIODevice *vdev) > > > > > > vhost_virtqueue_mask(&s->dev, vdev, i, false); > > > > > > } > > > > > > > > > > > > - return; > > > > > > + return ret; > > > > > > > > > > > > err_guest_notifiers: > > > > > > k->set_guest_notifiers(qbus->parent, s->dev.nvqs, false); > > > > > > err_host_notifiers: > > > > > > vhost_dev_disable_notifiers(&s->dev, vdev); > > > > > > + return ret; > > > > > > } > > > > > > > > > > > > static void vhost_user_blk_stop(VirtIODevice *vdev) > > > > > > @@ -181,7 +182,6 @@ static void vhost_user_blk_stop(VirtIODevice *vdev) > > > > > > ret = k->set_guest_notifiers(qbus->parent, s->dev.nvqs, false); > > > > > > if (ret < 0) { > > > > > > error_report("vhost guest notifier cleanup failed: %d", ret); > > > > > > - return; > > > > > > } > > > > > > > > > > > > vhost_dev_disable_notifiers(&s->dev, vdev); > > > > > > @@ -191,21 +191,43 @@ static void vhost_user_blk_set_status(VirtIODevice *vdev, uint8_t status) > > > > > > { > > > > > > VHostUserBlk *s = VHOST_USER_BLK(vdev); > > > > > > bool should_start = status & VIRTIO_CONFIG_S_DRIVER_OK; > > > > > > + int ret; > > > > > > > > > > > > if (!vdev->vm_running) { > > > > > > should_start = false; > > > > > > } > > > > > > > > > > > > - if (s->dev.started == should_start) { > > > > > > + if (s->should_start == should_start) { > > > > > > + return; > > > > > > + } > > > > > > + > > > > > > + if (!s->connected || s->dev.started == should_start) { > > > > > 
> + s->should_start = should_start; > > > > > > return; > > > > > > } > > > > > > > > > > > > if (should_start) { > > > > > > - vhost_user_blk_start(vdev); > > > > > > + s->should_start = true; > > > > > > + /* > > > > > > + * make sure vhost_user_blk_handle_output() ignores fake > > > > > > + * guest kick by vhost_dev_enable_notifiers() > > > > > > + */ > > > > > > + barrier(); > > > > > > + ret = vhost_user_blk_start(vdev); > > > > > > + if (ret < 0) { > > > > > > + error_report("vhost-user-blk: vhost start failed: %s", > > > > > > + strerror(-ret)); > > > > > > + qemu_chr_fe_disconnect(&s->chardev); > > > > > > + } > > > > > > } else { > > > > > > vhost_user_blk_stop(vdev); > > > > > > + /* > > > > > > + * make sure vhost_user_blk_handle_output() ignore fake > > > > > > + * guest kick by vhost_dev_disable_notifiers() > > > > > > + */ > > > > > > + barrier(); > > > > > > + s->should_start = false; > > > > > > } > > > > > > > > > > I don't understand the comment about ignoring fake guest kicks. > > > > > Please add an explanation about what does barrier do here. > > > > > > > > We may get stack like this: > > > > > > > > vhost_user_blk_stop() > > > > vhost_dev_disable_notifiers() > > > > virtio_bus_cleanup_host_notifier() > > > > virtio_queue_host_notifier_read() > > > > virtio_queue_notify_vq() /* fake guest kick */ > > > > vhost_user_blk_handle_output() > > > > > > > > vhost_user_blk_handle_output() will re-start vhost-user if > > > > should_start is false. This is not what we expect. The same to > > > > vhost_dev_enable_notifiers(). > > > > > > Well what does a compiler barrier have to do with it? > > > > > > Anyway, it's there to avoid losing kicks e.g. across migration. If you > > > have something else that prevents losing kicks (e.g. migrate a "started" > > > flag) then add code to skip the notify - don't work around it. > > > > > > > OK, I see. Thanks. 
> > > > IIUC, vhost-user-blk device actually does not need this kind of kick > > because dataplane is not in qemu side, right? > > > > > > > > > > And maybe a detailed explanation about what "fake kick" > > > > > is in the commit log. > > > > > > > > > > > > > Sure. > > > > > > > > > > > - > > > > > > } > > > > > > > > > > > > static uint64_t vhost_user_blk_get_features(VirtIODevice *vdev, > > > > > > @@ -237,13 +259,22 @@ static uint64_t vhost_user_blk_get_features(VirtIODevice *vdev, > > > > > > static void vhost_user_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq) > > > > > > { > > > > > > VHostUserBlk *s = VHOST_USER_BLK(vdev); > > > > > > - int i; > > > > > > + int i, ret; > > > > > > > > > > > > if (!(virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1) && > > > > > > !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1))) { > > > > > > return; > > > > > > } > > > > > > > > > > > > + if (s->should_start) { > > > > > > + return; > > > > > > + } > > > > > > + s->should_start = true; > > > > > > + > > > > > > + if (!s->connected) { > > > > > > + return; > > > > > > + } > > > > > > + > > > > > > if (s->dev.started) { > > > > > > return; > > > > > > } > > > > > > @@ -251,7 +282,13 @@ static void vhost_user_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq) > > > > > > /* Some guests kick before setting VIRTIO_CONFIG_S_DRIVER_OK so start > > > > > > * vhost here instead of waiting for .set_status(). 
> > > > > > */ > > > > > > - vhost_user_blk_start(vdev); > > > > > > + ret = vhost_user_blk_start(vdev); > > > > > > + if (ret < 0) { > > > > > > + error_report("vhost-user-blk: vhost start failed: %s", > > > > > > + strerror(-ret)); > > > > > > + qemu_chr_fe_disconnect(&s->chardev); > > > > > > + return; > > > > > > + } > > > > > > > > > > > > /* Kick right away to begin processing requests already in vring */ > > > > > > for (i = 0; i < s->dev.nvqs; i++) { > > > > > > @@ -271,13 +308,106 @@ static void vhost_user_blk_reset(VirtIODevice *vdev) > > > > > > vhost_dev_free_inflight(s->inflight); > > > > > > } > > > > > > > > > > > > +static int vhost_user_blk_connect(DeviceState *dev) > > > > > > +{ > > > > > > + VirtIODevice *vdev = VIRTIO_DEVICE(dev); > > > > > > + VHostUserBlk *s = VHOST_USER_BLK(vdev); > > > > > > + int ret = 0; > > > > > > + > > > > > > + if (s->connected) { > > > > > > + return 0; > > > > > > + } > > > > > > + s->connected = true; > > > > > > + > > > > > > + s->dev.nvqs = s->num_queues; > > > > > > + s->dev.vqs = s->vqs; > > > > > > + s->dev.vq_index = 0; > > > > > > + s->dev.backend_features = 0; > > > > > > + > > > > > > + vhost_dev_set_config_notifier(&s->dev, &blk_ops); > > > > > > + > > > > > > + ret = vhost_dev_init(&s->dev, s->vhost_user, VHOST_BACKEND_TYPE_USER, 0); > > > > > > + if (ret < 0) { > > > > > > + error_report("vhost-user-blk: vhost initialization failed: %s", > > > > > > + strerror(-ret)); > > > > > > + return ret; > > > > > > + } > > > > > > + > > > > > > + /* restore vhost state */ > > > > > > + if (s->should_start) { > > > > > > + ret = vhost_user_blk_start(vdev); > > > > > > + if (ret < 0) { > > > > > > + error_report("vhost-user-blk: vhost start failed: %s", > > > > > > + strerror(-ret)); > > > > > > + return ret; > > > > > > + } > > > > > > + } > > > > > > + > > > > > > + return 0; > > > > > > +} > > > > > > + > > > > > > +static void vhost_user_blk_disconnect(DeviceState *dev) > > > > > > +{ > > > > > > + VirtIODevice 
*vdev = VIRTIO_DEVICE(dev); > > > > > > + VHostUserBlk *s = VHOST_USER_BLK(vdev); > > > > > > + > > > > > > + if (!s->connected) { > > > > > > + return; > > > > > > + } > > > > > > + s->connected = false; > > > > > > + > > > > > > + if (s->dev.started) { > > > > > > + vhost_user_blk_stop(vdev); > > > > > > + } > > > > > > + > > > > > > + vhost_dev_cleanup(&s->dev); > > > > > > +} > > > > > > + > > > > > > +static gboolean vhost_user_blk_watch(GIOChannel *chan, GIOCondition cond, > > > > > > + void *opaque) > > > > > > +{ > > > > > > + DeviceState *dev = opaque; > > > > > > + VirtIODevice *vdev = VIRTIO_DEVICE(dev); > > > > > > + VHostUserBlk *s = VHOST_USER_BLK(vdev); > > > > > > + > > > > > > + qemu_chr_fe_disconnect(&s->chardev); > > > > > > + > > > > > > + return true; > > > > > > +} > > > > > > + > > > > > > +static void vhost_user_blk_event(void *opaque, int event) > > > > > > +{ > > > > > > + DeviceState *dev = opaque; > > > > > > + VirtIODevice *vdev = VIRTIO_DEVICE(dev); > > > > > > + VHostUserBlk *s = VHOST_USER_BLK(vdev); > > > > > > + > > > > > > + switch (event) { > > > > > > + case CHR_EVENT_OPENED: > > > > > > + if (vhost_user_blk_connect(dev) < 0) { > > > > > > + qemu_chr_fe_disconnect(&s->chardev); > > > > > > + return; > > > > > > + } > > > > > > + s->watch = qemu_chr_fe_add_watch(&s->chardev, G_IO_HUP, > > > > > > + vhost_user_blk_watch, dev); > > > > > > + break; > > > > > > + case CHR_EVENT_CLOSED: > > > > > > + vhost_user_blk_disconnect(dev); > > > > > > + if (s->watch) { > > > > > > + g_source_remove(s->watch); > > > > > > + s->watch = 0; > > > > > > + } > > > > > > + break; > > > > > > + } > > > > > > +} > > > > > > + > > > > > > + > > > > > > static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp) > > > > > > { > > > > > > VirtIODevice *vdev = VIRTIO_DEVICE(dev); > > > > > > VHostUserBlk *s = VHOST_USER_BLK(vdev); > > > > > > VhostUserState *user; > > > > > > - struct vhost_virtqueue *vqs = NULL; > > > > > > int i, ret; > > 
> > > > + Error *err = NULL; > > > > > > > > > > > > if (!s->chardev.chr) { > > > > > > error_setg(errp, "vhost-user-blk: chardev is mandatory"); > > > > > > @@ -312,27 +442,28 @@ static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp) > > > > > > } > > > > > > > > > > > > s->inflight = g_new0(struct vhost_inflight, 1); > > > > > > - > > > > > > - s->dev.nvqs = s->num_queues; > > > > > > - s->dev.vqs = g_new(struct vhost_virtqueue, s->dev.nvqs); > > > > > > - s->dev.vq_index = 0; > > > > > > - s->dev.backend_features = 0; > > > > > > - vqs = s->dev.vqs; > > > > > > - > > > > > > - vhost_dev_set_config_notifier(&s->dev, &blk_ops); > > > > > > - > > > > > > - ret = vhost_dev_init(&s->dev, s->vhost_user, VHOST_BACKEND_TYPE_USER, 0); > > > > > > - if (ret < 0) { > > > > > > - error_setg(errp, "vhost-user-blk: vhost initialization failed: %s", > > > > > > - strerror(-ret)); > > > > > > - goto virtio_err; > > > > > > - } > > > > > > + s->vqs = g_new(struct vhost_virtqueue, s->num_queues); > > > > > > + s->watch = 0; > > > > > > + s->should_start = false; > > > > > > + s->connected = false; > > > > > > + > > > > > > + qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL, vhost_user_blk_event, > > > > > > + NULL, (void *)dev, NULL, true); > > > > > > + > > > > > > +reconnect: > > > > > > + do { > > > > > > + if (qemu_chr_fe_wait_connected(&s->chardev, &err) < 0) { > > > > > > + error_report_err(err); > > > > > > + err = NULL; > > > > > > + sleep(1); > > > > > > + } > > > > > > + } while (!s->connected); > > > > > > > > > > > > ret = vhost_dev_get_config(&s->dev, (uint8_t *)&s->blkcfg, > > > > > > - sizeof(struct virtio_blk_config)); > > > > > > + sizeof(struct virtio_blk_config)); > > > > > > if (ret < 0) { > > > > > > - error_setg(errp, "vhost-user-blk: get block config failed"); > > > > > > - goto vhost_err; > > > > > > + error_report("vhost-user-blk: get block config failed"); > > > > > > + goto reconnect; > > > > > > } > > > > > > > > > > > > if 
(s->blkcfg.num_queues != s->num_queues) { > > > > > > @@ -340,29 +471,19 @@ static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp) > > > > > > } > > > > > > > > > > > > return; > > > > > > > > > > With this we end up with return at end of function > > > > > which makes no sense. > > > > > > > > > > > > > Oh, yes. I will remove it. > > > > > > > > > > - > > > > > > -vhost_err: > > > > > > - vhost_dev_cleanup(&s->dev); > > > > > > -virtio_err: > > > > > > - g_free(vqs); > > > > > > - g_free(s->inflight); > > > > > > - virtio_cleanup(vdev); > > > > > > - > > > > > > - vhost_user_cleanup(user); > > > > > > - g_free(user); > > > > > > - s->vhost_user = NULL; > > > > > > } > > > > > > > > > > > > static void vhost_user_blk_device_unrealize(DeviceState *dev, Error **errp) > > > > > > { > > > > > > VirtIODevice *vdev = VIRTIO_DEVICE(dev); > > > > > > VHostUserBlk *s = VHOST_USER_BLK(dev); > > > > > > - struct vhost_virtqueue *vqs = s->dev.vqs; > > > > > > > > > > > > vhost_user_blk_set_status(vdev, 0); > > > > > > + qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL, NULL, > > > > > > + NULL, NULL, NULL, false); > > > > > > vhost_dev_cleanup(&s->dev); > > > > > > vhost_dev_free_inflight(s->inflight); > > > > > > - g_free(vqs); > > > > > > + g_free(s->vqs); > > > > > > g_free(s->inflight); > > > > > > virtio_cleanup(vdev); > > > > > > > > > > > > diff --git a/include/hw/virtio/vhost-user-blk.h b/include/hw/virtio/vhost-user-blk.h > > > > > > index 445516604a..4849aa5eb5 100644 > > > > > > --- a/include/hw/virtio/vhost-user-blk.h > > > > > > +++ b/include/hw/virtio/vhost-user-blk.h > > > > > > @@ -38,6 +38,10 @@ typedef struct VHostUserBlk { > > > > > > struct vhost_dev dev; > > > > > > struct vhost_inflight *inflight; > > > > > > VhostUserState *vhost_user; > > > > > > + struct vhost_virtqueue *vqs; > > > > > > + guint watch; > > > > > > + bool should_start; > > > > > > > > > > Please add comments about fields. 
> > > > > Also should_start seems to be set on guest activity. > > > > > Does it need to be migrated? > > > > > It would be better not to track guest state like this. > > > > > Why do we need it? > > > > > Pls add comments with an explanation. > > > > > > > > > > > > We can't rely on vhost_dev.started to track guest's state in > > > > vhost_user_blk_set_status() when we enable reconnecting. For example, > > > > qemu will lose guest's state if guest stop device during backend is > > > > disconnected. So vhost_dev.started is used to track backend's state > > > > and should_start is used to track guest's state in this patch. And > > > > this variable is based on VirtIODevice.status, so I think it is not > > > > necessary to migrate it. What do you think about it? > > > > > > If it's based on VirtIODevice.status then how about not keeping > > > it around at all and just calculating from VirtIODevice.status > > > when necessary? > > > > > > > Sorry, I made a mistake before. Normally, should_start is based on > > VirtIODevice.status. But it's not the case for virtio 1.0 transitional > > devices. Then I realize there may be a bug. If migration completes > > between kicking virtqueue and setting VIRTIO_CONFIG_S_DRIVER_OK, guest > > may be hung. So I think should_start should be migrated in this case. > > > > Thanks, > > Yongji > > This is exactly the case that the fake kick is intended to fix. > Sorry, I do not understand how this case can be fixed by the fake kick. In this case, it seems like we have no chance to trigger the fake kick. After migration, vhost_user_blk_set_status() will find VirtIODevice.status is !VIRTIO_CONFIG_S_DRIVER_OK then skip starting vhost-user backend (cause guest hung). > We can migrate this state but it's really a virtio thing, > not vhost specific. migrate it and skip fake kick, sure. > Could you explain a bit more how to skip the fake kick? Thank you. Thanks, Yongji
On Thu, Mar 14, 2019 at 03:31:47PM +0300, Yury Kotov wrote: > Hi, > > 14.03.2019, 14:44, "Daniel P. Berrangé" <berrange@redhat.com>: > > On Thu, Mar 14, 2019 at 07:34:03AM -0400, Michael S. Tsirkin wrote: > >> On Thu, Mar 14, 2019 at 11:24:22AM +0000, Daniel P. Berrangé wrote: > >> > On Tue, Mar 12, 2019 at 12:49:35PM -0400, Michael S. Tsirkin wrote: > >> > > On Thu, Feb 28, 2019 at 04:53:54PM +0800, elohimes@gmail.com wrote: > >> > > > From: Xie Yongji <xieyongji@baidu.com> > >> > > > > >> > > > Since we now support the message VHOST_USER_GET_INFLIGHT_FD > >> > > > and VHOST_USER_SET_INFLIGHT_FD. The backend is able to restart > >> > > > safely because it can track inflight I/O in shared memory. > >> > > > This patch allows qemu to reconnect the backend after > >> > > > connection closed. > >> > > > > >> > > > Signed-off-by: Xie Yongji <xieyongji@baidu.com> > >> > > > Signed-off-by: Ni Xun <nixun@baidu.com> > >> > > > Signed-off-by: Zhang Yu <zhangyu31@baidu.com> > >> > > > --- > >> > > > hw/block/vhost-user-blk.c | 205 +++++++++++++++++++++++------ > >> > > > include/hw/virtio/vhost-user-blk.h | 4 + > >> > > > 2 files changed, 167 insertions(+), 42 deletions(-) > >> > > >> > > >> > > > static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp) > >> > > > { > >> > > > VirtIODevice *vdev = VIRTIO_DEVICE(dev); > >> > > > VHostUserBlk *s = VHOST_USER_BLK(vdev); > >> > > > VhostUserState *user; > >> > > > - struct vhost_virtqueue *vqs = NULL; > >> > > > int i, ret; > >> > > > + Error *err = NULL; > >> > > > > >> > > > if (!s->chardev.chr) { > >> > > > error_setg(errp, "vhost-user-blk: chardev is mandatory"); > >> > > > @@ -312,27 +442,28 @@ static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp) > >> > > > } > >> > > > > >> > > > s->inflight = g_new0(struct vhost_inflight, 1); > >> > > > - > >> > > > - s->dev.nvqs = s->num_queues; > >> > > > - s->dev.vqs = g_new(struct vhost_virtqueue, s->dev.nvqs); > >> > > > - s->dev.vq_index = 0; > 
>> > > > - s->dev.backend_features = 0; > >> > > > - vqs = s->dev.vqs; > >> > > > - > >> > > > - vhost_dev_set_config_notifier(&s->dev, &blk_ops); > >> > > > - > >> > > > - ret = vhost_dev_init(&s->dev, s->vhost_user, VHOST_BACKEND_TYPE_USER, 0); > >> > > > - if (ret < 0) { > >> > > > - error_setg(errp, "vhost-user-blk: vhost initialization failed: %s", > >> > > > - strerror(-ret)); > >> > > > - goto virtio_err; > >> > > > - } > >> > > > + s->vqs = g_new(struct vhost_virtqueue, s->num_queues); > >> > > > + s->watch = 0; > >> > > > + s->should_start = false; > >> > > > + s->connected = false; > >> > > > + > >> > > > + qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL, vhost_user_blk_event, > >> > > > + NULL, (void *)dev, NULL, true); > >> > > > + > >> > > > +reconnect: > >> > > > + do { > >> > > > + if (qemu_chr_fe_wait_connected(&s->chardev, &err) < 0) { > >> > > > + error_report_err(err); > >> > > > + err = NULL; > >> > > > + sleep(1); > >> > > > >> > > Seems arbitrary. Is this basically waiting until backend will reconnect? > >> > > Why not block until event on the fd triggers? > >> > > > >> > > Also, it looks like this will just block forever with no monitor input > >> > > and no way for user to figure out what is going on short of > >> > > crashing QEMU. > >> > > >> > FWIW, the current vhost-user-net device does exactly the same thing > >> > with calling qemu_chr_fe_wait_connected during its realize() function. > >> > >> Hmm yes. It doesn't sleep for an arbitrary 1 sec so less of an eyesore :) > > > > The sleep(1) in this patch simply needs to be removed. I think that > > probably dates from when it was written against the earlier broken > > version of qemu_chr_fe_wait_connected(). That would not correctly > > deal with the "reconnect" flag, and so needing this loop with a sleep > > in it. > > > > In fact the while loop can be removed as well in this code. It just > > needs to call qemu_chr_fe_wait_connected() once. 
It is guaranteed > > to have a connected peer once that returns 0. > > > > qemu_chr_fe_wait_connected() only returns -1 if the operating in > > client mode, and it failed to connect and reconnect is *not* > > requested. In such case the caller should honour the failure and > > quit, not loop to retry. > > > > The reason vhost-user-net does a loop is because once it has a > > connection it tries todo a protocol handshake, and if that > > handshake fails it closes the chardev and tries to connect > > again. That's not the case in this blk code os the loop is > > not needed. > > > > But vhost-user-blk also has a handshake in device realize. What happens if the > connection is broken during realization? IIUC we have to retry a handshake in > such case just like vhost-user-net. I'm just commenting on the current code which does not do that handshake in the loop afaict. If it needs to do that then the patch should be updated... Regards, Daniel
15.03.2019, 12:46, "Daniel P. Berrangé" <berrange@redhat.com>: > On Thu, Mar 14, 2019 at 03:31:47PM +0300, Yury Kotov wrote: >> Hi, >> >> 14.03.2019, 14:44, "Daniel P. Berrangé" <berrange@redhat.com>: >> > On Thu, Mar 14, 2019 at 07:34:03AM -0400, Michael S. Tsirkin wrote: >> >> On Thu, Mar 14, 2019 at 11:24:22AM +0000, Daniel P. Berrangé wrote: >> >> > On Tue, Mar 12, 2019 at 12:49:35PM -0400, Michael S. Tsirkin wrote: >> >> > > On Thu, Feb 28, 2019 at 04:53:54PM +0800, elohimes@gmail.com wrote: >> >> > > > From: Xie Yongji <xieyongji@baidu.com> >> >> > > > >> >> > > > Since we now support the message VHOST_USER_GET_INFLIGHT_FD >> >> > > > and VHOST_USER_SET_INFLIGHT_FD. The backend is able to restart >> >> > > > safely because it can track inflight I/O in shared memory. >> >> > > > This patch allows qemu to reconnect the backend after >> >> > > > connection closed. >> >> > > > >> >> > > > Signed-off-by: Xie Yongji <xieyongji@baidu.com> >> >> > > > Signed-off-by: Ni Xun <nixun@baidu.com> >> >> > > > Signed-off-by: Zhang Yu <zhangyu31@baidu.com> >> >> > > > --- >> >> > > > hw/block/vhost-user-blk.c | 205 +++++++++++++++++++++++------ >> >> > > > include/hw/virtio/vhost-user-blk.h | 4 + >> >> > > > 2 files changed, 167 insertions(+), 42 deletions(-) >> >> > >> >> > >> >> > > > static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp) >> >> > > > { >> >> > > > VirtIODevice *vdev = VIRTIO_DEVICE(dev); >> >> > > > VHostUserBlk *s = VHOST_USER_BLK(vdev); >> >> > > > VhostUserState *user; >> >> > > > - struct vhost_virtqueue *vqs = NULL; >> >> > > > int i, ret; >> >> > > > + Error *err = NULL; >> >> > > > >> >> > > > if (!s->chardev.chr) { >> >> > > > error_setg(errp, "vhost-user-blk: chardev is mandatory"); >> >> > > > @@ -312,27 +442,28 @@ static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp) >> >> > > > } >> >> > > > >> >> > > > s->inflight = g_new0(struct vhost_inflight, 1); >> >> > > > - >> >> > > > - s->dev.nvqs = s->num_queues; 
>> >> > > > - s->dev.vqs = g_new(struct vhost_virtqueue, s->dev.nvqs); >> >> > > > - s->dev.vq_index = 0; >> >> > > > - s->dev.backend_features = 0; >> >> > > > - vqs = s->dev.vqs; >> >> > > > - >> >> > > > - vhost_dev_set_config_notifier(&s->dev, &blk_ops); >> >> > > > - >> >> > > > - ret = vhost_dev_init(&s->dev, s->vhost_user, VHOST_BACKEND_TYPE_USER, 0); >> >> > > > - if (ret < 0) { >> >> > > > - error_setg(errp, "vhost-user-blk: vhost initialization failed: %s", >> >> > > > - strerror(-ret)); >> >> > > > - goto virtio_err; >> >> > > > - } >> >> > > > + s->vqs = g_new(struct vhost_virtqueue, s->num_queues); >> >> > > > + s->watch = 0; >> >> > > > + s->should_start = false; >> >> > > > + s->connected = false; >> >> > > > + >> >> > > > + qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL, vhost_user_blk_event, >> >> > > > + NULL, (void *)dev, NULL, true); >> >> > > > + >> >> > > > +reconnect: >> >> > > > + do { >> >> > > > + if (qemu_chr_fe_wait_connected(&s->chardev, &err) < 0) { >> >> > > > + error_report_err(err); >> >> > > > + err = NULL; >> >> > > > + sleep(1); >> >> > > >> >> > > Seems arbitrary. Is this basically waiting until backend will reconnect? >> >> > > Why not block until event on the fd triggers? >> >> > > >> >> > > Also, it looks like this will just block forever with no monitor input >> >> > > and no way for user to figure out what is going on short of >> >> > > crashing QEMU. >> >> > >> >> > FWIW, the current vhost-user-net device does exactly the same thing >> >> > with calling qemu_chr_fe_wait_connected during its realize() function. >> >> >> >> Hmm yes. It doesn't sleep for an arbitrary 1 sec so less of an eyesore :) >> > >> > The sleep(1) in this patch simply needs to be removed. I think that >> > probably dates from when it was written against the earlier broken >> > version of qemu_chr_fe_wait_connected(). That would not correctly >> > deal with the "reconnect" flag, and so needing this loop with a sleep >> > in it. 
>> > >> > In fact the while loop can be removed as well in this code. It just >> > needs to call qemu_chr_fe_wait_connected() once. It is guaranteed >> > to have a connected peer once that returns 0. >> > >> > qemu_chr_fe_wait_connected() only returns -1 if operating in >> > client mode, and it failed to connect and reconnect is *not* >> > requested. In such case the caller should honour the failure and >> > quit, not loop to retry. >> > >> > The reason vhost-user-net does a loop is because once it has a >> > connection it tries to do a protocol handshake, and if that >> > handshake fails it closes the chardev and tries to connect >> > again. That's not the case in this blk code so the loop is >> > not needed. >> > >> >> But vhost-user-blk also has a handshake in device realize. What happens if the >> connection is broken during realization? IIUC we have to retry a handshake in >> such case just like vhost-user-net. > > I'm just commenting on the current code which does not do that > handshake in the loop afaict. If it needs to do that then the > patch should be updated... > Oh, yes... This loop doesn't do a handshake. Handshake is after the loop. But now it gotos to reconnect. So maybe it makes sense to rewrite a handshake since we don't need two nested loops to get reconnection without gotos. Regards, Yury
On Fri, 15 Mar 2019 at 18:41, Yury Kotov <yury-kotov@yandex-team.ru> wrote: > > 15.03.2019, 12:46, "Daniel P. Berrangé" <berrange@redhat.com>: > > On Thu, Mar 14, 2019 at 03:31:47PM +0300, Yury Kotov wrote: > >> Hi, > >> > >> 14.03.2019, 14:44, "Daniel P. Berrangé" <berrange@redhat.com>: > >> > On Thu, Mar 14, 2019 at 07:34:03AM -0400, Michael S. Tsirkin wrote: > >> >> On Thu, Mar 14, 2019 at 11:24:22AM +0000, Daniel P. Berrangé wrote: > >> >> > On Tue, Mar 12, 2019 at 12:49:35PM -0400, Michael S. Tsirkin wrote: > >> >> > > On Thu, Feb 28, 2019 at 04:53:54PM +0800, elohimes@gmail.com wrote: > >> >> > > > From: Xie Yongji <xieyongji@baidu.com> > >> >> > > > > >> >> > > > Since we now support the message VHOST_USER_GET_INFLIGHT_FD > >> >> > > > and VHOST_USER_SET_INFLIGHT_FD. The backend is able to restart > >> >> > > > safely because it can track inflight I/O in shared memory. > >> >> > > > This patch allows qemu to reconnect the backend after > >> >> > > > connection closed. > >> >> > > > > >> >> > > > Signed-off-by: Xie Yongji <xieyongji@baidu.com> > >> >> > > > Signed-off-by: Ni Xun <nixun@baidu.com> > >> >> > > > Signed-off-by: Zhang Yu <zhangyu31@baidu.com> > >> >> > > > --- > >> >> > > > hw/block/vhost-user-blk.c | 205 +++++++++++++++++++++++------ > >> >> > > > include/hw/virtio/vhost-user-blk.h | 4 + > >> >> > > > 2 files changed, 167 insertions(+), 42 deletions(-) > >> >> > > >> >> > > >> >> > > > static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp) > >> >> > > > { > >> >> > > > VirtIODevice *vdev = VIRTIO_DEVICE(dev); > >> >> > > > VHostUserBlk *s = VHOST_USER_BLK(vdev); > >> >> > > > VhostUserState *user; > >> >> > > > - struct vhost_virtqueue *vqs = NULL; > >> >> > > > int i, ret; > >> >> > > > + Error *err = NULL; > >> >> > > > > >> >> > > > if (!s->chardev.chr) { > >> >> > > > error_setg(errp, "vhost-user-blk: chardev is mandatory"); > >> >> > > > @@ -312,27 +442,28 @@ static void vhost_user_blk_device_realize(DeviceState *dev, 
Error **errp) > >> >> > > > } > >> >> > > > > >> >> > > > s->inflight = g_new0(struct vhost_inflight, 1); > >> >> > > > - > >> >> > > > - s->dev.nvqs = s->num_queues; > >> >> > > > - s->dev.vqs = g_new(struct vhost_virtqueue, s->dev.nvqs); > >> >> > > > - s->dev.vq_index = 0; > >> >> > > > - s->dev.backend_features = 0; > >> >> > > > - vqs = s->dev.vqs; > >> >> > > > - > >> >> > > > - vhost_dev_set_config_notifier(&s->dev, &blk_ops); > >> >> > > > - > >> >> > > > - ret = vhost_dev_init(&s->dev, s->vhost_user, VHOST_BACKEND_TYPE_USER, 0); > >> >> > > > - if (ret < 0) { > >> >> > > > - error_setg(errp, "vhost-user-blk: vhost initialization failed: %s", > >> >> > > > - strerror(-ret)); > >> >> > > > - goto virtio_err; > >> >> > > > - } > >> >> > > > + s->vqs = g_new(struct vhost_virtqueue, s->num_queues); > >> >> > > > + s->watch = 0; > >> >> > > > + s->should_start = false; > >> >> > > > + s->connected = false; > >> >> > > > + > >> >> > > > + qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL, vhost_user_blk_event, > >> >> > > > + NULL, (void *)dev, NULL, true); > >> >> > > > + > >> >> > > > +reconnect: > >> >> > > > + do { > >> >> > > > + if (qemu_chr_fe_wait_connected(&s->chardev, &err) < 0) { > >> >> > > > + error_report_err(err); > >> >> > > > + err = NULL; > >> >> > > > + sleep(1); > >> >> > > > >> >> > > Seems arbitrary. Is this basically waiting until backend will reconnect? > >> >> > > Why not block until event on the fd triggers? > >> >> > > > >> >> > > Also, it looks like this will just block forever with no monitor input > >> >> > > and no way for user to figure out what is going on short of > >> >> > > crashing QEMU. > >> >> > > >> >> > FWIW, the current vhost-user-net device does exactly the same thing > >> >> > with calling qemu_chr_fe_wait_connected during its realize() function. > >> >> > >> >> Hmm yes. It doesn't sleep for an arbitrary 1 sec so less of an eyesore :) > >> > > >> > The sleep(1) in this patch simply needs to be removed. 
I think that > >> > probably dates from when it was written against the earlier broken > >> > version of qemu_chr_fe_wait_connected(). That would not correctly > >> > deal with the "reconnect" flag, and so needing this loop with a sleep > >> > in it. > >> > > >> > In fact the while loop can be removed as well in this code. It just > >> > needs to call qemu_chr_fe_wait_connected() once. It is guaranteed > >> > to have a connected peer once that returns 0. > >> > > >> > qemu_chr_fe_wait_connected() only returns -1 if operating in > >> > client mode, and it failed to connect and reconnect is *not* > >> > requested. In such case the caller should honour the failure and > >> > quit, not loop to retry. > >> > > >> > The reason vhost-user-net does a loop is because once it has a > >> > connection it tries to do a protocol handshake, and if that > >> > handshake fails it closes the chardev and tries to connect > >> > again. That's not the case in this blk code so the loop is > >> > not needed. > >> > > >> > >> But vhost-user-blk also has a handshake in device realize. What happens if the > >> connection is broken during realization? IIUC we have to retry a handshake in > >> such case just like vhost-user-net. > > > > I'm just commenting on the current code which does not do that > > handshake in the loop afaict. If it needs to do that then the > > patch should be updated... > > > > Oh, yes... This loop doesn't do a handshake. Handshake is after the loop. > But now it gotos to reconnect. So maybe it makes sense to rewrite a handshake > since we don't need two nested loops to get reconnection without gotos. > Actually we do a handshake in loop like this: qemu_chr_fe_wait_connected() tcp_chr_wait_connected() tcp_chr_connect_client_sync() tcp_chr_new_client() qemu_chr_be_event(chr, CHR_EVENT_OPENED); vhost_user_blk_event() vhost_user_blk_connect() vhost_dev_init() Then I use s->connected to check the result of vhost_dev_init(). Thanks, Yongji
diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c index 9682df1a7b..539ea2e571 100644 --- a/hw/block/vhost-user-blk.c +++ b/hw/block/vhost-user-blk.c @@ -103,7 +103,7 @@ const VhostDevConfigOps blk_ops = { .vhost_dev_config_notifier = vhost_user_blk_handle_config_change, }; -static void vhost_user_blk_start(VirtIODevice *vdev) +static int vhost_user_blk_start(VirtIODevice *vdev) { VHostUserBlk *s = VHOST_USER_BLK(vdev); BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); @@ -112,13 +112,13 @@ static void vhost_user_blk_start(VirtIODevice *vdev) if (!k->set_guest_notifiers) { error_report("binding does not support guest notifiers"); - return; + return -ENOSYS; } ret = vhost_dev_enable_notifiers(&s->dev, vdev); if (ret < 0) { error_report("Error enabling host notifiers: %d", -ret); - return; + return ret; } ret = k->set_guest_notifiers(qbus->parent, s->dev.nvqs, true); @@ -157,12 +157,13 @@ static void vhost_user_blk_start(VirtIODevice *vdev) vhost_virtqueue_mask(&s->dev, vdev, i, false); } - return; + return ret; err_guest_notifiers: k->set_guest_notifiers(qbus->parent, s->dev.nvqs, false); err_host_notifiers: vhost_dev_disable_notifiers(&s->dev, vdev); + return ret; } static void vhost_user_blk_stop(VirtIODevice *vdev) @@ -181,7 +182,6 @@ static void vhost_user_blk_stop(VirtIODevice *vdev) ret = k->set_guest_notifiers(qbus->parent, s->dev.nvqs, false); if (ret < 0) { error_report("vhost guest notifier cleanup failed: %d", ret); - return; } vhost_dev_disable_notifiers(&s->dev, vdev); @@ -191,21 +191,43 @@ static void vhost_user_blk_set_status(VirtIODevice *vdev, uint8_t status) { VHostUserBlk *s = VHOST_USER_BLK(vdev); bool should_start = status & VIRTIO_CONFIG_S_DRIVER_OK; + int ret; if (!vdev->vm_running) { should_start = false; } - if (s->dev.started == should_start) { + if (s->should_start == should_start) { + return; + } + + if (!s->connected || s->dev.started == should_start) { + s->should_start = should_start; return; } if (should_start) 
{ - vhost_user_blk_start(vdev); + s->should_start = true; + /* + * make sure vhost_user_blk_handle_output() ignores fake + * guest kick by vhost_dev_enable_notifiers() + */ + barrier(); + ret = vhost_user_blk_start(vdev); + if (ret < 0) { + error_report("vhost-user-blk: vhost start failed: %s", + strerror(-ret)); + qemu_chr_fe_disconnect(&s->chardev); + } } else { vhost_user_blk_stop(vdev); + /* + * make sure vhost_user_blk_handle_output() ignore fake + * guest kick by vhost_dev_disable_notifiers() + */ + barrier(); + s->should_start = false; } - } static uint64_t vhost_user_blk_get_features(VirtIODevice *vdev, @@ -237,13 +259,22 @@ static uint64_t vhost_user_blk_get_features(VirtIODevice *vdev, static void vhost_user_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq) { VHostUserBlk *s = VHOST_USER_BLK(vdev); - int i; + int i, ret; if (!(virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1) && !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1))) { return; } + if (s->should_start) { + return; + } + s->should_start = true; + + if (!s->connected) { + return; + } + if (s->dev.started) { return; } @@ -251,7 +282,13 @@ static void vhost_user_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq) /* Some guests kick before setting VIRTIO_CONFIG_S_DRIVER_OK so start * vhost here instead of waiting for .set_status(). 
*/ - vhost_user_blk_start(vdev); + ret = vhost_user_blk_start(vdev); + if (ret < 0) { + error_report("vhost-user-blk: vhost start failed: %s", + strerror(-ret)); + qemu_chr_fe_disconnect(&s->chardev); + return; + } /* Kick right away to begin processing requests already in vring */ for (i = 0; i < s->dev.nvqs; i++) { @@ -271,13 +308,106 @@ static void vhost_user_blk_reset(VirtIODevice *vdev) vhost_dev_free_inflight(s->inflight); } +static int vhost_user_blk_connect(DeviceState *dev) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VHostUserBlk *s = VHOST_USER_BLK(vdev); + int ret = 0; + + if (s->connected) { + return 0; + } + s->connected = true; + + s->dev.nvqs = s->num_queues; + s->dev.vqs = s->vqs; + s->dev.vq_index = 0; + s->dev.backend_features = 0; + + vhost_dev_set_config_notifier(&s->dev, &blk_ops); + + ret = vhost_dev_init(&s->dev, s->vhost_user, VHOST_BACKEND_TYPE_USER, 0); + if (ret < 0) { + error_report("vhost-user-blk: vhost initialization failed: %s", + strerror(-ret)); + return ret; + } + + /* restore vhost state */ + if (s->should_start) { + ret = vhost_user_blk_start(vdev); + if (ret < 0) { + error_report("vhost-user-blk: vhost start failed: %s", + strerror(-ret)); + return ret; + } + } + + return 0; +} + +static void vhost_user_blk_disconnect(DeviceState *dev) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VHostUserBlk *s = VHOST_USER_BLK(vdev); + + if (!s->connected) { + return; + } + s->connected = false; + + if (s->dev.started) { + vhost_user_blk_stop(vdev); + } + + vhost_dev_cleanup(&s->dev); +} + +static gboolean vhost_user_blk_watch(GIOChannel *chan, GIOCondition cond, + void *opaque) +{ + DeviceState *dev = opaque; + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VHostUserBlk *s = VHOST_USER_BLK(vdev); + + qemu_chr_fe_disconnect(&s->chardev); + + return true; +} + +static void vhost_user_blk_event(void *opaque, int event) +{ + DeviceState *dev = opaque; + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VHostUserBlk *s = VHOST_USER_BLK(vdev); + + 
switch (event) { + case CHR_EVENT_OPENED: + if (vhost_user_blk_connect(dev) < 0) { + qemu_chr_fe_disconnect(&s->chardev); + return; + } + s->watch = qemu_chr_fe_add_watch(&s->chardev, G_IO_HUP, + vhost_user_blk_watch, dev); + break; + case CHR_EVENT_CLOSED: + vhost_user_blk_disconnect(dev); + if (s->watch) { + g_source_remove(s->watch); + s->watch = 0; + } + break; + } +} + + static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp) { VirtIODevice *vdev = VIRTIO_DEVICE(dev); VHostUserBlk *s = VHOST_USER_BLK(vdev); VhostUserState *user; - struct vhost_virtqueue *vqs = NULL; int i, ret; + Error *err = NULL; if (!s->chardev.chr) { error_setg(errp, "vhost-user-blk: chardev is mandatory"); @@ -312,27 +442,28 @@ static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp) } s->inflight = g_new0(struct vhost_inflight, 1); - - s->dev.nvqs = s->num_queues; - s->dev.vqs = g_new(struct vhost_virtqueue, s->dev.nvqs); - s->dev.vq_index = 0; - s->dev.backend_features = 0; - vqs = s->dev.vqs; - - vhost_dev_set_config_notifier(&s->dev, &blk_ops); - - ret = vhost_dev_init(&s->dev, s->vhost_user, VHOST_BACKEND_TYPE_USER, 0); - if (ret < 0) { - error_setg(errp, "vhost-user-blk: vhost initialization failed: %s", - strerror(-ret)); - goto virtio_err; - } + s->vqs = g_new(struct vhost_virtqueue, s->num_queues); + s->watch = 0; + s->should_start = false; + s->connected = false; + + qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL, vhost_user_blk_event, + NULL, (void *)dev, NULL, true); + +reconnect: + do { + if (qemu_chr_fe_wait_connected(&s->chardev, &err) < 0) { + error_report_err(err); + err = NULL; + sleep(1); + } + } while (!s->connected); ret = vhost_dev_get_config(&s->dev, (uint8_t *)&s->blkcfg, - sizeof(struct virtio_blk_config)); + sizeof(struct virtio_blk_config)); if (ret < 0) { - error_setg(errp, "vhost-user-blk: get block config failed"); - goto vhost_err; + error_report("vhost-user-blk: get block config failed"); + goto reconnect; } if 
(s->blkcfg.num_queues != s->num_queues) { @@ -340,29 +471,19 @@ static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp) } return; - -vhost_err: - vhost_dev_cleanup(&s->dev); -virtio_err: - g_free(vqs); - g_free(s->inflight); - virtio_cleanup(vdev); - - vhost_user_cleanup(user); - g_free(user); - s->vhost_user = NULL; } static void vhost_user_blk_device_unrealize(DeviceState *dev, Error **errp) { VirtIODevice *vdev = VIRTIO_DEVICE(dev); VHostUserBlk *s = VHOST_USER_BLK(dev); - struct vhost_virtqueue *vqs = s->dev.vqs; vhost_user_blk_set_status(vdev, 0); + qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL, NULL, + NULL, NULL, NULL, false); vhost_dev_cleanup(&s->dev); vhost_dev_free_inflight(s->inflight); - g_free(vqs); + g_free(s->vqs); g_free(s->inflight); virtio_cleanup(vdev); diff --git a/include/hw/virtio/vhost-user-blk.h b/include/hw/virtio/vhost-user-blk.h index 445516604a..4849aa5eb5 100644 --- a/include/hw/virtio/vhost-user-blk.h +++ b/include/hw/virtio/vhost-user-blk.h @@ -38,6 +38,10 @@ typedef struct VHostUserBlk { struct vhost_dev dev; struct vhost_inflight *inflight; VhostUserState *vhost_user; + struct vhost_virtqueue *vqs; + guint watch; + bool should_start; + bool connected; } VHostUserBlk; #endif