Message ID | 20220105005900.860-6-longpeng2@huawei.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | add generic vDPA device support | expand |
On Wed, Jan 05, 2022 at 08:58:55AM +0800, Longpeng(Mike) wrote: > From: Longpeng <longpeng2@huawei.com> > > Implements the .realize interface. > > Signed-off-by: Longpeng <longpeng2@huawei.com> > --- > hw/virtio/vdpa-dev.c | 114 +++++++++++++++++++++++++++++++++++ > include/hw/virtio/vdpa-dev.h | 8 +++ > 2 files changed, 122 insertions(+) > > diff --git a/hw/virtio/vdpa-dev.c b/hw/virtio/vdpa-dev.c > index 790117fb3b..2d534d837a 100644 > --- a/hw/virtio/vdpa-dev.c > +++ b/hw/virtio/vdpa-dev.c > @@ -15,9 +15,122 @@ > #include "sysemu/sysemu.h" > #include "sysemu/runstate.h" > > +static void > +vhost_vdpa_device_dummy_handle_output(VirtIODevice *vdev, VirtQueue *vq) > +{ > + /* Nothing to do */ > +} > + > +static int vdpa_dev_get_info_by_fd(int fd, uint64_t cmd, Error **errp) This looks similar to the helper function in a previous patch but this time the return value type is int instead of uint32_t. Please make the types consistent. > +{ > + int val; > + > + if (ioctl(fd, cmd, &val) < 0) { > + error_setg(errp, "vhost-vdpa-device: cmd 0x%lx failed: %s", > + cmd, strerror(errno)); > + return -1; > + } > + > + return val; > +} > + > +static inline int vdpa_dev_get_queue_size(int fd, Error **errp) > +{ > + return vdpa_dev_get_info_by_fd(fd, VHOST_VDPA_GET_VRING_NUM, errp); > +} > + > +static inline int vdpa_dev_get_vqs_num(int fd, Error **errp) > +{ > + return vdpa_dev_get_info_by_fd(fd, VHOST_VDPA_GET_VQS_NUM, errp); > +} > + > +static inline int vdpa_dev_get_config_size(int fd, Error **errp) > +{ > + return vdpa_dev_get_info_by_fd(fd, VHOST_VDPA_GET_CONFIG_SIZE, errp); > +} > + > static void vhost_vdpa_device_realize(DeviceState *dev, Error **errp) > { > + VirtIODevice *vdev = VIRTIO_DEVICE(dev); > + VhostVdpaDevice *s = VHOST_VDPA_DEVICE(vdev); > + uint32_t device_id; > + int max_queue_size; > + int fd; > + int i, ret; > + > + fd = qemu_open(s->vdpa_dev, O_RDWR, errp); > + if (fd == -1) { > + return; > + } > + s->vdpa.device_fd = fd; This is the field I suggest 
exposing as a QOM property so it can be set from the proxy object (e.g. when the PCI proxy opens the vdpa device before our .realize() function is called). > + > + max_queue_size = vdpa_dev_get_queue_size(fd, errp); > + if (*errp) { > + goto out; > + } > + > + if (s->queue_size > max_queue_size) { > + error_setg(errp, "vhost-vdpa-device: invalid queue_size: %d (max:%d)", > + s->queue_size, max_queue_size); > + goto out; > + } else if (!s->queue_size) { > + s->queue_size = max_queue_size; > + } > + > + ret = vdpa_dev_get_vqs_num(fd, errp); > + if (*errp) { > + goto out; > + } > + > + s->dev.nvqs = ret; There is no input validation because we trust the kernel vDPA return values. That seems okay for now but if there is a vhost-user version of this in the future then input validation will be necessary to achieve isolation between QEMU and the vhost-user processes. I suggest including input validation code right away because it's harder to audit the code and fix missing input validation later on. > + s->dev.vqs = g_new0(struct vhost_virtqueue, s->dev.nvqs); > + s->dev.vq_index = 0; > + s->dev.vq_index_end = s->dev.nvqs; > + s->dev.backend_features = 0; > + s->started = false; > + > + ret = vhost_dev_init(&s->dev, &s->vdpa, VHOST_BACKEND_TYPE_VDPA, 0, NULL); > + if (ret < 0) { > + error_setg(errp, "vhost-vdpa-device: vhost initialization failed: %s", > + strerror(-ret)); > + goto out; > + } > + > + ret = s->dev.vhost_ops->vhost_get_device_id(&s->dev, &device_id); The vhost_*() API abstracts the ioctl calls but this source file and the PCI proxy have ioctl calls. I wonder if it's possible to move the ioctl calls into the vhost_*() API? That would be cleaner and also make it easier to add vhost-user vDPA support in the future.
> -----Original Message----- > From: Stefan Hajnoczi [mailto:stefanha@redhat.com] > Sent: Wednesday, January 5, 2022 6:18 PM > To: Longpeng (Mike, Cloud Infrastructure Service Product Dept.) > <longpeng2@huawei.com> > Cc: mst@redhat.com; jasowang@redhat.com; sgarzare@redhat.com; > cohuck@redhat.com; pbonzini@redhat.com; Gonglei (Arei) > <arei.gonglei@huawei.com>; Yechuan <yechuan@huawei.com>; Huangzhichao > <huangzhichao@huawei.com>; qemu-devel@nongnu.org > Subject: Re: [RFC 05/10] vdpa-dev: implement the realize interface > > On Wed, Jan 05, 2022 at 08:58:55AM +0800, Longpeng(Mike) wrote: > > From: Longpeng <longpeng2@huawei.com> > > > > Implements the .realize interface. > > > > Signed-off-by: Longpeng <longpeng2@huawei.com> > > --- > > hw/virtio/vdpa-dev.c | 114 +++++++++++++++++++++++++++++++++++ > > include/hw/virtio/vdpa-dev.h | 8 +++ > > 2 files changed, 122 insertions(+) > > > > diff --git a/hw/virtio/vdpa-dev.c b/hw/virtio/vdpa-dev.c > > index 790117fb3b..2d534d837a 100644 > > --- a/hw/virtio/vdpa-dev.c > > +++ b/hw/virtio/vdpa-dev.c > > @@ -15,9 +15,122 @@ > > #include "sysemu/sysemu.h" > > #include "sysemu/runstate.h" > > > > +static void > > +vhost_vdpa_device_dummy_handle_output(VirtIODevice *vdev, VirtQueue *vq) > > +{ > > + /* Nothing to do */ > > +} > > + > > +static int vdpa_dev_get_info_by_fd(int fd, uint64_t cmd, Error **errp) > > This looks similar to the helper function in a previous patch but this > time the return value type is int instead of uint32_t. Please make the > types consistent. > OK. 
> > +{ > > + int val; > > + > > + if (ioctl(fd, cmd, &val) < 0) { > > + error_setg(errp, "vhost-vdpa-device: cmd 0x%lx failed: %s", > > + cmd, strerror(errno)); > > + return -1; > > + } > > + > > + return val; > > +} > > + > > +static inline int vdpa_dev_get_queue_size(int fd, Error **errp) > > +{ > > + return vdpa_dev_get_info_by_fd(fd, VHOST_VDPA_GET_VRING_NUM, errp); > > +} > > + > > +static inline int vdpa_dev_get_vqs_num(int fd, Error **errp) > > +{ > > + return vdpa_dev_get_info_by_fd(fd, VHOST_VDPA_GET_VQS_NUM, errp); > > +} > > + > > +static inline int vdpa_dev_get_config_size(int fd, Error **errp) > > +{ > > + return vdpa_dev_get_info_by_fd(fd, VHOST_VDPA_GET_CONFIG_SIZE, errp); > > +} > > + > > static void vhost_vdpa_device_realize(DeviceState *dev, Error **errp) > > { > > + VirtIODevice *vdev = VIRTIO_DEVICE(dev); > > + VhostVdpaDevice *s = VHOST_VDPA_DEVICE(vdev); > > + uint32_t device_id; > > + int max_queue_size; > > + int fd; > > + int i, ret; > > + > > + fd = qemu_open(s->vdpa_dev, O_RDWR, errp); > > + if (fd == -1) { > > + return; > > + } > > + s->vdpa.device_fd = fd; > > This is the field I suggest exposing as a QOM property so it can be set > from the proxy object (e.g. when the PCI proxy opens the vdpa device > before our .realize() function is called). > OK. > > + > > + max_queue_size = vdpa_dev_get_queue_size(fd, errp); > > + if (*errp) { > > + goto out; > > + } > > + > > + if (s->queue_size > max_queue_size) { > > + error_setg(errp, "vhost-vdpa-device: invalid queue_size: %d > (max:%d)", > > + s->queue_size, max_queue_size); > > + goto out; > > + } else if (!s->queue_size) { > > + s->queue_size = max_queue_size; > > + } > > + > > + ret = vdpa_dev_get_vqs_num(fd, errp); > > + if (*errp) { > > + goto out; > > + } > > + > > + s->dev.nvqs = ret; > > There is no input validation because we trust the kernel vDPA return > values. 
That seems okay for now but if there is a vhost-user version of > this in the future then input validation will be necessary to achieve > isolation between QEMU and the vhost-user processes. I suggest including > input validation code right away because it's harder to audit the code > and fix missing input validation later on. > Makes sense! Do we only need to validate the upper boundary (e.g. <VIRTIO_QUEUE_MAX)? > > + s->dev.vqs = g_new0(struct vhost_virtqueue, s->dev.nvqs); > > + s->dev.vq_index = 0; > > + s->dev.vq_index_end = s->dev.nvqs; > > + s->dev.backend_features = 0; > > + s->started = false; > > + > > + ret = vhost_dev_init(&s->dev, &s->vdpa, VHOST_BACKEND_TYPE_VDPA, 0, > NULL); > > + if (ret < 0) { > > + error_setg(errp, "vhost-vdpa-device: vhost initialization > failed: %s", > > + strerror(-ret)); > > + goto out; > > + } > > + > > + ret = s->dev.vhost_ops->vhost_get_device_id(&s->dev, &device_id); > > The vhost_*() API abstracts the ioctl calls but this source file and the > PCI proxy have ioctl calls. I wonder if it's possible to move the ioctls > calls into the vhost_*() API? That would be cleaner and also make it > easier to add vhost-user vDPA support in the future. We need these ioctl calls because we need to invoke them before the vhost-dev object is initialized.
On Thu, Jan 06, 2022 at 03:02:37AM +0000, Longpeng (Mike, Cloud Infrastructure Service Product Dept.) wrote: > > > > -----Original Message----- > > From: Stefan Hajnoczi [mailto:stefanha@redhat.com] > > Sent: Wednesday, January 5, 2022 6:18 PM > > To: Longpeng (Mike, Cloud Infrastructure Service Product Dept.) > > <longpeng2@huawei.com> > > Cc: mst@redhat.com; jasowang@redhat.com; sgarzare@redhat.com; > > cohuck@redhat.com; pbonzini@redhat.com; Gonglei (Arei) > > <arei.gonglei@huawei.com>; Yechuan <yechuan@huawei.com>; Huangzhichao > > <huangzhichao@huawei.com>; qemu-devel@nongnu.org > > Subject: Re: [RFC 05/10] vdpa-dev: implement the realize interface > > > > On Wed, Jan 05, 2022 at 08:58:55AM +0800, Longpeng(Mike) wrote: > > > From: Longpeng <longpeng2@huawei.com> > > > > > > Implements the .realize interface. > > > > > > Signed-off-by: Longpeng <longpeng2@huawei.com> > > > --- > > > hw/virtio/vdpa-dev.c | 114 +++++++++++++++++++++++++++++++++++ > > > include/hw/virtio/vdpa-dev.h | 8 +++ > > > 2 files changed, 122 insertions(+) > > > > > > diff --git a/hw/virtio/vdpa-dev.c b/hw/virtio/vdpa-dev.c > > > index 790117fb3b..2d534d837a 100644 > > > --- a/hw/virtio/vdpa-dev.c > > > +++ b/hw/virtio/vdpa-dev.c > > > @@ -15,9 +15,122 @@ > > > #include "sysemu/sysemu.h" > > > #include "sysemu/runstate.h" > > > > > > +static void > > > +vhost_vdpa_device_dummy_handle_output(VirtIODevice *vdev, VirtQueue *vq) > > > +{ > > > + /* Nothing to do */ > > > +} > > > + > > > +static int vdpa_dev_get_info_by_fd(int fd, uint64_t cmd, Error **errp) > > > > This looks similar to the helper function in a previous patch but this > > time the return value type is int instead of uint32_t. Please make the > > types consistent. > > > > OK. 
> > > > +{ > > > + int val; > > > + > > > + if (ioctl(fd, cmd, &val) < 0) { > > > + error_setg(errp, "vhost-vdpa-device: cmd 0x%lx failed: %s", > > > + cmd, strerror(errno)); > > > + return -1; > > > + } > > > + > > > + return val; > > > +} > > > + > > > +static inline int vdpa_dev_get_queue_size(int fd, Error **errp) > > > +{ > > > + return vdpa_dev_get_info_by_fd(fd, VHOST_VDPA_GET_VRING_NUM, errp); > > > +} > > > + > > > +static inline int vdpa_dev_get_vqs_num(int fd, Error **errp) > > > +{ > > > + return vdpa_dev_get_info_by_fd(fd, VHOST_VDPA_GET_VQS_NUM, errp); > > > +} > > > + > > > +static inline int vdpa_dev_get_config_size(int fd, Error **errp) > > > +{ > > > + return vdpa_dev_get_info_by_fd(fd, VHOST_VDPA_GET_CONFIG_SIZE, errp); > > > +} > > > + > > > static void vhost_vdpa_device_realize(DeviceState *dev, Error **errp) > > > { > > > + VirtIODevice *vdev = VIRTIO_DEVICE(dev); > > > + VhostVdpaDevice *s = VHOST_VDPA_DEVICE(vdev); > > > + uint32_t device_id; > > > + int max_queue_size; > > > + int fd; > > > + int i, ret; > > > + > > > + fd = qemu_open(s->vdpa_dev, O_RDWR, errp); > > > + if (fd == -1) { > > > + return; > > > + } > > > + s->vdpa.device_fd = fd; > > > > This is the field I suggest exposing as a QOM property so it can be set > > from the proxy object (e.g. when the PCI proxy opens the vdpa device > > before our .realize() function is called). > > > > OK. 
> > > > + > > > + max_queue_size = vdpa_dev_get_queue_size(fd, errp); > > > + if (*errp) { > > > + goto out; > > > + } > > > + > > > + if (s->queue_size > max_queue_size) { > > > + error_setg(errp, "vhost-vdpa-device: invalid queue_size: %d > > (max:%d)", > > > + s->queue_size, max_queue_size); > > > + goto out; > > > + } else if (!s->queue_size) { > > > + s->queue_size = max_queue_size; > > > + } > > > + > > > + ret = vdpa_dev_get_vqs_num(fd, errp); > > > + if (*errp) { > > > + goto out; > > > + } > > > + > > > + s->dev.nvqs = ret; > > > > There is no input validation because we trust the kernel vDPA return > > values. That seems okay for now but if there is a vhost-user version of > > this in the future then input validation will be necessary to achieve > > isolation between QEMU and the vhost-user processes. I suggest including > > input validation code right away because it's harder to audit the code > > and fix missing input validation later on. > > > > Make sense! > > Should we only need to validate the upper boundary (e.g. <VIRTIO_QUEUE_MAX)? Careful, ret is currently an int so negative values would bypass the < VIRTIO_QUEUE_MAX check. > > > > + s->dev.vqs = g_new0(struct vhost_virtqueue, s->dev.nvqs); > > > + s->dev.vq_index = 0; > > > + s->dev.vq_index_end = s->dev.nvqs; > > > + s->dev.backend_features = 0; > > > + s->started = false; > > > + > > > + ret = vhost_dev_init(&s->dev, &s->vdpa, VHOST_BACKEND_TYPE_VDPA, 0, > > NULL); > > > + if (ret < 0) { > > > + error_setg(errp, "vhost-vdpa-device: vhost initialization > > failed: %s", > > > + strerror(-ret)); > > > + goto out; > > > + } > > > + > > > + ret = s->dev.vhost_ops->vhost_get_device_id(&s->dev, &device_id); > > > > The vhost_*() API abstracts the ioctl calls but this source file and the > > PCI proxy have ioctl calls. I wonder if it's possible to move the ioctls > > calls into the vhost_*() API? That would be cleaner and also make it > > easier to add vhost-user vDPA support in the future. 
> > We need these ioctls calls because we need invoke them before the vhost-dev > object is initialized. It may be possible to clean this up by changing how vhost_dev_init() works but I haven't investigated. The issue is that the vhost_dev_init() API requires information from the caller that has to be fetched from the vDPA device. This forces the caller to communicate directly with the vDPA device before calling vhost_dev_init(). It may be possible to move this setup code inside vhost_dev_init() (and vhost_ops callbacks). Stefan
> -----Original Message----- > From: Stefan Hajnoczi [mailto:stefanha@redhat.com] > Sent: Thursday, January 6, 2022 7:34 PM > To: Longpeng (Mike, Cloud Infrastructure Service Product Dept.) > <longpeng2@huawei.com> > Cc: mst@redhat.com; jasowang@redhat.com; sgarzare@redhat.com; > cohuck@redhat.com; pbonzini@redhat.com; Gonglei (Arei) > <arei.gonglei@huawei.com>; Yechuan <yechuan@huawei.com>; Huangzhichao > <huangzhichao@huawei.com>; qemu-devel@nongnu.org > Subject: Re: [RFC 05/10] vdpa-dev: implement the realize interface > > On Thu, Jan 06, 2022 at 03:02:37AM +0000, Longpeng (Mike, Cloud Infrastructure > Service Product Dept.) wrote: > > > > > > > -----Original Message----- > > > From: Stefan Hajnoczi [mailto:stefanha@redhat.com] > > > Sent: Wednesday, January 5, 2022 6:18 PM > > > To: Longpeng (Mike, Cloud Infrastructure Service Product Dept.) > > > <longpeng2@huawei.com> > > > Cc: mst@redhat.com; jasowang@redhat.com; sgarzare@redhat.com; > > > cohuck@redhat.com; pbonzini@redhat.com; Gonglei (Arei) > > > <arei.gonglei@huawei.com>; Yechuan <yechuan@huawei.com>; Huangzhichao > > > <huangzhichao@huawei.com>; qemu-devel@nongnu.org > > > Subject: Re: [RFC 05/10] vdpa-dev: implement the realize interface > > > > > > On Wed, Jan 05, 2022 at 08:58:55AM +0800, Longpeng(Mike) wrote: > > > > From: Longpeng <longpeng2@huawei.com> > > > > > > > > Implements the .realize interface. 
> > > > > > > > Signed-off-by: Longpeng <longpeng2@huawei.com> > > > > --- > > > > hw/virtio/vdpa-dev.c | 114 +++++++++++++++++++++++++++++++++++ > > > > include/hw/virtio/vdpa-dev.h | 8 +++ > > > > 2 files changed, 122 insertions(+) > > > > > > > > diff --git a/hw/virtio/vdpa-dev.c b/hw/virtio/vdpa-dev.c > > > > index 790117fb3b..2d534d837a 100644 > > > > --- a/hw/virtio/vdpa-dev.c > > > > +++ b/hw/virtio/vdpa-dev.c > > > > @@ -15,9 +15,122 @@ > > > > #include "sysemu/sysemu.h" > > > > #include "sysemu/runstate.h" > > > > > > > > +static void > > > > +vhost_vdpa_device_dummy_handle_output(VirtIODevice *vdev, VirtQueue > *vq) > > > > +{ > > > > + /* Nothing to do */ > > > > +} > > > > + > > > > +static int vdpa_dev_get_info_by_fd(int fd, uint64_t cmd, Error **errp) > > > > > > This looks similar to the helper function in a previous patch but this > > > time the return value type is int instead of uint32_t. Please make the > > > types consistent. > > > > > > > OK. > > > > > > +{ > > > > + int val; > > > > + > > > > + if (ioctl(fd, cmd, &val) < 0) { > > > > + error_setg(errp, "vhost-vdpa-device: cmd 0x%lx failed: %s", > > > > + cmd, strerror(errno)); > > > > + return -1; > > > > + } > > > > + > > > > + return val; > > > > +} > > > > + > > > > +static inline int vdpa_dev_get_queue_size(int fd, Error **errp) > > > > +{ > > > > + return vdpa_dev_get_info_by_fd(fd, VHOST_VDPA_GET_VRING_NUM, errp); > > > > +} > > > > + > > > > +static inline int vdpa_dev_get_vqs_num(int fd, Error **errp) > > > > +{ > > > > + return vdpa_dev_get_info_by_fd(fd, VHOST_VDPA_GET_VQS_NUM, errp); > > > > +} > > > > + > > > > +static inline int vdpa_dev_get_config_size(int fd, Error **errp) > > > > +{ > > > > + return vdpa_dev_get_info_by_fd(fd, VHOST_VDPA_GET_CONFIG_SIZE, > errp); > > > > +} > > > > + > > > > static void vhost_vdpa_device_realize(DeviceState *dev, Error **errp) > > > > { > > > > + VirtIODevice *vdev = VIRTIO_DEVICE(dev); > > > > + VhostVdpaDevice *s = VHOST_VDPA_DEVICE(vdev); > 
> > > + uint32_t device_id; > > > > + int max_queue_size; > > > > + int fd; > > > > + int i, ret; > > > > + > > > > + fd = qemu_open(s->vdpa_dev, O_RDWR, errp); > > > > + if (fd == -1) { > > > > + return; > > > > + } > > > > + s->vdpa.device_fd = fd; > > > > > > This is the field I suggest exposing as a QOM property so it can be set > > > from the proxy object (e.g. when the PCI proxy opens the vdpa device > > > before our .realize() function is called). > > > > > > > OK. > > > > > > + > > > > + max_queue_size = vdpa_dev_get_queue_size(fd, errp); > > > > + if (*errp) { > > > > + goto out; > > > > + } > > > > + > > > > + if (s->queue_size > max_queue_size) { > > > > + error_setg(errp, "vhost-vdpa-device: invalid queue_size: %d > > > (max:%d)", > > > > + s->queue_size, max_queue_size); > > > > + goto out; > > > > + } else if (!s->queue_size) { > > > > + s->queue_size = max_queue_size; > > > > + } > > > > + > > > > + ret = vdpa_dev_get_vqs_num(fd, errp); > > > > + if (*errp) { > > > > + goto out; > > > > + } > > > > + > > > > + s->dev.nvqs = ret; > > > > > > There is no input validation because we trust the kernel vDPA return > > > values. That seems okay for now but if there is a vhost-user version of > > > this in the future then input validation will be necessary to achieve > > > isolation between QEMU and the vhost-user processes. I suggest including > > > input validation code right away because it's harder to audit the code > > > and fix missing input validation later on. > > > > > > > Make sense! > > > > Should we only need to validate the upper boundary (e.g. <VIRTIO_QUEUE_MAX)? > > Careful, ret is currently an int so negative values would bypass the < > VIRTIO_QUEUE_MAX check. 
> > > > > > > + s->dev.vqs = g_new0(struct vhost_virtqueue, s->dev.nvqs); > > > > + s->dev.vq_index = 0; > > > > + s->dev.vq_index_end = s->dev.nvqs; > > > > + s->dev.backend_features = 0; > > > > + s->started = false; > > > > + > > > > + ret = vhost_dev_init(&s->dev, &s->vdpa, VHOST_BACKEND_TYPE_VDPA, 0, > > > NULL); > > > > + if (ret < 0) { > > > > + error_setg(errp, "vhost-vdpa-device: vhost initialization > > > failed: %s", > > > > + strerror(-ret)); > > > > + goto out; > > > > + } > > > > + > > > > + ret = s->dev.vhost_ops->vhost_get_device_id(&s->dev, &device_id); > > > > > > The vhost_*() API abstracts the ioctl calls but this source file and the > > > PCI proxy have ioctl calls. I wonder if it's possible to move the ioctls > > > calls into the vhost_*() API? That would be cleaner and also make it > > > easier to add vhost-user vDPA support in the future. > > > > We need these ioctls calls because we need invoke them before the vhost-dev > > object is initialized. > > It may be possible to clean this up by changing how vhost_dev_init() > works but I haven't investigated. The issue is that the vhost_dev_init() > API requires information from the caller that has to be fetched from the > vDPA device. This forces the caller to communicate directly with the > vDPA device before calling vhost_dev_init(). It may be possible to move > this setup code inside vhost_dev_init() (and vhost_ops callbacks). > Hmm, this is still not clear to me, so let's continue to discuss this in v2 if you think it's necessary. > Stefan
On Mon, Jan 17, 2022 at 12:34:50PM +0000, Longpeng (Mike, Cloud Infrastructure Service Product Dept.) wrote: > > > > -----Original Message----- > > From: Stefan Hajnoczi [mailto:stefanha@redhat.com] > > Sent: Thursday, January 6, 2022 7:34 PM > > To: Longpeng (Mike, Cloud Infrastructure Service Product Dept.) > > <longpeng2@huawei.com> > > Cc: mst@redhat.com; jasowang@redhat.com; sgarzare@redhat.com; > > cohuck@redhat.com; pbonzini@redhat.com; Gonglei (Arei) > > <arei.gonglei@huawei.com>; Yechuan <yechuan@huawei.com>; Huangzhichao > > <huangzhichao@huawei.com>; qemu-devel@nongnu.org > > Subject: Re: [RFC 05/10] vdpa-dev: implement the realize interface > > > > On Thu, Jan 06, 2022 at 03:02:37AM +0000, Longpeng (Mike, Cloud Infrastructure > > Service Product Dept.) wrote: > > > > > > > > > > -----Original Message----- > > > > From: Stefan Hajnoczi [mailto:stefanha@redhat.com] > > > > Sent: Wednesday, January 5, 2022 6:18 PM > > > > To: Longpeng (Mike, Cloud Infrastructure Service Product Dept.) > > > > <longpeng2@huawei.com> > > > > Cc: mst@redhat.com; jasowang@redhat.com; sgarzare@redhat.com; > > > > cohuck@redhat.com; pbonzini@redhat.com; Gonglei (Arei) > > > > <arei.gonglei@huawei.com>; Yechuan <yechuan@huawei.com>; Huangzhichao > > > > <huangzhichao@huawei.com>; qemu-devel@nongnu.org > > > > Subject: Re: [RFC 05/10] vdpa-dev: implement the realize interface > > > > > > > > On Wed, Jan 05, 2022 at 08:58:55AM +0800, Longpeng(Mike) wrote: > > > > > From: Longpeng <longpeng2@huawei.com> > > > > > > > > > > Implements the .realize interface. 
> > > > > > > > > > Signed-off-by: Longpeng <longpeng2@huawei.com> > > > > > --- > > > > > hw/virtio/vdpa-dev.c | 114 +++++++++++++++++++++++++++++++++++ > > > > > include/hw/virtio/vdpa-dev.h | 8 +++ > > > > > 2 files changed, 122 insertions(+) > > > > > > > > > > diff --git a/hw/virtio/vdpa-dev.c b/hw/virtio/vdpa-dev.c > > > > > index 790117fb3b..2d534d837a 100644 > > > > > --- a/hw/virtio/vdpa-dev.c > > > > > +++ b/hw/virtio/vdpa-dev.c > > > > > @@ -15,9 +15,122 @@ > > > > > #include "sysemu/sysemu.h" > > > > > #include "sysemu/runstate.h" > > > > > > > > > > +static void > > > > > +vhost_vdpa_device_dummy_handle_output(VirtIODevice *vdev, VirtQueue > > *vq) > > > > > +{ > > > > > + /* Nothing to do */ > > > > > +} > > > > > + > > > > > +static int vdpa_dev_get_info_by_fd(int fd, uint64_t cmd, Error **errp) > > > > > > > > This looks similar to the helper function in a previous patch but this > > > > time the return value type is int instead of uint32_t. Please make the > > > > types consistent. > > > > > > > > > > OK. 
> > > > > > > > +{ > > > > > + int val; > > > > > + > > > > > + if (ioctl(fd, cmd, &val) < 0) { > > > > > + error_setg(errp, "vhost-vdpa-device: cmd 0x%lx failed: %s", > > > > > + cmd, strerror(errno)); > > > > > + return -1; > > > > > + } > > > > > + > > > > > + return val; > > > > > +} > > > > > + > > > > > +static inline int vdpa_dev_get_queue_size(int fd, Error **errp) > > > > > +{ > > > > > + return vdpa_dev_get_info_by_fd(fd, VHOST_VDPA_GET_VRING_NUM, errp); > > > > > +} > > > > > + > > > > > +static inline int vdpa_dev_get_vqs_num(int fd, Error **errp) > > > > > +{ > > > > > + return vdpa_dev_get_info_by_fd(fd, VHOST_VDPA_GET_VQS_NUM, errp); > > > > > +} > > > > > + > > > > > +static inline int vdpa_dev_get_config_size(int fd, Error **errp) > > > > > +{ > > > > > + return vdpa_dev_get_info_by_fd(fd, VHOST_VDPA_GET_CONFIG_SIZE, > > errp); > > > > > +} > > > > > + > > > > > static void vhost_vdpa_device_realize(DeviceState *dev, Error **errp) > > > > > { > > > > > + VirtIODevice *vdev = VIRTIO_DEVICE(dev); > > > > > + VhostVdpaDevice *s = VHOST_VDPA_DEVICE(vdev); > > > > > + uint32_t device_id; > > > > > + int max_queue_size; > > > > > + int fd; > > > > > + int i, ret; > > > > > + > > > > > + fd = qemu_open(s->vdpa_dev, O_RDWR, errp); > > > > > + if (fd == -1) { > > > > > + return; > > > > > + } > > > > > + s->vdpa.device_fd = fd; > > > > > > > > This is the field I suggest exposing as a QOM property so it can be set > > > > from the proxy object (e.g. when the PCI proxy opens the vdpa device > > > > before our .realize() function is called). > > > > > > > > > > OK. 
> > > > > > > > + > > > > > + max_queue_size = vdpa_dev_get_queue_size(fd, errp); > > > > > + if (*errp) { > > > > > + goto out; > > > > > + } > > > > > + > > > > > + if (s->queue_size > max_queue_size) { > > > > > + error_setg(errp, "vhost-vdpa-device: invalid queue_size: %d > > > > (max:%d)", > > > > > + s->queue_size, max_queue_size); > > > > > + goto out; > > > > > + } else if (!s->queue_size) { > > > > > + s->queue_size = max_queue_size; > > > > > + } > > > > > + > > > > > + ret = vdpa_dev_get_vqs_num(fd, errp); > > > > > + if (*errp) { > > > > > + goto out; > > > > > + } > > > > > + > > > > > + s->dev.nvqs = ret; > > > > > > > > There is no input validation because we trust the kernel vDPA return > > > > values. That seems okay for now but if there is a vhost-user version of > > > > this in the future then input validation will be necessary to achieve > > > > isolation between QEMU and the vhost-user processes. I suggest including > > > > input validation code right away because it's harder to audit the code > > > > and fix missing input validation later on. > > > > > > > > > > Make sense! > > > > > > Should we only need to validate the upper boundary (e.g. <VIRTIO_QUEUE_MAX)? > > > > Careful, ret is currently an int so negative values would bypass the < > > VIRTIO_QUEUE_MAX check. 
> > > > > > > > > > + s->dev.vqs = g_new0(struct vhost_virtqueue, s->dev.nvqs); > > > > > + s->dev.vq_index = 0; > > > > > + s->dev.vq_index_end = s->dev.nvqs; > > > > > + s->dev.backend_features = 0; > > > > > + s->started = false; > > > > > + > > > > > + ret = vhost_dev_init(&s->dev, &s->vdpa, VHOST_BACKEND_TYPE_VDPA, 0, > > > > NULL); > > > > > + if (ret < 0) { > > > > > + error_setg(errp, "vhost-vdpa-device: vhost initialization > > > > failed: %s", > > > > > + strerror(-ret)); > > > > > + goto out; > > > > > + } > > > > > + > > > > > + ret = s->dev.vhost_ops->vhost_get_device_id(&s->dev, &device_id); > > > > > > > > The vhost_*() API abstracts the ioctl calls but this source file and the > > > > PCI proxy have ioctl calls. I wonder if it's possible to move the ioctls > > > > calls into the vhost_*() API? That would be cleaner and also make it > > > > easier to add vhost-user vDPA support in the future. > > > > > > We need these ioctls calls because we need invoke them before the vhost-dev > > > object is initialized. > > > > It may be possible to clean this up by changing how vhost_dev_init() > > works but I haven't investigated. The issue is that the vhost_dev_init() > > API requires information from the caller that has to be fetched from the > > vDPA device. This forces the caller to communicate directly with the > > vDPA device before calling vhost_dev_init(). It may be possible to move > > this setup code inside vhost_dev_init() (and vhost_ops callbacks). > > > > Hmm, this is still not clear to me, so let's continue to discuss this > in v2 if you think it's necessary. Okay. Stefan
diff --git a/hw/virtio/vdpa-dev.c b/hw/virtio/vdpa-dev.c index 790117fb3b..2d534d837a 100644 --- a/hw/virtio/vdpa-dev.c +++ b/hw/virtio/vdpa-dev.c @@ -15,9 +15,122 @@ #include "sysemu/sysemu.h" #include "sysemu/runstate.h" +static void +vhost_vdpa_device_dummy_handle_output(VirtIODevice *vdev, VirtQueue *vq) +{ + /* Nothing to do */ +} + +static int vdpa_dev_get_info_by_fd(int fd, uint64_t cmd, Error **errp) +{ + int val; + + if (ioctl(fd, cmd, &val) < 0) { + error_setg(errp, "vhost-vdpa-device: cmd 0x%lx failed: %s", + cmd, strerror(errno)); + return -1; + } + + return val; +} + +static inline int vdpa_dev_get_queue_size(int fd, Error **errp) +{ + return vdpa_dev_get_info_by_fd(fd, VHOST_VDPA_GET_VRING_NUM, errp); +} + +static inline int vdpa_dev_get_vqs_num(int fd, Error **errp) +{ + return vdpa_dev_get_info_by_fd(fd, VHOST_VDPA_GET_VQS_NUM, errp); +} + +static inline int vdpa_dev_get_config_size(int fd, Error **errp) +{ + return vdpa_dev_get_info_by_fd(fd, VHOST_VDPA_GET_CONFIG_SIZE, errp); +} + static void vhost_vdpa_device_realize(DeviceState *dev, Error **errp) { + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VhostVdpaDevice *s = VHOST_VDPA_DEVICE(vdev); + uint32_t device_id; + int max_queue_size; + int fd; + int i, ret; + + fd = qemu_open(s->vdpa_dev, O_RDWR, errp); + if (fd == -1) { + return; + } + s->vdpa.device_fd = fd; + + max_queue_size = vdpa_dev_get_queue_size(fd, errp); + if (*errp) { + goto out; + } + + if (s->queue_size > max_queue_size) { + error_setg(errp, "vhost-vdpa-device: invalid queue_size: %d (max:%d)", + s->queue_size, max_queue_size); + goto out; + } else if (!s->queue_size) { + s->queue_size = max_queue_size; + } + + ret = vdpa_dev_get_vqs_num(fd, errp); + if (*errp) { + goto out; + } + + s->dev.nvqs = ret; + s->dev.vqs = g_new0(struct vhost_virtqueue, s->dev.nvqs); + s->dev.vq_index = 0; + s->dev.vq_index_end = s->dev.nvqs; + s->dev.backend_features = 0; + s->started = false; + + ret = vhost_dev_init(&s->dev, &s->vdpa, 
VHOST_BACKEND_TYPE_VDPA, 0, NULL); + if (ret < 0) { + error_setg(errp, "vhost-vdpa-device: vhost initialization failed: %s", + strerror(-ret)); + goto out; + } + + ret = s->dev.vhost_ops->vhost_get_device_id(&s->dev, &device_id); + if (ret < 0) { + error_setg(errp, "vhost-vdpa-device: vhost get device id failed: %s", + strerror(-ret)); + goto vhost_cleanup; + } + + s->config_size = vdpa_dev_get_config_size(fd, errp); + if (*errp) { + goto vhost_cleanup; + } + + s->config = g_malloc0(s->config_size); + + ret = vhost_dev_get_config(&s->dev, s->config, s->config_size, NULL); + if (ret < 0) { + error_setg(errp, "vhost-vdpa-device: get config failed"); + goto config_err; + } + + virtio_init(vdev, "vhost-vdpa", device_id, s->config_size); + + s->virtqs = g_new0(VirtQueue *, s->dev.nvqs); + for (i = 0; i < s->dev.nvqs; i++) { + s->virtqs[i] = virtio_add_queue(vdev, s->queue_size, + vhost_vdpa_device_dummy_handle_output); + } + return; +config_err: + g_free(s->config); +vhost_cleanup: + vhost_dev_cleanup(&s->dev); +out: + close(fd); } static void vhost_vdpa_device_unrealize(DeviceState *dev) @@ -51,6 +164,7 @@ static void vhost_vdpa_device_set_status(VirtIODevice *vdev, uint8_t status) static Property vhost_vdpa_device_properties[] = { DEFINE_PROP_STRING("vdpa-dev", VhostVdpaDevice, vdpa_dev), + DEFINE_PROP_UINT16("queue-size", VhostVdpaDevice, queue_size, 0), DEFINE_PROP_END_OF_LIST(), }; diff --git a/include/hw/virtio/vdpa-dev.h b/include/hw/virtio/vdpa-dev.h index 7a0e6bdcf8..49f8145d61 100644 --- a/include/hw/virtio/vdpa-dev.h +++ b/include/hw/virtio/vdpa-dev.h @@ -13,6 +13,14 @@ struct VhostVdpaDevice { VirtIODevice parent_obj; char *vdpa_dev; int32_t bootindex; + struct vhost_dev dev; + struct vhost_vdpa vdpa; + VirtQueue **virtqs; + uint8_t *config; + int config_size; + uint32_t num_queues; + uint16_t queue_size; + bool started; }; #endif