diff mbox series

[RFC,05/10] vdpa-dev: implement the realize interface

Message ID 20220105005900.860-6-longpeng2@huawei.com (mailing list archive)
State New, archived
Headers show
Series add generic vDPA device support | expand

Commit Message

Longpeng(Mike) Jan. 5, 2022, 12:58 a.m. UTC
From: Longpeng <longpeng2@huawei.com>

Implements the .realize interface.

Signed-off-by: Longpeng <longpeng2@huawei.com>
---
 hw/virtio/vdpa-dev.c         | 114 +++++++++++++++++++++++++++++++++++
 include/hw/virtio/vdpa-dev.h |   8 +++
 2 files changed, 122 insertions(+)

Comments

Stefan Hajnoczi Jan. 5, 2022, 10:17 a.m. UTC | #1
On Wed, Jan 05, 2022 at 08:58:55AM +0800, Longpeng(Mike) wrote:
> From: Longpeng <longpeng2@huawei.com>
> 
> Implements the .realize interface.
> 
> Signed-off-by: Longpeng <longpeng2@huawei.com>
> ---
>  hw/virtio/vdpa-dev.c         | 114 +++++++++++++++++++++++++++++++++++
>  include/hw/virtio/vdpa-dev.h |   8 +++
>  2 files changed, 122 insertions(+)
> 
> diff --git a/hw/virtio/vdpa-dev.c b/hw/virtio/vdpa-dev.c
> index 790117fb3b..2d534d837a 100644
> --- a/hw/virtio/vdpa-dev.c
> +++ b/hw/virtio/vdpa-dev.c
> @@ -15,9 +15,122 @@
>  #include "sysemu/sysemu.h"
>  #include "sysemu/runstate.h"
>  
> +static void
> +vhost_vdpa_device_dummy_handle_output(VirtIODevice *vdev, VirtQueue *vq)
> +{
> +    /* Nothing to do */
> +}
> +
> +static int vdpa_dev_get_info_by_fd(int fd, uint64_t cmd, Error **errp)

This looks similar to the helper function in a previous patch but this
time the return value type is int instead of uint32_t. Please make the
types consistent.

> +{
> +    int val;
> +
> +    if (ioctl(fd, cmd, &val) < 0) {
> +        error_setg(errp, "vhost-vdpa-device: cmd 0x%lx failed: %s",
> +                   cmd, strerror(errno));
> +        return -1;
> +    }
> +
> +    return val;
> +}
> +
> +static inline int vdpa_dev_get_queue_size(int fd, Error **errp)
> +{
> +    return vdpa_dev_get_info_by_fd(fd, VHOST_VDPA_GET_VRING_NUM, errp);
> +}
> +
> +static inline int vdpa_dev_get_vqs_num(int fd, Error **errp)
> +{
> +    return vdpa_dev_get_info_by_fd(fd, VHOST_VDPA_GET_VQS_NUM, errp);
> +}
> +
> +static inline int vdpa_dev_get_config_size(int fd, Error **errp)
> +{
> +    return vdpa_dev_get_info_by_fd(fd, VHOST_VDPA_GET_CONFIG_SIZE, errp);
> +}
> +
>  static void vhost_vdpa_device_realize(DeviceState *dev, Error **errp)
>  {
> +    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> +    VhostVdpaDevice *s = VHOST_VDPA_DEVICE(vdev);
> +    uint32_t device_id;
> +    int max_queue_size;
> +    int fd;
> +    int i, ret;
> +
> +    fd = qemu_open(s->vdpa_dev, O_RDWR, errp);
> +    if (fd == -1) {
> +        return;
> +    }
> +    s->vdpa.device_fd = fd;

This is the field I suggest exposing as a QOM property so it can be set
from the proxy object (e.g. when the PCI proxy opens the vdpa device
before our .realize() function is called).

> +
> +    max_queue_size = vdpa_dev_get_queue_size(fd, errp);
> +    if (*errp) {
> +        goto out;
> +    }
> +
> +    if (s->queue_size > max_queue_size) {
> +        error_setg(errp, "vhost-vdpa-device: invalid queue_size: %d (max:%d)",
> +                   s->queue_size, max_queue_size);
> +        goto out;
> +    } else if (!s->queue_size) {
> +        s->queue_size = max_queue_size;
> +    }
> +
> +    ret = vdpa_dev_get_vqs_num(fd, errp);
> +    if (*errp) {
> +        goto out;
> +    }
> +
> +    s->dev.nvqs = ret;

There is no input validation because we trust the kernel vDPA return
values. That seems okay for now but if there is a vhost-user version of
this in the future then input validation will be necessary to achieve
isolation between QEMU and the vhost-user processes. I suggest including
input validation code right away because it's harder to audit the code
and fix missing input validation later on.

> +    s->dev.vqs = g_new0(struct vhost_virtqueue, s->dev.nvqs);
> +    s->dev.vq_index = 0;
> +    s->dev.vq_index_end = s->dev.nvqs;
> +    s->dev.backend_features = 0;
> +    s->started = false;
> +
> +    ret = vhost_dev_init(&s->dev, &s->vdpa, VHOST_BACKEND_TYPE_VDPA, 0, NULL);
> +    if (ret < 0) {
> +        error_setg(errp, "vhost-vdpa-device: vhost initialization failed: %s",
> +                   strerror(-ret));
> +        goto out;
> +    }
> +
> +    ret = s->dev.vhost_ops->vhost_get_device_id(&s->dev, &device_id);

The vhost_*() API abstracts the ioctl calls but this source file and the
PCI proxy have ioctl calls. I wonder if it's possible to move the ioctl
calls into the vhost_*() API? That would be cleaner and also make it
easier to add vhost-user vDPA support in the future.
Longpeng (Mike, Cloud Infrastructure Service Product Dept.) Jan. 6, 2022, 3:02 a.m. UTC | #2
> -----Original Message-----
> From: Stefan Hajnoczi [mailto:stefanha@redhat.com]
> Sent: Wednesday, January 5, 2022 6:18 PM
> To: Longpeng (Mike, Cloud Infrastructure Service Product Dept.)
> <longpeng2@huawei.com>
> Cc: mst@redhat.com; jasowang@redhat.com; sgarzare@redhat.com;
> cohuck@redhat.com; pbonzini@redhat.com; Gonglei (Arei)
> <arei.gonglei@huawei.com>; Yechuan <yechuan@huawei.com>; Huangzhichao
> <huangzhichao@huawei.com>; qemu-devel@nongnu.org
> Subject: Re: [RFC 05/10] vdpa-dev: implement the realize interface
> 
> On Wed, Jan 05, 2022 at 08:58:55AM +0800, Longpeng(Mike) wrote:
> > From: Longpeng <longpeng2@huawei.com>
> >
> > Implements the .realize interface.
> >
> > Signed-off-by: Longpeng <longpeng2@huawei.com>
> > ---
> >  hw/virtio/vdpa-dev.c         | 114 +++++++++++++++++++++++++++++++++++
> >  include/hw/virtio/vdpa-dev.h |   8 +++
> >  2 files changed, 122 insertions(+)
> >
> > diff --git a/hw/virtio/vdpa-dev.c b/hw/virtio/vdpa-dev.c
> > index 790117fb3b..2d534d837a 100644
> > --- a/hw/virtio/vdpa-dev.c
> > +++ b/hw/virtio/vdpa-dev.c
> > @@ -15,9 +15,122 @@
> >  #include "sysemu/sysemu.h"
> >  #include "sysemu/runstate.h"
> >
> > +static void
> > +vhost_vdpa_device_dummy_handle_output(VirtIODevice *vdev, VirtQueue *vq)
> > +{
> > +    /* Nothing to do */
> > +}
> > +
> > +static int vdpa_dev_get_info_by_fd(int fd, uint64_t cmd, Error **errp)
> 
> This looks similar to the helper function in a previous patch but this
> time the return value type is int instead of uint32_t. Please make the
> types consistent.
> 

OK.

> > +{
> > +    int val;
> > +
> > +    if (ioctl(fd, cmd, &val) < 0) {
> > +        error_setg(errp, "vhost-vdpa-device: cmd 0x%lx failed: %s",
> > +                   cmd, strerror(errno));
> > +        return -1;
> > +    }
> > +
> > +    return val;
> > +}
> > +
> > +static inline int vdpa_dev_get_queue_size(int fd, Error **errp)
> > +{
> > +    return vdpa_dev_get_info_by_fd(fd, VHOST_VDPA_GET_VRING_NUM, errp);
> > +}
> > +
> > +static inline int vdpa_dev_get_vqs_num(int fd, Error **errp)
> > +{
> > +    return vdpa_dev_get_info_by_fd(fd, VHOST_VDPA_GET_VQS_NUM, errp);
> > +}
> > +
> > +static inline int vdpa_dev_get_config_size(int fd, Error **errp)
> > +{
> > +    return vdpa_dev_get_info_by_fd(fd, VHOST_VDPA_GET_CONFIG_SIZE, errp);
> > +}
> > +
> >  static void vhost_vdpa_device_realize(DeviceState *dev, Error **errp)
> >  {
> > +    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> > +    VhostVdpaDevice *s = VHOST_VDPA_DEVICE(vdev);
> > +    uint32_t device_id;
> > +    int max_queue_size;
> > +    int fd;
> > +    int i, ret;
> > +
> > +    fd = qemu_open(s->vdpa_dev, O_RDWR, errp);
> > +    if (fd == -1) {
> > +        return;
> > +    }
> > +    s->vdpa.device_fd = fd;
> 
> This is the field I suggest exposing as a QOM property so it can be set
> from the proxy object (e.g. when the PCI proxy opens the vdpa device
> before our .realize() function is called).
> 

OK.

> > +
> > +    max_queue_size = vdpa_dev_get_queue_size(fd, errp);
> > +    if (*errp) {
> > +        goto out;
> > +    }
> > +
> > +    if (s->queue_size > max_queue_size) {
> > +        error_setg(errp, "vhost-vdpa-device: invalid queue_size: %d
> (max:%d)",
> > +                   s->queue_size, max_queue_size);
> > +        goto out;
> > +    } else if (!s->queue_size) {
> > +        s->queue_size = max_queue_size;
> > +    }
> > +
> > +    ret = vdpa_dev_get_vqs_num(fd, errp);
> > +    if (*errp) {
> > +        goto out;
> > +    }
> > +
> > +    s->dev.nvqs = ret;
> 
> There is no input validation because we trust the kernel vDPA return
> values. That seems okay for now but if there is a vhost-user version of
> this in the future then input validation will be necessary to achieve
> isolation between QEMU and the vhost-user processes. I suggest including
> input validation code right away because it's harder to audit the code
> and fix missing input validation later on.
> 

Makes sense!

Do we only need to validate the upper boundary (e.g. < VIRTIO_QUEUE_MAX)?

> > +    s->dev.vqs = g_new0(struct vhost_virtqueue, s->dev.nvqs);
> > +    s->dev.vq_index = 0;
> > +    s->dev.vq_index_end = s->dev.nvqs;
> > +    s->dev.backend_features = 0;
> > +    s->started = false;
> > +
> > +    ret = vhost_dev_init(&s->dev, &s->vdpa, VHOST_BACKEND_TYPE_VDPA, 0,
> NULL);
> > +    if (ret < 0) {
> > +        error_setg(errp, "vhost-vdpa-device: vhost initialization
> failed: %s",
> > +                   strerror(-ret));
> > +        goto out;
> > +    }
> > +
> > +    ret = s->dev.vhost_ops->vhost_get_device_id(&s->dev, &device_id);
> 
> The vhost_*() API abstracts the ioctl calls but this source file and the
> PCI proxy have ioctl calls. I wonder if it's possible to move the ioctls
> calls into the vhost_*() API? That would be cleaner and also make it
> easier to add vhost-user vDPA support in the future.

We need these ioctl calls because we need to invoke them before the vhost-dev
object is initialized.
Stefan Hajnoczi Jan. 6, 2022, 11:34 a.m. UTC | #3
On Thu, Jan 06, 2022 at 03:02:37AM +0000, Longpeng (Mike, Cloud Infrastructure Service Product Dept.) wrote:
> 
> 
> > -----Original Message-----
> > From: Stefan Hajnoczi [mailto:stefanha@redhat.com]
> > Sent: Wednesday, January 5, 2022 6:18 PM
> > To: Longpeng (Mike, Cloud Infrastructure Service Product Dept.)
> > <longpeng2@huawei.com>
> > Cc: mst@redhat.com; jasowang@redhat.com; sgarzare@redhat.com;
> > cohuck@redhat.com; pbonzini@redhat.com; Gonglei (Arei)
> > <arei.gonglei@huawei.com>; Yechuan <yechuan@huawei.com>; Huangzhichao
> > <huangzhichao@huawei.com>; qemu-devel@nongnu.org
> > Subject: Re: [RFC 05/10] vdpa-dev: implement the realize interface
> > 
> > On Wed, Jan 05, 2022 at 08:58:55AM +0800, Longpeng(Mike) wrote:
> > > From: Longpeng <longpeng2@huawei.com>
> > >
> > > Implements the .realize interface.
> > >
> > > Signed-off-by: Longpeng <longpeng2@huawei.com>
> > > ---
> > >  hw/virtio/vdpa-dev.c         | 114 +++++++++++++++++++++++++++++++++++
> > >  include/hw/virtio/vdpa-dev.h |   8 +++
> > >  2 files changed, 122 insertions(+)
> > >
> > > diff --git a/hw/virtio/vdpa-dev.c b/hw/virtio/vdpa-dev.c
> > > index 790117fb3b..2d534d837a 100644
> > > --- a/hw/virtio/vdpa-dev.c
> > > +++ b/hw/virtio/vdpa-dev.c
> > > @@ -15,9 +15,122 @@
> > >  #include "sysemu/sysemu.h"
> > >  #include "sysemu/runstate.h"
> > >
> > > +static void
> > > +vhost_vdpa_device_dummy_handle_output(VirtIODevice *vdev, VirtQueue *vq)
> > > +{
> > > +    /* Nothing to do */
> > > +}
> > > +
> > > +static int vdpa_dev_get_info_by_fd(int fd, uint64_t cmd, Error **errp)
> > 
> > This looks similar to the helper function in a previous patch but this
> > time the return value type is int instead of uint32_t. Please make the
> > types consistent.
> > 
> 
> OK.
> 
> > > +{
> > > +    int val;
> > > +
> > > +    if (ioctl(fd, cmd, &val) < 0) {
> > > +        error_setg(errp, "vhost-vdpa-device: cmd 0x%lx failed: %s",
> > > +                   cmd, strerror(errno));
> > > +        return -1;
> > > +    }
> > > +
> > > +    return val;
> > > +}
> > > +
> > > +static inline int vdpa_dev_get_queue_size(int fd, Error **errp)
> > > +{
> > > +    return vdpa_dev_get_info_by_fd(fd, VHOST_VDPA_GET_VRING_NUM, errp);
> > > +}
> > > +
> > > +static inline int vdpa_dev_get_vqs_num(int fd, Error **errp)
> > > +{
> > > +    return vdpa_dev_get_info_by_fd(fd, VHOST_VDPA_GET_VQS_NUM, errp);
> > > +}
> > > +
> > > +static inline int vdpa_dev_get_config_size(int fd, Error **errp)
> > > +{
> > > +    return vdpa_dev_get_info_by_fd(fd, VHOST_VDPA_GET_CONFIG_SIZE, errp);
> > > +}
> > > +
> > >  static void vhost_vdpa_device_realize(DeviceState *dev, Error **errp)
> > >  {
> > > +    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> > > +    VhostVdpaDevice *s = VHOST_VDPA_DEVICE(vdev);
> > > +    uint32_t device_id;
> > > +    int max_queue_size;
> > > +    int fd;
> > > +    int i, ret;
> > > +
> > > +    fd = qemu_open(s->vdpa_dev, O_RDWR, errp);
> > > +    if (fd == -1) {
> > > +        return;
> > > +    }
> > > +    s->vdpa.device_fd = fd;
> > 
> > This is the field I suggest exposing as a QOM property so it can be set
> > from the proxy object (e.g. when the PCI proxy opens the vdpa device
> > before our .realize() function is called).
> > 
> 
> OK.
> 
> > > +
> > > +    max_queue_size = vdpa_dev_get_queue_size(fd, errp);
> > > +    if (*errp) {
> > > +        goto out;
> > > +    }
> > > +
> > > +    if (s->queue_size > max_queue_size) {
> > > +        error_setg(errp, "vhost-vdpa-device: invalid queue_size: %d
> > (max:%d)",
> > > +                   s->queue_size, max_queue_size);
> > > +        goto out;
> > > +    } else if (!s->queue_size) {
> > > +        s->queue_size = max_queue_size;
> > > +    }
> > > +
> > > +    ret = vdpa_dev_get_vqs_num(fd, errp);
> > > +    if (*errp) {
> > > +        goto out;
> > > +    }
> > > +
> > > +    s->dev.nvqs = ret;
> > 
> > There is no input validation because we trust the kernel vDPA return
> > values. That seems okay for now but if there is a vhost-user version of
> > this in the future then input validation will be necessary to achieve
> > isolation between QEMU and the vhost-user processes. I suggest including
> > input validation code right away because it's harder to audit the code
> > and fix missing input validation later on.
> > 
> 
> Make sense!
> 
> Should we only need to validate the upper boundary (e.g. <VIRTIO_QUEUE_MAX)?

Careful, ret is currently an int so negative values would bypass the <
VIRTIO_QUEUE_MAX check.

> 
> > > +    s->dev.vqs = g_new0(struct vhost_virtqueue, s->dev.nvqs);
> > > +    s->dev.vq_index = 0;
> > > +    s->dev.vq_index_end = s->dev.nvqs;
> > > +    s->dev.backend_features = 0;
> > > +    s->started = false;
> > > +
> > > +    ret = vhost_dev_init(&s->dev, &s->vdpa, VHOST_BACKEND_TYPE_VDPA, 0,
> > NULL);
> > > +    if (ret < 0) {
> > > +        error_setg(errp, "vhost-vdpa-device: vhost initialization
> > failed: %s",
> > > +                   strerror(-ret));
> > > +        goto out;
> > > +    }
> > > +
> > > +    ret = s->dev.vhost_ops->vhost_get_device_id(&s->dev, &device_id);
> > 
> > The vhost_*() API abstracts the ioctl calls but this source file and the
> > PCI proxy have ioctl calls. I wonder if it's possible to move the ioctls
> > calls into the vhost_*() API? That would be cleaner and also make it
> > easier to add vhost-user vDPA support in the future.
> 
> We need these ioctls calls because we need invoke them before the vhost-dev
> object is initialized.

It may be possible to clean this up by changing how vhost_dev_init()
works but I haven't investigated. The issue is that the vhost_dev_init()
API requires information from the caller that has to be fetched from the
vDPA device. This forces the caller to communicate directly with the
vDPA device before calling vhost_dev_init(). It may be possible to move
this setup code inside vhost_dev_init() (and vhost_ops callbacks).

Stefan
Longpeng (Mike, Cloud Infrastructure Service Product Dept.) Jan. 17, 2022, 12:34 p.m. UTC | #4
> -----Original Message-----
> From: Stefan Hajnoczi [mailto:stefanha@redhat.com]
> Sent: Thursday, January 6, 2022 7:34 PM
> To: Longpeng (Mike, Cloud Infrastructure Service Product Dept.)
> <longpeng2@huawei.com>
> Cc: mst@redhat.com; jasowang@redhat.com; sgarzare@redhat.com;
> cohuck@redhat.com; pbonzini@redhat.com; Gonglei (Arei)
> <arei.gonglei@huawei.com>; Yechuan <yechuan@huawei.com>; Huangzhichao
> <huangzhichao@huawei.com>; qemu-devel@nongnu.org
> Subject: Re: [RFC 05/10] vdpa-dev: implement the realize interface
> 
> On Thu, Jan 06, 2022 at 03:02:37AM +0000, Longpeng (Mike, Cloud Infrastructure
> Service Product Dept.) wrote:
> >
> >
> > > -----Original Message-----
> > > From: Stefan Hajnoczi [mailto:stefanha@redhat.com]
> > > Sent: Wednesday, January 5, 2022 6:18 PM
> > > To: Longpeng (Mike, Cloud Infrastructure Service Product Dept.)
> > > <longpeng2@huawei.com>
> > > Cc: mst@redhat.com; jasowang@redhat.com; sgarzare@redhat.com;
> > > cohuck@redhat.com; pbonzini@redhat.com; Gonglei (Arei)
> > > <arei.gonglei@huawei.com>; Yechuan <yechuan@huawei.com>; Huangzhichao
> > > <huangzhichao@huawei.com>; qemu-devel@nongnu.org
> > > Subject: Re: [RFC 05/10] vdpa-dev: implement the realize interface
> > >
> > > On Wed, Jan 05, 2022 at 08:58:55AM +0800, Longpeng(Mike) wrote:
> > > > From: Longpeng <longpeng2@huawei.com>
> > > >
> > > > Implements the .realize interface.
> > > >
> > > > Signed-off-by: Longpeng <longpeng2@huawei.com>
> > > > ---
> > > >  hw/virtio/vdpa-dev.c         | 114 +++++++++++++++++++++++++++++++++++
> > > >  include/hw/virtio/vdpa-dev.h |   8 +++
> > > >  2 files changed, 122 insertions(+)
> > > >
> > > > diff --git a/hw/virtio/vdpa-dev.c b/hw/virtio/vdpa-dev.c
> > > > index 790117fb3b..2d534d837a 100644
> > > > --- a/hw/virtio/vdpa-dev.c
> > > > +++ b/hw/virtio/vdpa-dev.c
> > > > @@ -15,9 +15,122 @@
> > > >  #include "sysemu/sysemu.h"
> > > >  #include "sysemu/runstate.h"
> > > >
> > > > +static void
> > > > +vhost_vdpa_device_dummy_handle_output(VirtIODevice *vdev, VirtQueue
> *vq)
> > > > +{
> > > > +    /* Nothing to do */
> > > > +}
> > > > +
> > > > +static int vdpa_dev_get_info_by_fd(int fd, uint64_t cmd, Error **errp)
> > >
> > > This looks similar to the helper function in a previous patch but this
> > > time the return value type is int instead of uint32_t. Please make the
> > > types consistent.
> > >
> >
> > OK.
> >
> > > > +{
> > > > +    int val;
> > > > +
> > > > +    if (ioctl(fd, cmd, &val) < 0) {
> > > > +        error_setg(errp, "vhost-vdpa-device: cmd 0x%lx failed: %s",
> > > > +                   cmd, strerror(errno));
> > > > +        return -1;
> > > > +    }
> > > > +
> > > > +    return val;
> > > > +}
> > > > +
> > > > +static inline int vdpa_dev_get_queue_size(int fd, Error **errp)
> > > > +{
> > > > +    return vdpa_dev_get_info_by_fd(fd, VHOST_VDPA_GET_VRING_NUM, errp);
> > > > +}
> > > > +
> > > > +static inline int vdpa_dev_get_vqs_num(int fd, Error **errp)
> > > > +{
> > > > +    return vdpa_dev_get_info_by_fd(fd, VHOST_VDPA_GET_VQS_NUM, errp);
> > > > +}
> > > > +
> > > > +static inline int vdpa_dev_get_config_size(int fd, Error **errp)
> > > > +{
> > > > +    return vdpa_dev_get_info_by_fd(fd, VHOST_VDPA_GET_CONFIG_SIZE,
> errp);
> > > > +}
> > > > +
> > > >  static void vhost_vdpa_device_realize(DeviceState *dev, Error **errp)
> > > >  {
> > > > +    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> > > > +    VhostVdpaDevice *s = VHOST_VDPA_DEVICE(vdev);
> > > > +    uint32_t device_id;
> > > > +    int max_queue_size;
> > > > +    int fd;
> > > > +    int i, ret;
> > > > +
> > > > +    fd = qemu_open(s->vdpa_dev, O_RDWR, errp);
> > > > +    if (fd == -1) {
> > > > +        return;
> > > > +    }
> > > > +    s->vdpa.device_fd = fd;
> > >
> > > This is the field I suggest exposing as a QOM property so it can be set
> > > from the proxy object (e.g. when the PCI proxy opens the vdpa device
> > > before our .realize() function is called).
> > >
> >
> > OK.
> >
> > > > +
> > > > +    max_queue_size = vdpa_dev_get_queue_size(fd, errp);
> > > > +    if (*errp) {
> > > > +        goto out;
> > > > +    }
> > > > +
> > > > +    if (s->queue_size > max_queue_size) {
> > > > +        error_setg(errp, "vhost-vdpa-device: invalid queue_size: %d
> > > (max:%d)",
> > > > +                   s->queue_size, max_queue_size);
> > > > +        goto out;
> > > > +    } else if (!s->queue_size) {
> > > > +        s->queue_size = max_queue_size;
> > > > +    }
> > > > +
> > > > +    ret = vdpa_dev_get_vqs_num(fd, errp);
> > > > +    if (*errp) {
> > > > +        goto out;
> > > > +    }
> > > > +
> > > > +    s->dev.nvqs = ret;
> > >
> > > There is no input validation because we trust the kernel vDPA return
> > > values. That seems okay for now but if there is a vhost-user version of
> > > this in the future then input validation will be necessary to achieve
> > > isolation between QEMU and the vhost-user processes. I suggest including
> > > input validation code right away because it's harder to audit the code
> > > and fix missing input validation later on.
> > >
> >
> > Make sense!
> >
> > Should we only need to validate the upper boundary (e.g. <VIRTIO_QUEUE_MAX)?
> 
> Careful, ret is currently an int so negative values would bypass the <
> VIRTIO_QUEUE_MAX check.
> 
> >
> > > > +    s->dev.vqs = g_new0(struct vhost_virtqueue, s->dev.nvqs);
> > > > +    s->dev.vq_index = 0;
> > > > +    s->dev.vq_index_end = s->dev.nvqs;
> > > > +    s->dev.backend_features = 0;
> > > > +    s->started = false;
> > > > +
> > > > +    ret = vhost_dev_init(&s->dev, &s->vdpa, VHOST_BACKEND_TYPE_VDPA, 0,
> > > NULL);
> > > > +    if (ret < 0) {
> > > > +        error_setg(errp, "vhost-vdpa-device: vhost initialization
> > > failed: %s",
> > > > +                   strerror(-ret));
> > > > +        goto out;
> > > > +    }
> > > > +
> > > > +    ret = s->dev.vhost_ops->vhost_get_device_id(&s->dev, &device_id);
> > >
> > > The vhost_*() API abstracts the ioctl calls but this source file and the
> > > PCI proxy have ioctl calls. I wonder if it's possible to move the ioctls
> > > calls into the vhost_*() API? That would be cleaner and also make it
> > > easier to add vhost-user vDPA support in the future.
> >
> > We need these ioctls calls because we need invoke them before the vhost-dev
> > object is initialized.
> 
> It may be possible to clean this up by changing how vhost_dev_init()
> works but I haven't investigated. The issue is that the vhost_dev_init()
> API requires information from the caller that has to be fetched from the
> vDPA device. This forces the caller to communicate directly with the
> vDPA device before calling vhost_dev_init(). It may be possible to move
> this setup code inside vhost_dev_init() (and vhost_ops callbacks).
> 

Hmm, this is still not clear to me, so let's continue to discuss this
in v2 if you think it's necessary.

> Stefan
Stefan Hajnoczi Jan. 19, 2022, 5:15 p.m. UTC | #5
On Mon, Jan 17, 2022 at 12:34:50PM +0000, Longpeng (Mike, Cloud Infrastructure Service Product Dept.) wrote:
> 
> 
> > -----Original Message-----
> > From: Stefan Hajnoczi [mailto:stefanha@redhat.com]
> > Sent: Thursday, January 6, 2022 7:34 PM
> > To: Longpeng (Mike, Cloud Infrastructure Service Product Dept.)
> > <longpeng2@huawei.com>
> > Cc: mst@redhat.com; jasowang@redhat.com; sgarzare@redhat.com;
> > cohuck@redhat.com; pbonzini@redhat.com; Gonglei (Arei)
> > <arei.gonglei@huawei.com>; Yechuan <yechuan@huawei.com>; Huangzhichao
> > <huangzhichao@huawei.com>; qemu-devel@nongnu.org
> > Subject: Re: [RFC 05/10] vdpa-dev: implement the realize interface
> > 
> > On Thu, Jan 06, 2022 at 03:02:37AM +0000, Longpeng (Mike, Cloud Infrastructure
> > Service Product Dept.) wrote:
> > >
> > >
> > > > -----Original Message-----
> > > > From: Stefan Hajnoczi [mailto:stefanha@redhat.com]
> > > > Sent: Wednesday, January 5, 2022 6:18 PM
> > > > To: Longpeng (Mike, Cloud Infrastructure Service Product Dept.)
> > > > <longpeng2@huawei.com>
> > > > Cc: mst@redhat.com; jasowang@redhat.com; sgarzare@redhat.com;
> > > > cohuck@redhat.com; pbonzini@redhat.com; Gonglei (Arei)
> > > > <arei.gonglei@huawei.com>; Yechuan <yechuan@huawei.com>; Huangzhichao
> > > > <huangzhichao@huawei.com>; qemu-devel@nongnu.org
> > > > Subject: Re: [RFC 05/10] vdpa-dev: implement the realize interface
> > > >
> > > > On Wed, Jan 05, 2022 at 08:58:55AM +0800, Longpeng(Mike) wrote:
> > > > > From: Longpeng <longpeng2@huawei.com>
> > > > >
> > > > > Implements the .realize interface.
> > > > >
> > > > > Signed-off-by: Longpeng <longpeng2@huawei.com>
> > > > > ---
> > > > >  hw/virtio/vdpa-dev.c         | 114 +++++++++++++++++++++++++++++++++++
> > > > >  include/hw/virtio/vdpa-dev.h |   8 +++
> > > > >  2 files changed, 122 insertions(+)
> > > > >
> > > > > diff --git a/hw/virtio/vdpa-dev.c b/hw/virtio/vdpa-dev.c
> > > > > index 790117fb3b..2d534d837a 100644
> > > > > --- a/hw/virtio/vdpa-dev.c
> > > > > +++ b/hw/virtio/vdpa-dev.c
> > > > > @@ -15,9 +15,122 @@
> > > > >  #include "sysemu/sysemu.h"
> > > > >  #include "sysemu/runstate.h"
> > > > >
> > > > > +static void
> > > > > +vhost_vdpa_device_dummy_handle_output(VirtIODevice *vdev, VirtQueue
> > *vq)
> > > > > +{
> > > > > +    /* Nothing to do */
> > > > > +}
> > > > > +
> > > > > +static int vdpa_dev_get_info_by_fd(int fd, uint64_t cmd, Error **errp)
> > > >
> > > > This looks similar to the helper function in a previous patch but this
> > > > time the return value type is int instead of uint32_t. Please make the
> > > > types consistent.
> > > >
> > >
> > > OK.
> > >
> > > > > +{
> > > > > +    int val;
> > > > > +
> > > > > +    if (ioctl(fd, cmd, &val) < 0) {
> > > > > +        error_setg(errp, "vhost-vdpa-device: cmd 0x%lx failed: %s",
> > > > > +                   cmd, strerror(errno));
> > > > > +        return -1;
> > > > > +    }
> > > > > +
> > > > > +    return val;
> > > > > +}
> > > > > +
> > > > > +static inline int vdpa_dev_get_queue_size(int fd, Error **errp)
> > > > > +{
> > > > > +    return vdpa_dev_get_info_by_fd(fd, VHOST_VDPA_GET_VRING_NUM, errp);
> > > > > +}
> > > > > +
> > > > > +static inline int vdpa_dev_get_vqs_num(int fd, Error **errp)
> > > > > +{
> > > > > +    return vdpa_dev_get_info_by_fd(fd, VHOST_VDPA_GET_VQS_NUM, errp);
> > > > > +}
> > > > > +
> > > > > +static inline int vdpa_dev_get_config_size(int fd, Error **errp)
> > > > > +{
> > > > > +    return vdpa_dev_get_info_by_fd(fd, VHOST_VDPA_GET_CONFIG_SIZE,
> > errp);
> > > > > +}
> > > > > +
> > > > >  static void vhost_vdpa_device_realize(DeviceState *dev, Error **errp)
> > > > >  {
> > > > > +    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> > > > > +    VhostVdpaDevice *s = VHOST_VDPA_DEVICE(vdev);
> > > > > +    uint32_t device_id;
> > > > > +    int max_queue_size;
> > > > > +    int fd;
> > > > > +    int i, ret;
> > > > > +
> > > > > +    fd = qemu_open(s->vdpa_dev, O_RDWR, errp);
> > > > > +    if (fd == -1) {
> > > > > +        return;
> > > > > +    }
> > > > > +    s->vdpa.device_fd = fd;
> > > >
> > > > This is the field I suggest exposing as a QOM property so it can be set
> > > > from the proxy object (e.g. when the PCI proxy opens the vdpa device
> > > > before our .realize() function is called).
> > > >
> > >
> > > OK.
> > >
> > > > > +
> > > > > +    max_queue_size = vdpa_dev_get_queue_size(fd, errp);
> > > > > +    if (*errp) {
> > > > > +        goto out;
> > > > > +    }
> > > > > +
> > > > > +    if (s->queue_size > max_queue_size) {
> > > > > +        error_setg(errp, "vhost-vdpa-device: invalid queue_size: %d
> > > > (max:%d)",
> > > > > +                   s->queue_size, max_queue_size);
> > > > > +        goto out;
> > > > > +    } else if (!s->queue_size) {
> > > > > +        s->queue_size = max_queue_size;
> > > > > +    }
> > > > > +
> > > > > +    ret = vdpa_dev_get_vqs_num(fd, errp);
> > > > > +    if (*errp) {
> > > > > +        goto out;
> > > > > +    }
> > > > > +
> > > > > +    s->dev.nvqs = ret;
> > > >
> > > > There is no input validation because we trust the kernel vDPA return
> > > > values. That seems okay for now but if there is a vhost-user version of
> > > > this in the future then input validation will be necessary to achieve
> > > > isolation between QEMU and the vhost-user processes. I suggest including
> > > > input validation code right away because it's harder to audit the code
> > > > and fix missing input validation later on.
> > > >
> > >
> > > Make sense!
> > >
> > > Should we only need to validate the upper boundary (e.g. <VIRTIO_QUEUE_MAX)?
> > 
> > Careful, ret is currently an int so negative values would bypass the <
> > VIRTIO_QUEUE_MAX check.
> > 
> > >
> > > > > +    s->dev.vqs = g_new0(struct vhost_virtqueue, s->dev.nvqs);
> > > > > +    s->dev.vq_index = 0;
> > > > > +    s->dev.vq_index_end = s->dev.nvqs;
> > > > > +    s->dev.backend_features = 0;
> > > > > +    s->started = false;
> > > > > +
> > > > > +    ret = vhost_dev_init(&s->dev, &s->vdpa, VHOST_BACKEND_TYPE_VDPA, 0,
> > > > NULL);
> > > > > +    if (ret < 0) {
> > > > > +        error_setg(errp, "vhost-vdpa-device: vhost initialization
> > > > failed: %s",
> > > > > +                   strerror(-ret));
> > > > > +        goto out;
> > > > > +    }
> > > > > +
> > > > > +    ret = s->dev.vhost_ops->vhost_get_device_id(&s->dev, &device_id);
> > > >
> > > > The vhost_*() API abstracts the ioctl calls but this source file and the
> > > > PCI proxy have ioctl calls. I wonder if it's possible to move the ioctls
> > > > calls into the vhost_*() API? That would be cleaner and also make it
> > > > easier to add vhost-user vDPA support in the future.
> > >
> > > We need these ioctls calls because we need invoke them before the vhost-dev
> > > object is initialized.
> > 
> > It may be possible to clean this up by changing how vhost_dev_init()
> > works but I haven't investigated. The issue is that the vhost_dev_init()
> > API requires information from the caller that has to be fetched from the
> > vDPA device. This forces the caller to communicate directly with the
> > vDPA device before calling vhost_dev_init(). It may be possible to move
> > this setup code inside vhost_dev_init() (and vhost_ops callbacks).
> > 
> 
> Hmm, this is still not clear to me, so let's continue to discuss this
> in v2 if you think it's necessary.

Okay.

Stefan
diff mbox series

Patch

diff --git a/hw/virtio/vdpa-dev.c b/hw/virtio/vdpa-dev.c
index 790117fb3b..2d534d837a 100644
--- a/hw/virtio/vdpa-dev.c
+++ b/hw/virtio/vdpa-dev.c
@@ -15,9 +15,122 @@ 
 #include "sysemu/sysemu.h"
 #include "sysemu/runstate.h"
 
+static void
+vhost_vdpa_device_dummy_handle_output(VirtIODevice *vdev, VirtQueue *vq)
+{
+    /* Nothing to do */
+}
+
+static int vdpa_dev_get_info_by_fd(int fd, uint64_t cmd, Error **errp)
+{
+    int val;
+
+    if (ioctl(fd, cmd, &val) < 0) {
+        error_setg(errp, "vhost-vdpa-device: cmd 0x%lx failed: %s",
+                   cmd, strerror(errno));
+        return -1;
+    }
+
+    return val;
+}
+
+static inline int vdpa_dev_get_queue_size(int fd, Error **errp)
+{
+    return vdpa_dev_get_info_by_fd(fd, VHOST_VDPA_GET_VRING_NUM, errp);
+}
+
+static inline int vdpa_dev_get_vqs_num(int fd, Error **errp)
+{
+    return vdpa_dev_get_info_by_fd(fd, VHOST_VDPA_GET_VQS_NUM, errp);
+}
+
+static inline int vdpa_dev_get_config_size(int fd, Error **errp)
+{
+    return vdpa_dev_get_info_by_fd(fd, VHOST_VDPA_GET_CONFIG_SIZE, errp);
+}
+
 static void vhost_vdpa_device_realize(DeviceState *dev, Error **errp)
 {
+    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
+    VhostVdpaDevice *s = VHOST_VDPA_DEVICE(vdev);
+    uint32_t device_id;
+    int max_queue_size;
+    int fd;
+    int i, ret;
+
+    fd = qemu_open(s->vdpa_dev, O_RDWR, errp);
+    if (fd == -1) {
+        return;
+    }
+    s->vdpa.device_fd = fd;
+
+    max_queue_size = vdpa_dev_get_queue_size(fd, errp);
+    if (*errp) {
+        goto out;
+    }
+
+    if (s->queue_size > max_queue_size) {
+        error_setg(errp, "vhost-vdpa-device: invalid queue_size: %d (max:%d)",
+                   s->queue_size, max_queue_size);
+        goto out;
+    } else if (!s->queue_size) {
+        s->queue_size = max_queue_size;
+    }
+
+    ret = vdpa_dev_get_vqs_num(fd, errp);
+    if (*errp) {
+        goto out;
+    }
+
+    s->dev.nvqs = ret;
+    s->dev.vqs = g_new0(struct vhost_virtqueue, s->dev.nvqs);
+    s->dev.vq_index = 0;
+    s->dev.vq_index_end = s->dev.nvqs;
+    s->dev.backend_features = 0;
+    s->started = false;
+
+    ret = vhost_dev_init(&s->dev, &s->vdpa, VHOST_BACKEND_TYPE_VDPA, 0, NULL);
+    if (ret < 0) {
+        error_setg(errp, "vhost-vdpa-device: vhost initialization failed: %s",
+                   strerror(-ret));
+        goto out;
+    }
+
+    ret = s->dev.vhost_ops->vhost_get_device_id(&s->dev, &device_id);
+    if (ret < 0) {
+        error_setg(errp, "vhost-vdpa-device: vhost get device id failed: %s",
+                   strerror(-ret));
+        goto vhost_cleanup;
+    }
+
+    s->config_size = vdpa_dev_get_config_size(fd, errp);
+    if (*errp) {
+        goto vhost_cleanup;
+    }
+
+    s->config = g_malloc0(s->config_size);
+
+    ret = vhost_dev_get_config(&s->dev, s->config, s->config_size, NULL);
+    if (ret < 0) {
+        error_setg(errp, "vhost-vdpa-device: get config failed");
+        goto config_err;
+    }
+
+    virtio_init(vdev, "vhost-vdpa", device_id, s->config_size);
+
+    s->virtqs = g_new0(VirtQueue *, s->dev.nvqs);
+    for (i = 0; i < s->dev.nvqs; i++) {
+        s->virtqs[i] = virtio_add_queue(vdev, s->queue_size,
+                                        vhost_vdpa_device_dummy_handle_output);
+    }
+
     return;
+config_err:
+    g_free(s->config);
+vhost_cleanup:
+    vhost_dev_cleanup(&s->dev);
+out:
+    close(fd);
 }
 
 static void vhost_vdpa_device_unrealize(DeviceState *dev)
@@ -51,6 +164,7 @@  static void vhost_vdpa_device_set_status(VirtIODevice *vdev, uint8_t status)
 
 static Property vhost_vdpa_device_properties[] = {
     DEFINE_PROP_STRING("vdpa-dev", VhostVdpaDevice, vdpa_dev),
+    DEFINE_PROP_UINT16("queue-size", VhostVdpaDevice, queue_size, 0),
     DEFINE_PROP_END_OF_LIST(),
 };
 
diff --git a/include/hw/virtio/vdpa-dev.h b/include/hw/virtio/vdpa-dev.h
index 7a0e6bdcf8..49f8145d61 100644
--- a/include/hw/virtio/vdpa-dev.h
+++ b/include/hw/virtio/vdpa-dev.h
@@ -13,6 +13,14 @@  struct VhostVdpaDevice {
     VirtIODevice parent_obj;
     char *vdpa_dev;
     int32_t bootindex;
+    struct vhost_dev dev;
+    struct vhost_vdpa vdpa;
+    VirtQueue **virtqs;
+    uint8_t *config;
+    int config_size;
+    uint32_t num_queues;
+    uint16_t queue_size;
+    bool started;
 };
 
 #endif