Message ID | 5532e8b1721cdf68d8932c747dc6b5f42738e139.1667542066.git.john.g.johnson@oracle.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | vfio-user client | expand |
On Tue, Nov 08, 2022 at 03:13:36PM -0800, John Johnson wrote:

> +static int vfio_user_io_get_irq_info(VFIODevice *vbasedev,
> +                                     struct vfio_irq_info *irq)
> +{
> +    int ret;
> +
> +    ret = vfio_user_get_irq_info(vbasedev->proxy, irq);
> +    if (ret) {
> +        return ret;
> +    }
> +
> +    if (irq->index > vbasedev->num_irqs) {
> +        return -EINVAL;
> +    }

Why are we validating ->index *after* requesting the info? Seems a bit weird?

regards
john
> On Dec 9, 2022, at 9:29 AM, John Levon <levon@movementarian.org> wrote:
>
> On Tue, Nov 08, 2022 at 03:13:36PM -0800, John Johnson wrote:
>
>> +static int vfio_user_io_get_irq_info(VFIODevice *vbasedev,
>> +                                     struct vfio_irq_info *irq)
>> +{
>> +    int ret;
>> +
>> +    ret = vfio_user_get_irq_info(vbasedev->proxy, irq);
>> +    if (ret) {
>> +        return ret;
>> +    }
>> +
>> +    if (irq->index > vbasedev->num_irqs) {
>> +        return -EINVAL;
>> +    }
>
> Why are we validating ->index *after* requesting the info? Seems a bit weird?
>

That check is to validate the server return content (to the extent we can).

JJ
On 11/9/22 00:13, John Johnson wrote: > Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com> > Signed-off-by: John G Johnson <john.g.johnson@oracle.com> > Signed-off-by: Jagannathan Raman <jag.raman@oracle.com> > --- > hw/vfio/pci.c | 7 ++- > hw/vfio/user-protocol.h | 25 +++++++++ > hw/vfio/user.c | 135 ++++++++++++++++++++++++++++++++++++++++++++++++ > 3 files changed, 166 insertions(+), 1 deletion(-) > > diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c > index 7abe44e..be39a4e 100644 > --- a/hw/vfio/pci.c > +++ b/hw/vfio/pci.c > @@ -713,7 +713,8 @@ retry: > ret = vfio_enable_vectors(vdev, false); > if (ret) { > if (ret < 0) { > - error_report("vfio: Error: Failed to setup MSI fds: %m"); > + error_report("vfio: Error: Failed to setup MSI fds: %s", > + strerror(-ret)); This change belongs to another patch. > } else { > error_report("vfio: Error: Failed to enable %d " > "MSI vectors, retry with %d", vdev->nr_vectors, ret); > @@ -2712,6 +2713,7 @@ static void vfio_populate_device(VFIOPCIDevice *vdev, Error **errp) > irq_info.index = VFIO_PCI_ERR_IRQ_INDEX; > > ret = VDEV_GET_IRQ_INFO(vbasedev, &irq_info); > + > if (ret) { > /* This can fail for an old kernel or legacy PCI dev */ > trace_vfio_populate_device_get_irq_info_failure(strerror(errno)); > @@ -3593,6 +3595,9 @@ static void vfio_user_pci_realize(PCIDevice *pdev, Error **errp) > goto out_teardown; > } > > + vfio_register_err_notifier(vdev); > + vfio_register_req_notifier(vdev); > + > return; > > out_teardown: > diff --git a/hw/vfio/user-protocol.h b/hw/vfio/user-protocol.h > index 124340c..31704cf 100644 > --- a/hw/vfio/user-protocol.h > +++ b/hw/vfio/user-protocol.h > @@ -141,6 +141,31 @@ typedef struct { > } VFIOUserRegionInfo; > > /* > + * VFIO_USER_DEVICE_GET_IRQ_INFO > + * imported from struct vfio_irq_info > + */ > +typedef struct { > + VFIOUserHdr hdr; > + uint32_t argsz; > + uint32_t flags; > + uint32_t index; > + uint32_t count; > +} VFIOUserIRQInfo; > + > +/* > + * VFIO_USER_DEVICE_SET_IRQS > + * 
imported from struct vfio_irq_set > + */ > +typedef struct { > + VFIOUserHdr hdr; > + uint32_t argsz; > + uint32_t flags; > + uint32_t index; > + uint32_t start; > + uint32_t count; > +} VFIOUserIRQSet; > + > +/* > * VFIO_USER_REGION_READ > * VFIO_USER_REGION_WRITE > */ > diff --git a/hw/vfio/user.c b/hw/vfio/user.c > index 1453bb5..815385b 100644 > --- a/hw/vfio/user.c > +++ b/hw/vfio/user.c > @@ -1164,6 +1164,117 @@ static int vfio_user_get_region_info(VFIOProxy *proxy, > return 0; > } > > +static int vfio_user_get_irq_info(VFIOProxy *proxy, > + struct vfio_irq_info *info) > +{ > + VFIOUserIRQInfo msg; > + > + memset(&msg, 0, sizeof(msg)); > + vfio_user_request_msg(&msg.hdr, VFIO_USER_DEVICE_GET_IRQ_INFO, > + sizeof(msg), 0); > + msg.argsz = info->argsz; > + msg.index = info->index; > + > + vfio_user_send_wait(proxy, &msg.hdr, NULL, 0, false); > + if (msg.hdr.flags & VFIO_USER_ERROR) { > + return -msg.hdr.error_reply; > + } > + > + memcpy(info, &msg.argsz, sizeof(*info)); > + return 0; > +} > + > +static int irq_howmany(int *fdp, uint32_t cur, uint32_t max) intriguing routine. See comment below. 
> +{ > + int n = 0; > + > + if (fdp[cur] != -1) { > + do { > + n++; > + } while (n < max && fdp[cur + n] != -1); > + } else { > + do { > + n++; > + } while (n < max && fdp[cur + n] == -1); > + } > + > + return n; > +} > + > +static int vfio_user_set_irqs(VFIOProxy *proxy, struct vfio_irq_set *irq) > +{ > + g_autofree VFIOUserIRQSet *msgp = NULL; > + uint32_t size, nfds, send_fds, sent_fds, max; > + > + if (irq->argsz < sizeof(*irq)) { > + error_printf("vfio_user_set_irqs argsz too small\n"); > + return -EINVAL; > + } > + > + /* > + * Handle simple case > + */ > + if ((irq->flags & VFIO_IRQ_SET_DATA_EVENTFD) == 0) { > + size = sizeof(VFIOUserHdr) + irq->argsz; > + msgp = g_malloc0(size); > + > + vfio_user_request_msg(&msgp->hdr, VFIO_USER_DEVICE_SET_IRQS, size, 0); > + msgp->argsz = irq->argsz; > + msgp->flags = irq->flags; > + msgp->index = irq->index; > + msgp->start = irq->start; > + msgp->count = irq->count; > + > + vfio_user_send_wait(proxy, &msgp->hdr, NULL, 0, false); > + if (msgp->hdr.flags & VFIO_USER_ERROR) { > + return -msgp->hdr.error_reply; > + } > + > + return 0; > + } > + > + /* > + * Calculate the number of FDs to send > + * and adjust argsz > + */ > + nfds = (irq->argsz - sizeof(*irq)) / sizeof(int); > + irq->argsz = sizeof(*irq); > + msgp = g_malloc0(sizeof(*msgp)); > + /* > + * Send in chunks if over max_send_fds > + */ > + for (sent_fds = 0; nfds > sent_fds; sent_fds += send_fds) { > + VFIOUserFDs *arg_fds, loop_fds; > + > + /* must send all valid FDs or all invalid FDs in single msg */ why is that ? > + max = nfds - sent_fds; > + if (max > proxy->max_send_fds) { > + max = proxy->max_send_fds; > + } > + send_fds = irq_howmany((int *)irq->data, sent_fds, max); sent_fds can never be -1 but irq_howmany() is taking into account this value. Why ? 
> + > + vfio_user_request_msg(&msgp->hdr, VFIO_USER_DEVICE_SET_IRQS, > + sizeof(*msgp), 0); > + msgp->argsz = irq->argsz; > + msgp->flags = irq->flags; > + msgp->index = irq->index; > + msgp->start = irq->start + sent_fds; > + msgp->count = send_fds; > + > + loop_fds.send_fds = send_fds; > + loop_fds.recv_fds = 0; > + loop_fds.fds = (int *)irq->data + sent_fds; > + arg_fds = loop_fds.fds[0] != -1 ? &loop_fds : NULL; > + > + vfio_user_send_wait(proxy, &msgp->hdr, arg_fds, 0, false); > + if (msgp->hdr.flags & VFIO_USER_ERROR) { > + return -msgp->hdr.error_reply; > + } > + } > + > + return 0; > +} > + > static int vfio_user_region_read(VFIOProxy *proxy, uint8_t index, off_t offset, > uint32_t count, void *data) > { > @@ -1277,6 +1388,28 @@ static int vfio_user_io_get_region_info(VFIODevice *vbasedev, > return 0; > } > > +static int vfio_user_io_get_irq_info(VFIODevice *vbasedev, > + struct vfio_irq_info *irq) > +{ > + int ret; > + > + ret = vfio_user_get_irq_info(vbasedev->proxy, irq); > + if (ret) { > + return ret; > + } > + > + if (irq->index > vbasedev->num_irqs) { > + return -EINVAL; > + } > + return 0; > +} > + > +static int vfio_user_io_set_irqs(VFIODevice *vbasedev, > + struct vfio_irq_set *irqs) > +{ > + return vfio_user_set_irqs(vbasedev->proxy, irqs); > +} > + > static int vfio_user_io_region_read(VFIODevice *vbasedev, uint8_t index, > off_t off, uint32_t size, void *data) > { > @@ -1294,6 +1427,8 @@ static int vfio_user_io_region_write(VFIODevice *vbasedev, uint8_t index, > VFIODevIO vfio_dev_io_sock = { > .get_info = vfio_user_io_get_info, > .get_region_info = vfio_user_io_get_region_info, > + .get_irq_info = vfio_user_io_get_irq_info, > + .set_irqs = vfio_user_io_set_irqs, > .region_read = vfio_user_io_region_read, > .region_write = vfio_user_io_region_write, > };
> On Dec 13, 2022, at 8:39 AM, Cédric Le Goater <clg@redhat.com> wrote:
>
> On 11/9/22 00:13, John Johnson wrote:
>>
>> +
>> +        /* must send all valid FDs or all invalid FDs in single msg */
>
> why is that ?
>

This has to do with how VFIO sends FDs to the kernel. The ioctl() interface has an array of FDs, with -1 signifying an invalid one. FDs can only be sent over a UNIX socket as a single array of them (all must be valid, no -1 holes in the array). In order to emulate the ioctl() use, we’d need to send an ancillary array to map the ordinal index of irq_data to the ordinal index of the FDs in the message. Since multi-FD sends are only done at device set-up, it seemed simpler to just break them up.

JJ
> On Dec 13, 2022, at 8:39 AM, Cédric Le Goater <clg@redhat.com> wrote:
>
> On 11/9/22 00:13, John Johnson wrote:
>>
>> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
>> index 7abe44e..be39a4e 100644
>> --- a/hw/vfio/pci.c
>> +++ b/hw/vfio/pci.c
>> @@ -713,7 +713,8 @@ retry:
>>          ret = vfio_enable_vectors(vdev, false);
>>          if (ret) {
>>              if (ret < 0) {
>> -                error_report("vfio: Error: Failed to setup MSI fds: %m");
>> +                error_report("vfio: Error: Failed to setup MSI fds: %s",
>> +                             strerror(-ret));
>
> This change belongs to another patch.
>

This is the patch where errno may not be set because vfio_enable_vectors() didn’t make a syscall.

JJ
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index 7abe44e..be39a4e 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -713,7 +713,8 @@ retry: ret = vfio_enable_vectors(vdev, false); if (ret) { if (ret < 0) { - error_report("vfio: Error: Failed to setup MSI fds: %m"); + error_report("vfio: Error: Failed to setup MSI fds: %s", + strerror(-ret)); } else { error_report("vfio: Error: Failed to enable %d " "MSI vectors, retry with %d", vdev->nr_vectors, ret); @@ -2712,6 +2713,7 @@ static void vfio_populate_device(VFIOPCIDevice *vdev, Error **errp) irq_info.index = VFIO_PCI_ERR_IRQ_INDEX; ret = VDEV_GET_IRQ_INFO(vbasedev, &irq_info); + if (ret) { /* This can fail for an old kernel or legacy PCI dev */ trace_vfio_populate_device_get_irq_info_failure(strerror(errno)); @@ -3593,6 +3595,9 @@ static void vfio_user_pci_realize(PCIDevice *pdev, Error **errp) goto out_teardown; } + vfio_register_err_notifier(vdev); + vfio_register_req_notifier(vdev); + return; out_teardown: diff --git a/hw/vfio/user-protocol.h b/hw/vfio/user-protocol.h index 124340c..31704cf 100644 --- a/hw/vfio/user-protocol.h +++ b/hw/vfio/user-protocol.h @@ -141,6 +141,31 @@ typedef struct { } VFIOUserRegionInfo; /* + * VFIO_USER_DEVICE_GET_IRQ_INFO + * imported from struct vfio_irq_info + */ +typedef struct { + VFIOUserHdr hdr; + uint32_t argsz; + uint32_t flags; + uint32_t index; + uint32_t count; +} VFIOUserIRQInfo; + +/* + * VFIO_USER_DEVICE_SET_IRQS + * imported from struct vfio_irq_set + */ +typedef struct { + VFIOUserHdr hdr; + uint32_t argsz; + uint32_t flags; + uint32_t index; + uint32_t start; + uint32_t count; +} VFIOUserIRQSet; + +/* * VFIO_USER_REGION_READ * VFIO_USER_REGION_WRITE */ diff --git a/hw/vfio/user.c b/hw/vfio/user.c index 1453bb5..815385b 100644 --- a/hw/vfio/user.c +++ b/hw/vfio/user.c @@ -1164,6 +1164,117 @@ static int vfio_user_get_region_info(VFIOProxy *proxy, return 0; } +static int vfio_user_get_irq_info(VFIOProxy *proxy, + struct vfio_irq_info *info) +{ + VFIOUserIRQInfo msg; + + 
memset(&msg, 0, sizeof(msg)); + vfio_user_request_msg(&msg.hdr, VFIO_USER_DEVICE_GET_IRQ_INFO, + sizeof(msg), 0); + msg.argsz = info->argsz; + msg.index = info->index; + + vfio_user_send_wait(proxy, &msg.hdr, NULL, 0, false); + if (msg.hdr.flags & VFIO_USER_ERROR) { + return -msg.hdr.error_reply; + } + + memcpy(info, &msg.argsz, sizeof(*info)); + return 0; +} + +static int irq_howmany(int *fdp, uint32_t cur, uint32_t max) +{ + int n = 0; + + if (fdp[cur] != -1) { + do { + n++; + } while (n < max && fdp[cur + n] != -1); + } else { + do { + n++; + } while (n < max && fdp[cur + n] == -1); + } + + return n; +} + +static int vfio_user_set_irqs(VFIOProxy *proxy, struct vfio_irq_set *irq) +{ + g_autofree VFIOUserIRQSet *msgp = NULL; + uint32_t size, nfds, send_fds, sent_fds, max; + + if (irq->argsz < sizeof(*irq)) { + error_printf("vfio_user_set_irqs argsz too small\n"); + return -EINVAL; + } + + /* + * Handle simple case + */ + if ((irq->flags & VFIO_IRQ_SET_DATA_EVENTFD) == 0) { + size = sizeof(VFIOUserHdr) + irq->argsz; + msgp = g_malloc0(size); + + vfio_user_request_msg(&msgp->hdr, VFIO_USER_DEVICE_SET_IRQS, size, 0); + msgp->argsz = irq->argsz; + msgp->flags = irq->flags; + msgp->index = irq->index; + msgp->start = irq->start; + msgp->count = irq->count; + + vfio_user_send_wait(proxy, &msgp->hdr, NULL, 0, false); + if (msgp->hdr.flags & VFIO_USER_ERROR) { + return -msgp->hdr.error_reply; + } + + return 0; + } + + /* + * Calculate the number of FDs to send + * and adjust argsz + */ + nfds = (irq->argsz - sizeof(*irq)) / sizeof(int); + irq->argsz = sizeof(*irq); + msgp = g_malloc0(sizeof(*msgp)); + /* + * Send in chunks if over max_send_fds + */ + for (sent_fds = 0; nfds > sent_fds; sent_fds += send_fds) { + VFIOUserFDs *arg_fds, loop_fds; + + /* must send all valid FDs or all invalid FDs in single msg */ + max = nfds - sent_fds; + if (max > proxy->max_send_fds) { + max = proxy->max_send_fds; + } + send_fds = irq_howmany((int *)irq->data, sent_fds, max); + + 
vfio_user_request_msg(&msgp->hdr, VFIO_USER_DEVICE_SET_IRQS, + sizeof(*msgp), 0); + msgp->argsz = irq->argsz; + msgp->flags = irq->flags; + msgp->index = irq->index; + msgp->start = irq->start + sent_fds; + msgp->count = send_fds; + + loop_fds.send_fds = send_fds; + loop_fds.recv_fds = 0; + loop_fds.fds = (int *)irq->data + sent_fds; + arg_fds = loop_fds.fds[0] != -1 ? &loop_fds : NULL; + + vfio_user_send_wait(proxy, &msgp->hdr, arg_fds, 0, false); + if (msgp->hdr.flags & VFIO_USER_ERROR) { + return -msgp->hdr.error_reply; + } + } + + return 0; +} + static int vfio_user_region_read(VFIOProxy *proxy, uint8_t index, off_t offset, uint32_t count, void *data) { @@ -1277,6 +1388,28 @@ static int vfio_user_io_get_region_info(VFIODevice *vbasedev, return 0; } +static int vfio_user_io_get_irq_info(VFIODevice *vbasedev, + struct vfio_irq_info *irq) +{ + int ret; + + ret = vfio_user_get_irq_info(vbasedev->proxy, irq); + if (ret) { + return ret; + } + + if (irq->index > vbasedev->num_irqs) { + return -EINVAL; + } + return 0; +} + +static int vfio_user_io_set_irqs(VFIODevice *vbasedev, + struct vfio_irq_set *irqs) +{ + return vfio_user_set_irqs(vbasedev->proxy, irqs); +} + static int vfio_user_io_region_read(VFIODevice *vbasedev, uint8_t index, off_t off, uint32_t size, void *data) { @@ -1294,6 +1427,8 @@ static int vfio_user_io_region_write(VFIODevice *vbasedev, uint8_t index, VFIODevIO vfio_dev_io_sock = { .get_info = vfio_user_io_get_info, .get_region_info = vfio_user_io_get_region_info, + .get_irq_info = vfio_user_io_get_irq_info, + .set_irqs = vfio_user_io_set_irqs, .region_read = vfio_user_io_region_read, .region_write = vfio_user_io_region_write, };