Message ID | 4d292fab645b18b8a20f0f5a83356ae7fc0efa28.1629131628.git.elena.ufimtseva@oracle.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | vfio-user implementation | expand |
On Mon, Aug 16, 2021 at 09:42:44AM -0700, Elena Ufimtseva wrote: > From: John Johnson <john.g.johnson@oracle.com> > > Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com> > Signed-off-by: John G Johnson <john.g.johnson@oracle.com> > Signed-off-by: Jagannathan Raman <jag.raman@oracle.com> > --- > hw/vfio/user-protocol.h | 25 ++++++++++ > hw/vfio/user.h | 2 + > hw/vfio/common.c | 26 ++++++++-- > hw/vfio/pci.c | 31 ++++++++++-- > hw/vfio/user.c | 106 ++++++++++++++++++++++++++++++++++++++++ > 5 files changed, 181 insertions(+), 9 deletions(-) > > diff --git a/hw/vfio/user-protocol.h b/hw/vfio/user-protocol.h > index 56904cf872..5614efa0a4 100644 > --- a/hw/vfio/user-protocol.h > +++ b/hw/vfio/user-protocol.h > @@ -109,6 +109,31 @@ typedef struct { > uint64_t offset; > } VFIOUserRegionInfo; > > +/* > + * VFIO_USER_DEVICE_GET_IRQ_INFO > + * imported from struct vfio_irq_info > + */ > +typedef struct { > + VFIOUserHdr hdr; > + uint32_t argsz; > + uint32_t flags; > + uint32_t index; > + uint32_t count; > +} VFIOUserIRQInfo; > + > +/* > + * VFIO_USER_DEVICE_SET_IRQS > + * imported from struct vfio_irq_set > + */ > +typedef struct { > + VFIOUserHdr hdr; > + uint32_t argsz; > + uint32_t flags; > + uint32_t index; > + uint32_t start; > + uint32_t count; > +} VFIOUserIRQSet; > + > /* > * VFIO_USER_REGION_READ > * VFIO_USER_REGION_WRITE > diff --git a/hw/vfio/user.h b/hw/vfio/user.h > index 02f832a173..248ad80943 100644 > --- a/hw/vfio/user.h > +++ b/hw/vfio/user.h > @@ -74,6 +74,8 @@ int vfio_user_validate_version(VFIODevice *vbasedev, Error **errp); > int vfio_user_get_info(VFIODevice *vbasedev); > int vfio_user_get_region_info(VFIODevice *vbasedev, int index, > struct vfio_region_info *info, VFIOUserFDs *fds); > +int vfio_user_get_irq_info(VFIODevice *vbasedev, struct vfio_irq_info *info); > +int vfio_user_set_irqs(VFIODevice *vbasedev, struct vfio_irq_set *irq); > int vfio_user_region_read(VFIODevice *vbasedev, uint32_t index, uint64_t offset, > uint32_t count, void *data); > int vfio_user_region_write(VFIODevice *vbasedev, uint32_t index, > diff --git a/hw/vfio/common.c b/hw/vfio/common.c > index a8b1ea9358..9fe3e05dc6 100644 > --- a/hw/vfio/common.c > +++ b/hw/vfio/common.c > @@ -71,7 +71,11 @@ void vfio_disable_irqindex(VFIODevice *vbasedev, int index) > .count = 0, > }; > > - ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, &irq_set); > + if (vbasedev->proxy != NULL) { > + vfio_user_set_irqs(vbasedev, &irq_set); > + } else { > + ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, &irq_set); > + } > } > > void vfio_unmask_single_irqindex(VFIODevice *vbasedev, int index) > @@ -84,7 +88,11 @@ void vfio_unmask_single_irqindex(VFIODevice *vbasedev, int index) > .count = 1, > }; > > - ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, &irq_set); > + if (vbasedev->proxy != NULL) { > + vfio_user_set_irqs(vbasedev, &irq_set); > + } else { > + ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, &irq_set); > + } > } > > void vfio_mask_single_irqindex(VFIODevice *vbasedev, int index) > @@ -97,7 +105,11 @@ void vfio_mask_single_irqindex(VFIODevice *vbasedev, int index) > .count = 1, > }; > > - ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, &irq_set); > + if (vbasedev->proxy != NULL) { > + vfio_user_set_irqs(vbasedev, &irq_set); > + } else { > + ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, &irq_set); > + } > } > > static inline const char *action_to_str(int action) > @@ -178,8 +190,12 @@ int vfio_set_irq_signaling(VFIODevice *vbasedev, int index, int subindex, > pfd = (int32_t *)&irq_set->data; > *pfd = fd; > > - if (ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, irq_set)) { > - ret = -errno; > + if (vbasedev->proxy != NULL) { > + ret = vfio_user_set_irqs(vbasedev, irq_set); > + } else { > + if (ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, irq_set)) { > + ret = -errno; > + } > } > g_free(irq_set); > > diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c > index ea0df8be65..282de6a30b 100644 > --- a/hw/vfio/pci.c > +++ b/hw/vfio/pci.c > @@ -403,7 +403,11 @@ static int vfio_enable_vectors(VFIOPCIDevice *vdev, bool msix) > fds[i] = fd; > } > > - ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_SET_IRQS, irq_set); > + if (vdev->vbasedev.proxy != NULL) { > + ret = vfio_user_set_irqs(&vdev->vbasedev, irq_set); > + } else { > + ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_SET_IRQS, irq_set); > + } > > g_free(irq_set); > > @@ -2675,7 +2679,13 @@ static void vfio_populate_device(VFIOPCIDevice *vdev, Error **errp) > > irq_info.index = VFIO_PCI_ERR_IRQ_INDEX; > > - ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_IRQ_INFO, &irq_info); > + if (vbasedev->proxy != NULL) { > + ret = vfio_user_get_irq_info(vbasedev, &irq_info); > + } else { > + ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_IRQ_INFO, &irq_info); > + } > + > + > if (ret) { > /* This can fail for an old kernel or legacy PCI dev */ > trace_vfio_populate_device_get_irq_info_failure(strerror(errno)); > @@ -2794,8 +2804,16 @@ static void vfio_register_req_notifier(VFIOPCIDevice *vdev) > return; > } > > - if (ioctl(vdev->vbasedev.fd, > - VFIO_DEVICE_GET_IRQ_INFO, &irq_info) < 0 || irq_info.count < 1) { > + if (vdev->vbasedev.proxy != NULL) { > + if (vfio_user_get_irq_info(&vdev->vbasedev, &irq_info) < 0) { > + return; > + } > + } else { > + if (ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_IRQ_INFO, &irq_info) < 0) { > + return; > + } > + } > + if (irq_info.count < 1) { > return; > } > > @@ -3557,6 +3575,11 @@ static void vfio_user_pci_realize(PCIDevice *pdev, Error **errp) > } > } > > + vfio_register_err_notifier(vdev); > + vfio_register_req_notifier(vdev); > + > + return; > + > out_deregister: > pci_device_set_intx_routing_notifier(&vdev->pdev, NULL); > kvm_irqchip_remove_change_notifier(&vdev->irqchip_change_notifier); > diff --git a/hw/vfio/user.c b/hw/vfio/user.c > index 83235b2411..b68ca1279d 100644 > --- a/hw/vfio/user.c > +++ b/hw/vfio/user.c > @@ -768,6 +768,112 @@ int vfio_user_get_region_info(VFIODevice *vbasedev, int index, > return 0; > } > > +int vfio_user_get_irq_info(VFIODevice *vbasedev, struct vfio_irq_info *info) > +{ > + VFIOUserIRQInfo msg; > + > + memset(&msg, 0, sizeof(msg)); > + vfio_user_request_msg(&msg.hdr, VFIO_USER_DEVICE_GET_IRQ_INFO, > + sizeof(msg), 0); > + msg.argsz = info->argsz; > + msg.index = info->index; > + > + vfio_user_send_recv(vbasedev->proxy, &msg.hdr, NULL, 0, 0); > + if (msg.hdr.flags & VFIO_USER_ERROR) { > + return -msg.hdr.error_reply; > + } > + > + memcpy(info, &msg.argsz, sizeof(*info)); Should this be info.count = msg.count instead? Not sure why argsz is used here. Also, I just noticed the lack of endianness conversion in this patch series. The spec says values are little-endian but these patches mostly use host-endian. Did I miss something?
> On Sep 7, 2021, at 8:14 AM, Stefan Hajnoczi <stefanha@redhat.com> wrote: > > On Mon, Aug 16, 2021 at 09:42:44AM -0700, Elena Ufimtseva wrote: >> From: John Johnson <john.g.johnson@oracle.com> >> >> Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com> >> Signed-off-by: John G Johnson <john.g.johnson@oracle.com> >> Signed-off-by: Jagannathan Raman <jag.raman@oracle.com> >> --- >> hw/vfio/user-protocol.h | 25 ++++++++++ >> hw/vfio/user.h | 2 + >> hw/vfio/common.c | 26 ++++++++-- >> hw/vfio/pci.c | 31 ++++++++++-- >> hw/vfio/user.c | 106 ++++++++++++++++++++++++++++++++++++++++ >> 5 files changed, 181 insertions(+), 9 deletions(-) >> >> diff --git a/hw/vfio/user-protocol.h b/hw/vfio/user-protocol.h >> index 56904cf872..5614efa0a4 100644 >> --- a/hw/vfio/user-protocol.h >> +++ b/hw/vfio/user-protocol.h >> @@ -109,6 +109,31 @@ typedef struct { >> uint64_t offset; >> } VFIOUserRegionInfo; >> >> +/* >> + * VFIO_USER_DEVICE_GET_IRQ_INFO >> + * imported from struct vfio_irq_info >> + */ >> +typedef struct { >> + VFIOUserHdr hdr; >> + uint32_t argsz; >> + uint32_t flags; >> + uint32_t index; >> + uint32_t count; >> +} VFIOUserIRQInfo; >> + >> +/* >> + * VFIO_USER_DEVICE_SET_IRQS >> + * imported from struct vfio_irq_set >> + */ >> +typedef struct { >> + VFIOUserHdr hdr; >> + uint32_t argsz; >> + uint32_t flags; >> + uint32_t index; >> + uint32_t start; >> + uint32_t count; >> +} VFIOUserIRQSet; >> + >> /* >> * VFIO_USER_REGION_READ >> * VFIO_USER_REGION_WRITE >> diff --git a/hw/vfio/user.h b/hw/vfio/user.h >> index 02f832a173..248ad80943 100644 >> --- a/hw/vfio/user.h >> +++ b/hw/vfio/user.h >> @@ -74,6 +74,8 @@ int vfio_user_validate_version(VFIODevice *vbasedev, Error **errp); >> int vfio_user_get_info(VFIODevice *vbasedev); >> int vfio_user_get_region_info(VFIODevice *vbasedev, int index, >> struct vfio_region_info *info, VFIOUserFDs *fds); >> +int vfio_user_get_irq_info(VFIODevice *vbasedev, struct vfio_irq_info *info); >> +int vfio_user_set_irqs(VFIODevice *vbasedev, struct vfio_irq_set *irq); >> int vfio_user_region_read(VFIODevice *vbasedev, uint32_t index, uint64_t offset, >> uint32_t count, void *data); >> int vfio_user_region_write(VFIODevice *vbasedev, uint32_t index, >> diff --git a/hw/vfio/common.c b/hw/vfio/common.c >> index a8b1ea9358..9fe3e05dc6 100644 >> --- a/hw/vfio/common.c >> +++ b/hw/vfio/common.c >> @@ -71,7 +71,11 @@ void vfio_disable_irqindex(VFIODevice *vbasedev, int index) >> .count = 0, >> }; >> >> - ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, &irq_set); >> + if (vbasedev->proxy != NULL) { >> + vfio_user_set_irqs(vbasedev, &irq_set); >> + } else { >> + ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, &irq_set); >> + } >> } >> >> void vfio_unmask_single_irqindex(VFIODevice *vbasedev, int index) >> @@ -84,7 +88,11 @@ void vfio_unmask_single_irqindex(VFIODevice *vbasedev, int index) >> .count = 1, >> }; >> >> - ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, &irq_set); >> + if (vbasedev->proxy != NULL) { >> + vfio_user_set_irqs(vbasedev, &irq_set); >> + } else { >> + ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, &irq_set); >> + } >> } >> >> void vfio_mask_single_irqindex(VFIODevice *vbasedev, int index) >> @@ -97,7 +105,11 @@ void vfio_mask_single_irqindex(VFIODevice *vbasedev, int index) >> .count = 1, >> }; >> >> - ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, &irq_set); >> + if (vbasedev->proxy != NULL) { >> + vfio_user_set_irqs(vbasedev, &irq_set); >> + } else { >> + ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, &irq_set); >> + } >> } >> >> static inline const char *action_to_str(int action) >> @@ -178,8 +190,12 @@ int vfio_set_irq_signaling(VFIODevice *vbasedev, int index, int subindex, >> pfd = (int32_t *)&irq_set->data; >> *pfd = fd; >> >> - if (ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, irq_set)) { >> - ret = -errno; >> + if (vbasedev->proxy != NULL) { >> + ret = vfio_user_set_irqs(vbasedev, irq_set); >> + } else { >> + if (ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, irq_set)) { >> + ret = -errno; >> + } >> } >> g_free(irq_set); >> >> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c >> index ea0df8be65..282de6a30b 100644 >> --- a/hw/vfio/pci.c >> +++ b/hw/vfio/pci.c >> @@ -403,7 +403,11 @@ static int vfio_enable_vectors(VFIOPCIDevice *vdev, bool msix) >> fds[i] = fd; >> } >> >> - ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_SET_IRQS, irq_set); >> + if (vdev->vbasedev.proxy != NULL) { >> + ret = vfio_user_set_irqs(&vdev->vbasedev, irq_set); >> + } else { >> + ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_SET_IRQS, irq_set); >> + } >> >> g_free(irq_set); >> >> @@ -2675,7 +2679,13 @@ static void vfio_populate_device(VFIOPCIDevice *vdev, Error **errp) >> >> irq_info.index = VFIO_PCI_ERR_IRQ_INDEX; >> >> - ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_IRQ_INFO, &irq_info); >> + if (vbasedev->proxy != NULL) { >> + ret = vfio_user_get_irq_info(vbasedev, &irq_info); >> + } else { >> + ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_IRQ_INFO, &irq_info); >> + } >> + >> + >> if (ret) { >> /* This can fail for an old kernel or legacy PCI dev */ >> trace_vfio_populate_device_get_irq_info_failure(strerror(errno)); >> @@ -2794,8 +2804,16 @@ static void vfio_register_req_notifier(VFIOPCIDevice *vdev) >> return; >> } >> >> - if (ioctl(vdev->vbasedev.fd, >> - VFIO_DEVICE_GET_IRQ_INFO, &irq_info) < 0 || irq_info.count < 1) { >> + if (vdev->vbasedev.proxy != NULL) { >> + if (vfio_user_get_irq_info(&vdev->vbasedev, &irq_info) < 0) { >> + return; >> + } >> + } else { >> + if (ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_IRQ_INFO, &irq_info) < 0) { >> + return; >> + } >> + } >> + if (irq_info.count < 1) { >> return; >> } >> >> @@ -3557,6 +3575,11 @@ static void vfio_user_pci_realize(PCIDevice *pdev, Error **errp) >> } >> } >> >> + vfio_register_err_notifier(vdev); >> + vfio_register_req_notifier(vdev); >> + >> + return; >> + >> out_deregister: >> pci_device_set_intx_routing_notifier(&vdev->pdev, NULL); >> kvm_irqchip_remove_change_notifier(&vdev->irqchip_change_notifier); >> diff --git a/hw/vfio/user.c b/hw/vfio/user.c >> index 83235b2411..b68ca1279d 100644 >> --- a/hw/vfio/user.c >> +++ b/hw/vfio/user.c >> @@ -768,6 +768,112 @@ int vfio_user_get_region_info(VFIODevice *vbasedev, int index, >> return 0; >> } >> >> +int vfio_user_get_irq_info(VFIODevice *vbasedev, struct vfio_irq_info *info) >> +{ >> + VFIOUserIRQInfo msg; >> + >> + memset(&msg, 0, sizeof(msg)); >> + vfio_user_request_msg(&msg.hdr, VFIO_USER_DEVICE_GET_IRQ_INFO, >> + sizeof(msg), 0); >> + msg.argsz = info->argsz; >> + msg.index = info->index; >> + >> + vfio_user_send_recv(vbasedev->proxy, &msg.hdr, NULL, 0, 0); >> + if (msg.hdr.flags & VFIO_USER_ERROR) { >> + return -msg.hdr.error_reply; >> + } >> + >> + memcpy(info, &msg.argsz, sizeof(*info)); > > Should this be info.count = msg.count instead? Not sure why argsz is > used here. It’s copying the entire returned vfio_irq_info struct, which starts at &msg.argsz. > > Also, I just noticed the lack of endianness conversion in this patch > series. The spec says values are little-endian but these patches mostly > use host-endian. Did I miss something? I had thought we were using host endianness for UNIX sockets and were deferring the cross endianness issue to when we support TCP, but the spec does say all LE. JJ
On Thu, Sep 09, 2021 at 05:50:39AM +0000, John Johnson wrote: > > > > On Sep 7, 2021, at 8:14 AM, Stefan Hajnoczi <stefanha@redhat.com> wrote: > > > > On Mon, Aug 16, 2021 at 09:42:44AM -0700, Elena Ufimtseva wrote: > >> From: John Johnson <john.g.johnson@oracle.com> > >> > >> Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com> > >> Signed-off-by: John G Johnson <john.g.johnson@oracle.com> > >> Signed-off-by: Jagannathan Raman <jag.raman@oracle.com> > >> --- > >> hw/vfio/user-protocol.h | 25 ++++++++++ > >> hw/vfio/user.h | 2 + > >> hw/vfio/common.c | 26 ++++++++-- > >> hw/vfio/pci.c | 31 ++++++++++-- > >> hw/vfio/user.c | 106 ++++++++++++++++++++++++++++++++++++++++ > >> 5 files changed, 181 insertions(+), 9 deletions(-) > >> > >> diff --git a/hw/vfio/user-protocol.h b/hw/vfio/user-protocol.h > >> index 56904cf872..5614efa0a4 100644 > >> --- a/hw/vfio/user-protocol.h > >> +++ b/hw/vfio/user-protocol.h > >> @@ -109,6 +109,31 @@ typedef struct { > >> uint64_t offset; > >> } VFIOUserRegionInfo; > >> > >> +/* > >> + * VFIO_USER_DEVICE_GET_IRQ_INFO > >> + * imported from struct vfio_irq_info > >> + */ > >> +typedef struct { > >> + VFIOUserHdr hdr; > >> + uint32_t argsz; > >> + uint32_t flags; > >> + uint32_t index; > >> + uint32_t count; > >> +} VFIOUserIRQInfo; > >> + > >> +/* > >> + * VFIO_USER_DEVICE_SET_IRQS > >> + * imported from struct vfio_irq_set > >> + */ > >> +typedef struct { > >> + VFIOUserHdr hdr; > >> + uint32_t argsz; > >> + uint32_t flags; > >> + uint32_t index; > >> + uint32_t start; > >> + uint32_t count; > >> +} VFIOUserIRQSet; > >> + > >> /* > >> * VFIO_USER_REGION_READ > >> * VFIO_USER_REGION_WRITE > >> diff --git a/hw/vfio/user.h b/hw/vfio/user.h > >> index 02f832a173..248ad80943 100644 > >> --- a/hw/vfio/user.h > >> +++ b/hw/vfio/user.h > >> @@ -74,6 +74,8 @@ int vfio_user_validate_version(VFIODevice *vbasedev, Error **errp); > >> int vfio_user_get_info(VFIODevice *vbasedev); > >> int vfio_user_get_region_info(VFIODevice *vbasedev, int index, > >> struct vfio_region_info *info, VFIOUserFDs *fds); > >> +int vfio_user_get_irq_info(VFIODevice *vbasedev, struct vfio_irq_info *info); > >> +int vfio_user_set_irqs(VFIODevice *vbasedev, struct vfio_irq_set *irq); > >> int vfio_user_region_read(VFIODevice *vbasedev, uint32_t index, uint64_t offset, > >> uint32_t count, void *data); > >> int vfio_user_region_write(VFIODevice *vbasedev, uint32_t index, > >> diff --git a/hw/vfio/common.c b/hw/vfio/common.c > >> index a8b1ea9358..9fe3e05dc6 100644 > >> --- a/hw/vfio/common.c > >> +++ b/hw/vfio/common.c > >> @@ -71,7 +71,11 @@ void vfio_disable_irqindex(VFIODevice *vbasedev, int index) > >> .count = 0, > >> }; > >> > >> - ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, &irq_set); > >> + if (vbasedev->proxy != NULL) { > >> + vfio_user_set_irqs(vbasedev, &irq_set); > >> + } else { > >> + ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, &irq_set); > >> + } > >> } > >> > >> void vfio_unmask_single_irqindex(VFIODevice *vbasedev, int index) > >> @@ -84,7 +88,11 @@ void vfio_unmask_single_irqindex(VFIODevice *vbasedev, int index) > >> .count = 1, > >> }; > >> > >> - ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, &irq_set); > >> + if (vbasedev->proxy != NULL) { > >> + vfio_user_set_irqs(vbasedev, &irq_set); > >> + } else { > >> + ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, &irq_set); > >> + } > >> } > >> > >> void vfio_mask_single_irqindex(VFIODevice *vbasedev, int index) > >> @@ -97,7 +105,11 @@ void vfio_mask_single_irqindex(VFIODevice *vbasedev, int index) > >> .count = 1, > >> }; > >> > >> - ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, &irq_set); > >> + if (vbasedev->proxy != NULL) { > >> + vfio_user_set_irqs(vbasedev, &irq_set); > >> + } else { > >> + ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, &irq_set); > >> + } > >> } > >> > >> static inline const char *action_to_str(int action) > >> @@ -178,8 +190,12 @@ int vfio_set_irq_signaling(VFIODevice *vbasedev, int index, int subindex, > >> pfd = (int32_t *)&irq_set->data; > >> *pfd = fd; > >> > >> - if (ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, irq_set)) { > >> - ret = -errno; > >> + if (vbasedev->proxy != NULL) { > >> + ret = vfio_user_set_irqs(vbasedev, irq_set); > >> + } else { > >> + if (ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, irq_set)) { > >> + ret = -errno; > >> + } > >> } > >> g_free(irq_set); > >> > >> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c > >> index ea0df8be65..282de6a30b 100644 > >> --- a/hw/vfio/pci.c > >> +++ b/hw/vfio/pci.c > >> @@ -403,7 +403,11 @@ static int vfio_enable_vectors(VFIOPCIDevice *vdev, bool msix) > >> fds[i] = fd; > >> } > >> > >> - ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_SET_IRQS, irq_set); > >> + if (vdev->vbasedev.proxy != NULL) { > >> + ret = vfio_user_set_irqs(&vdev->vbasedev, irq_set); > >> + } else { > >> + ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_SET_IRQS, irq_set); > >> + } > >> > >> g_free(irq_set); > >> > >> @@ -2675,7 +2679,13 @@ static void vfio_populate_device(VFIOPCIDevice *vdev, Error **errp) > >> > >> irq_info.index = VFIO_PCI_ERR_IRQ_INDEX; > >> > >> - ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_IRQ_INFO, &irq_info); > >> + if (vbasedev->proxy != NULL) { > >> + ret = vfio_user_get_irq_info(vbasedev, &irq_info); > >> + } else { > >> + ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_IRQ_INFO, &irq_info); > >> + } > >> + > >> + > >> if (ret) { > >> /* This can fail for an old kernel or legacy PCI dev */ > >> trace_vfio_populate_device_get_irq_info_failure(strerror(errno)); > >> @@ -2794,8 +2804,16 @@ static void vfio_register_req_notifier(VFIOPCIDevice *vdev) > >> return; > >> } > >> > >> - if (ioctl(vdev->vbasedev.fd, > >> - VFIO_DEVICE_GET_IRQ_INFO, &irq_info) < 0 || irq_info.count < 1) { > >> + if (vdev->vbasedev.proxy != NULL) { > >> + if (vfio_user_get_irq_info(&vdev->vbasedev, &irq_info) < 0) { > >> + return; > >> + } > >> + } else { > >> + if (ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_IRQ_INFO, &irq_info) < 0) { > >> + return; > >> + } > >> + } > >> + if (irq_info.count < 1) { > >> return; > >> } > >> > >> @@ -3557,6 +3575,11 @@ static void vfio_user_pci_realize(PCIDevice *pdev, Error **errp) > >> } > >> } > >> > >> + vfio_register_err_notifier(vdev); > >> + vfio_register_req_notifier(vdev); > >> + > >> + return; > >> + > >> out_deregister: > >> pci_device_set_intx_routing_notifier(&vdev->pdev, NULL); > >> kvm_irqchip_remove_change_notifier(&vdev->irqchip_change_notifier); > >> diff --git a/hw/vfio/user.c b/hw/vfio/user.c > >> index 83235b2411..b68ca1279d 100644 > >> --- a/hw/vfio/user.c > >> +++ b/hw/vfio/user.c > >> @@ -768,6 +768,112 @@ int vfio_user_get_region_info(VFIODevice *vbasedev, int index, > >> return 0; > >> } > >> > >> +int vfio_user_get_irq_info(VFIODevice *vbasedev, struct vfio_irq_info *info) > >> +{ > >> + VFIOUserIRQInfo msg; > >> + > >> + memset(&msg, 0, sizeof(msg)); > >> + vfio_user_request_msg(&msg.hdr, VFIO_USER_DEVICE_GET_IRQ_INFO, > >> + sizeof(msg), 0); > >> + msg.argsz = info->argsz; > >> + msg.index = info->index; > >> + > >> + vfio_user_send_recv(vbasedev->proxy, &msg.hdr, NULL, 0, 0); > >> + if (msg.hdr.flags & VFIO_USER_ERROR) { > >> + return -msg.hdr.error_reply; > >> + } > >> + > >> + memcpy(info, &msg.argsz, sizeof(*info)); > > > > Should this be info.count = msg.count instead? Not sure why argsz is > > used here. > > It’s copying the entire returned vfio_irq_info struct, which starts > at &msg.argsz. That makes sense, I missed it. Thanks! Stefan
diff --git a/hw/vfio/user-protocol.h b/hw/vfio/user-protocol.h index 56904cf872..5614efa0a4 100644 --- a/hw/vfio/user-protocol.h +++ b/hw/vfio/user-protocol.h @@ -109,6 +109,31 @@ typedef struct { uint64_t offset; } VFIOUserRegionInfo; +/* + * VFIO_USER_DEVICE_GET_IRQ_INFO + * imported from struct vfio_irq_info + */ +typedef struct { + VFIOUserHdr hdr; + uint32_t argsz; + uint32_t flags; + uint32_t index; + uint32_t count; +} VFIOUserIRQInfo; + +/* + * VFIO_USER_DEVICE_SET_IRQS + * imported from struct vfio_irq_set + */ +typedef struct { + VFIOUserHdr hdr; + uint32_t argsz; + uint32_t flags; + uint32_t index; + uint32_t start; + uint32_t count; +} VFIOUserIRQSet; + /* * VFIO_USER_REGION_READ * VFIO_USER_REGION_WRITE diff --git a/hw/vfio/user.h b/hw/vfio/user.h index 02f832a173..248ad80943 100644 --- a/hw/vfio/user.h +++ b/hw/vfio/user.h @@ -74,6 +74,8 @@ int vfio_user_validate_version(VFIODevice *vbasedev, Error **errp); int vfio_user_get_info(VFIODevice *vbasedev); int vfio_user_get_region_info(VFIODevice *vbasedev, int index, struct vfio_region_info *info, VFIOUserFDs *fds); +int vfio_user_get_irq_info(VFIODevice *vbasedev, struct vfio_irq_info *info); +int vfio_user_set_irqs(VFIODevice *vbasedev, struct vfio_irq_set *irq); int vfio_user_region_read(VFIODevice *vbasedev, uint32_t index, uint64_t offset, uint32_t count, void *data); int vfio_user_region_write(VFIODevice *vbasedev, uint32_t index, diff --git a/hw/vfio/common.c b/hw/vfio/common.c index a8b1ea9358..9fe3e05dc6 100644 --- a/hw/vfio/common.c +++ b/hw/vfio/common.c @@ -71,7 +71,11 @@ void vfio_disable_irqindex(VFIODevice *vbasedev, int index) .count = 0, }; - ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, &irq_set); + if (vbasedev->proxy != NULL) { + vfio_user_set_irqs(vbasedev, &irq_set); + } else { + ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, &irq_set); + } } void vfio_unmask_single_irqindex(VFIODevice *vbasedev, int index) @@ -84,7 +88,11 @@ void vfio_unmask_single_irqindex(VFIODevice *vbasedev, int index) .count = 1, }; - ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, &irq_set); + if (vbasedev->proxy != NULL) { + vfio_user_set_irqs(vbasedev, &irq_set); + } else { + ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, &irq_set); + } } void vfio_mask_single_irqindex(VFIODevice *vbasedev, int index) @@ -97,7 +105,11 @@ void vfio_mask_single_irqindex(VFIODevice *vbasedev, int index) .count = 1, }; - ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, &irq_set); + if (vbasedev->proxy != NULL) { + vfio_user_set_irqs(vbasedev, &irq_set); + } else { + ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, &irq_set); + } } static inline const char *action_to_str(int action) @@ -178,8 +190,12 @@ int vfio_set_irq_signaling(VFIODevice *vbasedev, int index, int subindex, pfd = (int32_t *)&irq_set->data; *pfd = fd; - if (ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, irq_set)) { - ret = -errno; + if (vbasedev->proxy != NULL) { + ret = vfio_user_set_irqs(vbasedev, irq_set); + } else { + if (ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, irq_set)) { + ret = -errno; + } } g_free(irq_set); diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index ea0df8be65..282de6a30b 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -403,7 +403,11 @@ static int vfio_enable_vectors(VFIOPCIDevice *vdev, bool msix) fds[i] = fd; } - ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_SET_IRQS, irq_set); + if (vdev->vbasedev.proxy != NULL) { + ret = vfio_user_set_irqs(&vdev->vbasedev, irq_set); + } else { + ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_SET_IRQS, irq_set); + } g_free(irq_set); @@ -2675,7 +2679,13 @@ static void vfio_populate_device(VFIOPCIDevice *vdev, Error **errp) irq_info.index = VFIO_PCI_ERR_IRQ_INDEX; - ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_IRQ_INFO, &irq_info); + if (vbasedev->proxy != NULL) { + ret = vfio_user_get_irq_info(vbasedev, &irq_info); + } else { + ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_IRQ_INFO, &irq_info); + } + + if (ret) { /* This can fail for an old kernel or legacy PCI dev */ trace_vfio_populate_device_get_irq_info_failure(strerror(errno)); @@ -2794,8 +2804,16 @@ static void vfio_register_req_notifier(VFIOPCIDevice *vdev) return; } - if (ioctl(vdev->vbasedev.fd, - VFIO_DEVICE_GET_IRQ_INFO, &irq_info) < 0 || irq_info.count < 1) { + if (vdev->vbasedev.proxy != NULL) { + if (vfio_user_get_irq_info(&vdev->vbasedev, &irq_info) < 0) { + return; + } + } else { + if (ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_IRQ_INFO, &irq_info) < 0) { + return; + } + } + if (irq_info.count < 1) { return; } @@ -3557,6 +3575,11 @@ static void vfio_user_pci_realize(PCIDevice *pdev, Error **errp) } } + vfio_register_err_notifier(vdev); + vfio_register_req_notifier(vdev); + + return; + out_deregister: pci_device_set_intx_routing_notifier(&vdev->pdev, NULL); kvm_irqchip_remove_change_notifier(&vdev->irqchip_change_notifier); diff --git a/hw/vfio/user.c b/hw/vfio/user.c index 83235b2411..b68ca1279d 100644 --- a/hw/vfio/user.c +++ b/hw/vfio/user.c @@ -768,6 +768,112 @@ int vfio_user_get_region_info(VFIODevice *vbasedev, int index, return 0; } +int vfio_user_get_irq_info(VFIODevice *vbasedev, struct vfio_irq_info *info) +{ + VFIOUserIRQInfo msg; + + memset(&msg, 0, sizeof(msg)); + vfio_user_request_msg(&msg.hdr, VFIO_USER_DEVICE_GET_IRQ_INFO, + sizeof(msg), 0); + msg.argsz = info->argsz; + msg.index = info->index; + + vfio_user_send_recv(vbasedev->proxy, &msg.hdr, NULL, 0, 0); + if (msg.hdr.flags & VFIO_USER_ERROR) { + return -msg.hdr.error_reply; + } + + memcpy(info, &msg.argsz, sizeof(*info)); + return 0; +} + +static int irq_howmany(int *fdp, int cur, int max) +{ + int n = 0; + + if (fdp[cur] != -1) { + do { + n++; + } while (n < max && fdp[cur + n] != -1 && n < max_send_fds); + } else { + do { + n++; + } while (n < max && fdp[cur + n] == -1 && n < max_send_fds); + } + + return n; +} + +int vfio_user_set_irqs(VFIODevice *vbasedev, struct vfio_irq_set *irq) +{ + g_autofree VFIOUserIRQSet *msgp = NULL; + uint32_t size, nfds, send_fds, sent_fds; + + if (irq->argsz < sizeof(*irq)) { + error_printf("vfio_user_set_irqs argsz too small\n"); + return -EINVAL; + } + + /* + * Handle simple case + */ + if ((irq->flags & VFIO_IRQ_SET_DATA_EVENTFD) == 0) { + size = sizeof(VFIOUserHdr) + irq->argsz; + msgp = g_malloc0(size); + + vfio_user_request_msg(&msgp->hdr, VFIO_USER_DEVICE_SET_IRQS, size, 0); + msgp->argsz = irq->argsz; + msgp->flags = irq->flags; + msgp->index = irq->index; + msgp->start = irq->start; + msgp->count = irq->count; + + vfio_user_send_recv(vbasedev->proxy, &msgp->hdr, NULL, 0, 0); + if (msgp->hdr.flags & VFIO_USER_ERROR) { + return -msgp->hdr.error_reply; + } + + return 0; + } + + /* + * Calculate the number of FDs to send + * and adjust argsz + */ + nfds = (irq->argsz - sizeof(*irq)) / sizeof(int); + irq->argsz = sizeof(*irq); + msgp = g_malloc0(sizeof(*msgp)); + /* + * Send in chunks if over max_send_fds + */ + for (sent_fds = 0; nfds > sent_fds; sent_fds += send_fds) { + VFIOUserFDs *arg_fds, loop_fds; + + /* must send all valid FDs or all invalid FDs in single msg */ + send_fds = irq_howmany((int *)irq->data, sent_fds, nfds - sent_fds); + + vfio_user_request_msg(&msgp->hdr, VFIO_USER_DEVICE_SET_IRQS, + sizeof(*msgp), 0); + msgp->argsz = irq->argsz; + msgp->flags = irq->flags; + msgp->index = irq->index; + msgp->start = irq->start + sent_fds; + msgp->count = send_fds; + + loop_fds.send_fds = send_fds; + loop_fds.recv_fds = 0; + loop_fds.fds = (int *)irq->data + sent_fds; + arg_fds = loop_fds.fds[0] != -1 ? &loop_fds : NULL; + + vfio_user_send_recv(vbasedev->proxy, &msgp->hdr, arg_fds, 0, 0); + if (msgp->hdr.flags & VFIO_USER_ERROR) { + return -msgp->hdr.error_reply; + } + } + + return 0; +} + int vfio_user_region_read(VFIODevice *vbasedev, uint32_t index, uint64_t offset, uint32_t count, void *data) {