Message ID | b5ae7ec3fe3fb88166fe80e8bf0cbba9e85088e0.1667542066.git.john.g.johnson@oracle.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | vfio-user client | expand |
On Tue, Nov 08, 2022 at 03:13:33PM -0800, John Johnson wrote: > Add per-region FD to support mmap() of remote device regions > > +/* > + * VFIO_USER_DEVICE_GET_REGION_INFO > + * imported from struct_vfio_region_info s/struct_vfio_region_info/struct vfio_region_info/ regards john
On 11/9/22 00:13, John Johnson wrote: > Add per-region FD to support mmap() of remote device regions > > Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com> > Signed-off-by: John G Johnson <john.g.johnson@oracle.com> > Signed-off-by: Jagannathan Raman <jag.raman@oracle.com> > --- > hw/vfio/common.c | 32 ++++++++++++++++++++--- > hw/vfio/user-protocol.h | 14 ++++++++++ > hw/vfio/user.c | 59 +++++++++++++++++++++++++++++++++++++++++++ > include/hw/vfio/vfio-common.h | 8 +++--- > 4 files changed, 107 insertions(+), 6 deletions(-) > > diff --git a/hw/vfio/common.c b/hw/vfio/common.c > index c589bd9..87400b3 100644 > --- a/hw/vfio/common.c > +++ b/hw/vfio/common.c > @@ -41,6 +41,7 @@ > #include "qapi/error.h" > #include "migration/migration.h" > #include "sysemu/tpm.h" > +#include "hw/vfio/user.h" > > VFIOGroupList vfio_group_list = > QLIST_HEAD_INITIALIZER(vfio_group_list); > @@ -1586,6 +1587,11 @@ int vfio_region_setup(Object *obj, VFIODevice *vbasedev, VFIORegion *region, > region->size = info->size; > region->fd_offset = info->offset; > region->nr = index; > + if (vbasedev->regfds != NULL) { > + region->fd = vbasedev->regfds[index]; > + } else { > + region->fd = vbasedev->fd; > + } > > if (region->size) { > region->mem = g_new0(MemoryRegion, 1); > @@ -1637,7 +1643,7 @@ int vfio_region_mmap(VFIORegion *region) > > for (i = 0; i < region->nr_mmaps; i++) { > region->mmaps[i].mmap = mmap(NULL, region->mmaps[i].size, prot, > - MAP_SHARED, region->vbasedev->fd, > + MAP_SHARED, region->fd, > region->fd_offset + > region->mmaps[i].offset); > if (region->mmaps[i].mmap == MAP_FAILED) { > @@ -2442,10 +2448,17 @@ void vfio_put_base_device(VFIODevice *vbasedev) > int i; > > for (i = 0; i < vbasedev->num_regions; i++) { > + if (vbasedev->regfds != NULL && vbasedev->regfds[i] != -1) { > + close(vbasedev->regfds[i]); > + } > g_free(vbasedev->regions[i]); > } > g_free(vbasedev->regions); > vbasedev->regions = NULL; > + if (vbasedev->regfds != NULL) { > + g_free(vbasedev->regfds); > + vbasedev->regfds = NULL; > + } > } > > if (!vbasedev->group) { > @@ -2461,12 +2474,16 @@ int vfio_get_region_info(VFIODevice *vbasedev, int index, > struct vfio_region_info **info) > { > size_t argsz = sizeof(struct vfio_region_info); > + int fd = -1; > int ret; > > /* create region cache */ > if (vbasedev->regions == NULL) { > vbasedev->regions = g_new0(struct vfio_region_info *, > vbasedev->num_regions); > + if (vbasedev->proxy != NULL) { This looks like a shortcut. Could we introduce at least an inline helper returning the need to allocate an fd per region ? It could use the same kind of heuristic but it would be hidden under a helper. > + vbasedev->regfds = g_new0(int, vbasedev->num_regions); > + } > } > /* check cache */ > if (vbasedev->regions[index] != NULL) { > @@ -2480,7 +2497,7 @@ int vfio_get_region_info(VFIODevice *vbasedev, int index, > retry: > (*info)->argsz = argsz; > > - ret = VDEV_GET_REGION_INFO(vbasedev, *info); > + ret = VDEV_GET_REGION_INFO(vbasedev, *info, &fd); > if (ret != 0) { > g_free(*info); > *info = NULL; > @@ -2490,12 +2507,19 @@ retry: > if ((*info)->argsz > argsz) { > argsz = (*info)->argsz; > *info = g_realloc(*info, argsz); > + if (fd != -1) { > + close(fd); > + fd = -1; > + } > > goto retry; > } > > /* fill cache */ > vbasedev->regions[index] = *info; > + if (vbasedev->regfds != NULL) { > + vbasedev->regfds[index] = fd; > + } > > return 0; > } > @@ -2655,10 +2679,12 @@ static int vfio_io_get_info(VFIODevice *vbasedev, struct vfio_device_info *info) > } > > static int vfio_io_get_region_info(VFIODevice *vbasedev, > - struct vfio_region_info *info) > + struct vfio_region_info *info, > + int *fd) > { > int ret; > > + *fd = -1; > ret = ioctl(vbasedev->fd, VFIO_DEVICE_GET_REGION_INFO, info); > > return ret < 0 ? -errno : ret; > diff --git a/hw/vfio/user-protocol.h b/hw/vfio/user-protocol.h > index 43912a5..a1b64fe 100644 > --- a/hw/vfio/user-protocol.h > +++ b/hw/vfio/user-protocol.h > @@ -126,4 +126,18 @@ typedef struct { > uint32_t cap_offset; > } VFIOUserDeviceInfo; > > +/* > + * VFIO_USER_DEVICE_GET_REGION_INFO > + * imported from struct_vfio_region_info > + */ > +typedef struct { > + VFIOUserHdr hdr; > + uint32_t argsz; > + uint32_t flags; > + uint32_t index; > + uint32_t cap_offset; > + uint64_t size; > + uint64_t offset; > +} VFIOUserRegionInfo; > + > #endif /* VFIO_USER_PROTOCOL_H */ > diff --git a/hw/vfio/user.c b/hw/vfio/user.c > index 7873534..69b0fed 100644 > --- a/hw/vfio/user.c > +++ b/hw/vfio/user.c > @@ -1101,6 +1101,40 @@ static int vfio_user_get_info(VFIOProxy *proxy, struct vfio_device_info *info) > return 0; > } > > +static int vfio_user_get_region_info(VFIOProxy *proxy, > + struct vfio_region_info *info, > + VFIOUserFDs *fds) > +{ > + g_autofree VFIOUserRegionInfo *msgp = NULL; > + uint32_t size; > + > + /* data returned can be larger than vfio_region_info */ > + if (info->argsz < sizeof(*info)) { > + error_printf("vfio_user_get_region_info argsz too small\n"); It would be nice to report the errors at the upper layer and remove all the error_printf(). Since it seems too difficult in this case, may be return -E2BIG. Thanks, C. > + return -EINVAL; > + } > + if (fds != NULL && fds->send_fds != 0) { > + error_printf("vfio_user_get_region_info can't send FDs\n"); > + return -EINVAL; > + } > + > + size = info->argsz + sizeof(VFIOUserHdr); > + msgp = g_malloc0(size); > + > + vfio_user_request_msg(&msgp->hdr, VFIO_USER_DEVICE_GET_REGION_INFO, > + sizeof(*msgp), 0); > + msgp->argsz = info->argsz; > + msgp->index = info->index; > + > + vfio_user_send_wait(proxy, &msgp->hdr, fds, size, false); > + if (msgp->hdr.flags & VFIO_USER_ERROR) { > + return -msgp->hdr.error_reply; > + } > + > + memcpy(info, &msgp->argsz, info->argsz); > + return 0; > +} > + > > /* > * Socket-based io_ops > @@ -1126,7 +1160,32 @@ static int vfio_user_io_get_info(VFIODevice *vbasedev, > return 0; > } > > +static int vfio_user_io_get_region_info(VFIODevice *vbasedev, > + struct vfio_region_info *info, > + int *fd) > +{ > + int ret; > + VFIOUserFDs fds = { 0, 1, fd}; > + > + ret = vfio_user_get_region_info(vbasedev->proxy, info, &fds); > + if (ret) { > + return ret; > + } > + > + if (info->index > vbasedev->num_regions) { > + return -EINVAL; > + } > + /* cap_offset in valid area */ > + if ((info->flags & VFIO_REGION_INFO_FLAG_CAPS) && > + (info->cap_offset < sizeof(*info) || info->cap_offset > info->argsz)) { > + return -EINVAL; > + } > + > + return 0; > +} > + > VFIODevIO vfio_dev_io_sock = { > .get_info = vfio_user_io_get_info, > + .get_region_info = vfio_user_io_get_region_info, > }; > > diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h > index fb7d865..3406e6a 100644 > --- a/include/hw/vfio/vfio-common.h > +++ b/include/hw/vfio/vfio-common.h > @@ -56,6 +56,7 @@ typedef struct VFIORegion { > uint32_t nr_mmaps; > VFIOMmap *mmaps; > uint8_t nr; /* cache the region number for debug */ > + int fd; /* fd to mmap() region */ > } VFIORegion; > > typedef struct VFIOMigration { > @@ -150,6 +151,7 @@ typedef struct VFIODevice { > OnOffAuto pre_copy_dirty_page_tracking; > VFIOProxy *proxy; > struct vfio_region_info **regions; > + int *regfds; > } VFIODevice; > > struct VFIODeviceOps { > @@ -172,7 +174,7 @@ struct VFIODeviceOps { > struct VFIODevIO { > int (*get_info)(VFIODevice *vdev, struct vfio_device_info *info); > int (*get_region_info)(VFIODevice *vdev, > - struct vfio_region_info *info); > + struct vfio_region_info *info, int *fd); > int (*get_irq_info)(VFIODevice *vdev, struct vfio_irq_info *irq); > int (*set_irqs)(VFIODevice *vdev, struct vfio_irq_set *irqs); > int (*region_read)(VFIODevice *vdev, uint8_t nr, off_t off, uint32_t size, > @@ -183,8 +185,8 @@ struct VFIODevIO { > > #define VDEV_GET_INFO(vdev, info) \ > ((vdev)->io_ops->get_info((vdev), (info))) > -#define VDEV_GET_REGION_INFO(vdev, info) \ > - ((vdev)->io_ops->get_region_info((vdev), (info))) > +#define VDEV_GET_REGION_INFO(vdev, info, fd) \ > + ((vdev)->io_ops->get_region_info((vdev), (info), (fd))) > #define VDEV_GET_IRQ_INFO(vdev, irq) \ > ((vdev)->io_ops->get_irq_info((vdev), (irq))) > #define VDEV_SET_IRQS(vdev, irqs) \
diff --git a/hw/vfio/common.c b/hw/vfio/common.c index c589bd9..87400b3 100644 --- a/hw/vfio/common.c +++ b/hw/vfio/common.c @@ -41,6 +41,7 @@ #include "qapi/error.h" #include "migration/migration.h" #include "sysemu/tpm.h" +#include "hw/vfio/user.h" VFIOGroupList vfio_group_list = QLIST_HEAD_INITIALIZER(vfio_group_list); @@ -1586,6 +1587,11 @@ int vfio_region_setup(Object *obj, VFIODevice *vbasedev, VFIORegion *region, region->size = info->size; region->fd_offset = info->offset; region->nr = index; + if (vbasedev->regfds != NULL) { + region->fd = vbasedev->regfds[index]; + } else { + region->fd = vbasedev->fd; + } if (region->size) { region->mem = g_new0(MemoryRegion, 1); @@ -1637,7 +1643,7 @@ int vfio_region_mmap(VFIORegion *region) for (i = 0; i < region->nr_mmaps; i++) { region->mmaps[i].mmap = mmap(NULL, region->mmaps[i].size, prot, - MAP_SHARED, region->vbasedev->fd, + MAP_SHARED, region->fd, region->fd_offset + region->mmaps[i].offset); if (region->mmaps[i].mmap == MAP_FAILED) { @@ -2442,10 +2448,17 @@ void vfio_put_base_device(VFIODevice *vbasedev) int i; for (i = 0; i < vbasedev->num_regions; i++) { + if (vbasedev->regfds != NULL && vbasedev->regfds[i] != -1) { + close(vbasedev->regfds[i]); + } g_free(vbasedev->regions[i]); } g_free(vbasedev->regions); vbasedev->regions = NULL; + if (vbasedev->regfds != NULL) { + g_free(vbasedev->regfds); + vbasedev->regfds = NULL; + } } if (!vbasedev->group) { @@ -2461,12 +2474,16 @@ int vfio_get_region_info(VFIODevice *vbasedev, int index, struct vfio_region_info **info) { size_t argsz = sizeof(struct vfio_region_info); + int fd = -1; int ret; /* create region cache */ if (vbasedev->regions == NULL) { vbasedev->regions = g_new0(struct vfio_region_info *, vbasedev->num_regions); + if (vbasedev->proxy != NULL) { + vbasedev->regfds = g_new0(int, vbasedev->num_regions); + } } /* check cache */ if (vbasedev->regions[index] != NULL) { @@ -2480,7 +2497,7 @@ int vfio_get_region_info(VFIODevice *vbasedev, int index, retry: (*info)->argsz = argsz; - ret = VDEV_GET_REGION_INFO(vbasedev, *info); + ret = VDEV_GET_REGION_INFO(vbasedev, *info, &fd); if (ret != 0) { g_free(*info); *info = NULL; @@ -2490,12 +2507,19 @@ retry: if ((*info)->argsz > argsz) { argsz = (*info)->argsz; *info = g_realloc(*info, argsz); + if (fd != -1) { + close(fd); + fd = -1; + } goto retry; } /* fill cache */ vbasedev->regions[index] = *info; + if (vbasedev->regfds != NULL) { + vbasedev->regfds[index] = fd; + } return 0; } @@ -2655,10 +2679,12 @@ static int vfio_io_get_info(VFIODevice *vbasedev, struct vfio_device_info *info) } static int vfio_io_get_region_info(VFIODevice *vbasedev, - struct vfio_region_info *info) + struct vfio_region_info *info, + int *fd) { int ret; + *fd = -1; ret = ioctl(vbasedev->fd, VFIO_DEVICE_GET_REGION_INFO, info); return ret < 0 ? -errno : ret; diff --git a/hw/vfio/user-protocol.h b/hw/vfio/user-protocol.h index 43912a5..a1b64fe 100644 --- a/hw/vfio/user-protocol.h +++ b/hw/vfio/user-protocol.h @@ -126,4 +126,18 @@ typedef struct { uint32_t cap_offset; } VFIOUserDeviceInfo; +/* + * VFIO_USER_DEVICE_GET_REGION_INFO + * imported from struct_vfio_region_info + */ +typedef struct { + VFIOUserHdr hdr; + uint32_t argsz; + uint32_t flags; + uint32_t index; + uint32_t cap_offset; + uint64_t size; + uint64_t offset; +} VFIOUserRegionInfo; + #endif /* VFIO_USER_PROTOCOL_H */ diff --git a/hw/vfio/user.c b/hw/vfio/user.c index 7873534..69b0fed 100644 --- a/hw/vfio/user.c +++ b/hw/vfio/user.c @@ -1101,6 +1101,40 @@ static int vfio_user_get_info(VFIOProxy *proxy, struct vfio_device_info *info) return 0; } +static int vfio_user_get_region_info(VFIOProxy *proxy, + struct vfio_region_info *info, + VFIOUserFDs *fds) +{ + g_autofree VFIOUserRegionInfo *msgp = NULL; + uint32_t size; + + /* data returned can be larger than vfio_region_info */ + if (info->argsz < sizeof(*info)) { + error_printf("vfio_user_get_region_info argsz too small\n"); + return -EINVAL; + } + if (fds != NULL && fds->send_fds != 0) { + error_printf("vfio_user_get_region_info can't send FDs\n"); + return -EINVAL; + } + + size = info->argsz + sizeof(VFIOUserHdr); + msgp = g_malloc0(size); + + vfio_user_request_msg(&msgp->hdr, VFIO_USER_DEVICE_GET_REGION_INFO, + sizeof(*msgp), 0); + msgp->argsz = info->argsz; + msgp->index = info->index; + + vfio_user_send_wait(proxy, &msgp->hdr, fds, size, false); + if (msgp->hdr.flags & VFIO_USER_ERROR) { + return -msgp->hdr.error_reply; + } + + memcpy(info, &msgp->argsz, info->argsz); + return 0; +} + /* * Socket-based io_ops @@ -1126,7 +1160,32 @@ static int vfio_user_io_get_info(VFIODevice *vbasedev, return 0; } +static int vfio_user_io_get_region_info(VFIODevice *vbasedev, + struct vfio_region_info *info, + int *fd) +{ + int ret; + VFIOUserFDs fds = { 0, 1, fd}; + + ret = vfio_user_get_region_info(vbasedev->proxy, info, &fds); + if (ret) { + return ret; + } + + if (info->index > vbasedev->num_regions) { + return -EINVAL; + } + /* cap_offset in valid area */ + if ((info->flags & VFIO_REGION_INFO_FLAG_CAPS) && + (info->cap_offset < sizeof(*info) || info->cap_offset > info->argsz)) { + return -EINVAL; + } + + return 0; +} + VFIODevIO vfio_dev_io_sock = { .get_info = vfio_user_io_get_info, + .get_region_info = vfio_user_io_get_region_info, }; diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h index fb7d865..3406e6a 100644 --- a/include/hw/vfio/vfio-common.h +++ b/include/hw/vfio/vfio-common.h @@ -56,6 +56,7 @@ typedef struct VFIORegion { uint32_t nr_mmaps; VFIOMmap *mmaps; uint8_t nr; /* cache the region number for debug */ + int fd; /* fd to mmap() region */ } VFIORegion; typedef struct VFIOMigration { @@ -150,6 +151,7 @@ typedef struct VFIODevice { OnOffAuto pre_copy_dirty_page_tracking; VFIOProxy *proxy; struct vfio_region_info **regions; + int *regfds; } VFIODevice; struct VFIODeviceOps { @@ -172,7 +174,7 @@ struct VFIODeviceOps { struct VFIODevIO { int (*get_info)(VFIODevice *vdev, struct vfio_device_info *info); int (*get_region_info)(VFIODevice *vdev, - struct vfio_region_info *info); + struct vfio_region_info *info, int *fd); int (*get_irq_info)(VFIODevice *vdev, struct vfio_irq_info *irq); int (*set_irqs)(VFIODevice *vdev, struct vfio_irq_set *irqs); int (*region_read)(VFIODevice *vdev, uint8_t nr, off_t off, uint32_t size, @@ -183,8 +185,8 @@ struct VFIODevIO { #define VDEV_GET_INFO(vdev, info) \ ((vdev)->io_ops->get_info((vdev), (info))) -#define VDEV_GET_REGION_INFO(vdev, info) \ - ((vdev)->io_ops->get_region_info((vdev), (info))) +#define VDEV_GET_REGION_INFO(vdev, info, fd) \ + ((vdev)->io_ops->get_region_info((vdev), (info), (fd))) #define VDEV_GET_IRQ_INFO(vdev, irq) \ ((vdev)->io_ops->get_irq_info((vdev), (irq))) #define VDEV_SET_IRQS(vdev, irqs) \