Message ID | 32606ff56a2dd3f0d9ceaa91feb6a3c3fa6b98d5.1636057885.git.john.g.johnson@oracle.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | vfio-user client | expand |
On Mon, 8 Nov 2021 16:46:39 -0800 John Johnson <john.g.johnson@oracle.com> wrote: > Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com> > Signed-off-by: John G Johnson <john.g.johnson@oracle.com> > Signed-off-by: Jagannathan Raman <jag.raman@oracle.com> > --- > hw/vfio/user-protocol.h | 14 ++++++++++++ > include/hw/vfio/vfio-common.h | 4 +++- > hw/vfio/common.c | 30 +++++++++++++++++++++++++- > hw/vfio/user.c | 50 +++++++++++++++++++++++++++++++++++++++++++ > 4 files changed, 96 insertions(+), 2 deletions(-) > > diff --git a/hw/vfio/user-protocol.h b/hw/vfio/user-protocol.h > index 13e44eb..104bf4f 100644 > --- a/hw/vfio/user-protocol.h > +++ b/hw/vfio/user-protocol.h > @@ -95,4 +95,18 @@ typedef struct { > uint32_t cap_offset; > } VFIOUserDeviceInfo; > > +/* > + * VFIO_USER_DEVICE_GET_REGION_INFO > + * imported from struct_vfio_region_info > + */ > +typedef struct { > + VFIOUserHdr hdr; > + uint32_t argsz; > + uint32_t flags; > + uint32_t index; > + uint32_t cap_offset; > + uint64_t size; > + uint64_t offset; > +} VFIOUserRegionInfo; > + > #endif /* VFIO_USER_PROTOCOL_H */ > diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h > index 224dbf8..e2d7ee1 100644 > --- a/include/hw/vfio/vfio-common.h > +++ b/include/hw/vfio/vfio-common.h > @@ -56,6 +56,7 @@ typedef struct VFIORegion { > uint32_t nr_mmaps; > VFIOMmap *mmaps; > uint8_t nr; /* cache the region number for debug */ > + int remfd; /* fd if exported from remote process */ > } VFIORegion; > > typedef struct VFIOMigration { > @@ -150,8 +151,9 @@ typedef struct VFIODevice { > VFIOMigration *migration; > Error *migration_blocker; > OnOffAuto pre_copy_dirty_page_tracking; > - struct vfio_region_info **regions; > VFIOProxy *proxy; > + struct vfio_region_info **regions; > + int *regfds; > } VFIODevice; > > struct VFIODeviceOps { > diff --git a/hw/vfio/common.c b/hw/vfio/common.c > index 41fdd78..47ec28f 100644 > --- a/hw/vfio/common.c > +++ b/hw/vfio/common.c > @@ -40,6 +40,7 @@ > #include "trace.h" > #include "qapi/error.h" > #include "migration/migration.h" > +#include "hw/vfio/user.h" > > VFIOGroupList vfio_group_list = > QLIST_HEAD_INITIALIZER(vfio_group_list); > @@ -1491,6 +1492,16 @@ bool vfio_get_info_dma_avail(struct vfio_iommu_type1_info *info, > return true; > } > > +static int vfio_get_region_info_remfd(VFIODevice *vbasedev, int index) > +{ > + struct vfio_region_info *info; > + > + if (vbasedev->regions == NULL || vbasedev->regions[index] == NULL) { > + vfio_get_region_info(vbasedev, index, &info); > + } > + return vbasedev->regfds != NULL ? vbasedev->regfds[index] : -1; > +} This patch is really obscure due to the region info fd being added many patches ago and only now being used. Do we really want a parallel array to regions for storing these fds? Why do we call an array of these fds "regfds" but a single one "remfd"? Ugh, why do we have both the regfds array and a remfd per VFIORegion? TBH, I'm still not sure why we're caching region infos at all, this seems to be gratuitously bloated. > + > static int vfio_setup_region_sparse_mmaps(VFIORegion *region, > struct vfio_region_info *info) > { > @@ -1544,6 +1555,7 @@ int vfio_region_setup(Object *obj, VFIODevice *vbasedev, VFIORegion *region, > region->size = info->size; > region->fd_offset = info->offset; > region->nr = index; > + region->remfd = vfio_get_region_info_remfd(vbasedev, index); Why didn't we just get an fd back from vfio_get_region_info() that we could use here? > > if (region->size) { > region->mem = g_new0(MemoryRegion, 1); > @@ -1587,6 +1599,7 @@ int vfio_region_mmap(VFIORegion *region) > { > int i, prot = 0; > char *name; > + int fd; > > if (!region->mem) { > return 0; > @@ -1595,9 +1608,11 @@ int vfio_region_mmap(VFIORegion *region) > prot |= region->flags & VFIO_REGION_INFO_FLAG_READ ? PROT_READ : 0; > prot |= region->flags & VFIO_REGION_INFO_FLAG_WRITE ? PROT_WRITE : 0; > > + fd = region->remfd != -1 ? region->remfd : region->vbasedev->fd; Gack, why can't VFIORegion.fd be either the remote process fd or the vbasedevfd to avoid all these switches? Thanks, Alex > + > for (i = 0; i < region->nr_mmaps; i++) { > region->mmaps[i].mmap = mmap(NULL, region->mmaps[i].size, prot, > - MAP_SHARED, region->vbasedev->fd, > + MAP_SHARED, fd, > region->fd_offset + > region->mmaps[i].offset); > if (region->mmaps[i].mmap == MAP_FAILED) { > @@ -2379,10 +2394,17 @@ void vfio_put_base_device(VFIODevice *vbasedev) > int i; > > for (i = 0; i < vbasedev->num_regions; i++) { > + if (vbasedev->regfds != NULL && vbasedev->regfds[i] != -1) { > + close(vbasedev->regfds[i]); > + } > g_free(vbasedev->regions[i]); > } > g_free(vbasedev->regions); > vbasedev->regions = NULL; > + if (vbasedev->regfds != NULL) { > + g_free(vbasedev->regfds); > + vbasedev->regfds = NULL; > + } > } > > if (!vbasedev->group) { > @@ -2405,6 +2427,9 @@ int vfio_get_region_info(VFIODevice *vbasedev, int index, > if (vbasedev->regions == NULL) { > vbasedev->regions = g_new0(struct vfio_region_info *, > vbasedev->num_regions); > + if (vbasedev->proxy != NULL) { > + vbasedev->regfds = g_new0(int, vbasedev->num_regions); > + } > } > /* check cache */ > if (vbasedev->regions[index] != NULL) { > @@ -2441,6 +2466,9 @@ retry: > /* fill cache */ > vbasedev->regions[index] = g_malloc0(argsz); > memcpy(vbasedev->regions[index], *info, argsz); > + if (vbasedev->regfds != NULL) { > + vbasedev->regfds[index] = fd; > + } > > return 0; > } > diff --git a/hw/vfio/user.c b/hw/vfio/user.c > index ed2a4d7..b40c4ed 100644 > --- a/hw/vfio/user.c > +++ b/hw/vfio/user.c > @@ -923,6 +923,40 @@ static int vfio_user_get_info(VFIOProxy *proxy, struct vfio_device_info *info) > return 0; > } > > +static int vfio_user_get_region_info(VFIOProxy *proxy, > + struct vfio_region_info *info, > + VFIOUserFDs *fds) > +{ > + g_autofree VFIOUserRegionInfo *msgp = NULL; > + uint32_t size; > + > + /* data returned can be larger than vfio_region_info */ > + if (info->argsz < sizeof(*info)) { > + error_printf("vfio_user_get_region_info argsz too small\n"); > + return -EINVAL; > + } > + if (fds != NULL && fds->send_fds != 0) { > + error_printf("vfio_user_get_region_info can't send FDs\n"); > + return -EINVAL; > + } > + > + size = info->argsz + sizeof(VFIOUserHdr); > + msgp = g_malloc0(size); > + > + vfio_user_request_msg(&msgp->hdr, VFIO_USER_DEVICE_GET_REGION_INFO, > + sizeof(*msgp), 0); > + msgp->argsz = info->argsz; > + msgp->index = info->index; > + > + vfio_user_send_wait(proxy, &msgp->hdr, fds, size, false); > + if (msgp->hdr.flags & VFIO_USER_ERROR) { > + return -msgp->hdr.error_reply; > + } > + > + memcpy(info, &msgp->argsz, info->argsz); > + return 0; > +} > + > > /* > * Socket-based io_ops > @@ -941,7 +975,23 @@ static int vfio_user_io_get_info(VFIODevice *vbasedev, > return VDEV_VALID_INFO(vbasedev, info); > } > > +static int vfio_user_io_get_region_info(VFIODevice *vbasedev, > + struct vfio_region_info *info, > + int *fd) > +{ > + int ret; > + VFIOUserFDs fds = { 0, 1, fd}; > + > + ret = vfio_user_get_region_info(vbasedev->proxy, info, &fds); > + if (ret) { > + return ret; > + } > + > + return VDEV_VALID_REGION_INFO(vbasedev, info, fd); > +} > + > VFIODevIO vfio_dev_io_sock = { > .get_info = vfio_user_io_get_info, > + .get_region_info = vfio_user_io_get_region_info, > }; >
> On Nov 19, 2021, at 2:42 PM, Alex Williamson <alex.williamson@redhat.com> wrote: > > On Mon, 8 Nov 2021 16:46:39 -0800 > John Johnson <john.g.johnson@oracle.com> wrote: > >> Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com> >> Signed-off-by: John G Johnson <john.g.johnson@oracle.com> >> Signed-off-by: Jagannathan Raman <jag.raman@oracle.com> >> --- >> hw/vfio/user-protocol.h | 14 ++++++++++++ >> include/hw/vfio/vfio-common.h | 4 +++- >> hw/vfio/common.c | 30 +++++++++++++++++++++++++- >> hw/vfio/user.c | 50 +++++++++++++++++++++++++++++++++++++++++++ >> 4 files changed, 96 insertions(+), 2 deletions(-) >> >> diff --git a/hw/vfio/user-protocol.h b/hw/vfio/user-protocol.h >> index 13e44eb..104bf4f 100644 >> --- a/hw/vfio/user-protocol.h >> +++ b/hw/vfio/user-protocol.h >> @@ -95,4 +95,18 @@ typedef struct { >> uint32_t cap_offset; >> } VFIOUserDeviceInfo; >> >> +/* >> + * VFIO_USER_DEVICE_GET_REGION_INFO >> + * imported from struct_vfio_region_info >> + */ >> +typedef struct { >> + VFIOUserHdr hdr; >> + uint32_t argsz; >> + uint32_t flags; >> + uint32_t index; >> + uint32_t cap_offset; >> + uint64_t size; >> + uint64_t offset; >> +} VFIOUserRegionInfo; >> + >> #endif /* VFIO_USER_PROTOCOL_H */ >> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h >> index 224dbf8..e2d7ee1 100644 >> --- a/include/hw/vfio/vfio-common.h >> +++ b/include/hw/vfio/vfio-common.h >> @@ -56,6 +56,7 @@ typedef struct VFIORegion { >> uint32_t nr_mmaps; >> VFIOMmap *mmaps; >> uint8_t nr; /* cache the region number for debug */ >> + int remfd; /* fd if exported from remote process */ >> } VFIORegion; >> >> typedef struct VFIOMigration { >> @@ -150,8 +151,9 @@ typedef struct VFIODevice { >> VFIOMigration *migration; >> Error *migration_blocker; >> OnOffAuto pre_copy_dirty_page_tracking; >> - struct vfio_region_info **regions; >> VFIOProxy *proxy; >> + struct vfio_region_info **regions; >> + int *regfds; >> } VFIODevice; >> >> struct VFIODeviceOps { >> diff --git a/hw/vfio/common.c b/hw/vfio/common.c >> index 41fdd78..47ec28f 100644 >> --- a/hw/vfio/common.c >> +++ b/hw/vfio/common.c >> @@ -40,6 +40,7 @@ >> #include "trace.h" >> #include "qapi/error.h" >> #include "migration/migration.h" >> +#include "hw/vfio/user.h" >> >> VFIOGroupList vfio_group_list = >> QLIST_HEAD_INITIALIZER(vfio_group_list); >> @@ -1491,6 +1492,16 @@ bool vfio_get_info_dma_avail(struct vfio_iommu_type1_info *info, >> return true; >> } >> >> +static int vfio_get_region_info_remfd(VFIODevice *vbasedev, int index) >> +{ >> + struct vfio_region_info *info; >> + >> + if (vbasedev->regions == NULL || vbasedev->regions[index] == NULL) { >> + vfio_get_region_info(vbasedev, index, &info); >> + } >> + return vbasedev->regfds != NULL ? vbasedev->regfds[index] : -1; >> +} > > This patch is really obscure due to the region info fd being added many > patches ago and only now being used. > > Do we really want a parallel array to regions for storing these fds? > > Why do we call an array of these fds "regfds" but a single one "remfd"? > > Ugh, why do we have both the regfds array and a remfd per VFIORegion? > > TBH, I'm still not sure why we're caching region infos at all, this > seems to be gratuitously bloated. > >> + >> static int vfio_setup_region_sparse_mmaps(VFIORegion *region, >> struct vfio_region_info *info) >> { >> @@ -1544,6 +1555,7 @@ int vfio_region_setup(Object *obj, VFIODevice *vbasedev, VFIORegion *region, >> region->size = info->size; >> region->fd_offset = info->offset; >> region->nr = index; >> + region->remfd = vfio_get_region_info_remfd(vbasedev, index); > > Why didn't we just get an fd back from vfio_get_region_info() that we > could use here? > As I said in the patch with the cache changes, the purpose of the cache was to fill it once, and have all the other callers who want region info be satisfied from the cache. This includes the FD used to map device memory exported by the server. >> >> if (region->size) { >> region->mem = g_new0(MemoryRegion, 1); >> @@ -1587,6 +1599,7 @@ int vfio_region_mmap(VFIORegion *region) >> { >> int i, prot = 0; >> char *name; >> + int fd; >> >> if (!region->mem) { >> return 0; >> @@ -1595,9 +1608,11 @@ int vfio_region_mmap(VFIORegion *region) >> prot |= region->flags & VFIO_REGION_INFO_FLAG_READ ? PROT_READ : 0; >> prot |= region->flags & VFIO_REGION_INFO_FLAG_WRITE ? PROT_WRITE : 0; >> >> + fd = region->remfd != -1 ? region->remfd : region->vbasedev->fd; > > Gack, why can't VFIORegion.fd be either the remote process fd or the > vbasedevfd to avoid all these switches? Thanks, > That’s a good idea. The container FD can be the vbasedev FD is the kernel case, and the exported FD in vfio-user. JJ
diff --git a/hw/vfio/user-protocol.h b/hw/vfio/user-protocol.h index 13e44eb..104bf4f 100644 --- a/hw/vfio/user-protocol.h +++ b/hw/vfio/user-protocol.h @@ -95,4 +95,18 @@ typedef struct { uint32_t cap_offset; } VFIOUserDeviceInfo; +/* + * VFIO_USER_DEVICE_GET_REGION_INFO + * imported from struct_vfio_region_info + */ +typedef struct { + VFIOUserHdr hdr; + uint32_t argsz; + uint32_t flags; + uint32_t index; + uint32_t cap_offset; + uint64_t size; + uint64_t offset; +} VFIOUserRegionInfo; + #endif /* VFIO_USER_PROTOCOL_H */ diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h index 224dbf8..e2d7ee1 100644 --- a/include/hw/vfio/vfio-common.h +++ b/include/hw/vfio/vfio-common.h @@ -56,6 +56,7 @@ typedef struct VFIORegion { uint32_t nr_mmaps; VFIOMmap *mmaps; uint8_t nr; /* cache the region number for debug */ + int remfd; /* fd if exported from remote process */ } VFIORegion; typedef struct VFIOMigration { @@ -150,8 +151,9 @@ typedef struct VFIODevice { VFIOMigration *migration; Error *migration_blocker; OnOffAuto pre_copy_dirty_page_tracking; - struct vfio_region_info **regions; VFIOProxy *proxy; + struct vfio_region_info **regions; + int *regfds; } VFIODevice; struct VFIODeviceOps { diff --git a/hw/vfio/common.c b/hw/vfio/common.c index 41fdd78..47ec28f 100644 --- a/hw/vfio/common.c +++ b/hw/vfio/common.c @@ -40,6 +40,7 @@ #include "trace.h" #include "qapi/error.h" #include "migration/migration.h" +#include "hw/vfio/user.h" VFIOGroupList vfio_group_list = QLIST_HEAD_INITIALIZER(vfio_group_list); @@ -1491,6 +1492,16 @@ bool vfio_get_info_dma_avail(struct vfio_iommu_type1_info *info, return true; } +static int vfio_get_region_info_remfd(VFIODevice *vbasedev, int index) +{ + struct vfio_region_info *info; + + if (vbasedev->regions == NULL || vbasedev->regions[index] == NULL) { + vfio_get_region_info(vbasedev, index, &info); + } + return vbasedev->regfds != NULL ? vbasedev->regfds[index] : -1; +} + static int vfio_setup_region_sparse_mmaps(VFIORegion *region, struct vfio_region_info *info) { @@ -1544,6 +1555,7 @@ int vfio_region_setup(Object *obj, VFIODevice *vbasedev, VFIORegion *region, region->size = info->size; region->fd_offset = info->offset; region->nr = index; + region->remfd = vfio_get_region_info_remfd(vbasedev, index); if (region->size) { region->mem = g_new0(MemoryRegion, 1); @@ -1587,6 +1599,7 @@ int vfio_region_mmap(VFIORegion *region) { int i, prot = 0; char *name; + int fd; if (!region->mem) { return 0; @@ -1595,9 +1608,11 @@ int vfio_region_mmap(VFIORegion *region) prot |= region->flags & VFIO_REGION_INFO_FLAG_READ ? PROT_READ : 0; prot |= region->flags & VFIO_REGION_INFO_FLAG_WRITE ? PROT_WRITE : 0; + fd = region->remfd != -1 ? region->remfd : region->vbasedev->fd; + for (i = 0; i < region->nr_mmaps; i++) { region->mmaps[i].mmap = mmap(NULL, region->mmaps[i].size, prot, - MAP_SHARED, region->vbasedev->fd, + MAP_SHARED, fd, region->fd_offset + region->mmaps[i].offset); if (region->mmaps[i].mmap == MAP_FAILED) { @@ -2379,10 +2394,17 @@ void vfio_put_base_device(VFIODevice *vbasedev) int i; for (i = 0; i < vbasedev->num_regions; i++) { + if (vbasedev->regfds != NULL && vbasedev->regfds[i] != -1) { + close(vbasedev->regfds[i]); + } g_free(vbasedev->regions[i]); } g_free(vbasedev->regions); vbasedev->regions = NULL; + if (vbasedev->regfds != NULL) { + g_free(vbasedev->regfds); + vbasedev->regfds = NULL; + } } if (!vbasedev->group) { @@ -2405,6 +2427,9 @@ int vfio_get_region_info(VFIODevice *vbasedev, int index, if (vbasedev->regions == NULL) { vbasedev->regions = g_new0(struct vfio_region_info *, vbasedev->num_regions); + if (vbasedev->proxy != NULL) { + vbasedev->regfds = g_new0(int, vbasedev->num_regions); + } } /* check cache */ if (vbasedev->regions[index] != NULL) { @@ -2441,6 +2466,9 @@ retry: /* fill cache */ vbasedev->regions[index] = g_malloc0(argsz); memcpy(vbasedev->regions[index], *info, argsz); + if (vbasedev->regfds != NULL) { + vbasedev->regfds[index] = fd; + } return 0; } diff --git a/hw/vfio/user.c b/hw/vfio/user.c index ed2a4d7..b40c4ed 100644 --- a/hw/vfio/user.c +++ b/hw/vfio/user.c @@ -923,6 +923,40 @@ static int vfio_user_get_info(VFIOProxy *proxy, struct vfio_device_info *info) return 0; } +static int vfio_user_get_region_info(VFIOProxy *proxy, + struct vfio_region_info *info, + VFIOUserFDs *fds) +{ + g_autofree VFIOUserRegionInfo *msgp = NULL; + uint32_t size; + + /* data returned can be larger than vfio_region_info */ + if (info->argsz < sizeof(*info)) { + error_printf("vfio_user_get_region_info argsz too small\n"); + return -EINVAL; + } + if (fds != NULL && fds->send_fds != 0) { + error_printf("vfio_user_get_region_info can't send FDs\n"); + return -EINVAL; + } + + size = info->argsz + sizeof(VFIOUserHdr); + msgp = g_malloc0(size); + + vfio_user_request_msg(&msgp->hdr, VFIO_USER_DEVICE_GET_REGION_INFO, + sizeof(*msgp), 0); + msgp->argsz = info->argsz; + msgp->index = info->index; + + vfio_user_send_wait(proxy, &msgp->hdr, fds, size, false); + if (msgp->hdr.flags & VFIO_USER_ERROR) { + return -msgp->hdr.error_reply; + } + + memcpy(info, &msgp->argsz, info->argsz); + return 0; +} + /* * Socket-based io_ops @@ -941,7 +975,23 @@ static int vfio_user_io_get_info(VFIODevice *vbasedev, return VDEV_VALID_INFO(vbasedev, info); } +static int vfio_user_io_get_region_info(VFIODevice *vbasedev, + struct vfio_region_info *info, + int *fd) +{ + int ret; + VFIOUserFDs fds = { 0, 1, fd}; + + ret = vfio_user_get_region_info(vbasedev->proxy, info, &fds); + if (ret) { + return ret; + } + + return VDEV_VALID_REGION_INFO(vbasedev, info, fd); +} + VFIODevIO vfio_dev_io_sock = { .get_info = vfio_user_io_get_info, + .get_region_info = vfio_user_io_get_region_info, };