Message ID | d442105953151559982c8b1a753d847fb89da3ba.1629131628.git.elena.ufimtseva@oracle.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | vfio-user implementation | expand |
On Mon, Aug 16, 2021 at 09:42:41AM -0700, Elena Ufimtseva wrote: > @@ -1514,6 +1515,16 @@ bool vfio_get_info_dma_avail(struct vfio_iommu_type1_info *info, > return true; > } > > +static int vfio_get_region_info_remfd(VFIODevice *vbasedev, int index) > +{ > + struct vfio_region_info *info; > + > + if (vbasedev->regions == NULL || vbasedev->regions[index] == NULL) { > + vfio_get_region_info(vbasedev, index, &info); > + } Maybe this will be called from other places in the future, but the vfio_region_setup() caller below already invoked vfio_get_region_info() so I'm not sure it's necessary to do this again? Perhaps add an int *remfd argument to vfio_get_region_info(). That way vfio_get_region_info_remfd() isn't necessary. > @@ -2410,6 +2442,24 @@ int vfio_get_region_info(VFIODevice *vbasedev, int index, > struct vfio_region_info **info) > { > size_t argsz = sizeof(struct vfio_region_info); > + int fd = -1; > + int ret; > + > + /* create region cache */ > + if (vbasedev->regions == NULL) { > + vbasedev->regions = g_new0(struct vfio_region_info *, > + vbasedev->num_regions); > + if (vbasedev->proxy != NULL) { > + vbasedev->regfds = g_new0(int, vbasedev->num_regions); > + } > + } > + /* check cache */ > + if (vbasedev->regions[index] != NULL) { > + *info = g_malloc0(vbasedev->regions[index]->argsz); > + memcpy(*info, vbasedev->regions[index], > + vbasedev->regions[index]->argsz); > + return 0; > + } Why is it necessary to introduce a cache? Is it to avoid passing the same fd multiple times? > > *info = g_malloc0(argsz); > > @@ -2417,7 +2467,17 @@ int vfio_get_region_info(VFIODevice *vbasedev, int index, > retry: > (*info)->argsz = argsz; > > - if (ioctl(vbasedev->fd, VFIO_DEVICE_GET_REGION_INFO, *info)) { > + if (vbasedev->proxy != NULL) { > + VFIOUserFDs fds = { 0, 1, &fd}; > + > + ret = vfio_user_get_region_info(vbasedev, index, *info, &fds); > + } else { > + ret = ioctl(vbasedev->fd, VFIO_DEVICE_GET_REGION_INFO, *info); > + if (ret < 0) { > + ret = -errno; > + } > + } > + if (ret != 0) { > g_free(*info); > *info = NULL; > return -errno; > @@ -2426,10 +2486,22 @@ retry: > if ((*info)->argsz > argsz) { > argsz = (*info)->argsz; > *info = g_realloc(*info, argsz); > + if (fd != -1) { > + close(fd); > + fd = -1; > + } > > goto retry; > } > > + /* fill cache */ > + vbasedev->regions[index] = g_malloc0(argsz); > + memcpy(vbasedev->regions[index], *info, argsz); > + *vbasedev->regions[index] = **info; The previous line already copied the contents of *info. What is the purpose of this assignment? > + if (vbasedev->regfds != NULL) { > + vbasedev->regfds[index] = fd; > + } > + > return 0; > } > > diff --git a/hw/vfio/user.c b/hw/vfio/user.c > index b584b8e0f2..91b51f37df 100644 > --- a/hw/vfio/user.c > +++ b/hw/vfio/user.c > @@ -734,3 +734,36 @@ int vfio_user_get_info(VFIODevice *vbasedev) > vbasedev->reset_works = !!(msg.flags & VFIO_DEVICE_FLAGS_RESET); > return 0; > } > + > +int vfio_user_get_region_info(VFIODevice *vbasedev, int index, > + struct vfio_region_info *info, VFIOUserFDs *fds) > +{ > + g_autofree VFIOUserRegionInfo *msgp = NULL; > + int size; Please use uint32_t size instead of int size to prevent possible signedness issues: - VFIOUserRegionInfo->argsz is uint32_t. - sizeof(VFIOUserHdr) is size_t. - The vfio_user_request_msg() size argument is uint32_t.
> On Sep 7, 2021, at 7:31 AM, Stefan Hajnoczi <stefanha@redhat.com> wrote: > > On Mon, Aug 16, 2021 at 09:42:41AM -0700, Elena Ufimtseva wrote: >> @@ -1514,6 +1515,16 @@ bool vfio_get_info_dma_avail(struct vfio_iommu_type1_info *info, >> return true; >> } >> >> +static int vfio_get_region_info_remfd(VFIODevice *vbasedev, int index) >> +{ >> + struct vfio_region_info *info; >> + >> + if (vbasedev->regions == NULL || vbasedev->regions[index] == NULL) { >> + vfio_get_region_info(vbasedev, index, &info); >> + } > > Maybe this will be called from other places in the future, but the > vfio_region_setup() caller below already invoked vfio_get_region_info() > so I'm not sure it's necessary to do this again? > > Perhaps add an int *remfd argument to vfio_get_region_info(). That way > vfio_get_region_info_remfd() isn't necessary. > I think they could be combined, but the region capabilities are retrieved with separate calls to vfio_get_region_info_cap(), so I followed that precedent. >> @@ -2410,6 +2442,24 @@ int vfio_get_region_info(VFIODevice *vbasedev, int index, >> struct vfio_region_info **info) >> { >> size_t argsz = sizeof(struct vfio_region_info); >> + int fd = -1; >> + int ret; >> + >> + /* create region cache */ >> + if (vbasedev->regions == NULL) { >> + vbasedev->regions = g_new0(struct vfio_region_info *, >> + vbasedev->num_regions); >> + if (vbasedev->proxy != NULL) { >> + vbasedev->regfds = g_new0(int, vbasedev->num_regions); >> + } >> + } >> + /* check cache */ >> + if (vbasedev->regions[index] != NULL) { >> + *info = g_malloc0(vbasedev->regions[index]->argsz); >> + memcpy(*info, vbasedev->regions[index], >> + vbasedev->regions[index]->argsz); >> + return 0; >> + } > > Why is it necessary to introduce a cache? Is it to avoid passing the > same fd multiple times? > Yes. For polled regions, the server returns an FD with each message, so there would be an FD leak if I didn’t check that the region hadn’t already returned one. Since I had to cache the FD anyway, I cached the whole struct. >> >> *info = g_malloc0(argsz); >> >> @@ -2417,7 +2467,17 @@ int vfio_get_region_info(VFIODevice *vbasedev, int index, >> retry: >> (*info)->argsz = argsz; >> >> - if (ioctl(vbasedev->fd, VFIO_DEVICE_GET_REGION_INFO, *info)) { >> + if (vbasedev->proxy != NULL) { >> + VFIOUserFDs fds = { 0, 1, &fd}; >> + >> + ret = vfio_user_get_region_info(vbasedev, index, *info, &fds); >> + } else { >> + ret = ioctl(vbasedev->fd, VFIO_DEVICE_GET_REGION_INFO, *info); >> + if (ret < 0) { >> + ret = -errno; >> + } >> + } >> + if (ret != 0) { >> g_free(*info); >> *info = NULL; >> return -errno; >> @@ -2426,10 +2486,22 @@ retry: >> if ((*info)->argsz > argsz) { >> argsz = (*info)->argsz; >> *info = g_realloc(*info, argsz); >> + if (fd != -1) { >> + close(fd); >> + fd = -1; >> + } >> >> goto retry; >> } >> >> + /* fill cache */ >> + vbasedev->regions[index] = g_malloc0(argsz); >> + memcpy(vbasedev->regions[index], *info, argsz); >> + *vbasedev->regions[index] = **info; > > The previous line already copied the contents of *info. What is the > purpose of this assignment? > That might be a mis-merge. The struct assignment was a bug fixed several revs ago with the memcpy() call. >> + if (vbasedev->regfds != NULL) { >> + vbasedev->regfds[index] = fd; >> + } >> + >> return 0; >> } >> >> diff --git a/hw/vfio/user.c b/hw/vfio/user.c >> index b584b8e0f2..91b51f37df 100644 >> --- a/hw/vfio/user.c >> +++ b/hw/vfio/user.c >> @@ -734,3 +734,36 @@ int vfio_user_get_info(VFIODevice *vbasedev) >> vbasedev->reset_works = !!(msg.flags & VFIO_DEVICE_FLAGS_RESET); >> return 0; >> } >> + >> +int vfio_user_get_region_info(VFIODevice *vbasedev, int index, >> + struct vfio_region_info *info, VFIOUserFDs *fds) >> +{ >> + g_autofree VFIOUserRegionInfo *msgp = NULL; >> + int size; > > Please use uint32_t size instead of int size to prevent possible > signedness issues: > - VFIOUserRegionInfo->argsz is uint32_t. > - sizeof(VFIOUserHdr) is size_t. > - The vfio_user_request_msg() size argument is uint32_t. OK JJ
On Thu, Sep 09, 2021 at 05:35:40AM +0000, John Johnson wrote: > > > > On Sep 7, 2021, at 7:31 AM, Stefan Hajnoczi <stefanha@redhat.com> wrote: > > > > On Mon, Aug 16, 2021 at 09:42:41AM -0700, Elena Ufimtseva wrote: > >> @@ -1514,6 +1515,16 @@ bool vfio_get_info_dma_avail(struct vfio_iommu_type1_info *info, > >> return true; > >> } > >> > >> +static int vfio_get_region_info_remfd(VFIODevice *vbasedev, int index) > >> +{ > >> + struct vfio_region_info *info; > >> + > >> + if (vbasedev->regions == NULL || vbasedev->regions[index] == NULL) { > >> + vfio_get_region_info(vbasedev, index, &info); > >> + } > > > > Maybe this will be called from other places in the future, but the > > vfio_region_setup() caller below already invoked vfio_get_region_info() > > so I'm not sure it's necessary to do this again? > > > > Perhaps add an int *remfd argument to vfio_get_region_info(). That way > > vfio_get_region_info_remfd() isn't necessary. > > > > I think they could be combined, but the region capabilities are > retrieved with separate calls to vfio_get_region_info_cap(), so I followed > that precedent. > > > >> @@ -2410,6 +2442,24 @@ int vfio_get_region_info(VFIODevice *vbasedev, int index, > >> struct vfio_region_info **info) > >> { > >> size_t argsz = sizeof(struct vfio_region_info); > >> + int fd = -1; > >> + int ret; > >> + > >> + /* create region cache */ > >> + if (vbasedev->regions == NULL) { > >> + vbasedev->regions = g_new0(struct vfio_region_info *, > >> + vbasedev->num_regions); > >> + if (vbasedev->proxy != NULL) { > >> + vbasedev->regfds = g_new0(int, vbasedev->num_regions); > >> + } > >> + } > >> + /* check cache */ > >> + if (vbasedev->regions[index] != NULL) { > >> + *info = g_malloc0(vbasedev->regions[index]->argsz); > >> + memcpy(*info, vbasedev->regions[index], > >> + vbasedev->regions[index]->argsz); > >> + return 0; > >> + } > > > > Why is it necessary to introduce a cache? Is it to avoid passing the > > same fd multiple times? > > > > Yes. For polled regions, the server returns an FD with each > message, so there would be an FD leak if I didn’t check that the region > hadn’t already returned one. Since I had to cache the FD anyway, I > cached the whole struct. If vfio_get_region_info() takes an int *fd argument then fd ownership becomes explicit and the need for the cache falls away. Maybe Alex has a preference for how to structure the code to track per-region fds. Stefan
diff --git a/hw/vfio/user-protocol.h b/hw/vfio/user-protocol.h index 13e44ebf1c..104bf4ff31 100644 --- a/hw/vfio/user-protocol.h +++ b/hw/vfio/user-protocol.h @@ -95,4 +95,18 @@ typedef struct { uint32_t cap_offset; } VFIOUserDeviceInfo; +/* + * VFIO_USER_DEVICE_GET_REGION_INFO + * imported from struct_vfio_region_info + */ +typedef struct { + VFIOUserHdr hdr; + uint32_t argsz; + uint32_t flags; + uint32_t index; + uint32_t cap_offset; + uint64_t size; + uint64_t offset; +} VFIOUserRegionInfo; + #endif /* VFIO_USER_PROTOCOL_H */ diff --git a/hw/vfio/user.h b/hw/vfio/user.h index 82044e7e78..f0122539ba 100644 --- a/hw/vfio/user.h +++ b/hw/vfio/user.h @@ -72,5 +72,7 @@ void vfio_user_set_reqhandler(VFIODevice *vbasdev, void vfio_user_send_reply(VFIOProxy *proxy, char *buf, int ret); int vfio_user_validate_version(VFIODevice *vbasedev, Error **errp); int vfio_user_get_info(VFIODevice *vbasedev); +int vfio_user_get_region_info(VFIODevice *vbasedev, int index, + struct vfio_region_info *info, VFIOUserFDs *fds); #endif /* VFIO_USER_H */ diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h index f43dc6e5d0..bdd25a546c 100644 --- a/include/hw/vfio/vfio-common.h +++ b/include/hw/vfio/vfio-common.h @@ -56,6 +56,7 @@ typedef struct VFIORegion { uint32_t nr_mmaps; VFIOMmap *mmaps; uint8_t nr; /* cache the region number for debug */ + int remfd; /* fd if exported from remote process */ } VFIORegion; typedef struct VFIOMigration { @@ -145,6 +146,8 @@ typedef struct VFIODevice { Error *migration_blocker; OnOffAuto pre_copy_dirty_page_tracking; VFIOProxy *proxy; + struct vfio_region_info **regions; + int *regfds; } VFIODevice; struct VFIODeviceOps { diff --git a/hw/vfio/common.c b/hw/vfio/common.c index 8728d4d5c2..7d667b0533 100644 --- a/hw/vfio/common.c +++ b/hw/vfio/common.c @@ -40,6 +40,7 @@ #include "trace.h" #include "qapi/error.h" #include "migration/migration.h" +#include "hw/vfio/user.h" VFIOGroupList vfio_group_list = QLIST_HEAD_INITIALIZER(vfio_group_list); @@ -1514,6 +1515,16 @@ bool vfio_get_info_dma_avail(struct vfio_iommu_type1_info *info, return true; } +static int vfio_get_region_info_remfd(VFIODevice *vbasedev, int index) +{ + struct vfio_region_info *info; + + if (vbasedev->regions == NULL || vbasedev->regions[index] == NULL) { + vfio_get_region_info(vbasedev, index, &info); + } + return vbasedev->regfds != NULL ? vbasedev->regfds[index] : -1; +} + static int vfio_setup_region_sparse_mmaps(VFIORegion *region, struct vfio_region_info *info) { @@ -1567,6 +1578,7 @@ int vfio_region_setup(Object *obj, VFIODevice *vbasedev, VFIORegion *region, region->size = info->size; region->fd_offset = info->offset; region->nr = index; + region->remfd = vfio_get_region_info_remfd(vbasedev, index); if (region->size) { region->mem = g_new0(MemoryRegion, 1); @@ -1610,6 +1622,7 @@ int vfio_region_mmap(VFIORegion *region) { int i, prot = 0; char *name; + int fd; if (!region->mem) { return 0; @@ -1618,9 +1631,11 @@ int vfio_region_mmap(VFIORegion *region) prot |= region->flags & VFIO_REGION_INFO_FLAG_READ ? PROT_READ : 0; prot |= region->flags & VFIO_REGION_INFO_FLAG_WRITE ? PROT_WRITE : 0; + fd = region->remfd != -1 ? region->remfd : region->vbasedev->fd; + for (i = 0; i < region->nr_mmaps; i++) { region->mmaps[i].mmap = mmap(NULL, region->mmaps[i].size, prot, - MAP_SHARED, region->vbasedev->fd, + MAP_SHARED, fd, region->fd_offset + region->mmaps[i].offset); if (region->mmaps[i].mmap == MAP_FAILED) { @@ -2397,6 +2412,23 @@ int vfio_get_device(VFIOGroup *group, const char *name, void vfio_put_base_device(VFIODevice *vbasedev) { + if (vbasedev->regions != NULL) { + int i; + + for (i = 0; i < vbasedev->num_regions; i++) { + if (vbasedev->regfds != NULL && vbasedev->regfds[i] != -1) { + close(vbasedev->regfds[i]); + } + g_free(vbasedev->regions[i]); + } + g_free(vbasedev->regions); + vbasedev->regions = NULL; + if (vbasedev->regfds != NULL) { + g_free(vbasedev->regfds); + vbasedev->regfds = NULL; + } + } + if (!vbasedev->group) { return; } @@ -2410,6 +2442,24 @@ int vfio_get_region_info(VFIODevice *vbasedev, int index, struct vfio_region_info **info) { size_t argsz = sizeof(struct vfio_region_info); + int fd = -1; + int ret; + + /* create region cache */ + if (vbasedev->regions == NULL) { + vbasedev->regions = g_new0(struct vfio_region_info *, + vbasedev->num_regions); + if (vbasedev->proxy != NULL) { + vbasedev->regfds = g_new0(int, vbasedev->num_regions); + } + } + /* check cache */ + if (vbasedev->regions[index] != NULL) { + *info = g_malloc0(vbasedev->regions[index]->argsz); + memcpy(*info, vbasedev->regions[index], + vbasedev->regions[index]->argsz); + return 0; + } *info = g_malloc0(argsz); @@ -2417,7 +2467,17 @@ int vfio_get_region_info(VFIODevice *vbasedev, int index, retry: (*info)->argsz = argsz; - if (ioctl(vbasedev->fd, VFIO_DEVICE_GET_REGION_INFO, *info)) { + if (vbasedev->proxy != NULL) { + VFIOUserFDs fds = { 0, 1, &fd}; + + ret = vfio_user_get_region_info(vbasedev, index, *info, &fds); + } else { + ret = ioctl(vbasedev->fd, VFIO_DEVICE_GET_REGION_INFO, *info); + if (ret < 0) { + ret = -errno; + } + } + if (ret != 0) { g_free(*info); *info = NULL; return -errno; @@ -2426,10 +2486,22 @@ retry: if ((*info)->argsz > argsz) { argsz = (*info)->argsz; *info = g_realloc(*info, argsz); + if (fd != -1) { + close(fd); + fd = -1; + } goto retry; } + /* fill cache */ + vbasedev->regions[index] = g_malloc0(argsz); + memcpy(vbasedev->regions[index], *info, argsz); + *vbasedev->regions[index] = **info; + if (vbasedev->regfds != NULL) { + vbasedev->regfds[index] = fd; + } + return 0; } diff --git a/hw/vfio/user.c b/hw/vfio/user.c index b584b8e0f2..91b51f37df 100644 --- a/hw/vfio/user.c +++ b/hw/vfio/user.c @@ -734,3 +734,36 @@ int vfio_user_get_info(VFIODevice *vbasedev) vbasedev->reset_works = !!(msg.flags & VFIO_DEVICE_FLAGS_RESET); return 0; } + +int vfio_user_get_region_info(VFIODevice *vbasedev, int index, + struct vfio_region_info *info, VFIOUserFDs *fds) +{ + g_autofree VFIOUserRegionInfo *msgp = NULL; + int size; + + /* data returned can be larger than vfio_region_info */ + if (info->argsz < sizeof(*info)) { + error_printf("vfio_user_get_region_info argsz too small\n"); + return -EINVAL; + } + if (fds != NULL && fds->send_fds != 0) { + error_printf("vfio_user_get_region_info can't send FDs\n"); + return -EINVAL; + } + + size = info->argsz + sizeof(VFIOUserHdr); + msgp = g_malloc0(size); + + vfio_user_request_msg(&msgp->hdr, VFIO_USER_DEVICE_GET_REGION_INFO, + sizeof(*msgp), 0); + msgp->argsz = info->argsz; + msgp->index = info->index; + + vfio_user_send_recv(vbasedev->proxy, &msgp->hdr, fds, size, 0); + if (msgp->hdr.flags & VFIO_USER_ERROR) { + return -msgp->hdr.error_reply; + } + + memcpy(info, &msgp->argsz, info->argsz); + return 0; +}