Message ID | d357c8c243ef839cc8e41fc1ae5c8db2d98fc2cc.1667542066.git.john.g.johnson@oracle.com |
---|---|
State | New, archived |
Series | vfio-user client |
On Tue, Nov 08, 2022 at 03:13:25PM -0800, John Johnson wrote:
> Used for communication with VFIO driver
> (prep work for vfio-user, which will communicate over a socket)

> index e573f5a..6fd40f1 100644
> --- a/include/hw/vfio/vfio-common.h
> +++ b/include/hw/vfio/vfio-common.h

> +
> +extern VFIOContIO vfio_cont_io_ioctl;

Nit, there's no need for this to be non-static, it's only used in
hw/vfio/common.c

regards
john
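For illustration, a minimal sketch of what this nit asks for, assuming the ops
table really has no user outside hw/vfio/common.c: the definition gets internal
linkage, and the extern declaration this patch adds to vfio-common.h would be
dropped.

    /* hw/vfio/common.c: file-local ioctl() ops vector (sketch only,
     * not the posted patch) */
    static VFIOContIO vfio_cont_io_ioctl = {
        .dma_map = vfio_io_dma_map,
        .dma_unmap = vfio_io_dma_unmap,
        .dirty_bitmap = vfio_io_dirty_bitmap,
    };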
Hello John,

On 11/9/22 00:13, John Johnson wrote:
> Used for communication with VFIO driver
> (prep work for vfio-user, which will communicate over a socket)
>
> Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
> ---
>   hw/vfio/common.c              | 126 ++++++++++++++++++++++++++++--------------
>   include/hw/vfio/vfio-common.h |  33 +++++++++++
>   2 files changed, 117 insertions(+), 42 deletions(-)
>
> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
> index ace9562..83d69b9 100644
> --- a/hw/vfio/common.c
> +++ b/hw/vfio/common.c
> @@ -432,12 +432,12 @@ static int vfio_dma_unmap_bitmap(VFIOContainer *container,
>           goto unmap_exit;
>       }
>
> -    ret = ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, unmap);
> +    ret = CONT_DMA_UNMAP(container, unmap, bitmap);

I am not sure these macros are very useful, compared to :

    container->ops->dma_unmap(container, unmap, bitmap);

>       if (!ret) {
>           cpu_physical_memory_set_dirty_lebitmap((unsigned long *)bitmap->data,
>                   iotlb->translated_addr, pages);
>       } else {
> -        error_report("VFIO_UNMAP_DMA with DIRTY_BITMAP : %m");
> +        error_report("VFIO_UNMAP_DMA with DIRTY_BITMAP : %s", strerror(-ret));
>       }
>
>       g_free(bitmap->data);
> @@ -465,30 +465,7 @@ static int vfio_dma_unmap(VFIOContainer *container,
>           return vfio_dma_unmap_bitmap(container, iova, size, iotlb);
>       }
>
> -    while (ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, &unmap)) {
> -        /*
> -         * The type1 backend has an off-by-one bug in the kernel (71a7d3d78e3c
> -         * v4.15) where an overflow in its wrap-around check prevents us from
> -         * unmapping the last page of the address space. Test for the error
> -         * condition and re-try the unmap excluding the last page. The
> -         * expectation is that we've never mapped the last page anyway and this
> -         * unmap request comes via vIOMMU support which also makes it unlikely
> -         * that this page is used. This bug was introduced well after type1 v2
> -         * support was introduced, so we shouldn't need to test for v1. A fix
> -         * is queued for kernel v5.0 so this workaround can be removed once
> -         * affected kernels are sufficiently deprecated.
> -         */
> -        if (errno == EINVAL && unmap.size && !(unmap.iova + unmap.size) &&
> -            container->iommu_type == VFIO_TYPE1v2_IOMMU) {
> -            trace_vfio_dma_unmap_overflow_workaround();
> -            unmap.size -= 1ULL << ctz64(container->pgsizes);
> -            continue;
> -        }
> -        error_report("VFIO_UNMAP_DMA failed: %s", strerror(errno));
> -        return -errno;
> -    }
> -
> -    return 0;
> +    return CONT_DMA_UNMAP(container, &unmap, NULL);
>   }
>
>   static int vfio_dma_map(VFIOContainer *container, hwaddr iova,
> @@ -501,24 +478,18 @@ static int vfio_dma_map(VFIOContainer *container, hwaddr iova,
>           .iova = iova,
>           .size = size,
>       };
> +    int ret;
>
>       if (!readonly) {
>           map.flags |= VFIO_DMA_MAP_FLAG_WRITE;
>       }
>
> -    /*
> -     * Try the mapping, if it fails with EBUSY, unmap the region and try
> -     * again. This shouldn't be necessary, but we sometimes see it in
> -     * the VGA ROM space.
> -     */
> -    if (ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0 ||
> -        (errno == EBUSY && vfio_dma_unmap(container, iova, size, NULL) == 0 &&
> -         ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0)) {
> -        return 0;
> -    }
> +    ret = CONT_DMA_MAP(container, &map);
>
> -    error_report("VFIO_MAP_DMA failed: %s", strerror(errno));
> -    return -errno;
> +    if (ret < 0) {
> +        error_report("VFIO_MAP_DMA failed: %s", strerror(-ret));
> +    }
> +    return ret;
>   }
>
>   static void vfio_host_win_add(VFIOContainer *container,
> @@ -1263,10 +1234,10 @@ static void vfio_set_dirty_page_tracking(VFIOContainer *container, bool start)
>           dirty.flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP;
>       }
>
> -    ret = ioctl(container->fd, VFIO_IOMMU_DIRTY_PAGES, &dirty);
> +    ret = CONT_DIRTY_BITMAP(container, &dirty, NULL);
>       if (ret) {
>           error_report("Failed to set dirty tracking flag 0x%x errno: %d",
> -                     dirty.flags, errno);
> +                     dirty.flags, -ret);
>       }
>   }
>
> @@ -1316,11 +1287,11 @@ static int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova,
>           goto err_out;
>       }
>
> -    ret = ioctl(container->fd, VFIO_IOMMU_DIRTY_PAGES, dbitmap);
> +    ret = CONT_DIRTY_BITMAP(container, dbitmap, range);
>       if (ret) {
>           error_report("Failed to get dirty bitmap for iova: 0x%"PRIx64
>                        " size: 0x%"PRIx64" err: %d", (uint64_t)range->iova,
> -                     (uint64_t)range->size, errno);
> +                     (uint64_t)range->size, -ret);
>           goto err_out;
>       }
>
> @@ -2090,6 +2061,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
>       container->error = NULL;
>       container->dirty_pages_supported = false;
>       container->dma_max_mappings = 0;
> +    container->io_ops = &vfio_cont_io_ioctl;
>       QLIST_INIT(&container->giommu_list);
>       QLIST_INIT(&container->hostwin_list);
>       QLIST_INIT(&container->vrdl_list);
> @@ -2626,3 +2598,73 @@ int vfio_eeh_as_op(AddressSpace *as, uint32_t op)
>       }
>       return vfio_eeh_container_op(container, op);
>   }
> +
> +/*
> + * Traditional ioctl() based io_ops
> + */
> +
> +static int vfio_io_dma_map(VFIOContainer *container,
> +                           struct vfio_iommu_type1_dma_map *map)
> +{
> +
> +    /*
> +     * Try the mapping, if it fails with EBUSY, unmap the region and try
> +     * again. This shouldn't be necessary, but we sometimes see it in
> +     * the VGA ROM space.
> +     */
> +    if (ioctl(container->fd, VFIO_IOMMU_MAP_DMA, map) == 0 ||
> +        (errno == EBUSY &&
> +         vfio_dma_unmap(container, map->iova, map->size, NULL) == 0 &&
> +         ioctl(container->fd, VFIO_IOMMU_MAP_DMA, map) == 0)) {
> +        return 0;
> +    }
> +    return -errno;
> +}
> +
> +static int vfio_io_dma_unmap(VFIOContainer *container,
> +                             struct vfio_iommu_type1_dma_unmap *unmap,
> +                             struct vfio_bitmap *bitmap)
> +{
> +
> +    while (ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, unmap)) {
> +        /*
> +         * The type1 backend has an off-by-one bug in the kernel (71a7d3d78e3c
> +         * v4.15) where an overflow in its wrap-around check prevents us from
> +         * unmapping the last page of the address space. Test for the error
> +         * condition and re-try the unmap excluding the last page. The
> +         * expectation is that we've never mapped the last page anyway and this
> +         * unmap request comes via vIOMMU support which also makes it unlikely
> +         * that this page is used. This bug was introduced well after type1 v2
> +         * support was introduced, so we shouldn't need to test for v1. A fix
> +         * is queued for kernel v5.0 so this workaround can be removed once
> +         * affected kernels are sufficiently deprecated.
> +         */
> +        if (errno == EINVAL && unmap->size && !(unmap->iova + unmap->size) &&
> +            container->iommu_type == VFIO_TYPE1v2_IOMMU) {
> +            trace_vfio_dma_unmap_overflow_workaround();
> +            unmap->size -= 1ULL << ctz64(container->pgsizes);
> +            continue;
> +        }
> +        error_report("VFIO_UNMAP_DMA failed: %s", strerror(errno));
> +        return -errno;
> +    }
> +
> +    return 0;
> +}
> +
> +static int vfio_io_dirty_bitmap(VFIOContainer *container,
> +                                struct vfio_iommu_type1_dirty_bitmap *bitmap,
> +                                struct vfio_iommu_type1_dirty_bitmap_get *range)
> +{
> +    int ret;
> +
> +    ret = ioctl(container->fd, VFIO_IOMMU_DIRTY_PAGES, bitmap);
> +
> +    return ret < 0 ? -errno : ret;
> +}
> +
> +VFIOContIO vfio_cont_io_ioctl = {
> +    .dma_map = vfio_io_dma_map,
> +    .dma_unmap = vfio_io_dma_unmap,
> +    .dirty_bitmap = vfio_io_dirty_bitmap,
> +};
> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
> index e573f5a..6fd40f1 100644
> --- a/include/hw/vfio/vfio-common.h
> +++ b/include/hw/vfio/vfio-common.h
> @@ -75,6 +75,7 @@ typedef struct VFIOAddressSpace {
>   } VFIOAddressSpace;
>
>   struct VFIOGroup;
> +typedef struct VFIOContIO VFIOContIO;
>
>   typedef struct VFIOContainer {
>       VFIOAddressSpace *space;
> @@ -83,6 +84,7 @@ typedef struct VFIOContainer {
>       MemoryListener prereg_listener;
>       unsigned iommu_type;
>       Error *error;
> +    VFIOContIO *io_ops;

ops should be enough.

>       bool initialized;
>       bool dirty_pages_supported;
>       uint64_t dirty_pgsizes;
> @@ -154,6 +156,37 @@ struct VFIODeviceOps {
>       int (*vfio_load_config)(VFIODevice *vdev, QEMUFile *f);
>   };
>
> +#ifdef CONFIG_LINUX
> +
> +/*
> + * The next 2 ops vectors are how Devices and Containers
> + * communicate with the server. The default option is
> + * through ioctl() to the kernel VFIO driver, but vfio-user
> + * can use a socket to a remote process.
> + */
> +
> +struct VFIOContIO {

VFIOContainerOps seems more adequate with the current VFIO terminology
in QEMU.

Thanks,

C.

> +    int (*dma_map)(VFIOContainer *container,
> +                   struct vfio_iommu_type1_dma_map *map);
> +    int (*dma_unmap)(VFIOContainer *container,
> +                     struct vfio_iommu_type1_dma_unmap *unmap,
> +                     struct vfio_bitmap *bitmap);
> +    int (*dirty_bitmap)(VFIOContainer *container,
> +                        struct vfio_iommu_type1_dirty_bitmap *bitmap,
> +                        struct vfio_iommu_type1_dirty_bitmap_get *range);
> +};
> +
> +#define CONT_DMA_MAP(cont, map) \
> +    ((cont)->io_ops->dma_map((cont), (map)))
> +#define CONT_DMA_UNMAP(cont, unmap, bitmap) \
> +    ((cont)->io_ops->dma_unmap((cont), (unmap), (bitmap)))
> +#define CONT_DIRTY_BITMAP(cont, bitmap, range) \
> +    ((cont)->io_ops->dirty_bitmap((cont), (bitmap), (range)))
> +
> +extern VFIOContIO vfio_cont_io_ioctl;
> +
> +#endif /* CONFIG_LINUX */
> +
>   typedef struct VFIOGroup {
>       int fd;
>       int groupid;
On 9/12/22 17:10, Cédric Le Goater wrote:
> Hello John,
>
> On 11/9/22 00:13, John Johnson wrote:
>> Used for communication with VFIO driver
>> (prep work for vfio-user, which will communicate over a socket)
>>
>> Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
>> ---
>>   hw/vfio/common.c              | 126
>> ++++++++++++++++++++++++++++--------------
>>   include/hw/vfio/vfio-common.h |  33 +++++++++++
>>   2 files changed, 117 insertions(+), 42 deletions(-)
>>
>> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
>> index ace9562..83d69b9 100644
>> --- a/hw/vfio/common.c
>> +++ b/hw/vfio/common.c
>> @@ -432,12 +432,12 @@ static int vfio_dma_unmap_bitmap(VFIOContainer
>> *container,
>>           goto unmap_exit;
>>       }
>> -    ret = ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, unmap);
>> +    ret = CONT_DMA_UNMAP(container, unmap, bitmap);
>
> I am not sure these macros are very useful, compared to :
>
>     container->ops->dma_unmap(container, unmap, bitmap);

I was going to report the same.

>> +/*
>> + * The next 2 ops vectors are how Devices and Containers
>> + * communicate with the server. The default option is
>> + * through ioctl() to the kernel VFIO driver, but vfio-user
>> + * can use a socket to a remote process.
>> + */
>> +
>> +struct VFIOContIO {
>
> VFIOContainerOps seems more adequate with the current VFIO terminology
> in QEMU.

Yes please, abbreviated "Cont" is not helpful.
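To make the suggested direction concrete, a minimal sketch of the reviewers'
proposal (the VFIOContainerOps type name and the "ops" field are their
suggestion, not the posted patch): the same three callbacks, a renamed struct,
and call sites that go through the pointer directly instead of the CONT_*
macros.

    /* Suggested shape: identical callbacks, renamed type. */
    typedef struct VFIOContainerOps {
        int (*dma_map)(VFIOContainer *container,
                       struct vfio_iommu_type1_dma_map *map);
        int (*dma_unmap)(VFIOContainer *container,
                         struct vfio_iommu_type1_dma_unmap *unmap,
                         struct vfio_bitmap *bitmap);
        int (*dirty_bitmap)(VFIOContainer *container,
                            struct vfio_iommu_type1_dirty_bitmap *bitmap,
                            struct vfio_iommu_type1_dirty_bitmap_get *range);
    } VFIOContainerOps;

    /* With a "VFIOContainerOps *ops" field in VFIOContainer, a caller in
     * hw/vfio/common.c becomes a plain indirect call, no macro needed. */
    static int vfio_container_dma_unmap(VFIOContainer *container,
                                        struct vfio_iommu_type1_dma_unmap *unmap,
                                        struct vfio_bitmap *bitmap)
    {
        return container->ops->dma_unmap(container, unmap, bitmap);
    }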
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index ace9562..83d69b9 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -432,12 +432,12 @@ static int vfio_dma_unmap_bitmap(VFIOContainer *container,
         goto unmap_exit;
     }
 
-    ret = ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, unmap);
+    ret = CONT_DMA_UNMAP(container, unmap, bitmap);
     if (!ret) {
         cpu_physical_memory_set_dirty_lebitmap((unsigned long *)bitmap->data,
                 iotlb->translated_addr, pages);
     } else {
-        error_report("VFIO_UNMAP_DMA with DIRTY_BITMAP : %m");
+        error_report("VFIO_UNMAP_DMA with DIRTY_BITMAP : %s", strerror(-ret));
     }
 
     g_free(bitmap->data);
@@ -465,30 +465,7 @@ static int vfio_dma_unmap(VFIOContainer *container,
         return vfio_dma_unmap_bitmap(container, iova, size, iotlb);
     }
 
-    while (ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, &unmap)) {
-        /*
-         * The type1 backend has an off-by-one bug in the kernel (71a7d3d78e3c
-         * v4.15) where an overflow in its wrap-around check prevents us from
-         * unmapping the last page of the address space. Test for the error
-         * condition and re-try the unmap excluding the last page. The
-         * expectation is that we've never mapped the last page anyway and this
-         * unmap request comes via vIOMMU support which also makes it unlikely
-         * that this page is used. This bug was introduced well after type1 v2
-         * support was introduced, so we shouldn't need to test for v1. A fix
-         * is queued for kernel v5.0 so this workaround can be removed once
-         * affected kernels are sufficiently deprecated.
-         */
-        if (errno == EINVAL && unmap.size && !(unmap.iova + unmap.size) &&
-            container->iommu_type == VFIO_TYPE1v2_IOMMU) {
-            trace_vfio_dma_unmap_overflow_workaround();
-            unmap.size -= 1ULL << ctz64(container->pgsizes);
-            continue;
-        }
-        error_report("VFIO_UNMAP_DMA failed: %s", strerror(errno));
-        return -errno;
-    }
-
-    return 0;
+    return CONT_DMA_UNMAP(container, &unmap, NULL);
 }
 
 static int vfio_dma_map(VFIOContainer *container, hwaddr iova,
@@ -501,24 +478,18 @@ static int vfio_dma_map(VFIOContainer *container, hwaddr iova,
         .iova = iova,
         .size = size,
     };
+    int ret;
 
     if (!readonly) {
         map.flags |= VFIO_DMA_MAP_FLAG_WRITE;
     }
 
-    /*
-     * Try the mapping, if it fails with EBUSY, unmap the region and try
-     * again. This shouldn't be necessary, but we sometimes see it in
-     * the VGA ROM space.
-     */
-    if (ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0 ||
-        (errno == EBUSY && vfio_dma_unmap(container, iova, size, NULL) == 0 &&
-         ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0)) {
-        return 0;
-    }
+    ret = CONT_DMA_MAP(container, &map);
 
-    error_report("VFIO_MAP_DMA failed: %s", strerror(errno));
-    return -errno;
+    if (ret < 0) {
+        error_report("VFIO_MAP_DMA failed: %s", strerror(-ret));
+    }
+    return ret;
 }
 
 static void vfio_host_win_add(VFIOContainer *container,
@@ -1263,10 +1234,10 @@ static void vfio_set_dirty_page_tracking(VFIOContainer *container, bool start)
         dirty.flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP;
     }
 
-    ret = ioctl(container->fd, VFIO_IOMMU_DIRTY_PAGES, &dirty);
+    ret = CONT_DIRTY_BITMAP(container, &dirty, NULL);
     if (ret) {
         error_report("Failed to set dirty tracking flag 0x%x errno: %d",
-                     dirty.flags, errno);
+                     dirty.flags, -ret);
     }
 }
 
@@ -1316,11 +1287,11 @@ static int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova,
         goto err_out;
     }
 
-    ret = ioctl(container->fd, VFIO_IOMMU_DIRTY_PAGES, dbitmap);
+    ret = CONT_DIRTY_BITMAP(container, dbitmap, range);
     if (ret) {
         error_report("Failed to get dirty bitmap for iova: 0x%"PRIx64
                      " size: 0x%"PRIx64" err: %d", (uint64_t)range->iova,
-                     (uint64_t)range->size, errno);
+                     (uint64_t)range->size, -ret);
         goto err_out;
     }
 
@@ -2090,6 +2061,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
     container->error = NULL;
     container->dirty_pages_supported = false;
     container->dma_max_mappings = 0;
+    container->io_ops = &vfio_cont_io_ioctl;
     QLIST_INIT(&container->giommu_list);
     QLIST_INIT(&container->hostwin_list);
     QLIST_INIT(&container->vrdl_list);
@@ -2626,3 +2598,73 @@ int vfio_eeh_as_op(AddressSpace *as, uint32_t op)
     }
     return vfio_eeh_container_op(container, op);
 }
+
+/*
+ * Traditional ioctl() based io_ops
+ */
+
+static int vfio_io_dma_map(VFIOContainer *container,
+                           struct vfio_iommu_type1_dma_map *map)
+{
+
+    /*
+     * Try the mapping, if it fails with EBUSY, unmap the region and try
+     * again. This shouldn't be necessary, but we sometimes see it in
+     * the VGA ROM space.
+     */
+    if (ioctl(container->fd, VFIO_IOMMU_MAP_DMA, map) == 0 ||
+        (errno == EBUSY &&
+         vfio_dma_unmap(container, map->iova, map->size, NULL) == 0 &&
+         ioctl(container->fd, VFIO_IOMMU_MAP_DMA, map) == 0)) {
+        return 0;
+    }
+    return -errno;
+}
+
+static int vfio_io_dma_unmap(VFIOContainer *container,
+                             struct vfio_iommu_type1_dma_unmap *unmap,
+                             struct vfio_bitmap *bitmap)
+{
+
+    while (ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, unmap)) {
+        /*
+         * The type1 backend has an off-by-one bug in the kernel (71a7d3d78e3c
+         * v4.15) where an overflow in its wrap-around check prevents us from
+         * unmapping the last page of the address space. Test for the error
+         * condition and re-try the unmap excluding the last page. The
+         * expectation is that we've never mapped the last page anyway and this
+         * unmap request comes via vIOMMU support which also makes it unlikely
+         * that this page is used. This bug was introduced well after type1 v2
+         * support was introduced, so we shouldn't need to test for v1. A fix
+         * is queued for kernel v5.0 so this workaround can be removed once
+         * affected kernels are sufficiently deprecated.
+         */
+        if (errno == EINVAL && unmap->size && !(unmap->iova + unmap->size) &&
+            container->iommu_type == VFIO_TYPE1v2_IOMMU) {
+            trace_vfio_dma_unmap_overflow_workaround();
+            unmap->size -= 1ULL << ctz64(container->pgsizes);
+            continue;
+        }
+        error_report("VFIO_UNMAP_DMA failed: %s", strerror(errno));
+        return -errno;
+    }
+
+    return 0;
+}
+
+static int vfio_io_dirty_bitmap(VFIOContainer *container,
+                                struct vfio_iommu_type1_dirty_bitmap *bitmap,
+                                struct vfio_iommu_type1_dirty_bitmap_get *range)
+{
+    int ret;
+
+    ret = ioctl(container->fd, VFIO_IOMMU_DIRTY_PAGES, bitmap);
+
+    return ret < 0 ? -errno : ret;
+}
+
+VFIOContIO vfio_cont_io_ioctl = {
+    .dma_map = vfio_io_dma_map,
+    .dma_unmap = vfio_io_dma_unmap,
+    .dirty_bitmap = vfio_io_dirty_bitmap,
+};
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index e573f5a..6fd40f1 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -75,6 +75,7 @@ typedef struct VFIOAddressSpace {
 } VFIOAddressSpace;
 
 struct VFIOGroup;
+typedef struct VFIOContIO VFIOContIO;
 
 typedef struct VFIOContainer {
     VFIOAddressSpace *space;
@@ -83,6 +84,7 @@ typedef struct VFIOContainer {
     MemoryListener prereg_listener;
     unsigned iommu_type;
     Error *error;
+    VFIOContIO *io_ops;
     bool initialized;
     bool dirty_pages_supported;
     uint64_t dirty_pgsizes;
@@ -154,6 +156,37 @@ struct VFIODeviceOps {
     int (*vfio_load_config)(VFIODevice *vdev, QEMUFile *f);
 };
 
+#ifdef CONFIG_LINUX
+
+/*
+ * The next 2 ops vectors are how Devices and Containers
+ * communicate with the server. The default option is
+ * through ioctl() to the kernel VFIO driver, but vfio-user
+ * can use a socket to a remote process.
+ */
+
+struct VFIOContIO {
+    int (*dma_map)(VFIOContainer *container,
+                   struct vfio_iommu_type1_dma_map *map);
+    int (*dma_unmap)(VFIOContainer *container,
+                     struct vfio_iommu_type1_dma_unmap *unmap,
+                     struct vfio_bitmap *bitmap);
+    int (*dirty_bitmap)(VFIOContainer *container,
+                        struct vfio_iommu_type1_dirty_bitmap *bitmap,
+                        struct vfio_iommu_type1_dirty_bitmap_get *range);
+};
+
+#define CONT_DMA_MAP(cont, map) \
+    ((cont)->io_ops->dma_map((cont), (map)))
+#define CONT_DMA_UNMAP(cont, unmap, bitmap) \
+    ((cont)->io_ops->dma_unmap((cont), (unmap), (bitmap)))
+#define CONT_DIRTY_BITMAP(cont, bitmap, range) \
+    ((cont)->io_ops->dirty_bitmap((cont), (bitmap), (range)))
+
+extern VFIOContIO vfio_cont_io_ioctl;
+
+#endif /* CONFIG_LINUX */
+
 typedef struct VFIOGroup {
     int fd;
     int groupid;
Used for communication with VFIO driver
(prep work for vfio-user, which will communicate over a socket)

Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
---
 hw/vfio/common.c              | 126 ++++++++++++++++++++++++++++--------------
 include/hw/vfio/vfio-common.h |  33 +++++++++++
 2 files changed, 117 insertions(+), 42 deletions(-)
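The commit message only hints at the follow-up: once container accesses go
through an io_ops vector, an alternative backend can supply callbacks that
talk to a remote process over a socket instead of issuing ioctl()s. A rough
sketch of that direction follows; the vfio_user_io_* names, the
vfio_cont_io_sock table, and the -ENOTSUP stubs are illustrative assumptions,
not code from this series.

    /* Sketch of a socket-backed ops vector a vfio-user container could use;
     * each callback would marshal the request into a vfio-user message
     * instead of calling ioctl(). Stubs only. */
    static int vfio_user_io_dma_map(VFIOContainer *container,
                                    struct vfio_iommu_type1_dma_map *map)
    {
        return -ENOTSUP;   /* would send a DMA-map request on the socket */
    }

    static int vfio_user_io_dma_unmap(VFIOContainer *container,
                                      struct vfio_iommu_type1_dma_unmap *unmap,
                                      struct vfio_bitmap *bitmap)
    {
        return -ENOTSUP;   /* would send a DMA-unmap request on the socket */
    }

    static int vfio_user_io_dirty_bitmap(VFIOContainer *container,
                                         struct vfio_iommu_type1_dirty_bitmap *bitmap,
                                         struct vfio_iommu_type1_dirty_bitmap_get *range)
    {
        return -ENOTSUP;   /* would fetch the dirty bitmap from the remote */
    }

    static VFIOContIO vfio_cont_io_sock = {
        .dma_map = vfio_user_io_dma_map,
        .dma_unmap = vfio_user_io_dma_unmap,
        .dirty_bitmap = vfio_user_io_dirty_bitmap,
    };

A vfio-user container setup path would then point container->io_ops at this
vector, just as vfio_connect_container() points it at &vfio_cont_io_ioctl for
kernel-backed containers in this patch.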