Message ID | 7f6595dcd88bfc0b459d3befd615a569635bae7d.1629131628.git.elena.ufimtseva@oracle.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | vfio-user implementation | expand |
On Mon, Aug 16, 2021 at 09:42:45AM -0700, Elena Ufimtseva wrote: > From: John Johnson <john.g.johnson@oracle.com> > > Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com> > Signed-off-by: John G Johnson <john.g.johnson@oracle.com> > Signed-off-by: Jagannathan Raman <jag.raman@oracle.com> > --- > include/hw/vfio/vfio-common.h | 3 ++ > hw/vfio/common.c | 84 +++++++++++++++++++++++++++++++++++ > hw/vfio/pci.c | 22 +++++++++ > 3 files changed, 109 insertions(+) Alex: I'm not familiar enough with hw/vfio/ to review this in depth. You might have suggestions on how to unify the vfio-user and vfio kernel concepts of groups and containers. > > diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h > index bdd25a546c..688660c28d 100644 > --- a/include/hw/vfio/vfio-common.h > +++ b/include/hw/vfio/vfio-common.h > @@ -91,6 +91,7 @@ typedef struct VFIOContainer { > uint64_t max_dirty_bitmap_size; > unsigned long pgsizes; > unsigned int dma_max_mappings; > + VFIOProxy *proxy; > QLIST_HEAD(, VFIOGuestIOMMU) giommu_list; > QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list; > QLIST_HEAD(, VFIOGroup) group_list; > @@ -217,6 +218,8 @@ VFIOGroup *vfio_get_group(int groupid, AddressSpace *as, Error **errp); > void vfio_put_group(VFIOGroup *group); > int vfio_get_device(VFIOGroup *group, const char *name, > VFIODevice *vbasedev, Error **errp); > +void vfio_connect_proxy(VFIOProxy *proxy, VFIOGroup *group, AddressSpace *as); > +void vfio_disconnect_proxy(VFIOGroup *group); > > extern const MemoryRegionOps vfio_region_ops; > typedef QLIST_HEAD(VFIOGroupList, VFIOGroup) VFIOGroupList; > diff --git a/hw/vfio/common.c b/hw/vfio/common.c > index 9fe3e05dc6..57b9e111e6 100644 > --- a/hw/vfio/common.c > +++ b/hw/vfio/common.c > @@ -2249,6 +2249,55 @@ put_space_exit: > return ret; > } > > +void vfio_connect_proxy(VFIOProxy *proxy, VFIOGroup *group, AddressSpace *as) > +{ > + VFIOAddressSpace *space; > + VFIOContainer *container; > + > + if (QLIST_EMPTY(&vfio_group_list)) { 
> + qemu_register_reset(vfio_reset_handler, NULL); > + } > + > + QLIST_INSERT_HEAD(&vfio_group_list, group, next); > + > + /* > + * try to mirror vfio_connect_container() > + * as much as possible > + */ > + > + space = vfio_get_address_space(as); > + > + container = g_malloc0(sizeof(*container)); > + container->space = space; > + container->fd = -1; > + QLIST_INIT(&container->giommu_list); > + QLIST_INIT(&container->hostwin_list); > + container->proxy = proxy; > + > + /* > + * The proxy uses a SW IOMMU in lieu of the HW one > + * used in the ioctl() version. Use TYPE1 with the > + * target's page size for maximum compatibility > + */ > + container->iommu_type = VFIO_TYPE1_IOMMU; > + vfio_host_win_add(container, 0, (hwaddr)-1, TARGET_PAGE_SIZE); > + container->pgsizes = TARGET_PAGE_SIZE; > + > + container->dirty_pages_supported = true; > + container->max_dirty_bitmap_size = VFIO_USER_DEF_MAX_XFER; > + container->dirty_pgsizes = TARGET_PAGE_SIZE; > + > + QLIST_INIT(&container->group_list); > + QLIST_INSERT_HEAD(&space->containers, container, next); > + > + group->container = container; > + QLIST_INSERT_HEAD(&container->group_list, group, container_next); > + > + container->listener = vfio_memory_listener; > + memory_listener_register(&container->listener, container->space->as); > + container->initialized = true; > +} > + > static void vfio_disconnect_container(VFIOGroup *group) > { > VFIOContainer *container = group->container; > @@ -2291,6 +2340,41 @@ static void vfio_disconnect_container(VFIOGroup *group) > } > } > > +void vfio_disconnect_proxy(VFIOGroup *group) > +{ > + VFIOContainer *container = group->container; > + VFIOAddressSpace *space = container->space; > + VFIOGuestIOMMU *giommu, *tmp; > + > + /* > + * try to mirror vfio_disconnect_container() > + * as much as possible, knowing each device > + * is in one group and one container > + */ > + > + QLIST_REMOVE(group, container_next); > + group->container = NULL; > + > + /* > + * Explicitly release the 
listener first before unset container, > + * since unset may destroy the backend container if it's the last > + * group. > + */ > + memory_listener_unregister(&container->listener); > + > + QLIST_REMOVE(container, next); > + > + QLIST_FOREACH_SAFE(giommu, &container->giommu_list, giommu_next, tmp) { > + memory_region_unregister_iommu_notifier( > + MEMORY_REGION(giommu->iommu), &giommu->n); > + QLIST_REMOVE(giommu, giommu_next); > + g_free(giommu); > + } > + > + g_free(container); > + vfio_put_address_space(space); > +} > + > VFIOGroup *vfio_get_group(int groupid, AddressSpace *as, Error **errp) > { > VFIOGroup *group; > diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c > index 282de6a30b..2c9fcb2fa9 100644 > --- a/hw/vfio/pci.c > +++ b/hw/vfio/pci.c > @@ -3442,6 +3442,7 @@ static void vfio_user_pci_realize(PCIDevice *pdev, Error **errp) > VFIODevice *vbasedev = &vdev->vbasedev; > SocketAddress addr; > VFIOProxy *proxy; > + VFIOGroup *group = NULL; > int ret; > Error *err = NULL; > > @@ -3484,6 +3485,19 @@ static void vfio_user_pci_realize(PCIDevice *pdev, Error **errp) > vbasedev->no_mmap = false; > vbasedev->ops = &vfio_user_pci_ops; > > + /* > + * each device gets its own group and container > + * make them unrelated to any host IOMMU groupings > + */ > + group = g_malloc0(sizeof(*group)); > + group->fd = -1; > + group->groupid = -1; > + QLIST_INIT(&group->device_list); > + QLIST_INSERT_HEAD(&group->device_list, vbasedev, next); > + vbasedev->group = group; > + > + vfio_connect_proxy(proxy, group, pci_device_iommu_address_space(pdev)); > + > ret = vfio_user_get_info(&vdev->vbasedev); > if (ret) { > error_setg_errno(errp, -ret, "get info failure"); > @@ -3587,6 +3601,9 @@ out_teardown: > vfio_teardown_msi(vdev); > vfio_bars_exit(vdev); > error: > + if (group != NULL) { > + vfio_disconnect_proxy(group); > + } > vfio_user_disconnect(proxy); > error_prepend(errp, VFIO_MSG_PREFIX, vdev->vbasedev.name); > } > @@ -3595,6 +3612,11 @@ static void 
vfio_user_instance_finalize(Object *obj) > { > VFIOPCIDevice *vdev = VFIO_PCI_BASE(obj); > VFIODevice *vbasedev = &vdev->vbasedev; > + VFIOGroup *group = vbasedev->group; > + > + vfio_disconnect_proxy(group); > + g_free(group); > + vbasedev->group = NULL; Can vfio_put_group() be used instead? I'm worried that the cleanup code will be duplicated or become inconsistent if it's not shared. Also, vfio_instance_finalize() calls vfio_put_group() after vfio_put_device(). Does this code intentionally take advantage of the if (!vbasedev->group) early return in vfio_put_base_device()? This is non-obvious. I recommend unifying the device and group cleanup instead of special-casing it here (this is fragile!). > > vfio_put_device(vdev); > > -- > 2.25.1 >
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h index bdd25a546c..688660c28d 100644 --- a/include/hw/vfio/vfio-common.h +++ b/include/hw/vfio/vfio-common.h @@ -91,6 +91,7 @@ typedef struct VFIOContainer { uint64_t max_dirty_bitmap_size; unsigned long pgsizes; unsigned int dma_max_mappings; + VFIOProxy *proxy; QLIST_HEAD(, VFIOGuestIOMMU) giommu_list; QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list; QLIST_HEAD(, VFIOGroup) group_list; @@ -217,6 +218,8 @@ VFIOGroup *vfio_get_group(int groupid, AddressSpace *as, Error **errp); void vfio_put_group(VFIOGroup *group); int vfio_get_device(VFIOGroup *group, const char *name, VFIODevice *vbasedev, Error **errp); +void vfio_connect_proxy(VFIOProxy *proxy, VFIOGroup *group, AddressSpace *as); +void vfio_disconnect_proxy(VFIOGroup *group); extern const MemoryRegionOps vfio_region_ops; typedef QLIST_HEAD(VFIOGroupList, VFIOGroup) VFIOGroupList; diff --git a/hw/vfio/common.c b/hw/vfio/common.c index 9fe3e05dc6..57b9e111e6 100644 --- a/hw/vfio/common.c +++ b/hw/vfio/common.c @@ -2249,6 +2249,55 @@ put_space_exit: return ret; } +void vfio_connect_proxy(VFIOProxy *proxy, VFIOGroup *group, AddressSpace *as) +{ + VFIOAddressSpace *space; + VFIOContainer *container; + + if (QLIST_EMPTY(&vfio_group_list)) { + qemu_register_reset(vfio_reset_handler, NULL); + } + + QLIST_INSERT_HEAD(&vfio_group_list, group, next); + + /* + * try to mirror vfio_connect_container() + * as much as possible + */ + + space = vfio_get_address_space(as); + + container = g_malloc0(sizeof(*container)); + container->space = space; + container->fd = -1; + QLIST_INIT(&container->giommu_list); + QLIST_INIT(&container->hostwin_list); + container->proxy = proxy; + + /* + * The proxy uses a SW IOMMU in lieu of the HW one + * used in the ioctl() version. 
Use TYPE1 with the + * target's page size for maximum compatibility + */ + container->iommu_type = VFIO_TYPE1_IOMMU; + vfio_host_win_add(container, 0, (hwaddr)-1, TARGET_PAGE_SIZE); + container->pgsizes = TARGET_PAGE_SIZE; + + container->dirty_pages_supported = true; + container->max_dirty_bitmap_size = VFIO_USER_DEF_MAX_XFER; + container->dirty_pgsizes = TARGET_PAGE_SIZE; + + QLIST_INIT(&container->group_list); + QLIST_INSERT_HEAD(&space->containers, container, next); + + group->container = container; + QLIST_INSERT_HEAD(&container->group_list, group, container_next); + + container->listener = vfio_memory_listener; + memory_listener_register(&container->listener, container->space->as); + container->initialized = true; +} + static void vfio_disconnect_container(VFIOGroup *group) { VFIOContainer *container = group->container; @@ -2291,6 +2340,41 @@ static void vfio_disconnect_container(VFIOGroup *group) } } +void vfio_disconnect_proxy(VFIOGroup *group) +{ + VFIOContainer *container = group->container; + VFIOAddressSpace *space = container->space; + VFIOGuestIOMMU *giommu, *tmp; + + /* + * try to mirror vfio_disconnect_container() + * as much as possible, knowing each device + * is in one group and one container + */ + + QLIST_REMOVE(group, container_next); + group->container = NULL; + + /* + * Explicitly release the listener first before unset container, + * since unset may destroy the backend container if it's the last + * group. 
+ */ + memory_listener_unregister(&container->listener); + + QLIST_REMOVE(container, next); + + QLIST_FOREACH_SAFE(giommu, &container->giommu_list, giommu_next, tmp) { + memory_region_unregister_iommu_notifier( + MEMORY_REGION(giommu->iommu), &giommu->n); + QLIST_REMOVE(giommu, giommu_next); + g_free(giommu); + } + + g_free(container); + vfio_put_address_space(space); +} + VFIOGroup *vfio_get_group(int groupid, AddressSpace *as, Error **errp) { VFIOGroup *group; diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index 282de6a30b..2c9fcb2fa9 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -3442,6 +3442,7 @@ static void vfio_user_pci_realize(PCIDevice *pdev, Error **errp) VFIODevice *vbasedev = &vdev->vbasedev; SocketAddress addr; VFIOProxy *proxy; + VFIOGroup *group = NULL; int ret; Error *err = NULL; @@ -3484,6 +3485,19 @@ static void vfio_user_pci_realize(PCIDevice *pdev, Error **errp) vbasedev->no_mmap = false; vbasedev->ops = &vfio_user_pci_ops; + /* + * each device gets its own group and container + * make them unrelated to any host IOMMU groupings + */ + group = g_malloc0(sizeof(*group)); + group->fd = -1; + group->groupid = -1; + QLIST_INIT(&group->device_list); + QLIST_INSERT_HEAD(&group->device_list, vbasedev, next); + vbasedev->group = group; + + vfio_connect_proxy(proxy, group, pci_device_iommu_address_space(pdev)); + ret = vfio_user_get_info(&vdev->vbasedev); if (ret) { error_setg_errno(errp, -ret, "get info failure"); @@ -3587,6 +3601,9 @@ out_teardown: vfio_teardown_msi(vdev); vfio_bars_exit(vdev); error: + if (group != NULL) { + vfio_disconnect_proxy(group); + } vfio_user_disconnect(proxy); error_prepend(errp, VFIO_MSG_PREFIX, vdev->vbasedev.name); } @@ -3595,6 +3612,11 @@ static void vfio_user_instance_finalize(Object *obj) { VFIOPCIDevice *vdev = VFIO_PCI_BASE(obj); VFIODevice *vbasedev = &vdev->vbasedev; + VFIOGroup *group = vbasedev->group; + + vfio_disconnect_proxy(group); + g_free(group); + vbasedev->group = NULL; vfio_put_device(vdev);