diff mbox series

[RFC,v2,12/16] vfio-user: proxy container connect/disconnect

Message ID 7f6595dcd88bfc0b459d3befd615a569635bae7d.1629131628.git.elena.ufimtseva@oracle.com (mailing list archive)
State New, archived
Headers show
Series vfio-user implementation | expand

Commit Message

Elena Ufimtseva Aug. 16, 2021, 4:42 p.m. UTC
From: John Johnson <john.g.johnson@oracle.com>

Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
---
 include/hw/vfio/vfio-common.h |  3 ++
 hw/vfio/common.c              | 84 +++++++++++++++++++++++++++++++++++
 hw/vfio/pci.c                 | 22 +++++++++
 3 files changed, 109 insertions(+)

Comments

Stefan Hajnoczi Sept. 8, 2021, 8:30 a.m. UTC | #1
On Mon, Aug 16, 2021 at 09:42:45AM -0700, Elena Ufimtseva wrote:
> From: John Johnson <john.g.johnson@oracle.com>
> 
> Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
> Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
> Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
> ---
>  include/hw/vfio/vfio-common.h |  3 ++
>  hw/vfio/common.c              | 84 +++++++++++++++++++++++++++++++++++
>  hw/vfio/pci.c                 | 22 +++++++++
>  3 files changed, 109 insertions(+)

Alex: I'm not familiar enough with hw/vfio/ to review this in depth. You
might have suggestions on how to unify the vfio-user and vfio kernel
concepts of groups and containers.

> 
> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
> index bdd25a546c..688660c28d 100644
> --- a/include/hw/vfio/vfio-common.h
> +++ b/include/hw/vfio/vfio-common.h
> @@ -91,6 +91,7 @@ typedef struct VFIOContainer {
>      uint64_t max_dirty_bitmap_size;
>      unsigned long pgsizes;
>      unsigned int dma_max_mappings;
> +    VFIOProxy *proxy;
>      QLIST_HEAD(, VFIOGuestIOMMU) giommu_list;
>      QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list;
>      QLIST_HEAD(, VFIOGroup) group_list;
> @@ -217,6 +218,8 @@ VFIOGroup *vfio_get_group(int groupid, AddressSpace *as, Error **errp);
>  void vfio_put_group(VFIOGroup *group);
>  int vfio_get_device(VFIOGroup *group, const char *name,
>                      VFIODevice *vbasedev, Error **errp);
> +void vfio_connect_proxy(VFIOProxy *proxy, VFIOGroup *group, AddressSpace *as);
> +void vfio_disconnect_proxy(VFIOGroup *group);
>  
>  extern const MemoryRegionOps vfio_region_ops;
>  typedef QLIST_HEAD(VFIOGroupList, VFIOGroup) VFIOGroupList;
> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
> index 9fe3e05dc6..57b9e111e6 100644
> --- a/hw/vfio/common.c
> +++ b/hw/vfio/common.c
> @@ -2249,6 +2249,55 @@ put_space_exit:
>      return ret;
>  }
>  
> +void vfio_connect_proxy(VFIOProxy *proxy, VFIOGroup *group, AddressSpace *as)
> +{
> +    VFIOAddressSpace *space;
> +    VFIOContainer *container;
> +
> +    if (QLIST_EMPTY(&vfio_group_list)) {
> +        qemu_register_reset(vfio_reset_handler, NULL);
> +    }
> +
> +    QLIST_INSERT_HEAD(&vfio_group_list, group, next);
> +
> +    /*
> +     * try to mirror vfio_connect_container()
> +     * as much as possible
> +     */
> +
> +    space = vfio_get_address_space(as);
> +
> +    container = g_malloc0(sizeof(*container));
> +    container->space = space;
> +    container->fd = -1;
> +    QLIST_INIT(&container->giommu_list);
> +    QLIST_INIT(&container->hostwin_list);
> +    container->proxy = proxy;
> +
> +    /*
> +     * The proxy uses a SW IOMMU in lieu of the HW one
> +     * used in the ioctl() version.  Use TYPE1 with the
> +     * target's page size for maximum compatibility
> +     */
> +    container->iommu_type = VFIO_TYPE1_IOMMU;
> +    vfio_host_win_add(container, 0, (hwaddr)-1, TARGET_PAGE_SIZE);
> +    container->pgsizes = TARGET_PAGE_SIZE;
> +
> +    container->dirty_pages_supported = true;
> +    container->max_dirty_bitmap_size = VFIO_USER_DEF_MAX_XFER;
> +    container->dirty_pgsizes = TARGET_PAGE_SIZE;
> +
> +    QLIST_INIT(&container->group_list);
> +    QLIST_INSERT_HEAD(&space->containers, container, next);
> +
> +    group->container = container;
> +    QLIST_INSERT_HEAD(&container->group_list, group, container_next);
> +
> +    container->listener = vfio_memory_listener;
> +    memory_listener_register(&container->listener, container->space->as);
> +    container->initialized = true;
> +}
> +
>  static void vfio_disconnect_container(VFIOGroup *group)
>  {
>      VFIOContainer *container = group->container;
> @@ -2291,6 +2340,41 @@ static void vfio_disconnect_container(VFIOGroup *group)
>      }
>  }
>  
> +void vfio_disconnect_proxy(VFIOGroup *group)
> +{
> +    VFIOContainer *container = group->container;
> +    VFIOAddressSpace *space = container->space;
> +    VFIOGuestIOMMU *giommu, *tmp;
> +
> +    /*
> +     * try to mirror vfio_disconnect_container()
> +     * as much as possible, knowing each device
> +     * is in one group and one container
> +     */
> +
> +    QLIST_REMOVE(group, container_next);
> +    group->container = NULL;
> +
> +    /*
> +     * Explicitly release the listener first before unset container,
> +     * since unset may destroy the backend container if it's the last
> +     * group.
> +     */
> +    memory_listener_unregister(&container->listener);
> +
> +    QLIST_REMOVE(container, next);
> +
> +    QLIST_FOREACH_SAFE(giommu, &container->giommu_list, giommu_next, tmp) {
> +        memory_region_unregister_iommu_notifier(
> +            MEMORY_REGION(giommu->iommu), &giommu->n);
> +        QLIST_REMOVE(giommu, giommu_next);
> +        g_free(giommu);
> +    }
> +
> +    g_free(container);
> +    vfio_put_address_space(space);
> +}
> +
>  VFIOGroup *vfio_get_group(int groupid, AddressSpace *as, Error **errp)
>  {
>      VFIOGroup *group;
> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> index 282de6a30b..2c9fcb2fa9 100644
> --- a/hw/vfio/pci.c
> +++ b/hw/vfio/pci.c
> @@ -3442,6 +3442,7 @@ static void vfio_user_pci_realize(PCIDevice *pdev, Error **errp)
>      VFIODevice *vbasedev = &vdev->vbasedev;
>      SocketAddress addr;
>      VFIOProxy *proxy;
> +    VFIOGroup *group = NULL;
>      int ret;
>      Error *err = NULL;
>  
> @@ -3484,6 +3485,19 @@ static void vfio_user_pci_realize(PCIDevice *pdev, Error **errp)
>      vbasedev->no_mmap = false;
>      vbasedev->ops = &vfio_user_pci_ops;
>  
> +    /*
> +     * each device gets its own group and container
> +     * make them unrelated to any host IOMMU groupings
> +     */
> +    group = g_malloc0(sizeof(*group));
> +    group->fd = -1;
> +    group->groupid = -1;
> +    QLIST_INIT(&group->device_list);
> +    QLIST_INSERT_HEAD(&group->device_list, vbasedev, next);
> +    vbasedev->group = group;
> +
> +    vfio_connect_proxy(proxy, group, pci_device_iommu_address_space(pdev));
> +
>      ret = vfio_user_get_info(&vdev->vbasedev);
>      if (ret) {
>          error_setg_errno(errp, -ret, "get info failure");
> @@ -3587,6 +3601,9 @@ out_teardown:
>      vfio_teardown_msi(vdev);
>      vfio_bars_exit(vdev);
>  error:
> +    if (group != NULL) {
> +        vfio_disconnect_proxy(group);
> +    }
>      vfio_user_disconnect(proxy);
>      error_prepend(errp, VFIO_MSG_PREFIX, vdev->vbasedev.name);
>  }
> @@ -3595,6 +3612,11 @@ static void vfio_user_instance_finalize(Object *obj)
>  {
>      VFIOPCIDevice *vdev = VFIO_PCI_BASE(obj);
>      VFIODevice *vbasedev = &vdev->vbasedev;
> +    VFIOGroup *group = vbasedev->group;
> +
> +    vfio_disconnect_proxy(group);
> +    g_free(group);
> +    vbasedev->group = NULL;

Can vfio_put_group() be used instead? I'm worried that the cleanup code
will be duplicated or become inconsistent if it's not shared.

Also, vfio_instance_finalize() calls vfio_put_group() after
vfio_put_device(). Does this code intentionally take advantage of the if
(!vbasedev->group) early return in vfio_put_base_device()? This is
non-obvious. I recommend unifying the device and group cleanup instead
of special-casing it here (this is fragile!).

>  
>      vfio_put_device(vdev);
>  
> -- 
> 2.25.1
>
diff mbox series

Patch

diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index bdd25a546c..688660c28d 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -91,6 +91,7 @@  typedef struct VFIOContainer {
     uint64_t max_dirty_bitmap_size;
     unsigned long pgsizes;
     unsigned int dma_max_mappings;
+    VFIOProxy *proxy;
     QLIST_HEAD(, VFIOGuestIOMMU) giommu_list;
     QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list;
     QLIST_HEAD(, VFIOGroup) group_list;
@@ -217,6 +218,8 @@  VFIOGroup *vfio_get_group(int groupid, AddressSpace *as, Error **errp);
 void vfio_put_group(VFIOGroup *group);
 int vfio_get_device(VFIOGroup *group, const char *name,
                     VFIODevice *vbasedev, Error **errp);
+void vfio_connect_proxy(VFIOProxy *proxy, VFIOGroup *group, AddressSpace *as);
+void vfio_disconnect_proxy(VFIOGroup *group);
 
 extern const MemoryRegionOps vfio_region_ops;
 typedef QLIST_HEAD(VFIOGroupList, VFIOGroup) VFIOGroupList;
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 9fe3e05dc6..57b9e111e6 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -2249,6 +2249,55 @@  put_space_exit:
     return ret;
 }
 
+void vfio_connect_proxy(VFIOProxy *proxy, VFIOGroup *group, AddressSpace *as)
+{
+    VFIOAddressSpace *space;
+    VFIOContainer *container;
+
+    if (QLIST_EMPTY(&vfio_group_list)) {
+        qemu_register_reset(vfio_reset_handler, NULL);
+    }
+
+    QLIST_INSERT_HEAD(&vfio_group_list, group, next);
+
+    /*
+     * try to mirror vfio_connect_container()
+     * as much as possible
+     */
+
+    space = vfio_get_address_space(as);
+
+    container = g_malloc0(sizeof(*container));
+    container->space = space;
+    container->fd = -1;
+    QLIST_INIT(&container->giommu_list);
+    QLIST_INIT(&container->hostwin_list);
+    container->proxy = proxy;
+
+    /*
+     * The proxy uses a SW IOMMU in lieu of the HW one
+     * used in the ioctl() version.  Use TYPE1 with the
+     * target's page size for maximum compatibility
+     */
+    container->iommu_type = VFIO_TYPE1_IOMMU;
+    vfio_host_win_add(container, 0, (hwaddr)-1, TARGET_PAGE_SIZE);
+    container->pgsizes = TARGET_PAGE_SIZE;
+
+    container->dirty_pages_supported = true;
+    container->max_dirty_bitmap_size = VFIO_USER_DEF_MAX_XFER;
+    container->dirty_pgsizes = TARGET_PAGE_SIZE;
+
+    QLIST_INIT(&container->group_list);
+    QLIST_INSERT_HEAD(&space->containers, container, next);
+
+    group->container = container;
+    QLIST_INSERT_HEAD(&container->group_list, group, container_next);
+
+    container->listener = vfio_memory_listener;
+    memory_listener_register(&container->listener, container->space->as);
+    container->initialized = true;
+}
+
 static void vfio_disconnect_container(VFIOGroup *group)
 {
     VFIOContainer *container = group->container;
@@ -2291,6 +2340,41 @@  static void vfio_disconnect_container(VFIOGroup *group)
     }
 }
 
+void vfio_disconnect_proxy(VFIOGroup *group)
+{
+    VFIOContainer *container = group->container;
+    VFIOAddressSpace *space = container->space;
+    VFIOGuestIOMMU *giommu, *tmp;
+
+    /*
+     * try to mirror vfio_disconnect_container()
+     * as much as possible, knowing each device
+     * is in one group and one container
+     */
+
+    QLIST_REMOVE(group, container_next);
+    group->container = NULL;
+
+    /*
+     * Explicitly release the listener first before unset container,
+     * since unset may destroy the backend container if it's the last
+     * group.
+     */
+    memory_listener_unregister(&container->listener);
+
+    QLIST_REMOVE(container, next);
+
+    QLIST_FOREACH_SAFE(giommu, &container->giommu_list, giommu_next, tmp) {
+        memory_region_unregister_iommu_notifier(
+            MEMORY_REGION(giommu->iommu), &giommu->n);
+        QLIST_REMOVE(giommu, giommu_next);
+        g_free(giommu);
+    }
+
+    g_free(container);
+    vfio_put_address_space(space);
+}
+
 VFIOGroup *vfio_get_group(int groupid, AddressSpace *as, Error **errp)
 {
     VFIOGroup *group;
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 282de6a30b..2c9fcb2fa9 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -3442,6 +3442,7 @@  static void vfio_user_pci_realize(PCIDevice *pdev, Error **errp)
     VFIODevice *vbasedev = &vdev->vbasedev;
     SocketAddress addr;
     VFIOProxy *proxy;
+    VFIOGroup *group = NULL;
     int ret;
     Error *err = NULL;
 
@@ -3484,6 +3485,19 @@  static void vfio_user_pci_realize(PCIDevice *pdev, Error **errp)
     vbasedev->no_mmap = false;
     vbasedev->ops = &vfio_user_pci_ops;
 
+    /*
+     * each device gets its own group and container
+     * make them unrelated to any host IOMMU groupings
+     */
+    group = g_malloc0(sizeof(*group));
+    group->fd = -1;
+    group->groupid = -1;
+    QLIST_INIT(&group->device_list);
+    QLIST_INSERT_HEAD(&group->device_list, vbasedev, next);
+    vbasedev->group = group;
+
+    vfio_connect_proxy(proxy, group, pci_device_iommu_address_space(pdev));
+
     ret = vfio_user_get_info(&vdev->vbasedev);
     if (ret) {
         error_setg_errno(errp, -ret, "get info failure");
@@ -3587,6 +3601,9 @@  out_teardown:
     vfio_teardown_msi(vdev);
     vfio_bars_exit(vdev);
 error:
+    if (group != NULL) {
+        vfio_disconnect_proxy(group);
+    }
     vfio_user_disconnect(proxy);
     error_prepend(errp, VFIO_MSG_PREFIX, vdev->vbasedev.name);
 }
@@ -3595,6 +3612,11 @@  static void vfio_user_instance_finalize(Object *obj)
 {
     VFIOPCIDevice *vdev = VFIO_PCI_BASE(obj);
     VFIODevice *vbasedev = &vdev->vbasedev;
+    VFIOGroup *group = vbasedev->group;
+
+    vfio_disconnect_proxy(group);
+    g_free(group);
+    vbasedev->group = NULL;
 
     vfio_put_device(vdev);