diff mbox series

[RFC,v2,1/4] vfio: support creating VFIOContainer directly

Message ID 20180919062834.30103-2-tiwei.bie@intel.com (mailing list archive)
State New, archived
Headers show
Series Supporting programming IOMMU in QEMU (vDPA/vhost-user) | expand

Commit Message

Tiwei Bie Sept. 19, 2018, 6:28 a.m. UTC
This patch introduces several APIs to support creating
VFIOContainer from VFIO container fd and AddressSpace
directly. These containers will be marked as external,
and won't be used by the VFIO passthru code. This is
useful when the container fd is opened and shared by
another process and that process wants to do the IOMMU
programming based on a QEMU device's DMA address space.

Signed-off-by: Tiwei Bie <tiwei.bie@intel.com>
---
 hw/vfio/common.c              | 164 +++++++++++++++++++++++++++++++++-
 hw/vfio/trace-events          |   2 +
 include/hw/vfio/vfio-common.h |   9 ++
 3 files changed, 174 insertions(+), 1 deletion(-)
diff mbox series

Patch

diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 7c185e5a2e..899d1c8f46 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -41,6 +41,7 @@  struct vfio_group_head vfio_group_list =
     QLIST_HEAD_INITIALIZER(vfio_group_list);
 struct vfio_as_head vfio_address_spaces =
     QLIST_HEAD_INITIALIZER(vfio_address_spaces);
+QemuMutex vfio_address_spaces_lock;
 
 #ifdef CONFIG_KVM
 /*
@@ -1043,6 +1044,8 @@  static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
     int ret, fd;
     VFIOAddressSpace *space;
 
+    qemu_mutex_lock(&vfio_address_spaces_lock);
+
     space = vfio_get_address_space(as);
 
     /*
@@ -1073,10 +1076,14 @@  static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
     qemu_balloon_inhibit(true);
 
     QLIST_FOREACH(container, &space->containers, next) {
+        if (container->external) {
+            continue;
+        }
         if (!ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &container->fd)) {
             group->container = container;
             QLIST_INSERT_HEAD(&container->group_list, group, container_next);
             vfio_kvm_device_add_group(group);
+            qemu_mutex_unlock(&vfio_address_spaces_lock);
             return 0;
         }
     }
@@ -1249,6 +1256,7 @@  static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
 
     container->initialized = true;
 
+    qemu_mutex_unlock(&vfio_address_spaces_lock);
     return 0;
 listener_release_exit:
     QLIST_REMOVE(group, container_next);
@@ -1265,6 +1273,7 @@  close_fd_exit:
 put_space_exit:
     qemu_balloon_inhibit(false);
     vfio_put_address_space(space);
+    qemu_mutex_unlock(&vfio_address_spaces_lock);
 
     return ret;
 }
@@ -1273,6 +1282,8 @@  static void vfio_disconnect_container(VFIOGroup *group)
 {
     VFIOContainer *container = group->container;
 
+    qemu_mutex_lock(&vfio_address_spaces_lock);
+
     QLIST_REMOVE(group, container_next);
     group->container = NULL;
 
@@ -1309,6 +1320,147 @@  static void vfio_disconnect_container(VFIOGroup *group)
 
         vfio_put_address_space(space);
     }
+
+    qemu_mutex_unlock(&vfio_address_spaces_lock);
+}
+
+/*
+ * Currently, only TYPE1 IOMMU is supported.
+ */
+VFIOContainer *vfio_new_container(int container_fd, AddressSpace *as,
+                                  Error **errp)
+{
+    VFIOContainer *container;
+    int ret, fd;
+    VFIOAddressSpace *space;
+    struct vfio_iommu_type1_info info;
+    hwaddr pgmask;
+    bool v2;
+
+    trace_vfio_new_container(container_fd);
+
+    qemu_mutex_lock(&vfio_address_spaces_lock);
+
+    space = vfio_get_address_space(as);
+
+    qemu_balloon_inhibit(true);
+
+    fd = container_fd;
+    if (fd < 0) {
+        error_setg(errp, "invalid container fd %d", fd);
+        goto put_space_exit;
+    }
+
+    ret = ioctl(fd, VFIO_GET_API_VERSION);
+    if (ret != VFIO_API_VERSION) {
+        error_setg(errp, "supported vfio version: %d, "
+                   "reported version: %d", VFIO_API_VERSION, ret);
+        goto put_space_exit;
+    }
+
+    container = g_malloc0(sizeof(*container));
+    container->space = space;
+    container->fd = fd;
+    QLIST_INIT(&container->giommu_list);
+    QLIST_INIT(&container->hostwin_list);
+
+    if (!ioctl(fd, VFIO_CHECK_EXTENSION, VFIO_TYPE1_IOMMU) &&
+        !ioctl(fd, VFIO_CHECK_EXTENSION, VFIO_TYPE1v2_IOMMU)) {
+        error_setg(errp, "No available IOMMU models");
+        goto free_container_exit;
+    }
+    v2 = !!ioctl(fd, VFIO_CHECK_EXTENSION, VFIO_TYPE1v2_IOMMU);
+    container->iommu_type = v2 ? VFIO_TYPE1v2_IOMMU : VFIO_TYPE1_IOMMU;
+
+    /*
+     * FIXME: This assumes that a Type1 IOMMU can map any 64-bit
+     * IOVA whatsoever.  That's not actually true, but the current
+     * kernel interface doesn't tell us what it can map, and the
+     * existing Type1 IOMMUs generally support any IOVA we're
+     * going to actually try in practice.
+     */
+    info.argsz = sizeof(info);
+    ret = ioctl(fd, VFIO_IOMMU_GET_INFO, &info);
+    /* Ignore errors */
+    if (ret || !(info.flags & VFIO_IOMMU_INFO_PGSIZES)) {
+        /* Assume 4k IOVA page size */
+        info.iova_pgsizes = 4096;
+    }
+    vfio_host_win_add(container, 0, (hwaddr)-1, info.iova_pgsizes);
+    container->pgsizes = info.iova_pgsizes;
+
+    pgmask = (1ULL << ctz64(container->pgsizes)) - 1;
+    vfio_dma_unmap(container, 0, (ram_addr_t)-1 & ~pgmask);
+
+    container->external = true;
+
+    QLIST_INIT(&container->group_list);
+    QLIST_INSERT_HEAD(&space->containers, container, next);
+
+    container->listener = vfio_memory_listener;
+
+    memory_listener_register(&container->listener, container->space->as);
+
+    if (container->error) {
+        error_setg_errno(errp, -container->error,
+                         "memory listener initialization failed for container");
+        goto listener_release_exit;
+    }
+
+    container->initialized = true;
+
+    qemu_mutex_unlock(&vfio_address_spaces_lock);
+    return container;
+
+listener_release_exit:
+    QLIST_REMOVE(container, next);
+    vfio_listener_release(container);
+
+free_container_exit:
+    g_free(container);
+
+put_space_exit:
+    qemu_balloon_inhibit(false);
+    vfio_put_address_space(space);
+    qemu_mutex_unlock(&vfio_address_spaces_lock);
+
+    return NULL;
+}
+
+void vfio_free_container(VFIOContainer *container)
+{
+    VFIOAddressSpace *space = container->space;
+    VFIOGuestIOMMU *giommu, *tmp;
+    hwaddr pgmask;
+
+    if (!container->external) {
+        return;
+    }
+
+    trace_vfio_free_container(container->fd);
+
+    qemu_mutex_lock(&vfio_address_spaces_lock);
+
+    vfio_listener_release(container);
+
+    pgmask = (1ULL << ctz64(container->pgsizes)) - 1;
+    vfio_dma_unmap(container, 0, (ram_addr_t)-1 & ~pgmask);
+
+    QLIST_REMOVE(container, next);
+    QLIST_FOREACH_SAFE(giommu, &container->giommu_list, giommu_next, tmp) {
+        memory_region_unregister_iommu_notifier(
+                MEMORY_REGION(giommu->iommu), &giommu->n);
+        QLIST_REMOVE(giommu, giommu_next);
+        g_free(giommu);
+    }
+
+    close(container->fd);
+    g_free(container);
+
+    qemu_balloon_inhibit(false);
+    vfio_put_address_space(space);
+
+    qemu_mutex_unlock(&vfio_address_spaces_lock);
 }
 
 VFIOGroup *vfio_get_group(int groupid, AddressSpace *as, Error **errp)
@@ -1601,9 +1753,13 @@  static int vfio_eeh_container_op(VFIOContainer *container, uint32_t op)
 
 static VFIOContainer *vfio_eeh_as_container(AddressSpace *as)
 {
-    VFIOAddressSpace *space = vfio_get_address_space(as);
+    VFIOAddressSpace *space;
     VFIOContainer *container = NULL;
 
+    qemu_mutex_lock(&vfio_address_spaces_lock);
+
+    space = vfio_get_address_space(as);
+
     if (QLIST_EMPTY(&space->containers)) {
         /* No containers to act on */
         goto out;
@@ -1620,6 +1776,7 @@  static VFIOContainer *vfio_eeh_as_container(AddressSpace *as)
 
 out:
     vfio_put_address_space(space);
+    qemu_mutex_unlock(&vfio_address_spaces_lock);
     return container;
 }
 
@@ -1639,3 +1796,8 @@  int vfio_eeh_as_op(AddressSpace *as, uint32_t op)
     }
     return vfio_eeh_container_op(container, op);
 }
+
+static void __attribute__((__constructor__)) vfio_common_init(void)
+{
+    qemu_mutex_init(&vfio_address_spaces_lock);
+}
diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
index a85e8662ea..4e123d6cd9 100644
--- a/hw/vfio/trace-events
+++ b/hw/vfio/trace-events
@@ -98,6 +98,8 @@  vfio_listener_region_add_no_dma_map(const char *name, uint64_t iova, uint64_t si
 vfio_listener_region_del_skip(uint64_t start, uint64_t end) "SKIPPING region_del 0x%"PRIx64" - 0x%"PRIx64
 vfio_listener_region_del(uint64_t start, uint64_t end) "region_del 0x%"PRIx64" - 0x%"PRIx64
 vfio_disconnect_container(int fd) "close container->fd=%d"
+vfio_new_container(int fd) "new container->fd=%d"
+vfio_free_container(int fd) "free container->fd=%d"
 vfio_put_group(int fd) "close group->fd=%d"
 vfio_get_device(const char * name, unsigned int flags, unsigned int num_regions, unsigned int num_irqs) "Device %s flags: %u, regions: %u, irqs: %u"
 vfio_put_base_device(int fd) "close vdev->fd=%d"
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index 821def0565..be87a6125a 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -73,6 +73,7 @@  typedef struct VFIOContainer {
     unsigned iommu_type;
     int error;
     bool initialized;
+    bool external; /* Used outside the hw/vfio */
     unsigned long pgsizes;
     /*
      * This assumes the host IOMMU can support only a single
@@ -180,6 +181,7 @@  int vfio_get_device(VFIOGroup *group, const char *name,
 extern const MemoryRegionOps vfio_region_ops;
 extern QLIST_HEAD(vfio_group_head, VFIOGroup) vfio_group_list;
 extern QLIST_HEAD(vfio_as_head, VFIOAddressSpace) vfio_address_spaces;
+extern QemuMutex vfio_address_spaces_lock;
 
 #ifdef CONFIG_LINUX
 int vfio_get_region_info(VFIODevice *vbasedev, int index,
@@ -196,4 +198,11 @@  int vfio_spapr_create_window(VFIOContainer *container,
 int vfio_spapr_remove_window(VFIOContainer *container,
                              hwaddr offset_within_address_space);
 
+/*
+ * APIs used by modules outside hw/vfio.
+ */
+VFIOContainer *vfio_new_container(int container_fd, AddressSpace *as,
+                                  Error **errp);
+void vfio_free_container(VFIOContainer *container);
+
 #endif /* HW_VFIO_VFIO_COMMON_H */