@@ -1623,41 +1623,11 @@ static int vfio_host_iommu_ctx_unbind_stage1_pgtbl(HostIOMMUContext *iommu_ctx,
return ret;
}
-static int vfio_init_container(VFIOContainer *container, int group_fd,
- bool want_nested, Error **errp)
-{
- int iommu_type, ret;
-
- iommu_type = vfio_get_iommu_type(container, want_nested, errp);
- if (iommu_type < 0) {
- return iommu_type;
- }
-
- ret = ioctl(group_fd, VFIO_GROUP_SET_CONTAINER, &container->fd);
- if (ret) {
- error_setg_errno(errp, errno, "Failed to set group container");
- return -errno;
- }
-
- while (ioctl(container->fd, VFIO_SET_IOMMU, iommu_type)) {
- if (iommu_type == VFIO_SPAPR_TCE_v2_IOMMU) {
- /*
- * On sPAPR, despite the IOMMU subdriver always advertises v1 and
- * v2, the running platform may not support v2 and there is no
- * way to guess it until an IOMMU group gets added to the container.
- * So in case it fails with v2, try v1 as a fallback.
- */
- iommu_type = VFIO_SPAPR_TCE_IOMMU;
- continue;
- }
- error_setg_errno(errp, errno, "Failed to set iommu for container");
- return -errno;
- }
-
- container->iommu_type = iommu_type;
- return 0;
-}
-
+/**
+ * Get iommu info from host. On success, the caller of this function
+ * must free the memory referenced by the pointer stored in @info
+ * once it has finished using it.
+ */
static int vfio_get_iommu_info(VFIOContainer *container,
struct vfio_iommu_type1_info **info)
{
@@ -1702,6 +1672,101 @@ vfio_get_iommu_info_cap(struct vfio_iommu_type1_info *info, uint16_t id)
return NULL;
}
+/**
+ * Fetch a private copy of the host's nesting IOMMU capability.
+ *
+ * Queries the container's iommu info, looks up the
+ * VFIO_IOMMU_TYPE1_INFO_CAP_NESTING capability in it and duplicates that
+ * capability (header plus info->argsz bytes of nesting info) into a newly
+ * allocated buffer stored in *@cap_nesting.
+ *
+ * Returns 0 on success; the caller then owns *@cap_nesting and must
+ * g_free() it.  Returns the error reported by vfio_get_iommu_info() if the
+ * query fails, or -EINVAL if the capability is absent or its argsz does not
+ * even cover the fields that precede the vendor data.  *@cap_nesting is
+ * not written on failure.
+ */
+static int vfio_get_nesting_iommu_cap(VFIOContainer *container,
+                struct vfio_iommu_type1_info_cap_nesting **cap_nesting)
+{
+    struct vfio_iommu_type1_info *iommu_info;
+    struct vfio_info_cap_header *cap_hdr;
+    struct vfio_iommu_type1_info_cap_nesting *src;
+    struct iommu_nesting_info *nesting;
+    uint32_t fixed_len, copy_len;
+    int rc;
+
+    rc = vfio_get_iommu_info(container, &iommu_info);
+    if (rc) {
+        return rc;
+    }
+
+    /* Every failure past this point is a malformed/missing capability. */
+    rc = -EINVAL;
+
+    cap_hdr = vfio_get_iommu_info_cap(iommu_info,
+                                      VFIO_IOMMU_TYPE1_INFO_CAP_NESTING);
+    if (!cap_hdr) {
+        goto out;
+    }
+
+    src = container_of(cap_hdr,
+                       struct vfio_iommu_type1_info_cap_nesting, header);
+    nesting = &src->info;
+
+    /* Reject an info block too short to hold the non-vendor fields. */
+    fixed_len = offsetof(struct iommu_nesting_info, vendor);
+    if (nesting->argsz < fixed_len) {
+        goto out;
+    }
+
+    /*
+     * The capability lives inside iommu_info, which is released below,
+     * so hand the caller a standalone copy.
+     */
+    copy_len = offsetof(struct vfio_iommu_type1_info_cap_nesting, info) +
+               nesting->argsz;
+    *cap_nesting = g_malloc0(copy_len);
+    memcpy(*cap_nesting, src, copy_len);
+    rc = 0;
+
+out:
+    g_free(iommu_info);
+    return rc;
+}
+
+
+/**
+ * Bind a group to @container and select the container's IOMMU backend.
+ *
+ * Picks an IOMMU type via vfio_get_iommu_type(), attaches @group_fd to the
+ * container with VFIO_GROUP_SET_CONTAINER and then issues VFIO_SET_IOMMU.
+ * If the selected type is VFIO_TYPE1_NESTING_IOMMU, the host nesting
+ * capability is fetched and used to initialise container->iommu_ctx.
+ *
+ * Returns 0 on success with container->iommu_type recorded.  Returns a
+ * negative value on failure; for the ioctl and nesting-capability failures
+ * an error is set in @errp (vfio_get_iommu_type() is handed @errp and is
+ * presumably responsible for filling it on its own failure).
+ */
+static int vfio_init_container(VFIOContainer *container, int group_fd,
+                               bool want_nested, Error **errp)
+{
+    int type;
+
+    type = vfio_get_iommu_type(container, want_nested, errp);
+    if (type < 0) {
+        return type;
+    }
+
+    if (ioctl(group_fd, VFIO_GROUP_SET_CONTAINER, &container->fd)) {
+        error_setg_errno(errp, errno, "Failed to set group container");
+        return -errno;
+    }
+
+    for (;;) {
+        if (ioctl(container->fd, VFIO_SET_IOMMU, type) == 0) {
+            break;
+        }
+
+        if (type != VFIO_SPAPR_TCE_v2_IOMMU) {
+            error_setg_errno(errp, errno, "Failed to set iommu for container");
+            return -errno;
+        }
+
+        /*
+         * sPAPR: the IOMMU subdriver always advertises both v1 and v2,
+         * but the running platform may lack v2 support, and that cannot
+         * be known before an IOMMU group has been added to the container.
+         * A v2 failure is therefore retried once with v1.
+         */
+        type = VFIO_SPAPR_TCE_IOMMU;
+    }
+
+    if (type == VFIO_TYPE1_NESTING_IOMMU) {
+        struct vfio_iommu_type1_info_cap_nesting *cap = NULL;
+        int rc;
+
+        rc = vfio_get_nesting_iommu_cap(container, &cap);
+        if (rc) {
+            /* rc is a negative errno value; error_setg_errno wants it positive */
+            error_setg_errno(errp, -rc, "Failed to get nesting iommu cap");
+            return rc;
+        }
+
+        /*
+         * NOTE(review): cap is freed immediately below, so this relies on
+         * host_iommu_ctx_init() not retaining the info pointer - confirm.
+         */
+        host_iommu_ctx_init(&container->iommu_ctx,
+                            sizeof(container->iommu_ctx),
+                            TYPE_VFIO_HOST_IOMMU_CONTEXT,
+                            (struct iommu_nesting_info *) &cap->info);
+        g_free(cap);
+    }
+
+    container->iommu_type = type;
+    return 0;
+}
+
+
static void vfio_get_iommu_info_migration(VFIOContainer *container,
struct vfio_iommu_type1_info *info)
{
@@ -2764,6 +2764,7 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
VFIOPCIDevice *vdev = VFIO_PCI(pdev);
VFIODevice *vbasedev_iter;
VFIOGroup *group;
+ VFIOContainer *container;
char *tmp, *subsys, group_path[PATH_MAX], *group_name;
Error *err = NULL;
ssize_t len;
@@ -2829,6 +2830,15 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
goto error;
}
+ container = group->container;
+ if (container->iommu_ctx.initialized &&
+ pci_device_set_iommu_context(pdev, &container->iommu_ctx)) {
+ error_setg(errp, "device attachment is denied by vIOMMU, "
+ "please check host IOMMU nesting capability");
+ vfio_put_group(group);
+ goto error;
+ }
+
QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
if (strcmp(vbasedev_iter->name, vdev->vbasedev.name) == 0) {
error_setg(errp, "device is already attached");
@@ -3112,9 +3122,16 @@ static void vfio_instance_finalize(Object *obj)
static void vfio_exitfn(PCIDevice *pdev)
{
VFIOPCIDevice *vdev = VFIO_PCI(pdev);
+ VFIOContainer *container;
vfio_unregister_req_notifier(vdev);
vfio_unregister_err_notifier(vdev);
+
+ container = vdev->vbasedev.group->container;
+ if (container->iommu_ctx.initialized) {
+ pci_device_unset_iommu_context(pdev);
+ }
+
pci_device_set_intx_routing_notifier(&vdev->pdev, NULL);
if (vdev->irqchip_change_notifier.notify) {
kvm_irqchip_remove_change_notifier(&vdev->irqchip_change_notifier);
@@ -85,6 +85,7 @@ typedef struct VFIOContainer {
MemoryListener listener;
MemoryListener prereg_listener;
unsigned iommu_type;
+ HostIOMMUContext iommu_ctx;
Error *error;
bool initialized;
bool dirty_pages_supported;
In this patch, QEMU first gets the iommu info from the kernel to check the capabilities supported by a VFIO_TYPE1_NESTING_IOMMU iommu, and then initializes a HostIOMMUContext instance. vfio-pci devices can use pci_device_set/unset_iommu_context() to expose the host iommu context to vIOMMU emulators. vIOMMU emulators can then make use of the methods provided by the host iommu context, e.g. to propagate requests to the host iommu. Cc: Kevin Tian <kevin.tian@intel.com> Cc: Jacob Pan <jacob.jun.pan@linux.intel.com> Cc: Peter Xu <peterx@redhat.com> Cc: Eric Auger <eric.auger@redhat.com> Cc: Yi Sun <yi.y.sun@linux.intel.com> Cc: David Gibson <david@gibson.dropbear.id.au> Cc: Alex Williamson <alex.williamson@redhat.com> Signed-off-by: Liu Yi L <yi.l.liu@intel.com> --- hw/vfio/common.c | 135 +++++++++++++++++++++++++--------- hw/vfio/pci.c | 17 +++++ include/hw/vfio/vfio-common.h | 1 + 3 files changed, 118 insertions(+), 35 deletions(-)