diff mbox series

[v3,06/11] vfio-iommufd: Allow iommufd to be used in place of a container fd

Message ID 6-v3-50561e12d92b+313-vfio_iommufd_jgg@nvidia.com (mailing list archive)
State New, archived
Headers show
Series Connect VFIO to IOMMUFD | expand

Commit Message

Jason Gunthorpe Nov. 16, 2022, 9:05 p.m. UTC
This makes VFIO_GROUP_SET_CONTAINER accept both a vfio container FD and an
iommufd.

In iommufd mode an IOAS will exist after the SET_CONTAINER, but it will
not be attached to any groups.

For VFIO this means that the VFIO_GROUP_GET_STATUS and
VFIO_GROUP_FLAGS_VIABLE works subtly differently. With the container FD
the iommu_group_claim_dma_owner() is done during SET_CONTAINER but for
IOMMUFD this is done during VFIO_GROUP_GET_DEVICE_FD. Meaning that
VFIO_GROUP_FLAGS_VIABLE could be set but GET_DEVICE_FD will fail due to
viability.

As GET_DEVICE_FD can fail for many reasons already this is not expected to
be a meaningful difference.

Reorganize the tests for if the group has an assigned container or iommu
into a vfio_group_has_iommu() function and consolidate all the duplicated
WARN_ON's etc related to this.

Call container functions only if a container is actually present on the
group.

Tested-by: Nicolin Chen <nicolinc@nvidia.com>
Tested-by: Yi Liu <yi.l.liu@intel.com>
Tested-by: Lixiao Yang <lixiao.yang@intel.com>
Tested-by: Matthew Rosato <mjrosato@linux.ibm.com>
Tested-by: Yu He <yu.he@intel.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/vfio/Kconfig     |  1 +
 drivers/vfio/container.c |  7 +++-
 drivers/vfio/vfio.h      |  2 +
 drivers/vfio/vfio_main.c | 86 +++++++++++++++++++++++++++++++++-------
 4 files changed, 80 insertions(+), 16 deletions(-)

Comments

Alex Williamson Nov. 16, 2022, 11:31 p.m. UTC | #1
On Wed, 16 Nov 2022 17:05:31 -0400
Jason Gunthorpe <jgg@nvidia.com> wrote:

> This makes VFIO_GROUP_SET_CONTAINER accept both a vfio container FD and an
> iommufd.
> 
> In iommufd mode an IOAS will exist after the SET_CONTAINER, but it will
> not be attached to any groups.
> 
> For VFIO this means that the VFIO_GROUP_GET_STATUS and
> VFIO_GROUP_FLAGS_VIABLE works subtly differently. With the container FD
> the iommu_group_claim_dma_owner() is done during SET_CONTAINER but for
> IOMMUFD this is done during VFIO_GROUP_GET_DEVICE_FD. Meaning that
> VFIO_GROUP_FLAGS_VIABLE could be set but GET_DEVICE_FD will fail due to
> viability.
> 
> As GET_DEVICE_FD can fail for many reasons already this is not expected to
> be a meaningful difference.
> 
> Reorganize the tests for if the group has an assigned container or iommu
> into a vfio_group_has_iommu() function and consolidate all the duplicated
> WARN_ON's etc related to this.
> 
> Call container functions only if a container is actually present on the
> group.
> 
> Tested-by: Nicolin Chen <nicolinc@nvidia.com>
> Tested-by: Yi Liu <yi.l.liu@intel.com>
> Tested-by: Lixiao Yang <lixiao.yang@intel.com>
> Tested-by: Matthew Rosato <mjrosato@linux.ibm.com>
> Tested-by: Yu He <yu.he@intel.com>
> Reviewed-by: Kevin Tian <kevin.tian@intel.com>
> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
> ---
>  drivers/vfio/Kconfig     |  1 +
>  drivers/vfio/container.c |  7 +++-
>  drivers/vfio/vfio.h      |  2 +
>  drivers/vfio/vfio_main.c | 86 +++++++++++++++++++++++++++++++++-------
>  4 files changed, 80 insertions(+), 16 deletions(-)
> 
> diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig
> index 86c381ceb9a1e9..1118d322eec97d 100644
> --- a/drivers/vfio/Kconfig
> +++ b/drivers/vfio/Kconfig
> @@ -2,6 +2,7 @@
>  menuconfig VFIO
>  	tristate "VFIO Non-Privileged userspace driver framework"
>  	select IOMMU_API
> +	depends on IOMMUFD || !IOMMUFD
>  	select VFIO_IOMMU_TYPE1 if MMU && (X86 || S390 || ARM || ARM64)
>  	select INTERVAL_TREE
>  	help
> diff --git a/drivers/vfio/container.c b/drivers/vfio/container.c
> index d97747dfb05d02..8772dad6808539 100644
> --- a/drivers/vfio/container.c
> +++ b/drivers/vfio/container.c
> @@ -516,8 +516,11 @@ int vfio_group_use_container(struct vfio_group *group)
>  {
>  	lockdep_assert_held(&group->group_lock);
>  
> -	if (!group->container || !group->container->iommu_driver ||
> -	    WARN_ON(!group->container_users))
> +	/*
> +	 * The container fd has been assigned with VFIO_GROUP_SET_CONTAINER but
> +	 * VFIO_SET_IOMMU hasn't been done yet.
> +	 */
> +	if (!group->container->iommu_driver)
>  		return -EINVAL;
>  
>  	if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO))
> diff --git a/drivers/vfio/vfio.h b/drivers/vfio/vfio.h
> index 247590334e14b0..985e13d52989ca 100644
> --- a/drivers/vfio/vfio.h
> +++ b/drivers/vfio/vfio.h
> @@ -10,6 +10,7 @@
>  #include <linux/cdev.h>
>  #include <linux/module.h>
>  
> +struct iommufd_ctx;
>  struct iommu_group;
>  struct vfio_device;
>  struct vfio_container;
> @@ -60,6 +61,7 @@ struct vfio_group {
>  	struct kvm			*kvm;
>  	struct file			*opened_file;
>  	struct blocking_notifier_head	notifier;
> +	struct iommufd_ctx		*iommufd;
>  };
>  
>  /* events for the backend driver notify callback */
> diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c
> index 5c0e810f8b4d08..8c124290ce9f0d 100644
> --- a/drivers/vfio/vfio_main.c
> +++ b/drivers/vfio/vfio_main.c
> @@ -35,6 +35,7 @@
>  #include <linux/pm_runtime.h>
>  #include <linux/interval_tree.h>
>  #include <linux/iova_bitmap.h>
> +#include <linux/iommufd.h>
>  #include "vfio.h"
>  
>  #define DRIVER_VERSION	"0.3"
> @@ -665,6 +666,16 @@ EXPORT_SYMBOL_GPL(vfio_unregister_group_dev);
>  /*
>   * VFIO Group fd, /dev/vfio/$GROUP
>   */
> +static bool vfio_group_has_iommu(struct vfio_group *group)
> +{
> +	lockdep_assert_held(&group->group_lock);
> +	if (!group->container)
> +		WARN_ON(group->container_users);
> +	else
> +		WARN_ON(!group->container_users);

I think this is just carrying forward the WARN_ON that gets replaced in
set_container, but I don't really see how this bit of paranoia is ever
a possibility.  If it is, a comment would be good, and perhaps simplify
to:

	WARN_ON(group->container ^ group->container_users);


> +	return group->container || group->iommufd;
> +}
> +
>  /*
>   * VFIO_GROUP_UNSET_CONTAINER should fail if there are other users or
>   * if there was no container to unset.  Since the ioctl is called on
[snip]
> @@ -900,7 +945,14 @@ static int vfio_group_ioctl_get_status(struct vfio_group *group,
>  		return -ENODEV;
>  	}
>  
> -	if (group->container)
> +	/*
> +	 * With the container FD the iommu_group_claim_dma_owner() is done
> +	 * during SET_CONTAINER but for IOMMFD this is done during
> +	 * VFIO_GROUP_GET_DEVICE_FD. Meaning that with iommufd
> +	 * VFIO_GROUP_FLAGS_VIABLE could be set but GET_DEVICE_FD will fail due
> +	 * to viability.
> +	 */
> +	if (group->container || group->iommufd)

Why didn't this use the vfio_group_has_iommu() helper?  This is only
skipping the paranoia checks, which aren't currently obvious to me
anyway.  Thanks,

Alex

>  		status.flags |= VFIO_GROUP_FLAGS_CONTAINER_SET |
>  				VFIO_GROUP_FLAGS_VIABLE;
>  	else if (!iommu_group_dma_owner_claimed(group->iommu_group))
Jason Gunthorpe Nov. 17, 2022, 12:20 a.m. UTC | #2
On Wed, Nov 16, 2022 at 04:31:33PM -0700, Alex Williamson wrote:
> > diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c
> > index 5c0e810f8b4d08..8c124290ce9f0d 100644
> > --- a/drivers/vfio/vfio_main.c
> > +++ b/drivers/vfio/vfio_main.c
> > @@ -35,6 +35,7 @@
> >  #include <linux/pm_runtime.h>
> >  #include <linux/interval_tree.h>
> >  #include <linux/iova_bitmap.h>
> > +#include <linux/iommufd.h>
> >  #include "vfio.h"
> >  
> >  #define DRIVER_VERSION	"0.3"
> > @@ -665,6 +666,16 @@ EXPORT_SYMBOL_GPL(vfio_unregister_group_dev);
> >  /*
> >   * VFIO Group fd, /dev/vfio/$GROUP
> >   */
> > +static bool vfio_group_has_iommu(struct vfio_group *group)
> > +{
> > +	lockdep_assert_held(&group->group_lock);
> > +	if (!group->container)
> > +		WARN_ON(group->container_users);
> > +	else
> > +		WARN_ON(!group->container_users);
> 
> I think this is just carrying forward the WARN_ON that gets replaced in
> set_container,

Yes, I've carried this invariant forward through a few series now

> but I don't really see how this bit of paranoia is ever a
> possibility.  

Right, it is an invariant assertion, it should never trigger and we've
never seen it trigger. I think at one point it was harder to see that
this is impossible so an assertion must have been added

> 	WARN_ON(group->container ^ group->container_users);

Ah, this needs a "logical xor" which is a bit obscure. In C I guess
this is the common way to do it:

	/*
	 * There can only be users if there is a container, and if there is a
	 * container there must be users.
	 */
	WARN_ON(!group->container != !group->container_users);

I'm also happy to delete it, not sure it is a valuable invariant.

> > @@ -900,7 +945,14 @@ static int vfio_group_ioctl_get_status(struct vfio_group *group,
> >  		return -ENODEV;
> >  	}
> >  
> > -	if (group->container)
> > +	/*
> > +	 * With the container FD the iommu_group_claim_dma_owner() is done
> > +	 * during SET_CONTAINER but for IOMMFD this is done during
> > +	 * VFIO_GROUP_GET_DEVICE_FD. Meaning that with iommufd
> > +	 * VFIO_GROUP_FLAGS_VIABLE could be set but GET_DEVICE_FD will fail due
> > +	 * to viability.
> > +	 */
> > +	if (group->container || group->iommufd)
> 
> Why didn't this use the vfio_group_has_iommu() helper?  This is only
> skipping the paranoia checks, which aren't currently obvious to me
> anyway.

Yes, it was missed, I fixed it

Thanks,
Jason
diff mbox series

Patch

diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig
index 86c381ceb9a1e9..1118d322eec97d 100644
--- a/drivers/vfio/Kconfig
+++ b/drivers/vfio/Kconfig
@@ -2,6 +2,7 @@ 
 menuconfig VFIO
 	tristate "VFIO Non-Privileged userspace driver framework"
 	select IOMMU_API
+	depends on IOMMUFD || !IOMMUFD
 	select VFIO_IOMMU_TYPE1 if MMU && (X86 || S390 || ARM || ARM64)
 	select INTERVAL_TREE
 	help
diff --git a/drivers/vfio/container.c b/drivers/vfio/container.c
index d97747dfb05d02..8772dad6808539 100644
--- a/drivers/vfio/container.c
+++ b/drivers/vfio/container.c
@@ -516,8 +516,11 @@  int vfio_group_use_container(struct vfio_group *group)
 {
 	lockdep_assert_held(&group->group_lock);
 
-	if (!group->container || !group->container->iommu_driver ||
-	    WARN_ON(!group->container_users))
+	/*
+	 * The container fd has been assigned with VFIO_GROUP_SET_CONTAINER but
+	 * VFIO_SET_IOMMU hasn't been done yet.
+	 */
+	if (!group->container->iommu_driver)
 		return -EINVAL;
 
 	if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO))
diff --git a/drivers/vfio/vfio.h b/drivers/vfio/vfio.h
index 247590334e14b0..985e13d52989ca 100644
--- a/drivers/vfio/vfio.h
+++ b/drivers/vfio/vfio.h
@@ -10,6 +10,7 @@ 
 #include <linux/cdev.h>
 #include <linux/module.h>
 
+struct iommufd_ctx;
 struct iommu_group;
 struct vfio_device;
 struct vfio_container;
@@ -60,6 +61,7 @@  struct vfio_group {
 	struct kvm			*kvm;
 	struct file			*opened_file;
 	struct blocking_notifier_head	notifier;
+	struct iommufd_ctx		*iommufd;
 };
 
 /* events for the backend driver notify callback */
diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c
index 5c0e810f8b4d08..8c124290ce9f0d 100644
--- a/drivers/vfio/vfio_main.c
+++ b/drivers/vfio/vfio_main.c
@@ -35,6 +35,7 @@ 
 #include <linux/pm_runtime.h>
 #include <linux/interval_tree.h>
 #include <linux/iova_bitmap.h>
+#include <linux/iommufd.h>
 #include "vfio.h"
 
 #define DRIVER_VERSION	"0.3"
@@ -665,6 +666,16 @@  EXPORT_SYMBOL_GPL(vfio_unregister_group_dev);
 /*
  * VFIO Group fd, /dev/vfio/$GROUP
  */
+static bool vfio_group_has_iommu(struct vfio_group *group)
+{
+	lockdep_assert_held(&group->group_lock);
+	if (!group->container)
+		WARN_ON(group->container_users);
+	else
+		WARN_ON(!group->container_users);
+	return group->container || group->iommufd;
+}
+
 /*
  * VFIO_GROUP_UNSET_CONTAINER should fail if there are other users or
  * if there was no container to unset.  Since the ioctl is called on
@@ -676,15 +687,21 @@  static int vfio_group_ioctl_unset_container(struct vfio_group *group)
 	int ret = 0;
 
 	mutex_lock(&group->group_lock);
-	if (!group->container) {
+	if (!vfio_group_has_iommu(group)) {
 		ret = -EINVAL;
 		goto out_unlock;
 	}
-	if (group->container_users != 1) {
-		ret = -EBUSY;
-		goto out_unlock;
+	if (group->container) {
+		if (group->container_users != 1) {
+			ret = -EBUSY;
+			goto out_unlock;
+		}
+		vfio_group_detach_container(group);
+	}
+	if (group->iommufd) {
+		iommufd_ctx_put(group->iommufd);
+		group->iommufd = NULL;
 	}
-	vfio_group_detach_container(group);
 
 out_unlock:
 	mutex_unlock(&group->group_lock);
@@ -695,6 +712,7 @@  static int vfio_group_ioctl_set_container(struct vfio_group *group,
 					  int __user *arg)
 {
 	struct vfio_container *container;
+	struct iommufd_ctx *iommufd;
 	struct fd f;
 	int ret;
 	int fd;
@@ -707,7 +725,7 @@  static int vfio_group_ioctl_set_container(struct vfio_group *group,
 		return -EBADF;
 
 	mutex_lock(&group->group_lock);
-	if (group->container || WARN_ON(group->container_users)) {
+	if (vfio_group_has_iommu(group)) {
 		ret = -EINVAL;
 		goto out_unlock;
 	}
@@ -717,12 +735,28 @@  static int vfio_group_ioctl_set_container(struct vfio_group *group,
 	}
 
 	container = vfio_container_from_file(f.file);
-	ret = -EINVAL;
 	if (container) {
 		ret = vfio_container_attach_group(container, group);
 		goto out_unlock;
 	}
 
+	iommufd = iommufd_ctx_from_file(f.file);
+	if (!IS_ERR(iommufd)) {
+		u32 ioas_id;
+
+		ret = iommufd_vfio_compat_ioas_id(iommufd, &ioas_id);
+		if (ret) {
+			iommufd_ctx_put(group->iommufd);
+			goto out_unlock;
+		}
+
+		group->iommufd = iommufd;
+		goto out_unlock;
+	}
+
+	/* The FD passed is not recognized. */
+	ret = -EBADFD;
+
 out_unlock:
 	mutex_unlock(&group->group_lock);
 	fdput(f);
@@ -752,9 +786,16 @@  static int vfio_device_first_open(struct vfio_device *device)
 	 * during close_device.
 	 */
 	mutex_lock(&device->group->group_lock);
-	ret = vfio_group_use_container(device->group);
-	if (ret)
+	if (!vfio_group_has_iommu(device->group)) {
+		ret = -EINVAL;
 		goto err_module_put;
+	}
+
+	if (device->group->container) {
+		ret = vfio_group_use_container(device->group);
+		if (ret)
+			goto err_module_put;
+	}
 
 	device->kvm = device->group->kvm;
 	if (device->ops->open_device) {
@@ -762,13 +803,15 @@  static int vfio_device_first_open(struct vfio_device *device)
 		if (ret)
 			goto err_container;
 	}
-	vfio_device_container_register(device);
+	if (device->group->container)
+		vfio_device_container_register(device);
 	mutex_unlock(&device->group->group_lock);
 	return 0;
 
 err_container:
 	device->kvm = NULL;
-	vfio_group_unuse_container(device->group);
+	if (device->group->container)
+		vfio_group_unuse_container(device->group);
 err_module_put:
 	mutex_unlock(&device->group->group_lock);
 	module_put(device->dev->driver->owner);
@@ -780,11 +823,13 @@  static void vfio_device_last_close(struct vfio_device *device)
 	lockdep_assert_held(&device->dev_set->lock);
 
 	mutex_lock(&device->group->group_lock);
-	vfio_device_container_unregister(device);
+	if (device->group->container)
+		vfio_device_container_unregister(device);
 	if (device->ops->close_device)
 		device->ops->close_device(device);
 	device->kvm = NULL;
-	vfio_group_unuse_container(device->group);
+	if (device->group->container)
+		vfio_group_unuse_container(device->group);
 	mutex_unlock(&device->group->group_lock);
 	module_put(device->dev->driver->owner);
 }
@@ -900,7 +945,14 @@  static int vfio_group_ioctl_get_status(struct vfio_group *group,
 		return -ENODEV;
 	}
 
-	if (group->container)
+	/*
+	 * With the container FD the iommu_group_claim_dma_owner() is done
+	 * during SET_CONTAINER but for IOMMFD this is done during
+	 * VFIO_GROUP_GET_DEVICE_FD. Meaning that with iommufd
+	 * VFIO_GROUP_FLAGS_VIABLE could be set but GET_DEVICE_FD will fail due
+	 * to viability.
+	 */
+	if (group->container || group->iommufd)
 		status.flags |= VFIO_GROUP_FLAGS_CONTAINER_SET |
 				VFIO_GROUP_FLAGS_VIABLE;
 	else if (!iommu_group_dma_owner_claimed(group->iommu_group))
@@ -983,6 +1035,10 @@  static int vfio_group_fops_release(struct inode *inode, struct file *filep)
 	WARN_ON(group->notifier.head);
 	if (group->container)
 		vfio_group_detach_container(group);
+	if (group->iommufd) {
+		iommufd_ctx_put(group->iommufd);
+		group->iommufd = NULL;
+	}
 	group->opened_file = NULL;
 	mutex_unlock(&group->group_lock);
 	return 0;
@@ -1881,6 +1937,8 @@  static void __exit vfio_cleanup(void)
 module_init(vfio_init);
 module_exit(vfio_cleanup);
 
+MODULE_IMPORT_NS(IOMMUFD);
+MODULE_IMPORT_NS(IOMMUFD_VFIO);
 MODULE_VERSION(DRIVER_VERSION);
 MODULE_LICENSE("GPL v2");
 MODULE_AUTHOR(DRIVER_AUTHOR);