Message ID | 6-v3-50561e12d92b+313-vfio_iommufd_jgg@nvidia.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Connect VFIO to IOMMUFD | expand |
On Wed, 16 Nov 2022 17:05:31 -0400 Jason Gunthorpe <jgg@nvidia.com> wrote: > This makes VFIO_GROUP_SET_CONTAINER accept both a vfio container FD and an > iommufd. > > In iommufd mode an IOAS will exist after the SET_CONTAINER, but it will > not be attached to any groups. > > For VFIO this means that the VFIO_GROUP_GET_STATUS and > VFIO_GROUP_FLAGS_VIABLE works subtly differently. With the container FD > the iommu_group_claim_dma_owner() is done during SET_CONTAINER but for > IOMMUFD this is done during VFIO_GROUP_GET_DEVICE_FD. Meaning that > VFIO_GROUP_FLAGS_VIABLE could be set but GET_DEVICE_FD will fail due to > viability. > > As GET_DEVICE_FD can fail for many reasons already this is not expected to > be a meaningful difference. > > Reorganize the tests for if the group has an assigned container or iommu > into a vfio_group_has_iommu() function and consolidate all the duplicated > WARN_ON's etc related to this. > > Call container functions only if a container is actually present on the > group. 
> > Tested-by: Nicolin Chen <nicolinc@nvidia.com> > Tested-by: Yi Liu <yi.l.liu@intel.com> > Tested-by: Lixiao Yang <lixiao.yang@intel.com> > Tested-by: Matthew Rosato <mjrosato@linux.ibm.com> > Tested-by: Yu He <yu.he@intel.com> > Reviewed-by: Kevin Tian <kevin.tian@intel.com> > Signed-off-by: Jason Gunthorpe <jgg@nvidia.com> > --- > drivers/vfio/Kconfig | 1 + > drivers/vfio/container.c | 7 +++- > drivers/vfio/vfio.h | 2 + > drivers/vfio/vfio_main.c | 86 +++++++++++++++++++++++++++++++++------- > 4 files changed, 80 insertions(+), 16 deletions(-) > > diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig > index 86c381ceb9a1e9..1118d322eec97d 100644 > --- a/drivers/vfio/Kconfig > +++ b/drivers/vfio/Kconfig > @@ -2,6 +2,7 @@ > menuconfig VFIO > tristate "VFIO Non-Privileged userspace driver framework" > select IOMMU_API > + depends on IOMMUFD || !IOMMUFD > select VFIO_IOMMU_TYPE1 if MMU && (X86 || S390 || ARM || ARM64) > select INTERVAL_TREE > help > diff --git a/drivers/vfio/container.c b/drivers/vfio/container.c > index d97747dfb05d02..8772dad6808539 100644 > --- a/drivers/vfio/container.c > +++ b/drivers/vfio/container.c > @@ -516,8 +516,11 @@ int vfio_group_use_container(struct vfio_group *group) > { > lockdep_assert_held(&group->group_lock); > > - if (!group->container || !group->container->iommu_driver || > - WARN_ON(!group->container_users)) > + /* > + * The container fd has been assigned with VFIO_GROUP_SET_CONTAINER but > + * VFIO_SET_IOMMU hasn't been done yet. 
> + */ > + if (!group->container->iommu_driver) > return -EINVAL; > > if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO)) > diff --git a/drivers/vfio/vfio.h b/drivers/vfio/vfio.h > index 247590334e14b0..985e13d52989ca 100644 > --- a/drivers/vfio/vfio.h > +++ b/drivers/vfio/vfio.h > @@ -10,6 +10,7 @@ > #include <linux/cdev.h> > #include <linux/module.h> > > +struct iommufd_ctx; > struct iommu_group; > struct vfio_device; > struct vfio_container; > @@ -60,6 +61,7 @@ struct vfio_group { > struct kvm *kvm; > struct file *opened_file; > struct blocking_notifier_head notifier; > + struct iommufd_ctx *iommufd; > }; > > /* events for the backend driver notify callback */ > diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c > index 5c0e810f8b4d08..8c124290ce9f0d 100644 > --- a/drivers/vfio/vfio_main.c > +++ b/drivers/vfio/vfio_main.c > @@ -35,6 +35,7 @@ > #include <linux/pm_runtime.h> > #include <linux/interval_tree.h> > #include <linux/iova_bitmap.h> > +#include <linux/iommufd.h> > #include "vfio.h" > > #define DRIVER_VERSION "0.3" > @@ -665,6 +666,16 @@ EXPORT_SYMBOL_GPL(vfio_unregister_group_dev); > /* > * VFIO Group fd, /dev/vfio/$GROUP > */ > +static bool vfio_group_has_iommu(struct vfio_group *group) > +{ > + lockdep_assert_held(&group->group_lock); > + if (!group->container) > + WARN_ON(group->container_users); > + else > + WARN_ON(!group->container_users); I think this is just carrying forward the WARN_ON that gets replaced in set_container, but I don't really see how this bit of paranoia is ever a possibility. If it is, a comment would be good, and perhaps simplify to: WARN_ON(group->container ^ group->container_users); > + return group->container || group->iommufd; > +} > + > /* > * VFIO_GROUP_UNSET_CONTAINER should fail if there are other users or > * if there was no container to unset. 
Since the ioctl is called on [snip] > @@ -900,7 +945,14 @@ static int vfio_group_ioctl_get_status(struct vfio_group *group, > return -ENODEV; > } > > - if (group->container) > + /* > + * With the container FD the iommu_group_claim_dma_owner() is done > + * during SET_CONTAINER but for IOMMFD this is done during > + * VFIO_GROUP_GET_DEVICE_FD. Meaning that with iommufd > + * VFIO_GROUP_FLAGS_VIABLE could be set but GET_DEVICE_FD will fail due > + * to viability. > + */ > + if (group->container || group->iommufd) Why didn't this use the vfio_group_has_iommu() helper? This is only skipping the paranoia checks, which aren't currently obvious to me anyway. Thanks, Alex > status.flags |= VFIO_GROUP_FLAGS_CONTAINER_SET | > VFIO_GROUP_FLAGS_VIABLE; > else if (!iommu_group_dma_owner_claimed(group->iommu_group))
On Wed, Nov 16, 2022 at 04:31:33PM -0700, Alex Williamson wrote: > > diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c > > index 5c0e810f8b4d08..8c124290ce9f0d 100644 > > --- a/drivers/vfio/vfio_main.c > > +++ b/drivers/vfio/vfio_main.c > > @@ -35,6 +35,7 @@ > > #include <linux/pm_runtime.h> > > #include <linux/interval_tree.h> > > #include <linux/iova_bitmap.h> > > +#include <linux/iommufd.h> > > #include "vfio.h" > > > > #define DRIVER_VERSION "0.3" > > @@ -665,6 +666,16 @@ EXPORT_SYMBOL_GPL(vfio_unregister_group_dev); > > /* > > * VFIO Group fd, /dev/vfio/$GROUP > > */ > > +static bool vfio_group_has_iommu(struct vfio_group *group) > > +{ > > + lockdep_assert_held(&group->group_lock); > > + if (!group->container) > > + WARN_ON(group->container_users); > > + else > > + WARN_ON(!group->container_users); > > I think this is just carrying forward the WARN_ON that gets replaced in > set_container, Yes, I've carried this invariant forward through a few series now. > but I don't really see how this bit of paranoia is ever a > possibility. Right, it is an invariant assertion, it should never trigger and we've never seen it trigger. I think at one point it was harder to see that this is impossible so an assertion must have been added. > WARN_ON(group->container ^ group->container_users); Ah, this needs a "logical xor" which is a bit obscure. In C I guess this is the common way to do it: /* * There can only be users if there is a container, and if there is a * container there must be users. */ WARN_ON(!group->container != !group->container_users); I'm also happy to delete it, not sure it is a valuable invariant. > > @@ -900,7 +945,14 @@ static int vfio_group_ioctl_get_status(struct vfio_group *group, > > return -ENODEV; > > } > > > > - if (group->container) > > + /* > > + * With the container FD the iommu_group_claim_dma_owner() is done > > + * during SET_CONTAINER but for IOMMFD this is done during > > + * VFIO_GROUP_GET_DEVICE_FD. 
Meaning that with iommufd > > + * VFIO_GROUP_FLAGS_VIABLE could be set but GET_DEVICE_FD will fail due > > + * to viability. > > + */ > > + if (group->container || group->iommufd) > > Why didn't this use the vfio_group_has_iommu() helper? This is only > skipping the paranoia checks, which aren't currently obvious to me > anyway. Yes, it was missed; I fixed it. Thanks, Jason
diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig index 86c381ceb9a1e9..1118d322eec97d 100644 --- a/drivers/vfio/Kconfig +++ b/drivers/vfio/Kconfig @@ -2,6 +2,7 @@ menuconfig VFIO tristate "VFIO Non-Privileged userspace driver framework" select IOMMU_API + depends on IOMMUFD || !IOMMUFD select VFIO_IOMMU_TYPE1 if MMU && (X86 || S390 || ARM || ARM64) select INTERVAL_TREE help diff --git a/drivers/vfio/container.c b/drivers/vfio/container.c index d97747dfb05d02..8772dad6808539 100644 --- a/drivers/vfio/container.c +++ b/drivers/vfio/container.c @@ -516,8 +516,11 @@ int vfio_group_use_container(struct vfio_group *group) { lockdep_assert_held(&group->group_lock); - if (!group->container || !group->container->iommu_driver || - WARN_ON(!group->container_users)) + /* + * The container fd has been assigned with VFIO_GROUP_SET_CONTAINER but + * VFIO_SET_IOMMU hasn't been done yet. + */ + if (!group->container->iommu_driver) return -EINVAL; if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO)) diff --git a/drivers/vfio/vfio.h b/drivers/vfio/vfio.h index 247590334e14b0..985e13d52989ca 100644 --- a/drivers/vfio/vfio.h +++ b/drivers/vfio/vfio.h @@ -10,6 +10,7 @@ #include <linux/cdev.h> #include <linux/module.h> +struct iommufd_ctx; struct iommu_group; struct vfio_device; struct vfio_container; @@ -60,6 +61,7 @@ struct vfio_group { struct kvm *kvm; struct file *opened_file; struct blocking_notifier_head notifier; + struct iommufd_ctx *iommufd; }; /* events for the backend driver notify callback */ diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c index 5c0e810f8b4d08..8c124290ce9f0d 100644 --- a/drivers/vfio/vfio_main.c +++ b/drivers/vfio/vfio_main.c @@ -35,6 +35,7 @@ #include <linux/pm_runtime.h> #include <linux/interval_tree.h> #include <linux/iova_bitmap.h> +#include <linux/iommufd.h> #include "vfio.h" #define DRIVER_VERSION "0.3" @@ -665,6 +666,16 @@ EXPORT_SYMBOL_GPL(vfio_unregister_group_dev); /* * VFIO Group fd, /dev/vfio/$GROUP */ +static 
bool vfio_group_has_iommu(struct vfio_group *group) +{ + lockdep_assert_held(&group->group_lock); + if (!group->container) + WARN_ON(group->container_users); + else + WARN_ON(!group->container_users); + return group->container || group->iommufd; +} + /* * VFIO_GROUP_UNSET_CONTAINER should fail if there are other users or * if there was no container to unset. Since the ioctl is called on @@ -676,15 +687,21 @@ static int vfio_group_ioctl_unset_container(struct vfio_group *group) int ret = 0; mutex_lock(&group->group_lock); - if (!group->container) { + if (!vfio_group_has_iommu(group)) { ret = -EINVAL; goto out_unlock; } - if (group->container_users != 1) { - ret = -EBUSY; - goto out_unlock; + if (group->container) { + if (group->container_users != 1) { + ret = -EBUSY; + goto out_unlock; + } + vfio_group_detach_container(group); + } + if (group->iommufd) { + iommufd_ctx_put(group->iommufd); + group->iommufd = NULL; } - vfio_group_detach_container(group); out_unlock: mutex_unlock(&group->group_lock); @@ -695,6 +712,7 @@ static int vfio_group_ioctl_set_container(struct vfio_group *group, int __user *arg) { struct vfio_container *container; + struct iommufd_ctx *iommufd; struct fd f; int ret; int fd; @@ -707,7 +725,7 @@ static int vfio_group_ioctl_set_container(struct vfio_group *group, return -EBADF; mutex_lock(&group->group_lock); - if (group->container || WARN_ON(group->container_users)) { + if (vfio_group_has_iommu(group)) { ret = -EINVAL; goto out_unlock; } @@ -717,12 +735,28 @@ static int vfio_group_ioctl_set_container(struct vfio_group *group, } container = vfio_container_from_file(f.file); - ret = -EINVAL; if (container) { ret = vfio_container_attach_group(container, group); goto out_unlock; } + iommufd = iommufd_ctx_from_file(f.file); + if (!IS_ERR(iommufd)) { + u32 ioas_id; + + ret = iommufd_vfio_compat_ioas_id(iommufd, &ioas_id); + if (ret) { + iommufd_ctx_put(group->iommufd); + goto out_unlock; + } + + group->iommufd = iommufd; + goto out_unlock; + } + + /* 
The FD passed is not recognized. */ + ret = -EBADFD; + out_unlock: mutex_unlock(&group->group_lock); fdput(f); @@ -752,9 +786,16 @@ static int vfio_device_first_open(struct vfio_device *device) * during close_device. */ mutex_lock(&device->group->group_lock); - ret = vfio_group_use_container(device->group); - if (ret) + if (!vfio_group_has_iommu(device->group)) { + ret = -EINVAL; goto err_module_put; + } + + if (device->group->container) { + ret = vfio_group_use_container(device->group); + if (ret) + goto err_module_put; + } device->kvm = device->group->kvm; if (device->ops->open_device) { @@ -762,13 +803,15 @@ static int vfio_device_first_open(struct vfio_device *device) if (ret) goto err_container; } - vfio_device_container_register(device); + if (device->group->container) + vfio_device_container_register(device); mutex_unlock(&device->group->group_lock); return 0; err_container: device->kvm = NULL; - vfio_group_unuse_container(device->group); + if (device->group->container) + vfio_group_unuse_container(device->group); err_module_put: mutex_unlock(&device->group->group_lock); module_put(device->dev->driver->owner); @@ -780,11 +823,13 @@ static void vfio_device_last_close(struct vfio_device *device) lockdep_assert_held(&device->dev_set->lock); mutex_lock(&device->group->group_lock); - vfio_device_container_unregister(device); + if (device->group->container) + vfio_device_container_unregister(device); if (device->ops->close_device) device->ops->close_device(device); device->kvm = NULL; - vfio_group_unuse_container(device->group); + if (device->group->container) + vfio_group_unuse_container(device->group); mutex_unlock(&device->group->group_lock); module_put(device->dev->driver->owner); } @@ -900,7 +945,14 @@ static int vfio_group_ioctl_get_status(struct vfio_group *group, return -ENODEV; } - if (group->container) + /* + * With the container FD the iommu_group_claim_dma_owner() is done + * during SET_CONTAINER but for IOMMFD this is done during + * 
VFIO_GROUP_GET_DEVICE_FD. Meaning that with iommufd + * VFIO_GROUP_FLAGS_VIABLE could be set but GET_DEVICE_FD will fail due + * to viability. + */ + if (group->container || group->iommufd) status.flags |= VFIO_GROUP_FLAGS_CONTAINER_SET | VFIO_GROUP_FLAGS_VIABLE; else if (!iommu_group_dma_owner_claimed(group->iommu_group)) @@ -983,6 +1035,10 @@ static int vfio_group_fops_release(struct inode *inode, struct file *filep) WARN_ON(group->notifier.head); if (group->container) vfio_group_detach_container(group); + if (group->iommufd) { + iommufd_ctx_put(group->iommufd); + group->iommufd = NULL; + } group->opened_file = NULL; mutex_unlock(&group->group_lock); return 0; @@ -1881,6 +1937,8 @@ static void __exit vfio_cleanup(void) module_init(vfio_init); module_exit(vfio_cleanup); +MODULE_IMPORT_NS(IOMMUFD); +MODULE_IMPORT_NS(IOMMUFD_VFIO); MODULE_VERSION(DRIVER_VERSION); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR(DRIVER_AUTHOR);