Message ID | 0-v1-5cde901db21d+661fe-iommufd_noiommu_jgg@nvidia.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | vfio: Support VFIO_NOIOMMU with iommufd | expand |
On Mon, 9 Jan 2023 10:22:59 -0400 Jason Gunthorpe <jgg@nvidia.com> wrote: > Add a small amount of emulation to vfio_compat to accept the SET_IOMMU > to VFIO_NOIOMMU_IOMMU and have vfio just ignore iommufd if it is working > on a no-iommu enabled device. > > Signed-off-by: Jason Gunthorpe <jgg@nvidia.com> > --- > drivers/iommu/iommufd/Kconfig | 2 +- > drivers/iommu/iommufd/vfio_compat.c | 46 ++++++++++++++++++++++++----- > drivers/vfio/group.c | 13 ++++---- > drivers/vfio/iommufd.c | 21 ++++++++++++- > include/linux/iommufd.h | 6 ++-- > 5 files changed, 70 insertions(+), 18 deletions(-) > > This needs a testing confirmation with dpdk to go forward, thanks How do we create a noiommu group w/o the vfio_noiommu flag that's provided by container.c? Even without dpdk, you should be able to turn off the system IOMMU and get something bound to vfio-pci that still taints the kernel and provides a noiommu-%d group under /dev/vfio/. There's a rudimentary unit test for noiommu here[1]. Thanks, Alex [1]https://github.com/awilliam/tests/blob/master/vfio-noiommu-pci-device-open.c > diff --git a/drivers/iommu/iommufd/Kconfig b/drivers/iommu/iommufd/Kconfig > index 8306616b6d8198..ada693ea51a78e 100644 > --- a/drivers/iommu/iommufd/Kconfig > +++ b/drivers/iommu/iommufd/Kconfig > @@ -23,7 +23,7 @@ config IOMMUFD_VFIO_CONTAINER > removed. > > IOMMUFD VFIO container emulation is known to lack certain features > - of the native VFIO container, such as no-IOMMU support, peer-to-peer > + of the native VFIO container, such as peer-to-peer > DMA mapping, PPC IOMMU support, as well as other potentially > undiscovered gaps. This option is currently intended for the > purpose of testing IOMMUFD with unmodified userspace supporting VFIO > diff --git a/drivers/iommu/iommufd/vfio_compat.c b/drivers/iommu/iommufd/vfio_compat.c > index 3ceca0e8311c39..6c8e5dc1c221f4 100644 > --- a/drivers/iommu/iommufd/vfio_compat.c > +++ b/drivers/iommu/iommufd/vfio_compat.c > @@ -26,16 +26,35 @@ static struct iommufd_ioas *get_compat_ioas(struct iommufd_ctx *ictx) > } > > /** > - * iommufd_vfio_compat_ioas_id - Return the IOAS ID that vfio should use > + * iommufd_vfio_compat_ioas_get_id - Ensure a comat IOAS exists > + * @ictx: Context to operate on > + * > + * Return the ID of the current compatibility ioas. The ID can be passed into > + * other functions that take an ioas_id. > + */ > +int iommufd_vfio_compat_ioas_get_id(struct iommufd_ctx *ictx, u32 *out_ioas_id) > +{ > + struct iommufd_ioas *ioas; > + > + ioas = get_compat_ioas(ictx); > + if (IS_ERR(ioas)) > + return PTR_ERR(ioas); > + *out_ioas_id = ioas->obj.id; > + iommufd_put_object(&ioas->obj); > + return 0; > +} > +EXPORT_SYMBOL_NS_GPL(iommufd_vfio_compat_ioas_get_id, IOMMUFD_VFIO); > + > +/** > + * iommufd_vfio_compat_ioas_create_id - Return the IOAS ID that vfio should use > * @ictx: Context to operate on > - * @out_ioas_id: The ioas_id the caller should use > * > * The compatibility IOAS is the IOAS that the vfio compatibility ioctls operate > * on since they do not have an IOAS ID input in their ABI. Only attaching a > - * group should cause a default creation of the internal ioas, this returns the > - * existing ioas if it has already been assigned somehow. > + * group should cause a default creation of the internal ioas, this does nothing > + * if an existing ioas has already been assigned somehow. > */ > -int iommufd_vfio_compat_ioas_id(struct iommufd_ctx *ictx, u32 *out_ioas_id) > +int iommufd_vfio_compat_ioas_create_id(struct iommufd_ctx *ictx) > { > struct iommufd_ioas *ioas = NULL; > struct iommufd_ioas *out_ioas; > @@ -53,7 +72,6 @@ int iommufd_vfio_compat_ioas_id(struct iommufd_ctx *ictx, u32 *out_ioas_id) > } > xa_unlock(&ictx->objects); > > - *out_ioas_id = out_ioas->obj.id; > if (out_ioas != ioas) { > iommufd_put_object(&out_ioas->obj); > iommufd_object_abort(ictx, &ioas->obj); > @@ -68,7 +86,7 @@ int iommufd_vfio_compat_ioas_id(struct iommufd_ctx *ictx, u32 *out_ioas_id) > iommufd_object_finalize(ictx, &ioas->obj); > return 0; > } > -EXPORT_SYMBOL_NS_GPL(iommufd_vfio_compat_ioas_id, IOMMUFD_VFIO); > +EXPORT_SYMBOL_NS_GPL(iommufd_vfio_compat_ioas_create_id, IOMMUFD_VFIO); > > int iommufd_vfio_ioas(struct iommufd_ucmd *ucmd) > { > @@ -235,6 +253,9 @@ static int iommufd_vfio_check_extension(struct iommufd_ctx *ictx, > case VFIO_UNMAP_ALL: > return 1; > > + case VFIO_NOIOMMU_IOMMU: > + return IS_ENABLED(CONFIG_VFIO_NOIOMMU); > + > case VFIO_DMA_CC_IOMMU: > return iommufd_vfio_cc_iommu(ictx); > > @@ -264,6 +285,17 @@ static int iommufd_vfio_set_iommu(struct iommufd_ctx *ictx, unsigned long type) > struct iommufd_ioas *ioas = NULL; > int rc = 0; > > + /* > + * Emulation for NOIMMU is imperfect in that VFIO blocks almost all > + * other ioctls. We let them keep working but they mostly fail since no > + * IOAS should exist. > + */ > + if (IS_ENABLED(CONFIG_VFIO_NOIOMMU) && type == VFIO_NOIOMMU_IOMMU) { > + if (!capable(CAP_SYS_RAWIO)) > + return -EPERM; > + return 0; > + } > + > if (type != VFIO_TYPE1_IOMMU && type != VFIO_TYPE1v2_IOMMU) > return -EINVAL; > > diff --git a/drivers/vfio/group.c b/drivers/vfio/group.c > index bb24b2f0271e03..0f2a483a1f3bdb 100644 > --- a/drivers/vfio/group.c > +++ b/drivers/vfio/group.c > @@ -133,12 +133,13 @@ static int vfio_group_ioctl_set_container(struct vfio_group *group, > > iommufd = iommufd_ctx_from_file(f.file); > if (!IS_ERR(iommufd)) { > - u32 ioas_id; > - > - ret = iommufd_vfio_compat_ioas_id(iommufd, &ioas_id); > - if (ret) { > - iommufd_ctx_put(group->iommufd); > - goto out_unlock; > + if (!IS_ENABLED(CONFIG_VFIO_NO_IOMMU) || > + group->type != VFIO_NO_IOMMU) { > + ret = iommufd_vfio_compat_ioas_create_id(iommufd); > + if (ret) { > + iommufd_ctx_put(group->iommufd); > + goto out_unlock; > + } > } > > group->iommufd = iommufd; > diff --git a/drivers/vfio/iommufd.c b/drivers/vfio/iommufd.c > index 4f82a6fa7c6c7f..da50feb24b6e1d 100644 > --- a/drivers/vfio/iommufd.c > +++ b/drivers/vfio/iommufd.c > @@ -18,6 +18,21 @@ int vfio_iommufd_bind(struct vfio_device *vdev, struct iommufd_ctx *ictx) > > lockdep_assert_held(&vdev->dev_set->lock); > > + if (IS_ENABLED(CONFIG_VFIO_NO_IOMMU) && > + vdev->group->type == VFIO_NO_IOMMU) { > + if (!capable(CAP_SYS_RAWIO)) > + return -EPERM; > + > + /* > + * Require no compat ioas to be assigned to proceed. The basic > + * statement is that the user cannot have done something that > + * implies they expected translation to exist > + */ > + if (!iommufd_vfio_compat_ioas_get_id(ictx, &ioas_id)) > + return -EPERM; > + return 0; > + } > + > /* > * If the driver doesn't provide this op then it means the device does > * not do DMA at all. So nothing to do. > @@ -29,7 +44,7 @@ int vfio_iommufd_bind(struct vfio_device *vdev, struct iommufd_ctx *ictx) > if (ret) > return ret; > > - ret = iommufd_vfio_compat_ioas_id(ictx, &ioas_id); > + ret = iommufd_vfio_compat_ioas_get_id(ictx, &ioas_id); > if (ret) > goto err_unbind; > ret = vdev->ops->attach_ioas(vdev, &ioas_id); > @@ -52,6 +67,10 @@ void vfio_iommufd_unbind(struct vfio_device *vdev) > { > lockdep_assert_held(&vdev->dev_set->lock); > > + if (IS_ENABLED(CONFIG_VFIO_NO_IOMMU) && > + vdev->group->type == VFIO_NO_IOMMU) > + return; > + > if (vdev->ops->unbind_iommufd) > vdev->ops->unbind_iommufd(vdev); > } > diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h > index 650d45629647a7..9d1afd417215d0 100644 > --- a/include/linux/iommufd.h > +++ b/include/linux/iommufd.h > @@ -57,7 +57,8 @@ void iommufd_access_unpin_pages(struct iommufd_access *access, > unsigned long iova, unsigned long length); > int iommufd_access_rw(struct iommufd_access *access, unsigned long iova, > void *data, size_t len, unsigned int flags); > -int iommufd_vfio_compat_ioas_id(struct iommufd_ctx *ictx, u32 *out_ioas_id); > +int iommufd_vfio_compat_ioas_get_id(struct iommufd_ctx *ictx, u32 *out_ioas_id); > +int iommufd_vfio_compat_ioas_create_id(struct iommufd_ctx *ictx); > #else /* !CONFIG_IOMMUFD */ > static inline struct iommufd_ctx *iommufd_ctx_from_file(struct file *file) > { > @@ -89,8 +90,7 @@ static inline int iommufd_access_rw(struct iommufd_access *access, unsigned long > return -EOPNOTSUPP; > } > > -static inline int iommufd_vfio_compat_ioas_id(struct iommufd_ctx *ictx, > - u32 *out_ioas_id) > +static inline int iommufd_vfio_compat_ioas_create_id(struct iommufd_ctx *ictx) > { > return -EOPNOTSUPP; > } > > base-commit: 88603b6dc419445847923fcb7fe5080067a30f98
On Mon, Jan 09, 2023 at 04:34:34PM -0700, Alex Williamson wrote: > On Mon, 9 Jan 2023 10:22:59 -0400 > Jason Gunthorpe <jgg@nvidia.com> wrote: > > > Add a small amount of emulation to vfio_compat to accept the SET_IOMMU > > to VFIO_NOIOMMU_IOMMU and have vfio just ignore iommufd if it is working > > on a no-iommu enabled device. > > > > Signed-off-by: Jason Gunthorpe <jgg@nvidia.com> > > --- > > drivers/iommu/iommufd/Kconfig | 2 +- > > drivers/iommu/iommufd/vfio_compat.c | 46 ++++++++++++++++++++++++----- > > drivers/vfio/group.c | 13 ++++---- > > drivers/vfio/iommufd.c | 21 ++++++++++++- > > include/linux/iommufd.h | 6 ++-- > > 5 files changed, 70 insertions(+), 18 deletions(-) > > > > This needs a testing confirmation with dpdk to go forward, thanks > > How do we create a noiommu group w/o the vfio_noiommu flag that's > provided by container.c? Ah, the module option is now in the wrong place, I'll move it to vfio_main.c > Even without dpdk, you should be able to turn off the system IOMMU > and get something bound to vfio-pci that still taints the kernel and > provides a noiommu-%d group under /dev/vfio/. There's a rudimentary > unit test for noiommu here[1]. Thanks, Thanks, I'll check it Jason
> From: Jason Gunthorpe <jgg@nvidia.com> > Sent: Monday, January 9, 2023 10:23 PM > > /** > - * iommufd_vfio_compat_ioas_id - Return the IOAS ID that vfio should use > + * iommufd_vfio_compat_ioas_get_id - Ensure a comat IOAS exists s/comat/compat/ > +/** > + * iommufd_vfio_compat_ioas_create_id - Return the IOAS ID that vfio 'ID' is not returned in this case. and it's slightly clearer to remove the trailing '_id' in the function name. > @@ -235,6 +253,9 @@ static int iommufd_vfio_check_extension(struct > iommufd_ctx *ictx, > case VFIO_UNMAP_ALL: > return 1; > > + case VFIO_NOIOMMU_IOMMU: > + return IS_ENABLED(CONFIG_VFIO_NOIOMMU); > + also check vfio_noiommu? > @@ -264,6 +285,17 @@ static int iommufd_vfio_set_iommu(struct > iommufd_ctx *ictx, unsigned long type) > struct iommufd_ioas *ioas = NULL; > int rc = 0; > > > + /* > + * Emulation for NOIMMU is imperfect in that VFIO blocks almost all s/NOIMMU/NOIOMMU/ > + * other ioctls. We let them keep working but they mostly fail since no > + * IOAS should exist. > + */ > + if (IS_ENABLED(CONFIG_VFIO_NOIOMMU) && type == > VFIO_NOIOMMU_IOMMU) { > + if (!capable(CAP_SYS_RAWIO)) > + return -EPERM; > + return 0; > + } > + Another subtle difference. The container path has below check which applies to noiommu: /* * The container is designed to be an unprivileged interface while * the group can be assigned to specific users. Therefore, only by * adding a group to a container does the user get the privilege of * enabling the iommu, which may allocate finite resources. There * is no unset_iommu, but by removing all the groups from a container, * the container is deprivileged and returns to an unset state. */ if (list_empty(&container->group_list) || container->iommu_driver) { up_write(&container->group_lock); return -EINVAL; } > @@ -133,12 +133,13 @@ static int vfio_group_ioctl_set_container(struct > vfio_group *group, > > iommufd = iommufd_ctx_from_file(f.file); > if (!IS_ERR(iommufd)) { > - u32 ioas_id; > - > - ret = iommufd_vfio_compat_ioas_id(iommufd, &ioas_id); > - if (ret) { > - iommufd_ctx_put(group->iommufd); > - goto out_unlock; > + if (!IS_ENABLED(CONFIG_VFIO_NO_IOMMU) || > + group->type != VFIO_NO_IOMMU) { > + ret = > iommufd_vfio_compat_ioas_create_id(iommufd); > + if (ret) { > + iommufd_ctx_put(group->iommufd); > + goto out_unlock; > + } This doesn't prevent userspace from mixing noiommu and the check in vfio_iommufd_bind() is partial which only ensures no compat ioas when binding a noiommu device. It's still possible to bind a VFIO_IOMMU type device and create the compat ioas afterwards. somehow we need a sticky bit similar to container->noiommu.
On Tue, Jan 10, 2023 at 06:20:33AM +0000, Tian, Kevin wrote: > > +/** > > + * iommufd_vfio_compat_ioas_create_id - Return the IOAS ID that vfio > > 'ID' is not returned in this case. > > and it's slightly clearer to remove the trailing '_id' in the function name. > > > @@ -235,6 +253,9 @@ static int iommufd_vfio_check_extension(struct > > iommufd_ctx *ictx, > > case VFIO_UNMAP_ALL: > > return 1; > > > > + case VFIO_NOIOMMU_IOMMU: > > + return IS_ENABLED(CONFIG_VFIO_NOIOMMU); > > + > > also check vfio_noiommu? Can't easily, that value is in another module > Another subtle difference. The container path has below check which applies > to noiommu: > > /* > * The container is designed to be an unprivileged interface while > * the group can be assigned to specific users. Therefore, only by > * adding a group to a container does the user get the privilege of > * enabling the iommu, which may allocate finite resources. There > * is no unset_iommu, but by removing all the groups from a container, > * the container is deprivileged and returns to an unset state. > */ > if (list_empty(&container->group_list) || container->iommu_driver) { > up_write(&container->group_lock); > return -EINVAL; > } We don't follow that model in iommufd, once you have an iommufd you can immediately start allocating resources, and it is not considered "unprivileged". Instead all resource usage is constrained by the cgroup I suppose it is something of a difference in IOMMUFD_VFIO_CONTAINER that it behaves like this, but fixing it is not related to noiommu > > @@ -133,12 +133,13 @@ static int vfio_group_ioctl_set_container(struct > > vfio_group *group, > > > > iommufd = iommufd_ctx_from_file(f.file); > > if (!IS_ERR(iommufd)) { > > - u32 ioas_id; > > - > > - ret = iommufd_vfio_compat_ioas_id(iommufd, &ioas_id); > > - if (ret) { > > - iommufd_ctx_put(group->iommufd); > > - goto out_unlock; > > + if (!IS_ENABLED(CONFIG_VFIO_NO_IOMMU) || > > + group->type != VFIO_NO_IOMMU) { > > + ret = > > iommufd_vfio_compat_ioas_create_id(iommufd); > > + if (ret) { > > + iommufd_ctx_put(group->iommufd); > > + goto out_unlock; > > + } > > This doesn't prevent userspace from mixing noiommu and the check > in vfio_iommufd_bind() is partial which only ensures no compat ioas > when binding a noiommu device. It's still possible to bind a VFIO_IOMMU > type device and create the compat ioas afterwards. There are many ways to have an IOAS but also have an iommufd "bound" to a noiommu device - userspace could just use the native iommufd interface, or mess with the IOMMUFD_CMD_VFIO_IOAS, for instance. I don't really want to get to the same kind of protection as vfio typically did because it would be very invasive for little gain. So long as a well behaved userspace does not become confused that it thinks there is translation but none exists I think this is good enough. The way it is set here is if userspace somehow had a compat IOAS assigned then things will fail, and if it attempts to do a map after binding then it will also fail. This is enough to protect well behaved userspace. This will become more explicit in the cdev stuff where userspace must explicitly specify -1 as the iommufd to run in noiommu and userspace cannot become confused. Jason
diff --git a/drivers/iommu/iommufd/Kconfig b/drivers/iommu/iommufd/Kconfig index 8306616b6d8198..ada693ea51a78e 100644 --- a/drivers/iommu/iommufd/Kconfig +++ b/drivers/iommu/iommufd/Kconfig @@ -23,7 +23,7 @@ config IOMMUFD_VFIO_CONTAINER removed. IOMMUFD VFIO container emulation is known to lack certain features - of the native VFIO container, such as no-IOMMU support, peer-to-peer + of the native VFIO container, such as peer-to-peer DMA mapping, PPC IOMMU support, as well as other potentially undiscovered gaps. This option is currently intended for the purpose of testing IOMMUFD with unmodified userspace supporting VFIO diff --git a/drivers/iommu/iommufd/vfio_compat.c b/drivers/iommu/iommufd/vfio_compat.c index 3ceca0e8311c39..6c8e5dc1c221f4 100644 --- a/drivers/iommu/iommufd/vfio_compat.c +++ b/drivers/iommu/iommufd/vfio_compat.c @@ -26,16 +26,35 @@ static struct iommufd_ioas *get_compat_ioas(struct iommufd_ctx *ictx) } /** - * iommufd_vfio_compat_ioas_id - Return the IOAS ID that vfio should use + * iommufd_vfio_compat_ioas_get_id - Ensure a comat IOAS exists + * @ictx: Context to operate on + * + * Return the ID of the current compatibility ioas. The ID can be passed into + * other functions that take an ioas_id. + */ +int iommufd_vfio_compat_ioas_get_id(struct iommufd_ctx *ictx, u32 *out_ioas_id) +{ + struct iommufd_ioas *ioas; + + ioas = get_compat_ioas(ictx); + if (IS_ERR(ioas)) + return PTR_ERR(ioas); + *out_ioas_id = ioas->obj.id; + iommufd_put_object(&ioas->obj); + return 0; +} +EXPORT_SYMBOL_NS_GPL(iommufd_vfio_compat_ioas_get_id, IOMMUFD_VFIO); + +/** + * iommufd_vfio_compat_ioas_create_id - Return the IOAS ID that vfio should use * @ictx: Context to operate on - * @out_ioas_id: The ioas_id the caller should use * * The compatibility IOAS is the IOAS that the vfio compatibility ioctls operate * on since they do not have an IOAS ID input in their ABI. Only attaching a - * group should cause a default creation of the internal ioas, this returns the - * existing ioas if it has already been assigned somehow. + * group should cause a default creation of the internal ioas, this does nothing + * if an existing ioas has already been assigned somehow. */ -int iommufd_vfio_compat_ioas_id(struct iommufd_ctx *ictx, u32 *out_ioas_id) +int iommufd_vfio_compat_ioas_create_id(struct iommufd_ctx *ictx) { struct iommufd_ioas *ioas = NULL; struct iommufd_ioas *out_ioas; @@ -53,7 +72,6 @@ int iommufd_vfio_compat_ioas_id(struct iommufd_ctx *ictx, u32 *out_ioas_id) } xa_unlock(&ictx->objects); - *out_ioas_id = out_ioas->obj.id; if (out_ioas != ioas) { iommufd_put_object(&out_ioas->obj); iommufd_object_abort(ictx, &ioas->obj); @@ -68,7 +86,7 @@ int iommufd_vfio_compat_ioas_id(struct iommufd_ctx *ictx, u32 *out_ioas_id) iommufd_object_finalize(ictx, &ioas->obj); return 0; } -EXPORT_SYMBOL_NS_GPL(iommufd_vfio_compat_ioas_id, IOMMUFD_VFIO); +EXPORT_SYMBOL_NS_GPL(iommufd_vfio_compat_ioas_create_id, IOMMUFD_VFIO); int iommufd_vfio_ioas(struct iommufd_ucmd *ucmd) { @@ -235,6 +253,9 @@ static int iommufd_vfio_check_extension(struct iommufd_ctx *ictx, case VFIO_UNMAP_ALL: return 1; + case VFIO_NOIOMMU_IOMMU: + return IS_ENABLED(CONFIG_VFIO_NOIOMMU); + case VFIO_DMA_CC_IOMMU: return iommufd_vfio_cc_iommu(ictx); @@ -264,6 +285,17 @@ static int iommufd_vfio_set_iommu(struct iommufd_ctx *ictx, unsigned long type) struct iommufd_ioas *ioas = NULL; int rc = 0; + /* + * Emulation for NOIMMU is imperfect in that VFIO blocks almost all + * other ioctls. We let them keep working but they mostly fail since no + * IOAS should exist. + */ + if (IS_ENABLED(CONFIG_VFIO_NOIOMMU) && type == VFIO_NOIOMMU_IOMMU) { + if (!capable(CAP_SYS_RAWIO)) + return -EPERM; + return 0; + } + if (type != VFIO_TYPE1_IOMMU && type != VFIO_TYPE1v2_IOMMU) return -EINVAL; diff --git a/drivers/vfio/group.c b/drivers/vfio/group.c index bb24b2f0271e03..0f2a483a1f3bdb 100644 --- a/drivers/vfio/group.c +++ b/drivers/vfio/group.c @@ -133,12 +133,13 @@ static int vfio_group_ioctl_set_container(struct vfio_group *group, iommufd = iommufd_ctx_from_file(f.file); if (!IS_ERR(iommufd)) { - u32 ioas_id; - - ret = iommufd_vfio_compat_ioas_id(iommufd, &ioas_id); - if (ret) { - iommufd_ctx_put(group->iommufd); - goto out_unlock; + if (!IS_ENABLED(CONFIG_VFIO_NO_IOMMU) || + group->type != VFIO_NO_IOMMU) { + ret = iommufd_vfio_compat_ioas_create_id(iommufd); + if (ret) { + iommufd_ctx_put(group->iommufd); + goto out_unlock; + } } group->iommufd = iommufd; diff --git a/drivers/vfio/iommufd.c b/drivers/vfio/iommufd.c index 4f82a6fa7c6c7f..da50feb24b6e1d 100644 --- a/drivers/vfio/iommufd.c +++ b/drivers/vfio/iommufd.c @@ -18,6 +18,21 @@ int vfio_iommufd_bind(struct vfio_device *vdev, struct iommufd_ctx *ictx) lockdep_assert_held(&vdev->dev_set->lock); + if (IS_ENABLED(CONFIG_VFIO_NO_IOMMU) && + vdev->group->type == VFIO_NO_IOMMU) { + if (!capable(CAP_SYS_RAWIO)) + return -EPERM; + + /* + * Require no compat ioas to be assigned to proceed. The basic + * statement is that the user cannot have done something that + * implies they expected translation to exist + */ + if (!iommufd_vfio_compat_ioas_get_id(ictx, &ioas_id)) + return -EPERM; + return 0; + } + /* * If the driver doesn't provide this op then it means the device does * not do DMA at all. So nothing to do. @@ -29,7 +44,7 @@ int vfio_iommufd_bind(struct vfio_device *vdev, struct iommufd_ctx *ictx) if (ret) return ret; - ret = iommufd_vfio_compat_ioas_id(ictx, &ioas_id); + ret = iommufd_vfio_compat_ioas_get_id(ictx, &ioas_id); if (ret) goto err_unbind; ret = vdev->ops->attach_ioas(vdev, &ioas_id); @@ -52,6 +67,10 @@ void vfio_iommufd_unbind(struct vfio_device *vdev) { lockdep_assert_held(&vdev->dev_set->lock); + if (IS_ENABLED(CONFIG_VFIO_NO_IOMMU) && + vdev->group->type == VFIO_NO_IOMMU) + return; + if (vdev->ops->unbind_iommufd) vdev->ops->unbind_iommufd(vdev); } diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h index 650d45629647a7..9d1afd417215d0 100644 --- a/include/linux/iommufd.h +++ b/include/linux/iommufd.h @@ -57,7 +57,8 @@ void iommufd_access_unpin_pages(struct iommufd_access *access, unsigned long iova, unsigned long length); int iommufd_access_rw(struct iommufd_access *access, unsigned long iova, void *data, size_t len, unsigned int flags); -int iommufd_vfio_compat_ioas_id(struct iommufd_ctx *ictx, u32 *out_ioas_id); +int iommufd_vfio_compat_ioas_get_id(struct iommufd_ctx *ictx, u32 *out_ioas_id); +int iommufd_vfio_compat_ioas_create_id(struct iommufd_ctx *ictx); #else /* !CONFIG_IOMMUFD */ static inline struct iommufd_ctx *iommufd_ctx_from_file(struct file *file) { @@ -89,8 +90,7 @@ static inline int iommufd_access_rw(struct iommufd_access *access, unsigned long return -EOPNOTSUPP; } -static inline int iommufd_vfio_compat_ioas_id(struct iommufd_ctx *ictx, - u32 *out_ioas_id) +static inline int iommufd_vfio_compat_ioas_create_id(struct iommufd_ctx *ictx) { return -EOPNOTSUPP; }
Add a small amount of emulation to vfio_compat to accept the SET_IOMMU to VFIO_NOIOMMU_IOMMU and have vfio just ignore iommufd if it is working on a no-iommu enabled device. Signed-off-by: Jason Gunthorpe <jgg@nvidia.com> --- drivers/iommu/iommufd/Kconfig | 2 +- drivers/iommu/iommufd/vfio_compat.c | 46 ++++++++++++++++++++++++----- drivers/vfio/group.c | 13 ++++---- drivers/vfio/iommufd.c | 21 ++++++++++++- include/linux/iommufd.h | 6 ++-- 5 files changed, 70 insertions(+), 18 deletions(-) This needs a testing confirmation with dpdk to go forward, thanks base-commit: 88603b6dc419445847923fcb7fe5080067a30f98