Message ID | 20230308132903.465159-14-yi.l.liu@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | cover-letter: Add vfio_device cdev for iommufd support | expand |
> From: Liu, Yi L <yi.l.liu@intel.com> > Sent: Wednesday, March 8, 2023 9:29 PM > > @@ -177,7 +177,7 @@ static int vfio_device_group_open(struct > vfio_device_file *df) > mutex_lock(&device->group->group_lock); > if (!vfio_group_has_iommu(device->group)) { > ret = -EINVAL; > - goto out_unlock; > + goto err_unlock; > } My impression - out_xxx means go to do xxx while err_xxx means go to do something for error xxx, though in many places the two are mixed to both meaning 'do xxx'. either way I don't see a need of changing it. > -int vfio_iommufd_bind(struct vfio_device *vdev, struct iommufd_ctx *ictx) > +static int vfio_iommufd_device_probe_comapt_noiommu(struct vfio_device > *vdev, > + struct iommufd_ctx *ictx) s/comapt/compat/ btw it's clearer to move this check into vfio_device_group_open(). if noiommu then pass NULL to vfio_device_open(), same as the cdev path. > + > +int vfio_iommufd_bind(struct vfio_device *vdev, struct iommufd_ctx *ictx) > +{ > u32 device_id; > int ret; > > lockdep_assert_held(&vdev->dev_set->lock); > > if (vfio_device_is_noiommu(vdev)) { > - if (!capable(CAP_SYS_RAWIO)) > - return -EPERM; > - > - /* > - * Require no compat ioas to be assigned to proceed. 
The > basic > - * statement is that the user cannot have done something > that > - * implies they expected translation to exist > - */ > - if (!iommufd_vfio_compat_ioas_get_id(ictx, &ioas_id)) > - return -EPERM; > - return 0; > + ret = vfio_iommufd_device_probe_comapt_noiommu(vdev, > ictx); > + if (ret) > + return ret; > } > > if (WARN_ON(!vdev->ops->bind_iommufd)) > return -ENODEV; > > - ret = vdev->ops->bind_iommufd(vdev, ictx, &device_id); > - if (ret) > - return ret; > + /* The legacy path has no way to return the device id */ > + return vdev->ops->bind_iommufd(vdev, ictx, &device_id); > +} > > - ret = iommufd_vfio_compat_ioas_get_id(ictx, &ioas_id); > - if (ret) > - goto err_unbind; > - ret = vdev->ops->attach_ioas(vdev, &ioas_id); > - if (ret) > - goto err_unbind; after noiommu check and attach_ioas are moved out then this entire function can be removed now. Just call the ops in vfio_device_first_open(). > +int vfio_iommufd_attach_compat_ioas(struct vfio_device *vdev, > + struct iommufd_ctx *ictx) > +{ > + u32 ioas_id; > + int ret; > + > + lockdep_assert_held(&vdev->dev_set->lock); > > /* > - * The legacy path has no way to return the device id or the selected > - * pt_id > + * If the driver doesn't provide this op then it means the device does > + * not do DMA at all. So nothing to do. > */ > - return 0; > + if (WARN_ON(!vdev->ops->bind_iommufd)) > + return -ENODEV; > > -err_unbind: > - if (vdev->ops->unbind_iommufd) > - vdev->ops->unbind_iommufd(vdev); > - return ret; > + if (vfio_device_is_noiommu(vdev)) { > + if > (WARN_ON(vfio_iommufd_device_probe_comapt_noiommu(vdev, ictx))) > + return -EINVAL; > + return 0; > + } no need. let's directly call following from vfio_device_group_open(). In that case no need to do noiommu check twice in one function. 
> + > + ret = iommufd_vfio_compat_ioas_get_id(ictx, &ioas_id); > + if (ret) > + return ret; > + > + /* The legacy path has no way to return the selected pt_id */ > + return vdev->ops->attach_ioas(vdev, &ioas_id); > } >
> From: Tian, Kevin <kevin.tian@intel.com> > Sent: Friday, March 10, 2023 4:08 PM > > > From: Liu, Yi L <yi.l.liu@intel.com> > > Sent: Wednesday, March 8, 2023 9:29 PM > > > > @@ -177,7 +177,7 @@ static int vfio_device_group_open(struct > > vfio_device_file *df) > > mutex_lock(&device->group->group_lock); > > if (!vfio_group_has_iommu(device->group)) { > > ret = -EINVAL; > > - goto out_unlock; > > + goto err_unlock; > > } > > My impression - out_xxx means go to do xxx while err_xxx means > go to do something for error xxx, though in many places the two > are mixed to both meaning 'do xxx'. > > either way I don't see a need of changing it. Ok. I'm fine with either way. > > -int vfio_iommufd_bind(struct vfio_device *vdev, struct iommufd_ctx > *ictx) > > +static int vfio_iommufd_device_probe_comapt_noiommu(struct > vfio_device > > *vdev, > > + struct iommufd_ctx *ictx) > > s/comapt/compat/ > > btw it's clearer to move this check into vfio_device_group_open(). > > if noiommu then pass NULL to vfio_device_open(), same as the cdev path. Right. > > + > > +int vfio_iommufd_bind(struct vfio_device *vdev, struct iommufd_ctx > *ictx) > > +{ > > u32 device_id; > > int ret; > > > > lockdep_assert_held(&vdev->dev_set->lock); > > > > if (vfio_device_is_noiommu(vdev)) { > > - if (!capable(CAP_SYS_RAWIO)) > > - return -EPERM; > > - > > - /* > > - * Require no compat ioas to be assigned to proceed. 
The > > basic > > - * statement is that the user cannot have done something > > that > > - * implies they expected translation to exist > > - */ > > - if (!iommufd_vfio_compat_ioas_get_id(ictx, &ioas_id)) > > - return -EPERM; > > - return 0; > > + ret = vfio_iommufd_device_probe_comapt_noiommu(vdev, > > ictx); > > + if (ret) > > + return ret; > > } > > > > if (WARN_ON(!vdev->ops->bind_iommufd)) > > return -ENODEV; > > > > - ret = vdev->ops->bind_iommufd(vdev, ictx, &device_id); > > - if (ret) > > - return ret; > > + /* The legacy path has no way to return the device id */ > > + return vdev->ops->bind_iommufd(vdev, ictx, &device_id); > > +} > > > > - ret = iommufd_vfio_compat_ioas_get_id(ictx, &ioas_id); > > - if (ret) > > - goto err_unbind; > > - ret = vdev->ops->attach_ioas(vdev, &ioas_id); > > - if (ret) > > - goto err_unbind; > > after noiommu check and attach_ioas are moved out then this > entire function can be removed now. Just call the ops in > vfio_device_first_open(). Yes. and also no vfio_iommufd_unbind(). > > > +int vfio_iommufd_attach_compat_ioas(struct vfio_device *vdev, > > + struct iommufd_ctx *ictx) > > +{ > > + u32 ioas_id; > > + int ret; > > + > > + lockdep_assert_held(&vdev->dev_set->lock); > > > > /* > > - * The legacy path has no way to return the device id or the selected > > - * pt_id > > + * If the driver doesn't provide this op then it means the device does > > + * not do DMA at all. So nothing to do. > > */ > > - return 0; > > + if (WARN_ON(!vdev->ops->bind_iommufd)) > > + return -ENODEV; > > > > -err_unbind: > > - if (vdev->ops->unbind_iommufd) > > - vdev->ops->unbind_iommufd(vdev); > > - return ret; > > + if (vfio_device_is_noiommu(vdev)) { > > + if > > (WARN_ON(vfio_iommufd_device_probe_comapt_noiommu(vdev, ictx))) > > + return -EINVAL; > > + return 0; > > + } > > no need. let's directly call following from vfio_device_group_open(). > In that case no need to do noiommu check twice in one function. Ok. 
maybe still have vfio_iommufd_attach_compat_ioas() but only call it if it's not noiommu mode. vfio_device_group_open() can call probe_noiommu() first and has a bool to mark noiommu. Jason had a remark that it's better to keep the iommufd_vfio_compat_ioas_get_id() in iommufd.c > > > + > > + ret = iommufd_vfio_compat_ioas_get_id(ictx, &ioas_id); > > + if (ret) > > + return ret; > > + > > + /* The legacy path has no way to return the selected pt_id */ > > + return vdev->ops->attach_ioas(vdev, &ioas_id); > > } > >
> From: Liu, Yi L <yi.l.liu@intel.com> > Sent: Friday, March 10, 2023 4:22 PM > > > > > > +int vfio_iommufd_attach_compat_ioas(struct vfio_device *vdev, > > > + struct iommufd_ctx *ictx) > > > +{ > > > + u32 ioas_id; > > > + int ret; > > > + > > > + lockdep_assert_held(&vdev->dev_set->lock); > > > > > > /* > > > - * The legacy path has no way to return the device id or the selected > > > - * pt_id > > > + * If the driver doesn't provide this op then it means the device does > > > + * not do DMA at all. So nothing to do. > > > */ > > > - return 0; > > > + if (WARN_ON(!vdev->ops->bind_iommufd)) > > > + return -ENODEV; > > > > > > -err_unbind: > > > - if (vdev->ops->unbind_iommufd) > > > - vdev->ops->unbind_iommufd(vdev); > > > - return ret; > > > + if (vfio_device_is_noiommu(vdev)) { > > > + if > > > (WARN_ON(vfio_iommufd_device_probe_comapt_noiommu(vdev, ictx))) > > > + return -EINVAL; > > > + return 0; > > > + } > > > > no need. let's directly call following from vfio_device_group_open(). > > In that case no need to do noiommu check twice in one function. > > Ok. maybe still have vfio_iommufd_attach_compat_ioas() but > only call it if it's not noiommu mode. vfio_device_group_open() > can call probe_noiommu() first and has a bool to mark noiommu. > Jason had a remark that it's better to keep the > iommufd_vfio_compat_ioas_get_id() in iommufd.c > Probably that remark doesn't hold now if we agree to remove vfio_iommufd_bind() and let vfio_device_group_open() directly call .bind_iommufd(). also group.c already calls other compat API: if (IS_ENABLED(CONFIG_VFIO_NOIOMMU) && group->type == VFIO_NO_IOMMU) ret = iommufd_vfio_compat_set_no_iommu(iommufd); else ret = iommufd_vfio_compat_ioas_create(iommufd);
Hi Kevin, > From: Liu, Yi L <yi.l.liu@intel.com> > Sent: Friday, March 10, 2023 4:22 PM > > > From: Tian, Kevin <kevin.tian@intel.com> > > Sent: Friday, March 10, 2023 4:08 PM > > > > > From: Liu, Yi L <yi.l.liu@intel.com> > > > Sent: Wednesday, March 8, 2023 9:29 PM > > > > > > @@ -177,7 +177,7 @@ static int vfio_device_group_open(struct > > > vfio_device_file *df) > > > mutex_lock(&device->group->group_lock); > > > if (!vfio_group_has_iommu(device->group)) { > > > ret = -EINVAL; > > > - goto out_unlock; > > > + goto err_unlock; > > > } > > > > My impression - out_xxx means go to do xxx while err_xxx means > > go to do something for error xxx, though in many places the two > > are mixed to both meaning 'do xxx'. > > > > either way I don't see a need of changing it. > > Ok. I'm fine with either way. > > > > -int vfio_iommufd_bind(struct vfio_device *vdev, struct iommufd_ctx > > *ictx) > > > +static int vfio_iommufd_device_probe_comapt_noiommu(struct > > vfio_device > > > *vdev, > > > + struct iommufd_ctx *ictx) > > > > s/comapt/compat/ > > > > btw it's clearer to move this check into vfio_device_group_open(). > > > > if noiommu then pass NULL to vfio_device_open(), same as the cdev path. > > Right. > > > > + > > > +int vfio_iommufd_bind(struct vfio_device *vdev, struct iommufd_ctx > > *ictx) > > > +{ > > > u32 device_id; > > > int ret; > > > > > > lockdep_assert_held(&vdev->dev_set->lock); > > > > > > if (vfio_device_is_noiommu(vdev)) { > > > - if (!capable(CAP_SYS_RAWIO)) > > > - return -EPERM; > > > - > > > - /* > > > - * Require no compat ioas to be assigned to proceed. 
The > > > basic > > > - * statement is that the user cannot have done something > > > that > > > - * implies they expected translation to exist > > > - */ > > > - if (!iommufd_vfio_compat_ioas_get_id(ictx, &ioas_id)) > > > - return -EPERM; > > > - return 0; > > > + ret = vfio_iommufd_device_probe_comapt_noiommu(vdev, > > > ictx); > > > + if (ret) > > > + return ret; > > > } > > > > > > if (WARN_ON(!vdev->ops->bind_iommufd)) > > > return -ENODEV; > > > > > > - ret = vdev->ops->bind_iommufd(vdev, ictx, &device_id); > > > - if (ret) > > > - return ret; > > > + /* The legacy path has no way to return the device id */ > > > + return vdev->ops->bind_iommufd(vdev, ictx, &device_id); > > > +} > > > > > > - ret = iommufd_vfio_compat_ioas_get_id(ictx, &ioas_id); > > > - if (ret) > > > - goto err_unbind; > > > - ret = vdev->ops->attach_ioas(vdev, &ioas_id); > > > - if (ret) > > > - goto err_unbind; > > > > after noiommu check and attach_ioas are moved out then this > > entire function can be removed now. Just call the ops in > > vfio_device_first_open(). > > Yes. and also no vfio_iommufd_unbind(). Seems still necessary to have this wrapper. The .bind_iommufd callback would be NULL if CONFIG_IOMMUFD==n. If we call ops->bind_iommufd directly in vfio_device_first_open() of vfio_main.c, it may trigger a kernel panic for NULL pointer dereference if there is wrong code that passes a valid iommufd pointer. Ideally, if CONFIG_IOMMUFD==n, vfio_device_first_open() should not receive a valid iommufd pointer and hence won't call ops->bind_iommufd at all. So it deserves a panic. However, if we have a wrapper for it, such code may just fail with -EOPNOTSUPP. 
> > > > > +int vfio_iommufd_attach_compat_ioas(struct vfio_device *vdev, > > > + struct iommufd_ctx *ictx) > > > +{ > > > + u32 ioas_id; > > > + int ret; > > > + > > > + lockdep_assert_held(&vdev->dev_set->lock); > > > > > > /* > > > - * The legacy path has no way to return the device id or the selected > > > - * pt_id > > > + * If the driver doesn't provide this op then it means the device does > > > + * not do DMA at all. So nothing to do. > > > */ > > > - return 0; > > > + if (WARN_ON(!vdev->ops->bind_iommufd)) > > > + return -ENODEV; > > > > > > -err_unbind: > > > - if (vdev->ops->unbind_iommufd) > > > - vdev->ops->unbind_iommufd(vdev); > > > - return ret; > > > + if (vfio_device_is_noiommu(vdev)) { > > > + if > > > (WARN_ON(vfio_iommufd_device_probe_comapt_noiommu(vdev, > ictx))) > > > + return -EINVAL; > > > + return 0; > > > + } > > > > no need. let's directly call following from vfio_device_group_open(). > > In that case no need to do noiommu check twice in one function. > > Ok. maybe still have vfio_iommufd_attach_compat_ioas() but > only call it if it's not noiommu mode. vfio_device_group_open() > can call probe_noiommu() first and has a bool to mark noiommu. > Jason had a remark that it's better to keep the > iommufd_vfio_compat_ioas_get_id() in iommufd.c Same with .bind_iommufd(). If we move the compat ioas attach code to group.c, it may encounter kernel panic if there is wrong code that passes valid iommufd pointer. > > > > > + > > > + ret = iommufd_vfio_compat_ioas_get_id(ictx, &ioas_id); > > > + if (ret) > > > + return ret; > > > + > > > + /* The legacy path has no way to return the selected pt_id */ > > > + return vdev->ops->attach_ioas(vdev, &ioas_id); > > > } > > > Regards, Yi Liu
> From: Liu, Yi L <yi.l.liu@intel.com> > Sent: Saturday, March 11, 2023 6:24 PM > > > > > > > > - ret = vdev->ops->bind_iommufd(vdev, ictx, &device_id); > > > > - if (ret) > > > > - return ret; > > > > + /* The legacy path has no way to return the device id */ > > > > + return vdev->ops->bind_iommufd(vdev, ictx, &device_id); > > > > +} > > > > > > > > - ret = iommufd_vfio_compat_ioas_get_id(ictx, &ioas_id); > > > > - if (ret) > > > > - goto err_unbind; > > > > - ret = vdev->ops->attach_ioas(vdev, &ioas_id); > > > > - if (ret) > > > > - goto err_unbind; > > > > > > after noiommu check and attach_ioas are moved out then this > > > entire function can be removed now. Just call the ops in > > > vfio_device_first_open(). > > > > Yes. and also no vfio_iommufd_unbind(). > > Seems still necessary to have this wrapper. .bind_iommufd callback would > be NULL if CONFIG_IOMMUFD==n. If we call ops->bind_iommufd directly > in vfio_device_first_open() of vfio_main.c, it may trigger kernel panic > for NULL pointer dereference if there is wrong code that passes valid > iommufd pointer.. Ideally, if CONFIG_IOMMUFD==n, vfio_device_first_open > should not receive valid iommufd pointer hence won't call ops- > >bind_iommufd > at all. So it deserves a panic. However, if we have a wrapper for it, such code > may just fail with -EOPNOTSUPPT. > ok, let's keep this wrapper then. I didn't realize it's NULL if CONFIG_IOMMUFD==n.
diff --git a/drivers/vfio/group.c b/drivers/vfio/group.c index 6280368eb0bd..555d68aefa71 100644 --- a/drivers/vfio/group.c +++ b/drivers/vfio/group.c @@ -177,7 +177,7 @@ static int vfio_device_group_open(struct vfio_device_file *df) mutex_lock(&device->group->group_lock); if (!vfio_group_has_iommu(device->group)) { ret = -EINVAL; - goto out_unlock; + goto err_unlock; } mutex_lock(&device->dev_set->lock); @@ -194,9 +194,14 @@ static int vfio_device_group_open(struct vfio_device_file *df) df->iommufd = device->group->iommufd; ret = vfio_device_open(df); - if (ret) { - df->iommufd = NULL; - goto out_put_kvm; + if (ret) + goto err_put_kvm; + + if (device->group->iommufd) { + ret = vfio_iommufd_attach_compat_ioas(device, + device->group->iommufd); + if (ret) + goto err_close_device; } /* @@ -205,13 +210,18 @@ static int vfio_device_group_open(struct vfio_device_file *df) */ smp_store_release(&df->access_granted, true); -out_put_kvm: + mutex_unlock(&device->dev_set->lock); + mutex_unlock(&device->group->group_lock); + return 0; + +err_close_device: + vfio_device_close(df); +err_put_kvm: + df->iommufd = NULL; if (device->open_count == 0) vfio_device_put_kvm(device); - mutex_unlock(&device->dev_set->lock); - -out_unlock: +err_unlock: mutex_unlock(&device->group->group_lock); return ret; } diff --git a/drivers/vfio/iommufd.c b/drivers/vfio/iommufd.c index 30c0da2e11f9..8c518f8bd39a 100644 --- a/drivers/vfio/iommufd.c +++ b/drivers/vfio/iommufd.c @@ -10,52 +10,71 @@ MODULE_IMPORT_NS(IOMMUFD); MODULE_IMPORT_NS(IOMMUFD_VFIO); -int vfio_iommufd_bind(struct vfio_device *vdev, struct iommufd_ctx *ictx) +static int vfio_iommufd_device_probe_comapt_noiommu(struct vfio_device *vdev, + struct iommufd_ctx *ictx) { u32 ioas_id; + + if (!capable(CAP_SYS_RAWIO)) + return -EPERM; + + /* + * Require no compat ioas to be assigned to proceed. 
The basic + * statement is that the user cannot have done something that + * implies they expected translation to exist + */ + if (!iommufd_vfio_compat_ioas_get_id(ictx, &ioas_id)) + return -EPERM; + return 0; +} + +int vfio_iommufd_bind(struct vfio_device *vdev, struct iommufd_ctx *ictx) +{ u32 device_id; int ret; lockdep_assert_held(&vdev->dev_set->lock); if (vfio_device_is_noiommu(vdev)) { - if (!capable(CAP_SYS_RAWIO)) - return -EPERM; - - /* - * Require no compat ioas to be assigned to proceed. The basic - * statement is that the user cannot have done something that - * implies they expected translation to exist - */ - if (!iommufd_vfio_compat_ioas_get_id(ictx, &ioas_id)) - return -EPERM; - return 0; + ret = vfio_iommufd_device_probe_comapt_noiommu(vdev, ictx); + if (ret) + return ret; } if (WARN_ON(!vdev->ops->bind_iommufd)) return -ENODEV; - ret = vdev->ops->bind_iommufd(vdev, ictx, &device_id); - if (ret) - return ret; + /* The legacy path has no way to return the device id */ + return vdev->ops->bind_iommufd(vdev, ictx, &device_id); +} - ret = iommufd_vfio_compat_ioas_get_id(ictx, &ioas_id); - if (ret) - goto err_unbind; - ret = vdev->ops->attach_ioas(vdev, &ioas_id); - if (ret) - goto err_unbind; +int vfio_iommufd_attach_compat_ioas(struct vfio_device *vdev, + struct iommufd_ctx *ictx) +{ + u32 ioas_id; + int ret; + + lockdep_assert_held(&vdev->dev_set->lock); /* - * The legacy path has no way to return the device id or the selected - * pt_id + * If the driver doesn't provide this op then it means the device does + * not do DMA at all. So nothing to do. 
*/ - return 0; + if (WARN_ON(!vdev->ops->bind_iommufd)) + return -ENODEV; -err_unbind: - if (vdev->ops->unbind_iommufd) - vdev->ops->unbind_iommufd(vdev); - return ret; + if (vfio_device_is_noiommu(vdev)) { + if (WARN_ON(vfio_iommufd_device_probe_comapt_noiommu(vdev, ictx))) + return -EINVAL; + return 0; + } + + ret = iommufd_vfio_compat_ioas_get_id(ictx, &ioas_id); + if (ret) + return ret; + + /* The legacy path has no way to return the selected pt_id */ + return vdev->ops->attach_ioas(vdev, &ioas_id); } void vfio_iommufd_unbind(struct vfio_device *vdev) diff --git a/drivers/vfio/vfio.h b/drivers/vfio/vfio.h index 464263288d16..3356321805e9 100644 --- a/drivers/vfio/vfio.h +++ b/drivers/vfio/vfio.h @@ -232,6 +232,8 @@ static inline void vfio_container_cleanup(void) #if IS_ENABLED(CONFIG_IOMMUFD) int vfio_iommufd_bind(struct vfio_device *device, struct iommufd_ctx *ictx); void vfio_iommufd_unbind(struct vfio_device *device); +int vfio_iommufd_attach_compat_ioas(struct vfio_device *vdev, + struct iommufd_ctx *ictx); #else static inline int vfio_iommufd_bind(struct vfio_device *device, struct iommufd_ctx *ictx) @@ -242,6 +244,12 @@ static inline int vfio_iommufd_bind(struct vfio_device *device, static inline void vfio_iommufd_unbind(struct vfio_device *device) { } + +static inline int vfio_iommufd_attach_compat_ioas(struct vfio_device *vdev, + struct iommufd_ctx *ictx) +{ + return -EOPNOTSUPP; +} #endif #if IS_ENABLED(CONFIG_VFIO_VIRQFD)
This makes the group code call .bind_iommufd and .attach_ioas in two steps instead of in a single step. This prepares for the bind_iommufd and attach_ioas support in the coming cdev path. Signed-off-by: Yi Liu <yi.l.liu@intel.com> --- drivers/vfio/group.c | 26 ++++++++++----- drivers/vfio/iommufd.c | 75 ++++++++++++++++++++++++++---------------- drivers/vfio/vfio.h | 8 +++++ 3 files changed, 73 insertions(+), 36 deletions(-)