Message ID | 20230227111135.61728-18-yi.l.liu@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Add vfio_device cdev for iommufd support | expand |
On Mon, Feb 27, 2023 at 03:11:33AM -0800, Yi Liu wrote: > This adds ioctl for userspace to attach device cdev fd to and detach > from IOAS/hw_pagetable managed by iommufd. > > VFIO_DEVICE_ATTACH_IOMMUFD_PT: attach vfio device to IOAS, hw_pagetable > managed by iommufd. Attach can be > undo by VFIO_DEVICE_DETACH_IOMMUFD_PT > or device fd close. > VFIO_DEVICE_DETACH_IOMMUFD_PT: detach vfio device from the current attached > IOAS or hw_pagetable managed by iommufd. > > Signed-off-by: Yi Liu <yi.l.liu@intel.com> > Reviewed-by: Kevin Tian <kevin.tian@intel.com> > --- > drivers/vfio/device_cdev.c | 76 ++++++++++++++++++++++++++++++++++++++ > drivers/vfio/vfio.h | 16 ++++++++ > drivers/vfio/vfio_main.c | 8 ++++ > include/uapi/linux/vfio.h | 52 ++++++++++++++++++++++++++ > 4 files changed, 152 insertions(+) > > diff --git a/drivers/vfio/device_cdev.c b/drivers/vfio/device_cdev.c > index 37f80e368551..5b5a249a6612 100644 > --- a/drivers/vfio/device_cdev.c > +++ b/drivers/vfio/device_cdev.c > @@ -191,6 +191,82 @@ long vfio_device_ioctl_bind_iommufd(struct vfio_device_file *df, > return ret; > } > > +int vfio_ioctl_device_attach(struct vfio_device_file *df, > + void __user *arg) This should be struct vfio_device_attach_iommufd_pt __user *arg > +{ > + struct vfio_device *device = df->device; > + struct vfio_device_attach_iommufd_pt attach; > + unsigned long minsz; > + int ret; > + > + minsz = offsetofend(struct vfio_device_attach_iommufd_pt, pt_id); > + > + if (copy_from_user(&attach, (void __user *)arg, minsz)) No cast > + return -EFAULT; > + > + if (attach.argsz < minsz || attach.flags || > + attach.pt_id == IOMMUFD_INVALID_ID) > + return -EINVAL; > + > + if (!device->ops->bind_iommufd) > + return -ENODEV; > + > + mutex_lock(&device->dev_set->lock); > + if (df->noiommu) { > + ret = -EINVAL; > + goto out_unlock; > + } > + > + ret = device->ops->attach_ioas(device, &attach.pt_id); > + if (ret) > + goto out_unlock; > + > + ret = copy_to_user((void __user *)arg + > + offsetofend(struct vfio_device_attach_iommufd_pt, flags), This should just be &arg->flags > + &attach.pt_id, > + sizeof(attach.pt_id)) ? -EFAULT : 0; Also: static_assert(__same_type(arg->flags), attach.pt_id); > + if (ret) > + goto out_detach; > + mutex_unlock(&device->dev_set->lock); > + > + return 0; > + > +out_detach: > + device->ops->detach_ioas(device); > +out_unlock: > + mutex_unlock(&device->dev_set->lock); > + return ret; > +} > + > +int vfio_ioctl_device_detach(struct vfio_device_file *df, > + void __user *arg) > +{ > + struct vfio_device *device = df->device; > + struct vfio_device_detach_iommufd_pt detach; > + unsigned long minsz; > + > + minsz = offsetofend(struct vfio_device_detach_iommufd_pt, flags); > + > + if (copy_from_user(&detach, (void __user *)arg, minsz)) > + return -EFAULT; Same comments here > + > + if (detach.argsz < minsz || detach.flags) > + return -EINVAL; > + > + if (!device->ops->bind_iommufd) > + return -ENODEV; > + > + mutex_lock(&device->dev_set->lock); > + if (df->noiommu) { > + mutex_unlock(&device->dev_set->lock); > + return -EINVAL; > + } This seems strange. no iommu mode should have a NULL dev->iommufctx. Why do we have a df->noiommu at all? Jason
> From: Jason Gunthorpe <jgg@nvidia.com> > Sent: Tuesday, February 28, 2023 2:39 AM > > On Mon, Feb 27, 2023 at 03:11:33AM -0800, Yi Liu wrote: > > This adds ioctl for userspace to attach device cdev fd to and detach > > from IOAS/hw_pagetable managed by iommufd. > > > > VFIO_DEVICE_ATTACH_IOMMUFD_PT: attach vfio device to IOAS, > hw_pagetable > > managed by iommufd. Attach can be > > undo by > VFIO_DEVICE_DETACH_IOMMUFD_PT > > or device fd close. > > VFIO_DEVICE_DETACH_IOMMUFD_PT: detach vfio device from the > current attached > > IOAS or hw_pagetable managed by > iommufd. > > > > Signed-off-by: Yi Liu <yi.l.liu@intel.com> > > Reviewed-by: Kevin Tian <kevin.tian@intel.com> > > --- > > drivers/vfio/device_cdev.c | 76 > ++++++++++++++++++++++++++++++++++++++ > > drivers/vfio/vfio.h | 16 ++++++++ > > drivers/vfio/vfio_main.c | 8 ++++ > > include/uapi/linux/vfio.h | 52 ++++++++++++++++++++++++++ > > 4 files changed, 152 insertions(+) > > > > diff --git a/drivers/vfio/device_cdev.c b/drivers/vfio/device_cdev.c > > index 37f80e368551..5b5a249a6612 100644 > > --- a/drivers/vfio/device_cdev.c > > +++ b/drivers/vfio/device_cdev.c > > @@ -191,6 +191,82 @@ long vfio_device_ioctl_bind_iommufd(struct > vfio_device_file *df, > > return ret; > > } > > > > +int vfio_ioctl_device_attach(struct vfio_device_file *df, > > + void __user *arg) > > This should be > > struct vfio_device_attach_iommufd_pt __user *arg Got it. > > +{ > > + struct vfio_device *device = df->device; > > + struct vfio_device_attach_iommufd_pt attach; > > + unsigned long minsz; > > + int ret; > > + > > + minsz = offsetofend(struct vfio_device_attach_iommufd_pt, pt_id); > > + > > + if (copy_from_user(&attach, (void __user *)arg, minsz)) > > No cast Yes. > > + return -EFAULT; > > + > > + if (attach.argsz < minsz || attach.flags || > > + attach.pt_id == IOMMUFD_INVALID_ID) > > + return -EINVAL; > > + > > + if (!device->ops->bind_iommufd) > > + return -ENODEV; > > + > > + mutex_lock(&device->dev_set->lock); > > + if (df->noiommu) { > > + ret = -EINVAL; > > + goto out_unlock; > > + } > > + > > + ret = device->ops->attach_ioas(device, &attach.pt_id); > > + if (ret) > > + goto out_unlock; > > + > > + ret = copy_to_user((void __user *)arg + > > + offsetofend(struct > vfio_device_attach_iommufd_pt, flags), > > This should just be &arg->flags Yes, can use arg->xxx here. I guess you mean &arg->pt_id. > > > + &attach.pt_id, > > + sizeof(attach.pt_id)) ? -EFAULT : 0; > > Also: > > static_assert(__same_type(arg->flags), attach.pt_id); Got it. but s/arg->flags/arg->pt_id/ > > + if (ret) > > + goto out_detach; > > + mutex_unlock(&device->dev_set->lock); > > + > > + return 0; > > + > > +out_detach: > > + device->ops->detach_ioas(device); > > > > +out_unlock: > > + mutex_unlock(&device->dev_set->lock); > > + return ret; > > +} > > + > > +int vfio_ioctl_device_detach(struct vfio_device_file *df, > > + void __user *arg) > > +{ > > + struct vfio_device *device = df->device; > > + struct vfio_device_detach_iommufd_pt detach; > > + unsigned long minsz; > > + > > + minsz = offsetofend(struct vfio_device_detach_iommufd_pt, flags); > > + > > + if (copy_from_user(&detach, (void __user *)arg, minsz)) > > + return -EFAULT; > > Same comments here Sure. > > + > > + if (detach.argsz < minsz || detach.flags) > > + return -EINVAL; > > + > > + if (!device->ops->bind_iommufd) > > + return -ENODEV; > > + > > + mutex_lock(&device->dev_set->lock); > > + if (df->noiommu) { > > + mutex_unlock(&device->dev_set->lock); > > + return -EINVAL; > > + } > > This seems strange. no iommu mode should have a NULL dev->iommufctx. > Why do we have a df->noiommu at all? This is due to the vfio_device_first_open(). Detail as below comment (part of patch 0016). + /* + * For group/container path, iommufd pointer is NULL when comes + * into this helper. Its noiommu support is handled by + * vfio_device_group_use_iommu() + * + * For iommufd compat mode, iommufd pointer here is a valid value. + * Its noiommu support is in vfio_iommufd_bind(). + * + * For device cdev path, iommufd pointer here is a valid value for + * normal cases, but it is NULL if it's noiommu. Check df->noiommu + * to differentiate cdev noiommu from the group/container path which + * also passes NULL iommufd pointer in. If set then do nothing. + */ if (iommufd) ret = vfio_iommufd_bind(device, iommufd, dev_id, pt_id); - else + else if (!df->noiommu) ret = vfio_device_group_use_iommu(device); if (ret) goto err_module_put; Regards, Yi Liu
On Tue, Feb 28, 2023 at 02:51:28AM +0000, Liu, Yi L wrote: > > This seems strange. no iommu mode should have a NULL dev->iommufctx. > > Why do we have a df->noiommu at all? > > This is due to the vfio_device_first_open(). Detail as below comment (part of > patch 0016). > > + /* > + * For group/container path, iommufd pointer is NULL when comes > + * into this helper. Its noiommu support is handled by > + * vfio_device_group_use_iommu() > + * > + * For iommufd compat mode, iommufd pointer here is a valid value. > + * Its noiommu support is in vfio_iommufd_bind(). > + * > + * For device cdev path, iommufd pointer here is a valid value for > + * normal cases, but it is NULL if it's noiommu. Check df->noiommu > + * to differentiate cdev noiommu from the group/container path which > + * also passes NULL iommufd pointer in. If set then do nothing. > + */ If the group is in iommufd mode then it should set this pointer too. Jason
> From: Jason Gunthorpe <jgg@nvidia.com> > Sent: Tuesday, February 28, 2023 8:32 PM > > On Tue, Feb 28, 2023 at 02:51:28AM +0000, Liu, Yi L wrote: > > > This seems strange. no iommu mode should have a NULL dev- > >iommufctx. > > > Why do we have a df->noiommu at all? > > > > This is due to the vfio_device_first_open(). Detail as below comment (part > of > > patch 0016). > > > > + /* > > + * For group/container path, iommufd pointer is NULL when comes > > + * into this helper. Its noiommu support is handled by > > + * vfio_device_group_use_iommu() > > + * > > + * For iommufd compat mode, iommufd pointer here is a valid value. > > + * Its noiommu support is in vfio_iommufd_bind(). > > + * > > + * For device cdev path, iommufd pointer here is a valid value for > > + * normal cases, but it is NULL if it's noiommu. Check df->noiommu > > + * to differentiate cdev noiommu from the group/container path > which > > + * also passes NULL iommufd pointer in. If set then do nothing. > > + */ > > If the group is in iommufd mode then it should set this pointer too. Yes, but the key point is that both the group in legacy mode and the cdev path sets iommufd==NULL. And the handling for the two should be different. So needs this extra info to differentiate them in vfio_device_first_open(). Regards, Yi Liu
On Tue, Feb 28, 2023 at 12:42:31PM +0000, Liu, Yi L wrote: > > From: Jason Gunthorpe <jgg@nvidia.com> > > Sent: Tuesday, February 28, 2023 8:32 PM > > > > On Tue, Feb 28, 2023 at 02:51:28AM +0000, Liu, Yi L wrote: > > > > This seems strange. no iommu mode should have a NULL dev- > > >iommufctx. > > > > Why do we have a df->noiommu at all? > > > > > > This is due to the vfio_device_first_open(). Detail as below comment (part > > of > > > patch 0016). > > > > > > + /* > > > + * For group/container path, iommufd pointer is NULL when comes > > > + * into this helper. Its noiommu support is handled by > > > + * vfio_device_group_use_iommu() > > > + * > > > + * For iommufd compat mode, iommufd pointer here is a valid value. > > > + * Its noiommu support is in vfio_iommufd_bind(). > > > + * > > > + * For device cdev path, iommufd pointer here is a valid value for > > > + * normal cases, but it is NULL if it's noiommu. Check df->noiommu > > > + * to differentiate cdev noiommu from the group/container path > > which > > > + * also passes NULL iommufd pointer in. If set then do nothing. > > > + */ > > > > If the group is in iommufd mode then it should set this pointer too. > > Yes, but the key point is that both the group in legacy mode and the > cdev path sets iommufd==NULL. And the handling for the two should > be different. So needs this extra info to differentiate them in > vfio_device_first_open(). Don't encode that in the iommufd pointer, it is confusing. A null iommufd pointer and a bound df flag is sufficient to see that it is compat mode. Jason
> From: Jason Gunthorpe <jgg@nvidia.com> > Sent: Tuesday, February 28, 2023 8:54 PM > > On Tue, Feb 28, 2023 at 12:42:31PM +0000, Liu, Yi L wrote: > > > From: Jason Gunthorpe <jgg@nvidia.com> > > > Sent: Tuesday, February 28, 2023 8:32 PM > > > > > > On Tue, Feb 28, 2023 at 02:51:28AM +0000, Liu, Yi L wrote: > > > > > This seems strange. no iommu mode should have a NULL dev- > > > >iommufctx. > > > > > Why do we have a df->noiommu at all? > > > > > > > > This is due to the vfio_device_first_open(). Detail as below comment > (part > > > of > > > > patch 0016). > > > > > > > > + /* > > > > + * For group/container path, iommufd pointer is NULL when comes > > > > + * into this helper. Its noiommu support is handled by > > > > + * vfio_device_group_use_iommu() > > > > + * > > > > + * For iommufd compat mode, iommufd pointer here is a valid value. > > > > + * Its noiommu support is in vfio_iommufd_bind(). > > > > + * > > > > + * For device cdev path, iommufd pointer here is a valid value for > > > > + * normal cases, but it is NULL if it's noiommu. Check df->noiommu > > > > + * to differentiate cdev noiommu from the group/container path > > > which > > > > + * also passes NULL iommufd pointer in. If set then do nothing. > > > > + */ > > > > > > If the group is in iommufd mode then it should set this pointer too. > > > > Yes, but the key point is that both the group in legacy mode and the > > cdev path sets iommufd==NULL. And the handling for the two should > > be different. So needs this extra info to differentiate them in > > vfio_device_first_open(). > > Don't encode that in the iommufd pointer, it is confusing. Maybe I failed to make it clear. As the below code, When iommufd==!NULL, no need to differentiate whether it is the group compat mode or the cdev path. But if iommufd==NULL, it may be the legacy group code or the cdev noiommu mode. So df->noiommu is added. But I agree this noiommu flag is confusing. May use the df->is_cdev_device flag as the purpose here is to differentiate cdev path and group path. if (iommufd) ret = vfio_iommufd_bind(device, iommufd, dev_id); else if (!df->noiommu) ret = vfio_device_group_use_iommu(device); if (ret) goto err_module_put; > A null iommufd pointer and a bound df flag is sufficient to see that > it is compat mode. Hope df->is_cdev_device suits your expectation.:-) The code will look like below: if (iommufd) { ret = vfio_iommufd_bind(device, iommufd, dev_id); if (!ret && !df->is_cdev_device) { ret = vfio_iommufd_attach_compat(device); // new helper as in patch 10 discussed if (ret) vfio_iommufd_unbind(device); } } else if (!df->is_cdev_device) { ret = vfio_device_group_use_iommu(device); } if (ret) goto err_module_put; Regards, Yi Liu
On Tue, Feb 28, 2023 at 01:22:50PM +0000, Liu, Yi L wrote: > > A null iommufd pointer and a bound df flag is sufficient to see that > > it is compat mode. > > Hope df->is_cdev_device suits your expectation.:-) The code will look > like below: Yes, this is better.. However I'd suggest 'uses_container' as it is clearer what the special case is Jason
> From: Jason Gunthorpe <jgg@nvidia.com> > Sent: Tuesday, February 28, 2023 9:26 PM > > On Tue, Feb 28, 2023 at 01:22:50PM +0000, Liu, Yi L wrote: > > > > A null iommufd pointer and a bound df flag is sufficient to see that > > > it is compat mode. > > > > Hope df->is_cdev_device suits your expectation.:-) The code will look > > like below: > > Yes, this is better.. However I'd suggest 'uses_container' as it is > clearer what the special case is Surely doable. Need to add a helper like below: bool vfio_device_group_uses_container() { lockdep_assert_held(&device->group->group_lock); return device->group->container; } But I'm poor at naming it. If it is true, the code would call vfio_device_group_use_iommu(). If doing it in this way, I think it's better to rename vfio_device_group_use_iommu() as well. Regards, Yi Liu
On Tue, Feb 28, 2023 at 01:36:24PM +0000, Liu, Yi L wrote: > > From: Jason Gunthorpe <jgg@nvidia.com> > > Sent: Tuesday, February 28, 2023 9:26 PM > > > > On Tue, Feb 28, 2023 at 01:22:50PM +0000, Liu, Yi L wrote: > > > > > > A null iommufd pointer and a bound df flag is sufficient to see that > > > > it is compat mode. > > > > > > Hope df->is_cdev_device suits your expectation.:-) The code will look > > > like below: > > > > Yes, this is better.. However I'd suggest 'uses_container' as it is > > clearer what the special case is > > Surely doable. Need to add a helper like below: > > bool vfio_device_group_uses_container() > { > lockdep_assert_held(&device->group->group_lock); > return device->group->container; > } It should come from the df. If you have a df then by definition: smp_load_acquire(..) == false - Not bound df->device->iommufd_ctx != NULL - Using iommufd df->group->containter != NULL - Using legacy container all other cases - NO_IOMMU No locking required since all these cases after the smp_load_acquire must be fixed for the lifetime of the df. Jason
> From: Jason Gunthorpe <jgg@nvidia.com> > Sent: Tuesday, February 28, 2023 9:44 PM > > On Tue, Feb 28, 2023 at 01:36:24PM +0000, Liu, Yi L wrote: > > > From: Jason Gunthorpe <jgg@nvidia.com> > > > Sent: Tuesday, February 28, 2023 9:26 PM > > > > > > On Tue, Feb 28, 2023 at 01:22:50PM +0000, Liu, Yi L wrote: > > > > > > > > A null iommufd pointer and a bound df flag is sufficient to see that > > > > > it is compat mode. > > > > > > > > Hope df->is_cdev_device suits your expectation.:-) The code will look > > > > like below: > > > > > > Yes, this is better.. However I'd suggest 'uses_container' as it is > > > clearer what the special case is > > > > Surely doable. Need to add a helper like below: > > > > bool vfio_device_group_uses_container() > > { > > lockdep_assert_held(&device->group->group_lock); > > return device->group->container; > > } > > It should come from the df. > > If you have a df then by definition: > smp_load_acquire(..) == false - Not bound > df->device->iommufd_ctx != NULL - Using iommufd > df->group->containter != NULL - Using legacy container > all other cases - NO_IOMMU > > No locking required since all these cases after the smp_load_acquire > must be fixed for the lifetime of the df. Do you mean the df->access_granted (introduced in patch 07) or a new flag? Following your suggestion, it seems a mandatory requirement to do the smp_load_acquire(..) == false check first, and then call into the vfio_device_open() which further calls vfio_device_first_open() to check the iommufd/ legacy container/noiommu stuffs. Is it? df->group->containter this may need a helper to avoid decoding group field. May be just store container in df? Regards, Yi Liu
On Tue, Feb 28, 2023 at 02:01:36PM +0000, Liu, Yi L wrote: > > From: Jason Gunthorpe <jgg@nvidia.com> > > Sent: Tuesday, February 28, 2023 9:44 PM > > > > On Tue, Feb 28, 2023 at 01:36:24PM +0000, Liu, Yi L wrote: > > > > From: Jason Gunthorpe <jgg@nvidia.com> > > > > Sent: Tuesday, February 28, 2023 9:26 PM > > > > > > > > On Tue, Feb 28, 2023 at 01:22:50PM +0000, Liu, Yi L wrote: > > > > > > > > > > A null iommufd pointer and a bound df flag is sufficient to see that > > > > > > it is compat mode. > > > > > > > > > > Hope df->is_cdev_device suits your expectation.:-) The code will look > > > > > like below: > > > > > > > > Yes, this is better.. However I'd suggest 'uses_container' as it is > > > > clearer what the special case is > > > > > > Surely doable. Need to add a helper like below: > > > > > > bool vfio_device_group_uses_container() > > > { > > > lockdep_assert_held(&device->group->group_lock); > > > return device->group->container; > > > } > > > > It should come from the df. > > > > If you have a df then by definition: > > smp_load_acquire(..) == false - Not bound > > df->device->iommufd_ctx != NULL - Using iommufd > > df->group->containter != NULL - Using legacy container > > all other cases - NO_IOMMU > > > > No locking required since all these cases after the smp_load_acquire > > must be fixed for the lifetime of the df. > > Do you mean the df->access_granted (introduced in patch 07) or a new > flag? yes > Following your suggestion, it seems a mandatory requirement to do the > smp_load_acquire(..) == false check first, and then call into the vfio_device_open() > which further calls vfio_device_first_open() to check the iommufd/ > legacy container/noiommu stuffs. Is it? Figuring out if an open should happen or not is a different operation, you already build exclusion between cdev/group so we don't need to care about the open path. > df->group->containter this may need a helper to avoid decoding group > field. May be just store container in df? At worst a flag, but a helper seems like a good idea anyhow, then it can be compiled out Jason
> From: Jason Gunthorpe <jgg@nvidia.com> > Sent: Tuesday, February 28, 2023 10:38 PM > > On Tue, Feb 28, 2023 at 02:01:36PM +0000, Liu, Yi L wrote: > > > From: Jason Gunthorpe <jgg@nvidia.com> > > > Sent: Tuesday, February 28, 2023 9:44 PM > > > > > > On Tue, Feb 28, 2023 at 01:36:24PM +0000, Liu, Yi L wrote: > > > > > From: Jason Gunthorpe <jgg@nvidia.com> > > > > > Sent: Tuesday, February 28, 2023 9:26 PM > > > > > > > > > > On Tue, Feb 28, 2023 at 01:22:50PM +0000, Liu, Yi L wrote: > > > > > > > > > > > > A null iommufd pointer and a bound df flag is sufficient to see > that > > > > > > > it is compat mode. > > > > > > > > > > > > Hope df->is_cdev_device suits your expectation.:-) The code will > look > > > > > > like below: > > > > > > > > > > Yes, this is better.. However I'd suggest 'uses_container' as it is > > > > > clearer what the special case is > > > > > > > > Surely doable. Need to add a helper like below: > > > > > > > > bool vfio_device_group_uses_container() > > > > { > > > > lockdep_assert_held(&device->group->group_lock); > > > > return device->group->container; > > > > } > > > > > > It should come from the df. > > > > > > If you have a df then by definition: > > > smp_load_acquire(..) == false - Not bound > > > df->device->iommufd_ctx != NULL - Using iommufd > > > df->group->containter != NULL - Using legacy container > > > all other cases - NO_IOMMU > > > > > > No locking required since all these cases after the smp_load_acquire > > > must be fixed for the lifetime of the df. > > > > Do you mean the df->access_granted (introduced in patch 07) or a new > > flag? > > yes > > > Following your suggestion, it seems a mandatory requirement to do the > > smp_load_acquire(..) == false check first, and then call into the > vfio_device_open() > > which further calls vfio_device_first_open() to check the iommufd/ > > legacy container/noiommu stuffs. Is it? > > Figuring out if an open should happen or not is a different operation, > you already build exclusion between cdev/group so we don't need to > care about the open path. Ok. > > df->group->containter this may need a helper to avoid decoding group > > field. May be just store container in df? > > At worst a flag, but a helper seems like a good idea anyhow, then it > can be compiled out I add a separate commit as below. vfio_device_group_uses_container() is added. From 0ce86e6b71d1884e9f5de30ba23e3aa93cc84db9 Mon Sep 17 00:00:00 2001 From: Yi Liu <yi.l.liu@intel.com> Date: Wed, 1 Mar 2023 02:24:43 -0800 Subject: [PATCH 15/22] vfio: Make vfio_device_first_open() to cover the noiommu mode in cdev path vfio_device_first_open() now covers the below two cases: 1) user uses iommufd (e.g. the group path in iommufd compat mode); 2) user uses container (e.g. the group path in legacy mode); The above two paths have their own noiommu mode support accordingly. The cdev path also uses iommufd, so for the case user provides a valid iommufd, this helper is able to support it. But for noiommu mode, the cdev path just provides a NULL iommufd. So this needs to be able to cover it. As there is no special things to do for the cdev path in noiommu mode, it can be covered by simply differentiate it from the container case. If user is not using iommufd nor container, it is the noiommu mode. Signed-off-by: Yi Liu <yi.l.liu@intel.com> --- drivers/vfio/group.c | 5 +++++ drivers/vfio/vfio.h | 1 + drivers/vfio/vfio_main.c | 19 ++++++++++++++++--- 3 files changed, 22 insertions(+), 3 deletions(-) diff --git a/drivers/vfio/group.c b/drivers/vfio/group.c index 2a13442add43..ed3ffe7ceb3f 100644 --- a/drivers/vfio/group.c +++ b/drivers/vfio/group.c @@ -777,6 +777,11 @@ void vfio_device_group_unregister(struct vfio_device *device) mutex_unlock(&device->group->device_lock); } +bool vfio_device_group_uses_container(struct vfio_device *device) +{ + return READ_ONCE(device->group->container); +} + int vfio_device_group_use_iommu(struct vfio_device *device) { struct vfio_group *group = device->group; diff --git a/drivers/vfio/vfio.h b/drivers/vfio/vfio.h index 68d35e1d7b87..e1f5a0310551 100644 --- a/drivers/vfio/vfio.h +++ b/drivers/vfio/vfio.h @@ -95,6 +95,7 @@ int vfio_device_set_group(struct vfio_device *device, void vfio_device_remove_group(struct vfio_device *device); void vfio_device_group_register(struct vfio_device *device); void vfio_device_group_unregister(struct vfio_device *device); +bool vfio_device_group_uses_container(struct vfio_device *device); int vfio_device_group_use_iommu(struct vfio_device *device); void vfio_device_group_unuse_iommu(struct vfio_device *device); void vfio_device_group_close(struct vfio_device_file *df); diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c index 121a75fadceb..4b5b17e8aaa1 100644 --- a/drivers/vfio/vfio_main.c +++ b/drivers/vfio/vfio_main.c @@ -422,9 +422,22 @@ static int vfio_device_first_open(struct vfio_device_file *df) if (!try_module_get(device->dev->driver->owner)) return -ENODEV; + /* + * The handling here depends on what the user is using. + * + * If user uses iommufd in the group compat mode or the + * cdev path, call vfio_iommufd_bind(). + * + * If user uses container in the group legacy mode, call + * vfio_device_group_use_iommu(). + * + * If user doesn't use iommufd nor container, this is + * the noiommufd mode in the cdev path, nothing needs + * to be done here just go ahead to open device. + */ if (iommufd) ret = vfio_iommufd_bind(device, iommufd); - else + else if (vfio_device_group_uses_container(device)) ret = vfio_device_group_use_iommu(device); if (ret) goto err_module_put; @@ -439,7 +452,7 @@ static int vfio_device_first_open(struct vfio_device_file *df) err_unuse_iommu: if (iommufd) vfio_iommufd_unbind(device); - else + else if (vfio_device_group_uses_container(device)) vfio_device_group_unuse_iommu(device); err_module_put: module_put(device->dev->driver->owner); @@ -457,7 +470,7 @@ static void vfio_device_last_close(struct vfio_device_file *df) device->ops->close_device(device); if (iommufd) vfio_iommufd_unbind(device); - else + else if (vfio_device_group_uses_container(device)) vfio_device_group_unuse_iommu(device); module_put(device->dev->driver->owner); }
On Wed, Mar 01, 2023 at 02:04:00PM +0000, Liu, Yi L wrote: > diff --git a/drivers/vfio/group.c b/drivers/vfio/group.c > index 2a13442add43..ed3ffe7ceb3f 100644 > --- a/drivers/vfio/group.c > +++ b/drivers/vfio/group.c > @@ -777,6 +777,11 @@ void vfio_device_group_unregister(struct vfio_device *device) > mutex_unlock(&device->group->device_lock); > } > > +bool vfio_device_group_uses_container(struct vfio_device *device) > +{ > + return READ_ONCE(device->group->container); > +} As I said this should take in the vfio_device_file because as long as a vfio_device_file exists then group->contianer is required to be stable. > diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c > index 121a75fadceb..4b5b17e8aaa1 100644 > --- a/drivers/vfio/vfio_main.c > +++ b/drivers/vfio/vfio_main.c > @@ -422,9 +422,22 @@ static int vfio_device_first_open(struct vfio_device_file *df) > if (!try_module_get(device->dev->driver->owner)) > return -ENODEV; > > + /* > + * The handling here depends on what the user is using. > + * > + * If user uses iommufd in the group compat mode or the > + * cdev path, call vfio_iommufd_bind(). > + * > + * If user uses container in the group legacy mode, call > + * vfio_device_group_use_iommu(). > + * > + * If user doesn't use iommufd nor container, this is > + * the noiommufd mode in the cdev path, nothing needs > + * to be done here just go ahead to open device. > + */ > if (iommufd) > ret = vfio_iommufd_bind(device, iommufd); > - else > + else if (vfio_device_group_uses_container(device)) > ret = vfio_device_group_use_iommu(device); But yes, this makes alot more sense.. Jason
> From: Jason Gunthorpe <jgg@nvidia.com> > Sent: Thursday, March 2, 2023 1:49 AM > > On Wed, Mar 01, 2023 at 02:04:00PM +0000, Liu, Yi L wrote: > > diff --git a/drivers/vfio/group.c b/drivers/vfio/group.c > > index 2a13442add43..ed3ffe7ceb3f 100644 > > --- a/drivers/vfio/group.c > > +++ b/drivers/vfio/group.c > > @@ -777,6 +777,11 @@ void vfio_device_group_unregister(struct > vfio_device *device) > > mutex_unlock(&device->group->device_lock); > > } > > > > +bool vfio_device_group_uses_container(struct vfio_device *device) > > +{ > > + return READ_ONCE(device->group->container); > > +} > > As I said this should take in the vfio_device_file because as long as > a vfio_device_file exists then group->contianer is required to be stable. Ok, let me store vfio_group in vfio_devcie_file instead of reach it by df->device->group. btw. With vfio_group stored in vfio_device_file, it looks like the is_cdev_device flag (introduced in patch 14) is not necessary now, we can always define the group pointer in vfio_device_file even group code is compiled out, then we can use this group pointer to check if the vfio_device_file is used in the group path or the cdev path. Is it? > > diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c > > index 121a75fadceb..4b5b17e8aaa1 100644 > > --- a/drivers/vfio/vfio_main.c > > +++ b/drivers/vfio/vfio_main.c > > @@ -422,9 +422,22 @@ static int vfio_device_first_open(struct > vfio_device_file *df) > > if (!try_module_get(device->dev->driver->owner)) > > return -ENODEV; > > > > + /* > > + * The handling here depends on what the user is using. > > + * > > + * If user uses iommufd in the group compat mode or the > > + * cdev path, call vfio_iommufd_bind(). > > + * > > + * If user uses container in the group legacy mode, call > > + * vfio_device_group_use_iommu(). > > + * > > + * If user doesn't use iommufd nor container, this is > > + * the noiommufd mode in the cdev path, nothing needs > > + * to be done here just go ahead to open device. > > + */ > > if (iommufd) > > ret = vfio_iommufd_bind(device, iommufd); > > - else > > + else if (vfio_device_group_uses_container(device)) > > ret = vfio_device_group_use_iommu(device); > > But yes, this makes alot more sense.. > > Jason Regards, Yi Liu
diff --git a/drivers/vfio/device_cdev.c b/drivers/vfio/device_cdev.c index 37f80e368551..5b5a249a6612 100644 --- a/drivers/vfio/device_cdev.c +++ b/drivers/vfio/device_cdev.c @@ -191,6 +191,82 @@ long vfio_device_ioctl_bind_iommufd(struct vfio_device_file *df, return ret; } +int vfio_ioctl_device_attach(struct vfio_device_file *df, + void __user *arg) +{ + struct vfio_device *device = df->device; + struct vfio_device_attach_iommufd_pt attach; + unsigned long minsz; + int ret; + + minsz = offsetofend(struct vfio_device_attach_iommufd_pt, pt_id); + + if (copy_from_user(&attach, (void __user *)arg, minsz)) + return -EFAULT; + + if (attach.argsz < minsz || attach.flags || + attach.pt_id == IOMMUFD_INVALID_ID) + return -EINVAL; + + if (!device->ops->bind_iommufd) + return -ENODEV; + + mutex_lock(&device->dev_set->lock); + if (df->noiommu) { + ret = -EINVAL; + goto out_unlock; + } + + ret = device->ops->attach_ioas(device, &attach.pt_id); + if (ret) + goto out_unlock; + + ret = copy_to_user((void __user *)arg + + offsetofend(struct vfio_device_attach_iommufd_pt, flags), + &attach.pt_id, + sizeof(attach.pt_id)) ? -EFAULT : 0; + if (ret) + goto out_detach; + mutex_unlock(&device->dev_set->lock); + + return 0; + +out_detach: + device->ops->detach_ioas(device); +out_unlock: + mutex_unlock(&device->dev_set->lock); + return ret; +} + +int vfio_ioctl_device_detach(struct vfio_device_file *df, + void __user *arg) +{ + struct vfio_device *device = df->device; + struct vfio_device_detach_iommufd_pt detach; + unsigned long minsz; + + minsz = offsetofend(struct vfio_device_detach_iommufd_pt, flags); + + if (copy_from_user(&detach, (void __user *)arg, minsz)) + return -EFAULT; + + if (detach.argsz < minsz || detach.flags) + return -EINVAL; + + if (!device->ops->bind_iommufd) + return -ENODEV; + + mutex_lock(&device->dev_set->lock); + if (df->noiommu) { + mutex_unlock(&device->dev_set->lock); + return -EINVAL; + } + device->ops->detach_ioas(device); + mutex_unlock(&device->dev_set->lock); + + return 0; +} + static char *vfio_device_devnode(const struct device *dev, umode_t *mode) { return kasprintf(GFP_KERNEL, "vfio/devices/%s", dev_name(dev)); diff --git a/drivers/vfio/vfio.h b/drivers/vfio/vfio.h index 4716a904e63b..5a1ceb014779 100644 --- a/drivers/vfio/vfio.h +++ b/drivers/vfio/vfio.h @@ -274,6 +274,10 @@ int vfio_device_fops_cdev_open(struct inode *inode, struct file *filep); void vfio_device_cdev_close(struct vfio_device_file *df); long vfio_device_ioctl_bind_iommufd(struct vfio_device_file *df, unsigned long arg); +int vfio_ioctl_device_attach(struct vfio_device_file *df, + void __user *arg); +int vfio_ioctl_device_detach(struct vfio_device_file *df, + void __user *arg); int vfio_cdev_init(struct class *device_class); void vfio_cdev_cleanup(void); #else @@ -307,6 +311,18 @@ static inline long vfio_device_ioctl_bind_iommufd(struct vfio_device_file *df, return -EOPNOTSUPP; } +static inline int vfio_ioctl_device_attach(struct vfio_device_file *df, + void __user *arg) +{ + return -EOPNOTSUPP; +} + +static inline int vfio_ioctl_device_detach(struct vfio_device_file *df, + void __user *arg) +{ + return -EOPNOTSUPP; +} + static inline int vfio_cdev_init(struct class *device_class) { return 0; diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c index 69d0add930bb..f68550fe206f 100644 --- a/drivers/vfio/vfio_main.c +++ b/drivers/vfio/vfio_main.c @@ -1161,6 +1161,14 @@ static long vfio_device_fops_unl_ioctl(struct file *filep, ret = vfio_ioctl_device_feature(device, (void __user *)arg); break; + case VFIO_DEVICE_ATTACH_IOMMUFD_PT: + ret = vfio_ioctl_device_attach(df, (void __user *)arg); + break; + + case VFIO_DEVICE_DETACH_IOMMUFD_PT: + ret = vfio_ioctl_device_detach(df, (void __user *)arg); + break; + default: if (unlikely(!device->ops->ioctl)) ret = -EINVAL; diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index 92aa8dbc970a..ff8753d0abb0 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -228,6 +228,58 @@ struct vfio_device_bind_iommufd { #define VFIO_DEVICE_BIND_IOMMUFD _IO(VFIO_TYPE, VFIO_BASE + 19) +/* + * VFIO_DEVICE_ATTACH_IOMMUFD_PT - _IOW(VFIO_TYPE, VFIO_BASE + 20, + * struct vfio_device_attach_iommufd_pt) + * + * Attach a vfio device to an iommufd address space specified by IOAS + * id or hw_pagetable (hwpt) id. + * + * Available only after a device has been bound to iommufd via + * VFIO_DEVICE_BIND_IOMMUFD + * + * Undo by VFIO_DEVICE_DETACH_IOMMUFD_PT or device fd close. + * + * @argsz: user filled size of this data. + * @flags: must be 0. + * @pt_id: Input the target id which can represent an ioas or a hwpt + * allocated via iommufd subsystem. + * Output the attached hwpt id which could be the specified + * hwpt itself or a hwpt automatically created for the + * specified ioas by kernel during the attachment. + * + * Return: 0 on success, -errno on failure. + */ +struct vfio_device_attach_iommufd_pt { + __u32 argsz; + __u32 flags; + __u32 pt_id; +}; + +#define VFIO_DEVICE_ATTACH_IOMMUFD_PT _IO(VFIO_TYPE, VFIO_BASE + 20) + +/* + * VFIO_DEVICE_DETACH_IOMMUFD_PT - _IOW(VFIO_TYPE, VFIO_BASE + 21, + * struct vfio_device_detach_iommufd_pt) + * + * Detach a vfio device from the iommufd address space it has been + * attached to. After it, device should be in a blocking DMA state. + * + * Available only after a device has been bound to iommufd via + * VFIO_DEVICE_BIND_IOMMUFD + * + * @argsz: user filled size of this data. + * @flags: must be 0. + * + * Return: 0 on success, -errno on failure. + */ +struct vfio_device_detach_iommufd_pt { + __u32 argsz; + __u32 flags; +}; + +#define VFIO_DEVICE_DETACH_IOMMUFD_PT _IO(VFIO_TYPE, VFIO_BASE + 21) + /** * VFIO_DEVICE_GET_INFO - _IOR(VFIO_TYPE, VFIO_BASE + 7, * struct vfio_device_info)