diff mbox series

[v5,17/19] vfio: Add VFIO_DEVICE_AT[DE]TACH_IOMMUFD_PT

Message ID 20230227111135.61728-18-yi.l.liu@intel.com (mailing list archive)
State New, archived
Headers show
Series Add vfio_device cdev for iommufd support | expand

Commit Message

Yi Liu Feb. 27, 2023, 11:11 a.m. UTC
This adds ioctl for userspace to attach device cdev fd to and detach
from IOAS/hw_pagetable managed by iommufd.

    VFIO_DEVICE_ATTACH_IOMMUFD_PT: attach vfio device to IOAS, hw_pagetable
				   managed by iommufd. Attach can be
				   undo by VFIO_DEVICE_DETACH_IOMMUFD_PT
				   or device fd close.
    VFIO_DEVICE_DETACH_IOMMUFD_PT: detach vfio device from the current attached
				   IOAS or hw_pagetable managed by iommufd.

Signed-off-by: Yi Liu <yi.l.liu@intel.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
---
 drivers/vfio/device_cdev.c | 76 ++++++++++++++++++++++++++++++++++++++
 drivers/vfio/vfio.h        | 16 ++++++++
 drivers/vfio/vfio_main.c   |  8 ++++
 include/uapi/linux/vfio.h  | 52 ++++++++++++++++++++++++++
 4 files changed, 152 insertions(+)

Comments

Jason Gunthorpe Feb. 27, 2023, 6:39 p.m. UTC | #1
On Mon, Feb 27, 2023 at 03:11:33AM -0800, Yi Liu wrote:
> This adds ioctl for userspace to attach device cdev fd to and detach
> from IOAS/hw_pagetable managed by iommufd.
> 
>     VFIO_DEVICE_ATTACH_IOMMUFD_PT: attach vfio device to IOAS, hw_pagetable
> 				   managed by iommufd. Attach can be
> 				   undo by VFIO_DEVICE_DETACH_IOMMUFD_PT
> 				   or device fd close.
>     VFIO_DEVICE_DETACH_IOMMUFD_PT: detach vfio device from the current attached
> 				   IOAS or hw_pagetable managed by iommufd.
> 
> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
> Reviewed-by: Kevin Tian <kevin.tian@intel.com>
> ---
>  drivers/vfio/device_cdev.c | 76 ++++++++++++++++++++++++++++++++++++++
>  drivers/vfio/vfio.h        | 16 ++++++++
>  drivers/vfio/vfio_main.c   |  8 ++++
>  include/uapi/linux/vfio.h  | 52 ++++++++++++++++++++++++++
>  4 files changed, 152 insertions(+)
> 
> diff --git a/drivers/vfio/device_cdev.c b/drivers/vfio/device_cdev.c
> index 37f80e368551..5b5a249a6612 100644
> --- a/drivers/vfio/device_cdev.c
> +++ b/drivers/vfio/device_cdev.c
> @@ -191,6 +191,82 @@ long vfio_device_ioctl_bind_iommufd(struct vfio_device_file *df,
>  	return ret;
>  }
>  
> +int vfio_ioctl_device_attach(struct vfio_device_file *df,
> +			     void __user *arg)

This should be

struct vfio_device_attach_iommufd_pt __user *arg

> +{
> +	struct vfio_device *device = df->device;
> +	struct vfio_device_attach_iommufd_pt attach;
> +	unsigned long minsz;
> +	int ret;
> +
> +	minsz = offsetofend(struct vfio_device_attach_iommufd_pt, pt_id);
> +
> +	if (copy_from_user(&attach, (void __user *)arg, minsz))

No cast

> +		return -EFAULT;
> +
> +	if (attach.argsz < minsz || attach.flags ||
> +	    attach.pt_id == IOMMUFD_INVALID_ID)
> +		return -EINVAL;
> +
> +	if (!device->ops->bind_iommufd)
> +		return -ENODEV;
> +
> +	mutex_lock(&device->dev_set->lock);
> +	if (df->noiommu) {
> +		ret = -EINVAL;
> +		goto out_unlock;
> +	}
> +
> +	ret = device->ops->attach_ioas(device, &attach.pt_id);
> +	if (ret)
> +		goto out_unlock;
> +
> +	ret = copy_to_user((void __user *)arg +
> +			   offsetofend(struct vfio_device_attach_iommufd_pt, flags),

This should just be &arg->flags

> +			   &attach.pt_id,
> +			   sizeof(attach.pt_id)) ? -EFAULT : 0;

Also:

static_assert(__same_type(arg->flags), attach.pt_id);

> +	if (ret)
> +		goto out_detach;
> +	mutex_unlock(&device->dev_set->lock);
> +
> +	return 0;
> +
> +out_detach:
> +	device->ops->detach_ioas(device);


> +out_unlock:
> +	mutex_unlock(&device->dev_set->lock);
> +	return ret;
> +}
> +
> +int vfio_ioctl_device_detach(struct vfio_device_file *df,
> +			     void __user *arg)
> +{
> +	struct vfio_device *device = df->device;
> +	struct vfio_device_detach_iommufd_pt detach;
> +	unsigned long minsz;
> +
> +	minsz = offsetofend(struct vfio_device_detach_iommufd_pt, flags);
> +
> +	if (copy_from_user(&detach, (void __user *)arg, minsz))
> +		return -EFAULT;

Same comments here

> +
> +	if (detach.argsz < minsz || detach.flags)
> +		return -EINVAL;
> +
> +	if (!device->ops->bind_iommufd)
> +		return -ENODEV;
> +
> +	mutex_lock(&device->dev_set->lock);
> +	if (df->noiommu) {
> +		mutex_unlock(&device->dev_set->lock);
> +		return -EINVAL;
> +	}

This seems strange. no iommu mode should have a NULL dev->iommufctx.
Why do we have a df->noiommu at all?

Jason
Yi Liu Feb. 28, 2023, 2:51 a.m. UTC | #2
> From: Jason Gunthorpe <jgg@nvidia.com>
> Sent: Tuesday, February 28, 2023 2:39 AM
> 
> On Mon, Feb 27, 2023 at 03:11:33AM -0800, Yi Liu wrote:
> > This adds ioctl for userspace to attach device cdev fd to and detach
> > from IOAS/hw_pagetable managed by iommufd.
> >
> >     VFIO_DEVICE_ATTACH_IOMMUFD_PT: attach vfio device to IOAS,
> hw_pagetable
> > 				   managed by iommufd. Attach can be
> > 				   undo by
> VFIO_DEVICE_DETACH_IOMMUFD_PT
> > 				   or device fd close.
> >     VFIO_DEVICE_DETACH_IOMMUFD_PT: detach vfio device from the
> current attached
> > 				   IOAS or hw_pagetable managed by
> iommufd.
> >
> > Signed-off-by: Yi Liu <yi.l.liu@intel.com>
> > Reviewed-by: Kevin Tian <kevin.tian@intel.com>
> > ---
> >  drivers/vfio/device_cdev.c | 76
> ++++++++++++++++++++++++++++++++++++++
> >  drivers/vfio/vfio.h        | 16 ++++++++
> >  drivers/vfio/vfio_main.c   |  8 ++++
> >  include/uapi/linux/vfio.h  | 52 ++++++++++++++++++++++++++
> >  4 files changed, 152 insertions(+)
> >
> > diff --git a/drivers/vfio/device_cdev.c b/drivers/vfio/device_cdev.c
> > index 37f80e368551..5b5a249a6612 100644
> > --- a/drivers/vfio/device_cdev.c
> > +++ b/drivers/vfio/device_cdev.c
> > @@ -191,6 +191,82 @@ long vfio_device_ioctl_bind_iommufd(struct
> vfio_device_file *df,
> >  	return ret;
> >  }
> >
> > +int vfio_ioctl_device_attach(struct vfio_device_file *df,
> > +			     void __user *arg)
> 
> This should be
> 
> struct vfio_device_attach_iommufd_pt __user *arg

Got it.

> > +{
> > +	struct vfio_device *device = df->device;
> > +	struct vfio_device_attach_iommufd_pt attach;
> > +	unsigned long minsz;
> > +	int ret;
> > +
> > +	minsz = offsetofend(struct vfio_device_attach_iommufd_pt, pt_id);
> > +
> > +	if (copy_from_user(&attach, (void __user *)arg, minsz))
> 
> No cast

Yes.

> > +		return -EFAULT;
> > +
> > +	if (attach.argsz < minsz || attach.flags ||
> > +	    attach.pt_id == IOMMUFD_INVALID_ID)
> > +		return -EINVAL;
> > +
> > +	if (!device->ops->bind_iommufd)
> > +		return -ENODEV;
> > +
> > +	mutex_lock(&device->dev_set->lock);
> > +	if (df->noiommu) {
> > +		ret = -EINVAL;
> > +		goto out_unlock;
> > +	}
> > +
> > +	ret = device->ops->attach_ioas(device, &attach.pt_id);
> > +	if (ret)
> > +		goto out_unlock;
> > +
> > +	ret = copy_to_user((void __user *)arg +
> > +			   offsetofend(struct
> vfio_device_attach_iommufd_pt, flags),
> 
> This should just be &arg->flags

Yes, can use arg->xxx here. I guess you mean &arg->pt_id.

> 
> > +			   &attach.pt_id,
> > +			   sizeof(attach.pt_id)) ? -EFAULT : 0;
> 
> Also:
> 
> static_assert(__same_type(arg->flags), attach.pt_id);

Got it. but s/arg->flags/arg->pt_id/

> > +	if (ret)
> > +		goto out_detach;
> > +	mutex_unlock(&device->dev_set->lock);
> > +
> > +	return 0;
> > +
> > +out_detach:
> > +	device->ops->detach_ioas(device);
> 
> 
> > +out_unlock:
> > +	mutex_unlock(&device->dev_set->lock);
> > +	return ret;
> > +}
> > +
> > +int vfio_ioctl_device_detach(struct vfio_device_file *df,
> > +			     void __user *arg)
> > +{
> > +	struct vfio_device *device = df->device;
> > +	struct vfio_device_detach_iommufd_pt detach;
> > +	unsigned long minsz;
> > +
> > +	minsz = offsetofend(struct vfio_device_detach_iommufd_pt, flags);
> > +
> > +	if (copy_from_user(&detach, (void __user *)arg, minsz))
> > +		return -EFAULT;
> 
> Same comments here

Sure.
 
> > +
> > +	if (detach.argsz < minsz || detach.flags)
> > +		return -EINVAL;
> > +
> > +	if (!device->ops->bind_iommufd)
> > +		return -ENODEV;
> > +
> > +	mutex_lock(&device->dev_set->lock);
> > +	if (df->noiommu) {
> > +		mutex_unlock(&device->dev_set->lock);
> > +		return -EINVAL;
> > +	}
> 
> This seems strange. no iommu mode should have a NULL dev->iommufctx.
> Why do we have a df->noiommu at all?

This is due to the vfio_device_first_open(). Detail as below comment (part of
patch 0016).

+	/*
+	 * For group/container path, iommufd pointer is NULL when comes
+	 * into this helper. Its noiommu support is handled by
+	 * vfio_device_group_use_iommu()
+	 *
+	 * For iommufd compat mode, iommufd pointer here is a valid value.
+	 * Its noiommu support is in vfio_iommufd_bind().
+	 *
+	 * For device cdev path, iommufd pointer here is a valid value for
+	 * normal cases, but it is NULL if it's noiommu. Check df->noiommu
+	 * to differentiate cdev noiommu from the group/container path which
+	 * also passes NULL iommufd pointer in. If set then do nothing.
+	 */
 	if (iommufd)
 		ret = vfio_iommufd_bind(device, iommufd, dev_id, pt_id);
-	else
+	else if (!df->noiommu)
 		ret = vfio_device_group_use_iommu(device);
 	if (ret)
 		goto err_module_put;

Regards,
Yi Liu
Jason Gunthorpe Feb. 28, 2023, 12:32 p.m. UTC | #3
On Tue, Feb 28, 2023 at 02:51:28AM +0000, Liu, Yi L wrote:
> > This seems strange. no iommu mode should have a NULL dev->iommufctx.
> > Why do we have a df->noiommu at all?
> 
> This is due to the vfio_device_first_open(). Detail as below comment (part of
> patch 0016).
> 
> +	/*
> +	 * For group/container path, iommufd pointer is NULL when comes
> +	 * into this helper. Its noiommu support is handled by
> +	 * vfio_device_group_use_iommu()
> +	 *
> +	 * For iommufd compat mode, iommufd pointer here is a valid value.
> +	 * Its noiommu support is in vfio_iommufd_bind().
> +	 *
> +	 * For device cdev path, iommufd pointer here is a valid value for
> +	 * normal cases, but it is NULL if it's noiommu. Check df->noiommu
> +	 * to differentiate cdev noiommu from the group/container path which
> +	 * also passes NULL iommufd pointer in. If set then do nothing.
> +	 */

If the group is in iommufd mode then it should set this pointer too.

Jason
Yi Liu Feb. 28, 2023, 12:42 p.m. UTC | #4
> From: Jason Gunthorpe <jgg@nvidia.com>
> Sent: Tuesday, February 28, 2023 8:32 PM
> 
> On Tue, Feb 28, 2023 at 02:51:28AM +0000, Liu, Yi L wrote:
> > > This seems strange. no iommu mode should have a NULL dev-
> >iommufctx.
> > > Why do we have a df->noiommu at all?
> >
> > This is due to the vfio_device_first_open(). Detail as below comment (part
> of
> > patch 0016).
> >
> > +	/*
> > +	 * For group/container path, iommufd pointer is NULL when comes
> > +	 * into this helper. Its noiommu support is handled by
> > +	 * vfio_device_group_use_iommu()
> > +	 *
> > +	 * For iommufd compat mode, iommufd pointer here is a valid value.
> > +	 * Its noiommu support is in vfio_iommufd_bind().
> > +	 *
> > +	 * For device cdev path, iommufd pointer here is a valid value for
> > +	 * normal cases, but it is NULL if it's noiommu. Check df->noiommu
> > +	 * to differentiate cdev noiommu from the group/container path
> which
> > +	 * also passes NULL iommufd pointer in. If set then do nothing.
> > +	 */
> 
> If the group is in iommufd mode then it should set this pointer too.

Yes, but the key point is that both the group in legacy mode and the
cdev path sets iommufd==NULL. And the handling for the two should
be different. So needs this extra info to differentiate them in
vfio_device_first_open().

Regards,
Yi Liu
Jason Gunthorpe Feb. 28, 2023, 12:53 p.m. UTC | #5
On Tue, Feb 28, 2023 at 12:42:31PM +0000, Liu, Yi L wrote:
> > From: Jason Gunthorpe <jgg@nvidia.com>
> > Sent: Tuesday, February 28, 2023 8:32 PM
> > 
> > On Tue, Feb 28, 2023 at 02:51:28AM +0000, Liu, Yi L wrote:
> > > > This seems strange. no iommu mode should have a NULL dev-
> > >iommufctx.
> > > > Why do we have a df->noiommu at all?
> > >
> > > This is due to the vfio_device_first_open(). Detail as below comment (part
> > of
> > > patch 0016).
> > >
> > > +	/*
> > > +	 * For group/container path, iommufd pointer is NULL when comes
> > > +	 * into this helper. Its noiommu support is handled by
> > > +	 * vfio_device_group_use_iommu()
> > > +	 *
> > > +	 * For iommufd compat mode, iommufd pointer here is a valid value.
> > > +	 * Its noiommu support is in vfio_iommufd_bind().
> > > +	 *
> > > +	 * For device cdev path, iommufd pointer here is a valid value for
> > > +	 * normal cases, but it is NULL if it's noiommu. Check df->noiommu
> > > +	 * to differentiate cdev noiommu from the group/container path
> > which
> > > +	 * also passes NULL iommufd pointer in. If set then do nothing.
> > > +	 */
> > 
> > If the group is in iommufd mode then it should set this pointer too.
> 
> Yes, but the key point is that both the group in legacy mode and the
> cdev path sets iommufd==NULL. And the handling for the two should
> be different. So needs this extra info to differentiate them in
> vfio_device_first_open().

Don't encode that in the iommufd pointer, it is confusing.

A null iommufd pointer and a bound df flag is sufficient to see that
it is compat mode.

Jason
Yi Liu Feb. 28, 2023, 1:22 p.m. UTC | #6
> From: Jason Gunthorpe <jgg@nvidia.com>
> Sent: Tuesday, February 28, 2023 8:54 PM
> 
> On Tue, Feb 28, 2023 at 12:42:31PM +0000, Liu, Yi L wrote:
> > > From: Jason Gunthorpe <jgg@nvidia.com>
> > > Sent: Tuesday, February 28, 2023 8:32 PM
> > >
> > > On Tue, Feb 28, 2023 at 02:51:28AM +0000, Liu, Yi L wrote:
> > > > > This seems strange. no iommu mode should have a NULL dev-
> > > >iommufctx.
> > > > > Why do we have a df->noiommu at all?
> > > >
> > > > This is due to the vfio_device_first_open(). Detail as below comment
> (part
> > > of
> > > > patch 0016).
> > > >
> > > > +	/*
> > > > +	 * For group/container path, iommufd pointer is NULL when comes
> > > > +	 * into this helper. Its noiommu support is handled by
> > > > +	 * vfio_device_group_use_iommu()
> > > > +	 *
> > > > +	 * For iommufd compat mode, iommufd pointer here is a valid value.
> > > > +	 * Its noiommu support is in vfio_iommufd_bind().
> > > > +	 *
> > > > +	 * For device cdev path, iommufd pointer here is a valid value for
> > > > +	 * normal cases, but it is NULL if it's noiommu. Check df->noiommu
> > > > +	 * to differentiate cdev noiommu from the group/container path
> > > which
> > > > +	 * also passes NULL iommufd pointer in. If set then do nothing.
> > > > +	 */
> > >
> > > If the group is in iommufd mode then it should set this pointer too.
> >
> > Yes, but the key point is that both the group in legacy mode and the
> > cdev path sets iommufd==NULL. And the handling for the two should
> > be different. So needs this extra info to differentiate them in
> > vfio_device_first_open().
> 
> Don't encode that in the iommufd pointer, it is confusing.

Maybe I failed to make it clear. As the below code, When
iommufd==!NULL, no need to differentiate whether it is
the group compat mode or the cdev path. But if iommufd==NULL,
it may be the legacy group code or the cdev noiommu mode. So
df->noiommu is added. But I agree this noiommu flag is confusing.
May use the df->is_cdev_device flag as the purpose here is to
differentiate cdev path and group path.

	if (iommufd)
		ret = vfio_iommufd_bind(device, iommufd, dev_id);
	else if (!df->noiommu)
		ret = vfio_device_group_use_iommu(device);
	if (ret)
		goto err_module_put;


> A null iommufd pointer and a bound df flag is sufficient to see that
> it is compat mode.

Hope df->is_cdev_device suits your expectation.:-) The code will look
like below:

	if (iommufd) {
		ret = vfio_iommufd_bind(device, iommufd, dev_id);

		if (!ret && !df->is_cdev_device) {
			ret = vfio_iommufd_attach_compat(device); // new helper as in patch 10 discussed
			if (ret)
				vfio_iommufd_unbind(device);
		}
	} else if (!df->is_cdev_device) {
		ret = vfio_device_group_use_iommu(device);
	}
	if (ret)
		goto err_module_put;

Regards,
Yi Liu
Jason Gunthorpe Feb. 28, 2023, 1:25 p.m. UTC | #7
On Tue, Feb 28, 2023 at 01:22:50PM +0000, Liu, Yi L wrote:

> > A null iommufd pointer and a bound df flag is sufficient to see that
> > it is compat mode.
> 
> Hope df->is_cdev_device suits your expectation.:-) The code will look
> like below:

Yes, this is better.. However I'd suggest 'uses_container' as it is
clearer what the special case is

Jason
Yi Liu Feb. 28, 2023, 1:36 p.m. UTC | #8
> From: Jason Gunthorpe <jgg@nvidia.com>
> Sent: Tuesday, February 28, 2023 9:26 PM
> 
> On Tue, Feb 28, 2023 at 01:22:50PM +0000, Liu, Yi L wrote:
> 
> > > A null iommufd pointer and a bound df flag is sufficient to see that
> > > it is compat mode.
> >
> > Hope df->is_cdev_device suits your expectation.:-) The code will look
> > like below:
> 
> Yes, this is better.. However I'd suggest 'uses_container' as it is
> clearer what the special case is

Surely doable. Need to add a helper like below:

bool vfio_device_group_uses_container()
{
	lockdep_assert_held(&device->group->group_lock);
	return device->group->container;
}

But I'm poor at naming it. If it is true, the code would call
vfio_device_group_use_iommu(). If doing it in this way,
I think it's better to rename vfio_device_group_use_iommu()
as well.

Regards,
Yi Liu
Jason Gunthorpe Feb. 28, 2023, 1:43 p.m. UTC | #9
On Tue, Feb 28, 2023 at 01:36:24PM +0000, Liu, Yi L wrote:
> > From: Jason Gunthorpe <jgg@nvidia.com>
> > Sent: Tuesday, February 28, 2023 9:26 PM
> > 
> > On Tue, Feb 28, 2023 at 01:22:50PM +0000, Liu, Yi L wrote:
> > 
> > > > A null iommufd pointer and a bound df flag is sufficient to see that
> > > > it is compat mode.
> > >
> > > Hope df->is_cdev_device suits your expectation.:-) The code will look
> > > like below:
> > 
> > Yes, this is better.. However I'd suggest 'uses_container' as it is
> > clearer what the special case is
> 
> Surely doable. Need to add a helper like below:
> 
> bool vfio_device_group_uses_container()
> {
> 	lockdep_assert_held(&device->group->group_lock);
> 	return device->group->container;
> }

It should come from the df.

If you have a df then by definition:
  smp_load_acquire(..) == false     - Not bound
  df->device->iommufd_ctx != NULL   - Using iommufd
  df->group->containter != NULL     - Using legacy container
  all other cases                   - NO_IOMMU                

No locking required since all these cases after the smp_load_acquire
must be fixed for the lifetime of the df.

Jason
Yi Liu Feb. 28, 2023, 2:01 p.m. UTC | #10
> From: Jason Gunthorpe <jgg@nvidia.com>
> Sent: Tuesday, February 28, 2023 9:44 PM
> 
> On Tue, Feb 28, 2023 at 01:36:24PM +0000, Liu, Yi L wrote:
> > > From: Jason Gunthorpe <jgg@nvidia.com>
> > > Sent: Tuesday, February 28, 2023 9:26 PM
> > >
> > > On Tue, Feb 28, 2023 at 01:22:50PM +0000, Liu, Yi L wrote:
> > >
> > > > > A null iommufd pointer and a bound df flag is sufficient to see that
> > > > > it is compat mode.
> > > >
> > > > Hope df->is_cdev_device suits your expectation.:-) The code will look
> > > > like below:
> > >
> > > Yes, this is better.. However I'd suggest 'uses_container' as it is
> > > clearer what the special case is
> >
> > Surely doable. Need to add a helper like below:
> >
> > bool vfio_device_group_uses_container()
> > {
> > 	lockdep_assert_held(&device->group->group_lock);
> > 	return device->group->container;
> > }
> 
> It should come from the df.
> 
> If you have a df then by definition:
>   smp_load_acquire(..) == false     - Not bound
>   df->device->iommufd_ctx != NULL   - Using iommufd
>   df->group->containter != NULL     - Using legacy container
>   all other cases                   - NO_IOMMU
> 
> No locking required since all these cases after the smp_load_acquire
> must be fixed for the lifetime of the df.

Do you mean the df->access_granted (introduced in patch 07) or a new flag?
Following your suggestion, it seems a mandatory requirement to do the
smp_load_acquire(..) == false check first, and then call into the vfio_device_open()
which further calls vfio_device_first_open() to check the iommufd/
legacy container/noiommu stuffs. Is it?

df->group->containter this may need a helper to avoid decoding group
field. May be just store container in df?

Regards,
Yi Liu
Jason Gunthorpe Feb. 28, 2023, 2:38 p.m. UTC | #11
On Tue, Feb 28, 2023 at 02:01:36PM +0000, Liu, Yi L wrote:
> > From: Jason Gunthorpe <jgg@nvidia.com>
> > Sent: Tuesday, February 28, 2023 9:44 PM
> > 
> > On Tue, Feb 28, 2023 at 01:36:24PM +0000, Liu, Yi L wrote:
> > > > From: Jason Gunthorpe <jgg@nvidia.com>
> > > > Sent: Tuesday, February 28, 2023 9:26 PM
> > > >
> > > > On Tue, Feb 28, 2023 at 01:22:50PM +0000, Liu, Yi L wrote:
> > > >
> > > > > > A null iommufd pointer and a bound df flag is sufficient to see that
> > > > > > it is compat mode.
> > > > >
> > > > > Hope df->is_cdev_device suits your expectation.:-) The code will look
> > > > > like below:
> > > >
> > > > Yes, this is better.. However I'd suggest 'uses_container' as it is
> > > > clearer what the special case is
> > >
> > > Surely doable. Need to add a helper like below:
> > >
> > > bool vfio_device_group_uses_container()
> > > {
> > > 	lockdep_assert_held(&device->group->group_lock);
> > > 	return device->group->container;
> > > }
> > 
> > It should come from the df.
> > 
> > If you have a df then by definition:
> >   smp_load_acquire(..) == false     - Not bound
> >   df->device->iommufd_ctx != NULL   - Using iommufd
> >   df->group->containter != NULL     - Using legacy container
> >   all other cases                   - NO_IOMMU
> > 
> > No locking required since all these cases after the smp_load_acquire
> > must be fixed for the lifetime of the df.
> 
> Do you mean the df->access_granted (introduced in patch 07) or a new
> flag?

yes

> Following your suggestion, it seems a mandatory requirement to do the
> smp_load_acquire(..) == false check first, and then call into the vfio_device_open()
> which further calls vfio_device_first_open() to check the iommufd/
> legacy container/noiommu stuffs. Is it?

Figuring out if an open should happen or not is a different operation,
you already build exclusion between cdev/group so we don't need to
care about the open path. 

> df->group->containter this may need a helper to avoid decoding group
> field. May be just store container in df?

At worst a flag, but a helper seems like a good idea anyhow, then it
can be compiled out

Jason
Yi Liu March 1, 2023, 2:04 p.m. UTC | #12
> From: Jason Gunthorpe <jgg@nvidia.com>
> Sent: Tuesday, February 28, 2023 10:38 PM
> 
> On Tue, Feb 28, 2023 at 02:01:36PM +0000, Liu, Yi L wrote:
> > > From: Jason Gunthorpe <jgg@nvidia.com>
> > > Sent: Tuesday, February 28, 2023 9:44 PM
> > >
> > > On Tue, Feb 28, 2023 at 01:36:24PM +0000, Liu, Yi L wrote:
> > > > > From: Jason Gunthorpe <jgg@nvidia.com>
> > > > > Sent: Tuesday, February 28, 2023 9:26 PM
> > > > >
> > > > > On Tue, Feb 28, 2023 at 01:22:50PM +0000, Liu, Yi L wrote:
> > > > >
> > > > > > > A null iommufd pointer and a bound df flag is sufficient to see
> that
> > > > > > > it is compat mode.
> > > > > >
> > > > > > Hope df->is_cdev_device suits your expectation.:-) The code will
> look
> > > > > > like below:
> > > > >
> > > > > Yes, this is better.. However I'd suggest 'uses_container' as it is
> > > > > clearer what the special case is
> > > >
> > > > Surely doable. Need to add a helper like below:
> > > >
> > > > bool vfio_device_group_uses_container()
> > > > {
> > > > 	lockdep_assert_held(&device->group->group_lock);
> > > > 	return device->group->container;
> > > > }
> > >
> > > It should come from the df.
> > >
> > > If you have a df then by definition:
> > >   smp_load_acquire(..) == false     - Not bound
> > >   df->device->iommufd_ctx != NULL   - Using iommufd
> > >   df->group->containter != NULL     - Using legacy container
> > >   all other cases                   - NO_IOMMU
> > >
> > > No locking required since all these cases after the smp_load_acquire
> > > must be fixed for the lifetime of the df.
> >
> > Do you mean the df->access_granted (introduced in patch 07) or a new
> > flag?
> 
> yes
> 
> > Following your suggestion, it seems a mandatory requirement to do the
> > smp_load_acquire(..) == false check first, and then call into the
> vfio_device_open()
> > which further calls vfio_device_first_open() to check the iommufd/
> > legacy container/noiommu stuffs. Is it?
> 
> Figuring out if an open should happen or not is a different operation,
> you already build exclusion between cdev/group so we don't need to
> care about the open path.

Ok.
 
> > df->group->containter this may need a helper to avoid decoding group
> > field. May be just store container in df?
> 
> At worst a flag, but a helper seems like a good idea anyhow, then it
> can be compiled out

I add a separate commit as below. vfio_device_group_uses_container() is
added.

From 0ce86e6b71d1884e9f5de30ba23e3aa93cc84db9 Mon Sep 17 00:00:00 2001
From: Yi Liu <yi.l.liu@intel.com>
Date: Wed, 1 Mar 2023 02:24:43 -0800
Subject: [PATCH 15/22] vfio: Make vfio_device_first_open() to cover the
 noiommu mode in cdev path

vfio_device_first_open() now covers the below two cases:

1) user uses iommufd (e.g. the group path in iommufd compat mode);
2) user uses container (e.g. the group path in legacy mode);

The above two paths have their own noiommu mode support accordingly.

The cdev path also uses iommufd, so for the case user provides a valid
iommufd, this helper is able to support it. But for noiommu mode, the
cdev path just provides a NULL iommufd. So this needs to be able to cover
it. As there is no special things to do for the cdev path in noiommu
mode, it can be covered by simply differentiate it from the container
case. If user is not using iommufd nor container, it is the noiommu
mode.

Signed-off-by: Yi Liu <yi.l.liu@intel.com>
---
 drivers/vfio/group.c     |  5 +++++
 drivers/vfio/vfio.h      |  1 +
 drivers/vfio/vfio_main.c | 19 ++++++++++++++++---
 3 files changed, 22 insertions(+), 3 deletions(-)

diff --git a/drivers/vfio/group.c b/drivers/vfio/group.c
index 2a13442add43..ed3ffe7ceb3f 100644
--- a/drivers/vfio/group.c
+++ b/drivers/vfio/group.c
@@ -777,6 +777,11 @@ void vfio_device_group_unregister(struct vfio_device *device)
 	mutex_unlock(&device->group->device_lock);
 }
 
+bool vfio_device_group_uses_container(struct vfio_device *device)
+{
+	return READ_ONCE(device->group->container);
+}
+
 int vfio_device_group_use_iommu(struct vfio_device *device)
 {
 	struct vfio_group *group = device->group;
diff --git a/drivers/vfio/vfio.h b/drivers/vfio/vfio.h
index 68d35e1d7b87..e1f5a0310551 100644
--- a/drivers/vfio/vfio.h
+++ b/drivers/vfio/vfio.h
@@ -95,6 +95,7 @@ int vfio_device_set_group(struct vfio_device *device,
 void vfio_device_remove_group(struct vfio_device *device);
 void vfio_device_group_register(struct vfio_device *device);
 void vfio_device_group_unregister(struct vfio_device *device);
+bool vfio_device_group_uses_container(struct vfio_device *device);
 int vfio_device_group_use_iommu(struct vfio_device *device);
 void vfio_device_group_unuse_iommu(struct vfio_device *device);
 void vfio_device_group_close(struct vfio_device_file *df);
diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c
index 121a75fadceb..4b5b17e8aaa1 100644
--- a/drivers/vfio/vfio_main.c
+++ b/drivers/vfio/vfio_main.c
@@ -422,9 +422,22 @@ static int vfio_device_first_open(struct vfio_device_file *df)
 	if (!try_module_get(device->dev->driver->owner))
 		return -ENODEV;
 
+	/*
+	 * The handling here depends on what the user is using.
+	 *
+	 * If user uses iommufd in the group compat mode or the
+	 * cdev path, call vfio_iommufd_bind().
+	 *
+	 * If user uses container in the group legacy mode, call
+	 * vfio_device_group_use_iommu().
+	 *
+	 * If user doesn't use iommufd nor container, this is
+	 * the noiommufd mode in the cdev path, nothing needs
+	 * to be done here just go ahead to open device.
+	 */
 	if (iommufd)
 		ret = vfio_iommufd_bind(device, iommufd);
-	else
+	else if (vfio_device_group_uses_container(device))
 		ret = vfio_device_group_use_iommu(device);
 	if (ret)
 		goto err_module_put;
@@ -439,7 +452,7 @@ static int vfio_device_first_open(struct vfio_device_file *df)
 err_unuse_iommu:
 	if (iommufd)
 		vfio_iommufd_unbind(device);
-	else
+	else if (vfio_device_group_uses_container(device))
 		vfio_device_group_unuse_iommu(device);
 err_module_put:
 	module_put(device->dev->driver->owner);
@@ -457,7 +470,7 @@ static void vfio_device_last_close(struct vfio_device_file *df)
 		device->ops->close_device(device);
 	if (iommufd)
 		vfio_iommufd_unbind(device);
-	else
+	else if (vfio_device_group_uses_container(device))
 		vfio_device_group_unuse_iommu(device);
 	module_put(device->dev->driver->owner);
 }
Jason Gunthorpe March 1, 2023, 5:49 p.m. UTC | #13
On Wed, Mar 01, 2023 at 02:04:00PM +0000, Liu, Yi L wrote:
> diff --git a/drivers/vfio/group.c b/drivers/vfio/group.c
> index 2a13442add43..ed3ffe7ceb3f 100644
> --- a/drivers/vfio/group.c
> +++ b/drivers/vfio/group.c
> @@ -777,6 +777,11 @@ void vfio_device_group_unregister(struct vfio_device *device)
>  	mutex_unlock(&device->group->device_lock);
>  }
>  
> +bool vfio_device_group_uses_container(struct vfio_device *device)
> +{
> +	return READ_ONCE(device->group->container);
> +}

As I said this should take in the vfio_device_file because as long as
a vfio_device_file exists then group->contianer is required to be stable.

> diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c
> index 121a75fadceb..4b5b17e8aaa1 100644
> --- a/drivers/vfio/vfio_main.c
> +++ b/drivers/vfio/vfio_main.c
> @@ -422,9 +422,22 @@ static int vfio_device_first_open(struct vfio_device_file *df)
>  	if (!try_module_get(device->dev->driver->owner))
>  		return -ENODEV;
>  
> +	/*
> +	 * The handling here depends on what the user is using.
> +	 *
> +	 * If user uses iommufd in the group compat mode or the
> +	 * cdev path, call vfio_iommufd_bind().
> +	 *
> +	 * If user uses container in the group legacy mode, call
> +	 * vfio_device_group_use_iommu().
> +	 *
> +	 * If user doesn't use iommufd nor container, this is
> +	 * the noiommufd mode in the cdev path, nothing needs
> +	 * to be done here just go ahead to open device.
> +	 */
>  	if (iommufd)
>  		ret = vfio_iommufd_bind(device, iommufd);
> -	else
> +	else if (vfio_device_group_uses_container(device))
>  		ret = vfio_device_group_use_iommu(device);

But yes, this makes alot more sense..

Jason
Yi Liu March 2, 2023, 3:24 a.m. UTC | #14
> From: Jason Gunthorpe <jgg@nvidia.com>
> Sent: Thursday, March 2, 2023 1:49 AM
> 
> On Wed, Mar 01, 2023 at 02:04:00PM +0000, Liu, Yi L wrote:
> > diff --git a/drivers/vfio/group.c b/drivers/vfio/group.c
> > index 2a13442add43..ed3ffe7ceb3f 100644
> > --- a/drivers/vfio/group.c
> > +++ b/drivers/vfio/group.c
> > @@ -777,6 +777,11 @@ void vfio_device_group_unregister(struct
> vfio_device *device)
> >  	mutex_unlock(&device->group->device_lock);
> >  }
> >
> > +bool vfio_device_group_uses_container(struct vfio_device *device)
> > +{
> > +	return READ_ONCE(device->group->container);
> > +}
> 
> As I said this should take in the vfio_device_file because as long as
> a vfio_device_file exists then group->contianer is required to be stable.

Ok, let me store vfio_group in vfio_devcie_file instead of reach
it by df->device->group.

btw. With vfio_group stored in vfio_device_file, it looks like
the is_cdev_device flag (introduced in patch 14) is not necessary
now, we can always define the group pointer in vfio_device_file
even group code is compiled out, then we can use this group
pointer to check if the vfio_device_file is used in the group path
or the cdev path. Is it?

> > diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c
> > index 121a75fadceb..4b5b17e8aaa1 100644
> > --- a/drivers/vfio/vfio_main.c
> > +++ b/drivers/vfio/vfio_main.c
> > @@ -422,9 +422,22 @@ static int vfio_device_first_open(struct
> vfio_device_file *df)
> >  	if (!try_module_get(device->dev->driver->owner))
> >  		return -ENODEV;
> >
> > +	/*
> > +	 * The handling here depends on what the user is using.
> > +	 *
> > +	 * If user uses iommufd in the group compat mode or the
> > +	 * cdev path, call vfio_iommufd_bind().
> > +	 *
> > +	 * If user uses container in the group legacy mode, call
> > +	 * vfio_device_group_use_iommu().
> > +	 *
> > +	 * If user doesn't use iommufd nor container, this is
> > +	 * the noiommufd mode in the cdev path, nothing needs
> > +	 * to be done here just go ahead to open device.
> > +	 */
> >  	if (iommufd)
> >  		ret = vfio_iommufd_bind(device, iommufd);
> > -	else
> > +	else if (vfio_device_group_uses_container(device))
> >  		ret = vfio_device_group_use_iommu(device);
> 
> But yes, this makes alot more sense..
> 
> Jason

Regards,
Yi Liu
diff mbox series

Patch

diff --git a/drivers/vfio/device_cdev.c b/drivers/vfio/device_cdev.c
index 37f80e368551..5b5a249a6612 100644
--- a/drivers/vfio/device_cdev.c
+++ b/drivers/vfio/device_cdev.c
@@ -191,6 +191,82 @@  long vfio_device_ioctl_bind_iommufd(struct vfio_device_file *df,
 	return ret;
 }
 
+int vfio_ioctl_device_attach(struct vfio_device_file *df,
+			     void __user *arg)
+{
+	struct vfio_device *device = df->device;
+	struct vfio_device_attach_iommufd_pt attach;
+	unsigned long minsz;
+	int ret;
+
+	minsz = offsetofend(struct vfio_device_attach_iommufd_pt, pt_id);
+
+	if (copy_from_user(&attach, (void __user *)arg, minsz))
+		return -EFAULT;
+
+	if (attach.argsz < minsz || attach.flags ||
+	    attach.pt_id == IOMMUFD_INVALID_ID)
+		return -EINVAL;
+
+	if (!device->ops->bind_iommufd)
+		return -ENODEV;
+
+	mutex_lock(&device->dev_set->lock);
+	if (df->noiommu) {
+		ret = -EINVAL;
+		goto out_unlock;
+	}
+
+	ret = device->ops->attach_ioas(device, &attach.pt_id);
+	if (ret)
+		goto out_unlock;
+
+	ret = copy_to_user((void __user *)arg +
+			   offsetofend(struct vfio_device_attach_iommufd_pt, flags),
+			   &attach.pt_id,
+			   sizeof(attach.pt_id)) ? -EFAULT : 0;
+	if (ret)
+		goto out_detach;
+	mutex_unlock(&device->dev_set->lock);
+
+	return 0;
+
+out_detach:
+	device->ops->detach_ioas(device);
+out_unlock:
+	mutex_unlock(&device->dev_set->lock);
+	return ret;
+}
+
+int vfio_ioctl_device_detach(struct vfio_device_file *df,
+			     void __user *arg)
+{
+	struct vfio_device *device = df->device;
+	struct vfio_device_detach_iommufd_pt detach;
+	unsigned long minsz;
+
+	minsz = offsetofend(struct vfio_device_detach_iommufd_pt, flags);
+
+	if (copy_from_user(&detach, (void __user *)arg, minsz))
+		return -EFAULT;
+
+	if (detach.argsz < minsz || detach.flags)
+		return -EINVAL;
+
+	if (!device->ops->bind_iommufd)
+		return -ENODEV;
+
+	mutex_lock(&device->dev_set->lock);
+	if (df->noiommu) {
+		mutex_unlock(&device->dev_set->lock);
+		return -EINVAL;
+	}
+	device->ops->detach_ioas(device);
+	mutex_unlock(&device->dev_set->lock);
+
+	return 0;
+}
+
 static char *vfio_device_devnode(const struct device *dev, umode_t *mode)
 {
 	return kasprintf(GFP_KERNEL, "vfio/devices/%s", dev_name(dev));
diff --git a/drivers/vfio/vfio.h b/drivers/vfio/vfio.h
index 4716a904e63b..5a1ceb014779 100644
--- a/drivers/vfio/vfio.h
+++ b/drivers/vfio/vfio.h
@@ -274,6 +274,10 @@  int vfio_device_fops_cdev_open(struct inode *inode, struct file *filep);
 void vfio_device_cdev_close(struct vfio_device_file *df);
 long vfio_device_ioctl_bind_iommufd(struct vfio_device_file *df,
 				    unsigned long arg);
+int vfio_ioctl_device_attach(struct vfio_device_file *df,
+			     void __user *arg);
+int vfio_ioctl_device_detach(struct vfio_device_file *df,
+			     void __user *arg);
 int vfio_cdev_init(struct class *device_class);
 void vfio_cdev_cleanup(void);
 #else
@@ -307,6 +311,18 @@  static inline long vfio_device_ioctl_bind_iommufd(struct vfio_device_file *df,
 	return -EOPNOTSUPP;
 }
 
+static inline int vfio_ioctl_device_attach(struct vfio_device_file *df,
+					   void __user *arg)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int vfio_ioctl_device_detach(struct vfio_device_file *df,
+					   void __user *arg)
+{
+	return -EOPNOTSUPP;
+}
+
 static inline int vfio_cdev_init(struct class *device_class)
 {
 	return 0;
diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c
index 69d0add930bb..f68550fe206f 100644
--- a/drivers/vfio/vfio_main.c
+++ b/drivers/vfio/vfio_main.c
@@ -1161,6 +1161,14 @@  static long vfio_device_fops_unl_ioctl(struct file *filep,
 		ret = vfio_ioctl_device_feature(device, (void __user *)arg);
 		break;
 
+	case VFIO_DEVICE_ATTACH_IOMMUFD_PT:
+		ret = vfio_ioctl_device_attach(df, (void __user *)arg);
+		break;
+
+	case VFIO_DEVICE_DETACH_IOMMUFD_PT:
+		ret = vfio_ioctl_device_detach(df, (void __user *)arg);
+		break;
+
 	default:
 		if (unlikely(!device->ops->ioctl))
 			ret = -EINVAL;
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index 92aa8dbc970a..ff8753d0abb0 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -228,6 +228,58 @@  struct vfio_device_bind_iommufd {
 
 #define VFIO_DEVICE_BIND_IOMMUFD	_IO(VFIO_TYPE, VFIO_BASE + 19)
 
+/*
+ * VFIO_DEVICE_ATTACH_IOMMUFD_PT - _IOW(VFIO_TYPE, VFIO_BASE + 20,
+ *					struct vfio_device_attach_iommufd_pt)
+ *
+ * Attach a vfio device to an iommufd address space specified by IOAS
+ * id or hw_pagetable (hwpt) id.
+ *
+ * Available only after a device has been bound to iommufd via
+ * VFIO_DEVICE_BIND_IOMMUFD
+ *
+ * Undo by VFIO_DEVICE_DETACH_IOMMUFD_PT or device fd close.
+ *
+ * @argsz:	user filled size of this data.
+ * @flags:	must be 0.
+ * @pt_id:	Input the target id which can represent an ioas or a hwpt
+ *		allocated via iommufd subsystem.
+ *		Output the attached hwpt id which could be the specified
+ *		hwpt itself or a hwpt automatically created for the
+ *		specified ioas by kernel during the attachment.
+ *
+ * Return: 0 on success, -errno on failure.
+ */
+struct vfio_device_attach_iommufd_pt {
+	__u32	argsz;
+	__u32	flags;
+	__u32	pt_id;
+};
+
+#define VFIO_DEVICE_ATTACH_IOMMUFD_PT		_IO(VFIO_TYPE, VFIO_BASE + 20)
+
+/*
+ * VFIO_DEVICE_DETACH_IOMMUFD_PT - _IOW(VFIO_TYPE, VFIO_BASE + 21,
+ *					struct vfio_device_detach_iommufd_pt)
+ *
+ * Detach a vfio device from the iommufd address space it has been
+ * attached to. After it, device should be in a blocking DMA state.
+ *
+ * Available only after a device has been bound to iommufd via
+ * VFIO_DEVICE_BIND_IOMMUFD
+ *
+ * @argsz:	user filled size of this data.
+ * @flags:	must be 0.
+ *
+ * Return: 0 on success, -errno on failure.
+ */
+struct vfio_device_detach_iommufd_pt {
+	__u32	argsz;
+	__u32	flags;
+};
+
+#define VFIO_DEVICE_DETACH_IOMMUFD_PT		_IO(VFIO_TYPE, VFIO_BASE + 21)
+
 /**
  * VFIO_DEVICE_GET_INFO - _IOR(VFIO_TYPE, VFIO_BASE + 7,
  *						struct vfio_device_info)