diff mbox series

[v4,3/4] vfio: VFIO_DEVICE_[AT|DE]TACH_IOMMUFD_PT support pasid

Message ID 20241104132732.16759-4-yi.l.liu@intel.com (mailing list archive)
State New
Headers show
Series vfio-pci support pasid attach/detach | expand

Commit Message

Yi Liu Nov. 4, 2024, 1:27 p.m. UTC
This extends the VFIO_DEVICE_[AT|DE]TACH_IOMMUFD_PT ioctls to attach/detach
a given pasid of a vfio device to/from an IOAS/HWPT.

vfio_copy_from_user() is added to copy the user data for the case in which
the existing user struct has introduced new fields. The rule is not breaking
the existing usersapce. The kernel only copies the new fields when the
corresponding flag is set by the userspace. For the case that has multiple
new fields marked by different flags, kernel checks the flags one by one to
get the correct size to copy besides the minsz. Such logics can be shared by
the other uapi extensions, hence add a helper for it.

Signed-off-by: Yi Liu <yi.l.liu@intel.com>
---
 drivers/vfio/device_cdev.c | 62 +++++++++++++++++++++++++++-----------
 drivers/vfio/vfio.h        | 18 +++++++++++
 drivers/vfio/vfio_main.c   | 55 +++++++++++++++++++++++++++++++++
 include/uapi/linux/vfio.h  | 29 ++++++++++++------
 4 files changed, 136 insertions(+), 28 deletions(-)

Comments

Alex Williamson Nov. 4, 2024, 9 p.m. UTC | #1
On Mon,  4 Nov 2024 05:27:31 -0800
Yi Liu <yi.l.liu@intel.com> wrote:

> This extends the VFIO_DEVICE_[AT|DE]TACH_IOMMUFD_PT ioctls to attach/detach
> a given pasid of a vfio device to/from an IOAS/HWPT.
> 
> vfio_copy_from_user() is added to copy the user data for the case in which
> the existing user struct has introduced new fields. The rule is not breaking
> the existing usersapce. The kernel only copies the new fields when the
> corresponding flag is set by the userspace. For the case that has multiple
> new fields marked by different flags, kernel checks the flags one by one to
> get the correct size to copy besides the minsz. Such logics can be shared by
> the other uapi extensions, hence add a helper for it.
> 
> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
> ---
>  drivers/vfio/device_cdev.c | 62 +++++++++++++++++++++++++++-----------
>  drivers/vfio/vfio.h        | 18 +++++++++++
>  drivers/vfio/vfio_main.c   | 55 +++++++++++++++++++++++++++++++++
>  include/uapi/linux/vfio.h  | 29 ++++++++++++------
>  4 files changed, 136 insertions(+), 28 deletions(-)
> 
> diff --git a/drivers/vfio/device_cdev.c b/drivers/vfio/device_cdev.c
> index bb1817bd4ff3..bd13ddbfb9e3 100644
> --- a/drivers/vfio/device_cdev.c
> +++ b/drivers/vfio/device_cdev.c
> @@ -159,24 +159,44 @@ void vfio_df_unbind_iommufd(struct vfio_device_file *df)
>  	vfio_device_unblock_group(device);
>  }
>  
> +#define VFIO_ATTACH_FLAGS_MASK VFIO_DEVICE_ATTACH_PASID
> +static unsigned long
> +vfio_attach_xends[ilog2(VFIO_ATTACH_FLAGS_MASK) + 1] = {
> +	XEND_SIZE(VFIO_DEVICE_ATTACH_PASID,
> +		  struct vfio_device_attach_iommufd_pt, pasid),
> +};
> +
> +#define VFIO_DETACH_FLAGS_MASK VFIO_DEVICE_DETACH_PASID
> +static unsigned long
> +vfio_detach_xends[ilog2(VFIO_DETACH_FLAGS_MASK) + 1] = {
> +	XEND_SIZE(VFIO_DEVICE_DETACH_PASID,
> +		  struct vfio_device_detach_iommufd_pt, pasid),
> +};

Doesn't this rather imply that every valid flag bit indicates some new
structure field?

For example, we start out with:

struct vfio_device_attach_iommufd_pt {
        __u32   argsz;
        __u32   flags;
        __u32   pt_id;
};

And then here it becomes:

struct vfio_device_attach_iommufd_pt {
	__u32	argsz;
	__u32	flags;
#define VFIO_DEVICE_ATTACH_PASID	(1 << 0)
	__u32	pt_id;
	__u32	pasid;
};

What if the next flag is simply related to the processing of @pt_id and
doesn't require @pasid?

The xend array necessarily expands, but what's the value?  Logically it
would be offsetofend(, pt_id), so the array becomes { 16, 12 }.

Similarly, rather than pasid we might have reused a previously
reserved field, for instance what if we already expanded the structure
as:

struct vfio_device_attach_iommufd_pt {
	__u32	argsz;
	__u32	flags;
#define VFIO_DEVICE_ATTACH_FOO		(1 << 0)
	__u32	pt_id;
	__u32	reserved;
	__u64	foo;
};

If we then want to add @pasid, we might really prefer to take advantage
of that reserved field and the array becomes { 24, 16 }.

I think these can work (see below), but this seems like a pretty
complicated generalization.  It might make sense to initially open code
the handling for @pasid with a follow-on patch with this sort of
generalization so we can evaluate them separately.

BTW, don't feel obligated to use "xend" based on my email sample code.

> +
>  int vfio_df_ioctl_attach_pt(struct vfio_device_file *df,
>  			    struct vfio_device_attach_iommufd_pt __user *arg)
>  {
> -	struct vfio_device *device = df->device;
>  	struct vfio_device_attach_iommufd_pt attach;
> -	unsigned long minsz;
> +	struct vfio_device *device = df->device;
>  	int ret;
>  
> -	minsz = offsetofend(struct vfio_device_attach_iommufd_pt, pt_id);
> -
> -	if (copy_from_user(&attach, arg, minsz))
> -		return -EFAULT;
> +	ret = VFIO_COPY_USER_DATA((void __user *)arg, &attach,
> +				  struct vfio_device_attach_iommufd_pt,
> +				  pt_id, VFIO_ATTACH_FLAGS_MASK,
> +				  vfio_attach_xends);
> +	if (ret)
> +		return ret;
>  
> -	if (attach.argsz < minsz || attach.flags)
> -		return -EINVAL;
> +	if ((attach.flags & VFIO_DEVICE_ATTACH_PASID) &&
> +	    !device->ops->pasid_attach_ioas)
> +		return -EOPNOTSUPP;
>  
>  	mutex_lock(&device->dev_set->lock);
> -	ret = device->ops->attach_ioas(device, &attach.pt_id);
> +	if (attach.flags & VFIO_DEVICE_ATTACH_PASID)
> +		ret = device->ops->pasid_attach_ioas(device, attach.pasid,
> +						     &attach.pt_id);
> +	else
> +		ret = device->ops->attach_ioas(device, &attach.pt_id);
>  	if (ret)
>  		goto out_unlock;
>  
> @@ -198,20 +218,26 @@ int vfio_df_ioctl_attach_pt(struct vfio_device_file *df,
>  int vfio_df_ioctl_detach_pt(struct vfio_device_file *df,
>  			    struct vfio_device_detach_iommufd_pt __user *arg)
>  {
> -	struct vfio_device *device = df->device;
>  	struct vfio_device_detach_iommufd_pt detach;
> -	unsigned long minsz;
> -
> -	minsz = offsetofend(struct vfio_device_detach_iommufd_pt, flags);
> +	struct vfio_device *device = df->device;
> +	int ret;
>  
> -	if (copy_from_user(&detach, arg, minsz))
> -		return -EFAULT;
> +	ret = VFIO_COPY_USER_DATA((void __user *)arg, &detach,
> +				  struct vfio_device_detach_iommufd_pt,
> +				  flags, VFIO_DETACH_FLAGS_MASK,
> +				  vfio_detach_xends);
> +	if (ret)
> +		return ret;
>  
> -	if (detach.argsz < minsz || detach.flags)
> -		return -EINVAL;
> +	if ((detach.flags & VFIO_DEVICE_DETACH_PASID) &&
> +	    !device->ops->pasid_detach_ioas)
> +		return -EOPNOTSUPP;
>  
>  	mutex_lock(&device->dev_set->lock);
> -	device->ops->detach_ioas(device);
> +	if (detach.flags & VFIO_DEVICE_DETACH_PASID)
> +		device->ops->pasid_detach_ioas(device, detach.pasid);
> +	else
> +		device->ops->detach_ioas(device);
>  	mutex_unlock(&device->dev_set->lock);
>  
>  	return 0;
> diff --git a/drivers/vfio/vfio.h b/drivers/vfio/vfio.h
> index 50128da18bca..9f081cf01c5a 100644
> --- a/drivers/vfio/vfio.h
> +++ b/drivers/vfio/vfio.h
> @@ -34,6 +34,24 @@ void vfio_df_close(struct vfio_device_file *df);
>  struct vfio_device_file *
>  vfio_allocate_device_file(struct vfio_device *device);
>  
> +int vfio_copy_from_user(void *buffer, void __user *arg,
> +			unsigned long minsz, u32 flags_mask,
> +			unsigned long *xend_array);
> +
> +#define VFIO_COPY_USER_DATA(_arg, _local_buffer, _struct, _min_last,          \
> +			    _flags_mask, _xend_array)                         \
> +	vfio_copy_from_user(_local_buffer, _arg,                              \
> +			    offsetofend(_struct, _min_last) +                \
> +			    BUILD_BUG_ON_ZERO(offsetof(_struct, argsz) !=     \
> +					      0) +                            \
> +			    BUILD_BUG_ON_ZERO(offsetof(_struct, flags) !=     \
> +					      sizeof(u32)),                   \
> +			    _flags_mask, _xend_array)

We have a precedence in vfio_alloc_device() that macros wrapping
functions don't need to be all caps.

> +
> +#define XEND_SIZE(_flag, _struct, _xlast)                                    \
> +	[ilog2(_flag)] = offsetofend(_struct, _xlast) +                      \
> +			 BUILD_BUG_ON_ZERO(_flag == 0)                       \
> +
>  extern const struct file_operations vfio_device_fops;
>  
>  #ifdef CONFIG_VFIO_NOIOMMU
> diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c
> index a5a62d9d963f..7df94bf121fd 100644
> --- a/drivers/vfio/vfio_main.c
> +++ b/drivers/vfio/vfio_main.c
> @@ -1694,6 +1694,61 @@ int vfio_dma_rw(struct vfio_device *device, dma_addr_t iova, void *data,
>  }
>  EXPORT_SYMBOL(vfio_dma_rw);
>  
> +/**
> + * vfio_copy_from_user - Copy the user struct that may have extended fields
> + *
> + * @buffer: The local buffer to store the data copied from user
> + * @arg: The user buffer pointer
> + * @minsz: The minimum size of the user struct, it should never bump up.
> + * @flags_mask: The combination of all the falgs defined
> + * @xend_array: The array that stores the xend size for set flags.
> + *
> + * This helper requires the user struct put the argsz and flags fields in
> + * the first 8 bytes.
> + *
> + * Return 0 for success, otherwise -errno
> + */
> +int vfio_copy_from_user(void *buffer, void __user *arg,
> +			unsigned long minsz, u32 flags_mask,
> +			unsigned long *xend_array)
> +{
> +	unsigned long xend = 0;
> +	struct user_header {
> +		u32 argsz;
> +		u32 flags;
> +	} *header;
> +	unsigned long flags;
> +	u32 flag;
> +
> +	if (copy_from_user(buffer, arg, minsz))
> +		return -EFAULT;
> +
> +	header = (struct user_header *)buffer;
> +	if (header->argsz < minsz)
> +		return -EINVAL;
> +
> +	if (header->flags & ~flags_mask)
> +		return -EINVAL;
> +
> +	/* Loop each set flag to decide the xend */
> +	flags = header->flags;
> +	for_each_set_bit(flag, &flags, BITS_PER_LONG) {

I suppose it doesn't matter, but there's a logical inconsistency
searching BITS_PER_LONG on a buffer initialized by a u32.

> +		if (xend_array[flag])

Given the earlier concern, this should be:

		if (xend_array[flags] > xend)

Thanks,
Alex

> +			xend = xend_array[flag];
> +	}
> +
> +	if (xend) {
> +		if (header->argsz < xend)
> +			return -EINVAL;
> +
> +		if (copy_from_user(buffer + minsz,
> +				   arg + minsz, xend - minsz))
> +			return -EFAULT;
> +	}
> +
> +	return 0;
> +}
> +
>  /*
>   * Module/class support
>   */
> diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
> index 2b68e6cdf190..40b414e642f5 100644
> --- a/include/uapi/linux/vfio.h
> +++ b/include/uapi/linux/vfio.h
> @@ -931,29 +931,34 @@ struct vfio_device_bind_iommufd {
>   * VFIO_DEVICE_ATTACH_IOMMUFD_PT - _IOW(VFIO_TYPE, VFIO_BASE + 19,
>   *					struct vfio_device_attach_iommufd_pt)
>   * @argsz:	User filled size of this data.
> - * @flags:	Must be 0.
> + * @flags:	Flags for attach.
>   * @pt_id:	Input the target id which can represent an ioas or a hwpt
>   *		allocated via iommufd subsystem.
>   *		Output the input ioas id or the attached hwpt id which could
>   *		be the specified hwpt itself or a hwpt automatically created
>   *		for the specified ioas by kernel during the attachment.
> + * @pasid:	The pasid to be attached, only meaningful when
> + *		VFIO_DEVICE_ATTACH_PASID is set in @flags
>   *
>   * Associate the device with an address space within the bound iommufd.
>   * Undo by VFIO_DEVICE_DETACH_IOMMUFD_PT or device fd close.  This is only
>   * allowed on cdev fds.
>   *
> - * If a vfio device is currently attached to a valid hw_pagetable, without doing
> - * a VFIO_DEVICE_DETACH_IOMMUFD_PT, a second VFIO_DEVICE_ATTACH_IOMMUFD_PT ioctl
> - * passing in another hw_pagetable (hwpt) id is allowed. This action, also known
> - * as a hw_pagetable replacement, will replace the device's currently attached
> - * hw_pagetable with a new hw_pagetable corresponding to the given pt_id.
> + * If a vfio device or a pasid of this device is currently attached to a valid
> + * hw_pagetable (hwpt), without doing a VFIO_DEVICE_DETACH_IOMMUFD_PT, a second
> + * VFIO_DEVICE_ATTACH_IOMMUFD_PT ioctl passing in another hwpt id is allowed.
> + * This action, also known as a hw_pagetable replacement, will replace the
> + * currently attached hwpt of the device or the pasid of this device with a new
> + * hwpt corresponding to the given pt_id.
>   *
>   * Return: 0 on success, -errno on failure.
>   */
>  struct vfio_device_attach_iommufd_pt {
>  	__u32	argsz;
>  	__u32	flags;
> +#define VFIO_DEVICE_ATTACH_PASID	(1 << 0)
>  	__u32	pt_id;
> +	__u32	pasid;
>  };
>  
>  #define VFIO_DEVICE_ATTACH_IOMMUFD_PT		_IO(VFIO_TYPE, VFIO_BASE + 19)
> @@ -962,17 +967,21 @@ struct vfio_device_attach_iommufd_pt {
>   * VFIO_DEVICE_DETACH_IOMMUFD_PT - _IOW(VFIO_TYPE, VFIO_BASE + 20,
>   *					struct vfio_device_detach_iommufd_pt)
>   * @argsz:	User filled size of this data.
> - * @flags:	Must be 0.
> + * @flags:	Flags for detach.
> + * @pasid:	The pasid to be detached, only meaningful when
> + *		VFIO_DEVICE_DETACH_PASID is set in @flags
>   *
> - * Remove the association of the device and its current associated address
> - * space.  After it, the device should be in a blocking DMA state.  This is only
> - * allowed on cdev fds.
> + * Remove the association of the device or a pasid of the device and its current
> + * associated address space.  After it, the device or the pasid should be in a
> + * blocking DMA state.  This is only allowed on cdev fds.
>   *
>   * Return: 0 on success, -errno on failure.
>   */
>  struct vfio_device_detach_iommufd_pt {
>  	__u32	argsz;
>  	__u32	flags;
> +#define VFIO_DEVICE_DETACH_PASID	(1 << 0)
> +	__u32	pasid;
>  };
>  
>  #define VFIO_DEVICE_DETACH_IOMMUFD_PT		_IO(VFIO_TYPE, VFIO_BASE + 20)
Yi Liu Nov. 5, 2024, 7:44 a.m. UTC | #2
On 2024/11/5 05:00, Alex Williamson wrote:
> On Mon,  4 Nov 2024 05:27:31 -0800
> Yi Liu <yi.l.liu@intel.com> wrote:
> 
>> This extends the VFIO_DEVICE_[AT|DE]TACH_IOMMUFD_PT ioctls to attach/detach
>> a given pasid of a vfio device to/from an IOAS/HWPT.
>>
>> vfio_copy_from_user() is added to copy the user data for the case in which
>> the existing user struct has introduced new fields. The rule is not breaking
>> the existing usersapce. The kernel only copies the new fields when the
>> corresponding flag is set by the userspace. For the case that has multiple
>> new fields marked by different flags, kernel checks the flags one by one to
>> get the correct size to copy besides the minsz. Such logics can be shared by
>> the other uapi extensions, hence add a helper for it.
>>
>> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
>> ---
>>   drivers/vfio/device_cdev.c | 62 +++++++++++++++++++++++++++-----------
>>   drivers/vfio/vfio.h        | 18 +++++++++++
>>   drivers/vfio/vfio_main.c   | 55 +++++++++++++++++++++++++++++++++
>>   include/uapi/linux/vfio.h  | 29 ++++++++++++------
>>   4 files changed, 136 insertions(+), 28 deletions(-)
>>
>> diff --git a/drivers/vfio/device_cdev.c b/drivers/vfio/device_cdev.c
>> index bb1817bd4ff3..bd13ddbfb9e3 100644
>> --- a/drivers/vfio/device_cdev.c
>> +++ b/drivers/vfio/device_cdev.c
>> @@ -159,24 +159,44 @@ void vfio_df_unbind_iommufd(struct vfio_device_file *df)
>>   	vfio_device_unblock_group(device);
>>   }
>>   
>> +#define VFIO_ATTACH_FLAGS_MASK VFIO_DEVICE_ATTACH_PASID
>> +static unsigned long
>> +vfio_attach_xends[ilog2(VFIO_ATTACH_FLAGS_MASK) + 1] = {
>> +	XEND_SIZE(VFIO_DEVICE_ATTACH_PASID,
>> +		  struct vfio_device_attach_iommufd_pt, pasid),
>> +};
>> +
>> +#define VFIO_DETACH_FLAGS_MASK VFIO_DEVICE_DETACH_PASID
>> +static unsigned long
>> +vfio_detach_xends[ilog2(VFIO_DETACH_FLAGS_MASK) + 1] = {
>> +	XEND_SIZE(VFIO_DEVICE_DETACH_PASID,
>> +		  struct vfio_device_detach_iommufd_pt, pasid),
>> +};
> 
> Doesn't this rather imply that every valid flag bit indicates some new
> structure field?
> 
> For example, we start out with:
> 
> struct vfio_device_attach_iommufd_pt {
>          __u32   argsz;
>          __u32   flags;
>          __u32   pt_id;
> };
> 
> And then here it becomes:
> 
> struct vfio_device_attach_iommufd_pt {
> 	__u32	argsz;
> 	__u32	flags;
> #define VFIO_DEVICE_ATTACH_PASID	(1 << 0)
> 	__u32	pt_id;
> 	__u32	pasid;
> };
> 
> What if the next flag is simply related to the processing of @pt_id and
> doesn't require @pasid?
> 
> The xend array necessarily expands, but what's the value?  Logically it
> would be offsetofend(, pt_id), so the array becomes { 16, 12 }.

You are right.

> 
> Similarly, rather than pasid we might have reused a previously
> reserved field, for instance what if we already expanded the structure
> as:
> 
> struct vfio_device_attach_iommufd_pt {
> 	__u32	argsz;
> 	__u32	flags;
> #define VFIO_DEVICE_ATTACH_FOO		(1 << 0)
> 	__u32	pt_id;
> 	__u32	reserved;
> 	__u64	foo;
> };
> 
> If we then want to add @pasid, we might really prefer to take advantage
> of that reserved field and the array becomes { 24, 16 }.

yes.

> I think these can work (see below), but this seems like a pretty
> complicated generalization.  It might make sense to initially open code
> the handling for @pasid with a follow-on patch with this sort of
> generalization so we can evaluate them separately.

sure. If don't mind, I'd like to mention the two examples in the commit
message when adding the generalization. To let future people understand
how should the array be programmed.

> 
> BTW, don't feel obligated to use "xend" based on my email sample code.

TBH. I don't see a better name besides it yet. :) If you don't mind, I
may keep using it in later versions.

> 
>> +
>>   int vfio_df_ioctl_attach_pt(struct vfio_device_file *df,
>>   			    struct vfio_device_attach_iommufd_pt __user *arg)
>>   {
>> -	struct vfio_device *device = df->device;
>>   	struct vfio_device_attach_iommufd_pt attach;
>> -	unsigned long minsz;
>> +	struct vfio_device *device = df->device;
>>   	int ret;
>>   
>> -	minsz = offsetofend(struct vfio_device_attach_iommufd_pt, pt_id);
>> -
>> -	if (copy_from_user(&attach, arg, minsz))
>> -		return -EFAULT;
>> +	ret = VFIO_COPY_USER_DATA((void __user *)arg, &attach,
>> +				  struct vfio_device_attach_iommufd_pt,
>> +				  pt_id, VFIO_ATTACH_FLAGS_MASK,
>> +				  vfio_attach_xends);
>> +	if (ret)
>> +		return ret;
>>   
>> -	if (attach.argsz < minsz || attach.flags)
>> -		return -EINVAL;
>> +	if ((attach.flags & VFIO_DEVICE_ATTACH_PASID) &&
>> +	    !device->ops->pasid_attach_ioas)
>> +		return -EOPNOTSUPP;
>>   
>>   	mutex_lock(&device->dev_set->lock);
>> -	ret = device->ops->attach_ioas(device, &attach.pt_id);
>> +	if (attach.flags & VFIO_DEVICE_ATTACH_PASID)
>> +		ret = device->ops->pasid_attach_ioas(device, attach.pasid,
>> +						     &attach.pt_id);
>> +	else
>> +		ret = device->ops->attach_ioas(device, &attach.pt_id);
>>   	if (ret)
>>   		goto out_unlock;
>>   
>> @@ -198,20 +218,26 @@ int vfio_df_ioctl_attach_pt(struct vfio_device_file *df,
>>   int vfio_df_ioctl_detach_pt(struct vfio_device_file *df,
>>   			    struct vfio_device_detach_iommufd_pt __user *arg)
>>   {
>> -	struct vfio_device *device = df->device;
>>   	struct vfio_device_detach_iommufd_pt detach;
>> -	unsigned long minsz;
>> -
>> -	minsz = offsetofend(struct vfio_device_detach_iommufd_pt, flags);
>> +	struct vfio_device *device = df->device;
>> +	int ret;
>>   
>> -	if (copy_from_user(&detach, arg, minsz))
>> -		return -EFAULT;
>> +	ret = VFIO_COPY_USER_DATA((void __user *)arg, &detach,
>> +				  struct vfio_device_detach_iommufd_pt,
>> +				  flags, VFIO_DETACH_FLAGS_MASK,
>> +				  vfio_detach_xends);
>> +	if (ret)
>> +		return ret;
>>   
>> -	if (detach.argsz < minsz || detach.flags)
>> -		return -EINVAL;
>> +	if ((detach.flags & VFIO_DEVICE_DETACH_PASID) &&
>> +	    !device->ops->pasid_detach_ioas)
>> +		return -EOPNOTSUPP;
>>   
>>   	mutex_lock(&device->dev_set->lock);
>> -	device->ops->detach_ioas(device);
>> +	if (detach.flags & VFIO_DEVICE_DETACH_PASID)
>> +		device->ops->pasid_detach_ioas(device, detach.pasid);
>> +	else
>> +		device->ops->detach_ioas(device);
>>   	mutex_unlock(&device->dev_set->lock);
>>   
>>   	return 0;
>> diff --git a/drivers/vfio/vfio.h b/drivers/vfio/vfio.h
>> index 50128da18bca..9f081cf01c5a 100644
>> --- a/drivers/vfio/vfio.h
>> +++ b/drivers/vfio/vfio.h
>> @@ -34,6 +34,24 @@ void vfio_df_close(struct vfio_device_file *df);
>>   struct vfio_device_file *
>>   vfio_allocate_device_file(struct vfio_device *device);
>>   
>> +int vfio_copy_from_user(void *buffer, void __user *arg,
>> +			unsigned long minsz, u32 flags_mask,
>> +			unsigned long *xend_array);
>> +
>> +#define VFIO_COPY_USER_DATA(_arg, _local_buffer, _struct, _min_last,          \
>> +			    _flags_mask, _xend_array)                         \
>> +	vfio_copy_from_user(_local_buffer, _arg,                              \
>> +			    offsetofend(_struct, _min_last) +                \
>> +			    BUILD_BUG_ON_ZERO(offsetof(_struct, argsz) !=     \
>> +					      0) +                            \
>> +			    BUILD_BUG_ON_ZERO(offsetof(_struct, flags) !=     \
>> +					      sizeof(u32)),                   \
>> +			    _flags_mask, _xend_array)
> 
> We have a precedence in vfio_alloc_device() that macros wrapping
> functions don't need to be all caps.

got it. :)

> 
>> +
>> +#define XEND_SIZE(_flag, _struct, _xlast)                                    \
>> +	[ilog2(_flag)] = offsetofend(_struct, _xlast) +                      \
>> +			 BUILD_BUG_ON_ZERO(_flag == 0)                       \
>> +
>>   extern const struct file_operations vfio_device_fops;
>>   
>>   #ifdef CONFIG_VFIO_NOIOMMU
>> diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c
>> index a5a62d9d963f..7df94bf121fd 100644
>> --- a/drivers/vfio/vfio_main.c
>> +++ b/drivers/vfio/vfio_main.c
>> @@ -1694,6 +1694,61 @@ int vfio_dma_rw(struct vfio_device *device, dma_addr_t iova, void *data,
>>   }
>>   EXPORT_SYMBOL(vfio_dma_rw);
>>   
>> +/**
>> + * vfio_copy_from_user - Copy the user struct that may have extended fields
>> + *
>> + * @buffer: The local buffer to store the data copied from user
>> + * @arg: The user buffer pointer
>> + * @minsz: The minimum size of the user struct, it should never bump up.
>> + * @flags_mask: The combination of all the falgs defined
>> + * @xend_array: The array that stores the xend size for set flags.
>> + *
>> + * This helper requires the user struct put the argsz and flags fields in
>> + * the first 8 bytes.
>> + *
>> + * Return 0 for success, otherwise -errno
>> + */
>> +int vfio_copy_from_user(void *buffer, void __user *arg,
>> +			unsigned long minsz, u32 flags_mask,
>> +			unsigned long *xend_array)
>> +{
>> +	unsigned long xend = 0;
>> +	struct user_header {
>> +		u32 argsz;
>> +		u32 flags;
>> +	} *header;
>> +	unsigned long flags;
>> +	u32 flag;
>> +
>> +	if (copy_from_user(buffer, arg, minsz))
>> +		return -EFAULT;
>> +
>> +	header = (struct user_header *)buffer;
>> +	if (header->argsz < minsz)
>> +		return -EINVAL;
>> +
>> +	if (header->flags & ~flags_mask)
>> +		return -EINVAL;
>> +
>> +	/* Loop each set flag to decide the xend */
>> +	flags = header->flags;
>> +	for_each_set_bit(flag, &flags, BITS_PER_LONG) {
> 
> I suppose it doesn't matter, but there's a logical inconsistency
> searching BITS_PER_LONG on a buffer initialized by a u32.

how about using BITS_PER_TYPE(u32) or hard-code 32 here?

> 
>> +		if (xend_array[flag])
> 
> Given the earlier concern, this should be:
> 
> 		if (xend_array[flags] > xend)

yes.

> 
> Thanks,
> Alex
> 
>> +			xend = xend_array[flag];
>> +	}
>> +
>> +	if (xend) {

I may change this check as the below. Hence we don't bother to
do the check and funtion call at all.

	if (xend != minsz) {

>> +		if (header->argsz < xend)
>> +			return -EINVAL;
>> +
>> +		if (copy_from_user(buffer + minsz,
>> +				   arg + minsz, xend - minsz))
>> +			return -EFAULT;
>> +	}
>> +
>> +	return 0;
>> +}
>> +
>>   /*
>>    * Module/class support
>>    */
diff mbox series

Patch

diff --git a/drivers/vfio/device_cdev.c b/drivers/vfio/device_cdev.c
index bb1817bd4ff3..bd13ddbfb9e3 100644
--- a/drivers/vfio/device_cdev.c
+++ b/drivers/vfio/device_cdev.c
@@ -159,24 +159,44 @@  void vfio_df_unbind_iommufd(struct vfio_device_file *df)
 	vfio_device_unblock_group(device);
 }
 
+#define VFIO_ATTACH_FLAGS_MASK VFIO_DEVICE_ATTACH_PASID
+static unsigned long
+vfio_attach_xends[ilog2(VFIO_ATTACH_FLAGS_MASK) + 1] = {
+	XEND_SIZE(VFIO_DEVICE_ATTACH_PASID,
+		  struct vfio_device_attach_iommufd_pt, pasid),
+};
+
+#define VFIO_DETACH_FLAGS_MASK VFIO_DEVICE_DETACH_PASID
+static unsigned long
+vfio_detach_xends[ilog2(VFIO_DETACH_FLAGS_MASK) + 1] = {
+	XEND_SIZE(VFIO_DEVICE_DETACH_PASID,
+		  struct vfio_device_detach_iommufd_pt, pasid),
+};
+
 int vfio_df_ioctl_attach_pt(struct vfio_device_file *df,
 			    struct vfio_device_attach_iommufd_pt __user *arg)
 {
-	struct vfio_device *device = df->device;
 	struct vfio_device_attach_iommufd_pt attach;
-	unsigned long minsz;
+	struct vfio_device *device = df->device;
 	int ret;
 
-	minsz = offsetofend(struct vfio_device_attach_iommufd_pt, pt_id);
-
-	if (copy_from_user(&attach, arg, minsz))
-		return -EFAULT;
+	ret = VFIO_COPY_USER_DATA((void __user *)arg, &attach,
+				  struct vfio_device_attach_iommufd_pt,
+				  pt_id, VFIO_ATTACH_FLAGS_MASK,
+				  vfio_attach_xends);
+	if (ret)
+		return ret;
 
-	if (attach.argsz < minsz || attach.flags)
-		return -EINVAL;
+	if ((attach.flags & VFIO_DEVICE_ATTACH_PASID) &&
+	    !device->ops->pasid_attach_ioas)
+		return -EOPNOTSUPP;
 
 	mutex_lock(&device->dev_set->lock);
-	ret = device->ops->attach_ioas(device, &attach.pt_id);
+	if (attach.flags & VFIO_DEVICE_ATTACH_PASID)
+		ret = device->ops->pasid_attach_ioas(device, attach.pasid,
+						     &attach.pt_id);
+	else
+		ret = device->ops->attach_ioas(device, &attach.pt_id);
 	if (ret)
 		goto out_unlock;
 
@@ -198,20 +218,26 @@  int vfio_df_ioctl_attach_pt(struct vfio_device_file *df,
 int vfio_df_ioctl_detach_pt(struct vfio_device_file *df,
 			    struct vfio_device_detach_iommufd_pt __user *arg)
 {
-	struct vfio_device *device = df->device;
 	struct vfio_device_detach_iommufd_pt detach;
-	unsigned long minsz;
-
-	minsz = offsetofend(struct vfio_device_detach_iommufd_pt, flags);
+	struct vfio_device *device = df->device;
+	int ret;
 
-	if (copy_from_user(&detach, arg, minsz))
-		return -EFAULT;
+	ret = VFIO_COPY_USER_DATA((void __user *)arg, &detach,
+				  struct vfio_device_detach_iommufd_pt,
+				  flags, VFIO_DETACH_FLAGS_MASK,
+				  vfio_detach_xends);
+	if (ret)
+		return ret;
 
-	if (detach.argsz < minsz || detach.flags)
-		return -EINVAL;
+	if ((detach.flags & VFIO_DEVICE_DETACH_PASID) &&
+	    !device->ops->pasid_detach_ioas)
+		return -EOPNOTSUPP;
 
 	mutex_lock(&device->dev_set->lock);
-	device->ops->detach_ioas(device);
+	if (detach.flags & VFIO_DEVICE_DETACH_PASID)
+		device->ops->pasid_detach_ioas(device, detach.pasid);
+	else
+		device->ops->detach_ioas(device);
 	mutex_unlock(&device->dev_set->lock);
 
 	return 0;
diff --git a/drivers/vfio/vfio.h b/drivers/vfio/vfio.h
index 50128da18bca..9f081cf01c5a 100644
--- a/drivers/vfio/vfio.h
+++ b/drivers/vfio/vfio.h
@@ -34,6 +34,24 @@  void vfio_df_close(struct vfio_device_file *df);
 struct vfio_device_file *
 vfio_allocate_device_file(struct vfio_device *device);
 
+int vfio_copy_from_user(void *buffer, void __user *arg,
+			unsigned long minsz, u32 flags_mask,
+			unsigned long *xend_array);
+
+#define VFIO_COPY_USER_DATA(_arg, _local_buffer, _struct, _min_last,          \
+			    _flags_mask, _xend_array)                         \
+	vfio_copy_from_user(_local_buffer, _arg,                              \
+			    offsetofend(_struct, _min_last) +                \
+			    BUILD_BUG_ON_ZERO(offsetof(_struct, argsz) !=     \
+					      0) +                            \
+			    BUILD_BUG_ON_ZERO(offsetof(_struct, flags) !=     \
+					      sizeof(u32)),                   \
+			    _flags_mask, _xend_array)
+
+#define XEND_SIZE(_flag, _struct, _xlast)                                    \
+	[ilog2(_flag)] = offsetofend(_struct, _xlast) +                      \
+			 BUILD_BUG_ON_ZERO(_flag == 0)                       \
+
 extern const struct file_operations vfio_device_fops;
 
 #ifdef CONFIG_VFIO_NOIOMMU
diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c
index a5a62d9d963f..7df94bf121fd 100644
--- a/drivers/vfio/vfio_main.c
+++ b/drivers/vfio/vfio_main.c
@@ -1694,6 +1694,61 @@  int vfio_dma_rw(struct vfio_device *device, dma_addr_t iova, void *data,
 }
 EXPORT_SYMBOL(vfio_dma_rw);
 
+/**
+ * vfio_copy_from_user - Copy the user struct that may have extended fields
+ *
+ * @buffer: The local buffer to store the data copied from user
+ * @arg: The user buffer pointer
+ * @minsz: The minimum size of the user struct, it should never bump up.
+ * @flags_mask: The combination of all the falgs defined
+ * @xend_array: The array that stores the xend size for set flags.
+ *
+ * This helper requires the user struct put the argsz and flags fields in
+ * the first 8 bytes.
+ *
+ * Return 0 for success, otherwise -errno
+ */
+int vfio_copy_from_user(void *buffer, void __user *arg,
+			unsigned long minsz, u32 flags_mask,
+			unsigned long *xend_array)
+{
+	unsigned long xend = 0;
+	struct user_header {
+		u32 argsz;
+		u32 flags;
+	} *header;
+	unsigned long flags;
+	u32 flag;
+
+	if (copy_from_user(buffer, arg, minsz))
+		return -EFAULT;
+
+	header = (struct user_header *)buffer;
+	if (header->argsz < minsz)
+		return -EINVAL;
+
+	if (header->flags & ~flags_mask)
+		return -EINVAL;
+
+	/* Loop each set flag to decide the xend */
+	flags = header->flags;
+	for_each_set_bit(flag, &flags, BITS_PER_LONG) {
+		if (xend_array[flag])
+			xend = xend_array[flag];
+	}
+
+	if (xend) {
+		if (header->argsz < xend)
+			return -EINVAL;
+
+		if (copy_from_user(buffer + minsz,
+				   arg + minsz, xend - minsz))
+			return -EFAULT;
+	}
+
+	return 0;
+}
+
 /*
  * Module/class support
  */
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index 2b68e6cdf190..40b414e642f5 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -931,29 +931,34 @@  struct vfio_device_bind_iommufd {
  * VFIO_DEVICE_ATTACH_IOMMUFD_PT - _IOW(VFIO_TYPE, VFIO_BASE + 19,
  *					struct vfio_device_attach_iommufd_pt)
  * @argsz:	User filled size of this data.
- * @flags:	Must be 0.
+ * @flags:	Flags for attach.
  * @pt_id:	Input the target id which can represent an ioas or a hwpt
  *		allocated via iommufd subsystem.
  *		Output the input ioas id or the attached hwpt id which could
  *		be the specified hwpt itself or a hwpt automatically created
  *		for the specified ioas by kernel during the attachment.
+ * @pasid:	The pasid to be attached, only meaningful when
+ *		VFIO_DEVICE_ATTACH_PASID is set in @flags
  *
  * Associate the device with an address space within the bound iommufd.
  * Undo by VFIO_DEVICE_DETACH_IOMMUFD_PT or device fd close.  This is only
  * allowed on cdev fds.
  *
- * If a vfio device is currently attached to a valid hw_pagetable, without doing
- * a VFIO_DEVICE_DETACH_IOMMUFD_PT, a second VFIO_DEVICE_ATTACH_IOMMUFD_PT ioctl
- * passing in another hw_pagetable (hwpt) id is allowed. This action, also known
- * as a hw_pagetable replacement, will replace the device's currently attached
- * hw_pagetable with a new hw_pagetable corresponding to the given pt_id.
+ * If a vfio device or a pasid of this device is currently attached to a valid
+ * hw_pagetable (hwpt), without doing a VFIO_DEVICE_DETACH_IOMMUFD_PT, a second
+ * VFIO_DEVICE_ATTACH_IOMMUFD_PT ioctl passing in another hwpt id is allowed.
+ * This action, also known as a hw_pagetable replacement, will replace the
+ * currently attached hwpt of the device or the pasid of this device with a new
+ * hwpt corresponding to the given pt_id.
  *
  * Return: 0 on success, -errno on failure.
  */
 struct vfio_device_attach_iommufd_pt {
 	__u32	argsz;
 	__u32	flags;
+#define VFIO_DEVICE_ATTACH_PASID	(1 << 0)
 	__u32	pt_id;
+	__u32	pasid;
 };
 
 #define VFIO_DEVICE_ATTACH_IOMMUFD_PT		_IO(VFIO_TYPE, VFIO_BASE + 19)
@@ -962,17 +967,21 @@  struct vfio_device_attach_iommufd_pt {
  * VFIO_DEVICE_DETACH_IOMMUFD_PT - _IOW(VFIO_TYPE, VFIO_BASE + 20,
  *					struct vfio_device_detach_iommufd_pt)
  * @argsz:	User filled size of this data.
- * @flags:	Must be 0.
+ * @flags:	Flags for detach.
+ * @pasid:	The pasid to be detached, only meaningful when
+ *		VFIO_DEVICE_DETACH_PASID is set in @flags
  *
- * Remove the association of the device and its current associated address
- * space.  After it, the device should be in a blocking DMA state.  This is only
- * allowed on cdev fds.
+ * Remove the association of the device or a pasid of the device and its current
+ * associated address space.  After it, the device or the pasid should be in a
+ * blocking DMA state.  This is only allowed on cdev fds.
  *
  * Return: 0 on success, -errno on failure.
  */
 struct vfio_device_detach_iommufd_pt {
 	__u32	argsz;
 	__u32	flags;
+#define VFIO_DEVICE_DETACH_PASID	(1 << 0)
+	__u32	pasid;
 };
 
 #define VFIO_DEVICE_DETACH_IOMMUFD_PT		_IO(VFIO_TYPE, VFIO_BASE + 20)