[v15,Kernel,3/7] vfio iommu: Add ioctl definition for dirty pages tracking.

Message ID 1584649004-8285-4-git-send-email-kwankhede@nvidia.com (mailing list archive)
State New, archived
Series KABIs to support migration for VFIO devices

Commit Message

Kirti Wankhede March 19, 2020, 8:16 p.m. UTC
The IOMMU container maintains a list of all pages pinned through the
vfio_pin_pages API. All pages pinned by the vendor driver through this API
should be considered dirty during migration. When the container consists of an
IOMMU-capable device and all pages are pinned and mapped, all pages are marked
dirty.
Added support to start/stop tracking of dirtied pages and to get the bitmap of
all dirtied pages for a requested IO virtual address range.

Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com>
Reviewed-by: Neo Jia <cjia@nvidia.com>
---
 include/uapi/linux/vfio.h | 55 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 55 insertions(+)
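
To illustrate the intended use, here is a minimal userspace sketch (hypothetical,
not part of this series) of driving the new start/stop flags on a container fd
that is already configured with the type1 IOMMU; the helper name
set_dirty_tracking() is made up for the example:

/* Hypothetical helper: toggle dirty page tracking on a VFIO container fd. */
#include <linux/vfio.h>
#include <sys/ioctl.h>
#include <string.h>

static int set_dirty_tracking(int container_fd, __u32 flag)
{
	struct vfio_iommu_type1_dirty_bitmap dirty;

	memset(&dirty, 0, sizeof(dirty));
	dirty.argsz = sizeof(dirty);	/* no data[] payload for START/STOP */
	dirty.flags = flag;		/* exactly one flag at a time */

	return ioctl(container_fd, VFIO_IOMMU_DIRTY_PAGES, &dirty);
}

/*
 * set_dirty_tracking(fd, VFIO_IOMMU_DIRTY_PAGES_FLAG_START);
 *   ... pre-copy phase, bitmaps queried with _GET_BITMAP ...
 * set_dirty_tracking(fd, VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP);
 */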

Comments

Eric Auger March 23, 2020, 9:11 p.m. UTC | #1
Hi Kirti,

On 3/19/20 9:16 PM, Kirti Wankhede wrote:
> IOMMU container maintains a list of all pages pinned by vfio_pin_pages API.
> All pages pinned by vendor driver through this API should be considered as
> dirty during migration. When container consists of IOMMU capable device and
> all pages are pinned and mapped, then all pages are marked dirty.
> Added support to start/stop dirtied pages tracking and to get bitmap of all
> dirtied pages for requested IO virtual address range.
> 
> Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com>
> Reviewed-by: Neo Jia <cjia@nvidia.com>
> ---
>  include/uapi/linux/vfio.h | 55 +++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 55 insertions(+)
> 
> diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
> index d0021467af53..8138f94cac15 100644
> --- a/include/uapi/linux/vfio.h
> +++ b/include/uapi/linux/vfio.h
> @@ -995,6 +995,12 @@ struct vfio_iommu_type1_dma_map {
>  
>  #define VFIO_IOMMU_MAP_DMA _IO(VFIO_TYPE, VFIO_BASE + 13)
>  
> +struct vfio_bitmap {
> +	__u64        pgsize;	/* page size for bitmap */
in bytes as well
> +	__u64        size;	/* in bytes */
> +	__u64 __user *data;	/* one bit per page */
> +};
> +
>  /**
>   * VFIO_IOMMU_UNMAP_DMA - _IOWR(VFIO_TYPE, VFIO_BASE + 14,
>   *							struct vfio_dma_unmap)
> @@ -1021,6 +1027,55 @@ struct vfio_iommu_type1_dma_unmap {
>  #define VFIO_IOMMU_ENABLE	_IO(VFIO_TYPE, VFIO_BASE + 15)
>  #define VFIO_IOMMU_DISABLE	_IO(VFIO_TYPE, VFIO_BASE + 16)
>  
> +/**
> + * VFIO_IOMMU_DIRTY_PAGES - _IOWR(VFIO_TYPE, VFIO_BASE + 17,
> + *                                     struct vfio_iommu_type1_dirty_bitmap)
> + * IOCTL is used for dirty pages tracking. Caller sets argsz, which is size of
> + * struct vfio_iommu_type1_dirty_bitmap.
nit: This may become outdated when adding new fields. argsz use mode is
documented at the beginning of the file.

> + * Caller set flag depend on which
> + * operation to perform, details as below:
> + *
> + * When IOCTL is called with VFIO_IOMMU_DIRTY_PAGES_FLAG_START set, indicates
> + * migration is active and IOMMU module should track pages which are dirtied or
> + * potentially dirtied by device.
> + * Dirty pages are tracked until tracking is stopped by user application by
> + * setting VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP flag.
> + *
> + * When IOCTL is called with VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP set, indicates
> + * IOMMU should stop tracking dirtied pages.
> + *
> + * When IOCTL is called with VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP flag set,
> + * IOCTL returns dirty pages bitmap for IOMMU container during migration for
> + * given IOVA range. User must provide data[] as the structure
> + * vfio_iommu_type1_dirty_bitmap_get through which user provides IOVA range
I think the fact that the IOVA range must match the vfio dma size must be
documented.
> + * and
> + * pgsize. This interface supports to get bitmap of smallest supported pgsize
> + * only and can be modified in future to get bitmap of specified pgsize.
> + * User must allocate memory for bitmap, zero the bitmap memory and set size
> + * of allocated memory in bitmap_size field. One bit is used to represent one
> + * page consecutively starting from iova offset. User should provide page size
> + * in 'pgsize'. Bit set in bitmap indicates page at that offset from iova is
> + * dirty. Caller must set argsz including size of structure
> + * vfio_iommu_type1_dirty_bitmap_get.
nit: ditto
> + *
> + * Only one of the flags _START, STOP and _GET may be specified at a time.
> + *
> + */
> +struct vfio_iommu_type1_dirty_bitmap {
> +	__u32        argsz;
> +	__u32        flags;
> +#define VFIO_IOMMU_DIRTY_PAGES_FLAG_START	(1 << 0)
> +#define VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP	(1 << 1)
> +#define VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP	(1 << 2)
> +	__u8         data[];
> +};
> +
> +struct vfio_iommu_type1_dirty_bitmap_get {
> +	__u64              iova;	/* IO virtual address */
> +	__u64              size;	/* Size of iova range */
> +	struct vfio_bitmap bitmap;
> +};
> +
> +#define VFIO_IOMMU_DIRTY_PAGES             _IO(VFIO_TYPE, VFIO_BASE + 17)
> +
>  /* -------- Additional API for SPAPR TCE (Server POWERPC) IOMMU -------- */
>  
>  /*
> 
Thanks

Eric
Kirti Wankhede March 24, 2020, 7:49 p.m. UTC | #2
On 3/24/2020 2:41 AM, Auger Eric wrote:
> Hi Kirti,
> 
> On 3/19/20 9:16 PM, Kirti Wankhede wrote:
>> IOMMU container maintains a list of all pages pinned by vfio_pin_pages API.
>> All pages pinned by vendor driver through this API should be considered as
>> dirty during migration. When container consists of IOMMU capable device and
>> all pages are pinned and mapped, then all pages are marked dirty.
>> Added support to start/stop dirtied pages tracking and to get bitmap of all
>> dirtied pages for requested IO virtual address range.
>>
>> Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com>
>> Reviewed-by: Neo Jia <cjia@nvidia.com>
>> ---
>>   include/uapi/linux/vfio.h | 55 +++++++++++++++++++++++++++++++++++++++++++++++
>>   1 file changed, 55 insertions(+)
>>
>> diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
>> index d0021467af53..8138f94cac15 100644
>> --- a/include/uapi/linux/vfio.h
>> +++ b/include/uapi/linux/vfio.h
>> @@ -995,6 +995,12 @@ struct vfio_iommu_type1_dma_map {
>>   
>>   #define VFIO_IOMMU_MAP_DMA _IO(VFIO_TYPE, VFIO_BASE + 13)
>>   
>> +struct vfio_bitmap {
>> +	__u64        pgsize;	/* page size for bitmap */
> in bytes as well

Added.

>> +	__u64        size;	/* in bytes */
>> +	__u64 __user *data;	/* one bit per page */
>> +};
>> +
>>   /**
>>    * VFIO_IOMMU_UNMAP_DMA - _IOWR(VFIO_TYPE, VFIO_BASE + 14,
>>    *							struct vfio_dma_unmap)
>> @@ -1021,6 +1027,55 @@ struct vfio_iommu_type1_dma_unmap {
>>   #define VFIO_IOMMU_ENABLE	_IO(VFIO_TYPE, VFIO_BASE + 15)
>>   #define VFIO_IOMMU_DISABLE	_IO(VFIO_TYPE, VFIO_BASE + 16)
>>   
>> +/**
>> + * VFIO_IOMMU_DIRTY_PAGES - _IOWR(VFIO_TYPE, VFIO_BASE + 17,
>> + *                                     struct vfio_iommu_type1_dirty_bitmap)
>> + * IOCTL is used for dirty pages tracking. Caller sets argsz, which is size of
>> + * struct vfio_iommu_type1_dirty_bitmap.
> nit: This may become outdated when adding new fields. argsz use mode is
> documented at the beginning of the file.
>

Ok.

>> + * Caller set flag depend on which
>> + * operation to perform, details as below:
>> + *
>> + * When IOCTL is called with VFIO_IOMMU_DIRTY_PAGES_FLAG_START set, indicates
>> + * migration is active and IOMMU module should track pages which are dirtied or
>> + * potentially dirtied by device.
>> + * Dirty pages are tracked until tracking is stopped by user application by
>> + * setting VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP flag.
>> + *
>> + * When IOCTL is called with VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP set, indicates
>> + * IOMMU should stop tracking dirtied pages.
>> + *
>> + * When IOCTL is called with VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP flag set,
>> + * IOCTL returns dirty pages bitmap for IOMMU container during migration for
>> + * given IOVA range. User must provide data[] as the structure
>> + * vfio_iommu_type1_dirty_bitmap_get through which user provides IOVA range
> I think the fact that the IOVA range must match the vfio dma size must be
> documented.

Added.

>> + * and
>> + * pgsize. This interface supports to get bitmap of smallest supported pgsize
>> + * only and can be modified in future to get bitmap of specified pgsize.
>> + * User must allocate memory for bitmap, zero the bitmap memory and set size
>> + * of allocated memory in bitmap_size field. One bit is used to represent one
>> + * page consecutively starting from iova offset. User should provide page size
>> + * in 'pgsize'. Bit set in bitmap indicates page at that offset from iova is
>> + * dirty. Caller must set argsz including size of structure
>> + * vfio_iommu_type1_dirty_bitmap_get.
> nit: ditto

I think this is still needed here because vfio_bitmap is only used in 
case of this particular flag.

Thanks,
Kirti

>> + *
>> + * Only one of the flags _START, STOP and _GET may be specified at a time.
>> + *
>> + */
>> +struct vfio_iommu_type1_dirty_bitmap {
>> +	__u32        argsz;
>> +	__u32        flags;
>> +#define VFIO_IOMMU_DIRTY_PAGES_FLAG_START	(1 << 0)
>> +#define VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP	(1 << 1)
>> +#define VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP	(1 << 2)
>> +	__u8         data[];
>> +};
>> +
>> +struct vfio_iommu_type1_dirty_bitmap_get {
>> +	__u64              iova;	/* IO virtual address */
>> +	__u64              size;	/* Size of iova range */
>> +	struct vfio_bitmap bitmap;
>> +};
>> +
>> +#define VFIO_IOMMU_DIRTY_PAGES             _IO(VFIO_TYPE, VFIO_BASE + 17)
>> +
>>   /* -------- Additional API for SPAPR TCE (Server POWERPC) IOMMU -------- */
>>   
>>   /*
>>
> Thanks
> 
> Eric
>

Patch

diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index d0021467af53..8138f94cac15 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -995,6 +995,12 @@  struct vfio_iommu_type1_dma_map {
 
 #define VFIO_IOMMU_MAP_DMA _IO(VFIO_TYPE, VFIO_BASE + 13)
 
+struct vfio_bitmap {
+	__u64        pgsize;	/* page size for bitmap in bytes */
+	__u64        size;	/* in bytes */
+	__u64 __user *data;	/* one bit per page */
+};
+
 /**
  * VFIO_IOMMU_UNMAP_DMA - _IOWR(VFIO_TYPE, VFIO_BASE + 14,
  *							struct vfio_dma_unmap)
@@ -1021,6 +1027,55 @@  struct vfio_iommu_type1_dma_unmap {
 #define VFIO_IOMMU_ENABLE	_IO(VFIO_TYPE, VFIO_BASE + 15)
 #define VFIO_IOMMU_DISABLE	_IO(VFIO_TYPE, VFIO_BASE + 16)
 
+/**
+ * VFIO_IOMMU_DIRTY_PAGES - _IOWR(VFIO_TYPE, VFIO_BASE + 17,
+ *                                     struct vfio_iommu_type1_dirty_bitmap)
+ * This IOCTL is used for dirty page tracking. The caller sets argsz, which is
+ * the size of struct vfio_iommu_type1_dirty_bitmap, and sets flags depending
+ * on which operation to perform, as detailed below:
+ *
+ * Calling the IOCTL with VFIO_IOMMU_DIRTY_PAGES_FLAG_START set indicates that
+ * migration is active and the IOMMU module should track pages which are
+ * dirtied or potentially dirtied by the device.
+ * Dirty pages are tracked until tracking is stopped by the user application
+ * by setting the VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP flag.
+ *
+ * Calling the IOCTL with VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP set indicates that
+ * the IOMMU should stop tracking dirtied pages.
+ *
+ * Calling the IOCTL with the VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP flag set
+ * returns the dirty pages bitmap of the IOMMU container, during migration,
+ * for the given IOVA range. The user must provide data[] as struct
+ * vfio_iommu_type1_dirty_bitmap_get, through which the IOVA range and pgsize
+ * are supplied. This interface only supports getting a bitmap of the smallest
+ * supported pgsize and may be extended in the future to accept a pgsize.
+ * The user must allocate memory for the bitmap, zero that memory and set the
+ * size of the allocated memory in the bitmap's 'size' field. One bit is used
+ * to represent one page, consecutively starting from the iova offset. The
+ * page size must be provided in 'pgsize'. A set bit in the bitmap indicates
+ * that the page at that offset from iova is dirty. argsz must include the
+ * size of struct vfio_iommu_type1_dirty_bitmap_get.
+ *
+ * Only one of the _START, _STOP or _GET_BITMAP flags may be set at a time.
+ *
+ */
+struct vfio_iommu_type1_dirty_bitmap {
+	__u32        argsz;
+	__u32        flags;
+#define VFIO_IOMMU_DIRTY_PAGES_FLAG_START	(1 << 0)
+#define VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP	(1 << 1)
+#define VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP	(1 << 2)
+	__u8         data[];
+};
+
+struct vfio_iommu_type1_dirty_bitmap_get {
+	__u64              iova;	/* IO virtual address */
+	__u64              size;	/* Size of iova range */
+	struct vfio_bitmap bitmap;
+};
+
+#define VFIO_IOMMU_DIRTY_PAGES             _IO(VFIO_TYPE, VFIO_BASE + 17)
+
 /* -------- Additional API for SPAPR TCE (Server POWERPC) IOMMU -------- */
 
 /*
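
To illustrate the _GET_BITMAP flow documented above, here is a hypothetical
userspace sketch (not part of this series; the helper name get_dirty_bitmap()
is made up, and it assumes the header is used as installed for userspace, so
bitmap.data is a plain pointer). The queried iova/size must match an existing
VFIO_IOMMU_MAP_DMA mapping, and the bitmap holds one bit per pgsize page of
that range:

/*
 * Hypothetical sketch: fetch the dirty bitmap for one IOVA range that was
 * previously mapped with VFIO_IOMMU_MAP_DMA. Returns a caller-owned bitmap
 * or NULL on failure.
 */
#include <linux/vfio.h>
#include <sys/ioctl.h>
#include <stdlib.h>

static __u64 *get_dirty_bitmap(int container_fd, __u64 iova, __u64 size,
			       __u64 pgsize)
{
	struct vfio_iommu_type1_dirty_bitmap *dirty;
	struct vfio_iommu_type1_dirty_bitmap_get *range;
	size_t argsz = sizeof(*dirty) + sizeof(*range);
	__u64 bitmap_bytes = ((size / pgsize) + 7) / 8;	/* one bit per page */
	__u64 *bitmap;

	bitmap = calloc(1, bitmap_bytes);	/* bitmap memory must be zeroed */
	dirty = calloc(1, argsz);
	if (!bitmap || !dirty)
		goto err;

	dirty->argsz = argsz;	/* header plus the _get structure in data[] */
	dirty->flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP;

	range = (struct vfio_iommu_type1_dirty_bitmap_get *)dirty->data;
	range->iova = iova;
	range->size = size;
	range->bitmap.pgsize = pgsize;		/* smallest supported page size */
	range->bitmap.size = bitmap_bytes;
	range->bitmap.data = bitmap;

	if (ioctl(container_fd, VFIO_IOMMU_DIRTY_PAGES, dirty))
		goto err;

	free(dirty);
	return bitmap;	/* bit N set => page at iova + N * pgsize is dirty */

err:
	free(dirty);
	free(bitmap);
	return NULL;
}

In a migration flow this would be called repeatedly during pre-copy, after
tracking has been started with _START and before it is stopped with _STOP.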