diff mbox series

[vfio,10/13] vfio/mlx5: Introduce SW headers for migration states

Message ID 20221106174630.25909-11-yishaih@nvidia.com (mailing list archive)
State New, archived
Headers show
Series Add migration PRE_COPY support for mlx5 driver | expand

Commit Message

Yishai Hadas Nov. 6, 2022, 5:46 p.m. UTC
From: Shay Drory <shayd@nvidia.com>

As mentioned in the previous patches, mlx5 transfers multiple
states when the PRE_COPY protocol is used. This multi-state mechanism
requires the target VM to know each state's size in order to execute
multiple loads.
Therefore, add SW header, with the needed information, for each saved
state the source VM is transferring to the target VM.

This patch implements the source VM handling of the headers; the
following patch will implement the target VM handling of the headers.

Signed-off-by: Shay Drory <shayd@nvidia.com>
Signed-off-by: Yishai Hadas <yishaih@nvidia.com>
---
 drivers/vfio/pci/mlx5/cmd.h  |  7 +++++
 drivers/vfio/pci/mlx5/main.c | 50 +++++++++++++++++++++++++++++++++---
 2 files changed, 54 insertions(+), 3 deletions(-)

Comments

Jason Gunthorpe Nov. 9, 2022, 6:38 p.m. UTC | #1
On Sun, Nov 06, 2022 at 07:46:27PM +0200, Yishai Hadas wrote:
> From: Shay Drory <shayd@nvidia.com>
> 
> As mentioned in the previous patches, mlx5 is transferring multiple
> states when the PRE_COPY protocol is used. This states mechanism
> requires the target VM to know the states' size in order to execute
> multiple loads.
> Therefore, add SW header, with the needed information, for each saved
> state the source VM is transferring to the target VM.
> 
> This patch implements the source VM handling of the headers, following
> patch will implement the target VM handling of the headers.
> 
> Signed-off-by: Shay Drory <shayd@nvidia.com>
> Signed-off-by: Yishai Hadas <yishaih@nvidia.com>
> ---
>  drivers/vfio/pci/mlx5/cmd.h  |  7 +++++
>  drivers/vfio/pci/mlx5/main.c | 50 +++++++++++++++++++++++++++++++++---
>  2 files changed, 54 insertions(+), 3 deletions(-)

This seems very awkwardly done. If you are going to string together
sg_tables, then just do that consistently. Put the header into its own
small sg_table as well and have a simple queue of sg_table/starting
offset/length chunks to let read figure out on its own. When you start
new stuff you stick it to the back of the queue. eg when you get the
incremental stick on two sg_tables to the queue. when you get to the
final stick on the final sg_table to the back of the queue. Whatever
has or hasn't happened just works out naturally.

There are too many special cases trying to figure out which chunk is
the right chunk.

>  #define MIGF_TOTAL_DATA(migf) \
> -	(migf->table_start_pos + migf->image_length + migf->final_length)
> +	(migf->table_start_pos + migf->image_length + migf->final_length + \
> +	 migf->sw_headers_bytes_sent)

And make all these macros static inline functions in all the patches -
they don't even have proper argument bracketing..

> +static void mlx5vf_send_sw_header(struct mlx5_vf_migration_file *migf,
> +				  loff_t *pos, char __user **buf, size_t *len,
> +				  ssize_t *done)
> +{
> +	struct mlx5_vf_migration_header header = {};
> +	size_t header_size = sizeof(header);
> +	void *header_buf = &header;
> +	size_t size_to_transfer;
> +
> +	if (*pos >= mlx5vf_final_table_start_pos(migf))
> +		header.image_size = migf->final_length;
> +	else
> +		header.image_size = migf->image_length;
> +
> +	size_to_transfer = header_size -
> +			   (migf->sw_headers_bytes_sent % header_size);
> +	size_to_transfer = min_t(size_t, size_to_transfer, *len);
> +	header_buf += header_size - size_to_transfer;
> +	if (copy_to_user(*buf, header_buf, size_to_transfer)) {
> +		*done = -EFAULT;

A function that has errors should return a value, not something like
this..

Jason
diff mbox series

Patch

diff --git a/drivers/vfio/pci/mlx5/cmd.h b/drivers/vfio/pci/mlx5/cmd.h
index 07a2fc54c9d8..3b0411e4a74e 100644
--- a/drivers/vfio/pci/mlx5/cmd.h
+++ b/drivers/vfio/pci/mlx5/cmd.h
@@ -22,17 +22,24 @@  struct mlx5vf_async_data {
 	void *out;
 };
 
+struct mlx5_vf_migration_header {
+	u32 image_size;
+	u32 reserved;
+};
+
 struct mlx5_vf_migration_file {
 	struct file *filp;
 	struct mutex lock;
 	u8 disabled:1;
 	u8 is_err:1;
 	u8 save_cb_active:1;
+	u8 header_read:1;
 
 	struct sg_append_table table;
 	size_t table_start_pos;
 	size_t image_length;
 	size_t allocated_length;
+	size_t sw_headers_bytes_sent;
 	/*
 	 * The device can be moved to stop_copy before the previous state was
 	 * fully read. Another set of variables is needed to maintain it.
diff --git a/drivers/vfio/pci/mlx5/main.c b/drivers/vfio/pci/mlx5/main.c
index 8a5714158e43..c0ee121bd5ea 100644
--- a/drivers/vfio/pci/mlx5/main.c
+++ b/drivers/vfio/pci/mlx5/main.c
@@ -121,6 +121,7 @@  static void mlx5vf_prep_next_table(struct mlx5_vf_migration_file *migf)
 	migf->image_length = 0;
 	migf->allocated_length = 0;
 	migf->last_offset_sg = NULL;
+	migf->header_read = false;
 }
 
 static void mlx5vf_disable_fd(struct mlx5_vf_migration_file *migf)
@@ -155,7 +156,8 @@  static int mlx5vf_release_file(struct inode *inode, struct file *filp)
 }
 
 #define MIGF_TOTAL_DATA(migf) \
-	(migf->table_start_pos + migf->image_length + migf->final_length)
+	(migf->table_start_pos + migf->image_length + migf->final_length + \
+	 migf->sw_headers_bytes_sent)
 
 #define VFIO_MIG_STATE_PRE_COPY(mvdev) \
 	(mvdev->mig_state == VFIO_DEVICE_STATE_PRE_COPY || \
@@ -175,7 +177,7 @@  mlx5vf_final_table_start_pos(struct mlx5_vf_migration_file *migf)
 
 static size_t mlx5vf_get_table_start_pos(struct mlx5_vf_migration_file *migf)
 {
-	return migf->table_start_pos;
+	return migf->table_start_pos + migf->sw_headers_bytes_sent;
 }
 
 static size_t mlx5vf_get_table_end_pos(struct mlx5_vf_migration_file *migf,
@@ -183,7 +185,40 @@  static size_t mlx5vf_get_table_end_pos(struct mlx5_vf_migration_file *migf,
 {
 	if (table == &migf->final_table)
 		return MIGF_TOTAL_DATA(migf);
-	return migf->table_start_pos + migf->image_length;
+	return migf->table_start_pos + migf->image_length +
+		migf->sw_headers_bytes_sent;
+}
+
+static void mlx5vf_send_sw_header(struct mlx5_vf_migration_file *migf,
+				  loff_t *pos, char __user **buf, size_t *len,
+				  ssize_t *done)
+{
+	struct mlx5_vf_migration_header header = {};
+	size_t header_size = sizeof(header);
+	void *header_buf = &header;
+	size_t size_to_transfer;
+
+	if (*pos >= mlx5vf_final_table_start_pos(migf))
+		header.image_size = migf->final_length;
+	else
+		header.image_size = migf->image_length;
+
+	size_to_transfer = header_size -
+			   (migf->sw_headers_bytes_sent % header_size);
+	size_to_transfer = min_t(size_t, size_to_transfer, *len);
+	header_buf += header_size - size_to_transfer;
+	if (copy_to_user(*buf, header_buf, size_to_transfer)) {
+		*done = -EFAULT;
+		return;
+	}
+
+	migf->sw_headers_bytes_sent += size_to_transfer;
+	migf->header_read = !(migf->sw_headers_bytes_sent % header_size);
+
+	*pos += size_to_transfer;
+	*len -= size_to_transfer;
+	*done += size_to_transfer;
+	*buf += size_to_transfer;
 }
 
 static struct sg_append_table *
@@ -233,6 +268,12 @@  static ssize_t mlx5vf_save_read(struct file *filp, char __user *buf, size_t len,
 		goto out_unlock;
 	}
 
+	if (VFIO_PRE_COPY_SUPP(migf->mvdev) && !migf->header_read) {
+		mlx5vf_send_sw_header(migf, pos, &buf, &len, &done);
+		if (done < 0)
+			goto out_unlock;
+	}
+
 	len = min_t(size_t, MIGF_TOTAL_DATA(migf) - *pos, len);
 	table = mlx5vf_get_table(migf, pos);
 	while (len) {
@@ -288,6 +329,9 @@  static ssize_t mlx5vf_save_read(struct file *filp, char __user *buf, size_t len,
 			 */
 			if (tmp == table)
 				break;
+			mlx5vf_send_sw_header(migf, pos, &buf, &len, &done);
+			if (done < 0)
+				goto out_unlock;
 		}
 	}