[vfio,11/13] vfio/mlx5: Introduce multiple loads

Message ID 20221106174630.25909-12-yishaih@nvidia.com (mailing list archive)
State New, archived
Series Add migration PRE_COPY support for mlx5 driver

Commit Message

Yishai Hadas Nov. 6, 2022, 5:46 p.m. UTC
From: Shay Drory <shayd@nvidia.com>

In order to support PRE_COPY, the mlx5 driver transfers multiple states
(images) of the device: the source VF can save and transfer multiple
states, and the target VF will load them in that order.

This patch implements the changes on the target VF side to decompose
the header of each state and to write and load the multiple states.

Signed-off-by: Shay Drory <shayd@nvidia.com>
Signed-off-by: Yishai Hadas <yishaih@nvidia.com>
---
 drivers/vfio/pci/mlx5/cmd.c  | 12 ++---
 drivers/vfio/pci/mlx5/cmd.h  |  2 +
 drivers/vfio/pci/mlx5/main.c | 98 ++++++++++++++++++++++++++++++------
 3 files changed, 89 insertions(+), 23 deletions(-)
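
For illustration, the resume stream the target decomposes carries, per
state, a software header followed by the image bytes. Only the
image_size field is taken from this patch (migf->header.image_size);
the struct below is a simplified sketch, not the driver's actual
mlx5_vf_migration_header definition.

/*
 * Sketch of the stream layout the target VF parses:
 *
 *   +----------+----------------------+----------+----------------------+ ...
 *   | header 0 | image 0 (image_size) | header 1 | image 1 (image_size) |
 *   +----------+----------------------+----------+----------------------+ ...
 */
struct mlx5_vf_migration_header_sketch {
	__le64 image_size;	/* size of the image that follows; other fields omitted */
};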

Comments

Jason Gunthorpe Nov. 9, 2022, 6:45 p.m. UTC | #1
On Sun, Nov 06, 2022 at 07:46:28PM +0200, Yishai Hadas wrote:

> +start_over:
>  	if (migf->allocated_length < requested_length) {
> -		done = mlx5vf_add_migration_pages(
> +		ret = mlx5vf_add_migration_pages(
>  			migf,
>  			DIV_ROUND_UP(requested_length - migf->allocated_length,
>  				     PAGE_SIZE), &migf->table);
> -		if (done)
> +		if (ret)
> +			goto out_unlock;
> +	}

This really wants to be coded as a state machine, not a tangle of
gotos

> +
> +	if (VFIO_PRE_COPY_SUPP(migf->mvdev)) {
> +		if (!migf->header_read)
> +			mlx5vf_recv_sw_header(migf, pos, &buf, &len, &done);
> +		if (done < 0)
>  			goto out_unlock;

And when you make it into a FSM then we can pre-allocate the required
sg_table space based on the header instead of having to stream

Jason
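
For reference, a rough sketch of the write path reshaped as the state
machine suggested above. The state enum, the migf->state field, and the
copy_header_bytes()/header_complete()/copy_image_bytes()/image_complete()
helpers are hypothetical placeholders; mlx5vf_add_migration_pages() and
mlx5vf_cmd_load_vhca_state() are the existing driver functions used in
this patch.

enum mlx5vf_resume_state {			/* hypothetical */
	MLX5VF_RESUME_READ_HEADER,
	MLX5VF_RESUME_READ_IMAGE,
};

static ssize_t mlx5vf_resume_write_fsm(struct mlx5_vf_migration_file *migf,
				       const char __user *buf, size_t len)
{
	ssize_t done = 0;
	int ret;

	while (len) {
		switch (migf->state) {		/* hypothetical field */
		case MLX5VF_RESUME_READ_HEADER:
			/* Accumulate header bytes; a header may span writes. */
			ret = copy_header_bytes(migf, &buf, &len, &done);
			if (ret)
				return ret;
			if (!header_complete(migf))
				return done;
			/*
			 * The full header is in hand, so the image length is
			 * known and the sg_table can be sized once per image
			 * instead of growing as the data streams in.
			 */
			ret = mlx5vf_add_migration_pages(migf,
				DIV_ROUND_UP(migf->header.image_size, PAGE_SIZE),
				&migf->table);
			if (ret)
				return ret;
			migf->state = MLX5VF_RESUME_READ_IMAGE;
			break;
		case MLX5VF_RESUME_READ_IMAGE:
			/* Copy image bytes into the preallocated table. */
			ret = copy_image_bytes(migf, &buf, &len, &done);
			if (ret)
				return ret;
			if (!image_complete(migf))
				break;
			ret = mlx5vf_cmd_load_vhca_state(migf->mvdev, migf);
			if (ret)
				return ret;
			migf->state = MLX5VF_RESUME_READ_HEADER;
			break;
		}
	}
	return done;
}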

Patch

diff --git a/drivers/vfio/pci/mlx5/cmd.c b/drivers/vfio/pci/mlx5/cmd.c
index 2d2171191218..a1b17cd688b9 100644
--- a/drivers/vfio/pci/mlx5/cmd.c
+++ b/drivers/vfio/pci/mlx5/cmd.c
@@ -420,16 +420,14 @@  int mlx5vf_cmd_load_vhca_state(struct mlx5vf_pci_core_device *mvdev,
 	if (mvdev->mdev_detach)
 		return -ENOTCONN;
 
-	mutex_lock(&migf->lock);
-	if (!migf->image_length) {
-		err = -EINVAL;
-		goto end;
-	}
+	lockdep_assert_held(&migf->lock);
+	if (!migf->image_length)
+		return -EINVAL;
 
 	mdev = mvdev->mdev;
 	err = mlx5_core_alloc_pd(mdev, &pdn);
 	if (err)
-		goto end;
+		return err;
 
 	err = dma_map_sgtable(mdev->device, &migf->table.sgt, DMA_TO_DEVICE, 0);
 	if (err)
@@ -454,8 +452,6 @@  int mlx5vf_cmd_load_vhca_state(struct mlx5vf_pci_core_device *mvdev,
 	dma_unmap_sgtable(mdev->device, &migf->table.sgt, DMA_TO_DEVICE, 0);
 err_reg:
 	mlx5_core_dealloc_pd(mdev, pdn);
-end:
-	mutex_unlock(&migf->lock);
 	return err;
 }
 
diff --git a/drivers/vfio/pci/mlx5/cmd.h b/drivers/vfio/pci/mlx5/cmd.h
index 3b0411e4a74e..03f3b5e99879 100644
--- a/drivers/vfio/pci/mlx5/cmd.h
+++ b/drivers/vfio/pci/mlx5/cmd.h
@@ -39,6 +39,8 @@  struct mlx5_vf_migration_file {
 	size_t table_start_pos;
 	size_t image_length;
 	size_t allocated_length;
+	size_t expected_length;
+	struct mlx5_vf_migration_header header;
 	size_t sw_headers_bytes_sent;
 	/*
 	 * The device can be moved to stop_copy before the previous state was
diff --git a/drivers/vfio/pci/mlx5/main.c b/drivers/vfio/pci/mlx5/main.c
index c0ee121bd5ea..6cdd4fc93818 100644
--- a/drivers/vfio/pci/mlx5/main.c
+++ b/drivers/vfio/pci/mlx5/main.c
@@ -569,12 +569,45 @@  mlx5vf_pci_save_device_data(struct mlx5vf_pci_core_device *mvdev, bool track)
 	return ERR_PTR(ret);
 }
 
+static void mlx5vf_recv_sw_header(struct mlx5_vf_migration_file *migf,
+				  loff_t *pos, const char __user **buf,
+				  size_t *len, ssize_t *done)
+{
+	ssize_t header_size = sizeof(migf->header);
+	void *header_buf = &migf->header;
+	size_t size_to_recv;
+
+	size_to_recv = header_size - (migf->sw_headers_bytes_sent % header_size);
+	size_to_recv = min_t(size_t, size_to_recv, *len);
+	header_buf += header_size - size_to_recv;
+	if (copy_from_user(header_buf, *buf, size_to_recv)) {
+		*done = -EFAULT;
+		return;
+	}
+
+	*pos += size_to_recv;
+	*len -= size_to_recv;
+	*done += size_to_recv;
+	*buf += size_to_recv;
+	migf->sw_headers_bytes_sent += size_to_recv;
+	migf->header_read = !(migf->sw_headers_bytes_sent % header_size);
+
+	if (migf->sw_headers_bytes_sent % header_size)
+		return;
+	migf->expected_length = migf->header.image_size;
+}
+
+#define EXPECTED_TABLE_END_POSITION(migf) \
+	(migf->table_start_pos + migf->expected_length + \
+	 migf->sw_headers_bytes_sent)
+
 static ssize_t mlx5vf_resume_write(struct file *filp, const char __user *buf,
 				   size_t len, loff_t *pos)
 {
 	struct mlx5_vf_migration_file *migf = filp->private_data;
 	loff_t requested_length;
 	ssize_t done = 0;
+	int ret = 0;
 
 	if (pos)
 		return -ESPIPE;
@@ -584,33 +617,47 @@  static ssize_t mlx5vf_resume_write(struct file *filp, const char __user *buf,
 	    check_add_overflow((loff_t)len, *pos, &requested_length))
 		return -EINVAL;
 
-	if (requested_length > MAX_MIGRATION_SIZE)
-		return -ENOMEM;
-
+	mutex_lock(&migf->mvdev->state_mutex);
 	mutex_lock(&migf->lock);
+	requested_length -= migf->table_start_pos;
+	if (requested_length > MAX_MIGRATION_SIZE) {
+		ret = -ENOMEM;
+		goto out_unlock;
+	}
+
 	if (migf->disabled) {
-		done = -ENODEV;
+		ret = -ENODEV;
 		goto out_unlock;
 	}
 
+start_over:
 	if (migf->allocated_length < requested_length) {
-		done = mlx5vf_add_migration_pages(
+		ret = mlx5vf_add_migration_pages(
 			migf,
 			DIV_ROUND_UP(requested_length - migf->allocated_length,
 				     PAGE_SIZE), &migf->table);
-		if (done)
+		if (ret)
+			goto out_unlock;
+	}
+
+	if (VFIO_PRE_COPY_SUPP(migf->mvdev)) {
+		if (!migf->header_read)
+			mlx5vf_recv_sw_header(migf, pos, &buf, &len, &done);
+		if (done < 0)
 			goto out_unlock;
 	}
 
 	while (len) {
+		unsigned long offset;
 		size_t page_offset;
 		struct page *page;
 		size_t page_len;
 		u8 *to_buff;
-		int ret;
 
-		page_offset = (*pos) % PAGE_SIZE;
-		page = mlx5vf_get_migration_page(migf, *pos - page_offset,
+		offset = *pos - mlx5vf_get_table_start_pos(migf);
+		page_offset = offset % PAGE_SIZE;
+		offset -= page_offset;
+		page = mlx5vf_get_migration_page(migf, offset,
 						 &migf->table);
 		if (!page) {
 			if (done == 0)
@@ -619,11 +666,15 @@  static ssize_t mlx5vf_resume_write(struct file *filp, const char __user *buf,
 		}
 
 		page_len = min_t(size_t, len, PAGE_SIZE - page_offset);
+		if (VFIO_PRE_COPY_SUPP(migf->mvdev))
+			page_len = min_t(size_t, page_len,
+				 EXPECTED_TABLE_END_POSITION(migf) - *pos);
+
 		to_buff = kmap_local_page(page);
 		ret = copy_from_user(to_buff + page_offset, buf, page_len);
 		kunmap_local(to_buff);
 		if (ret) {
-			done = -EFAULT;
+			ret = -EFAULT;
 			goto out_unlock;
 		}
 		*pos += page_len;
@@ -631,10 +682,22 @@  static ssize_t mlx5vf_resume_write(struct file *filp, const char __user *buf,
 		done += page_len;
 		buf += page_len;
 		migf->image_length += page_len;
+
+		if (*pos == EXPECTED_TABLE_END_POSITION(migf)) {
+			ret = mlx5vf_cmd_load_vhca_state(migf->mvdev, migf);
+			if (ret)
+				goto out_unlock;
+			mlx5vf_prep_next_table(migf);
+			if (len) {
+				requested_length -= migf->expected_length;
+				goto start_over;
+			}
+		}
 	}
 out_unlock:
 	mutex_unlock(&migf->lock);
-	return done;
+	mlx5vf_state_mutex_unlock(migf->mvdev);
+	return ret ? ret : done;
 }
 
 static const struct file_operations mlx5vf_resume_fops = {
@@ -663,6 +726,7 @@  mlx5vf_pci_resume_device_data(struct mlx5vf_pci_core_device *mvdev)
 	}
 	stream_open(migf->filp->f_inode, migf->filp);
 	mutex_init(&migf->lock);
+	migf->mvdev = mvdev;
 	return migf;
 }
 
@@ -754,10 +818,14 @@  mlx5vf_pci_step_device_state_locked(struct mlx5vf_pci_core_device *mvdev,
 	}
 
 	if (cur == VFIO_DEVICE_STATE_RESUMING && new == VFIO_DEVICE_STATE_STOP) {
-		ret = mlx5vf_cmd_load_vhca_state(mvdev,
-						 mvdev->resuming_migf);
-		if (ret)
-			return ERR_PTR(ret);
+		if (!VFIO_PRE_COPY_SUPP(mvdev)) {
+			mutex_lock(&mvdev->resuming_migf->lock);
+			ret = mlx5vf_cmd_load_vhca_state(mvdev,
+							 mvdev->resuming_migf);
+			mutex_unlock(&mvdev->resuming_migf->lock);
+			if (ret)
+				return ERR_PTR(ret);
+		}
 		mlx5vf_disable_fds(mvdev);
 		return NULL;
 	}