@@ -84,6 +84,18 @@ int mlx5vf_cmd_query_vhca_migration_state(struct mlx5vf_pci_core_device *mvdev,
ret = wait_for_completion_interruptible(&mvdev->saving_migf->save_comp);
if (ret)
return ret;
+ if (mvdev->saving_migf->state ==
+ MLX5_MIGF_STATE_PRE_COPY_ERROR) {
+ /*
+ * In case we had a PRE_COPY error, only query full
+ * image for final image
+ */
+ if (!(query_flags & MLX5VF_QUERY_FINAL)) {
+ *state_size = 0;
+ return 0;
+ }
+ query_flags &= ~MLX5VF_QUERY_INC;
+ }
}
MLX5_SET(query_vhca_migration_state_in, in, opcode,
@@ -442,7 +454,10 @@ void mlx5vf_mig_file_cleanup_cb(struct work_struct *_work)
mlx5vf_put_data_buffer(async_data->buf);
if (async_data->header_buf)
mlx5vf_put_data_buffer(async_data->header_buf);
- migf->state = MLX5_MIGF_STATE_ERROR;
+ if (async_data->status == MLX5_CMD_STAT_BAD_RES_STATE_ERR)
+ migf->state = MLX5_MIGF_STATE_PRE_COPY_ERROR;
+ else
+ migf->state = MLX5_MIGF_STATE_ERROR;
wake_up_interruptible(&migf->poll_wait);
}
mutex_unlock(&migf->lock);
@@ -511,6 +526,8 @@ static void mlx5vf_save_callback(int status, struct mlx5_async_work *context)
* The error and the cleanup flows can't run from an
* interrupt context
*/
+ if (status == -EREMOTEIO)
+ status = MLX5_GET(save_vhca_state_out, async_data->out, status);
async_data->status = status;
queue_work(migf->mvdev->cb_wq, &async_data->work);
}
@@ -534,6 +551,13 @@ int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev,
if (err)
return err;
+ if (migf->state == MLX5_MIGF_STATE_PRE_COPY_ERROR)
+ /*
+ * In case we had a PRE_COPY error, SAVE is triggered only for
+ * the final image, read device full image.
+ */
+ inc = false;
+
MLX5_SET(save_vhca_state_in, in, opcode,
MLX5_CMD_OP_SAVE_VHCA_STATE);
MLX5_SET(save_vhca_state_in, in, op_mod, 0);
@@ -14,6 +14,7 @@
enum mlx5_vf_migf_state {
MLX5_MIGF_STATE_ERROR = 1,
+ MLX5_MIGF_STATE_PRE_COPY_ERROR,
MLX5_MIGF_STATE_PRE_COPY,
MLX5_MIGF_STATE_SAVE_LAST,
MLX5_MIGF_STATE_COMPLETE,
@@ -154,6 +155,7 @@ struct mlx5vf_pci_core_device {
enum {
MLX5VF_QUERY_INC = (1UL << 0),
+ MLX5VF_QUERY_FINAL = (1UL << 1),
};
int mlx5vf_cmd_suspend_vhca(struct mlx5vf_pci_core_device *mvdev, u16 op_mod);
@@ -222,6 +222,7 @@ static ssize_t mlx5vf_save_read(struct file *filp, char __user *buf, size_t len,
if (wait_event_interruptible(migf->poll_wait,
!list_empty(&migf->buf_list) ||
migf->state == MLX5_MIGF_STATE_ERROR ||
+ migf->state == MLX5_MIGF_STATE_PRE_COPY_ERROR ||
migf->state == MLX5_MIGF_STATE_PRE_COPY ||
migf->state == MLX5_MIGF_STATE_COMPLETE))
return -ERESTARTSYS;
@@ -241,7 +242,8 @@ static ssize_t mlx5vf_save_read(struct file *filp, char __user *buf, size_t len,
if (first_loop_call) {
first_loop_call = false;
/* Temporary end of file as part of PRE_COPY */
- if (end_of_data && migf->state == MLX5_MIGF_STATE_PRE_COPY) {
+ if (end_of_data && (migf->state == MLX5_MIGF_STATE_PRE_COPY ||
+ migf->state == MLX5_MIGF_STATE_PRE_COPY_ERROR)) {
done = -ENOMSG;
goto out_unlock;
}
@@ -434,7 +436,7 @@ static int mlx5vf_pci_save_device_inc_data(struct mlx5vf_pci_core_device *mvdev)
return -ENODEV;
ret = mlx5vf_cmd_query_vhca_migration_state(mvdev, &length,
- MLX5VF_QUERY_INC);
+ MLX5VF_QUERY_INC | MLX5VF_QUERY_FINAL);
if (ret)
goto err;