Message ID | 20240305103037.61144-1-yishaih@nvidia.com (mailing list archive) |
---|---|
State | New, archived |
Series | [vfio] vfio/mlx5: Enforce PRE_COPY support |
On Tue, 5 Mar 2024 12:30:37 +0200 Yishai Hadas <yishaih@nvidia.com> wrote: > Enable live migration only once the firmware supports PRE_COPY. > > PRE_COPY has been supported by the firmware for a long time already and > is required to achieve a low downtime upon live migration. > > This lets us clean up some old code that is not applicable those days > while PRE_COPY is fully supported by the firmware. Was firmware without PRE_COPY support ever available to users? AIUI this would disable migration support on devices with older firmware, so if that firmware exists in the wild this should go through a deprecation process. We should also likely note a minimum firmware revision and time frame when PRE_COPY support was added in firmware. Thanks, Alex > > Signed-off-by: Yishai Hadas <yishaih@nvidia.com> > --- > drivers/vfio/pci/mlx5/cmd.c | 83 +++++++++++++++++++------- > drivers/vfio/pci/mlx5/cmd.h | 6 -- > drivers/vfio/pci/mlx5/main.c | 109 +++-------------------------------- > 3 files changed, 71 insertions(+), 127 deletions(-) > > diff --git a/drivers/vfio/pci/mlx5/cmd.c b/drivers/vfio/pci/mlx5/cmd.c > index c54bcd5d0917..41a4b0cf4297 100644 > --- a/drivers/vfio/pci/mlx5/cmd.c > +++ b/drivers/vfio/pci/mlx5/cmd.c > @@ -233,6 +233,10 @@ void mlx5vf_cmd_set_migratable(struct mlx5vf_pci_core_device *mvdev, > if (!MLX5_CAP_GEN(mvdev->mdev, migration)) > goto end; > > + if (!(MLX5_CAP_GEN_2(mvdev->mdev, migration_multi_load) && > + MLX5_CAP_GEN_2(mvdev->mdev, migration_tracking_state))) > + goto end; > + > mvdev->vf_id = pci_iov_vf_id(pdev); > if (mvdev->vf_id < 0) > goto end; > @@ -262,17 +266,14 @@ void mlx5vf_cmd_set_migratable(struct mlx5vf_pci_core_device *mvdev, > mvdev->migrate_cap = 1; > mvdev->core_device.vdev.migration_flags = > VFIO_MIGRATION_STOP_COPY | > - VFIO_MIGRATION_P2P; > + VFIO_MIGRATION_P2P | > + VFIO_MIGRATION_PRE_COPY; > + > mvdev->core_device.vdev.mig_ops = mig_ops; > init_completion(&mvdev->tracker_comp); > if (MLX5_CAP_GEN(mvdev->mdev, adv_virtualization)) > mvdev->core_device.vdev.log_ops = log_ops; > > - if (MLX5_CAP_GEN_2(mvdev->mdev, migration_multi_load) && > - MLX5_CAP_GEN_2(mvdev->mdev, migration_tracking_state)) > - mvdev->core_device.vdev.migration_flags |= > - VFIO_MIGRATION_PRE_COPY; > - > if (MLX5_CAP_GEN_2(mvdev->mdev, migration_in_chunks)) > mvdev->chunk_mode = 1; > > @@ -414,6 +415,50 @@ void mlx5vf_free_data_buffer(struct mlx5_vhca_data_buffer *buf) > kfree(buf); > } > > +static int mlx5vf_add_migration_pages(struct mlx5_vhca_data_buffer *buf, > + unsigned int npages) > +{ > + unsigned int to_alloc = npages; > + struct page **page_list; > + unsigned long filled; > + unsigned int to_fill; > + int ret; > + > + to_fill = min_t(unsigned int, npages, PAGE_SIZE / sizeof(*page_list)); > + page_list = kvzalloc(to_fill * sizeof(*page_list), GFP_KERNEL_ACCOUNT); > + if (!page_list) > + return -ENOMEM; > + > + do { > + filled = alloc_pages_bulk_array(GFP_KERNEL_ACCOUNT, to_fill, > + page_list); > + if (!filled) { > + ret = -ENOMEM; > + goto err; > + } > + to_alloc -= filled; > + ret = sg_alloc_append_table_from_pages( > + &buf->table, page_list, filled, 0, > + filled << PAGE_SHIFT, UINT_MAX, SG_MAX_SINGLE_ALLOC, > + GFP_KERNEL_ACCOUNT); > + > + if (ret) > + goto err; > + buf->allocated_length += filled * PAGE_SIZE; > + /* clean input for another bulk allocation */ > + memset(page_list, 0, filled * sizeof(*page_list)); > + to_fill = min_t(unsigned int, to_alloc, > + PAGE_SIZE / sizeof(*page_list)); > + } while (to_alloc > 0); > + > + kvfree(page_list); > + return 0; > 
+ > +err: > + kvfree(page_list); > + return ret; > +} > + > struct mlx5_vhca_data_buffer * > mlx5vf_alloc_data_buffer(struct mlx5_vf_migration_file *migf, > size_t length, > @@ -680,22 +725,20 @@ int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev, > goto err_out; > } > > - if (MLX5VF_PRE_COPY_SUPP(mvdev)) { > - if (async_data->stop_copy_chunk) { > - u8 header_idx = buf->stop_copy_chunk_num ? > - buf->stop_copy_chunk_num - 1 : 0; > + if (async_data->stop_copy_chunk) { > + u8 header_idx = buf->stop_copy_chunk_num ? > + buf->stop_copy_chunk_num - 1 : 0; > > - header_buf = migf->buf_header[header_idx]; > - migf->buf_header[header_idx] = NULL; > - } > + header_buf = migf->buf_header[header_idx]; > + migf->buf_header[header_idx] = NULL; > + } > > - if (!header_buf) { > - header_buf = mlx5vf_get_data_buffer(migf, > - sizeof(struct mlx5_vf_migration_header), DMA_NONE); > - if (IS_ERR(header_buf)) { > - err = PTR_ERR(header_buf); > - goto err_free; > - } > + if (!header_buf) { > + header_buf = mlx5vf_get_data_buffer(migf, > + sizeof(struct mlx5_vf_migration_header), DMA_NONE); > + if (IS_ERR(header_buf)) { > + err = PTR_ERR(header_buf); > + goto err_free; > } > } > > diff --git a/drivers/vfio/pci/mlx5/cmd.h b/drivers/vfio/pci/mlx5/cmd.h > index 707393df36c4..df421dc6de04 100644 > --- a/drivers/vfio/pci/mlx5/cmd.h > +++ b/drivers/vfio/pci/mlx5/cmd.h > @@ -13,9 +13,6 @@ > #include <linux/mlx5/cq.h> > #include <linux/mlx5/qp.h> > > -#define MLX5VF_PRE_COPY_SUPP(mvdev) \ > - ((mvdev)->core_device.vdev.migration_flags & VFIO_MIGRATION_PRE_COPY) > - > enum mlx5_vf_migf_state { > MLX5_MIGF_STATE_ERROR = 1, > MLX5_MIGF_STATE_PRE_COPY_ERROR, > @@ -25,7 +22,6 @@ enum mlx5_vf_migf_state { > }; > > enum mlx5_vf_load_state { > - MLX5_VF_LOAD_STATE_READ_IMAGE_NO_HEADER, > MLX5_VF_LOAD_STATE_READ_HEADER, > MLX5_VF_LOAD_STATE_PREP_HEADER_DATA, > MLX5_VF_LOAD_STATE_READ_HEADER_DATA, > @@ -228,8 +224,6 @@ struct mlx5_vhca_data_buffer * > mlx5vf_get_data_buffer(struct mlx5_vf_migration_file *migf, > size_t length, enum dma_data_direction dma_dir); > void mlx5vf_put_data_buffer(struct mlx5_vhca_data_buffer *buf); > -int mlx5vf_add_migration_pages(struct mlx5_vhca_data_buffer *buf, > - unsigned int npages); > struct page *mlx5vf_get_migration_page(struct mlx5_vhca_data_buffer *buf, > unsigned long offset); > void mlx5vf_state_mutex_unlock(struct mlx5vf_pci_core_device *mvdev); > diff --git a/drivers/vfio/pci/mlx5/main.c b/drivers/vfio/pci/mlx5/main.c > index 3982fcf60cf2..61d9b0f9146d 100644 > --- a/drivers/vfio/pci/mlx5/main.c > +++ b/drivers/vfio/pci/mlx5/main.c > @@ -65,50 +65,6 @@ mlx5vf_get_migration_page(struct mlx5_vhca_data_buffer *buf, > return NULL; > } > > -int mlx5vf_add_migration_pages(struct mlx5_vhca_data_buffer *buf, > - unsigned int npages) > -{ > - unsigned int to_alloc = npages; > - struct page **page_list; > - unsigned long filled; > - unsigned int to_fill; > - int ret; > - > - to_fill = min_t(unsigned int, npages, PAGE_SIZE / sizeof(*page_list)); > - page_list = kvzalloc(to_fill * sizeof(*page_list), GFP_KERNEL_ACCOUNT); > - if (!page_list) > - return -ENOMEM; > - > - do { > - filled = alloc_pages_bulk_array(GFP_KERNEL_ACCOUNT, to_fill, > - page_list); > - if (!filled) { > - ret = -ENOMEM; > - goto err; > - } > - to_alloc -= filled; > - ret = sg_alloc_append_table_from_pages( > - &buf->table, page_list, filled, 0, > - filled << PAGE_SHIFT, UINT_MAX, SG_MAX_SINGLE_ALLOC, > - GFP_KERNEL_ACCOUNT); > - > - if (ret) > - goto err; > - buf->allocated_length += filled * PAGE_SIZE; > - /* 
clean input for another bulk allocation */ > - memset(page_list, 0, filled * sizeof(*page_list)); > - to_fill = min_t(unsigned int, to_alloc, > - PAGE_SIZE / sizeof(*page_list)); > - } while (to_alloc > 0); > - > - kvfree(page_list); > - return 0; > - > -err: > - kvfree(page_list); > - return ret; > -} > - > static void mlx5vf_disable_fd(struct mlx5_vf_migration_file *migf) > { > mutex_lock(&migf->lock); > @@ -777,36 +733,6 @@ mlx5vf_append_page_to_mig_buf(struct mlx5_vhca_data_buffer *vhca_buf, > return 0; > } > > -static int > -mlx5vf_resume_read_image_no_header(struct mlx5_vhca_data_buffer *vhca_buf, > - loff_t requested_length, > - const char __user **buf, size_t *len, > - loff_t *pos, ssize_t *done) > -{ > - int ret; > - > - if (requested_length > MAX_LOAD_SIZE) > - return -ENOMEM; > - > - if (vhca_buf->allocated_length < requested_length) { > - ret = mlx5vf_add_migration_pages( > - vhca_buf, > - DIV_ROUND_UP(requested_length - vhca_buf->allocated_length, > - PAGE_SIZE)); > - if (ret) > - return ret; > - } > - > - while (*len) { > - ret = mlx5vf_append_page_to_mig_buf(vhca_buf, buf, len, pos, > - done); > - if (ret) > - return ret; > - } > - > - return 0; > -} > - > static ssize_t > mlx5vf_resume_read_image(struct mlx5_vf_migration_file *migf, > struct mlx5_vhca_data_buffer *vhca_buf, > @@ -1038,13 +964,6 @@ static ssize_t mlx5vf_resume_write(struct file *filp, const char __user *buf, > migf->load_state = MLX5_VF_LOAD_STATE_READ_IMAGE; > break; > } > - case MLX5_VF_LOAD_STATE_READ_IMAGE_NO_HEADER: > - ret = mlx5vf_resume_read_image_no_header(vhca_buf, > - requested_length, > - &buf, &len, pos, &done); > - if (ret) > - goto out_unlock; > - break; > case MLX5_VF_LOAD_STATE_READ_IMAGE: > ret = mlx5vf_resume_read_image(migf, vhca_buf, > migf->record_size, > @@ -1114,21 +1033,16 @@ mlx5vf_pci_resume_device_data(struct mlx5vf_pci_core_device *mvdev) > } > > migf->buf[0] = buf; > - if (MLX5VF_PRE_COPY_SUPP(mvdev)) { > - buf = mlx5vf_alloc_data_buffer(migf, > - sizeof(struct mlx5_vf_migration_header), DMA_NONE); > - if (IS_ERR(buf)) { > - ret = PTR_ERR(buf); > - goto out_buf; > - } > - > - migf->buf_header[0] = buf; > - migf->load_state = MLX5_VF_LOAD_STATE_READ_HEADER; > - } else { > - /* Initial state will be to read the image */ > - migf->load_state = MLX5_VF_LOAD_STATE_READ_IMAGE_NO_HEADER; > + buf = mlx5vf_alloc_data_buffer(migf, > + sizeof(struct mlx5_vf_migration_header), DMA_NONE); > + if (IS_ERR(buf)) { > + ret = PTR_ERR(buf); > + goto out_buf; > } > > + migf->buf_header[0] = buf; > + migf->load_state = MLX5_VF_LOAD_STATE_READ_HEADER; > + > stream_open(migf->filp->f_inode, migf->filp); > mutex_init(&migf->lock); > INIT_LIST_HEAD(&migf->buf_list); > @@ -1262,13 +1176,6 @@ mlx5vf_pci_step_device_state_locked(struct mlx5vf_pci_core_device *mvdev, > } > > if (cur == VFIO_DEVICE_STATE_RESUMING && new == VFIO_DEVICE_STATE_STOP) { > - if (!MLX5VF_PRE_COPY_SUPP(mvdev)) { > - ret = mlx5vf_cmd_load_vhca_state(mvdev, > - mvdev->resuming_migf, > - mvdev->resuming_migf->buf[0]); > - if (ret) > - return ERR_PTR(ret); > - } > mlx5vf_disable_fds(mvdev, NULL); > return NULL; > }
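For context on the userspace-visible effect raised above, here is a minimal sketch (not taken from the patch or thread) of probing the VFIO migration feature on an already-open device fd. Once the driver stops registering migration ops on firmware lacking the PRE_COPY capabilities, this query is expected to fail outright instead of returning a flag set without VFIO_MIGRATION_PRE_COPY; the fd open path and error handling are assumed/trimmed:

```c
/*
 * Minimal sketch: query the VFIO migration feature flags of a device fd.
 * The fd is assumed to have been opened via the usual VFIO group/cdev path.
 */
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/types.h>
#include <linux/vfio.h>

static int query_migration_flags(int device_fd, __u64 *flags)
{
	/* vfio_device_feature ends in a flexible array, so size a flat buffer */
	__u64 buf[(sizeof(struct vfio_device_feature) +
		   sizeof(struct vfio_device_feature_migration) + 7) / 8] = {};
	struct vfio_device_feature *feature = (void *)buf;
	struct vfio_device_feature_migration *mig = (void *)feature->data;

	feature->argsz = sizeof(buf);
	feature->flags = VFIO_DEVICE_FEATURE_GET | VFIO_DEVICE_FEATURE_MIGRATION;

	/* Fails when the driver registered no migration ops at all */
	if (ioctl(device_fd, VFIO_DEVICE_FEATURE, feature))
		return -1;

	*flags = mig->flags;
	return 0;
}

static void report(int device_fd)
{
	__u64 flags;

	if (query_migration_flags(device_fd, &flags)) {
		printf("device is not migration capable\n");
		return;
	}
	printf("PRE_COPY: %s\n",
	       flags & VFIO_MIGRATION_PRE_COPY ? "supported" : "not supported");
}
```

A management stack would typically run this kind of probe before offering live migration for the device, which is why disabling migration on older firmware would be visible immediately.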
On 05/03/2024 23:56, Alex Williamson wrote: > On Tue, 5 Mar 2024 12:30:37 +0200 > Yishai Hadas <yishaih@nvidia.com> wrote: > >> Enable live migration only once the firmware supports PRE_COPY. >> >> PRE_COPY has been supported by the firmware for a long time already and >> is required to achieve a low downtime upon live migration. >> >> This lets us clean up some old code that is not applicable those days >> while PRE_COPY is fully supported by the firmware. > > Was firmware without PRE_COPY support ever available to users? AIUI > this would disable migration support on devices with older firmware, so > if that firmware exists in the wild this should go through a > deprecation process. No firmware without PRE_COPY support ever available to users. > > We should also likely note a minimum firmware revision and time frame > when PRE_COPY support was added in firmware. Thanks, > Sure, I just sent V1 having that information. Thanks, Yishai > Alex > > >> >> Signed-off-by: Yishai Hadas <yishaih@nvidia.com> >> --- >> drivers/vfio/pci/mlx5/cmd.c | 83 +++++++++++++++++++------- >> drivers/vfio/pci/mlx5/cmd.h | 6 -- >> drivers/vfio/pci/mlx5/main.c | 109 +++-------------------------------- >> 3 files changed, 71 insertions(+), 127 deletions(-) >> >> diff --git a/drivers/vfio/pci/mlx5/cmd.c b/drivers/vfio/pci/mlx5/cmd.c >> index c54bcd5d0917..41a4b0cf4297 100644 >> --- a/drivers/vfio/pci/mlx5/cmd.c >> +++ b/drivers/vfio/pci/mlx5/cmd.c >> @@ -233,6 +233,10 @@ void mlx5vf_cmd_set_migratable(struct mlx5vf_pci_core_device *mvdev, >> if (!MLX5_CAP_GEN(mvdev->mdev, migration)) >> goto end; >> >> + if (!(MLX5_CAP_GEN_2(mvdev->mdev, migration_multi_load) && >> + MLX5_CAP_GEN_2(mvdev->mdev, migration_tracking_state))) >> + goto end; >> + >> mvdev->vf_id = pci_iov_vf_id(pdev); >> if (mvdev->vf_id < 0) >> goto end; >> @@ -262,17 +266,14 @@ void mlx5vf_cmd_set_migratable(struct mlx5vf_pci_core_device *mvdev, >> mvdev->migrate_cap = 1; >> mvdev->core_device.vdev.migration_flags = >> VFIO_MIGRATION_STOP_COPY | >> - VFIO_MIGRATION_P2P; >> + VFIO_MIGRATION_P2P | >> + VFIO_MIGRATION_PRE_COPY; >> + >> mvdev->core_device.vdev.mig_ops = mig_ops; >> init_completion(&mvdev->tracker_comp); >> if (MLX5_CAP_GEN(mvdev->mdev, adv_virtualization)) >> mvdev->core_device.vdev.log_ops = log_ops; >> >> - if (MLX5_CAP_GEN_2(mvdev->mdev, migration_multi_load) && >> - MLX5_CAP_GEN_2(mvdev->mdev, migration_tracking_state)) >> - mvdev->core_device.vdev.migration_flags |= >> - VFIO_MIGRATION_PRE_COPY; >> - >> if (MLX5_CAP_GEN_2(mvdev->mdev, migration_in_chunks)) >> mvdev->chunk_mode = 1; >> >> @@ -414,6 +415,50 @@ void mlx5vf_free_data_buffer(struct mlx5_vhca_data_buffer *buf) >> kfree(buf); >> } >> >> +static int mlx5vf_add_migration_pages(struct mlx5_vhca_data_buffer *buf, >> + unsigned int npages) >> +{ >> + unsigned int to_alloc = npages; >> + struct page **page_list; >> + unsigned long filled; >> + unsigned int to_fill; >> + int ret; >> + >> + to_fill = min_t(unsigned int, npages, PAGE_SIZE / sizeof(*page_list)); >> + page_list = kvzalloc(to_fill * sizeof(*page_list), GFP_KERNEL_ACCOUNT); >> + if (!page_list) >> + return -ENOMEM; >> + >> + do { >> + filled = alloc_pages_bulk_array(GFP_KERNEL_ACCOUNT, to_fill, >> + page_list); >> + if (!filled) { >> + ret = -ENOMEM; >> + goto err; >> + } >> + to_alloc -= filled; >> + ret = sg_alloc_append_table_from_pages( >> + &buf->table, page_list, filled, 0, >> + filled << PAGE_SHIFT, UINT_MAX, SG_MAX_SINGLE_ALLOC, >> + GFP_KERNEL_ACCOUNT); >> + >> + if (ret) >> + goto err; >> + 
buf->allocated_length += filled * PAGE_SIZE; >> + /* clean input for another bulk allocation */ >> + memset(page_list, 0, filled * sizeof(*page_list)); >> + to_fill = min_t(unsigned int, to_alloc, >> + PAGE_SIZE / sizeof(*page_list)); >> + } while (to_alloc > 0); >> + >> + kvfree(page_list); >> + return 0; >> + >> +err: >> + kvfree(page_list); >> + return ret; >> +} >> + >> struct mlx5_vhca_data_buffer * >> mlx5vf_alloc_data_buffer(struct mlx5_vf_migration_file *migf, >> size_t length, >> @@ -680,22 +725,20 @@ int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev, >> goto err_out; >> } >> >> - if (MLX5VF_PRE_COPY_SUPP(mvdev)) { >> - if (async_data->stop_copy_chunk) { >> - u8 header_idx = buf->stop_copy_chunk_num ? >> - buf->stop_copy_chunk_num - 1 : 0; >> + if (async_data->stop_copy_chunk) { >> + u8 header_idx = buf->stop_copy_chunk_num ? >> + buf->stop_copy_chunk_num - 1 : 0; >> >> - header_buf = migf->buf_header[header_idx]; >> - migf->buf_header[header_idx] = NULL; >> - } >> + header_buf = migf->buf_header[header_idx]; >> + migf->buf_header[header_idx] = NULL; >> + } >> >> - if (!header_buf) { >> - header_buf = mlx5vf_get_data_buffer(migf, >> - sizeof(struct mlx5_vf_migration_header), DMA_NONE); >> - if (IS_ERR(header_buf)) { >> - err = PTR_ERR(header_buf); >> - goto err_free; >> - } >> + if (!header_buf) { >> + header_buf = mlx5vf_get_data_buffer(migf, >> + sizeof(struct mlx5_vf_migration_header), DMA_NONE); >> + if (IS_ERR(header_buf)) { >> + err = PTR_ERR(header_buf); >> + goto err_free; >> } >> } >> >> diff --git a/drivers/vfio/pci/mlx5/cmd.h b/drivers/vfio/pci/mlx5/cmd.h >> index 707393df36c4..df421dc6de04 100644 >> --- a/drivers/vfio/pci/mlx5/cmd.h >> +++ b/drivers/vfio/pci/mlx5/cmd.h >> @@ -13,9 +13,6 @@ >> #include <linux/mlx5/cq.h> >> #include <linux/mlx5/qp.h> >> >> -#define MLX5VF_PRE_COPY_SUPP(mvdev) \ >> - ((mvdev)->core_device.vdev.migration_flags & VFIO_MIGRATION_PRE_COPY) >> - >> enum mlx5_vf_migf_state { >> MLX5_MIGF_STATE_ERROR = 1, >> MLX5_MIGF_STATE_PRE_COPY_ERROR, >> @@ -25,7 +22,6 @@ enum mlx5_vf_migf_state { >> }; >> >> enum mlx5_vf_load_state { >> - MLX5_VF_LOAD_STATE_READ_IMAGE_NO_HEADER, >> MLX5_VF_LOAD_STATE_READ_HEADER, >> MLX5_VF_LOAD_STATE_PREP_HEADER_DATA, >> MLX5_VF_LOAD_STATE_READ_HEADER_DATA, >> @@ -228,8 +224,6 @@ struct mlx5_vhca_data_buffer * >> mlx5vf_get_data_buffer(struct mlx5_vf_migration_file *migf, >> size_t length, enum dma_data_direction dma_dir); >> void mlx5vf_put_data_buffer(struct mlx5_vhca_data_buffer *buf); >> -int mlx5vf_add_migration_pages(struct mlx5_vhca_data_buffer *buf, >> - unsigned int npages); >> struct page *mlx5vf_get_migration_page(struct mlx5_vhca_data_buffer *buf, >> unsigned long offset); >> void mlx5vf_state_mutex_unlock(struct mlx5vf_pci_core_device *mvdev); >> diff --git a/drivers/vfio/pci/mlx5/main.c b/drivers/vfio/pci/mlx5/main.c >> index 3982fcf60cf2..61d9b0f9146d 100644 >> --- a/drivers/vfio/pci/mlx5/main.c >> +++ b/drivers/vfio/pci/mlx5/main.c >> @@ -65,50 +65,6 @@ mlx5vf_get_migration_page(struct mlx5_vhca_data_buffer *buf, >> return NULL; >> } >> >> -int mlx5vf_add_migration_pages(struct mlx5_vhca_data_buffer *buf, >> - unsigned int npages) >> -{ >> - unsigned int to_alloc = npages; >> - struct page **page_list; >> - unsigned long filled; >> - unsigned int to_fill; >> - int ret; >> - >> - to_fill = min_t(unsigned int, npages, PAGE_SIZE / sizeof(*page_list)); >> - page_list = kvzalloc(to_fill * sizeof(*page_list), GFP_KERNEL_ACCOUNT); >> - if (!page_list) >> - return -ENOMEM; >> - >> - do { >> - 
filled = alloc_pages_bulk_array(GFP_KERNEL_ACCOUNT, to_fill, >> - page_list); >> - if (!filled) { >> - ret = -ENOMEM; >> - goto err; >> - } >> - to_alloc -= filled; >> - ret = sg_alloc_append_table_from_pages( >> - &buf->table, page_list, filled, 0, >> - filled << PAGE_SHIFT, UINT_MAX, SG_MAX_SINGLE_ALLOC, >> - GFP_KERNEL_ACCOUNT); >> - >> - if (ret) >> - goto err; >> - buf->allocated_length += filled * PAGE_SIZE; >> - /* clean input for another bulk allocation */ >> - memset(page_list, 0, filled * sizeof(*page_list)); >> - to_fill = min_t(unsigned int, to_alloc, >> - PAGE_SIZE / sizeof(*page_list)); >> - } while (to_alloc > 0); >> - >> - kvfree(page_list); >> - return 0; >> - >> -err: >> - kvfree(page_list); >> - return ret; >> -} >> - >> static void mlx5vf_disable_fd(struct mlx5_vf_migration_file *migf) >> { >> mutex_lock(&migf->lock); >> @@ -777,36 +733,6 @@ mlx5vf_append_page_to_mig_buf(struct mlx5_vhca_data_buffer *vhca_buf, >> return 0; >> } >> >> -static int >> -mlx5vf_resume_read_image_no_header(struct mlx5_vhca_data_buffer *vhca_buf, >> - loff_t requested_length, >> - const char __user **buf, size_t *len, >> - loff_t *pos, ssize_t *done) >> -{ >> - int ret; >> - >> - if (requested_length > MAX_LOAD_SIZE) >> - return -ENOMEM; >> - >> - if (vhca_buf->allocated_length < requested_length) { >> - ret = mlx5vf_add_migration_pages( >> - vhca_buf, >> - DIV_ROUND_UP(requested_length - vhca_buf->allocated_length, >> - PAGE_SIZE)); >> - if (ret) >> - return ret; >> - } >> - >> - while (*len) { >> - ret = mlx5vf_append_page_to_mig_buf(vhca_buf, buf, len, pos, >> - done); >> - if (ret) >> - return ret; >> - } >> - >> - return 0; >> -} >> - >> static ssize_t >> mlx5vf_resume_read_image(struct mlx5_vf_migration_file *migf, >> struct mlx5_vhca_data_buffer *vhca_buf, >> @@ -1038,13 +964,6 @@ static ssize_t mlx5vf_resume_write(struct file *filp, const char __user *buf, >> migf->load_state = MLX5_VF_LOAD_STATE_READ_IMAGE; >> break; >> } >> - case MLX5_VF_LOAD_STATE_READ_IMAGE_NO_HEADER: >> - ret = mlx5vf_resume_read_image_no_header(vhca_buf, >> - requested_length, >> - &buf, &len, pos, &done); >> - if (ret) >> - goto out_unlock; >> - break; >> case MLX5_VF_LOAD_STATE_READ_IMAGE: >> ret = mlx5vf_resume_read_image(migf, vhca_buf, >> migf->record_size, >> @@ -1114,21 +1033,16 @@ mlx5vf_pci_resume_device_data(struct mlx5vf_pci_core_device *mvdev) >> } >> >> migf->buf[0] = buf; >> - if (MLX5VF_PRE_COPY_SUPP(mvdev)) { >> - buf = mlx5vf_alloc_data_buffer(migf, >> - sizeof(struct mlx5_vf_migration_header), DMA_NONE); >> - if (IS_ERR(buf)) { >> - ret = PTR_ERR(buf); >> - goto out_buf; >> - } >> - >> - migf->buf_header[0] = buf; >> - migf->load_state = MLX5_VF_LOAD_STATE_READ_HEADER; >> - } else { >> - /* Initial state will be to read the image */ >> - migf->load_state = MLX5_VF_LOAD_STATE_READ_IMAGE_NO_HEADER; >> + buf = mlx5vf_alloc_data_buffer(migf, >> + sizeof(struct mlx5_vf_migration_header), DMA_NONE); >> + if (IS_ERR(buf)) { >> + ret = PTR_ERR(buf); >> + goto out_buf; >> } >> >> + migf->buf_header[0] = buf; >> + migf->load_state = MLX5_VF_LOAD_STATE_READ_HEADER; >> + >> stream_open(migf->filp->f_inode, migf->filp); >> mutex_init(&migf->lock); >> INIT_LIST_HEAD(&migf->buf_list); >> @@ -1262,13 +1176,6 @@ mlx5vf_pci_step_device_state_locked(struct mlx5vf_pci_core_device *mvdev, >> } >> >> if (cur == VFIO_DEVICE_STATE_RESUMING && new == VFIO_DEVICE_STATE_STOP) { >> - if (!MLX5VF_PRE_COPY_SUPP(mvdev)) { >> - ret = mlx5vf_cmd_load_vhca_state(mvdev, >> - mvdev->resuming_migf, >> - 
mvdev->resuming_migf->buf[0]); >> - if (ret) >> - return ERR_PTR(ret); >> - } >> mlx5vf_disable_fds(mvdev, NULL); >> return NULL; >> } >
diff --git a/drivers/vfio/pci/mlx5/cmd.c b/drivers/vfio/pci/mlx5/cmd.c index c54bcd5d0917..41a4b0cf4297 100644 --- a/drivers/vfio/pci/mlx5/cmd.c +++ b/drivers/vfio/pci/mlx5/cmd.c @@ -233,6 +233,10 @@ void mlx5vf_cmd_set_migratable(struct mlx5vf_pci_core_device *mvdev, if (!MLX5_CAP_GEN(mvdev->mdev, migration)) goto end; + if (!(MLX5_CAP_GEN_2(mvdev->mdev, migration_multi_load) && + MLX5_CAP_GEN_2(mvdev->mdev, migration_tracking_state))) + goto end; + mvdev->vf_id = pci_iov_vf_id(pdev); if (mvdev->vf_id < 0) goto end; @@ -262,17 +266,14 @@ void mlx5vf_cmd_set_migratable(struct mlx5vf_pci_core_device *mvdev, mvdev->migrate_cap = 1; mvdev->core_device.vdev.migration_flags = VFIO_MIGRATION_STOP_COPY | - VFIO_MIGRATION_P2P; + VFIO_MIGRATION_P2P | + VFIO_MIGRATION_PRE_COPY; + mvdev->core_device.vdev.mig_ops = mig_ops; init_completion(&mvdev->tracker_comp); if (MLX5_CAP_GEN(mvdev->mdev, adv_virtualization)) mvdev->core_device.vdev.log_ops = log_ops; - if (MLX5_CAP_GEN_2(mvdev->mdev, migration_multi_load) && - MLX5_CAP_GEN_2(mvdev->mdev, migration_tracking_state)) - mvdev->core_device.vdev.migration_flags |= - VFIO_MIGRATION_PRE_COPY; - if (MLX5_CAP_GEN_2(mvdev->mdev, migration_in_chunks)) mvdev->chunk_mode = 1; @@ -414,6 +415,50 @@ void mlx5vf_free_data_buffer(struct mlx5_vhca_data_buffer *buf) kfree(buf); } +static int mlx5vf_add_migration_pages(struct mlx5_vhca_data_buffer *buf, + unsigned int npages) +{ + unsigned int to_alloc = npages; + struct page **page_list; + unsigned long filled; + unsigned int to_fill; + int ret; + + to_fill = min_t(unsigned int, npages, PAGE_SIZE / sizeof(*page_list)); + page_list = kvzalloc(to_fill * sizeof(*page_list), GFP_KERNEL_ACCOUNT); + if (!page_list) + return -ENOMEM; + + do { + filled = alloc_pages_bulk_array(GFP_KERNEL_ACCOUNT, to_fill, + page_list); + if (!filled) { + ret = -ENOMEM; + goto err; + } + to_alloc -= filled; + ret = sg_alloc_append_table_from_pages( + &buf->table, page_list, filled, 0, + filled << PAGE_SHIFT, UINT_MAX, SG_MAX_SINGLE_ALLOC, + GFP_KERNEL_ACCOUNT); + + if (ret) + goto err; + buf->allocated_length += filled * PAGE_SIZE; + /* clean input for another bulk allocation */ + memset(page_list, 0, filled * sizeof(*page_list)); + to_fill = min_t(unsigned int, to_alloc, + PAGE_SIZE / sizeof(*page_list)); + } while (to_alloc > 0); + + kvfree(page_list); + return 0; + +err: + kvfree(page_list); + return ret; +} + struct mlx5_vhca_data_buffer * mlx5vf_alloc_data_buffer(struct mlx5_vf_migration_file *migf, size_t length, @@ -680,22 +725,20 @@ int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev, goto err_out; } - if (MLX5VF_PRE_COPY_SUPP(mvdev)) { - if (async_data->stop_copy_chunk) { - u8 header_idx = buf->stop_copy_chunk_num ? - buf->stop_copy_chunk_num - 1 : 0; + if (async_data->stop_copy_chunk) { + u8 header_idx = buf->stop_copy_chunk_num ? 
+ buf->stop_copy_chunk_num - 1 : 0; - header_buf = migf->buf_header[header_idx]; - migf->buf_header[header_idx] = NULL; - } + header_buf = migf->buf_header[header_idx]; + migf->buf_header[header_idx] = NULL; + } - if (!header_buf) { - header_buf = mlx5vf_get_data_buffer(migf, - sizeof(struct mlx5_vf_migration_header), DMA_NONE); - if (IS_ERR(header_buf)) { - err = PTR_ERR(header_buf); - goto err_free; - } + if (!header_buf) { + header_buf = mlx5vf_get_data_buffer(migf, + sizeof(struct mlx5_vf_migration_header), DMA_NONE); + if (IS_ERR(header_buf)) { + err = PTR_ERR(header_buf); + goto err_free; } } diff --git a/drivers/vfio/pci/mlx5/cmd.h b/drivers/vfio/pci/mlx5/cmd.h index 707393df36c4..df421dc6de04 100644 --- a/drivers/vfio/pci/mlx5/cmd.h +++ b/drivers/vfio/pci/mlx5/cmd.h @@ -13,9 +13,6 @@ #include <linux/mlx5/cq.h> #include <linux/mlx5/qp.h> -#define MLX5VF_PRE_COPY_SUPP(mvdev) \ - ((mvdev)->core_device.vdev.migration_flags & VFIO_MIGRATION_PRE_COPY) - enum mlx5_vf_migf_state { MLX5_MIGF_STATE_ERROR = 1, MLX5_MIGF_STATE_PRE_COPY_ERROR, @@ -25,7 +22,6 @@ enum mlx5_vf_migf_state { }; enum mlx5_vf_load_state { - MLX5_VF_LOAD_STATE_READ_IMAGE_NO_HEADER, MLX5_VF_LOAD_STATE_READ_HEADER, MLX5_VF_LOAD_STATE_PREP_HEADER_DATA, MLX5_VF_LOAD_STATE_READ_HEADER_DATA, @@ -228,8 +224,6 @@ struct mlx5_vhca_data_buffer * mlx5vf_get_data_buffer(struct mlx5_vf_migration_file *migf, size_t length, enum dma_data_direction dma_dir); void mlx5vf_put_data_buffer(struct mlx5_vhca_data_buffer *buf); -int mlx5vf_add_migration_pages(struct mlx5_vhca_data_buffer *buf, - unsigned int npages); struct page *mlx5vf_get_migration_page(struct mlx5_vhca_data_buffer *buf, unsigned long offset); void mlx5vf_state_mutex_unlock(struct mlx5vf_pci_core_device *mvdev); diff --git a/drivers/vfio/pci/mlx5/main.c b/drivers/vfio/pci/mlx5/main.c index 3982fcf60cf2..61d9b0f9146d 100644 --- a/drivers/vfio/pci/mlx5/main.c +++ b/drivers/vfio/pci/mlx5/main.c @@ -65,50 +65,6 @@ mlx5vf_get_migration_page(struct mlx5_vhca_data_buffer *buf, return NULL; } -int mlx5vf_add_migration_pages(struct mlx5_vhca_data_buffer *buf, - unsigned int npages) -{ - unsigned int to_alloc = npages; - struct page **page_list; - unsigned long filled; - unsigned int to_fill; - int ret; - - to_fill = min_t(unsigned int, npages, PAGE_SIZE / sizeof(*page_list)); - page_list = kvzalloc(to_fill * sizeof(*page_list), GFP_KERNEL_ACCOUNT); - if (!page_list) - return -ENOMEM; - - do { - filled = alloc_pages_bulk_array(GFP_KERNEL_ACCOUNT, to_fill, - page_list); - if (!filled) { - ret = -ENOMEM; - goto err; - } - to_alloc -= filled; - ret = sg_alloc_append_table_from_pages( - &buf->table, page_list, filled, 0, - filled << PAGE_SHIFT, UINT_MAX, SG_MAX_SINGLE_ALLOC, - GFP_KERNEL_ACCOUNT); - - if (ret) - goto err; - buf->allocated_length += filled * PAGE_SIZE; - /* clean input for another bulk allocation */ - memset(page_list, 0, filled * sizeof(*page_list)); - to_fill = min_t(unsigned int, to_alloc, - PAGE_SIZE / sizeof(*page_list)); - } while (to_alloc > 0); - - kvfree(page_list); - return 0; - -err: - kvfree(page_list); - return ret; -} - static void mlx5vf_disable_fd(struct mlx5_vf_migration_file *migf) { mutex_lock(&migf->lock); @@ -777,36 +733,6 @@ mlx5vf_append_page_to_mig_buf(struct mlx5_vhca_data_buffer *vhca_buf, return 0; } -static int -mlx5vf_resume_read_image_no_header(struct mlx5_vhca_data_buffer *vhca_buf, - loff_t requested_length, - const char __user **buf, size_t *len, - loff_t *pos, ssize_t *done) -{ - int ret; - - if (requested_length > MAX_LOAD_SIZE) - 
return -ENOMEM; - - if (vhca_buf->allocated_length < requested_length) { - ret = mlx5vf_add_migration_pages( - vhca_buf, - DIV_ROUND_UP(requested_length - vhca_buf->allocated_length, - PAGE_SIZE)); - if (ret) - return ret; - } - - while (*len) { - ret = mlx5vf_append_page_to_mig_buf(vhca_buf, buf, len, pos, - done); - if (ret) - return ret; - } - - return 0; -} - static ssize_t mlx5vf_resume_read_image(struct mlx5_vf_migration_file *migf, struct mlx5_vhca_data_buffer *vhca_buf, @@ -1038,13 +964,6 @@ static ssize_t mlx5vf_resume_write(struct file *filp, const char __user *buf, migf->load_state = MLX5_VF_LOAD_STATE_READ_IMAGE; break; } - case MLX5_VF_LOAD_STATE_READ_IMAGE_NO_HEADER: - ret = mlx5vf_resume_read_image_no_header(vhca_buf, - requested_length, - &buf, &len, pos, &done); - if (ret) - goto out_unlock; - break; case MLX5_VF_LOAD_STATE_READ_IMAGE: ret = mlx5vf_resume_read_image(migf, vhca_buf, migf->record_size, @@ -1114,21 +1033,16 @@ mlx5vf_pci_resume_device_data(struct mlx5vf_pci_core_device *mvdev) } migf->buf[0] = buf; - if (MLX5VF_PRE_COPY_SUPP(mvdev)) { - buf = mlx5vf_alloc_data_buffer(migf, - sizeof(struct mlx5_vf_migration_header), DMA_NONE); - if (IS_ERR(buf)) { - ret = PTR_ERR(buf); - goto out_buf; - } - - migf->buf_header[0] = buf; - migf->load_state = MLX5_VF_LOAD_STATE_READ_HEADER; - } else { - /* Initial state will be to read the image */ - migf->load_state = MLX5_VF_LOAD_STATE_READ_IMAGE_NO_HEADER; + buf = mlx5vf_alloc_data_buffer(migf, + sizeof(struct mlx5_vf_migration_header), DMA_NONE); + if (IS_ERR(buf)) { + ret = PTR_ERR(buf); + goto out_buf; } + migf->buf_header[0] = buf; + migf->load_state = MLX5_VF_LOAD_STATE_READ_HEADER; + stream_open(migf->filp->f_inode, migf->filp); mutex_init(&migf->lock); INIT_LIST_HEAD(&migf->buf_list); @@ -1262,13 +1176,6 @@ mlx5vf_pci_step_device_state_locked(struct mlx5vf_pci_core_device *mvdev, } if (cur == VFIO_DEVICE_STATE_RESUMING && new == VFIO_DEVICE_STATE_STOP) { - if (!MLX5VF_PRE_COPY_SUPP(mvdev)) { - ret = mlx5vf_cmd_load_vhca_state(mvdev, - mvdev->resuming_migf, - mvdev->resuming_migf->buf[0]); - if (ret) - return ERR_PTR(ret); - } mlx5vf_disable_fds(mvdev, NULL); return NULL; }
Enable live migration only once the firmware supports PRE_COPY.

PRE_COPY has been supported by the firmware for a long time already and
is required to achieve a low downtime upon live migration.

This lets us clean up some old code that is no longer applicable now that
PRE_COPY is fully supported by the firmware.

Signed-off-by: Yishai Hadas <yishaih@nvidia.com>
---
 drivers/vfio/pci/mlx5/cmd.c  |  83 +++++++++++++++++++-------
 drivers/vfio/pci/mlx5/cmd.h  |   6 --
 drivers/vfio/pci/mlx5/main.c | 109 +++--------------------------------
 3 files changed, 71 insertions(+), 127 deletions(-)
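As a companion sketch of what the now-mandatory VFIO_MIGRATION_PRE_COPY flag lets userspace do (not part of the submission; the open device fd, function name, and minimal error handling are illustrative assumptions), the device is switched into the PRE_COPY state while it keeps running, and the returned data_fd streams the initial device state, which is what keeps the later STOP_COPY downtime low:

```c
/*
 * Sketch: enter the PRE_COPY migration state on a device whose migration
 * flags include VFIO_MIGRATION_PRE_COPY, and return the data_fd that
 * streams the pre-copy image while the device is still running.
 */
#include <sys/ioctl.h>
#include <linux/types.h>
#include <linux/vfio.h>

static int enter_pre_copy(int device_fd)
{
	__u64 buf[(sizeof(struct vfio_device_feature) +
		   sizeof(struct vfio_device_feature_mig_state) + 7) / 8] = {};
	struct vfio_device_feature *feature = (void *)buf;
	struct vfio_device_feature_mig_state *mig = (void *)feature->data;

	feature->argsz = sizeof(buf);
	feature->flags = VFIO_DEVICE_FEATURE_SET |
			 VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE;
	mig->device_state = VFIO_DEVICE_STATE_PRE_COPY;

	if (ioctl(device_fd, VFIO_DEVICE_FEATURE, feature))
		return -1;

	return mig->data_fd;	/* read() the pre-copy data from this fd */
}
```

The final transition to STOP_COPY then only has to transfer whatever changed after this initial stream, which is the downtime reduction the commit message refers to.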