Message ID | 1603449643-12851-10-git-send-email-kwankhede@nvidia.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Add migration support for VFIO devices | expand |
Reviewed-by: Yan Zhao <yan.y.zhao@intel.com> On Fri, Oct 23, 2020 at 04:10:35PM +0530, Kirti Wankhede wrote: > Sequence during _RESUMING device state: > While data for this device is available, repeat the steps below: > a. read data_offset, the offset at which the user application should write data. > b. write data_size bytes of data to the migration region at data_offset. > c. write data_size, which indicates to the vendor driver that data has been written to > the staging buffer. > > For the user, the data is opaque. The user should write data in the same order as it was > received. > > Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com> > Reviewed-by: Neo Jia <cjia@nvidia.com> > Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com> > --- > hw/vfio/migration.c | 195 +++++++++++++++++++++++++++++++++++++++++++++++++++ > hw/vfio/trace-events | 4 ++ > 2 files changed, 199 insertions(+) > > diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c > index be9e4aba541d..240646592b39 100644 > --- a/hw/vfio/migration.c > +++ b/hw/vfio/migration.c > @@ -257,6 +257,77 @@ static int vfio_save_buffer(QEMUFile *f, VFIODevice *vbasedev, uint64_t *size) > return ret; > } > > +static int vfio_load_buffer(QEMUFile *f, VFIODevice *vbasedev, > + uint64_t data_size) > +{ > + VFIORegion *region = &vbasedev->migration->region; > + uint64_t data_offset = 0, size, report_size; > + int ret; > + > + do { > + ret = vfio_mig_read(vbasedev, &data_offset, sizeof(data_offset), > + region->fd_offset + VFIO_MIG_STRUCT_OFFSET(data_offset)); > + if (ret < 0) { > + return ret; > + } > + > + if (data_offset + data_size > region->size) { > + /* > + * If data_size is greater than the data section of migration region > + * then iterate the write buffer operation. This case can occur if > + * size of migration region at destination is smaller than size of > + * migration region at source. 
> + */ > + report_size = size = region->size - data_offset; > + data_size -= size; > + } else { > + report_size = size = data_size; > + data_size = 0; > + } > + > + trace_vfio_load_state_device_data(vbasedev->name, data_offset, size); > + > + while (size) { > + void *buf; > + uint64_t sec_size; > + bool buf_alloc = false; > + > + buf = get_data_section_size(region, data_offset, size, &sec_size); > + > + if (!buf) { > + buf = g_try_malloc(sec_size); > + if (!buf) { > + error_report("%s: Error allocating buffer ", __func__); > + return -ENOMEM; > + } > + buf_alloc = true; > + } > + > + qemu_get_buffer(f, buf, sec_size); > + > + if (buf_alloc) { > + ret = vfio_mig_write(vbasedev, buf, sec_size, > + region->fd_offset + data_offset); > + g_free(buf); > + > + if (ret < 0) { > + return ret; > + } > + } > + size -= sec_size; > + data_offset += sec_size; > + } > + > + ret = vfio_mig_write(vbasedev, &report_size, sizeof(report_size), > + region->fd_offset + VFIO_MIG_STRUCT_OFFSET(data_size)); > + if (ret < 0) { > + return ret; > + } > + } while (data_size); > + > + return 0; > +} > + > static int vfio_update_pending(VFIODevice *vbasedev) > { > VFIOMigration *migration = vbasedev->migration; > @@ -293,6 +364,33 @@ static int vfio_save_device_config_state(QEMUFile *f, void *opaque) > return qemu_file_get_error(f); > } > > +static int vfio_load_device_config_state(QEMUFile *f, void *opaque) > +{ > + VFIODevice *vbasedev = opaque; > + uint64_t data; > + > + if (vbasedev->ops && vbasedev->ops->vfio_load_config) { > + int ret; > + > + ret = vbasedev->ops->vfio_load_config(vbasedev, f); > + if (ret) { > + error_report("%s: Failed to load device config space", > + vbasedev->name); > + return ret; > + } > + } > + > + data = qemu_get_be64(f); > + if (data != VFIO_MIG_FLAG_END_OF_STATE) { > + error_report("%s: Failed loading device config space, " > + "end flag incorrect 0x%"PRIx64, vbasedev->name, data); > + return -EINVAL; > + } > + > + 
trace_vfio_load_device_config_state(vbasedev->name); > + return qemu_file_get_error(f); > +} > + > static void vfio_migration_cleanup(VFIODevice *vbasedev) > { > VFIOMigration *migration = vbasedev->migration; > @@ -483,12 +581,109 @@ static int vfio_save_complete_precopy(QEMUFile *f, void *opaque) > return ret; > } > > +static int vfio_load_setup(QEMUFile *f, void *opaque) > +{ > + VFIODevice *vbasedev = opaque; > + VFIOMigration *migration = vbasedev->migration; > + int ret = 0; > + > + if (migration->region.mmaps) { > + ret = vfio_region_mmap(&migration->region); > + if (ret) { > + error_report("%s: Failed to mmap VFIO migration region %d: %s", > + vbasedev->name, migration->region.nr, > + strerror(-ret)); > + error_report("%s: Falling back to slow path", vbasedev->name); > + } > + } > + > + ret = vfio_migration_set_state(vbasedev, ~VFIO_DEVICE_STATE_MASK, > + VFIO_DEVICE_STATE_RESUMING); > + if (ret) { > + error_report("%s: Failed to set state RESUMING", vbasedev->name); > + if (migration->region.mmaps) { > + vfio_region_unmap(&migration->region); > + } > + } > + return ret; > +} > + > +static int vfio_load_cleanup(void *opaque) > +{ > + VFIODevice *vbasedev = opaque; > + > + vfio_migration_cleanup(vbasedev); > + trace_vfio_load_cleanup(vbasedev->name); > + return 0; > +} > + > +static int vfio_load_state(QEMUFile *f, void *opaque, int version_id) > +{ > + VFIODevice *vbasedev = opaque; > + int ret = 0; > + uint64_t data; > + > + data = qemu_get_be64(f); > + while (data != VFIO_MIG_FLAG_END_OF_STATE) { > + > + trace_vfio_load_state(vbasedev->name, data); > + > + switch (data) { > + case VFIO_MIG_FLAG_DEV_CONFIG_STATE: > + { > + ret = vfio_load_device_config_state(f, opaque); > + if (ret) { > + return ret; > + } > + break; > + } > + case VFIO_MIG_FLAG_DEV_SETUP_STATE: > + { > + data = qemu_get_be64(f); > + if (data == VFIO_MIG_FLAG_END_OF_STATE) { > + return ret; > + } else { > + error_report("%s: SETUP STATE: EOS not found 0x%"PRIx64, > + vbasedev->name, data); 
> + return -EINVAL; > + } > + break; > + } > + case VFIO_MIG_FLAG_DEV_DATA_STATE: > + { > + uint64_t data_size = qemu_get_be64(f); > + > + if (data_size) { > + ret = vfio_load_buffer(f, vbasedev, data_size); > + if (ret < 0) { > + return ret; > + } > + } > + break; > + } > + default: > + error_report("%s: Unknown tag 0x%"PRIx64, vbasedev->name, data); > + return -EINVAL; > + } > + > + data = qemu_get_be64(f); > + ret = qemu_file_get_error(f); > + if (ret) { > + return ret; > + } > + } > + return ret; > +} > + > static SaveVMHandlers savevm_vfio_handlers = { > .save_setup = vfio_save_setup, > .save_cleanup = vfio_save_cleanup, > .save_live_pending = vfio_save_pending, > .save_live_iterate = vfio_save_iterate, > .save_live_complete_precopy = vfio_save_complete_precopy, > + .load_setup = vfio_load_setup, > + .load_cleanup = vfio_load_cleanup, > + .load_state = vfio_load_state, > }; > > /* ---------------------------------------------------------------------- */ > diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events > index 9f5712dab1ea..a75b5208818c 100644 > --- a/hw/vfio/trace-events > +++ b/hw/vfio/trace-events > @@ -159,3 +159,7 @@ vfio_save_device_config_state(const char *name) " (%s)" > vfio_save_pending(const char *name, uint64_t precopy, uint64_t postcopy, uint64_t compatible) " (%s) precopy 0x%"PRIx64" postcopy 0x%"PRIx64" compatible 0x%"PRIx64 > vfio_save_iterate(const char *name, int data_size) " (%s) data_size %d" > vfio_save_complete_precopy(const char *name) " (%s)" > +vfio_load_device_config_state(const char *name) " (%s)" > +vfio_load_state(const char *name, uint64_t data) " (%s) data 0x%"PRIx64 > +vfio_load_state_device_data(const char *name, uint64_t data_offset, uint64_t data_size) " (%s) Offset 0x%"PRIx64" size 0x%"PRIx64 > +vfio_load_cleanup(const char *name) " (%s)" > -- > 2.7.0 >
diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c index be9e4aba541d..240646592b39 100644 --- a/hw/vfio/migration.c +++ b/hw/vfio/migration.c @@ -257,6 +257,77 @@ static int vfio_save_buffer(QEMUFile *f, VFIODevice *vbasedev, uint64_t *size) return ret; } +static int vfio_load_buffer(QEMUFile *f, VFIODevice *vbasedev, + uint64_t data_size) +{ + VFIORegion *region = &vbasedev->migration->region; + uint64_t data_offset = 0, size, report_size; + int ret; + + do { + ret = vfio_mig_read(vbasedev, &data_offset, sizeof(data_offset), + region->fd_offset + VFIO_MIG_STRUCT_OFFSET(data_offset)); + if (ret < 0) { + return ret; + } + + if (data_offset + data_size > region->size) { + /* + * If data_size is greater than the data section of migration region + * then iterate the write buffer operation. This case can occur if + * size of migration region at destination is smaller than size of + * migration region at source. + */ + report_size = size = region->size - data_offset; + data_size -= size; + } else { + report_size = size = data_size; + data_size = 0; + } + + trace_vfio_load_state_device_data(vbasedev->name, data_offset, size); + + while (size) { + void *buf; + uint64_t sec_size; + bool buf_alloc = false; + + buf = get_data_section_size(region, data_offset, size, &sec_size); + + if (!buf) { + buf = g_try_malloc(sec_size); + if (!buf) { + error_report("%s: Error allocating buffer ", __func__); + return -ENOMEM; + } + buf_alloc = true; + } + + qemu_get_buffer(f, buf, sec_size); + + if (buf_alloc) { + ret = vfio_mig_write(vbasedev, buf, sec_size, + region->fd_offset + data_offset); + g_free(buf); + + if (ret < 0) { + return ret; + } + } + size -= sec_size; + data_offset += sec_size; + } + + ret = vfio_mig_write(vbasedev, &report_size, sizeof(report_size), + region->fd_offset + VFIO_MIG_STRUCT_OFFSET(data_size)); + if (ret < 0) { + return ret; + } + } while (data_size); + + return 0; +} + static int vfio_update_pending(VFIODevice *vbasedev) { VFIOMigration *migration = 
vbasedev->migration; @@ -293,6 +364,33 @@ static int vfio_save_device_config_state(QEMUFile *f, void *opaque) return qemu_file_get_error(f); } +static int vfio_load_device_config_state(QEMUFile *f, void *opaque) +{ + VFIODevice *vbasedev = opaque; + uint64_t data; + + if (vbasedev->ops && vbasedev->ops->vfio_load_config) { + int ret; + + ret = vbasedev->ops->vfio_load_config(vbasedev, f); + if (ret) { + error_report("%s: Failed to load device config space", + vbasedev->name); + return ret; + } + } + + data = qemu_get_be64(f); + if (data != VFIO_MIG_FLAG_END_OF_STATE) { + error_report("%s: Failed loading device config space, " + "end flag incorrect 0x%"PRIx64, vbasedev->name, data); + return -EINVAL; + } + + trace_vfio_load_device_config_state(vbasedev->name); + return qemu_file_get_error(f); +} + static void vfio_migration_cleanup(VFIODevice *vbasedev) { VFIOMigration *migration = vbasedev->migration; @@ -483,12 +581,109 @@ static int vfio_save_complete_precopy(QEMUFile *f, void *opaque) return ret; } +static int vfio_load_setup(QEMUFile *f, void *opaque) +{ + VFIODevice *vbasedev = opaque; + VFIOMigration *migration = vbasedev->migration; + int ret = 0; + + if (migration->region.mmaps) { + ret = vfio_region_mmap(&migration->region); + if (ret) { + error_report("%s: Failed to mmap VFIO migration region %d: %s", + vbasedev->name, migration->region.nr, + strerror(-ret)); + error_report("%s: Falling back to slow path", vbasedev->name); + } + } + + ret = vfio_migration_set_state(vbasedev, ~VFIO_DEVICE_STATE_MASK, + VFIO_DEVICE_STATE_RESUMING); + if (ret) { + error_report("%s: Failed to set state RESUMING", vbasedev->name); + if (migration->region.mmaps) { + vfio_region_unmap(&migration->region); + } + } + return ret; +} + +static int vfio_load_cleanup(void *opaque) +{ + VFIODevice *vbasedev = opaque; + + vfio_migration_cleanup(vbasedev); + trace_vfio_load_cleanup(vbasedev->name); + return 0; +} + +static int vfio_load_state(QEMUFile *f, void *opaque, int version_id) +{ 
+ VFIODevice *vbasedev = opaque; + int ret = 0; + uint64_t data; + + data = qemu_get_be64(f); + while (data != VFIO_MIG_FLAG_END_OF_STATE) { + + trace_vfio_load_state(vbasedev->name, data); + + switch (data) { + case VFIO_MIG_FLAG_DEV_CONFIG_STATE: + { + ret = vfio_load_device_config_state(f, opaque); + if (ret) { + return ret; + } + break; + } + case VFIO_MIG_FLAG_DEV_SETUP_STATE: + { + data = qemu_get_be64(f); + if (data == VFIO_MIG_FLAG_END_OF_STATE) { + return ret; + } else { + error_report("%s: SETUP STATE: EOS not found 0x%"PRIx64, + vbasedev->name, data); + return -EINVAL; + } + break; + } + case VFIO_MIG_FLAG_DEV_DATA_STATE: + { + uint64_t data_size = qemu_get_be64(f); + + if (data_size) { + ret = vfio_load_buffer(f, vbasedev, data_size); + if (ret < 0) { + return ret; + } + } + break; + } + default: + error_report("%s: Unknown tag 0x%"PRIx64, vbasedev->name, data); + return -EINVAL; + } + + data = qemu_get_be64(f); + ret = qemu_file_get_error(f); + if (ret) { + return ret; + } + } + return ret; +} + static SaveVMHandlers savevm_vfio_handlers = { .save_setup = vfio_save_setup, .save_cleanup = vfio_save_cleanup, .save_live_pending = vfio_save_pending, .save_live_iterate = vfio_save_iterate, .save_live_complete_precopy = vfio_save_complete_precopy, + .load_setup = vfio_load_setup, + .load_cleanup = vfio_load_cleanup, + .load_state = vfio_load_state, }; /* ---------------------------------------------------------------------- */ diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events index 9f5712dab1ea..a75b5208818c 100644 --- a/hw/vfio/trace-events +++ b/hw/vfio/trace-events @@ -159,3 +159,7 @@ vfio_save_device_config_state(const char *name) " (%s)" vfio_save_pending(const char *name, uint64_t precopy, uint64_t postcopy, uint64_t compatible) " (%s) precopy 0x%"PRIx64" postcopy 0x%"PRIx64" compatible 0x%"PRIx64 vfio_save_iterate(const char *name, int data_size) " (%s) data_size %d" vfio_save_complete_precopy(const char *name) " (%s)" 
+vfio_load_device_config_state(const char *name) " (%s)" +vfio_load_state(const char *name, uint64_t data) " (%s) data 0x%"PRIx64 +vfio_load_state_device_data(const char *name, uint64_t data_offset, uint64_t data_size) " (%s) Offset 0x%"PRIx64" size 0x%"PRIx64 +vfio_load_cleanup(const char *name) " (%s)"