Message ID | 1588632293-18932-7-git-send-email-kwankhede@nvidia.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Add migration support for VFIO devices | expand |
On Tue, 5 May 2020 04:14:41 +0530 Kirti Wankhede <kwankhede@nvidia.com> wrote: > VM state change handler gets called on change in VM's state. This is used to set > VFIO device state to _RUNNING. > > Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com> > Reviewed-by: Neo Jia <cjia@nvidia.com> > --- > hw/vfio/migration.c | 87 +++++++++++++++++++++++++++++++++++++++++++ > hw/vfio/trace-events | 2 + > include/hw/vfio/vfio-common.h | 4 ++ > 3 files changed, 93 insertions(+) > > diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c > index bf9384907ec0..e79b34003079 100644 > --- a/hw/vfio/migration.c > +++ b/hw/vfio/migration.c > @@ -10,6 +10,7 @@ > #include "qemu/osdep.h" > #include <linux/vfio.h> > > +#include "sysemu/runstate.h" > #include "hw/vfio/vfio-common.h" > #include "cpu.h" > #include "migration/migration.h" > @@ -74,6 +75,85 @@ err: > return ret; > } > > +static int vfio_migration_set_state(VFIODevice *vbasedev, uint32_t mask, > + uint32_t value) I find 'mask' and 'value' a bit confusing. 'mask' seems to be all the bits you want to keep, and 'value' the bits you want to add? 
> +{ > + VFIOMigration *migration = vbasedev->migration; > + VFIORegion *region = &migration->region; > + uint32_t device_state; > + int ret; > + > + ret = pread(vbasedev->fd, &device_state, sizeof(device_state), > + region->fd_offset + offsetof(struct vfio_device_migration_info, > + device_state)); > + if (ret < 0) { > + error_report("%s: Failed to read device state %d %s", > + vbasedev->name, ret, strerror(errno)); > + return ret; > + } > + > + device_state = (device_state & mask) | value; > + > + if (!VFIO_DEVICE_STATE_VALID(device_state)) { > + return -EINVAL; > + } > + > + ret = pwrite(vbasedev->fd, &device_state, sizeof(device_state), > + region->fd_offset + offsetof(struct vfio_device_migration_info, > + device_state)); > + if (ret < 0) { > + error_report("%s: Failed to set device state %d %s", > + vbasedev->name, ret, strerror(errno)); > + > + ret = pread(vbasedev->fd, &device_state, sizeof(device_state), > + region->fd_offset + offsetof(struct vfio_device_migration_info, > + device_state)); > + if (ret < 0) { > + error_report("%s: On failure, failed to read device state %d %s", > + vbasedev->name, ret, strerror(errno)); > + return ret; > + } > + > + if (VFIO_DEVICE_STATE_IS_ERROR(device_state)) { > + error_report("%s: Device is in error state 0x%x", > + vbasedev->name, device_state); > + return -EFAULT; Why -EFAULT? Also, if the device is in an error state, don't you want to propagate that state into the vbasedev as well? It does not look usable in that state, but that information is only available in the migration region. 
> + } > + } > + > + vbasedev->device_state = device_state; > + trace_vfio_migration_set_state(vbasedev->name, device_state); > + return 0; > +} > + > +static void vfio_vmstate_change(void *opaque, int running, RunState state) > +{ > + VFIODevice *vbasedev = opaque; > + > + if ((vbasedev->vm_running != running)) { > + int ret; > + uint32_t value = 0, mask = 0; > + > + if (running) { > + value = VFIO_DEVICE_STATE_RUNNING; > + if (vbasedev->device_state & VFIO_DEVICE_STATE_RESUMING) { > + mask = ~VFIO_DEVICE_STATE_RESUMING; > + } > + } else { > + mask = ~VFIO_DEVICE_STATE_RUNNING; > + } I think the issue might be that you are starting to fiddle with the target state before you know what the actual device state is (you only know the state in the vbasedev, which might be out of sync). But you do know what the transition is supposed to look like depending on the vmstate change, so what about the following: - read the state from the region - figure out the transition that is supposed to be happening - write the target state > + > + ret = vfio_migration_set_state(vbasedev, mask, value); > + if (ret) { > + error_report("%s: Failed to set device state 0x%x", > + vbasedev->name, value & mask); If the transition failed, what does that mean? I assume that the device might actually be in an unusable state (like the error state referenced above)? Does it make sense to continue, or should the device rather be flagged broken in some way? > + } > + vbasedev->vm_running = running; > + trace_vfio_vmstate_change(vbasedev->name, running, RunState_str(state), > + value & mask); > + } > +}
diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c index bf9384907ec0..e79b34003079 100644 --- a/hw/vfio/migration.c +++ b/hw/vfio/migration.c @@ -10,6 +10,7 @@ #include "qemu/osdep.h" #include <linux/vfio.h> +#include "sysemu/runstate.h" #include "hw/vfio/vfio-common.h" #include "cpu.h" #include "migration/migration.h" @@ -74,6 +75,85 @@ err: return ret; } +static int vfio_migration_set_state(VFIODevice *vbasedev, uint32_t mask, + uint32_t value) +{ + VFIOMigration *migration = vbasedev->migration; + VFIORegion *region = &migration->region; + uint32_t device_state; + int ret; + + ret = pread(vbasedev->fd, &device_state, sizeof(device_state), + region->fd_offset + offsetof(struct vfio_device_migration_info, + device_state)); + if (ret < 0) { + error_report("%s: Failed to read device state %d %s", + vbasedev->name, ret, strerror(errno)); + return ret; + } + + device_state = (device_state & mask) | value; + + if (!VFIO_DEVICE_STATE_VALID(device_state)) { + return -EINVAL; + } + + ret = pwrite(vbasedev->fd, &device_state, sizeof(device_state), + region->fd_offset + offsetof(struct vfio_device_migration_info, + device_state)); + if (ret < 0) { + error_report("%s: Failed to set device state %d %s", + vbasedev->name, ret, strerror(errno)); + + ret = pread(vbasedev->fd, &device_state, sizeof(device_state), + region->fd_offset + offsetof(struct vfio_device_migration_info, + device_state)); + if (ret < 0) { + error_report("%s: On failure, failed to read device state %d %s", + vbasedev->name, ret, strerror(errno)); + return ret; + } + + if (VFIO_DEVICE_STATE_IS_ERROR(device_state)) { + error_report("%s: Device is in error state 0x%x", + vbasedev->name, device_state); + return -EFAULT; + } + } + + vbasedev->device_state = device_state; + trace_vfio_migration_set_state(vbasedev->name, device_state); + return 0; +} + +static void vfio_vmstate_change(void *opaque, int running, RunState state) +{ + VFIODevice *vbasedev = opaque; + + if ((vbasedev->vm_running != running)) { + 
int ret; + uint32_t value = 0, mask = 0; + + if (running) { + value = VFIO_DEVICE_STATE_RUNNING; + if (vbasedev->device_state & VFIO_DEVICE_STATE_RESUMING) { + mask = ~VFIO_DEVICE_STATE_RESUMING; + } + } else { + mask = ~VFIO_DEVICE_STATE_RUNNING; + } + + ret = vfio_migration_set_state(vbasedev, mask, value); + if (ret) { + error_report("%s: Failed to set device state 0x%x", + vbasedev->name, value & mask); + } + vbasedev->vm_running = running; + trace_vfio_vmstate_change(vbasedev->name, running, RunState_str(state), + value & mask); + } +} + static int vfio_migration_init(VFIODevice *vbasedev, struct vfio_region_info *info) { @@ -90,6 +170,9 @@ static int vfio_migration_init(VFIODevice *vbasedev, return ret; } + vbasedev->vm_state = qemu_add_vm_change_state_handler(vfio_vmstate_change, + vbasedev); + return 0; } @@ -128,6 +211,10 @@ add_blocker: void vfio_migration_finalize(VFIODevice *vbasedev) { + if (vbasedev->vm_state) { + qemu_del_vm_change_state_handler(vbasedev->vm_state); + } + if (vbasedev->migration_blocker) { migrate_del_blocker(vbasedev->migration_blocker); error_free(vbasedev->migration_blocker); diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events index fd034ac53684..14b0a86c0035 100644 --- a/hw/vfio/trace-events +++ b/hw/vfio/trace-events @@ -146,3 +146,5 @@ vfio_display_edid_write_error(void) "" # migration.c vfio_migration_probe(const char *name, uint32_t index) " (%s) Region %d" +vfio_migration_set_state(char *name, uint32_t state) " (%s) state %d" +vfio_vmstate_change(char *name, int running, const char *reason, uint32_t dev_state) " (%s) running %d reason %s device state %d" diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h index d4b268641173..3d18eb146b33 100644 --- a/include/hw/vfio/vfio-common.h +++ b/include/hw/vfio/vfio-common.h @@ -29,6 +29,7 @@ #ifdef CONFIG_LINUX #include <linux/vfio.h> #endif +#include "sysemu/sysemu.h" #define VFIO_MSG_PREFIX "vfio %s: " @@ -119,6 +120,9 @@ typedef struct VFIODevice { 
unsigned int flags; VFIOMigration *migration; Error *migration_blocker; + VMChangeStateEntry *vm_state; + uint32_t device_state; + int vm_running; } VFIODevice; struct VFIODeviceOps {