diff mbox series

[v9,QEMU,08/15] vfio: Add VM state change handler to know state of VM

Message ID 1573578324-8389-9-git-send-email-kwankhede@nvidia.com (mailing list archive)
State New, archived
Headers show
Series Add migration support for VFIO devices | expand

Commit Message

Kirti Wankhede Nov. 12, 2019, 5:05 p.m. UTC
VM state change handler gets called on change in VM's state. This is used to set
VFIO device state to _RUNNING.

Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com>
Reviewed-by: Neo Jia <cjia@nvidia.com>
---
 hw/vfio/migration.c           | 69 +++++++++++++++++++++++++++++++++++++++++++
 hw/vfio/trace-events          |  2 ++
 include/hw/vfio/vfio-common.h |  4 +++
 3 files changed, 75 insertions(+)

Comments

Alex Williamson Nov. 14, 2019, 5:02 a.m. UTC | #1
On Tue, 12 Nov 2019 22:35:17 +0530
Kirti Wankhede <kwankhede@nvidia.com> wrote:

> VM state change handler gets called on change in VM's state. This is used to set
> VFIO device state to _RUNNING.
> 
> Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com>
> Reviewed-by: Neo Jia <cjia@nvidia.com>
> ---
>  hw/vfio/migration.c           | 69 +++++++++++++++++++++++++++++++++++++++++++
>  hw/vfio/trace-events          |  2 ++
>  include/hw/vfio/vfio-common.h |  4 +++
>  3 files changed, 75 insertions(+)
> 
> diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
> index c17bd1b0b934..28981a759e6c 100644
> --- a/hw/vfio/migration.c
> +++ b/hw/vfio/migration.c
> @@ -10,6 +10,7 @@
>  #include "qemu/osdep.h"
>  #include <linux/vfio.h>
>  
> +#include "sysemu/runstate.h"
>  #include "hw/vfio/vfio-common.h"
>  #include "cpu.h"
>  #include "migration/migration.h"
> @@ -74,6 +75,67 @@ err:
>      return ret;
>  }
>  
> +static int vfio_migration_set_state(VFIODevice *vbasedev, uint32_t set_flags,
> +                                    uint32_t clear_flags)
> +{

Perhaps a mask and value interface like we have elsewhere?

> +    VFIOMigration *migration = vbasedev->migration;
> +    VFIORegion *region = &migration->region;
> +    uint32_t device_state;
> +    int ret = 0;
> +
> +    /* same flags should not be set or clear */
> +    assert(!(set_flags & clear_flags));

mask/value avoids this sort of thing.

> +    device_state = (vbasedev->device_state | set_flags) & ~clear_flags;

Don't we need to re-read device_state from the region?  We can't
predict what those reserved bits will be used for, they could be
volatile.  If we adopt that a reset returns to running, our cached
state may be stale.

> +
> +    switch (device_state & VFIO_DEVICE_STATE_MASK) {
> +    case VFIO_DEVICE_STATE_INVALID_CASE1:
> +    case VFIO_DEVICE_STATE_INVALID_CASE2:
> +        return -EINVAL;
> +    }

I like the VALID macro better.

> +
> +    ret = pwrite(vbasedev->fd, &device_state, sizeof(device_state),
> +                 region->fd_offset + offsetof(struct vfio_device_migration_info,
> +                                              device_state));
> +    if (ret < 0) {
> +        error_report("%s: Failed to set device state %d %s",
> +                     vbasedev->name, ret, strerror(errno));
> +        return ret;
> +    }
> +
> +    vbasedev->device_state = device_state;

Are we opposed to re-reading device_state, here and in the error case
above?

> +    trace_vfio_migration_set_state(vbasedev->name, device_state);
> +    return 0;
> +}
> +
> +static void vfio_vmstate_change(void *opaque, int running, RunState state)
> +{
> +    VFIODevice *vbasedev = opaque;
> +
> +    if ((vbasedev->vm_running != running)) {
> +        int ret;
> +        uint32_t set_flags = 0, clear_flags = 0;
> +
> +        if (running) {
> +            set_flags = VFIO_DEVICE_STATE_RUNNING;
> +            if (vbasedev->device_state & VFIO_DEVICE_STATE_RESUMING) {
> +                clear_flags = VFIO_DEVICE_STATE_RESUMING;
> +            }
> +        } else {
> +            clear_flags = VFIO_DEVICE_STATE_RUNNING;
> +        }
> +
> +        ret = vfio_migration_set_state(vbasedev, set_flags, clear_flags);
> +        if (ret) {
> +            error_report("%s: Failed to set device state 0x%x",
> +                         vbasedev->name, set_flags & ~clear_flags);
> +        }
> +        vbasedev->vm_running = running;

We're effectively storing running both in vbasedev->device_state and
vbasedev->vm_running, why?

Seems like this could trivially know the initial state of the device is
running.

> +        trace_vfio_vmstate_change(vbasedev->name, running, RunState_str(state),
> +                                  set_flags & ~clear_flags);
> +    }
> +}
> +
>  static int vfio_migration_init(VFIODevice *vbasedev,
>                                 struct vfio_region_info *info)
>  {
> @@ -89,6 +151,9 @@ static int vfio_migration_init(VFIODevice *vbasedev,
>          return ret;
>      }
>  
> +    vbasedev->vm_state = qemu_add_vm_change_state_handler(vfio_vmstate_change,
> +                                                          vbasedev);
> +
>      return 0;
>  }
>  
> @@ -127,6 +192,10 @@ add_blocker:
>  
>  void vfio_migration_finalize(VFIODevice *vbasedev)
>  {
> +    if (vbasedev->vm_state) {
> +        qemu_del_vm_change_state_handler(vbasedev->vm_state);
> +    }
> +
>      if (vbasedev->migration_blocker) {
>          migrate_del_blocker(vbasedev->migration_blocker);
>          error_free(vbasedev->migration_blocker);
> diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
> index 191a726a1312..3d15bacd031a 100644
> --- a/hw/vfio/trace-events
> +++ b/hw/vfio/trace-events
> @@ -146,3 +146,5 @@ vfio_display_edid_write_error(void) ""
>  
>  # migration.c
>  vfio_migration_probe(char *name, uint32_t index) " (%s) Region %d"
> +vfio_migration_set_state(char *name, uint32_t state) " (%s) state %d"
> +vfio_vmstate_change(char *name, int running, const char *reason, uint32_t dev_state) " (%s) running %d reason %s device state %d"
> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
> index 927511897a44..6573acd6738e 100644
> --- a/include/hw/vfio/vfio-common.h
> +++ b/include/hw/vfio/vfio-common.h
> @@ -29,6 +29,7 @@
>  #ifdef CONFIG_LINUX
>  #include <linux/vfio.h>
>  #endif
> +#include "sysemu/sysemu.h"
>  
>  #define VFIO_MSG_PREFIX "vfio %s: "
>  
> @@ -120,6 +121,9 @@ typedef struct VFIODevice {
>      unsigned int flags;
>      VFIOMigration *migration;
>      Error *migration_blocker;
> +    uint32_t device_state;
> +    VMChangeStateEntry *vm_state;
> +    int vm_running;

Isn't this effectively a bool per our usage.

Field ordering is wasteful.

>  } VFIODevice;
>  
>  struct VFIODeviceOps {
diff mbox series

Patch

diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
index c17bd1b0b934..28981a759e6c 100644
--- a/hw/vfio/migration.c
+++ b/hw/vfio/migration.c
@@ -10,6 +10,7 @@ 
 #include "qemu/osdep.h"
 #include <linux/vfio.h>
 
+#include "sysemu/runstate.h"
 #include "hw/vfio/vfio-common.h"
 #include "cpu.h"
 #include "migration/migration.h"
@@ -74,6 +75,67 @@  err:
     return ret;
 }
 
+static int vfio_migration_set_state(VFIODevice *vbasedev, uint32_t set_flags,
+                                    uint32_t clear_flags)
+{
+    VFIOMigration *migration = vbasedev->migration;
+    VFIORegion *region = &migration->region;
+    uint32_t device_state;
+    int ret = 0;
+
+    /* same flags should not be set or clear */
+    assert(!(set_flags & clear_flags));
+
+    device_state = (vbasedev->device_state | set_flags) & ~clear_flags;
+
+    switch (device_state & VFIO_DEVICE_STATE_MASK) {
+    case VFIO_DEVICE_STATE_INVALID_CASE1:
+    case VFIO_DEVICE_STATE_INVALID_CASE2:
+        return -EINVAL;
+    }
+
+    ret = pwrite(vbasedev->fd, &device_state, sizeof(device_state),
+                 region->fd_offset + offsetof(struct vfio_device_migration_info,
+                                              device_state));
+    if (ret < 0) {
+        error_report("%s: Failed to set device state %d %s",
+                     vbasedev->name, ret, strerror(errno));
+        return ret;
+    }
+
+    vbasedev->device_state = device_state;
+    trace_vfio_migration_set_state(vbasedev->name, device_state);
+    return 0;
+}
+
+static void vfio_vmstate_change(void *opaque, int running, RunState state)
+{
+    VFIODevice *vbasedev = opaque;
+
+    if ((vbasedev->vm_running != running)) {
+        int ret;
+        uint32_t set_flags = 0, clear_flags = 0;
+
+        if (running) {
+            set_flags = VFIO_DEVICE_STATE_RUNNING;
+            if (vbasedev->device_state & VFIO_DEVICE_STATE_RESUMING) {
+                clear_flags = VFIO_DEVICE_STATE_RESUMING;
+            }
+        } else {
+            clear_flags = VFIO_DEVICE_STATE_RUNNING;
+        }
+
+        ret = vfio_migration_set_state(vbasedev, set_flags, clear_flags);
+        if (ret) {
+            error_report("%s: Failed to set device state 0x%x",
+                         vbasedev->name, set_flags & ~clear_flags);
+        }
+        vbasedev->vm_running = running;
+        trace_vfio_vmstate_change(vbasedev->name, running, RunState_str(state),
+                                  set_flags & ~clear_flags);
+    }
+}
+
 static int vfio_migration_init(VFIODevice *vbasedev,
                                struct vfio_region_info *info)
 {
@@ -89,6 +151,9 @@  static int vfio_migration_init(VFIODevice *vbasedev,
         return ret;
     }
 
+    vbasedev->vm_state = qemu_add_vm_change_state_handler(vfio_vmstate_change,
+                                                          vbasedev);
+
     return 0;
 }
 
@@ -127,6 +192,10 @@  add_blocker:
 
 void vfio_migration_finalize(VFIODevice *vbasedev)
 {
+    if (vbasedev->vm_state) {
+        qemu_del_vm_change_state_handler(vbasedev->vm_state);
+    }
+
     if (vbasedev->migration_blocker) {
         migrate_del_blocker(vbasedev->migration_blocker);
         error_free(vbasedev->migration_blocker);
diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
index 191a726a1312..3d15bacd031a 100644
--- a/hw/vfio/trace-events
+++ b/hw/vfio/trace-events
@@ -146,3 +146,5 @@  vfio_display_edid_write_error(void) ""
 
 # migration.c
 vfio_migration_probe(char *name, uint32_t index) " (%s) Region %d"
+vfio_migration_set_state(char *name, uint32_t state) " (%s) state %d"
+vfio_vmstate_change(char *name, int running, const char *reason, uint32_t dev_state) " (%s) running %d reason %s device state %d"
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index 927511897a44..6573acd6738e 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -29,6 +29,7 @@ 
 #ifdef CONFIG_LINUX
 #include <linux/vfio.h>
 #endif
+#include "sysemu/sysemu.h"
 
 #define VFIO_MSG_PREFIX "vfio %s: "
 
@@ -120,6 +121,9 @@  typedef struct VFIODevice {
     unsigned int flags;
     VFIOMigration *migration;
     Error *migration_blocker;
+    uint32_t device_state;
+    VMChangeStateEntry *vm_state;
+    int vm_running;
 } VFIODevice;
 
 struct VFIODeviceOps {