diff mbox series

[V6,05/14] migration: propagate suspended runstate

Message ID 1701380247-340457-6-git-send-email-steven.sistare@oracle.com (mailing list archive)
State New, archived
Headers show
Series fix migration of suspended runstate | expand

Commit Message

Steven Sistare Nov. 30, 2023, 9:37 p.m. UTC
If the outgoing machine was previously suspended, propagate that to the
incoming side via global_state, so a subsequent vm_start restores the
suspended state.  To maintain backward and forward compatibility, define
the new field in a zero'd hole in the GlobalState struct.

Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
---
 migration/global_state.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

Comments

Peter Xu Nov. 30, 2023, 11:06 p.m. UTC | #1
On Thu, Nov 30, 2023 at 01:37:18PM -0800, Steve Sistare wrote:
> If the outgoing machine was previously suspended, propagate that to the
> incoming side via global_state, so a subsequent vm_start restores the
> suspended state.  To maintain backward and forward compatibility, define
> the new field in a zero'd hole in the GlobalState struct.
> 
> Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
> ---
>  migration/global_state.c | 10 ++++++++++
>  1 file changed, 10 insertions(+)
> 
> diff --git a/migration/global_state.c b/migration/global_state.c
> index 4e2a9d8..de2532c 100644
> --- a/migration/global_state.c
> +++ b/migration/global_state.c
> @@ -25,6 +25,7 @@ typedef struct {
>      uint8_t runstate[100];
>      RunState state;
>      bool received;
> +    bool vm_was_suspended;
>  } GlobalState;
>  
>  static GlobalState global_state;
> @@ -35,6 +36,7 @@ static void global_state_do_store(RunState state)
>      assert(strlen(state_str) < sizeof(global_state.runstate));
>      strpadcpy((char *)global_state.runstate, sizeof(global_state.runstate),
>                state_str, '\0');
> +    global_state.vm_was_suspended = vm_get_suspended();
>  }
>  
>  void global_state_store(void)
> @@ -68,6 +70,12 @@ static bool global_state_needed(void *opaque)
>          return true;
>      }
>  
> +    /* If the suspended state must be remembered, it is needed */
> +
> +    if (vm_get_suspended()) {
> +        return true;
> +    }
> +
>      /* If state is running or paused, it is not needed */
>  
>      if (strcmp(runstate, "running") == 0 ||
> @@ -109,6 +117,7 @@ static int global_state_post_load(void *opaque, int version_id)
>          return -EINVAL;
>      }
>      s->state = r;
> +    vm_set_suspended(s->vm_was_suspended || r == RUN_STATE_SUSPENDED);

IIUC current vm_was_suspended (based on my read of your patch) was not the
same as a boolean representing "whether VM is suspended", but only a
temporary field to remember that for a VM stop request.  To be explicit, I
didn't see this flag set in qemu_system_suspend() in your previous patch.

If so, we can already do:

  vm_set_suspended(s->vm_was_suspended);

Irrelevant of RUN_STATE_SUSPENDED?

>  
>      return 0;
>  }
> @@ -134,6 +143,7 @@ static const VMStateDescription vmstate_globalstate = {
>      .fields = (VMStateField[]) {
>          VMSTATE_UINT32(size, GlobalState),
>          VMSTATE_BUFFER(runstate, GlobalState),
> +        VMSTATE_BOOL(vm_was_suspended, GlobalState),
>          VMSTATE_END_OF_LIST()
>      },
>  };

I think this will break migration between old/new, unfortunately.  And
since the global state exist mostly for every VM, all VM setup should be
affected, and over all archs.

We used to have the version_id field right above for adding fields, but I
_think_ that will still break backward migration fron new->old binary, so
not wanted.  Juan can keep me honest.

The best thing is still machine compat properties, afaict, to fix.  It's
slightly involved, but let me attach a sample diff for you (at the end,
possibly working with your current patch kind-of squashed, but not ever
tested), hopefully make it slightly easier.

I'm wondering how bad it is to just ignore it, it's not as bad as if we
don't fix stop-during-suspend, in this case the worst case of forgetting
this field over migration is: if VM stopped (and used to be suspended) then
after migration it'll keep being stopped, however after "cont" it'll forget
the suspended state.  Not that bad!  IIUC SPR should always migrate with
suspended (rather than any fully stopped state), right?  Then shouldn't be
affected.  If risk is low, maybe we can leave this one for later?

Thanks,

===8<===

diff --git a/migration/migration.h b/migration/migration.h
index cf2c9c88e0..c3fd1f8347 100644
--- a/migration/migration.h
+++ b/migration/migration.h
@@ -470,6 +470,8 @@ struct MigrationState {
     bool switchover_acked;
     /* Is this a rdma migration */
     bool rdma_migration;
+    /* Whether remember global vm_was_suspended field? */
+    bool store_vm_was_suspended;
 };
 
 void migrate_set_state(int *state, int old_state, int new_state);
diff --git a/hw/core/machine.c b/hw/core/machine.c
index 0c17398141..365e01c1c9 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -37,6 +37,7 @@ GlobalProperty hw_compat_8_1[] = {
     { "ramfb", "x-migrate", "off" },
     { "vfio-pci-nohotplug", "x-ramfb-migrate", "off" },
     { "igb", "x-pcie-flr-init", "off" },
+    { "migration", "store-vm-was-suspended", false },
 };
 const size_t hw_compat_8_1_len = G_N_ELEMENTS(hw_compat_8_1);
 
diff --git a/migration/global_state.c b/migration/global_state.c
index 4e2a9d8ec0..ffa7bf82ca 100644
--- a/migration/global_state.c
+++ b/migration/global_state.c
@@ -25,6 +25,7 @@ typedef struct {
     uint8_t runstate[100];
     RunState state;
     bool received;
+    bool vm_was_suspended;
 } GlobalState;
 
 static GlobalState global_state;
@@ -124,6 +125,25 @@ static int global_state_pre_save(void *opaque)
     return 0;
 }
 
+static bool global_state_has_vm_was_suspended(void *opaque)
+{
+    return migrate_get_current()->store_vm_was_suspended;
+}
+
+static const VMStateDescription vmstate_vm_was_suspended = {
+    .name = "globalstate/vm_was_suspended",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    /* TODO: Fill these in */
+    .pre_save = NULL,
+    .post_load = NULL,
+    .needed = global_state_has_vm_was_suspended,
+    .fields = (VMStateField[]) {
+        VMSTATE_BOOL(vm_was_suspended, GlobalState),
+        VMSTATE_END_OF_LIST()
+    },
+};
+
 static const VMStateDescription vmstate_globalstate = {
     .name = "globalstate",
     .version_id = 1,
@@ -136,6 +156,10 @@ static const VMStateDescription vmstate_globalstate = {
         VMSTATE_BUFFER(runstate, GlobalState),
         VMSTATE_END_OF_LIST()
     },
+    .subsections = (const VMStateDescription*[]) {
+        &vmstate_vm_was_suspended,
+        NULL
+    },
 };
 
 void register_global_state(void)
diff --git a/migration/options.c b/migration/options.c
index 8d8ec73ad9..5f9998d3e8 100644
--- a/migration/options.c
+++ b/migration/options.c
@@ -88,6 +88,8 @@
 Property migration_properties[] = {
     DEFINE_PROP_BOOL("store-global-state", MigrationState,
                      store_global_state, true),
+    DEFINE_PROP_BOOL("store-vm-was-suspended", MigrationState,
+                     store_vm_was_suspended, true),
     DEFINE_PROP_BOOL("send-configuration", MigrationState,
                      send_configuration, true),
     DEFINE_PROP_BOOL("send-section-footer", MigrationState,
Steven Sistare Dec. 1, 2023, 4:23 p.m. UTC | #2
On 11/30/2023 6:06 PM, Peter Xu wrote:
> On Thu, Nov 30, 2023 at 01:37:18PM -0800, Steve Sistare wrote:
>> If the outgoing machine was previously suspended, propagate that to the
>> incoming side via global_state, so a subsequent vm_start restores the
>> suspended state.  To maintain backward and forward compatibility, define
>> the new field in a zero'd hole in the GlobalState struct.
>>
>> Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
>> ---
>>  migration/global_state.c | 10 ++++++++++
>>  1 file changed, 10 insertions(+)
>>
>> diff --git a/migration/global_state.c b/migration/global_state.c
>> index 4e2a9d8..de2532c 100644
>> --- a/migration/global_state.c
>> +++ b/migration/global_state.c
>> @@ -25,6 +25,7 @@ typedef struct {
>>      uint8_t runstate[100];
>>      RunState state;
>>      bool received;
>> +    bool vm_was_suspended;
>>  } GlobalState;
>>  
>>  static GlobalState global_state;
>> @@ -35,6 +36,7 @@ static void global_state_do_store(RunState state)
>>      assert(strlen(state_str) < sizeof(global_state.runstate));
>>      strpadcpy((char *)global_state.runstate, sizeof(global_state.runstate),
>>                state_str, '\0');
>> +    global_state.vm_was_suspended = vm_get_suspended();
>>  }
>>  
>>  void global_state_store(void)
>> @@ -68,6 +70,12 @@ static bool global_state_needed(void *opaque)
>>          return true;
>>      }
>>  
>> +    /* If the suspended state must be remembered, it is needed */
>> +
>> +    if (vm_get_suspended()) {
>> +        return true;
>> +    }
>> +
>>      /* If state is running or paused, it is not needed */
>>  
>>      if (strcmp(runstate, "running") == 0 ||
>> @@ -109,6 +117,7 @@ static int global_state_post_load(void *opaque, int version_id)
>>          return -EINVAL;
>>      }
>>      s->state = r;
>> +    vm_set_suspended(s->vm_was_suspended || r == RUN_STATE_SUSPENDED);
> 
> IIUC current vm_was_suspended (based on my read of your patch) was not the
> same as a boolean representing "whether VM is suspended", but only a
> temporary field to remember that for a VM stop request.  To be explicit, I
> didn't see this flag set in qemu_system_suspend() in your previous patch.
> 
> If so, we can already do:
> 
>   vm_set_suspended(s->vm_was_suspended);
> 
> Irrelevant of RUN_STATE_SUSPENDED?

We need both terms of the expression.

If the vm *is* suspended (RUN_STATE_SUSPENDED), then vm_was_suspended = false.
We call global_state_store prior to vm_stop_force_state, so the incoming
side sees s->state = RUN_STATE_SUSPENDED and s->vm_was_suspended = false.
However, the runstate is RUN_STATE_INMIGRATE.  When incoming finishes by
calling vm_start, we need to restore the suspended state.  Thus in 
global_state_post_load, we must set vm_was_suspended = true.

If the vm *was* suspended, but is currently stopped (eg RUN_STATE_PAUSED),
then vm_was_suspended = true.  Migration from that state sets
vm_was_suspended = s->vm_was_suspended = true in global_state_post_load and 
ends with runstate_set(RUN_STATE_PAUSED).

I will add a comment here in the code.
 
>>      return 0;
>>  }
>> @@ -134,6 +143,7 @@ static const VMStateDescription vmstate_globalstate = {
>>      .fields = (VMStateField[]) {
>>          VMSTATE_UINT32(size, GlobalState),
>>          VMSTATE_BUFFER(runstate, GlobalState),
>> +        VMSTATE_BOOL(vm_was_suspended, GlobalState),
>>          VMSTATE_END_OF_LIST()
>>      },
>>  };
> 
> I think this will break migration between old/new, unfortunately.  And
> since the global state exist mostly for every VM, all VM setup should be
> affected, and over all archs.

Thanks, I keep forgetting that my binary tricks are no good here.  However,
I have one other trick up my sleeve, which is to store vm_was_running in
global_state.runstate[strlen(runstate) + 2].  It is forwards and backwards
compatible, since that byte is always 0 in older qemu.  It can be implemented
with a few lines of code change confined to global_state.c, versus many lines 
spread across files to do it the conventional way using a compat property and
a subsection.  Sound OK?  

> We used to have the version_id field right above for adding fields, but I
> _think_ that will still break backward migration fron new->old binary, so
> not wanted.  Juan can keep me honest.
> 
> The best thing is still machine compat properties, afaict, to fix.  It's
> slightly involved, but let me attach a sample diff for you (at the end,
> possibly working with your current patch kind-of squashed, but not ever
> tested), hopefully make it slightly easier.
> 
> I'm wondering how bad it is to just ignore it, it's not as bad as if we
> don't fix stop-during-suspend, in this case the worst case of forgetting
> this field over migration is: if VM stopped (and used to be suspended) then
> after migration it'll keep being stopped, however after "cont" it'll forget
> the suspended state.  

Cont changes state to running, but the VM is broken because wake was never called.

> Not that bad!  IIUC SPR should always migrate with
> suspended (rather than any fully stopped state), right?  Then shouldn't be
> affected.  If risk is low, maybe we can leave this one for later?
> 
> Thanks,

Thanks for the patch.  I am going to save this as a template for adding compatible
subsections in future work.

- Steve

> ===8<===
> 
> diff --git a/migration/migration.h b/migration/migration.h
> index cf2c9c88e0..c3fd1f8347 100644
> --- a/migration/migration.h
> +++ b/migration/migration.h
> @@ -470,6 +470,8 @@ struct MigrationState {
>      bool switchover_acked;
>      /* Is this a rdma migration */
>      bool rdma_migration;
> +    /* Whether remember global vm_was_suspended field? */
> +    bool store_vm_was_suspended;
>  };
>  
>  void migrate_set_state(int *state, int old_state, int new_state);
> diff --git a/hw/core/machine.c b/hw/core/machine.c
> index 0c17398141..365e01c1c9 100644
> --- a/hw/core/machine.c
> +++ b/hw/core/machine.c
> @@ -37,6 +37,7 @@ GlobalProperty hw_compat_8_1[] = {
>      { "ramfb", "x-migrate", "off" },
>      { "vfio-pci-nohotplug", "x-ramfb-migrate", "off" },
>      { "igb", "x-pcie-flr-init", "off" },
> +    { "migration", "store-vm-was-suspended", false },
>  };
>  const size_t hw_compat_8_1_len = G_N_ELEMENTS(hw_compat_8_1);
>  
> diff --git a/migration/global_state.c b/migration/global_state.c
> index 4e2a9d8ec0..ffa7bf82ca 100644
> --- a/migration/global_state.c
> +++ b/migration/global_state.c
> @@ -25,6 +25,7 @@ typedef struct {
>      uint8_t runstate[100];
>      RunState state;
>      bool received;
> +    bool vm_was_suspended;
>  } GlobalState;
>  
>  static GlobalState global_state;
> @@ -124,6 +125,25 @@ static int global_state_pre_save(void *opaque)
>      return 0;
>  }
>  
> +static bool global_state_has_vm_was_suspended(void *opaque)
> +{
> +    return migrate_get_current()->store_vm_was_suspended;
> +}
> +
> +static const VMStateDescription vmstate_vm_was_suspended = {
> +    .name = "globalstate/vm_was_suspended",
> +    .version_id = 1,
> +    .minimum_version_id = 1,
> +    /* TODO: Fill these in */
> +    .pre_save = NULL,
> +    .post_load = NULL,
> +    .needed = global_state_has_vm_was_suspended,
> +    .fields = (VMStateField[]) {
> +        VMSTATE_BOOL(vm_was_suspended, GlobalState),
> +        VMSTATE_END_OF_LIST()
> +    },
> +};
> +
>  static const VMStateDescription vmstate_globalstate = {
>      .name = "globalstate",
>      .version_id = 1,
> @@ -136,6 +156,10 @@ static const VMStateDescription vmstate_globalstate = {
>          VMSTATE_BUFFER(runstate, GlobalState),
>          VMSTATE_END_OF_LIST()
>      },
> +    .subsections = (const VMStateDescription*[]) {
> +        &vmstate_vm_was_suspended,
> +        NULL
> +    },
>  };
>  
>  void register_global_state(void)
> diff --git a/migration/options.c b/migration/options.c
> index 8d8ec73ad9..5f9998d3e8 100644
> --- a/migration/options.c
> +++ b/migration/options.c
> @@ -88,6 +88,8 @@
>  Property migration_properties[] = {
>      DEFINE_PROP_BOOL("store-global-state", MigrationState,
>                       store_global_state, true),
> +    DEFINE_PROP_BOOL("store-vm-was-suspended", MigrationState,
> +                     store_vm_was_suspended, true),
>      DEFINE_PROP_BOOL("send-configuration", MigrationState,
>                       send_configuration, true),
>      DEFINE_PROP_BOOL("send-section-footer", MigrationState,
>
Peter Xu Dec. 4, 2023, 5:24 p.m. UTC | #3
On Fri, Dec 01, 2023 at 11:23:33AM -0500, Steven Sistare wrote:
> >> @@ -109,6 +117,7 @@ static int global_state_post_load(void *opaque, int version_id)
> >>          return -EINVAL;
> >>      }
> >>      s->state = r;
> >> +    vm_set_suspended(s->vm_was_suspended || r == RUN_STATE_SUSPENDED);
> > 
> > IIUC current vm_was_suspended (based on my read of your patch) was not the
> > same as a boolean representing "whether VM is suspended", but only a
> > temporary field to remember that for a VM stop request.  To be explicit, I
> > didn't see this flag set in qemu_system_suspend() in your previous patch.
> > 
> > If so, we can already do:
> > 
> >   vm_set_suspended(s->vm_was_suspended);
> > 
> > Irrelevant of RUN_STATE_SUSPENDED?
> 
> We need both terms of the expression.
> 
> If the vm *is* suspended (RUN_STATE_SUSPENDED), then vm_was_suspended = false.
> We call global_state_store prior to vm_stop_force_state, so the incoming
> side sees s->state = RUN_STATE_SUSPENDED and s->vm_was_suspended = false.

Right.

> However, the runstate is RUN_STATE_INMIGRATE.  When incoming finishes by
> calling vm_start, we need to restore the suspended state.  Thus in 
> global_state_post_load, we must set vm_was_suspended = true.

With above, shouldn't global_state_get_runstate() (on dest) fetch SUSPENDED
already?  Then I think it should call vm_start(SUSPENDED) if to start.

Maybe you're talking about the special case where autostart==false?  We
used to have this (existing process_incoming_migration_bh()):

    if (!global_state_received() ||
        global_state_get_runstate() == RUN_STATE_RUNNING) {
        if (autostart) {
            vm_start();
        } else {
            runstate_set(RUN_STATE_PAUSED);
        }
    }

If so maybe I get you, because in the "else" path we do seem to lose the
SUSPENDED state again, but in that case IMHO we should logically set
vm_was_suspended only when we "lose" it - we didn't lose it during
migration, but only until we decided to switch to PAUSED (due to
autostart==false). IOW, change above to something like:

    state = global_state_get_runstate();
    if (!global_state_received() || runstate_is_alive(state)) {
        if (autostart) {
            vm_start(state);
        } else {
            if (runstate_is_suspended(state)) {
                /* Remember suspended state before setting system to STOPed */
                vm_was_suspended = true;
            }
            runstate_set(RUN_STATE_PAUSED);
        }
    }

It may or may not have a functional difference even if current patch,
though.  However maybe clearer to follow vm_was_suspended's strict
definition.

> 
> If the vm *was* suspended, but is currently stopped (eg RUN_STATE_PAUSED),
> then vm_was_suspended = true.  Migration from that state sets
> vm_was_suspended = s->vm_was_suspended = true in global_state_post_load and 
> ends with runstate_set(RUN_STATE_PAUSED).
> 
> I will add a comment here in the code.
>  
> >>      return 0;
> >>  }
> >> @@ -134,6 +143,7 @@ static const VMStateDescription vmstate_globalstate = {
> >>      .fields = (VMStateField[]) {
> >>          VMSTATE_UINT32(size, GlobalState),
> >>          VMSTATE_BUFFER(runstate, GlobalState),
> >> +        VMSTATE_BOOL(vm_was_suspended, GlobalState),
> >>          VMSTATE_END_OF_LIST()
> >>      },
> >>  };
> > 
> > I think this will break migration between old/new, unfortunately.  And
> > since the global state exist mostly for every VM, all VM setup should be
> > affected, and over all archs.
> 
> Thanks, I keep forgetting that my binary tricks are no good here.  However,
> I have one other trick up my sleeve, which is to store vm_was_running in
> global_state.runstate[strlen(runstate) + 2].  It is forwards and backwards
> compatible, since that byte is always 0 in older qemu.  It can be implemented
> with a few lines of code change confined to global_state.c, versus many lines 
> spread across files to do it the conventional way using a compat property and
> a subsection.  Sound OK?  

Tricky!  But sounds okay to me.  I think you're inventing some of your own
way of being compatible, not relying on machine type as a benefit.  If go
this route please document clearly on the layout and also what it looked
like in old binaries.

I think maybe it'll be good to keep using strings, so in the new binaries
we allow >1 strings, then we define properly on those strings (index 0:
runstate, existed since start; index 2: suspended, perhaps using "1"/"0" to
express, while 0x00 means old binary, etc.).

I hope this trick will need less code than the subsection solution,
otherwise I'd still consider going with that, which is the "common
solution".

Let's also see whether Juan/Fabiano/others has any opinions.
Fabiano Rosas Dec. 4, 2023, 7:31 p.m. UTC | #4
Peter Xu <peterx@redhat.com> writes:

> On Fri, Dec 01, 2023 at 11:23:33AM -0500, Steven Sistare wrote:
>> >> @@ -109,6 +117,7 @@ static int global_state_post_load(void *opaque, int version_id)
>> >>          return -EINVAL;
>> >>      }
>> >>      s->state = r;
>> >> +    vm_set_suspended(s->vm_was_suspended || r == RUN_STATE_SUSPENDED);
>> > 
>> > IIUC current vm_was_suspended (based on my read of your patch) was not the
>> > same as a boolean representing "whether VM is suspended", but only a
>> > temporary field to remember that for a VM stop request.  To be explicit, I
>> > didn't see this flag set in qemu_system_suspend() in your previous patch.
>> > 
>> > If so, we can already do:
>> > 
>> >   vm_set_suspended(s->vm_was_suspended);
>> > 
>> > Irrelevant of RUN_STATE_SUSPENDED?
>> 
>> We need both terms of the expression.
>> 
>> If the vm *is* suspended (RUN_STATE_SUSPENDED), then vm_was_suspended = false.
>> We call global_state_store prior to vm_stop_force_state, so the incoming
>> side sees s->state = RUN_STATE_SUSPENDED and s->vm_was_suspended = false.
>
> Right.
>
>> However, the runstate is RUN_STATE_INMIGRATE.  When incoming finishes by
>> calling vm_start, we need to restore the suspended state.  Thus in 
>> global_state_post_load, we must set vm_was_suspended = true.
>
> With above, shouldn't global_state_get_runstate() (on dest) fetch SUSPENDED
> already?  Then I think it should call vm_start(SUSPENDED) if to start.
>
> Maybe you're talking about the special case where autostart==false?  We
> used to have this (existing process_incoming_migration_bh()):
>
>     if (!global_state_received() ||
>         global_state_get_runstate() == RUN_STATE_RUNNING) {
>         if (autostart) {
>             vm_start();
>         } else {
>             runstate_set(RUN_STATE_PAUSED);
>         }
>     }
>
> If so maybe I get you, because in the "else" path we do seem to lose the
> SUSPENDED state again, but in that case IMHO we should logically set
> vm_was_suspended only when we "lose" it - we didn't lose it during
> migration, but only until we decided to switch to PAUSED (due to
> autostart==false). IOW, change above to something like:
>
>     state = global_state_get_runstate();
>     if (!global_state_received() || runstate_is_alive(state)) {
>         if (autostart) {
>             vm_start(state);
>         } else {
>             if (runstate_is_suspended(state)) {
>                 /* Remember suspended state before setting system to STOPed */
>                 vm_was_suspended = true;
>             }
>             runstate_set(RUN_STATE_PAUSED);
>         }
>     }
>
> It may or may not have a functional difference even if current patch,
> though.  However maybe clearer to follow vm_was_suspended's strict
> definition.
>
>> 
>> If the vm *was* suspended, but is currently stopped (eg RUN_STATE_PAUSED),
>> then vm_was_suspended = true.  Migration from that state sets
>> vm_was_suspended = s->vm_was_suspended = true in global_state_post_load and 
>> ends with runstate_set(RUN_STATE_PAUSED).
>> 
>> I will add a comment here in the code.
>>  
>> >>      return 0;
>> >>  }
>> >> @@ -134,6 +143,7 @@ static const VMStateDescription vmstate_globalstate = {
>> >>      .fields = (VMStateField[]) {
>> >>          VMSTATE_UINT32(size, GlobalState),
>> >>          VMSTATE_BUFFER(runstate, GlobalState),
>> >> +        VMSTATE_BOOL(vm_was_suspended, GlobalState),
>> >>          VMSTATE_END_OF_LIST()
>> >>      },
>> >>  };
>> > 
>> > I think this will break migration between old/new, unfortunately.  And
>> > since the global state exist mostly for every VM, all VM setup should be
>> > affected, and over all archs.
>> 
>> Thanks, I keep forgetting that my binary tricks are no good here.  However,
>> I have one other trick up my sleeve, which is to store vm_was_running in
>> global_state.runstate[strlen(runstate) + 2].  It is forwards and backwards
>> compatible, since that byte is always 0 in older qemu.  It can be implemented
>> with a few lines of code change confined to global_state.c, versus many lines 
>> spread across files to do it the conventional way using a compat property and
>> a subsection.  Sound OK?  
>
> Tricky!  But sounds okay to me.  I think you're inventing some of your own
> way of being compatible, not relying on machine type as a benefit.  If go
> this route please document clearly on the layout and also what it looked
> like in old binaries.
>
> I think maybe it'll be good to keep using strings, so in the new binaries
> we allow >1 strings, then we define properly on those strings (index 0:
> runstate, existed since start; index 2: suspended, perhaps using "1"/"0" to
> express, while 0x00 means old binary, etc.).
>
> I hope this trick will need less code than the subsection solution,
> otherwise I'd still consider going with that, which is the "common
> solution".
>
> Let's also see whether Juan/Fabiano/others has any opinions.

Can't we pack the structure and just go ahead and slash 'runstate' in
half? That would claim some unused bytes for future backward
compatibility issues.
Peter Xu Dec. 4, 2023, 8:02 p.m. UTC | #5
On Mon, Dec 04, 2023 at 04:31:56PM -0300, Fabiano Rosas wrote:
> Peter Xu <peterx@redhat.com> writes:
> 
> > On Fri, Dec 01, 2023 at 11:23:33AM -0500, Steven Sistare wrote:
> >> >> @@ -109,6 +117,7 @@ static int global_state_post_load(void *opaque, int version_id)
> >> >>          return -EINVAL;
> >> >>      }
> >> >>      s->state = r;
> >> >> +    vm_set_suspended(s->vm_was_suspended || r == RUN_STATE_SUSPENDED);
> >> > 
> >> > IIUC current vm_was_suspended (based on my read of your patch) was not the
> >> > same as a boolean representing "whether VM is suspended", but only a
> >> > temporary field to remember that for a VM stop request.  To be explicit, I
> >> > didn't see this flag set in qemu_system_suspend() in your previous patch.
> >> > 
> >> > If so, we can already do:
> >> > 
> >> >   vm_set_suspended(s->vm_was_suspended);
> >> > 
> >> > Irrelevant of RUN_STATE_SUSPENDED?
> >> 
> >> We need both terms of the expression.
> >> 
> >> If the vm *is* suspended (RUN_STATE_SUSPENDED), then vm_was_suspended = false.
> >> We call global_state_store prior to vm_stop_force_state, so the incoming
> >> side sees s->state = RUN_STATE_SUSPENDED and s->vm_was_suspended = false.
> >
> > Right.
> >
> >> However, the runstate is RUN_STATE_INMIGRATE.  When incoming finishes by
> >> calling vm_start, we need to restore the suspended state.  Thus in 
> >> global_state_post_load, we must set vm_was_suspended = true.
> >
> > With above, shouldn't global_state_get_runstate() (on dest) fetch SUSPENDED
> > already?  Then I think it should call vm_start(SUSPENDED) if to start.
> >
> > Maybe you're talking about the special case where autostart==false?  We
> > used to have this (existing process_incoming_migration_bh()):
> >
> >     if (!global_state_received() ||
> >         global_state_get_runstate() == RUN_STATE_RUNNING) {
> >         if (autostart) {
> >             vm_start();
> >         } else {
> >             runstate_set(RUN_STATE_PAUSED);
> >         }
> >     }
> >
> > If so maybe I get you, because in the "else" path we do seem to lose the
> > SUSPENDED state again, but in that case IMHO we should logically set
> > vm_was_suspended only when we "lose" it - we didn't lose it during
> > migration, but only until we decided to switch to PAUSED (due to
> > autostart==false). IOW, change above to something like:
> >
> >     state = global_state_get_runstate();
> >     if (!global_state_received() || runstate_is_alive(state)) {
> >         if (autostart) {
> >             vm_start(state);
> >         } else {
> >             if (runstate_is_suspended(state)) {
> >                 /* Remember suspended state before setting system to STOPed */
> >                 vm_was_suspended = true;
> >             }
> >             runstate_set(RUN_STATE_PAUSED);
> >         }
> >     }
> >
> > It may or may not have a functional difference even if current patch,
> > though.  However maybe clearer to follow vm_was_suspended's strict
> > definition.
> >
> >> 
> >> If the vm *was* suspended, but is currently stopped (eg RUN_STATE_PAUSED),
> >> then vm_was_suspended = true.  Migration from that state sets
> >> vm_was_suspended = s->vm_was_suspended = true in global_state_post_load and 
> >> ends with runstate_set(RUN_STATE_PAUSED).
> >> 
> >> I will add a comment here in the code.
> >>  
> >> >>      return 0;
> >> >>  }
> >> >> @@ -134,6 +143,7 @@ static const VMStateDescription vmstate_globalstate = {
> >> >>      .fields = (VMStateField[]) {
> >> >>          VMSTATE_UINT32(size, GlobalState),
> >> >>          VMSTATE_BUFFER(runstate, GlobalState),
> >> >> +        VMSTATE_BOOL(vm_was_suspended, GlobalState),
> >> >>          VMSTATE_END_OF_LIST()
> >> >>      },
> >> >>  };
> >> > 
> >> > I think this will break migration between old/new, unfortunately.  And
> >> > since the global state exist mostly for every VM, all VM setup should be
> >> > affected, and over all archs.
> >> 
> >> Thanks, I keep forgetting that my binary tricks are no good here.  However,
> >> I have one other trick up my sleeve, which is to store vm_was_running in
> >> global_state.runstate[strlen(runstate) + 2].  It is forwards and backwards
> >> compatible, since that byte is always 0 in older qemu.  It can be implemented
> >> with a few lines of code change confined to global_state.c, versus many lines 
> >> spread across files to do it the conventional way using a compat property and
> >> a subsection.  Sound OK?  
> >
> > Tricky!  But sounds okay to me.  I think you're inventing some of your own
> > way of being compatible, not relying on machine type as a benefit.  If go
> > this route please document clearly on the layout and also what it looked
> > like in old binaries.
> >
> > I think maybe it'll be good to keep using strings, so in the new binaries
> > we allow >1 strings, then we define properly on those strings (index 0:
> > runstate, existed since start; index 2: suspended, perhaps using "1"/"0" to
> > express, while 0x00 means old binary, etc.).
> >
> > I hope this trick will need less code than the subsection solution,
> > otherwise I'd still consider going with that, which is the "common
> > solution".
> >
> > Let's also see whether Juan/Fabiano/others has any opinions.
> 
> Can't we pack the structure and just go ahead and slash 'runstate' in
> half? That would claim some unused bytes for future backward
> compatibility issues.

What I meant is something like:

  runstate[100] = {"str1", 0x00, "str2", 0x00, ...}

Where str1 is runstate, and str2 can be either "0"/"1" to reflect suspended
value.  We define all the strings separated by 0x00, then IIUC we save the
most chars for potential future extension of this string.

Thanks,
Fabiano Rosas Dec. 4, 2023, 9:09 p.m. UTC | #6
Peter Xu <peterx@redhat.com> writes:

> On Mon, Dec 04, 2023 at 04:31:56PM -0300, Fabiano Rosas wrote:
>> Peter Xu <peterx@redhat.com> writes:
>> 
>> > On Fri, Dec 01, 2023 at 11:23:33AM -0500, Steven Sistare wrote:
>> >> >> @@ -109,6 +117,7 @@ static int global_state_post_load(void *opaque, int version_id)
>> >> >>          return -EINVAL;
>> >> >>      }
>> >> >>      s->state = r;
>> >> >> +    vm_set_suspended(s->vm_was_suspended || r == RUN_STATE_SUSPENDED);
>> >> > 
>> >> > IIUC current vm_was_suspended (based on my read of your patch) was not the
>> >> > same as a boolean representing "whether VM is suspended", but only a
>> >> > temporary field to remember that for a VM stop request.  To be explicit, I
>> >> > didn't see this flag set in qemu_system_suspend() in your previous patch.
>> >> > 
>> >> > If so, we can already do:
>> >> > 
>> >> >   vm_set_suspended(s->vm_was_suspended);
>> >> > 
>> >> > Irrelevant of RUN_STATE_SUSPENDED?
>> >> 
>> >> We need both terms of the expression.
>> >> 
>> >> If the vm *is* suspended (RUN_STATE_SUSPENDED), then vm_was_suspended = false.
>> >> We call global_state_store prior to vm_stop_force_state, so the incoming
>> >> side sees s->state = RUN_STATE_SUSPENDED and s->vm_was_suspended = false.
>> >
>> > Right.
>> >
>> >> However, the runstate is RUN_STATE_INMIGRATE.  When incoming finishes by
>> >> calling vm_start, we need to restore the suspended state.  Thus in 
>> >> global_state_post_load, we must set vm_was_suspended = true.
>> >
>> > With above, shouldn't global_state_get_runstate() (on dest) fetch SUSPENDED
>> > already?  Then I think it should call vm_start(SUSPENDED) if to start.
>> >
>> > Maybe you're talking about the special case where autostart==false?  We
>> > used to have this (existing process_incoming_migration_bh()):
>> >
>> >     if (!global_state_received() ||
>> >         global_state_get_runstate() == RUN_STATE_RUNNING) {
>> >         if (autostart) {
>> >             vm_start();
>> >         } else {
>> >             runstate_set(RUN_STATE_PAUSED);
>> >         }
>> >     }
>> >
>> > If so maybe I get you, because in the "else" path we do seem to lose the
>> > SUSPENDED state again, but in that case IMHO we should logically set
>> > vm_was_suspended only when we "lose" it - we didn't lose it during
>> > migration, but only until we decided to switch to PAUSED (due to
>> > autostart==false). IOW, change above to something like:
>> >
>> >     state = global_state_get_runstate();
>> >     if (!global_state_received() || runstate_is_alive(state)) {
>> >         if (autostart) {
>> >             vm_start(state);
>> >         } else {
>> >             if (runstate_is_suspended(state)) {
>> >                 /* Remember suspended state before setting system to STOPed */
>> >                 vm_was_suspended = true;
>> >             }
>> >             runstate_set(RUN_STATE_PAUSED);
>> >         }
>> >     }
>> >
>> > It may or may not have a functional difference even if current patch,
>> > though.  However maybe clearer to follow vm_was_suspended's strict
>> > definition.
>> >
>> >> 
>> >> If the vm *was* suspended, but is currently stopped (eg RUN_STATE_PAUSED),
>> >> then vm_was_suspended = true.  Migration from that state sets
>> >> vm_was_suspended = s->vm_was_suspended = true in global_state_post_load and 
>> >> ends with runstate_set(RUN_STATE_PAUSED).
>> >> 
>> >> I will add a comment here in the code.
>> >>  
>> >> >>      return 0;
>> >> >>  }
>> >> >> @@ -134,6 +143,7 @@ static const VMStateDescription vmstate_globalstate = {
>> >> >>      .fields = (VMStateField[]) {
>> >> >>          VMSTATE_UINT32(size, GlobalState),
>> >> >>          VMSTATE_BUFFER(runstate, GlobalState),
>> >> >> +        VMSTATE_BOOL(vm_was_suspended, GlobalState),
>> >> >>          VMSTATE_END_OF_LIST()
>> >> >>      },
>> >> >>  };
>> >> > 
>> >> > I think this will break migration between old/new, unfortunately.  And
>> >> > since the global state exist mostly for every VM, all VM setup should be
>> >> > affected, and over all archs.
>> >> 
>> >> Thanks, I keep forgetting that my binary tricks are no good here.  However,
>> >> I have one other trick up my sleeve, which is to store vm_was_running in
>> >> global_state.runstate[strlen(runstate) + 2].  It is forwards and backwards
>> >> compatible, since that byte is always 0 in older qemu.  It can be implemented
>> >> with a few lines of code change confined to global_state.c, versus many lines 
>> >> spread across files to do it the conventional way using a compat property and
>> >> a subsection.  Sound OK?  
>> >
>> > Tricky!  But sounds okay to me.  I think you're inventing some of your own
>> > way of being compatible, not relying on machine type as a benefit.  If go
>> > this route please document clearly on the layout and also what it looked
>> > like in old binaries.
>> >
>> > I think maybe it'll be good to keep using strings, so in the new binaries
>> > we allow >1 strings, then we define properly on those strings (index 0:
>> > runstate, existed since start; index 2: suspended, perhaps using "1"/"0" to
>> > express, while 0x00 means old binary, etc.).
>> >
>> > I hope this trick will need less code than the subsection solution,
>> > otherwise I'd still consider going with that, which is the "common
>> > solution".
>> >
>> > Let's also see whether Juan/Fabiano/others has any opinions.
>> 
>> Can't we pack the structure and just go ahead and slash 'runstate' in
>> half? That would claim some unused bytes for future backward
>> compatibility issues.
>
> What I meant is something like:
>
>   runstate[100] = {"str1", 0x00, "str2", 0x00, ...}
>
> Where str1 is runstate, and str2 can be either "0"/"1" to reflect suspended
> value.  We define all the strings separated by 0x00, then IIUC we save the
> most chars for potential future extension of this string.
>
> Thanks,

Right, I got your point. I just think we could avoid designing this new
string format by creating new fields with the extra space:

typedef struct QEMU_PACKED {
    uint32_t size;
    uint8_t runstate[50];
    uint8_t unused[50];
    RunState state;
    bool received;
} GlobalState;

In my mind this works seamlessly, or am I mistaken?

In any case, a oneshot hack might be better than both our suggestions
because we can just clean it up a couple of releases from now as if
nothing happened.
Peter Xu Dec. 4, 2023, 10:04 p.m. UTC | #7
On Mon, Dec 04, 2023 at 06:09:16PM -0300, Fabiano Rosas wrote:
> Right, I got your point. I just think we could avoid designing this new
> string format by creating new fields with the extra space:
> 
> typedef struct QEMU_PACKED {
>     uint32_t size;
>     uint8_t runstate[50];
>     uint8_t unused[50];
>     RunState state;
>     bool received;
> } GlobalState;
> 
> In my mind this works seamlessly, or am I mistaken?

I think what you proposed should indeed work.

Currently it's:

    .fields = (VMStateField[]) {
        VMSTATE_UINT32(size, GlobalState),
        VMSTATE_BUFFER(runstate, GlobalState),
        VMSTATE_END_OF_LIST()
    },

I had a quick look at vmstate_info_buffer, it mostly only get()/put() those
buffers with its sizeof(), so looks all fine.  For sure in all cases we'd
better test it to verify.

One side note is since we so far use qapi_enum_parse() for the runstate, I
think the "size" is not ever used..

If we do want a split, IMHO we can consider making runstate[] even smaller
to just free up the rest spaces all in one shot:

  typedef struct QEMU_PACKED {
      uint32_t size;
      /*
       * Assuming 16 is good enough to fit all possible runstate strings..
       * This field must be a string ending with '\0'.
       */
      uint8_t runstate[16];
      /* 0x00 when QEMU doesn't support it, or "0"/"1" to reflect its state */
      uint8_t vm_was_suspended[1];
      /*
       * Still free of use space.  Note that we only have 99 bytes for use
       * because the last byte (the 100th byte) must be zero due to legacy
       * reasons, if not it may be set to zero after loaded on dest QEMU. 
       */
      uint8_t unused[82];
      RunState state;
      bool received;
  } GlobalState;

Pairs with something like:

    .fields = (VMStateField[]) {
        /* Used to be "size" but never used on dest, so always ignored */
        VMSTATE_UNUSED(4),
        VMSTATE_BUFFER(runstate, GlobalState),
        VMSTATE_BUFFER(vm_was_suspended, GlobalState),
        /*
         * This is actually all zeros, but just to differenciate from the
         * last byte..
         */
        VMSTATE_BUFFER(unused, GlobalState),
        /*
         * For historical reasons, the last byte must be 0x00 or it'll be
         * overwritten by old qemu otherwise.
         */
        VMSTATE_UNUSED(1),
        VMSTATE_END_OF_LIST()
    },

> 
> In any case, a oneshot hack might be better than both our suggestions
> because we can just clean it up a couple of releases from now as if
> nothing happened.

It can be forgotten forever, then we keep the code less readable.  If we
have a plan to do that and not so awkward, IMHO we should go directly with
that plan.

Thanks,
Steven Sistare Dec. 4, 2023, 10:23 p.m. UTC | #8
On 12/4/2023 12:24 PM, Peter Xu wrote:
> On Fri, Dec 01, 2023 at 11:23:33AM -0500, Steven Sistare wrote:
>>>> @@ -109,6 +117,7 @@ static int global_state_post_load(void *opaque, int version_id)
>>>>          return -EINVAL;
>>>>      }
>>>>      s->state = r;
>>>> +    vm_set_suspended(s->vm_was_suspended || r == RUN_STATE_SUSPENDED);
>>>
>>> IIUC current vm_was_suspended (based on my read of your patch) was not the
>>> same as a boolean representing "whether VM is suspended", but only a
>>> temporary field to remember that for a VM stop request.  To be explicit, I
>>> didn't see this flag set in qemu_system_suspend() in your previous patch.
>>>
>>> If so, we can already do:
>>>
>>>   vm_set_suspended(s->vm_was_suspended);
>>>
>>> Irrelevant of RUN_STATE_SUSPENDED?
>>
>> We need both terms of the expression.
>>
>> If the vm *is* suspended (RUN_STATE_SUSPENDED), then vm_was_suspended = false.
>> We call global_state_store prior to vm_stop_force_state, so the incoming
>> side sees s->state = RUN_STATE_SUSPENDED and s->vm_was_suspended = false.
> 
> Right.
> 
>> However, the runstate is RUN_STATE_INMIGRATE.  When incoming finishes by
>> calling vm_start, we need to restore the suspended state.  Thus in 
>> global_state_post_load, we must set vm_was_suspended = true.
> 
> With above, shouldn't global_state_get_runstate() (on dest) fetch SUSPENDED
> already?  Then I think it should call vm_start(SUSPENDED) if to start.

The V6 code does not pass a state to vm_start, and knowledge of vm_was_suspended
is confined to the global_state and cpus functions.  IMO this is a more modular
and robust solution, as multiple sites may call vm_start(), and the right thing
happens.  Look at patch 6.  The changes are minimal because vm_start "just works".

> Maybe you're talking about the special case where autostart==false?  We
> used to have this (existing process_incoming_migration_bh()):
> 
>     if (!global_state_received() ||
>         global_state_get_runstate() == RUN_STATE_RUNNING) {
>         if (autostart) {
>             vm_start();
>         } else {
>             runstate_set(RUN_STATE_PAUSED);
>         }
>     }
> 
> If so maybe I get you, because in the "else" path we do seem to lose the
> SUSPENDED state again, but in that case IMHO we should logically set
> vm_was_suspended only when we "lose" it - we didn't lose it during
> migration, but only until we decided to switch to PAUSED (due to
> autostart==false). IOW, change above to something like:
> 
>     state = global_state_get_runstate();
>     if (!global_state_received() || runstate_is_alive(state)) {
>         if (autostart) {
>             vm_start(state);
>         } else {
>             if (runstate_is_suspended(state)) {
>                 /* Remember suspended state before setting system to STOPed */
>                 vm_was_suspended = true;
>             }
>             runstate_set(RUN_STATE_PAUSED);
>         }
>     }

This is similar to V5 which tested suspended and fiddled with runstate at
multiple call sites in migration and snapshot.  I believe V6 is cleaner.

> It may or may not have a functional difference even if current patch,
> though.  However maybe clearer to follow vm_was_suspended's strict
> definition.
> 
>>
>> If the vm *was* suspended, but is currently stopped (eg RUN_STATE_PAUSED),
>> then vm_was_suspended = true.  Migration from that state sets
>> vm_was_suspended = s->vm_was_suspended = true in global_state_post_load and 
>> ends with runstate_set(RUN_STATE_PAUSED).
>>
>> I will add a comment here in the code.
>>  
>>>>      return 0;
>>>>  }
>>>> @@ -134,6 +143,7 @@ static const VMStateDescription vmstate_globalstate = {
>>>>      .fields = (VMStateField[]) {
>>>>          VMSTATE_UINT32(size, GlobalState),
>>>>          VMSTATE_BUFFER(runstate, GlobalState),
>>>> +        VMSTATE_BOOL(vm_was_suspended, GlobalState),
>>>>          VMSTATE_END_OF_LIST()
>>>>      },
>>>>  };
>>>
>>> I think this will break migration between old/new, unfortunately.  And
>>> since the global state exist mostly for every VM, all VM setup should be
>>> affected, and over all archs.
>>
>> Thanks, I keep forgetting that my binary tricks are no good here.  However,
>> I have one other trick up my sleeve, which is to store vm_was_running in
>> global_state.runstate[strlen(runstate) + 2].  It is forwards and backwards
>> compatible, since that byte is always 0 in older qemu.  It can be implemented
>> with a few lines of code change confined to global_state.c, versus many lines 
>> spread across files to do it the conventional way using a compat property and
>> a subsection.  Sound OK?  
> 
> Tricky!  But sounds okay to me.  I think you're inventing some of your own
> way of being compatible, not relying on machine type as a benefit.  If go
> this route please document clearly on the layout and also what it looked
> like in old binaries.
> 
> I think maybe it'll be good to keep using strings, so in the new binaries
> we allow >1 strings, then we define properly on those strings (index 0:
> runstate, existed since start; index 2: suspended, perhaps using "1"/"0" to
> express, while 0x00 means old binary, etc.).
> 
> I hope this trick will need less code than the subsection solution,
> otherwise I'd still consider going with that, which is the "common
> solution".
> 
> Let's also see whether Juan/Fabiano/others has any opinions.

The disadvantage of using strings '0' and '1' is the additional check for
the backwards compatible value 0x00.  No big deal, and I'll do that if you prefer, 
but it seems unnecessary. I had already written this for binary 0/1. Not yet tested, 
and still needs comments:

-----------
diff --git a/migration/global_state.c b/migration/global_state.c
index 4e2a9d8..8a59554 100644
--- a/migration/global_state.c
+++ b/migration/global_state.c
@@ -32,9 +32,10 @@ static GlobalState global_state;
 static void global_state_do_store(RunState state)
 {
     const char *state_str = RunState_str(state);
-    assert(strlen(state_str) < sizeof(global_state.runstate));
+    assert(strlen(state_str) < sizeof(global_state.runstate) - 2);
     strpadcpy((char *)global_state.runstate, sizeof(global_state.runstate),
               state_str, '\0');
+    global_state.runstate[strlen(state_str) + 1] = vm_get_suspended();
 }

 void global_state_store(void)
@@ -68,6 +69,12 @@ static bool global_state_needed(void *opaque)
         return true;
     }

+    /* If the suspended state must be remembered, it is needed */
+
+    if (vm_get_suspended()) {
+        return true;
+    }
+
     /* If state is running or paused, it is not needed */

     if (strcmp(runstate, "running") == 0 ||
@@ -109,6 +116,8 @@ static int global_state_post_load(void *opaque, int version_
         return -EINVAL;
     }
     s->state = r;
+    bool vm_was_suspended = runstate[strlen(runstate) + 1];
+    vm_set_suspended(vm_was_suspended || r == RUN_STATE_SUSPENDED);

     return 0;
 }
------------

- Steve
Fabiano Rosas Dec. 5, 2023, 12:44 p.m. UTC | #9
Peter Xu <peterx@redhat.com> writes:

> On Mon, Dec 04, 2023 at 06:09:16PM -0300, Fabiano Rosas wrote:
>> Right, I got your point. I just think we could avoid designing this new
>> string format by creating new fields with the extra space:
>> 
>> typedef struct QEMU_PACKED {
>>     uint32_t size;
>>     uint8_t runstate[50];
>>     uint8_t unused[50];
>>     RunState state;
>>     bool received;
>> } GlobalState;
>> 
>> In my mind this works seamlessly, or am I mistaken?
>
> I think what you proposed should indeed work.
>
> Currently it's:
>
>     .fields = (VMStateField[]) {
>         VMSTATE_UINT32(size, GlobalState),
>         VMSTATE_BUFFER(runstate, GlobalState),
>         VMSTATE_END_OF_LIST()
>     },
>
> I had a quick look at vmstate_info_buffer, it mostly only get()/put() those
> buffers with its sizeof(), so looks all fine.  For sure in all cases we'd
> better test it to verify.
>
> One side note is since we so far use qapi_enum_parse() for the runstate, I
> think the "size" is not ever used..
>
> If we do want a split, IMHO we can consider making runstate[] even smaller
> to just free up the rest spaces all in one shot:
>
>   typedef struct QEMU_PACKED {
>       uint32_t size;
>       /*
>        * Assuming 16 is good enough to fit all possible runstate strings..
>        * This field must be a string ending with '\0'.
>        */
>       uint8_t runstate[16];
>       /* 0x00 when QEMU doesn't support it, or "0"/"1" to reflect its state */
>       uint8_t vm_was_suspended[1];
>       /*
>        * Still free of use space.  Note that we only have 99 bytes for use
>        * because the last byte (the 100th byte) must be zero due to legacy
>        * reasons, if not it may be set to zero after loaded on dest QEMU. 
>        */

I'd add a 'uint8_t reserved;' to go along with this comment instead of
leaving a hole.
Steven Sistare Dec. 5, 2023, 2:14 p.m. UTC | #10
On 12/5/2023 7:44 AM, Fabiano Rosas wrote:
> Peter Xu <peterx@redhat.com> writes:
> 
>> On Mon, Dec 04, 2023 at 06:09:16PM -0300, Fabiano Rosas wrote:
>>> Right, I got your point. I just think we could avoid designing this new
>>> string format by creating new fields with the extra space:
>>>
>>> typedef struct QEMU_PACKED {
>>>     uint32_t size;
>>>     uint8_t runstate[50];
>>>     uint8_t unused[50];
>>>     RunState state;
>>>     bool received;
>>> } GlobalState;
>>>
>>> In my mind this works seamlessly, or am I mistaken?
>>
>> I think what you proposed should indeed work.
>>
>> Currently it's:
>>
>>     .fields = (VMStateField[]) {
>>         VMSTATE_UINT32(size, GlobalState),
>>         VMSTATE_BUFFER(runstate, GlobalState),
>>         VMSTATE_END_OF_LIST()
>>     },
>>
>> I had a quick look at vmstate_info_buffer, it mostly only get()/put() those
>> buffers with its sizeof(), so looks all fine.  For sure in all cases we'd
>> better test it to verify.
>>
>> One side note is since we so far use qapi_enum_parse() for the runstate, I
>> think the "size" is not ever used..
>>
>> If we do want a split, IMHO we can consider making runstate[] even smaller
>> to just free up the rest spaces all in one shot:
>>
>>   typedef struct QEMU_PACKED {
>>       uint32_t size;
>>       /*
>>        * Assuming 16 is good enough to fit all possible runstate strings..
>>        * This field must be a string ending with '\0'.
>>        */
>>       uint8_t runstate[16];
>>       /* 0x00 when QEMU doesn't support it, or "0"/"1" to reflect its state */
>>       uint8_t vm_was_suspended[1];
>>       /*
>>        * Still free of use space.  Note that we only have 99 bytes for use
>>        * because the last byte (the 100th byte) must be zero due to legacy
>>        * reasons, if not it may be set to zero after loaded on dest QEMU. 
>>        */
> 
> I'd add a 'uint8_t reserved;' to go along with this comment instead of
> leaving a hole.

I'll use this scheme, thanks.  It is a clearer than implicitly packing strings.

- Steve
Peter Xu Dec. 5, 2023, 4:18 p.m. UTC | #11
On Tue, Dec 05, 2023 at 09:44:12AM -0300, Fabiano Rosas wrote:
> Peter Xu <peterx@redhat.com> writes:
> 
> > On Mon, Dec 04, 2023 at 06:09:16PM -0300, Fabiano Rosas wrote:
> >> Right, I got your point. I just think we could avoid designing this new
> >> string format by creating new fields with the extra space:
> >> 
> >> typedef struct QEMU_PACKED {
> >>     uint32_t size;
> >>     uint8_t runstate[50];
> >>     uint8_t unused[50];
> >>     RunState state;
> >>     bool received;
> >> } GlobalState;
> >> 
> >> In my mind this works seamlessly, or am I mistaken?
> >
> > I think what you proposed should indeed work.
> >
> > Currently it's:
> >
> >     .fields = (VMStateField[]) {
> >         VMSTATE_UINT32(size, GlobalState),
> >         VMSTATE_BUFFER(runstate, GlobalState),
> >         VMSTATE_END_OF_LIST()
> >     },
> >
> > I had a quick look at vmstate_info_buffer, it mostly only get()/put() those
> > buffers with its sizeof(), so looks all fine.  For sure in all cases we'd
> > better test it to verify.
> >
> > One side note is since we so far use qapi_enum_parse() for the runstate, I
> > think the "size" is not ever used..
> >
> > If we do want a split, IMHO we can consider making runstate[] even smaller
> > to just free up the rest spaces all in one shot:
> >
> >   typedef struct QEMU_PACKED {

[1]

> >       uint32_t size;
> >       /*
> >        * Assuming 16 is good enough to fit all possible runstate strings..
> >        * This field must be a string ending with '\0'.
> >        */
> >       uint8_t runstate[16];
> >       /* 0x00 when QEMU doesn't support it, or "0"/"1" to reflect its state */
> >       uint8_t vm_was_suspended[1];
> >       /*
> >        * Still free of use space.  Note that we only have 99 bytes for use
> >        * because the last byte (the 100th byte) must be zero due to legacy
> >        * reasons, if not it may be set to zero after loaded on dest QEMU. 
> >        */
> 
> I'd add a 'uint8_t reserved;' to go along with this comment instead of
> leaving a hole.

Note that "struct GlobalState" is not a binary format but only some
internal storage, what really matters is vmstate_globalstate.  Here the
"uint8_reserved" will be a pure waste of 1 byte in QEMU binary, imho.

I think I just copied what you had previously and extended it, logically I
don't think we ever need QEMU_PACKED right above [1].  We can also drop
"size" directly here, but this can be done later.
Peter Xu Dec. 5, 2023, 4:50 p.m. UTC | #12
On Mon, Dec 04, 2023 at 05:23:57PM -0500, Steven Sistare wrote:
> The V6 code does not pass a state to vm_start, and knowledge of vm_was_suspended
> is confined to the global_state and cpus functions.  IMO this is a more modular
> and robust solution, as multiple sites may call vm_start(), and the right thing
> happens.  Look at patch 6.  The changes are minimal because vm_start "just works".

Oh I think I see what you meant.  Sounds good then.

Shall we hide that into vm_prepare_start()?  It seems three's still one
more call sites that always pass in RUNNING (gdb_continue_partial).

If with above, vm_prepare_start() will go into either RUNNING, SUSPENDED,
or an error.  It returns 0 only if RUNNING, -1 for all the rest.  Maybe we
can already also touch up the retval of vm_prepare_start() to be a boolean,
reflecting "whether vcpu needs to be started".
Fabiano Rosas Dec. 5, 2023, 4:52 p.m. UTC | #13
Peter Xu <peterx@redhat.com> writes:

> On Tue, Dec 05, 2023 at 09:44:12AM -0300, Fabiano Rosas wrote:
>> Peter Xu <peterx@redhat.com> writes:
>> 
>> > On Mon, Dec 04, 2023 at 06:09:16PM -0300, Fabiano Rosas wrote:
>> >> Right, I got your point. I just think we could avoid designing this new
>> >> string format by creating new fields with the extra space:
>> >> 
>> >> typedef struct QEMU_PACKED {
>> >>     uint32_t size;
>> >>     uint8_t runstate[50];
>> >>     uint8_t unused[50];
>> >>     RunState state;
>> >>     bool received;
>> >> } GlobalState;
>> >> 
>> >> In my mind this works seamlessly, or am I mistaken?
>> >
>> > I think what you proposed should indeed work.
>> >
>> > Currently it's:
>> >
>> >     .fields = (VMStateField[]) {
>> >         VMSTATE_UINT32(size, GlobalState),
>> >         VMSTATE_BUFFER(runstate, GlobalState),
>> >         VMSTATE_END_OF_LIST()
>> >     },
>> >
>> > I had a quick look at vmstate_info_buffer, it mostly only get()/put() those
>> > buffers with its sizeof(), so looks all fine.  For sure in all cases we'd
>> > better test it to verify.
>> >
>> > One side note is since we so far use qapi_enum_parse() for the runstate, I
>> > think the "size" is not ever used..
>> >
>> > If we do want a split, IMHO we can consider making runstate[] even smaller
>> > to just free up the rest spaces all in one shot:
>> >
>> >   typedef struct QEMU_PACKED {
>
> [1]
>
>> >       uint32_t size;
>> >       /*
>> >        * Assuming 16 is good enough to fit all possible runstate strings..
>> >        * This field must be a string ending with '\0'.
>> >        */
>> >       uint8_t runstate[16];
>> >       /* 0x00 when QEMU doesn't support it, or "0"/"1" to reflect its state */
>> >       uint8_t vm_was_suspended[1];
>> >       /*
>> >        * Still free of use space.  Note that we only have 99 bytes for use
>> >        * because the last byte (the 100th byte) must be zero due to legacy
>> >        * reasons, if not it may be set to zero after loaded on dest QEMU. 
>> >        */
>> 
>> I'd add a 'uint8_t reserved;' to go along with this comment instead of
>> leaving a hole.
>
> Note that "struct GlobalState" is not a binary format but only some
> internal storage, what really matters is vmstate_globalstate.  Here the
> "uint8_reserved" will be a pure waste of 1 byte in QEMU binary, imho.
>

I prefer wasting the byte and make the code more obvious to people who
might not immediately understand what's going on. We could even
assert(!global_state.reserved) to sanity check the assumption. Anyway,
that's minor, I'm fine with it either way.

> I think I just copied what you had previously and extended it, logically I
> don't think we ever need QEMU_PACKED right above [1].  We can also drop
> "size" directly here, but this can be done later.

Ah right, I was initially thinking of letting the new qemu overrun
runstate[16] so we wouldn't have to change the code. But that's indeed
not necessary, your additions to the vmstate make it ok. Thanks.
Steven Sistare Dec. 5, 2023, 5:04 p.m. UTC | #14
On 12/5/2023 11:52 AM, Fabiano Rosas wrote:
> Peter Xu <peterx@redhat.com> writes:
> 
>> On Tue, Dec 05, 2023 at 09:44:12AM -0300, Fabiano Rosas wrote:
>>> Peter Xu <peterx@redhat.com> writes:
>>>
>>>> On Mon, Dec 04, 2023 at 06:09:16PM -0300, Fabiano Rosas wrote:
>>>>> Right, I got your point. I just think we could avoid designing this new
>>>>> string format by creating new fields with the extra space:
>>>>>
>>>>> typedef struct QEMU_PACKED {
>>>>>     uint32_t size;
>>>>>     uint8_t runstate[50];
>>>>>     uint8_t unused[50];
>>>>>     RunState state;
>>>>>     bool received;
>>>>> } GlobalState;
>>>>>
>>>>> In my mind this works seamlessly, or am I mistaken?
>>>>
>>>> I think what you proposed should indeed work.
>>>>
>>>> Currently it's:
>>>>
>>>>     .fields = (VMStateField[]) {
>>>>         VMSTATE_UINT32(size, GlobalState),
>>>>         VMSTATE_BUFFER(runstate, GlobalState),
>>>>         VMSTATE_END_OF_LIST()
>>>>     },
>>>>
>>>> I had a quick look at vmstate_info_buffer, it mostly only get()/put() those
>>>> buffers with its sizeof(), so looks all fine.  For sure in all cases we'd
>>>> better test it to verify.
>>>>
>>>> One side note is since we so far use qapi_enum_parse() for the runstate, I
>>>> think the "size" is not ever used..
>>>>
>>>> If we do want a split, IMHO we can consider making runstate[] even smaller
>>>> to just free up the rest spaces all in one shot:
>>>>
>>>>   typedef struct QEMU_PACKED {
>>
>> [1]
>>
>>>>       uint32_t size;
>>>>       /*
>>>>        * Assuming 16 is good enough to fit all possible runstate strings..
>>>>        * This field must be a string ending with '\0'.
>>>>        */
>>>>       uint8_t runstate[16];
>>>>       /* 0x00 when QEMU doesn't support it, or "0"/"1" to reflect its state */
>>>>       uint8_t vm_was_suspended[1];
>>>>       /*
>>>>        * Still free of use space.  Note that we only have 99 bytes for use
>>>>        * because the last byte (the 100th byte) must be zero due to legacy
>>>>        * reasons, if not it may be set to zero after loaded on dest QEMU. 
>>>>        */
>>>
>>> I'd add a 'uint8_t reserved;' to go along with this comment instead of
>>> leaving a hole.
>>
>> Note that "struct GlobalState" is not a binary format but only some
>> internal storage, what really matters is vmstate_globalstate.  Here the
>> "uint8_reserved" will be a pure waste of 1 byte in QEMU binary, imho.
>>
> 
> I prefer wasting the byte and make the code more obvious to people who
> might not immediately understand what's going on. We could even
> assert(!global_state.reserved) to sanity check the assumption. Anyway,
> that's minor, I'm fine with it either way.
> 
>> I think I just copied what you had previously and extended it, logically I
>> don't think we ever need QEMU_PACKED right above [1].  We can also drop
>> "size" directly here, but this can be done later.
> 
> Ah right, I was initially thinking of letting the new qemu overrun
> runstate[16] so we wouldn't have to change the code. But that's indeed
> not necessary, your additions to the vmstate make it ok. Thanks.

There is no need to reserve byte 100 in the new scheme.  The incoming side
sets s->runstate[sizeof(s->runstate) - 1] = 0 to protect itself, and now
sizeof(runstate) is smaller.

- Steve
Steven Sistare Dec. 5, 2023, 5:48 p.m. UTC | #15
On 12/5/2023 11:50 AM, Peter Xu wrote:
> On Mon, Dec 04, 2023 at 05:23:57PM -0500, Steven Sistare wrote:
>> The V6 code does not pass a state to vm_start, and knowledge of vm_was_suspended
>> is confined to the global_state and cpus functions.  IMO this is a more modular
>> and robust solution, as multiple sites may call vm_start(), and the right thing
>> happens.  Look at patch 6.  The changes are minimal because vm_start "just works".
> 
> Oh I think I see what you meant.  Sounds good then.
> 
> Shall we hide that into vm_prepare_start()?  It seems three's still one
> more call sites that always pass in RUNNING (gdb_continue_partial).
> 
> If with above, vm_prepare_start() will go into either RUNNING, SUSPENDED,
> or an error.  It returns 0 only if RUNNING, -1 for all the rest.  Maybe we
> can already also touch up the retval of vm_prepare_start() to be a boolean,
> reflecting "whether vcpu needs to be started".

Yes, that is even nicer, thanks.

/**
 * Prepare for (re)starting the VM.
 * Returns 0 if the vCPUs should be restarted, -1 on an error condition,
 * and 1 otherwise.
 */
int vm_prepare_start(bool step_pending)
{
    int ret = vm_was_suspended ? 1 : 0;
    RunState state = vm_was_suspended ? RUN_STATE_SUSPENDED : RUN_STATE_RUNNING;
    ...
    vm_was_suspended = false;
    return ret;
}

void vm_start(void)
{
    if (!vm_prepare_start(false)) {
        resume_all_vcpus();
    }
}

- Steve
diff mbox series

Patch

diff --git a/migration/global_state.c b/migration/global_state.c
index 4e2a9d8..de2532c 100644
--- a/migration/global_state.c
+++ b/migration/global_state.c
@@ -25,6 +25,7 @@  typedef struct {
     uint8_t runstate[100];
     RunState state;
     bool received;
+    bool vm_was_suspended;
 } GlobalState;
 
 static GlobalState global_state;
@@ -35,6 +36,7 @@  static void global_state_do_store(RunState state)
     assert(strlen(state_str) < sizeof(global_state.runstate));
     strpadcpy((char *)global_state.runstate, sizeof(global_state.runstate),
               state_str, '\0');
+    global_state.vm_was_suspended = vm_get_suspended();
 }
 
 void global_state_store(void)
@@ -68,6 +70,12 @@  static bool global_state_needed(void *opaque)
         return true;
     }
 
+    /* If the suspended state must be remembered, it is needed */
+
+    if (vm_get_suspended()) {
+        return true;
+    }
+
     /* If state is running or paused, it is not needed */
 
     if (strcmp(runstate, "running") == 0 ||
@@ -109,6 +117,7 @@  static int global_state_post_load(void *opaque, int version_id)
         return -EINVAL;
     }
     s->state = r;
+    vm_set_suspended(s->vm_was_suspended || r == RUN_STATE_SUSPENDED);
 
     return 0;
 }
@@ -134,6 +143,7 @@  static const VMStateDescription vmstate_globalstate = {
     .fields = (VMStateField[]) {
         VMSTATE_UINT32(size, GlobalState),
         VMSTATE_BUFFER(runstate, GlobalState),
+        VMSTATE_BOOL(vm_was_suspended, GlobalState),
         VMSTATE_END_OF_LIST()
     },
 };