Message ID | 1701883417-356268-6-git-send-email-steven.sistare@oracle.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | fix migration of suspended runstate | expand |
Steve Sistare <steven.sistare@oracle.com> writes: > If the outgoing machine was previously suspended, propagate that to the > incoming side via global_state, so a subsequent vm_start restores the > suspended state. To maintain backward and forward compatibility, reclaim > some space from the runstate member. > > Signed-off-by: Steve Sistare <steven.sistare@oracle.com> > --- > migration/global_state.c | 35 +++++++++++++++++++++++++++++++++-- > 1 file changed, 33 insertions(+), 2 deletions(-) > > diff --git a/migration/global_state.c b/migration/global_state.c > index 4e2a9d8..d4f61a1 100644 > --- a/migration/global_state.c > +++ b/migration/global_state.c > @@ -22,7 +22,16 @@ > > typedef struct { > uint32_t size; > - uint8_t runstate[100]; > + > + /* > + * runstate was 100 bytes, zero padded, but we trimmed it to add a > + * few fields and maintain backwards compatibility. > + */ > + uint8_t runstate[32]; > + uint8_t has_vm_was_suspended; > + uint8_t vm_was_suspended; > + uint8_t unused[66]; > + > RunState state; > bool received; > } GlobalState; > @@ -35,6 +44,10 @@ static void global_state_do_store(RunState state) > assert(strlen(state_str) < sizeof(global_state.runstate)); > strpadcpy((char *)global_state.runstate, sizeof(global_state.runstate), > state_str, '\0'); > + global_state.has_vm_was_suspended = true; > + global_state.vm_was_suspended = vm_get_suspended(); > + > + memset(global_state.unused, 0, sizeof(global_state.unused)); > } > > void global_state_store(void) > @@ -68,6 +81,12 @@ static bool global_state_needed(void *opaque) > return true; > } > > + /* If the suspended state must be remembered, it is needed */ > + > + if (vm_get_suspended()) { > + return true; > + } > + > /* If state is running or paused, it is not needed */ > > if (strcmp(runstate, "running") == 0 || > @@ -85,6 +104,7 @@ static int global_state_post_load(void *opaque, int version_id) > Error *local_err = NULL; > int r; > char *runstate = (char *)s->runstate; > + bool vm_was_suspended = s->has_vm_was_suspended && s->vm_was_suspended; Why do you need has_vm_was_suspended? If they're always read like this, then one flag should do, no? > > s->received = true; > trace_migrate_global_state_post_load(runstate); > @@ -93,7 +113,7 @@ static int global_state_post_load(void *opaque, int version_id) > sizeof(s->runstate)) == sizeof(s->runstate)) { > /* > * This condition should never happen during migration, because > - * all runstate names are shorter than 100 bytes (the size of > + * all runstate names are shorter than 32 bytes (the size of > * s->runstate). However, a malicious stream could overflow > * the qapi_enum_parse() call, so we force the last character > * to a NUL byte. > @@ -110,6 +130,14 @@ static int global_state_post_load(void *opaque, int version_id) > } > s->state = r; > > + /* > + * global_state is saved on the outgoing side before forcing a stopped > + * state, so it may have saved state=suspended and vm_was_suspended=0. > + * Now we are in a paused state, and when we later call vm_start, it must > + * restore the suspended state, so we must set vm_was_suspended=1 here. > + */ > + vm_set_suspended(vm_was_suspended || r == RUN_STATE_SUSPENDED); > + > return 0; > } > > @@ -134,6 +162,9 @@ static const VMStateDescription vmstate_globalstate = { > .fields = (VMStateField[]) { > VMSTATE_UINT32(size, GlobalState), > VMSTATE_BUFFER(runstate, GlobalState), > + VMSTATE_UINT8(has_vm_was_suspended, GlobalState), > + VMSTATE_UINT8(vm_was_suspended, GlobalState), > + VMSTATE_BUFFER(unused, GlobalState), > VMSTATE_END_OF_LIST() > }, > };
On 12/8/2023 11:37 AM, Fabiano Rosas wrote: > Steve Sistare <steven.sistare@oracle.com> writes: > >> If the outgoing machine was previously suspended, propagate that to the >> incoming side via global_state, so a subsequent vm_start restores the >> suspended state. To maintain backward and forward compatibility, reclaim >> some space from the runstate member. >> >> Signed-off-by: Steve Sistare <steven.sistare@oracle.com> >> --- >> migration/global_state.c | 35 +++++++++++++++++++++++++++++++++-- >> 1 file changed, 33 insertions(+), 2 deletions(-) >> >> diff --git a/migration/global_state.c b/migration/global_state.c >> index 4e2a9d8..d4f61a1 100644 >> --- a/migration/global_state.c >> +++ b/migration/global_state.c >> @@ -22,7 +22,16 @@ >> >> typedef struct { >> uint32_t size; >> - uint8_t runstate[100]; >> + >> + /* >> + * runstate was 100 bytes, zero padded, but we trimmed it to add a >> + * few fields and maintain backwards compatibility. >> + */ >> + uint8_t runstate[32]; >> + uint8_t has_vm_was_suspended; >> + uint8_t vm_was_suspended; >> + uint8_t unused[66]; >> + >> RunState state; >> bool received; >> } GlobalState; >> @@ -35,6 +44,10 @@ static void global_state_do_store(RunState state) >> assert(strlen(state_str) < sizeof(global_state.runstate)); >> strpadcpy((char *)global_state.runstate, sizeof(global_state.runstate), >> state_str, '\0'); >> + global_state.has_vm_was_suspended = true; >> + global_state.vm_was_suspended = vm_get_suspended(); >> + >> + memset(global_state.unused, 0, sizeof(global_state.unused)); >> } >> >> void global_state_store(void) >> @@ -68,6 +81,12 @@ static bool global_state_needed(void *opaque) >> return true; >> } >> >> + /* If the suspended state must be remembered, it is needed */ >> + >> + if (vm_get_suspended()) { >> + return true; >> + } >> + >> /* If state is running or paused, it is not needed */ >> >> if (strcmp(runstate, "running") == 0 || >> @@ -85,6 +104,7 @@ static int global_state_post_load(void *opaque, int version_id) >> Error *local_err = NULL; >> int r; >> char *runstate = (char *)s->runstate; >> + bool vm_was_suspended = s->has_vm_was_suspended && s->vm_was_suspended; > > Why do you need has_vm_was_suspended? If they're always read like this, > then one flag should do, no? has_vm_was_suspended=0 means a legacy qemu. Currently the dest has no reason to care, and we treat that as a vm_was_suspended=0 case, but I want to keep that field in case we ever need to know. But you are correct that this line can simply be: bool vm_was_suspended = s->vm_was_suspended; - Steve >> s->received = true; >> trace_migrate_global_state_post_load(runstate); >> @@ -93,7 +113,7 @@ static int global_state_post_load(void *opaque, int version_id) >> sizeof(s->runstate)) == sizeof(s->runstate)) { >> /* >> * This condition should never happen during migration, because >> - * all runstate names are shorter than 100 bytes (the size of >> + * all runstate names are shorter than 32 bytes (the size of >> * s->runstate). However, a malicious stream could overflow >> * the qapi_enum_parse() call, so we force the last character >> * to a NUL byte. >> @@ -110,6 +130,14 @@ static int global_state_post_load(void *opaque, int version_id) >> } >> s->state = r; >> >> + /* >> + * global_state is saved on the outgoing side before forcing a stopped >> + * state, so it may have saved state=suspended and vm_was_suspended=0. >> + * Now we are in a paused state, and when we later call vm_start, it must >> + * restore the suspended state, so we must set vm_was_suspended=1 here. >> + */ >> + vm_set_suspended(vm_was_suspended || r == RUN_STATE_SUSPENDED); >> + >> return 0; >> } >> >> @@ -134,6 +162,9 @@ static const VMStateDescription vmstate_globalstate = { >> .fields = (VMStateField[]) { >> VMSTATE_UINT32(size, GlobalState), >> VMSTATE_BUFFER(runstate, GlobalState), >> + VMSTATE_UINT8(has_vm_was_suspended, GlobalState), >> + VMSTATE_UINT8(vm_was_suspended, GlobalState), >> + VMSTATE_BUFFER(unused, GlobalState), >> VMSTATE_END_OF_LIST() >> }, >> };
On Wed, Dec 06, 2023 at 09:23:30AM -0800, Steve Sistare wrote: > If the outgoing machine was previously suspended, propagate that to the > incoming side via global_state, so a subsequent vm_start restores the > suspended state. To maintain backward and forward compatibility, reclaim > some space from the runstate member. > > Signed-off-by: Steve Sistare <steven.sistare@oracle.com> Reviewed-by: Peter Xu <peterx@redhat.com> One nitpick below. > --- > migration/global_state.c | 35 +++++++++++++++++++++++++++++++++-- > 1 file changed, 33 insertions(+), 2 deletions(-) > > diff --git a/migration/global_state.c b/migration/global_state.c > index 4e2a9d8..d4f61a1 100644 > --- a/migration/global_state.c > +++ b/migration/global_state.c > @@ -22,7 +22,16 @@ > > typedef struct { > uint32_t size; > - uint8_t runstate[100]; > + > + /* > + * runstate was 100 bytes, zero padded, but we trimmed it to add a > + * few fields and maintain backwards compatibility. > + */ > + uint8_t runstate[32]; > + uint8_t has_vm_was_suspended; > + uint8_t vm_was_suspended; > + uint8_t unused[66]; > + > RunState state; > bool received; > } GlobalState; > @@ -35,6 +44,10 @@ static void global_state_do_store(RunState state) > assert(strlen(state_str) < sizeof(global_state.runstate)); > strpadcpy((char *)global_state.runstate, sizeof(global_state.runstate), > state_str, '\0'); > + global_state.has_vm_was_suspended = true; > + global_state.vm_was_suspended = vm_get_suspended(); > + > + memset(global_state.unused, 0, sizeof(global_state.unused)); > } > > void global_state_store(void) > @@ -68,6 +81,12 @@ static bool global_state_needed(void *opaque) > return true; > } > > + /* If the suspended state must be remembered, it is needed */ > + > + if (vm_get_suspended()) { > + return true; > + } Can we drop this section? I felt unsafe when QEMU can overwrite the option even if user explicitly specified store-global-state=off but we still send this.. Ideally I think it's better if it's as simple as: static bool global_state_needed(void *opaque) { return migrate_get_current()->store_global_state; } I don't think we can remove the old trick due to compatibility reasons, but maybe nice to not add new ones to make this section more unpredictable in the migration stream? IMHO it shouldn't matter in reality for the current use case even dropping it, as I don't expect any non-Xen QEMU VMs migrates without having the option turned on (store-global-state=on) after QEMU 2.4. Thanks,
On 12/11/2023 1:46 AM, Peter Xu wrote: > On Wed, Dec 06, 2023 at 09:23:30AM -0800, Steve Sistare wrote: >> If the outgoing machine was previously suspended, propagate that to the >> incoming side via global_state, so a subsequent vm_start restores the >> suspended state. To maintain backward and forward compatibility, reclaim >> some space from the runstate member. >> >> Signed-off-by: Steve Sistare <steven.sistare@oracle.com> > > Reviewed-by: Peter Xu <peterx@redhat.com> > > One nitpick below. > >> --- >> migration/global_state.c | 35 +++++++++++++++++++++++++++++++++-- >> 1 file changed, 33 insertions(+), 2 deletions(-) >> >> diff --git a/migration/global_state.c b/migration/global_state.c >> index 4e2a9d8..d4f61a1 100644 >> --- a/migration/global_state.c >> +++ b/migration/global_state.c >> @@ -22,7 +22,16 @@ >> >> typedef struct { >> uint32_t size; >> - uint8_t runstate[100]; >> + >> + /* >> + * runstate was 100 bytes, zero padded, but we trimmed it to add a >> + * few fields and maintain backwards compatibility. >> + */ >> + uint8_t runstate[32]; >> + uint8_t has_vm_was_suspended; >> + uint8_t vm_was_suspended; >> + uint8_t unused[66]; >> + >> RunState state; >> bool received; >> } GlobalState; >> @@ -35,6 +44,10 @@ static void global_state_do_store(RunState state) >> assert(strlen(state_str) < sizeof(global_state.runstate)); >> strpadcpy((char *)global_state.runstate, sizeof(global_state.runstate), >> state_str, '\0'); >> + global_state.has_vm_was_suspended = true; >> + global_state.vm_was_suspended = vm_get_suspended(); >> + >> + memset(global_state.unused, 0, sizeof(global_state.unused)); >> } >> >> void global_state_store(void) >> @@ -68,6 +81,12 @@ static bool global_state_needed(void *opaque) >> return true; >> } >> >> + /* If the suspended state must be remembered, it is needed */ >> + >> + if (vm_get_suspended()) { >> + return true; >> + } > > Can we drop this section? > > I felt unsafe when QEMU can overwrite the option even if user explicitly > specified store-global-state=off but we still send this.. Ideally I think > it's better if it's as simple as: > > static bool global_state_needed(void *opaque) > { > return migrate_get_current()->store_global_state; > } I agree, I also did not see the point of dropping global_state for some states. I will simplify it to this. - Steve > I don't think we can remove the old trick due to compatibility reasons, but > maybe nice to not add new ones to make this section more unpredictable in > the migration stream? > > IMHO it shouldn't matter in reality for the current use case even dropping > it, as I don't expect any non-Xen QEMU VMs migrates without having the > option turned on (store-global-state=on) after QEMU 2.4. > > Thanks, >
diff --git a/migration/global_state.c b/migration/global_state.c index 4e2a9d8..d4f61a1 100644 --- a/migration/global_state.c +++ b/migration/global_state.c @@ -22,7 +22,16 @@ typedef struct { uint32_t size; - uint8_t runstate[100]; + + /* + * runstate was 100 bytes, zero padded, but we trimmed it to add a + * few fields and maintain backwards compatibility. + */ + uint8_t runstate[32]; + uint8_t has_vm_was_suspended; + uint8_t vm_was_suspended; + uint8_t unused[66]; + RunState state; bool received; } GlobalState; @@ -35,6 +44,10 @@ static void global_state_do_store(RunState state) assert(strlen(state_str) < sizeof(global_state.runstate)); strpadcpy((char *)global_state.runstate, sizeof(global_state.runstate), state_str, '\0'); + global_state.has_vm_was_suspended = true; + global_state.vm_was_suspended = vm_get_suspended(); + + memset(global_state.unused, 0, sizeof(global_state.unused)); } void global_state_store(void) @@ -68,6 +81,12 @@ static bool global_state_needed(void *opaque) return true; } + /* If the suspended state must be remembered, it is needed */ + + if (vm_get_suspended()) { + return true; + } + /* If state is running or paused, it is not needed */ if (strcmp(runstate, "running") == 0 || @@ -85,6 +104,7 @@ static int global_state_post_load(void *opaque, int version_id) Error *local_err = NULL; int r; char *runstate = (char *)s->runstate; + bool vm_was_suspended = s->has_vm_was_suspended && s->vm_was_suspended; s->received = true; trace_migrate_global_state_post_load(runstate); @@ -93,7 +113,7 @@ static int global_state_post_load(void *opaque, int version_id) sizeof(s->runstate)) == sizeof(s->runstate)) { /* * This condition should never happen during migration, because - * all runstate names are shorter than 100 bytes (the size of + * all runstate names are shorter than 32 bytes (the size of * s->runstate). However, a malicious stream could overflow * the qapi_enum_parse() call, so we force the last character * to a NUL byte. @@ -110,6 +130,14 @@ static int global_state_post_load(void *opaque, int version_id) } s->state = r; + /* + * global_state is saved on the outgoing side before forcing a stopped + * state, so it may have saved state=suspended and vm_was_suspended=0. + * Now we are in a paused state, and when we later call vm_start, it must + * restore the suspended state, so we must set vm_was_suspended=1 here. + */ + vm_set_suspended(vm_was_suspended || r == RUN_STATE_SUSPENDED); + return 0; } @@ -134,6 +162,9 @@ static const VMStateDescription vmstate_globalstate = { .fields = (VMStateField[]) { VMSTATE_UINT32(size, GlobalState), VMSTATE_BUFFER(runstate, GlobalState), + VMSTATE_UINT8(has_vm_was_suspended, GlobalState), + VMSTATE_UINT8(vm_was_suspended, GlobalState), + VMSTATE_BUFFER(unused, GlobalState), VMSTATE_END_OF_LIST() }, };
If the outgoing machine was previously suspended, propagate that to the incoming side via global_state, so a subsequent vm_start restores the suspended state. To maintain backward and forward compatibility, reclaim some space from the runstate member. Signed-off-by: Steve Sistare <steven.sistare@oracle.com> --- migration/global_state.c | 35 +++++++++++++++++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-)