Message ID | 1701380247-340457-12-git-send-email-steven.sistare@oracle.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | fix migration of suspended runstate | expand |
On Thu, Nov 30, 2023 at 01:37:24PM -0800, Steve Sistare wrote: > Add a test case to verify that the suspended state is handled correctly > during live migration precopy. The test suspends the src, migrates, then > wakes the dest. > > Signed-off-by: Steve Sistare <steven.sistare@oracle.com> > --- > tests/qtest/migration-helpers.c | 3 ++ > tests/qtest/migration-helpers.h | 2 ++ > tests/qtest/migration-test.c | 64 ++++++++++++++++++++++++++++++++++++++--- > 3 files changed, 65 insertions(+), 4 deletions(-) > > diff --git a/tests/qtest/migration-helpers.c b/tests/qtest/migration-helpers.c > index fd3b94e..37e8e81 100644 > --- a/tests/qtest/migration-helpers.c > +++ b/tests/qtest/migration-helpers.c > @@ -32,6 +32,9 @@ bool migrate_watch_for_events(QTestState *who, const char *name, > if (g_str_equal(name, "STOP")) { > state->stop_seen = true; > return true; > + } else if (g_str_equal(name, "SUSPEND")) { > + state->suspend_seen = true; > + return true; > } else if (g_str_equal(name, "RESUME")) { > state->resume_seen = true; > return true; > diff --git a/tests/qtest/migration-helpers.h b/tests/qtest/migration-helpers.h > index 3d32699..b478549 100644 > --- a/tests/qtest/migration-helpers.h > +++ b/tests/qtest/migration-helpers.h > @@ -18,6 +18,8 @@ > typedef struct QTestMigrationState { > bool stop_seen; > bool resume_seen; > + bool suspend_seen; > + bool suspend_me; > } QTestMigrationState; > > bool migrate_watch_for_events(QTestState *who, const char *name, > diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c > index e10d5a4..200f023 100644 > --- a/tests/qtest/migration-test.c > +++ b/tests/qtest/migration-test.c > @@ -178,7 +178,7 @@ static void bootfile_delete(void) > /* > * Wait for some output in the serial output file, > * we get an 'A' followed by an endless string of 'B's > - * but on the destination we won't have the A. > + * but on the destination we won't have the A (unless we enabled suspend/resume) > */ > static void wait_for_serial(const char *side) > { > @@ -245,6 +245,13 @@ static void wait_for_resume(QTestState *who, QTestMigrationState *state) > } > } > > +static void wait_for_suspend(QTestState *who, QTestMigrationState *state) > +{ > + if (!state->suspend_seen) { > + qtest_qmp_eventwait(who, "SUSPEND"); > + } > +} > + > /* > * It's tricky to use qemu's migration event capability with qtest, > * events suddenly appearing confuse the qmp()/hmp() responses. > @@ -299,7 +306,7 @@ static void wait_for_migration_pass(QTestState *who) > { > uint64_t pass, prev_pass = 0, changes = 0; > > - while (changes < 2 && !src_state.stop_seen) { > + while (changes < 2 && !src_state.stop_seen && !src_state.suspend_seen) { > usleep(1000); > pass = get_migration_pass(who); > changes += (pass != prev_pass); > @@ -595,7 +602,8 @@ static void migrate_wait_for_dirty_mem(QTestState *from, > watch_byte = qtest_readb(from, watch_address); > do { > usleep(1000 * 10); > - } while (qtest_readb(from, watch_address) == watch_byte); > + } while (qtest_readb(from, watch_address) == watch_byte && > + !src_state.suspend_seen); This is hackish to me. AFAIU the guest code won't ever dirty anything after printing the initial 'B'. IOW, migrate_wait_for_dirty_mem() should not be called for suspend test at all, I guess, because we know it won't. > } > > > @@ -771,6 +779,7 @@ static int test_migrate_start(QTestState **from, QTestState **to, > dst_state = (QTestMigrationState) { }; > src_state = (QTestMigrationState) { }; > bootfile_create(tmpfs, args->suspend_me); > + src_state.suspend_me = args->suspend_me; > > if (strcmp(arch, "i386") == 0 || strcmp(arch, "x86_64") == 0) { > memory_size = "150M"; > @@ -1730,6 +1739,9 @@ static void test_precopy_common(MigrateCommon *args) > * change anything. > */ > if (args->result == MIG_TEST_SUCCEED) { > + if (src_state.suspend_me) { > + wait_for_suspend(from, &src_state); > + } > qtest_qmp_assert_success(from, "{ 'execute' : 'stop'}"); > wait_for_stop(from, &src_state); > migrate_ensure_converge(from); > @@ -1777,6 +1789,9 @@ static void test_precopy_common(MigrateCommon *args) > */ > wait_for_migration_complete(from); > > + if (src_state.suspend_me) { > + wait_for_suspend(from, &src_state); > + } Here it's pretty much uneasy to follow too, waiting for SUSPEND to come, even after migration has already completed! I suspect it never waits, since suspend_seen is normally always already set (with the above hack, migrate_wait_for_dirty_mem() plays the role of waiting for SUSPENDED). > wait_for_stop(from, &src_state); > > } else { IMHO it'll be cleaner to explicitly wait for SUSPENDED when we know migration is not yet completed. Then, we know we're migrating with SUSPENDED. In summary, perhaps something squashed like this? ====8<==== diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c index 30d4b32a35..65e6692716 100644 --- a/tests/qtest/migration-test.c +++ b/tests/qtest/migration-test.c @@ -605,8 +605,7 @@ static void migrate_wait_for_dirty_mem(QTestState *from, watch_byte = qtest_readb(from, watch_address); do { usleep(1000 * 10); - } while (qtest_readb(from, watch_address) == watch_byte && - !src_state.suspend_seen); + } while (qtest_readb(from, watch_address) == watch_byte); } @@ -1805,7 +1804,12 @@ static void test_precopy_common(MigrateCommon *args) wait_for_migration_pass(from); args->iterations--; } - migrate_wait_for_dirty_mem(from, to); + + if (src_state.suspend_me) { + wait_for_suspend(from, &src_state); + } else { + migrate_wait_for_dirty_mem(from, to); + } migrate_ensure_converge(from); @@ -1814,10 +1818,6 @@ static void test_precopy_common(MigrateCommon *args) * hanging forever if migration didn't converge */ wait_for_migration_complete(from); - - if (src_state.suspend_me) { - wait_for_suspend(from, &src_state); - } wait_for_stop(from, &src_state); } else { ====8<==== I didn't check the postcopy patch, but I'd expect something similar would be nice. Thanks,
On 12/4/2023 3:49 PM, Peter Xu wrote: > On Thu, Nov 30, 2023 at 01:37:24PM -0800, Steve Sistare wrote: >> Add a test case to verify that the suspended state is handled correctly >> during live migration precopy. The test suspends the src, migrates, then >> wakes the dest. >> >> Signed-off-by: Steve Sistare <steven.sistare@oracle.com> >> --- >> tests/qtest/migration-helpers.c | 3 ++ >> tests/qtest/migration-helpers.h | 2 ++ >> tests/qtest/migration-test.c | 64 ++++++++++++++++++++++++++++++++++++++--- >> 3 files changed, 65 insertions(+), 4 deletions(-) >> >> diff --git a/tests/qtest/migration-helpers.c b/tests/qtest/migration-helpers.c >> index fd3b94e..37e8e81 100644 >> --- a/tests/qtest/migration-helpers.c >> +++ b/tests/qtest/migration-helpers.c >> @@ -32,6 +32,9 @@ bool migrate_watch_for_events(QTestState *who, const char *name, >> if (g_str_equal(name, "STOP")) { >> state->stop_seen = true; >> return true; >> + } else if (g_str_equal(name, "SUSPEND")) { >> + state->suspend_seen = true; >> + return true; >> } else if (g_str_equal(name, "RESUME")) { >> state->resume_seen = true; >> return true; >> diff --git a/tests/qtest/migration-helpers.h b/tests/qtest/migration-helpers.h >> index 3d32699..b478549 100644 >> --- a/tests/qtest/migration-helpers.h >> +++ b/tests/qtest/migration-helpers.h >> @@ -18,6 +18,8 @@ >> typedef struct QTestMigrationState { >> bool stop_seen; >> bool resume_seen; >> + bool suspend_seen; >> + bool suspend_me; >> } QTestMigrationState; >> >> bool migrate_watch_for_events(QTestState *who, const char *name, >> diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c >> index e10d5a4..200f023 100644 >> --- a/tests/qtest/migration-test.c >> +++ b/tests/qtest/migration-test.c >> @@ -178,7 +178,7 @@ static void bootfile_delete(void) >> /* >> * Wait for some output in the serial output file, >> * we get an 'A' followed by an endless string of 'B's >> - * but on the destination we won't have the A. >> + * but on the destination we won't have the A (unless we enabled suspend/resume) >> */ >> static void wait_for_serial(const char *side) >> { >> @@ -245,6 +245,13 @@ static void wait_for_resume(QTestState *who, QTestMigrationState *state) >> } >> } >> >> +static void wait_for_suspend(QTestState *who, QTestMigrationState *state) >> +{ >> + if (!state->suspend_seen) { >> + qtest_qmp_eventwait(who, "SUSPEND"); >> + } >> +} >> + >> /* >> * It's tricky to use qemu's migration event capability with qtest, >> * events suddenly appearing confuse the qmp()/hmp() responses. >> @@ -299,7 +306,7 @@ static void wait_for_migration_pass(QTestState *who) >> { >> uint64_t pass, prev_pass = 0, changes = 0; >> >> - while (changes < 2 && !src_state.stop_seen) { >> + while (changes < 2 && !src_state.stop_seen && !src_state.suspend_seen) { >> usleep(1000); >> pass = get_migration_pass(who); >> changes += (pass != prev_pass); >> @@ -595,7 +602,8 @@ static void migrate_wait_for_dirty_mem(QTestState *from, >> watch_byte = qtest_readb(from, watch_address); >> do { >> usleep(1000 * 10); >> - } while (qtest_readb(from, watch_address) == watch_byte); >> + } while (qtest_readb(from, watch_address) == watch_byte && >> + !src_state.suspend_seen); > > This is hackish to me. > > AFAIU the guest code won't ever dirty anything after printing the initial > 'B'. IOW, suspend not seen() should not be called for suspend > test at all, I guess, because we know it won't. Calling migrate_wait_for_dirty_mem proves that a source write is propagated to the dest, even for the suspended case. We fully expect that, but a good test verifies our expectations. That is done in the first loop of migrate_wait_for_dirty_mem. After that, we must check for the suspended state, because the second loop will not terminate. Here is a more explicit version: static void migrate_wait_for_dirty_mem(QTestState *from, QTestState *to) { uint64_t watch_address = start_address + MAGIC_OFFSET_BASE; uint64_t marker_address = start_address + MAGIC_OFFSET; uint8_t watch_byte; /* * Wait for the MAGIC_MARKER to get transferred, as an * indicator that a migration pass has made some known * amount of progress. */ do { usleep(1000 * 10); } while (qtest_readq(to, marker_address) != MAGIC_MARKER); + /* If suspended, src only iterates once, and watch_byte may never change */ + if (src_state.suspend_me) { + return; + } >> } >> >> >> @@ -771,6 +779,7 @@ static int test_migrate_start(QTestState **from, QTestState **to, >> dst_state = (QTestMigrationState) { }; >> src_state = (QTestMigrationState) { }; >> bootfile_create(tmpfs, args->suspend_me); >> + src_state.suspend_me = args->suspend_me; >> >> if (strcmp(arch, "i386") == 0 || strcmp(arch, "x86_64") == 0) { >> memory_size = "150M"; >> @@ -1730,6 +1739,9 @@ static void test_precopy_common(MigrateCommon *args) >> * change anything. >> */ >> if (args->result == MIG_TEST_SUCCEED) { >> + if (src_state.suspend_me) { >> + wait_for_suspend(from, &src_state); >> + } >> qtest_qmp_assert_success(from, "{ 'execute' : 'stop'}"); >> wait_for_stop(from, &src_state); >> migrate_ensure_converge(from); >> @@ -1777,6 +1789,9 @@ static void test_precopy_common(MigrateCommon *args) >> */ >> wait_for_migration_complete(from); >> >> + if (src_state.suspend_me) { >> + wait_for_suspend(from, &src_state); >> + } > > Here it's pretty much uneasy to follow too, waiting for SUSPEND to come, > even after migration has already completed! Agreed. > I suspect it never waits, since suspend_seen is normally always already > set (with the above hack, migrate_wait_for_dirty_mem() plays the role of > waiting for SUSPENDED). Yes, it played that role. I will delete all the existing calls to wait_for_suspended, and add them after wait_for_serial("src_serial") in test_precopy_common and migrate_postcopy_prepare. - Steve >> wait_for_stop(from, &src_state); >> >> } else { > > IMHO it'll be cleaner to explicitly wait for SUSPENDED when we know > migration is not yet completed. Then, we know we're migrating with > SUSPENDED. In summary, perhaps something squashed like this? > > ====8<==== > diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c > index 30d4b32a35..65e6692716 100644 > --- a/tests/qtest/migration-test.c > +++ b/tests/qtest/migration-test.c > @@ -605,8 +605,7 @@ static void migrate_wait_for_dirty_mem(QTestState *from, > watch_byte = qtest_readb(from, watch_address); > do { > usleep(1000 * 10); > - } while (qtest_readb(from, watch_address) == watch_byte && > - !src_state.suspend_seen); > + } while (qtest_readb(from, watch_address) == watch_byte); > } > > > @@ -1805,7 +1804,12 @@ static void test_precopy_common(MigrateCommon *args) > wait_for_migration_pass(from); > args->iterations--; > } > - migrate_wait_for_dirty_mem(from, to); > + > + if (src_state.suspend_me) { > + wait_for_suspend(from, &src_state); > + } else { > + migrate_wait_for_dirty_mem(from, to); > + } > > migrate_ensure_converge(from); > > @@ -1814,10 +1818,6 @@ static void test_precopy_common(MigrateCommon *args) > * hanging forever if migration didn't converge > */ > wait_for_migration_complete(from); > - > - if (src_state.suspend_me) { > - wait_for_suspend(from, &src_state); > - } > wait_for_stop(from, &src_state); > > } else { > ====8<==== > > I didn't check the postcopy patch, but I'd expect something similar would > be nice. > > Thanks, >
On Tue, Dec 05, 2023 at 11:14:43AM -0500, Steven Sistare wrote: > Calling migrate_wait_for_dirty_mem proves that a source write is propagated > to the dest, even for the suspended case. We fully expect that, but a good > test verifies our expectations. That is done in the first loop of > migrate_wait_for_dirty_mem. After that, we must check for the suspended > state, because the second loop will not terminate. Here is a more explicit > version: > > static void migrate_wait_for_dirty_mem(QTestState *from, > QTestState *to) > { > uint64_t watch_address = start_address + MAGIC_OFFSET_BASE; > uint64_t marker_address = start_address + MAGIC_OFFSET; > uint8_t watch_byte; > > /* > * Wait for the MAGIC_MARKER to get transferred, as an > * indicator that a migration pass has made some known > * amount of progress. > */ > do { > usleep(1000 * 10); > } while (qtest_readq(to, marker_address) != MAGIC_MARKER); > > + /* If suspended, src only iterates once, and watch_byte may never change */ > + if (src_state.suspend_me) { > + return; > + } Ok. > Yes, it played that role. I will delete all the existing calls to wait_for_suspended, > and add them after wait_for_serial("src_serial") in test_precopy_common and > migrate_postcopy_prepare. Sounds good.
diff --git a/tests/qtest/migration-helpers.c b/tests/qtest/migration-helpers.c index fd3b94e..37e8e81 100644 --- a/tests/qtest/migration-helpers.c +++ b/tests/qtest/migration-helpers.c @@ -32,6 +32,9 @@ bool migrate_watch_for_events(QTestState *who, const char *name, if (g_str_equal(name, "STOP")) { state->stop_seen = true; return true; + } else if (g_str_equal(name, "SUSPEND")) { + state->suspend_seen = true; + return true; } else if (g_str_equal(name, "RESUME")) { state->resume_seen = true; return true; diff --git a/tests/qtest/migration-helpers.h b/tests/qtest/migration-helpers.h index 3d32699..b478549 100644 --- a/tests/qtest/migration-helpers.h +++ b/tests/qtest/migration-helpers.h @@ -18,6 +18,8 @@ typedef struct QTestMigrationState { bool stop_seen; bool resume_seen; + bool suspend_seen; + bool suspend_me; } QTestMigrationState; bool migrate_watch_for_events(QTestState *who, const char *name, diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c index e10d5a4..200f023 100644 --- a/tests/qtest/migration-test.c +++ b/tests/qtest/migration-test.c @@ -178,7 +178,7 @@ static void bootfile_delete(void) /* * Wait for some output in the serial output file, * we get an 'A' followed by an endless string of 'B's - * but on the destination we won't have the A. + * but on the destination we won't have the A (unless we enabled suspend/resume) */ static void wait_for_serial(const char *side) { @@ -245,6 +245,13 @@ static void wait_for_resume(QTestState *who, QTestMigrationState *state) } } +static void wait_for_suspend(QTestState *who, QTestMigrationState *state) +{ + if (!state->suspend_seen) { + qtest_qmp_eventwait(who, "SUSPEND"); + } +} + /* * It's tricky to use qemu's migration event capability with qtest, * events suddenly appearing confuse the qmp()/hmp() responses. @@ -299,7 +306,7 @@ static void wait_for_migration_pass(QTestState *who) { uint64_t pass, prev_pass = 0, changes = 0; - while (changes < 2 && !src_state.stop_seen) { + while (changes < 2 && !src_state.stop_seen && !src_state.suspend_seen) { usleep(1000); pass = get_migration_pass(who); changes += (pass != prev_pass); @@ -595,7 +602,8 @@ static void migrate_wait_for_dirty_mem(QTestState *from, watch_byte = qtest_readb(from, watch_address); do { usleep(1000 * 10); - } while (qtest_readb(from, watch_address) == watch_byte); + } while (qtest_readb(from, watch_address) == watch_byte && + !src_state.suspend_seen); } @@ -771,6 +779,7 @@ static int test_migrate_start(QTestState **from, QTestState **to, dst_state = (QTestMigrationState) { }; src_state = (QTestMigrationState) { }; bootfile_create(tmpfs, args->suspend_me); + src_state.suspend_me = args->suspend_me; if (strcmp(arch, "i386") == 0 || strcmp(arch, "x86_64") == 0) { memory_size = "150M"; @@ -1730,6 +1739,9 @@ static void test_precopy_common(MigrateCommon *args) * change anything. */ if (args->result == MIG_TEST_SUCCEED) { + if (src_state.suspend_me) { + wait_for_suspend(from, &src_state); + } qtest_qmp_assert_success(from, "{ 'execute' : 'stop'}"); wait_for_stop(from, &src_state); migrate_ensure_converge(from); @@ -1777,6 +1789,9 @@ static void test_precopy_common(MigrateCommon *args) */ wait_for_migration_complete(from); + if (src_state.suspend_me) { + wait_for_suspend(from, &src_state); + } wait_for_stop(from, &src_state); } else { @@ -1793,6 +1808,11 @@ static void test_precopy_common(MigrateCommon *args) wait_for_resume(to, &dst_state); + if (args->start.suspend_me) { + /* wakeup succeeds only if guest is suspended */ + qtest_qmp_assert_success(to, "{'execute': 'system_wakeup'}"); + } + wait_for_serial("dest_serial"); } @@ -1879,6 +1899,34 @@ static void test_precopy_unix_plain(void) test_precopy_common(&args); } +static void test_precopy_unix_suspend_live(void) +{ + g_autofree char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs); + MigrateCommon args = { + .listen_uri = uri, + .connect_uri = uri, + /* + * despite being live, the test is fast because the src + * suspends immediately. + */ + .live = true, + .start.suspend_me = true, + }; + + test_precopy_common(&args); +} + +static void test_precopy_unix_suspend_notlive(void) +{ + g_autofree char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs); + MigrateCommon args = { + .listen_uri = uri, + .connect_uri = uri, + .start.suspend_me = true, + }; + + test_precopy_common(&args); +} static void test_precopy_unix_dirty_ring(void) { @@ -3279,7 +3327,7 @@ static bool kvm_dirty_ring_supported(void) int main(int argc, char **argv) { bool has_kvm, has_tcg; - bool has_uffd; + bool has_uffd, is_x86; const char *arch; g_autoptr(GError) err = NULL; const char *qemu_src = getenv(QEMU_ENV_SRC); @@ -3309,6 +3357,7 @@ int main(int argc, char **argv) has_uffd = ufd_version_check(); arch = qtest_get_arch(); + is_x86 = !strcmp(arch, "i386") || !strcmp(arch, "x86_64"); /* * On ppc64, the test only works with kvm-hv, but not with kvm-pr and TCG @@ -3339,6 +3388,13 @@ int main(int argc, char **argv) module_call_init(MODULE_INIT_QOM); + if (is_x86) { + qtest_add_func("/migration/precopy/unix/suspend/live", + test_precopy_unix_suspend_live); + qtest_add_func("/migration/precopy/unix/suspend/notlive", + test_precopy_unix_suspend_notlive); + } + if (has_uffd) { qtest_add_func("/migration/postcopy/plain", test_postcopy); qtest_add_func("/migration/postcopy/recovery/plain",
Add a test case to verify that the suspended state is handled correctly during live migration precopy. The test suspends the src, migrates, then wakes the dest. Signed-off-by: Steve Sistare <steven.sistare@oracle.com> --- tests/qtest/migration-helpers.c | 3 ++ tests/qtest/migration-helpers.h | 2 ++ tests/qtest/migration-test.c | 64 ++++++++++++++++++++++++++++++++++++++--- 3 files changed, 65 insertions(+), 4 deletions(-)