Message ID | 20241029150908.1136894-6-ppandit@redhat.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | Allow to enable multifd and postcopy migration together | expand |
On Tue, Oct 29, 2024 at 08:39:08PM +0530, Prasad Pandit wrote: > From: Prasad Pandit <pjp@fedoraproject.org> > > Enable Multifd and Postcopy migration together. > The migration_ioc_process_incoming() routine > checks magic value sent on each channel and > helps to properly setup multifd and postcopy > channels. > > Idea is to take advantage of the multifd threads > to accelerate transfer of large guest RAM to the > destination and switch to postcopy mode sooner. > > The Precopy and Multifd threads work during the > initial guest RAM transfer. When migration moves > to the Postcopy phase, the source guest is > paused, so the Precopy and Multifd threads stop > sending data on their channels. Postcopy threads > on the destination request/pull data from the > source side. Hmm I think this is not the truth.. Precopy keeps sending data even during postcopy, that's the background stream (with/without preempt feature enabled). May need some amendment when repost here. > > Signed-off-by: Prasad Pandit <pjp@fedoraproject.org> > --- > migration/migration.c | 73 ++++++++++++++++++++++++++----------------- > 1 file changed, 44 insertions(+), 29 deletions(-) > > diff --git a/migration/migration.c b/migration/migration.c > index 021faee2f3..11fcc1e012 100644 > --- a/migration/migration.c > +++ b/migration/migration.c > @@ -92,6 +92,9 @@ enum mig_rp_message_type { > MIG_RP_MSG_MAX > }; > > +/* Migration channel types */ > +enum { CH_DEFAULT, CH_MULTIFD, CH_POSTCOPY }; > + > /* When we add fault tolerance, we could have several > migrations at once. For now we don't need to add > dynamic creation of migration */ > @@ -919,16 +922,15 @@ void migration_fd_process_incoming(QEMUFile *f) > * Returns true when we want to start a new incoming migration process, > * false otherwise. 
> */ > -static bool migration_should_start_incoming(bool main_channel) > +static bool migration_should_start_incoming(uint8_t channel) > { > + if (channel == CH_POSTCOPY) { > + return false; > + } Please see my other reply, I think here it should never be POSTCOPY channel, because postcopy (aka, preempt) channel is only created after the main channel.. So I wonder whether this "if" will hit at all. > + > /* Multifd doesn't start unless all channels are established */ > if (migrate_multifd()) { > - return migration_has_all_channels(); > - } > - > - /* Preempt channel only starts when the main channel is created */ > - if (migrate_postcopy_preempt()) { > - return main_channel; > + return multifd_recv_all_channels_created(); I think this is incorrect.. We should also need to check main channel is established before start incoming. The old code uses migration_has_all_channels() which checks that. > } > > /* > @@ -936,7 +938,7 @@ static bool migration_should_start_incoming(bool main_channel) > * it's the main channel that's being created, and we should always > * proceed with this channel. 
> */ > - assert(main_channel); > + assert(channel == CH_DEFAULT); > return true; > } > > @@ -945,13 +947,11 @@ void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp) > MigrationIncomingState *mis = migration_incoming_get_current(); > Error *local_err = NULL; > QEMUFile *f; > - bool default_channel = true; > uint32_t channel_magic = 0; > + uint8_t channel = CH_DEFAULT; > int ret = 0; > > - if (migrate_multifd() && !migrate_mapped_ram() && > - !migrate_postcopy_ram() && > - qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_READ_MSG_PEEK)) { > + if (qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_READ_MSG_PEEK)) { > /* > * With multiple channels, it is possible that we receive channels > * out of order on destination side, causing incorrect mapping of > @@ -969,35 +969,49 @@ void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp) > return; > } > > - default_channel = (channel_magic == cpu_to_be32(QEMU_VM_FILE_MAGIC)); > - } else { > - default_channel = !mis->from_src_file; > + if (channel_magic == cpu_to_be32(QEMU_VM_FILE_MAGIC)) { > + channel = CH_DEFAULT; > + } else if (channel_magic == cpu_to_be32(MULTIFD_MAGIC)) { > + channel = CH_MULTIFD; > + } else if (channel_magic == cpu_to_be32(POSTCOPY_MAGIC)) { > + if (qio_channel_read_all(ioc, (char *)&channel_magic, > + sizeof(channel_magic), &local_err)) { > + error_report_err(local_err); > + return; > + } > + channel = CH_POSTCOPY; > + } else { > + error_report("%s: could not identify channel, unknown magic: %u", > + __func__, channel_magic); > + return; > + } > } > > if (multifd_recv_setup(errp) != 0) { > return; > } > > - if (default_channel) { > + if (channel == CH_DEFAULT) { > f = qemu_file_new_input(ioc); > migration_incoming_setup(f); > - } else { > + } else if (channel == CH_MULTIFD) { > /* Multiple connections */ > - assert(migration_needs_multiple_sockets()); > if (migrate_multifd()) { > multifd_recv_new_channel(ioc, &local_err); > - } else { > + } > + if (local_err) { > + 
error_propagate(errp, local_err); > + return; > + } > + } else if (channel == CH_POSTCOPY) { > + if (migrate_postcopy()) { > assert(migrate_postcopy_preempt()); > f = qemu_file_new_input(ioc); > postcopy_preempt_new_channel(mis, f); > } > - if (local_err) { > - error_propagate(errp, local_err); > - return; > - } > } > > - if (migration_should_start_incoming(default_channel)) { > + if (migration_should_start_incoming(channel)) { > /* If it's a recovery, we're done */ > if (postcopy_try_recover()) { > return; > @@ -1014,21 +1028,22 @@ void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp) > */ > bool migration_has_all_channels(void) > { > + bool ret = false; > MigrationIncomingState *mis = migration_incoming_get_current(); > > if (!mis->from_src_file) { > - return false; > + return ret; > } > > if (migrate_multifd()) { > - return multifd_recv_all_channels_created(); > + ret = multifd_recv_all_channels_created(); > } > > - if (migrate_postcopy_preempt()) { > - return mis->postcopy_qemufile_dst != NULL; > + if (ret && migrate_postcopy_preempt()) { > + ret = mis->postcopy_qemufile_dst != NULL; > } IMHO it's clearer written as: if (migrate_multifd()) { if (!multifd_recv_all_channels_created()) { return false; } } if (migrate_preempt()) { if (mis->postcopy_qemufile_dst == NULL) { return false; } } return true; > > - return true; > + return ret; > } I don't yet see how you manage the multifd threads, etc, on both sides. Or any logic to make sure multifd will properly flush the pages before postcopy starts. IOW, any guarantee that all the pages will only be installed using UFFDIO_COPY as long as vcpu starts on dest. Any comments? The most complicated part of this work would be testing, to make sure it works in all previous cases, and that's majorly why we disabled it before: it was because it randomly crashes, but not always; fundamentally it's because when multifd was designed there wasn't enough consideration on working together with postcopy. 
Examples: /x86_64/migration/postcopy/plain The most generic postcopy test, we want to run this with multifd enabled, then this will cover the most simple use case of multifd-precopy plus postcopy. /x86_64/migration/postcopy/recovery/* These tests cover the fundamental postcopy recovery (plain, fails in handshake, fails in reconnect, or when using TLS), we may want to make sure these work even if multifd cap is enabled.
I had a feeling that this series could already fail some tests when enabling both features, but please give it a shot. Thanks,
On Mon, 4 Nov 2024 at 23:19, Peter Xu <peterx@redhat.com> wrote: > Precopy keeps sending data even during postcopy, that's the background > stream (with/without preempt feature enabled). May need some amendment > when repost here. * Okay. > > + if (channel == CH_POSTCOPY) { > > + return false; > > + } > > Please see my other reply, I think here it should never be POSTCOPY > channel, because postcopy (aka, preempt) channel is only created after the > main channel.. So I wonder whether this "if" will hit at all. * It does. migration_ioc_process_incoming() is called for each incoming connection. And every time it calls migration_should_start_incoming() to check if it should proceed with the migration or should wait for further connections, because multifd does not start until all its connections are established. * Similarly when the Postcopy channel is initiated towards the end of Precopy migration, migration_should_start_incoming() gets called, and returns 'false' because we don't want to start a new incoming migration at that time. Earlier it was receiving boolean value 'default_channel' as parameter, which was set to false while accepting 'postcopy' connection via => default_channel = !mis->from_src_file; > > + > > /* Multifd doesn't start unless all channels are established */ > > if (migrate_multifd()) { > > - return migration_has_all_channels(); > > - } > > - > > - /* Preempt channel only starts when the main channel is created */ > > - if (migrate_postcopy_preempt()) { > > - return main_channel; > > + return multifd_recv_all_channels_created(); > > I think this is incorrect.. We should also need to check main channel is > established before start incoming. The old code uses > migration_has_all_channels() which checks that. * Okay, will try to fix it better. But calling migration_has_all_channels() after checking migrate_multifd() does not seem/read right. > I don't yet see how you manage the multifd threads, etc, on both sides. 
Or > any logic to make sure multifd will properly flush the pages before > postcopy starts. IOW, any guarantee that all the pages will only be > installed using UFFDIO_COPY as long as vcpu starts on dest. Any comments? * There are no changes related to that. As this series only tries to enable the multifd and postcopy options together. > The most complicated part of this work would be testing, to make sure it > works in all previous cases, and that's majorly why we disabled it before: > it was because it randomly crashes, but not always; fundamentally it's > because when multifd was designed there wasn't enough consideration on > working together with postcopy. We didn't clearly know what's missing at > that time. * Right. I have tested this series with the following migration commands to confirm that migration works as expected and there were no crash(es) observed. === [Precopy] # virsh migrate --verbose --live --auto-converge f39vm qemu+ssh://<destination-host-name>/system [Precopy + Multifd] # virsh migrate --verbose --live --auto-converge --parallel --parallel-connections 02 \ f39vm qemu+ssh://<destination-host-name>/system # virsh migrate --verbose --live --auto-converge --parallel --parallel-connections 04 \ f39vm qemu+ssh://<destination-host-name>/system # virsh migrate --verbose --live --auto-converge --parallel --parallel-connections 08 \ f39vm qemu+ssh://<destination-host-name>/system # virsh migrate --verbose --live --auto-converge --parallel --parallel-connections 16 \ f39vm qemu+ssh://<destination-host-name>/system [Postcopy] # virsh migrate --verbose --live --auto-converge \ --postcopy --postcopy-after-precopy f39vm qemu+ssh://<destination-host-name>/system [Postcopy + Multifd] # virsh migrate --verbose --live --auto-converge --parallel --parallel-connections 02 \ --postcopy --postcopy-after-precopy f39vm qemu+ssh://<destination-host-name>/system # virsh migrate --verbose --live --auto-converge --parallel --parallel-connections 04 \ --postcopy 
--postcopy-after-precopy f39vm qemu+ssh://<destination-host-name>/system # virsh migrate --verbose --live --auto-converge --parallel --parallel-connections 08 \ --postcopy --postcopy-after-precopy f39vm qemu+ssh://<destination-host-name>/system # virsh migrate --verbose --live --auto-converge --parallel --parallel-connections 16 \ --postcopy --postcopy-after-precopy f39vm qemu+ssh://<destination-host-name>/system === > So we would definitely need to add test cases, just like whoever adds new > features to migration, to make sure at least it works for existing multifd > / postcopy test cases, but when both features enabled. ... > It will add quite a few tests to run here, but I don't see a good way > otherwise when we want to enable the two features, because it is indeed a > challenge to enable the two major features together here. > * Thank you for the hints about the tests, will surely look into them and try to learn about how to add new test cases. Thank you. --- - Prasad
On Tue, Nov 05, 2024 at 05:24:55PM +0530, Prasad Pandit wrote: > On Mon, 4 Nov 2024 at 23:19, Peter Xu <peterx@redhat.com> wrote: > > Precopy keeps sending data even during postcopy, that's the background > > stream (with/without preempt feature enabled). May need some amendment > > when repost here. > > * Okay. > > > > + if (channel == CH_POSTCOPY) { > > > + return false; > > > + } > > > > Please see my other reply, I think here it should never be POSTCOPY > > channel, because postcopy (aka, preempt) channel is only created after the > > main channel.. So I wonder whether this "if" will hit at all. > > * It does. migration_ioc_process_incoming() is called for each > incoming connection. And every time it calls > migration_should_start_incoming() to check if it should proceed with > the migration or should wait for further connections, because multifd > does not start until all its connections are established. > > * Similarly when the Postcopy channel is initiated towards the end of > Precopy migration, migration_should_start_incoming() gets called, and > returns 'false' because we don't want to start a new incoming > migration at that time. Earlier it was receiving boolean value > 'default_channel' as parameter, which was set to false while accepting > 'postcopy' connection via => default_channel = !mis->from_src_file; Hmm yes, it will happen, but should only happen after the main channel. > > > > + > > > /* Multifd doesn't start unless all channels are established */ > > > if (migrate_multifd()) { > > > - return migration_has_all_channels(); > > > - } > > > - > > > - /* Preempt channel only starts when the main channel is created */ > > > - if (migrate_postcopy_preempt()) { > > > - return main_channel; > > > + return multifd_recv_all_channels_created(); > > > > I think this is incorrect.. We should also need to check main channel is > > established before start incoming. The old code uses > > migration_has_all_channels() which checks that. 
> > * Okay, will try to fix it better. But calling > migration_has_all_channels() after checking migrate_multifd() does not > seem/read right. > > > I don't yet see how you manage the multifd threads, etc, on both sides. Or > > any logic to make sure multifd will properly flush the pages before > > postcopy starts. IOW, any guarantee that all the pages will only be > > installed using UFFDIO_COPY as long as vcpu starts on dest. Any comments? > > * There are no changes related to that. As this series only tries to > enable the multifd and postcopy options together. In short, qemu_savevm_state_complete_precopy_iterable() is skipped in postcopy. I don't yet see anywhere multifd flushes pages. We need to flush multifd pages before vcpu starts on dest. As we discussed off-list, we can add an assertion in multifd recv threads to make sure it won't touch any guest page during active postcopy. Maybe something like: diff --git a/migration/multifd.c b/migration/multifd.c index 4374e14a96..32425137bd 100644 --- a/migration/multifd.c +++ b/migration/multifd.c @@ -1182,6 +1182,13 @@ static void *multifd_recv_thread(void *opaque) } if (has_data) { + /* + * Now we're going to receive some guest pages into iov + * buffers. If it's during postcopy, it means vcpu can be + * running, so this can corrupt memory when modified + * concurrently by multifd threads! + */ + assert(!migration_in_postcopy()); ret = multifd_recv_state->ops->recv(p, &local_err); if (ret != 0) { break; > > > The most complicated part of this work would be testing, to make sure it > > works in all previous cases, and that's majorly why we disabled it before: > > it was because it randomly crashes, but not always; fundamentally it's > > because when multifd was designed there wasn't enough consideration on > > working together with postcopy. We didn't clearly know what's missing at > > that time. > > * Right. 
I'd suggest we try interrupting postcopy with migrate-pause, then go with postcopy recovery. I wonder how the current series work if the network failure will fail all the multifd iochannels, and how reconnect works.
> > [Postcopy + Multifd] > # virsh migrate --verbose --live --auto-converge --parallel > --parallel-connections 02 \ > --postcopy --postcopy-after-precopy f39vm > qemu+ssh://<destination-host-name>/system > # virsh migrate --verbose --live --auto-converge --parallel > --parallel-connections 04 \ > --postcopy --postcopy-after-precopy f39vm > qemu+ssh://<destination-host-name>/system > # virsh migrate --verbose --live --auto-converge --parallel > --parallel-connections 08 \ > --postcopy --postcopy-after-precopy f39vm > qemu+ssh://<destination-host-name>/system > # virsh migrate --verbose --live --auto-converge --parallel > --parallel-connections 16 \ > --postcopy --postcopy-after-precopy f39vm > qemu+ssh://<destination-host-name>/system > === > > > So we would definitely need to add test cases, just like whoever adds new > > features to migration, to make sure at least it works for existing multifd > > / postcopy test cases, but when both features enabled. > ... > > It will add quite a few tests to run here, but I don't see a good way > > otherwise when we want to enable the two features, because it is indeed a > > challenge to enable the two major features together here. > > > > * Thank you for the hints about the tests, will surely look into them > and try to learn about how to add new test cases. Thanks,
diff --git a/migration/migration.c b/migration/migration.c index 021faee2f3..11fcc1e012 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -92,6 +92,9 @@ enum mig_rp_message_type { MIG_RP_MSG_MAX }; +/* Migration channel types */ +enum { CH_DEFAULT, CH_MULTIFD, CH_POSTCOPY }; + /* When we add fault tolerance, we could have several migrations at once. For now we don't need to add dynamic creation of migration */ @@ -919,16 +922,15 @@ void migration_fd_process_incoming(QEMUFile *f) * Returns true when we want to start a new incoming migration process, * false otherwise. */ -static bool migration_should_start_incoming(bool main_channel) +static bool migration_should_start_incoming(uint8_t channel) { + if (channel == CH_POSTCOPY) { + return false; + } + /* Multifd doesn't start unless all channels are established */ if (migrate_multifd()) { - return migration_has_all_channels(); - } - - /* Preempt channel only starts when the main channel is created */ - if (migrate_postcopy_preempt()) { - return main_channel; + return multifd_recv_all_channels_created(); } /* @@ -936,7 +938,7 @@ static bool migration_should_start_incoming(bool main_channel) * it's the main channel that's being created, and we should always * proceed with this channel. 
*/ - assert(main_channel); + assert(channel == CH_DEFAULT); return true; } @@ -945,13 +947,11 @@ void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp) MigrationIncomingState *mis = migration_incoming_get_current(); Error *local_err = NULL; QEMUFile *f; - bool default_channel = true; uint32_t channel_magic = 0; + uint8_t channel = CH_DEFAULT; int ret = 0; - if (migrate_multifd() && !migrate_mapped_ram() && - !migrate_postcopy_ram() && - qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_READ_MSG_PEEK)) { + if (qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_READ_MSG_PEEK)) { /* * With multiple channels, it is possible that we receive channels * out of order on destination side, causing incorrect mapping of @@ -969,35 +969,49 @@ void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp) return; } - default_channel = (channel_magic == cpu_to_be32(QEMU_VM_FILE_MAGIC)); - } else { - default_channel = !mis->from_src_file; + if (channel_magic == cpu_to_be32(QEMU_VM_FILE_MAGIC)) { + channel = CH_DEFAULT; + } else if (channel_magic == cpu_to_be32(MULTIFD_MAGIC)) { + channel = CH_MULTIFD; + } else if (channel_magic == cpu_to_be32(POSTCOPY_MAGIC)) { + if (qio_channel_read_all(ioc, (char *)&channel_magic, + sizeof(channel_magic), &local_err)) { + error_report_err(local_err); + return; + } + channel = CH_POSTCOPY; + } else { + error_report("%s: could not identify channel, unknown magic: %u", + __func__, channel_magic); + return; + } } if (multifd_recv_setup(errp) != 0) { return; } - if (default_channel) { + if (channel == CH_DEFAULT) { f = qemu_file_new_input(ioc); migration_incoming_setup(f); - } else { + } else if (channel == CH_MULTIFD) { /* Multiple connections */ - assert(migration_needs_multiple_sockets()); if (migrate_multifd()) { multifd_recv_new_channel(ioc, &local_err); - } else { + } + if (local_err) { + error_propagate(errp, local_err); + return; + } + } else if (channel == CH_POSTCOPY) { + if (migrate_postcopy()) { 
assert(migrate_postcopy_preempt()); f = qemu_file_new_input(ioc); postcopy_preempt_new_channel(mis, f); } - if (local_err) { - error_propagate(errp, local_err); - return; - } } - if (migration_should_start_incoming(default_channel)) { + if (migration_should_start_incoming(channel)) { /* If it's a recovery, we're done */ if (postcopy_try_recover()) { return; @@ -1014,21 +1028,22 @@ void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp) */ bool migration_has_all_channels(void) { + bool ret = false; MigrationIncomingState *mis = migration_incoming_get_current(); if (!mis->from_src_file) { - return false; + return ret; } if (migrate_multifd()) { - return multifd_recv_all_channels_created(); + ret = multifd_recv_all_channels_created(); } - if (migrate_postcopy_preempt()) { - return mis->postcopy_qemufile_dst != NULL; + if (ret && migrate_postcopy_preempt()) { + ret = mis->postcopy_qemufile_dst != NULL; } - return true; + return ret; } int migrate_send_rp_switchover_ack(MigrationIncomingState *mis)