Message ID | 20250205122712.229151-4-ppandit@redhat.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | Allow to enable multifd and postcopy migration together | expand |
On Wed, Feb 05, 2025 at 05:57:10PM +0530, Prasad Pandit wrote: > From: Prasad Pandit <pjp@fedoraproject.org> > > Enable Multifd and Postcopy migration together. > The migration_ioc_process_incoming() routine > checks magic value sent on each channel and > helps to properly setup multifd and postcopy > channels. > > The Precopy and Multifd threads work during the > initial guest RAM transfer. When migration moves > to the Postcopy phase, the multifd threads are > restrained and Postcopy threads on the destination > request/pull data from the source side. > > Signed-off-by: Prasad Pandit <pjp@fedoraproject.org> > --- > migration/migration.c | 106 +++++++++++++++++++++++-------------- > migration/multifd-nocomp.c | 3 +- > migration/options.c | 5 -- > migration/ram.c | 4 +- > 4 files changed, 70 insertions(+), 48 deletions(-) > > v4: no change > - https://lore.kernel.org/qemu-devel/20250127120823.144949-1-ppandit@redhat.com/T/#t > > diff --git a/migration/migration.c b/migration/migration.c > index 2d1da917c7..a280722e9e 100644 > --- a/migration/migration.c > +++ b/migration/migration.c > @@ -92,6 +92,9 @@ enum mig_rp_message_type { > MIG_RP_MSG_MAX > }; > > +/* Migration channel types */ > +enum { CH_DEFAULT, CH_MULTIFD, CH_POSTCOPY }; Maybe s/DEFAULT/MAIN/? > + > /* When we add fault tolerance, we could have several > migrations at once. For now we don't need to add > dynamic creation of migration */ > @@ -929,26 +932,33 @@ void migration_fd_process_incoming(QEMUFile *f) > /* > * Returns true when we want to start a new incoming migration process, > * false otherwise. > + * > + * All the required channels must be in place before a new incoming > + * migration process starts. > + * - Multifd enabled: > + * The main channel and the multifd channels are required. > + * - Multifd/Postcopy disabled: > + * The main channel is required. > + * - Postcopy enabled: > + * We don't want to start a new incoming migration when > + * the postcopy channel is created. Because it is created > + * towards the end of the precopy migration. > + * > */ > -static bool migration_should_start_incoming(bool main_channel) > +static bool migration_should_start_incoming(uint8_t channel) > { > - /* Multifd doesn't start unless all channels are established */ > - if (migrate_multifd()) { > - return migration_has_all_channels(); > - } > + bool ret = false; > + > + if (channel != CH_POSTCOPY) { > + MigrationIncomingState *mis = migration_incoming_get_current(); > + ret = mis->from_src_file ? true : false; > > - /* Preempt channel only starts when the main channel is created */ > - if (migrate_postcopy_preempt()) { > - return main_channel; > + if (ret && migrate_multifd()) { > + ret = multifd_recv_all_channels_created(); > + } > } > > - /* > - * For all the rest types of migration, we should only reach here when > - * it's the main channel that's being created, and we should always > - * proceed with this channel. > - */ > - assert(main_channel); > - return true; > + return ret; > } > > void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp) > @@ -956,13 +966,12 @@ void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp) > MigrationIncomingState *mis = migration_incoming_get_current(); > Error *local_err = NULL; > QEMUFile *f; > - bool default_channel = true; > uint32_t channel_magic = 0; > + uint8_t channel = CH_DEFAULT; > int ret = 0; > > - if (migrate_multifd() && !migrate_mapped_ram() && > - !migrate_postcopy_ram() && > - qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_READ_MSG_PEEK)) { > + if (!migration_should_start_incoming(channel)) { This says "if we assume this is the main channel, and if we shouldn't start incoming migration, then we should peek at the buffers". Could you help explain? > + if (qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_READ_MSG_PEEK)) { > /* > * With multiple channels, it is possible that we receive channels > * out of order on destination side, causing incorrect mapping of > @@ -973,42 +982,58 @@ void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp) > * tls handshake while initializing main channel so with tls this > * issue is not possible. > */ > - ret = migration_channel_read_peek(ioc, (void *)&channel_magic, > - sizeof(channel_magic), errp); > + ret = migration_channel_read_peek(ioc, (void *)&channel_magic, > + sizeof(channel_magic), errp); > + if (ret != 0) { > + return; > + } > > - if (ret != 0) { > - return; > + if (channel_magic == cpu_to_be32(QEMU_VM_FILE_MAGIC)) { > + channel = CH_DEFAULT; > + } else if (channel_magic == cpu_to_be32(MULTIFD_MAGIC)) { > + channel = CH_MULTIFD; > + } else if (!mis->from_src_file > + && mis->state == MIGRATION_STATUS_POSTCOPY_PAUSED) { > + /* reconnect default channel for postcopy recovery */ > + channel = CH_DEFAULT; This is still in the big "peek buffer" if condition. IMHO we can skip peeking buffer when postcopy paused, because in this stage the channel must be (1) main channel first, then (2) preempt channel next. > + } else { > + error_report("%s: could not identify channel, unknown magic: %u", > + __func__, channel_magic); > + return; > + } > + } else if (mis->from_src_file > + && (!strcmp(ioc->name, "migration-tls-incoming") > + || !strcmp(ioc->name, "migration-file-incoming"))) { > + channel = CH_MULTIFD; Confused here too. Why do we need to check ioc name? Shouldn't multifd has the headers? > } > - > - default_channel = (channel_magic == cpu_to_be32(QEMU_VM_FILE_MAGIC)); > - } else { > - default_channel = !mis->from_src_file; > + } else if (mis->from_src_file) { // && migrate_postcopy_preempt() > + channel = CH_POSTCOPY; > } > > if (multifd_recv_setup(errp) != 0) { > return; > } > > - if (default_channel) { > + if (channel == CH_DEFAULT) { > f = qemu_file_new_input(ioc); > migration_incoming_setup(f); > - } else { > + } else if (channel == CH_MULTIFD) { > /* Multiple connections */ > - assert(migration_needs_multiple_sockets()); Could I ask why removal? > if (migrate_multifd()) { > multifd_recv_new_channel(ioc, &local_err); > - } else { > - assert(migrate_postcopy_preempt()); > - f = qemu_file_new_input(ioc); > - postcopy_preempt_new_channel(mis, f); > } > if (local_err) { > error_propagate(errp, local_err); > return; > } > + } else if (channel == CH_POSTCOPY) { > + assert(migrate_postcopy_preempt()); > + assert(!mis->postcopy_qemufile_dst); > + f = qemu_file_new_input(ioc); > + postcopy_preempt_new_channel(mis, f); > } > > - if (migration_should_start_incoming(default_channel)) { > + if (migration_should_start_incoming(channel)) { > /* If it's a recovery, we're done */ > if (postcopy_try_recover()) { > return; > @@ -1025,21 +1050,22 @@ void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp) > */ > bool migration_has_all_channels(void) > { > + bool ret = false; > MigrationIncomingState *mis = migration_incoming_get_current(); > > if (!mis->from_src_file) { > - return false; > + return ret; > } > > if (migrate_multifd()) { > - return multifd_recv_all_channels_created(); > + ret = multifd_recv_all_channels_created(); > } > > - if (migrate_postcopy_preempt()) { > - return mis->postcopy_qemufile_dst != NULL; > + if (ret && migrate_postcopy_preempt()) { It might be better to avoid such "ret && XXX" nested check. E.g. do you think below easier to read? diff --git a/migration/migration.c b/migration/migration.c index 74c50cc72c..9eb2f3fdeb 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -1064,12 +1064,14 @@ bool migration_has_all_channels(void) return false; } - if (migrate_multifd()) { - return multifd_recv_all_channels_created(); + if (migrate_multifd() && + !multifd_recv_all_channels_created()) { + return false; } - if (migrate_postcopy_preempt()) { - return mis->postcopy_qemufile_dst != NULL; + if (migrate_postcopy_preempt() && + mis->postcopy_qemufile_dst == NULL) { + return false; } return true; > + ret = mis->postcopy_qemufile_dst != NULL; > } > > - return true; > + return ret; > } > > int migrate_send_rp_switchover_ack(MigrationIncomingState *mis) > diff --git a/migration/multifd-nocomp.c b/migration/multifd-nocomp.c > index 1325dba97c..d0edec7cd1 100644 > --- a/migration/multifd-nocomp.c > +++ b/migration/multifd-nocomp.c > @@ -16,6 +16,7 @@ > #include "file.h" > #include "multifd.h" > #include "options.h" > +#include "migration.h" > #include "qapi/error.h" > #include "qemu/cutils.h" > #include "qemu/error-report.h" > @@ -391,7 +392,7 @@ int multifd_ram_flush_and_sync(QEMUFile *f) > MultiFDSyncReq req; > int ret; > > - if (!migrate_multifd()) { > + if (!migrate_multifd() || migration_in_postcopy()) { > return 0; > } [1] > > diff --git a/migration/options.c b/migration/options.c > index b8d5300326..8c878dea49 100644 > --- a/migration/options.c > +++ b/migration/options.c > @@ -479,11 +479,6 @@ bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp) > error_setg(errp, "Postcopy is not compatible with ignore-shared"); > return false; > } > - > - if (new_caps[MIGRATION_CAPABILITY_MULTIFD]) { > - error_setg(errp, "Postcopy is not yet compatible with multifd"); > - return false; > - } > } > > if (new_caps[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT]) { > diff --git a/migration/ram.c b/migration/ram.c > index f2326788de..bdba7abe73 100644 > --- a/migration/ram.c > +++ b/migration/ram.c > @@ -1295,7 +1295,7 @@ static int find_dirty_block(RAMState *rs, PageSearchStatus *pss) > pss->page = 0; > pss->block = QLIST_NEXT_RCU(pss->block, next); > if (!pss->block) { > - if (multifd_ram_sync_per_round()) { > + if (multifd_ram_sync_per_round() && !migration_in_postcopy()) { If you have above[1], why need this? > QEMUFile *f = rs->pss[RAM_CHANNEL_PRECOPY].pss_channel; > int ret = multifd_ram_flush_and_sync(f); > if (ret < 0) { > @@ -1969,7 +1969,7 @@ static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss) > } > } > > - if (migrate_multifd()) { > + if (migrate_multifd() && !migration_in_postcopy()) { > RAMBlock *block = pss->block; > return ram_save_multifd_page(block, offset); > } > -- > 2.48.1 > This patch still did nothing for multifd in postcopy_start(). I'm not sure it's safe. What happens if some multifd pages were sent, then we start postcopy, dest vcpu threads running, then during postcopy some multifd pages finally arrived and modifying the guest pages during vcpus running? Thanks,
Hi, On Fri, 7 Feb 2025 at 04:46, Peter Xu <peterx@redhat.com> wrote: > > +/* Migration channel types */ > > +enum { CH_DEFAULT, CH_MULTIFD, CH_POSTCOPY }; > > Maybe s/DEFAULT/MAIN/? * Okay. > > - if (migrate_multifd() && !migrate_mapped_ram() && > > - !migrate_postcopy_ram() && > > - qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_READ_MSG_PEEK)) { > > + if (!migration_should_start_incoming(channel)) { > > This says "if we assume this is the main channel, and if we shouldn't start > incoming migration, then we should peek at the buffers". > Could you help explain? * New migration starts only when the main channel and if 'multifd' is enabled all multifd channels are established. So, if 'main' and 'multifd' channels are _not_ established then migration should _not_ start. And in that case, incoming connection is likely for one of those channels and so we should peek at the buffers, because both 'main' and 'multifd' channels send magic values. * migration_should_start_incoming() function returns 'true' only when 'main' and 'multifd' channels are being established. For 'postcopy' channel it returns false. > > + } else if (!mis->from_src_file > > + && mis->state == MIGRATION_STATUS_POSTCOPY_PAUSED) { > > + /* reconnect default channel for postcopy recovery */ > > + channel = CH_DEFAULT; > > This is still in the big "peek buffer" if condition. > IMHO we can skip peeking buffer when postcopy paused, because in this stage > the channel must be (1) main channel first, then (2) preempt channel next. * It is in the big 'peek buffer' condition because the 'main' channel (= CH_DEFAULT) is being established here. Ideally, all channels should send magic values to be consistent. The 'main' channel sends magic value when it is established before starting migration, but the same 'main' channel does not send magic value when it is established during postcopy recovery, that is an inconsistency (a bug) here. Ideal fix is to send a magic value every time the 'main' channel is established, irrespective of when it is established. * Adding conditionals to check if it is _POSTCOPY_PAUSED state then don't peek will only lead to complicated 'if' conditionals. This channel handling code is already complex and non-intuitive enough. > > + } else if (mis->from_src_file > > + && (!strcmp(ioc->name, "migration-tls-incoming") > > + || !strcmp(ioc->name, "migration-file-incoming"))) { > > + channel = CH_MULTIFD; > > Confused here too. Why do we need to check ioc name? Shouldn't multifd has > the headers? * Because they are not 'multifd' channels, tls/file channels don't send magic values, but are still handled by 'multifd_recv_new_channel()' function. === ... if (default_channel) { migration_incoming_setup(f); } else { if (migrate_multifd()) { multifd_recv_new_channel(ioc, &local_err); } else { postcopy_preempt_new_channel(mis, f); } === In the code above, if 'default_channel==false' and multifd() is enabled, all incoming connections are handled by 'multifd_recv_new_channel()', irrespective of whether it is a 'multifd' channel or not. While creating multifd channels, there is no check for channel type like: if(channel == CH_MULTIFD). * IMHO, if we make all channels behave with consistency, ie. either they all send magic value or none sends magic value, that'll simplify this code a lot. > > - assert(migration_needs_multiple_sockets()); > Could I ask why removal? * Because that function returns migrate_multifd() => migrate_multifd() || migrate_postcopy_preempt(); * And the very following check is also migrate_multifd(), as below: > > if (migrate_multifd()) { > > multifd_recv_new_channel(ioc, &local_err); > It might be better to avoid such "ret && XXX" nested check. E.g. do you > think below easier to read? > > diff --git a/migration/migration.c b/migration/migration.c > index 74c50cc72c..9eb2f3fdeb 100644 > --- a/migration/migration.c > +++ b/migration/migration.c > @@ -1064,12 +1064,14 @@ bool migration_has_all_channels(void) > return false; > } > > - if (migrate_multifd()) { > - return multifd_recv_all_channels_created(); > + if (migrate_multifd() && > + !multifd_recv_all_channels_created()) { > + return false; > } > > - if (migrate_postcopy_preempt()) { > - return mis->postcopy_qemufile_dst != NULL; > + if (migrate_postcopy_preempt() && > + mis->postcopy_qemufile_dst == NULL) { > + return false; > } > > return true; * Will try it. > > - if (!migrate_multifd()) { > > + if (!migrate_multifd() || migration_in_postcopy()) { > > return 0; > > } > > [1] > > > > > if (new_caps[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT]) { > > diff --git a/migration/ram.c b/migration/ram.c > > index f2326788de..bdba7abe73 100644 > > --- a/migration/ram.c > > +++ b/migration/ram.c > > @@ -1295,7 +1295,7 @@ static int find_dirty_block(RAMState *rs, PageSearchStatus *pss) > > pss->page = 0; > > pss->block = QLIST_NEXT_RCU(pss->block, next); > > if (!pss->block) { > > - if (multifd_ram_sync_per_round()) { > > + if (multifd_ram_sync_per_round() && !migration_in_postcopy()) { > > If you have above[1], why need this? * True, I tried with just [1] above first, but it was failing for some reason. Will try again. > This patch still did nothing for multifd in postcopy_start(). I'm not sure > it's safe. > > What happens if some multifd pages were sent, then we start postcopy, dest > vcpu threads running, then during postcopy some multifd pages finally > arrived and modifying the guest pages during vcpus running? * ram_save_target_page() function saves multifd pages only when (..!migration_in_postcopy()) not in postcopy mode. Case of 'multifd' page arriving late on destination and 'postcopy' starting before that is strange, because if multifd page is getting late, that network latency should affect 'postcopy' channel too, no? But still if it is possible, do we want to call - multifd_ram_flush_and_sync() before postcopy_start()? Will that help? I'll check if/how it works. Thank you. --- - Prasad
On Fri, Feb 07, 2025 at 04:02:44PM +0530, Prasad Pandit wrote: > Hi, > > On Fri, 7 Feb 2025 at 04:46, Peter Xu <peterx@redhat.com> wrote: > > > +/* Migration channel types */ > > > +enum { CH_DEFAULT, CH_MULTIFD, CH_POSTCOPY }; > > > > Maybe s/DEFAULT/MAIN/? > > * Okay. > > > > - if (migrate_multifd() && !migrate_mapped_ram() && [b] > > > - !migrate_postcopy_ram() && > > > - qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_READ_MSG_PEEK)) { > > > + if (!migration_should_start_incoming(channel)) { > > > > This says "if we assume this is the main channel, and if we shouldn't start > > incoming migration, then we should peek at the buffers". > > Could you help explain? > > * New migration starts only when the main channel and if 'multifd' is > enabled all multifd channels are established. So, if 'main' and > 'multifd' channels are _not_ established then migration should _not_ > start. And in that case, incoming connection is likely for one of > those channels and so we should peek at the buffers, because both > 'main' and 'multifd' channels send magic values. > > * migration_should_start_incoming() function returns 'true' only when > 'main' and 'multifd' channels are being established. For 'postcopy' > channel it returns false. This is not easy to follow neither with the current name, nor that you "assumed this is main channel" and test it. I think you may want to split migration_has_all_channels() into migration_has_essential_channels() which only covers main and multifd cases. Then you can check if (!has_esential) here. You'd better also add a comment that all "essential channels" can be peeked. You may also want to bypass a few things, e.g. "postcopy paused stage" here rather than inside, because postcopy-recover only happens: - First with a main channel, that is not peekable as no header when resume - Then with preempt channel, that is also not peekable [a] You may also need to keep the mapped-ram check. They also don't support peek. > > > > > + } else if (!mis->from_src_file > > > + && mis->state == MIGRATION_STATUS_POSTCOPY_PAUSED) { > > > + /* reconnect default channel for postcopy recovery */ > > > + channel = CH_DEFAULT; > > > > This is still in the big "peek buffer" if condition. > > IMHO we can skip peeking buffer when postcopy paused, because in this stage > > the channel must be (1) main channel first, then (2) preempt channel next. > > * It is in the big 'peek buffer' condition because the 'main' channel > (= CH_DEFAULT) is being established here. Ideally, all channels should > send magic values to be consistent. The 'main' channel sends magic > value when it is established before starting migration, but the same > 'main' channel does not send magic value when it is established during > postcopy recovery, that is an inconsistency (a bug) here. Ideal fix is For a reconnection we could do better to define a header format indeed for such extensions. I can't say it's a bug. > to send a magic value every time the 'main' channel is established, > irrespective of when it is established. > > * Adding conditionals to check if it is _POSTCOPY_PAUSED state then > don't peek will only lead to complicated 'if' conditionals. This > channel handling code is already complex and non-intuitive enough. Please see above [a]. > > > > + } else if (mis->from_src_file > > > + && (!strcmp(ioc->name, "migration-tls-incoming") > > > + || !strcmp(ioc->name, "migration-file-incoming"))) { > > > + channel = CH_MULTIFD; > > > > Confused here too. Why do we need to check ioc name? Shouldn't multifd has > > the headers? > > * Because they are not 'multifd' channels, tls/file channels don't > send magic values, but are still handled by It might be because you have a bug where you removed mapped-ram check at [b] above. I think we need to keep it. Why TLS channels don't send magic? > 'multifd_recv_new_channel()' function. > === > ... > if (default_channel) { > migration_incoming_setup(f); > } else { > if (migrate_multifd()) { > multifd_recv_new_channel(ioc, &local_err); > } else { > postcopy_preempt_new_channel(mis, f); > } > === > In the code above, if 'default_channel==false' and multifd() is > enabled, all incoming connections are handled by > 'multifd_recv_new_channel()', irrespective of whether it is a > 'multifd' channel or not. While creating multifd channels, there is no > check for channel type like: if(channel == CH_MULTIFD). > > * IMHO, if we make all channels behave with consistency, ie. either > they all send magic value or none sends magic value, that'll simplify > this code a lot. > > > > - assert(migration_needs_multiple_sockets()); > > Could I ask why removal? > > * Because that function returns migrate_multifd() => > migrate_multifd() || migrate_postcopy_preempt(); > * And the very following check is also migrate_multifd(), as below: > > > > if (migrate_multifd()) { > > > multifd_recv_new_channel(ioc, &local_err); > > > > It might be better to avoid such "ret && XXX" nested check. E.g. do you > > think below easier to read? > > > > diff --git a/migration/migration.c b/migration/migration.c > > index 74c50cc72c..9eb2f3fdeb 100644 > > --- a/migration/migration.c > > +++ b/migration/migration.c > > @@ -1064,12 +1064,14 @@ bool migration_has_all_channels(void) > > return false; > > } > > > > - if (migrate_multifd()) { > > - return multifd_recv_all_channels_created(); > > + if (migrate_multifd() && > > + !multifd_recv_all_channels_created()) { > > + return false; > > } > > > > - if (migrate_postcopy_preempt()) { > > - return mis->postcopy_qemufile_dst != NULL; > > + if (migrate_postcopy_preempt() && > > + mis->postcopy_qemufile_dst == NULL) { > > + return false; > > } > > > > return true; > > * Will try it. > > > > - if (!migrate_multifd()) { > > > + if (!migrate_multifd() || migration_in_postcopy()) { > > > return 0; > > > } > > > > [1] > > > > > > > > if (new_caps[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT]) { > > > diff --git a/migration/ram.c b/migration/ram.c > > > index f2326788de..bdba7abe73 100644 > > > --- a/migration/ram.c > > > +++ b/migration/ram.c > > > @@ -1295,7 +1295,7 @@ static int find_dirty_block(RAMState *rs, PageSearchStatus *pss) > > > pss->page = 0; > > > pss->block = QLIST_NEXT_RCU(pss->block, next); > > > if (!pss->block) { > > > - if (multifd_ram_sync_per_round()) { > > > + if (multifd_ram_sync_per_round() && !migration_in_postcopy()) { > > > > If you have above[1], why need this? > > * True, I tried with just [1] above first, but it was failing for some > reason. Will try again. Another approach (cleaner at least to me..) is we check in_postcopy in multifd_ram_sync_per_*() functions. > > > This patch still did nothing for multifd in postcopy_start(). I'm not sure > > it's safe. > > > > What happens if some multifd pages were sent, then we start postcopy, dest > > vcpu threads running, then during postcopy some multifd pages finally > > arrived and modifying the guest pages during vcpus running? > > * ram_save_target_page() function saves multifd pages only when > (..!migration_in_postcopy()) not in postcopy mode. Case of 'multifd' > page arriving late on destination and 'postcopy' starting before that > is strange, because if multifd page is getting late, that network > latency should affect 'postcopy' channel too, no? But still if it is I don't think so. postcopy doesn't use any multifd channels. > possible, do we want to call - multifd_ram_flush_and_sync() before > postcopy_start()? Will that help? I'll check if/how it works. Note that all things flushed may or may not be enough, because IIUC the flush only makes sure all threads are synced. It may not make sure the order of things to happen in multifd threads and postcopy thread. The latter is what we need - we need to make sure no page land in postcopy threads. That's why I was requesting to add an assert() in multifd recv thread to make sure we will never receive a page during postcopy. This part is the most important change of the whole series, please take your time to understand the workflow and let's make sure it won't happen. Thanks,
Hello Peter, On Fri, 7 Feb 2025 at 21:16, Peter Xu <peterx@redhat.com> wrote: > This is not easy to follow neither with the current name, nor that you > "assumed this is main channel" and test it. * It is not my doing, nor is there any assumption, but that is how current implementation works. === static bool migration_should_start_incoming(bool main_channel) { /* Multifd doesn't start unless all channels are established */ if (migrate_multifd()) { return migration_has_all_channels(); } /* Preempt channel only starts when the main channel is created */ if (migrate_postcopy_preempt()) { return main_channel; } /* * For all the rest types of migration, we should only reach here when * it's the main channel that's being created, and we should always * proceed with this channel. */ assert(main_channel); return true; } === * Above code returns 'true' for 'multifd' and 'main_channel' cases. When migrate_postcopy_preempt() is true, main_channel is 'false', so it returns false. All I have done is reused the migration_should_start_incoming() function to simplify the 'if' conditional at the top. > I think you may want to split > migration_has_all_channels() into migration_has_essential_channels() which > only covers main and multifd cases. Then you can check if (!has_esential) > here. You'd better also add a comment that all "essential channels" can be > peeked. > > You may also want to bypass a few things, e.g. "postcopy paused stage" here > rather than inside, because postcopy-recover only happens: > > - First with a main channel, that is not peekable as no header when resume > - Then with preempt channel, that is also not peekable > > [a] > > You may also need to keep the mapped-ram check. They also don't support > peek. * Instead of adding specific conditions and splitting functions, my request is, let's work towards consistent channel behaviour that will automatically simplify these conditions and channel handling. Maybe we can do that in a subsequent series. > > > > > > + } else if (mis->from_src_file > > > > + && (!strcmp(ioc->name, "migration-tls-incoming") > > > > + || !strcmp(ioc->name, "migration-file-incoming"))) { > > > > + channel = CH_MULTIFD; > > > > > > Confused here too. Why do we need to check ioc name? Shouldn't multifd has > > > the headers? > > > > * Because they are not 'multifd' channels, tls/file channels don't > > send magic values, but are still handled by > > It might be because you have a bug where you removed mapped-ram check at > [b] above. I think we need to keep it. * ie. Because I removed the mapped-ram check, so tls/file channels are handled by multifd_recv_new_channel()? No, that's not the case. Rather, that is how it works currently. I have not changed anything, only made it more explicit to see that when it is tls/file channel, handle it as a CH_MULTIFD type. Looking at the current code, one can not see clearly how tls/file channels are handled. > Why TLS channels don't send magic? * Probably because they do TLS hand-shake while establishing a connection? > > because if multifd page is getting late, that network > > latency should affect 'postcopy' channel too, no? But still if it is > > I don't think so. postcopy doesn't use any multifd channels. * Yes, but it uses the same wire/network. > > possible, do we want to call - multifd_ram_flush_and_sync() before > > postcopy_start()? Will that help? I'll check if/how it works. > > Note that all things flushed may or may not be enough, because IIUC the > flush only makes sure all threads are synced. * We are again using 'flush' and 'sync' interchangeably. What does - flush only makes sure all threads are synced - mean really? Is it not writing all socket data onto the wire/channel? * Flush should write all socket data onto the network wire/channel. The _order_ in which threads flush/write their socket data onto the wire/channel is to synchronise them, maintaining/controlling that _order_ is not flush. > It may not make sure the order of things to happen in multifd threads and postcopy thread. The > latter is what we need - we need to make sure no page land in postcopy threads. * Let's see: 1) When migration is in Postcopy mode, ram_save_multifd_page() is _not_ called on the source side. ie. no multifd data gets enqueued on the multifd queue. 1.1) multifd_queue_page() function also calls multifd_send() if the queue is full, before enqueueing new pages. 2) If a multifd page reaches the destination during Postcopy mode, it must have been sent/written on the multifd channel before Postcopy mode started, right? 3) In this case, writing/flushing all multifd socket data onto the wire/channel, before calling postcopy_start() function should help IIUC. 3.1) ie. calling multifd_send() before postcopy_start() should help to clear the multifd queue, before Postcopy begins. 3.2) Same can be done by - multifd_ram_flush_and_sync() -> multifd_send() - sequence. * If all multifd pages are sent/written/flushed onto the multifd channels before postcopy_start() is called, then multifd pages should not arrive at the destination after postcopy starts IIUC. If that is happening, we need a reproducer for such a case. Do we have such a reproducer? > That's why I was requesting to add an assert() in multifd recv thread to > make sure we will never receive a page during postcopy. * ie. Add assert(!migrate_in_postcopy()) in multifd_recv_thread() function? Okay. Thank you. --- - Prasad
On Sat, Feb 08, 2025 at 04:06:56PM +0530, Prasad Pandit wrote: > Hello Peter, > > On Fri, 7 Feb 2025 at 21:16, Peter Xu <peterx@redhat.com> wrote: > > This is not easy to follow neither with the current name, nor that you > > "assumed this is main channel" and test it. > > * It is not my doing, nor is there any assumption, but that is how > current implementation works. > === > static bool migration_should_start_incoming(bool main_channel) > { > /* Multifd doesn't start unless all channels are established */ > if (migrate_multifd()) { > return migration_has_all_channels(); > } > > /* Preempt channel only starts when the main channel is created */ > if (migrate_postcopy_preempt()) { > return main_channel; > } > > /* > * For all the rest types of migration, we should only reach here when > * it's the main channel that's being created, and we should always > * proceed with this channel. > */ > assert(main_channel); > return true; > } > === > * Above code returns 'true' for 'multifd' and 'main_channel' cases. > When migrate_postcopy_preempt() is true, main_channel is 'false', so > it returns false. All I have done is reused the > migration_should_start_incoming() function to simplify the 'if' > conditional at the top. Yes, and I suggest a rename or introduce a new helper, per previous reply. > > > I think you may want to split > > migration_has_all_channels() into migration_has_essential_channels() which > > only covers main and multifd cases. Then you can check if (!has_esential) > > here. You'd better also add a comment that all "essential channels" can be > > peeked. > > > > You may also want to bypass a few things, e.g. "postcopy paused stage" here > > rather than inside, because postcopy-recover only happens: > > > > - First with a main channel, that is not peekable as no header when resume > > - Then with preempt channel, that is also not peekable > > > > [a] > > > > You may also need to keep the mapped-ram check. They also don't support > > peek. > > * Instead of adding specific conditions and splitting functions, my > request is, let's work towards consistent channel behaviour that will > automatically simplify these conditions and channel handling. Maybe we > can do that in a subsequent series. I didn't follow, sorry - do you mean this patch is correct on dropping the mapped-ram check? I don't yet understand how it can work if without. > > > > > > > > > + } else if (mis->from_src_file > > > > > + && (!strcmp(ioc->name, "migration-tls-incoming") > > > > > + || !strcmp(ioc->name, "migration-file-incoming"))) { > > > > > + channel = CH_MULTIFD; > > > > > > > > Confused here too. Why do we need to check ioc name? Shouldn't multifd has > > > > the headers? > > > > > > * Because they are not 'multifd' channels, tls/file channels don't > > > send magic values, but are still handled by > > > > It might be because you have a bug where you removed mapped-ram check at > > [b] above. I think we need to keep it. > > * ie. Because I removed the mapped-ram check, so tls/file channels are > handled by multifd_recv_new_channel()? No, that's not the case. > Rather, that is how it works currently. I have not changed anything, > only made it more explicit to see that when it is tls/file channel, > handle it as a CH_MULTIFD type. Looking at the current code, one can > not see clearly how tls/file channels are handled. > > > Why TLS channels don't send magic? > > * Probably because they do TLS hand-shake while establishing a connection? I meant tls channels should have these magics too. Do you mean they're not? > > > > because if multifd page is getting late, that network > > > latency should affect 'postcopy' channel too, no? But still if it is > > > > I don't think so. postcopy doesn't use any multifd channels. > > * Yes, but it uses the same wire/network. > > > > possible, do we want to call - multifd_ram_flush_and_sync() before > > > postcopy_start()? Will that help? I'll check if/how it works. > > > > Note that all things flushed may or may not be enough, because IIUC the > > flush only makes sure all threads are synced. > > * We are again using 'flush' and 'sync' interchangeably. What does - > flush only makes sure all threads are synced - mean really? Is it not > writing all socket data onto the wire/channel? > > * Flush should write all socket data onto the network wire/channel. > The _order_ in which threads flush/write their socket data onto the > wire/channel is to synchronise them, maintaining/controlling that > _order_ is not flush. > > > It may not make sure the order of things to happen in multifd threads and postcopy thread. The > > latter is what we need - we need to make sure no page land in postcopy threads. > > * Let's see: > 1) When migration is in Postcopy mode, ram_save_multifd_page() is > _not_ called on the source side. ie. no multifd data gets enqueued on > the multifd queue. > 1.1) multifd_queue_page() function also calls multifd_send() if > the queue is full, before enqueueing new pages. > 2) If a multifd page reaches the destination during Postcopy mode, > it must have been sent/written on the multifd channel before Postcopy > mode started, right? Yes. > 3) In this case, writing/flushing all multifd socket data onto the > wire/channel, before calling postcopy_start() function should help > IIUC. > 3.1) ie. calling multifd_send() before postcopy_start() should > help to clear the multifd queue, before Postcopy begins. > 3.2) Same can be done by - multifd_ram_flush_and_sync() -> > multifd_send() - sequence. No I don't think so. Flushing sending side makes sure send buffer is empty. It doesn't guarantee recv buffer is empty on the other side. > > * If all multifd pages are sent/written/flushed onto the multifd > channels before postcopy_start() is called, then multifd pages should > not arrive at the destination after postcopy starts IIUC. If that is > happening, we need a reproducer for such a case. Do we have such a > reproducer? With or without a reproducer, we need to at least justify it in theory. If it doesn't even work in theory, it's a problem. > > > That's why I was requesting to add an assert() in multifd recv thread to > > make sure we will never receive a page during postcopy. > > * ie. Add assert(!migrate_in_postcopy()) in multifd_recv_thread() > function? Okay. Yes, probably before multifd_recv_state->ops->recv(). Thanks,
On Mon, 10 Feb 2025 at 22:29, Peter Xu <peterx@redhat.com> wrote: > Yes, and I suggest a rename or introduce a new helper, per previous reply. * Okay, will try it. > I didn't follow, sorry - do you mean this patch is correct on dropping the > mapped-ram check? I don't yet understand how it can work if without. * It goes for channel peek if '!migrate_mapped_ram', ie. when mapped_ram is not set. When it is enabled, likely it just falls into the multifd channel, like other tls/file channels. I'll see if we have to add a check for mapped_ram stream, like tls/file one. > I meant tls channels should have these magics too. Do you mean they're not? * Yes. AFAIU, tls/file channels don't send magic values. > No I don't think so. > Flushing sending side makes sure send buffer is empty. It doesn't > guarantee recv buffer is empty on the other side. * A simple 'flush' operation is not supposed to guarantee reception on the destination side. It is just a 'flush' operation. If we want to _confirm_ whether the pages sent to the destination are received or not, then the destination side should send an 'Acknowledgement' to the source side. Is there such a mechanism in place currently? > > > > * If all multifd pages are sent/written/flushed onto the multifd > > channels before postcopy_start() is called, then multifd pages should > > not arrive at the destination after postcopy starts IIUC. If that is > > happening, we need a reproducer for such a case. Do we have such a > > reproducer? > > With or without a reproducer, we need to at least justify it in theory. If > it doesn't even work in theory, it's a problem. * The theory that both multifd and postcopy channels use the same underlying network wire; And in that multifd pages get delayed, but postcopy pages don't, is not understandable. There must be something else happening in such a case, which a reproducer could help with. Thank you. --- - Prasad
On Tue, Feb 11, 2025 at 02:34:07PM +0530, Prasad Pandit wrote: > On Mon, 10 Feb 2025 at 22:29, Peter Xu <peterx@redhat.com> wrote: > > Yes, and I suggest a rename or introduce a new helper, per previous reply. > > * Okay, will try it. > > > I didn't follow, sorry - do you mean this patch is correct on dropping the > > mapped-ram check? I don't yet understand how it can work if without. > > * It goes for channel peek if '!migrate_mapped_ram', ie. when > mapped_ram is not set. When it is enabled, likely it just falls into > the multifd channel, like other tls/file channels. I'll see if we have > to add a check for mapped_ram stream, like tls/file one. > > > I meant tls channels should have these magics too. Do you mean they're not? > > * Yes. AFAIU, tls/file channels don't send magic values. Please double check whether TLS will send magics. AFAICT, they should. > > > No I don't think so. > > Flushing sending side makes sure send buffer is empty. It doesn't > > guarantee recv buffer is empty on the other side. > > * A simple 'flush' operation is not supposed to guarantee reception on > the destination side. It is just a 'flush' operation. If we want to > _confirm_ whether the pages sent to the destination are received or > not, then the destination side should send an 'Acknowledgement' to the > source side. Is there such a mechanism in place currently? No. We need to figure out a way to do that properly, and that's exactly what I mentioned as one of the core changes we need in this series, which is still missing. We may or may not need an ACK message. Please think about it. > > > > > > > * If all multifd pages are sent/written/flushed onto the multifd > > > channels before postcopy_start() is called, then multifd pages should > > > not arrive at the destination after postcopy starts IIUC. If that is > > > happening, we need a reproducer for such a case. Do we have such a > > > reproducer? > > > > With or without a reproducer, we need to at least justify it in theory. If > > it doesn't even work in theory, it's a problem. > > * The theory that both multifd and postcopy channels use the same > underlying network wire; And in that multifd pages get delayed, but > postcopy pages don't, is not understandable. There must be something > else happening in such a case, which a reproducer could help with. Please consider the case where multifd recv threads may get scheduled out on dest host during precopy phase, not getting chance to be scheduled until postcopy already started running on dst, then the recv thread can stumble upon a page that was sent during precopy. As long as that can be always avoided, I think we should be good. Thanks,
Hi, On Tue, 11 Feb 2025 at 20:50, Peter Xu <peterx@redhat.com> wrote: > > * Yes. AFAIU, tls/file channels don't send magic values. > Please double check whether TLS will send magics. AFAICT, they should. === * ... Also tls live migration already does * tls handshake while initializing main channel so with tls this * issue is not possible. */ if (qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_READ_MSG_PEEK)) { } else if (mis->from_src_file && (!strcmp(ioc->name, "migration-tls-incoming") || !strcmp(ioc->name, "migration-file-incoming"))) { channel = CH_MULTIFD; } === * From the comment and condition above, both 'tls' and 'file' channels are not peekable, ie. no magic value to peek. The 'migration-file-incoming' check also helps to cover the migrate_mapped_ram() case IIUC. > No. We need to figure out a way to do that properly, and that's exactly > what I mentioned as one of the core changes we need in this series, which > is still missing. We may or may not need an ACK message. Please think > about it. * First we tried to call 'multifd_send_shutdown()' to close multifd channels before calling postcopy_start(). That's the best case scenario wherein multifd channels are closed before postcopy starts. So that there's no confusion and/or jumbling of different data packets. It did not work, as qemu would crash during multifd_shutdown(). * Second is we push/flush all multifd pages before calling postcopy_start() and let the multifd channels exist/stay till the migration ends, after that they are duly shutdown. It is working well so far, passing all migration tests too. * Third, if we want to confirm that multifd pages are received on the destination before calling postcopy_start(), then the best way is for the destination to send an acknowledgement to the source side that it has received and processed all multifd pages and nothing is pending on the multifd channels. * Another could be to define a multifd_recv_flush() function, which could process and clear the receive side multifd queue, so that no multifd pages are pending there. Not sure how best to do this yet. Also considering it lacks proper communication and synchronisation between source and destination sides, it does not seem like the best solution. * Do you have any other option/approach in mind? > Please consider the case where multifd recv threads may get scheduled out > on dest host during precopy phase, not getting chance to be scheduled until > postcopy already started running on dst, then the recv thread can stumble > upon a page that was sent during precopy. As long as that can be always > avoided, I think we should be good. * TBH, this sounds like a remote corner case. * I'm testing the revised patch series and will send it shortly. Thank you. --- - Prasad
On Wed, Feb 12, 2025 at 06:57:48PM +0530, Prasad Pandit wrote: > Hi, > > On Tue, 11 Feb 2025 at 20:50, Peter Xu <peterx@redhat.com> wrote: > > > * Yes. AFAIU, tls/file channels don't send magic values. > > Please double check whether TLS will send magics. AFAICT, they should. > === > * ... Also tls live migration already does > * tls handshake while initializing main channel so with tls this > * issue is not possible. > */ > if (qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_READ_MSG_PEEK)) { > } else if (mis->from_src_file > && (!strcmp(ioc->name, "migration-tls-incoming") > || !strcmp(ioc->name, "migration-file-incoming"))) { > channel = CH_MULTIFD; > } > === > * From the comment and condition above, both 'tls' and 'file' channels > are not peekable, ie. no magic value to peek. The > 'migration-file-incoming' check also helps to cover the > migrate_mapped_ram() case IIUC. I think it's not because TLS channels don't send magic, but TLS channels are not prone to ordering issues. In general, I'm not convinced we need to check names of iochannels. > > > No. We need to figure out a way to do that properly, and that's exactly > > what I mentioned as one of the core changes we need in this series, which > > is still missing. We may or may not need an ACK message. Please think > > about it. > > * First we tried to call 'multifd_send_shutdown()' to close multifd > channels before calling postcopy_start(). That's the best case > scenario wherein multifd channels are closed before postcopy starts. > So that there's no confusion and/or jumbling of different data > packets. It did not work, as qemu would crash during > multifd_shutdown(). Have we debugged the crash? I'm not saying we should go with this, but crash isn't a reason to not choose a design. > > * Second is we push/flush all multifd pages before calling > postcopy_start() and let the multifd channels exist/stay till the > migration ends, after that they are duly shutdown. It is working well > so far, passing all migration tests too. > > * Third, if we want to confirm that multifd pages are received on the > destination before calling postcopy_start(), then the best way is for > the destination to send an acknowledgement to the source side that it > has received and processed all multifd pages and nothing is pending on > the multifd channels. > > * Another could be to define a multifd_recv_flush() function, which > could process and clear the receive side multifd queue, so that no > multifd pages are pending there. Not sure how best to do this yet. > Also considering it lacks proper communication and synchronisation > between source and destination sides, it does not seem like the best > solution. > > * Do you have any other option/approach in mind? > > > Please consider the case where multifd recv threads may get scheduled out > > on dest host during precopy phase, not getting chance to be scheduled until > > postcopy already started running on dst, then the recv thread can stumble > > upon a page that was sent during precopy. As long as that can be always > > avoided, I think we should be good. > > * TBH, this sounds like a remote corner case. No this is not. As I mentioned tons of times.. copying page in socket buffers directly into guest page during vcpus running / postcopy is a very hard to debug issue. If it's possible to happen, the design is flawed. > > * I'm testing the revised patch series and will send it shortly. I don't think passing the unit tests prove the series is correct and should be merged. We need to understand how it work, or we can't merge it. I feel very frustrated multiple times that you seem to want to ignore what I comment. I don't know why you rush to repost things. After a 2nd thought, I think maybe multifd flush and sync could work on src side indeed, because when flush and sync there'll be a message (MULTIFD_FLUSH) on main channel and that should order against the rest postcopy messages that will also be sent on the main channel (if we do multifd flush before all the postcopy processes). Then it should guarantee when postcopy starts on dest, dest multifd recv threads flushed all messages, and no multifd message will arrive anymore. But again we should guard it with an assert() in recv threads if you want to postpone recycling of multifd threads, just to double check no outliers we overlooked. When proposing the patches you need to justify the design first before anything. Please evaluate above, these things are critical to appear in either code comments or commit messages. Please digest everything before reposting. Thanks,
Hi, On Wed, 12 Feb 2025 at 20:08, Peter Xu <peterx@redhat.com> wrote: > I think it's not because TLS channels don't send magic, but TLS channels > are not prone to ordering issues. > In general, I'm not convinced we need to check names of iochannels. * If the channel does not set '_READ_MSG_SEEK' flag, which magic value are we going to read? As for checking names of the channels, it tells the reader how an incoming channel is processed. >> It did not work, as qemu would crash during multifd_shutdown(). > > Have we debugged the crash? I'm not saying we should go with this, but > crash isn't a reason to not choose a design. * Yes, I did try to debug it but couldn't get to a conclusion in time. > No this is not. As I mentioned tons of times.. copying page in socket > buffers directly into guest page during vcpus running / postcopy is a very > hard to debug issue. If it's possible to happen, the design is flawed. * Sure, agreed. So far in my testing it does not seem to happen. The theory of multifd_recv_threads getting scheduled out and causing guest page over-write seems remote to me, but I get the possibility/probability. One possible solution is to have the destination side send an 'acknowledgement' to the source side. > I don't think passing the unit tests prove the series is correct and should > be merged. We need to understand how it work, or we can't merge it. * Well, passing unit tests should confirm that it does not break existing functionality. Is there any metric/limit to such understanding? Everybody understands/sees things differently. Understanding is an ever evolving thing. Saying that merge should happen based on understanding sounds weird to me. > I feel very frustrated multiple times that you seem to want to ignore what > I comment. I don't know why you rush to repost things. * I guess we are seeing/understanding things differently here. > After a 2nd thought, I think maybe multifd flush and sync could work on src > side indeed, because when flush and sync there'll be a message > (MULTIFD_FLUSH) on main channel and that should order against the rest > postcopy messages that will also be sent on the main channel (if we do > multifd flush before all the postcopy processes). Then it should guarantee > when postcopy starts on dest, dest multifd recv threads flushed all > messages, and no multifd message will arrive anymore. * After a 2nd thought - that's evolving understanding right there. :) * To mention here, to flush multifd channels with 'multifd_ram_flush_and_sync()' I tried calling migration_iteration_run -> multifd_ram_flush_and_sync(s->to_dst_file) It does not work, it crashes. Is 's->to_dst_file' a right parameter there? But calling a function below works === /* Send enqueued data pages onto next available multifd channel */ int multifd_send_flush(void) { if (!multifd_payload_empty(multifd_ram_send)) { if (!multifd_send(&multifd_ram_send)) { error_report("%s: multifd_send fail", __func__); return -1; } } return 0; } === migration_iteration_run -> multifd_send_flush() Works, but it is not sending the 'RAM_SAVE_FLAG_MULTIFD_FLUSH' message as done by 'multifd_ram_flush_and_sync()' function. > But again we should guard it with an assert() in recv threads if you want > to postpone recycling of multifd threads, just to double check no outliers > we overlooked. * Yes, the revised patch is working with the assert(!migrate_in_postcopy) in multifd_recv_thread. * I was going to send a revised series with these changes, but will wait on that for now. Thank you. --- - Prasad
On Wed, Feb 12, 2025 at 11:06:00PM +0530, Prasad Pandit wrote: > * I was going to send a revised series with these changes, but will > wait on that for now. You were going to send some changes that you don't yet fully understand how it works? How would you do the flush in the new revision if your existing code crashes? Or did you do that in the revised series at all? Sorry, no - I don't think this is how it should or could ever work. I've repeated myself multiple times, I'll stop.
diff --git a/migration/migration.c b/migration/migration.c index 2d1da917c7..a280722e9e 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -92,6 +92,9 @@ enum mig_rp_message_type { MIG_RP_MSG_MAX }; +/* Migration channel types */ +enum { CH_DEFAULT, CH_MULTIFD, CH_POSTCOPY }; + /* When we add fault tolerance, we could have several migrations at once. For now we don't need to add dynamic creation of migration */ @@ -929,26 +932,33 @@ void migration_fd_process_incoming(QEMUFile *f) /* * Returns true when we want to start a new incoming migration process, * false otherwise. + * + * All the required channels must be in place before a new incoming + * migration process starts. + * - Multifd enabled: + * The main channel and the multifd channels are required. + * - Multifd/Postcopy disabled: + * The main channel is required. + * - Postcopy enabled: + * We don't want to start a new incoming migration when + * the postcopy channel is created. Because it is created + * towards the end of the precopy migration. + * */ -static bool migration_should_start_incoming(bool main_channel) +static bool migration_should_start_incoming(uint8_t channel) { - /* Multifd doesn't start unless all channels are established */ - if (migrate_multifd()) { - return migration_has_all_channels(); - } + bool ret = false; + + if (channel != CH_POSTCOPY) { + MigrationIncomingState *mis = migration_incoming_get_current(); + ret = mis->from_src_file ? true : false; - /* Preempt channel only starts when the main channel is created */ - if (migrate_postcopy_preempt()) { - return main_channel; + if (ret && migrate_multifd()) { + ret = multifd_recv_all_channels_created(); + } } - /* - * For all the rest types of migration, we should only reach here when - * it's the main channel that's being created, and we should always - * proceed with this channel. - */ - assert(main_channel); - return true; + return ret; } void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp) @@ -956,13 +966,12 @@ void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp) MigrationIncomingState *mis = migration_incoming_get_current(); Error *local_err = NULL; QEMUFile *f; - bool default_channel = true; uint32_t channel_magic = 0; + uint8_t channel = CH_DEFAULT; int ret = 0; - if (migrate_multifd() && !migrate_mapped_ram() && - !migrate_postcopy_ram() && - qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_READ_MSG_PEEK)) { + if (!migration_should_start_incoming(channel)) { + if (qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_READ_MSG_PEEK)) { /* * With multiple channels, it is possible that we receive channels * out of order on destination side, causing incorrect mapping of @@ -973,42 +982,58 @@ void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp) * tls handshake while initializing main channel so with tls this * issue is not possible. */ - ret = migration_channel_read_peek(ioc, (void *)&channel_magic, - sizeof(channel_magic), errp); + ret = migration_channel_read_peek(ioc, (void *)&channel_magic, + sizeof(channel_magic), errp); + if (ret != 0) { + return; + } - if (ret != 0) { - return; + if (channel_magic == cpu_to_be32(QEMU_VM_FILE_MAGIC)) { + channel = CH_DEFAULT; + } else if (channel_magic == cpu_to_be32(MULTIFD_MAGIC)) { + channel = CH_MULTIFD; + } else if (!mis->from_src_file + && mis->state == MIGRATION_STATUS_POSTCOPY_PAUSED) { + /* reconnect default channel for postcopy recovery */ + channel = CH_DEFAULT; + } else { + error_report("%s: could not identify channel, unknown magic: %u", + __func__, channel_magic); + return; + } + } else if (mis->from_src_file + && (!strcmp(ioc->name, "migration-tls-incoming") + || !strcmp(ioc->name, "migration-file-incoming"))) { + channel = CH_MULTIFD; } - - default_channel = (channel_magic == cpu_to_be32(QEMU_VM_FILE_MAGIC)); - } else { - default_channel = !mis->from_src_file; + } else if (mis->from_src_file) { // && migrate_postcopy_preempt() + channel = CH_POSTCOPY; } if (multifd_recv_setup(errp) != 0) { return; } - if (default_channel) { + if (channel == CH_DEFAULT) { f = qemu_file_new_input(ioc); migration_incoming_setup(f); - } else { + } else if (channel == CH_MULTIFD) { /* Multiple connections */ - assert(migration_needs_multiple_sockets()); if (migrate_multifd()) { multifd_recv_new_channel(ioc, &local_err); - } else { - assert(migrate_postcopy_preempt()); - f = qemu_file_new_input(ioc); - postcopy_preempt_new_channel(mis, f); } if (local_err) { error_propagate(errp, local_err); return; } + } else if (channel == CH_POSTCOPY) { + assert(migrate_postcopy_preempt()); + assert(!mis->postcopy_qemufile_dst); + f = qemu_file_new_input(ioc); + postcopy_preempt_new_channel(mis, f); } - if (migration_should_start_incoming(default_channel)) { + if (migration_should_start_incoming(channel)) { /* If it's a recovery, we're done */ if (postcopy_try_recover()) { return; @@ -1025,21 +1050,22 @@ void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp) */ bool migration_has_all_channels(void) { + bool ret = false; MigrationIncomingState *mis = migration_incoming_get_current(); if (!mis->from_src_file) { - return false; + return ret; } if (migrate_multifd()) { - return multifd_recv_all_channels_created(); + ret = multifd_recv_all_channels_created(); } - if (migrate_postcopy_preempt()) { - return mis->postcopy_qemufile_dst != NULL; + if (ret && migrate_postcopy_preempt()) { + ret = mis->postcopy_qemufile_dst != NULL; } - return true; + return ret; } int migrate_send_rp_switchover_ack(MigrationIncomingState *mis) diff --git a/migration/multifd-nocomp.c b/migration/multifd-nocomp.c index 1325dba97c..d0edec7cd1 100644 --- a/migration/multifd-nocomp.c +++ b/migration/multifd-nocomp.c @@ -16,6 +16,7 @@ #include "file.h" #include "multifd.h" #include "options.h" +#include "migration.h" #include "qapi/error.h" #include "qemu/cutils.h" #include "qemu/error-report.h" @@ -391,7 +392,7 @@ int multifd_ram_flush_and_sync(QEMUFile *f) MultiFDSyncReq req; int ret; - if (!migrate_multifd()) { + if (!migrate_multifd() || migration_in_postcopy()) { return 0; } diff --git a/migration/options.c b/migration/options.c index b8d5300326..8c878dea49 100644 --- a/migration/options.c +++ b/migration/options.c @@ -479,11 +479,6 @@ bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp) error_setg(errp, "Postcopy is not compatible with ignore-shared"); return false; } - - if (new_caps[MIGRATION_CAPABILITY_MULTIFD]) { - error_setg(errp, "Postcopy is not yet compatible with multifd"); - return false; - } } if (new_caps[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT]) { diff --git a/migration/ram.c b/migration/ram.c index f2326788de..bdba7abe73 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -1295,7 +1295,7 @@ static int find_dirty_block(RAMState *rs, PageSearchStatus *pss) pss->page = 0; pss->block = QLIST_NEXT_RCU(pss->block, next); if (!pss->block) { - if (multifd_ram_sync_per_round()) { + if (multifd_ram_sync_per_round() && !migration_in_postcopy()) { QEMUFile *f = rs->pss[RAM_CHANNEL_PRECOPY].pss_channel; int ret = multifd_ram_flush_and_sync(f); if (ret < 0) { @@ -1969,7 +1969,7 @@ static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss) } } - if (migrate_multifd()) { + if (migrate_multifd() && !migration_in_postcopy()) { RAMBlock *block = pss->block; return ram_save_multifd_page(block, offset); }