Message ID | 20240104004452.324068-2-hao.xiang@bytedance.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Use Intel DSA accelerator to offload zero page checking in multifd live migration. | expand |
Hao Xiang <hao.xiang@bytedance.com> writes: > From: Juan Quintela <quintela@redhat.com> > > We have to enable it by default until we introduce the new code. > > Signed-off-by: Juan Quintela <quintela@redhat.com> > --- > migration/options.c | 15 +++++++++++++++ > migration/options.h | 1 + > qapi/migration.json | 8 +++++++- > 3 files changed, 23 insertions(+), 1 deletion(-) > > diff --git a/migration/options.c b/migration/options.c > index 8d8ec73ad9..0f6bd78b9f 100644 > --- a/migration/options.c > +++ b/migration/options.c > @@ -204,6 +204,8 @@ Property migration_properties[] = { > DEFINE_PROP_MIG_CAP("x-switchover-ack", > MIGRATION_CAPABILITY_SWITCHOVER_ACK), > DEFINE_PROP_MIG_CAP("x-dirty-limit", MIGRATION_CAPABILITY_DIRTY_LIMIT), > + DEFINE_PROP_MIG_CAP("main-zero-page", > + MIGRATION_CAPABILITY_MAIN_ZERO_PAGE), > DEFINE_PROP_END_OF_LIST(), > }; > > @@ -284,6 +286,19 @@ bool migrate_multifd(void) > return s->capabilities[MIGRATION_CAPABILITY_MULTIFD]; > } > > +bool migrate_use_main_zero_page(void) > +{ > + /* MigrationState *s; */ > + > + /* s = migrate_get_current(); */ > + > + /* > + * We will enable this when we add the right code. > + * return s->enabled_capabilities[MIGRATION_CAPABILITY_MAIN_ZERO_PAGE]; > + */ > + return true; > +} > + > bool migrate_pause_before_switchover(void) > { > MigrationState *s = migrate_get_current(); > diff --git a/migration/options.h b/migration/options.h > index 246c160aee..c901eb57c6 100644 > --- a/migration/options.h > +++ b/migration/options.h > @@ -88,6 +88,7 @@ int migrate_multifd_channels(void); > MultiFDCompression migrate_multifd_compression(void); > int migrate_multifd_zlib_level(void); > int migrate_multifd_zstd_level(void); > +bool migrate_use_main_zero_page(void); > uint8_t migrate_throttle_trigger_threshold(void); > const char *migrate_tls_authz(void); > const char *migrate_tls_creds(void); > diff --git a/qapi/migration.json b/qapi/migration.json > index eb2f883513..80c4b13516 100644 > --- a/qapi/migration.json > +++ b/qapi/migration.json > @@ -531,6 +531,12 @@ > # and can result in more stable read performance. Requires KVM > # with accelerator property "dirty-ring-size" set. (Since 8.1) > # > +# > +# @main-zero-page: If enabled, the detection of zero pages will be > +# done on the main thread. Otherwise it is done on > +# the multifd threads. > +# (since 8.2) > +# > # Features: > # > # @deprecated: Member @block is deprecated. Use blockdev-mirror with > @@ -555,7 +561,7 @@ > { 'name': 'x-ignore-shared', 'features': [ 'unstable' ] }, > 'validate-uuid', 'background-snapshot', > 'zero-copy-send', 'postcopy-preempt', 'switchover-ack', > - 'dirty-limit'] } > + 'dirty-limit', 'main-zero-page'] } > > ## > # @MigrationCapabilityStatus: I'll extract this zero page work into a separate series and submit for review soon. I want to get people's opinion on it independently of this series.
On Mon, Jan 8, 2024 at 12:39 PM Fabiano Rosas <farosas@suse.de> wrote: > > Hao Xiang <hao.xiang@bytedance.com> writes: > > > From: Juan Quintela <quintela@redhat.com> > > > > We have to enable it by default until we introduce the new code. > > > > Signed-off-by: Juan Quintela <quintela@redhat.com> > > --- > > migration/options.c | 15 +++++++++++++++ > > migration/options.h | 1 + > > qapi/migration.json | 8 +++++++- > > 3 files changed, 23 insertions(+), 1 deletion(-) > > > > diff --git a/migration/options.c b/migration/options.c > > index 8d8ec73ad9..0f6bd78b9f 100644 > > --- a/migration/options.c > > +++ b/migration/options.c > > @@ -204,6 +204,8 @@ Property migration_properties[] = { > > DEFINE_PROP_MIG_CAP("x-switchover-ack", > > MIGRATION_CAPABILITY_SWITCHOVER_ACK), > > DEFINE_PROP_MIG_CAP("x-dirty-limit", MIGRATION_CAPABILITY_DIRTY_LIMIT), > > + DEFINE_PROP_MIG_CAP("main-zero-page", > > + MIGRATION_CAPABILITY_MAIN_ZERO_PAGE), > > DEFINE_PROP_END_OF_LIST(), > > }; > > > > @@ -284,6 +286,19 @@ bool migrate_multifd(void) > > return s->capabilities[MIGRATION_CAPABILITY_MULTIFD]; > > } > > > > +bool migrate_use_main_zero_page(void) > > +{ > > + /* MigrationState *s; */ > > + > > + /* s = migrate_get_current(); */ > > + > > + /* > > + * We will enable this when we add the right code. > > + * return s->enabled_capabilities[MIGRATION_CAPABILITY_MAIN_ZERO_PAGE]; > > + */ > > + return true; > > +} > > + > > bool migrate_pause_before_switchover(void) > > { > > MigrationState *s = migrate_get_current(); > > diff --git a/migration/options.h b/migration/options.h > > index 246c160aee..c901eb57c6 100644 > > --- a/migration/options.h > > +++ b/migration/options.h > > @@ -88,6 +88,7 @@ int migrate_multifd_channels(void); > > MultiFDCompression migrate_multifd_compression(void); > > int migrate_multifd_zlib_level(void); > > int migrate_multifd_zstd_level(void); > > +bool migrate_use_main_zero_page(void); > > uint8_t migrate_throttle_trigger_threshold(void); > > const char *migrate_tls_authz(void); > > const char *migrate_tls_creds(void); > > diff --git a/qapi/migration.json b/qapi/migration.json > > index eb2f883513..80c4b13516 100644 > > --- a/qapi/migration.json > > +++ b/qapi/migration.json > > @@ -531,6 +531,12 @@ > > # and can result in more stable read performance. Requires KVM > > # with accelerator property "dirty-ring-size" set. (Since 8.1) > > # > > +# > > +# @main-zero-page: If enabled, the detection of zero pages will be > > +# done on the main thread. Otherwise it is done on > > +# the multifd threads. > > +# (since 8.2) > > +# > > # Features: > > # > > # @deprecated: Member @block is deprecated. Use blockdev-mirror with > > @@ -555,7 +561,7 @@ > > { 'name': 'x-ignore-shared', 'features': [ 'unstable' ] }, > > 'validate-uuid', 'background-snapshot', > > 'zero-copy-send', 'postcopy-preempt', 'switchover-ack', > > - 'dirty-limit'] } > > + 'dirty-limit', 'main-zero-page'] } > > > > ## > > # @MigrationCapabilityStatus: > > I'll extract this zero page work into a separate series and submit for > review soon. I want to get people's opinion on it independently of this > series. Sounds good. Thanks.
> On 04-Jan-2024, at 6:14 AM, Hao Xiang <hao.xiang@bytedance.com> wrote: > > From: Juan Quintela <quintela@redhat.com> > > We have to enable it by default until we introduce the new code. > > Signed-off-by: Juan Quintela <quintela@redhat.com> > --- > migration/options.c | 15 +++++++++++++++ > migration/options.h | 1 + > qapi/migration.json | 8 +++++++- > 3 files changed, 23 insertions(+), 1 deletion(-) > > diff --git a/migration/options.c b/migration/options.c > index 8d8ec73ad9..0f6bd78b9f 100644 > --- a/migration/options.c > +++ b/migration/options.c > @@ -204,6 +204,8 @@ Property migration_properties[] = { > DEFINE_PROP_MIG_CAP("x-switchover-ack", > MIGRATION_CAPABILITY_SWITCHOVER_ACK), > DEFINE_PROP_MIG_CAP("x-dirty-limit", MIGRATION_CAPABILITY_DIRTY_LIMIT), > + DEFINE_PROP_MIG_CAP("main-zero-page", > + MIGRATION_CAPABILITY_MAIN_ZERO_PAGE), > DEFINE_PROP_END_OF_LIST(), > }; > > @@ -284,6 +286,19 @@ bool migrate_multifd(void) > return s->capabilities[MIGRATION_CAPABILITY_MULTIFD]; > } > > +bool migrate_use_main_zero_page(void) > +{ > + /* MigrationState *s; */ > + > + /* s = migrate_get_current(); */ > + > + /* > + * We will enable this when we add the right code. > + * return s->enabled_capabilities[MIGRATION_CAPABILITY_MAIN_ZERO_PAGE]; > + */ > + return true; > +} > + > bool migrate_pause_before_switchover(void) > { > MigrationState *s = migrate_get_current(); > diff --git a/migration/options.h b/migration/options.h > index 246c160aee..c901eb57c6 100644 > --- a/migration/options.h > +++ b/migration/options.h > @@ -88,6 +88,7 @@ int migrate_multifd_channels(void); > MultiFDCompression migrate_multifd_compression(void); > int migrate_multifd_zlib_level(void); > int migrate_multifd_zstd_level(void); > +bool migrate_use_main_zero_page(void); > uint8_t migrate_throttle_trigger_threshold(void); > const char *migrate_tls_authz(void); > const char *migrate_tls_creds(void); > diff --git a/qapi/migration.json b/qapi/migration.json > index eb2f883513..80c4b13516 100644 > --- a/qapi/migration.json > +++ b/qapi/migration.json > @@ -531,6 +531,12 @@ > # and can result in more stable read performance. Requires KVM > # with accelerator property "dirty-ring-size" set. (Since 8.1) > # > +# > +# @main-zero-page: If enabled, the detection of zero pages will be > +# done on the main thread. Otherwise it is done on > +# the multifd threads. > +# (since 8.2) > +# Should the capability name be something like "zero-page-detection" or just “zero-page”? CC: Fabiano Rosas > # Features: > # > # @deprecated: Member @block is deprecated. Use blockdev-mirror with > @@ -555,7 +561,7 @@ > { 'name': 'x-ignore-shared', 'features': [ 'unstable' ] }, > 'validate-uuid', 'background-snapshot', > 'zero-copy-send', 'postcopy-preempt', 'switchover-ack', > - 'dirty-limit'] } > + 'dirty-limit', 'main-zero-page'] } > > ## > # @MigrationCapabilityStatus: > -- > 2.30.2 > > >
On Sun, Jan 14, 2024 at 10:02 PM Shivam Kumar <shivam.kumar1@nutanix.com> wrote: > > > > > On 04-Jan-2024, at 6:14 AM, Hao Xiang <hao.xiang@bytedance.com> wrote: > > > > From: Juan Quintela <quintela@redhat.com> > > > > We have to enable it by default until we introduce the new code. > > > > Signed-off-by: Juan Quintela <quintela@redhat.com> > > --- > > migration/options.c | 15 +++++++++++++++ > > migration/options.h | 1 + > > qapi/migration.json | 8 +++++++- > > 3 files changed, 23 insertions(+), 1 deletion(-) > > > > diff --git a/migration/options.c b/migration/options.c > > index 8d8ec73ad9..0f6bd78b9f 100644 > > --- a/migration/options.c > > +++ b/migration/options.c > > @@ -204,6 +204,8 @@ Property migration_properties[] = { > > DEFINE_PROP_MIG_CAP("x-switchover-ack", > > MIGRATION_CAPABILITY_SWITCHOVER_ACK), > > DEFINE_PROP_MIG_CAP("x-dirty-limit", MIGRATION_CAPABILITY_DIRTY_LIMIT), > > + DEFINE_PROP_MIG_CAP("main-zero-page", > > + MIGRATION_CAPABILITY_MAIN_ZERO_PAGE), > > DEFINE_PROP_END_OF_LIST(), > > }; > > > > @@ -284,6 +286,19 @@ bool migrate_multifd(void) > > return s->capabilities[MIGRATION_CAPABILITY_MULTIFD]; > > } > > > > +bool migrate_use_main_zero_page(void) > > +{ > > + /* MigrationState *s; */ > > + > > + /* s = migrate_get_current(); */ > > + > > + /* > > + * We will enable this when we add the right code. > > + * return s->enabled_capabilities[MIGRATION_CAPABILITY_MAIN_ZERO_PAGE]; > > + */ > > + return true; > > +} > > + > > bool migrate_pause_before_switchover(void) > > { > > MigrationState *s = migrate_get_current(); > > diff --git a/migration/options.h b/migration/options.h > > index 246c160aee..c901eb57c6 100644 > > --- a/migration/options.h > > +++ b/migration/options.h > > @@ -88,6 +88,7 @@ int migrate_multifd_channels(void); > > MultiFDCompression migrate_multifd_compression(void); > > int migrate_multifd_zlib_level(void); > > int migrate_multifd_zstd_level(void); > > +bool migrate_use_main_zero_page(void); > > uint8_t migrate_throttle_trigger_threshold(void); > > const char *migrate_tls_authz(void); > > const char *migrate_tls_creds(void); > > diff --git a/qapi/migration.json b/qapi/migration.json > > index eb2f883513..80c4b13516 100644 > > --- a/qapi/migration.json > > +++ b/qapi/migration.json > > @@ -531,6 +531,12 @@ > > # and can result in more stable read performance. Requires KVM > > # with accelerator property "dirty-ring-size" set. (Since 8.1) > > # > > +# > > +# @main-zero-page: If enabled, the detection of zero pages will be > > +# done on the main thread. Otherwise it is done on > > +# the multifd threads. > > +# (since 8.2) > > +# > Should the capability name be something like "zero-page-detection" or just “zero-page”? > CC: Fabiano Rosas I think the same concern was brought up last time Juan sent out the original patchset. Right now, the zero page detection is done in the main migration thread and it is always "ON". This change added a functionality to move the zero page detection from the main thread to the multifd sender threads. Now "main-zero-page" is turned "OFF" by default, and zero page checking is done in the multifd sender thread (much better performance). If user wants to run the zero page detection in the main thread (keep current behavior), user can change "main-zero-page" to "ON". Renaming it to "zero-page-detection" or just “zero-page” can not differentiate the old behavior and the new behavior. Here are the options: 1) Keep the current behavior. "main-zero-page" is OFF by default and zero page detection runs on the multifd thread by default. User can turn the switch to "ON" if they want old behavior. 2) Make "main-zero-page" switch ON as default. This would keep the current behavior by default. User can set it to "OFF" for better performance. > > # Features: > > # > > # @deprecated: Member @block is deprecated. Use blockdev-mirror with > > @@ -555,7 +561,7 @@ > > { 'name': 'x-ignore-shared', 'features': [ 'unstable' ] }, > > 'validate-uuid', 'background-snapshot', > > 'zero-copy-send', 'postcopy-preempt', 'switchover-ack', > > - 'dirty-limit'] } > > + 'dirty-limit', 'main-zero-page'] } > > > > ## > > # @MigrationCapabilityStatus: > > -- > > 2.30.2 > > > > > > >
Hao Xiang <hao.xiang@bytedance.com> writes: > On Sun, Jan 14, 2024 at 10:02 PM Shivam Kumar <shivam.kumar1@nutanix.com> wrote: >> >> >> >> > On 04-Jan-2024, at 6:14 AM, Hao Xiang <hao.xiang@bytedance.com> wrote: >> > >> > From: Juan Quintela <quintela@redhat.com> >> > >> > We have to enable it by default until we introduce the new code. >> > >> > Signed-off-by: Juan Quintela <quintela@redhat.com> >> > --- >> > migration/options.c | 15 +++++++++++++++ >> > migration/options.h | 1 + >> > qapi/migration.json | 8 +++++++- >> > 3 files changed, 23 insertions(+), 1 deletion(-) >> > >> > diff --git a/migration/options.c b/migration/options.c >> > index 8d8ec73ad9..0f6bd78b9f 100644 >> > --- a/migration/options.c >> > +++ b/migration/options.c >> > @@ -204,6 +204,8 @@ Property migration_properties[] = { >> > DEFINE_PROP_MIG_CAP("x-switchover-ack", >> > MIGRATION_CAPABILITY_SWITCHOVER_ACK), >> > DEFINE_PROP_MIG_CAP("x-dirty-limit", MIGRATION_CAPABILITY_DIRTY_LIMIT), >> > + DEFINE_PROP_MIG_CAP("main-zero-page", >> > + MIGRATION_CAPABILITY_MAIN_ZERO_PAGE), >> > DEFINE_PROP_END_OF_LIST(), >> > }; >> > >> > @@ -284,6 +286,19 @@ bool migrate_multifd(void) >> > return s->capabilities[MIGRATION_CAPABILITY_MULTIFD]; >> > } >> > >> > +bool migrate_use_main_zero_page(void) >> > +{ >> > + /* MigrationState *s; */ >> > + >> > + /* s = migrate_get_current(); */ >> > + >> > + /* >> > + * We will enable this when we add the right code. >> > + * return s->enabled_capabilities[MIGRATION_CAPABILITY_MAIN_ZERO_PAGE]; >> > + */ >> > + return true; >> > +} >> > + >> > bool migrate_pause_before_switchover(void) >> > { >> > MigrationState *s = migrate_get_current(); >> > diff --git a/migration/options.h b/migration/options.h >> > index 246c160aee..c901eb57c6 100644 >> > --- a/migration/options.h >> > +++ b/migration/options.h >> > @@ -88,6 +88,7 @@ int migrate_multifd_channels(void); >> > MultiFDCompression migrate_multifd_compression(void); >> > int migrate_multifd_zlib_level(void); >> > int migrate_multifd_zstd_level(void); >> > +bool migrate_use_main_zero_page(void); >> > uint8_t migrate_throttle_trigger_threshold(void); >> > const char *migrate_tls_authz(void); >> > const char *migrate_tls_creds(void); >> > diff --git a/qapi/migration.json b/qapi/migration.json >> > index eb2f883513..80c4b13516 100644 >> > --- a/qapi/migration.json >> > +++ b/qapi/migration.json >> > @@ -531,6 +531,12 @@ >> > # and can result in more stable read performance. Requires KVM >> > # with accelerator property "dirty-ring-size" set. (Since 8.1) >> > # >> > +# >> > +# @main-zero-page: If enabled, the detection of zero pages will be >> > +# done on the main thread. Otherwise it is done on >> > +# the multifd threads. >> > +# (since 8.2) >> > +# >> Should the capability name be something like "zero-page-detection" or just “zero-page”? >> CC: Fabiano Rosas > > I think the same concern was brought up last time Juan sent out the > original patchset. Right now, the zero page detection is done in the > main migration thread and it is always "ON". This change added a > functionality to move the zero page detection from the main thread to > the multifd sender threads. Now "main-zero-page" is turned "OFF" by > default, and zero page checking is done in the multifd sender thread > (much better performance). If user wants to run the zero page > detection in the main thread (keep current behavior), user can change > "main-zero-page" to "ON". > > Renaming it to "zero-page-detection" or just “zero-page” can not > differentiate the old behavior and the new behavior. Yes, the main point here is what happens when we try to migrate from different QEMU versions that have/don't have this code. We need some way to maintain the compatibility. In this case Juan chose to keep this capability with the semantics of "old behavior" so that we can enable it on the new QEMU to match with the old binary that doesn't expect to see zero pages on the packet/stream. > Here are the options: > 1) Keep the current behavior. "main-zero-page" is OFF by default and > zero page detection runs on the multifd thread by default. User can > turn the switch to "ON" if they want old behavior. > 2) Make "main-zero-page" switch ON as default. This would keep the > current behavior by default. User can set it to "OFF" for better > performance. 3) Make multifd-zero-page ON by default. User can set it to OFF to get the old behavior. There was some consideration about how libvirt works that would make this one unusable, but I don't understand what's that about. I would make this a default ON parameter instead of a capability. >> > # Features: >> > # >> > # @deprecated: Member @block is deprecated. Use blockdev-mirror with >> > @@ -555,7 +561,7 @@ >> > { 'name': 'x-ignore-shared', 'features': [ 'unstable' ] }, >> > 'validate-uuid', 'background-snapshot', >> > 'zero-copy-send', 'postcopy-preempt', 'switchover-ack', >> > - 'dirty-limit'] } >> > + 'dirty-limit', 'main-zero-page'] } >> > >> > ## >> > # @MigrationCapabilityStatus: >> > -- >> > 2.30.2 >> > >> > >> > >>
On Tue, Jan 23, 2024 at 7:11 AM Fabiano Rosas <farosas@suse.de> wrote: > > Hao Xiang <hao.xiang@bytedance.com> writes: > > > On Sun, Jan 14, 2024 at 10:02 PM Shivam Kumar <shivam.kumar1@nutanix.com> wrote: > >> > >> > >> > >> > On 04-Jan-2024, at 6:14 AM, Hao Xiang <hao.xiang@bytedance.com> wrote: > >> > > >> > From: Juan Quintela <quintela@redhat.com> > >> > > >> > We have to enable it by default until we introduce the new code. > >> > > >> > Signed-off-by: Juan Quintela <quintela@redhat.com> > >> > --- > >> > migration/options.c | 15 +++++++++++++++ > >> > migration/options.h | 1 + > >> > qapi/migration.json | 8 +++++++- > >> > 3 files changed, 23 insertions(+), 1 deletion(-) > >> > > >> > diff --git a/migration/options.c b/migration/options.c > >> > index 8d8ec73ad9..0f6bd78b9f 100644 > >> > --- a/migration/options.c > >> > +++ b/migration/options.c > >> > @@ -204,6 +204,8 @@ Property migration_properties[] = { > >> > DEFINE_PROP_MIG_CAP("x-switchover-ack", > >> > MIGRATION_CAPABILITY_SWITCHOVER_ACK), > >> > DEFINE_PROP_MIG_CAP("x-dirty-limit", MIGRATION_CAPABILITY_DIRTY_LIMIT), > >> > + DEFINE_PROP_MIG_CAP("main-zero-page", > >> > + MIGRATION_CAPABILITY_MAIN_ZERO_PAGE), > >> > DEFINE_PROP_END_OF_LIST(), > >> > }; > >> > > >> > @@ -284,6 +286,19 @@ bool migrate_multifd(void) > >> > return s->capabilities[MIGRATION_CAPABILITY_MULTIFD]; > >> > } > >> > > >> > +bool migrate_use_main_zero_page(void) > >> > +{ > >> > + /* MigrationState *s; */ > >> > + > >> > + /* s = migrate_get_current(); */ > >> > + > >> > + /* > >> > + * We will enable this when we add the right code. > >> > + * return s->enabled_capabilities[MIGRATION_CAPABILITY_MAIN_ZERO_PAGE]; > >> > + */ > >> > + return true; > >> > +} > >> > + > >> > bool migrate_pause_before_switchover(void) > >> > { > >> > MigrationState *s = migrate_get_current(); > >> > diff --git a/migration/options.h b/migration/options.h > >> > index 246c160aee..c901eb57c6 100644 > >> > --- a/migration/options.h > >> > +++ b/migration/options.h > >> > @@ -88,6 +88,7 @@ int migrate_multifd_channels(void); > >> > MultiFDCompression migrate_multifd_compression(void); > >> > int migrate_multifd_zlib_level(void); > >> > int migrate_multifd_zstd_level(void); > >> > +bool migrate_use_main_zero_page(void); > >> > uint8_t migrate_throttle_trigger_threshold(void); > >> > const char *migrate_tls_authz(void); > >> > const char *migrate_tls_creds(void); > >> > diff --git a/qapi/migration.json b/qapi/migration.json > >> > index eb2f883513..80c4b13516 100644 > >> > --- a/qapi/migration.json > >> > +++ b/qapi/migration.json > >> > @@ -531,6 +531,12 @@ > >> > # and can result in more stable read performance. Requires KVM > >> > # with accelerator property "dirty-ring-size" set. (Since 8.1) > >> > # > >> > +# > >> > +# @main-zero-page: If enabled, the detection of zero pages will be > >> > +# done on the main thread. Otherwise it is done on > >> > +# the multifd threads. > >> > +# (since 8.2) > >> > +# > >> Should the capability name be something like "zero-page-detection" or just “zero-page”? > >> CC: Fabiano Rosas > > > > I think the same concern was brought up last time Juan sent out the > > original patchset. Right now, the zero page detection is done in the > > main migration thread and it is always "ON". This change added a > > functionality to move the zero page detection from the main thread to > > the multifd sender threads. Now "main-zero-page" is turned "OFF" by > > default, and zero page checking is done in the multifd sender thread > > (much better performance). If user wants to run the zero page > > detection in the main thread (keep current behavior), user can change > > "main-zero-page" to "ON". > > > > Renaming it to "zero-page-detection" or just “zero-page” can not > > differentiate the old behavior and the new behavior. > > Yes, the main point here is what happens when we try to migrate from > different QEMU versions that have/don't have this code. We need some way > to maintain the compatibility. In this case Juan chose to keep this > capability with the semantics of "old behavior" so that we can enable it > on the new QEMU to match with the old binary that doesn't expect to see > zero pages on the packet/stream. > > > Here are the options: > > 1) Keep the current behavior. "main-zero-page" is OFF by default and > > zero page detection runs on the multifd thread by default. User can > > turn the switch to "ON" if they want old behavior. > > 2) Make "main-zero-page" switch ON as default. This would keep the > > current behavior by default. User can set it to "OFF" for better > > performance. > > 3) Make multifd-zero-page ON by default. User can set it to OFF to get > the old behavior. There was some consideration about how libvirt works > that would make this one unusable, but I don't understand what's that > about. > > I would make this a default ON parameter instead of a capability. Sounds good to me. > > >> > # Features: > >> > # > >> > # @deprecated: Member @block is deprecated. Use blockdev-mirror with > >> > @@ -555,7 +561,7 @@ > >> > { 'name': 'x-ignore-shared', 'features': [ 'unstable' ] }, > >> > 'validate-uuid', 'background-snapshot', > >> > 'zero-copy-send', 'postcopy-preempt', 'switchover-ack', > >> > - 'dirty-limit'] } > >> > + 'dirty-limit', 'main-zero-page'] } > >> > > >> > ## > >> > # @MigrationCapabilityStatus: > >> > -- > >> > 2.30.2 > >> > > >> > > >> > > >>
On Tue, Jan 23, 2024 at 12:10:55PM -0300, Fabiano Rosas wrote: > Hao Xiang <hao.xiang@bytedance.com> writes: > > > On Sun, Jan 14, 2024 at 10:02 PM Shivam Kumar <shivam.kumar1@nutanix.com> wrote: > >> > >> > >> > >> > On 04-Jan-2024, at 6:14 AM, Hao Xiang <hao.xiang@bytedance.com> wrote: > >> > > >> > From: Juan Quintela <quintela@redhat.com> > >> > > >> > We have to enable it by default until we introduce the new code. > >> > > >> > Signed-off-by: Juan Quintela <quintela@redhat.com> > >> > --- > >> > migration/options.c | 15 +++++++++++++++ > >> > migration/options.h | 1 + > >> > qapi/migration.json | 8 +++++++- > >> > 3 files changed, 23 insertions(+), 1 deletion(-) > >> > > >> > diff --git a/migration/options.c b/migration/options.c > >> > index 8d8ec73ad9..0f6bd78b9f 100644 > >> > --- a/migration/options.c > >> > +++ b/migration/options.c > >> > @@ -204,6 +204,8 @@ Property migration_properties[] = { > >> > DEFINE_PROP_MIG_CAP("x-switchover-ack", > >> > MIGRATION_CAPABILITY_SWITCHOVER_ACK), > >> > DEFINE_PROP_MIG_CAP("x-dirty-limit", MIGRATION_CAPABILITY_DIRTY_LIMIT), > >> > + DEFINE_PROP_MIG_CAP("main-zero-page", > >> > + MIGRATION_CAPABILITY_MAIN_ZERO_PAGE), > >> > DEFINE_PROP_END_OF_LIST(), > >> > }; > >> > > >> > @@ -284,6 +286,19 @@ bool migrate_multifd(void) > >> > return s->capabilities[MIGRATION_CAPABILITY_MULTIFD]; > >> > } > >> > > >> > +bool migrate_use_main_zero_page(void) > >> > +{ > >> > + /* MigrationState *s; */ > >> > + > >> > + /* s = migrate_get_current(); */ > >> > + > >> > + /* > >> > + * We will enable this when we add the right code. > >> > + * return s->enabled_capabilities[MIGRATION_CAPABILITY_MAIN_ZERO_PAGE]; > >> > + */ > >> > + return true; > >> > +} > >> > + > >> > bool migrate_pause_before_switchover(void) > >> > { > >> > MigrationState *s = migrate_get_current(); > >> > diff --git a/migration/options.h b/migration/options.h > >> > index 246c160aee..c901eb57c6 100644 > >> > --- a/migration/options.h > >> > +++ b/migration/options.h > >> > @@ -88,6 +88,7 @@ int migrate_multifd_channels(void); > >> > MultiFDCompression migrate_multifd_compression(void); > >> > int migrate_multifd_zlib_level(void); > >> > int migrate_multifd_zstd_level(void); > >> > +bool migrate_use_main_zero_page(void); > >> > uint8_t migrate_throttle_trigger_threshold(void); > >> > const char *migrate_tls_authz(void); > >> > const char *migrate_tls_creds(void); > >> > diff --git a/qapi/migration.json b/qapi/migration.json > >> > index eb2f883513..80c4b13516 100644 > >> > --- a/qapi/migration.json > >> > +++ b/qapi/migration.json > >> > @@ -531,6 +531,12 @@ > >> > # and can result in more stable read performance. Requires KVM > >> > # with accelerator property "dirty-ring-size" set. (Since 8.1) > >> > # > >> > +# > >> > +# @main-zero-page: If enabled, the detection of zero pages will be > >> > +# done on the main thread. Otherwise it is done on > >> > +# the multifd threads. > >> > +# (since 8.2) > >> > +# > >> Should the capability name be something like "zero-page-detection" or just “zero-page”? > >> CC: Fabiano Rosas > > > > I think the same concern was brought up last time Juan sent out the > > original patchset. Right now, the zero page detection is done in the > > main migration thread and it is always "ON". This change added a > > functionality to move the zero page detection from the main thread to > > the multifd sender threads. Now "main-zero-page" is turned "OFF" by > > default, and zero page checking is done in the multifd sender thread > > (much better performance). If user wants to run the zero page > > detection in the main thread (keep current behavior), user can change > > "main-zero-page" to "ON". > > > > Renaming it to "zero-page-detection" or just “zero-page” can not > > differentiate the old behavior and the new behavior. > > Yes, the main point here is what happens when we try to migrate from > different QEMU versions that have/don't have this code. We need some way > to maintain the compatibility. In this case Juan chose to keep this > capability with the semantics of "old behavior" so that we can enable it > on the new QEMU to match with the old binary that doesn't expect to see > zero pages on the packet/stream. > > > Here are the options: > > 1) Keep the current behavior. "main-zero-page" is OFF by default and > > zero page detection runs on the multifd thread by default. User can > > turn the switch to "ON" if they want old behavior. > > 2) Make "main-zero-page" switch ON as default. This would keep the > > current behavior by default. User can set it to "OFF" for better > > performance. > > 3) Make multifd-zero-page ON by default. User can set it to OFF to get > the old behavior. There was some consideration about how libvirt works > that would make this one unusable, but I don't understand what's that > about. > > I would make this a default ON parameter instead of a capability. If we want to add a knob for zero page, can it start with a string rather than boolean? It might already be helpful for debugging purpose when e.g. someone would like to completely turn off zero page detection just for a comparison. I also believe there can be some corner cases where the guest workload migrates faster without zero page detection: an extreme case is the guest memory always got dirtied 1 byte at the end of each page, where the detection will have a worst case overhead while always returns a !zero page. So that implies a string parameter with: - none: no zero page detection - legacy: only detect in main thread - multifd: use multifd detections Then we can grow that with more HW accelerators. We make machines <=9.0 to use legacy then, with the default to multifd.
diff --git a/migration/options.c b/migration/options.c index 8d8ec73ad9..0f6bd78b9f 100644 --- a/migration/options.c +++ b/migration/options.c @@ -204,6 +204,8 @@ Property migration_properties[] = { DEFINE_PROP_MIG_CAP("x-switchover-ack", MIGRATION_CAPABILITY_SWITCHOVER_ACK), DEFINE_PROP_MIG_CAP("x-dirty-limit", MIGRATION_CAPABILITY_DIRTY_LIMIT), + DEFINE_PROP_MIG_CAP("main-zero-page", + MIGRATION_CAPABILITY_MAIN_ZERO_PAGE), DEFINE_PROP_END_OF_LIST(), }; @@ -284,6 +286,19 @@ bool migrate_multifd(void) return s->capabilities[MIGRATION_CAPABILITY_MULTIFD]; } +bool migrate_use_main_zero_page(void) +{ + /* MigrationState *s; */ + + /* s = migrate_get_current(); */ + + /* + * We will enable this when we add the right code. + * return s->enabled_capabilities[MIGRATION_CAPABILITY_MAIN_ZERO_PAGE]; + */ + return true; +} + bool migrate_pause_before_switchover(void) { MigrationState *s = migrate_get_current(); diff --git a/migration/options.h b/migration/options.h index 246c160aee..c901eb57c6 100644 --- a/migration/options.h +++ b/migration/options.h @@ -88,6 +88,7 @@ int migrate_multifd_channels(void); MultiFDCompression migrate_multifd_compression(void); int migrate_multifd_zlib_level(void); int migrate_multifd_zstd_level(void); +bool migrate_use_main_zero_page(void); uint8_t migrate_throttle_trigger_threshold(void); const char *migrate_tls_authz(void); const char *migrate_tls_creds(void); diff --git a/qapi/migration.json b/qapi/migration.json index eb2f883513..80c4b13516 100644 --- a/qapi/migration.json +++ b/qapi/migration.json @@ -531,6 +531,12 @@ # and can result in more stable read performance. Requires KVM # with accelerator property "dirty-ring-size" set. (Since 8.1) # +# +# @main-zero-page: If enabled, the detection of zero pages will be +# done on the main thread. Otherwise it is done on +# the multifd threads. +# (since 8.2) +# # Features: # # @deprecated: Member @block is deprecated. Use blockdev-mirror with @@ -555,7 +561,7 @@ { 'name': 'x-ignore-shared', 'features': [ 'unstable' ] }, 'validate-uuid', 'background-snapshot', 'zero-copy-send', 'postcopy-preempt', 'switchover-ack', - 'dirty-limit'] } + 'dirty-limit', 'main-zero-page'] } ## # @MigrationCapabilityStatus: