Message ID | 20180514165424.12884-5-zhangckid@gmail.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
* Zhang Chen (zhangckid@gmail.com) wrote: > For COLO FT, both the PVM and SVM run at the same time, > only sync the state while it needs. > > So here, let SVM runs while not doing checkpoint, change > DEFAULT_MIGRATE_X_CHECKPOINT_DELAY to 200*100. > > Besides, we forgot to release colo_checkpoint_semd and > colo_delay_timer, fix them here. > > Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com> > Signed-off-by: Zhang Chen <zhangckid@gmail.com> > Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com> > --- > migration/colo.c | 42 ++++++++++++++++++++++++++++++++++++++++-- > migration/migration.c | 4 ++-- > 2 files changed, 42 insertions(+), 4 deletions(-) > > diff --git a/migration/colo.c b/migration/colo.c > index 4381067ed4..081df1835f 100644 > --- a/migration/colo.c > +++ b/migration/colo.c > @@ -25,8 +25,11 @@ > #include "qemu/error-report.h" > #include "migration/failover.h" > #include "replication.h" > +#include "net/colo-compare.h" > +#include "net/colo.h" > > static bool vmstate_loading; > +static Notifier packets_compare_notifier; > > #define COLO_BUFFER_BASE_SIZE (4 * 1024 * 1024) > > @@ -343,6 +346,11 @@ static int colo_do_checkpoint_transaction(MigrationState *s, > goto out; > } > > + colo_notify_compares_event(NULL, COLO_EVENT_CHECKPOINT, &local_err); > + if (local_err) { > + goto out; > + } > + > /* Disable block migration */ > migrate_set_block_enabled(false, &local_err); > qemu_savevm_state_header(fb); > @@ -400,6 +408,11 @@ out: > return ret; > } > > +static void colo_compare_notify_checkpoint(Notifier *notifier, void *data) > +{ > + colo_checkpoint_notify(data); > +} > + > static void colo_process_checkpoint(MigrationState *s) > { > QIOChannelBuffer *bioc; > @@ -416,6 +429,9 @@ static void colo_process_checkpoint(MigrationState *s) > goto out; > } > > + packets_compare_notifier.notify = colo_compare_notify_checkpoint; > + colo_compare_register_notifier(&packets_compare_notifier); > + > /* > * Wait for Secondary finish loading VM states and enter COLO > * restore. > @@ -461,11 +477,21 @@ out: > qemu_fclose(fb); > } > > - timer_del(s->colo_delay_timer); > - > /* Hope this not to be too long to wait here */ > qemu_sem_wait(&s->colo_exit_sem); > qemu_sem_destroy(&s->colo_exit_sem); > + > + /* > + * It is safe to unregister notifier after failover finished. > + * Besides, colo_delay_timer and colo_checkpoint_sem can't be > + * released befor unregister notifier, or there will be use-after-free > + * error. > + */ > + colo_compare_unregister_notifier(&packets_compare_notifier); > + timer_del(s->colo_delay_timer); > + timer_free(s->colo_delay_timer); > + qemu_sem_destroy(&s->colo_checkpoint_sem); > + > /* > * Must be called after failover BH is completed, > * Or the failover BH may shutdown the wrong fd that > @@ -558,6 +584,11 @@ void *colo_process_incoming_thread(void *opaque) > fb = qemu_fopen_channel_input(QIO_CHANNEL(bioc)); > object_unref(OBJECT(bioc)); > > + qemu_mutex_lock_iothread(); > + vm_start(); > + trace_colo_vm_state_change("stop", "run"); > + qemu_mutex_unlock_iothread(); > + > colo_send_message(mis->to_src_file, COLO_MESSAGE_CHECKPOINT_READY, > &local_err); > if (local_err) { > @@ -577,6 +608,11 @@ void *colo_process_incoming_thread(void *opaque) > goto out; > } > > + qemu_mutex_lock_iothread(); > + vm_stop_force_state(RUN_STATE_COLO); > + trace_colo_vm_state_change("run", "stop"); > + qemu_mutex_unlock_iothread(); > + > /* FIXME: This is unnecessary for periodic checkpoint mode */ > colo_send_message(mis->to_src_file, COLO_MESSAGE_CHECKPOINT_REPLY, > &local_err); > @@ -630,6 +666,8 @@ void *colo_process_incoming_thread(void *opaque) > } > > vmstate_loading = false; > + vm_start(); > + trace_colo_vm_state_change("stop", "run"); > qemu_mutex_unlock_iothread(); > > if (failover_get_state() == FAILOVER_STATUS_RELAUNCH) { > diff --git a/migration/migration.c b/migration/migration.c > index 35f2781b03..bca187275a 100644 > --- a/migration/migration.c > +++ b/migration/migration.c > @@ -76,9 +76,9 @@ > #define DEFAULT_MIGRATE_XBZRLE_CACHE_SIZE (64 * 1024 * 1024) > > /* The delay time (in ms) between two COLO checkpoints > - * Note: Please change this default value to 10000 when we support hybrid mode. > + * Note: Please change this default value to 20000 when we support hybrid mode. You can remove that comment now? Dave > */ > -#define DEFAULT_MIGRATE_X_CHECKPOINT_DELAY 200 > +#define DEFAULT_MIGRATE_X_CHECKPOINT_DELAY (200 * 100) > #define DEFAULT_MIGRATE_MULTIFD_CHANNELS 2 > #define DEFAULT_MIGRATE_MULTIFD_PAGE_COUNT 16 > > -- > 2.17.0 > -- Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
On Wed, May 16, 2018 at 7:12 PM, Dr. David Alan Gilbert <dgilbert@redhat.com > wrote: > * Zhang Chen (zhangckid@gmail.com) wrote: > > For COLO FT, both the PVM and SVM run at the same time, > > only sync the state while it needs. > > > > So here, let SVM runs while not doing checkpoint, change > > DEFAULT_MIGRATE_X_CHECKPOINT_DELAY to 200*100. > > > > Besides, we forgot to release colo_checkpoint_semd and > > colo_delay_timer, fix them here. > > > > Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com> > > Signed-off-by: Zhang Chen <zhangckid@gmail.com> > > Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com> > > --- > > migration/colo.c | 42 ++++++++++++++++++++++++++++++++++++++++-- > > migration/migration.c | 4 ++-- > > 2 files changed, 42 insertions(+), 4 deletions(-) > > > > diff --git a/migration/colo.c b/migration/colo.c > > index 4381067ed4..081df1835f 100644 > > --- a/migration/colo.c > > +++ b/migration/colo.c > > @@ -25,8 +25,11 @@ > > #include "qemu/error-report.h" > > #include "migration/failover.h" > > #include "replication.h" > > +#include "net/colo-compare.h" > > +#include "net/colo.h" > > > > static bool vmstate_loading; > > +static Notifier packets_compare_notifier; > > > > #define COLO_BUFFER_BASE_SIZE (4 * 1024 * 1024) > > > > @@ -343,6 +346,11 @@ static int colo_do_checkpoint_transaction(MigrationState > *s, > > goto out; > > } > > > > + colo_notify_compares_event(NULL, COLO_EVENT_CHECKPOINT, > &local_err); > > + if (local_err) { > > + goto out; > > + } > > + > > /* Disable block migration */ > > migrate_set_block_enabled(false, &local_err); > > qemu_savevm_state_header(fb); > > @@ -400,6 +408,11 @@ out: > > return ret; > > } > > > > +static void colo_compare_notify_checkpoint(Notifier *notifier, void > *data) > > +{ > > + colo_checkpoint_notify(data); > > +} > > + > > static void colo_process_checkpoint(MigrationState *s) > > { > > QIOChannelBuffer *bioc; > > @@ -416,6 +429,9 @@ static void colo_process_checkpoint(MigrationState > *s) > > goto out; > > } > > > > + packets_compare_notifier.notify = colo_compare_notify_checkpoint; > > + colo_compare_register_notifier(&packets_compare_notifier); > > + > > /* > > * Wait for Secondary finish loading VM states and enter COLO > > * restore. > > @@ -461,11 +477,21 @@ out: > > qemu_fclose(fb); > > } > > > > - timer_del(s->colo_delay_timer); > > - > > /* Hope this not to be too long to wait here */ > > qemu_sem_wait(&s->colo_exit_sem); > > qemu_sem_destroy(&s->colo_exit_sem); > > + > > + /* > > + * It is safe to unregister notifier after failover finished. > > + * Besides, colo_delay_timer and colo_checkpoint_sem can't be > > + * released befor unregister notifier, or there will be > use-after-free > > + * error. > > + */ > > + colo_compare_unregister_notifier(&packets_compare_notifier); > > + timer_del(s->colo_delay_timer); > > + timer_free(s->colo_delay_timer); > > + qemu_sem_destroy(&s->colo_checkpoint_sem); > > + > > /* > > * Must be called after failover BH is completed, > > * Or the failover BH may shutdown the wrong fd that > > @@ -558,6 +584,11 @@ void *colo_process_incoming_thread(void *opaque) > > fb = qemu_fopen_channel_input(QIO_CHANNEL(bioc)); > > object_unref(OBJECT(bioc)); > > > > + qemu_mutex_lock_iothread(); > > + vm_start(); > > + trace_colo_vm_state_change("stop", "run"); > > + qemu_mutex_unlock_iothread(); > > + > > colo_send_message(mis->to_src_file, COLO_MESSAGE_CHECKPOINT_READY, > > &local_err); > > if (local_err) { > > @@ -577,6 +608,11 @@ void *colo_process_incoming_thread(void *opaque) > > goto out; > > } > > > > + qemu_mutex_lock_iothread(); > > + vm_stop_force_state(RUN_STATE_COLO); > > + trace_colo_vm_state_change("run", "stop"); > > + qemu_mutex_unlock_iothread(); > > + > > /* FIXME: This is unnecessary for periodic checkpoint mode */ > > colo_send_message(mis->to_src_file, > COLO_MESSAGE_CHECKPOINT_REPLY, > > &local_err); > > @@ -630,6 +666,8 @@ void *colo_process_incoming_thread(void *opaque) > > } > > > > vmstate_loading = false; > > + vm_start(); > > + trace_colo_vm_state_change("stop", "run"); > > qemu_mutex_unlock_iothread(); > > > > if (failover_get_state() == FAILOVER_STATUS_RELAUNCH) { > > diff --git a/migration/migration.c b/migration/migration.c > > index 35f2781b03..bca187275a 100644 > > --- a/migration/migration.c > > +++ b/migration/migration.c > > @@ -76,9 +76,9 @@ > > #define DEFAULT_MIGRATE_XBZRLE_CACHE_SIZE (64 * 1024 * 1024) > > > > /* The delay time (in ms) between two COLO checkpoints > > - * Note: Please change this default value to 10000 when we support > hybrid mode. > > + * Note: Please change this default value to 20000 when we support > hybrid mode. > > You can remove that comment now? > Yes, I will remove it in next version. Thanks Zhang Chen > > Dave > > > */ > > -#define DEFAULT_MIGRATE_X_CHECKPOINT_DELAY 200 > > +#define DEFAULT_MIGRATE_X_CHECKPOINT_DELAY (200 * 100) > > #define DEFAULT_MIGRATE_MULTIFD_CHANNELS 2 > > #define DEFAULT_MIGRATE_MULTIFD_PAGE_COUNT 16 > > > > -- > > 2.17.0 > > > -- > Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK >
diff --git a/migration/colo.c b/migration/colo.c index 4381067ed4..081df1835f 100644 --- a/migration/colo.c +++ b/migration/colo.c @@ -25,8 +25,11 @@ #include "qemu/error-report.h" #include "migration/failover.h" #include "replication.h" +#include "net/colo-compare.h" +#include "net/colo.h" static bool vmstate_loading; +static Notifier packets_compare_notifier; #define COLO_BUFFER_BASE_SIZE (4 * 1024 * 1024) @@ -343,6 +346,11 @@ static int colo_do_checkpoint_transaction(MigrationState *s, goto out; } + colo_notify_compares_event(NULL, COLO_EVENT_CHECKPOINT, &local_err); + if (local_err) { + goto out; + } + /* Disable block migration */ migrate_set_block_enabled(false, &local_err); qemu_savevm_state_header(fb); @@ -400,6 +408,11 @@ out: return ret; } +static void colo_compare_notify_checkpoint(Notifier *notifier, void *data) +{ + colo_checkpoint_notify(data); +} + static void colo_process_checkpoint(MigrationState *s) { QIOChannelBuffer *bioc; @@ -416,6 +429,9 @@ static void colo_process_checkpoint(MigrationState *s) goto out; } + packets_compare_notifier.notify = colo_compare_notify_checkpoint; + colo_compare_register_notifier(&packets_compare_notifier); + /* * Wait for Secondary finish loading VM states and enter COLO * restore. @@ -461,11 +477,21 @@ out: qemu_fclose(fb); } - timer_del(s->colo_delay_timer); - /* Hope this not to be too long to wait here */ qemu_sem_wait(&s->colo_exit_sem); qemu_sem_destroy(&s->colo_exit_sem); + + /* + * It is safe to unregister notifier after failover finished. + * Besides, colo_delay_timer and colo_checkpoint_sem can't be + * released befor unregister notifier, or there will be use-after-free + * error. + */ + colo_compare_unregister_notifier(&packets_compare_notifier); + timer_del(s->colo_delay_timer); + timer_free(s->colo_delay_timer); + qemu_sem_destroy(&s->colo_checkpoint_sem); + /* * Must be called after failover BH is completed, * Or the failover BH may shutdown the wrong fd that @@ -558,6 +584,11 @@ void *colo_process_incoming_thread(void *opaque) fb = qemu_fopen_channel_input(QIO_CHANNEL(bioc)); object_unref(OBJECT(bioc)); + qemu_mutex_lock_iothread(); + vm_start(); + trace_colo_vm_state_change("stop", "run"); + qemu_mutex_unlock_iothread(); + colo_send_message(mis->to_src_file, COLO_MESSAGE_CHECKPOINT_READY, &local_err); if (local_err) { @@ -577,6 +608,11 @@ void *colo_process_incoming_thread(void *opaque) goto out; } + qemu_mutex_lock_iothread(); + vm_stop_force_state(RUN_STATE_COLO); + trace_colo_vm_state_change("run", "stop"); + qemu_mutex_unlock_iothread(); + /* FIXME: This is unnecessary for periodic checkpoint mode */ colo_send_message(mis->to_src_file, COLO_MESSAGE_CHECKPOINT_REPLY, &local_err); @@ -630,6 +666,8 @@ void *colo_process_incoming_thread(void *opaque) } vmstate_loading = false; + vm_start(); + trace_colo_vm_state_change("stop", "run"); qemu_mutex_unlock_iothread(); if (failover_get_state() == FAILOVER_STATUS_RELAUNCH) { diff --git a/migration/migration.c b/migration/migration.c index 35f2781b03..bca187275a 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -76,9 +76,9 @@ #define DEFAULT_MIGRATE_XBZRLE_CACHE_SIZE (64 * 1024 * 1024) /* The delay time (in ms) between two COLO checkpoints - * Note: Please change this default value to 10000 when we support hybrid mode. + * Note: Please change this default value to 20000 when we support hybrid mode. */ -#define DEFAULT_MIGRATE_X_CHECKPOINT_DELAY 200 +#define DEFAULT_MIGRATE_X_CHECKPOINT_DELAY (200 * 100) #define DEFAULT_MIGRATE_MULTIFD_CHANNELS 2 #define DEFAULT_MIGRATE_MULTIFD_PAGE_COUNT 16