diff mbox series

[1/6] migration/colo.c: Use event instead of semaphore

Message ID fd601ba1beb524aada54ba66e87ebfc12cf4574b.1589193382.git.lukasstraub2@web.de (mailing list archive)
State New, archived
Headers show
Series colo: migration related bugfixes | expand

Commit Message

Lukas Straub May 11, 2020, 11:10 a.m. UTC
If multiple packets miscompare in a short timeframe, the semaphore
value will be increased multiple times. This causes multiple
checkpoints even if one would be sufficient.

Fix this by using a event instead of a semaphore for triggering
checkpoints. Now, checkpoint requests will be ignored until the
checkpoint event is sent to colo-compare (which releases the
miscompared packets).

Benchmark results (iperf3):
Client-to-server tcp:
without patch: ~66 Mbit/s
with patch: ~61 Mbit/s
Server-to-client tcp:
without patch: ~702 Kbit/s
with patch: ~16 Mbit/s

Signed-off-by: Lukas Straub <lukasstraub2@web.de>
---
 migration/colo.c      | 9 +++++----
 migration/migration.h | 4 ++--
 2 files changed, 7 insertions(+), 6 deletions(-)

Comments

Zhanghailiang May 13, 2020, 11:31 a.m. UTC | #1
> If multiple packets miscompare in a short timeframe, the semaphore value
> will be increased multiple times. This causes multiple checkpoints even if one
> would be sufficient.
> 

You right, good catch ;)

Reviewed-by: zhanghailiang <zhang.zhanghailiang@huawei.com>

> Fix this by using a event instead of a semaphore for triggering checkpoints.
> Now, checkpoint requests will be ignored until the checkpoint event is sent
> to colo-compare (which releases the miscompared packets).
> 
> Benchmark results (iperf3):
> Client-to-server tcp:
> without patch: ~66 Mbit/s
> with patch: ~61 Mbit/s
> Server-to-client tcp:
> without patch: ~702 Kbit/s
> with patch: ~16 Mbit/s
> 
> Signed-off-by: Lukas Straub <lukasstraub2@web.de>
> ---
>  migration/colo.c      | 9 +++++----
>  migration/migration.h | 4 ++--
>  2 files changed, 7 insertions(+), 6 deletions(-)
> 
> diff --git a/migration/colo.c b/migration/colo.c index
> a54ac84f41..09168627bc 100644
> --- a/migration/colo.c
> +++ b/migration/colo.c
> @@ -430,6 +430,7 @@ static int
> colo_do_checkpoint_transaction(MigrationState *s,
>          goto out;
>      }
> 
> +    qemu_event_reset(&s->colo_checkpoint_event);
>      colo_notify_compares_event(NULL, COLO_EVENT_CHECKPOINT,
> &local_err);
>      if (local_err) {
>          goto out;
> @@ -580,7 +581,7 @@ static void colo_process_checkpoint(MigrationState
> *s)
>              goto out;
>          }
> 
> -        qemu_sem_wait(&s->colo_checkpoint_sem);
> +        qemu_event_wait(&s->colo_checkpoint_event);
> 
>          if (s->state != MIGRATION_STATUS_COLO) {
>              goto out;
> @@ -628,7 +629,7 @@ out:
>      colo_compare_unregister_notifier(&packets_compare_notifier);
>      timer_del(s->colo_delay_timer);
>      timer_free(s->colo_delay_timer);
> -    qemu_sem_destroy(&s->colo_checkpoint_sem);
> +    qemu_event_destroy(&s->colo_checkpoint_event);
> 
>      /*
>       * Must be called after failover BH is completed, @@ -645,7 +646,7
> @@ void colo_checkpoint_notify(void *opaque)
>      MigrationState *s = opaque;
>      int64_t next_notify_time;
> 
> -    qemu_sem_post(&s->colo_checkpoint_sem);
> +    qemu_event_set(&s->colo_checkpoint_event);
>      s->colo_checkpoint_time =
> qemu_clock_get_ms(QEMU_CLOCK_HOST);
>      next_notify_time = s->colo_checkpoint_time +
>                      s->parameters.x_checkpoint_delay; @@ -655,7
> +656,7 @@ void colo_checkpoint_notify(void *opaque)  void
> migrate_start_colo_process(MigrationState *s)  {
>      qemu_mutex_unlock_iothread();
> -    qemu_sem_init(&s->colo_checkpoint_sem, 0);
> +    qemu_event_init(&s->colo_checkpoint_event, false);
>      s->colo_delay_timer =  timer_new_ms(QEMU_CLOCK_HOST,
>                                  colo_checkpoint_notify, s);
> 
> diff --git a/migration/migration.h b/migration/migration.h index
> 507284e563..f617960522 100644
> --- a/migration/migration.h
> +++ b/migration/migration.h
> @@ -215,8 +215,8 @@ struct MigrationState
>      /* The semaphore is used to notify COLO thread that failover is finished
> */
>      QemuSemaphore colo_exit_sem;
> 
> -    /* The semaphore is used to notify COLO thread to do checkpoint */
> -    QemuSemaphore colo_checkpoint_sem;
> +    /* The event is used to notify COLO thread to do checkpoint */
> +    QemuEvent colo_checkpoint_event;
>      int64_t colo_checkpoint_time;
>      QEMUTimer *colo_delay_timer;
> 
> --
> 2.20.1
diff mbox series

Patch

diff --git a/migration/colo.c b/migration/colo.c
index a54ac84f41..09168627bc 100644
--- a/migration/colo.c
+++ b/migration/colo.c
@@ -430,6 +430,7 @@  static int colo_do_checkpoint_transaction(MigrationState *s,
         goto out;
     }
 
+    qemu_event_reset(&s->colo_checkpoint_event);
     colo_notify_compares_event(NULL, COLO_EVENT_CHECKPOINT, &local_err);
     if (local_err) {
         goto out;
@@ -580,7 +581,7 @@  static void colo_process_checkpoint(MigrationState *s)
             goto out;
         }
 
-        qemu_sem_wait(&s->colo_checkpoint_sem);
+        qemu_event_wait(&s->colo_checkpoint_event);
 
         if (s->state != MIGRATION_STATUS_COLO) {
             goto out;
@@ -628,7 +629,7 @@  out:
     colo_compare_unregister_notifier(&packets_compare_notifier);
     timer_del(s->colo_delay_timer);
     timer_free(s->colo_delay_timer);
-    qemu_sem_destroy(&s->colo_checkpoint_sem);
+    qemu_event_destroy(&s->colo_checkpoint_event);
 
     /*
      * Must be called after failover BH is completed,
@@ -645,7 +646,7 @@  void colo_checkpoint_notify(void *opaque)
     MigrationState *s = opaque;
     int64_t next_notify_time;
 
-    qemu_sem_post(&s->colo_checkpoint_sem);
+    qemu_event_set(&s->colo_checkpoint_event);
     s->colo_checkpoint_time = qemu_clock_get_ms(QEMU_CLOCK_HOST);
     next_notify_time = s->colo_checkpoint_time +
                     s->parameters.x_checkpoint_delay;
@@ -655,7 +656,7 @@  void colo_checkpoint_notify(void *opaque)
 void migrate_start_colo_process(MigrationState *s)
 {
     qemu_mutex_unlock_iothread();
-    qemu_sem_init(&s->colo_checkpoint_sem, 0);
+    qemu_event_init(&s->colo_checkpoint_event, false);
     s->colo_delay_timer =  timer_new_ms(QEMU_CLOCK_HOST,
                                 colo_checkpoint_notify, s);
 
diff --git a/migration/migration.h b/migration/migration.h
index 507284e563..f617960522 100644
--- a/migration/migration.h
+++ b/migration/migration.h
@@ -215,8 +215,8 @@  struct MigrationState
     /* The semaphore is used to notify COLO thread that failover is finished */
     QemuSemaphore colo_exit_sem;
 
-    /* The semaphore is used to notify COLO thread to do checkpoint */
-    QemuSemaphore colo_checkpoint_sem;
+    /* The event is used to notify COLO thread to do checkpoint */
+    QemuEvent colo_checkpoint_event;
     int64_t colo_checkpoint_time;
     QEMUTimer *colo_delay_timer;