diff mbox

[COLO-Frame,v16,23/35] COLO failover: Don't do failover during loading VM's state

Message ID 1460096797-14916-24-git-send-email-zhang.zhanghailiang@huawei.com (mailing list archive)
State New, archived
Headers show

Commit Message

Zhanghailiang April 8, 2016, 6:26 a.m. UTC
We should not do failover work while the main thread is loading
VM's state, otherwise it will destroy the consistent of VM's memory and
device state.

Here we add a new failover status 'RELAUNCH' which means we should
relaunch the process of failover.

Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
---
v14:
- Move the place of 'vmstate_loading = false;'.
v13:
- Add Reviewed-by tag
---
 include/migration/failover.h |  2 ++
 migration/colo.c             | 25 +++++++++++++++++++++++++
 2 files changed, 27 insertions(+)

Comments

Changlong Xie May 6, 2016, 9:09 a.m. UTC | #1
On 04/08/2016 02:26 PM, zhanghailiang wrote:
> We should not do failover work while the main thread is loading
> VM's state, otherwise it will destroy the consistent of VM's memory and
> device state.
>
> Here we add a new failover status 'RELAUNCH' which means we should
> relaunch the process of failover.
>
> Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
> Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com>
> Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
> ---
> v14:
> - Move the place of 'vmstate_loading = false;'.
> v13:
> - Add Reviewed-by tag
> ---
>   include/migration/failover.h |  2 ++
>   migration/colo.c             | 25 +++++++++++++++++++++++++
>   2 files changed, 27 insertions(+)
>
> diff --git a/include/migration/failover.h b/include/migration/failover.h
> index c4bd81e..99b0d58 100644
> --- a/include/migration/failover.h
> +++ b/include/migration/failover.h
> @@ -20,6 +20,8 @@ typedef enum COLOFailoverStatus {
>       FAILOVER_STATUS_REQUEST = 1, /* Request but not handled */
>       FAILOVER_STATUS_HANDLING = 2, /* In the process of handling failover */
>       FAILOVER_STATUS_COMPLETED = 3, /* Finish the failover process */
> +    /* Optional, Relaunch the failover process, again 'NONE' -> 'COMPLETED' */
> +    FAILOVER_STATUS_RELAUNCH = 4,
>   } COLOFailoverStatus;
>
>   void failover_init_state(void);
> diff --git a/migration/colo.c b/migration/colo.c
> index c4989c6..252a290 100644
> --- a/migration/colo.c
> +++ b/migration/colo.c
> @@ -20,6 +20,8 @@
>   #include "migration/failover.h"
>   #include "qapi-event.h"
>
> +static bool vmstate_loading;
> +
>   /* colo buffer */
>   #define COLO_BUFFER_BASE_SIZE (4 * 1024 * 1024)
>
> @@ -52,6 +54,19 @@ static void secondary_vm_do_failover(void)
>       int old_state;
>       MigrationIncomingState *mis = migration_incoming_get_current();
>
> +    /* Can not do failover during the process of VM's loading VMstate, Or
> +      * it will break the secondary VM.

Wrong comments style

> +      */
> +    if (vmstate_loading) {
> +        old_state = failover_set_state(FAILOVER_STATUS_HANDLING,
> +                                       FAILOVER_STATUS_RELAUNCH);
> +        if (old_state != FAILOVER_STATUS_HANDLING) {
> +            error_report("Unknown error while do failover for secondary VM,"
> +                         "old_state: %d", old_state);
> +        }
> +        return;
> +    }
> +
>       migrate_set_state(&mis->state, MIGRATION_STATUS_COLO,
>                         MIGRATION_STATUS_COMPLETED);
>
> @@ -561,13 +576,22 @@ void *colo_process_incoming_thread(void *opaque)
>
>           qemu_mutex_lock_iothread();
>           qemu_system_reset(VMRESET_SILENT);
> +        vmstate_loading = true;
>           if (qemu_loadvm_state(fb) < 0) {
>               error_report("COLO: loadvm failed");
>               qemu_mutex_unlock_iothread();
>               goto out;
>           }
> +
> +        vmstate_loading = false;
>           qemu_mutex_unlock_iothread();
>
> +        if (failover_get_state() == FAILOVER_STATUS_RELAUNCH) {
> +            failover_set_state(FAILOVER_STATUS_RELAUNCH, FAILOVER_STATUS_NONE);
> +            failover_request_active(NULL);
> +            goto out;
> +        }
> +
>           colo_send_message(mis->to_src_file, COLO_MESSAGE_VMSTATE_LOADED,
>                        &local_err);
>           if (local_err) {
> @@ -579,6 +603,7 @@ void *colo_process_incoming_thread(void *opaque)
>       }
>
>   out:
> +     vmstate_loading = false;

An redundant blank.

It seems there are many coding style issues in whole colo.c that you 
did't notice.

Thanks
	-Xie

>       /* Throw the unreported error message after exited from loop */
>       if (local_err) {
>           error_report_err(local_err);
>
Zhanghailiang May 6, 2016, 11:07 a.m. UTC | #2
On 2016/5/6 17:09, Changlong Xie wrote:
> On 04/08/2016 02:26 PM, zhanghailiang wrote:
>> We should not do failover work while the main thread is loading
>> VM's state, otherwise it will destroy the consistent of VM's memory and
>> device state.
>>
>> Here we add a new failover status 'RELAUNCH' which means we should
>> relaunch the process of failover.
>>
>> Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
>> Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com>
>> Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
>> ---
>> v14:
>> - Move the place of 'vmstate_loading = false;'.
>> v13:
>> - Add Reviewed-by tag
>> ---
>>    include/migration/failover.h |  2 ++
>>    migration/colo.c             | 25 +++++++++++++++++++++++++
>>    2 files changed, 27 insertions(+)
>>
>> diff --git a/include/migration/failover.h b/include/migration/failover.h
>> index c4bd81e..99b0d58 100644
>> --- a/include/migration/failover.h
>> +++ b/include/migration/failover.h
>> @@ -20,6 +20,8 @@ typedef enum COLOFailoverStatus {
>>        FAILOVER_STATUS_REQUEST = 1, /* Request but not handled */
>>        FAILOVER_STATUS_HANDLING = 2, /* In the process of handling failover */
>>        FAILOVER_STATUS_COMPLETED = 3, /* Finish the failover process */
>> +    /* Optional, Relaunch the failover process, again 'NONE' -> 'COMPLETED' */
>> +    FAILOVER_STATUS_RELAUNCH = 4,
>>    } COLOFailoverStatus;
>>
>>    void failover_init_state(void);
>> diff --git a/migration/colo.c b/migration/colo.c
>> index c4989c6..252a290 100644
>> --- a/migration/colo.c
>> +++ b/migration/colo.c
>> @@ -20,6 +20,8 @@
>>    #include "migration/failover.h"
>>    #include "qapi-event.h"
>>
>> +static bool vmstate_loading;
>> +
>>    /* colo buffer */
>>    #define COLO_BUFFER_BASE_SIZE (4 * 1024 * 1024)
>>
>> @@ -52,6 +54,19 @@ static void secondary_vm_do_failover(void)
>>        int old_state;
>>        MigrationIncomingState *mis = migration_incoming_get_current();
>>
>> +    /* Can not do failover during the process of VM's loading VMstate, Or
>> +      * it will break the secondary VM.
>
> Wrong comments style
>
>> +      */
>> +    if (vmstate_loading) {
>> +        old_state = failover_set_state(FAILOVER_STATUS_HANDLING,
>> +                                       FAILOVER_STATUS_RELAUNCH);
>> +        if (old_state != FAILOVER_STATUS_HANDLING) {
>> +            error_report("Unknown error while do failover for secondary VM,"
>> +                         "old_state: %d", old_state);
>> +        }
>> +        return;
>> +    }
>> +
>>        migrate_set_state(&mis->state, MIGRATION_STATUS_COLO,
>>                          MIGRATION_STATUS_COMPLETED);
>>
>> @@ -561,13 +576,22 @@ void *colo_process_incoming_thread(void *opaque)
>>
>>            qemu_mutex_lock_iothread();
>>            qemu_system_reset(VMRESET_SILENT);
>> +        vmstate_loading = true;
>>            if (qemu_loadvm_state(fb) < 0) {
>>                error_report("COLO: loadvm failed");
>>                qemu_mutex_unlock_iothread();
>>                goto out;
>>            }
>> +
>> +        vmstate_loading = false;
>>            qemu_mutex_unlock_iothread();
>>
>> +        if (failover_get_state() == FAILOVER_STATUS_RELAUNCH) {
>> +            failover_set_state(FAILOVER_STATUS_RELAUNCH, FAILOVER_STATUS_NONE);
>> +            failover_request_active(NULL);
>> +            goto out;
>> +        }
>> +
>>            colo_send_message(mis->to_src_file, COLO_MESSAGE_VMSTATE_LOADED,
>>                         &local_err);
>>            if (local_err) {
>> @@ -579,6 +603,7 @@ void *colo_process_incoming_thread(void *opaque)
>>        }
>>
>>    out:
>> +     vmstate_loading = false;
>
> An redundant blank.
>
> It seems there are many coding style issues in whole colo.c that you
> did't notice.
>

Good catch, i will fix them all in next version, thanks.

> Thanks
> 	-Xie
>
>>        /* Throw the unreported error message after exited from loop */
>>        if (local_err) {
>>            error_report_err(local_err);
>>
>
>
>
> .
>
diff mbox

Patch

diff --git a/include/migration/failover.h b/include/migration/failover.h
index c4bd81e..99b0d58 100644
--- a/include/migration/failover.h
+++ b/include/migration/failover.h
@@ -20,6 +20,8 @@  typedef enum COLOFailoverStatus {
     FAILOVER_STATUS_REQUEST = 1, /* Request but not handled */
     FAILOVER_STATUS_HANDLING = 2, /* In the process of handling failover */
     FAILOVER_STATUS_COMPLETED = 3, /* Finish the failover process */
+    /* Optional, Relaunch the failover process, again 'NONE' -> 'COMPLETED' */
+    FAILOVER_STATUS_RELAUNCH = 4,
 } COLOFailoverStatus;
 
 void failover_init_state(void);
diff --git a/migration/colo.c b/migration/colo.c
index c4989c6..252a290 100644
--- a/migration/colo.c
+++ b/migration/colo.c
@@ -20,6 +20,8 @@ 
 #include "migration/failover.h"
 #include "qapi-event.h"
 
+static bool vmstate_loading;
+
 /* colo buffer */
 #define COLO_BUFFER_BASE_SIZE (4 * 1024 * 1024)
 
@@ -52,6 +54,19 @@  static void secondary_vm_do_failover(void)
     int old_state;
     MigrationIncomingState *mis = migration_incoming_get_current();
 
+    /* Can not do failover during the process of VM's loading VMstate, Or
+      * it will break the secondary VM.
+      */
+    if (vmstate_loading) {
+        old_state = failover_set_state(FAILOVER_STATUS_HANDLING,
+                                       FAILOVER_STATUS_RELAUNCH);
+        if (old_state != FAILOVER_STATUS_HANDLING) {
+            error_report("Unknown error while do failover for secondary VM,"
+                         "old_state: %d", old_state);
+        }
+        return;
+    }
+
     migrate_set_state(&mis->state, MIGRATION_STATUS_COLO,
                       MIGRATION_STATUS_COMPLETED);
 
@@ -561,13 +576,22 @@  void *colo_process_incoming_thread(void *opaque)
 
         qemu_mutex_lock_iothread();
         qemu_system_reset(VMRESET_SILENT);
+        vmstate_loading = true;
         if (qemu_loadvm_state(fb) < 0) {
             error_report("COLO: loadvm failed");
             qemu_mutex_unlock_iothread();
             goto out;
         }
+
+        vmstate_loading = false;
         qemu_mutex_unlock_iothread();
 
+        if (failover_get_state() == FAILOVER_STATUS_RELAUNCH) {
+            failover_set_state(FAILOVER_STATUS_RELAUNCH, FAILOVER_STATUS_NONE);
+            failover_request_active(NULL);
+            goto out;
+        }
+
         colo_send_message(mis->to_src_file, COLO_MESSAGE_VMSTATE_LOADED,
                      &local_err);
         if (local_err) {
@@ -579,6 +603,7 @@  void *colo_process_incoming_thread(void *opaque)
     }
 
 out:
+     vmstate_loading = false;
     /* Throw the unreported error message after exited from loop */
     if (local_err) {
         error_report_err(local_err);