diff mbox

[COLO-Frame,v16,13/35] COLO: Load VMState into qsb before restore it

Message ID 1460096797-14916-14-git-send-email-zhang.zhanghailiang@huawei.com (mailing list archive)
State New, archived
Headers show

Commit Message

Zhanghailiang April 8, 2016, 6:26 a.m. UTC
We should not destroy the state of the SVM (Secondary VM) until we have received the
whole state from the PVM (Primary VM), in case the primary fails in the middle of
sending the state. So here we cache the device state on the secondary before restoring it.

Besides, we should call qemu_system_reset() before loading the VM state,
which ensures the data is intact.

Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com>
Signed-off-by: Gonglei <arei.gonglei@huawei.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Cc: Dr. David Alan Gilbert <dgilbert@redhat.com>
---
v16:
- Rename colo_get_cmd_value() to colo_receive_message_value();
v13:
- Fix the definition of colo_get_cmd_value() to report errors through
  'Error **errp' instead of the return value.
v12:
- Use the new helper colo_get_cmd_value() instead of colo_ctl_get()
---
 migration/colo.c | 74 ++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 72 insertions(+), 2 deletions(-)

Comments

Dr. David Alan Gilbert April 22, 2016, 10:12 a.m. UTC | #1
* zhanghailiang (zhang.zhanghailiang@huawei.com) wrote:
> We should not destroy the state of SVM (Secondary VM) until we receive the whole
> state from the PVM (Primary VM), in case the primary fails in the middle of sending
> the state, so, here we cache the device state in Secondary before restore it.
> 
> Besides, we should call qemu_system_reset() before load VM state,
> which can ensure the data is intact.

You'll need to figure out how to merge this with Dan Berrange's TLS series
( http://lists.gnu.org/archive/html/qemu-devel/2016-03/msg04591.html );
in particular note that he's killed off the QEMUSizedBuffer but now has
a QIOChannelBuffer.

I did review his full set last month, so I think it's ready to go.

Dave

> 
> Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
> Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com>
> Signed-off-by: Gonglei <arei.gonglei@huawei.com>
> Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
> Cc: Dr. David Alan Gilbert <dgilbert@redhat.com>
> ---
> v16:
> - Rename colo_get_cmd_value() to colo_receive_mesage_value();
> v13:
> - Fix the define of colo_get_cmd_value() to use 'Error **errp' instead of
>   return value.
> v12:
> - Use the new helper colo_get_cmd_value() instead of colo_ctl_get()
> ---
>  migration/colo.c | 74 ++++++++++++++++++++++++++++++++++++++++++++++++++++++--
>  1 file changed, 72 insertions(+), 2 deletions(-)
> 
> diff --git a/migration/colo.c b/migration/colo.c
> index fb2b705..52a2c3e 100644
> --- a/migration/colo.c
> +++ b/migration/colo.c
> @@ -115,6 +115,28 @@ static void colo_receive_check_message(QEMUFile *f, COLOMessage expect_msg,
>      }
>  }
>  
> +static uint64_t colo_receive_message_value(QEMUFile *f, uint32_t expect_msg,
> +                                           Error **errp)
> +{
> +    Error *local_err = NULL;
> +    uint64_t value;
> +    int ret;
> +
> +    colo_receive_check_message(f, expect_msg, &local_err);
> +    if (local_err) {
> +        error_propagate(errp, local_err);
> +        return 0;
> +    }
> +
> +    value = qemu_get_be64(f);
> +    ret = qemu_file_get_error(f);
> +    if (ret < 0) {
> +        error_setg_errno(errp, -ret, "Failed to get value for COLO message: %s",
> +                         COLOMessage_lookup[expect_msg]);
> +    }
> +    return value;
> +}
> +
>  static int colo_do_checkpoint_transaction(MigrationState *s,
>                                            QEMUSizedBuffer *buffer)
>  {
> @@ -298,6 +320,10 @@ static void colo_wait_handle_message(QEMUFile *f, int *checkpoint_request,
>  void *colo_process_incoming_thread(void *opaque)
>  {
>      MigrationIncomingState *mis = opaque;
> +    QEMUFile *fb = NULL;
> +    QEMUSizedBuffer *buffer = NULL; /* Cache incoming device state */
> +    uint64_t total_size;
> +    uint64_t value;
>      Error *local_err = NULL;
>      int ret;
>  
> @@ -321,6 +347,12 @@ void *colo_process_incoming_thread(void *opaque)
>          goto out;
>      }
>  
> +    buffer = qsb_create(NULL, COLO_BUFFER_BASE_SIZE);
> +    if (buffer == NULL) {
> +        error_report("Failed to allocate colo buffer!");
> +        goto out;
> +    }
> +
>      colo_send_message(mis->to_src_file, COLO_MESSAGE_CHECKPOINT_READY,
>                        &local_err);
>      if (local_err) {
> @@ -348,7 +380,21 @@ void *colo_process_incoming_thread(void *opaque)
>              goto out;
>          }
>  
> -        /* TODO: read migration data into colo buffer */
> +        /* read the VM state total size first */
> +        value = colo_receive_message_value(mis->from_src_file,
> +                                 COLO_MESSAGE_VMSTATE_SIZE, &local_err);
> +        if (local_err) {
> +            goto out;
> +        }
> +
> +        /* read vm device state into colo buffer */
> +        total_size = qsb_fill_buffer(buffer, mis->from_src_file, value);
> +        if (total_size != value) {
> +            error_report("Got %lu VMState data, less than expected %lu",
> +                         total_size, value);
> +            ret = -EINVAL;
> +            goto out;
> +        }
>  
>          colo_send_message(mis->to_src_file, COLO_MESSAGE_VMSTATE_RECEIVED,
>                       &local_err);
> @@ -356,13 +402,32 @@ void *colo_process_incoming_thread(void *opaque)
>              goto out;
>          }
>  
> -        /* TODO: load vm state */
> +        /* open colo buffer for read */
> +        fb = qemu_bufopen("r", buffer);
> +        if (!fb) {
> +            error_report("Can't open colo buffer for read");
> +            goto out;
> +        }
> +
> +        qemu_mutex_lock_iothread();
> +        qemu_system_reset(VMRESET_SILENT);
> +        if (qemu_loadvm_state(fb) < 0) {
> +            error_report("COLO: loadvm failed");
> +            qemu_mutex_unlock_iothread();
> +            goto out;
> +        }
> +        qemu_mutex_unlock_iothread();
> +
> +        /* TODO: flush vm state */
>  
>          colo_send_message(mis->to_src_file, COLO_MESSAGE_VMSTATE_LOADED,
>                       &local_err);
>          if (local_err) {
>              goto out;
>          }
> +
> +        qemu_fclose(fb);
> +        fb = NULL;
>      }
>  
>  out:
> @@ -371,6 +436,11 @@ out:
>          error_report_err(local_err);
>      }
>  
> +    if (fb) {
> +        qemu_fclose(fb);
> +    }
> +    qsb_free(buffer);
> +
>      qemu_mutex_lock_iothread();
>      colo_release_ram_cache();
>      qemu_mutex_unlock_iothread();
> -- 
> 1.8.3.1
> 
> 
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
Zhanghailiang April 25, 2016, 9:17 a.m. UTC | #2
On 2016/4/22 18:12, Dr. David Alan Gilbert wrote:
> * zhanghailiang (zhang.zhanghailiang@huawei.com) wrote:
>> We should not destroy the state of SVM (Secondary VM) until we receive the whole
>> state from the PVM (Primary VM), in case the primary fails in the middle of sending
>> the state, so, here we cache the device state in Secondary before restore it.
>>
>> Besides, we should call qemu_system_reset() before load VM state,
>> which can ensure the data is intact.
>
> You'll need to figure out how to merge this with Dan Berrange's TLS series
> ( http://lists.gnu.org/archive/html/qemu-devel/2016-03/msg04591.html );
> in particular note that he's killed off the QEMUSizedBuffer but now has
> a QIOChannelBuffer.
>
> I did review his full set last month, so I think it's ready to go.

Yes, I already noticed it. If that series is merged first, we will need to
do some more rebase work; I will look at it later.

Thanks,
Hailiang

> Dave
>
>>
>> Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
>> Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com>
>> Signed-off-by: Gonglei <arei.gonglei@huawei.com>
>> Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
>> Cc: Dr. David Alan Gilbert <dgilbert@redhat.com>
>> ---
>> v16:
>> - Rename colo_get_cmd_value() to colo_receive_mesage_value();
>> v13:
>> - Fix the define of colo_get_cmd_value() to use 'Error **errp' instead of
>>    return value.
>> v12:
>> - Use the new helper colo_get_cmd_value() instead of colo_ctl_get()
>> ---
>>   migration/colo.c | 74 ++++++++++++++++++++++++++++++++++++++++++++++++++++++--
>>   1 file changed, 72 insertions(+), 2 deletions(-)
>>
>> diff --git a/migration/colo.c b/migration/colo.c
>> index fb2b705..52a2c3e 100644
>> --- a/migration/colo.c
>> +++ b/migration/colo.c
>> @@ -115,6 +115,28 @@ static void colo_receive_check_message(QEMUFile *f, COLOMessage expect_msg,
>>       }
>>   }
>>
>> +static uint64_t colo_receive_message_value(QEMUFile *f, uint32_t expect_msg,
>> +                                           Error **errp)
>> +{
>> +    Error *local_err = NULL;
>> +    uint64_t value;
>> +    int ret;
>> +
>> +    colo_receive_check_message(f, expect_msg, &local_err);
>> +    if (local_err) {
>> +        error_propagate(errp, local_err);
>> +        return 0;
>> +    }
>> +
>> +    value = qemu_get_be64(f);
>> +    ret = qemu_file_get_error(f);
>> +    if (ret < 0) {
>> +        error_setg_errno(errp, -ret, "Failed to get value for COLO message: %s",
>> +                         COLOMessage_lookup[expect_msg]);
>> +    }
>> +    return value;
>> +}
>> +
>>   static int colo_do_checkpoint_transaction(MigrationState *s,
>>                                             QEMUSizedBuffer *buffer)
>>   {
>> @@ -298,6 +320,10 @@ static void colo_wait_handle_message(QEMUFile *f, int *checkpoint_request,
>>   void *colo_process_incoming_thread(void *opaque)
>>   {
>>       MigrationIncomingState *mis = opaque;
>> +    QEMUFile *fb = NULL;
>> +    QEMUSizedBuffer *buffer = NULL; /* Cache incoming device state */
>> +    uint64_t total_size;
>> +    uint64_t value;
>>       Error *local_err = NULL;
>>       int ret;
>>
>> @@ -321,6 +347,12 @@ void *colo_process_incoming_thread(void *opaque)
>>           goto out;
>>       }
>>
>> +    buffer = qsb_create(NULL, COLO_BUFFER_BASE_SIZE);
>> +    if (buffer == NULL) {
>> +        error_report("Failed to allocate colo buffer!");
>> +        goto out;
>> +    }
>> +
>>       colo_send_message(mis->to_src_file, COLO_MESSAGE_CHECKPOINT_READY,
>>                         &local_err);
>>       if (local_err) {
>> @@ -348,7 +380,21 @@ void *colo_process_incoming_thread(void *opaque)
>>               goto out;
>>           }
>>
>> -        /* TODO: read migration data into colo buffer */
>> +        /* read the VM state total size first */
>> +        value = colo_receive_message_value(mis->from_src_file,
>> +                                 COLO_MESSAGE_VMSTATE_SIZE, &local_err);
>> +        if (local_err) {
>> +            goto out;
>> +        }
>> +
>> +        /* read vm device state into colo buffer */
>> +        total_size = qsb_fill_buffer(buffer, mis->from_src_file, value);
>> +        if (total_size != value) {
>> +            error_report("Got %lu VMState data, less than expected %lu",
>> +                         total_size, value);
>> +            ret = -EINVAL;
>> +            goto out;
>> +        }
>>
>>           colo_send_message(mis->to_src_file, COLO_MESSAGE_VMSTATE_RECEIVED,
>>                        &local_err);
>> @@ -356,13 +402,32 @@ void *colo_process_incoming_thread(void *opaque)
>>               goto out;
>>           }
>>
>> -        /* TODO: load vm state */
>> +        /* open colo buffer for read */
>> +        fb = qemu_bufopen("r", buffer);
>> +        if (!fb) {
>> +            error_report("Can't open colo buffer for read");
>> +            goto out;
>> +        }
>> +
>> +        qemu_mutex_lock_iothread();
>> +        qemu_system_reset(VMRESET_SILENT);
>> +        if (qemu_loadvm_state(fb) < 0) {
>> +            error_report("COLO: loadvm failed");
>> +            qemu_mutex_unlock_iothread();
>> +            goto out;
>> +        }
>> +        qemu_mutex_unlock_iothread();
>> +
>> +        /* TODO: flush vm state */
>>
>>           colo_send_message(mis->to_src_file, COLO_MESSAGE_VMSTATE_LOADED,
>>                        &local_err);
>>           if (local_err) {
>>               goto out;
>>           }
>> +
>> +        qemu_fclose(fb);
>> +        fb = NULL;
>>       }
>>
>>   out:
>> @@ -371,6 +436,11 @@ out:
>>           error_report_err(local_err);
>>       }
>>
>> +    if (fb) {
>> +        qemu_fclose(fb);
>> +    }
>> +    qsb_free(buffer);
>> +
>>       qemu_mutex_lock_iothread();
>>       colo_release_ram_cache();
>>       qemu_mutex_unlock_iothread();
>> --
>> 1.8.3.1
>>
>>
> --
> Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
>
> .
>
diff mbox

Patch

diff --git a/migration/colo.c b/migration/colo.c
index fb2b705..52a2c3e 100644
--- a/migration/colo.c
+++ b/migration/colo.c
@@ -115,6 +115,28 @@  static void colo_receive_check_message(QEMUFile *f, COLOMessage expect_msg,
     }
 }
 
+static uint64_t colo_receive_message_value(QEMUFile *f, uint32_t expect_msg,
+                                           Error **errp)
+{
+    Error *local_err = NULL;
+    uint64_t value;
+    int ret;
+    /* Check for @expect_msg on @f; if that fails, forward the error, return 0 */
+    colo_receive_check_message(f, expect_msg, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return 0;
+    }
+    /* Read the 64-bit value; a file error is reported via errp, not the return */
+    value = qemu_get_be64(f);
+    ret = qemu_file_get_error(f);
+    if (ret < 0) {
+        error_setg_errno(errp, -ret, "Failed to get value for COLO message: %s",
+                         COLOMessage_lookup[expect_msg]);
+    }
+    return value; /* returned even when errp was set above: check errp first */
+}
+
 static int colo_do_checkpoint_transaction(MigrationState *s,
                                           QEMUSizedBuffer *buffer)
 {
@@ -298,6 +320,10 @@  static void colo_wait_handle_message(QEMUFile *f, int *checkpoint_request,
 void *colo_process_incoming_thread(void *opaque)
 {
     MigrationIncomingState *mis = opaque;
+    QEMUFile *fb = NULL;
+    QEMUSizedBuffer *buffer = NULL; /* Cache incoming device state */
+    uint64_t total_size;
+    uint64_t value;
     Error *local_err = NULL;
     int ret;
 
@@ -321,6 +347,12 @@  void *colo_process_incoming_thread(void *opaque)
         goto out;
     }
 
+    buffer = qsb_create(NULL, COLO_BUFFER_BASE_SIZE);
+    if (buffer == NULL) {
+        error_report("Failed to allocate colo buffer!");
+        goto out;
+    }
+
     colo_send_message(mis->to_src_file, COLO_MESSAGE_CHECKPOINT_READY,
                       &local_err);
     if (local_err) {
@@ -348,7 +380,21 @@  void *colo_process_incoming_thread(void *opaque)
             goto out;
         }
 
-        /* TODO: read migration data into colo buffer */
+        /* read the VM state total size first */
+        value = colo_receive_message_value(mis->from_src_file,
+                                 COLO_MESSAGE_VMSTATE_SIZE, &local_err);
+        if (local_err) {
+            goto out;
+        }
+
+        /* read vm device state into colo buffer */
+        total_size = qsb_fill_buffer(buffer, mis->from_src_file, value);
+        if (total_size != value) {
+            error_report("Got %lu VMState data, less than expected %lu",
+                         total_size, value);
+            ret = -EINVAL;
+            goto out;
+        }
 
         colo_send_message(mis->to_src_file, COLO_MESSAGE_VMSTATE_RECEIVED,
                      &local_err);
@@ -356,13 +402,32 @@  void *colo_process_incoming_thread(void *opaque)
             goto out;
         }
 
-        /* TODO: load vm state */
+        /* open colo buffer for read */
+        fb = qemu_bufopen("r", buffer);
+        if (!fb) {
+            error_report("Can't open colo buffer for read");
+            goto out;
+        }
+
+        qemu_mutex_lock_iothread();
+        qemu_system_reset(VMRESET_SILENT);
+        if (qemu_loadvm_state(fb) < 0) {
+            error_report("COLO: loadvm failed");
+            qemu_mutex_unlock_iothread();
+            goto out;
+        }
+        qemu_mutex_unlock_iothread();
+
+        /* TODO: flush vm state */
 
         colo_send_message(mis->to_src_file, COLO_MESSAGE_VMSTATE_LOADED,
                      &local_err);
         if (local_err) {
             goto out;
         }
+
+        qemu_fclose(fb);
+        fb = NULL;
     }
 
 out:
@@ -371,6 +436,11 @@  out:
         error_report_err(local_err);
     }
 
+    if (fb) {
+        qemu_fclose(fb);
+    }
+    qsb_free(buffer);
+
     qemu_mutex_lock_iothread();
     colo_release_ram_cache();
     qemu_mutex_unlock_iothread();