diff mbox

[COLO-Frame,v18,12/34] COLO: Load VMState into buffer before restore it

Message ID 1470227172-13704-13-git-send-email-zhang.zhanghailiang@huawei.com (mailing list archive)
State New, archived
Headers show

Commit Message

Zhanghailiang Aug. 3, 2016, 12:25 p.m. UTC
We should not destroy the state of SVM (Secondary VM) until we receive
the whole state from the PVM (Primary VM), in case the primary fails in
the middle of sending the state, so, here we cache the device state in
Secondary before restore it.

Besides, we should call qemu_system_reset() before load VM state,
which can ensure the data is intact.

Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com>
Signed-off-by: Gonglei <arei.gonglei@huawei.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Cc: Dr. David Alan Gilbert <dgilbert@redhat.com>
---
v17:
- Replace the old buffer API with the new channel buffer API.
v16:
- Rename colo_get_cmd_value() to colo_receive_mesage_value();
v13:
- Fix the define of colo_get_cmd_value() to use 'Error **errp' instead of
  return value.
v12:
- Use the new helper colo_get_cmd_value() instead of colo_ctl_get()
---
 migration/colo.c | 70 ++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 68 insertions(+), 2 deletions(-)

Comments

Dr. David Alan Gilbert Aug. 5, 2016, 5:53 p.m. UTC | #1
* zhanghailiang (zhang.zhanghailiang@huawei.com) wrote:
> We should not destroy the state of SVM (Secondary VM) until we receive
> the whole state from the PVM (Primary VM), in case the primary fails in
> the middle of sending the state, so, here we cache the device state in
> Secondary before restore it.
> 
> Besides, we should call qemu_system_reset() before load VM state,
> which can ensure the data is intact.
> 
> Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
> Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com>
> Signed-off-by: Gonglei <arei.gonglei@huawei.com>
> Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
> Cc: Dr. David Alan Gilbert <dgilbert@redhat.com>
> ---
> v17:
> - Replace the old buffer API with the new channel buffer API.
> v16:
> - Rename colo_get_cmd_value() to colo_receive_mesage_value();
> v13:
> - Fix the define of colo_get_cmd_value() to use 'Error **errp' instead of
>   return value.
> v12:
> - Use the new helper colo_get_cmd_value() instead of colo_ctl_get()
> ---
>  migration/colo.c | 70 ++++++++++++++++++++++++++++++++++++++++++++++++++++++--
>  1 file changed, 68 insertions(+), 2 deletions(-)
> 
> diff --git a/migration/colo.c b/migration/colo.c
> index 0401629..8fa2104 100644
> --- a/migration/colo.c
> +++ b/migration/colo.c
> @@ -115,6 +115,28 @@ static void colo_receive_check_message(QEMUFile *f, COLOMessage expect_msg,
>      }
>  }
>  
> +static uint64_t colo_receive_message_value(QEMUFile *f, uint32_t expect_msg,
> +                                           Error **errp)
> +{
> +    Error *local_err = NULL;
> +    uint64_t value;
> +    int ret;
> +
> +    colo_receive_check_message(f, expect_msg, &local_err);
> +    if (local_err) {
> +        error_propagate(errp, local_err);
> +        return 0;
> +    }
> +
> +    value = qemu_get_be64(f);
> +    ret = qemu_file_get_error(f);
> +    if (ret < 0) {
> +        error_setg_errno(errp, -ret, "Failed to get value for COLO message: %s",
> +                         COLOMessage_lookup[expect_msg]);
> +    }
> +    return value;
> +}
> +
>  static int colo_do_checkpoint_transaction(MigrationState *s,
>                                            QIOChannelBuffer *bioc,
>                                            QEMUFile *fb)
> @@ -286,6 +308,10 @@ static void colo_wait_handle_message(QEMUFile *f, int *checkpoint_request,
>  void *colo_process_incoming_thread(void *opaque)
>  {
>      MigrationIncomingState *mis = opaque;
> +    QEMUFile *fb = NULL;
> +    QIOChannelBuffer *bioc = NULL; /* Cache incoming device state */
> +    uint64_t total_size;
> +    uint64_t value;
>      Error *local_err = NULL;
>      int ret;
>  
> @@ -310,6 +336,10 @@ void *colo_process_incoming_thread(void *opaque)
>          goto out;
>      }
>  
> +    bioc = qio_channel_buffer_new(COLO_BUFFER_BASE_SIZE);
> +    fb = qemu_fopen_channel_input(QIO_CHANNEL(bioc));
> +    object_unref(OBJECT(bioc));
> +
>      colo_send_message(mis->to_src_file, COLO_MESSAGE_CHECKPOINT_READY,
>                        &local_err);
>      if (local_err) {
> @@ -337,7 +367,30 @@ void *colo_process_incoming_thread(void *opaque)
>              goto out;
>          }
>  
> -        /* TODO: read migration data into colo buffer */
> +        value = colo_receive_message_value(mis->from_src_file,
> +                                 COLO_MESSAGE_VMSTATE_SIZE, &local_err);
> +        if (local_err) {
> +            goto out;
> +        }
> +
> +        /*
> +         * Read VM device state data into channel buffer,
> +         * It's better to re-use the memory allocated.
> +         * Here we need to handle the channel buffer directly.
> +         */
> +        if (value > bioc->capacity) {
> +            bioc->capacity = value;
> +            bioc->data = g_realloc(bioc->data, bioc->capacity);
> +        }

Dan: Are you ok with that, or should there be some io/channel-buffer.c call
to do it?

Dave

> +        total_size = qemu_get_buffer(mis->from_src_file, bioc->data, value);
> +        if (total_size != value) {
> +            error_report("Got %lu VMState data, less than expected %lu",
> +                         total_size, value);
> +            ret = -EINVAL;
> +            goto out;
> +        }
> +        bioc->usage = total_size;
> +        qio_channel_io_seek(QIO_CHANNEL(bioc), 0, 0, NULL);
>  
>          colo_send_message(mis->to_src_file, COLO_MESSAGE_VMSTATE_RECEIVED,
>                       &local_err);
> @@ -345,7 +398,16 @@ void *colo_process_incoming_thread(void *opaque)
>              goto out;
>          }
>  
> -        /* TODO: load vm state */
> +        qemu_mutex_lock_iothread();
> +        qemu_system_reset(VMRESET_SILENT);
> +        if (qemu_loadvm_state(fb) < 0) {
> +            error_report("COLO: loadvm failed");
> +            qemu_mutex_unlock_iothread();
> +            goto out;
> +        }
> +        qemu_mutex_unlock_iothread();
> +
> +        /* TODO: flush vm state */
>  
>          colo_send_message(mis->to_src_file, COLO_MESSAGE_VMSTATE_LOADED,
>                       &local_err);
> @@ -360,6 +422,10 @@ out:
>          error_report_err(local_err);
>      }
>  
> +    if (fb) {
> +        qemu_fclose(fb);
> +    }
> +
>      qemu_mutex_lock_iothread();
>      colo_release_ram_cache();
>      qemu_mutex_unlock_iothread();
> -- 
> 1.8.3.1
> 
> 
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
Daniel P. Berrangé Aug. 8, 2016, 8:52 a.m. UTC | #2
On Fri, Aug 05, 2016 at 06:53:49PM +0100, Dr. David Alan Gilbert wrote:
> * zhanghailiang (zhang.zhanghailiang@huawei.com) wrote:
> > We should not destroy the state of SVM (Secondary VM) until we receive
> > the whole state from the PVM (Primary VM), in case the primary fails in
> > the middle of sending the state, so, here we cache the device state in
> > Secondary before restore it.
> > 
> > Besides, we should call qemu_system_reset() before load VM state,
> > which can ensure the data is intact.
> > 
> > Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
> > Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com>
> > Signed-off-by: Gonglei <arei.gonglei@huawei.com>
> > Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
> > Cc: Dr. David Alan Gilbert <dgilbert@redhat.com>
> > ---
> > v17:
> > - Replace the old buffer API with the new channel buffer API.
> > v16:
> > - Rename colo_get_cmd_value() to colo_receive_mesage_value();
> > v13:
> > - Fix the define of colo_get_cmd_value() to use 'Error **errp' instead of
> >   return value.
> > v12:
> > - Use the new helper colo_get_cmd_value() instead of colo_ctl_get()
> > ---
> >  migration/colo.c | 70 ++++++++++++++++++++++++++++++++++++++++++++++++++++++--
> >  1 file changed, 68 insertions(+), 2 deletions(-)
> > 
> > diff --git a/migration/colo.c b/migration/colo.c
> > index 0401629..8fa2104 100644
> > --- a/migration/colo.c
> > +++ b/migration/colo.c
> > @@ -115,6 +115,28 @@ static void colo_receive_check_message(QEMUFile *f, COLOMessage expect_msg,
> >      }
> >  }
> >  
> > +static uint64_t colo_receive_message_value(QEMUFile *f, uint32_t expect_msg,
> > +                                           Error **errp)
> > +{
> > +    Error *local_err = NULL;
> > +    uint64_t value;
> > +    int ret;
> > +
> > +    colo_receive_check_message(f, expect_msg, &local_err);
> > +    if (local_err) {
> > +        error_propagate(errp, local_err);
> > +        return 0;
> > +    }
> > +
> > +    value = qemu_get_be64(f);
> > +    ret = qemu_file_get_error(f);
> > +    if (ret < 0) {
> > +        error_setg_errno(errp, -ret, "Failed to get value for COLO message: %s",
> > +                         COLOMessage_lookup[expect_msg]);
> > +    }
> > +    return value;
> > +}
> > +
> >  static int colo_do_checkpoint_transaction(MigrationState *s,
> >                                            QIOChannelBuffer *bioc,
> >                                            QEMUFile *fb)
> > @@ -286,6 +308,10 @@ static void colo_wait_handle_message(QEMUFile *f, int *checkpoint_request,
> >  void *colo_process_incoming_thread(void *opaque)
> >  {
> >      MigrationIncomingState *mis = opaque;
> > +    QEMUFile *fb = NULL;
> > +    QIOChannelBuffer *bioc = NULL; /* Cache incoming device state */
> > +    uint64_t total_size;
> > +    uint64_t value;
> >      Error *local_err = NULL;
> >      int ret;
> >  
> > @@ -310,6 +336,10 @@ void *colo_process_incoming_thread(void *opaque)
> >          goto out;
> >      }
> >  
> > +    bioc = qio_channel_buffer_new(COLO_BUFFER_BASE_SIZE);
> > +    fb = qemu_fopen_channel_input(QIO_CHANNEL(bioc));
> > +    object_unref(OBJECT(bioc));
> > +
> >      colo_send_message(mis->to_src_file, COLO_MESSAGE_CHECKPOINT_READY,
> >                        &local_err);
> >      if (local_err) {
> > @@ -337,7 +367,30 @@ void *colo_process_incoming_thread(void *opaque)
> >              goto out;
> >          }
> >  
> > -        /* TODO: read migration data into colo buffer */
> > +        value = colo_receive_message_value(mis->from_src_file,
> > +                                 COLO_MESSAGE_VMSTATE_SIZE, &local_err);
> > +        if (local_err) {
> > +            goto out;
> > +        }
> > +
> > +        /*
> > +         * Read VM device state data into channel buffer,
> > +         * It's better to re-use the memory allocated.
> > +         * Here we need to handle the channel buffer directly.
> > +         */
> > +        if (value > bioc->capacity) {
> > +            bioc->capacity = value;
> > +            bioc->data = g_realloc(bioc->data, bioc->capacity);
> > +        }
> 
> Dan: Are you ok with that, or should there be some io/channel-buffer.c call
> to do it?

While we could add APIs for this, I'm perfectly ok with the buffer
being updated directly too, as its a very simple struct.


Regards,
Daniel
diff mbox

Patch

diff --git a/migration/colo.c b/migration/colo.c
index 0401629..8fa2104 100644
--- a/migration/colo.c
+++ b/migration/colo.c
@@ -115,6 +115,28 @@  static void colo_receive_check_message(QEMUFile *f, COLOMessage expect_msg,
     }
 }
 
+static uint64_t colo_receive_message_value(QEMUFile *f, uint32_t expect_msg,
+                                           Error **errp)
+{
+    Error *local_err = NULL;
+    uint64_t value;
+    int ret;
+
+    colo_receive_check_message(f, expect_msg, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return 0;
+    }
+
+    value = qemu_get_be64(f);
+    ret = qemu_file_get_error(f);
+    if (ret < 0) {
+        error_setg_errno(errp, -ret, "Failed to get value for COLO message: %s",
+                         COLOMessage_lookup[expect_msg]);
+    }
+    return value;
+}
+
 static int colo_do_checkpoint_transaction(MigrationState *s,
                                           QIOChannelBuffer *bioc,
                                           QEMUFile *fb)
@@ -286,6 +308,10 @@  static void colo_wait_handle_message(QEMUFile *f, int *checkpoint_request,
 void *colo_process_incoming_thread(void *opaque)
 {
     MigrationIncomingState *mis = opaque;
+    QEMUFile *fb = NULL;
+    QIOChannelBuffer *bioc = NULL; /* Cache incoming device state */
+    uint64_t total_size;
+    uint64_t value;
     Error *local_err = NULL;
     int ret;
 
@@ -310,6 +336,10 @@  void *colo_process_incoming_thread(void *opaque)
         goto out;
     }
 
+    bioc = qio_channel_buffer_new(COLO_BUFFER_BASE_SIZE);
+    fb = qemu_fopen_channel_input(QIO_CHANNEL(bioc));
+    object_unref(OBJECT(bioc));
+
     colo_send_message(mis->to_src_file, COLO_MESSAGE_CHECKPOINT_READY,
                       &local_err);
     if (local_err) {
@@ -337,7 +367,30 @@  void *colo_process_incoming_thread(void *opaque)
             goto out;
         }
 
-        /* TODO: read migration data into colo buffer */
+        value = colo_receive_message_value(mis->from_src_file,
+                                 COLO_MESSAGE_VMSTATE_SIZE, &local_err);
+        if (local_err) {
+            goto out;
+        }
+
+        /*
+         * Read VM device state data into channel buffer,
+         * It's better to re-use the memory allocated.
+         * Here we need to handle the channel buffer directly.
+         */
+        if (value > bioc->capacity) {
+            bioc->capacity = value;
+            bioc->data = g_realloc(bioc->data, bioc->capacity);
+        }
+        total_size = qemu_get_buffer(mis->from_src_file, bioc->data, value);
+        if (total_size != value) {
+            error_report("Got %lu VMState data, less than expected %lu",
+                         total_size, value);
+            ret = -EINVAL;
+            goto out;
+        }
+        bioc->usage = total_size;
+        qio_channel_io_seek(QIO_CHANNEL(bioc), 0, 0, NULL);
 
         colo_send_message(mis->to_src_file, COLO_MESSAGE_VMSTATE_RECEIVED,
                      &local_err);
@@ -345,7 +398,16 @@  void *colo_process_incoming_thread(void *opaque)
             goto out;
         }
 
-        /* TODO: load vm state */
+        qemu_mutex_lock_iothread();
+        qemu_system_reset(VMRESET_SILENT);
+        if (qemu_loadvm_state(fb) < 0) {
+            error_report("COLO: loadvm failed");
+            qemu_mutex_unlock_iothread();
+            goto out;
+        }
+        qemu_mutex_unlock_iothread();
+
+        /* TODO: flush vm state */
 
         colo_send_message(mis->to_src_file, COLO_MESSAGE_VMSTATE_LOADED,
                      &local_err);
@@ -360,6 +422,10 @@  out:
         error_report_err(local_err);
     }
 
+    if (fb) {
+        qemu_fclose(fb);
+    }
+
     qemu_mutex_lock_iothread();
     colo_release_ram_cache();
     qemu_mutex_unlock_iothread();