diff mbox

[v2,2/3] migration: use the free page reporting feature from balloon

Message ID 1517915299-15349-3-git-send-email-wei.w.wang@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Wang, Wei W Feb. 6, 2018, 11:08 a.m. UTC
Use the free page reporting feature from the balloon device to clear the
bits corresponding to guest free pages from the dirty bitmap, so that the
free memory is not sent.

Signed-off-by: Wei Wang <wei.w.wang@intel.com>
CC: Michael S. Tsirkin <mst@redhat.com>
CC: Juan Quintela <quintela@redhat.com>
---
 migration/ram.c | 24 ++++++++++++++++++++----
 1 file changed, 20 insertions(+), 4 deletions(-)

Comments

Michael S. Tsirkin Feb. 6, 2018, 11:57 p.m. UTC | #1
On Tue, Feb 06, 2018 at 07:08:18PM +0800, Wei Wang wrote:
> Use the free page reporting feature from the balloon device to clear the
> bits corresponding to guest free pages from the dirty bitmap, so that the
> free memory are not sent.
> 
> Signed-off-by: Wei Wang <wei.w.wang@intel.com>
> CC: Michael S. Tsirkin <mst@redhat.com>
> CC: Juan Quintela <quintela@redhat.com>

What the patch seems to do is stop migration
completely - blocking until guest completes the reporting.

Which makes no sense to me, since it's just an optimization.
Why not proceed with the migration? What do we have to lose?

I imagine some people might want to defer migration until reporting
completes to reduce the load on the network. Fair enough,
but it does not look like you actually measured the reduction
in traffic. So I suggest you work on that as a separate feature.


> ---
>  migration/ram.c | 24 ++++++++++++++++++++----
>  1 file changed, 20 insertions(+), 4 deletions(-)
> 
> diff --git a/migration/ram.c b/migration/ram.c
> index d6f462c..4fe16d2 100644
> --- a/migration/ram.c
> +++ b/migration/ram.c
> @@ -49,6 +49,7 @@
>  #include "qemu/rcu_queue.h"
>  #include "migration/colo.h"
>  #include "migration/block.h"
> +#include "sysemu/balloon.h"
>  
>  /***********************************************************/
>  /* ram save/restore */
> @@ -206,6 +207,10 @@ struct RAMState {
>      uint32_t last_version;
>      /* We are in the first round */
>      bool ram_bulk_stage;
> +    /* The feature, skipping the transfer of free pages, is supported */
> +    bool free_page_support;
> +    /* Skip the transfer of free pages in the bulk stage */
> +    bool free_page_done;
>      /* How many times we have dirty too many pages */
>      int dirty_rate_high_cnt;
>      /* these variables are used for bitmap sync */
> @@ -773,7 +778,7 @@ unsigned long migration_bitmap_find_dirty(RAMState *rs, RAMBlock *rb,
>      unsigned long *bitmap = rb->bmap;
>      unsigned long next;
>  
> -    if (rs->ram_bulk_stage && start > 0) {
> +    if (rs->ram_bulk_stage && start > 0 && !rs->free_page_support) {
>          next = start + 1;
>      } else {
>          next = find_next_bit(bitmap, size, start);
> @@ -1653,6 +1658,8 @@ static void ram_state_reset(RAMState *rs)
>      rs->last_page = 0;
>      rs->last_version = ram_list.version;
>      rs->ram_bulk_stage = true;
> +    rs->free_page_support = balloon_free_page_support();
> +    rs->free_page_done = false;
>  }
>  
>  #define MAX_WAIT 50 /* ms, half buffered_file limit */
> @@ -2135,7 +2142,7 @@ static int ram_state_init(RAMState **rsp)
>      return 0;
>  }
>  
> -static void ram_list_init_bitmaps(void)
> +static void ram_list_init_bitmaps(RAMState *rs)
>  {
>      RAMBlock *block;
>      unsigned long pages;
> @@ -2145,7 +2152,11 @@ static void ram_list_init_bitmaps(void)
>          QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
>              pages = block->max_length >> TARGET_PAGE_BITS;
>              block->bmap = bitmap_new(pages);
> -            bitmap_set(block->bmap, 0, pages);
> +            if (rs->free_page_support) {
> +                bitmap_set(block->bmap, 1, pages);
> +            } else {
> +                bitmap_set(block->bmap, 0, pages);
> +            }
>              if (migrate_postcopy_ram()) {
>                  block->unsentmap = bitmap_new(pages);
>                  bitmap_set(block->unsentmap, 0, pages);
> @@ -2161,7 +2172,7 @@ static void ram_init_bitmaps(RAMState *rs)
>      qemu_mutex_lock_ramlist();
>      rcu_read_lock();
>  
> -    ram_list_init_bitmaps();
> +    ram_list_init_bitmaps(rs);
>      memory_global_dirty_log_start();
>      migration_bitmap_sync(rs);
>  
> @@ -2275,6 +2286,11 @@ static int ram_save_iterate(QEMUFile *f, void *opaque)
>  
>      ram_control_before_iterate(f, RAM_CONTROL_ROUND);
>  
> +    if (rs->free_page_support && !rs->free_page_done) {
> +        balloon_free_page_poll();
> +        rs->free_page_done = true;
> +    }
> +
>      t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
>      i = 0;
>      while ((ret = qemu_file_rate_limit(f)) == 0) {
> -- 
> 1.8.3.1
Wang, Wei W Feb. 8, 2018, 3:54 a.m. UTC | #2
On 02/07/2018 07:57 AM, Michael S. Tsirkin wrote:
> On Tue, Feb 06, 2018 at 07:08:18PM +0800, Wei Wang wrote:
>> Use the free page reporting feature from the balloon device to clear the
>> bits corresponding to guest free pages from the dirty bitmap, so that the
>> free memory are not sent.
>>
>> Signed-off-by: Wei Wang <wei.w.wang@intel.com>
>> CC: Michael S. Tsirkin <mst@redhat.com>
>> CC: Juan Quintela <quintela@redhat.com>
> What the patch seems to do is stop migration
> completely - blocking until guest completes the reporting.
>
> Which makes no sense to me, since it's just an optimization.
> Why not proceed with the migration? What do we have to loose?

If we want the optimization to run in parallel with the migration 
thread, we will need to create another polling thread, like 
multithreading compression. In that way, we will waste some host CPU. 
For example, the migration thread may proceed to send pages to the 
destination while the optimization thread is in progress, but those 
pages may turn out to be free pages (this is likely in the bulk stage) 
which don't need to be sent. In that case, why not let the migration 
thread wait a little bit (i.e. put the optimization into the migration 
thread) and proceed to do some useful things, instead of pretending to 
proceed but doing useless things?

The current plan of this patch is to skip free pages for the bulk stage 
only. I'm not sure if it would be useful for the 2nd stage onward, which 
basically relies on the dirty logging to send pages that have been 
written by the guest. For example, if the guest is not so active while 
live migration happens, there will be very few dirty bits. This 
optimization would be mostly clearing "0" bits from the dirty bitmap.


> I imagine some people might want to defer migration until reporting
> completes to reduce the load on the network. Fair enough,
> but it does not look like you actually measured the reduction
> in traffic. So I suggest you work on that as a separate feature.
>

I have the traffic data actually. Tested with an 8G idle guest, Legacy vs. 
Optimization: ~390MB vs. ~337MB.
The legacy case has the zero page checking optimization, so the traffic 
reduction is not very obvious. But zero checking has much more overhead, 
which is demonstrated by the migration time (this optimization takes 
~14% of the legacy migration time).


Best,
Wei
Dr. David Alan Gilbert Feb. 9, 2018, 11:50 a.m. UTC | #3
* Wei Wang (wei.w.wang@intel.com) wrote:
> Use the free page reporting feature from the balloon device to clear the
> bits corresponding to guest free pages from the dirty bitmap, so that the
> free memory are not sent.
> 
> Signed-off-by: Wei Wang <wei.w.wang@intel.com>
> CC: Michael S. Tsirkin <mst@redhat.com>
> CC: Juan Quintela <quintela@redhat.com>
> ---
>  migration/ram.c | 24 ++++++++++++++++++++----
>  1 file changed, 20 insertions(+), 4 deletions(-)
> 
> diff --git a/migration/ram.c b/migration/ram.c
> index d6f462c..4fe16d2 100644
> --- a/migration/ram.c
> +++ b/migration/ram.c
> @@ -49,6 +49,7 @@
>  #include "qemu/rcu_queue.h"
>  #include "migration/colo.h"
>  #include "migration/block.h"
> +#include "sysemu/balloon.h"
>  
>  /***********************************************************/
>  /* ram save/restore */
> @@ -206,6 +207,10 @@ struct RAMState {
>      uint32_t last_version;
>      /* We are in the first round */
>      bool ram_bulk_stage;
> +    /* The feature, skipping the transfer of free pages, is supported */
> +    bool free_page_support;
> +    /* Skip the transfer of free pages in the bulk stage */
> +    bool free_page_done;
>      /* How many times we have dirty too many pages */
>      int dirty_rate_high_cnt;
>      /* these variables are used for bitmap sync */
> @@ -773,7 +778,7 @@ unsigned long migration_bitmap_find_dirty(RAMState *rs, RAMBlock *rb,
>      unsigned long *bitmap = rb->bmap;
>      unsigned long next;
>  
> -    if (rs->ram_bulk_stage && start > 0) {
> +    if (rs->ram_bulk_stage && start > 0 && !rs->free_page_support) {
>          next = start + 1;
>      } else {
>          next = find_next_bit(bitmap, size, start);
> @@ -1653,6 +1658,8 @@ static void ram_state_reset(RAMState *rs)
>      rs->last_page = 0;
>      rs->last_version = ram_list.version;
>      rs->ram_bulk_stage = true;
> +    rs->free_page_support = balloon_free_page_support();
> +    rs->free_page_done = false;
>  }
>  
>  #define MAX_WAIT 50 /* ms, half buffered_file limit */
> @@ -2135,7 +2142,7 @@ static int ram_state_init(RAMState **rsp)
>      return 0;
>  }
>  
> -static void ram_list_init_bitmaps(void)
> +static void ram_list_init_bitmaps(RAMState *rs)
>  {
>      RAMBlock *block;
>      unsigned long pages;
> @@ -2145,7 +2152,11 @@ static void ram_list_init_bitmaps(void)
>          QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
>              pages = block->max_length >> TARGET_PAGE_BITS;
>              block->bmap = bitmap_new(pages);
> -            bitmap_set(block->bmap, 0, pages);
> +            if (rs->free_page_support) {
> +                bitmap_set(block->bmap, 1, pages);

I don't understand how it makes sense to do that here;
ignoring anything ese it means that migration_dirty_pages is wrong
which could end up with migration finishing before all real pages are
sent.

Dave

> +            } else {
> +                bitmap_set(block->bmap, 0, pages);
> +            }
>              if (migrate_postcopy_ram()) {
>                  block->unsentmap = bitmap_new(pages);
>                  bitmap_set(block->unsentmap, 0, pages);
> @@ -2161,7 +2172,7 @@ static void ram_init_bitmaps(RAMState *rs)
>      qemu_mutex_lock_ramlist();
>      rcu_read_lock();
>  
> -    ram_list_init_bitmaps();
> +    ram_list_init_bitmaps(rs);
>      memory_global_dirty_log_start();
>      migration_bitmap_sync(rs);
>  
> @@ -2275,6 +2286,11 @@ static int ram_save_iterate(QEMUFile *f, void *opaque)
>  
>      ram_control_before_iterate(f, RAM_CONTROL_ROUND);
>  
> +    if (rs->free_page_support && !rs->free_page_done) {
> +        balloon_free_page_poll();
> +        rs->free_page_done = true;
> +    }
> +
>      t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
>      i = 0;
>      while ((ret = qemu_file_rate_limit(f)) == 0) {
> -- 
> 1.8.3.1
> 
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
Wang, Wei W Feb. 26, 2018, 5:07 a.m. UTC | #4
On 02/09/2018 07:50 PM, Dr. David Alan Gilbert wrote:
> * Wei Wang (wei.w.wang@intel.com) wrote:
>> Use the free page reporting feature from the balloon device to clear the
>> bits corresponding to guest free pages from the dirty bitmap, so that the
>> free memory are not sent.
>>
>> Signed-off-by: Wei Wang <wei.w.wang@intel.com>
>> CC: Michael S. Tsirkin <mst@redhat.com>
>> CC: Juan Quintela <quintela@redhat.com>
>> ---
>>   migration/ram.c | 24 ++++++++++++++++++++----
>>   1 file changed, 20 insertions(+), 4 deletions(-)
>>
>> diff --git a/migration/ram.c b/migration/ram.c
>> index d6f462c..4fe16d2 100644
>> --- a/migration/ram.c
>> +++ b/migration/ram.c
>> @@ -49,6 +49,7 @@
>>   #include "qemu/rcu_queue.h"
>>   #include "migration/colo.h"
>>   #include "migration/block.h"
>> +#include "sysemu/balloon.h"
>>   
>>   /***********************************************************/
>>   /* ram save/restore */
>> @@ -206,6 +207,10 @@ struct RAMState {
>>       uint32_t last_version;
>>       /* We are in the first round */
>>       bool ram_bulk_stage;
>> +    /* The feature, skipping the transfer of free pages, is supported */
>> +    bool free_page_support;
>> +    /* Skip the transfer of free pages in the bulk stage */
>> +    bool free_page_done;
>>       /* How many times we have dirty too many pages */
>>       int dirty_rate_high_cnt;
>>       /* these variables are used for bitmap sync */
>> @@ -773,7 +778,7 @@ unsigned long migration_bitmap_find_dirty(RAMState *rs, RAMBlock *rb,
>>       unsigned long *bitmap = rb->bmap;
>>       unsigned long next;
>>   
>> -    if (rs->ram_bulk_stage && start > 0) {
>> +    if (rs->ram_bulk_stage && start > 0 && !rs->free_page_support) {
>>           next = start + 1;
>>       } else {
>>           next = find_next_bit(bitmap, size, start);
>> @@ -1653,6 +1658,8 @@ static void ram_state_reset(RAMState *rs)
>>       rs->last_page = 0;
>>       rs->last_version = ram_list.version;
>>       rs->ram_bulk_stage = true;
>> +    rs->free_page_support = balloon_free_page_support();
>> +    rs->free_page_done = false;
>>   }
>>   
>>   #define MAX_WAIT 50 /* ms, half buffered_file limit */
>> @@ -2135,7 +2142,7 @@ static int ram_state_init(RAMState **rsp)
>>       return 0;
>>   }
>>   
>> -static void ram_list_init_bitmaps(void)
>> +static void ram_list_init_bitmaps(RAMState *rs)
>>   {
>>       RAMBlock *block;
>>       unsigned long pages;
>> @@ -2145,7 +2152,11 @@ static void ram_list_init_bitmaps(void)
>>           QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
>>               pages = block->max_length >> TARGET_PAGE_BITS;
>>               block->bmap = bitmap_new(pages);
>> -            bitmap_set(block->bmap, 0, pages);
>> +            if (rs->free_page_support) {
>> +                bitmap_set(block->bmap, 1, pages);
> I don't understand how it makes sense to do that here;
> ignoring anything ese it means that migration_dirty_pages is wrong
> which could end up with migration finishing before all real pages are
> sent.
>

The bulk stage treats all the pages as dirty pages, so we set all the 
bits to "1", this is needed by this optimization feature, because the 
free pages reported from the guest can then be directly cleared from the 
bitmap (we don't need any more bitmaps to record free pages).

Why is migration_dirty_pages incorrect?

Best,
Wei
Wang, Wei W Feb. 26, 2018, 9:22 a.m. UTC | #5
On Monday, February 26, 2018 1:07 PM, Wei Wang wrote:
> On 02/09/2018 07:50 PM, Dr. David Alan Gilbert wrote:
> > * Wei Wang (wei.w.wang@intel.com) wrote:
> >> Use the free page reporting feature from the balloon device to clear
> >> the bits corresponding to guest free pages from the dirty bitmap, so
> >> that the free memory are not sent.
> >>
> >> Signed-off-by: Wei Wang <wei.w.wang@intel.com>
> >> CC: Michael S. Tsirkin <mst@redhat.com>
> >> CC: Juan Quintela <quintela@redhat.com>
> >> ---
> >>   migration/ram.c | 24 ++++++++++++++++++++----
> >>   1 file changed, 20 insertions(+), 4 deletions(-)
> >>
> >> diff --git a/migration/ram.c b/migration/ram.c index d6f462c..4fe16d2
> >> 100644
> >> --- a/migration/ram.c
> >> +++ b/migration/ram.c
> >> @@ -49,6 +49,7 @@
> >>   #include "qemu/rcu_queue.h"
> >>   #include "migration/colo.h"
> >>   #include "migration/block.h"
> >> +#include "sysemu/balloon.h"
> >>
> >>   /***********************************************************/
> >>   /* ram save/restore */
> >> @@ -206,6 +207,10 @@ struct RAMState {
> >>       uint32_t last_version;
> >>       /* We are in the first round */
> >>       bool ram_bulk_stage;
> >> +    /* The feature, skipping the transfer of free pages, is supported */
> >> +    bool free_page_support;
> >> +    /* Skip the transfer of free pages in the bulk stage */
> >> +    bool free_page_done;
> >>       /* How many times we have dirty too many pages */
> >>       int dirty_rate_high_cnt;
> >>       /* these variables are used for bitmap sync */ @@ -773,7 +778,7
> >> @@ unsigned long migration_bitmap_find_dirty(RAMState *rs, RAMBlock
> *rb,
> >>       unsigned long *bitmap = rb->bmap;
> >>       unsigned long next;
> >>
> >> -    if (rs->ram_bulk_stage && start > 0) {
> >> +    if (rs->ram_bulk_stage && start > 0 && !rs->free_page_support) {
> >>           next = start + 1;
> >>       } else {
> >>           next = find_next_bit(bitmap, size, start); @@ -1653,6
> >> +1658,8 @@ static void ram_state_reset(RAMState *rs)
> >>       rs->last_page = 0;
> >>       rs->last_version = ram_list.version;
> >>       rs->ram_bulk_stage = true;
> >> +    rs->free_page_support = balloon_free_page_support();
> >> +    rs->free_page_done = false;
> >>   }
> >>
> >>   #define MAX_WAIT 50 /* ms, half buffered_file limit */ @@ -2135,7
> >> +2142,7 @@ static int ram_state_init(RAMState **rsp)
> >>       return 0;
> >>   }
> >>
> >> -static void ram_list_init_bitmaps(void)
> >> +static void ram_list_init_bitmaps(RAMState *rs)
> >>   {
> >>       RAMBlock *block;
> >>       unsigned long pages;
> >> @@ -2145,7 +2152,11 @@ static void ram_list_init_bitmaps(void)
> >>           QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
> >>               pages = block->max_length >> TARGET_PAGE_BITS;
> >>               block->bmap = bitmap_new(pages);
> >> -            bitmap_set(block->bmap, 0, pages);
> >> +            if (rs->free_page_support) {
> >> +                bitmap_set(block->bmap, 1, pages);
> > I don't understand how it makes sense to do that here; ignoring
> > anything ese it means that migration_dirty_pages is wrong which could
> > end up with migration finishing before all real pages are sent.
> >
> 
> The bulk stage treats all the pages as dirty pages, so we set all the bits to "1",
> this is needed by this optimization feature, because the free pages reported
> from the guest can then be directly cleared from the bitmap (we don't need
> any more bitmaps to record free pages).
> 

Sorry, there was a misunderstanding of the bitmap_set API (thought it was used to set all the bits to 1 or 0). So the above change isn't needed actually. Btw, this doesn't affect the results I reported.

Best,
Wei
diff mbox

Patch

diff --git a/migration/ram.c b/migration/ram.c
index d6f462c..4fe16d2 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -49,6 +49,7 @@ 
 #include "qemu/rcu_queue.h"
 #include "migration/colo.h"
 #include "migration/block.h"
+#include "sysemu/balloon.h"
 
 /***********************************************************/
 /* ram save/restore */
@@ -206,6 +207,10 @@  struct RAMState {
     uint32_t last_version;
     /* We are in the first round */
     bool ram_bulk_stage;
+    /* The feature, skipping the transfer of free pages, is supported */
+    bool free_page_support;
+    /* Skip the transfer of free pages in the bulk stage */
+    bool free_page_done;
     /* How many times we have dirty too many pages */
     int dirty_rate_high_cnt;
     /* these variables are used for bitmap sync */
@@ -773,7 +778,7 @@  unsigned long migration_bitmap_find_dirty(RAMState *rs, RAMBlock *rb,
     unsigned long *bitmap = rb->bmap;
     unsigned long next;
 
-    if (rs->ram_bulk_stage && start > 0) {
+    if (rs->ram_bulk_stage && start > 0 && !rs->free_page_support) {
         next = start + 1;
     } else {
         next = find_next_bit(bitmap, size, start);
@@ -1653,6 +1658,8 @@  static void ram_state_reset(RAMState *rs)
     rs->last_page = 0;
     rs->last_version = ram_list.version;
     rs->ram_bulk_stage = true;
+    rs->free_page_support = balloon_free_page_support();
+    rs->free_page_done = false;
 }
 
 #define MAX_WAIT 50 /* ms, half buffered_file limit */
@@ -2135,7 +2142,7 @@  static int ram_state_init(RAMState **rsp)
     return 0;
 }
 
-static void ram_list_init_bitmaps(void)
+static void ram_list_init_bitmaps(RAMState *rs)
 {
     RAMBlock *block;
     unsigned long pages;
@@ -2145,7 +2152,11 @@  static void ram_list_init_bitmaps(void)
         QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
             pages = block->max_length >> TARGET_PAGE_BITS;
             block->bmap = bitmap_new(pages);
-            bitmap_set(block->bmap, 0, pages);
+            if (rs->free_page_support) {
+                bitmap_set(block->bmap, 1, pages);
+            } else {
+                bitmap_set(block->bmap, 0, pages);
+            }
             if (migrate_postcopy_ram()) {
                 block->unsentmap = bitmap_new(pages);
                 bitmap_set(block->unsentmap, 0, pages);
@@ -2161,7 +2172,7 @@  static void ram_init_bitmaps(RAMState *rs)
     qemu_mutex_lock_ramlist();
     rcu_read_lock();
 
-    ram_list_init_bitmaps();
+    ram_list_init_bitmaps(rs);
     memory_global_dirty_log_start();
     migration_bitmap_sync(rs);
 
@@ -2275,6 +2286,11 @@  static int ram_save_iterate(QEMUFile *f, void *opaque)
 
     ram_control_before_iterate(f, RAM_CONTROL_ROUND);
 
+    if (rs->free_page_support && !rs->free_page_done) {
+        balloon_free_page_poll();
+        rs->free_page_done = true;
+    }
+
     t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
     i = 0;
     while ((ret = qemu_file_rate_limit(f)) == 0) {