diff mbox series

[RESEND,RFC,02/10] migration: Refine util functions to support periodic CPU throttle

Message ID 83f9756eb21e21300ced2bf77fdb3585a918f05e.1725891841.git.yong.huang@smartx.com (mailing list archive)
State New, archived
Headers show
Series migration: auto-converge refinements for huge VM | expand

Commit Message

Yong Huang Sept. 9, 2024, 2:25 p.m. UTC
Add a periodic argument to migration_bitmap_sync() and
ramblock_sync_dirty_bitmap(), and introduce the global sync_mode
variable to track the sync mode. This supports periodic CPU throttling
while keeping backward compatibility: the legacy mode stays the default.

Signed-off-by: Hyman Huang <yong.huang@smartx.com>
---
 include/exec/ram_addr.h | 107 +++++++++++++++++++++++++++++++++++++---
 migration/ram.c         |  49 ++++++++++++++----
 2 files changed, 140 insertions(+), 16 deletions(-)
diff mbox series

Patch

diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h
index 891c44cf2d..7df926ed96 100644
--- a/include/exec/ram_addr.h
+++ b/include/exec/ram_addr.h
@@ -472,17 +472,68 @@  static inline void cpu_physical_memory_clear_dirty_range(ram_addr_t start,
     cpu_physical_memory_test_and_clear_dirty(start, length, DIRTY_MEMORY_CODE);
 }
 
+/*
+ * For each target page in [start, start + length): if it is marked in
+ * rb->shadow_bmap but no longer in rb->bmap, clear it in rb->iter_bmap.
+ */
+static inline void ramblock_clear_iter_bmap(RAMBlock *rb, ram_addr_t start,
+                                            ram_addr_t length)
+{
+    ram_addr_t addr;
+
+    for (addr = 0; addr < length; addr += TARGET_PAGE_SIZE) {
+        long k = (start + addr) >> TARGET_PAGE_BITS;
+        if (test_bit(k, rb->shadow_bmap) && !test_bit(k, rb->bmap)) {
+            /* Page has been sent since the backup, so stop tracking it */
+            clear_bit(k, rb->iter_bmap);
+        }
+    }
+}
+
+/*
+ * Set in rb->bmap every target page in [start, start + length) that is
+ * marked in rb->iter_bmap, adding each newly set page to iter_dirty_pages.
+ */
+static inline void ramblock_update_iter_bmap(RAMBlock *rb, ram_addr_t start,
+                                             ram_addr_t length)
+{
+    ram_addr_t addr;
+
+    for (addr = 0; addr < length; addr += TARGET_PAGE_SIZE) {
+        long k = (start + addr) >> TARGET_PAGE_BITS;
+
+        /* Count only pages that were not already dirty in rb->bmap */
+        if (test_bit(k, rb->iter_bmap) && !test_and_set_bit(k, rb->bmap)) {
+            rb->iter_dirty_pages++;
+        }
+    }
+}
 
 /* Called with RCU critical section */
 static inline
 uint64_t cpu_physical_memory_sync_dirty_bitmap(RAMBlock *rb,
                                                ram_addr_t start,
-                                               ram_addr_t length)
+                                               ram_addr_t length,
+                                               unsigned int flag)
 {
     ram_addr_t addr;
     unsigned long word = BIT_WORD((start + rb->offset) >> TARGET_PAGE_BITS);
     uint64_t num_dirty = 0;
     unsigned long *dest = rb->bmap;
+    unsigned long *shadow_bmap = rb->shadow_bmap;
+    unsigned long *iter_bmap = rb->iter_bmap;
+
+    assert(flag && !(flag & (~RAMBLOCK_SYN_MASK)));
+
+    /*
+     * We must remove the sent dirty page from the iter_bmap in order to
+     * minimize redundant page transfers if periodic sync has appeared
+     * during this iteration.
+     */
+    if (rb->periodic_sync_shown_up &&
+        (flag & (RAMBLOCK_SYN_MODERN_ITER | RAMBLOCK_SYN_MODERN_PERIOD))) {
+        ramblock_clear_iter_bmap(rb, start, length);
+    }
 
     /* start address and length is aligned at the start of a word? */
     if (((word * BITS_PER_LONG) << TARGET_PAGE_BITS) ==
@@ -503,8 +554,20 @@  uint64_t cpu_physical_memory_sync_dirty_bitmap(RAMBlock *rb,
             if (src[idx][offset]) {
                 unsigned long bits = qatomic_xchg(&src[idx][offset], 0);
                 unsigned long new_dirty;
+                if (flag & (RAMBLOCK_SYN_MODERN_ITER |
+                            RAMBLOCK_SYN_MODERN_PERIOD)) {
+                    /* Back-up bmap for the next iteration */
+                    iter_bmap[k] |= bits;
+                    if (flag == RAMBLOCK_SYN_MODERN_PERIOD) {
+                        /* Back-up bmap to detect pages has been sent */
+                        shadow_bmap[k] = dest[k];
+                    }
+                }
                 new_dirty = ~dest[k];
-                dest[k] |= bits;
+                if (flag == RAMBLOCK_SYN_LEGACY_ITER) {
+                    dest[k] |= bits;
+                }
+
                 new_dirty &= bits;
                 num_dirty += ctpopl(new_dirty);
             }
@@ -534,18 +597,50 @@  uint64_t cpu_physical_memory_sync_dirty_bitmap(RAMBlock *rb,
         ram_addr_t offset = rb->offset;
 
         for (addr = 0; addr < length; addr += TARGET_PAGE_SIZE) {
-            if (cpu_physical_memory_test_and_clear_dirty(
+            bool dirty = false;
+            long k = (start + addr) >> TARGET_PAGE_BITS;
+            if (flag == RAMBLOCK_SYN_MODERN_PERIOD) {
+                if (test_bit(k, dest)) {
+                    /* Back-up bmap to detect pages has been sent */
+                    set_bit(k, shadow_bmap);
+                }
+            }
+
+            dirty = cpu_physical_memory_test_and_clear_dirty(
                         start + addr + offset,
                         TARGET_PAGE_SIZE,
-                        DIRTY_MEMORY_MIGRATION)) {
-                long k = (start + addr) >> TARGET_PAGE_BITS;
-                if (!test_and_set_bit(k, dest)) {
+                        DIRTY_MEMORY_MIGRATION);
+
+            if (flag == RAMBLOCK_SYN_LEGACY_ITER) {
+                if (dirty && !test_and_set_bit(k, dest)) {
                     num_dirty++;
                 }
+            } else {
+                if (dirty) {
+                    if (!test_bit(k, dest)) {
+                        num_dirty++;
+                    }
+                    /* Back-up bmap for the next iteration */
+                    set_bit(k, iter_bmap);
+                }
             }
         }
     }
 
+    /*
+     * We have to re-fetch dirty pages from the iter_bmap one by one.
+     * It's possible that not all of the dirty pages that meant to
+     * send in the current iteration are included in the bitmap
+     * that the current sync retrieved from the KVM.
+     */
+    if (flag == RAMBLOCK_SYN_MODERN_ITER) {
+        ramblock_update_iter_bmap(rb, start, length);
+    }
+
+    if (flag == RAMBLOCK_SYN_MODERN_PERIOD) {
+        rb->periodic_sync_shown_up = true;
+    }
+
     return num_dirty;
 }
 #endif
diff --git a/migration/ram.c b/migration/ram.c
index f29faa82d6..a56634eb46 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -112,6 +112,8 @@ 
 
 XBZRLECacheStats xbzrle_counters;
 
+static RAMBlockSynMode sync_mode = RAMBLOCK_SYN_LEGACY;
+
 /* used by the search for pages to send */
 struct PageSearchStatus {
     /* The migration channel used for a specific host page */
@@ -912,13 +914,38 @@  bool ramblock_page_is_discarded(RAMBlock *rb, ram_addr_t start)
     return false;
 }
 
+static void ramblock_reset_iter_stats(RAMBlock *rb)
+{
+    bitmap_clear(rb->shadow_bmap, 0, rb->used_length >> TARGET_PAGE_BITS);
+    bitmap_clear(rb->iter_bmap, 0, rb->used_length >> TARGET_PAGE_BITS);
+    rb->iter_dirty_pages = 0; /* restart the per-iteration page count */
+    rb->periodic_sync_shown_up = false; /* set again by a periodic sync */
+}
+
 /* Called with RCU critical section */
-static void ramblock_sync_dirty_bitmap(RAMState *rs, RAMBlock *rb)
+static void ramblock_sync_dirty_bitmap(RAMState *rs,
+                                       RAMBlock *rb,
+                                       bool periodic)
 {
-    uint64_t new_dirty_pages =
-        cpu_physical_memory_sync_dirty_bitmap(rb, 0, rb->used_length);
+    uint64_t new_dirty_pages;
+    unsigned int flag = RAMBLOCK_SYN_LEGACY_ITER;
+    /* @periodic is honoured only when sync_mode is RAMBLOCK_SYN_MODERN */
+    if (sync_mode == RAMBLOCK_SYN_MODERN) {
+        flag = periodic ? RAMBLOCK_SYN_MODERN_PERIOD : RAMBLOCK_SYN_MODERN_ITER;
+    }
+
+    new_dirty_pages =
+        cpu_physical_memory_sync_dirty_bitmap(rb, 0, rb->used_length, flag);
+    /* Iteration syncs (legacy or modern) feed rs->migration_dirty_pages */
+    if (flag & (RAMBLOCK_SYN_LEGACY_ITER | RAMBLOCK_SYN_MODERN_ITER)) {
+        if (flag == RAMBLOCK_SYN_LEGACY_ITER) {
+            rs->migration_dirty_pages += new_dirty_pages;
+        } else {
+            rs->migration_dirty_pages += rb->iter_dirty_pages;
+            ramblock_reset_iter_stats(rb);
+        }
+    }
 
-    rs->migration_dirty_pages += new_dirty_pages;
     rs->num_dirty_pages_period += new_dirty_pages;
 }
 
@@ -1041,7 +1068,9 @@  static void migration_trigger_throttle(RAMState *rs)
     }
 }
 
-static void migration_bitmap_sync(RAMState *rs, bool last_stage)
+static void migration_bitmap_sync(RAMState *rs,
+                                  bool last_stage,
+                                  bool periodic)
 {
     RAMBlock *block;
     int64_t end_time;
@@ -1058,7 +1087,7 @@  static void migration_bitmap_sync(RAMState *rs, bool last_stage)
     WITH_QEMU_LOCK_GUARD(&rs->bitmap_mutex) {
         WITH_RCU_READ_LOCK_GUARD() {
             RAMBLOCK_FOREACH_NOT_IGNORED(block) {
-                ramblock_sync_dirty_bitmap(rs, block);
+                ramblock_sync_dirty_bitmap(rs, block, periodic);
             }
             stat64_set(&mig_stats.dirty_bytes_last_sync, ram_bytes_remaining());
         }
@@ -1101,7 +1130,7 @@  static void migration_bitmap_sync_precopy(RAMState *rs, bool last_stage)
         local_err = NULL;
     }
 
-    migration_bitmap_sync(rs, last_stage);
+    migration_bitmap_sync(rs, last_stage, false);
 
     if (precopy_notify(PRECOPY_NOTIFY_AFTER_BITMAP_SYNC, &local_err)) {
         error_report_err(local_err);
@@ -2594,7 +2623,7 @@  void ram_postcopy_send_discard_bitmap(MigrationState *ms)
     RCU_READ_LOCK_GUARD();
 
     /* This should be our last sync, the src is now paused */
-    migration_bitmap_sync(rs, false);
+    migration_bitmap_sync(rs, false, false);
 
     /* Easiest way to make sure we don't resume in the middle of a host-page */
     rs->pss[RAM_CHANNEL_PRECOPY].last_sent_block = NULL;
@@ -3581,7 +3610,7 @@  void colo_incoming_start_dirty_log(void)
     memory_global_dirty_log_sync(false);
     WITH_RCU_READ_LOCK_GUARD() {
         RAMBLOCK_FOREACH_NOT_IGNORED(block) {
-            ramblock_sync_dirty_bitmap(ram_state, block);
+            ramblock_sync_dirty_bitmap(ram_state, block, false);
             /* Discard this dirty bitmap record */
             bitmap_zero(block->bmap, block->max_length >> TARGET_PAGE_BITS);
         }
@@ -3862,7 +3891,7 @@  void colo_flush_ram_cache(void)
     qemu_mutex_lock(&ram_state->bitmap_mutex);
     WITH_RCU_READ_LOCK_GUARD() {
         RAMBLOCK_FOREACH_NOT_IGNORED(block) {
-            ramblock_sync_dirty_bitmap(ram_state, block);
+            ramblock_sync_dirty_bitmap(ram_state, block, false);
         }
     }