From patchwork Wed Apr 21 05:57:13 2010
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Patchwork-Submitter: Yoshiaki Tamura
X-Patchwork-Id: 93824
From: Yoshiaki Tamura
To: kvm@vger.kernel.org, qemu-devel@nongnu.org
Cc: avi@redhat.com, aliguori@us.ibm.com, mtosatti@redhat.com,
    ohmura.kei@lab.ntt.co.jp, yoshikawa.takuya@oss.ntt.co.jp,
    Yoshiaki Tamura
Subject: [RFC PATCH 08/20] Introduce RAMSaveIO and use cpu_physical_memory_get_dirty_range() to check multiple dirty pages.
Date: Wed, 21 Apr 2010 14:57:13 +0900
Message-Id: <1271829445-5328-9-git-send-email-tamura.yoshiaki@lab.ntt.co.jp>
X-Mailer: git-send-email 1.7.0.31.g1df487
In-Reply-To: <1271829445-5328-1-git-send-email-tamura.yoshiaki@lab.ntt.co.jp>
References: <1271829445-5328-1-git-send-email-tamura.yoshiaki@lab.ntt.co.jp>

diff --git a/vl.c b/vl.c
index 729c955..9c3dc4c 100644
--- a/vl.c
+++ b/vl.c
@@ -2774,12 +2774,167 @@ static int is_dup_page(uint8_t *page, uint8_t ch)
     return 1;
 }
 
-static int ram_save_block(QEMUFile *f)
+typedef struct RAMSaveIO RAMSaveIO;
+
+struct RAMSaveIO {
+    QEMUFile *f;
+    QEMUIOVector *qiov;
+
+    uint8_t *ram_store;
+    size_t nalloc, nused;
+    uint8_t io_mode;
+
+    void (*put_buffer)(RAMSaveIO *s, uint8_t *buf, size_t len);
+    void (*put_byte)(RAMSaveIO *s, int v);
+    void (*put_be64)(RAMSaveIO *s, uint64_t v);
+
+};
+
+static inline void ram_saveio_flush(RAMSaveIO *s, int prepare)
+{
+    qemu_put_vector(s->f, s->qiov);
+    if (prepare)
+        qemu_put_vector_prepare(s->f);
+
+    /* reset stored data */
+    qemu_iovec_reset(s->qiov);
+    s->nused = 0;
+}
+
+static inline void ram_saveio_put_buffer(RAMSaveIO *s, uint8_t *buf, size_t len)
+{
+    s->put_buffer(s, buf, len);
+}
+
+static inline void ram_saveio_put_byte(RAMSaveIO *s, int v)
+{
+    s->put_byte(s, v);
+}
+
+static inline void ram_saveio_put_be64(RAMSaveIO *s, uint64_t v)
+{
+    s->put_be64(s, v);
+}
+
+static inline void ram_saveio_set_error(RAMSaveIO *s)
+{
+    qemu_file_set_error(s->f);
+}
+
+static void ram_saveio_put_buffer_vector(RAMSaveIO *s, uint8_t *buf, size_t len)
+{
+    qemu_iovec_add(s->qiov, buf, len);
+}
+
+static void ram_saveio_put_buffer_direct(RAMSaveIO *s, uint8_t *buf, size_t len)
+{
+    qemu_put_buffer(s->f, buf, len);
+}
+
+static void ram_saveio_put_byte_vector(RAMSaveIO *s, int v)
+{
+    uint8_t *to_save;
+
+    if (s->nalloc - s->nused < sizeof(int))
+        ram_saveio_flush(s, 1);
+
+    to_save = &s->ram_store[s->nused];
+    to_save[0] = v & 0xff;
+    s->nused++;
+
+    qemu_iovec_add(s->qiov, to_save, 1);
+}
+
+static void ram_saveio_put_byte_direct(RAMSaveIO *s, int v)
+{
+    qemu_put_byte(s->f, v);
+}
+
+static void ram_saveio_put_be64_vector(RAMSaveIO *s, uint64_t v)
+{
+    uint8_t *to_save;
+
+    if (s->nalloc - s->nused < sizeof(uint64_t))
+        ram_saveio_flush(s, 1);
+
+    to_save = &s->ram_store[s->nused];
+    to_save[0] = (v >> 56) & 0xff;
+    to_save[1] = (v >> 48) & 0xff;
+    to_save[2] = (v >> 40) & 0xff;
+    to_save[3] = (v >> 32) & 0xff;
+    to_save[4] = (v >> 24) & 0xff;
+    to_save[5] = (v >> 16) & 0xff;
+    to_save[6] = (v >> 8) & 0xff;
+    to_save[7] = (v >> 0) & 0xff;
+    s->nused += sizeof(uint64_t);
+
+    qemu_iovec_add(s->qiov, to_save, sizeof(uint64_t));
+}
+
+static void ram_saveio_put_be64_direct(RAMSaveIO *s, uint64_t v)
+{
+
+    qemu_put_be64(s->f, v);
+}
+
+static RAMSaveIO *ram_saveio_new(QEMUFile *f, size_t max_store)
+{
+    RAMSaveIO *s;
+
+    s = qemu_mallocz(sizeof(*s));
+
+    if (qemu_file_get_rate_limit(f) == 0) { /* non buffer mode */
+        /* When a QEMUFile doesn't have get_rate_limit,
+         * qemu_file_get_rate_limit() will return 0.
+         * However, we believe that all kinds of QEMUFile
+         * except non-block mode have a rate limit function.
+         */
+        s->io_mode = 1;
+        s->ram_store = qemu_mallocz(max_store);
+        s->nalloc = max_store;
+        s->nused = 0;
+
+        s->qiov = qemu_mallocz(sizeof(*s->qiov));
+        qemu_iovec_init(s->qiov, max_store);
+
+        s->put_buffer = ram_saveio_put_buffer_vector;
+        s->put_byte = ram_saveio_put_byte_vector;
+        s->put_be64 = ram_saveio_put_be64_vector;
+
+        qemu_put_vector_prepare(f);
+    } else {
+        s->io_mode = 0;
+        s->put_buffer = ram_saveio_put_buffer_direct;
+        s->put_byte = ram_saveio_put_byte_direct;
+        s->put_be64 = ram_saveio_put_be64_direct;
+    }
+
+    s->f = f;
+
+    return s;
+}
+
+static void ram_saveio_destroy(RAMSaveIO *s)
+{
+    if (s->qiov != NULL) { /* means using put_vector */
+        ram_saveio_flush(s, 0);
+        qemu_iovec_destroy(s->qiov);
+        qemu_free(s->qiov);
+        qemu_free(s->ram_store);
+    }
+    qemu_free(s);
+}
+
+/*
+ * RAMSaveIO will manage I/O.
+ */
+static int ram_save_block(RAMSaveIO *s)
 {
     static ram_addr_t current_addr = 0;
     ram_addr_t saved_addr = current_addr;
     ram_addr_t addr = 0;
-    int found = 0;
+    ram_addr_t dirty_rams[HOST_LONG_BITS];
+    int i, found = 0;
 
     while (addr < last_ram_offset) {
         if (kvm_enabled() && current_addr == 0) {
@@ -2787,32 +2942,38 @@ static int ram_save_block(QEMUFile *f)
             r = kvm_update_dirty_pages_log();
             if (r) {
                 fprintf(stderr, "%s: update dirty pages log failed %d\n", __FUNCTION__, r);
-                qemu_file_set_error(f);
+                ram_saveio_set_error(s);
                 return 0;
             }
         }
 
-        if (cpu_physical_memory_get_dirty(current_addr, MIGRATION_DIRTY_FLAG)) {
+        if ((found = cpu_physical_memory_get_dirty_range(
+                 current_addr, last_ram_offset, dirty_rams, HOST_LONG_BITS,
+                 MIGRATION_DIRTY_FLAG))) {
             uint8_t *p;
 
-            cpu_physical_memory_reset_dirty(current_addr,
-                                            current_addr + TARGET_PAGE_SIZE,
-                                            MIGRATION_DIRTY_FLAG);
+            for (i = 0; i < found; i++) {
+                ram_addr_t page_addr = dirty_rams[i];
+                cpu_physical_memory_reset_dirty(page_addr,
+                                                page_addr + TARGET_PAGE_SIZE,
+                                                MIGRATION_DIRTY_FLAG);
 
-            p = qemu_get_ram_ptr(current_addr);
+                p = qemu_get_ram_ptr(page_addr);
 
-            if (is_dup_page(p, *p)) {
-                qemu_put_be64(f, current_addr | RAM_SAVE_FLAG_COMPRESS);
-                qemu_put_byte(f, *p);
-            } else {
-                qemu_put_be64(f, current_addr | RAM_SAVE_FLAG_PAGE);
-                qemu_put_buffer(f, p, TARGET_PAGE_SIZE);
+                if (is_dup_page(p, *p)) {
+                    ram_saveio_put_be64(s,
+                                        (page_addr) | RAM_SAVE_FLAG_COMPRESS);
+                    ram_saveio_put_byte(s, *p);
+                } else {
+                    ram_saveio_put_be64(s, (page_addr) | RAM_SAVE_FLAG_PAGE);
+                    ram_saveio_put_buffer(s, p, TARGET_PAGE_SIZE);
+                }
             }
 
-            found = 1;
             break;
+        } else {
+            addr += dirty_rams[0];
+            current_addr = (saved_addr + addr) % last_ram_offset;
         }
-
-        addr += TARGET_PAGE_SIZE;
-        current_addr = (saved_addr + addr) % last_ram_offset;
     }
 
     return found;
@@ -2822,12 +2983,19 @@ static uint64_t bytes_transferred;
 
 static ram_addr_t ram_save_remaining(void)
 {
-    ram_addr_t addr;
+    ram_addr_t addr = 0;
     ram_addr_t count = 0;
+    ram_addr_t dirty_rams[HOST_LONG_BITS];
+    int found = 0;
 
-    for (addr = 0; addr < last_ram_offset; addr += TARGET_PAGE_SIZE) {
-        if (cpu_physical_memory_get_dirty(addr, MIGRATION_DIRTY_FLAG))
-            count++;
+    while (addr < last_ram_offset) {
+        if ((found = cpu_physical_memory_get_dirty_range(addr, last_ram_offset,
+                 dirty_rams, HOST_LONG_BITS, MIGRATION_DIRTY_FLAG))) {
+            count += found;
+            addr = dirty_rams[found - 1] + TARGET_PAGE_SIZE;
+        } else {
+            addr += dirty_rams[0];
+        }
     }
 
     return count;
@@ -2854,6 +3022,7 @@ static int ram_save_live(Monitor *mon, QEMUFile *f, int stage, void *opaque)
     uint64_t bytes_transferred_last;
     double bwidth = 0;
     uint64_t expected_time = 0;
+    RAMSaveIO *s;
 
     if (stage < 0) {
         cpu_physical_memory_set_dirty_tracking(0);
@@ -2883,10 +3052,12 @@ static int ram_save_live(Monitor *mon, QEMUFile *f, int stage, void *opaque)
     bytes_transferred_last = bytes_transferred;
     bwidth = qemu_get_clock_ns(rt_clock);
 
-    while (!qemu_file_rate_limit(f)) {
+    s = ram_saveio_new(f, IOV_MAX);
+
+    while (!qemu_file_rate_limit(f)) {
         int ret;
 
-        ret = ram_save_block(f);
+        ret = ram_save_block(s);
         bytes_transferred += ret * TARGET_PAGE_SIZE;
         if (ret == 0) /* no more blocks */
             break;
@@ -2903,12 +3074,14 @@ static int ram_save_live(Monitor *mon, QEMUFile *f, int stage, void *opaque)
     /* try transferring iterative blocks of memory */
     if (stage == 3) {
         /* flush all remaining blocks regardless of rate limiting */
-        while (ram_save_block(f) != 0) {
+        while (ram_save_block(s) != 0) {
             bytes_transferred += TARGET_PAGE_SIZE;
         }
         cpu_physical_memory_set_dirty_tracking(0);
     }
 
+    ram_saveio_destroy(s);
+
     qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
 
     expected_time = ram_save_remaining() * TARGET_PAGE_SIZE / bwidth;
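
The calling pattern the patch relies on is visible in ram_save_live() above: create a RAMSaveIO bound to the outgoing QEMUFile, funnel page headers and page bodies through its put_* hooks, and destroy it so that any iovec queued in vector mode is flushed before the EOS marker is written. A minimal sketch of that pattern follows; it is not part of the patch, the function name save_one_page_sketch is hypothetical, and it assumes the RAMSaveIO helpers and the usual vl.c macros (IOV_MAX, TARGET_PAGE_SIZE, RAM_SAVE_FLAG_PAGE) are in scope.

/* Sketch only (not part of the patch): how a caller drives RAMSaveIO. */
static void save_one_page_sketch(QEMUFile *f, ram_addr_t page_addr, uint8_t *page)
{
    RAMSaveIO *s = ram_saveio_new(f, IOV_MAX);   /* picks vector or direct mode */

    ram_saveio_put_be64(s, page_addr | RAM_SAVE_FLAG_PAGE);  /* page header */
    ram_saveio_put_buffer(s, page, TARGET_PAGE_SIZE);        /* page payload */

    ram_saveio_destroy(s);   /* flushes any queued iovec in vector mode */
}

In vector mode the be64 header bytes are staged in ram_store so they remain valid until the iovec is flushed, while page payloads are referenced in place; that is why ram_saveio_destroy() must run before the EOS marker goes out through the plain qemu_put_be64() path.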