From patchwork Sat Jul 9 13:02:41 2011
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Patchwork-Submitter: Pekka Enberg
X-Patchwork-Id: 959532
From: Pekka Enberg
To: kvm@vger.kernel.org
Cc: Pekka Enberg, Asias He, Cyrill Gorcunov, Ingo Molnar, Prasad Joshi,
	Sasha Levin
Subject: [PATCH 8/9] kvm tools, qcow: Delayed L2 table writeout
Date: Sat, 9 Jul 2011 16:02:41 +0300
Message-Id: <1310216563-17503-9-git-send-email-penberg@kernel.org>
X-Mailer: git-send-email 1.7.0.4
In-Reply-To: <1310216563-17503-1-git-send-email-penberg@kernel.org>
References: <1310216563-17503-1-git-send-email-penberg@kernel.org>
X-Mailing-List: kvm@vger.kernel.org

This patch delays writeout for new L2 tables, as we already do for L1
tables. If an L2 table has unallocated clusters, we mark that in the
in-memory L2 table but don't write the table to disk until it is evicted
from the LRU cache or qcow_disk_flush() is called. That makes writes to
new clusters volatile until VIRTIO_BLK_T_FLUSH is issued, without
corrupting the QCOW image on I/O error.
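(Not part of the patch: a minimal standalone sketch of the write-back
scheme described above. The eviction and flush paths mirror the patch
below; the explicit "dirty" flag and the l2_entry/l2_write_back() names
are illustrative simplifications, not the kvm tools API.)

	/*
	 * Sketch only: an L2 table is updated in memory when a cluster
	 * is allocated, and written back lazily, on LRU eviction or on
	 * an explicit flush, rather than synchronously on every write.
	 */
	#include <stdint.h>
	#include <unistd.h>

	struct l2_entry {
		uint64_t offset;	/* on-disk location of this L2 table */
		uint64_t *table;	/* in-memory copy, mutated on allocation */
		size_t size;		/* table size in bytes */
		int dirty;		/* table[] diverges from the on-disk copy */
	};

	static int l2_write_back(int fd, struct l2_entry *e)
	{
		/* Deferred writeout: nothing to do for a clean table */
		if (!e->dirty)
			return 0;
		if (pwrite(fd, e->table, e->size, e->offset) != (ssize_t)e->size)
			return -1;
		e->dirty = 0;
		return 0;
	}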
Cc: Asias He
Cc: Cyrill Gorcunov
Cc: Ingo Molnar
Cc: Prasad Joshi
Cc: Sasha Levin
Signed-off-by: Pekka Enberg
---
 tools/kvm/disk/qcow.c | 66 +++++++++++++++++++++++++++++--------------------
 1 files changed, 39 insertions(+), 27 deletions(-)

diff --git a/tools/kvm/disk/qcow.c b/tools/kvm/disk/qcow.c
index c851e7f..b71762f 100644
--- a/tools/kvm/disk/qcow.c
+++ b/tools/kvm/disk/qcow.c
@@ -88,6 +88,16 @@ static void free_cache(struct qcow *q)
 	}
 }
 
+static int qcow_l2_cache_write(struct qcow *q, struct qcow_l2_table *c)
+{
+	struct qcow_header *header = q->header;
+	u64 size;
+
+	size = 1 << header->l2_bits;
+
+	return pwrite_in_full(q->fd, c->table, size * sizeof(u64), c->offset);
+}
+
 static int cache_table(struct qcow *q, struct qcow_l2_table *c)
 {
 	struct rb_root *r = &q->root;
@@ -100,6 +110,9 @@ static int cache_table(struct qcow *q, struct qcow_l2_table *c)
 		 */
 		lru = list_first_entry(&q->lru_list, struct qcow_l2_table, list);
 
+		if (qcow_l2_cache_write(q, lru) < 0)
+			goto error;
+
 		/* Remove the node from the cache */
 		rb_erase(&lru->node, r);
 		list_del_init(&lru->list);
@@ -361,7 +374,6 @@ static ssize_t qcow_write_cluster(struct qcow *q, u64 offset, void *buf, u32 src
 	struct qcow_header *header = q->header;
 	struct qcow_table *table = &q->table;
 	struct qcow_l2_table *l2t;
-	bool update_meta;
 	u64 clust_start;
 	u64 clust_off;
 	u64 clust_sz;
@@ -371,7 +383,6 @@ static ssize_t qcow_write_cluster(struct qcow *q, u64 offset, void *buf, u32 src
 	u64 l2t_sz;
 	u64 f_sz;
 	u64 len;
-	u64 t;
 
 	l2t = NULL;
 	l2t_sz = 1 << header->l2_bits;
@@ -434,31 +445,16 @@ static ssize_t qcow_write_cluster(struct qcow *q, u64 offset, void *buf, u32 src
 
 	clust_start = be64_to_cpu(l2t->table[l2t_idx]) & ~header->oflag_mask;
 	if (!clust_start) {
-		clust_start = ALIGN(f_sz, clust_sz);
-		update_meta = true;
-	} else
-		update_meta = false;
-
-	/* Write actual data */
-	if (pwrite_in_full(q->fd, buf, len, clust_start + clust_off) < 0)
-		goto error;
-
-	if (update_meta) {
-		t = cpu_to_be64(clust_start);
-		if (qcow_pwrite_sync(q->fd, &t, sizeof(t), l2t_off + l2t_idx * sizeof(u64)) < 0) {
-			/* Restore the file to consistent state */
-			if (ftruncate(q->fd, f_sz) < 0)
-				goto error;
-
-			goto error;
-		}
-
-		/* Update the cached level2 entry */
-		l2t->table[l2t_idx] = cpu_to_be64(clust_start);
+		clust_start = ALIGN(f_sz, clust_sz);
+		l2t->table[l2t_idx] = cpu_to_be64(clust_start);
 	}
 
 	mutex_unlock(&q->mutex);
 
+	/* Write actual data */
+	if (pwrite_in_full(q->fd, buf, len, clust_start + clust_off) < 0)
+		return -1;
+
 	return len;
 
 free_cache:
@@ -508,18 +504,34 @@ static int qcow_disk_flush(struct disk_image *disk)
 {
 	struct qcow *q = disk->priv;
 	struct qcow_header *header;
+	struct list_head *pos, *n;
 	struct qcow_table *table;
 
-	if (fdatasync(disk->fd) < 0)
-		return -1;
-
 	header = q->header;
 	table = &q->table;
 
+	mutex_lock(&q->mutex);
+
+	list_for_each_safe(pos, n, &q->lru_list) {
+		struct qcow_l2_table *c = list_entry(pos, struct qcow_l2_table, list);
+
+		if (qcow_l2_cache_write(q, c) < 0)
+			goto error_unlock;
+	}
+
+	if (fdatasync(disk->fd) < 0)
+		goto error_unlock;
+
 	if (pwrite_in_full(disk->fd, table->l1_table, table->table_size * sizeof(u64), header->l1_table_offset) < 0)
-		return -1;
+		goto error_unlock;
+
+	mutex_unlock(&q->mutex);
 
 	return fsync(disk->fd);
+
+error_unlock:
+	mutex_unlock(&q->mutex);
+	return -1;
 }
 
 static int qcow_disk_close(struct disk_image *disk)
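(Also not part of the patch: a sketch of the ordering invariant the new
qcow_disk_flush() enforces. Cached L2 tables and the data they reference
must reach the disk, via fdatasync(), before the L1 table is rewritten,
so a failed flush can only lose delayed updates, never leave L1 pointing
at unsynced L2 data. flush_l2_then_l1() and its parameters are
hypothetical names, reusing l2_write_back() from the sketch above.)

	static int flush_l2_then_l1(int fd, struct l2_entry *l2s, size_t n,
				    const uint64_t *l1, size_t l1_size,
				    uint64_t l1_off)
	{
		size_t i;

		/* 1. Write back every cached (dirty) L2 table */
		for (i = 0; i < n; i++)
			if (l2_write_back(fd, &l2s[i]) < 0)
				return -1;

		/* 2. Data and L2 tables must hit the disk first */
		if (fdatasync(fd) < 0)
			return -1;

		/* 3. Only now update the L1 table */
		if (pwrite(fd, l1, l1_size, l1_off) != (ssize_t)l1_size)
			return -1;

		/* 4. Make the L1 update (and file metadata) durable */
		return fsync(fd);
	}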