From patchwork Sat Jul 9 13:02:41 2011
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Patchwork-Submitter: Pekka Enberg
X-Patchwork-Id: 959532
From: Pekka Enberg
To: kvm@vger.kernel.org
Cc: Pekka Enberg, Asias He, Cyrill Gorcunov, Ingo Molnar, Prasad Joshi,
	Sasha Levin
Subject: [PATCH 8/9] kvm tools, qcow: Delayed L2 table writeout
Date: Sat, 9 Jul 2011 16:02:41 +0300
Message-Id: <1310216563-17503-9-git-send-email-penberg@kernel.org>
X-Mailer: git-send-email 1.7.0.4
In-Reply-To: <1310216563-17503-1-git-send-email-penberg@kernel.org>
References: <1310216563-17503-1-git-send-email-penberg@kernel.org>
X-Mailing-List: kvm@vger.kernel.org

This patch delays writeout for new L2 tables, as we already do for L1
tables. If an L2 table has unallocated clusters, we mark that in the
in-memory L2 table but don't write the table to disk until it is evicted
from the LRU cache or qcow_disk_flush() is called. That makes writes to
new clusters volatile until VIRTIO_BLK_T_FLUSH is issued, without
corrupting the QCOW image on I/O error.
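(Not part of the patch: a minimal standalone sketch of the write-back
scheme described above. The eviction and flush paths mirror the patch
below; the explicit "dirty" flag and the l2_entry/l2_write_back() names
are illustrative simplifications, not the kvm tools API.)

	/*
	 * Sketch only: an L2 table is updated in memory when a cluster
	 * is allocated, and written back lazily, on LRU eviction or on
	 * an explicit flush, rather than synchronously on every write.
	 */
	#include <stdint.h>
	#include <unistd.h>

	struct l2_entry {
		uint64_t offset;	/* on-disk location of this L2 table */
		uint64_t *table;	/* in-memory copy, mutated on allocation */
		size_t size;		/* table size in bytes */
		int dirty;		/* table[] diverges from the on-disk copy */
	};

	static int l2_write_back(int fd, struct l2_entry *e)
	{
		/* Deferred writeout: nothing to do for a clean table */
		if (!e->dirty)
			return 0;
		if (pwrite(fd, e->table, e->size, e->offset) != (ssize_t)e->size)
			return -1;
		e->dirty = 0;
		return 0;
	}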
Cc: Asias He
Cc: Cyrill Gorcunov
Cc: Ingo Molnar
Cc: Prasad Joshi
Cc: Sasha Levin
Signed-off-by: Pekka Enberg
---
 tools/kvm/disk/qcow.c | 66 +++++++++++++++++++++++++++++--------------------
 1 files changed, 39 insertions(+), 27 deletions(-)

diff --git a/tools/kvm/disk/qcow.c b/tools/kvm/disk/qcow.c
index c851e7f..b71762f 100644
--- a/tools/kvm/disk/qcow.c
+++ b/tools/kvm/disk/qcow.c
@@ -88,6 +88,16 @@ static void free_cache(struct qcow *q)
 	}
 }
 
+static int qcow_l2_cache_write(struct qcow *q, struct qcow_l2_table *c)
+{
+	struct qcow_header *header = q->header;
+	u64 size;
+
+	size = 1 << header->l2_bits;
+
+	return pwrite_in_full(q->fd, c->table, size * sizeof(u64), c->offset);
+}
+
 static int cache_table(struct qcow *q, struct qcow_l2_table *c)
 {
 	struct rb_root *r = &q->root;
@@ -100,6 +110,9 @@ static int cache_table(struct qcow *q, struct qcow_l2_table *c)
 		 */
 		lru = list_first_entry(&q->lru_list, struct qcow_l2_table, list);
 
+		if (qcow_l2_cache_write(q, lru) < 0)
+			goto error;
+
 		/* Remove the node from the cache */
 		rb_erase(&lru->node, r);
 		list_del_init(&lru->list);
@@ -361,7 +374,6 @@ static ssize_t qcow_write_cluster(struct qcow *q, u64 offset, void *buf, u32 src
 	struct qcow_header *header = q->header;
 	struct qcow_table *table = &q->table;
 	struct qcow_l2_table *l2t;
-	bool update_meta;
 	u64 clust_start;
 	u64 clust_off;
 	u64 clust_sz;
@@ -371,7 +383,6 @@ static ssize_t qcow_write_cluster(struct qcow *q, u64 offset, void *buf, u32 src
 	u64 l2t_sz;
 	u64 f_sz;
 	u64 len;
-	u64 t;
 
 	l2t = NULL;
 	l2t_sz = 1 << header->l2_bits;
@@ -434,31 +445,16 @@ static ssize_t qcow_write_cluster(struct qcow *q, u64 offset, void *buf, u32 src
 
 	clust_start = be64_to_cpu(l2t->table[l2t_idx]) & ~header->oflag_mask;
 	if (!clust_start) {
-		clust_start = ALIGN(f_sz, clust_sz);
-		update_meta = true;
-	} else
-		update_meta = false;
-
-	/* Write actual data */
-	if (pwrite_in_full(q->fd, buf, len, clust_start + clust_off) < 0)
-		goto error;
-
-	if (update_meta) {
-		t = cpu_to_be64(clust_start);
-		if (qcow_pwrite_sync(q->fd, &t, sizeof(t), l2t_off + l2t_idx * sizeof(u64)) < 0) {
-			/* Restore the file to consistent state */
-			if (ftruncate(q->fd, f_sz) < 0)
-				goto error;
-
-			goto error;
-		}
-
-		/* Update the cached level2 entry */
-		l2t->table[l2t_idx] = cpu_to_be64(clust_start);
+		clust_start = ALIGN(f_sz, clust_sz);
+		l2t->table[l2t_idx] = cpu_to_be64(clust_start);
 	}
 
 	mutex_unlock(&q->mutex);
 
+	/* Write actual data */
+	if (pwrite_in_full(q->fd, buf, len, clust_start + clust_off) < 0)
+		return -1;
+
 	return len;
 
 free_cache:
@@ -508,18 +504,34 @@ static int qcow_disk_flush(struct disk_image *disk)
 {
 	struct qcow *q = disk->priv;
 	struct qcow_header *header;
+	struct list_head *pos, *n;
 	struct qcow_table *table;
 
-	if (fdatasync(disk->fd) < 0)
-		return -1;
-
 	header = q->header;
 	table = &q->table;
 
+	mutex_lock(&q->mutex);
+
+	list_for_each_safe(pos, n, &q->lru_list) {
+		struct qcow_l2_table *c = list_entry(pos, struct qcow_l2_table, list);
+
+		if (qcow_l2_cache_write(q, c) < 0)
+			goto error_unlock;
+	}
+
+	if (fdatasync(disk->fd) < 0)
+		goto error_unlock;
+
 	if (pwrite_in_full(disk->fd, table->l1_table, table->table_size * sizeof(u64), header->l1_table_offset) < 0)
-		return -1;
+		goto error_unlock;
+
+	mutex_unlock(&q->mutex);
 
 	return fsync(disk->fd);
+
+error_unlock:
+	mutex_unlock(&q->mutex);
+	return -1;
 }
 
 static int qcow_disk_close(struct disk_image *disk)
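(Also not part of the patch: a sketch of the ordering invariant the new
qcow_disk_flush() enforces. Cached L2 tables and the data they reference
must reach the disk, via fdatasync(), before the L1 table is rewritten,
so a failed flush can only lose delayed updates, never leave L1 pointing
at unsynced L2 data. flush_l2_then_l1() and its parameters are
hypothetical names, reusing l2_write_back() from the sketch above.)

	static int flush_l2_then_l1(int fd, struct l2_entry *l2s, size_t n,
				    const uint64_t *l1, size_t l1_size,
				    uint64_t l1_off)
	{
		size_t i;

		/* 1. Write back every cached (dirty) L2 table */
		for (i = 0; i < n; i++)
			if (l2_write_back(fd, &l2s[i]) < 0)
				return -1;

		/* 2. Data and L2 tables must hit the disk first */
		if (fdatasync(fd) < 0)
			return -1;

		/* 3. Only now update the L1 table */
		if (pwrite(fd, l1, l1_size, l1_off) != (ssize_t)l1_size)
			return -1;

		/* 4. Make the L1 update (and file metadata) durable */
		return fsync(fd);
	}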