[v1,3/3] kvm tools: Add QCOW level2 caching support

Message ID	1305713837-18889-3-git-send-email-prasadjoshi124@gmail.com (mailing list archive)
State	New, archived
Headers	show Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by demeter2.kernel.org (8.14.4/8.14.3) with ESMTP id p4IAMV8G029441 for <patchwork-kvm@patchwork.kernel.org>; Wed, 18 May 2011 10:22:31 GMT DomainKey-Signature: a=rsa-sha1; c=nofws; d=gmail.com; s=gamma; h=from:to:cc:subject:date:message-id:x-mailer:in-reply-to:references; b=DHzPFvsLs5PXDMXsd4hEk6h8oVw5ZIpCZblrebFWmsS4V7tgd/BUHgQX5JNgEID4wU rti3Sz1KJvSspZL1/wjEIhzQCFDthUQa1CI9T2NWWijIwS/qd8FNt4FXpM/X7SiOafxO wJxlHTz26rCxoWy7ipDTtwjkW06MvSqRPlX88= From: Prasad Joshi <prasadjoshi124@gmail.com> To: prasadjoshi124@gmail.com Cc: mingo@elte.hu, kvm@vger.kernel.org, penberg@kernel.org, asias.hejun@gmail.com, gorcunov@gmail.com, levinsasha928@gmail.com, chaitanyakulkarni15@gmail.com, ashwini.kulkarni@gmail.com Subject: [PATCH v1 3/3] kvm tools: Add QCOW level2 caching support Date: Wed, 18 May 2011 11:17:17 +0100 Message-Id: <1305713837-18889-3-git-send-email-prasadjoshi124@gmail.com> In-Reply-To: <1305713837-18889-1-git-send-email-prasadjoshi124@gmail.com> References: <1305713837-18889-1-git-send-email-prasadjoshi124@gmail.com> Sender: kvm-owner@vger.kernel.org Precedence: bulk

On Wed, 2011-05-18 at 11:17 +0100, Prasad Joshi wrote: > QCOW uses two tables level1 (L1) table and level2 > (L2) table. The L1 table points to offset of L2 > table. When a QCOW image is probed, the L1 table is > cached in the memory to avoid reading it from disk > on every reference. This caching imporves the > performance. The similar performance improvment can > be observed when L2 tables are also cached. It is > impossible to cache all of the L2 tables because of > the memory constraint. The patch adds L2 table > caching capability for upto 128 L2 tables, it uses > combination of RB tree and List to manage the L2 > cached tables. The link list implementation helps > in building simple LRU structure and RB tree helps > in improving the search time during read/write > operations. > > Signed-off-by: Prasad Joshi <prasadjoshi124@gmail.com> > --- > tools/kvm/include/kvm/qcow.h | 18 ++++ > tools/kvm/qcow.c | 230 ++++++++++++++++++++++++++++++++++++++---- > 2 files changed, 230 insertions(+), 18 deletions(-) > > diff --git a/tools/kvm/include/kvm/qcow.h b/tools/kvm/include/kvm/qcow.h > index b6e7493..095566a 100644 > --- a/tools/kvm/include/kvm/qcow.h > +++ b/tools/kvm/include/kvm/qcow.h > @@ -3,6 +3,8 @@ > > #include <linux/types.h> > #include <stdbool.h> > +#include <linux/rbtree.h> > +#include <linux/list.h> > > #define QCOW_MAGIC (('Q' << 24) | ('F' << 16) | ('I' << 8) | 0xfb) > > @@ -17,6 +19,16 @@ > #define QCOW2_OFLAG_COMPRESSED (1LL << 62) > #define QCOW2_OFLAG_MASK (QCOW2_OFLAG_COPIED|QCOW2_OFLAG_COMPRESSED) > > +#define MAX_CACHE_NODES 128 > + > +struct qcow_l2_cache { > + u64 offset; > + u64 *table; > + struct rb_node node; > + struct list_head list; > + struct qcow *qcow; > +}; > + > struct qcow_table { > u32 table_size; > u64 *l1_table; > @@ -26,6 +38,12 @@ struct qcow { > void *header; > struct qcow_table table; > int fd; > + > + /* Level2 caching data structures */ > + struct qcow_l2_cache cache; Why is 'cache' here? > + struct rb_root root; > + struct list_head lru_list; > + int no_cached; > }; > > struct qcow_header { > diff --git a/tools/kvm/qcow.c b/tools/kvm/qcow.c > index bb2345c..ca61374 100644 > --- a/tools/kvm/qcow.c > +++ b/tools/kvm/qcow.c > @@ -16,6 +16,153 @@ > #include <linux/kernel.h> > #include <linux/types.h> > > +static inline int insert(struct rb_root *root, struct qcow_l2_cache *new) > +{ > + struct rb_node **link = &(root->rb_node), *parent = NULL; > + u64 offset = new->offset; > + > + /* search the tree */ > + while (*link) { > + struct qcow_l2_cache *t; > + > + t = rb_entry(*link, struct qcow_l2_cache, node); > + if (!t) > + goto error; > + > + parent = *link; > + > + if (t->offset > offset) > + link = &(*link)->rb_left; > + else if (t->offset < offset) > + link = &(*link)->rb_right; > + else > + goto out; > + } > + > + /* add new node */ > + rb_link_node(&new->node, parent, link); > + rb_insert_color(&new->node, root); > +out: > + return 0; > +error: > + return -1; > +} > + > +static inline struct qcow_l2_cache *search(struct rb_root *root, u64 offset) > +{ > + struct rb_node *link = root->rb_node; > + > + while (link) { > + struct qcow_l2_cache *t; > + > + t = rb_entry(link, struct qcow_l2_cache, node); > + if (!t) > + goto out; > + > + if (t->offset > offset) > + link = link->rb_left; > + else if (t->offset < offset) > + link = link->rb_right; > + else > + return t; > + } > +out: > + return NULL; > +} > + > +static inline void free_cache(struct qcow *q) > +{ > + struct list_head *pos, *n; > + struct qcow_l2_cache *t; > + struct rb_root *r = &q->root; > + > + list_for_each_safe(pos, n, &q->lru_list) { > + /* Remove cache table from the list and RB tree */ > + list_del(pos); > + t = list_entry(pos, struct qcow_l2_cache, list); > + rb_erase(&t->node, r); > + > + /* Free the cached node */ > + free(t->table); > + free(t); > + } > +} > + > +static inline int cache_table(struct qcow *q, u64 *table, u64 offset) > +{ > + struct qcow_l2_cache *n; > + struct rb_root *r = &q->root; > + struct qcow_l2_cache *lru; > + > + n = search(r, offset); > + if (n) { > + /* Update the LRU state */ > + list_del_init(&n->list); > + list_add_tail(&n->list, &q->lru_list); > + return 0; > + } > + > + lru = NULL; > + if (q->no_cached == MAX_CACHE_NODES) { > + /* Find the node for LRU replacement */ > + lru = list_first_entry(&q->lru_list, struct qcow_l2_cache, list); > + } > + > + n = calloc(1, sizeof(struct qcow_l2_cache)); sizeof(*n) > + if (!n) > + goto error; > + > + n->offset = offset; > + n->table = table; > + n->qcow = q; > + RB_CLEAR_NODE(&n->node); > + INIT_LIST_HEAD(&n->list); > + > + if (lru) { > + /* LRU Replacemen */ Replacement > + rb_replace_node(&lru->node, &n->node, r); rb_replace_node() doesn't re-balance the tree, it's intended to be used when both keys are the same. Doing so for nodes with different keys breaks the rbtree. > + > + /* > + * The new node should be the last node in the LRU list > + * therefore, do not list_replace instead delete the node to > + * be replaced and add a new node at the end of the list. > + */ > + list_del_init(&lru->list); > + list_add_tail(&n->list, &q->lru_list); > + > + /* Free the LRUed node */ > + free(lru->table); > + free(lru); > + } else { > + /* Add in RB Tree: Helps in searching faster */ > + if (insert(r, n) < 0) > + goto error; > + > + /* Add in LRU replacement list */ > + list_add_tail(&n->list, &q->lru_list); > + q->no_cached++; > + } > + > + return 0; > +error: > + free(n); > + return -1; > +} > + > +static inline int search_table(struct qcow *q, u64 **table, u64 offset) > +{ > + struct qcow_l2_cache *c; > + > + *table = NULL; > + > + c = search(&q->root, offset); > + if (!c) > + return -1; > + > + *table = c->table; > + return 0; > +} > + > static inline u64 get_l1_index(struct qcow *q, u64 offset) > { > struct qcow_header *header = q->header; > @@ -37,6 +184,43 @@ static inline u64 get_cluster_offset(struct qcow *q, u64 offset) > return offset & ((1 << header->cluster_bits)-1); > } > > +static int qcow1_read_l2_table(struct qcow *q, u64 **table, u64 offset) > +{ > + struct qcow_header *header = q->header; > + u64 size; > + u64 i; > + u64 *t; > + > + *table = NULL; > + size = 1 << header->l2_bits; > + > + /* search an entry for offset in cache */ > + if (search_table(q, table, offset) >= 0) > + return 0; > + > + t = calloc(size, sizeof(u64)); sizeof(*t) > + if (!t) > + goto error; > + > + /* table not cached: read from the disk */ > + if (pread_in_full(q->fd, t, size * sizeof(u64), offset) < 0) > + goto error; > + > + /* cache the table */ > + if (cache_table(q, t, offset) < 0) > + goto error; > + > + /* change cached table to CPU's byte-order */ > + for (i = 0; i < size; i++) > + be64_to_cpus(&t[i]); > + > + *table = t; > + return 0; > +error: > + free(t); > + return -1; > +} > + > static ssize_t qcow1_read_cluster(struct qcow *q, u64 offset, void *dst, u32 dst_len) > { > struct qcow_header *header = q->header; > @@ -51,8 +235,6 @@ static ssize_t qcow1_read_cluster(struct qcow *q, u64 offset, void *dst, u32 dst > u64 l1_idx; > u64 l2_idx; > > - l2_table = NULL; > - > cluster_size = 1 << header->cluster_bits; > > l1_idx = get_l1_index(q, offset); > @@ -72,18 +254,16 @@ static ssize_t qcow1_read_cluster(struct qcow *q, u64 offset, void *dst, u32 dst > goto zero_cluster; > > l2_table_size = 1 << header->l2_bits; > - l2_table = calloc(l2_table_size, sizeof(u64)); > - if (!l2_table) > - goto out_error; > > - if (pread_in_full(q->fd, l2_table, l2_table_size * sizeof(u64), l2_table_offset) < 0) > + /* read and cache level 2 table */ > + if (qcow1_read_l2_table(q, &l2_table, l2_table_offset) < 0) > goto out_error; > > l2_idx = get_l2_index(q, offset); > if (l2_idx >= l2_table_size) > goto out_error; > > - clust_start = be64_to_cpu(l2_table[l2_idx]) & ~header->oflag_mask; > + clust_start = l2_table[l2_idx] & ~header->oflag_mask; > if (!clust_start) > goto zero_cluster; > > @@ -91,7 +271,6 @@ static ssize_t qcow1_read_cluster(struct qcow *q, u64 offset, void *dst, u32 dst > goto out_error; > > out: > - free(l2_table); > return length; > > zero_cluster: > @@ -221,15 +400,16 @@ static ssize_t qcow1_write_cluster(struct qcow *q, u64 offset, void *buf, u32 sr > if (len > src_len) > len = src_len; > > - l2t = calloc(l2t_sz, sizeof(u64)); > - if (!l2t) > - goto error; > - > l2t_off = table->l1_table[l1t_idx] & ~header->oflag_mask; > if (l2t_off) { > - if (pread_in_full(q->fd, l2t, l2t_sz * sizeof(u64), l2t_off) < 0) > - goto free_l2; > + /* cache level2 table */ > + if (qcow1_read_l2_table(q, &l2t, l2t_off) < 0) > + goto error; > } else { > + l2t = calloc(l2t_sz, sizeof(u64)); sizeof(*l2t) > + if (!l2t) > + goto error; > + > /* Capture the state of the consistent QCOW image */ > f_sz = file_size(q->fd); > if (!f_sz) > @@ -251,6 +431,13 @@ static ssize_t qcow1_write_cluster(struct qcow *q, u64 offset, void *buf, u32 sr > goto free_l2; > } > > + if (cache_table(q, l2t, l2t_off) < 0) { > + if (ftruncate(q->fd, f_sz) < 0) > + goto free_l2; > + > + goto free_l2; > + } > + > /* Update the in-core entry */ > table->l1_table[l1t_idx] = l2t_off; > } > @@ -258,17 +445,15 @@ static ssize_t qcow1_write_cluster(struct qcow *q, u64 offset, void *buf, u32 sr > /* Capture the state of the consistent QCOW image */ > f_sz = file_size(q->fd); > if (!f_sz) > - goto free_l2; > + goto error; > > - clust_start = be64_to_cpu(l2t[l2t_idx]) & ~header->oflag_mask; > + clust_start = l2t[l2t_idx] & ~header->oflag_mask; > if (!clust_start) { > clust_start = ALIGN(f_sz, clust_sz); > update_meta = true; > } else > update_meta = false; > > - free(l2t); > - > /* Write actual data */ > if (pwrite_in_full(q->fd, buf, len, clust_start + clust_off) < 0) > goto error; > @@ -282,9 +467,13 @@ static ssize_t qcow1_write_cluster(struct qcow *q, u64 offset, void *buf, u32 sr > > goto error; > } > + > + /* Update the cached level2 entry */ > + l2t[l2t_idx] = clust_start; > } > > return len; > + > free_l2: > free(l2t); > error: > @@ -335,6 +524,7 @@ static void qcow1_disk_close(struct disk_image *disk) > > q = disk->priv; > > + free_cache(q); > free(q->table.l1_table); > free(q->header); > free(q); > @@ -425,6 +615,8 @@ static struct disk_image *qcow2_probe(int fd, bool readonly) > goto error; > > q->fd = fd; > + q->root = RB_ROOT; > + INIT_LIST_HEAD(&q->lru_list); > > h = q->header = qcow2_read_header(fd); > if (!h) > @@ -519,6 +711,8 @@ static struct disk_image *qcow1_probe(int fd, bool readonly) > goto error; > > q->fd = fd; > + q->root = RB_ROOT; > + INIT_LIST_HEAD(&q->lru_list); > > h = q->header = qcow1_read_header(fd); > if (!h)

diff --git a/tools/kvm/include/kvm/qcow.h b/tools/kvm/include/kvm/qcow.h index b6e7493..095566a 100644 --- a/tools/kvm/include/kvm/qcow.h +++ b/tools/kvm/include/kvm/qcow.h @@ -3,6 +3,8 @@ #include <linux/types.h> #include <stdbool.h> +#include <linux/rbtree.h> +#include <linux/list.h> #define QCOW_MAGIC (('Q' << 24) | ('F' << 16) | ('I' << 8) | 0xfb) @@ -17,6 +19,16 @@ #define QCOW2_OFLAG_COMPRESSED (1LL << 62) #define QCOW2_OFLAG_MASK (QCOW2_OFLAG_COPIED|QCOW2_OFLAG_COMPRESSED) +#define MAX_CACHE_NODES 128 + +struct qcow_l2_cache { + u64 offset; + u64 *table; + struct rb_node node; + struct list_head list; + struct qcow *qcow; +}; + struct qcow_table { u32 table_size; u64 *l1_table; @@ -26,6 +38,12 @@ struct qcow { void *header; struct qcow_table table; int fd; + + /* Level2 caching data structures */ + struct qcow_l2_cache cache; + struct rb_root root; + struct list_head lru_list; + int no_cached; }; struct qcow_header { diff --git a/tools/kvm/qcow.c b/tools/kvm/qcow.c index bb2345c..ca61374 100644 --- a/tools/kvm/qcow.c +++ b/tools/kvm/qcow.c @@ -16,6 +16,153 @@ #include <linux/kernel.h> #include <linux/types.h> +static inline int insert(struct rb_root *root, struct qcow_l2_cache *new) +{ + struct rb_node **link = &(root->rb_node), *parent = NULL; + u64 offset = new->offset; + + /* search the tree */ + while (*link) { + struct qcow_l2_cache *t; + + t = rb_entry(*link, struct qcow_l2_cache, node); + if (!t) + goto error; + + parent = *link; + + if (t->offset > offset) + link = &(*link)->rb_left; + else if (t->offset < offset) + link = &(*link)->rb_right; + else + goto out; + } + + /* add new node */ + rb_link_node(&new->node, parent, link); + rb_insert_color(&new->node, root); +out: + return 0; +error: + return -1; +} + +static inline struct qcow_l2_cache *search(struct rb_root *root, u64 offset) +{ + struct rb_node *link = root->rb_node; + + while (link) { + struct qcow_l2_cache *t; + + t = rb_entry(link, struct qcow_l2_cache, node); + if (!t) + goto out; + + if (t->offset > offset) + link = link->rb_left; + else if (t->offset < offset) + link = link->rb_right; + else + return t; + } +out: + return NULL; +} + +static inline void free_cache(struct qcow *q) +{ + struct list_head *pos, *n; + struct qcow_l2_cache *t; + struct rb_root *r = &q->root; + + list_for_each_safe(pos, n, &q->lru_list) { + /* Remove cache table from the list and RB tree */ + list_del(pos); + t = list_entry(pos, struct qcow_l2_cache, list); + rb_erase(&t->node, r); + + /* Free the cached node */ + free(t->table); + free(t); + } +} + +static inline int cache_table(struct qcow *q, u64 *table, u64 offset) +{ + struct qcow_l2_cache *n; + struct rb_root *r = &q->root; + struct qcow_l2_cache *lru; + + n = search(r, offset); + if (n) { + /* Update the LRU state */ + list_del_init(&n->list); + list_add_tail(&n->list, &q->lru_list); + return 0; + } + + lru = NULL; + if (q->no_cached == MAX_CACHE_NODES) { + /* Find the node for LRU replacement */ + lru = list_first_entry(&q->lru_list, struct qcow_l2_cache, list); + } + + n = calloc(1, sizeof(struct qcow_l2_cache)); + if (!n) + goto error; + + n->offset = offset; + n->table = table; + n->qcow = q; + RB_CLEAR_NODE(&n->node); + INIT_LIST_HEAD(&n->list); + + if (lru) { + /* LRU Replacemen */ + rb_replace_node(&lru->node, &n->node, r); + + /* + * The new node should be the last node in the LRU list + * therefore, do not list_replace instead delete the node to + * be replaced and add a new node at the end of the list. + */ + list_del_init(&lru->list); + list_add_tail(&n->list, &q->lru_list); + + /* Free the LRUed node */ + free(lru->table); + free(lru); + } else { + /* Add in RB Tree: Helps in searching faster */ + if (insert(r, n) < 0) + goto error; + + /* Add in LRU replacement list */ + list_add_tail(&n->list, &q->lru_list); + q->no_cached++; + } + + return 0; +error: + free(n); + return -1; +} + +static inline int search_table(struct qcow *q, u64 **table, u64 offset) +{ + struct qcow_l2_cache *c; + + *table = NULL; + + c = search(&q->root, offset); + if (!c) + return -1; + + *table = c->table; + return 0; +} + static inline u64 get_l1_index(struct qcow *q, u64 offset) { struct qcow_header *header = q->header; @@ -37,6 +184,43 @@ static inline u64 get_cluster_offset(struct qcow *q, u64 offset) return offset & ((1 << header->cluster_bits)-1); } +static int qcow1_read_l2_table(struct qcow *q, u64 **table, u64 offset) +{ + struct qcow_header *header = q->header; + u64 size; + u64 i; + u64 *t; + + *table = NULL; + size = 1 << header->l2_bits; + + /* search an entry for offset in cache */ + if (search_table(q, table, offset) >= 0) + return 0; + + t = calloc(size, sizeof(u64)); + if (!t) + goto error; + + /* table not cached: read from the disk */ + if (pread_in_full(q->fd, t, size * sizeof(u64), offset) < 0) + goto error; + + /* cache the table */ + if (cache_table(q, t, offset) < 0) + goto error; + + /* change cached table to CPU's byte-order */ + for (i = 0; i < size; i++) + be64_to_cpus(&t[i]); + + *table = t; + return 0; +error: + free(t); + return -1; +} + static ssize_t qcow1_read_cluster(struct qcow *q, u64 offset, void *dst, u32 dst_len) { struct qcow_header *header = q->header; @@ -51,8 +235,6 @@ static ssize_t qcow1_read_cluster(struct qcow *q, u64 offset, void *dst, u32 dst u64 l1_idx; u64 l2_idx; - l2_table = NULL; - cluster_size = 1 << header->cluster_bits; l1_idx = get_l1_index(q, offset); @@ -72,18 +254,16 @@ static ssize_t qcow1_read_cluster(struct qcow *q, u64 offset, void *dst, u32 dst goto zero_cluster; l2_table_size = 1 << header->l2_bits; - l2_table = calloc(l2_table_size, sizeof(u64)); - if (!l2_table) - goto out_error; - if (pread_in_full(q->fd, l2_table, l2_table_size * sizeof(u64), l2_table_offset) < 0) + /* read and cache level 2 table */ + if (qcow1_read_l2_table(q, &l2_table, l2_table_offset) < 0) goto out_error; l2_idx = get_l2_index(q, offset); if (l2_idx >= l2_table_size) goto out_error; - clust_start = be64_to_cpu(l2_table[l2_idx]) & ~header->oflag_mask; + clust_start = l2_table[l2_idx] & ~header->oflag_mask; if (!clust_start) goto zero_cluster; @@ -91,7 +271,6 @@ static ssize_t qcow1_read_cluster(struct qcow *q, u64 offset, void *dst, u32 dst goto out_error; out: - free(l2_table); return length; zero_cluster: @@ -221,15 +400,16 @@ static ssize_t qcow1_write_cluster(struct qcow *q, u64 offset, void *buf, u32 sr if (len > src_len) len = src_len; - l2t = calloc(l2t_sz, sizeof(u64)); - if (!l2t) - goto error; - l2t_off = table->l1_table[l1t_idx] & ~header->oflag_mask; if (l2t_off) { - if (pread_in_full(q->fd, l2t, l2t_sz * sizeof(u64), l2t_off) < 0) - goto free_l2; + /* cache level2 table */ + if (qcow1_read_l2_table(q, &l2t, l2t_off) < 0) + goto error; } else { + l2t = calloc(l2t_sz, sizeof(u64)); + if (!l2t) + goto error; + /* Capture the state of the consistent QCOW image */ f_sz = file_size(q->fd); if (!f_sz) @@ -251,6 +431,13 @@ static ssize_t qcow1_write_cluster(struct qcow *q, u64 offset, void *buf, u32 sr goto free_l2; } + if (cache_table(q, l2t, l2t_off) < 0) { + if (ftruncate(q->fd, f_sz) < 0) + goto free_l2; + + goto free_l2; + } + /* Update the in-core entry */ table->l1_table[l1t_idx] = l2t_off; } @@ -258,17 +445,15 @@ static ssize_t qcow1_write_cluster(struct qcow *q, u64 offset, void *buf, u32 sr /* Capture the state of the consistent QCOW image */ f_sz = file_size(q->fd); if (!f_sz) - goto free_l2; + goto error; - clust_start = be64_to_cpu(l2t[l2t_idx]) & ~header->oflag_mask; + clust_start = l2t[l2t_idx] & ~header->oflag_mask; if (!clust_start) { clust_start = ALIGN(f_sz, clust_sz); update_meta = true; } else update_meta = false; - free(l2t); - /* Write actual data */ if (pwrite_in_full(q->fd, buf, len, clust_start + clust_off) < 0) goto error; @@ -282,9 +467,13 @@ static ssize_t qcow1_write_cluster(struct qcow *q, u64 offset, void *buf, u32 sr goto error; } + + /* Update the cached level2 entry */ + l2t[l2t_idx] = clust_start; } return len; + free_l2: free(l2t); error: @@ -335,6 +524,7 @@ static void qcow1_disk_close(struct disk_image *disk) q = disk->priv; + free_cache(q); free(q->table.l1_table); free(q->header); free(q); @@ -425,6 +615,8 @@ static struct disk_image *qcow2_probe(int fd, bool readonly) goto error; q->fd = fd; + q->root = RB_ROOT; + INIT_LIST_HEAD(&q->lru_list); h = q->header = qcow2_read_header(fd); if (!h) @@ -519,6 +711,8 @@ static struct disk_image *qcow1_probe(int fd, bool readonly) goto error; q->fd = fd; + q->root = RB_ROOT; + INIT_LIST_HEAD(&q->lru_list); h = q->header = qcow1_read_header(fd); if (!h)

[v1,3/3] kvm tools: Add QCOW level2 caching support

Commit Message

Comments

Patch