diff mbox

[RFC,05/22] block/pcache: add aio requests into cache

Message ID 20160825134421.20231-6-pbutsykin@virtuozzo.com (mailing list archive)
State New, archived
Headers show

Commit Message

Pavel Butsykin Aug. 25, 2016, 1:44 p.m. UTC
For storing requests use an rbtree, here are add basic operations on the
rbtree to work with  cache nodes.

Signed-off-by: Pavel Butsykin <pbutsykin@virtuozzo.com>
---
 block/pcache.c | 190 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 189 insertions(+), 1 deletion(-)
diff mbox

Patch

diff --git a/block/pcache.c b/block/pcache.c
index 7f221d6..f5022f9 100644
--- a/block/pcache.c
+++ b/block/pcache.c
@@ -27,6 +27,7 @@ 
 #include "block/raw-aio.h"
 #include "qapi/error.h"
 #include "qapi/qmp/qstring.h"
+#include "qemu/rbtree.h"
 
 #define PCACHE_DEBUG
 
@@ -37,9 +38,53 @@ 
 #define DPRINTF(fmt, ...) do { } while (0)
 #endif
 
+typedef struct RbNodeKey {
+    uint64_t    num;
+    uint32_t    size;
+} RbNodeKey;
+
+typedef struct BlockNode {
+    struct RbNode               rb_node;
+    union {
+        RbNodeKey               key;
+        struct {
+            uint64_t            sector_num;
+            uint32_t            nb_sectors;
+        };
+    };
+    QTAILQ_ENTRY(BlockNode) entry;
+} BlockNode;
+
+typedef struct PCNode {
+    BlockNode cm;
+
+    uint8_t                  *data;
+} PCNode;
+
+typedef struct ReqStor {
+    struct {
+        struct RbRoot root;
+        CoMutex       lock;
+    } tree;
+
+    uint32_t curr_size;
+} ReqStor;
+
+typedef struct BDRVPCacheState {
+    BlockDriverState **bs;
+
+    ReqStor pcache;
+
+    struct {
+        QTAILQ_HEAD(pcache_head, BlockNode) head;
+        CoMutex lock;
+    } list;
+} BDRVPCacheState;
+
 typedef struct PrefCacheAIOCB {
     BlockAIOCB common;
 
+    BDRVPCacheState *s;
     QEMUIOVector *qiov;
     uint64_t sector_num;
     uint32_t nb_sectors;
@@ -64,6 +109,124 @@  static QemuOptsList runtime_opts = {
     },
 };
 
+#define PCNODE(_n) ((PCNode *)(_n))
+
+static int pcache_key_cmp(const RbNodeKey *key1, const RbNodeKey *key2)
+{
+    assert(key1 != NULL);
+    assert(key2 != NULL);
+
+    if (key1->num >= key2->num + key2->size) {
+        return 1;
+    }
+    if (key1->num + key1->size <= key2->num) {
+        return -1;
+    }
+
+    return 0;
+}
+
+static void *node_insert(struct RbRoot *root, BlockNode *node)
+{
+    struct RbNode **new = &(root->rb_node), *parent = NULL;
+
+    /* Figure out where to put new node */
+    while (*new) {
+        BlockNode *this = container_of(*new, BlockNode, rb_node);
+        int result = pcache_key_cmp(&node->key, &this->key);
+        if (result == 0) {
+            return this;
+        }
+        parent = *new;
+        new = result < 0 ? &((*new)->rb_left) : &((*new)->rb_right);
+    }
+    /* Add new node and rebalance tree. */
+    rb_link_node(&node->rb_node, parent, new);
+    rb_insert_color(&node->rb_node, root);
+
+    return node;
+}
+
+static inline PCNode *pcache_node_insert(struct RbRoot *root, PCNode *node)
+{
+    return node_insert(root, &node->cm);
+}
+
+static inline void pcache_node_free(PCNode *node)
+{
+    g_free(node->data);
+    g_slice_free1(sizeof(*node), node);
+}
+
+static inline void *pcache_node_alloc(RbNodeKey* key)
+{
+    PCNode *node = g_slice_alloc(sizeof(*node));
+
+    node->cm.sector_num = key->num;
+    node->cm.nb_sectors = key->size;
+    node->data = g_malloc(node->cm.nb_sectors << BDRV_SECTOR_BITS);
+
+    return node;
+}
+
+static bool pcache_node_find_and_create(PrefCacheAIOCB *acb, RbNodeKey *key,
+                                        PCNode **out_node)
+{
+    BDRVPCacheState *s = acb->s;
+    PCNode *new_node = pcache_node_alloc(key);
+    PCNode *found;
+
+    qemu_co_mutex_lock(&s->pcache.tree.lock);
+    found = pcache_node_insert(&s->pcache.tree.root, new_node);
+    qemu_co_mutex_unlock(&s->pcache.tree.lock);
+    if (found != new_node) {
+        pcache_node_free(new_node);
+        *out_node = found;
+        return false;
+    }
+    atomic_add(&s->pcache.curr_size, new_node->cm.nb_sectors);
+
+    qemu_co_mutex_lock(&s->list.lock);
+    QTAILQ_INSERT_HEAD(&s->list.head, &new_node->cm, entry);
+    qemu_co_mutex_unlock(&s->list.lock);
+
+    *out_node = new_node;
+    return true;
+}
+
+static inline void prefetch_init_key(PrefCacheAIOCB *acb, RbNodeKey* key)
+{
+    key->num = acb->sector_num;
+    key->size = acb->nb_sectors;
+}
+
+enum {
+    PREFETCH_NEW_NODE  = 0,
+    PREFETCH_FULL_UP   = 1,
+    PREFETCH_PART_UP   = 2
+};
+
+static int32_t pcache_prefetch(PrefCacheAIOCB *acb)
+{
+    RbNodeKey key;
+    PCNode *node = NULL;
+
+    prefetch_init_key(acb, &key);
+    if (pcache_node_find_and_create(acb, &key, &node)) {
+        return PREFETCH_NEW_NODE;
+    }
+
+    /* Node covers the whole request */
+    if (node->cm.sector_num <= acb->sector_num &&
+        node->cm.sector_num + node->cm.nb_sectors >= acb->sector_num +
+                                                     acb->nb_sectors)
+    {
+        return PREFETCH_FULL_UP;
+    }
+
+    return PREFETCH_PART_UP;
+}
+
 static void pcache_aio_cb(void *opaque, int ret)
 {
     PrefCacheAIOCB *acb = opaque;
@@ -80,6 +243,7 @@  static PrefCacheAIOCB *pcache_aio_get(BlockDriverState *bs, int64_t sector_num,
 {
     PrefCacheAIOCB *acb = qemu_aio_get(&pcache_aiocb_info, bs, cb, opaque);
 
+    acb->s = bs->opaque;
     acb->sector_num = sector_num;
     acb->nb_sectors = nb_sectors;
     acb->qiov = qiov;
@@ -99,6 +263,8 @@  static BlockAIOCB *pcache_aio_readv(BlockDriverState *bs,
     PrefCacheAIOCB *acb = pcache_aio_get(bs, sector_num, qiov, nb_sectors, cb,
                                          opaque, QEMU_AIO_READ);
 
+    pcache_prefetch(acb);
+
     bdrv_aio_readv(bs->file, sector_num, qiov, nb_sectors,
                    pcache_aio_cb, acb);
     return &acb->common;
@@ -119,6 +285,17 @@  static BlockAIOCB *pcache_aio_writev(BlockDriverState *bs,
     return &acb->common;
 }
 
+static void pcache_state_init(QemuOpts *opts, BDRVPCacheState *s)
+{
+    DPRINTF("pcache configure:\n");
+
+    s->pcache.tree.root = RB_ROOT;
+    qemu_co_mutex_init(&s->pcache.tree.lock);
+    QTAILQ_INIT(&s->list.head);
+    qemu_co_mutex_init(&s->list.lock);
+    s->pcache.curr_size = 0;
+}
+
 static int pcache_file_open(BlockDriverState *bs, QDict *options, int flags,
                             Error **errp)
 {
@@ -140,7 +317,9 @@  static int pcache_file_open(BlockDriverState *bs, QDict *options, int flags,
     if (local_err) {
         ret = -EINVAL;
         error_propagate(errp, local_err);
+        goto fail;
     }
+    pcache_state_init(opts, bs->opaque);
 fail:
     qemu_opts_del(opts);
     return ret;
@@ -148,6 +327,15 @@  fail:
 
 static void pcache_close(BlockDriverState *bs)
 {
+    uint32_t cnt = 0;
+    BDRVPCacheState *s = bs->opaque;
+    BlockNode *node, *next;
+    QTAILQ_FOREACH_SAFE(node, &s->list.head, entry, next) {
+        QTAILQ_REMOVE(&s->list.head, node, entry);
+        pcache_node_free(PCNODE(node));
+        cnt++;
+    }
+    DPRINTF("used %d nodes\n", cnt);
 }
 
 static void pcache_parse_filename(const char *filename, QDict *options,
@@ -170,7 +358,7 @@  static bool pcache_recurse_is_first_non_filter(BlockDriverState *bs,
 static BlockDriver bdrv_pcache = {
     .format_name                        = "pcache",
     .protocol_name                      = "pcache",
-    .instance_size                      = 0,
+    .instance_size                      = sizeof(BDRVPCacheState),
 
     .bdrv_parse_filename                = pcache_parse_filename,
     .bdrv_file_open                     = pcache_file_open,