@@ -46,6 +46,7 @@ struct testb {
atomic_long_t cur_bytes;
struct hrtimer timer;
+ unsigned long cache_flush_pos;
};
/*
@@ -62,16 +63,27 @@ struct testb_page {
};
/*
+ * The highest 2 bits of the bitmap are reserved for special purposes. LOCK
+ * means the cache page is currently being flushed to storage. FREE means
+ * the cache page has been freed and should be skipped when flushing to
+ * storage. See testb_make_cache_space().
+ */
+#define TESTB_PAGE_LOCK (sizeof(unsigned long) * 8 - 1)
+#define TESTB_PAGE_FREE (sizeof(unsigned long) * 8 - 2)
+
+/*
* Status flags for testb_device.
*
* CONFIGURED: Device has been configured and turned on. Cannot reconfigure.
* UP: Device is currently on and visible in userspace.
* THROTTLED: Device is being throttled.
+ * CACHE: Device is using a write-back cache.
*/
enum testb_device_flags {
TESTB_DEV_FL_CONFIGURED = 0,
TESTB_DEV_FL_UP = 1,
TESTB_DEV_FL_THROTTLED = 2,
+ TESTB_DEV_FL_CACHE = 3,
};
/*
@@ -81,6 +93,8 @@ enum testb_device_flags {
* @lock: Protect data of the device
* @testb: The device that these attributes belong to.
* @pages: The storage of the device.
+ * @cache: The write-back cache for the device.
+ * @curr_cache: The amount of cache space currently in use.
* @flags: TEST_DEV_FL_ flags to indicate various status.
*
* @power: 1 means on; 0 means off.
@@ -90,13 +104,16 @@ enum testb_device_flags {
* @q_depth: The depth of each queue.
* @discard: If enable discard
* @mbps: Bandwidth throttle cap (in mb/s).
+ * @cache_size: The maximum capacity of the cache (in bytes).
*/
struct testb_device {
struct config_item item;
spinlock_t lock;
struct testb *testb;
struct radix_tree_root pages;
+ struct radix_tree_root cache;
unsigned long flags;
+ unsigned int curr_cache;
uint power;
u64 size;
@@ -105,11 +122,13 @@ struct testb_device {
uint q_depth;
uint discard;
uint mbps;
+ u64 cache_size;
};
static int testb_poweron_device(struct testb_device *dev);
static void testb_poweroff_device(struct testb_device *dev);
-static void testb_free_device_storage(struct testb_device *t_dev);
+static void testb_free_device_storage(struct testb_device *t_dev,
+ bool is_cache);
static inline struct testb_device *to_testb_device(struct config_item *item)
{
@@ -179,6 +198,7 @@ TESTB_DEVICE_ATTR(nr_queues, uint);
TESTB_DEVICE_ATTR(q_depth, uint);
TESTB_DEVICE_ATTR(discard, uint);
TESTB_DEVICE_ATTR(mbps, uint);
+TESTB_DEVICE_ATTR(cache_size, u64);
static ssize_t testb_device_power_show(struct config_item *item, char *page)
{
@@ -226,6 +246,7 @@ static struct configfs_attribute *testb_device_attrs[] = {
&testb_device_attr_q_depth,
&testb_device_attr_discard,
&testb_device_attr_mbps,
+ &testb_device_attr_cache_size,
NULL,
};
@@ -233,7 +254,7 @@ static void testb_device_release(struct config_item *item)
{
struct testb_device *t_dev = to_testb_device(item);
- testb_free_device_storage(t_dev);
+ testb_free_device_storage(t_dev, false);
kfree(t_dev);
}
@@ -257,6 +278,7 @@ config_item *testb_group_make_item(struct config_group *group, const char *name)
return ERR_PTR(-ENOMEM);
spin_lock_init(&t_dev->lock);
INIT_RADIX_TREE(&t_dev->pages, GFP_ATOMIC);
+ INIT_RADIX_TREE(&t_dev->cache, GFP_ATOMIC);
config_item_init_type_name(&t_dev->item, name, &testb_device_type);
@@ -267,6 +289,7 @@ config_item *testb_group_make_item(struct config_group *group, const char *name)
t_dev->q_depth = 64;
t_dev->discard = 1;
t_dev->mbps = -1;
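+	/* Default to a 100 MiB write-back cache; 0 disables caching. */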
+ t_dev->cache_size = 100 * 1024 * 1024ULL;
return &t_dev->item;
}
@@ -285,7 +308,7 @@ testb_group_drop_item(struct config_group *group, struct config_item *item)
static ssize_t memb_group_features_show(struct config_item *item, char *page)
{
- return snprintf(page, PAGE_SIZE, "bandwidth\n");
+ return snprintf(page, PAGE_SIZE, "bandwidth,cache\n");
}
CONFIGFS_ATTR_RO(memb_group_, features);
@@ -324,6 +347,11 @@ static inline int testb_throttled(struct testb *testb)
return test_bit(TESTB_DEV_FL_THROTTLED, &testb->t_dev->flags);
}
+static inline int testb_cache_active(struct testb *testb)
+{
+ return test_bit(TESTB_DEV_FL_CACHE, &testb->t_dev->flags);
+}
+
static struct testb_page *testb_alloc_page(gfp_t gfp_flags)
{
struct testb_page *t_page;
@@ -348,11 +376,15 @@ static void testb_free_page(struct testb_page *t_page)
{
WARN_ON(!t_page);
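+	/*
+	 * Mark the page FREE; if it is currently being flushed (LOCK set),
+	 * defer the actual free to testb_flush_cache_page(), which re-checks
+	 * the FREE bit once the flush is done.
+	 */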
+ __set_bit(TESTB_PAGE_FREE, &t_page->bitmap);
+ if (test_bit(TESTB_PAGE_LOCK, &t_page->bitmap))
+ return;
__free_page(t_page->page);
kfree(t_page);
}
-static void testb_free_sector(struct testb *testb, sector_t sector)
+static void testb_free_sector(struct testb *testb, sector_t sector,
+ bool is_cache)
{
unsigned int sector_bit;
u64 idx;
@@ -361,7 +393,7 @@ static void testb_free_sector(struct testb *testb, sector_t sector)
assert_spin_locked(&testb->t_dev->lock);
- root = &testb->t_dev->pages;
+ root = is_cache ? &testb->t_dev->cache : &testb->t_dev->pages;
idx = sector >> PAGE_SECTORS_SHIFT;
sector_bit = (sector & SECTOR_MASK);
@@ -373,36 +405,40 @@ static void testb_free_sector(struct testb *testb, sector_t sector)
ret = radix_tree_delete_item(root, idx, t_page);
WARN_ON(ret != t_page);
testb_free_page(ret);
+ if (is_cache)
+ testb->t_dev->curr_cache -= PAGE_SIZE;
}
}
}
static struct testb_page *testb_radix_tree_insert(struct testb *testb, u64 idx,
- struct testb_page *t_page)
+ struct testb_page *t_page, bool is_cache)
{
struct radix_tree_root *root;
assert_spin_locked(&testb->t_dev->lock);
- root = &testb->t_dev->pages;
+ root = is_cache ? &testb->t_dev->cache : &testb->t_dev->pages;
if (radix_tree_insert(root, idx, t_page)) {
testb_free_page(t_page);
t_page = radix_tree_lookup(root, idx);
WARN_ON(!t_page || t_page->page->index != idx);
- }
+	} else if (is_cache) {
+		testb->t_dev->curr_cache += PAGE_SIZE;
+	}
return t_page;
}
-static void testb_free_device_storage(struct testb_device *t_dev)
+static void testb_free_device_storage(struct testb_device *t_dev,
+ bool is_cache)
{
unsigned long pos = 0;
int nr_pages;
struct testb_page *ret, *t_pages[FREE_BATCH];
struct radix_tree_root *root;
- root = &t_dev->pages;
+ root = is_cache ? &t_dev->cache : &t_dev->pages;
do {
int i;
@@ -419,21 +455,27 @@ static void testb_free_device_storage(struct testb_device *t_dev)
pos++;
} while (nr_pages == FREE_BATCH);
+
+ if (is_cache)
+ t_dev->curr_cache = 0;
}
-static struct testb_page *testb_lookup_page(struct testb *testb,
- sector_t sector, bool for_write)
+static struct testb_page *__testb_lookup_page(struct testb *testb,
+ sector_t sector, bool for_write, bool is_cache)
{
unsigned int sector_bit;
u64 idx;
struct testb_page *t_page;
+ struct radix_tree_root *root;
assert_spin_locked(&testb->t_dev->lock);
idx = sector >> PAGE_SECTORS_SHIFT;
sector_bit = (sector & SECTOR_MASK);
- t_page = radix_tree_lookup(&testb->t_dev->pages, idx);
+ root = is_cache ? &testb->t_dev->cache : &testb->t_dev->pages;
+
+ t_page = radix_tree_lookup(root, idx);
WARN_ON(t_page && t_page->page->index != idx);
if (t_page && (for_write || test_bit(sector_bit, &t_page->bitmap)))
@@ -442,15 +484,27 @@ static struct testb_page *testb_lookup_page(struct testb *testb,
return NULL;
}
+static struct testb_page *testb_lookup_page(struct testb *testb,
+ sector_t sector, bool for_write, bool ignore_cache)
+{
+ struct testb_page *page = NULL;
+
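+	/* Look in the write-back cache first, then fall back to storage. */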
+ if (!ignore_cache)
+ page = __testb_lookup_page(testb, sector, for_write, true);
+ if (page)
+ return page;
+ return __testb_lookup_page(testb, sector, for_write, false);
+}
+
static struct testb_page *testb_insert_page(struct testb *testb,
- sector_t sector, unsigned long *lock_flag)
+ sector_t sector, unsigned long *lock_flag, bool ignore_cache)
{
u64 idx;
struct testb_page *t_page;
assert_spin_locked(&testb->t_dev->lock);
- t_page = testb_lookup_page(testb, sector, true);
+ t_page = testb_lookup_page(testb, sector, true, ignore_cache);
if (t_page)
return t_page;
@@ -466,7 +520,7 @@ static struct testb_page *testb_insert_page(struct testb *testb,
spin_lock_irqsave(&testb->t_dev->lock, *lock_flag);
idx = sector >> PAGE_SECTORS_SHIFT;
t_page->page->index = idx;
- t_page = testb_radix_tree_insert(testb, idx, t_page);
+ t_page = testb_radix_tree_insert(testb, idx, t_page, !ignore_cache);
radix_tree_preload_end();
return t_page;
@@ -474,11 +528,122 @@ static struct testb_page *testb_insert_page(struct testb *testb,
testb_free_page(t_page);
out_lock:
spin_lock_irqsave(&testb->t_dev->lock, *lock_flag);
- return testb_lookup_page(testb, sector, true);
+ return testb_lookup_page(testb, sector, true, ignore_cache);
+}
+
+static int
+testb_flush_cache_page(struct testb *testb, struct testb_page *c_page,
+ unsigned long *lock_flag)
+{
+ int i;
+ unsigned int offset;
+ u64 idx;
+ struct testb_page *t_page, *ret;
+ void *dst, *src;
+
+ assert_spin_locked(&testb->t_dev->lock);
+
+ idx = c_page->page->index;
+
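+	/*
+	 * Find (or allocate) the matching page in main storage, bypassing
+	 * the cache. This may temporarily drop the device lock.
+	 */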
+ t_page = testb_insert_page(testb, idx << PAGE_SECTORS_SHIFT,
+ lock_flag, true);
+
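+	/*
+	 * The lock may have been dropped above, so the cache page could
+	 * have been freed (FREE set) in the meantime. If so, release it,
+	 * and release the backing page too if it carries no data.
+	 */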
+ __clear_bit(TESTB_PAGE_LOCK, &c_page->bitmap);
+ if (test_bit(TESTB_PAGE_FREE, &c_page->bitmap)) {
+ testb_free_page(c_page);
+ if (t_page && t_page->bitmap == 0) {
+ ret = radix_tree_delete_item(&testb->t_dev->pages,
+ idx, t_page);
+ testb_free_page(t_page);
+ }
+ return 0;
+ }
+
+ if (!t_page)
+ return -ENOMEM;
+
+ src = kmap_atomic(c_page->page);
+ dst = kmap_atomic(t_page->page);
+
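+	/*
+	 * Copy each dirty block from the cache page to the backing page
+	 * and mark it present there.
+	 */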
+ for (i = 0; i < PAGE_SECTORS;
+ i += (testb->t_dev->blocksize >> SECTOR_SHIFT)) {
+ if (test_bit(i, &c_page->bitmap)) {
+ offset = (i << SECTOR_SHIFT);
+ memcpy(dst + offset, src + offset,
+ testb->t_dev->blocksize);
+ __set_bit(i, &t_page->bitmap);
+ }
+ }
+
+ kunmap_atomic(dst);
+ kunmap_atomic(src);
+
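+	/* The data now lives in main storage; drop the cache copy. */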
+ ret = radix_tree_delete_item(&testb->t_dev->cache, idx, c_page);
+ testb_free_page(ret);
+ testb->t_dev->curr_cache -= PAGE_SIZE;
+
+ return 0;
+}
+
+static int testb_make_cache_space(struct testb *testb,
+ unsigned long *lock_flag, size_t n)
+{
+ int i, err, nr_pages;
+ struct testb_page *c_pages[FREE_BATCH];
+ size_t flushed = 0, one_round;
+
+ assert_spin_locked(&testb->t_dev->lock);
+
+again:
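+	/* Done once there is room for n more bytes, or nothing is cached. */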
+ if (testb->t_dev->cache_size > testb->t_dev->curr_cache + n ||
+ testb->t_dev->curr_cache == 0)
+ return 0;
+
+ nr_pages = radix_tree_gang_lookup(&testb->t_dev->cache,
+ (void **)c_pages, testb->cache_flush_pos, FREE_BATCH);
+	/*
+	 * testb_flush_cache_page() may drop the device lock before it is
+	 * done with the c_pages. To avoid a race, set TESTB_PAGE_LOCK on
+	 * each page first so a concurrent free is deferred rather than
+	 * performed immediately.
+	 */
+ for (i = 0; i < nr_pages; i++) {
+ testb->cache_flush_pos = c_pages[i]->page->index;
+		/*
+		 * Skip any page that another thread is already flushing
+		 * to disk.
+		 */
+ if (test_bit(TESTB_PAGE_LOCK, &c_pages[i]->bitmap))
+ c_pages[i] = NULL;
+ else
+ __set_bit(TESTB_PAGE_LOCK, &c_pages[i]->bitmap);
+ }
+
+ one_round = 0;
+ for (i = 0; i < nr_pages; i++) {
+ if (c_pages[i] == NULL)
+ continue;
+ err = testb_flush_cache_page(testb, c_pages[i], lock_flag);
+ if (err)
+ return err;
+ one_round++;
+ }
+ flushed += one_round << PAGE_SHIFT;
+
+ if (n > flushed) {
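+		/* Scanned past the end of the tree; wrap around to index 0. */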
+ if (nr_pages == 0)
+ testb->cache_flush_pos = 0;
+ if (one_round == 0) {
+			/*
+			 * Every candidate page was locked by another
+			 * flusher; briefly drop the lock so they can
+			 * make progress.
+			 */
+ spin_unlock_irqrestore(&testb->t_dev->lock, *lock_flag);
+ spin_lock_irqsave(&testb->t_dev->lock, *lock_flag);
+ }
+ goto again;
+ }
+ return 0;
}
static int copy_to_testb(struct testb *testb, struct page *source,
- unsigned int off, sector_t sector, size_t n, unsigned long *lock_flag)
+ unsigned int off, sector_t sector, size_t n, unsigned long *lock_flag,
+ bool is_fua)
{
size_t temp, count = 0;
unsigned int offset;
@@ -488,8 +653,12 @@ static int copy_to_testb(struct testb *testb, struct page *source,
while (count < n) {
temp = min_t(size_t, testb->t_dev->blocksize, n - count);
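+		/*
+		 * Non-FUA writes land in the cache, so make room for one
+		 * more page before inserting.
+		 */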
+ if (testb_cache_active(testb) && !is_fua)
+ testb_make_cache_space(testb, lock_flag, PAGE_SIZE);
+
offset = (sector & SECTOR_MASK) << SECTOR_SHIFT;
- t_page = testb_insert_page(testb, sector, lock_flag);
+ t_page = testb_insert_page(testb, sector, lock_flag,
+ !testb_cache_active(testb) || is_fua);
if (!t_page)
return -ENOSPC;
@@ -501,6 +670,9 @@ static int copy_to_testb(struct testb *testb, struct page *source,
__set_bit(sector & SECTOR_MASK, &t_page->bitmap);
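+		/*
+		 * A FUA write goes straight to storage; invalidate any
+		 * stale copy of this sector left in the cache.
+		 */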
+ if (is_fua)
+ testb_free_sector(testb, sector, true);
+
count += temp;
sector += temp >> SECTOR_SHIFT;
}
@@ -519,7 +691,8 @@ static int copy_from_testb(struct testb *testb, struct page *dest,
temp = min_t(size_t, testb->t_dev->blocksize, n - count);
offset = (sector & SECTOR_MASK) << SECTOR_SHIFT;
- t_page = testb_lookup_page(testb, sector, false);
+ t_page = testb_lookup_page(testb, sector, false,
+ !testb_cache_active(testb));
dst = kmap_atomic(dest);
if (!t_page) {
@@ -546,7 +719,9 @@ static void testb_handle_discard(struct testb *testb, sector_t sector, size_t n)
spin_lock_irqsave(&testb->t_dev->lock, lock_flag);
while (n > 0) {
temp = min_t(size_t, n, testb->t_dev->blocksize);
- testb_free_sector(testb, sector);
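+		/* Drop the sector from storage and from the cache, if any. */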
+ testb_free_sector(testb, sector, false);
+ if (testb_cache_active(testb))
+ testb_free_sector(testb, sector, true);
sector += temp >> SECTOR_SHIFT;
n -= temp;
}
@@ -555,12 +730,28 @@ static void testb_handle_discard(struct testb *testb, sector_t sector, size_t n)
static int testb_handle_flush(struct testb *testb)
{
+ unsigned long lock_flag;
+ int err;
+
+ if (!testb_cache_active(testb))
+ return 0;
+
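+	/* A FLUSH request drains the entire write-back cache to storage. */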
+ spin_lock_irqsave(&testb->t_dev->lock, lock_flag);
+ while (true) {
+ err = testb_make_cache_space(testb, &lock_flag,
+ testb->t_dev->cache_size);
+ if (err || testb->t_dev->curr_cache == 0)
+ break;
+ }
+
+ WARN_ON(!radix_tree_empty(&testb->t_dev->cache));
+ spin_unlock_irqrestore(&testb->t_dev->lock, lock_flag);
-	return 0;
+	return err;
}
static int testb_transfer(struct testb *testb, struct page *page,
unsigned int len, unsigned int off, bool is_write, sector_t sector,
- unsigned long *lock_flags)
+ unsigned long *lock_flags, bool is_fua)
{
int err = 0;
@@ -571,7 +762,7 @@ static int testb_transfer(struct testb *testb, struct page *page,
} else {
flush_dcache_page(page);
err = copy_to_testb(testb, page, off, sector, len,
- lock_flags);
+ lock_flags, is_fua);
}
return err;
@@ -638,7 +829,7 @@ static int testb_handle_rq(struct request *rq)
len = bvec.bv_len;
err = testb_transfer(testb, bvec.bv_page, len, bvec.bv_offset,
op_is_write(req_op(rq)), sector,
- &lock_flag);
+					&lock_flag, rq->cmd_flags & REQ_FUA);
if (err) {
spin_unlock_irqrestore(&testb->t_dev->lock, lock_flag);
return err;
@@ -690,6 +881,8 @@ static void testb_free_bdev(struct testb *testb)
blk_cleanup_queue(testb->q);
blk_mq_free_tag_set(&testb->tag_set);
+ if (testb_cache_active(testb))
+ testb_free_device_storage(testb->t_dev, true);
kfree(testb);
}
@@ -799,6 +992,9 @@ static int testb_alloc_bdev(struct testb_device *t_dev)
testb->t_dev = t_dev;
t_dev->testb = testb;
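+	/* A non-zero cache_size enables the write-back cache. */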
+ if (t_dev->cache_size > 0)
+ set_bit(TESTB_DEV_FL_CACHE, &testb->t_dev->flags);
+
testb->tag_set.ops = &testb_mq_ops;
testb->tag_set.nr_hw_queues = t_dev->nr_queues;
testb->tag_set.queue_depth = t_dev->q_depth;
@@ -869,6 +1065,10 @@ static int __init testb_init(void)
int ret = 0;
struct configfs_subsystem *subsys = &testb_subsys;
+	/*
+	 * Make sure testb_page.bitmap can hold one bit per sector in a page
+	 * plus the two reserved bits (TESTB_PAGE_LOCK and TESTB_PAGE_FREE).
+	 */
+ if (sizeof(unsigned long) * 8 - 2 < (PAGE_SIZE >> SECTOR_SHIFT))
+ return -EINVAL;
+
config_group_init(&subsys->su_group);
mutex_init(&subsys->su_mutex);