[2/5] testb: implement block device operations

Message ID 971572972425927f702b33da37be14a9ab646a94.1501945859.git.shli@fb.com (mailing list archive)
State New, archived

Commit Message

Shaohua Li Aug. 5, 2017, 3:51 p.m. UTC
From: Kyungchan Koh <kkc6196@fb.com>

This creates/removes the disk when the user writes 1/0 to the 'power' attribute.
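
A minimal usage sketch, assuming the default configfs mount point and the
'testb' configfs group introduced earlier in this series (the paths are
illustrative):

	# mount -t configfs none /sys/kernel/config
	# mkdir /sys/kernel/config/testb/test1
	# echo 1 > /sys/kernel/config/testb/test1/power
	# echo 0 > /sys/kernel/config/testb/test1/power

With the disk naming in this patch, powering on the device above registers
a disk named testb_test1; writing 0 tears it down again.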

Signed-off-by: Kyungchan Koh <kkc6196@fb.com>
Signed-off-by: Shaohua Li <shli@fb.com>
---
 drivers/block/test_blk.c | 539 ++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 538 insertions(+), 1 deletion(-)

Comments

Jens Axboe Aug. 8, 2017, 8:34 p.m. UTC | #1
On 08/05/2017 09:51 AM, Shaohua Li wrote:
> +static struct testb_page *testb_insert_page(struct testb *testb,
> +	sector_t sector, unsigned long *lock_flag)
> +{
> +	u64 idx;
> +	struct testb_page *t_page;
> +
> +	assert_spin_locked(&testb->t_dev->lock);
> +
> +	t_page = testb_lookup_page(testb, sector, true);
> +	if (t_page)
> +		return t_page;
> +
> +	spin_unlock_irqrestore(&testb->t_dev->lock, *lock_flag);

Passing in lock flags through several functions is kind of iffy. It also
used to break on some architectures, though I don't think that's the
case anymore. The problem is that it usually ends up being code
locking instead of data-structure locking. The latter is much
cleaner.

> +static int copy_from_testb(struct testb *testb, struct page *dest,
> +	unsigned int off, sector_t sector, size_t n, unsigned long *lock_flag)

This also gets the lock flags passed in, but doesn't use them.
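
One way to get the data-structure locking described above is to keep every
lock/unlock pair inside a single function, so no flags travel across calls:
do the allocation and radix-tree preload with no lock held, then take the
lock only around the tree update. A minimal sketch against the helpers in
this patch (the restructuring itself is hypothetical; callers would then no
longer hold t_dev->lock across their copy loops, and testb_radix_tree_insert
already frees the duplicate page if another inserter raced in):

	static struct testb_page *testb_insert_page(struct testb *testb,
		sector_t sector)
	{
		u64 idx = sector >> PAGE_SECTORS_SHIFT;
		struct testb_page *t_page;
		unsigned long flags;

		/* Fast path: the page may already exist. */
		spin_lock_irqsave(&testb->t_dev->lock, flags);
		t_page = testb_lookup_page(testb, sector, true);
		spin_unlock_irqrestore(&testb->t_dev->lock, flags);
		if (t_page)
			return t_page;

		/* Allocate the backing page with no lock held. */
		t_page = testb_alloc_page(GFP_NOIO);
		if (!t_page)
			return NULL;

		if (radix_tree_preload(GFP_NOIO)) {
			testb_free_page(t_page);
			return NULL;
		}

		/* The spinlock now covers only the radix-tree update. */
		spin_lock_irqsave(&testb->t_dev->lock, flags);
		t_page->page->index = idx;
		t_page = testb_radix_tree_insert(testb, idx, t_page);
		spin_unlock_irqrestore(&testb->t_dev->lock, flags);

		radix_tree_preload_end();
		return t_page;
	}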

Patch

diff --git a/drivers/block/test_blk.c b/drivers/block/test_blk.c
index 93e8ec2..1b06ce7 100644
--- a/drivers/block/test_blk.c
+++ b/drivers/block/test_blk.c
@@ -9,9 +9,44 @@ 
 
 #include <linux/init.h>
 #include <linux/module.h>
+#include <linux/blk-mq.h>
 #include <linux/blkdev.h>
 #include <linux/configfs.h>
 #include <linux/radix-tree.h>
+#include <linux/idr.h>
+
+#define SECTOR_SHIFT		9
+#define PAGE_SECTORS_SHIFT	(PAGE_SHIFT - SECTOR_SHIFT)
+#define PAGE_SECTORS		(1 << PAGE_SECTORS_SHIFT)
+#define SECTOR_SIZE		(1 << SECTOR_SHIFT)
+#define SECTOR_MASK		(PAGE_SECTORS - 1)
+
+#define FREE_BATCH		16
+
+struct testb {
+	unsigned int index;
+	struct request_queue *q;
+	struct gendisk *disk;
+
+	struct testb_device *t_dev;
+
+	struct blk_mq_tag_set tag_set;
+
+	char disk_name[DISK_NAME_LEN];
+};
+
+/*
+ * testb_page is a page in memory for testb devices.
+ *
+ * @page:	The page holding the data.
+ * @bitmap:	The bitmap tracks which blocks in the page hold data.
+ *		Each bit covers one block size worth of sectors; e.g. with
+ *		a 512B block size, sector 7 of a page uses bit 7.
+ */
+struct testb_page {
+	struct page *page;
+	unsigned long bitmap;
+};
 
 /*
  * Status flags for testb_device.
@@ -29,6 +64,7 @@  enum testb_device_flags {
  *
  * @item:	The struct used by configfs to represent items in fs.
  * @lock:	Protect data of the device
+ * @testb:	The device that these attributes belong to.
  * @pages:	The storage of the device.
  * @flags:	TEST_DEV_FL_ flags to indicate various status.
  *
@@ -42,6 +78,7 @@  enum testb_device_flags {
 struct testb_device {
 	struct config_item item;
 	spinlock_t lock;
+	struct testb *testb;
 	struct radix_tree_root pages;
 	unsigned long flags;
 
@@ -53,6 +90,10 @@  struct testb_device {
 	uint discard;
 };
 
+static int testb_poweron_device(struct testb_device *dev);
+static void testb_poweroff_device(struct testb_device *dev);
+static void testb_free_device_storage(struct testb_device *t_dev);
+
 static inline struct testb_device *to_testb_device(struct config_item *item)
 {
 	return item ? container_of(item, struct testb_device, item) : NULL;
@@ -140,11 +181,17 @@  static ssize_t testb_device_power_store(struct config_item *item,
 	if (!t_dev->power && newp) {
 		if (test_and_set_bit(TESTB_DEV_FL_UP, &t_dev->flags))
 			return count;
+		ret = testb_poweron_device(t_dev);
+		if (ret) {
+			clear_bit(TESTB_DEV_FL_UP, &t_dev->flags);
+			return ret;
+		}
 
 		set_bit(TESTB_DEV_FL_CONFIGURED, &t_dev->flags);
 		t_dev->power = newp;
 	} else if (to_testb_device(item)->power && !newp) {
 		t_dev->power = newp;
+		testb_poweroff_device(t_dev);
 		clear_bit(TESTB_DEV_FL_UP, &t_dev->flags);
 	}
 
@@ -165,7 +212,10 @@  static struct configfs_attribute *testb_device_attrs[] = {
 
 static void testb_device_release(struct config_item *item)
 {
-	kfree(to_testb_device(item));
+	struct testb_device *t_dev = to_testb_device(item);
+
+	testb_free_device_storage(t_dev);
+	kfree(t_dev);
 }
 
 static struct configfs_item_operations testb_device_ops = {
@@ -186,6 +236,8 @@  config_item *testb_group_make_item(struct config_group *group, const char *name)
 	t_dev = kzalloc(sizeof(struct testb_device), GFP_KERNEL);
 	if (!t_dev)
 		return ERR_PTR(-ENOMEM);
+	spin_lock_init(&t_dev->lock);
+	INIT_RADIX_TREE(&t_dev->pages, GFP_ATOMIC);
 
 	config_item_init_type_name(&t_dev->item, name, &testb_device_type);
 
@@ -202,6 +254,12 @@  config_item *testb_group_make_item(struct config_group *group, const char *name)
 static void
 testb_group_drop_item(struct config_group *group, struct config_item *item)
 {
+	struct testb_device *t_dev = to_testb_device(item);
+
+	if (test_and_clear_bit(TESTB_DEV_FL_UP, &t_dev->flags)) {
+		testb_poweroff_device(t_dev);
+		t_dev->power = 0;
+	}
 	config_item_put(item);
 }
 
@@ -237,6 +295,473 @@  static struct configfs_subsystem testb_subsys = {
 	},
 };
 
+static DEFINE_IDA(testb_indices);
+static DEFINE_MUTEX(testb_lock);
+static int testb_major;
+
+static struct testb_page *testb_alloc_page(gfp_t gfp_flags)
+{
+	struct testb_page *t_page;
+
+	t_page = kmalloc(sizeof(struct testb_page), gfp_flags);
+	if (!t_page)
+		goto out;
+
+	t_page->page = alloc_pages(gfp_flags, 0);
+	if (!t_page->page)
+		goto out_freepage;
+
+	t_page->bitmap = 0;
+	return t_page;
+out_freepage:
+	kfree(t_page);
+out:
+	return NULL;
+}
+
+static void testb_free_page(struct testb_page *t_page)
+{
+	WARN_ON(!t_page);
+
+	__free_page(t_page->page);
+	kfree(t_page);
+}
+
+static void testb_free_sector(struct testb *testb, sector_t sector)
+{
+	unsigned int sector_bit;
+	u64 idx;
+	struct testb_page *t_page, *ret;
+	struct radix_tree_root *root;
+
+	assert_spin_locked(&testb->t_dev->lock);
+
+	root = &testb->t_dev->pages;
+	idx = sector >> PAGE_SECTORS_SHIFT;
+	sector_bit = (sector & SECTOR_MASK);
+
+	t_page = radix_tree_lookup(root, idx);
+	if (t_page) {
+		__clear_bit(sector_bit, &t_page->bitmap);
+
+		if (!t_page->bitmap) {
+			ret = radix_tree_delete_item(root, idx, t_page);
+			WARN_ON(ret != t_page);
+			testb_free_page(ret);
+		}
+	}
+}
+
+static struct testb_page *testb_radix_tree_insert(struct testb *testb, u64 idx,
+	struct testb_page *t_page)
+{
+	struct radix_tree_root *root;
+
+	assert_spin_locked(&testb->t_dev->lock);
+
+	root = &testb->t_dev->pages;
+
+	if (radix_tree_insert(root, idx, t_page)) {
+		testb_free_page(t_page);
+		t_page = radix_tree_lookup(root, idx);
+		WARN_ON(!t_page || t_page->page->index != idx);
+	}
+
+	return t_page;
+}
+
+static void testb_free_device_storage(struct testb_device *t_dev)
+{
+	unsigned long pos = 0;
+	int nr_pages;
+	struct testb_page *ret, *t_pages[FREE_BATCH];
+	struct radix_tree_root *root;
+
+	root = &t_dev->pages;
+
+	do {
+		int i;
+
+		nr_pages = radix_tree_gang_lookup(root,
+				(void **)t_pages, pos, FREE_BATCH);
+
+		for (i = 0; i < nr_pages; i++) {
+			pos = t_pages[i]->page->index;
+			ret = radix_tree_delete_item(root, pos, t_pages[i]);
+			WARN_ON(ret != t_pages[i]);
+			testb_free_page(ret);
+		}
+
+		pos++;
+	} while (nr_pages == FREE_BATCH);
+}
+
+static struct testb_page *testb_lookup_page(struct testb *testb,
+	sector_t sector, bool for_write)
+{
+	unsigned int sector_bit;
+	u64 idx;
+	struct testb_page *t_page;
+
+	assert_spin_locked(&testb->t_dev->lock);
+
+	idx = sector >> PAGE_SECTORS_SHIFT;
+	sector_bit = (sector & SECTOR_MASK);
+
+	t_page = radix_tree_lookup(&testb->t_dev->pages, idx);
+	WARN_ON(t_page && t_page->page->index != idx);
+
+	if (t_page && (for_write || test_bit(sector_bit, &t_page->bitmap)))
+		return t_page;
+
+	return NULL;
+}
+
+static struct testb_page *testb_insert_page(struct testb *testb,
+	sector_t sector, unsigned long *lock_flag)
+{
+	u64 idx;
+	struct testb_page *t_page;
+
+	assert_spin_locked(&testb->t_dev->lock);
+
+	t_page = testb_lookup_page(testb, sector, true);
+	if (t_page)
+		return t_page;
+
+	spin_unlock_irqrestore(&testb->t_dev->lock, *lock_flag);
+
+	t_page = testb_alloc_page(GFP_NOIO);
+	if (!t_page)
+		goto out_lock;
+
+	if (radix_tree_preload(GFP_NOIO))
+		goto out_freepage;
+
+	spin_lock_irqsave(&testb->t_dev->lock, *lock_flag);
+	idx = sector >> PAGE_SECTORS_SHIFT;
+	t_page->page->index = idx;
+	t_page = testb_radix_tree_insert(testb, idx, t_page);
+	radix_tree_preload_end();
+
+	return t_page;
+out_freepage:
+	testb_free_page(t_page);
+out_lock:
+	spin_lock_irqsave(&testb->t_dev->lock, *lock_flag);
+	return testb_lookup_page(testb, sector, true);
+}
+
+static int copy_to_testb(struct testb *testb, struct page *source,
+	unsigned int off, sector_t sector, size_t n, unsigned long *lock_flag)
+{
+	size_t temp, count = 0;
+	unsigned int offset;
+	struct testb_page *t_page;
+	void *dst, *src;
+
+	while (count < n) {
+		temp = min_t(size_t, testb->t_dev->blocksize, n - count);
+
+		offset = (sector & SECTOR_MASK) << SECTOR_SHIFT;
+		t_page = testb_insert_page(testb, sector, lock_flag);
+		if (!t_page)
+			return -ENOSPC;
+
+		src = kmap_atomic(source);
+		dst = kmap_atomic(t_page->page);
+		memcpy(dst + offset, src + off + count, temp);
+		kunmap_atomic(dst);
+		kunmap_atomic(src);
+
+		__set_bit(sector & SECTOR_MASK, &t_page->bitmap);
+
+		count += temp;
+		sector += temp >> SECTOR_SHIFT;
+	}
+	return 0;
+}
+
+static int copy_from_testb(struct testb *testb, struct page *dest,
+	unsigned int off, sector_t sector, size_t n, unsigned long *lock_flag)
+{
+	size_t temp, count = 0;
+	unsigned int offset;
+	struct testb_page *t_page;
+	void *dst, *src;
+
+	while (count < n) {
+		temp = min_t(size_t, testb->t_dev->blocksize, n - count);
+
+		offset = (sector & SECTOR_MASK) << SECTOR_SHIFT;
+		t_page = testb_lookup_page(testb, sector, false);
+
+		dst = kmap_atomic(dest);
+		if (!t_page) {
+			memset(dst + off + count, 0, temp);
+			goto next;
+		}
+		src = kmap_atomic(t_page->page);
+		memcpy(dst + off + count, src + offset, temp);
+		kunmap_atomic(src);
+next:
+		kunmap_atomic(dst);
+
+		count += temp;
+		sector += temp >> SECTOR_SHIFT;
+	}
+	return 0;
+}
+
+static void testb_handle_discard(struct testb *testb, sector_t sector, size_t n)
+{
+	size_t temp;
+	unsigned long lock_flag;
+
+	spin_lock_irqsave(&testb->t_dev->lock, lock_flag);
+	while (n > 0) {
+		temp = min_t(size_t, n, testb->t_dev->blocksize);
+		testb_free_sector(testb, sector);
+		sector += temp >> SECTOR_SHIFT;
+		n -= temp;
+	}
+	spin_unlock_irqrestore(&testb->t_dev->lock, lock_flag);
+}
+
+static int testb_handle_flush(struct testb *testb)
+{
+	return 0;
+}
+
+static int testb_transfer(struct testb *testb, struct page *page,
+	unsigned int len, unsigned int off, bool is_write, sector_t sector,
+	unsigned long *lock_flags)
+{
+	int err = 0;
+
+	if (!is_write) {
+		err = copy_from_testb(testb, page, off, sector, len,
+						lock_flags);
+		flush_dcache_page(page);
+	} else {
+		flush_dcache_page(page);
+		err = copy_to_testb(testb, page, off, sector, len,
+						lock_flags);
+	}
+
+	return err;
+}
+
+static int testb_handle_rq(struct request *rq)
+{
+	struct testb *testb = rq->q->queuedata;
+	int err;
+	unsigned int len;
+	sector_t sector;
+	struct req_iterator iter;
+	struct bio_vec bvec;
+	unsigned long lock_flag;
+
+	sector = blk_rq_pos(rq);
+
+	if (req_op(rq) == REQ_OP_DISCARD) {
+		testb_handle_discard(testb, sector, blk_rq_bytes(rq));
+		return 0;
+	} else if (req_op(rq) == REQ_OP_FLUSH)
+		return testb_handle_flush(testb);
+
+	spin_lock_irqsave(&testb->t_dev->lock, lock_flag);
+	rq_for_each_segment(bvec, rq, iter) {
+		len = bvec.bv_len;
+		err = testb_transfer(testb, bvec.bv_page, len, bvec.bv_offset,
+				     op_is_write(req_op(rq)), sector,
+				     &lock_flag);
+		if (err) {
+			spin_unlock_irqrestore(&testb->t_dev->lock, lock_flag);
+			return err;
+		}
+		sector += len >> SECTOR_SHIFT;
+	}
+	spin_unlock_irqrestore(&testb->t_dev->lock, lock_flag);
+
+	return 0;
+}
+
+static blk_status_t
+testb_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd)
+{
+	int err;
+
+	blk_mq_start_request(bd->rq);
+
+	err = testb_handle_rq(bd->rq);
+	if (err)
+		return errno_to_blk_status(err);
+
+	blk_mq_complete_request(bd->rq);
+	return BLK_STS_OK;
+}
+
+static void testb_softirq_done_fn(struct request *rq)
+{
+	blk_mq_end_request(rq, BLK_STS_OK);
+}
+
+static const struct blk_mq_ops testb_mq_ops = {
+	.queue_rq	= testb_queue_rq,
+	.complete	= testb_softirq_done_fn,
+};
+
+static const struct block_device_operations testb_fops = {
+	.owner		= THIS_MODULE,
+};
+
+static void testb_free_bdev(struct testb *testb)
+{
+	mutex_lock(&testb_lock);
+	ida_simple_remove(&testb_indices, testb->index);
+	mutex_unlock(&testb_lock);
+
+	blk_cleanup_queue(testb->q);
+	blk_mq_free_tag_set(&testb->tag_set);
+
+	kfree(testb);
+}
+
+static void testb_gendisk_unregister(struct testb *testb)
+{
+	del_gendisk(testb->disk);
+
+	put_disk(testb->disk);
+}
+
+static void testb_poweroff_device(struct testb_device *dev)
+{
+	testb_gendisk_unregister(dev->testb);
+	testb_free_bdev(dev->testb);
+}
+
+static void testb_config_discard(struct testb *testb)
+{
+	if (testb->t_dev->discard == 0)
+		return;
+	testb->q->limits.discard_granularity = testb->t_dev->blocksize;
+	testb->q->limits.discard_alignment = testb->t_dev->blocksize;
+	blk_queue_max_discard_sectors(testb->q, UINT_MAX >> 9);
+	queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, testb->q);
+}
+
+static void testb_config_flush(struct testb *testb)
+{
+	blk_queue_write_cache(testb->q, true, true);
+	blk_queue_flush_queueable(testb->q, true);
+}
+
+static int testb_gendisk_register(struct testb *testb)
+{
+	sector_t size;
+	struct gendisk *disk;
+
+	disk = testb->disk = alloc_disk(DISK_MAX_PARTS);
+	if (!disk)
+		return -ENOMEM;
+
+	size = testb->t_dev->size;
+	set_capacity(disk, size >> 9);
+
+	disk->flags		= GENHD_FL_EXT_DEVT;
+	disk->major		= testb_major;
+	disk->first_minor	= testb->index * DISK_MAX_PARTS;
+	disk->fops		= &testb_fops;
+	disk->private_data	= testb;
+	disk->queue		= testb->q;
+	snprintf(disk->disk_name, DISK_NAME_LEN, "%s", testb->disk_name);
+
+	add_disk(testb->disk);
+	return 0;
+}
+
+static int testb_alloc_bdev(struct testb_device *t_dev)
+{
+	int ret;
+	struct testb *testb;
+
+	testb = kzalloc(sizeof(struct testb), GFP_KERNEL);
+	if (!testb) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	t_dev->blocksize = (t_dev->blocksize >> SECTOR_SHIFT) << SECTOR_SHIFT;
+	t_dev->blocksize = clamp_t(uint, t_dev->blocksize, 512, 4096);
+
+	if (t_dev->nr_queues > nr_cpu_ids)
+		t_dev->nr_queues = nr_cpu_ids;
+	else if (!t_dev->nr_queues)
+		t_dev->nr_queues = 1;
+
+	testb->t_dev = t_dev;
+	t_dev->testb = testb;
+
+	testb->tag_set.ops = &testb_mq_ops;
+	testb->tag_set.nr_hw_queues = t_dev->nr_queues;
+	testb->tag_set.queue_depth = t_dev->q_depth;
+	testb->tag_set.numa_node = NUMA_NO_NODE;
+	testb->tag_set.cmd_size = 0;
+	testb->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE |
+			      BLK_MQ_F_BLOCKING;
+	testb->tag_set.driver_data = testb;
+
+	ret = blk_mq_alloc_tag_set(&testb->tag_set);
+	if (ret)
+		goto out_cleanup_queues;
+
+	testb->q = blk_mq_init_queue(&testb->tag_set);
+	if (IS_ERR(testb->q)) {
+		ret = -ENOMEM;
+		goto out_cleanup_tags;
+	}
+
+	testb->q->queuedata = testb;
+	queue_flag_set_unlocked(QUEUE_FLAG_NONROT, testb->q);
+	queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, testb->q);
+
+	testb_config_discard(testb);
+	testb_config_flush(testb);
+
+	blk_queue_logical_block_size(testb->q, t_dev->blocksize);
+	blk_queue_physical_block_size(testb->q, t_dev->blocksize);
+
+	snprintf(testb->disk_name, CONFIGFS_ITEM_NAME_LEN, "testb_%s",
+		 t_dev->item.ci_name);
+
+	mutex_lock(&testb_lock);
+	testb->index = ida_simple_get(&testb_indices, 0, 0, GFP_KERNEL);
+	mutex_unlock(&testb_lock);
+
+	return 0;
+out_cleanup_tags:
+	blk_mq_free_tag_set(&testb->tag_set);
+out_cleanup_queues:
+	kfree(testb);
+out:
+	return ret;
+}
+
+static int testb_poweron_device(struct testb_device *dev)
+{
+	int ret;
+
+	ret = testb_alloc_bdev(dev);
+	if (ret)
+		return ret;
+	if (testb_gendisk_register(dev->testb)) {
+		testb_free_bdev(dev->testb);
+		return -EINVAL;
+	}
+	return 0;
+}
+
 static int __init testb_init(void)
 {
 	int ret = 0;
@@ -245,12 +770,24 @@  static int __init testb_init(void)
 	config_group_init(&subsys->su_group);
 	mutex_init(&subsys->su_mutex);
 
+	testb_major = register_blkdev(0, "testb");
+	if (testb_major < 0)
+		return testb_major;
+
 	ret = configfs_register_subsystem(subsys);
+	if (ret)
+		goto out_unregister;
+
+	return 0;
+out_unregister:
+	unregister_blkdev(testb_major, "testb");
 	return ret;
 }
 
 static void __exit testb_exit(void)
 {
+	unregister_blkdev(testb_major, "testb");
+
 	configfs_unregister_subsystem(&testb_subsys);
 }