From patchwork Sat Aug 5 15:51:46 2017
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Patchwork-Submitter: Shaohua Li
X-Patchwork-Id: 9883125
From: Shaohua Li
To: linux-block@vger.kernel.org, linux-raid@vger.kernel.org
Cc: kernel-team@fb.com, Kyungchan Koh , Kyungchan Koh
Subject: [PATCH 2/5] testb: implement block device operations
Date: Sat, 5 Aug 2017 08:51:46 -0700
Message-Id: <971572972425927f702b33da37be14a9ab646a94.1501945859.git.shli@fb.com>
X-Mailer: git-send-email 2.11.0
In-Reply-To: 
References: 
Sender: linux-block-owner@vger.kernel.org
Precedence: bulk
List-ID: 
X-Mailing-List: linux-block@vger.kernel.org

From: Kyungchan Koh

This creates/removes the disk when the user writes 1/0 to the 'power'
attribute.

Signed-off-by: Kyungchan Koh
Signed-off-by: Shaohua Li
---
 drivers/block/test_blk.c | 539 ++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 538 insertions(+), 1 deletion(-)

diff --git a/drivers/block/test_blk.c b/drivers/block/test_blk.c
index 93e8ec2..1b06ce7 100644
--- a/drivers/block/test_blk.c
+++ b/drivers/block/test_blk.c
@@ -9,9 +9,44 @@
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
+#include 
+
+#define SECTOR_SHIFT		9
+#define PAGE_SECTORS_SHIFT	(PAGE_SHIFT - SECTOR_SHIFT)
+#define PAGE_SECTORS		(1 << PAGE_SECTORS_SHIFT)
+#define SECTOR_SIZE		(1 << SECTOR_SHIFT)
+#define SECTOR_MASK		(PAGE_SECTORS - 1)
+
+#define FREE_BATCH		16
+
+struct testb {
+	unsigned int index;
+	struct request_queue *q;
+	struct gendisk *disk;
+
+	struct testb_device *t_dev;
+
+	struct blk_mq_tag_set tag_set;
+
+	char disk_name[DISK_NAME_LEN];
+};
+
+/*
+ * testb_page is a page in memory for testb devices.
+ *
+ * @page: The page holding the data.
+ * @bitmap: The bitmap represents which sector in the page has data. + * Each bit represents one block size. For example, sector 8 + * will use the 7th bit + */ +struct testb_page { + struct page *page; + unsigned long bitmap; +}; /* * Status flags for testb_device. @@ -29,6 +64,7 @@ enum testb_device_flags { * * @item: The struct used by configfs to represent items in fs. * @lock: Protect data of the device + * @testb: The device that these attributes belong to. * @pages: The storage of the device. * @flags: TEST_DEV_FL_ flags to indicate various status. * @@ -42,6 +78,7 @@ enum testb_device_flags { struct testb_device { struct config_item item; spinlock_t lock; + struct testb *testb; struct radix_tree_root pages; unsigned long flags; @@ -53,6 +90,10 @@ struct testb_device { uint discard; }; +static int testb_poweron_device(struct testb_device *dev); +static void testb_poweroff_device(struct testb_device *dev); +static void testb_free_device_storage(struct testb_device *t_dev); + static inline struct testb_device *to_testb_device(struct config_item *item) { return item ? container_of(item, struct testb_device, item) : NULL; @@ -140,11 +181,17 @@ static ssize_t testb_device_power_store(struct config_item *item, if (!t_dev->power && newp) { if (test_and_set_bit(TESTB_DEV_FL_UP, &t_dev->flags)) return count; + ret = testb_poweron_device(t_dev); + if (ret) { + clear_bit(TESTB_DEV_FL_UP, &t_dev->flags); + return -ENOMEM; + } set_bit(TESTB_DEV_FL_CONFIGURED, &t_dev->flags); t_dev->power = newp; } else if (to_testb_device(item)->power && !newp) { t_dev->power = newp; + testb_poweroff_device(t_dev); clear_bit(TESTB_DEV_FL_UP, &t_dev->flags); } @@ -165,7 +212,10 @@ static struct configfs_attribute *testb_device_attrs[] = { static void testb_device_release(struct config_item *item) { - kfree(to_testb_device(item)); + struct testb_device *t_dev = to_testb_device(item); + + testb_free_device_storage(t_dev); + kfree(t_dev); } static struct configfs_item_operations testb_device_ops = { @@ -186,6 +236,8 @@ config_item *testb_group_make_item(struct config_group *group, const char *name) t_dev = kzalloc(sizeof(struct testb_device), GFP_KERNEL); if (!t_dev) return ERR_PTR(-ENOMEM); + spin_lock_init(&t_dev->lock); + INIT_RADIX_TREE(&t_dev->pages, GFP_ATOMIC); config_item_init_type_name(&t_dev->item, name, &testb_device_type); @@ -202,6 +254,12 @@ config_item *testb_group_make_item(struct config_group *group, const char *name) static void testb_group_drop_item(struct config_group *group, struct config_item *item) { + struct testb_device *t_dev = to_testb_device(item); + + if (test_and_clear_bit(TESTB_DEV_FL_UP, &t_dev->flags)) { + testb_poweroff_device(t_dev); + t_dev->power = 0; + } config_item_put(item); } @@ -237,6 +295,473 @@ static struct configfs_subsystem testb_subsys = { }, }; +static DEFINE_IDA(testb_indices); +static DEFINE_MUTEX(testb_lock); +static int testb_major; + +static struct testb_page *testb_alloc_page(gfp_t gfp_flags) +{ + struct testb_page *t_page; + + t_page = kmalloc(sizeof(struct testb_page), gfp_flags); + if (!t_page) + goto out; + + t_page->page = alloc_pages(gfp_flags, 0); + if (!t_page->page) + goto out_freepage; + + t_page->bitmap = 0; + return t_page; +out_freepage: + kfree(t_page); +out: + return NULL; +} + +static void testb_free_page(struct testb_page *t_page) +{ + WARN_ON(!t_page); + + __free_page(t_page->page); + kfree(t_page); +} + +static void testb_free_sector(struct testb *testb, sector_t sector) +{ + unsigned int sector_bit; + u64 idx; + struct testb_page *t_page, 
*ret; + struct radix_tree_root *root; + + assert_spin_locked(&testb->t_dev->lock); + + root = &testb->t_dev->pages; + idx = sector >> PAGE_SECTORS_SHIFT; + sector_bit = (sector & SECTOR_MASK); + + t_page = radix_tree_lookup(root, idx); + if (t_page) { + __clear_bit(sector_bit, &t_page->bitmap); + + if (!t_page->bitmap) { + ret = radix_tree_delete_item(root, idx, t_page); + WARN_ON(ret != t_page); + testb_free_page(ret); + } + } +} + +static struct testb_page *testb_radix_tree_insert(struct testb *testb, u64 idx, + struct testb_page *t_page) +{ + struct radix_tree_root *root; + + assert_spin_locked(&testb->t_dev->lock); + + root = &testb->t_dev->pages; + + if (radix_tree_insert(root, idx, t_page)) { + testb_free_page(t_page); + t_page = radix_tree_lookup(root, idx); + WARN_ON(!t_page || t_page->page->index != idx); + } + + return t_page; +} + +static void testb_free_device_storage(struct testb_device *t_dev) +{ + unsigned long pos = 0; + int nr_pages; + struct testb_page *ret, *t_pages[FREE_BATCH]; + struct radix_tree_root *root; + + root = &t_dev->pages; + + do { + int i; + + nr_pages = radix_tree_gang_lookup(root, + (void **)t_pages, pos, FREE_BATCH); + + for (i = 0; i < nr_pages; i++) { + pos = t_pages[i]->page->index; + ret = radix_tree_delete_item(root, pos, t_pages[i]); + WARN_ON(ret != t_pages[i]); + testb_free_page(ret); + } + + pos++; + } while (nr_pages == FREE_BATCH); +} + +static struct testb_page *testb_lookup_page(struct testb *testb, + sector_t sector, bool for_write) +{ + unsigned int sector_bit; + u64 idx; + struct testb_page *t_page; + + assert_spin_locked(&testb->t_dev->lock); + + idx = sector >> PAGE_SECTORS_SHIFT; + sector_bit = (sector & SECTOR_MASK); + + t_page = radix_tree_lookup(&testb->t_dev->pages, idx); + WARN_ON(t_page && t_page->page->index != idx); + + if (t_page && (for_write || test_bit(sector_bit, &t_page->bitmap))) + return t_page; + + return NULL; +} + +static struct testb_page *testb_insert_page(struct testb *testb, + sector_t sector, unsigned long *lock_flag) +{ + u64 idx; + struct testb_page *t_page; + + assert_spin_locked(&testb->t_dev->lock); + + t_page = testb_lookup_page(testb, sector, true); + if (t_page) + return t_page; + + spin_unlock_irqrestore(&testb->t_dev->lock, *lock_flag); + + t_page = testb_alloc_page(GFP_NOIO); + if (!t_page) + goto out_lock; + + if (radix_tree_preload(GFP_NOIO)) + goto out_freepage; + + spin_lock_irqsave(&testb->t_dev->lock, *lock_flag); + idx = sector >> PAGE_SECTORS_SHIFT; + t_page->page->index = idx; + t_page = testb_radix_tree_insert(testb, idx, t_page); + radix_tree_preload_end(); + + return t_page; +out_freepage: + testb_free_page(t_page); +out_lock: + spin_lock_irqsave(&testb->t_dev->lock, *lock_flag); + return testb_lookup_page(testb, sector, true); +} + +static int copy_to_testb(struct testb *testb, struct page *source, + unsigned int off, sector_t sector, size_t n, unsigned long *lock_flag) +{ + size_t temp, count = 0; + unsigned int offset; + struct testb_page *t_page; + void *dst, *src; + + while (count < n) { + temp = min_t(size_t, testb->t_dev->blocksize, n - count); + + offset = (sector & SECTOR_MASK) << SECTOR_SHIFT; + t_page = testb_insert_page(testb, sector, lock_flag); + if (!t_page) + return -ENOSPC; + + src = kmap_atomic(source); + dst = kmap_atomic(t_page->page); + memcpy(dst + offset, src + off + count, temp); + kunmap_atomic(dst); + kunmap_atomic(src); + + __set_bit(sector & SECTOR_MASK, &t_page->bitmap); + + count += temp; + sector += temp >> SECTOR_SHIFT; + } + return 0; +} + +static int 
copy_from_testb(struct testb *testb, struct page *dest, + unsigned int off, sector_t sector, size_t n, unsigned long *lock_flag) +{ + size_t temp, count = 0; + unsigned int offset; + struct testb_page *t_page; + void *dst, *src; + + while (count < n) { + temp = min_t(size_t, testb->t_dev->blocksize, n - count); + + offset = (sector & SECTOR_MASK) << SECTOR_SHIFT; + t_page = testb_lookup_page(testb, sector, false); + + dst = kmap_atomic(dest); + if (!t_page) { + memset(dst + off + count, 0, temp); + goto next; + } + src = kmap_atomic(t_page->page); + memcpy(dst + off + count, src + offset, temp); + kunmap_atomic(src); +next: + kunmap_atomic(dst); + + count += temp; + sector += temp >> SECTOR_SHIFT; + } + return 0; +} + +static void testb_handle_discard(struct testb *testb, sector_t sector, size_t n) +{ + size_t temp; + unsigned long lock_flag; + + spin_lock_irqsave(&testb->t_dev->lock, lock_flag); + while (n > 0) { + temp = min_t(size_t, n, testb->t_dev->blocksize); + testb_free_sector(testb, sector); + sector += temp >> SECTOR_SHIFT; + n -= temp; + } + spin_unlock_irqrestore(&testb->t_dev->lock, lock_flag); +} + +static int testb_handle_flush(struct testb *testb) +{ + return 0; +} + +static int testb_transfer(struct testb *testb, struct page *page, + unsigned int len, unsigned int off, bool is_write, sector_t sector, + unsigned long *lock_flags) +{ + int err = 0; + + if (!is_write) { + err = copy_from_testb(testb, page, off, sector, len, + lock_flags); + flush_dcache_page(page); + } else { + flush_dcache_page(page); + err = copy_to_testb(testb, page, off, sector, len, + lock_flags); + } + + return err; +} + +static int testb_handle_rq(struct request *rq) +{ + struct testb *testb = rq->q->queuedata; + int err; + unsigned int len; + sector_t sector; + struct req_iterator iter; + struct bio_vec bvec; + unsigned long lock_flag; + + sector = blk_rq_pos(rq); + + if (req_op(rq) == REQ_OP_DISCARD) { + testb_handle_discard(testb, sector, blk_rq_bytes(rq)); + return 0; + } else if (req_op(rq) == REQ_OP_FLUSH) + return testb_handle_flush(testb); + + spin_lock_irqsave(&testb->t_dev->lock, lock_flag); + rq_for_each_segment(bvec, rq, iter) { + len = bvec.bv_len; + err = testb_transfer(testb, bvec.bv_page, len, bvec.bv_offset, + op_is_write(req_op(rq)), sector, + &lock_flag); + if (err) { + spin_unlock_irqrestore(&testb->t_dev->lock, lock_flag); + return err; + } + sector += len >> SECTOR_SHIFT; + } + spin_unlock_irqrestore(&testb->t_dev->lock, lock_flag); + + return 0; +} + +static blk_status_t +testb_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd) +{ + int err; + + blk_mq_start_request(bd->rq); + + err = testb_handle_rq(bd->rq); + if (err) + return errno_to_blk_status(err); + + blk_mq_complete_request(bd->rq); + return BLK_STS_OK; +} + +static void testb_softirq_done_fn(struct request *rq) +{ + blk_mq_end_request(rq, BLK_STS_OK); +} + +static const struct blk_mq_ops testb_mq_ops = { + .queue_rq = testb_queue_rq, + .complete = testb_softirq_done_fn, +}; + +static const struct block_device_operations testb_fops = { + .owner = THIS_MODULE, +}; + +static void testb_free_bdev(struct testb *testb) +{ + mutex_lock(&testb_lock); + ida_simple_remove(&testb_indices, testb->index); + mutex_unlock(&testb_lock); + + blk_cleanup_queue(testb->q); + blk_mq_free_tag_set(&testb->tag_set); + + kfree(testb); +} + +static void testb_gendisk_unregister(struct testb *testb) +{ + del_gendisk(testb->disk); + + put_disk(testb->disk); +} + +static void testb_poweroff_device(struct testb_device *dev) +{ + 
testb_gendisk_unregister(dev->testb); + testb_free_bdev(dev->testb); +} + +static void testb_config_discard(struct testb *testb) +{ + if (testb->t_dev->discard == 0) + return; + testb->q->limits.discard_granularity = testb->t_dev->blocksize; + testb->q->limits.discard_alignment = testb->t_dev->blocksize; + blk_queue_max_discard_sectors(testb->q, UINT_MAX >> 9); + queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, testb->q); +} + +static void testb_config_flush(struct testb *testb) +{ + blk_queue_write_cache(testb->q, true, true); + blk_queue_flush_queueable(testb->q, true); +} + +static int testb_gendisk_register(struct testb *testb) +{ + sector_t size; + struct gendisk *disk; + + disk = testb->disk = alloc_disk(DISK_MAX_PARTS); + if (!disk) + return -ENOMEM; + + size = testb->t_dev->size; + set_capacity(disk, size >> 9); + + disk->flags = GENHD_FL_EXT_DEVT; + disk->major = testb_major; + disk->first_minor = testb->index * DISK_MAX_PARTS; + disk->fops = &testb_fops; + disk->private_data = testb; + disk->queue = testb->q; + snprintf(disk->disk_name, DISK_NAME_LEN, "%s", testb->disk_name); + + add_disk(testb->disk); + return 0; +} + +static int testb_alloc_bdev(struct testb_device *t_dev) +{ + int ret; + struct testb *testb; + + testb = kzalloc(sizeof(struct testb), GFP_KERNEL); + if (!testb) { + ret = -ENOMEM; + goto out; + } + + t_dev->blocksize = (t_dev->blocksize >> SECTOR_SHIFT) << SECTOR_SHIFT; + t_dev->blocksize = clamp_t(uint, t_dev->blocksize, 512, 4096); + + if (t_dev->nr_queues > nr_cpu_ids) + t_dev->nr_queues = nr_cpu_ids; + else if (!t_dev->nr_queues) + t_dev->nr_queues = 1; + + testb->t_dev = t_dev; + t_dev->testb = testb; + + testb->tag_set.ops = &testb_mq_ops; + testb->tag_set.nr_hw_queues = t_dev->nr_queues; + testb->tag_set.queue_depth = t_dev->q_depth; + testb->tag_set.numa_node = NUMA_NO_NODE; + testb->tag_set.cmd_size = 0; + testb->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE | + BLK_MQ_F_BLOCKING; + testb->tag_set.driver_data = testb; + + ret = blk_mq_alloc_tag_set(&testb->tag_set); + if (ret) + goto out_cleanup_queues; + + testb->q = blk_mq_init_queue(&testb->tag_set); + if (IS_ERR(testb->q)) { + ret = -ENOMEM; + goto out_cleanup_tags; + } + + testb->q->queuedata = testb; + queue_flag_set_unlocked(QUEUE_FLAG_NONROT, testb->q); + queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, testb->q); + + testb_config_discard(testb); + testb_config_flush(testb); + + blk_queue_logical_block_size(testb->q, t_dev->blocksize); + blk_queue_physical_block_size(testb->q, t_dev->blocksize); + + snprintf(testb->disk_name, CONFIGFS_ITEM_NAME_LEN, "testb_%s", + t_dev->item.ci_name); + + mutex_lock(&testb_lock); + testb->index = ida_simple_get(&testb_indices, 0, 0, GFP_KERNEL); + mutex_unlock(&testb_lock); + + return 0; +out_cleanup_tags: + blk_mq_free_tag_set(&testb->tag_set); +out_cleanup_queues: + kfree(testb); +out: + return ret; +} + +static int testb_poweron_device(struct testb_device *dev) +{ + int ret; + + ret = testb_alloc_bdev(dev); + if (ret) + return ret; + if (testb_gendisk_register(dev->testb)) { + testb_free_bdev(dev->testb); + return -EINVAL; + } + return 0; +} + static int __init testb_init(void) { int ret = 0; @@ -245,12 +770,24 @@ static int __init testb_init(void) config_group_init(&subsys->su_group); mutex_init(&subsys->su_mutex); + testb_major = register_blkdev(0, "testb"); + if (testb_major < 0) + return testb_major; + ret = configfs_register_subsystem(subsys); + if (ret) + goto out_unregister; + + return 0; +out_unregister: + unregister_blkdev(testb_major, 
"testb"); return ret; } static void __exit testb_exit(void) { + unregister_blkdev(testb_major, "testb"); + configfs_unregister_subsystem(&testb_subsys); }