@@ -16,6 +16,7 @@
#include <linux/major.h>
#include <linux/blkdev.h>
#include <linux/bio.h>
+#include <linux/blk-mq.h>
#include <linux/highmem.h>
#include <linux/mutex.h>
#include <linux/pagemap.h>
@@ -46,6 +47,7 @@ struct brd_device {
spinlock_t brd_lock;
struct radix_tree_root brd_pages;
u64 brd_nr_pages;
+ struct blk_mq_tag_set tag_set;
};
/*
@@ -282,36 +284,46 @@ static int brd_do_bvec(struct brd_device *brd, struct page *page,
return err;
}
-static void brd_submit_bio(struct bio *bio)
+static blk_status_t brd_queue_rq(struct blk_mq_hw_ctx *hctx,
+ const struct blk_mq_queue_data *bd)
{
- struct brd_device *brd = bio->bi_bdev->bd_disk->private_data;
- sector_t sector = bio->bi_iter.bi_sector;
+ struct request *rq = bd->rq;
+ struct brd_device *brd = hctx->queue->queuedata;
+ sector_t sector = blk_rq_pos(rq);
struct bio_vec bvec;
- struct bvec_iter iter;
+ struct req_iterator iter;
+ blk_status_t err = BLK_STS_OK;
- bio_for_each_segment(bvec, bio, iter) {
+ blk_mq_start_request(bd->rq);
+ rq_for_each_segment(bvec, rq, iter) {
unsigned int len = bvec.bv_len;
- int err;
+ int ret;
/* Don't support un-aligned buffer */
WARN_ON_ONCE((bvec.bv_offset & (SECTOR_SIZE - 1)) ||
- (len & (SECTOR_SIZE - 1)));
+ (len & (SECTOR_SIZE - 1)));
- err = brd_do_bvec(brd, bvec.bv_page, len, bvec.bv_offset,
- bio_op(bio), sector);
- if (err) {
- bio_io_error(bio);
- return;
+ ret = brd_do_bvec(brd, bvec.bv_page, len, bvec.bv_offset,
+ req_op(rq), sector);
+ if (ret) {
+ err = BLK_STS_IOERR;
+ goto end_request;
}
sector += len >> SECTOR_SHIFT;
}
- bio_endio(bio);
+end_request:
+ blk_mq_end_request(bd->rq, err);
+ return BLK_STS_OK;
}
+static const struct blk_mq_ops brd_mq_ops = {
+ .queue_rq = brd_queue_rq,
+};
+
+
static const struct block_device_operations brd_fops = {
.owner = THIS_MODULE,
- .submit_bio = brd_submit_bio,
};
/*
@@ -355,7 +367,7 @@ static int brd_alloc(int i)
struct brd_device *brd;
struct gendisk *disk;
char buf[DISK_NAME_LEN];
- int err = -ENOMEM;
+ int err = 0;
list_for_each_entry(brd, &brd_devices, brd_list)
if (brd->brd_number == i)
@@ -364,6 +376,14 @@ static int brd_alloc(int i)
if (!brd)
return -ENOMEM;
brd->brd_number = i;
+ brd->tag_set.ops = &brd_mq_ops;
+ brd->tag_set.queue_depth = 128;
+ brd->tag_set.numa_node = NUMA_NO_NODE;
+ brd->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_BLOCKING;
+ brd->tag_set.cmd_size = 0;
+ brd->tag_set.driver_data = brd;
+ brd->tag_set.nr_hw_queues = 1;
+
list_add_tail(&brd->brd_list, &brd_devices);
spin_lock_init(&brd->brd_lock);
@@ -374,9 +394,17 @@ static int brd_alloc(int i)
debugfs_create_u64(buf, 0444, brd_debugfs_dir,
&brd->brd_nr_pages);
- disk = brd->brd_disk = blk_alloc_disk(NUMA_NO_NODE);
- if (!disk)
+ err = blk_mq_alloc_tag_set(&brd->tag_set);
+ if (err) {
+ err = -ENOMEM;
goto out_free_dev;
+ }
+
+ disk = brd->brd_disk = blk_mq_alloc_disk(&brd->tag_set, brd);
+ if (IS_ERR(disk)) {
+ err = PTR_ERR(disk);
+ goto out_free_tags;
+ }
disk->major = RAMDISK_MAJOR;
disk->first_minor = i * max_part;
@@ -407,6 +435,8 @@ static int brd_alloc(int i)
out_cleanup_disk:
put_disk(disk);
+out_free_tags:
+ blk_mq_free_tag_set(&brd->tag_set);
out_free_dev:
list_del(&brd->brd_list);
kfree(brd);
move to blk-mq based request processing as brd is one of the few drivers that still uses submit_bio interface. The changes are pretty trivial to start using blk-mq. The performance increases up to 125% for direct IO read workloads. There is a slight dip in performance for direct IO write workload but considering the general performance gain with blk-mq support, it is not a lot. SW queues are mapped to one hw queue in the brd device, and rest of IO processing is retained as is. Performance results with none scheduler: --direct=0 ------------------------------------------------------ | | bio(base) | blk-mq | delta | ------------------------------------------------------ | randread | 133 | 223 | +75% | ------------------------------------------------------ | read | 150 | 313 | +108% | ----------------------------------------------------- | randwrite | 111 | 109 | -1.8% | ----------------------------------------------------- | write | 118 | 117 | -0.8%| ----------------------------------------------------- --direct=1 ------------------------------------------------------ | | bio(base) | blk-mq | delta | ------------------------------------------------------ | randread | 182 | 414 | +127% | ------------------------------------------------------ | read | 190 | 429 | +125% | ----------------------------------------------------- | randwrite | 378 | 387 | +2.38%| ----------------------------------------------------- | write | 443 | 420 | -5.1% | ----------------------------------------------------- Signed-off-by: Pankaj Raghav <p.raghav@samsung.com> --- drivers/block/brd.c | 64 +++++++++++++++++++++++++++++++++------------ 1 file changed, 47 insertions(+), 17 deletions(-)