Message ID | 20240422224240.2637-1-dongsheng.yang@easystack.cn |
---|---|
State | New, archived |
Headers | show |
Series | block: Introduce CBD (CXL Block Device) | expand |
在 2024/4/23 星期二 上午 6:42, Dongsheng Yang 写道: > From: Dongsheng Yang <dongsheng.yang.linux@gmail.com> > > The "cbd_blkdev" represents a virtual block device named "/dev/cbdX". It > corresponds to a backend. The "blkdev" interacts with upper-layer users > and accepts IO requests from them. A "blkdev" includes multiple > "cbd_queues", each of which requires a "cbd_channel" to > interact with the backend's handler. The "cbd_queue" forwards IO > requests from the upper layer to the backend's handler through the > channel. > > Signed-off-by: Dongsheng Yang <dongsheng.yang.linux@gmail.com> > --- > drivers/block/cbd/Makefile | 2 +- > drivers/block/cbd/cbd_blkdev.c | 375 ++++++++++++++++++ > drivers/block/cbd/cbd_main.c | 6 + > drivers/block/cbd/cbd_queue.c | 621 ++++++++++++++++++++++++++++++ > drivers/block/cbd/cbd_transport.c | 11 + > 5 files changed, 1014 insertions(+), 1 deletion(-) > create mode 100644 drivers/block/cbd/cbd_blkdev.c > create mode 100644 drivers/block/cbd/cbd_queue.c > > diff --git a/drivers/block/cbd/Makefile b/drivers/block/cbd/Makefile > index b47f1e584946..f5fb5fd68f3d 100644 > --- a/drivers/block/cbd/Makefile > +++ b/drivers/block/cbd/Makefile > @@ -1,3 +1,3 @@ > -cbd-y := cbd_main.o cbd_transport.o cbd_channel.o cbd_host.o cbd_backend.o cbd_handler.o > +cbd-y := cbd_main.o cbd_transport.o cbd_channel.o cbd_host.o cbd_backend.o cbd_handler.o cbd_blkdev.o cbd_queue.o > > obj-$(CONFIG_BLK_DEV_CBD) += cbd.o > diff --git a/drivers/block/cbd/cbd_blkdev.c b/drivers/block/cbd/cbd_blkdev.c > new file mode 100644 > index 000000000000..816bc28afb49 > --- /dev/null > +++ b/drivers/block/cbd/cbd_blkdev.c > @@ -0,0 +1,375 @@ > +#include "cbd_internal.h" > + > +static ssize_t blkdev_backend_id_show(struct device *dev, > + struct device_attribute *attr, > + char *buf) > +{ > + struct cbd_blkdev_device *blkdev; > + struct cbd_blkdev_info *blkdev_info; > + > + blkdev = container_of(dev, struct cbd_blkdev_device, dev); > + blkdev_info = blkdev->blkdev_info; > + > + 
cbdt_flush_range(blkdev->cbdt, blkdev_info, sizeof(*blkdev_info)); > + > + if (blkdev_info->state == cbd_blkdev_state_none) > + return 0; > + > + return sprintf(buf, "%u\n", blkdev_info->backend_id); > +} > + > +static DEVICE_ATTR(backend_id, 0400, blkdev_backend_id_show, NULL); > + > +static ssize_t blkdev_host_id_show(struct device *dev, > + struct device_attribute *attr, > + char *buf) > +{ > + struct cbd_blkdev_device *blkdev; > + struct cbd_blkdev_info *blkdev_info; > + > + blkdev = container_of(dev, struct cbd_blkdev_device, dev); > + blkdev_info = blkdev->blkdev_info; > + > + cbdt_flush_range(blkdev->cbdt, blkdev_info, sizeof(*blkdev_info)); > + > + if (blkdev_info->state == cbd_blkdev_state_none) > + return 0; > + > + return sprintf(buf, "%u\n", blkdev_info->host_id); > +} > + > +static DEVICE_ATTR(host_id, 0400, blkdev_host_id_show, NULL); > + > +static ssize_t blkdev_mapped_id_show(struct device *dev, > + struct device_attribute *attr, > + char *buf) > +{ > + struct cbd_blkdev_device *blkdev; > + struct cbd_blkdev_info *blkdev_info; > + > + blkdev = container_of(dev, struct cbd_blkdev_device, dev); > + blkdev_info = blkdev->blkdev_info; > + > + cbdt_flush_range(blkdev->cbdt, blkdev_info, sizeof(*blkdev_info)); > + > + if (blkdev_info->state == cbd_blkdev_state_none) > + return 0; > + > + return sprintf(buf, "%u\n", blkdev_info->mapped_id); > +} > + > +static DEVICE_ATTR(mapped_id, 0400, blkdev_mapped_id_show, NULL); > + > +CBD_OBJ_HEARTBEAT(blkdev); > + > +static struct attribute *cbd_blkdev_attrs[] = { > + &dev_attr_mapped_id.attr, > + &dev_attr_host_id.attr, > + &dev_attr_backend_id.attr, > + &dev_attr_alive.attr, > + NULL > +}; > + > +static struct attribute_group cbd_blkdev_attr_group = { > + .attrs = cbd_blkdev_attrs, > +}; > + > +static const struct attribute_group *cbd_blkdev_attr_groups[] = { > + &cbd_blkdev_attr_group, > + NULL > +}; > + > +static void cbd_blkdev_release(struct device *dev) > +{ > +} > + > +struct device_type cbd_blkdev_type = { 
> + .name = "cbd_blkdev", > + .groups = cbd_blkdev_attr_groups, > + .release = cbd_blkdev_release, > +}; > + > +struct device_type cbd_blkdevs_type = { > + .name = "cbd_blkdevs", > + .release = cbd_blkdev_release, > +}; > + > + > +static int cbd_major; > +static DEFINE_IDA(cbd_mapped_id_ida); > + > +static int minor_to_cbd_mapped_id(int minor) > +{ > + return minor >> CBD_PART_SHIFT; > +} > + > + > +static int cbd_open(struct gendisk *disk, blk_mode_t mode) > +{ > + return 0; > +} > + > +static void cbd_release(struct gendisk *disk) > +{ > +} > + > +static const struct block_device_operations cbd_bd_ops = { > + .owner = THIS_MODULE, > + .open = cbd_open, > + .release = cbd_release, > +}; > + > + > +static void cbd_blkdev_destroy_queues(struct cbd_blkdev *cbd_blkdev) > +{ > + int i; > + > + for (i = 0; i < cbd_blkdev->num_queues; i++) { > + cbd_queue_stop(&cbd_blkdev->queues[i]); > + } > + > + kfree(cbd_blkdev->queues); > +} > + > +static int cbd_blkdev_create_queues(struct cbd_blkdev *cbd_blkdev) > +{ > + int i; > + int ret; > + struct cbd_queue *cbdq; > + > + cbd_blkdev->queues = kcalloc(cbd_blkdev->num_queues, sizeof(struct cbd_queue), GFP_KERNEL); > + if (!cbd_blkdev->queues) { > + return -ENOMEM; > + } > + > + for (i = 0; i < cbd_blkdev->num_queues; i++) { > + cbdq = &cbd_blkdev->queues[i]; > + cbdq->cbd_blkdev = cbd_blkdev; > + cbdq->index = i; > + ret = cbd_queue_start(cbdq); > + if (ret) > + goto err; > + > + } > + > + return 0; > +err: > + cbd_blkdev_destroy_queues(cbd_blkdev); > + return ret; > +} > + > +static int disk_start(struct cbd_blkdev *cbd_blkdev) > +{ > + int ret; > + struct gendisk *disk; > + > + memset(&cbd_blkdev->tag_set, 0, sizeof(cbd_blkdev->tag_set)); > + cbd_blkdev->tag_set.ops = &cbd_mq_ops; > + cbd_blkdev->tag_set.queue_depth = 128; > + cbd_blkdev->tag_set.numa_node = NUMA_NO_NODE; > + cbd_blkdev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_NO_SCHED; > + cbd_blkdev->tag_set.nr_hw_queues = cbd_blkdev->num_queues; > + 
cbd_blkdev->tag_set.cmd_size = sizeof(struct cbd_request); > + cbd_blkdev->tag_set.timeout = 0; > + cbd_blkdev->tag_set.driver_data = cbd_blkdev; > + > + ret = blk_mq_alloc_tag_set(&cbd_blkdev->tag_set); > + if (ret) { > + pr_err("failed to alloc tag set %d", ret); > + goto err; > + } > + > + disk = blk_mq_alloc_disk(&cbd_blkdev->tag_set, cbd_blkdev); > + if (IS_ERR(disk)) { > + ret = PTR_ERR(disk); > + pr_err("failed to alloc disk"); > + goto out_tag_set; > + } > + > + snprintf(disk->disk_name, sizeof(disk->disk_name), "cbd%d", > + cbd_blkdev->mapped_id); > + > + disk->major = cbd_major; > + disk->first_minor = cbd_blkdev->mapped_id << CBD_PART_SHIFT; > + disk->minors = (1 << CBD_PART_SHIFT); > + > + disk->fops = &cbd_bd_ops; > + disk->private_data = cbd_blkdev; > + > + /* Tell the block layer that this is not a rotational device */ > + blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue); > + blk_queue_flag_set(QUEUE_FLAG_SYNCHRONOUS, disk->queue); > + blk_queue_flag_set(QUEUE_FLAG_NOWAIT, disk->queue); > + > + blk_queue_physical_block_size(disk->queue, PAGE_SIZE); > + blk_queue_max_hw_sectors(disk->queue, 128); > + blk_queue_max_segments(disk->queue, USHRT_MAX); > + blk_queue_max_segment_size(disk->queue, UINT_MAX); > + blk_queue_io_min(disk->queue, 4096); > + blk_queue_io_opt(disk->queue, 4096); > + > + disk->queue->limits.max_sectors = queue_max_hw_sectors(disk->queue); > + /* TODO support discard */ > + disk->queue->limits.discard_granularity = 0; > + blk_queue_max_discard_sectors(disk->queue, 0); > + blk_queue_max_write_zeroes_sectors(disk->queue, 0); > + > + cbd_blkdev->disk = disk; > + > + cbdt_add_blkdev(cbd_blkdev->cbdt, cbd_blkdev); > + cbd_blkdev->blkdev_info->mapped_id = cbd_blkdev->blkdev_id; > + cbd_blkdev->blkdev_info->state = cbd_blkdev_state_running; > + > + set_capacity(cbd_blkdev->disk, cbd_blkdev->dev_size); > + > + set_disk_ro(cbd_blkdev->disk, false); > + blk_queue_write_cache(cbd_blkdev->disk->queue, false, false); > + > + ret = 
add_disk(cbd_blkdev->disk); > + if (ret) { > + goto put_disk; > + } > + > + ret = sysfs_create_link(&disk_to_dev(cbd_blkdev->disk)->kobj, > + &cbd_blkdev->blkdev_dev->dev.kobj, "cbd_blkdev"); > + if (ret) { > + goto del_disk; > + } > + > + blk_put_queue(cbd_blkdev->disk->queue); > + > + return 0; > + > +del_disk: > + del_gendisk(cbd_blkdev->disk); > +put_disk: > + put_disk(cbd_blkdev->disk); > +out_tag_set: > + blk_mq_free_tag_set(&cbd_blkdev->tag_set); > +err: > + return ret; > +} > + > +int cbd_blkdev_start(struct cbd_transport *cbdt, u32 backend_id, u32 queues) > +{ > + struct cbd_blkdev *cbd_blkdev; > + struct cbd_backend_info *backend_info; > + u64 dev_size; > + int ret; > + > + backend_info = cbdt_get_backend_info(cbdt, backend_id); > + cbdt_flush_range(cbdt, backend_info, sizeof(*backend_info)); > + if (backend_info->blkdev_count == CBDB_BLKDEV_COUNT_MAX) { > + return -EBUSY; > + } > + > + dev_size = backend_info->dev_size; > + > + cbd_blkdev = kzalloc(sizeof(struct cbd_blkdev), GFP_KERNEL); > + if (!cbd_blkdev) { > + pr_err("fail to alloc cbd_blkdev"); > + return -ENOMEM; > + } > + > + ret = cbdt_get_empty_blkdev_id(cbdt, &cbd_blkdev->blkdev_id); > + if (ret < 0) { > + goto blkdev_free; > + } > + > + cbd_blkdev->mapped_id = ida_simple_get(&cbd_mapped_id_ida, 0, > + minor_to_cbd_mapped_id(1 << MINORBITS), > + GFP_KERNEL); > + if (cbd_blkdev->mapped_id < 0) { > + ret = -ENOENT; > + goto blkdev_free; > + } > + > + INIT_LIST_HEAD(&cbd_blkdev->node); > + cbd_blkdev->cbdt = cbdt; > + cbd_blkdev->backend_id = backend_id; > + cbd_blkdev->num_queues = queues; > + cbd_blkdev->dev_size = dev_size; > + cbd_blkdev->blkdev_info = cbdt_get_blkdev_info(cbdt, cbd_blkdev->blkdev_id); > + cbd_blkdev->blkdev_dev = &cbdt->cbd_blkdevs_dev->blkdev_devs[cbd_blkdev->blkdev_id]; > + > + cbd_blkdev->blkdev_info->state = cbd_blkdev_state_running; > + cbdt_flush_range(cbdt, cbd_blkdev->blkdev_info, sizeof(*cbd_blkdev->blkdev_info)); > + > + INIT_DELAYED_WORK(&cbd_blkdev->hb_work, 
blkdev_hb_workfn); > + queue_delayed_work(cbd_wq, &cbd_blkdev->hb_work, 0); > + > + ret = cbd_blkdev_create_queues(cbd_blkdev); > + if (ret < 0) { > + goto cancel_hb;; > + } > + > + ret = disk_start(cbd_blkdev); > + if (ret < 0) { > + goto destroy_queues; > + } > + > + backend_info->blkdev_count++; > + cbdt_flush_range(cbdt, backend_info, sizeof(*backend_info)); > + > + return 0; > + > +destroy_queues: > + cbd_blkdev_destroy_queues(cbd_blkdev); > +cancel_hb: > + cancel_delayed_work_sync(&cbd_blkdev->hb_work); > + cbd_blkdev->blkdev_info->state = cbd_blkdev_state_none; > + cbdt_flush_range(cbdt, cbd_blkdev->blkdev_info, sizeof(*cbd_blkdev->blkdev_info)); > + ida_simple_remove(&cbd_mapped_id_ida, cbd_blkdev->mapped_id); > +blkdev_free: > + kfree(cbd_blkdev); > + return ret; > +} > + > +static void disk_stop(struct cbd_blkdev *cbd_blkdev) > +{ > + sysfs_remove_link(&disk_to_dev(cbd_blkdev->disk)->kobj, "cache"); > + del_gendisk(cbd_blkdev->disk); > + put_disk(cbd_blkdev->disk); > + blk_mq_free_tag_set(&cbd_blkdev->tag_set); > +} > + > +int cbd_blkdev_stop(struct cbd_transport *cbdt, u32 devid) > +{ > + struct cbd_blkdev *cbd_blkdev; > + struct cbd_backend_info *backend_info; > + > + cbd_blkdev = cbdt_fetch_blkdev(cbdt, devid); > + if (!cbd_blkdev) { > + return -EINVAL; > + } > + > + backend_info = cbdt_get_backend_info(cbdt, cbd_blkdev->backend_id); > + > + disk_stop(cbd_blkdev); > + cbd_blkdev_destroy_queues(cbd_blkdev); > + cancel_delayed_work_sync(&cbd_blkdev->hb_work); > + cbd_blkdev->blkdev_info->state = cbd_blkdev_state_none; > + cbdt_flush_range(cbdt, cbd_blkdev->blkdev_info, sizeof(*cbd_blkdev->blkdev_info)); > + ida_simple_remove(&cbd_mapped_id_ida, cbd_blkdev->mapped_id); > + > + kfree(cbd_blkdev); > + > + backend_info->blkdev_count--; > + cbdt_flush_range(cbdt, backend_info, sizeof(*backend_info)); > + > + return 0; > +} > + > +int cbd_blkdev_init(void) > +{ > + cbd_major = register_blkdev(0, "cbd"); > + if (cbd_major < 0) > + return cbd_major; > + > + 
return 0; > +} > + > +void cbd_blkdev_exit(void) > +{ > + unregister_blkdev(cbd_major, "cbd"); > +} > diff --git a/drivers/block/cbd/cbd_main.c b/drivers/block/cbd/cbd_main.c > index 8cfa60dde7c5..658233807b59 100644 > --- a/drivers/block/cbd/cbd_main.c > +++ b/drivers/block/cbd/cbd_main.c > @@ -195,6 +195,11 @@ static int __init cbd_init(void) > goto device_unregister; > } > > + ret = cbd_blkdev_init(); > + if (ret < 0) { > + goto bus_unregister; > + } > + > return 0; > > bus_unregister: > @@ -209,6 +214,7 @@ static int __init cbd_init(void) > > static void cbd_exit(void) > { > + cbd_blkdev_exit(); > bus_unregister(&cbd_bus_type); > device_unregister(&cbd_root_dev); > > diff --git a/drivers/block/cbd/cbd_queue.c b/drivers/block/cbd/cbd_queue.c > new file mode 100644 > index 000000000000..6709ac016e18 > --- /dev/null > +++ b/drivers/block/cbd/cbd_queue.c > @@ -0,0 +1,621 @@ > +#include "cbd_internal.h" > + > +/* > + * How do blkdev and backend interact through the channel? > + * a) For reader side, before reading the data, if the data in this channel may > + * be modified by the other party, then I need to flush the cache before reading to ensure > + * that I get the latest data. For example, the blkdev needs to flush the cache before > + * obtaining compr_head because compr_head will be updated by the backend handler. > + * b) For writter side, if the written information will be read by others, then > + * after writing, I need to flush the cache to let the other party see it immediately. > + * For example, after blkdev submits cbd_se, it needs to update cmd_head to let the > + * handler have a new cbd_se. Therefore, after updating cmd_head, I need to flush the > + * cache to let the backend see it. > + * > + * For the blkdev queue, I am the only one who updates the `cmd_head`, `cmd_tail`, and `compr_tail'. > + * Therefore, I don't need to flush_dcache before reading these data. 
However, after updating these data, > + * I need to flush_dcache so that the backend handler can see these updates. > + * > + * On the other hand, `compr_head` is updated by the backend handler. So, I need to flush_dcache before > + * reading `compr_head` to ensure that I can see the updates. > + * > + * ┌───────────┐ ┌─────────────┐ > + * │ blkdev │ │ backend │ > + * │ queue │ │ handler │ > + * └─────┬─────┘ └──────┬──────┘ > + * ▼ │ > + * init data and cbd_se │ > + * │ │ > + * ▼ │ > + * update cmd_head │ > + * │ │ > + * ▼ │ > + * flush_cache │ > + * │ ▼ > + * │ flush_cache > + * │ │ > + * │ ▼ > + * │ handle cmd > + * │ │ > + * │ ▼ > + * │ fill cbd_ce > + * │ │ > + * │ ▼ > + * │ flush_cache > + * ▼ > + * flush_cache > + * │ > + * ▼ > + * complete_req > + */ > + > +static inline struct cbd_se *get_submit_entry(struct cbd_queue *cbdq) > +{ > + return (struct cbd_se *)(cbdq->channel.cmdr + cbdq->channel_info->cmd_head); > +} > + > +static inline struct cbd_se *get_oldest_se(struct cbd_queue *cbdq) > +{ > + if (cbdq->channel_info->cmd_tail == cbdq->channel_info->cmd_head) > + return NULL; > + > + return (struct cbd_se *)(cbdq->channel.cmdr + cbdq->channel_info->cmd_tail); > +} > + > +static inline struct cbd_ce *get_complete_entry(struct cbd_queue *cbdq) > +{ > + if (cbdq->channel_info->compr_tail == cbdq->channel_info->compr_head) > + return NULL; > + > + return (struct cbd_ce *)(cbdq->channel.compr + cbdq->channel_info->compr_tail); > +} > + > +static void cbd_req_init(struct cbd_queue *cbdq, enum cbd_op op, struct request *rq) > +{ > + struct cbd_request *cbd_req = blk_mq_rq_to_pdu(rq); > + > + cbd_req->req = rq; > + cbd_req->cbdq = cbdq; > + cbd_req->op = op; > + > + return; > +} > + > +static bool cbd_req_nodata(struct cbd_request *cbd_req) > +{ > + switch (cbd_req->op) { > + case CBD_OP_WRITE: > + case CBD_OP_READ: > + return false; > + case CBD_OP_DISCARD: > + case CBD_OP_WRITE_ZEROS: > + case CBD_OP_FLUSH: > + return true; > + default: > + BUG(); > + } > +} > 
+ > +static uint32_t cbd_req_segments(struct cbd_request *cbd_req) > +{ > + uint32_t segs = 0; > + struct bio *bio = cbd_req->req->bio; > + > + if (cbd_req_nodata(cbd_req)) > + return 0; > + > + while (bio) { > + segs += bio_segments(bio); > + bio = bio->bi_next; > + } > + > + return segs; > +} > + > +static inline size_t cbd_get_cmd_size(struct cbd_request *cbd_req) > +{ > + u32 segs = cbd_req_segments(cbd_req); > + u32 cmd_size = sizeof(struct cbd_se) + (sizeof(struct iovec) * segs); > + > + return round_up(cmd_size, CBD_OP_ALIGN_SIZE); > +} > + > +static void insert_padding(struct cbd_queue *cbdq, u32 cmd_size) > +{ > + struct cbd_se_hdr *header; > + u32 pad_len; > + > + if (cbdq->channel_info->cmdr_size - cbdq->channel_info->cmd_head >= cmd_size) > + return; > + > + pad_len = cbdq->channel_info->cmdr_size - cbdq->channel_info->cmd_head; > + cbd_queue_debug(cbdq, "insert pad:%d\n", pad_len); > + > + header = (struct cbd_se_hdr *)get_submit_entry(cbdq); > + memset(header, 0, pad_len); > + cbd_se_hdr_set_op(&header->len_op, CBD_OP_PAD); > + cbd_se_hdr_set_len(&header->len_op, pad_len); > + > + cbdt_flush_range(cbdq->cbd_blkdev->cbdt, header, sizeof(*header)); > + > + CBDC_UPDATE_CMDR_HEAD(cbdq->channel_info->cmd_head, pad_len, cbdq->channel_info->cmdr_size); > +} > + > +static void queue_req_se_init(struct cbd_request *cbd_req) > +{ > + struct cbd_se *se; > + struct cbd_se_hdr *header; > + u64 offset = (u64)blk_rq_pos(cbd_req->req) << SECTOR_SHIFT; > + u64 length = blk_rq_bytes(cbd_req->req); > + > + se = get_submit_entry(cbd_req->cbdq); > + memset(se, 0, cbd_get_cmd_size(cbd_req)); > + header = &se->header; > + > + cbd_se_hdr_set_op(&header->len_op, cbd_req->op); > + cbd_se_hdr_set_len(&header->len_op, cbd_get_cmd_size(cbd_req)); > + > + se->priv_data = cbd_req->req_tid; > + se->offset = offset; > + se->len = length; > + > + if (req_op(cbd_req->req) == REQ_OP_READ || req_op(cbd_req->req) == REQ_OP_WRITE) { > + se->data_off = cbd_req->cbdq->channel.data_head; > + 
se->data_len = length; > + } > + > + cbd_req->se = se; > +} > + > +static bool data_space_enough(struct cbd_queue *cbdq, struct cbd_request *cbd_req) > +{ > + u32 space_available; > + u32 space_needed; > + u32 space_used; > + u32 space_max; > + > + space_max = cbdq->channel.data_size - 4096; > + > + if (cbdq->channel.data_head > cbdq->channel.data_tail) > + space_used = cbdq->channel.data_head - cbdq->channel.data_tail; > + else if (cbdq->channel.data_head < cbdq->channel.data_tail) > + space_used = cbdq->channel.data_head + (cbdq->channel.data_size - cbdq->channel.data_tail); > + else > + space_used = 0; > + > + space_available = space_max - space_used; > + > + space_needed = round_up(cbd_req->data_len, 4096); > + > + if (space_available < space_needed) { > + cbd_queue_err(cbdq, "data space is not enough: availaible: %u needed: %u", > + space_available, space_needed); > + return false; > + } > + > + return true; > +} > + > +static bool submit_ring_space_enough(struct cbd_queue *cbdq, u32 cmd_size) > +{ > + u32 space_available; > + u32 space_needed; > + u32 space_max, space_used; > + > + /* There is a CMDR_RESERVED we dont use to prevent the ring to be used up */ > + space_max = cbdq->channel_info->cmdr_size - CBDC_CMDR_RESERVED; > + > + if (cbdq->channel_info->cmd_head > cbdq->channel_info->cmd_tail) > + space_used = cbdq->channel_info->cmd_head - cbdq->channel_info->cmd_tail; > + else if (cbdq->channel_info->cmd_head < cbdq->channel_info->cmd_tail) > + space_used = cbdq->channel_info->cmd_head + (cbdq->channel_info->cmdr_size - cbdq->channel_info->cmd_tail); > + else > + space_used = 0; > + > + space_available = space_max - space_used; > + > + if (cbdq->channel_info->cmdr_size - cbdq->channel_info->cmd_head > cmd_size) > + space_needed = cmd_size; > + else > + space_needed = cmd_size + cbdq->channel_info->cmdr_size - cbdq->channel_info->cmd_head; > + > + if (space_available < space_needed) > + return false; > + > + return true; > +} > + > +static void 
queue_req_data_init(struct cbd_request *cbd_req) > +{ > + struct cbd_queue *cbdq = cbd_req->cbdq; > + struct bio *bio = cbd_req->req->bio; > + > + if (cbd_req->op == CBD_OP_READ) { > + goto advance_data_head; > + } > + > + cbdc_copy_from_bio(&cbdq->channel, cbd_req->data_off, cbd_req->data_len, bio); > + > +advance_data_head: > + cbdq->channel.data_head = round_up(cbdq->channel.data_head + cbd_req->data_len, PAGE_SIZE); > + cbdq->channel.data_head %= cbdq->channel.data_size; > + > + return; > +} > + > +static void complete_inflight_req(struct cbd_queue *cbdq, struct cbd_request *cbd_req, int ret); > +static void cbd_queue_fn(struct cbd_request *cbd_req) > +{ > + struct cbd_queue *cbdq = cbd_req->cbdq; > + int ret = 0; > + size_t command_size; > + > + spin_lock(&cbdq->inflight_reqs_lock); > + list_add_tail(&cbd_req->inflight_reqs_node, &cbdq->inflight_reqs); > + spin_unlock(&cbdq->inflight_reqs_lock); > + > + command_size = cbd_get_cmd_size(cbd_req); > + > + spin_lock(&cbdq->channel.cmdr_lock); > + if (req_op(cbd_req->req) == REQ_OP_WRITE || req_op(cbd_req->req) == REQ_OP_READ) { > + cbd_req->data_off = cbdq->channel.data_head; > + cbd_req->data_len = blk_rq_bytes(cbd_req->req); > + } else { > + cbd_req->data_off = -1; > + cbd_req->data_len = 0; > + } > + > + if (!submit_ring_space_enough(cbdq, command_size) || > + !data_space_enough(cbdq, cbd_req)) { > + spin_unlock(&cbdq->channel.cmdr_lock); > + > + /* remove request from inflight_reqs */ > + spin_lock(&cbdq->inflight_reqs_lock); > + list_del_init(&cbd_req->inflight_reqs_node); > + spin_unlock(&cbdq->inflight_reqs_lock); > + > + cbd_blk_debug(cbdq->cbd_blkdev, "transport space is not enough"); > + ret = -ENOMEM; > + goto end_request; > + } > + > + insert_padding(cbdq, command_size); > + > + cbd_req->req_tid = ++cbdq->req_tid; > + queue_req_se_init(cbd_req); > + cbdt_flush_range(cbdq->cbd_blkdev->cbdt, cbd_req->se, sizeof(struct cbd_se)); > + > + if (!cbd_req_nodata(cbd_req)) { > + queue_req_data_init(cbd_req); > + 
} > + > + queue_delayed_work(cbdq->task_wq, &cbdq->complete_work, 0); > + > + CBDC_UPDATE_CMDR_HEAD(cbdq->channel_info->cmd_head, > + cbd_get_cmd_size(cbd_req), > + cbdq->channel_info->cmdr_size); > + cbdc_flush_ctrl(&cbdq->channel); > + spin_unlock(&cbdq->channel.cmdr_lock); > + > + return; > + > +end_request: > + if (ret == -ENOMEM || ret == -EBUSY) > + blk_mq_requeue_request(cbd_req->req, true); > + else > + blk_mq_end_request(cbd_req->req, errno_to_blk_status(ret)); > + > + return; > +} > + > +static void cbd_req_release(struct cbd_request *cbd_req) > +{ > + return; > +} > + > +static void advance_cmd_ring(struct cbd_queue *cbdq) > +{ > + struct cbd_se *se; > +again: > + se = get_oldest_se(cbdq); > + if (!se) > + goto out; > + > + if (cbd_se_hdr_flags_test(se, CBD_SE_HDR_DONE)) { > + CBDC_UPDATE_CMDR_TAIL(cbdq->channel_info->cmd_tail, > + cbd_se_hdr_get_len(se->header.len_op), > + cbdq->channel_info->cmdr_size); > + cbdc_flush_ctrl(&cbdq->channel); > + goto again; > + } > +out: > + return; > +} > + > +static bool __advance_data_tail(struct cbd_queue *cbdq, u32 data_off, u32 data_len) > +{ > + if (data_off == cbdq->channel.data_tail) { > + cbdq->released_extents[data_off / 4096] = 0; > + cbdq->channel.data_tail += data_len; > + if (cbdq->channel.data_tail >= cbdq->channel.data_size) { > + cbdq->channel.data_tail %= cbdq->channel.data_size; > + } > + return true; > + } > + > + return false; > +} > + > +static void advance_data_tail(struct cbd_queue *cbdq, u32 data_off, u32 data_len) > +{ > + cbdq->released_extents[data_off / 4096] = data_len; > + > + while (__advance_data_tail(cbdq, data_off, data_len)) { > + data_off += data_len; > + data_len = cbdq->released_extents[data_off / 4096]; > + if (!data_len) { > + break; > + } > + } > +} > + > +static inline void complete_inflight_req(struct cbd_queue *cbdq, struct cbd_request *cbd_req, int ret) > +{ > + u32 data_off, data_len; > + bool advance_data = false; > + > + spin_lock(&cbdq->inflight_reqs_lock); > + 
list_del_init(&cbd_req->inflight_reqs_node); > + spin_unlock(&cbdq->inflight_reqs_lock); > + > + cbd_se_hdr_flags_set(cbd_req->se, CBD_SE_HDR_DONE); > + data_off = cbd_req->data_off; > + data_len = cbd_req->data_len; > + advance_data = (!cbd_req_nodata(cbd_req)); > + > + blk_mq_end_request(cbd_req->req, errno_to_blk_status(ret)); > + > + cbd_req_release(cbd_req); > + > + spin_lock(&cbdq->channel.cmdr_lock); > + advance_cmd_ring(cbdq); > + if (advance_data) > + advance_data_tail(cbdq, data_off, round_up(data_len, PAGE_SIZE)); > + spin_unlock(&cbdq->channel.cmdr_lock); > +} > + > +static struct cbd_request *fetch_inflight_req(struct cbd_queue *cbdq, u64 req_tid) > +{ > + struct cbd_request *req; > + bool found = false; > + > + list_for_each_entry(req, &cbdq->inflight_reqs, inflight_reqs_node) { > + if (req->req_tid == req_tid) { > + list_del_init(&req->inflight_reqs_node); > + found = true; > + break; > + } > + } > + > + if (found) > + return req; > + > + return NULL; > +} > + > +static void copy_data_from_cbdteq(struct cbd_request *cbd_req) > +{ > + struct bio *bio = cbd_req->req->bio; > + struct cbd_queue *cbdq = cbd_req->cbdq; > + > + cbdc_copy_to_bio(&cbdq->channel, cbd_req->data_off, cbd_req->data_len, bio); > + > + return; > +} > + > +static void complete_work_fn(struct work_struct *work) > +{ > + struct cbd_queue *cbdq = container_of(work, struct cbd_queue, complete_work.work); > + struct cbd_ce *ce; > + struct cbd_request *cbd_req; > + > +again: > + /* compr_head would be updated by backend handler */ > + cbdc_flush_ctrl(&cbdq->channel); > + > + spin_lock(&cbdq->channel.compr_lock); > + ce = get_complete_entry(cbdq); > + if (!ce) { > + spin_unlock(&cbdq->channel.compr_lock); > + if (cbdwc_need_retry(&cbdq->complete_worker_cfg)) { > + goto again; > + } > + > + spin_lock(&cbdq->inflight_reqs_lock); > + if (list_empty(&cbdq->inflight_reqs)) { > + spin_unlock(&cbdq->inflight_reqs_lock); > + cbdwc_init(&cbdq->complete_worker_cfg); > + return; > + } > + 
spin_unlock(&cbdq->inflight_reqs_lock); > + > + cbdwc_miss(&cbdq->complete_worker_cfg); > + > + queue_delayed_work(cbdq->task_wq, &cbdq->complete_work, 0); > + return; > + } > + cbdwc_hit(&cbdq->complete_worker_cfg); > + CBDC_UPDATE_COMPR_TAIL(cbdq->channel_info->compr_tail, > + sizeof(struct cbd_ce), > + cbdq->channel_info->compr_size); > + cbdc_flush_ctrl(&cbdq->channel); > + spin_unlock(&cbdq->channel.compr_lock); > + > + spin_lock(&cbdq->inflight_reqs_lock); > + /* flush to ensure the content of ce is uptodate */ > + cbdt_flush_range(cbdq->cbd_blkdev->cbdt, ce, sizeof(*ce)); > + cbd_req = fetch_inflight_req(cbdq, ce->priv_data); > + spin_unlock(&cbdq->inflight_reqs_lock); > + if (!cbd_req) { > + goto again; > + } > + > + if (req_op(cbd_req->req) == REQ_OP_READ) { > + spin_lock(&cbdq->channel.cmdr_lock); > + copy_data_from_cbdteq(cbd_req); > + spin_unlock(&cbdq->channel.cmdr_lock); > + } > + > + complete_inflight_req(cbdq, cbd_req, ce->result); > + > + goto again; > +} > + > +static blk_status_t cbd_queue_rq(struct blk_mq_hw_ctx *hctx, > + const struct blk_mq_queue_data *bd) > +{ > + struct request *req = bd->rq; > + struct cbd_queue *cbdq = hctx->driver_data; > + struct cbd_request *cbd_req = blk_mq_rq_to_pdu(bd->rq); > + > + memset(cbd_req, 0, sizeof(struct cbd_request)); > + INIT_LIST_HEAD(&cbd_req->inflight_reqs_node); > + > + blk_mq_start_request(bd->rq); > + > + switch (req_op(bd->rq)) { > + case REQ_OP_FLUSH: > + cbd_req_init(cbdq, CBD_OP_FLUSH, req); > + break; > + case REQ_OP_DISCARD: > + cbd_req_init(cbdq, CBD_OP_DISCARD, req); > + break; > + case REQ_OP_WRITE_ZEROES: > + cbd_req_init(cbdq, CBD_OP_WRITE_ZEROS, req); > + break; > + case REQ_OP_WRITE: > + cbd_req_init(cbdq, CBD_OP_WRITE, req); > + break; > + case REQ_OP_READ: > + cbd_req_init(cbdq, CBD_OP_READ, req); > + break; > + default: > + return BLK_STS_IOERR; > + } > + > + cbd_queue_fn(cbd_req); > + > + return BLK_STS_OK; > +} > + > +static int cbd_init_hctx(struct blk_mq_hw_ctx *hctx, void 
*driver_data, > + unsigned int hctx_idx) > +{ > + struct cbd_blkdev *cbd_blkdev = driver_data; > + struct cbd_queue *cbdq; > + > + cbdq = &cbd_blkdev->queues[hctx_idx]; > + hctx->driver_data = cbdq; > + > + return 0; > +} > + > +const struct blk_mq_ops cbd_mq_ops = { > + .queue_rq = cbd_queue_rq, > + .init_hctx = cbd_init_hctx, > +}; > + > +static int cbd_queue_channel_init(struct cbd_queue *cbdq, u32 channel_id) > +{ > + struct cbd_blkdev *cbd_blkdev = cbdq->cbd_blkdev; > + struct cbd_transport *cbdt = cbd_blkdev->cbdt; > + > + cbdq->channel_id = channel_id; > + cbd_channel_init(&cbdq->channel, cbdt, channel_id); > + cbdq->channel_info = cbdq->channel.channel_info; > + > + cbdq->channel.data_head = cbdq->channel.data_tail = 0; > + > + /* Initialise the channel_info of the ring buffer */ > + cbdq->channel_info->cmdr_off = CBDC_CMDR_OFF; > + cbdq->channel_info->cmdr_size = CBDC_CMDR_SIZE; > + cbdq->channel_info->compr_off = CBDC_COMPR_OFF; > + cbdq->channel_info->compr_size = CBDC_COMPR_SIZE; > + > + cbdq->channel_info->backend_id = cbd_blkdev->backend_id; > + cbdq->channel_info->blkdev_id = cbd_blkdev->blkdev_id; > + cbdq->channel_info->blkdev_state = cbdc_blkdev_state_running; > + cbdq->channel_info->state = cbd_channel_state_running; > + > + cbdc_flush_ctrl(&cbdq->channel); > + > + return 0; > +} > + > +int cbd_queue_start(struct cbd_queue *cbdq) > +{ > + struct cbd_transport *cbdt = cbdq->cbd_blkdev->cbdt; > + u32 channel_id; > + int ret; > + > + ret = cbdt_get_empty_channel_id(cbdt, &channel_id); > + if (ret < 0) { > + cbdt_err(cbdt, "failed find available channel_id.\n"); > + goto err; > + } > + > + ret = cbd_queue_channel_init(cbdq, channel_id); > + if (ret) { > + cbd_queue_err(cbdq, "failed to init dev channel_info: %d.", ret); > + goto err; > + } > + > + INIT_LIST_HEAD(&cbdq->inflight_reqs); > + spin_lock_init(&cbdq->inflight_reqs_lock); > + cbdq->req_tid = 0; > + INIT_DELAYED_WORK(&cbdq->complete_work, complete_work_fn); > + 
cbdwc_init(&cbdq->complete_worker_cfg); > + > + cbdq->released_extents = kmalloc(sizeof(u32) * (CBDC_DATA_SIZE >> PAGE_SHIFT), GFP_KERNEL); Quick fixup: this should be kzalloc. The fix patch is available on the cbd branch of the repo: https://github.com/DataTravelGuide/linux.git cbd: fixup: initialize cbdq->released_extents with zeros We have to initialize cbdq->released_extents with zeros, which means there are no released extents yet. Otherwise, the uninitialized values will confuse advance_data_tail and IO will hang. Signed-off-by: Dongsheng Yang <dongsheng.yang.linux@gmail.com> diff --git a/drivers/block/cbd/cbd_queue.c b/drivers/block/cbd/cbd_queue.c index 6709ac016e18..ebde191eb907 100644 --- a/drivers/block/cbd/cbd_queue.c +++ b/drivers/block/cbd/cbd_queue.c @@ -576,7 +576,7 @@ int cbd_queue_start(struct cbd_queue *cbdq) INIT_DELAYED_WORK(&cbdq->complete_work, complete_work_fn); cbdwc_init(&cbdq->complete_worker_cfg); - cbdq->released_extents = kmalloc(sizeof(u32) * (CBDC_DATA_SIZE >> PAGE_SHIFT), GFP_KERNEL); + cbdq->released_extents = kzalloc(sizeof(u32) * (CBDC_DATA_SIZE >> PAGE_SHIFT), GFP_KERNEL); if (!cbdq->released_extents) { ret = -ENOMEM; goto err; > + if (!cbdq->released_extents) { > + ret = -ENOMEM; > + goto err; > + } > + > + cbdq->task_wq = alloc_workqueue("cbd%d-queue%u", WQ_UNBOUND | WQ_MEM_RECLAIM, > + 0, cbdq->cbd_blkdev->mapped_id, cbdq->index); > + if (!cbdq->task_wq) { > + ret = -ENOMEM; > + goto released_extents_free; > + } > + > + queue_delayed_work(cbdq->task_wq, &cbdq->complete_work, 0); > + > + atomic_set(&cbdq->state, cbd_queue_state_running); > + > + return 0; > + > +released_extents_free: > + kfree(cbdq->released_extents); > +err: > + return ret; > +} > + > +void cbd_queue_stop(struct cbd_queue *cbdq) > +{ > + if (atomic_cmpxchg(&cbdq->state, > + cbd_queue_state_running, > + cbd_queue_state_none) != cbd_queue_state_running) > + return; > + > + cancel_delayed_work_sync(&cbdq->complete_work); > + drain_workqueue(cbdq->task_wq); > + 
destroy_workqueue(cbdq->task_wq); > + > + kfree(cbdq->released_extents); > + cbdq->channel_info->blkdev_state = cbdc_blkdev_state_none; > + > + cbdc_flush_ctrl(&cbdq->channel); > + > + return; > +} > diff --git a/drivers/block/cbd/cbd_transport.c b/drivers/block/cbd/cbd_transport.c > index 4dd9bf1b5fd5..75b9d34218fc 100644 > --- a/drivers/block/cbd/cbd_transport.c > +++ b/drivers/block/cbd/cbd_transport.c > @@ -315,8 +315,19 @@ static ssize_t cbd_adm_store(struct device *dev, > case CBDT_ADM_OP_B_CLEAR: > break; > case CBDT_ADM_OP_DEV_START: > + if (opts.blkdev.queues > CBD_QUEUES_MAX) { > + cbdt_err(cbdt, "invalid queues = %u, larger than max %u\n", > + opts.blkdev.queues, CBD_QUEUES_MAX); > + return -EINVAL; > + } > + ret = cbd_blkdev_start(cbdt, opts.backend_id, opts.blkdev.queues); > + if (ret < 0) > + return ret; > break; > case CBDT_ADM_OP_DEV_STOP: > + ret = cbd_blkdev_stop(cbdt, opts.blkdev.devid); > + if (ret < 0) > + return ret; > break; > default: > pr_err("invalid op: %d\n", opts.op); >
diff --git a/drivers/block/cbd/Makefile b/drivers/block/cbd/Makefile index b47f1e584946..f5fb5fd68f3d 100644 --- a/drivers/block/cbd/Makefile +++ b/drivers/block/cbd/Makefile @@ -1,3 +1,3 @@ -cbd-y := cbd_main.o cbd_transport.o cbd_channel.o cbd_host.o cbd_backend.o cbd_handler.o +cbd-y := cbd_main.o cbd_transport.o cbd_channel.o cbd_host.o cbd_backend.o cbd_handler.o cbd_blkdev.o cbd_queue.o obj-$(CONFIG_BLK_DEV_CBD) += cbd.o diff --git a/drivers/block/cbd/cbd_blkdev.c b/drivers/block/cbd/cbd_blkdev.c new file mode 100644 index 000000000000..816bc28afb49 --- /dev/null +++ b/drivers/block/cbd/cbd_blkdev.c @@ -0,0 +1,375 @@ +#include "cbd_internal.h" + +static ssize_t blkdev_backend_id_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct cbd_blkdev_device *blkdev; + struct cbd_blkdev_info *blkdev_info; + + blkdev = container_of(dev, struct cbd_blkdev_device, dev); + blkdev_info = blkdev->blkdev_info; + + cbdt_flush_range(blkdev->cbdt, blkdev_info, sizeof(*blkdev_info)); + + if (blkdev_info->state == cbd_blkdev_state_none) + return 0; + + return sprintf(buf, "%u\n", blkdev_info->backend_id); +} + +static DEVICE_ATTR(backend_id, 0400, blkdev_backend_id_show, NULL); + +static ssize_t blkdev_host_id_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct cbd_blkdev_device *blkdev; + struct cbd_blkdev_info *blkdev_info; + + blkdev = container_of(dev, struct cbd_blkdev_device, dev); + blkdev_info = blkdev->blkdev_info; + + cbdt_flush_range(blkdev->cbdt, blkdev_info, sizeof(*blkdev_info)); + + if (blkdev_info->state == cbd_blkdev_state_none) + return 0; + + return sprintf(buf, "%u\n", blkdev_info->host_id); +} + +static DEVICE_ATTR(host_id, 0400, blkdev_host_id_show, NULL); + +static ssize_t blkdev_mapped_id_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct cbd_blkdev_device *blkdev; + struct cbd_blkdev_info *blkdev_info; + + blkdev = container_of(dev, struct cbd_blkdev_device, dev); 
+ blkdev_info = blkdev->blkdev_info; + + cbdt_flush_range(blkdev->cbdt, blkdev_info, sizeof(*blkdev_info)); + + if (blkdev_info->state == cbd_blkdev_state_none) + return 0; + + return sprintf(buf, "%u\n", blkdev_info->mapped_id); +} + +static DEVICE_ATTR(mapped_id, 0400, blkdev_mapped_id_show, NULL); + +CBD_OBJ_HEARTBEAT(blkdev); + +static struct attribute *cbd_blkdev_attrs[] = { + &dev_attr_mapped_id.attr, + &dev_attr_host_id.attr, + &dev_attr_backend_id.attr, + &dev_attr_alive.attr, + NULL +}; + +static struct attribute_group cbd_blkdev_attr_group = { + .attrs = cbd_blkdev_attrs, +}; + +static const struct attribute_group *cbd_blkdev_attr_groups[] = { + &cbd_blkdev_attr_group, + NULL +}; + +static void cbd_blkdev_release(struct device *dev) +{ +} + +struct device_type cbd_blkdev_type = { + .name = "cbd_blkdev", + .groups = cbd_blkdev_attr_groups, + .release = cbd_blkdev_release, +}; + +struct device_type cbd_blkdevs_type = { + .name = "cbd_blkdevs", + .release = cbd_blkdev_release, +}; + + +static int cbd_major; +static DEFINE_IDA(cbd_mapped_id_ida); + +static int minor_to_cbd_mapped_id(int minor) +{ + return minor >> CBD_PART_SHIFT; +} + + +static int cbd_open(struct gendisk *disk, blk_mode_t mode) +{ + return 0; +} + +static void cbd_release(struct gendisk *disk) +{ +} + +static const struct block_device_operations cbd_bd_ops = { + .owner = THIS_MODULE, + .open = cbd_open, + .release = cbd_release, +}; + + +static void cbd_blkdev_destroy_queues(struct cbd_blkdev *cbd_blkdev) +{ + int i; + + for (i = 0; i < cbd_blkdev->num_queues; i++) { + cbd_queue_stop(&cbd_blkdev->queues[i]); + } + + kfree(cbd_blkdev->queues); +} + +static int cbd_blkdev_create_queues(struct cbd_blkdev *cbd_blkdev) +{ + int i; + int ret; + struct cbd_queue *cbdq; + + cbd_blkdev->queues = kcalloc(cbd_blkdev->num_queues, sizeof(struct cbd_queue), GFP_KERNEL); + if (!cbd_blkdev->queues) { + return -ENOMEM; + } + + for (i = 0; i < cbd_blkdev->num_queues; i++) { + cbdq = &cbd_blkdev->queues[i]; + 
cbdq->cbd_blkdev = cbd_blkdev; + cbdq->index = i; + ret = cbd_queue_start(cbdq); + if (ret) + goto err; + + } + + return 0; +err: + cbd_blkdev_destroy_queues(cbd_blkdev); + return ret; +} + +static int disk_start(struct cbd_blkdev *cbd_blkdev) +{ + int ret; + struct gendisk *disk; + + memset(&cbd_blkdev->tag_set, 0, sizeof(cbd_blkdev->tag_set)); + cbd_blkdev->tag_set.ops = &cbd_mq_ops; + cbd_blkdev->tag_set.queue_depth = 128; + cbd_blkdev->tag_set.numa_node = NUMA_NO_NODE; + cbd_blkdev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_NO_SCHED; + cbd_blkdev->tag_set.nr_hw_queues = cbd_blkdev->num_queues; + cbd_blkdev->tag_set.cmd_size = sizeof(struct cbd_request); + cbd_blkdev->tag_set.timeout = 0; + cbd_blkdev->tag_set.driver_data = cbd_blkdev; + + ret = blk_mq_alloc_tag_set(&cbd_blkdev->tag_set); + if (ret) { + pr_err("failed to alloc tag set %d", ret); + goto err; + } + + disk = blk_mq_alloc_disk(&cbd_blkdev->tag_set, cbd_blkdev); + if (IS_ERR(disk)) { + ret = PTR_ERR(disk); + pr_err("failed to alloc disk"); + goto out_tag_set; + } + + snprintf(disk->disk_name, sizeof(disk->disk_name), "cbd%d", + cbd_blkdev->mapped_id); + + disk->major = cbd_major; + disk->first_minor = cbd_blkdev->mapped_id << CBD_PART_SHIFT; + disk->minors = (1 << CBD_PART_SHIFT); + + disk->fops = &cbd_bd_ops; + disk->private_data = cbd_blkdev; + + /* Tell the block layer that this is not a rotational device */ + blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue); + blk_queue_flag_set(QUEUE_FLAG_SYNCHRONOUS, disk->queue); + blk_queue_flag_set(QUEUE_FLAG_NOWAIT, disk->queue); + + blk_queue_physical_block_size(disk->queue, PAGE_SIZE); + blk_queue_max_hw_sectors(disk->queue, 128); + blk_queue_max_segments(disk->queue, USHRT_MAX); + blk_queue_max_segment_size(disk->queue, UINT_MAX); + blk_queue_io_min(disk->queue, 4096); + blk_queue_io_opt(disk->queue, 4096); + + disk->queue->limits.max_sectors = queue_max_hw_sectors(disk->queue); + /* TODO support discard */ + 
disk->queue->limits.discard_granularity = 0; + blk_queue_max_discard_sectors(disk->queue, 0); + blk_queue_max_write_zeroes_sectors(disk->queue, 0); + + cbd_blkdev->disk = disk; + + cbdt_add_blkdev(cbd_blkdev->cbdt, cbd_blkdev); + cbd_blkdev->blkdev_info->mapped_id = cbd_blkdev->blkdev_id; + cbd_blkdev->blkdev_info->state = cbd_blkdev_state_running; + + set_capacity(cbd_blkdev->disk, cbd_blkdev->dev_size); + + set_disk_ro(cbd_blkdev->disk, false); + blk_queue_write_cache(cbd_blkdev->disk->queue, false, false); + + ret = add_disk(cbd_blkdev->disk); + if (ret) { + goto put_disk; + } + + ret = sysfs_create_link(&disk_to_dev(cbd_blkdev->disk)->kobj, + &cbd_blkdev->blkdev_dev->dev.kobj, "cbd_blkdev"); + if (ret) { + goto del_disk; + } + + blk_put_queue(cbd_blkdev->disk->queue); + + return 0; + +del_disk: + del_gendisk(cbd_blkdev->disk); +put_disk: + put_disk(cbd_blkdev->disk); +out_tag_set: + blk_mq_free_tag_set(&cbd_blkdev->tag_set); +err: + return ret; +} + +int cbd_blkdev_start(struct cbd_transport *cbdt, u32 backend_id, u32 queues) +{ + struct cbd_blkdev *cbd_blkdev; + struct cbd_backend_info *backend_info; + u64 dev_size; + int ret; + + backend_info = cbdt_get_backend_info(cbdt, backend_id); + cbdt_flush_range(cbdt, backend_info, sizeof(*backend_info)); + if (backend_info->blkdev_count == CBDB_BLKDEV_COUNT_MAX) { + return -EBUSY; + } + + dev_size = backend_info->dev_size; + + cbd_blkdev = kzalloc(sizeof(struct cbd_blkdev), GFP_KERNEL); + if (!cbd_blkdev) { + pr_err("fail to alloc cbd_blkdev"); + return -ENOMEM; + } + + ret = cbdt_get_empty_blkdev_id(cbdt, &cbd_blkdev->blkdev_id); + if (ret < 0) { + goto blkdev_free; + } + + cbd_blkdev->mapped_id = ida_simple_get(&cbd_mapped_id_ida, 0, + minor_to_cbd_mapped_id(1 << MINORBITS), + GFP_KERNEL); + if (cbd_blkdev->mapped_id < 0) { + ret = -ENOENT; + goto blkdev_free; + } + + INIT_LIST_HEAD(&cbd_blkdev->node); + cbd_blkdev->cbdt = cbdt; + cbd_blkdev->backend_id = backend_id; + cbd_blkdev->num_queues = queues; + 
cbd_blkdev->dev_size = dev_size; + cbd_blkdev->blkdev_info = cbdt_get_blkdev_info(cbdt, cbd_blkdev->blkdev_id); + cbd_blkdev->blkdev_dev = &cbdt->cbd_blkdevs_dev->blkdev_devs[cbd_blkdev->blkdev_id]; + + cbd_blkdev->blkdev_info->state = cbd_blkdev_state_running; + cbdt_flush_range(cbdt, cbd_blkdev->blkdev_info, sizeof(*cbd_blkdev->blkdev_info)); + + INIT_DELAYED_WORK(&cbd_blkdev->hb_work, blkdev_hb_workfn); + queue_delayed_work(cbd_wq, &cbd_blkdev->hb_work, 0); + + ret = cbd_blkdev_create_queues(cbd_blkdev); + if (ret < 0) { + goto cancel_hb; + } + + ret = disk_start(cbd_blkdev); + if (ret < 0) { + goto destroy_queues; + } + + backend_info->blkdev_count++; + cbdt_flush_range(cbdt, backend_info, sizeof(*backend_info)); + + return 0; + +destroy_queues: + cbd_blkdev_destroy_queues(cbd_blkdev); +cancel_hb: + cancel_delayed_work_sync(&cbd_blkdev->hb_work); + cbd_blkdev->blkdev_info->state = cbd_blkdev_state_none; + cbdt_flush_range(cbdt, cbd_blkdev->blkdev_info, sizeof(*cbd_blkdev->blkdev_info)); + ida_simple_remove(&cbd_mapped_id_ida, cbd_blkdev->mapped_id); +blkdev_free: + kfree(cbd_blkdev); + return ret; +} + +static void disk_stop(struct cbd_blkdev *cbd_blkdev) /* undo disk_start(): sysfs link, gendisk, then the tag set */ +{ + sysfs_remove_link(&disk_to_dev(cbd_blkdev->disk)->kobj, "cbd_blkdev"); /* name must match the sysfs_create_link() call in disk_start() */ + del_gendisk(cbd_blkdev->disk); + put_disk(cbd_blkdev->disk); + blk_mq_free_tag_set(&cbd_blkdev->tag_set); +} + +int cbd_blkdev_stop(struct cbd_transport *cbdt, u32 devid) +{ + struct cbd_blkdev *cbd_blkdev; + struct cbd_backend_info *backend_info; + + cbd_blkdev = cbdt_fetch_blkdev(cbdt, devid); + if (!cbd_blkdev) { + return -EINVAL; + } + + backend_info = cbdt_get_backend_info(cbdt, cbd_blkdev->backend_id); + + disk_stop(cbd_blkdev); + cbd_blkdev_destroy_queues(cbd_blkdev); + cancel_delayed_work_sync(&cbd_blkdev->hb_work); + cbd_blkdev->blkdev_info->state = cbd_blkdev_state_none; + cbdt_flush_range(cbdt, cbd_blkdev->blkdev_info, sizeof(*cbd_blkdev->blkdev_info)); + ida_simple_remove(&cbd_mapped_id_ida, cbd_blkdev->mapped_id); + 
kfree(cbd_blkdev); + + backend_info->blkdev_count--; + cbdt_flush_range(cbdt, backend_info, sizeof(*backend_info)); + + return 0; +} + +int cbd_blkdev_init(void) +{ + cbd_major = register_blkdev(0, "cbd"); + if (cbd_major < 0) + return cbd_major; + + return 0; +} + +void cbd_blkdev_exit(void) +{ + unregister_blkdev(cbd_major, "cbd"); +} diff --git a/drivers/block/cbd/cbd_main.c b/drivers/block/cbd/cbd_main.c index 8cfa60dde7c5..658233807b59 100644 --- a/drivers/block/cbd/cbd_main.c +++ b/drivers/block/cbd/cbd_main.c @@ -195,6 +195,11 @@ static int __init cbd_init(void) goto device_unregister; } + ret = cbd_blkdev_init(); + if (ret < 0) { + goto bus_unregister; + } + return 0; bus_unregister: @@ -209,6 +214,7 @@ static int __init cbd_init(void) static void cbd_exit(void) { + cbd_blkdev_exit(); bus_unregister(&cbd_bus_type); device_unregister(&cbd_root_dev); diff --git a/drivers/block/cbd/cbd_queue.c b/drivers/block/cbd/cbd_queue.c new file mode 100644 index 000000000000..6709ac016e18 --- /dev/null +++ b/drivers/block/cbd/cbd_queue.c @@ -0,0 +1,621 @@ +#include "cbd_internal.h" + +/* + * How do blkdev and backend interact through the channel? + * a) On the reader side, before reading the data, if the data in this channel may + * be modified by the other party, then I need to flush the cache before reading to ensure + * that I get the latest data. For example, the blkdev needs to flush the cache before + * obtaining compr_head because compr_head will be updated by the backend handler. + * b) On the writer side, if the written information will be read by others, then + * after writing, I need to flush the cache to let the other party see it immediately. + * For example, after blkdev submits cbd_se, it needs to update cmd_head to tell the + * handler that a new cbd_se is available. Therefore, after updating cmd_head, I need to flush the + * cache to let the backend see it. + * + * For the blkdev queue, I am the only one who updates the `cmd_head`, `cmd_tail`, and `compr_tail`. 
+ * Therefore, I don't need to flush_dcache before reading these data. However, after updating these data, + * I need to flush_dcache so that the backend handler can see these updates. + * + * On the other hand, `compr_head` is updated by the backend handler. So, I need to flush_dcache before + * reading `compr_head` to ensure that I can see the updates. + * + * ┌───────────┐ ┌─────────────┐ + * │ blkdev │ │ backend │ + * │ queue │ │ handler │ + * └─────┬─────┘ └──────┬──────┘ + * ▼ │ + * init data and cbd_se │ + * │ │ + * ▼ │ + * update cmd_head │ + * │ │ + * ▼ │ + * flush_cache │ + * │ ▼ + * │ flush_cache + * │ │ + * │ ▼ + * │ handle cmd + * │ │ + * │ ▼ + * │ fill cbd_ce + * │ │ + * │ ▼ + * │ flush_cache + * ▼ + * flush_cache + * │ + * ▼ + * complete_req + */ + +static inline struct cbd_se *get_submit_entry(struct cbd_queue *cbdq) +{ + return (struct cbd_se *)(cbdq->channel.cmdr + cbdq->channel_info->cmd_head); +} + +static inline struct cbd_se *get_oldest_se(struct cbd_queue *cbdq) +{ + if (cbdq->channel_info->cmd_tail == cbdq->channel_info->cmd_head) + return NULL; + + return (struct cbd_se *)(cbdq->channel.cmdr + cbdq->channel_info->cmd_tail); +} + +static inline struct cbd_ce *get_complete_entry(struct cbd_queue *cbdq) +{ + if (cbdq->channel_info->compr_tail == cbdq->channel_info->compr_head) + return NULL; + + return (struct cbd_ce *)(cbdq->channel.compr + cbdq->channel_info->compr_tail); +} + +static void cbd_req_init(struct cbd_queue *cbdq, enum cbd_op op, struct request *rq) +{ + struct cbd_request *cbd_req = blk_mq_rq_to_pdu(rq); + + cbd_req->req = rq; + cbd_req->cbdq = cbdq; + cbd_req->op = op; + + return; +} + +static bool cbd_req_nodata(struct cbd_request *cbd_req) +{ + switch (cbd_req->op) { + case CBD_OP_WRITE: + case CBD_OP_READ: + return false; + case CBD_OP_DISCARD: + case CBD_OP_WRITE_ZEROS: + case CBD_OP_FLUSH: + return true; + default: + BUG(); + } +} + +static uint32_t cbd_req_segments(struct cbd_request *cbd_req) +{ + uint32_t segs = 0; + 
struct bio *bio = cbd_req->req->bio; + + if (cbd_req_nodata(cbd_req)) + return 0; + + while (bio) { + segs += bio_segments(bio); + bio = bio->bi_next; + } + + return segs; +} + +static inline size_t cbd_get_cmd_size(struct cbd_request *cbd_req) +{ + u32 segs = cbd_req_segments(cbd_req); + u32 cmd_size = sizeof(struct cbd_se) + (sizeof(struct iovec) * segs); + + return round_up(cmd_size, CBD_OP_ALIGN_SIZE); +} + +static void insert_padding(struct cbd_queue *cbdq, u32 cmd_size) +{ + struct cbd_se_hdr *header; + u32 pad_len; + + if (cbdq->channel_info->cmdr_size - cbdq->channel_info->cmd_head >= cmd_size) + return; + + pad_len = cbdq->channel_info->cmdr_size - cbdq->channel_info->cmd_head; + cbd_queue_debug(cbdq, "insert pad:%d\n", pad_len); + + header = (struct cbd_se_hdr *)get_submit_entry(cbdq); + memset(header, 0, pad_len); + cbd_se_hdr_set_op(&header->len_op, CBD_OP_PAD); + cbd_se_hdr_set_len(&header->len_op, pad_len); + + cbdt_flush_range(cbdq->cbd_blkdev->cbdt, header, sizeof(*header)); + + CBDC_UPDATE_CMDR_HEAD(cbdq->channel_info->cmd_head, pad_len, cbdq->channel_info->cmdr_size); +} + +static void queue_req_se_init(struct cbd_request *cbd_req) +{ + struct cbd_se *se; + struct cbd_se_hdr *header; + u64 offset = (u64)blk_rq_pos(cbd_req->req) << SECTOR_SHIFT; + u64 length = blk_rq_bytes(cbd_req->req); + + se = get_submit_entry(cbd_req->cbdq); + memset(se, 0, cbd_get_cmd_size(cbd_req)); + header = &se->header; + + cbd_se_hdr_set_op(&header->len_op, cbd_req->op); + cbd_se_hdr_set_len(&header->len_op, cbd_get_cmd_size(cbd_req)); + + se->priv_data = cbd_req->req_tid; + se->offset = offset; + se->len = length; + + if (req_op(cbd_req->req) == REQ_OP_READ || req_op(cbd_req->req) == REQ_OP_WRITE) { + se->data_off = cbd_req->cbdq->channel.data_head; + se->data_len = length; + } + + cbd_req->se = se; +} + +static bool data_space_enough(struct cbd_queue *cbdq, struct cbd_request *cbd_req) +{ + u32 space_available; + u32 space_needed; + u32 space_used; + u32 space_max; + + 
space_max = cbdq->channel.data_size - 4096; + + if (cbdq->channel.data_head > cbdq->channel.data_tail) + space_used = cbdq->channel.data_head - cbdq->channel.data_tail; + else if (cbdq->channel.data_head < cbdq->channel.data_tail) + space_used = cbdq->channel.data_head + (cbdq->channel.data_size - cbdq->channel.data_tail); + else + space_used = 0; + + space_available = space_max - space_used; + + space_needed = round_up(cbd_req->data_len, 4096); + + if (space_available < space_needed) { + cbd_queue_err(cbdq, "data space is not enough: availaible: %u needed: %u", + space_available, space_needed); + return false; + } + + return true; +} + +static bool submit_ring_space_enough(struct cbd_queue *cbdq, u32 cmd_size) +{ + u32 space_available; + u32 space_needed; + u32 space_max, space_used; + + /* There is a CMDR_RESERVED we dont use to prevent the ring to be used up */ + space_max = cbdq->channel_info->cmdr_size - CBDC_CMDR_RESERVED; + + if (cbdq->channel_info->cmd_head > cbdq->channel_info->cmd_tail) + space_used = cbdq->channel_info->cmd_head - cbdq->channel_info->cmd_tail; + else if (cbdq->channel_info->cmd_head < cbdq->channel_info->cmd_tail) + space_used = cbdq->channel_info->cmd_head + (cbdq->channel_info->cmdr_size - cbdq->channel_info->cmd_tail); + else + space_used = 0; + + space_available = space_max - space_used; + + if (cbdq->channel_info->cmdr_size - cbdq->channel_info->cmd_head > cmd_size) + space_needed = cmd_size; + else + space_needed = cmd_size + cbdq->channel_info->cmdr_size - cbdq->channel_info->cmd_head; + + if (space_available < space_needed) + return false; + + return true; +} + +static void queue_req_data_init(struct cbd_request *cbd_req) +{ + struct cbd_queue *cbdq = cbd_req->cbdq; + struct bio *bio = cbd_req->req->bio; + + if (cbd_req->op == CBD_OP_READ) { + goto advance_data_head; + } + + cbdc_copy_from_bio(&cbdq->channel, cbd_req->data_off, cbd_req->data_len, bio); + +advance_data_head: + cbdq->channel.data_head = 
round_up(cbdq->channel.data_head + cbd_req->data_len, PAGE_SIZE); + cbdq->channel.data_head %= cbdq->channel.data_size; + + return; +} + +static void complete_inflight_req(struct cbd_queue *cbdq, struct cbd_request *cbd_req, int ret); +static void cbd_queue_fn(struct cbd_request *cbd_req) +{ + struct cbd_queue *cbdq = cbd_req->cbdq; + int ret = 0; + size_t command_size; + + spin_lock(&cbdq->inflight_reqs_lock); + list_add_tail(&cbd_req->inflight_reqs_node, &cbdq->inflight_reqs); + spin_unlock(&cbdq->inflight_reqs_lock); + + command_size = cbd_get_cmd_size(cbd_req); + + spin_lock(&cbdq->channel.cmdr_lock); + if (req_op(cbd_req->req) == REQ_OP_WRITE || req_op(cbd_req->req) == REQ_OP_READ) { + cbd_req->data_off = cbdq->channel.data_head; + cbd_req->data_len = blk_rq_bytes(cbd_req->req); + } else { + cbd_req->data_off = -1; + cbd_req->data_len = 0; + } + + if (!submit_ring_space_enough(cbdq, command_size) || + !data_space_enough(cbdq, cbd_req)) { + spin_unlock(&cbdq->channel.cmdr_lock); + + /* remove request from inflight_reqs */ + spin_lock(&cbdq->inflight_reqs_lock); + list_del_init(&cbd_req->inflight_reqs_node); + spin_unlock(&cbdq->inflight_reqs_lock); + + cbd_blk_debug(cbdq->cbd_blkdev, "transport space is not enough"); + ret = -ENOMEM; + goto end_request; + } + + insert_padding(cbdq, command_size); + + cbd_req->req_tid = ++cbdq->req_tid; + queue_req_se_init(cbd_req); + cbdt_flush_range(cbdq->cbd_blkdev->cbdt, cbd_req->se, sizeof(struct cbd_se)); + + if (!cbd_req_nodata(cbd_req)) { + queue_req_data_init(cbd_req); + } + + queue_delayed_work(cbdq->task_wq, &cbdq->complete_work, 0); + + CBDC_UPDATE_CMDR_HEAD(cbdq->channel_info->cmd_head, + cbd_get_cmd_size(cbd_req), + cbdq->channel_info->cmdr_size); + cbdc_flush_ctrl(&cbdq->channel); + spin_unlock(&cbdq->channel.cmdr_lock); + + return; + +end_request: + if (ret == -ENOMEM || ret == -EBUSY) + blk_mq_requeue_request(cbd_req->req, true); + else + blk_mq_end_request(cbd_req->req, errno_to_blk_status(ret)); + + return; +} 
+ +static void cbd_req_release(struct cbd_request *cbd_req) +{ + return; +} + +static void advance_cmd_ring(struct cbd_queue *cbdq) +{ + struct cbd_se *se; +again: + se = get_oldest_se(cbdq); + if (!se) + goto out; + + if (cbd_se_hdr_flags_test(se, CBD_SE_HDR_DONE)) { + CBDC_UPDATE_CMDR_TAIL(cbdq->channel_info->cmd_tail, + cbd_se_hdr_get_len(se->header.len_op), + cbdq->channel_info->cmdr_size); + cbdc_flush_ctrl(&cbdq->channel); + goto again; + } +out: + return; +} + +static bool __advance_data_tail(struct cbd_queue *cbdq, u32 data_off, u32 data_len) +{ + if (data_off == cbdq->channel.data_tail) { + cbdq->released_extents[data_off / 4096] = 0; + cbdq->channel.data_tail += data_len; + if (cbdq->channel.data_tail >= cbdq->channel.data_size) { + cbdq->channel.data_tail %= cbdq->channel.data_size; + } + return true; + } + + return false; +} + +static void advance_data_tail(struct cbd_queue *cbdq, u32 data_off, u32 data_len) +{ + cbdq->released_extents[data_off / 4096] = data_len; + + while (__advance_data_tail(cbdq, data_off, data_len)) { + data_off += data_len; + data_len = cbdq->released_extents[data_off / 4096]; + if (!data_len) { + break; + } + } +} + +static inline void complete_inflight_req(struct cbd_queue *cbdq, struct cbd_request *cbd_req, int ret) +{ + u32 data_off, data_len; + bool advance_data = false; + + spin_lock(&cbdq->inflight_reqs_lock); + list_del_init(&cbd_req->inflight_reqs_node); + spin_unlock(&cbdq->inflight_reqs_lock); + + cbd_se_hdr_flags_set(cbd_req->se, CBD_SE_HDR_DONE); + data_off = cbd_req->data_off; + data_len = cbd_req->data_len; + advance_data = (!cbd_req_nodata(cbd_req)); + + blk_mq_end_request(cbd_req->req, errno_to_blk_status(ret)); + + cbd_req_release(cbd_req); + + spin_lock(&cbdq->channel.cmdr_lock); + advance_cmd_ring(cbdq); + if (advance_data) + advance_data_tail(cbdq, data_off, round_up(data_len, PAGE_SIZE)); + spin_unlock(&cbdq->channel.cmdr_lock); +} + +static struct cbd_request *fetch_inflight_req(struct cbd_queue *cbdq, u64 
req_tid) +{ + struct cbd_request *req; + bool found = false; + + list_for_each_entry(req, &cbdq->inflight_reqs, inflight_reqs_node) { + if (req->req_tid == req_tid) { + list_del_init(&req->inflight_reqs_node); + found = true; + break; + } + } + + if (found) + return req; + + return NULL; +} + +static void copy_data_from_cbdteq(struct cbd_request *cbd_req) +{ + struct bio *bio = cbd_req->req->bio; + struct cbd_queue *cbdq = cbd_req->cbdq; + + cbdc_copy_to_bio(&cbdq->channel, cbd_req->data_off, cbd_req->data_len, bio); + + return; +} + +static void complete_work_fn(struct work_struct *work) +{ + struct cbd_queue *cbdq = container_of(work, struct cbd_queue, complete_work.work); + struct cbd_ce *ce; + struct cbd_request *cbd_req; + +again: + /* compr_head would be updated by backend handler */ + cbdc_flush_ctrl(&cbdq->channel); + + spin_lock(&cbdq->channel.compr_lock); + ce = get_complete_entry(cbdq); + if (!ce) { + spin_unlock(&cbdq->channel.compr_lock); + if (cbdwc_need_retry(&cbdq->complete_worker_cfg)) { + goto again; + } + + spin_lock(&cbdq->inflight_reqs_lock); + if (list_empty(&cbdq->inflight_reqs)) { + spin_unlock(&cbdq->inflight_reqs_lock); + cbdwc_init(&cbdq->complete_worker_cfg); + return; + } + spin_unlock(&cbdq->inflight_reqs_lock); + + cbdwc_miss(&cbdq->complete_worker_cfg); + + queue_delayed_work(cbdq->task_wq, &cbdq->complete_work, 0); + return; + } + cbdwc_hit(&cbdq->complete_worker_cfg); + CBDC_UPDATE_COMPR_TAIL(cbdq->channel_info->compr_tail, + sizeof(struct cbd_ce), + cbdq->channel_info->compr_size); + cbdc_flush_ctrl(&cbdq->channel); + spin_unlock(&cbdq->channel.compr_lock); + + spin_lock(&cbdq->inflight_reqs_lock); + /* flush to ensure the content of ce is uptodate */ + cbdt_flush_range(cbdq->cbd_blkdev->cbdt, ce, sizeof(*ce)); + cbd_req = fetch_inflight_req(cbdq, ce->priv_data); + spin_unlock(&cbdq->inflight_reqs_lock); + if (!cbd_req) { + goto again; + } + + if (req_op(cbd_req->req) == REQ_OP_READ) { + spin_lock(&cbdq->channel.cmdr_lock); + 
copy_data_from_cbdteq(cbd_req); + spin_unlock(&cbdq->channel.cmdr_lock); + } + + complete_inflight_req(cbdq, cbd_req, ce->result); + + goto again; +} + +static blk_status_t cbd_queue_rq(struct blk_mq_hw_ctx *hctx, + const struct blk_mq_queue_data *bd) +{ + struct request *req = bd->rq; + struct cbd_queue *cbdq = hctx->driver_data; + struct cbd_request *cbd_req = blk_mq_rq_to_pdu(bd->rq); + + memset(cbd_req, 0, sizeof(struct cbd_request)); + INIT_LIST_HEAD(&cbd_req->inflight_reqs_node); + + blk_mq_start_request(bd->rq); + + switch (req_op(bd->rq)) { + case REQ_OP_FLUSH: + cbd_req_init(cbdq, CBD_OP_FLUSH, req); + break; + case REQ_OP_DISCARD: + cbd_req_init(cbdq, CBD_OP_DISCARD, req); + break; + case REQ_OP_WRITE_ZEROES: + cbd_req_init(cbdq, CBD_OP_WRITE_ZEROS, req); + break; + case REQ_OP_WRITE: + cbd_req_init(cbdq, CBD_OP_WRITE, req); + break; + case REQ_OP_READ: + cbd_req_init(cbdq, CBD_OP_READ, req); + break; + default: + return BLK_STS_IOERR; + } + + cbd_queue_fn(cbd_req); + + return BLK_STS_OK; +} + +static int cbd_init_hctx(struct blk_mq_hw_ctx *hctx, void *driver_data, + unsigned int hctx_idx) +{ + struct cbd_blkdev *cbd_blkdev = driver_data; + struct cbd_queue *cbdq; + + cbdq = &cbd_blkdev->queues[hctx_idx]; + hctx->driver_data = cbdq; + + return 0; +} + +const struct blk_mq_ops cbd_mq_ops = { + .queue_rq = cbd_queue_rq, + .init_hctx = cbd_init_hctx, +}; + +static int cbd_queue_channel_init(struct cbd_queue *cbdq, u32 channel_id) +{ + struct cbd_blkdev *cbd_blkdev = cbdq->cbd_blkdev; + struct cbd_transport *cbdt = cbd_blkdev->cbdt; + + cbdq->channel_id = channel_id; + cbd_channel_init(&cbdq->channel, cbdt, channel_id); + cbdq->channel_info = cbdq->channel.channel_info; + + cbdq->channel.data_head = cbdq->channel.data_tail = 0; + + /* Initialise the channel_info of the ring buffer */ + cbdq->channel_info->cmdr_off = CBDC_CMDR_OFF; + cbdq->channel_info->cmdr_size = CBDC_CMDR_SIZE; + cbdq->channel_info->compr_off = CBDC_COMPR_OFF; + 
cbdq->channel_info->compr_size = CBDC_COMPR_SIZE; + + cbdq->channel_info->backend_id = cbd_blkdev->backend_id; + cbdq->channel_info->blkdev_id = cbd_blkdev->blkdev_id; + cbdq->channel_info->blkdev_state = cbdc_blkdev_state_running; + cbdq->channel_info->state = cbd_channel_state_running; + + cbdc_flush_ctrl(&cbdq->channel); + + return 0; +} + +int cbd_queue_start(struct cbd_queue *cbdq) /* returns 0 on success or a negative errno */ +{ + struct cbd_transport *cbdt = cbdq->cbd_blkdev->cbdt; + u32 channel_id; + int ret; + + ret = cbdt_get_empty_channel_id(cbdt, &channel_id); + if (ret < 0) { + cbdt_err(cbdt, "failed find available channel_id.\n"); + goto err; + } + + ret = cbd_queue_channel_init(cbdq, channel_id); + if (ret) { + cbd_queue_err(cbdq, "failed to init dev channel_info: %d.", ret); + goto err; + } + + INIT_LIST_HEAD(&cbdq->inflight_reqs); + spin_lock_init(&cbdq->inflight_reqs_lock); + cbdq->req_tid = 0; + INIT_DELAYED_WORK(&cbdq->complete_work, complete_work_fn); + cbdwc_init(&cbdq->complete_worker_cfg); + + cbdq->released_extents = kcalloc(CBDC_DATA_SIZE >> PAGE_SHIFT, sizeof(u32), GFP_KERNEL); /* must start zeroed: advance_data_tail() treats a 0 entry as "no released extent" */ + if (!cbdq->released_extents) { + ret = -ENOMEM; + goto err; + } + + cbdq->task_wq = alloc_workqueue("cbd%d-queue%u", WQ_UNBOUND | WQ_MEM_RECLAIM, + 0, cbdq->cbd_blkdev->mapped_id, cbdq->index); + if (!cbdq->task_wq) { + ret = -ENOMEM; + goto released_extents_free; + } + + queue_delayed_work(cbdq->task_wq, &cbdq->complete_work, 0); + + atomic_set(&cbdq->state, cbd_queue_state_running); + + return 0; + +released_extents_free: + kfree(cbdq->released_extents); +err: + return ret; +} + +void cbd_queue_stop(struct cbd_queue *cbdq) +{ + if (atomic_cmpxchg(&cbdq->state, + cbd_queue_state_running, + cbd_queue_state_none) != cbd_queue_state_running) + return; + + cancel_delayed_work_sync(&cbdq->complete_work); + drain_workqueue(cbdq->task_wq); + destroy_workqueue(cbdq->task_wq); + + kfree(cbdq->released_extents); + cbdq->channel_info->blkdev_state = cbdc_blkdev_state_none; + + cbdc_flush_ctrl(&cbdq->channel); + + 
return; +} diff --git a/drivers/block/cbd/cbd_transport.c b/drivers/block/cbd/cbd_transport.c index 4dd9bf1b5fd5..75b9d34218fc 100644 --- a/drivers/block/cbd/cbd_transport.c +++ b/drivers/block/cbd/cbd_transport.c @@ -315,8 +315,19 @@ static ssize_t cbd_adm_store(struct device *dev, case CBDT_ADM_OP_B_CLEAR: break; case CBDT_ADM_OP_DEV_START: + if (opts.blkdev.queues > CBD_QUEUES_MAX) { + cbdt_err(cbdt, "invalid queues = %u, larger than max %u\n", + opts.blkdev.queues, CBD_QUEUES_MAX); + return -EINVAL; + } + ret = cbd_blkdev_start(cbdt, opts.backend_id, opts.blkdev.queues); + if (ret < 0) + return ret; break; case CBDT_ADM_OP_DEV_STOP: + ret = cbd_blkdev_stop(cbdt, opts.blkdev.devid); + if (ret < 0) + return ret; break; default: pr_err("invalid op: %d\n", opts.op);