Message ID | 20220927014420.71141-2-axboe@kernel.dk (mailing list archive)
---|---
State | New, archived
Series | Enable alloc caching and batched freeing for passthrough
On Tue, Sep 27, 2022 at 7:19 AM Jens Axboe <axboe@kernel.dk> wrote:
>
> The filesystem IO path can take advantage of allocating batches of
> requests, if the underlying submitter tells the block layer about it
> through the blk_plug. For passthrough IO, the exported API is the
> blk_mq_alloc_request() helper, and that one does not allow for
> request caching.
>
> Wire up request caching for blk_mq_alloc_request(), which is generally
> done without having a bio available upfront.
>
> Signed-off-by: Jens Axboe <axboe@kernel.dk>
> ---
>  block/blk-mq.c | 80 ++++++++++++++++++++++++++++++++++++++++++++------
>  1 file changed, 71 insertions(+), 9 deletions(-)
>
> diff --git a/block/blk-mq.c b/block/blk-mq.c
> index c11949d66163..d3a9f8b9c7ee 100644
> --- a/block/blk-mq.c
> +++ b/block/blk-mq.c
> @@ -510,25 +510,87 @@ static struct request *__blk_mq_alloc_requests(struct blk_mq_alloc_data *data)
>  					alloc_time_ns);
>  }
>
> -struct request *blk_mq_alloc_request(struct request_queue *q, blk_opf_t opf,
> -		blk_mq_req_flags_t flags)
> +static struct request *blk_mq_rq_cache_fill(struct request_queue *q,
> +					    struct blk_plug *plug,
> +					    blk_opf_t opf,
> +					    blk_mq_req_flags_t flags)
>  {
>  	struct blk_mq_alloc_data data = {
>  		.q		= q,
>  		.flags		= flags,
>  		.cmd_flags	= opf,
> -		.nr_tags	= 1,
> +		.nr_tags	= plug->nr_ios,
> +		.cached_rq	= &plug->cached_rq,
>  	};
>  	struct request *rq;
> -	int ret;
>
> -	ret = blk_queue_enter(q, flags);
> -	if (ret)
> -		return ERR_PTR(ret);
> +	if (blk_queue_enter(q, flags))
> +		return NULL;
> +
> +	plug->nr_ios = 1;
>
>  	rq = __blk_mq_alloc_requests(&data);
> -	if (!rq)
> -		goto out_queue_exit;
> +	if (unlikely(!rq))
> +		blk_queue_exit(q);
> +	return rq;
> +}
> +
> +static struct request *blk_mq_alloc_cached_request(struct request_queue *q,
> +						   blk_opf_t opf,
> +						   blk_mq_req_flags_t flags)
> +{
> +	struct blk_plug *plug = current->plug;
> +	struct request *rq;
> +
> +	if (!plug)
> +		return NULL;
> +	if (rq_list_empty(plug->cached_rq)) {
> +		if (plug->nr_ios == 1)
> +			return NULL;
> +		rq = blk_mq_rq_cache_fill(q, plug, opf, flags);
> +		if (rq)
> +			goto got_it;
> +		return NULL;
> +	}
> +	rq = rq_list_peek(&plug->cached_rq);
> +	if (!rq || rq->q != q)
> +		return NULL;
> +
> +	if (blk_mq_get_hctx_type(opf) != rq->mq_hctx->type)
> +		return NULL;
> +	if (op_is_flush(rq->cmd_flags) != op_is_flush(opf))
> +		return NULL;
> +
> +	plug->cached_rq = rq_list_next(rq);
> +got_it:
> +	rq->cmd_flags = opf;
> +	INIT_LIST_HEAD(&rq->queuelist);
> +	return rq;
> +}
> +
> +struct request *blk_mq_alloc_request(struct request_queue *q, blk_opf_t opf,
> +		blk_mq_req_flags_t flags)
> +{
> +	struct request *rq;
> +
> +	rq = blk_mq_alloc_cached_request(q, opf, flags);
> +	if (!rq) {
> +		struct blk_mq_alloc_data data = {
> +			.q		= q,
> +			.flags		= flags,
> +			.cmd_flags	= opf,
> +			.nr_tags	= 1,
> +		};
> +		int ret;
> +
> +		ret = blk_queue_enter(q, flags);
> +		if (ret)
> +			return ERR_PTR(ret);
> +
> +		rq = __blk_mq_alloc_requests(&data);
> +		if (!rq)
> +			goto out_queue_exit;
> +	}
>  	rq->__data_len = 0;
>  	rq->__sector = (sector_t) -1;
>  	rq->bio = rq->biotail = NULL;
> --
> 2.35.1
>

A large chunk of the passthrough performance improvement comes from
enabling request caching. On my setup, performance improves from 2.34 to
2.54 MIOPS. I tested this with the t/io_uring utility (in fio) on an
Intel Optane Gen2 device.

Tested-by: Anuj Gupta <anuj20.g@samsung.com>

--
Anuj Gupta
```diff
diff --git a/block/blk-mq.c b/block/blk-mq.c
index c11949d66163..d3a9f8b9c7ee 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -510,25 +510,87 @@ static struct request *__blk_mq_alloc_requests(struct blk_mq_alloc_data *data)
 					alloc_time_ns);
 }
 
-struct request *blk_mq_alloc_request(struct request_queue *q, blk_opf_t opf,
-		blk_mq_req_flags_t flags)
+static struct request *blk_mq_rq_cache_fill(struct request_queue *q,
+					    struct blk_plug *plug,
+					    blk_opf_t opf,
+					    blk_mq_req_flags_t flags)
 {
 	struct blk_mq_alloc_data data = {
 		.q		= q,
 		.flags		= flags,
 		.cmd_flags	= opf,
-		.nr_tags	= 1,
+		.nr_tags	= plug->nr_ios,
+		.cached_rq	= &plug->cached_rq,
 	};
 	struct request *rq;
-	int ret;
 
-	ret = blk_queue_enter(q, flags);
-	if (ret)
-		return ERR_PTR(ret);
+	if (blk_queue_enter(q, flags))
+		return NULL;
+
+	plug->nr_ios = 1;
 
 	rq = __blk_mq_alloc_requests(&data);
-	if (!rq)
-		goto out_queue_exit;
+	if (unlikely(!rq))
+		blk_queue_exit(q);
+	return rq;
+}
+
+static struct request *blk_mq_alloc_cached_request(struct request_queue *q,
+						   blk_opf_t opf,
+						   blk_mq_req_flags_t flags)
+{
+	struct blk_plug *plug = current->plug;
+	struct request *rq;
+
+	if (!plug)
+		return NULL;
+	if (rq_list_empty(plug->cached_rq)) {
+		if (plug->nr_ios == 1)
+			return NULL;
+		rq = blk_mq_rq_cache_fill(q, plug, opf, flags);
+		if (rq)
+			goto got_it;
+		return NULL;
+	}
+	rq = rq_list_peek(&plug->cached_rq);
+	if (!rq || rq->q != q)
+		return NULL;
+
+	if (blk_mq_get_hctx_type(opf) != rq->mq_hctx->type)
+		return NULL;
+	if (op_is_flush(rq->cmd_flags) != op_is_flush(opf))
+		return NULL;
+
+	plug->cached_rq = rq_list_next(rq);
+got_it:
+	rq->cmd_flags = opf;
+	INIT_LIST_HEAD(&rq->queuelist);
+	return rq;
+}
+
+struct request *blk_mq_alloc_request(struct request_queue *q, blk_opf_t opf,
+		blk_mq_req_flags_t flags)
+{
+	struct request *rq;
+
+	rq = blk_mq_alloc_cached_request(q, opf, flags);
+	if (!rq) {
+		struct blk_mq_alloc_data data = {
+			.q		= q,
+			.flags		= flags,
+			.cmd_flags	= opf,
+			.nr_tags	= 1,
+		};
+		int ret;
+
+		ret = blk_queue_enter(q, flags);
+		if (ret)
+			return ERR_PTR(ret);
+
+		rq = __blk_mq_alloc_requests(&data);
+		if (!rq)
+			goto out_queue_exit;
+	}
 	rq->__data_len = 0;
 	rq->__sector = (sector_t) -1;
 	rq->bio = rq->biotail = NULL;
```
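For readers skimming the diff: the cache-hit conditions in blk_mq_alloc_cached_request() are spread across a few checks. Condensed into one place they look like the sketch below. This is illustration only; the rq_cache_hit() helper is hypothetical and does not exist in the patch or the tree, but the individual checks and field names are taken directly from it.

```c
/*
 * Illustration only: a hypothetical helper (not in the patch) collecting
 * the checks blk_mq_alloc_cached_request() performs before it hands back
 * a request from plug->cached_rq. Assumes block-layer internal headers
 * (block/blk-mq.h) for blk_mq_get_hctx_type().
 */
static bool rq_cache_hit(struct request *rq, struct request_queue *q,
			 blk_opf_t opf)
{
	/* the cached request must belong to the queue we are allocating for */
	if (!rq || rq->q != q)
		return false;
	/* and map to the same hctx type (default/read/poll) as the new opf */
	if (blk_mq_get_hctx_type(opf) != rq->mq_hctx->type)
		return false;
	/* flush requests need dedicated setup, so flush-ness must match too */
	if (op_is_flush(rq->cmd_flags) != op_is_flush(opf))
		return false;
	return true;
}
```

If any of these checks fail, the allocation simply falls back to the regular uncached path; the cached request is left on the plug rather than being consumed.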
The filesystem IO path can take advantage of allocating batches of
requests, if the underlying submitter tells the block layer about it
through the blk_plug. For passthrough IO, the exported API is the
blk_mq_alloc_request() helper, and that one does not allow for
request caching.

Wire up request caching for blk_mq_alloc_request(), which is generally
done without having a bio available upfront.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq.c | 80 ++++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 71 insertions(+), 9 deletions(-)
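To make the wiring concrete, here is a minimal caller-side sketch, not part of the patch, of how an in-tree passthrough submitter could benefit: it batches allocations inside one plugged section sized with blk_start_plug_nr_ios(), so the first blk_mq_alloc_request() call fills plug->cached_rq and the following allocations are served from it. The function name and the overall pattern are assumptions for illustration; in practice the plug is typically set up by the submitter's own loop (for example io_uring's submission path), not by code like this.

```c
/*
 * Sketch only, not from the patch: a passthrough-style submitter issuing
 * several driver commands back to back. With request caching wired up,
 * the first blk_mq_alloc_request() in the plugged section can allocate
 * plug->nr_ios tags in one go and stash the extras in plug->cached_rq;
 * subsequent allocations become cheap list pops.
 */
#include <linux/blkdev.h>
#include <linux/blk-mq.h>
#include <linux/err.h>

static int submit_passthrough_batch(struct request_queue *q, unsigned int nr)
{
	struct blk_plug plug;
	unsigned int i;
	int ret = 0;

	/* hint the expected batch size to the block layer */
	blk_start_plug_nr_ios(&plug, nr);

	for (i = 0; i < nr; i++) {
		struct request *rq;

		rq = blk_mq_alloc_request(q, REQ_OP_DRV_IN, 0);
		if (IS_ERR(rq)) {
			ret = PTR_ERR(rq);
			break;
		}

		/* driver-specific command setup would go here */

		if (blk_execute_rq(rq, false) != BLK_STS_OK)
			ret = -EIO;
		blk_mq_free_request(rq);
	}

	/* returns any unused cached requests to the tag allocator */
	blk_finish_plug(&plug);
	return ret;
}
```

Without a plug, or with plug->nr_ios == 1, blk_mq_alloc_cached_request() bails out early and blk_mq_alloc_request() behaves exactly as it did before the patch.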