| Message ID | 20190919094547.67194-3-hare@suse.de (mailing list archive) |
|---|---|
| State | New, archived |
| Series | blk-mq I/O scheduling fixes |
On 2019/09/19 11:45, Hannes Reinecke wrote:
> From: Hannes Reinecke <hare@suse.com>
>
> A scheduler might be attached even for devices exposing more than
> one hardware queue, so the check for the number of hardware queue
> is pointless and should be removed.
>
> Signed-off-by: Hannes Reinecke <hare@suse.com>
> ---
>  block/blk-mq.c | 6 +-----
>  1 file changed, 1 insertion(+), 5 deletions(-)
>
> diff --git a/block/blk-mq.c b/block/blk-mq.c
> index 44ff3c1442a4..faab542e4836 100644
> --- a/block/blk-mq.c
> +++ b/block/blk-mq.c
> @@ -1931,7 +1931,6 @@ static void blk_add_rq_to_plug(struct blk_plug *plug, struct request *rq)
>
>  static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
>  {
> -        const int is_sync = op_is_sync(bio->bi_opf);
>          const int is_flush_fua = op_is_flush(bio->bi_opf);
>          struct blk_mq_alloc_data data = { .flags = 0};
>          struct request *rq;
> @@ -1977,7 +1976,7 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
>                  /* bypass scheduler for flush rq */
>                  blk_insert_flush(rq);
>                  blk_mq_run_hw_queue(data.hctx, true);
> -        } else if (plug && (q->nr_hw_queues == 1 || q->mq_ops->commit_rqs)) {
> +        } else if (plug && q->mq_ops->commit_rqs) {
>                  /*
>                   * Use plugging if we have a ->commit_rqs() hook as well, as
>                   * we know the driver uses bd->last in a smart fashion.
> @@ -2020,9 +2019,6 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
>                          blk_mq_try_issue_directly(data.hctx, same_queue_rq,
>                                          &cookie);
>                  }
> -        } else if ((q->nr_hw_queues > 1 && is_sync) || (!q->elevator &&
> -                        !data.hctx->dispatch_busy)) {
> -                blk_mq_try_issue_directly(data.hctx, rq, &cookie);

It may be worth mentioning that blk_mq_sched_insert_request() will do a direct
insert of the request using __blk_mq_insert_request(). But that insert is
slightly different from what blk_mq_try_issue_directly() does with
__blk_mq_issue_directly() as the request in that case is passed along to the
device using queue->mq_ops->queue_rq() while __blk_mq_insert_request() will put
the request in ctx->rq_lists[type].

This removes the optimized case !q->elevator && !data.hctx->dispatch_busy, but I
am not sure of the actual performance impact yet. We may want to patch
blk_mq_sched_insert_request() to handle that case.

>          } else {
>                  blk_mq_sched_insert_request(rq, false, true, true);
>          }
>
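To make the contrast concrete, here is a condensed sketch of the two paths Damien describes, paraphrased from the v5.3-era block layer. The `sketch_*` names are ours, not kernel symbols; `struct blk_mq_ctx` is private to block/blk-mq.h, so this only builds inside block/, and details are approximate.

```c
#include <linux/blk-mq.h>
#include "blk-mq.h"	/* struct blk_mq_ctx: the per-CPU software queues */

/* The gist of __blk_mq_insert_request(): park the request on the
 * per-CPU software queue list; it is only dispatched to the driver
 * on a later run of the hardware queue. */
static void sketch_sw_queue_insert(struct blk_mq_hw_ctx *hctx,
				   struct request *rq)
{
	struct blk_mq_ctx *ctx = rq->mq_ctx;

	spin_lock(&ctx->lock);
	list_add_tail(&rq->queuelist, &ctx->rq_lists[hctx->type]);
	spin_unlock(&ctx->lock);
}

/* The gist of __blk_mq_issue_directly(): hand the request straight to
 * the driver through ->queue_rq(), skipping the software queue. */
static blk_status_t sketch_direct_issue(struct blk_mq_hw_ctx *hctx,
					struct request *rq, bool last)
{
	struct blk_mq_queue_data bd = {
		.rq   = rq,
		.last = last,
	};

	return hctx->queue->mq_ops->queue_rq(hctx, &bd);
}
```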
On Thu, Sep 19, 2019 at 10:21:54AM +0000, Damien Le Moal wrote:
> On 2019/09/19 11:45, Hannes Reinecke wrote:
> > From: Hannes Reinecke <hare@suse.com>
> >
> > A scheduler might be attached even for devices exposing more than
> > one hardware queue, so the check for the number of hardware queue
> > is pointless and should be removed.
> >
> > Signed-off-by: Hannes Reinecke <hare@suse.com>
> > ---
> >  block/blk-mq.c | 6 +-----
> >  1 file changed, 1 insertion(+), 5 deletions(-)
> >
> > diff --git a/block/blk-mq.c b/block/blk-mq.c
> > index 44ff3c1442a4..faab542e4836 100644
> > --- a/block/blk-mq.c
> > +++ b/block/blk-mq.c
> > @@ -1931,7 +1931,6 @@ static void blk_add_rq_to_plug(struct blk_plug *plug, struct request *rq)
> >
> >  static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
> >  {
> > -        const int is_sync = op_is_sync(bio->bi_opf);
> >          const int is_flush_fua = op_is_flush(bio->bi_opf);
> >          struct blk_mq_alloc_data data = { .flags = 0};
> >          struct request *rq;
> > @@ -1977,7 +1976,7 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
> >                  /* bypass scheduler for flush rq */
> >                  blk_insert_flush(rq);
> >                  blk_mq_run_hw_queue(data.hctx, true);
> > -        } else if (plug && (q->nr_hw_queues == 1 || q->mq_ops->commit_rqs)) {
> > +        } else if (plug && q->mq_ops->commit_rqs) {
> >                  /*
> >                   * Use plugging if we have a ->commit_rqs() hook as well, as
> >                   * we know the driver uses bd->last in a smart fashion.
> > @@ -2020,9 +2019,6 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
> >                          blk_mq_try_issue_directly(data.hctx, same_queue_rq,
> >                                          &cookie);
> >                  }
> > -        } else if ((q->nr_hw_queues > 1 && is_sync) || (!q->elevator &&
> > -                        !data.hctx->dispatch_busy)) {
> > -                blk_mq_try_issue_directly(data.hctx, rq, &cookie);
>
> It may be worth mentioning that blk_mq_sched_insert_request() will do a direct
> insert of the request using __blk_mq_insert_request(). But that insert is
> slightly different from what blk_mq_try_issue_directly() does with
> __blk_mq_issue_directly() as the request in that case is passed along to the
> device using queue->mq_ops->queue_rq() while __blk_mq_insert_request() will put
> the request in ctx->rq_lists[type].
>
> This removes the optimized case !q->elevator && !data.hctx->dispatch_busy, but I
> am not sure of the actual performance impact yet. We may want to patch
> blk_mq_sched_insert_request() to handle that case.

The optimization did improve IOPS of single queue SCSI SSD a lot, see

commit 6ce3dd6eec114930cf2035a8bcb1e80477ed79a8
Author: Ming Lei <ming.lei@redhat.com>
Date:   Tue Jul 10 09:03:31 2018 +0800

    blk-mq: issue directly if hw queue isn't busy in case of 'none'

    In case of 'none' io scheduler, when hw queue isn't busy, it isn't
    necessary to enqueue request to sw queue and dequeue it from
    sw queue because request may be submitted to hw queue asap without
    extra cost, meantime there shouldn't be much request in sw queue,
    and we don't need to worry about effect on IO merge.

    There are still some single hw queue SCSI HBAs(HPSA, megaraid_sas, ...)
    which may connect high performance devices, so 'none' is often required
    for obtaining good performance.

    This patch improves IOPS and decreases CPU unilization on megaraid_sas,
    per Kashyap's test.

Thanks,
Ming
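For reference, the part of the check that Ming's commit introduced (and that this patch removes along with the rest of the branch) boils down to the following decision in the submission path. This is a condensed fragment of the hunk quoted above, not the complete blk_mq_make_request():

```c
	/*
	 * Condensed fragment of the fast path added by commit
	 * 6ce3dd6eec1 and removed by the patch under review; the
	 * pre-existing (q->nr_hw_queues > 1 && is_sync) half of the
	 * condition is omitted here for clarity.
	 */
	if (!q->elevator && !data.hctx->dispatch_busy) {
		/* 'none' scheduler and an idle hw queue: issue straight
		 * to the driver via ->queue_rq() */
		blk_mq_try_issue_directly(data.hctx, rq, &cookie);
	} else {
		/* otherwise stage the request in the sw queue or hand
		 * it to the elevator for later dispatch */
		blk_mq_sched_insert_request(rq, false, true, true);
	}
```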
> > > -        } else if (plug && (q->nr_hw_queues == 1 || q->mq_ops->commit_rqs)) {
> > > +        } else if (plug && q->mq_ops->commit_rqs) {
> > >                  /*
> > >                   * Use plugging if we have a ->commit_rqs() hook as well, as
> > >                   * we know the driver uses bd->last in a smart fashion.
> > > @@ -2020,9 +2019,6 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
> > >                          blk_mq_try_issue_directly(data.hctx, same_queue_rq,
> > >                                          &cookie);
> > >                  }
> > > -        } else if ((q->nr_hw_queues > 1 && is_sync) || (!q->elevator &&
> > > -                        !data.hctx->dispatch_busy)) {
> > > -                blk_mq_try_issue_directly(data.hctx, rq, &cookie);

Hannes -

The earlier check, prior to commit 6ce3dd6eec114930cf2035a8bcb1e80477ed79a8,
was only (q->nr_hw_queues > 1 && is_sync).
I am not sure whether the nr_hw_queues check is required at this place, but
the other part of the check, (!q->elevator && !data.hctx->dispatch_busy),
which qualifies a request for direct dispatch, is required for higher
performance.

Recent MegaRaid and MPT HBA Aero series controllers are capable of doing
~3.0M IOPS, and for such high performance using a single hardware queue,
commit 6ce3dd6eec114930cf2035a8bcb1e80477ed79a8 is very important.

Kashyap

> >
> > It may be worth mentioning that blk_mq_sched_insert_request() will do
> > a direct insert of the request using __blk_mq_insert_request(). But
> > that insert is slightly different from what
> > blk_mq_try_issue_directly() does with
> > __blk_mq_issue_directly() as the request in that case is passed along
> > to the device using queue->mq_ops->queue_rq() while
> > __blk_mq_insert_request() will put the request in ctx->rq_lists[type].
> >
> > This removes the optimized case !q->elevator &&
> > !data.hctx->dispatch_busy, but I am not sure of the actual performance
> > impact yet. We may want to patch
> > blk_mq_sched_insert_request() to handle that case.
>
> The optimization did improve IOPS of single queue SCSI SSD a lot, see
>
> commit 6ce3dd6eec114930cf2035a8bcb1e80477ed79a8
> Author: Ming Lei <ming.lei@redhat.com>
> Date:   Tue Jul 10 09:03:31 2018 +0800
>
>     blk-mq: issue directly if hw queue isn't busy in case of 'none'
>
>     In case of 'none' io scheduler, when hw queue isn't busy, it isn't
>     necessary to enqueue request to sw queue and dequeue it from
>     sw queue because request may be submitted to hw queue asap without
>     extra cost, meantime there shouldn't be much request in sw queue,
>     and we don't need to worry about effect on IO merge.
>
>     There are still some single hw queue SCSI HBAs(HPSA, megaraid_sas, ...)
>     which may connect high performance devices, so 'none' is often required
>     for obtaining good performance.
>
>     This patch improves IOPS and decreases CPU unilization on megaraid_sas,
>     per Kashyap's test.
>
> Thanks,
> Ming
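On the dispatch_busy half of that check: hctx->dispatch_busy is an exponentially weighted moving average of recent dispatch outcomes, so it reads zero only when ->queue_rq() has not recently pushed back with busy statuses. A paraphrased sketch of the v5.3-era update logic follows; the constant names are ours and the exact weights are an assumption, so treat this as illustrative rather than authoritative:

```c
#include <linux/blk-mq.h>

#define SKETCH_EWMA_WEIGHT 8	/* decay weight (assumed value) */
#define SKETCH_EWMA_FACTOR 4	/* fixed-point shift (assumed value) */

/* Paraphrased sketch of blk_mq_update_dispatch_busy(): each dispatch
 * result feeds an EWMA, so driver pushback drives the value up and
 * successful dispatches decay it back toward zero. */
static void sketch_update_dispatch_busy(struct blk_mq_hw_ctx *hctx, bool busy)
{
	unsigned int ewma;

	/* dispatch_busy is only tracked for the 'none' scheduler case */
	if (hctx->queue->elevator)
		return;

	ewma = hctx->dispatch_busy;
	if (!ewma && !busy)
		return;		/* already idle and still idle */

	/* decay the old value and mix in the new sample */
	ewma *= SKETCH_EWMA_WEIGHT - 1;
	if (busy)
		ewma += 1 << SKETCH_EWMA_FACTOR;
	ewma /= SKETCH_EWMA_WEIGHT;

	hctx->dispatch_busy = ewma;
}
```

A single busy result therefore keeps the estimate above zero for several subsequent submissions, steering new requests back through the software queue until the driver stops pushing back.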
On 2019/09/19 17:48, Kashyap Desai wrote:
>>>> -        } else if (plug && (q->nr_hw_queues == 1 || q->mq_ops->commit_rqs)) {
>>>> +        } else if (plug && q->mq_ops->commit_rqs) {
>>>>                  /*
>>>>                   * Use plugging if we have a ->commit_rqs() hook as well, as
>>>>                   * we know the driver uses bd->last in a smart fashion.
>>>> @@ -2020,9 +2019,6 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
>>>>                          blk_mq_try_issue_directly(data.hctx, same_queue_rq,
>>>>                                          &cookie);
>>>>                  }
>>>> -        } else if ((q->nr_hw_queues > 1 && is_sync) || (!q->elevator &&
>>>> -                        !data.hctx->dispatch_busy)) {
>>>> -                blk_mq_try_issue_directly(data.hctx, rq, &cookie);
> Hannes -
>
> The earlier check, prior to commit 6ce3dd6eec114930cf2035a8bcb1e80477ed79a8,
> was only (q->nr_hw_queues > 1 && is_sync).
> I am not sure whether the nr_hw_queues check is required at this place, but
> the other part of the check, (!q->elevator && !data.hctx->dispatch_busy),
> which qualifies a request for direct dispatch, is required for higher
> performance.
>
> Recent MegaRaid and MPT HBA Aero series controllers are capable of doing
> ~3.0M IOPS, and for such high performance using a single hardware queue,
> commit 6ce3dd6eec114930cf2035a8bcb1e80477ed79a8 is very important.

Kashyap, Ming,

Thanks for the information. We will restore this case.

> Kashyap
>
>>>
>>> It may be worth mentioning that blk_mq_sched_insert_request() will do
>>> a direct insert of the request using __blk_mq_insert_request(). But
>>> that insert is slightly different from what
>>> blk_mq_try_issue_directly() does with
>>> __blk_mq_issue_directly() as the request in that case is passed along
>>> to the device using queue->mq_ops->queue_rq() while
>>> __blk_mq_insert_request() will put the request in ctx->rq_lists[type].
>>>
>>> This removes the optimized case !q->elevator &&
>>> !data.hctx->dispatch_busy, but I am not sure of the actual performance
>>> impact yet. We may want to patch
>>> blk_mq_sched_insert_request() to handle that case.
>>
>> The optimization did improve IOPS of single queue SCSI SSD a lot, see
>>
>> commit 6ce3dd6eec114930cf2035a8bcb1e80477ed79a8
>> Author: Ming Lei <ming.lei@redhat.com>
>> Date:   Tue Jul 10 09:03:31 2018 +0800
>>
>>     blk-mq: issue directly if hw queue isn't busy in case of 'none'
>>
>>     In case of 'none' io scheduler, when hw queue isn't busy, it isn't
>>     necessary to enqueue request to sw queue and dequeue it from
>>     sw queue because request may be submitted to hw queue asap without
>>     extra cost, meantime there shouldn't be much request in sw queue,
>>     and we don't need to worry about effect on IO merge.
>>
>>     There are still some single hw queue SCSI HBAs(HPSA, megaraid_sas, ...)
>>     which may connect high performance devices, so 'none' is often required
>>     for obtaining good performance.
>>
>>     This patch improves IOPS and decreases CPU unilization on megaraid_sas,
>>     per Kashyap's test.
>>
>> Thanks,
>> Ming
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 44ff3c1442a4..faab542e4836 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1931,7 +1931,6 @@ static void blk_add_rq_to_plug(struct blk_plug *plug, struct request *rq)
 
 static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
 {
-        const int is_sync = op_is_sync(bio->bi_opf);
         const int is_flush_fua = op_is_flush(bio->bi_opf);
         struct blk_mq_alloc_data data = { .flags = 0};
         struct request *rq;
@@ -1977,7 +1976,7 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
                 /* bypass scheduler for flush rq */
                 blk_insert_flush(rq);
                 blk_mq_run_hw_queue(data.hctx, true);
-        } else if (plug && (q->nr_hw_queues == 1 || q->mq_ops->commit_rqs)) {
+        } else if (plug && q->mq_ops->commit_rqs) {
                 /*
                  * Use plugging if we have a ->commit_rqs() hook as well, as
                  * we know the driver uses bd->last in a smart fashion.
@@ -2020,9 +2019,6 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
                         blk_mq_try_issue_directly(data.hctx, same_queue_rq,
                                         &cookie);
                 }
-        } else if ((q->nr_hw_queues > 1 && is_sync) || (!q->elevator &&
-                        !data.hctx->dispatch_busy)) {
-                blk_mq_try_issue_directly(data.hctx, rq, &cookie);
         } else {
                 blk_mq_sched_insert_request(rq, false, true, true);
         }