Message ID | 20211017013748.76461-7-axboe@kernel.dk (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Various block layer optimizations | expand |
On Sat, Oct 16, 2021 at 07:37:40PM -0600, Jens Axboe wrote: > > static inline void blk_mq_sched_requeue_request(struct request *rq) > { > + if (rq->rq_flags & RQF_ELV) { > + struct request_queue *q = rq->q; > + struct elevator_queue *e = q->elevator; > > + if ((rq->rq_flags & RQF_ELVPRIV) && e->type->ops.requeue_request) > + e->type->ops.requeue_request(rq); > + } I think we can just kill off RQF_ELVPRIV now. It is equivalent to RQF_ELV for !op_is_flush requests.
On 10/18/21 2:58 AM, Christoph Hellwig wrote: > On Sat, Oct 16, 2021 at 07:37:40PM -0600, Jens Axboe wrote: >> >> static inline void blk_mq_sched_requeue_request(struct request *rq) >> { >> + if (rq->rq_flags & RQF_ELV) { >> + struct request_queue *q = rq->q; >> + struct elevator_queue *e = q->elevator; >> >> + if ((rq->rq_flags & RQF_ELVPRIV) && e->type->ops.requeue_request) >> + e->type->ops.requeue_request(rq); >> + } > > I think we can just kill of RQF_ELVPRIV now. It is equivalent to > RQF_ELV for !op_is_flush requests. I'd prefer not to for this round, as ELV could be set without PRIV depending on the scheduler.
Hi Jens, On 17/10/2021 02:37, Jens Axboe wrote: > Add an rq private RQF_ELV flag, which tells the block layer that this > request was initialized on a queue that has an IO scheduler attached. > This allows for faster checking in the fast path, rather than having to > deference rq->q later on. > > Elevator switching does full quiesce of the queue before detaching an > IO scheduler, so it's safe to cache this in the request itself. A kernelci.org automated bisection found that this patch introduced a regression in next-20211019 with a NULL pointer dereference, which only seems to be affecting QEMU but across all architectures. More details about the regression can be found here: https://linux.kernelci.org/test/job/next/branch/master/kernel/next-20211019/plan/baseline/ https://linux.kernelci.org/test/case/id/616ea20eb7104071c43358ea/ See also all the test jobs involved in the automated bisection: https://lava.collabora.co.uk/scheduler/device_type/qemu?dt_search=bisection-287 If you do send a fix, please include this trailer: Reported-by: "kernelci.org bot" <bot@kernelci.org> Please let us know if this seems like a valid bisection result and if you need any help to debug the issue or try a fix. 
Best wishes, Guillaume GitHub: https://github.com/kernelci/kernelci-project/issues/68 > Signed-off-by: Jens Axboe <axboe@kernel.dk> > --- > block/blk-mq-sched.h | 27 ++++++++++++++++----------- > block/blk-mq.c | 20 +++++++++++--------- > include/linux/blk-mq.h | 2 ++ > 3 files changed, 29 insertions(+), 20 deletions(-) > > diff --git a/block/blk-mq-sched.h b/block/blk-mq-sched.h > index fe252278ed9a..98836106b25f 100644 > --- a/block/blk-mq-sched.h > +++ b/block/blk-mq-sched.h > @@ -56,29 +56,34 @@ static inline bool > blk_mq_sched_allow_merge(struct request_queue *q, struct request *rq, > struct bio *bio) > { > - struct elevator_queue *e = q->elevator; > - > - if (e && e->type->ops.allow_merge) > - return e->type->ops.allow_merge(q, rq, bio); > + if (rq->rq_flags & RQF_ELV) { > + struct elevator_queue *e = q->elevator; > > + if (e->type->ops.allow_merge) > + return e->type->ops.allow_merge(q, rq, bio); > + } > return true; > } > > static inline void blk_mq_sched_completed_request(struct request *rq, u64 now) > { > - struct elevator_queue *e = rq->q->elevator; > + if (rq->rq_flags & RQF_ELV) { > + struct elevator_queue *e = rq->q->elevator; > > - if (e && e->type->ops.completed_request) > - e->type->ops.completed_request(rq, now); > + if (e->type->ops.completed_request) > + e->type->ops.completed_request(rq, now); > + } > } > > static inline void blk_mq_sched_requeue_request(struct request *rq) > { > - struct request_queue *q = rq->q; > - struct elevator_queue *e = q->elevator; > + if (rq->rq_flags & RQF_ELV) { > + struct request_queue *q = rq->q; > + struct elevator_queue *e = q->elevator; > > - if ((rq->rq_flags & RQF_ELVPRIV) && e && e->type->ops.requeue_request) > - e->type->ops.requeue_request(rq); > + if ((rq->rq_flags & RQF_ELVPRIV) && e->type->ops.requeue_request) > + e->type->ops.requeue_request(rq); > + } > } > > static inline bool blk_mq_sched_has_work(struct blk_mq_hw_ctx *hctx) > diff --git a/block/blk-mq.c b/block/blk-mq.c > index 
fa5b12200404..5d22c228f6df 100644 > --- a/block/blk-mq.c > +++ b/block/blk-mq.c > @@ -299,7 +299,7 @@ void blk_mq_wake_waiters(struct request_queue *q) > */ > static inline bool blk_mq_need_time_stamp(struct request *rq) > { > - return (rq->rq_flags & (RQF_IO_STAT | RQF_STATS)) || rq->q->elevator; > + return (rq->rq_flags & (RQF_IO_STAT | RQF_STATS | RQF_ELV)); > } > > static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data, > @@ -309,9 +309,11 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data, > struct request *rq = tags->static_rqs[tag]; > > if (data->q->elevator) { > + rq->rq_flags = RQF_ELV; > rq->tag = BLK_MQ_NO_TAG; > rq->internal_tag = tag; > } else { > + rq->rq_flags = 0; > rq->tag = tag; > rq->internal_tag = BLK_MQ_NO_TAG; > } > @@ -320,7 +322,6 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data, > rq->q = data->q; > rq->mq_ctx = data->ctx; > rq->mq_hctx = data->hctx; > - rq->rq_flags = 0; > rq->cmd_flags = data->cmd_flags; > if (data->flags & BLK_MQ_REQ_PM) > rq->rq_flags |= RQF_PM; > @@ -356,11 +357,11 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data, > data->ctx->rq_dispatched[op_is_sync(data->cmd_flags)]++; > refcount_set(&rq->ref, 1); > > - if (!op_is_flush(data->cmd_flags)) { > + if (!op_is_flush(data->cmd_flags) && (rq->rq_flags & RQF_ELV)) { > struct elevator_queue *e = data->q->elevator; > > rq->elv.icq = NULL; > - if (e && e->type->ops.prepare_request) { > + if (e->type->ops.prepare_request) { > if (e->type->icq_cache) > blk_mq_sched_assign_ioc(rq); > > @@ -575,12 +576,13 @@ static void __blk_mq_free_request(struct request *rq) > void blk_mq_free_request(struct request *rq) > { > struct request_queue *q = rq->q; > - struct elevator_queue *e = q->elevator; > struct blk_mq_ctx *ctx = rq->mq_ctx; > struct blk_mq_hw_ctx *hctx = rq->mq_hctx; > > - if (rq->rq_flags & RQF_ELVPRIV) { > - if (e && e->type->ops.finish_request) > + if (rq->rq_flags & (RQF_ELVPRIV 
| RQF_ELV)) { > + struct elevator_queue *e = q->elevator; > + > + if (e->type->ops.finish_request) > e->type->ops.finish_request(rq); > if (rq->elv.icq) { > put_io_context(rq->elv.icq->ioc); > @@ -2239,7 +2241,7 @@ static blk_status_t __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx, > goto insert; > } > > - if (q->elevator && !bypass_insert) > + if ((rq->rq_flags & RQF_ELV) && !bypass_insert) > goto insert; > > budget_token = blk_mq_get_dispatch_budget(q); > @@ -2475,7 +2477,7 @@ void blk_mq_submit_bio(struct bio *bio) > } > > blk_add_rq_to_plug(plug, rq); > - } else if (q->elevator) { > + } else if (rq->rq_flags & RQF_ELV) { > /* Insert the request at the IO scheduler queue */ > blk_mq_sched_insert_request(rq, false, true, true); > } else if (plug && !blk_queue_nomerges(q)) { > diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h > index a9c1d0882550..3a399aa372b5 100644 > --- a/include/linux/blk-mq.h > +++ b/include/linux/blk-mq.h > @@ -55,6 +55,8 @@ typedef __u32 __bitwise req_flags_t; > #define RQF_MQ_POLL_SLEPT ((__force req_flags_t)(1 << 20)) > /* ->timeout has been called, don't expire again */ > #define RQF_TIMED_OUT ((__force req_flags_t)(1 << 21)) > +/* queue has elevator attached */ > +#define RQF_ELV ((__force req_flags_t)(1 << 22)) > > /* flags that prevent us from merging requests: */ > #define RQF_NOMERGE_FLAGS \ >
On 10/19/21 4:21 PM, Guillaume Tucker wrote: > Hi Jens, > > On 17/10/2021 02:37, Jens Axboe wrote: >> Add an rq private RQF_ELV flag, which tells the block layer that this >> request was initialized on a queue that has an IO scheduler attached. >> This allows for faster checking in the fast path, rather than having to >> deference rq->q later on. >> >> Elevator switching does full quiesce of the queue before detaching an >> IO scheduler, so it's safe to cache this in the request itself. > > A kernelci.org automated bisection found that this patch > introduced a regression in next-20211019 with a NULL pointer > dereference, which only seems to be affecting QEMU but across all > architectures. > > More details about the regression can be found here: > > https://linux.kernelci.org/test/job/next/branch/master/kernel/next-20211019/plan/baseline/ > https://linux.kernelci.org/test/case/id/616ea20eb7104071c43358ea/ > > See also all the test jobs involved in the automated bisection: > > https://lava.collabora.co.uk/scheduler/device_type/qemu?dt_search=bisection-287 > > If you do send a fix, please include this trailer: > > Reported-by: "kernelci.org bot" <bot@kernelci.org> > > > Please let us know if this seems like a valid bisection result > and if you need any help to debug the issue or try a fix. This got fixed yesterday, current tree is fine.
diff --git a/block/blk-mq-sched.h b/block/blk-mq-sched.h index fe252278ed9a..98836106b25f 100644 --- a/block/blk-mq-sched.h +++ b/block/blk-mq-sched.h @@ -56,29 +56,34 @@ static inline bool blk_mq_sched_allow_merge(struct request_queue *q, struct request *rq, struct bio *bio) { - struct elevator_queue *e = q->elevator; - - if (e && e->type->ops.allow_merge) - return e->type->ops.allow_merge(q, rq, bio); + if (rq->rq_flags & RQF_ELV) { + struct elevator_queue *e = q->elevator; + if (e->type->ops.allow_merge) + return e->type->ops.allow_merge(q, rq, bio); + } return true; } static inline void blk_mq_sched_completed_request(struct request *rq, u64 now) { - struct elevator_queue *e = rq->q->elevator; + if (rq->rq_flags & RQF_ELV) { + struct elevator_queue *e = rq->q->elevator; - if (e && e->type->ops.completed_request) - e->type->ops.completed_request(rq, now); + if (e->type->ops.completed_request) + e->type->ops.completed_request(rq, now); + } } static inline void blk_mq_sched_requeue_request(struct request *rq) { - struct request_queue *q = rq->q; - struct elevator_queue *e = q->elevator; + if (rq->rq_flags & RQF_ELV) { + struct request_queue *q = rq->q; + struct elevator_queue *e = q->elevator; - if ((rq->rq_flags & RQF_ELVPRIV) && e && e->type->ops.requeue_request) - e->type->ops.requeue_request(rq); + if ((rq->rq_flags & RQF_ELVPRIV) && e->type->ops.requeue_request) + e->type->ops.requeue_request(rq); + } } static inline bool blk_mq_sched_has_work(struct blk_mq_hw_ctx *hctx) diff --git a/block/blk-mq.c b/block/blk-mq.c index fa5b12200404..5d22c228f6df 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -299,7 +299,7 @@ void blk_mq_wake_waiters(struct request_queue *q) */ static inline bool blk_mq_need_time_stamp(struct request *rq) { - return (rq->rq_flags & (RQF_IO_STAT | RQF_STATS)) || rq->q->elevator; + return (rq->rq_flags & (RQF_IO_STAT | RQF_STATS | RQF_ELV)); } static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data, @@ -309,9 +309,11 @@ 
static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data, struct request *rq = tags->static_rqs[tag]; if (data->q->elevator) { + rq->rq_flags = RQF_ELV; rq->tag = BLK_MQ_NO_TAG; rq->internal_tag = tag; } else { + rq->rq_flags = 0; rq->tag = tag; rq->internal_tag = BLK_MQ_NO_TAG; } @@ -320,7 +322,6 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data, rq->q = data->q; rq->mq_ctx = data->ctx; rq->mq_hctx = data->hctx; - rq->rq_flags = 0; rq->cmd_flags = data->cmd_flags; if (data->flags & BLK_MQ_REQ_PM) rq->rq_flags |= RQF_PM; @@ -356,11 +357,11 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data, data->ctx->rq_dispatched[op_is_sync(data->cmd_flags)]++; refcount_set(&rq->ref, 1); - if (!op_is_flush(data->cmd_flags)) { + if (!op_is_flush(data->cmd_flags) && (rq->rq_flags & RQF_ELV)) { struct elevator_queue *e = data->q->elevator; rq->elv.icq = NULL; - if (e && e->type->ops.prepare_request) { + if (e->type->ops.prepare_request) { if (e->type->icq_cache) blk_mq_sched_assign_ioc(rq); @@ -575,12 +576,13 @@ static void __blk_mq_free_request(struct request *rq) void blk_mq_free_request(struct request *rq) { struct request_queue *q = rq->q; - struct elevator_queue *e = q->elevator; struct blk_mq_ctx *ctx = rq->mq_ctx; struct blk_mq_hw_ctx *hctx = rq->mq_hctx; - if (rq->rq_flags & RQF_ELVPRIV) { - if (e && e->type->ops.finish_request) + if (rq->rq_flags & (RQF_ELVPRIV | RQF_ELV)) { + struct elevator_queue *e = q->elevator; + + if (e->type->ops.finish_request) e->type->ops.finish_request(rq); if (rq->elv.icq) { put_io_context(rq->elv.icq->ioc); @@ -2239,7 +2241,7 @@ static blk_status_t __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx, goto insert; } - if (q->elevator && !bypass_insert) + if ((rq->rq_flags & RQF_ELV) && !bypass_insert) goto insert; budget_token = blk_mq_get_dispatch_budget(q); @@ -2475,7 +2477,7 @@ void blk_mq_submit_bio(struct bio *bio) } blk_add_rq_to_plug(plug, rq); - } else if (q->elevator) 
{ + } else if (rq->rq_flags & RQF_ELV) { /* Insert the request at the IO scheduler queue */ blk_mq_sched_insert_request(rq, false, true, true); } else if (plug && !blk_queue_nomerges(q)) { diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index a9c1d0882550..3a399aa372b5 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -55,6 +55,8 @@ typedef __u32 __bitwise req_flags_t; #define RQF_MQ_POLL_SLEPT ((__force req_flags_t)(1 << 20)) /* ->timeout has been called, don't expire again */ #define RQF_TIMED_OUT ((__force req_flags_t)(1 << 21)) +/* queue has elevator attached */ +#define RQF_ELV ((__force req_flags_t)(1 << 22)) /* flags that prevent us from merging requests: */ #define RQF_NOMERGE_FLAGS \
Add an rq private RQF_ELV flag, which tells the block layer that this request was initialized on a queue that has an IO scheduler attached. This allows for faster checking in the fast path, rather than having to dereference rq->q later on. Elevator switching does full quiesce of the queue before detaching an IO scheduler, so it's safe to cache this in the request itself. Signed-off-by: Jens Axboe <axboe@kernel.dk> --- block/blk-mq-sched.h | 27 ++++++++++++++++----------- block/blk-mq.c | 20 +++++++++++--------- include/linux/blk-mq.h | 2 ++ 3 files changed, 29 insertions(+), 20 deletions(-)