Message ID | 20230717081602.255767-1-chengming.zhou@linux.dev (mailing list archive) |
---|---|
State | New, archived |
Series | blk-mq: plug based timestamp caching |
Hello Jens and Tejun, does this patch look fine to you? Looking forward to
your comments. Thanks.

On 2023/7/17 16:16, chengming.zhou@linux.dev wrote:
> From: Chengming Zhou <zhouchengming@bytedance.com>
>
> This idea is from Tejun [1], who doesn't like manually optimized timestamp
> reads, so this patch introduces a plug-based timestamp caching
> infrastructure, which is more generic and performs better. It works
> because we don't care about nanosecond accuracy.
>
> Plug initialization starts with the timestamp invalid, and the
> blk_get_time() helper returns the current time when there is no plug,
> caching it in the plug on the first read. Flushing the plug marks it
> invalid again at the end.
>
> All "alloc_time_ns", "start_time_ns" and "io_start_time_ns" settings are
> replaced with the blk_get_time() helper.
>
> The only direct caller of ktime_get_ns() left in blk-mq is at request
> end, which doesn't use the cached timestamp, for better accuracy of the
> completion time.
>
> [1] https://lore.kernel.org/lkml/ZLA7QAfSojxu_FMW@slm.duckdns.org/
>
> Suggested-by: Tejun Heo <tj@kernel.org>
> Suggested-by: Jens Axboe <axboe@kernel.dk>
> Signed-off-by: Chengming Zhou <zhouchengming@bytedance.com>
> ---
>  block/blk-core.c       |  3 +++
>  block/blk-mq.c         | 22 +++++++++++++++++-----
>  include/linux/blkdev.h |  2 ++
>  3 files changed, 22 insertions(+), 5 deletions(-)
>
> diff --git a/block/blk-core.c b/block/blk-core.c
> index 90de50082146..a63d33af7287 100644
> --- a/block/blk-core.c
> +++ b/block/blk-core.c
> @@ -1054,6 +1054,7 @@ void blk_start_plug_nr_ios(struct blk_plug *plug, unsigned short nr_ios)
>  		return;
>
>  	plug->mq_list = NULL;
> +	plug->cached_time_ns = 0;
>  	plug->cached_rq = NULL;
>  	plug->nr_ios = min_t(unsigned short, nr_ios, BLK_MAX_REQUEST_COUNT);
>  	plug->rq_count = 0;
> @@ -1153,6 +1154,8 @@ void __blk_flush_plug(struct blk_plug *plug, bool from_schedule)
>  	 */
>  	if (unlikely(!rq_list_empty(plug->cached_rq)))
>  		blk_mq_free_plug_rqs(plug);
> +
> +	plug->cached_time_ns = 0;
>  }
>
>  /**
> diff --git a/block/blk-mq.c b/block/blk-mq.c
> index b04ff6f56926..54648bfaab9c 100644
> --- a/block/blk-mq.c
> +++ b/block/blk-mq.c
> @@ -311,6 +311,18 @@ void blk_mq_wake_waiters(struct request_queue *q)
>  		blk_mq_tag_wakeup_all(hctx->tags, true);
>  }
>
> +static inline u64 blk_get_time(void)
> +{
> +	struct blk_plug *plug = current->plug;
> +
> +	if (!plug)
> +		return ktime_get_ns();
> +
> +	if (!plug->cached_time_ns)
> +		plug->cached_time_ns = ktime_get_ns();
> +	return plug->cached_time_ns;
> +}
> +
>  void blk_rq_init(struct request_queue *q, struct request *rq)
>  {
>  	memset(rq, 0, sizeof(*rq));
> @@ -322,7 +334,7 @@ void blk_rq_init(struct request_queue *q, struct request *rq)
>  	RB_CLEAR_NODE(&rq->rb_node);
>  	rq->tag = BLK_MQ_NO_TAG;
>  	rq->internal_tag = BLK_MQ_NO_TAG;
> -	rq->start_time_ns = ktime_get_ns();
> +	rq->start_time_ns = blk_get_time();
>  	rq->part = NULL;
>  	blk_crypto_rq_set_defaults(rq);
>  }
> @@ -332,7 +344,7 @@ EXPORT_SYMBOL(blk_rq_init);
>  static inline void blk_mq_rq_time_init(struct request *rq, u64 alloc_time_ns)
>  {
>  	if (blk_mq_need_time_stamp(rq))
> -		rq->start_time_ns = ktime_get_ns();
> +		rq->start_time_ns = blk_get_time();
>  	else
>  		rq->start_time_ns = 0;
>
> @@ -441,7 +453,7 @@ static struct request *__blk_mq_alloc_requests(struct blk_mq_alloc_data *data)
>
>  	/* alloc_time includes depth and tag waits */
>  	if (blk_queue_rq_alloc_time(q))
> -		alloc_time_ns = ktime_get_ns();
> +		alloc_time_ns = blk_get_time();
>
>  	if (data->cmd_flags & REQ_NOWAIT)
>  		data->flags |= BLK_MQ_REQ_NOWAIT;
> @@ -624,7 +636,7 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q,
>
>  	/* alloc_time includes depth and tag waits */
>  	if (blk_queue_rq_alloc_time(q))
> -		alloc_time_ns = ktime_get_ns();
> +		alloc_time_ns = blk_get_time();
>
>  	/*
>  	 * If the tag allocator sleeps we could get an allocation for a
> @@ -1235,7 +1247,7 @@ void blk_mq_start_request(struct request *rq)
>  	trace_block_rq_issue(rq);
>
>  	if (test_bit(QUEUE_FLAG_STATS, &q->queue_flags)) {
> -		rq->io_start_time_ns = ktime_get_ns();
> +		rq->io_start_time_ns = blk_get_time();
>  		rq->stats_sectors = blk_rq_sectors(rq);
>  		rq->rq_flags |= RQF_STATS;
>  		rq_qos_issue(q, rq);
> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
> index ed44a997f629..21a3d4d7ab2b 100644
> --- a/include/linux/blkdev.h
> +++ b/include/linux/blkdev.h
> @@ -961,6 +961,8 @@ void blk_mark_disk_dead(struct gendisk *disk);
>  struct blk_plug {
>  	struct request *mq_list; /* blk-mq requests */
>
> +	u64 cached_time_ns;
> +
>  	/* if ios_left is > 1, we can batch tag/rq allocations */
>  	struct request *cached_rq;
>  	unsigned short nr_ios;
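[Editor's note: for readers following the thread, the caching scheme is easy to model outside the kernel. The sketch below is a hypothetical user-space analogue, not part of the patch: demo_plug, demo_get_time() and the clock_gettime(CLOCK_MONOTONIC, ...) stand-in for ktime_get_ns() are all illustrative assumptions.]

    /*
     * Hypothetical user-space model of the plug-based timestamp cache.
     * demo_plug and demo_get_time() mirror struct blk_plug and
     * blk_get_time() from the patch above; clock_gettime() stands in
     * for ktime_get_ns(). Illustrative only.
     */
    #include <stdio.h>
    #include <time.h>

    struct demo_plug {
            unsigned long long cached_time_ns;      /* 0 means "invalid" */
    };

    /* Stand-in for ktime_get_ns(): monotonic clock, in nanoseconds. */
    static unsigned long long now_ns(void)
    {
            struct timespec ts;

            clock_gettime(CLOCK_MONOTONIC, &ts);
            return (unsigned long long)ts.tv_sec * 1000000000ULL + ts.tv_nsec;
    }

    /* Like blk_get_time(): read the clock at most once per plugged section. */
    static unsigned long long demo_get_time(struct demo_plug *plug)
    {
            if (!plug)
                    return now_ns();

            if (!plug->cached_time_ns)
                    plug->cached_time_ns = now_ns();
            return plug->cached_time_ns;
    }

    int main(void)
    {
            struct demo_plug plug = { .cached_time_ns = 0 };

            /* All "requests" stamped inside one plugged section share one
             * timestamp; only the first call actually reads the clock. */
            unsigned long long t1 = demo_get_time(&plug);
            unsigned long long t2 = demo_get_time(&plug);
            printf("same stamp within plug: %s\n", t1 == t2 ? "yes" : "no");

            /* Invalidate on flush, as __blk_flush_plug() does, so the next
             * plugged section starts from a fresh clock read. */
            plug.cached_time_ns = 0;
            printf("stamp refreshed after flush: %s\n",
                   demo_get_time(&plug) >= t1 ? "yes" : "no");
            return 0;
    }

The 0-means-invalid convention is why no separate validity flag is needed; in the unlikely event the clock reads exactly 0, the helper simply reads it again, which is harmless given that nanosecond accuracy is explicitly not a goal here.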