Message ID | 20230717081602.255767-1-chengming.zhou@linux.dev (mailing list archive) |
---|---|
State | New, archived |
Series | blk-mq: plug based timestamp caching |
Hello Jens and Tejun, does this patch look fine to you? Looking forward to
your comments. Thanks.

On 2023/7/17 16:16, chengming.zhou@linux.dev wrote:
> From: Chengming Zhou <zhouchengming@bytedance.com>
>
> This idea is from Tejun [1], who doesn't like manually optimized timestamp
> reads, so this patch introduces a plug-based timestamp caching
> infrastructure, which is more generic and performs better. It works
> because we don't care about nanosecond accuracy.
>
> Plug initialization starts with the timestamp invalid, and the
> blk_get_time() helper returns the current time when there is no plug,
> caching it in the plug on the first read. Flushing the plug marks it
> invalid again at the end.
>
> All "alloc_time_ns", "start_time_ns" and "io_start_time_ns" settings are
> replaced with the blk_get_time() helper.
>
> The only direct caller of ktime_get_ns() left in blk-mq is at request
> end, which doesn't use the cached timestamp, for better accuracy of the
> completion time.
>
> [1] https://lore.kernel.org/lkml/ZLA7QAfSojxu_FMW@slm.duckdns.org/
>
> Suggested-by: Tejun Heo <tj@kernel.org>
> Suggested-by: Jens Axboe <axboe@kernel.dk>
> Signed-off-by: Chengming Zhou <zhouchengming@bytedance.com>
> ---
>  block/blk-core.c       |  3 +++
>  block/blk-mq.c         | 22 +++++++++++++++++-----
>  include/linux/blkdev.h |  2 ++
>  3 files changed, 22 insertions(+), 5 deletions(-)
>
> diff --git a/block/blk-core.c b/block/blk-core.c
> index 90de50082146..a63d33af7287 100644
> --- a/block/blk-core.c
> +++ b/block/blk-core.c
> @@ -1054,6 +1054,7 @@ void blk_start_plug_nr_ios(struct blk_plug *plug, unsigned short nr_ios)
>  		return;
>
>  	plug->mq_list = NULL;
> +	plug->cached_time_ns = 0;
>  	plug->cached_rq = NULL;
>  	plug->nr_ios = min_t(unsigned short, nr_ios, BLK_MAX_REQUEST_COUNT);
>  	plug->rq_count = 0;
> @@ -1153,6 +1154,8 @@ void __blk_flush_plug(struct blk_plug *plug, bool from_schedule)
>  	 */
>  	if (unlikely(!rq_list_empty(plug->cached_rq)))
>  		blk_mq_free_plug_rqs(plug);
> +
> +	plug->cached_time_ns = 0;
>  }
>
>  /**
> diff --git a/block/blk-mq.c b/block/blk-mq.c
> index b04ff6f56926..54648bfaab9c 100644
> --- a/block/blk-mq.c
> +++ b/block/blk-mq.c
> @@ -311,6 +311,18 @@ void blk_mq_wake_waiters(struct request_queue *q)
>  		blk_mq_tag_wakeup_all(hctx->tags, true);
>  }
>
> +static inline u64 blk_get_time(void)
> +{
> +	struct blk_plug *plug = current->plug;
> +
> +	if (!plug)
> +		return ktime_get_ns();
> +
> +	if (!plug->cached_time_ns)
> +		plug->cached_time_ns = ktime_get_ns();
> +	return plug->cached_time_ns;
> +}
> +
>  void blk_rq_init(struct request_queue *q, struct request *rq)
>  {
>  	memset(rq, 0, sizeof(*rq));
> @@ -322,7 +334,7 @@ void blk_rq_init(struct request_queue *q, struct request *rq)
>  	RB_CLEAR_NODE(&rq->rb_node);
>  	rq->tag = BLK_MQ_NO_TAG;
>  	rq->internal_tag = BLK_MQ_NO_TAG;
> -	rq->start_time_ns = ktime_get_ns();
> +	rq->start_time_ns = blk_get_time();
>  	rq->part = NULL;
>  	blk_crypto_rq_set_defaults(rq);
>  }
> @@ -332,7 +344,7 @@ EXPORT_SYMBOL(blk_rq_init);
>  static inline void blk_mq_rq_time_init(struct request *rq, u64 alloc_time_ns)
>  {
>  	if (blk_mq_need_time_stamp(rq))
> -		rq->start_time_ns = ktime_get_ns();
> +		rq->start_time_ns = blk_get_time();
>  	else
>  		rq->start_time_ns = 0;
>
> @@ -441,7 +453,7 @@ static struct request *__blk_mq_alloc_requests(struct blk_mq_alloc_data *data)
>
>  	/* alloc_time includes depth and tag waits */
>  	if (blk_queue_rq_alloc_time(q))
> -		alloc_time_ns = ktime_get_ns();
> +		alloc_time_ns = blk_get_time();
>
>  	if (data->cmd_flags & REQ_NOWAIT)
>  		data->flags |= BLK_MQ_REQ_NOWAIT;
> @@ -624,7 +636,7 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q,
>
>  	/* alloc_time includes depth and tag waits */
>  	if (blk_queue_rq_alloc_time(q))
> -		alloc_time_ns = ktime_get_ns();
> +		alloc_time_ns = blk_get_time();
>
>  	/*
>  	 * If the tag allocator sleeps we could get an allocation for a
> @@ -1235,7 +1247,7 @@ void blk_mq_start_request(struct request *rq)
>  	trace_block_rq_issue(rq);
>
>  	if (test_bit(QUEUE_FLAG_STATS, &q->queue_flags)) {
> -		rq->io_start_time_ns = ktime_get_ns();
> +		rq->io_start_time_ns = blk_get_time();
>  		rq->stats_sectors = blk_rq_sectors(rq);
>  		rq->rq_flags |= RQF_STATS;
>  		rq_qos_issue(q, rq);
> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
> index ed44a997f629..21a3d4d7ab2b 100644
> --- a/include/linux/blkdev.h
> +++ b/include/linux/blkdev.h
> @@ -961,6 +961,8 @@ void blk_mark_disk_dead(struct gendisk *disk);
>  struct blk_plug {
>  	struct request *mq_list; /* blk-mq requests */
>
> +	u64 cached_time_ns;
> +
>  	/* if ios_left is > 1, we can batch tag/rq allocations */
>  	struct request *cached_rq;
>  	unsigned short nr_ios;
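[Editor's note: for readers following the thread, the caching scheme is easy to model outside the kernel. The sketch below is a hypothetical user-space analogue, not part of the patch: demo_plug, demo_get_time() and the clock_gettime(CLOCK_MONOTONIC, ...) stand-in for ktime_get_ns() are all illustrative assumptions.]

    /*
     * Hypothetical user-space model of the plug-based timestamp cache.
     * demo_plug and demo_get_time() mirror struct blk_plug and
     * blk_get_time() from the patch above; clock_gettime() stands in
     * for ktime_get_ns(). Illustrative only.
     */
    #include <stdio.h>
    #include <time.h>

    struct demo_plug {
            unsigned long long cached_time_ns;      /* 0 means "invalid" */
    };

    /* Stand-in for ktime_get_ns(): monotonic clock, in nanoseconds. */
    static unsigned long long now_ns(void)
    {
            struct timespec ts;

            clock_gettime(CLOCK_MONOTONIC, &ts);
            return (unsigned long long)ts.tv_sec * 1000000000ULL + ts.tv_nsec;
    }

    /* Like blk_get_time(): read the clock at most once per plugged section. */
    static unsigned long long demo_get_time(struct demo_plug *plug)
    {
            if (!plug)
                    return now_ns();

            if (!plug->cached_time_ns)
                    plug->cached_time_ns = now_ns();
            return plug->cached_time_ns;
    }

    int main(void)
    {
            struct demo_plug plug = { .cached_time_ns = 0 };

            /* All "requests" stamped inside one plugged section share one
             * timestamp; only the first call actually reads the clock. */
            unsigned long long t1 = demo_get_time(&plug);
            unsigned long long t2 = demo_get_time(&plug);
            printf("same stamp within plug: %s\n", t1 == t2 ? "yes" : "no");

            /* Invalidate on flush, as __blk_flush_plug() does, so the next
             * plugged section starts from a fresh clock read. */
            plug.cached_time_ns = 0;
            printf("stamp refreshed after flush: %s\n",
                   demo_get_time(&plug) >= t1 ? "yes" : "no");
            return 0;
    }

The 0-means-invalid convention is why no separate validity flag is needed; in the unlikely event the clock reads exactly 0, the helper simply reads it again, which is harmless given that nanosecond accuracy is explicitly not a goal here.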