[5/7] blk-throttle: Split the blkthrotl queue

Message ID 20250414132731.167620-6-wozizhi@huawei.com (mailing list archive)
State New
Series blk-throttle: Split the blkthrotl queue to solve the IO delay issue

Commit Message

Zizhi Wo April 14, 2025, 1:27 p.m. UTC
This patch splits the single per-qnode bio queue into separate bps and
iops queues. An IO request now must first pass through the bps queue, then
the iops queue, and only then be dispatched. Because of the queue split,
the throtl add/peek/pop functions are modified accordingly.
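
For quick orientation, the new queueing discipline condenses to the
following (a sketch distilled from the hunks below; the round-robin list
bookkeeping is omitted):

	/* Add: a bio already charged for bps is queued for iops only. */
	if (bio_flagged(bio, BIO_TG_BPS_THROTTLED))
		bio_list_add(&qn->bios_iops, bio);
	else
		bio_list_add(&qn->bios_bps, bio);

	/* Peek/pop: drain the iops list before the bps list. */
	bio = bio_list_peek(&qn->bios_iops);
	if (!bio)
		bio = bio_list_peek(&qn->bios_bps);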

Additionally, the patch adjusts the logic around tg_dispatch_time(). If
the bio needs to wait for bps, the function directly returns the bps wait
time; otherwise, it charges bps and returns the iops wait time so that the
bio can be placed directly into the iops queue afterwards. Note that this
may lead to more frequent updates of disptime, but the overhead is
negligible on this slow path.
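
In short, the new dispatch-time calculation (full hunk below) becomes:

	wait = tg_dispatch_bps_time(tg, bio);
	if (wait != 0)
		return wait;	/* still throttled by bps */

	/* bps satisfied: charge it now, the bio moves to the iops queue */
	throtl_charge_bps_bio(tg, bio);
	return tg_dispatch_iops_time(tg, bio);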

Signed-off-by: Zizhi Wo <wozizhi@huawei.com>
---
 block/blk-throttle.c | 49 ++++++++++++++++++++++++++++++--------------
 block/blk-throttle.h |  3 ++-
 2 files changed, 36 insertions(+), 16 deletions(-)

Comments

Yu Kuai April 15, 2025, 2:30 a.m. UTC | #1
On 2025/04/14 21:27, Zizhi Wo wrote:
> This patch splits the single per-qnode bio queue into separate bps and
> iops queues. An IO request now must first pass through the bps queue, then
> the iops queue, and only then be dispatched. Because of the queue split,
> the throtl add/peek/pop functions are modified accordingly.
> 
> Additionally, the patch adjusts the logic around tg_dispatch_time(). If
> the bio needs to wait for bps, the function directly returns the bps wait
> time; otherwise, it charges bps and returns the iops wait time so that the
> bio can be placed directly into the iops queue afterwards. Note that this
> may lead to more frequent updates of disptime, but the overhead is
> negligible on this slow path.
> 
> Signed-off-by: Zizhi Wo <wozizhi@huawei.com>
> ---
>   block/blk-throttle.c | 49 ++++++++++++++++++++++++++++++--------------
>   block/blk-throttle.h |  3 ++-
>   2 files changed, 36 insertions(+), 16 deletions(-)
> 

LGTM
Reviewed-by: Yu Kuai <yukuai3@huawei.com>

Patch

diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index caae2e3b7534..542db54f995c 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -143,7 +143,8 @@  static inline unsigned int throtl_bio_data_size(struct bio *bio)
 static void throtl_qnode_init(struct throtl_qnode *qn, struct throtl_grp *tg)
 {
 	INIT_LIST_HEAD(&qn->node);
-	bio_list_init(&qn->bios);
+	bio_list_init(&qn->bios_bps);
+	bio_list_init(&qn->bios_iops);
 	qn->tg = tg;
 }
 
@@ -160,7 +161,11 @@  static void throtl_qnode_init(struct throtl_qnode *qn, struct throtl_grp *tg)
 static void throtl_qnode_add_bio(struct bio *bio, struct throtl_qnode *qn,
 				 struct list_head *queued)
 {
-	bio_list_add(&qn->bios, bio);
+	if (bio_flagged(bio, BIO_TG_BPS_THROTTLED))
+		bio_list_add(&qn->bios_iops, bio);
+	else
+		bio_list_add(&qn->bios_bps, bio);
+
 	if (list_empty(&qn->node)) {
 		list_add_tail(&qn->node, queued);
 		blkg_get(tg_to_blkg(qn->tg));
@@ -170,6 +175,10 @@  static void throtl_qnode_add_bio(struct bio *bio, struct throtl_qnode *qn,
 /**
  * throtl_peek_queued - peek the first bio on a qnode list
  * @queued: the qnode list to peek
+ *
+ * Always take a bio from the head of the iops queue first. If that queue
+ * is empty, fall back to the head of the bps queue, so that bios are
+ * still fetched from the head overall.
  */
 static struct bio *throtl_peek_queued(struct list_head *queued)
 {
@@ -180,7 +189,9 @@  static struct bio *throtl_peek_queued(struct list_head *queued)
 		return NULL;
 
 	qn = list_first_entry(queued, struct throtl_qnode, node);
-	bio = bio_list_peek(&qn->bios);
+	bio = bio_list_peek(&qn->bios_iops);
+	if (!bio)
+		bio = bio_list_peek(&qn->bios_bps);
 	WARN_ON_ONCE(!bio);
 	return bio;
 }
@@ -190,9 +201,10 @@  static struct bio *throtl_peek_queued(struct list_head *queued)
  * @queued: the qnode list to pop a bio from
  * @tg_to_put: optional out argument for throtl_grp to put
  *
- * Pop the first bio from the qnode list @queued.  After popping, the first
- * qnode is removed from @queued if empty or moved to the end of @queued so
- * that the popping order is round-robin.
+ * Pop the first bio from the qnode list @queued.  Note that the iops list
+ * is tried first here, because bios are ultimately dispatched from it.
+ * After popping, the first qnode is removed from @queued if empty or moved to
+ * the end of @queued so that the popping order is round-robin.
  *
  * When the first qnode is removed, its associated throtl_grp should be put
  * too.  If @tg_to_put is NULL, this function automatically puts it;
@@ -209,10 +221,12 @@  static struct bio *throtl_pop_queued(struct list_head *queued,
 		return NULL;
 
 	qn = list_first_entry(queued, struct throtl_qnode, node);
-	bio = bio_list_pop(&qn->bios);
+	bio = bio_list_pop(&qn->bios_iops);
+	if (!bio)
+		bio = bio_list_pop(&qn->bios_bps);
 	WARN_ON_ONCE(!bio);
 
-	if (bio_list_empty(&qn->bios)) {
+	if (bio_list_empty(&qn->bios_bps) && bio_list_empty(&qn->bios_iops)) {
 		list_del_init(&qn->node);
 		if (tg_to_put)
 			*tg_to_put = qn->tg;
@@ -805,12 +819,12 @@  static unsigned long tg_dispatch_iops_time(struct throtl_grp *tg, struct bio *bi
 
 /*
  * Returns approx number of jiffies to wait before this bio is with-in IO rate
- * and can be dispatched.
+ * and can be moved to the other queue or dispatched.
  */
 static unsigned long tg_dispatch_time(struct throtl_grp *tg, struct bio *bio)
 {
 	bool rw = bio_data_dir(bio);
-	unsigned long bps_wait, iops_wait;
+	unsigned long wait;
 
 	/*
  	 * Currently whole state machine of group depends on first bio
@@ -821,10 +835,17 @@  static unsigned long tg_dispatch_time(struct throtl_grp *tg, struct bio *bio)
 	BUG_ON(tg->service_queue.nr_queued[rw] &&
 	       bio != throtl_peek_queued(&tg->service_queue.queued[rw]));
 
-	bps_wait = tg_dispatch_bps_time(tg, bio);
-	iops_wait = tg_dispatch_iops_time(tg, bio);
+	wait = tg_dispatch_bps_time(tg, bio);
+	if (wait != 0)
+		return wait;
 
-	return max(bps_wait, iops_wait);
+	/*
+	 * Charge bps here because @bio will be directly placed into the
+	 * iops queue afterward.
+	 */
+	throtl_charge_bps_bio(tg, bio);
+
+	return tg_dispatch_iops_time(tg, bio);
 }
 
 /**
@@ -913,7 +934,6 @@  static void tg_dispatch_one_bio(struct throtl_grp *tg, bool rw)
 	bio = throtl_pop_queued(&sq->queued[rw], &tg_to_put);
 	sq->nr_queued[rw]--;
 
-	throtl_charge_bps_bio(tg, bio);
 	throtl_charge_iops_bio(tg, bio);
 
 	/*
@@ -1641,7 +1661,6 @@  bool __blk_throtl_bio(struct bio *bio)
 	while (true) {
 		if (tg_within_limit(tg, bio, rw)) {
 			/* within limits, let's charge and dispatch directly */
-			throtl_charge_bps_bio(tg, bio);
 			throtl_charge_iops_bio(tg, bio);
 
 			/*
diff --git a/block/blk-throttle.h b/block/blk-throttle.h
index 7964cc041e06..5257e5c053e6 100644
--- a/block/blk-throttle.h
+++ b/block/blk-throttle.h
@@ -28,7 +28,8 @@ 
  */
 struct throtl_qnode {
 	struct list_head	node;		/* service_queue->queued[] */
-	struct bio_list		bios;		/* queued bios */
+	struct bio_list		bios_bps;	/* queued bios for bps limit */
+	struct bio_list		bios_iops;	/* queued bios for iops limit */
 	struct throtl_grp	*tg;		/* tg this qnode belongs to */
 };