| Message ID | 20250414132731.167620-6-wozizhi@huawei.com (mailing list archive) |
|---|---|
| State | New |
| Series | blk-throttle: Split the blkthrotl queue to solve the IO delay issue |
On 2025/04/14 21:27, Zizhi Wo wrote:
> This patch splits the single queue into separate bps and iops queues. Now,
> an IO request must first pass through the bps queue, then the iops queue,
> and finally be dispatched. Due to the queue splitting, we need to modify
> the throtl add/peek/pop functions.
>
> Additionally, the patch modifies the logic related to tg_dispatch_time().
> If a bio needs to wait for bps, the function directly returns the bps wait
> time; otherwise, it charges bps and returns the iops wait time so that the
> bio can be placed directly into the iops queue afterward. Note that this
> may lead to more frequent updates to disptime, but the overhead is
> negligible for the slow path.
>
> Signed-off-by: Zizhi Wo <wozizhi@huawei.com>
> ---
>  block/blk-throttle.c | 49 ++++++++++++++++++++++++++++++--------------
>  block/blk-throttle.h |  3 ++-
>  2 files changed, 36 insertions(+), 16 deletions(-)

LGTM

Reviewed-by: Yu Kuai <yukuai3@huawei.com>
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index caae2e3b7534..542db54f995c 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -143,7 +143,8 @@ static inline unsigned int throtl_bio_data_size(struct bio *bio)
 static void throtl_qnode_init(struct throtl_qnode *qn, struct throtl_grp *tg)
 {
 	INIT_LIST_HEAD(&qn->node);
-	bio_list_init(&qn->bios);
+	bio_list_init(&qn->bios_bps);
+	bio_list_init(&qn->bios_iops);
 	qn->tg = tg;
 }
 
@@ -160,7 +161,11 @@ static void throtl_qnode_init(struct throtl_qnode *qn, struct throtl_grp *tg)
 static void throtl_qnode_add_bio(struct bio *bio, struct throtl_qnode *qn,
 				 struct list_head *queued)
 {
-	bio_list_add(&qn->bios, bio);
+	if (bio_flagged(bio, BIO_TG_BPS_THROTTLED))
+		bio_list_add(&qn->bios_iops, bio);
+	else
+		bio_list_add(&qn->bios_bps, bio);
+
 	if (list_empty(&qn->node)) {
 		list_add_tail(&qn->node, queued);
 		blkg_get(tg_to_blkg(qn->tg));
@@ -170,6 +175,10 @@ static void throtl_qnode_add_bio(struct bio *bio, struct throtl_qnode *qn,
 /**
  * throtl_peek_queued - peek the first bio on a qnode list
  * @queued: the qnode list to peek
+ *
+ * Always take a bio from the head of the iops queue first. If the queue
+ * is empty, we then take it from the bps queue to maintain the overall
+ * idea of fetching bios from the head.
  */
 static struct bio *throtl_peek_queued(struct list_head *queued)
 {
@@ -180,7 +189,9 @@ static struct bio *throtl_peek_queued(struct list_head *queued)
 		return NULL;
 
 	qn = list_first_entry(queued, struct throtl_qnode, node);
-	bio = bio_list_peek(&qn->bios);
+	bio = bio_list_peek(&qn->bios_iops);
+	if (!bio)
+		bio = bio_list_peek(&qn->bios_bps);
 	WARN_ON_ONCE(!bio);
 	return bio;
 }
@@ -190,9 +201,10 @@ static struct bio *throtl_peek_queued(struct list_head *queued)
  * @queued: the qnode list to pop a bio from
  * @tg_to_put: optional out argument for throtl_grp to put
  *
- * Pop the first bio from the qnode list @queued. After popping, the first
- * qnode is removed from @queued if empty or moved to the end of @queued so
- * that the popping order is round-robin.
+ * Pop the first bio from the qnode list @queued. Note that we firstly focus
+ * on the iops list here because bios are ultimately dispatched from it.
+ * After popping, the first qnode is removed from @queued if empty or moved to
+ * the end of @queued so that the popping order is round-robin.
 *
 * When the first qnode is removed, its associated throtl_grp should be put
 * too.  If @tg_to_put is NULL, this function automatically puts it;
@@ -209,10 +221,12 @@ static struct bio *throtl_pop_queued(struct list_head *queued,
 		return NULL;
 
 	qn = list_first_entry(queued, struct throtl_qnode, node);
-	bio = bio_list_pop(&qn->bios);
+	bio = bio_list_pop(&qn->bios_iops);
+	if (!bio)
+		bio = bio_list_pop(&qn->bios_bps);
 	WARN_ON_ONCE(!bio);
 
-	if (bio_list_empty(&qn->bios)) {
+	if (bio_list_empty(&qn->bios_bps) && bio_list_empty(&qn->bios_iops)) {
 		list_del_init(&qn->node);
 		if (tg_to_put)
 			*tg_to_put = qn->tg;
@@ -805,12 +819,12 @@ static unsigned long tg_dispatch_iops_time(struct throtl_grp *tg, struct bio *bi
 
 /*
  * Returns approx number of jiffies to wait before this bio is with-in IO rate
- * and can be dispatched.
+ * and can be moved to other queue or dispatched.
  */
 static unsigned long tg_dispatch_time(struct throtl_grp *tg, struct bio *bio)
 {
 	bool rw = bio_data_dir(bio);
-	unsigned long bps_wait, iops_wait;
+	unsigned long wait;
 
 	/*
 	 * Currently whole state machine of group depends on first bio
@@ -821,10 +835,17 @@ static unsigned long tg_dispatch_time(struct throtl_grp *tg, struct bio *bio)
 	BUG_ON(tg->service_queue.nr_queued[rw] &&
 	       bio != throtl_peek_queued(&tg->service_queue.queued[rw]));
 
-	bps_wait = tg_dispatch_bps_time(tg, bio);
-	iops_wait = tg_dispatch_iops_time(tg, bio);
+	wait = tg_dispatch_bps_time(tg, bio);
+	if (wait != 0)
+		return wait;
 
-	return max(bps_wait, iops_wait);
+	/*
+	 * Charge bps here because @bio will be directly placed into the
+	 * iops queue afterward.
+	 */
+	throtl_charge_bps_bio(tg, bio);
+
+	return tg_dispatch_iops_time(tg, bio);
 }
 
 /**
@@ -913,7 +934,6 @@ static void tg_dispatch_one_bio(struct throtl_grp *tg, bool rw)
 	bio = throtl_pop_queued(&sq->queued[rw], &tg_to_put);
 	sq->nr_queued[rw]--;
 
-	throtl_charge_bps_bio(tg, bio);
 	throtl_charge_iops_bio(tg, bio);
 
 	/*
@@ -1641,7 +1661,6 @@ bool __blk_throtl_bio(struct bio *bio)
 	while (true) {
 		if (tg_within_limit(tg, bio, rw)) {
 			/* within limits, let's charge and dispatch directly */
-			throtl_charge_bps_bio(tg, bio);
 			throtl_charge_iops_bio(tg, bio);
 
 			/*
diff --git a/block/blk-throttle.h b/block/blk-throttle.h
index 7964cc041e06..5257e5c053e6 100644
--- a/block/blk-throttle.h
+++ b/block/blk-throttle.h
@@ -28,7 +28,8 @@
  */
 struct throtl_qnode {
 	struct list_head	node;		/* service_queue->queued[] */
-	struct bio_list		bios;		/* queued bios */
+	struct bio_list		bios_bps;	/* queued bios for bps limit */
+	struct bio_list		bios_iops;	/* queued bios for iops limit */
 	struct throtl_grp	*tg;		/* tg this qnode belongs to */
 };
 
This patch splits the single queue into separate bps and iops queues. Now,
an IO request must first pass through the bps queue, then the iops queue,
and finally be dispatched. Due to the queue splitting, we need to modify
the throtl add/peek/pop functions.

Additionally, the patch modifies the logic related to tg_dispatch_time().
If a bio needs to wait for bps, the function directly returns the bps wait
time; otherwise, it charges bps and returns the iops wait time so that the
bio can be placed directly into the iops queue afterward. Note that this
may lead to more frequent updates to disptime, but the overhead is
negligible for the slow path.

Signed-off-by: Zizhi Wo <wozizhi@huawei.com>
---
 block/blk-throttle.c | 49 ++++++++++++++++++++++++++++++--------------
 block/blk-throttle.h |  3 ++-
 2 files changed, 36 insertions(+), 16 deletions(-)
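To illustrate the reworked tg_dispatch_time() ordering described above (return early on a bps wait, otherwise charge bps immediately and report only the iops wait), here is a toy userspace model. tg_model and its budget fields are hypothetical simplifications of the group's slice accounting, not kernel structures; only the control flow follows the patch.

```c
#include <stdio.h>

/* Hypothetical per-group budgets; the kernel tracks time slices instead. */
struct tg_model {
	long bps_budget;	/* bytes still allowed in this slice */
	long iops_budget;	/* ios still allowed in this slice */
};

/* Return 0 if @bytes fits the bps budget, else a fake one-jiffy wait. */
static long bps_wait(const struct tg_model *tg, long bytes)
{
	return bytes <= tg->bps_budget ? 0 : 1;
}

static long iops_wait(const struct tg_model *tg)
{
	return tg->iops_budget > 0 ? 0 : 1;
}

/*
 * Same shape as the patched tg_dispatch_time(): a bps wait is returned
 * as-is with nothing charged; once bps is satisfied it is charged right
 * away, so the bio can sit in the iops queue with only iops pending.
 */
static long dispatch_time(struct tg_model *tg, long bytes)
{
	long wait = bps_wait(tg, bytes);

	if (wait != 0)
		return wait;

	tg->bps_budget -= bytes;	/* charge bps up front */
	return iops_wait(tg);
}

int main(void)
{
	struct tg_model tg = { .bps_budget = 4096, .iops_budget = 0 };

	/* bps fits but iops is exhausted: bps is charged, iops wait returned. */
	printf("wait = %ld\n", dispatch_time(&tg, 1024));
	printf("bps budget after charge = %ld\n", tg.bps_budget);
	return 0;
}
```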