diff mbox series

[RFC,v2,2/6] blk-throttle: delay initialization until configuration

Message ID 20240406080059.2248314-3-yukuai1@huaweicloud.com (mailing list archive)
State New, archived
Headers show
Series blk-throttle: support enable and disable during runtime | expand

Commit Message

Yu Kuai April 6, 2024, 8 a.m. UTC
From: Yu Kuai <yukuai3@huawei.com>

Other cgroup policy like bfq, iocost are lazy-initialized when they are
configured for the first time for the device, but blk-throttle is
initialized unconditionally from blkcg_init_disk().

Delay initialization of blk-throttle as well, to save some cpu and
memory overhead if it's not configured.

Noted that once it's initialized, it can't be destroyed until disk
removal, even if it's disabled. And following patches will support
to destroy blk-throttle after it's disabled.

Signed-off-by: Yu Kuai <yukuai3@huawei.com>
---
 block/blk-cgroup.c   |   6 ---
 block/blk-sysfs.c    |   1 -
 block/blk-throttle.c | 114 +++++++++++++++++++++++++++----------------
 block/blk-throttle.h |  10 ++--
 4 files changed, 78 insertions(+), 53 deletions(-)

Comments

Tejun Heo April 12, 2024, 5:59 p.m. UTC | #1
Hello,

On Sat, Apr 06, 2024 at 04:00:55PM +0800, Yu Kuai wrote:
> @@ -1480,6 +1547,9 @@ void blk_throtl_cancel_bios(struct gendisk *disk)
>  	struct cgroup_subsys_state *pos_css;
>  	struct blkcg_gq *blkg;
>  
> +	if (!q->td)
> +		return;

So, this naked test is safe because the interface functions are shut down by
the time this function is called.

>  static inline bool blk_should_throtl(struct bio *bio)
>  {
> -	struct throtl_grp *tg = blkg_to_tg(bio->bi_blkg);
> +	struct throtl_grp *tg;
>  	int rw = bio_data_dir(bio);
>  
> +	if (!bio->bi_bdev->bd_queue->td)
> +		return false;

and this one because ->td is set while the queue is frozen and this path
shouldn't be running while it gets set, right?

Can you please add comments explaining why those are safe? Otherwise, the
patch looks generally sane to me on the first glance. Can you please also
add how you tested the change?

Thanks.
Yu Kuai April 13, 2024, 1:59 a.m. UTC | #2
Hi,

在 2024/04/13 1:59, Tejun Heo 写道:
> Hello,
> 
> On Sat, Apr 06, 2024 at 04:00:55PM +0800, Yu Kuai wrote:
>> @@ -1480,6 +1547,9 @@ void blk_throtl_cancel_bios(struct gendisk *disk)
>>   	struct cgroup_subsys_state *pos_css;
>>   	struct blkcg_gq *blkg;
>>   
>> +	if (!q->td)
>> +		return;
> 
> So, this naked test is safe because the interface functions are shut down by
> the time this function is called.
> 
>>   static inline bool blk_should_throtl(struct bio *bio)
>>   {
>> -	struct throtl_grp *tg = blkg_to_tg(bio->bi_blkg);
>> +	struct throtl_grp *tg;
>>   	int rw = bio_data_dir(bio);
>>   
>> +	if (!bio->bi_bdev->bd_queue->td)
>> +		return false;
> 
> and this one because ->td is set while the queue is frozen and this path
> shouldn't be running while it gets set, right?

Yes, this is called under bio_queue_enter()
> 
> Can you please add comments explaining why those are safe? Otherwise, the
> patch looks generally sane to me on the first glance. Can you please also
> add how you tested the change?

And I realized that there are no tests for bkl-throttle from blktests,
and I'm using some other tests from our testers to cover basic
functionality. Perhaps will it make sense to add some tests to blktests?

Thanks,
Kuai

> 
> Thanks.
>
Yu Kuai April 16, 2024, 2:11 a.m. UTC | #3
Hi, Tejun!

在 2024/04/13 9:59, Yu Kuai 写道:
> Can you please also
> add how you tested the change?

I just sent a patchset to add some tests to blktests, a basic functional
test and a few regression test, this is not enough but it's a start.

https://lore.kernel.org/all/20240416020042.509291-1-yukuai1@huaweicloud.com/

Thanks,
Kuai
Chaitanya Kulkarni April 16, 2024, 3:05 a.m. UTC | #4
On 4/15/24 19:11, Yu Kuai wrote:
> Hi, Tejun!
>
> 在 2024/04/13 9:59, Yu Kuai 写道:
>> Can you please also
>> add how you tested the change?
>
> I just sent a patchset to add some tests to blktests, a basic functional
> test and a few regression test, this is not enough but it's a start.
>
> https://lore.kernel.org/all/20240416020042.509291-1-yukuai1@huaweicloud.com/ 
>
>
> Thanks,
> Kuai
>
>

It'd be really nice if we come with at least few complex scenarios to
increase the test coverage in this area. Also, please CC
Shinichiro Kawasaki <shinichiro.kawasaki@wdc.com> on any blktests emails.

-ck
diff mbox series

Patch

diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index bdbb557feb5a..b5e626a16758 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -1441,14 +1441,8 @@  int blkcg_init_disk(struct gendisk *disk)
 	if (ret)
 		goto err_destroy_all;
 
-	ret = blk_throtl_init(disk);
-	if (ret)
-		goto err_ioprio_exit;
-
 	return 0;
 
-err_ioprio_exit:
-	blk_ioprio_exit(disk);
 err_destroy_all:
 	blkg_destroy_all(disk);
 	return ret;
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 674c5c602364..1e2a0b18360c 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -807,7 +807,6 @@  int blk_register_queue(struct gendisk *disk)
 
 	blk_queue_flag_set(QUEUE_FLAG_REGISTERED, q);
 	wbt_enable_default(disk);
-	blk_throtl_register(disk);
 
 	/* Now everything is ready and send out KOBJ_ADD uevent */
 	kobject_uevent(&disk->queue_kobj, KOBJ_ADD);
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 1e45e48564f4..acb2758f45ef 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -1213,6 +1213,53 @@  static void tg_conf_updated(struct throtl_grp *tg, bool global)
 	}
 }
 
+static int blk_throtl_init(struct gendisk *disk)
+{
+	struct request_queue *q = disk->queue;
+	struct throtl_data *td;
+	int ret;
+
+	td = kzalloc_node(sizeof(*td), GFP_KERNEL, q->node);
+	if (!td)
+		return -ENOMEM;
+
+	INIT_WORK(&td->dispatch_work, blk_throtl_dispatch_work_fn);
+	throtl_service_queue_init(&td->service_queue);
+
+	/*
+	 * freeze queue before setting q->td, so that IO path won't see
+	 * q->td while policy is not activated yet.
+	 */
+	blk_mq_freeze_queue(disk->queue);
+	blk_mq_quiesce_queue(disk->queue);
+
+	q->td = td;
+	td->queue = q;
+
+	/* activate policy */
+	ret = blkcg_activate_policy(disk, &blkcg_policy_throtl);
+	if (ret) {
+		q->td = NULL;
+		kfree(td);
+		goto out;
+	}
+
+	if (blk_queue_nonrot(q))
+		td->throtl_slice = DFL_THROTL_SLICE_SSD;
+	else
+		td->throtl_slice = DFL_THROTL_SLICE_HD;
+	td->track_bio_latency = !queue_is_mq(q);
+	if (!td->track_bio_latency)
+		blk_stat_enable_accounting(q);
+
+out:
+	blk_mq_unquiesce_queue(disk->queue);
+	blk_mq_unfreeze_queue(disk->queue);
+
+	return ret;
+}
+
+
 static ssize_t tg_set_conf(struct kernfs_open_file *of,
 			   char *buf, size_t nbytes, loff_t off, bool is_u64)
 {
@@ -1224,6 +1271,16 @@  static ssize_t tg_set_conf(struct kernfs_open_file *of,
 
 	blkg_conf_init(&ctx, buf);
 
+	ret = blkg_conf_open_bdev(&ctx);
+	if (ret)
+		goto out_finish;
+
+	if (!ctx.bdev->bd_queue->td) {
+		ret = blk_throtl_init(ctx.bdev->bd_disk);
+		if (ret)
+			goto out_finish;
+	}
+
 	ret = blkg_conf_prep(blkcg, &blkcg_policy_throtl, &ctx);
 	if (ret)
 		goto out_finish;
@@ -1388,6 +1445,16 @@  static ssize_t tg_set_limit(struct kernfs_open_file *of,
 
 	blkg_conf_init(&ctx, buf);
 
+	ret = blkg_conf_open_bdev(&ctx);
+	if (ret)
+		goto out_finish;
+
+	if (!ctx.bdev->bd_queue->td) {
+		ret = blk_throtl_init(ctx.bdev->bd_disk);
+		if (ret)
+			goto out_finish;
+	}
+
 	ret = blkg_conf_prep(blkcg, &blkcg_policy_throtl, &ctx);
 	if (ret)
 		goto out_finish;
@@ -1480,6 +1547,9 @@  void blk_throtl_cancel_bios(struct gendisk *disk)
 	struct cgroup_subsys_state *pos_css;
 	struct blkcg_gq *blkg;
 
+	if (!q->td)
+		return;
+
 	spin_lock_irq(&q->queue_lock);
 	/*
 	 * queue_lock is held, rcu lock is not needed here technically.
@@ -1609,57 +1679,19 @@  bool __blk_throtl_bio(struct bio *bio)
 	return throttled;
 }
 
-int blk_throtl_init(struct gendisk *disk)
-{
-	struct request_queue *q = disk->queue;
-	struct throtl_data *td;
-	int ret;
-
-	td = kzalloc_node(sizeof(*td), GFP_KERNEL, q->node);
-	if (!td)
-		return -ENOMEM;
-
-	INIT_WORK(&td->dispatch_work, blk_throtl_dispatch_work_fn);
-	throtl_service_queue_init(&td->service_queue);
-
-	q->td = td;
-	td->queue = q;
-
-	/* activate policy */
-	ret = blkcg_activate_policy(disk, &blkcg_policy_throtl);
-	if (ret)
-		kfree(td);
-	return ret;
-}
-
 void blk_throtl_exit(struct gendisk *disk)
 {
 	struct request_queue *q = disk->queue;
 
-	BUG_ON(!q->td);
+	if (!q->td)
+		return;
+
 	del_timer_sync(&q->td->service_queue.pending_timer);
 	throtl_shutdown_wq(q);
 	blkcg_deactivate_policy(disk, &blkcg_policy_throtl);
 	kfree(q->td);
 }
 
-void blk_throtl_register(struct gendisk *disk)
-{
-	struct request_queue *q = disk->queue;
-	struct throtl_data *td;
-
-	td = q->td;
-	BUG_ON(!td);
-
-	if (blk_queue_nonrot(q))
-		td->throtl_slice = DFL_THROTL_SLICE_SSD;
-	else
-		td->throtl_slice = DFL_THROTL_SLICE_HD;
-	td->track_bio_latency = !queue_is_mq(q);
-	if (!td->track_bio_latency)
-		blk_stat_enable_accounting(q);
-}
-
 static int __init throtl_init(void)
 {
 	kthrotld_workqueue = alloc_workqueue("kthrotld", WQ_MEM_RECLAIM, 0);
diff --git a/block/blk-throttle.h b/block/blk-throttle.h
index 32503fd83a84..e7f5562a32e9 100644
--- a/block/blk-throttle.h
+++ b/block/blk-throttle.h
@@ -150,23 +150,23 @@  static inline struct throtl_grp *blkg_to_tg(struct blkcg_gq *blkg)
  * Internal throttling interface
  */
 #ifndef CONFIG_BLK_DEV_THROTTLING
-static inline int blk_throtl_init(struct gendisk *disk) { return 0; }
 static inline void blk_throtl_exit(struct gendisk *disk) { }
-static inline void blk_throtl_register(struct gendisk *disk) { }
 static inline bool blk_throtl_bio(struct bio *bio) { return false; }
 static inline void blk_throtl_cancel_bios(struct gendisk *disk) { }
 #else /* CONFIG_BLK_DEV_THROTTLING */
-int blk_throtl_init(struct gendisk *disk);
 void blk_throtl_exit(struct gendisk *disk);
-void blk_throtl_register(struct gendisk *disk);
 bool __blk_throtl_bio(struct bio *bio);
 void blk_throtl_cancel_bios(struct gendisk *disk);
 
 static inline bool blk_should_throtl(struct bio *bio)
 {
-	struct throtl_grp *tg = blkg_to_tg(bio->bi_blkg);
+	struct throtl_grp *tg;
 	int rw = bio_data_dir(bio);
 
+	if (!bio->bi_bdev->bd_queue->td)
+		return false;
+
+	tg = blkg_to_tg(bio->bi_blkg);
 	if (!cgroup_subsys_on_dfl(io_cgrp_subsys)) {
 		if (!bio_flagged(bio, BIO_CGROUP_ACCT)) {
 			bio_set_flag(bio, BIO_CGROUP_ACCT);