Message ID | 20250312102903.3584358-1-nilay@linux.ibm.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | block: protect debugfs attributes using q->elevator_lock | expand |
On 3/12/25 19:28, Nilay Shroff wrote: > diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c > index adf5f0697b6b..d26a6df945bd 100644 > --- a/block/blk-mq-debugfs.c > +++ b/block/blk-mq-debugfs.c > @@ -347,11 +347,16 @@ static int hctx_busy_show(void *data, struct seq_file *m) > { > struct blk_mq_hw_ctx *hctx = data; > struct show_busy_params params = { .m = m, .hctx = hctx }; > + int res; > > + res = mutex_lock_interruptible(&hctx->queue->elevator_lock); > + if (res) > + goto out; There is no need for the goto here. You can "return res;" directly. Same comment for all the other changes below. > blk_mq_tagset_busy_iter(hctx->queue->tag_set, hctx_show_busy_rq, > ¶ms); > - > - return 0; > + mutex_unlock(&hctx->queue->elevator_lock); > +out: > + return res; And you can keep the "return 0;" here. > } > > static const char *const hctx_types[] = { > @@ -400,12 +405,12 @@ static int hctx_tags_show(void *data, struct seq_file *m) > struct request_queue *q = hctx->queue; > int res; > > - res = mutex_lock_interruptible(&q->sysfs_lock); > + res = mutex_lock_interruptible(&q->elevator_lock); > if (res) > goto out; > if (hctx->tags) > blk_mq_debugfs_tags_show(m, hctx->tags); > - mutex_unlock(&q->sysfs_lock); > + mutex_unlock(&q->elevator_lock); > > out: > return res; > @@ -417,12 +422,12 @@ static int hctx_tags_bitmap_show(void *data, struct seq_file *m) > struct request_queue *q = hctx->queue; > int res; > > - res = mutex_lock_interruptible(&q->sysfs_lock); > + res = mutex_lock_interruptible(&q->elevator_lock); > if (res) > goto out; > if (hctx->tags) > sbitmap_bitmap_show(&hctx->tags->bitmap_tags.sb, m); > - mutex_unlock(&q->sysfs_lock); > + mutex_unlock(&q->elevator_lock); > > out: > return res; > @@ -434,12 +439,12 @@ static int hctx_sched_tags_show(void *data, struct seq_file *m) > struct request_queue *q = hctx->queue; > int res; > > - res = mutex_lock_interruptible(&q->sysfs_lock); > + res = mutex_lock_interruptible(&q->elevator_lock); > if (res) > goto out; > if (hctx->sched_tags) > blk_mq_debugfs_tags_show(m, hctx->sched_tags); > - mutex_unlock(&q->sysfs_lock); > + mutex_unlock(&q->elevator_lock); > > out: > return res; > @@ -451,12 +456,12 @@ static int hctx_sched_tags_bitmap_show(void *data, struct seq_file *m) > struct request_queue *q = hctx->queue; > int res; > > - res = mutex_lock_interruptible(&q->sysfs_lock); > + res = mutex_lock_interruptible(&q->elevator_lock); > if (res) > goto out; > if (hctx->sched_tags) > sbitmap_bitmap_show(&hctx->sched_tags->bitmap_tags.sb, m); > - mutex_unlock(&q->sysfs_lock); > + mutex_unlock(&q->elevator_lock); > > out: > return res; > diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h > index 22600420799c..709a32022c78 100644 > --- a/include/linux/blkdev.h > +++ b/include/linux/blkdev.h > @@ -568,9 +568,9 @@ struct request_queue { > * nr_requests and wbt latency, this lock also protects the sysfs attrs > * nr_requests and wbt_lat_usec. Additionally the nr_hw_queues update > * may modify hctx tags, reserved-tags and cpumask, so this lock also > - * helps protect the hctx attrs. To ensure proper locking order during > - * an elevator or nr_hw_queue update, first freeze the queue, then > - * acquire ->elevator_lock. > + * helps protect the hctx sysfs/debugfs attrs. To ensure proper locking > + * order during an elevator or nr_hw_queue update, first freeze the > + * queue, then acquire ->elevator_lock. > */ > struct mutex elevator_lock; >
On 3/12/25 4:21 PM, Damien Le Moal wrote: > On 3/12/25 19:28, Nilay Shroff wrote: >> diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c >> index adf5f0697b6b..d26a6df945bd 100644 >> --- a/block/blk-mq-debugfs.c >> +++ b/block/blk-mq-debugfs.c >> @@ -347,11 +347,16 @@ static int hctx_busy_show(void *data, struct seq_file *m) >> { >> struct blk_mq_hw_ctx *hctx = data; >> struct show_busy_params params = { .m = m, .hctx = hctx }; >> + int res; >> >> + res = mutex_lock_interruptible(&hctx->queue->elevator_lock); >> + if (res) >> + goto out; > > There is no need for the goto here. You can "return res;" directly. > Same comment for all the other changes below. > >> blk_mq_tagset_busy_iter(hctx->queue->tag_set, hctx_show_busy_rq, >> ¶ms); >> - >> - return 0; >> + mutex_unlock(&hctx->queue->elevator_lock); >> +out: >> + return res; > > And you can keep the "return 0;" here. > Yes I agree. And in fact, that's how exactly I initially prepared the patch however later I saw that other places in this file where we use mutex_lock_ interruptible(), we use goto when acquiring the lock fails and so I changed it here to match those other occurrences. So if everyone prefer, shall I use your suggested pattern at other places as well in this file? >> >> >> static const char *const hctx_types[] = { >> @@ -400,12 +405,12 @@ static int hctx_tags_show(void *data, struct seq_file *m) >> struct request_queue *q = hctx->queue; >> int res; >> >> - res = mutex_lock_interruptible(&q->sysfs_lock); >> + res = mutex_lock_interruptible(&q->elevator_lock); >> if (res) >> goto out; >> if (hctx->tags) >> blk_mq_debugfs_tags_show(m, hctx->tags); >> - mutex_unlock(&q->sysfs_lock); >> + mutex_unlock(&q->elevator_lock); >> >> out: >> return res; >> @@ -417,12 +422,12 @@ static int hctx_tags_bitmap_show(void *data, struct seq_file *m) >> struct request_queue *q = hctx->queue; >> int res; >> >> - res = mutex_lock_interruptible(&q->sysfs_lock); >> + res = mutex_lock_interruptible(&q->elevator_lock); >> if (res) >> goto out; >> if (hctx->tags) >> sbitmap_bitmap_show(&hctx->tags->bitmap_tags.sb, m); >> - mutex_unlock(&q->sysfs_lock); >> + mutex_unlock(&q->elevator_lock); >> >> out: >> return res; >> @@ -434,12 +439,12 @@ static int hctx_sched_tags_show(void *data, struct seq_file *m) >> struct request_queue *q = hctx->queue; >> int res; >> >> - res = mutex_lock_interruptible(&q->sysfs_lock); >> + res = mutex_lock_interruptible(&q->elevator_lock); >> if (res) >> goto out; >> if (hctx->sched_tags) >> blk_mq_debugfs_tags_show(m, hctx->sched_tags); >> - mutex_unlock(&q->sysfs_lock); >> + mutex_unlock(&q->elevator_lock); >> >> out: >> return res; >> @@ -451,12 +456,12 @@ static int hctx_sched_tags_bitmap_show(void *data, struct seq_file *m) >> struct request_queue *q = hctx->queue; >> int res; >> >> - res = mutex_lock_interruptible(&q->sysfs_lock); >> + res = mutex_lock_interruptible(&q->elevator_lock); >> if (res) >> goto out; >> if (hctx->sched_tags) >> sbitmap_bitmap_show(&hctx->sched_tags->bitmap_tags.sb, m); >> - mutex_unlock(&q->sysfs_lock); >> + mutex_unlock(&q->elevator_lock); >> >> out: >> return res; >> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h >> index 22600420799c..709a32022c78 100644 >> --- a/include/linux/blkdev.h >> +++ b/include/linux/blkdev.h >> @@ -568,9 +568,9 @@ struct request_queue { >> * nr_requests and wbt latency, this lock also protects the sysfs attrs >> * nr_requests and wbt_lat_usec. Additionally the nr_hw_queues update >> * may modify hctx tags, reserved-tags and cpumask, so this lock also >> - * helps protect the hctx attrs. To ensure proper locking order during >> - * an elevator or nr_hw_queue update, first freeze the queue, then >> - * acquire ->elevator_lock. >> + * helps protect the hctx sysfs/debugfs attrs. To ensure proper locking >> + * order during an elevator or nr_hw_queue update, first freeze the >> + * queue, then acquire ->elevator_lock. >> */ >> struct mutex elevator_lock; >> Thanks, --Nilay
On 3/12/25 4:33 PM, Nilay Shroff wrote: > > > On 3/12/25 4:21 PM, Damien Le Moal wrote: >> On 3/12/25 19:28, Nilay Shroff wrote: >>> diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c >>> index adf5f0697b6b..d26a6df945bd 100644 >>> --- a/block/blk-mq-debugfs.c >>> +++ b/block/blk-mq-debugfs.c >>> @@ -347,11 +347,16 @@ static int hctx_busy_show(void *data, struct seq_file *m) >>> { >>> struct blk_mq_hw_ctx *hctx = data; >>> struct show_busy_params params = { .m = m, .hctx = hctx }; >>> + int res; >>> >>> + res = mutex_lock_interruptible(&hctx->queue->elevator_lock); >>> + if (res) >>> + goto out; >> >> There is no need for the goto here. You can "return res;" directly. >> Same comment for all the other changes below. >> >>> blk_mq_tagset_busy_iter(hctx->queue->tag_set, hctx_show_busy_rq, >>> ¶ms); >>> - >>> - return 0; >>> + mutex_unlock(&hctx->queue->elevator_lock); >>> +out: >>> + return res; >> >> And you can keep the "return 0;" here. >> > Yes I agree. And in fact, that's how exactly I initially prepared the patch > however later I saw that other places in this file where we use mutex_lock_ > interruptible(), we use goto when acquiring the lock fails and so I changed > it here to match those other occurrences. So if everyone prefer, shall I use > your suggested pattern at other places as well in this file? > Oh okay, sorry but I overlooked the fact that you've already suggested to change all occurrences of this pattern in the file. I will do it in the next patch. >>> >>> >>> static const char *const hctx_types[] = { >>> @@ -400,12 +405,12 @@ static int hctx_tags_show(void *data, struct seq_file *m) >>> struct request_queue *q = hctx->queue; >>> int res; >>> >>> - res = mutex_lock_interruptible(&q->sysfs_lock); >>> + res = mutex_lock_interruptible(&q->elevator_lock); >>> if (res) >>> goto out; >>> if (hctx->tags) >>> blk_mq_debugfs_tags_show(m, hctx->tags); >>> - mutex_unlock(&q->sysfs_lock); >>> + mutex_unlock(&q->elevator_lock); >>> >>> out: >>> return res; >>> @@ -417,12 +422,12 @@ static int hctx_tags_bitmap_show(void *data, struct seq_file *m) >>> struct request_queue *q = hctx->queue; >>> int res; >>> >>> - res = mutex_lock_interruptible(&q->sysfs_lock); >>> + res = mutex_lock_interruptible(&q->elevator_lock); >>> if (res) >>> goto out; >>> if (hctx->tags) >>> sbitmap_bitmap_show(&hctx->tags->bitmap_tags.sb, m); >>> - mutex_unlock(&q->sysfs_lock); >>> + mutex_unlock(&q->elevator_lock); >>> >>> out: >>> return res; >>> @@ -434,12 +439,12 @@ static int hctx_sched_tags_show(void *data, struct seq_file *m) >>> struct request_queue *q = hctx->queue; >>> int res; >>> >>> - res = mutex_lock_interruptible(&q->sysfs_lock); >>> + res = mutex_lock_interruptible(&q->elevator_lock); >>> if (res) >>> goto out; >>> if (hctx->sched_tags) >>> blk_mq_debugfs_tags_show(m, hctx->sched_tags); >>> - mutex_unlock(&q->sysfs_lock); >>> + mutex_unlock(&q->elevator_lock); >>> >>> out: >>> return res; >>> @@ -451,12 +456,12 @@ static int hctx_sched_tags_bitmap_show(void *data, struct seq_file *m) >>> struct request_queue *q = hctx->queue; >>> int res; >>> >>> - res = mutex_lock_interruptible(&q->sysfs_lock); >>> + res = mutex_lock_interruptible(&q->elevator_lock); >>> if (res) >>> goto out; >>> if (hctx->sched_tags) >>> sbitmap_bitmap_show(&hctx->sched_tags->bitmap_tags.sb, m); >>> - mutex_unlock(&q->sysfs_lock); >>> + mutex_unlock(&q->elevator_lock); >>> >>> out: >>> return res; >>> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h >>> index 22600420799c..709a32022c78 100644 >>> --- a/include/linux/blkdev.h >>> +++ b/include/linux/blkdev.h >>> @@ -568,9 +568,9 @@ struct request_queue { >>> * nr_requests and wbt latency, this lock also protects the sysfs attrs >>> * nr_requests and wbt_lat_usec. Additionally the nr_hw_queues update >>> * may modify hctx tags, reserved-tags and cpumask, so this lock also >>> - * helps protect the hctx attrs. To ensure proper locking order during >>> - * an elevator or nr_hw_queue update, first freeze the queue, then >>> - * acquire ->elevator_lock. >>> + * helps protect the hctx sysfs/debugfs attrs. To ensure proper locking >>> + * order during an elevator or nr_hw_queue update, first freeze the >>> + * queue, then acquire ->elevator_lock. >>> */ >>> struct mutex elevator_lock; >>> > Thanks, --Nilay
On Wed, Mar 12, 2025 at 03:58:38PM +0530, Nilay Shroff wrote: > Additionally, debugfs attribute "busy" is currently unprotected. This > attribute iterates over all started requests in a tagset and prints them. > However, the tags can be updated simultaneously via the sysfs attribute > "nr_requests" or "scheduler" (elevator switch), leading to potential race > conditions. Since the sysfs attributes "nr_requests" and "scheduler" are > already protected using q->elevator_lock, extend this protection to the > debugfs "busy" attribute as well. I'd split that into a separate patch for bisectability. > struct show_busy_params params = { .m = m, .hctx = hctx }; > + int res; > > + res = mutex_lock_interruptible(&hctx->queue->elevator_lock); > + if (res) > + goto out; Is mutex_lock_interruptible really the right primitive here? We don't really want this read system call interrupted by random signals, do we? I guess this should be mutex_lock_killable. (and the same for the existing methods this is copy and pasted from). > blk_mq_tagset_busy_iter(hctx->queue->tag_set, hctx_show_busy_rq, > ¶ms); > - > - return 0; > + mutex_unlock(&hctx->queue->elevator_lock); > +out: > + return res; And as Damien already said, no need for the labels here, including for the existing code. That should probably be alsot changed in an extra patch for the existing code while you're at it.
On 3/12/25 7:42 PM, Christoph Hellwig wrote: > On Wed, Mar 12, 2025 at 03:58:38PM +0530, Nilay Shroff wrote: >> Additionally, debugfs attribute "busy" is currently unprotected. This >> attribute iterates over all started requests in a tagset and prints them. >> However, the tags can be updated simultaneously via the sysfs attribute >> "nr_requests" or "scheduler" (elevator switch), leading to potential race >> conditions. Since the sysfs attributes "nr_requests" and "scheduler" are >> already protected using q->elevator_lock, extend this protection to the >> debugfs "busy" attribute as well. > > I'd split that into a separate patch for bisectability. Ok I will split it. > >> struct show_busy_params params = { .m = m, .hctx = hctx }; >> + int res; >> >> + res = mutex_lock_interruptible(&hctx->queue->elevator_lock); >> + if (res) >> + goto out; > > Is mutex_lock_interruptible really the right primitive here? We don't > really want this read system call interrupted by random signals, do > we? I guess this should be mutex_lock_killable. > > (and the same for the existing methods this is copy and pasted from). > I thought we wanted to interrupt using SIGINT (CTRL+C) in case user opens this file using cat. Maybe that's more convenient than sending SIGKILL. And yes I used mutex_lock_interruptible because for other attributes we are already using it. But if mutex_lock_killable is preferred then I'd change it for all methods. >> blk_mq_tagset_busy_iter(hctx->queue->tag_set, hctx_show_busy_rq, >> ¶ms); >> - >> - return 0; >> + mutex_unlock(&hctx->queue->elevator_lock); >> +out: >> + return res; > > And as Damien already said, no need for the labels here, including for > the existing code. That should probably be alsot changed in an extra > patch for the existing code while you're at it. > Okay will do in next patch. Thanks, --Nilay
diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index adf5f0697b6b..d26a6df945bd 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -347,11 +347,16 @@ static int hctx_busy_show(void *data, struct seq_file *m) { struct blk_mq_hw_ctx *hctx = data; struct show_busy_params params = { .m = m, .hctx = hctx }; + int res; + res = mutex_lock_interruptible(&hctx->queue->elevator_lock); + if (res) + goto out; blk_mq_tagset_busy_iter(hctx->queue->tag_set, hctx_show_busy_rq, ¶ms); - - return 0; + mutex_unlock(&hctx->queue->elevator_lock); +out: + return res; } static const char *const hctx_types[] = { @@ -400,12 +405,12 @@ static int hctx_tags_show(void *data, struct seq_file *m) struct request_queue *q = hctx->queue; int res; - res = mutex_lock_interruptible(&q->sysfs_lock); + res = mutex_lock_interruptible(&q->elevator_lock); if (res) goto out; if (hctx->tags) blk_mq_debugfs_tags_show(m, hctx->tags); - mutex_unlock(&q->sysfs_lock); + mutex_unlock(&q->elevator_lock); out: return res; @@ -417,12 +422,12 @@ static int hctx_tags_bitmap_show(void *data, struct seq_file *m) struct request_queue *q = hctx->queue; int res; - res = mutex_lock_interruptible(&q->sysfs_lock); + res = mutex_lock_interruptible(&q->elevator_lock); if (res) goto out; if (hctx->tags) sbitmap_bitmap_show(&hctx->tags->bitmap_tags.sb, m); - mutex_unlock(&q->sysfs_lock); + mutex_unlock(&q->elevator_lock); out: return res; @@ -434,12 +439,12 @@ static int hctx_sched_tags_show(void *data, struct seq_file *m) struct request_queue *q = hctx->queue; int res; - res = mutex_lock_interruptible(&q->sysfs_lock); + res = mutex_lock_interruptible(&q->elevator_lock); if (res) goto out; if (hctx->sched_tags) blk_mq_debugfs_tags_show(m, hctx->sched_tags); - mutex_unlock(&q->sysfs_lock); + mutex_unlock(&q->elevator_lock); out: return res; @@ -451,12 +456,12 @@ static int hctx_sched_tags_bitmap_show(void *data, struct seq_file *m) struct request_queue *q = hctx->queue; int res; - res = mutex_lock_interruptible(&q->sysfs_lock); + res = mutex_lock_interruptible(&q->elevator_lock); if (res) goto out; if (hctx->sched_tags) sbitmap_bitmap_show(&hctx->sched_tags->bitmap_tags.sb, m); - mutex_unlock(&q->sysfs_lock); + mutex_unlock(&q->elevator_lock); out: return res; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 22600420799c..709a32022c78 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -568,9 +568,9 @@ struct request_queue { * nr_requests and wbt latency, this lock also protects the sysfs attrs * nr_requests and wbt_lat_usec. Additionally the nr_hw_queues update * may modify hctx tags, reserved-tags and cpumask, so this lock also - * helps protect the hctx attrs. To ensure proper locking order during - * an elevator or nr_hw_queue update, first freeze the queue, then - * acquire ->elevator_lock. + * helps protect the hctx sysfs/debugfs attrs. To ensure proper locking + * order during an elevator or nr_hw_queue update, first freeze the + * queue, then acquire ->elevator_lock. */ struct mutex elevator_lock;
Currently, the block debugfs attributes (tags, tags_bitmap, sched_tags, and sched_tags_bitmap) are protected using q->sysfs_lock. However, these attributes are updated in multiple scenarios: - During driver probe method - During an elevator switch/update - During an nr_hw_queues update - When writing to the sysfs attribute nr_requests All these update paths (except driver probe method which anyways doesn't not require any protection) are already protected using q->elevator_lock .So to ensure consistency and proper synchronization, replace q->sysfs_ lock with q->elevator_lock for protecting these debugfs attributes. Additionally, debugfs attribute "busy" is currently unprotected. This attribute iterates over all started requests in a tagset and prints them. However, the tags can be updated simultaneously via the sysfs attribute "nr_requests" or "scheduler" (elevator switch), leading to potential race conditions. Since the sysfs attributes "nr_requests" and "scheduler" are already protected using q->elevator_lock, extend this protection to the debugfs "busy" attribute as well. This change ensures that all relevant debugfs attributes are properly synchronized, preventing potential data inconsistencies. Signed-off-by: Nilay Shroff <nilay@linux.ibm.com> --- Please note that this patch was unit tested against blktests and quick xfstests. --- block/blk-mq-debugfs.c | 25 +++++++++++++++---------- include/linux/blkdev.h | 6 +++--- 2 files changed, 18 insertions(+), 13 deletions(-)