Message ID | 1591810159-240929-8-git-send-email-john.garry@huawei.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | blk-mq/scsi: Provide hostwide shared tags for SCSI HBAs | expand |
On 6/10/20 7:29 PM, John Garry wrote: > Since a set-wide shared tag sbitmap may be used, it is no longer valid to > examine the per-hctx tagset for getting the active requests for a hctx > (when a shared sbitmap is used). > > As such, add support for the shared sbitmap by using an intermediate > sbitmap per hctx, iterating all active tags for the specific hctx in the > shared sbitmap. > > Originally-by: Bart Van Assche <bvanassche@acm.org> > Reviewed-by: Hannes Reinecke <hare@suse.de> #earlier version > Signed-off-by: John Garry <john.garry@huawei.com> > --- > block/blk-mq-debugfs.c | 62 ++++++++++++++++++++++++++++++++++++++++++ > 1 file changed, 62 insertions(+) > > diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c > index 05b4be0c03d9..4da7e54adf3b 100644 > --- a/block/blk-mq-debugfs.c > +++ b/block/blk-mq-debugfs.c > @@ -495,6 +495,67 @@ static int hctx_tags_show(void *data, struct seq_file *m) > return res; > } > > +struct hctx_sb_data { > + struct sbitmap *sb; /* output bitmap */ > + struct blk_mq_hw_ctx *hctx; /* input hctx */ > +}; > + > +static bool hctx_filter_fn(struct blk_mq_hw_ctx *hctx, struct request *req, > + void *priv, bool reserved) > +{ > + struct hctx_sb_data *hctx_sb_data = priv; > + > + if (hctx == hctx_sb_data->hctx) > + sbitmap_set_bit(hctx_sb_data->sb, req->tag); > + > + return true; > +} > + > +static void hctx_filter_sb(struct sbitmap *sb, struct blk_mq_hw_ctx *hctx) > +{ > + struct hctx_sb_data hctx_sb_data = { .sb = sb, .hctx = hctx }; > + > + blk_mq_queue_tag_busy_iter(hctx->queue, hctx_filter_fn, &hctx_sb_data); > +} > + > +static int hctx_tags_shared_sbitmap_bitmap_show(void *data, struct seq_file *m) > +{ > + struct blk_mq_hw_ctx *hctx = data; > + struct request_queue *q = hctx->queue; > + struct blk_mq_tag_set *set = q->tag_set; > + struct sbitmap shared_sb, *sb; > + int res; > + > + if (!set) > + return 0; > + > + /* > + * We could use the allocated sbitmap for that hctx here, but > + * that would mean that we would need to clean it prior to use. > + */ > + res = sbitmap_init_node(&shared_sb, > + set->__bitmap_tags.sb.depth, > + set->__bitmap_tags.sb.shift, > + GFP_KERNEL, NUMA_NO_NODE); > + if (res) > + return res; > + sb = &shared_sb; > + > + res = mutex_lock_interruptible(&q->sysfs_lock); > + if (res) > + goto out; > + if (hctx->tags) { > + hctx_filter_sb(sb, hctx); > + sbitmap_bitmap_show(sb, m); > + } > + > + mutex_unlock(&q->sysfs_lock); > + > +out: > + sbitmap_free(&shared_sb); > + return res; > +} > + > static int hctx_tags_bitmap_show(void *data, struct seq_file *m) > { > struct blk_mq_hw_ctx *hctx = data; > @@ -823,6 +884,7 @@ static const struct blk_mq_debugfs_attr blk_mq_debugfs_hctx_shared_sbitmap_attrs > {"busy", 0400, hctx_busy_show}, > {"ctx_map", 0400, hctx_ctx_map_show}, > {"tags", 0400, hctx_tags_show}, > + {"tags_bitmap", 0400, hctx_tags_shared_sbitmap_bitmap_show}, > {"sched_tags", 0400, hctx_sched_tags_show}, > {"sched_tags_bitmap", 0400, hctx_sched_tags_bitmap_show}, > {"io_poll", 0600, hctx_io_poll_show, hctx_io_poll_write}, > Ah, right. Here it is. But I don't get it; why do we have to allocate a temporary bitmap and can't walk the existing shared sbitmap? Cheers, Hannes
>> +static int hctx_tags_shared_sbitmap_bitmap_show(void *data, struct >> seq_file *m) >> +{ >> + struct blk_mq_hw_ctx *hctx = data; >> + struct request_queue *q = hctx->queue; >> + struct blk_mq_tag_set *set = q->tag_set; >> + struct sbitmap shared_sb, *sb; >> + int res; >> + >> + if (!set) >> + return 0; >> + >> + /* >> + * We could use the allocated sbitmap for that hctx here, but >> + * that would mean that we would need to clean it prior to use. >> + */ * >> + res = sbitmap_init_node(&shared_sb, >> + set->__bitmap_tags.sb.depth, >> + set->__bitmap_tags.sb.shift, >> + GFP_KERNEL, NUMA_NO_NODE); >> + if (res) >> + return res; >> + sb = &shared_sb; >> + >> + res = mutex_lock_interruptible(&q->sysfs_lock); >> + if (res) >> + goto out; >> + if (hctx->tags) { >> + hctx_filter_sb(sb, hctx); >> + sbitmap_bitmap_show(sb, m); >> + } >> + >> + mutex_unlock(&q->sysfs_lock); >> + >> +out: >> + sbitmap_free(&shared_sb); >> + return res; >> +} >> + >> static int hctx_tags_bitmap_show(void *data, struct seq_file *m) >> { >> struct blk_mq_hw_ctx *hctx = data; >> @@ -823,6 +884,7 @@ static const struct blk_mq_debugfs_attr >> blk_mq_debugfs_hctx_shared_sbitmap_attrs >> {"busy", 0400, hctx_busy_show}, >> {"ctx_map", 0400, hctx_ctx_map_show}, >> {"tags", 0400, hctx_tags_show}, >> + {"tags_bitmap", 0400, hctx_tags_shared_sbitmap_bitmap_show}, >> {"sched_tags", 0400, hctx_sched_tags_show}, >> {"sched_tags_bitmap", 0400, hctx_sched_tags_bitmap_show}, >> {"io_poll", 0600, hctx_io_poll_show, hctx_io_poll_write}, >> > Ah, right. Here it is. > > But I don't get it; why do we have to allocate a temporary bitmap and > can't walk the existing shared sbitmap? For the bitmap dump - sbitmap_bitmap_show() - it is passed a struct sbitmap *. So we have to filter into a temp per-hctx struct sbitmap. We could change sbitmap_bitmap_show() to accept a filter iterator - which I think you're getting at - but I am not sure it's worth the change. Or else use the allocated sbitmap for the hctx, as above*, but I may be remove that (see 4/12 response). Cheers, John
On 6/11/20 4:33 PM, John Garry wrote: >>> +static int hctx_tags_shared_sbitmap_bitmap_show(void *data, struct >>> seq_file *m) >>> +{ >>> + struct blk_mq_hw_ctx *hctx = data; >>> + struct request_queue *q = hctx->queue; >>> + struct blk_mq_tag_set *set = q->tag_set; >>> + struct sbitmap shared_sb, *sb; >>> + int res; >>> + >>> + if (!set) >>> + return 0; >>> + >>> + /* >>> + * We could use the allocated sbitmap for that hctx here, but >>> + * that would mean that we would need to clean it prior to use. >>> + */ > > * > >>> + res = sbitmap_init_node(&shared_sb, >>> + set->__bitmap_tags.sb.depth, >>> + set->__bitmap_tags.sb.shift, >>> + GFP_KERNEL, NUMA_NO_NODE); >>> + if (res) >>> + return res; >>> + sb = &shared_sb; >>> + >>> + res = mutex_lock_interruptible(&q->sysfs_lock); >>> + if (res) >>> + goto out; >>> + if (hctx->tags) { >>> + hctx_filter_sb(sb, hctx); >>> + sbitmap_bitmap_show(sb, m); >>> + } >>> + >>> + mutex_unlock(&q->sysfs_lock); >>> + >>> +out: >>> + sbitmap_free(&shared_sb); >>> + return res; >>> +} >>> + >>> static int hctx_tags_bitmap_show(void *data, struct seq_file *m) >>> { >>> struct blk_mq_hw_ctx *hctx = data; >>> @@ -823,6 +884,7 @@ static const struct blk_mq_debugfs_attr >>> blk_mq_debugfs_hctx_shared_sbitmap_attrs >>> {"busy", 0400, hctx_busy_show}, >>> {"ctx_map", 0400, hctx_ctx_map_show}, >>> {"tags", 0400, hctx_tags_show}, >>> + {"tags_bitmap", 0400, hctx_tags_shared_sbitmap_bitmap_show}, >>> {"sched_tags", 0400, hctx_sched_tags_show}, >>> {"sched_tags_bitmap", 0400, hctx_sched_tags_bitmap_show}, >>> {"io_poll", 0600, hctx_io_poll_show, hctx_io_poll_write}, >>> >> Ah, right. Here it is. >> >> But I don't get it; why do we have to allocate a temporary bitmap and >> can't walk the existing shared sbitmap? > > For the bitmap dump - sbitmap_bitmap_show() - it is passed a struct > sbitmap *. So we have to filter into a temp per-hctx struct sbitmap. We > could change sbitmap_bitmap_show() to accept a filter iterator - which I > think you're getting at - but I am not sure it's worth the change. Or > else use the allocated sbitmap for the hctx, as above*, but I may be > remove that (see 4/12 response). > Yes, I do think I would prefer updating sbitmap_bitmap_show() to accept a filter. Especially as Ming Lei has now updated the tag iterators to accept a filter, too, so it should be an easy change. Cheers, Hannes
Hi all, I noticed that sbitmap_bitmap_show() only shows set bits and does not consider cleared bits. Is that the proper thing to do? I ask, as from trying to support sbitmap_bitmap_show() for hostwide shared tags feature, we currently use blk_mq_queue_tag_busy_iter() to find active requests (and associated tags/bits) for a particular hctx. So, AFAICT, would give a change in behavior for sbitmap_bitmap_show(), in that it would effectively show set and not cleared bits. Any thoughts on this? Thanks!
On 6/29/20 5:32 PM, John Garry wrote: > Hi all, > > I noticed that sbitmap_bitmap_show() only shows set bits and does not > consider cleared bits. Is that the proper thing to do? > > I ask, as from trying to support sbitmap_bitmap_show() for hostwide > shared tags feature, we currently use blk_mq_queue_tag_busy_iter() to > find active requests (and associated tags/bits) for a particular hctx. > So, AFAICT, would give a change in behavior for sbitmap_bitmap_show(), > in that it would effectively show set and not cleared bits. > Why would you need to do this? Where would be the point traversing cleared bits? Cheers, Hannes
On 30/06/2020 07:33, Hannes Reinecke wrote: > On 6/29/20 5:32 PM, John Garry wrote: >> Hi all, >> >> I noticed that sbitmap_bitmap_show() only shows set bits and does not >> consider cleared bits. Is that the proper thing to do? >> >> I ask, as from trying to support sbitmap_bitmap_show() for hostwide >> shared tags feature, we currently use blk_mq_queue_tag_busy_iter() to >> find active requests (and associated tags/bits) for a particular hctx. >> So, AFAICT, would give a change in behavior for sbitmap_bitmap_show(), >> in that it would effectively show set and not cleared bits. >> > Why would you need to do this? > Where would be the point traversing cleared bits? I'm not talking about traversing cleared bits specifically. I just think that today sbitmap_bitmap_show() only showing the bits in sbitmap_word.word may not be useful or even misleading, as in reality the "set" bits are sbitmap_word.word & ~sbitmap_word.cleared. And for hostwide shared tags feature, iterating the busy rqs to find the per-hctx tags/bits would effectively give us the "set" bits, above, so there would be a difference. Thanks, John
On 30/06/2020 08:30, John Garry wrote: > On 30/06/2020 07:33, Hannes Reinecke wrote: >> On 6/29/20 5:32 PM, John Garry wrote: >>> Hi all, >>> >>> I noticed that sbitmap_bitmap_show() only shows set bits and does not >>> consider cleared bits. Is that the proper thing to do? >>> >>> I ask, as from trying to support sbitmap_bitmap_show() for hostwide >>> shared tags feature, we currently use blk_mq_queue_tag_busy_iter() to >>> find active requests (and associated tags/bits) for a particular >>> hctx. So, AFAICT, would give a change in behavior for >>> sbitmap_bitmap_show(), in that it would effectively show set and not >>> cleared bits. >>> >> Why would you need to do this? >> Where would be the point traversing cleared bits? > > I'm not talking about traversing cleared bits specifically. I just think > that today sbitmap_bitmap_show() only showing the bits in > sbitmap_word.word may not be useful or even misleading, as in reality > the "set" bits are sbitmap_word.word & ~sbitmap_word.cleared. > > And for hostwide shared tags feature, iterating the busy rqs to find the > per-hctx tags/bits would effectively give us the "set" bits, above, so > there would be a difference. > As an example, here's a sample tags_bitmap output: 00000000: 00f0 0fff 03c0 0000 0000 0000 efff fdff 00000010: 0000 c0f7 7fff ffff 0000 00e0 fef7 ffff 00000020: 0000 0000 f0ff ffff 0000 ffff 01d0 ffff 00000030: 0f80 And here's what we would have taking cleared bits into account: 00000000: 00f0 0fff 03c0 0000 0000 0000 0000 0000 (20 bits set) 00000010: 0000 0000 0000 0000 0000 0000 0000 0000 00000020: 0000 0000 0000 0000 0000 f8ff 0110 8000 (16 bits set) 00000030: 0f00 (1 bit set) Here's tags file output: nr_tags=400 nr_reserved_tags=0 active_queues=2 bitmap_tags: depth=400 busy=40 cleared=182 bits_per_word=64 map_nr=7 alloc_hint={22, 0, 0, 0, 103, 389, 231, 57, 377, 167, 0, 0, 69, 24, 44, 50, 54, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 , 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} wake_batch=8 wake_index=0 [snip] 20+16+1=39 more closely matches with busy=40. So it seems sensible to go this way for whether hostwide tags are used or not. thanks, John
On 2020-06-29 08:32, John Garry wrote: > I noticed that sbitmap_bitmap_show() only shows set bits and does not > consider cleared bits. Is that the proper thing to do? > > I ask, as from trying to support sbitmap_bitmap_show() for hostwide > shared tags feature, we currently use blk_mq_queue_tag_busy_iter() to > find active requests (and associated tags/bits) for a particular hctx. > So, AFAICT, would give a change in behavior for sbitmap_bitmap_show(), > in that it would effectively show set and not cleared bits. > > Any thoughts on this? Probably this is something that got overlooked when the cleared bits were introduced? See e.g. 8c2def893afc ("sbitmap: fix sbitmap_for_each_set()"). Bart.
On 12/06/2020 07:06, Hannes Reinecke wrote: >>>> >>>> +out: >>>> + sbitmap_free(&shared_sb); >>>> + return res; >>>> +} >>>> + >>>> static int hctx_tags_bitmap_show(void *data, struct seq_file *m) >>>> { >>>> struct blk_mq_hw_ctx *hctx = data; >>>> @@ -823,6 +884,7 @@ static const struct blk_mq_debugfs_attr >>>> blk_mq_debugfs_hctx_shared_sbitmap_attrs >>>> {"busy", 0400, hctx_busy_show}, >>>> {"ctx_map", 0400, hctx_ctx_map_show}, >>>> {"tags", 0400, hctx_tags_show}, >>>> + {"tags_bitmap", 0400, hctx_tags_shared_sbitmap_bitmap_show}, >>>> {"sched_tags", 0400, hctx_sched_tags_show}, >>>> {"sched_tags_bitmap", 0400, hctx_sched_tags_bitmap_show}, >>>> {"io_poll", 0600, hctx_io_poll_show, hctx_io_poll_write}, >>>> >>> Ah, right. Here it is. >>> >>> But I don't get it; why do we have to allocate a temporary bitmap and >>> can't walk the existing shared sbitmap? >> Just coming back to this now... >> For the bitmap dump - sbitmap_bitmap_show() - it is passed a struct >> sbitmap *. So we have to filter into a temp per-hctx struct sbitmap. >> We could change sbitmap_bitmap_show() to accept a filter iterator - >> which I think you're getting at - but I am not sure it's worth the >> change. Or else use the allocated sbitmap for the hctx, as above*, but >> I may be remove that (see 4/12 response). >> > Yes, I do think I would prefer updating sbitmap_bitmap_show() to accept > a filter. Especially as Ming Lei has now updated the tag iterators to > accept a filter, too, so it should be an easy change. Can you please explain how you would use an iterator here? In fact, I am half thinking of dropping this patch entirely. So I feel that current behavior is a little strange, as some may expect /sys/kernel/debug/block/sdX/hctxY/tags_bitmap would only show tags for hctxY for sdX, while it is for hctxY for all queues. Same for /sys/kernel/debug/block/sdX/hctxY/tags And then, for what we have in this patch: static void hctx_filter_sb(struct sbitmap *sb, struct blk_mq_hw_ctx *hctx) { struct hctx_sb_data hctx_sb_data = { .sb = sb, .hctx = hctx }; blk_mq_queue_tag_busy_iter(hctx->queue, hctx_filter_fn, &hctx_sb_data); } This will give tags only for this queue. So not the same. So I feel it's better to change current behavior (so consistent) or change neither. And changing current behavior would also mean we need to change what we show in /sys/kernel/debug/block/sdX/hctxY/tags, and that looks troublesome also. Opinion? Thanks, John
On 7/13/20 11:41 AM, John Garry wrote: > On 12/06/2020 07:06, Hannes Reinecke wrote: >>>>> >>>>> +out: >>>>> + sbitmap_free(&shared_sb); >>>>> + return res; >>>>> +} >>>>> + >>>>> static int hctx_tags_bitmap_show(void *data, struct seq_file *m) >>>>> { >>>>> struct blk_mq_hw_ctx *hctx = data; >>>>> @@ -823,6 +884,7 @@ static const struct blk_mq_debugfs_attr >>>>> blk_mq_debugfs_hctx_shared_sbitmap_attrs >>>>> {"busy", 0400, hctx_busy_show}, >>>>> {"ctx_map", 0400, hctx_ctx_map_show}, >>>>> {"tags", 0400, hctx_tags_show}, >>>>> + {"tags_bitmap", 0400, hctx_tags_shared_sbitmap_bitmap_show}, >>>>> {"sched_tags", 0400, hctx_sched_tags_show}, >>>>> {"sched_tags_bitmap", 0400, hctx_sched_tags_bitmap_show}, >>>>> {"io_poll", 0600, hctx_io_poll_show, hctx_io_poll_write}, >>>>> >>>> Ah, right. Here it is. >>>> >>>> But I don't get it; why do we have to allocate a temporary bitmap >>>> and can't walk the existing shared sbitmap? >>> > > Just coming back to this now... > >>> For the bitmap dump - sbitmap_bitmap_show() - it is passed a struct >>> sbitmap *. So we have to filter into a temp per-hctx struct sbitmap. >>> We could change sbitmap_bitmap_show() to accept a filter iterator - >>> which I think you're getting at - but I am not sure it's worth the >>> change. Or else use the allocated sbitmap for the hctx, as above*, >>> but I may be remove that (see 4/12 response). >>> >> Yes, I do think I would prefer updating sbitmap_bitmap_show() to >> accept a filter. Especially as Ming Lei has now updated the tag >> iterators to accept a filter, too, so it should be an easy change. > > Can you please explain how you would use an iterator here? > > In fact, I am half thinking of dropping this patch entirely. > > So I feel that current behavior is a little strange, as some may expect > /sys/kernel/debug/block/sdX/hctxY/tags_bitmap would only show tags for > hctxY for sdX, while it is for hctxY for all queues. Same for > /sys/kernel/debug/block/sdX/hctxY/tags > > And then, for what we have in this patch: > > static void hctx_filter_sb(struct sbitmap *sb, struct blk_mq_hw_ctx *hctx) > { > struct hctx_sb_data hctx_sb_data = { .sb = sb, .hctx = hctx }; > > blk_mq_queue_tag_busy_iter(hctx->queue, hctx_filter_fn, &hctx_sb_data); > } > > This will give tags only for this queue. So not the same. So I feel it's > better to change current behavior (so consistent) or change neither. And > changing current behavior would also mean we need to change what we show > in /sys/kernel/debug/block/sdX/hctxY/tags, and that looks troublesome also. > > Opinion? > The whole notion of having sysfs presenting tags per hctx doesn't really apply anymore when running with shared tags. We could be sticking with the per-hctx attribute, but then busy tags won't be displayed correctly as tags might be busy, but not on this hctx. The alternative idea of projecting everything over to hctx0 (or just duplicating the output from hctx0) would be technically correct, but would be missing the per-hctx information. Ideally we would have some sort of tri-state information here: busy, busy on other hctx, not busy. Then the per-hctx attribute would start making sense again. Otherwise I would just leave it for now. Cheers, Hannes
diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index 05b4be0c03d9..4da7e54adf3b 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -495,6 +495,67 @@ static int hctx_tags_show(void *data, struct seq_file *m) return res; } +struct hctx_sb_data { + struct sbitmap *sb; /* output bitmap */ + struct blk_mq_hw_ctx *hctx; /* input hctx */ +}; + +static bool hctx_filter_fn(struct blk_mq_hw_ctx *hctx, struct request *req, + void *priv, bool reserved) +{ + struct hctx_sb_data *hctx_sb_data = priv; + + if (hctx == hctx_sb_data->hctx) + sbitmap_set_bit(hctx_sb_data->sb, req->tag); + + return true; +} + +static void hctx_filter_sb(struct sbitmap *sb, struct blk_mq_hw_ctx *hctx) +{ + struct hctx_sb_data hctx_sb_data = { .sb = sb, .hctx = hctx }; + + blk_mq_queue_tag_busy_iter(hctx->queue, hctx_filter_fn, &hctx_sb_data); +} + +static int hctx_tags_shared_sbitmap_bitmap_show(void *data, struct seq_file *m) +{ + struct blk_mq_hw_ctx *hctx = data; + struct request_queue *q = hctx->queue; + struct blk_mq_tag_set *set = q->tag_set; + struct sbitmap shared_sb, *sb; + int res; + + if (!set) + return 0; + + /* + * We could use the allocated sbitmap for that hctx here, but + * that would mean that we would need to clean it prior to use. + */ + res = sbitmap_init_node(&shared_sb, + set->__bitmap_tags.sb.depth, + set->__bitmap_tags.sb.shift, + GFP_KERNEL, NUMA_NO_NODE); + if (res) + return res; + sb = &shared_sb; + + res = mutex_lock_interruptible(&q->sysfs_lock); + if (res) + goto out; + if (hctx->tags) { + hctx_filter_sb(sb, hctx); + sbitmap_bitmap_show(sb, m); + } + + mutex_unlock(&q->sysfs_lock); + +out: + sbitmap_free(&shared_sb); + return res; +} + static int hctx_tags_bitmap_show(void *data, struct seq_file *m) { struct blk_mq_hw_ctx *hctx = data; @@ -823,6 +884,7 @@ static const struct blk_mq_debugfs_attr blk_mq_debugfs_hctx_shared_sbitmap_attrs {"busy", 0400, hctx_busy_show}, {"ctx_map", 0400, hctx_ctx_map_show}, {"tags", 0400, hctx_tags_show}, + {"tags_bitmap", 0400, hctx_tags_shared_sbitmap_bitmap_show}, {"sched_tags", 0400, hctx_sched_tags_show}, {"sched_tags_bitmap", 0400, hctx_sched_tags_bitmap_show}, {"io_poll", 0600, hctx_io_poll_show, hctx_io_poll_write},