Message ID | 20230223091226.1135678-1-yukuai1@huaweicloud.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [-next,RFC] block: count 'ios' and 'sectors' when io is done for bio-based device | expand |
Hi, friendly ping ... Thanks, Kuai 在 2023/02/23 17:12, Yu Kuai 写道: > From: Yu Kuai <yukuai3@huawei.com> > > While using iostat for raid, I observed very strange 'await' > occasionally, and turns out it's due to that 'ios' and 'sectors' is > counted in bdev_start_io_acct(), while 'nsecs' is counted in > bdev_end_io_acct(). I'm not sure why they are ccounted like that > but I think this behaviour is obviously wrong because user will get > wrong disk stats. > > Fix the problem by counting 'ios' and 'sectors' when io is done, like > what rq-based device does. > > Fixes: 394ffa503bc4 ("blk: introduce generic io stat accounting help function") > Signed-off-by: Yu Kuai <yukuai3@huawei.com> > --- > block/blk-core.c | 16 ++++++---------- > drivers/md/dm.c | 6 +++--- > drivers/nvme/host/multipath.c | 8 ++++---- > include/linux/blkdev.h | 5 ++--- > 4 files changed, 15 insertions(+), 20 deletions(-) > > diff --git a/block/blk-core.c b/block/blk-core.c > index 82b5b2c53f1e..fe1d320f5f07 100644 > --- a/block/blk-core.c > +++ b/block/blk-core.c > @@ -953,16 +953,11 @@ void update_io_ticks(struct block_device *part, unsigned long now, bool end) > } > } > > -unsigned long bdev_start_io_acct(struct block_device *bdev, > - unsigned int sectors, enum req_op op, > +unsigned long bdev_start_io_acct(struct block_device *bdev, enum req_op op, > unsigned long start_time) > { > - const int sgrp = op_stat_group(op); > - > part_stat_lock(); > update_io_ticks(bdev, start_time, false); > - part_stat_inc(bdev, ios[sgrp]); > - part_stat_add(bdev, sectors[sgrp], sectors); > part_stat_local_inc(bdev, in_flight[op_is_write(op)]); > part_stat_unlock(); > > @@ -978,13 +973,12 @@ EXPORT_SYMBOL(bdev_start_io_acct); > */ > unsigned long bio_start_io_acct(struct bio *bio) > { > - return bdev_start_io_acct(bio->bi_bdev, bio_sectors(bio), > - bio_op(bio), jiffies); > + return bdev_start_io_acct(bio->bi_bdev, bio_op(bio), jiffies); > } > EXPORT_SYMBOL_GPL(bio_start_io_acct); > > void bdev_end_io_acct(struct block_device *bdev, enum req_op op, > - unsigned long start_time) > + unsigned int sectors, unsigned long start_time) > { > const int sgrp = op_stat_group(op); > unsigned long now = READ_ONCE(jiffies); > @@ -992,6 +986,8 @@ void bdev_end_io_acct(struct block_device *bdev, enum req_op op, > > part_stat_lock(); > update_io_ticks(bdev, now, true); > + part_stat_inc(bdev, ios[sgrp]); > + part_stat_add(bdev, sectors[sgrp], sectors); > part_stat_add(bdev, nsecs[sgrp], jiffies_to_nsecs(duration)); > part_stat_local_dec(bdev, in_flight[op_is_write(op)]); > part_stat_unlock(); > @@ -1001,7 +997,7 @@ EXPORT_SYMBOL(bdev_end_io_acct); > void bio_end_io_acct_remapped(struct bio *bio, unsigned long start_time, > struct block_device *orig_bdev) > { > - bdev_end_io_acct(orig_bdev, bio_op(bio), start_time); > + bdev_end_io_acct(orig_bdev, bio_op(bio), bio_sectors(bio), start_time); > } > EXPORT_SYMBOL_GPL(bio_end_io_acct_remapped); > > diff --git a/drivers/md/dm.c b/drivers/md/dm.c > index eace45a18d45..f5cc330bb549 100644 > --- a/drivers/md/dm.c > +++ b/drivers/md/dm.c > @@ -512,10 +512,10 @@ static void dm_io_acct(struct dm_io *io, bool end) > sectors = io->sectors; > > if (!end) > - bdev_start_io_acct(bio->bi_bdev, sectors, bio_op(bio), > - start_time); > + bdev_start_io_acct(bio->bi_bdev, bio_op(bio), start_time); > else > - bdev_end_io_acct(bio->bi_bdev, bio_op(bio), start_time); > + bdev_end_io_acct(bio->bi_bdev, bio_op(bio), sectors, > + start_time); > > if (static_branch_unlikely(&stats_enabled) && > unlikely(dm_stats_used(&md->stats))) { > diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c > index fc39d01e7b63..9171452e2f6d 100644 > --- a/drivers/nvme/host/multipath.c > +++ b/drivers/nvme/host/multipath.c > @@ -123,9 +123,8 @@ void nvme_mpath_start_request(struct request *rq) > return; > > nvme_req(rq)->flags |= NVME_MPATH_IO_STATS; > - nvme_req(rq)->start_time = bdev_start_io_acct(disk->part0, > - blk_rq_bytes(rq) >> SECTOR_SHIFT, > - req_op(rq), jiffies); > + nvme_req(rq)->start_time = bdev_start_io_acct(disk->part0, req_op(rq), > + jiffies); > } > EXPORT_SYMBOL_GPL(nvme_mpath_start_request); > > @@ -136,7 +135,8 @@ void nvme_mpath_end_request(struct request *rq) > if (!(nvme_req(rq)->flags & NVME_MPATH_IO_STATS)) > return; > bdev_end_io_acct(ns->head->disk->part0, req_op(rq), > - nvme_req(rq)->start_time); > + blk_rq_bytes(rq) >> SECTOR_SHIFT, > + nvme_req(rq)->start_time); > } > > void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl) > diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h > index d1aee08f8c18..941304f17492 100644 > --- a/include/linux/blkdev.h > +++ b/include/linux/blkdev.h > @@ -1446,11 +1446,10 @@ static inline void blk_wake_io_task(struct task_struct *waiter) > wake_up_process(waiter); > } > > -unsigned long bdev_start_io_acct(struct block_device *bdev, > - unsigned int sectors, enum req_op op, > +unsigned long bdev_start_io_acct(struct block_device *bdev, enum req_op op, > unsigned long start_time); > void bdev_end_io_acct(struct block_device *bdev, enum req_op op, > - unsigned long start_time); > + unsigned int sectors, unsigned long start_time); > > unsigned long bio_start_io_acct(struct bio *bio); > void bio_end_io_acct_remapped(struct bio *bio, unsigned long start_time, >
Hi, 在 2023/02/28 9:01, Yu Kuai 写道: > Hi, > > friendly ping ... > > Thanks, > Kuai > > 在 2023/02/23 17:12, Yu Kuai 写道: >> From: Yu Kuai <yukuai3@huawei.com> >> >> While using iostat for raid, I observed very strange 'await' >> occasionally, and turns out it's due to that 'ios' and 'sectors' is >> counted in bdev_start_io_acct(), while 'nsecs' is counted in >> bdev_end_io_acct(). I'm not sure why they are ccounted like that >> but I think this behaviour is obviously wrong because user will get >> wrong disk stats. >> >> Fix the problem by counting 'ios' and 'sectors' when io is done, like >> what rq-based device does. Can anyone help to review this change? Or is there any reason to count 'ios' and 'sectors' when io is started? Thanks, Kuai >> >> Fixes: 394ffa503bc4 ("blk: introduce generic io stat accounting help >> function") >> Signed-off-by: Yu Kuai <yukuai3@huawei.com> >> --- >> block/blk-core.c | 16 ++++++---------- >> drivers/md/dm.c | 6 +++--- >> drivers/nvme/host/multipath.c | 8 ++++---- >> include/linux/blkdev.h | 5 ++--- >> 4 files changed, 15 insertions(+), 20 deletions(-) >> >> diff --git a/block/blk-core.c b/block/blk-core.c >> index 82b5b2c53f1e..fe1d320f5f07 100644 >> --- a/block/blk-core.c >> +++ b/block/blk-core.c >> @@ -953,16 +953,11 @@ void update_io_ticks(struct block_device *part, >> unsigned long now, bool end) >> } >> } >> -unsigned long bdev_start_io_acct(struct block_device *bdev, >> - unsigned int sectors, enum req_op op, >> +unsigned long bdev_start_io_acct(struct block_device *bdev, enum >> req_op op, >> unsigned long start_time) >> { >> - const int sgrp = op_stat_group(op); >> - >> part_stat_lock(); >> update_io_ticks(bdev, start_time, false); >> - part_stat_inc(bdev, ios[sgrp]); >> - part_stat_add(bdev, sectors[sgrp], sectors); >> part_stat_local_inc(bdev, in_flight[op_is_write(op)]); >> part_stat_unlock(); >> @@ -978,13 +973,12 @@ EXPORT_SYMBOL(bdev_start_io_acct); >> */ >> unsigned long bio_start_io_acct(struct bio *bio) >> { >> - return bdev_start_io_acct(bio->bi_bdev, bio_sectors(bio), >> - bio_op(bio), jiffies); >> + return bdev_start_io_acct(bio->bi_bdev, bio_op(bio), jiffies); >> } >> EXPORT_SYMBOL_GPL(bio_start_io_acct); >> void bdev_end_io_acct(struct block_device *bdev, enum req_op op, >> - unsigned long start_time) >> + unsigned int sectors, unsigned long start_time) >> { >> const int sgrp = op_stat_group(op); >> unsigned long now = READ_ONCE(jiffies); >> @@ -992,6 +986,8 @@ void bdev_end_io_acct(struct block_device *bdev, >> enum req_op op, >> part_stat_lock(); >> update_io_ticks(bdev, now, true); >> + part_stat_inc(bdev, ios[sgrp]); >> + part_stat_add(bdev, sectors[sgrp], sectors); >> part_stat_add(bdev, nsecs[sgrp], jiffies_to_nsecs(duration)); >> part_stat_local_dec(bdev, in_flight[op_is_write(op)]); >> part_stat_unlock(); >> @@ -1001,7 +997,7 @@ EXPORT_SYMBOL(bdev_end_io_acct); >> void bio_end_io_acct_remapped(struct bio *bio, unsigned long >> start_time, >> struct block_device *orig_bdev) >> { >> - bdev_end_io_acct(orig_bdev, bio_op(bio), start_time); >> + bdev_end_io_acct(orig_bdev, bio_op(bio), bio_sectors(bio), >> start_time); >> } >> EXPORT_SYMBOL_GPL(bio_end_io_acct_remapped); >> diff --git a/drivers/md/dm.c b/drivers/md/dm.c >> index eace45a18d45..f5cc330bb549 100644 >> --- a/drivers/md/dm.c >> +++ b/drivers/md/dm.c >> @@ -512,10 +512,10 @@ static void dm_io_acct(struct dm_io *io, bool end) >> sectors = io->sectors; >> if (!end) >> - bdev_start_io_acct(bio->bi_bdev, sectors, bio_op(bio), >> - start_time); >> + bdev_start_io_acct(bio->bi_bdev, bio_op(bio), start_time); >> else >> - bdev_end_io_acct(bio->bi_bdev, bio_op(bio), start_time); >> + bdev_end_io_acct(bio->bi_bdev, bio_op(bio), sectors, >> + start_time); >> if (static_branch_unlikely(&stats_enabled) && >> unlikely(dm_stats_used(&md->stats))) { >> diff --git a/drivers/nvme/host/multipath.c >> b/drivers/nvme/host/multipath.c >> index fc39d01e7b63..9171452e2f6d 100644 >> --- a/drivers/nvme/host/multipath.c >> +++ b/drivers/nvme/host/multipath.c >> @@ -123,9 +123,8 @@ void nvme_mpath_start_request(struct request *rq) >> return; >> nvme_req(rq)->flags |= NVME_MPATH_IO_STATS; >> - nvme_req(rq)->start_time = bdev_start_io_acct(disk->part0, >> - blk_rq_bytes(rq) >> SECTOR_SHIFT, >> - req_op(rq), jiffies); >> + nvme_req(rq)->start_time = bdev_start_io_acct(disk->part0, >> req_op(rq), >> + jiffies); >> } >> EXPORT_SYMBOL_GPL(nvme_mpath_start_request); >> @@ -136,7 +135,8 @@ void nvme_mpath_end_request(struct request *rq) >> if (!(nvme_req(rq)->flags & NVME_MPATH_IO_STATS)) >> return; >> bdev_end_io_acct(ns->head->disk->part0, req_op(rq), >> - nvme_req(rq)->start_time); >> + blk_rq_bytes(rq) >> SECTOR_SHIFT, >> + nvme_req(rq)->start_time); >> } >> void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl) >> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h >> index d1aee08f8c18..941304f17492 100644 >> --- a/include/linux/blkdev.h >> +++ b/include/linux/blkdev.h >> @@ -1446,11 +1446,10 @@ static inline void blk_wake_io_task(struct >> task_struct *waiter) >> wake_up_process(waiter); >> } >> -unsigned long bdev_start_io_acct(struct block_device *bdev, >> - unsigned int sectors, enum req_op op, >> +unsigned long bdev_start_io_acct(struct block_device *bdev, enum >> req_op op, >> unsigned long start_time); >> void bdev_end_io_acct(struct block_device *bdev, enum req_op op, >> - unsigned long start_time); >> + unsigned int sectors, unsigned long start_time); >> unsigned long bio_start_io_acct(struct bio *bio); >> void bio_end_io_acct_remapped(struct bio *bio, unsigned long >> start_time, >> > > . >
Looks good:
Reviewed-by: Christoph Hellwig <hch@lst.de>
On Thu, 23 Feb 2023 17:12:26 +0800, Yu Kuai wrote: > While using iostat for raid, I observed very strange 'await' > occasionally, and turns out it's due to that 'ios' and 'sectors' is > counted in bdev_start_io_acct(), while 'nsecs' is counted in > bdev_end_io_acct(). I'm not sure why they are ccounted like that > but I think this behaviour is obviously wrong because user will get > wrong disk stats. > > [...] Applied, thanks! [1/1] block: count 'ios' and 'sectors' when io is done for bio-based device commit: 5f27571382ca42daa3e3d40d1b252bf18c2b61d2 Best regards,
diff --git a/block/blk-core.c b/block/blk-core.c index 82b5b2c53f1e..fe1d320f5f07 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -953,16 +953,11 @@ void update_io_ticks(struct block_device *part, unsigned long now, bool end) } } -unsigned long bdev_start_io_acct(struct block_device *bdev, - unsigned int sectors, enum req_op op, +unsigned long bdev_start_io_acct(struct block_device *bdev, enum req_op op, unsigned long start_time) { - const int sgrp = op_stat_group(op); - part_stat_lock(); update_io_ticks(bdev, start_time, false); - part_stat_inc(bdev, ios[sgrp]); - part_stat_add(bdev, sectors[sgrp], sectors); part_stat_local_inc(bdev, in_flight[op_is_write(op)]); part_stat_unlock(); @@ -978,13 +973,12 @@ EXPORT_SYMBOL(bdev_start_io_acct); */ unsigned long bio_start_io_acct(struct bio *bio) { - return bdev_start_io_acct(bio->bi_bdev, bio_sectors(bio), - bio_op(bio), jiffies); + return bdev_start_io_acct(bio->bi_bdev, bio_op(bio), jiffies); } EXPORT_SYMBOL_GPL(bio_start_io_acct); void bdev_end_io_acct(struct block_device *bdev, enum req_op op, - unsigned long start_time) + unsigned int sectors, unsigned long start_time) { const int sgrp = op_stat_group(op); unsigned long now = READ_ONCE(jiffies); @@ -992,6 +986,8 @@ void bdev_end_io_acct(struct block_device *bdev, enum req_op op, part_stat_lock(); update_io_ticks(bdev, now, true); + part_stat_inc(bdev, ios[sgrp]); + part_stat_add(bdev, sectors[sgrp], sectors); part_stat_add(bdev, nsecs[sgrp], jiffies_to_nsecs(duration)); part_stat_local_dec(bdev, in_flight[op_is_write(op)]); part_stat_unlock(); @@ -1001,7 +997,7 @@ EXPORT_SYMBOL(bdev_end_io_acct); void bio_end_io_acct_remapped(struct bio *bio, unsigned long start_time, struct block_device *orig_bdev) { - bdev_end_io_acct(orig_bdev, bio_op(bio), start_time); + bdev_end_io_acct(orig_bdev, bio_op(bio), bio_sectors(bio), start_time); } EXPORT_SYMBOL_GPL(bio_end_io_acct_remapped); diff --git a/drivers/md/dm.c b/drivers/md/dm.c index eace45a18d45..f5cc330bb549 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -512,10 +512,10 @@ static void dm_io_acct(struct dm_io *io, bool end) sectors = io->sectors; if (!end) - bdev_start_io_acct(bio->bi_bdev, sectors, bio_op(bio), - start_time); + bdev_start_io_acct(bio->bi_bdev, bio_op(bio), start_time); else - bdev_end_io_acct(bio->bi_bdev, bio_op(bio), start_time); + bdev_end_io_acct(bio->bi_bdev, bio_op(bio), sectors, + start_time); if (static_branch_unlikely(&stats_enabled) && unlikely(dm_stats_used(&md->stats))) { diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index fc39d01e7b63..9171452e2f6d 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -123,9 +123,8 @@ void nvme_mpath_start_request(struct request *rq) return; nvme_req(rq)->flags |= NVME_MPATH_IO_STATS; - nvme_req(rq)->start_time = bdev_start_io_acct(disk->part0, - blk_rq_bytes(rq) >> SECTOR_SHIFT, - req_op(rq), jiffies); + nvme_req(rq)->start_time = bdev_start_io_acct(disk->part0, req_op(rq), + jiffies); } EXPORT_SYMBOL_GPL(nvme_mpath_start_request); @@ -136,7 +135,8 @@ void nvme_mpath_end_request(struct request *rq) if (!(nvme_req(rq)->flags & NVME_MPATH_IO_STATS)) return; bdev_end_io_acct(ns->head->disk->part0, req_op(rq), - nvme_req(rq)->start_time); + blk_rq_bytes(rq) >> SECTOR_SHIFT, + nvme_req(rq)->start_time); } void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index d1aee08f8c18..941304f17492 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1446,11 +1446,10 @@ static inline void blk_wake_io_task(struct task_struct *waiter) wake_up_process(waiter); } -unsigned long bdev_start_io_acct(struct block_device *bdev, - unsigned int sectors, enum req_op op, +unsigned long bdev_start_io_acct(struct block_device *bdev, enum req_op op, unsigned long start_time); void bdev_end_io_acct(struct block_device *bdev, enum req_op op, - unsigned long start_time); + unsigned int sectors, unsigned long start_time); unsigned long bio_start_io_acct(struct bio *bio); void bio_end_io_acct_remapped(struct bio *bio, unsigned long start_time,