diff mbox series

[RFC,1/1] block: introduce activity based ioprio

Message ID 20240117092348.2873928-1-zhaoyang.huang@unisoc.com (mailing list archive)
State New, archived
Headers show
Series [RFC,1/1] block: introduce activity based ioprio | expand

Commit Message

zhaoyang.huang Jan. 17, 2024, 9:23 a.m. UTC
From: Zhaoyang Huang <zhaoyang.huang@unisoc.com>

This commit try to introduce a feature which adjust the request ioprio
based on the folio's activity. The original idea comes from LRU_GEN
which provides more precised folio activity than before. This commit try
to adjust the request's ioprio when certain part of its folios are hot,
which indicate that this request carry important contents and need be
scheduled ealier.

This commit is tested via bellowing script[1] on a v6.6 android system and
get better iowait result[2] when mglru enabled, where
1. fault_latency.bin is an ebpf based test result which measure all task's
   iowait latency when scheduled in/out.
2. costmem generate page fault by mmaping a file and access the VA.
3. dd generate concurrent vfs io.

[1]
./fault_latency.bin 1 5 > /data/dd_costmem &
costmem -c0 -a2048000 -b128000 -o0 1>/dev/null &
costmem -c0 -a2048000 -b128000 -o0 1>/dev/null &
costmem -c0 -a2048000 -b128000 -o0 1>/dev/null &
costmem -c0 -a2048000 -b128000 -o0 1>/dev/null &
dd if=/dev/block/sda of=/data/ddtest bs=1024 count=2048000 &
dd if=/dev/block/sda of=/data/ddtest1 bs=1024 count=2048000 &
dd if=/dev/block/sda of=/data/ddtest2 bs=1024 count=2048000 &
dd if=/dev/block/sda of=/data/ddtest3 bs=1024 count=2048000

[2]
mainline:
Summary for 5932.00(ms)
    All Faults:       1398   235.67 counts/sec
        Iowait:        553    93.22 counts/sec
   All latency:    7432948  1253.03 us/ms
        Iowait:    1321971   222.85 us/ms
Summary for 6706.00(ms)
    All Faults:       1921   286.46 counts/sec
        Iowait:       1273   189.83 counts/sec
   All latency:   25890252  3860.76 us/ms
        Iowait:    4468861   666.40 us/ms
Summary for 5838.00(ms)
    All Faults:       1580   270.64 counts/sec
        Iowait:        619   106.03 counts/sec
   All latency:    6862215  1175.44 us/ms
        Iowait:    1077616   184.59 us/ms
Summary for 5916.00(ms)
    All Faults:       1195   201.99 counts/sec
        Iowait:        494    83.50 counts/sec
   All latency:    4555134   769.97 us/ms
        Iowait:     902513   152.55 us/ms
Summary for 6229.00(ms)
    All Faults:       1395   223.95 counts/sec
        Iowait:        359    57.63 counts/sec
   All latency:    6091882   977.99 us/ms
        Iowait:    1251183   200.86 us/ms
Summary for 6059.00(ms)
    All Faults:       1201   198.22 counts/sec
        Iowait:        299    49.35 counts/sec
   All latency:    5612143   926.25 us/ms
        Iowait:    1155555   190.72 us/ms
Summary for 6005.00(ms)
    All Faults:        847   141.05 counts/sec
        Iowait:        320    53.29 counts/sec
   All latency:    5852541   974.61 us/ms
        Iowait:     433719    72.23 us/ms
Summary for 5895.00(ms)
    All Faults:       1039   176.25 counts/sec
        Iowait:        288    48.85 counts/sec
   All latency:    4184680   709.87 us/ms
        Iowait:     686266   116.41 us/ms
Summary for 6371.00(ms)
    All Faults:       1176   184.59 counts/sec
        Iowait:        269    42.22 counts/sec
   All latency:    6282918   986.17 us/ms
        Iowait:    1160952   182.22 us/ms
Summary for 6113.00(ms)
    All Faults:       1322   216.26 counts/sec
        Iowait:        281    45.97 counts/sec
   All latency:    7208880  1179.27 us/ms
        Iowait:    1336650   218.66 us/ms

commit:
Summary for 7225.00(ms)
    All Faults:       1384   191.56 counts/sec
        Iowait:        285    39.45 counts/sec
   All latency:    6593081   912.54 us/ms
        Iowait:     934041   129.28 us/ms
Summary for 6567.00(ms)
    All Faults:       1378   209.84 counts/sec
        Iowait:        167    25.43 counts/sec
   All latency:    3761554   572.80 us/ms
        Iowait:     220621    33.60 us/ms
Summary for 6118.00(ms)
    All Faults:       1304   213.14 counts/sec
        Iowait:        268    43.81 counts/sec
   All latency:    3835332   626.89 us/ms
        Iowait:     413900    67.65 us/ms
Summary for 6155.00(ms)
    All Faults:       1177   191.23 counts/sec
        Iowait:        185    30.06 counts/sec
   All latency:    4839084   786.20 us/ms
        Iowait:     660002   107.23 us/ms
Summary for 6448.00(ms)
    All Faults:       1283   198.98 counts/sec
        Iowait:        353    54.75 counts/sec
   All latency:    4798334   744.16 us/ms
        Iowait:    1258045   195.11 us/ms
Summary for 6179.00(ms)
    All Faults:       1285   207.96 counts/sec
        Iowait:        137    22.17 counts/sec
   All latency:    3668456   593.70 us/ms
        Iowait:     419731    67.93 us/ms
Summary for 6165.00(ms)
    All Faults:       1500   243.31 counts/sec
        Iowait:        182    29.52 counts/sec
   All latency:    3357435   544.60 us/ms
        Iowait:     279828    45.39 us/ms
Summary for 6270.00(ms)
    All Faults:       1507   240.35 counts/sec
        Iowait:        361    57.58 counts/sec
   All latency:    4428320   706.27 us/ms
        Iowait:     741304   118.23 us/ms
Summary for 6597.00(ms)
    All Faults:       1263   191.45 counts/sec
        Iowait:        238    36.08 counts/sec
   All latency:    5115168   775.38 us/ms
        Iowait:     950482   144.08 us/ms
Summary for 6503.00(ms)
    All Faults:       1456   223.90 counts/sec
        Iowait:        402    61.82 counts/sec
   All latency:    6782757  1043.02 us/ms
        Iowait:    1483803   228.17 us/ms

Signed-off-by: Zhaoyang Huang <zhaoyang.huang@unisoc.com>
---
 block/Kconfig.iosched |  7 +++++
 block/mq-deadline.c   | 70 +++++++++++++++++++++++++++++++++++--------
 2 files changed, 64 insertions(+), 13 deletions(-)

Comments

Jens Axboe Jan. 17, 2024, 3:20 p.m. UTC | #1
On 1/17/24 2:23 AM, zhaoyang.huang wrote:
> [1]
> ./fault_latency.bin 1 5 > /data/dd_costmem &
> costmem -c0 -a2048000 -b128000 -o0 1>/dev/null &
> costmem -c0 -a2048000 -b128000 -o0 1>/dev/null &
> costmem -c0 -a2048000 -b128000 -o0 1>/dev/null &
> costmem -c0 -a2048000 -b128000 -o0 1>/dev/null &
> dd if=/dev/block/sda of=/data/ddtest bs=1024 count=2048000 &
> dd if=/dev/block/sda of=/data/ddtest1 bs=1024 count=2048000 &
> dd if=/dev/block/sda of=/data/ddtest2 bs=1024 count=2048000 &
> dd if=/dev/block/sda of=/data/ddtest3 bs=1024 count=2048000
> 
> [2]
> mainline:
> Summary for 5932.00(ms)
>     All Faults:       1398   235.67 counts/sec
>         Iowait:        553    93.22 counts/sec
>    All latency:    7432948  1253.03 us/ms
>         Iowait:    1321971   222.85 us/ms
> Summary for 6706.00(ms)
>     All Faults:       1921   286.46 counts/sec
>         Iowait:       1273   189.83 counts/sec
>    All latency:   25890252  3860.76 us/ms
>         Iowait:    4468861   666.40 us/ms
> Summary for 5838.00(ms)
>     All Faults:       1580   270.64 counts/sec
>         Iowait:        619   106.03 counts/sec
>    All latency:    6862215  1175.44 us/ms
>         Iowait:    1077616   184.59 us/ms
> Summary for 5916.00(ms)
>     All Faults:       1195   201.99 counts/sec
>         Iowait:        494    83.50 counts/sec
>    All latency:    4555134   769.97 us/ms
>         Iowait:     902513   152.55 us/ms
> Summary for 6229.00(ms)
>     All Faults:       1395   223.95 counts/sec
>         Iowait:        359    57.63 counts/sec
>    All latency:    6091882   977.99 us/ms
>         Iowait:    1251183   200.86 us/ms
> Summary for 6059.00(ms)
>     All Faults:       1201   198.22 counts/sec
>         Iowait:        299    49.35 counts/sec
>    All latency:    5612143   926.25 us/ms
>         Iowait:    1155555   190.72 us/ms
> Summary for 6005.00(ms)
>     All Faults:        847   141.05 counts/sec
>         Iowait:        320    53.29 counts/sec
>    All latency:    5852541   974.61 us/ms
>         Iowait:     433719    72.23 us/ms
> Summary for 5895.00(ms)
>     All Faults:       1039   176.25 counts/sec
>         Iowait:        288    48.85 counts/sec
>    All latency:    4184680   709.87 us/ms
>         Iowait:     686266   116.41 us/ms
> Summary for 6371.00(ms)
>     All Faults:       1176   184.59 counts/sec
>         Iowait:        269    42.22 counts/sec
>    All latency:    6282918   986.17 us/ms
>         Iowait:    1160952   182.22 us/ms
> Summary for 6113.00(ms)
>     All Faults:       1322   216.26 counts/sec
>         Iowait:        281    45.97 counts/sec
>    All latency:    7208880  1179.27 us/ms
>         Iowait:    1336650   218.66 us/ms
> 
> commit:
> Summary for 7225.00(ms)
>     All Faults:       1384   191.56 counts/sec
>         Iowait:        285    39.45 counts/sec
>    All latency:    6593081   912.54 us/ms
>         Iowait:     934041   129.28 us/ms
> Summary for 6567.00(ms)
>     All Faults:       1378   209.84 counts/sec
>         Iowait:        167    25.43 counts/sec
>    All latency:    3761554   572.80 us/ms
>         Iowait:     220621    33.60 us/ms
> Summary for 6118.00(ms)
>     All Faults:       1304   213.14 counts/sec
>         Iowait:        268    43.81 counts/sec
>    All latency:    3835332   626.89 us/ms
>         Iowait:     413900    67.65 us/ms
> Summary for 6155.00(ms)
>     All Faults:       1177   191.23 counts/sec
>         Iowait:        185    30.06 counts/sec
>    All latency:    4839084   786.20 us/ms
>         Iowait:     660002   107.23 us/ms
> Summary for 6448.00(ms)
>     All Faults:       1283   198.98 counts/sec
>         Iowait:        353    54.75 counts/sec
>    All latency:    4798334   744.16 us/ms
>         Iowait:    1258045   195.11 us/ms
> Summary for 6179.00(ms)
>     All Faults:       1285   207.96 counts/sec
>         Iowait:        137    22.17 counts/sec
>    All latency:    3668456   593.70 us/ms
>         Iowait:     419731    67.93 us/ms
> Summary for 6165.00(ms)
>     All Faults:       1500   243.31 counts/sec
>         Iowait:        182    29.52 counts/sec
>    All latency:    3357435   544.60 us/ms
>         Iowait:     279828    45.39 us/ms
> Summary for 6270.00(ms)
>     All Faults:       1507   240.35 counts/sec
>         Iowait:        361    57.58 counts/sec
>    All latency:    4428320   706.27 us/ms
>         Iowait:     741304   118.23 us/ms
> Summary for 6597.00(ms)
>     All Faults:       1263   191.45 counts/sec
>         Iowait:        238    36.08 counts/sec
>    All latency:    5115168   775.38 us/ms
>         Iowait:     950482   144.08 us/ms
> Summary for 6503.00(ms)
>     All Faults:       1456   223.90 counts/sec
>         Iowait:        402    61.82 counts/sec
>    All latency:    6782757  1043.02 us/ms
>         Iowait:    1483803   228.17 us/ms

That's a lot of data, and I'm sure you did some analysis of this to
conclude that the change makes a positive difference. It would be
prudent to include such analysis, rather than just a raw dump of data.
On top of that, you don't mention what you are testing on - what is sda?

A few comments - regardless of whether or not this change makes
functional sense.

> diff --git a/block/Kconfig.iosched b/block/Kconfig.iosched
> index 27f11320b8d1..cd6fcfca7782 100644
> --- a/block/Kconfig.iosched
> +++ b/block/Kconfig.iosched
> @@ -44,4 +44,11 @@ config BFQ_CGROUP_DEBUG
>  	Enable some debugging help. Currently it exports additional stat
>  	files in a cgroup which can be useful for debugging.
>  
> +config ACTIVITY_BASED_IOPRIO
> +	bool "Enable folio activity based ioprio on deadline"
> +	depends on LRU_GEN && MQ_IOSCHED_DEADLINE
> +	default n
> +	help
> +	This item enable the feature of adjust request's priority by
> +	calculating its folio's activity.

Doesn't seem like this should be a config thing. In any case, 'default
n' is the default, so you should kill that line.

> @@ -224,14 +225,42 @@ static void deadline_remove_request(struct request_queue *q,
>  		q->last_merge = NULL;
>  }
>  
> +static enum dd_prio dd_req_ioprio(struct request *rq)
> +{
> +	enum dd_prio prio;
> +	const u8 ioprio_class = dd_rq_ioclass(rq);
> +#ifdef CONFIG_ACTIVITY_BASED_IOPRIO
> +	struct bio *bio;
> +	struct bio_vec bv;
> +	struct bvec_iter iter;
> +	struct page *page;
> +	int gen = 0;
> +	int cnt = 0;
> +
> +	if (req_op(rq) == REQ_OP_READ) {
> +		__rq_for_each_bio(bio, rq) {
> +			bio_for_each_bvec(bv, bio, iter) {
> +				page = bv.bv_page;
> +				gen += PageWorkingset(page) ? 1 : 0;
> +				cnt++;
> +			}
> +		}
> +		prio = (gen >= cnt / 2) ? ioprio_class_to_prio[IOPRIO_CLASS_RT] :
> +			ioprio_class_to_prio[ioprio_class];
> +	} else
> +		prio = ioprio_class_to_prio[ioprio_class];
> +#else
> +	prio = ioprio_class_to_prio[ioprio_class];
> +#endif
> +	return prio;
> +}

This is pretty awful imho, you're iterating the pages which isn't
exactly cheap. There's also a ternary operator (get rid of it), and
magic cnt / 2 which isn't even explained.

This would make much more sense to do when the page is added to the bio,
rather than try and fix up the prio after the fact.

>  static void dd_request_merged(struct request_queue *q, struct request *req,
>  			      enum elv_merge type)
>  {
>  	struct deadline_data *dd = q->elevator->elevator_data;
> -	const u8 ioprio_class = dd_rq_ioclass(req);
> -	const enum dd_prio prio = ioprio_class_to_prio[ioprio_class];
> +	const enum dd_prio prio = dd_req_ioprio(req);
>  	struct dd_per_prio *per_prio = &dd->per_prio[prio];
> -
>  	/*
>  	 * if the merge was a front merge, we need to reposition request
>  	 */
> @@ -248,8 +277,7 @@ static void dd_merged_requests(struct request_queue *q, struct request *req,
>  			       struct request *next)
>  {
>  	struct deadline_data *dd = q->elevator->elevator_data;
> -	const u8 ioprio_class = dd_rq_ioclass(next);
> -	const enum dd_prio prio = ioprio_class_to_prio[ioprio_class];
> +	const enum dd_prio prio = dd_req_ioprio(next);
>  
>  	lockdep_assert_held(&dd->lock);

What are these changes?

> @@ -745,10 +773,30 @@ static int dd_request_merge(struct request_queue *q, struct request **rq,
>  {
>  	struct deadline_data *dd = q->elevator->elevator_data;
>  	const u8 ioprio_class = IOPRIO_PRIO_CLASS(bio->bi_ioprio);
> -	const enum dd_prio prio = ioprio_class_to_prio[ioprio_class];
> -	struct dd_per_prio *per_prio = &dd->per_prio[prio];
> +	struct dd_per_prio *per_prio;
>  	sector_t sector = bio_end_sector(bio);
>  	struct request *__rq;
> +#ifdef CONFIG_ACTIVITY_BASED_IOPRIO
> +	enum dd_prio prio;
> +	struct bio_vec bv;
> +	struct bvec_iter iter;
> +	struct page *page;
> +	int gen = 0;
> +	int cnt = 0;
> +
> +	if (bio_op(bio) == REQ_OP_READ) {
> +		bio_for_each_bvec(bv, bio, iter) {
> +			page = bv.bv_page;
> +			gen += PageWorkingset(page) ? 1 : 0;
> +			cnt++;
> +		}
> +	}
> +	prio = (gen >= cnt / 2) ? ioprio_class_to_prio[IOPRIO_CLASS_RT] :
> +			ioprio_class_to_prio[ioprio_class];
> +#else
> +	const enum dd_prio prio = ioprio_class_to_prio[ioprio_class];
> +#endif
> +	per_prio = &dd->per_prio[prio];

And here you duplicate the entire thing from above again?

> @@ -798,10 +846,8 @@ static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
>  	struct request_queue *q = hctx->queue;
>  	struct deadline_data *dd = q->elevator->elevator_data;
>  	const enum dd_data_dir data_dir = rq_data_dir(rq);
> -	u16 ioprio = req_get_ioprio(rq);
> -	u8 ioprio_class = IOPRIO_PRIO_CLASS(ioprio);
>  	struct dd_per_prio *per_prio;
> -	enum dd_prio prio;
> +	enum dd_prio prio = dd_req_ioprio(rq);
>  
>  	lockdep_assert_held(&dd->lock);
>  
> @@ -811,7 +857,6 @@ static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
>  	 */
>  	blk_req_zone_write_unlock(rq);
>  
> -	prio = ioprio_class_to_prio[ioprio_class];
>  	per_prio = &dd->per_prio[prio];
>  	if (!rq->elv.priv[0]) {
>  		per_prio->stats.inserted++;
> @@ -920,8 +965,7 @@ static void dd_finish_request(struct request *rq)
>  {
>  	struct request_queue *q = rq->q;
>  	struct deadline_data *dd = q->elevator->elevator_data;
> -	const u8 ioprio_class = dd_rq_ioclass(rq);
> -	const enum dd_prio prio = ioprio_class_to_prio[ioprio_class];
> +	const enum dd_prio prio = dd_req_ioprio(rq);
>  	struct dd_per_prio *per_prio = &dd->per_prio[prio];

And again these changes?
Bart Van Assche Jan. 17, 2024, 5:29 p.m. UTC | #2
On 1/17/24 01:23, zhaoyang.huang wrote:
> +static enum dd_prio dd_req_ioprio(struct request *rq)
> +{
> +	enum dd_prio prio;
> +	const u8 ioprio_class = dd_rq_ioclass(rq);
> +#ifdef CONFIG_ACTIVITY_BASED_IOPRIO
> +	struct bio *bio;
> +	struct bio_vec bv;
> +	struct bvec_iter iter;
> +	struct page *page;
> +	int gen = 0;
> +	int cnt = 0;
> +
> +	if (req_op(rq) == REQ_OP_READ) {
> +		__rq_for_each_bio(bio, rq) {
> +			bio_for_each_bvec(bv, bio, iter) {
> +				page = bv.bv_page;
> +				gen += PageWorkingset(page) ? 1 : 0;
> +				cnt++;
> +			}
> +		}
> +		prio = (gen >= cnt / 2) ? ioprio_class_to_prio[IOPRIO_CLASS_RT] :
> +			ioprio_class_to_prio[ioprio_class];
> +	} else
> +		prio = ioprio_class_to_prio[ioprio_class];
> +#else
> +	prio = ioprio_class_to_prio[ioprio_class];
> +#endif
> +	return prio;
> +}

I don't like it that code is introduced in the mq-deadline scheduler
that accesses page cache information. Isn't that a layering violation?
Additionally, this approach only works for buffered I/O and not for
direct I/O. Shouldn't the I/O submitter set the I/O priority instead of
deciding the I/O priority in the mq-deadline scheduler?

Bart.
Zhaoyang Huang Jan. 18, 2024, 7:48 a.m. UTC | #3
On Wed, Jan 17, 2024 at 11:20 PM Jens Axboe <axboe@kernel.dk> wrote:
>
> On 1/17/24 2:23 AM, zhaoyang.huang wrote:
> > [1]
> > ./fault_latency.bin 1 5 > /data/dd_costmem &
> > costmem -c0 -a2048000 -b128000 -o0 1>/dev/null &
> > costmem -c0 -a2048000 -b128000 -o0 1>/dev/null &
> > costmem -c0 -a2048000 -b128000 -o0 1>/dev/null &
> > costmem -c0 -a2048000 -b128000 -o0 1>/dev/null &
> > dd if=/dev/block/sda of=/data/ddtest bs=1024 count=2048000 &
> > dd if=/dev/block/sda of=/data/ddtest1 bs=1024 count=2048000 &
> > dd if=/dev/block/sda of=/data/ddtest2 bs=1024 count=2048000 &
> > dd if=/dev/block/sda of=/data/ddtest3 bs=1024 count=2048000
> >
> > [2]
> > mainline:
> > Summary for 5932.00(ms)
> >     All Faults:       1398   235.67 counts/sec
> >         Iowait:        553    93.22 counts/sec
> >    All latency:    7432948  1253.03 us/ms
> >         Iowait:    1321971   222.85 us/ms
> > Summary for 6706.00(ms)
> >     All Faults:       1921   286.46 counts/sec
> >         Iowait:       1273   189.83 counts/sec
> >    All latency:   25890252  3860.76 us/ms
> >         Iowait:    4468861   666.40 us/ms
> > Summary for 5838.00(ms)
> >     All Faults:       1580   270.64 counts/sec
> >         Iowait:        619   106.03 counts/sec
> >    All latency:    6862215  1175.44 us/ms
> >         Iowait:    1077616   184.59 us/ms
> > Summary for 5916.00(ms)
> >     All Faults:       1195   201.99 counts/sec
> >         Iowait:        494    83.50 counts/sec
> >    All latency:    4555134   769.97 us/ms
> >         Iowait:     902513   152.55 us/ms
> > Summary for 6229.00(ms)
> >     All Faults:       1395   223.95 counts/sec
> >         Iowait:        359    57.63 counts/sec
> >    All latency:    6091882   977.99 us/ms
> >         Iowait:    1251183   200.86 us/ms
> > Summary for 6059.00(ms)
> >     All Faults:       1201   198.22 counts/sec
> >         Iowait:        299    49.35 counts/sec
> >    All latency:    5612143   926.25 us/ms
> >         Iowait:    1155555   190.72 us/ms
> > Summary for 6005.00(ms)
> >     All Faults:        847   141.05 counts/sec
> >         Iowait:        320    53.29 counts/sec
> >    All latency:    5852541   974.61 us/ms
> >         Iowait:     433719    72.23 us/ms
> > Summary for 5895.00(ms)
> >     All Faults:       1039   176.25 counts/sec
> >         Iowait:        288    48.85 counts/sec
> >    All latency:    4184680   709.87 us/ms
> >         Iowait:     686266   116.41 us/ms
> > Summary for 6371.00(ms)
> >     All Faults:       1176   184.59 counts/sec
> >         Iowait:        269    42.22 counts/sec
> >    All latency:    6282918   986.17 us/ms
> >         Iowait:    1160952   182.22 us/ms
> > Summary for 6113.00(ms)
> >     All Faults:       1322   216.26 counts/sec
> >         Iowait:        281    45.97 counts/sec
> >    All latency:    7208880  1179.27 us/ms
> >         Iowait:    1336650   218.66 us/ms
> >
> > commit:
> > Summary for 7225.00(ms)
> >     All Faults:       1384   191.56 counts/sec
> >         Iowait:        285    39.45 counts/sec
> >    All latency:    6593081   912.54 us/ms
> >         Iowait:     934041   129.28 us/ms
> > Summary for 6567.00(ms)
> >     All Faults:       1378   209.84 counts/sec
> >         Iowait:        167    25.43 counts/sec
> >    All latency:    3761554   572.80 us/ms
> >         Iowait:     220621    33.60 us/ms
> > Summary for 6118.00(ms)
> >     All Faults:       1304   213.14 counts/sec
> >         Iowait:        268    43.81 counts/sec
> >    All latency:    3835332   626.89 us/ms
> >         Iowait:     413900    67.65 us/ms
> > Summary for 6155.00(ms)
> >     All Faults:       1177   191.23 counts/sec
> >         Iowait:        185    30.06 counts/sec
> >    All latency:    4839084   786.20 us/ms
> >         Iowait:     660002   107.23 us/ms
> > Summary for 6448.00(ms)
> >     All Faults:       1283   198.98 counts/sec
> >         Iowait:        353    54.75 counts/sec
> >    All latency:    4798334   744.16 us/ms
> >         Iowait:    1258045   195.11 us/ms
> > Summary for 6179.00(ms)
> >     All Faults:       1285   207.96 counts/sec
> >         Iowait:        137    22.17 counts/sec
> >    All latency:    3668456   593.70 us/ms
> >         Iowait:     419731    67.93 us/ms
> > Summary for 6165.00(ms)
> >     All Faults:       1500   243.31 counts/sec
> >         Iowait:        182    29.52 counts/sec
> >    All latency:    3357435   544.60 us/ms
> >         Iowait:     279828    45.39 us/ms
> > Summary for 6270.00(ms)
> >     All Faults:       1507   240.35 counts/sec
> >         Iowait:        361    57.58 counts/sec
> >    All latency:    4428320   706.27 us/ms
> >         Iowait:     741304   118.23 us/ms
> > Summary for 6597.00(ms)
> >     All Faults:       1263   191.45 counts/sec
> >         Iowait:        238    36.08 counts/sec
> >    All latency:    5115168   775.38 us/ms
> >         Iowait:     950482   144.08 us/ms
> > Summary for 6503.00(ms)
> >     All Faults:       1456   223.90 counts/sec
> >         Iowait:        402    61.82 counts/sec
> >    All latency:    6782757  1043.02 us/ms
> >         Iowait:    1483803   228.17 us/ms
>
> That's a lot of data, and I'm sure you did some analysis of this to
> conclude that the change makes a positive difference. It would be
> prudent to include such analysis, rather than just a raw dump of data.
> On top of that, you don't mention what you are testing on - what is sda?
Please find below for summary of the test result. The test case is
collecting io time of mmaped VA(from scheduler point of view) among
mix access of mmaped and vfs file data concurrently. sda is a raw disk
partition which is used for source data of command dd.
                             mainline                           commit
io cost                    1379us                             736us
>
> A few comments - regardless of whether or not this change makes
> functional sense.
The commit's basic idea is the request priority should be decided by
the pages it carry instead of(or together with) the launcher.
>
> > diff --git a/block/Kconfig.iosched b/block/Kconfig.iosched
> > index 27f11320b8d1..cd6fcfca7782 100644
> > --- a/block/Kconfig.iosched
> > +++ b/block/Kconfig.iosched
> > @@ -44,4 +44,11 @@ config BFQ_CGROUP_DEBUG
> >       Enable some debugging help. Currently it exports additional stat
> >       files in a cgroup which can be useful for debugging.
> >
> > +config ACTIVITY_BASED_IOPRIO
> > +     bool "Enable folio activity based ioprio on deadline"
> > +     depends on LRU_GEN && MQ_IOSCHED_DEADLINE
> > +     default n
> > +     help
> > +     This item enable the feature of adjust request's priority by
> > +     calculating its folio's activity.
>
> Doesn't seem like this should be a config thing. In any case, 'default
> n' is the default, so you should kill that line.
ok
>
> > @@ -224,14 +225,42 @@ static void deadline_remove_request(struct request_queue *q,
> >               q->last_merge = NULL;
> >  }
> >
> > +static enum dd_prio dd_req_ioprio(struct request *rq)
> > +{
> > +     enum dd_prio prio;
> > +     const u8 ioprio_class = dd_rq_ioclass(rq);
> > +#ifdef CONFIG_ACTIVITY_BASED_IOPRIO
> > +     struct bio *bio;
> > +     struct bio_vec bv;
> > +     struct bvec_iter iter;
> > +     struct page *page;
> > +     int gen = 0;
> > +     int cnt = 0;
> > +
> > +     if (req_op(rq) == REQ_OP_READ) {
> > +             __rq_for_each_bio(bio, rq) {
> > +                     bio_for_each_bvec(bv, bio, iter) {
> > +                             page = bv.bv_page;
> > +                             gen += PageWorkingset(page) ? 1 : 0;
> > +                             cnt++;
> > +                     }
> > +             }
> > +             prio = (gen >= cnt / 2) ? ioprio_class_to_prio[IOPRIO_CLASS_RT] :
> > +                     ioprio_class_to_prio[ioprio_class];
> > +     } else
> > +             prio = ioprio_class_to_prio[ioprio_class];
> > +#else
> > +     prio = ioprio_class_to_prio[ioprio_class];
> > +#endif
> > +     return prio;
> > +}
>
> This is pretty awful imho, you're iterating the pages which isn't
> exactly cheap. There's also a ternary operator (get rid of it), and
> magic cnt / 2 which isn't even explained.
ok. The iterating is on purpose here to not enlarge the bio and
request structure. The magic number would be replaced by an more
sensible criteria like MULTI_GEN's thrashing tier things.
>
> This would make much more sense to do when the page is added to the bio,
> rather than try and fix up the prio after the fact.
>
> >  static void dd_request_merged(struct request_queue *q, struct request *req,
> >                             enum elv_merge type)
> >  {
> >       struct deadline_data *dd = q->elevator->elevator_data;
> > -     const u8 ioprio_class = dd_rq_ioclass(req);
> > -     const enum dd_prio prio = ioprio_class_to_prio[ioprio_class];
> > +     const enum dd_prio prio = dd_req_ioprio(req);
> >       struct dd_per_prio *per_prio = &dd->per_prio[prio];
> > -
> >       /*
> >        * if the merge was a front merge, we need to reposition request
> >        */
> > @@ -248,8 +277,7 @@ static void dd_merged_requests(struct request_queue *q, struct request *req,
> >                              struct request *next)
> >  {
> >       struct deadline_data *dd = q->elevator->elevator_data;
> > -     const u8 ioprio_class = dd_rq_ioclass(next);
> > -     const enum dd_prio prio = ioprio_class_to_prio[ioprio_class];
> > +     const enum dd_prio prio = dd_req_ioprio(next);
> >
> >       lockdep_assert_held(&dd->lock);
>
> What are these changes?
That is for code's integrity of getting the request's prio for either
this feature enabled or not.
>
> > @@ -745,10 +773,30 @@ static int dd_request_merge(struct request_queue *q, struct request **rq,
> >  {
> >       struct deadline_data *dd = q->elevator->elevator_data;
> >       const u8 ioprio_class = IOPRIO_PRIO_CLASS(bio->bi_ioprio);
> > -     const enum dd_prio prio = ioprio_class_to_prio[ioprio_class];
> > -     struct dd_per_prio *per_prio = &dd->per_prio[prio];
> > +     struct dd_per_prio *per_prio;
> >       sector_t sector = bio_end_sector(bio);
> >       struct request *__rq;
> > +#ifdef CONFIG_ACTIVITY_BASED_IOPRIO
> > +     enum dd_prio prio;
> > +     struct bio_vec bv;
> > +     struct bvec_iter iter;
> > +     struct page *page;
> > +     int gen = 0;
> > +     int cnt = 0;
> > +
> > +     if (bio_op(bio) == REQ_OP_READ) {
> > +             bio_for_each_bvec(bv, bio, iter) {
> > +                     page = bv.bv_page;
> > +                     gen += PageWorkingset(page) ? 1 : 0;
> > +                     cnt++;
> > +             }
> > +     }
> > +     prio = (gen >= cnt / 2) ? ioprio_class_to_prio[IOPRIO_CLASS_RT] :
> > +                     ioprio_class_to_prio[ioprio_class];
> > +#else
> > +     const enum dd_prio prio = ioprio_class_to_prio[ioprio_class];
> > +#endif
> > +     per_prio = &dd->per_prio[prio];
>
> And here you duplicate the entire thing from above again?
could be solved by updating the dd_req_ioprio
>
> > @@ -798,10 +846,8 @@ static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
> >       struct request_queue *q = hctx->queue;
> >       struct deadline_data *dd = q->elevator->elevator_data;
> >       const enum dd_data_dir data_dir = rq_data_dir(rq);
> > -     u16 ioprio = req_get_ioprio(rq);
> > -     u8 ioprio_class = IOPRIO_PRIO_CLASS(ioprio);
> >       struct dd_per_prio *per_prio;
> > -     enum dd_prio prio;
> > +     enum dd_prio prio = dd_req_ioprio(rq);
> >
> >       lockdep_assert_held(&dd->lock);
> >
> > @@ -811,7 +857,6 @@ static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
> >        */
> >       blk_req_zone_write_unlock(rq);
> >
> > -     prio = ioprio_class_to_prio[ioprio_class];
> >       per_prio = &dd->per_prio[prio];
> >       if (!rq->elv.priv[0]) {
> >               per_prio->stats.inserted++;
> > @@ -920,8 +965,7 @@ static void dd_finish_request(struct request *rq)
> >  {
> >       struct request_queue *q = rq->q;
> >       struct deadline_data *dd = q->elevator->elevator_data;
> > -     const u8 ioprio_class = dd_rq_ioclass(rq);
> > -     const enum dd_prio prio = ioprio_class_to_prio[ioprio_class];
> > +     const enum dd_prio prio = dd_req_ioprio(rq);
> >       struct dd_per_prio *per_prio = &dd->per_prio[prio];
>
> And again these changes?
>
> --
> Jens Axboe
>
Zhaoyang Huang Jan. 18, 2024, 8:40 a.m. UTC | #4
On Thu, Jan 18, 2024 at 1:29 AM Bart Van Assche <bvanassche@acm.org> wrote:
>
> On 1/17/24 01:23, zhaoyang.huang wrote:
> > +static enum dd_prio dd_req_ioprio(struct request *rq)
> > +{
> > +     enum dd_prio prio;
> > +     const u8 ioprio_class = dd_rq_ioclass(rq);
> > +#ifdef CONFIG_ACTIVITY_BASED_IOPRIO
> > +     struct bio *bio;
> > +     struct bio_vec bv;
> > +     struct bvec_iter iter;
> > +     struct page *page;
> > +     int gen = 0;
> > +     int cnt = 0;
> > +
> > +     if (req_op(rq) == REQ_OP_READ) {
> > +             __rq_for_each_bio(bio, rq) {
> > +                     bio_for_each_bvec(bv, bio, iter) {
> > +                             page = bv.bv_page;
> > +                             gen += PageWorkingset(page) ? 1 : 0;
> > +                             cnt++;
> > +                     }
> > +             }
> > +             prio = (gen >= cnt / 2) ? ioprio_class_to_prio[IOPRIO_CLASS_RT] :
> > +                     ioprio_class_to_prio[ioprio_class];
> > +     } else
> > +             prio = ioprio_class_to_prio[ioprio_class];
> > +#else
> > +     prio = ioprio_class_to_prio[ioprio_class];
> > +#endif
> > +     return prio;
> > +}
>
> I don't like it that code is introduced in the mq-deadline scheduler
> that accesses page cache information. Isn't that a layering violation?
ok. I will try to update a new version to implement these in block layer

> Additionally, this approach only works for buffered I/O and not for
> direct I/O. Shouldn't the I/O submitter set the I/O priority instead of
> deciding the I/O priority in the mq-deadline scheduler?
That's just the purpose of this commit, that is, introducing content
activity based ioprio OR the one that submitter decided
>
> Bart.
Jens Axboe Jan. 18, 2024, 3:25 p.m. UTC | #5
On 1/18/24 12:48 AM, Zhaoyang Huang wrote:
>>> +static enum dd_prio dd_req_ioprio(struct request *rq)
>>> +{
>>> +     enum dd_prio prio;
>>> +     const u8 ioprio_class = dd_rq_ioclass(rq);
>>> +#ifdef CONFIG_ACTIVITY_BASED_IOPRIO
>>> +     struct bio *bio;
>>> +     struct bio_vec bv;
>>> +     struct bvec_iter iter;
>>> +     struct page *page;
>>> +     int gen = 0;
>>> +     int cnt = 0;
>>> +
>>> +     if (req_op(rq) == REQ_OP_READ) {
>>> +             __rq_for_each_bio(bio, rq) {
>>> +                     bio_for_each_bvec(bv, bio, iter) {
>>> +                             page = bv.bv_page;
>>> +                             gen += PageWorkingset(page) ? 1 : 0;
>>> +                             cnt++;
>>> +                     }
>>> +             }
>>> +             prio = (gen >= cnt / 2) ? ioprio_class_to_prio[IOPRIO_CLASS_RT] :
>>> +                     ioprio_class_to_prio[ioprio_class];
>>> +     } else
>>> +             prio = ioprio_class_to_prio[ioprio_class];
>>> +#else
>>> +     prio = ioprio_class_to_prio[ioprio_class];
>>> +#endif
>>> +     return prio;
>>> +}
>>
>> This is pretty awful imho, you're iterating the pages which isn't
>> exactly cheap. There's also a ternary operator (get rid of it), and
>> magic cnt / 2 which isn't even explained.

> ok. The iterating is on purpose here to not enlarge the bio and
> request structure. The magic number would be replaced by an more
> sensible criteria like MULTI_GEN's thrashing tier things.

It's totally backwards, you're adding code that both dips into parts it
very much should not, and attempting to fix things up after the fact.
Again, not passing any judgement on whether this kind of thing makes
sense, but if it did, you'd do it when adding the page to the bio, as I
mentioned elsewhere in the email. There's no need to grow struct bio, it
already has an ioprio field.
diff mbox series

Patch

diff --git a/block/Kconfig.iosched b/block/Kconfig.iosched
index 27f11320b8d1..cd6fcfca7782 100644
--- a/block/Kconfig.iosched
+++ b/block/Kconfig.iosched
@@ -44,4 +44,11 @@  config BFQ_CGROUP_DEBUG
 	Enable some debugging help. Currently it exports additional stat
 	files in a cgroup which can be useful for debugging.
 
+config ACTIVITY_BASED_IOPRIO
+	bool "Enable folio activity based ioprio on deadline"
+	depends on LRU_GEN && MQ_IOSCHED_DEADLINE
+	default n
+	help
+	This item enable the feature of adjust request's priority by
+	calculating its folio's activity.
 endmenu
diff --git a/block/mq-deadline.c b/block/mq-deadline.c
index f958e79277b8..dfb43fff2ffe 100644
--- a/block/mq-deadline.c
+++ b/block/mq-deadline.c
@@ -15,6 +15,7 @@ 
 #include <linux/compiler.h>
 #include <linux/rbtree.h>
 #include <linux/sbitmap.h>
+#include <linux/mm_inline.h>
 
 #include <trace/events/block.h>
 
@@ -224,14 +225,42 @@  static void deadline_remove_request(struct request_queue *q,
 		q->last_merge = NULL;
 }
 
+static enum dd_prio dd_req_ioprio(struct request *rq)
+{
+	enum dd_prio prio;
+	const u8 ioprio_class = dd_rq_ioclass(rq);
+#ifdef CONFIG_ACTIVITY_BASED_IOPRIO
+	struct bio *bio;
+	struct bio_vec bv;
+	struct bvec_iter iter;
+	struct page *page;
+	int gen = 0;
+	int cnt = 0;
+
+	if (req_op(rq) == REQ_OP_READ) {
+		__rq_for_each_bio(bio, rq) {
+			bio_for_each_bvec(bv, bio, iter) {
+				page = bv.bv_page;
+				gen += PageWorkingset(page) ? 1 : 0;
+				cnt++;
+			}
+		}
+		prio = (gen >= cnt / 2) ? ioprio_class_to_prio[IOPRIO_CLASS_RT] :
+			ioprio_class_to_prio[ioprio_class];
+	} else
+		prio = ioprio_class_to_prio[ioprio_class];
+#else
+	prio = ioprio_class_to_prio[ioprio_class];
+#endif
+	return prio;
+}
+
 static void dd_request_merged(struct request_queue *q, struct request *req,
 			      enum elv_merge type)
 {
 	struct deadline_data *dd = q->elevator->elevator_data;
-	const u8 ioprio_class = dd_rq_ioclass(req);
-	const enum dd_prio prio = ioprio_class_to_prio[ioprio_class];
+	const enum dd_prio prio = dd_req_ioprio(req);
 	struct dd_per_prio *per_prio = &dd->per_prio[prio];
-
 	/*
 	 * if the merge was a front merge, we need to reposition request
 	 */
@@ -248,8 +277,7 @@  static void dd_merged_requests(struct request_queue *q, struct request *req,
 			       struct request *next)
 {
 	struct deadline_data *dd = q->elevator->elevator_data;
-	const u8 ioprio_class = dd_rq_ioclass(next);
-	const enum dd_prio prio = ioprio_class_to_prio[ioprio_class];
+	const enum dd_prio prio = dd_req_ioprio(next);
 
 	lockdep_assert_held(&dd->lock);
 
@@ -745,10 +773,30 @@  static int dd_request_merge(struct request_queue *q, struct request **rq,
 {
 	struct deadline_data *dd = q->elevator->elevator_data;
 	const u8 ioprio_class = IOPRIO_PRIO_CLASS(bio->bi_ioprio);
-	const enum dd_prio prio = ioprio_class_to_prio[ioprio_class];
-	struct dd_per_prio *per_prio = &dd->per_prio[prio];
+	struct dd_per_prio *per_prio;
 	sector_t sector = bio_end_sector(bio);
 	struct request *__rq;
+#ifdef CONFIG_ACTIVITY_BASED_IOPRIO
+	enum dd_prio prio;
+	struct bio_vec bv;
+	struct bvec_iter iter;
+	struct page *page;
+	int gen = 0;
+	int cnt = 0;
+
+	if (bio_op(bio) == REQ_OP_READ) {
+		bio_for_each_bvec(bv, bio, iter) {
+			page = bv.bv_page;
+			gen += PageWorkingset(page) ? 1 : 0;
+			cnt++;
+		}
+	}
+	prio = (gen >= cnt / 2) ? ioprio_class_to_prio[IOPRIO_CLASS_RT] :
+			ioprio_class_to_prio[ioprio_class];
+#else
+	const enum dd_prio prio = ioprio_class_to_prio[ioprio_class];
+#endif
+	per_prio = &dd->per_prio[prio];
 
 	if (!dd->front_merges)
 		return ELEVATOR_NO_MERGE;
@@ -798,10 +846,8 @@  static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
 	struct request_queue *q = hctx->queue;
 	struct deadline_data *dd = q->elevator->elevator_data;
 	const enum dd_data_dir data_dir = rq_data_dir(rq);
-	u16 ioprio = req_get_ioprio(rq);
-	u8 ioprio_class = IOPRIO_PRIO_CLASS(ioprio);
 	struct dd_per_prio *per_prio;
-	enum dd_prio prio;
+	enum dd_prio prio = dd_req_ioprio(rq);
 
 	lockdep_assert_held(&dd->lock);
 
@@ -811,7 +857,6 @@  static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
 	 */
 	blk_req_zone_write_unlock(rq);
 
-	prio = ioprio_class_to_prio[ioprio_class];
 	per_prio = &dd->per_prio[prio];
 	if (!rq->elv.priv[0]) {
 		per_prio->stats.inserted++;
@@ -920,8 +965,7 @@  static void dd_finish_request(struct request *rq)
 {
 	struct request_queue *q = rq->q;
 	struct deadline_data *dd = q->elevator->elevator_data;
-	const u8 ioprio_class = dd_rq_ioclass(rq);
-	const enum dd_prio prio = ioprio_class_to_prio[ioprio_class];
+	const enum dd_prio prio = dd_req_ioprio(rq);
 	struct dd_per_prio *per_prio = &dd->per_prio[prio];
 
 	/*