diff mbox series

[RESEND,v6,1/8] blk-throttle: fix that io throttle can only work for single bio

Message ID 20220701093441.885741-2-yukuai1@huaweicloud.com (mailing list archive)
State New, archived
Headers show
Series bugfix and cleanup for blk-throttle | expand

Commit Message

Yu Kuai July 1, 2022, 9:34 a.m. UTC
From: Yu Kuai <yukuai3@huawei.com>

commit 9f5ede3c01f9 ("block: throttle split bio in case of iops limit")
introduce a new problem, for example:

Test scripts:
cd /sys/fs/cgroup/blkio/
echo "8:0 1024" > blkio.throttle.write_bps_device
echo $$ > cgroup.procs
dd if=/dev/zero of=/dev/sda bs=10k count=1 oflag=direct &
dd if=/dev/zero of=/dev/sda bs=10k count=1 oflag=direct &

Test result:
10240 bytes (10 kB, 10 KiB) copied, 10.0134 s, 1.0 kB/s
10240 bytes (10 kB, 10 KiB) copied, 10.0135 s, 1.0 kB/s

The problem is that the second bio is finished after 10s instead of 20s.
This is because if some bios are already queued, current bio is queued
directly and the flag 'BIO_THROTTLED' is set. And later, when former
bios are dispatched, this bio will be dispatched without waiting at all,
this is due to tg_with_in_bps_limit() return 0 for this bio.

In order to fix the problem, don't skip flaged bio in
tg_with_in_bps_limit(), and for the problem that split bio can be
double accounted, compensate the over-accounting in __blk_throtl_bio().

Fixes: 9f5ede3c01f9 ("block: throttle split bio in case of iops limit")
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
---
 block/blk-throttle.c | 24 ++++++++++++++++++------
 1 file changed, 18 insertions(+), 6 deletions(-)

Comments

Tejun Heo July 27, 2022, 6:27 p.m. UTC | #1
Sorry about the long delay.

So, the code looks nice but I have a difficult time following the logic.

On Fri, Jul 01, 2022 at 05:34:34PM +0800, Yu Kuai wrote:
> @@ -811,7 +811,7 @@ static bool tg_with_in_bps_limit(struct throtl_grp *tg, struct bio *bio,
>  	unsigned int bio_size = throtl_bio_data_size(bio);
>  
>  	/* no need to throttle if this bio's bytes have been accounted */
> -	if (bps_limit == U64_MAX || bio_flagged(bio, BIO_THROTTLED)) {
> +	if (bps_limit == U64_MAX) {
>  		if (wait)
>  			*wait = 0;
>  		return true;
> @@ -921,11 +921,8 @@ static void throtl_charge_bio(struct throtl_grp *tg, struct bio *bio)
>  	unsigned int bio_size = throtl_bio_data_size(bio);
>  
>  	/* Charge the bio to the group */
> -	if (!bio_flagged(bio, BIO_THROTTLED)) {
> -		tg->bytes_disp[rw] += bio_size;
> -		tg->last_bytes_disp[rw] += bio_size;
> -	}
> -
> +	tg->bytes_disp[rw] += bio_size;
> +	tg->last_bytes_disp[rw] += bio_size;
>  	tg->io_disp[rw]++;
>  	tg->last_io_disp[rw]++;

So, we're charging and controlling whether it has already been throttled or
not.

> @@ -2121,6 +2118,21 @@ bool __blk_throtl_bio(struct bio *bio)
>  			tg->last_low_overflow_time[rw] = jiffies;
>  		throtl_downgrade_check(tg);
>  		throtl_upgrade_check(tg);
> +
> +		/*
> +		 * re-entered bio has accounted bytes already, so try to
> +		 * compensate previous over-accounting. However, if new
> +		 * slice is started, just forget it.
> +		 */
> +		if (bio_flagged(bio, BIO_THROTTLED)) {
> +			unsigned int bio_size = throtl_bio_data_size(bio);
> +
> +			if (tg->bytes_disp[rw] >= bio_size)
> +				tg->bytes_disp[rw] -= bio_size;
> +			if (tg->last_bytes_disp[rw] >= bio_size)
> +				tg->last_bytes_disp[rw] -= bio_size;
> +		}

and trying to restore the overaccounting. However, it's not clear why this
helps with the problem you're describing. The comment should be clearly
spelling out why it's done this way and how this works.

Also, blk_throttl_bio() doesn't call into __blk_throtl_bio() at all if
THROTTLED is set and HAS_IOPS_LIMIT is not, so if there are only bw limits,
we end up accounting these IOs twice?

Thanks.
Yu Kuai July 29, 2022, 6:32 a.m. UTC | #2
Hi, Tejun!

在 2022/07/28 2:27, Tejun Heo 写道:
> Sorry about the long delay.
> 
> So, the code looks nice but I have a difficult time following the logic.
> 
> On Fri, Jul 01, 2022 at 05:34:34PM +0800, Yu Kuai wrote:
>> @@ -811,7 +811,7 @@ static bool tg_with_in_bps_limit(struct throtl_grp *tg, struct bio *bio,
>>   	unsigned int bio_size = throtl_bio_data_size(bio);
>>   
>>   	/* no need to throttle if this bio's bytes have been accounted */
>> -	if (bps_limit == U64_MAX || bio_flagged(bio, BIO_THROTTLED)) {
>> +	if (bps_limit == U64_MAX) {
>>   		if (wait)
>>   			*wait = 0;
>>   		return true;
>> @@ -921,11 +921,8 @@ static void throtl_charge_bio(struct throtl_grp *tg, struct bio *bio)
>>   	unsigned int bio_size = throtl_bio_data_size(bio);
>>   
>>   	/* Charge the bio to the group */
>> -	if (!bio_flagged(bio, BIO_THROTTLED)) {
>> -		tg->bytes_disp[rw] += bio_size;
>> -		tg->last_bytes_disp[rw] += bio_size;
>> -	}
>> -
>> +	tg->bytes_disp[rw] += bio_size;
>> +	tg->last_bytes_disp[rw] += bio_size;
>>   	tg->io_disp[rw]++;
>>   	tg->last_io_disp[rw]++;
> 
> So, we're charging and controlling whether it has already been throttled or
> not.
> 
>> @@ -2121,6 +2118,21 @@ bool __blk_throtl_bio(struct bio *bio)
>>   			tg->last_low_overflow_time[rw] = jiffies;
>>   		throtl_downgrade_check(tg);
>>   		throtl_upgrade_check(tg);
>> +
>> +		/*
>> +		 * re-entered bio has accounted bytes already, so try to
>> +		 * compensate previous over-accounting. However, if new
>> +		 * slice is started, just forget it.
>> +		 */
>> +		if (bio_flagged(bio, BIO_THROTTLED)) {
>> +			unsigned int bio_size = throtl_bio_data_size(bio);
>> +
>> +			if (tg->bytes_disp[rw] >= bio_size)
>> +				tg->bytes_disp[rw] -= bio_size;
>> +			if (tg->last_bytes_disp[rw] >= bio_size)
>> +				tg->last_bytes_disp[rw] -= bio_size;
>> +		}
> 
> and trying to restore the overaccounting. However, it's not clear why this
> helps with the problem you're describing. The comment should be clearly
> spelling out why it's done this way and how this works.
> 
> Also, blk_throttl_bio() doesn't call into __blk_throtl_bio() at all if
> THROTTLED is set and HAS_IOPS_LIMIT is not, so if there are only bw limits,
> we end up accounting these IOs twice?
> 

We need to make sure following conditions is always hold:

1) If a bio is splited, iops limits should count multiple times, while
bps limits should only count once.
2) If a bio is issued while some bios are already throttled, bps limits
should not be ignored.

commit 9f5ede3c01f9 ("block: throttle split bio in case of iops limit")
fixes that 1) is not hold, while it breaks 2). Root cause is that such
bio will be flaged in __blk_throtl_bio(), and later
tg_with_in_bps_limit() will skip flaged bio.

In order to fix this problem, at first, I change that flaged bio won't
be skipped in tg_with_in_bps_limit():

-	if (!bio_flagged(bio, BIO_THROTTLED)) {
-		tg->bytes_disp[rw] += bio_size;
-		tg->last_bytes_disp[rw] += bio_size;
-	}
-
+	tg->bytes_disp[rw] += bio_size;
+	tg->last_bytes_disp[rw] += bio_size;

However, this will break that bps limits should only count once. Thus I
try to restore the overaccounting in __blk_throtl_bio() in such case:

+		if (bio_flagged(bio, BIO_THROTTLED)) {
+			unsigned int bio_size = throtl_bio_data_size(bio);
+
+			if (tg->bytes_disp[rw] >= bio_size)
+				tg->bytes_disp[rw] -= bio_size;
+			if (tg->last_bytes_disp[rw] >= bio_size)
+				tg->last_bytes_disp[rw] -= bio_size;
+		}

If new slice is not started, then the decrement should make sure this
bio won't be counted again. However, if new slice is started and the
condition 'bytes_disp >= bio_size' doesn't hold, this bio will end up
accounting twice.

Pleas let me know if you think this suituation is problematic, I'll try
to figure out a new way...

Thanks,
Kuai
Tejun Heo July 29, 2022, 6:04 p.m. UTC | #3
Hello,

On Fri, Jul 29, 2022 at 02:32:36PM +0800, Yu Kuai wrote:
> We need to make sure following conditions is always hold:
> 
> 1) If a bio is splited, iops limits should count multiple times, while
> bps limits should only count once.
> 2) If a bio is issued while some bios are already throttled, bps limits
> should not be ignored.
> 
> commit 9f5ede3c01f9 ("block: throttle split bio in case of iops limit")
> fixes that 1) is not hold, while it breaks 2). Root cause is that such
> bio will be flaged in __blk_throtl_bio(), and later
> tg_with_in_bps_limit() will skip flaged bio.
> 
> In order to fix this problem, at first, I change that flaged bio won't
> be skipped in tg_with_in_bps_limit():
> 
> -	if (!bio_flagged(bio, BIO_THROTTLED)) {
> -		tg->bytes_disp[rw] += bio_size;
> -		tg->last_bytes_disp[rw] += bio_size;
> -	}
> -
> +	tg->bytes_disp[rw] += bio_size;
> +	tg->last_bytes_disp[rw] += bio_size;
> 
> However, this will break that bps limits should only count once. Thus I
> try to restore the overaccounting in __blk_throtl_bio() in such case:
> 
> +		if (bio_flagged(bio, BIO_THROTTLED)) {
> +			unsigned int bio_size = throtl_bio_data_size(bio);
> +
> +			if (tg->bytes_disp[rw] >= bio_size)
> +				tg->bytes_disp[rw] -= bio_size;
> +			if (tg->last_bytes_disp[rw] >= bio_size)
> +				tg->last_bytes_disp[rw] -= bio_size;
> +		}
> 
> If new slice is not started, then the decrement should make sure this
> bio won't be counted again. However, if new slice is started and the
> condition 'bytes_disp >= bio_size' doesn't hold, this bio will end up
> accounting twice.
> 
> Pleas let me know if you think this suituation is problematic, I'll try
> to figure out a new way...

While a bit tricky, I think it's fine but please add comments in the code
explaining what's going on and why. Also, can you please explain why
__blk_throtl_bio() being skipped when iops limit is not set doesn't skew the
result?

Thanks.
Yu Kuai July 30, 2022, 1:06 a.m. UTC | #4
在 2022/07/30 2:04, Tejun Heo 写道:
> Hello,
> 
> On Fri, Jul 29, 2022 at 02:32:36PM +0800, Yu Kuai wrote:
>> We need to make sure following conditions is always hold:
>>
>> 1) If a bio is splited, iops limits should count multiple times, while
>> bps limits should only count once.
>> 2) If a bio is issued while some bios are already throttled, bps limits
>> should not be ignored.
>>
>> commit 9f5ede3c01f9 ("block: throttle split bio in case of iops limit")
>> fixes that 1) is not hold, while it breaks 2). Root cause is that such
>> bio will be flaged in __blk_throtl_bio(), and later
>> tg_with_in_bps_limit() will skip flaged bio.
>>
>> In order to fix this problem, at first, I change that flaged bio won't
>> be skipped in tg_with_in_bps_limit():
>>
>> -	if (!bio_flagged(bio, BIO_THROTTLED)) {
>> -		tg->bytes_disp[rw] += bio_size;
>> -		tg->last_bytes_disp[rw] += bio_size;
>> -	}
>> -
>> +	tg->bytes_disp[rw] += bio_size;
>> +	tg->last_bytes_disp[rw] += bio_size;
>>
>> However, this will break that bps limits should only count once. Thus I
>> try to restore the overaccounting in __blk_throtl_bio() in such case:
>>
>> +		if (bio_flagged(bio, BIO_THROTTLED)) {
>> +			unsigned int bio_size = throtl_bio_data_size(bio);
>> +
>> +			if (tg->bytes_disp[rw] >= bio_size)
>> +				tg->bytes_disp[rw] -= bio_size;
>> +			if (tg->last_bytes_disp[rw] >= bio_size)
>> +				tg->last_bytes_disp[rw] -= bio_size;
>> +		}
>>
>> If new slice is not started, then the decrement should make sure this
>> bio won't be counted again. However, if new slice is started and the
>> condition 'bytes_disp >= bio_size' doesn't hold, this bio will end up
>> accounting twice.
>>
>> Pleas let me know if you think this suituation is problematic, I'll try
>> to figure out a new way...
> 
> While a bit tricky, I think it's fine but please add comments in the code
> explaining what's going on and why. Also, can you please explain why
> __blk_throtl_bio() being skipped when iops limit is not set doesn't skew the
> result?

Because bps limit is already counted the first time __blk_throtl_bio()
is called for the orignal bio. When splited bio is reentered, we only
need to throttle it again if iops limit is set.

By the way, I found that this way is better after patch 4:

in __blk_throtl_bio():

if (bio_flagged(bio, BIO_THROTTLED)) {
	tg->bytes_skipped[rw] += bio_size;
	if (tg->last_bytes_disp[rw] >= bio_size)
		tg->last_bytes_disp[rw] -= bio_size;
}

The overaccounting can be restored even if new slice is started.

Thanks,
Kuai
diff mbox series

Patch

diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 139b2d7a99e2..5c1d1c4d8188 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -811,7 +811,7 @@  static bool tg_with_in_bps_limit(struct throtl_grp *tg, struct bio *bio,
 	unsigned int bio_size = throtl_bio_data_size(bio);
 
 	/* no need to throttle if this bio's bytes have been accounted */
-	if (bps_limit == U64_MAX || bio_flagged(bio, BIO_THROTTLED)) {
+	if (bps_limit == U64_MAX) {
 		if (wait)
 			*wait = 0;
 		return true;
@@ -921,11 +921,8 @@  static void throtl_charge_bio(struct throtl_grp *tg, struct bio *bio)
 	unsigned int bio_size = throtl_bio_data_size(bio);
 
 	/* Charge the bio to the group */
-	if (!bio_flagged(bio, BIO_THROTTLED)) {
-		tg->bytes_disp[rw] += bio_size;
-		tg->last_bytes_disp[rw] += bio_size;
-	}
-
+	tg->bytes_disp[rw] += bio_size;
+	tg->last_bytes_disp[rw] += bio_size;
 	tg->io_disp[rw]++;
 	tg->last_io_disp[rw]++;
 
@@ -2121,6 +2118,21 @@  bool __blk_throtl_bio(struct bio *bio)
 			tg->last_low_overflow_time[rw] = jiffies;
 		throtl_downgrade_check(tg);
 		throtl_upgrade_check(tg);
+
+		/*
+		 * re-entered bio has accounted bytes already, so try to
+		 * compensate previous over-accounting. However, if new
+		 * slice is started, just forget it.
+		 */
+		if (bio_flagged(bio, BIO_THROTTLED)) {
+			unsigned int bio_size = throtl_bio_data_size(bio);
+
+			if (tg->bytes_disp[rw] >= bio_size)
+				tg->bytes_disp[rw] -= bio_size;
+			if (tg->last_bytes_disp[rw] >= bio_size)
+				tg->last_bytes_disp[rw] -= bio_size;
+		}
+
 		/* throtl is FIFO - if bios are already queued, should queue */
 		if (sq->nr_queued[rw])
 			break;