
[v4] md/raid1: don't allow_barrier() before r1bio got freed

Message ID 20230808033211.197383-1-xueshi.hu@smartx.com (mailing list archive)
State New, archived
Series [v4] md/raid1: don't allow_barrier() before r1bio got freed

Commit Message

Xueshi Hu Aug. 8, 2023, 3:32 a.m. UTC
Because raid1 reshape changes r1conf::raid_disks and the mempool, it
requires that there be no in-flight r1bio during the reshape. However,
the current callers of allow_barrier() allow the reshape to proceed
before the old r1bio has been freed.

Free the r1bio first, then call allow_barrier().
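
For illustration only, a minimal standalone C sketch of the ordering
being fixed (the types and prototypes are simplified stand-ins, not the
real raid1 declarations; the actual call sites are in the hunks below):

	/*
	 * Sketch of the race: once the barrier is released, a waiting
	 * reshape assumes there is no in-flight r1bio and may replace
	 * the mempool, so the r1bio must be freed first.
	 */
	struct r1bio;
	struct r1conf;

	void free_r1bio(struct r1bio *r1_bio);              /* return r1bio to the mempool   */
	void allow_barrier(struct r1conf *conf, long sect);  /* let a waiting reshape proceed */

	static void end_io_before_fix(struct r1conf *conf, struct r1bio *r1_bio, long sect)
	{
		allow_barrier(conf, sect);  /* reshape may proceed here ...             */
		free_r1bio(r1_bio);         /* ... although this r1bio is not yet freed */
	}

	static void end_io_after_fix(struct r1conf *conf, struct r1bio *r1_bio, long sect)
	{
		free_r1bio(r1_bio);         /* free while the barrier is still held */
		allow_barrier(conf, sect);  /* only then let the reshape run        */
	}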

Fixes: c91114c2b89d ("md/raid1: release pending accounting for an I/O only after write-behind is also finished")
Fixes: 6bfe0b499082 ("md: support blocking writes to an array on device failure")
Fixes: 689389a06ce7 ("md/raid1: simplify handle_read_error().")
Reviewed-by: Yu Kuai <yukuai3@huawei.com>
Signed-off-by: Xueshi Hu <xueshi.hu@smartx.com>
---
-> v2:
	- fix the problems one by one instead of calling
	  blk_mq_freeze_queue(), as suggested by Yu Kuai
-> v3:
	- add freeze_array_totally() to replace freeze_array() instead
	  of giving up in raid1_reshape()
	- add a missed fix in raid_end_bio_io()
	- add a small check at the start of raid1_reshape()
-> v4:
	- add Fixes tags and revise the commit message
	- drop patch 1 as there is an ongoing systematic fix for the bug
	- drop patch 3 as it's unrelated; it will be sent as a
	  separate patch

 drivers/md/raid1.c | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

Comments

Yu Kuai Aug. 12, 2023, 8:06 a.m. UTC | #1
Hi,

On 2023/08/08 11:32, Xueshi Hu wrote:
> Because raid1 reshape changes r1conf::raid_disks and the mempool, it
> requires that there be no in-flight r1bio during the reshape. However,
> the current callers of allow_barrier() allow the reshape to proceed
> before the old r1bio has been freed.
> 
> Free the r1bio first, then call allow_barrier().
> 
> Fixes: c91114c2b89d ("md/raid1: release pending accounting for an I/O only after write-behind is also finished")
> Fixes: 6bfe0b499082 ("md: support blocking writes to an array on device failure")
> Fixes: 689389a06ce7 ("md/raid1: simplify handle_read_error().")

It's just a suggestion: you could split this patch into 3 patches, one
for each fix.

> Reviewed-by: Yu Kuai <yukuai3@huawei.com>
> Signed-off-by: Xueshi Hu <xueshi.hu@smartx.com>
> ---
> -> v2:
> 	- fix the problem one by one instead of calling
> 	blk_mq_freeze_queue() as suggested by Yu Kuai
> -> v3:
> 	- add freeze_array_totally() to replace freeze_array() instead
> 	  of gave up in raid1_reshape()
> 	- add a missed fix in raid_end_bio_io()
> 	- add a small check at the start of raid1_reshape()
> -> v4:
> 	- add fix tag and revise the commit message
> 	- drop patch 1 as there is an ongoing systematic fix for the bug

I think it's okay to fix the bug as I suggested in v3; the refactor is a
long-term...

Thanks,
Kuai

> 	- drop patch 3 as it's unrelated which will be sent in
> 	another patch
> 
>   drivers/md/raid1.c | 15 +++++++++------
>   1 file changed, 9 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
> index dd25832eb045..5a5eb5f1a224 100644
> --- a/drivers/md/raid1.c
> +++ b/drivers/md/raid1.c
> @@ -313,6 +313,7 @@ static void raid_end_bio_io(struct r1bio *r1_bio)
>   {
>   	struct bio *bio = r1_bio->master_bio;
>   	struct r1conf *conf = r1_bio->mddev->private;
> +	sector_t sector = r1_bio->sector;
>   
>   	/* if nobody has done the final endio yet, do it now */
>   	if (!test_and_set_bit(R1BIO_Returned, &r1_bio->state)) {
> @@ -323,13 +324,13 @@ static void raid_end_bio_io(struct r1bio *r1_bio)
>   
>   		call_bio_endio(r1_bio);
>   	}
> +
> +	free_r1bio(r1_bio);
>   	/*
>   	 * Wake up any possible resync thread that waits for the device
>   	 * to go idle.  All I/Os, even write-behind writes, are done.
>   	 */
> -	allow_barrier(conf, r1_bio->sector);
> -
> -	free_r1bio(r1_bio);
> +	allow_barrier(conf, sector);
>   }
>   
>   /*
> @@ -1373,6 +1374,7 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
>   		return;
>   	}
>   
> + retry_write:
>   	r1_bio = alloc_r1bio(mddev, bio);
>   	r1_bio->sectors = max_write_sectors;
>   
> @@ -1388,7 +1390,6 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
>   	 */
>   
>   	disks = conf->raid_disks * 2;
> - retry_write:
>   	blocked_rdev = NULL;
>   	rcu_read_lock();
>   	max_sectors = r1_bio->sectors;
> @@ -1468,7 +1469,7 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
>   		for (j = 0; j < i; j++)
>   			if (r1_bio->bios[j])
>   				rdev_dec_pending(conf->mirrors[j].rdev, mddev);
> -		r1_bio->state = 0;
> +		free_r1bio(r1_bio);
>   		allow_barrier(conf, bio->bi_iter.bi_sector);
>   
>   		if (bio->bi_opf & REQ_NOWAIT) {
> @@ -2498,6 +2499,7 @@ static void handle_read_error(struct r1conf *conf, struct r1bio *r1_bio)
>   	struct mddev *mddev = conf->mddev;
>   	struct bio *bio;
>   	struct md_rdev *rdev;
> +	sector_t sector;
>   
>   	clear_bit(R1BIO_ReadError, &r1_bio->state);
>   	/* we got a read error. Maybe the drive is bad.  Maybe just
> @@ -2527,12 +2529,13 @@ static void handle_read_error(struct r1conf *conf, struct r1bio *r1_bio)
>   	}
>   
>   	rdev_dec_pending(rdev, conf->mddev);
> -	allow_barrier(conf, r1_bio->sector);
> +	sector = r1_bio->sector;
>   	bio = r1_bio->master_bio;
>   
>   	/* Reuse the old r1_bio so that the IO_BLOCKED settings are preserved */
>   	r1_bio->state = 0;
>   	raid1_read_request(mddev, bio, r1_bio->sectors, r1_bio);
> +	allow_barrier(conf, sector);
>   }
>   
>   static void raid1d(struct md_thread *thread)
>
Xueshi Hu Aug. 12, 2023, 1:24 p.m. UTC | #2
On 8/12/23 16:06, Yu Kuai wrote:
> Hi,
> 
> On 2023/08/08 11:32, Xueshi Hu wrote:
>> Because raid1 reshape changes r1conf::raid_disks and the mempool, it
>> requires that there be no in-flight r1bio during the reshape. However,
>> the current callers of allow_barrier() allow the reshape to proceed
>> before the old r1bio has been freed.
>>
>> Free the r1bio first, then call allow_barrier().
>>
>> Fixes: c91114c2b89d ("md/raid1: release pending accounting for an I/O 
>> only after write-behind is also finished")
>> Fixes: 6bfe0b499082 ("md: support blocking writes to an array on 
>> device failure")
>> Fixes: 689389a06ce7 ("md/raid1: simplify handle_read_error().")
> 
> It's just a suggestion, you can split this patch into 3 patches, one for
> each fix.
> 
>> Reviewed-by: Yu Kuai <yukuai3@huawei.com>
>> Signed-off-by: Xueshi Hu <xueshi.hu@smartx.com>
>> ---
>> -> v2:
>>     - fix the problem one by one instead of calling
>>     blk_mq_freeze_queue() as suggested by Yu Kuai
>> -> v3:
>>     - add freeze_array_totally() to replace freeze_array() instead
>>       of gave up in raid1_reshape()
>>     - add a missed fix in raid_end_bio_io()
>>     - add a small check at the start of raid1_reshape()
>> -> v4:
>>     - add fix tag and revise the commit message
>>     - drop patch 1 as there is an ongoing systematic fix for the bug
> 
> I think it's okay to fix the bug as I suggested in v3, the refactor is a
> long term...
> 
> Thanks,
> Kuai
Good suggestion, I'll remake the patch.

Thanks,
Hu
> 
>>     - drop patch 3 as it's unrelated which will be sent in
>>     another patch
>>
>>   drivers/md/raid1.c | 15 +++++++++------
>>   1 file changed, 9 insertions(+), 6 deletions(-)
>>
>> diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
>> index dd25832eb045..5a5eb5f1a224 100644
>> --- a/drivers/md/raid1.c
>> +++ b/drivers/md/raid1.c
>> @@ -313,6 +313,7 @@ static void raid_end_bio_io(struct r1bio *r1_bio)
>>   {
>>       struct bio *bio = r1_bio->master_bio;
>>       struct r1conf *conf = r1_bio->mddev->private;
>> +    sector_t sector = r1_bio->sector;
>>       /* if nobody has done the final endio yet, do it now */
>>       if (!test_and_set_bit(R1BIO_Returned, &r1_bio->state)) {
>> @@ -323,13 +324,13 @@ static void raid_end_bio_io(struct r1bio *r1_bio)
>>           call_bio_endio(r1_bio);
>>       }
>> +
>> +    free_r1bio(r1_bio);
>>       /*
>>        * Wake up any possible resync thread that waits for the device
>>        * to go idle.  All I/Os, even write-behind writes, are done.
>>        */
>> -    allow_barrier(conf, r1_bio->sector);
>> -
>> -    free_r1bio(r1_bio);
>> +    allow_barrier(conf, sector);
>>   }
>>   /*
>> @@ -1373,6 +1374,7 @@ static void raid1_write_request(struct mddev 
>> *mddev, struct bio *bio,
>>           return;
>>       }
>> + retry_write:
>>       r1_bio = alloc_r1bio(mddev, bio);
>>       r1_bio->sectors = max_write_sectors;
>> @@ -1388,7 +1390,6 @@ static void raid1_write_request(struct mddev 
>> *mddev, struct bio *bio,
>>        */
>>       disks = conf->raid_disks * 2;
>> - retry_write:
>>       blocked_rdev = NULL;
>>       rcu_read_lock();
>>       max_sectors = r1_bio->sectors;
>> @@ -1468,7 +1469,7 @@ static void raid1_write_request(struct mddev 
>> *mddev, struct bio *bio,
>>           for (j = 0; j < i; j++)
>>               if (r1_bio->bios[j])
>>                   rdev_dec_pending(conf->mirrors[j].rdev, mddev);
>> -        r1_bio->state = 0;
>> +        free_r1bio(r1_bio);
>>           allow_barrier(conf, bio->bi_iter.bi_sector);
>>           if (bio->bi_opf & REQ_NOWAIT) {
>> @@ -2498,6 +2499,7 @@ static void handle_read_error(struct r1conf 
>> *conf, struct r1bio *r1_bio)
>>       struct mddev *mddev = conf->mddev;
>>       struct bio *bio;
>>       struct md_rdev *rdev;
>> +    sector_t sector;
>>       clear_bit(R1BIO_ReadError, &r1_bio->state);
>>       /* we got a read error. Maybe the drive is bad.  Maybe just
>> @@ -2527,12 +2529,13 @@ static void handle_read_error(struct r1conf 
>> *conf, struct r1bio *r1_bio)
>>       }
>>       rdev_dec_pending(rdev, conf->mddev);
>> -    allow_barrier(conf, r1_bio->sector);
>> +    sector = r1_bio->sector;
>>       bio = r1_bio->master_bio;
>>       /* Reuse the old r1_bio so that the IO_BLOCKED settings are 
>> preserved */
>>       r1_bio->state = 0;
>>       raid1_read_request(mddev, bio, r1_bio->sectors, r1_bio);
>> +    allow_barrier(conf, sector);
>>   }
>>   static void raid1d(struct md_thread *thread)
>>
>

Patch

diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index dd25832eb045..5a5eb5f1a224 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -313,6 +313,7 @@  static void raid_end_bio_io(struct r1bio *r1_bio)
 {
 	struct bio *bio = r1_bio->master_bio;
 	struct r1conf *conf = r1_bio->mddev->private;
+	sector_t sector = r1_bio->sector;
 
 	/* if nobody has done the final endio yet, do it now */
 	if (!test_and_set_bit(R1BIO_Returned, &r1_bio->state)) {
@@ -323,13 +324,13 @@  static void raid_end_bio_io(struct r1bio *r1_bio)
 
 		call_bio_endio(r1_bio);
 	}
+
+	free_r1bio(r1_bio);
 	/*
 	 * Wake up any possible resync thread that waits for the device
 	 * to go idle.  All I/Os, even write-behind writes, are done.
 	 */
-	allow_barrier(conf, r1_bio->sector);
-
-	free_r1bio(r1_bio);
+	allow_barrier(conf, sector);
 }
 
 /*
@@ -1373,6 +1374,7 @@  static void raid1_write_request(struct mddev *mddev, struct bio *bio,
 		return;
 	}
 
+ retry_write:
 	r1_bio = alloc_r1bio(mddev, bio);
 	r1_bio->sectors = max_write_sectors;
 
@@ -1388,7 +1390,6 @@  static void raid1_write_request(struct mddev *mddev, struct bio *bio,
 	 */
 
 	disks = conf->raid_disks * 2;
- retry_write:
 	blocked_rdev = NULL;
 	rcu_read_lock();
 	max_sectors = r1_bio->sectors;
@@ -1468,7 +1469,7 @@  static void raid1_write_request(struct mddev *mddev, struct bio *bio,
 		for (j = 0; j < i; j++)
 			if (r1_bio->bios[j])
 				rdev_dec_pending(conf->mirrors[j].rdev, mddev);
-		r1_bio->state = 0;
+		free_r1bio(r1_bio);
 		allow_barrier(conf, bio->bi_iter.bi_sector);
 
 		if (bio->bi_opf & REQ_NOWAIT) {
@@ -2498,6 +2499,7 @@  static void handle_read_error(struct r1conf *conf, struct r1bio *r1_bio)
 	struct mddev *mddev = conf->mddev;
 	struct bio *bio;
 	struct md_rdev *rdev;
+	sector_t sector;
 
 	clear_bit(R1BIO_ReadError, &r1_bio->state);
 	/* we got a read error. Maybe the drive is bad.  Maybe just
@@ -2527,12 +2529,13 @@  static void handle_read_error(struct r1conf *conf, struct r1bio *r1_bio)
 	}
 
 	rdev_dec_pending(rdev, conf->mddev);
-	allow_barrier(conf, r1_bio->sector);
+	sector = r1_bio->sector;
 	bio = r1_bio->master_bio;
 
 	/* Reuse the old r1_bio so that the IO_BLOCKED settings are preserved */
 	r1_bio->state = 0;
 	raid1_read_request(mddev, bio, r1_bio->sectors, r1_bio);
+	allow_barrier(conf, sector);
 }
 
 static void raid1d(struct md_thread *thread)