diff mbox series

[10/16] btrfs: remove non-standard extent handling in __extent_writepage_io

Message ID 20230531060505.468704-11-hch@lst.de (mailing list archive)
State New, archived
Headers show
Series [01/16] btrfs: fix range_end calculation in extent_write_locked_range | expand

Commit Message

Christoph Hellwig May 31, 2023, 6:04 a.m. UTC
__extent_writepage_io is never called for compressed or inline extents,
or holes.  Remove the not quite working code for them and replace it with
asserts that these cases don't happen.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/btrfs/extent_io.c | 23 +++++------------------
 1 file changed, 5 insertions(+), 18 deletions(-)

Comments

Qu Wenruo Feb. 10, 2024, 9:39 a.m. UTC | #1
On 2023/5/31 15:34, Christoph Hellwig wrote:
> __extent_writepage_io is never called for compressed or inline extents,
> or holes.  Remove the not quite working code for them and replace it with
> asserts that these cases don't happen.
>
> Signed-off-by: Christoph Hellwig <hch@lst.de>

It's a little too late, but this patch is causing crashes for subpage.

> ---
>   fs/btrfs/extent_io.c | 23 +++++------------------
>   1 file changed, 5 insertions(+), 18 deletions(-)
>
> diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
> index 09a9973c27ccfb..a2e1dbd9b92309 100644
> --- a/fs/btrfs/extent_io.c
> +++ b/fs/btrfs/extent_io.c
> @@ -1361,7 +1361,6 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
>   	struct extent_map *em;
>   	int ret = 0;
>   	int nr = 0;
> -	bool compressed;
>
>   	ret = btrfs_writepage_cow_fixup(page);
>   	if (ret) {
> @@ -1419,10 +1418,14 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
>   		ASSERT(cur < end);
>   		ASSERT(IS_ALIGNED(em->start, fs_info->sectorsize));
>   		ASSERT(IS_ALIGNED(em->len, fs_info->sectorsize));
> +
>   		block_start = em->block_start;
> -		compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
>   		disk_bytenr = em->block_start + extent_offset;
>
> +		ASSERT(!test_bit(EXTENT_FLAG_COMPRESSED, &em->flags));
> +		ASSERT(block_start != EXTENT_MAP_HOLE);

For subpage cases, __extent_writepage_io() can be triggered to write
only a subset of the page, from extent_write_locked_range().

In that case, if we have submitted the target range, since our @len is
to the end of the page, we can hit a hole.

In that case, this ASSERT() would be triggered.
And even worse, if CONFIG_BTRFS_ASSERT is not enabled, we can perform
writeback using the wrong disk_bytenr.

So at least we need to skip the hole ranges for subpage.
And thankfully the remaining two cases are impossible for subpage.

Thanks,
Qu

> +		ASSERT(block_start != EXTENT_MAP_INLINE);
> +
>   		/*
>   		 * Note that em_end from extent_map_end() and dirty_range_end from
>   		 * find_next_dirty_byte() are all exclusive
> @@ -1431,22 +1434,6 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
>   		free_extent_map(em);
>   		em = NULL;
>
> -		/*
> -		 * compressed and inline extents are written through other
> -		 * paths in the FS
> -		 */
> -		if (compressed || block_start == EXTENT_MAP_HOLE ||
> -		    block_start == EXTENT_MAP_INLINE) {
> -			if (compressed)
> -				nr++;
> -			else
> -				btrfs_writepage_endio_finish_ordered(inode,
> -						page, cur, cur + iosize - 1, true);
> -			btrfs_page_clear_dirty(fs_info, page, cur, iosize);
> -			cur += iosize;
> -			continue;
> -		}
> -
>   		btrfs_set_range_writeback(inode, cur, cur + iosize - 1);
>   		if (!PageWriteback(page)) {
>   			btrfs_err(inode->root->fs_info,
Christoph Hellwig Feb. 12, 2024, 3:52 p.m. UTC | #2
On Sat, Feb 10, 2024 at 08:09:47PM +1030, Qu Wenruo wrote:
>> @@ -1419,10 +1418,14 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
>>   		ASSERT(cur < end);
>>   		ASSERT(IS_ALIGNED(em->start, fs_info->sectorsize));
>>   		ASSERT(IS_ALIGNED(em->len, fs_info->sectorsize));
>> +
>>   		block_start = em->block_start;
>> -		compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
>>   		disk_bytenr = em->block_start + extent_offset;
>>
>> +		ASSERT(!test_bit(EXTENT_FLAG_COMPRESSED, &em->flags));
>> +		ASSERT(block_start != EXTENT_MAP_HOLE);
>
> For subpage cases, __extent_writepage_io() can be triggered to write
> only a subset of the page, from extent_write_locked_range().

Yes.

> In that case, if we have submitted the target range, since our @len is
> to the end of the page, we can hit a hole.
>
> In that case, this ASSERT() would be triggered.
> And even worse, if CONFIG_BTRFS_ASSERT is not enabled, we can perform
> writeback using the wrong disk_bytenr.
>
> So at least we need to skip the hole ranges for subpage.
> And thankfully the remaining two cases are impossible for subpage.

The patch below reinstates the hole handling.  I don't have a system
that tests the btrfs subpage code right now, so this is untested:

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index cfd2967f04a293..a106036641104c 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -1388,7 +1388,6 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
 		disk_bytenr = em->block_start + extent_offset;
 
 		ASSERT(!extent_map_is_compressed(em));
-		ASSERT(block_start != EXTENT_MAP_HOLE);
 		ASSERT(block_start != EXTENT_MAP_INLINE);
 
 		/*
@@ -1399,6 +1398,15 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
 		free_extent_map(em);
 		em = NULL;
 
+		if (block_start == EXTENT_MAP_HOLE) {
+			btrfs_mark_ordered_io_finished(inode, page, cur, iosize,
+						       true);
+			btrfs_folio_clear_dirty(fs_info, page_folio(page), cur,
+						iosize);
+			cur += iosize;
+			continue;
+		}
+
 		btrfs_set_range_writeback(inode, cur, cur + iosize - 1);
 		if (!PageWriteback(page)) {
 			btrfs_err(inode->root->fs_info,
Qu Wenruo Feb. 12, 2024, 11:06 p.m. UTC | #3
On 2024/2/13 02:22, Christoph Hellwig wrote:
> On Sat, Feb 10, 2024 at 08:09:47PM +1030, Qu Wenruo wrote:
>>> @@ -1419,10 +1418,14 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
>>>    		ASSERT(cur < end);
>>>    		ASSERT(IS_ALIGNED(em->start, fs_info->sectorsize));
>>>    		ASSERT(IS_ALIGNED(em->len, fs_info->sectorsize));
>>> +
>>>    		block_start = em->block_start;
>>> -		compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
>>>    		disk_bytenr = em->block_start + extent_offset;
>>>
>>> +		ASSERT(!test_bit(EXTENT_FLAG_COMPRESSED, &em->flags));
>>> +		ASSERT(block_start != EXTENT_MAP_HOLE);
>>
>> For subpage cases, __extent_writepage_io() can be triggered to write
>> only a subset of the page, from extent_write_locked_range().
>
> Yes.
>
>> In that case, if we have submitted the target range, since our @len is
>> to the end of the page, we can hit a hole.
>>
>> In that case, this ASSERT() would be triggered.
>> And even worse, if CONFIG_BTRFS_ASSERT is not enabled, we can perform
>> writeback using the wrong disk_bytenr.
>>
>> So at least we need to skip the hole ranges for subpage.
>> And thankfully the remaining two cases are impossible for subpage.
>
> The patch below reinstates the hole handling.  I don't have a system
> that tests the btrfs subpage code right now, so this is untested:
>
> diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
> index cfd2967f04a293..a106036641104c 100644
> --- a/fs/btrfs/extent_io.c
> +++ b/fs/btrfs/extent_io.c
> @@ -1388,7 +1388,6 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
>   		disk_bytenr = em->block_start + extent_offset;
>
>   		ASSERT(!extent_map_is_compressed(em));
> -		ASSERT(block_start != EXTENT_MAP_HOLE);
>   		ASSERT(block_start != EXTENT_MAP_INLINE);
>
>   		/*
> @@ -1399,6 +1398,15 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
>   		free_extent_map(em);
>   		em = NULL;
>
> +		if (block_start == EXTENT_MAP_HOLE) {
> +			btrfs_mark_ordered_io_finished(inode, page, cur, iosize,
> +						       true);
> +			btrfs_folio_clear_dirty(fs_info, page_folio(page), cur,
> +						iosize);
> +			cur += iosize;
> +			continue;
> +		}
> +
>   		btrfs_set_range_writeback(inode, cur, cur + iosize - 1);
>   		if (!PageWriteback(page)) {
>   			btrfs_err(inode->root->fs_info,

There are more problem than I initially thought.

In fact, I went another path to make __extent_writepage_io() to only
submit IO for the desired range.

But we would have another problem related to @locked_page handling.

For extent_write_locked_range(), we expect to unlock the @locked_page.
But for subpage case, we can have multiple dirty ranges.

In that case, if we unlock @locked_page for the first iteration, the
next extent_write_locked_range() iteration can still try to get the same
page, and found the page unlocked, triggering an ASSERT().

So the patch itself is not the root cause; it's the lack of subpage
locking that is causing the problem.

Sorry for the false alarm.

Thanks,
Qu
diff mbox series

Patch

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 09a9973c27ccfb..a2e1dbd9b92309 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -1361,7 +1361,6 @@  static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
 	struct extent_map *em;
 	int ret = 0;
 	int nr = 0;
-	bool compressed;
 
 	ret = btrfs_writepage_cow_fixup(page);
 	if (ret) {
@@ -1419,10 +1418,14 @@  static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
 		ASSERT(cur < end);
 		ASSERT(IS_ALIGNED(em->start, fs_info->sectorsize));
 		ASSERT(IS_ALIGNED(em->len, fs_info->sectorsize));
+
 		block_start = em->block_start;
-		compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
 		disk_bytenr = em->block_start + extent_offset;
 
+		ASSERT(!test_bit(EXTENT_FLAG_COMPRESSED, &em->flags));
+		ASSERT(block_start != EXTENT_MAP_HOLE);
+		ASSERT(block_start != EXTENT_MAP_INLINE);
+
 		/*
 		 * Note that em_end from extent_map_end() and dirty_range_end from
 		 * find_next_dirty_byte() are all exclusive
@@ -1431,22 +1434,6 @@  static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
 		free_extent_map(em);
 		em = NULL;
 
-		/*
-		 * compressed and inline extents are written through other
-		 * paths in the FS
-		 */
-		if (compressed || block_start == EXTENT_MAP_HOLE ||
-		    block_start == EXTENT_MAP_INLINE) {
-			if (compressed)
-				nr++;
-			else
-				btrfs_writepage_endio_finish_ordered(inode,
-						page, cur, cur + iosize - 1, true);
-			btrfs_page_clear_dirty(fs_info, page, cur, iosize);
-			cur += iosize;
-			continue;
-		}
-
 		btrfs_set_range_writeback(inode, cur, cur + iosize - 1);
 		if (!PageWriteback(page)) {
 			btrfs_err(inode->root->fs_info,