diff mbox series

[v3,08/10] btrfs: add extra error messages for delalloc range related errors

Message ID ac9345c0d31fc1b669ccfe436e49883ed998ab07.1736479224.git.wqu@suse.com (mailing list archive)
State New
Headers show
Series btrfs: error handling fixes | expand

Commit Message

Qu Wenruo Jan. 10, 2025, 3:31 a.m. UTC
All the error handling bugs I hit so far are all -ENOSPC from either:

- cow_file_range()
- run_delalloc_nocow()
- submit_uncompressed_range()

Previously when those functions failed, there is no error message at
all, making the debugging much harder.

So here we introduce extra error messages for:

- cow_file_range()
- run_delalloc_nocow()
- submit_uncompressed_range()
- writepage_delalloc() when btrfs_run_delalloc_range() failed
- extent_writepage() when extent_writepage_io() failed

One example of the new debug error messages is the following one:

 run fstests generic/750 at 2024-12-08 12:41:41
 BTRFS: device fsid 461b25f5-e240-4543-8deb-e7c2bd01a6d3 devid 1 transid 8 /dev/mapper/test-scratch1 (253:4) scanned by mount (2436600)
 BTRFS info (device dm-4): first mount of filesystem 461b25f5-e240-4543-8deb-e7c2bd01a6d3
 BTRFS info (device dm-4): using crc32c (crc32c-arm64) checksum algorithm
 BTRFS info (device dm-4): forcing free space tree for sector size 4096 with page size 65536
 BTRFS info (device dm-4): using free-space-tree
 BTRFS warning (device dm-4): read-write for sector size 4096 with page size 65536 is experimental
 BTRFS info (device dm-4): checking UUID tree
 BTRFS error (device dm-4): cow_file_range failed, root=363 inode=412 start=503808 len=98304: -28
 BTRFS error (device dm-4): run_delalloc_nocow failed, root=363 inode=412 start=503808 len=98304: -28
 BTRFS error (device dm-4): failed to run delalloc range, root=363 ino=412 folio=458752 submit_bitmap=11-15 start=503808 len=98304: -28

Which shows an error from cow_file_range() which is called inside a
nocow write attempt, along with the extra bitmap from
writepage_delalloc().

Reviewed-by: Boris Burkov <boris@bur.io>
Signed-off-by: Qu Wenruo <wqu@suse.com>
---
 fs/btrfs/extent_io.c | 16 ++++++++++++++++
 fs/btrfs/inode.c     | 14 +++++++++++++-
 fs/btrfs/subpage.c   |  3 ++-
 3 files changed, 31 insertions(+), 2 deletions(-)

Comments

David Sterba Jan. 10, 2025, 4:20 p.m. UTC | #1
On Fri, Jan 10, 2025 at 02:01:39PM +1030, Qu Wenruo wrote:
> @@ -1561,6 +1570,13 @@ static int extent_writepage(struct folio *folio, struct btrfs_bio_ctrl *bio_ctrl
>  				  PAGE_SIZE, bio_ctrl, i_size);
>  	if (ret == 1)
>  		return 0;
> +	if (ret < 0)
> +		btrfs_err_rl(fs_info,
> +"failed to submit blocks, root=%lld inode=%llu folio=%llu submit_bitmap=%*pbl: %d",
> +			     BTRFS_I(inode)->root->root_key.objectid,
> +			     btrfs_ino(BTRFS_I(inode)),
> +			     folio_pos(folio), fs_info->sectors_per_page,
> +			     &bio_ctrl->submit_bitmap, ret);

I've merged my cleanup series, and there will be a minor conflict in
this hunk,

@@ -1565,8 +1565,7 @@ static int extent_writepage(struct folio *folio, struct btrfs_bio_ctrl *bio_ctrl
        if (ret < 0)
                btrfs_err_rl(fs_info,
 "failed to submit blocks, root=%lld inode=%llu folio=%llu submit_bitmap=%*pbl: %d",
-                            BTRFS_I(inode)->root->root_key.objectid,
-                            btrfs_ino(BTRFS_I(inode)),
+                            inode->root->root_key.objectid, btrfs_ino(inode),
                             folio_pos(folio), fs_info->sectors_per_page,
                             &bio_ctrl->submit_bitmap, ret);
---
Filipe Manana Jan. 10, 2025, 4:29 p.m. UTC | #2
On Fri, Jan 10, 2025 at 3:32 AM Qu Wenruo <wqu@suse.com> wrote:
>
> All the error handling bugs I hit so far are all -ENOSPC from either:
>
> - cow_file_range()
> - run_delalloc_nocow()
> - submit_uncompressed_range()
>
> Previously when those functions failed, there is no error message at
> all, making the debugging much harder.
>
> So here we introduce extra error messages for:
>
> - cow_file_range()
> - run_delalloc_nocow()
> - submit_uncompressed_range()
> - writepage_delalloc() when btrfs_run_delalloc_range() failed
> - extent_writepage() when extent_writepage_io() failed
>
> One example of the new debug error messages is the following one:
>
>  run fstests generic/750 at 2024-12-08 12:41:41
>  BTRFS: device fsid 461b25f5-e240-4543-8deb-e7c2bd01a6d3 devid 1 transid 8 /dev/mapper/test-scratch1 (253:4) scanned by mount (2436600)
>  BTRFS info (device dm-4): first mount of filesystem 461b25f5-e240-4543-8deb-e7c2bd01a6d3
>  BTRFS info (device dm-4): using crc32c (crc32c-arm64) checksum algorithm
>  BTRFS info (device dm-4): forcing free space tree for sector size 4096 with page size 65536
>  BTRFS info (device dm-4): using free-space-tree
>  BTRFS warning (device dm-4): read-write for sector size 4096 with page size 65536 is experimental
>  BTRFS info (device dm-4): checking UUID tree
>  BTRFS error (device dm-4): cow_file_range failed, root=363 inode=412 start=503808 len=98304: -28
>  BTRFS error (device dm-4): run_delalloc_nocow failed, root=363 inode=412 start=503808 len=98304: -28
>  BTRFS error (device dm-4): failed to run delalloc range, root=363 ino=412 folio=458752 submit_bitmap=11-15 start=503808 len=98304: -28
>
> Which shows an error from cow_file_range() which is called inside a
> nocow write attempt, along with the extra bitmap from
> writepage_delalloc().
>
> Reviewed-by: Boris Burkov <boris@bur.io>
> Signed-off-by: Qu Wenruo <wqu@suse.com>
> ---
>  fs/btrfs/extent_io.c | 16 ++++++++++++++++
>  fs/btrfs/inode.c     | 14 +++++++++++++-
>  fs/btrfs/subpage.c   |  3 ++-
>  3 files changed, 31 insertions(+), 2 deletions(-)
>
> diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
> index 54081b1783fc..615f03fb5d97 100644
> --- a/fs/btrfs/extent_io.c
> +++ b/fs/btrfs/extent_io.c
> @@ -1263,6 +1263,15 @@ static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode,
>                                                        wbc);
>                         if (ret >= 0)
>                                 last_finished_delalloc_end = found_start + found_len;
> +                       if (unlikely(ret < 0))
> +                               btrfs_err_rl(fs_info,
> +"failed to run delalloc range, root=%lld ino=%llu folio=%llu submit_bitmap=%*pbl start=%llu len=%u: %d",
> +                                            inode->root->root_key.objectid,

You can use btrfs_root_id(), it's shorter and it's what we use everywhere else.

> +                                            btrfs_ino(inode),
> +                                            folio_pos(folio),
> +                                            fs_info->sectors_per_page,
> +                                            &bio_ctrl->submit_bitmap,
> +                                            found_start, found_len, ret);
>                 } else {
>                         /*
>                          * We've hit an error during previous delalloc range,
> @@ -1561,6 +1570,13 @@ static int extent_writepage(struct folio *folio, struct btrfs_bio_ctrl *bio_ctrl
>                                   PAGE_SIZE, bio_ctrl, i_size);
>         if (ret == 1)
>                 return 0;
> +       if (ret < 0)
> +               btrfs_err_rl(fs_info,
> +"failed to submit blocks, root=%lld inode=%llu folio=%llu submit_bitmap=%*pbl: %d",
> +                            BTRFS_I(inode)->root->root_key.objectid,

Same here.

> +                            btrfs_ino(BTRFS_I(inode)),
> +                            folio_pos(folio), fs_info->sectors_per_page,
> +                            &bio_ctrl->submit_bitmap, ret);
>
>         bio_ctrl->wbc->nr_to_write--;
>
> diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
> index a450fc080ca3..c8145a588f45 100644
> --- a/fs/btrfs/inode.c
> +++ b/fs/btrfs/inode.c
> @@ -1134,6 +1134,10 @@ static void submit_uncompressed_range(struct btrfs_inode *inode,
>                 if (locked_folio)
>                         btrfs_folio_end_lock(inode->root->fs_info, locked_folio,
>                                              start, async_extent->ram_size);
> +               btrfs_err_rl(inode->root->fs_info,
> +               "%s failed, root=%llu inode=%llu start=%llu len=%llu: %d",
> +                            __func__, btrfs_root_id(inode->root),

And here, and below, it's using btrfs_root_id(). So we should be
consistent and use it everywhere.
Thanks.

> +                            btrfs_ino(inode), start, async_extent->ram_size, ret);
>         }
>  }
>
> @@ -1246,7 +1250,7 @@ static void submit_one_async_extent(struct async_chunk *async_chunk,
>         free_async_extent_pages(async_extent);
>         if (async_chunk->blkcg_css)
>                 kthread_associate_blkcg(NULL);
> -       btrfs_debug(fs_info,
> +       btrfs_debug_rl(fs_info,
>  "async extent submission failed root=%lld inode=%llu start=%llu len=%llu ret=%d",
>                     btrfs_root_id(root), btrfs_ino(inode), start,
>                     async_extent->ram_size, ret);
> @@ -1580,6 +1584,10 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
>                 btrfs_qgroup_free_data(inode, NULL, start + cur_alloc_size,
>                                        end - start - cur_alloc_size + 1, NULL);
>         }
> +       btrfs_err_rl(fs_info,
> +                    "%s failed, root=%llu inode=%llu start=%llu len=%llu: %d",
> +                    __func__, btrfs_root_id(inode->root),
> +                    btrfs_ino(inode), orig_start, end + 1 - orig_start, ret);
>         return ret;
>  }
>
> @@ -2327,6 +2335,10 @@ static noinline int run_delalloc_nocow(struct btrfs_inode *inode,
>                 btrfs_qgroup_free_data(inode, NULL, cur_offset, end - cur_offset + 1, NULL);
>         }
>         btrfs_free_path(path);
> +       btrfs_err_rl(fs_info,
> +                    "%s failed, root=%llu inode=%llu start=%llu len=%llu: %d",
> +                    __func__, btrfs_root_id(inode->root),
> +                    btrfs_ino(inode), start, end + 1 - start, ret);
>         return ret;
>  }
>
> diff --git a/fs/btrfs/subpage.c b/fs/btrfs/subpage.c
> index d692bc34a3af..7f47bc61389c 100644
> --- a/fs/btrfs/subpage.c
> +++ b/fs/btrfs/subpage.c
> @@ -652,7 +652,7 @@ IMPLEMENT_BTRFS_PAGE_OPS(checked, folio_set_checked, folio_clear_checked,
>                                                                         \
>         GET_SUBPAGE_BITMAP(subpage, fs_info, name, &bitmap);            \
>         btrfs_warn(fs_info,                                             \
> -       "dumpping bitmap start=%llu len=%u folio=%llu" #name "_bitmap=%*pbl", \
> +       "dumpping bitmap start=%llu len=%u folio=%llu " #name "_bitmap=%*pbl", \
>                    start, len, folio_pos(folio),                        \
>                    fs_info->sectors_per_page, &bitmap);                 \
>  }
> @@ -717,6 +717,7 @@ void btrfs_folio_set_lock(const struct btrfs_fs_info *fs_info,
>         /* Target range should not yet be locked. */
>         if (unlikely(!bitmap_test_range_all_zero(subpage->bitmaps, start_bit, nbits))) {
>                 subpage_dump_bitmap(fs_info, folio, locked, start, len);
> +               btrfs_warn(fs_info, "nr_locked=%u\n", atomic_read(&subpage->nr_locked));
>                 ASSERT(bitmap_test_range_all_zero(subpage->bitmaps, start_bit, nbits));
>         }
>         bitmap_set(subpage->bitmaps, start_bit, nbits);
> --
> 2.47.1
>
>
Qu Wenruo Jan. 10, 2025, 11:17 p.m. UTC | #3
在 2025/1/11 02:50, David Sterba 写道:
> On Fri, Jan 10, 2025 at 02:01:39PM +1030, Qu Wenruo wrote:
>> @@ -1561,6 +1570,13 @@ static int extent_writepage(struct folio *folio, struct btrfs_bio_ctrl *bio_ctrl
>>   				  PAGE_SIZE, bio_ctrl, i_size);
>>   	if (ret == 1)
>>   		return 0;
>> +	if (ret < 0)
>> +		btrfs_err_rl(fs_info,
>> +"failed to submit blocks, root=%lld inode=%llu folio=%llu submit_bitmap=%*pbl: %d",
>> +			     BTRFS_I(inode)->root->root_key.objectid,
>> +			     btrfs_ino(BTRFS_I(inode)),
>> +			     folio_pos(folio), fs_info->sectors_per_page,
>> +			     &bio_ctrl->submit_bitmap, ret);
> 
> I've merged my cleanup series, and there will be a minor conflict in
> this hunk,
> 
> @@ -1565,8 +1565,7 @@ static int extent_writepage(struct folio *folio, struct btrfs_bio_ctrl *bio_ctrl
>          if (ret < 0)
>                  btrfs_err_rl(fs_info,
>   "failed to submit blocks, root=%lld inode=%llu folio=%llu submit_bitmap=%*pbl: %d",
> -                            BTRFS_I(inode)->root->root_key.objectid,
> -                            btrfs_ino(BTRFS_I(inode)),
> +                            inode->root->root_key.objectid, btrfs_ino(inode),
>                               folio_pos(folio), fs_info->sectors_per_page,
>                               &bio_ctrl->submit_bitmap, ret);

Thanks for pointing this out.

I'll resolve the conflict when merging, along with the usage of 
btrfs_root_id() mentioned by Filipe.

Thanks,
Qu
> ---
diff mbox series

Patch

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 54081b1783fc..615f03fb5d97 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -1263,6 +1263,15 @@  static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode,
 						       wbc);
 			if (ret >= 0)
 				last_finished_delalloc_end = found_start + found_len;
+			if (unlikely(ret < 0))
+				btrfs_err_rl(fs_info,
+"failed to run delalloc range, root=%lld ino=%llu folio=%llu submit_bitmap=%*pbl start=%llu len=%u: %d",
+					     inode->root->root_key.objectid,
+					     btrfs_ino(inode),
+					     folio_pos(folio),
+					     fs_info->sectors_per_page,
+					     &bio_ctrl->submit_bitmap,
+					     found_start, found_len, ret);
 		} else {
 			/*
 			 * We've hit an error during previous delalloc range,
@@ -1561,6 +1570,13 @@  static int extent_writepage(struct folio *folio, struct btrfs_bio_ctrl *bio_ctrl
 				  PAGE_SIZE, bio_ctrl, i_size);
 	if (ret == 1)
 		return 0;
+	if (ret < 0)
+		btrfs_err_rl(fs_info,
+"failed to submit blocks, root=%lld inode=%llu folio=%llu submit_bitmap=%*pbl: %d",
+			     BTRFS_I(inode)->root->root_key.objectid,
+			     btrfs_ino(BTRFS_I(inode)),
+			     folio_pos(folio), fs_info->sectors_per_page,
+			     &bio_ctrl->submit_bitmap, ret);
 
 	bio_ctrl->wbc->nr_to_write--;
 
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index a450fc080ca3..c8145a588f45 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1134,6 +1134,10 @@  static void submit_uncompressed_range(struct btrfs_inode *inode,
 		if (locked_folio)
 			btrfs_folio_end_lock(inode->root->fs_info, locked_folio,
 					     start, async_extent->ram_size);
+		btrfs_err_rl(inode->root->fs_info,
+		"%s failed, root=%llu inode=%llu start=%llu len=%llu: %d",
+			     __func__, btrfs_root_id(inode->root),
+			     btrfs_ino(inode), start, async_extent->ram_size, ret);
 	}
 }
 
@@ -1246,7 +1250,7 @@  static void submit_one_async_extent(struct async_chunk *async_chunk,
 	free_async_extent_pages(async_extent);
 	if (async_chunk->blkcg_css)
 		kthread_associate_blkcg(NULL);
-	btrfs_debug(fs_info,
+	btrfs_debug_rl(fs_info,
 "async extent submission failed root=%lld inode=%llu start=%llu len=%llu ret=%d",
 		    btrfs_root_id(root), btrfs_ino(inode), start,
 		    async_extent->ram_size, ret);
@@ -1580,6 +1584,10 @@  static noinline int cow_file_range(struct btrfs_inode *inode,
 		btrfs_qgroup_free_data(inode, NULL, start + cur_alloc_size,
 				       end - start - cur_alloc_size + 1, NULL);
 	}
+	btrfs_err_rl(fs_info,
+		     "%s failed, root=%llu inode=%llu start=%llu len=%llu: %d",
+		     __func__, btrfs_root_id(inode->root),
+		     btrfs_ino(inode), orig_start, end + 1 - orig_start, ret);
 	return ret;
 }
 
@@ -2327,6 +2335,10 @@  static noinline int run_delalloc_nocow(struct btrfs_inode *inode,
 		btrfs_qgroup_free_data(inode, NULL, cur_offset, end - cur_offset + 1, NULL);
 	}
 	btrfs_free_path(path);
+	btrfs_err_rl(fs_info,
+		     "%s failed, root=%llu inode=%llu start=%llu len=%llu: %d",
+		     __func__, btrfs_root_id(inode->root),
+		     btrfs_ino(inode), start, end + 1 - start, ret);
 	return ret;
 }
 
diff --git a/fs/btrfs/subpage.c b/fs/btrfs/subpage.c
index d692bc34a3af..7f47bc61389c 100644
--- a/fs/btrfs/subpage.c
+++ b/fs/btrfs/subpage.c
@@ -652,7 +652,7 @@  IMPLEMENT_BTRFS_PAGE_OPS(checked, folio_set_checked, folio_clear_checked,
 									\
 	GET_SUBPAGE_BITMAP(subpage, fs_info, name, &bitmap);		\
 	btrfs_warn(fs_info,						\
-	"dumpping bitmap start=%llu len=%u folio=%llu" #name "_bitmap=%*pbl", \
+	"dumpping bitmap start=%llu len=%u folio=%llu " #name "_bitmap=%*pbl", \
 		   start, len, folio_pos(folio),			\
 		   fs_info->sectors_per_page, &bitmap);			\
 }
@@ -717,6 +717,7 @@  void btrfs_folio_set_lock(const struct btrfs_fs_info *fs_info,
 	/* Target range should not yet be locked. */
 	if (unlikely(!bitmap_test_range_all_zero(subpage->bitmaps, start_bit, nbits))) {
 		subpage_dump_bitmap(fs_info, folio, locked, start, len);
+		btrfs_warn(fs_info, "nr_locked=%u\n", atomic_read(&subpage->nr_locked));
 		ASSERT(bitmap_test_range_all_zero(subpage->bitmaps, start_bit, nbits));
 	}
 	bitmap_set(subpage->bitmaps, start_bit, nbits);