Message ID | ac9345c0d31fc1b669ccfe436e49883ed998ab07.1736479224.git.wqu@suse.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | btrfs: error handling fixes | expand |
On Fri, Jan 10, 2025 at 02:01:39PM +1030, Qu Wenruo wrote: > @@ -1561,6 +1570,13 @@ static int extent_writepage(struct folio *folio, struct btrfs_bio_ctrl *bio_ctrl > PAGE_SIZE, bio_ctrl, i_size); > if (ret == 1) > return 0; > + if (ret < 0) > + btrfs_err_rl(fs_info, > +"failed to submit blocks, root=%lld inode=%llu folio=%llu submit_bitmap=%*pbl: %d", > + BTRFS_I(inode)->root->root_key.objectid, > + btrfs_ino(BTRFS_I(inode)), > + folio_pos(folio), fs_info->sectors_per_page, > + &bio_ctrl->submit_bitmap, ret); I've merged my cleanup series, and there will be a minor conflict in this hunk, @@ -1565,8 +1565,7 @@ static int extent_writepage(struct folio *folio, struct btrfs_bio_ctrl *bio_ctrl if (ret < 0) btrfs_err_rl(fs_info, "failed to submit blocks, root=%lld inode=%llu folio=%llu submit_bitmap=%*pbl: %d", - BTRFS_I(inode)->root->root_key.objectid, - btrfs_ino(BTRFS_I(inode)), + inode->root->root_key.objectid, btrfs_ino(inode), folio_pos(folio), fs_info->sectors_per_page, &bio_ctrl->submit_bitmap, ret); ---
On Fri, Jan 10, 2025 at 3:32 AM Qu Wenruo <wqu@suse.com> wrote: > > All the error handling bugs I hit so far are all -ENOSPC from either: > > - cow_file_range() > - run_delalloc_nocow() > - submit_uncompressed_range() > > Previously when those functions failed, there is no error message at > all, making the debugging much harder. > > So here we introduce extra error messages for: > > - cow_file_range() > - run_delalloc_nocow() > - submit_uncompressed_range() > - writepage_delalloc() when btrfs_run_delalloc_range() failed > - extent_writepage() when extent_writepage_io() failed > > One example of the new debug error messages is the following one: > > run fstests generic/750 at 2024-12-08 12:41:41 > BTRFS: device fsid 461b25f5-e240-4543-8deb-e7c2bd01a6d3 devid 1 transid 8 /dev/mapper/test-scratch1 (253:4) scanned by mount (2436600) > BTRFS info (device dm-4): first mount of filesystem 461b25f5-e240-4543-8deb-e7c2bd01a6d3 > BTRFS info (device dm-4): using crc32c (crc32c-arm64) checksum algorithm > BTRFS info (device dm-4): forcing free space tree for sector size 4096 with page size 65536 > BTRFS info (device dm-4): using free-space-tree > BTRFS warning (device dm-4): read-write for sector size 4096 with page size 65536 is experimental > BTRFS info (device dm-4): checking UUID tree > BTRFS error (device dm-4): cow_file_range failed, root=363 inode=412 start=503808 len=98304: -28 > BTRFS error (device dm-4): run_delalloc_nocow failed, root=363 inode=412 start=503808 len=98304: -28 > BTRFS error (device dm-4): failed to run delalloc range, root=363 ino=412 folio=458752 submit_bitmap=11-15 start=503808 len=98304: -28 > > Which shows an error from cow_file_range() which is called inside a > nocow write attempt, along with the extra bitmap from > writepage_delalloc(). > > Reviewed-by: Boris Burkov <boris@bur.io> > Signed-off-by: Qu Wenruo <wqu@suse.com> > --- > fs/btrfs/extent_io.c | 16 ++++++++++++++++ > fs/btrfs/inode.c | 14 +++++++++++++- > fs/btrfs/subpage.c | 3 ++- > 3 files changed, 31 insertions(+), 2 deletions(-) > > diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c > index 54081b1783fc..615f03fb5d97 100644 > --- a/fs/btrfs/extent_io.c > +++ b/fs/btrfs/extent_io.c > @@ -1263,6 +1263,15 @@ static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode, > wbc); > if (ret >= 0) > last_finished_delalloc_end = found_start + found_len; > + if (unlikely(ret < 0)) > + btrfs_err_rl(fs_info, > +"failed to run delalloc range, root=%lld ino=%llu folio=%llu submit_bitmap=%*pbl start=%llu len=%u: %d", > + inode->root->root_key.objectid, You can use btrfs_root_id(), it's shorter and it's what we use everywhere else. > + btrfs_ino(inode), > + folio_pos(folio), > + fs_info->sectors_per_page, > + &bio_ctrl->submit_bitmap, > + found_start, found_len, ret); > } else { > /* > * We've hit an error during previous delalloc range, > @@ -1561,6 +1570,13 @@ static int extent_writepage(struct folio *folio, struct btrfs_bio_ctrl *bio_ctrl > PAGE_SIZE, bio_ctrl, i_size); > if (ret == 1) > return 0; > + if (ret < 0) > + btrfs_err_rl(fs_info, > +"failed to submit blocks, root=%lld inode=%llu folio=%llu submit_bitmap=%*pbl: %d", > + BTRFS_I(inode)->root->root_key.objectid, Same here. > + btrfs_ino(BTRFS_I(inode)), > + folio_pos(folio), fs_info->sectors_per_page, > + &bio_ctrl->submit_bitmap, ret); > > bio_ctrl->wbc->nr_to_write--; > > diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c > index a450fc080ca3..c8145a588f45 100644 > --- a/fs/btrfs/inode.c > +++ b/fs/btrfs/inode.c > @@ -1134,6 +1134,10 @@ static void submit_uncompressed_range(struct btrfs_inode *inode, > if (locked_folio) > btrfs_folio_end_lock(inode->root->fs_info, locked_folio, > start, async_extent->ram_size); > + btrfs_err_rl(inode->root->fs_info, > + "%s failed, root=%llu inode=%llu start=%llu len=%llu: %d", > + __func__, btrfs_root_id(inode->root), And here, and below, it's using btrfs_root_id(). So we should be consistent and use it everywhere. Thanks. > + btrfs_ino(inode), start, async_extent->ram_size, ret); > } > } > > @@ -1246,7 +1250,7 @@ static void submit_one_async_extent(struct async_chunk *async_chunk, > free_async_extent_pages(async_extent); > if (async_chunk->blkcg_css) > kthread_associate_blkcg(NULL); > - btrfs_debug(fs_info, > + btrfs_debug_rl(fs_info, > "async extent submission failed root=%lld inode=%llu start=%llu len=%llu ret=%d", > btrfs_root_id(root), btrfs_ino(inode), start, > async_extent->ram_size, ret); > @@ -1580,6 +1584,10 @@ static noinline int cow_file_range(struct btrfs_inode *inode, > btrfs_qgroup_free_data(inode, NULL, start + cur_alloc_size, > end - start - cur_alloc_size + 1, NULL); > } > + btrfs_err_rl(fs_info, > + "%s failed, root=%llu inode=%llu start=%llu len=%llu: %d", > + __func__, btrfs_root_id(inode->root), > + btrfs_ino(inode), orig_start, end + 1 - orig_start, ret); > return ret; > } > > @@ -2327,6 +2335,10 @@ static noinline int run_delalloc_nocow(struct btrfs_inode *inode, > btrfs_qgroup_free_data(inode, NULL, cur_offset, end - cur_offset + 1, NULL); > } > btrfs_free_path(path); > + btrfs_err_rl(fs_info, > + "%s failed, root=%llu inode=%llu start=%llu len=%llu: %d", > + __func__, btrfs_root_id(inode->root), > + btrfs_ino(inode), start, end + 1 - start, ret); > return ret; > } > > diff --git a/fs/btrfs/subpage.c b/fs/btrfs/subpage.c > index d692bc34a3af..7f47bc61389c 100644 > --- a/fs/btrfs/subpage.c > +++ b/fs/btrfs/subpage.c > @@ -652,7 +652,7 @@ IMPLEMENT_BTRFS_PAGE_OPS(checked, folio_set_checked, folio_clear_checked, > \ > GET_SUBPAGE_BITMAP(subpage, fs_info, name, &bitmap); \ > btrfs_warn(fs_info, \ > - "dumpping bitmap start=%llu len=%u folio=%llu" #name "_bitmap=%*pbl", \ > + "dumpping bitmap start=%llu len=%u folio=%llu " #name "_bitmap=%*pbl", \ > start, len, folio_pos(folio), \ > fs_info->sectors_per_page, &bitmap); \ > } > @@ -717,6 +717,7 @@ void btrfs_folio_set_lock(const struct btrfs_fs_info *fs_info, > /* Target range should not yet be locked. */ > if (unlikely(!bitmap_test_range_all_zero(subpage->bitmaps, start_bit, nbits))) { > subpage_dump_bitmap(fs_info, folio, locked, start, len); > + btrfs_warn(fs_info, "nr_locked=%u\n", atomic_read(&subpage->nr_locked)); > ASSERT(bitmap_test_range_all_zero(subpage->bitmaps, start_bit, nbits)); > } > bitmap_set(subpage->bitmaps, start_bit, nbits); > -- > 2.47.1 > >
在 2025/1/11 02:50, David Sterba 写道: > On Fri, Jan 10, 2025 at 02:01:39PM +1030, Qu Wenruo wrote: >> @@ -1561,6 +1570,13 @@ static int extent_writepage(struct folio *folio, struct btrfs_bio_ctrl *bio_ctrl >> PAGE_SIZE, bio_ctrl, i_size); >> if (ret == 1) >> return 0; >> + if (ret < 0) >> + btrfs_err_rl(fs_info, >> +"failed to submit blocks, root=%lld inode=%llu folio=%llu submit_bitmap=%*pbl: %d", >> + BTRFS_I(inode)->root->root_key.objectid, >> + btrfs_ino(BTRFS_I(inode)), >> + folio_pos(folio), fs_info->sectors_per_page, >> + &bio_ctrl->submit_bitmap, ret); > > I've merged my cleanup series, and there will be a minor conflict in > this hunk, > > @@ -1565,8 +1565,7 @@ static int extent_writepage(struct folio *folio, struct btrfs_bio_ctrl *bio_ctrl > if (ret < 0) > btrfs_err_rl(fs_info, > "failed to submit blocks, root=%lld inode=%llu folio=%llu submit_bitmap=%*pbl: %d", > - BTRFS_I(inode)->root->root_key.objectid, > - btrfs_ino(BTRFS_I(inode)), > + inode->root->root_key.objectid, btrfs_ino(inode), > folio_pos(folio), fs_info->sectors_per_page, > &bio_ctrl->submit_bitmap, ret); Thanks for pointing this out. I'll resolve the conflict when merging, along with the usage of btrfs_root_id() mentioned by Filipe. Thanks, Qu > ---
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 54081b1783fc..615f03fb5d97 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -1263,6 +1263,15 @@ static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode, wbc); if (ret >= 0) last_finished_delalloc_end = found_start + found_len; + if (unlikely(ret < 0)) + btrfs_err_rl(fs_info, +"failed to run delalloc range, root=%lld ino=%llu folio=%llu submit_bitmap=%*pbl start=%llu len=%u: %d", + inode->root->root_key.objectid, + btrfs_ino(inode), + folio_pos(folio), + fs_info->sectors_per_page, + &bio_ctrl->submit_bitmap, + found_start, found_len, ret); } else { /* * We've hit an error during previous delalloc range, @@ -1561,6 +1570,13 @@ static int extent_writepage(struct folio *folio, struct btrfs_bio_ctrl *bio_ctrl PAGE_SIZE, bio_ctrl, i_size); if (ret == 1) return 0; + if (ret < 0) + btrfs_err_rl(fs_info, +"failed to submit blocks, root=%lld inode=%llu folio=%llu submit_bitmap=%*pbl: %d", + BTRFS_I(inode)->root->root_key.objectid, + btrfs_ino(BTRFS_I(inode)), + folio_pos(folio), fs_info->sectors_per_page, + &bio_ctrl->submit_bitmap, ret); bio_ctrl->wbc->nr_to_write--; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index a450fc080ca3..c8145a588f45 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1134,6 +1134,10 @@ static void submit_uncompressed_range(struct btrfs_inode *inode, if (locked_folio) btrfs_folio_end_lock(inode->root->fs_info, locked_folio, start, async_extent->ram_size); + btrfs_err_rl(inode->root->fs_info, + "%s failed, root=%llu inode=%llu start=%llu len=%llu: %d", + __func__, btrfs_root_id(inode->root), + btrfs_ino(inode), start, async_extent->ram_size, ret); } } @@ -1246,7 +1250,7 @@ static void submit_one_async_extent(struct async_chunk *async_chunk, free_async_extent_pages(async_extent); if (async_chunk->blkcg_css) kthread_associate_blkcg(NULL); - btrfs_debug(fs_info, + btrfs_debug_rl(fs_info, "async extent submission failed root=%lld inode=%llu start=%llu len=%llu ret=%d", btrfs_root_id(root), btrfs_ino(inode), start, async_extent->ram_size, ret); @@ -1580,6 +1584,10 @@ static noinline int cow_file_range(struct btrfs_inode *inode, btrfs_qgroup_free_data(inode, NULL, start + cur_alloc_size, end - start - cur_alloc_size + 1, NULL); } + btrfs_err_rl(fs_info, + "%s failed, root=%llu inode=%llu start=%llu len=%llu: %d", + __func__, btrfs_root_id(inode->root), + btrfs_ino(inode), orig_start, end + 1 - orig_start, ret); return ret; } @@ -2327,6 +2335,10 @@ static noinline int run_delalloc_nocow(struct btrfs_inode *inode, btrfs_qgroup_free_data(inode, NULL, cur_offset, end - cur_offset + 1, NULL); } btrfs_free_path(path); + btrfs_err_rl(fs_info, + "%s failed, root=%llu inode=%llu start=%llu len=%llu: %d", + __func__, btrfs_root_id(inode->root), + btrfs_ino(inode), start, end + 1 - start, ret); return ret; } diff --git a/fs/btrfs/subpage.c b/fs/btrfs/subpage.c index d692bc34a3af..7f47bc61389c 100644 --- a/fs/btrfs/subpage.c +++ b/fs/btrfs/subpage.c @@ -652,7 +652,7 @@ IMPLEMENT_BTRFS_PAGE_OPS(checked, folio_set_checked, folio_clear_checked, \ GET_SUBPAGE_BITMAP(subpage, fs_info, name, &bitmap); \ btrfs_warn(fs_info, \ - "dumpping bitmap start=%llu len=%u folio=%llu" #name "_bitmap=%*pbl", \ + "dumpping bitmap start=%llu len=%u folio=%llu " #name "_bitmap=%*pbl", \ start, len, folio_pos(folio), \ fs_info->sectors_per_page, &bitmap); \ } @@ -717,6 +717,7 @@ void btrfs_folio_set_lock(const struct btrfs_fs_info *fs_info, /* Target range should not yet be locked. */ if (unlikely(!bitmap_test_range_all_zero(subpage->bitmaps, start_bit, nbits))) { subpage_dump_bitmap(fs_info, folio, locked, start, len); + btrfs_warn(fs_info, "nr_locked=%u\n", atomic_read(&subpage->nr_locked)); ASSERT(bitmap_test_range_all_zero(subpage->bitmaps, start_bit, nbits)); } bitmap_set(subpage->bitmaps, start_bit, nbits);