Message ID | 20230309090526.332550-6-hch@lst.de (mailing list archive)
---|---
State | New, archived |
Series | [01/20] btrfs: mark extent_buffer_under_io static
Looks good,
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
On 2023/3/9 17:05, Christoph Hellwig wrote:
> The btrfs_bio_ctrl machinery is overkill for reading extent_buffers
> as we always operate on PAGE SIZE chunks (or one smaller one for the
> subpage case) that are contiguous and are guaranteed to fit into a
> single bio.

This is the legacy left by older stripe boundary based bio split code.
(Please note that metadata crossing stripe boundaries is not ideal and
is very rare nowadays, but we should still support it.)

But now we have btrfs_submit_bio() handling such cases, thus it should
be fine.

> Replace it with open coded btrfs_bio_alloc, __bio_add_page
> and btrfs_submit_bio calls in a helper function shared between
> the subpage and node size >= PAGE_SIZE cases.
>
> Signed-off-by: Christoph Hellwig <hch@lst.de>

Reviewed-by: Qu Wenruo <wqu@suse.com>

Thanks,
Qu
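For readers without the history: the comment removed by the patch notes that submit_one_bio() could fire early when a bio ended at a stripe boundary, which is why parent_check had to travel inside btrfs_bio_ctrl. Below is a minimal, self-contained simulation of that legacy splitting behaviour — the userspace framing and all names are illustrative, not the removed kernel code.

	#include <stdio.h>
	#include <stdint.h>

	#define STRIPE_LEN (64 * 1024ULL)	/* the usual 64K stripe length */

	/*
	 * Simulated "submit": under the old scheme this could fire in the
	 * middle of an extent buffer read whenever a stripe boundary was
	 * reached, so any per-bio verification info (like parent_check)
	 * had to already be attached at that point.
	 */
	static void submit_one_bio(uint64_t start, uint64_t len)
	{
		printf("submit bio: [%llu, %llu)\n",
		       (unsigned long long)start,
		       (unsigned long long)(start + len));
	}

	int main(void)
	{
		/* a 16K node that is only 4K aligned, as old allocators could produce */
		uint64_t logical = 56 << 10;
		uint64_t len = 16 << 10;
		uint64_t bio_start = logical, bio_len = 0;

		while (len) {
			/* bytes left before the next stripe boundary */
			uint64_t boundary = STRIPE_LEN - (logical % STRIPE_LEN);
			uint64_t cur = len < boundary ? len : boundary;

			bio_len += cur;
			if (cur == boundary) {	/* early submit at the boundary */
				submit_one_bio(bio_start, bio_len);
				bio_start += bio_len;
				bio_len = 0;
			}
			logical += cur;
			len -= cur;
		}
		if (bio_len)
			submit_one_bio(bio_start, bio_len);
		return 0;
	}

Running it splits the 16K read at 56K into [56K, 64K) and [64K, 72K). With btrfs_submit_bio() now performing any required splitting internally, the read path can instead build a single bio per extent buffer, as the patch does.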
On Fri, Mar 10, 2023 at 03:42:04PM +0800, Qu Wenruo wrote:
> This is the legacy left by older stripe boundary based bio split code.
> (Please note that metadata crossing stripe boundaries is not ideal and
> is very rare nowadays, but we should still support it.)

How can metadata cross a stripe boundary?
On 2023/3/10 15:47, Christoph Hellwig wrote:
> On Fri, Mar 10, 2023 at 03:42:04PM +0800, Qu Wenruo wrote:
>> This is the legacy left by older stripe boundary based bio split code.
>> (Please note that metadata crossing stripe boundaries is not ideal and
>> is very rare nowadays, but we should still support it.)
>
> How can metadata cross a stripe boundary?

Like this inside a RAID0 bg:

0       32K      64K      96K      128K
|             |//|//|              |

There is an old chunk allocator behavior that we can have a chunk start
at some physical bytenr which is only 4K aligned, but not yet STRIPE_LEN
aligned.

In that case, the extent allocator can give us some range which crosses
a stripe boundary.

That's also why we have metadata-crossing-stripe-boundary checks in
btrfs check and scrub.

Thanks,
Qu
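To put numbers on the diagram, here is a minimal, self-contained sketch of the boundary arithmetic, assuming stripe boundaries are measured from the chunk's start; STRIPE_LEN and the helper name are illustrative (the real checks live in btrfs check and scrub), not actual kernel code.

	#include <stdio.h>
	#include <stdint.h>
	#include <stdbool.h>

	#define STRIPE_LEN (64 * 1024ULL)	/* the usual 64K stripe length */

	/*
	 * Does [start, start + len) cross a stripe boundary? Boundaries are
	 * taken relative to the chunk's start, so a chunk start with only
	 * 4K alignment shifts them against the extents inside the chunk.
	 */
	static bool crosses_stripe(uint64_t chunk_start, uint64_t start,
				   uint64_t len)
	{
		uint64_t off = start - chunk_start;

		return off / STRIPE_LEN != (off + len - 1) / STRIPE_LEN;
	}

	int main(void)
	{
		/* chunk start only 4K aligned: boundaries at 4K + n * 64K */
		uint64_t chunk_start = 4096;

		/* a 16K node at 56K spans [56K, 72K), crossing the 68K boundary */
		printf("crosses: %d\n",
		       (int)crosses_stripe(chunk_start, 56 << 10, 16 << 10));
		return 0;
	}

Either a misaligned chunk start or a tree block that is not naturally aligned — both products of very old allocators, per the discussion here — can produce such a crossing.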
On Fri, Mar 10, 2023 at 04:02:02PM +0800, Qu Wenruo wrote:
> Like this inside a RAID0 bg:
>
> 0       32K      64K      96K      128K
> |             |//|//|              |
>
> There is an old chunk allocator behavior that we can have a chunk start
> at some physical bytenr which is only 4K aligned, but not yet STRIPE_LEN
> aligned.
>
> In that case, the extent allocator can give us some range which crosses
> a stripe boundary.

Ewww, ok.  So the metadata isn't required to be naturally aligned.
On 2023/3/10 16:03, Christoph Hellwig wrote:
> On Fri, Mar 10, 2023 at 04:02:02PM +0800, Qu Wenruo wrote:
>> Like this inside a RAID0 bg:
>>
>> 0       32K      64K      96K      128K
>> |             |//|//|              |
>>
>> There is an old chunk allocator behavior that we can have a chunk start
>> at some physical bytenr which is only 4K aligned, but not yet STRIPE_LEN
>> aligned.
>>
>> In that case, the extent allocator can give us some range which crosses
>> a stripe boundary.
>
> Ewww, ok.  So the metadata isn't required to be naturally aligned.

Yes, but you don't need to spend too much time on that.
We haven't hit such a case for a long, long time.

Unless the fs is super old and never balanced by any currently supported
LTS kernel, it should be very rare to hit.

Thanks,
Qu
On Fri, Mar 10, 2023 at 04:07:42PM +0800, Qu Wenruo wrote:
> Yes, but you don't need to spend too much time on that.
> We haven't hit such a case for a long, long time.
>
> Unless the fs is super old and never balanced by any currently supported
> LTS kernel, it should be very rare to hit.

Well, if it is a valid format we'll need to handle it.  And we probably
want a test case to exercise the code path to make sure it doesn't break
when it is so rarely exercised.
On 2023/3/10 16:15, Christoph Hellwig wrote:
> On Fri, Mar 10, 2023 at 04:07:42PM +0800, Qu Wenruo wrote:
>> Yes, but you don't need to spend too much time on that.
>> We haven't hit such a case for a long, long time.
>>
>> Unless the fs is super old and never balanced by any currently supported
>> LTS kernel, it should be very rare to hit.
>
> Well, if it is a valid format we'll need to handle it.  And we probably
> want a test case to exercise the code path to make sure it doesn't break
> when it is so rarely exercised.

Then we're already in big trouble:

- Fstests doesn't accept binary dumps
- We don't have any good way to create such slightly unaligned chunks

Older progs may not even compile, and any currently supported LTS kernel
won't create such a chunk at all...

Thanks,
Qu
On Fri, Mar 10, 2023 at 8:02 AM Christoph Hellwig <hch@lst.de> wrote:
> On Fri, Mar 10, 2023 at 03:42:04PM +0800, Qu Wenruo wrote:
>> This is the legacy left by older stripe boundary based bio split code.
>> (Please note that metadata crossing stripe boundaries is not ideal and
>> is very rare nowadays, but we should still support it.)
>
> How can metadata cross a stripe boundary?

Probably with mixed block groups (mkfs.btrfs -O mixed-bg) you can get
that, as block groups are used to allocate both data extents (variable
length) and metadata extents (fixed length).
On 2023/3/10 18:54, Filipe Manana wrote:
> On Fri, Mar 10, 2023 at 8:02 AM Christoph Hellwig <hch@lst.de> wrote:
>> On Fri, Mar 10, 2023 at 03:42:04PM +0800, Qu Wenruo wrote:
>>> This is the legacy left by older stripe boundary based bio split code.
>>> (Please note that metadata crossing stripe boundaries is not ideal and
>>> is very rare nowadays, but we should still support it.)
>>
>> How can metadata cross a stripe boundary?
>
> Probably with mixed block groups (mkfs.btrfs -O mixed-bg) you can get
> that, as block groups are used to allocate both data extents (variable
> length) and metadata extents (fixed length).

Mixed block groups require nodesize to be the same as sectorsize, thus
not possible.

Thanks,
Qu
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 26d8162bee000d..5169e73ffea647 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -98,22 +98,12 @@ void btrfs_extent_buffer_leak_debug_check(struct btrfs_fs_info *fs_info)
  */
 struct btrfs_bio_ctrl {
 	struct btrfs_bio *bbio;
-	int mirror_num;
 	enum btrfs_compression_type compress_type;
 	u32 len_to_oe_boundary;
 	blk_opf_t opf;
 	btrfs_bio_end_io_t end_io_func;
 	struct writeback_control *wbc;
 
-	/*
-	 * This is for metadata read, to provide the extra needed verification
-	 * info.  This has to be provided for submit_one_bio(), as
-	 * submit_one_bio() can submit a bio if it ends at stripe boundary.  If
-	 * no such parent_check is provided, the metadata can hit false alert at
-	 * endio time.
-	 */
-	struct btrfs_tree_parent_check *parent_check;
-
 	/*
 	 * Tell writepage not to lock the state bits for this range, it still
 	 * does the unlocking.
@@ -124,7 +114,6 @@ struct btrfs_bio_ctrl {
 static void submit_one_bio(struct btrfs_bio_ctrl *bio_ctrl)
 {
 	struct btrfs_bio *bbio = bio_ctrl->bbio;
-	int mirror_num = bio_ctrl->mirror_num;
 
 	if (!bbio)
 		return;
@@ -132,25 +121,14 @@ static void submit_one_bio(struct btrfs_bio_ctrl *bio_ctrl)
 	/* Caller should ensure the bio has at least some range added */
 	ASSERT(bbio->bio.bi_iter.bi_size);
 
-	if (!is_data_inode(&bbio->inode->vfs_inode)) {
-		if (btrfs_op(&bbio->bio) != BTRFS_MAP_WRITE) {
-			/*
-			 * For metadata read, we should have the parent_check,
-			 * and copy it to bbio for metadata verification.
-			 */
-			ASSERT(bio_ctrl->parent_check);
-			memcpy(&bbio->parent_check,
-			       bio_ctrl->parent_check,
-			       sizeof(struct btrfs_tree_parent_check));
-		}
+	if (!is_data_inode(&bbio->inode->vfs_inode))
 		bbio->bio.bi_opf |= REQ_META;
-	}
 
 	if (btrfs_op(&bbio->bio) == BTRFS_MAP_READ &&
 	    bio_ctrl->compress_type != BTRFS_COMPRESS_NONE)
-		btrfs_submit_compressed_read(bbio, mirror_num);
+		btrfs_submit_compressed_read(bbio, 0);
 	else
-		btrfs_submit_bio(bbio, mirror_num);
+		btrfs_submit_bio(bbio, 0);
 
 	/* The bbio is owned by the end_io handler now */
 	bio_ctrl->bbio = NULL;
@@ -4241,6 +4219,36 @@ void set_extent_buffer_uptodate(struct extent_buffer *eb)
 	}
 }
 
+static void __read_extent_buffer_pages(struct extent_buffer *eb, int mirror_num,
+				       struct btrfs_tree_parent_check *check)
+{
+	int num_pages = num_extent_pages(eb), i;
+	struct btrfs_bio *bbio;
+
+	clear_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags);
+	eb->read_mirror = 0;
+	atomic_set(&eb->io_pages, num_pages);
+	check_buffer_tree_ref(eb);
+
+	bbio = btrfs_bio_alloc(INLINE_EXTENT_BUFFER_PAGES,
+			       REQ_OP_READ | REQ_META,
+			       BTRFS_I(eb->fs_info->btree_inode),
+			       end_bio_extent_readpage, NULL);
+	bbio->bio.bi_iter.bi_sector = eb->start >> SECTOR_SHIFT;
+	bbio->file_offset = eb->start;
+	memcpy(&bbio->parent_check, check, sizeof(*check));
+	if (eb->fs_info->nodesize < PAGE_SIZE) {
+		__bio_add_page(&bbio->bio, eb->pages[0], eb->len,
+			       eb->start - page_offset(eb->pages[0]));
+	} else {
+		for (i = 0; i < num_pages; i++) {
+			ClearPageError(eb->pages[i]);
+			__bio_add_page(&bbio->bio, eb->pages[i], PAGE_SIZE, 0);
+		}
+	}
+	btrfs_submit_bio(bbio, mirror_num);
+}
+
 static int read_extent_buffer_subpage(struct extent_buffer *eb, int wait,
 				      int mirror_num,
 				      struct btrfs_tree_parent_check *check)
@@ -4249,11 +4257,6 @@ static int read_extent_buffer_subpage(struct extent_buffer *eb, int wait,
 	struct extent_io_tree *io_tree;
 	struct page *page = eb->pages[0];
 	struct extent_state *cached_state = NULL;
-	struct btrfs_bio_ctrl bio_ctrl = {
-		.opf = REQ_OP_READ,
-		.mirror_num = mirror_num,
-		.parent_check = check,
-	};
 	int ret;
 
 	ASSERT(!test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags));
@@ -4281,18 +4284,10 @@ static int read_extent_buffer_subpage(struct extent_buffer *eb, int wait,
 		return 0;
 	}
 
-	clear_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags);
-	eb->read_mirror = 0;
-	atomic_set(&eb->io_pages, 1);
-	check_buffer_tree_ref(eb);
-	bio_ctrl.end_io_func = end_bio_extent_readpage;
-
 	btrfs_subpage_clear_error(fs_info, page, eb->start, eb->len);
-
 	btrfs_subpage_start_reader(fs_info, page, eb->start, eb->len);
-	submit_extent_page(&bio_ctrl, eb->start, page, eb->len,
-			   eb->start - page_offset(page));
-	submit_one_bio(&bio_ctrl);
+
+	__read_extent_buffer_pages(eb, mirror_num, check);
 	if (wait != WAIT_COMPLETE) {
 		free_extent_state(cached_state);
 		return 0;
@@ -4313,11 +4308,6 @@ int read_extent_buffer_pages(struct extent_buffer *eb, int wait, int mirror_num,
 	int locked_pages = 0;
 	int all_uptodate = 1;
 	int num_pages;
-	struct btrfs_bio_ctrl bio_ctrl = {
-		.opf = REQ_OP_READ,
-		.mirror_num = mirror_num,
-		.parent_check = check,
-	};
 
 	if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))
 		return 0;
@@ -4367,24 +4357,7 @@ int read_extent_buffer_pages(struct extent_buffer *eb, int wait, int mirror_num,
 		goto unlock_exit;
 	}
 
-	clear_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags);
-	eb->read_mirror = 0;
-	atomic_set(&eb->io_pages, num_pages);
-	/*
-	 * It is possible for release_folio to clear the TREE_REF bit before we
-	 * set io_pages.  See check_buffer_tree_ref for a more detailed comment.
-	 */
-	check_buffer_tree_ref(eb);
-	bio_ctrl.end_io_func = end_bio_extent_readpage;
-	for (i = 0; i < num_pages; i++) {
-		page = eb->pages[i];
-
-		ClearPageError(page);
-		submit_extent_page(&bio_ctrl, page_offset(page), page,
-				   PAGE_SIZE, 0);
-	}
-
-	submit_one_bio(&bio_ctrl);
+	__read_extent_buffer_pages(eb, mirror_num, check);
 
 	if (wait != WAIT_COMPLETE)
 		return 0;
The btrfs_bio_ctrl machinery is overkill for reading extent_buffers
as we always operate on PAGE SIZE chunks (or one smaller one for the
subpage case) that are contiguous and are guaranteed to fit into a
single bio.

Replace it with open coded btrfs_bio_alloc, __bio_add_page and
btrfs_submit_bio calls in a helper function shared between the subpage
and node size >= PAGE_SIZE cases.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/btrfs/extent_io.c | 99 ++++++++++++++++----------------------------
 1 file changed, 36 insertions(+), 63 deletions(-)