Message ID | 41be25a4c77c46f8725c13636098f5f37e5c3d93.1716440169.git.wqu@suse.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | btrfs: extent-map: unify the members with btrfs_ordered_extent | expand |
On Thu, May 23, 2024 at 6:04 AM Qu Wenruo <wqu@suse.com> wrote: > > Introduce two new members for extent_map: > > - disk_bytenr > - offset > > Both are matching the members with the same name inside > btrfs_file_extent_items. > > For now this patch only touches those members when: > > - Reading btrfs_file_extent_items from disk > - Inserting new holes > - Merging two extent maps > With the new disk_bytenr and disk_num_bytes, doing merging would be a > little more complex, as we have 3 different cases: > > * Both extent maps are referring to the same data extents > |<----- data extent A ----->| > |<- em 1 ->|<- em 2 ->| > > * Both extent maps are referring to different data extents > |<-- data extent A -->|<-- data extent B -->| > |<- em 1 ->|<- em 2 ->| > > * One of the extent maps is referring to a merged and larger data > extent that covers both extent maps > > This is not really valid case other than some selftests. > So this test case would be removed. > > A new helper merge_ondisk_extents() would be introduced to handle > above valid cases. > > To properly assign values for those new members, a new btrfs_file_extent > parameter is introduced to all the involved call sites. > > - For NOCOW writes the btrfs_file_extent would be exposed from > can_nocow_file_extent(). > > - For other writes, the members can be easily calculated > As most of them have 0 offset and utilizing the whole on-disk data > extent. > The exception is encoded write, but thankfully that interface provided > offset directly and all other needed info. > > For now, both the old members (block_start/block_len/orig_start) are > co-existing with the new members (disk_bytenr/offset), meanwhile all the > critical code is still using the old members only. > > The cleanup would happen later after all the older and newer members are > properly validated. > > Signed-off-by: Qu Wenruo <wqu@suse.com> > Signed-off-by: David Sterba <dsterba@suse.com> > --- > fs/btrfs/defrag.c | 4 +++ > fs/btrfs/extent_map.c | 78 ++++++++++++++++++++++++++++++++++++++++--- > fs/btrfs/extent_map.h | 17 ++++++++++ > fs/btrfs/file-item.c | 9 ++++- > fs/btrfs/file.c | 1 + > fs/btrfs/inode.c | 57 +++++++++++++++++++++++++++---- > 6 files changed, 155 insertions(+), 11 deletions(-) > > diff --git a/fs/btrfs/defrag.c b/fs/btrfs/defrag.c > index 407ccec3e57e..242c5469f4ba 100644 > --- a/fs/btrfs/defrag.c > +++ b/fs/btrfs/defrag.c > @@ -709,6 +709,10 @@ static struct extent_map *defrag_get_extent(struct btrfs_inode *inode, > em->start = start; > em->orig_start = start; > em->block_start = EXTENT_MAP_HOLE; > + em->disk_bytenr = EXTENT_MAP_HOLE; > + em->disk_num_bytes = 0; > + em->ram_bytes = 0; > + em->offset = 0; > em->len = key.offset - start; > break; > } > diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c > index a9d60d1eade9..c7d2393692e6 100644 > --- a/fs/btrfs/extent_map.c > +++ b/fs/btrfs/extent_map.c > @@ -229,6 +229,60 @@ static bool mergeable_maps(const struct extent_map *prev, const struct extent_ma > return next->block_start == prev->block_start; > } > > +/* > + * Handle the ondisk data extents merge for @prev and @next. > + * > + * Only touches disk_bytenr/disk_num_bytes/offset/ram_bytes. > + * For now only uncompressed regular extent can be merged. > + * > + * @prev and @next will be both updated to point to the new merged range. > + * Thus one of them should be removed by the caller. > + */ > +static void merge_ondisk_extents(struct extent_map *prev, struct extent_map *next) > +{ > + u64 new_disk_bytenr; > + u64 new_disk_num_bytes; > + u64 new_offset; > + > + /* @prev and @next should not be compressed. */ > + ASSERT(!extent_map_is_compressed(prev)); > + ASSERT(!extent_map_is_compressed(next)); > + > + /* > + * There are two different cases where @prev and @next can be merged. > + * > + * 1) They are referring to the same data extent > + * |<----- data extent A ----->| > + * |<- prev ->|<- next ->| > + * > + * 2) They are referring to different data extents but still adjacent > + * > + * |<-- data extent A -->|<-- data extent B -->| > + * |<- prev ->|<- next ->| > + * > + * The calculation here always merge the data extents first, then update > + * @offset using the new data extents. > + * > + * For case 1), the merged data extent would be the same. > + * For case 2), we just merge the two data extents into one. > + */ > + new_disk_bytenr = min(prev->disk_bytenr, next->disk_bytenr); > + new_disk_num_bytes = max(prev->disk_bytenr + prev->disk_num_bytes, > + next->disk_bytenr + next->disk_num_bytes) - > + new_disk_bytenr; > + new_offset = prev->disk_bytenr + prev->offset - new_disk_bytenr; > + > + prev->disk_bytenr = new_disk_bytenr; > + prev->disk_num_bytes = new_disk_num_bytes; > + prev->ram_bytes = new_disk_num_bytes; > + prev->offset = new_offset; > + > + next->disk_bytenr = new_disk_bytenr; > + next->disk_num_bytes = new_disk_num_bytes; > + next->ram_bytes = new_disk_num_bytes; > + next->offset = new_offset; > +} > + > static void try_merge_map(struct btrfs_inode *inode, struct extent_map *em) > { > struct extent_map_tree *tree = &inode->extent_tree; > @@ -260,6 +314,9 @@ static void try_merge_map(struct btrfs_inode *inode, struct extent_map *em) > em->block_len += merge->block_len; > em->block_start = merge->block_start; > em->generation = max(em->generation, merge->generation); > + > + if (em->disk_bytenr < EXTENT_MAP_LAST_BYTE) > + merge_ondisk_extents(merge, em); > em->flags |= EXTENT_FLAG_MERGED; > > rb_erase(&merge->rb_node, &tree->root); > @@ -275,6 +332,8 @@ static void try_merge_map(struct btrfs_inode *inode, struct extent_map *em) > if (rb && can_merge_extent_map(merge) && mergeable_maps(em, merge)) { > em->len += merge->len; > em->block_len += merge->block_len; > + if (em->disk_bytenr < EXTENT_MAP_LAST_BYTE) > + merge_ondisk_extents(em, merge); > rb_erase(&merge->rb_node, &tree->root); > RB_CLEAR_NODE(&merge->rb_node); > em->generation = max(em->generation, merge->generation); > @@ -562,6 +621,7 @@ static noinline int merge_extent_mapping(struct btrfs_inode *inode, > !extent_map_is_compressed(em)) { > em->block_start += start_diff; > em->block_len = em->len; > + em->offset += start_diff; > } > return add_extent_mapping(inode, em, 0); > } > @@ -785,14 +845,18 @@ void btrfs_drop_extent_map_range(struct btrfs_inode *inode, u64 start, u64 end, > split->block_len = em->block_len; > else > split->block_len = split->len; > + split->disk_bytenr = em->disk_bytenr; > split->disk_num_bytes = max(split->block_len, > em->disk_num_bytes); > + split->offset = em->offset; > split->ram_bytes = em->ram_bytes; > } else { > split->orig_start = split->start; > split->block_len = 0; > split->block_start = em->block_start; > + split->disk_bytenr = em->disk_bytenr; > split->disk_num_bytes = 0; > + split->offset = 0; > split->ram_bytes = split->len; > } > > @@ -813,13 +877,14 @@ void btrfs_drop_extent_map_range(struct btrfs_inode *inode, u64 start, u64 end, > split->start = end; > split->len = em_end - end; > split->block_start = em->block_start; > + split->disk_bytenr = em->disk_bytenr; > split->flags = flags; > split->generation = gen; > > if (em->block_start < EXTENT_MAP_LAST_BYTE) { > split->disk_num_bytes = max(em->block_len, > em->disk_num_bytes); > - > + split->offset = em->offset + end - em->start; > split->ram_bytes = em->ram_bytes; > if (compressed) { > split->block_len = em->block_len; > @@ -832,10 +897,11 @@ void btrfs_drop_extent_map_range(struct btrfs_inode *inode, u64 start, u64 end, > split->orig_start = em->orig_start; > } > } else { > + split->disk_num_bytes = 0; > + split->offset = 0; > split->ram_bytes = split->len; > split->orig_start = split->start; > split->block_len = 0; > - split->disk_num_bytes = 0; Why move the assignment of ->disk_num_bytes ? This is sort of distracting, doing unnecessary changes. > } > > if (extent_map_in_tree(em)) { > @@ -989,10 +1055,12 @@ int split_extent_map(struct btrfs_inode *inode, u64 start, u64 len, u64 pre, > /* First, replace the em with a new extent_map starting from * em->start */ > split_pre->start = em->start; > split_pre->len = pre; > + split_pre->disk_bytenr = new_logical; We are already setting disk_bytenr to the same value a few lines below. > + split_pre->disk_num_bytes = split_pre->len; > + split_pre->offset = 0; > split_pre->orig_start = split_pre->start; > split_pre->block_start = new_logical; > split_pre->block_len = split_pre->len; > - split_pre->disk_num_bytes = split_pre->block_len; Here, where slit_pre->block_len has the same value as split->pre_len. This sort of apparently accidental change makes it harder to review. > split_pre->ram_bytes = split_pre->len; > split_pre->flags = flags; > split_pre->generation = em->generation; > @@ -1007,10 +1075,12 @@ int split_extent_map(struct btrfs_inode *inode, u64 start, u64 len, u64 pre, > /* Insert the middle extent_map. */ > split_mid->start = em->start + pre; > split_mid->len = em->len - pre; > + split_mid->disk_bytenr = em->block_start + pre; Same here. > + split_mid->disk_num_bytes = split_mid->len; > + split_mid->offset = 0; > split_mid->orig_start = split_mid->start; > split_mid->block_start = em->block_start + pre; > split_mid->block_len = split_mid->len; > - split_mid->disk_num_bytes = split_mid->block_len; Which relates to this. Otherwise it looks fine, and could be fixed up when cherry picked to for-next. Reviewed-by: Filipe Manana <fdmanana@suse.com> Thanks. > split_mid->ram_bytes = split_mid->len; > split_mid->flags = flags; > split_mid->generation = em->generation; > diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h > index 2b7bbffd594b..0b1a8e409377 100644 > --- a/fs/btrfs/extent_map.h > +++ b/fs/btrfs/extent_map.h > @@ -70,12 +70,29 @@ struct extent_map { > */ > u64 orig_start; > > + /* > + * The bytenr of the full on-disk extent. > + * > + * For regular extents it's btrfs_file_extent_item::disk_bytenr. > + * For holes it's EXTENT_MAP_HOLE and for inline extents it's > + * EXTENT_MAP_INLINE. > + */ > + u64 disk_bytenr; > + > /* > * The full on-disk extent length, matching > * btrfs_file_extent_item::disk_num_bytes. > */ > u64 disk_num_bytes; > > + /* > + * Offset inside the decompressed extent. > + * > + * For regular extents it's btrfs_file_extent_item::offset. > + * For holes and inline extents it's 0. > + */ > + u64 offset; > + > /* > * The decompressed size of the whole on-disk extent, matching > * btrfs_file_extent_item::ram_bytes. > diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c > index 430dce44ebd2..1298afea9503 100644 > --- a/fs/btrfs/file-item.c > +++ b/fs/btrfs/file-item.c > @@ -1295,12 +1295,17 @@ void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode, > em->len = btrfs_file_extent_end(path) - extent_start; > em->orig_start = extent_start - > btrfs_file_extent_offset(leaf, fi); > - em->disk_num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi); > bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); > if (bytenr == 0) { > em->block_start = EXTENT_MAP_HOLE; > + em->disk_bytenr = EXTENT_MAP_HOLE; > + em->disk_num_bytes = 0; > + em->offset = 0; > return; > } > + em->disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); > + em->disk_num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi); > + em->offset = btrfs_file_extent_offset(leaf, fi); > if (compress_type != BTRFS_COMPRESS_NONE) { > extent_map_set_compression(em, compress_type); > em->block_start = bytenr; > @@ -1317,8 +1322,10 @@ void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode, > ASSERT(extent_start == 0); > > em->block_start = EXTENT_MAP_INLINE; > + em->disk_bytenr = EXTENT_MAP_INLINE; > em->start = 0; > em->len = fs_info->sectorsize; > + em->offset = 0; > /* > * Initialize orig_start and block_len with the same values > * as in inode.c:btrfs_get_extent(). > diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c > index 7c42565da70c..5133c6705d74 100644 > --- a/fs/btrfs/file.c > +++ b/fs/btrfs/file.c > @@ -2350,6 +2350,7 @@ static int fill_holes(struct btrfs_trans_handle *trans, > hole_em->orig_start = offset; > > hole_em->block_start = EXTENT_MAP_HOLE; > + hole_em->disk_bytenr = EXTENT_MAP_HOLE; > hole_em->block_len = 0; > hole_em->disk_num_bytes = 0; > hole_em->generation = trans->transid; > diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c > index 8ac489fb5e39..7afcdea27782 100644 > --- a/fs/btrfs/inode.c > +++ b/fs/btrfs/inode.c > @@ -141,6 +141,7 @@ static struct extent_map *create_io_em(struct btrfs_inode *inode, u64 start, > u64 len, u64 orig_start, u64 block_start, > u64 block_len, u64 disk_num_bytes, > u64 ram_bytes, int compress_type, > + struct btrfs_file_extent *file_extent, > int type); > > static int data_reloc_print_warning_inode(u64 inum, u64 offset, u64 num_bytes, > @@ -1152,6 +1153,7 @@ static void submit_one_async_extent(struct async_chunk *async_chunk, > struct btrfs_root *root = inode->root; > struct btrfs_fs_info *fs_info = root->fs_info; > struct btrfs_ordered_extent *ordered; > + struct btrfs_file_extent file_extent; > struct btrfs_key ins; > struct page *locked_page = NULL; > struct extent_state *cached = NULL; > @@ -1198,6 +1200,13 @@ static void submit_one_async_extent(struct async_chunk *async_chunk, > lock_extent(io_tree, start, end, &cached); > > /* Here we're doing allocation and writeback of the compressed pages */ > + file_extent.disk_bytenr = ins.objectid; > + file_extent.disk_num_bytes = ins.offset; > + file_extent.ram_bytes = async_extent->ram_size; > + file_extent.num_bytes = async_extent->ram_size; > + file_extent.offset = 0; > + file_extent.compression = async_extent->compress_type; > + > em = create_io_em(inode, start, > async_extent->ram_size, /* len */ > start, /* orig_start */ > @@ -1206,6 +1215,7 @@ static void submit_one_async_extent(struct async_chunk *async_chunk, > ins.offset, /* orig_block_len */ > async_extent->ram_size, /* ram_bytes */ > async_extent->compress_type, > + &file_extent, > BTRFS_ORDERED_COMPRESSED); > if (IS_ERR(em)) { > ret = PTR_ERR(em); > @@ -1395,6 +1405,7 @@ static noinline int cow_file_range(struct btrfs_inode *inode, > > while (num_bytes > 0) { > struct btrfs_ordered_extent *ordered; > + struct btrfs_file_extent file_extent; > > cur_alloc_size = num_bytes; > ret = btrfs_reserve_extent(root, cur_alloc_size, cur_alloc_size, > @@ -1431,6 +1442,12 @@ static noinline int cow_file_range(struct btrfs_inode *inode, > extent_reserved = true; > > ram_size = ins.offset; > + file_extent.disk_bytenr = ins.objectid; > + file_extent.disk_num_bytes = ins.offset; > + file_extent.num_bytes = ins.offset; > + file_extent.ram_bytes = ins.offset; > + file_extent.offset = 0; > + file_extent.compression = BTRFS_COMPRESS_NONE; > > lock_extent(&inode->io_tree, start, start + ram_size - 1, > &cached); > @@ -1442,6 +1459,7 @@ static noinline int cow_file_range(struct btrfs_inode *inode, > ins.offset, /* orig_block_len */ > ram_size, /* ram_bytes */ > BTRFS_COMPRESS_NONE, /* compress_type */ > + &file_extent, > BTRFS_ORDERED_REGULAR /* type */); > if (IS_ERR(em)) { > unlock_extent(&inode->io_tree, start, > @@ -2180,6 +2198,7 @@ static noinline int run_delalloc_nocow(struct btrfs_inode *inode, > nocow_args.num_bytes, /* block_len */ > nocow_args.disk_num_bytes, /* orig_block_len */ > ram_bytes, BTRFS_COMPRESS_NONE, > + &nocow_args.file_extent, > BTRFS_ORDERED_PREALLOC); > if (IS_ERR(em)) { > unlock_extent(&inode->io_tree, cur_offset, > @@ -5012,6 +5031,7 @@ int btrfs_cont_expand(struct btrfs_inode *inode, loff_t oldsize, loff_t size) > hole_em->orig_start = cur_offset; > > hole_em->block_start = EXTENT_MAP_HOLE; > + hole_em->disk_bytenr = EXTENT_MAP_HOLE; > hole_em->block_len = 0; > hole_em->disk_num_bytes = 0; > hole_em->ram_bytes = hole_size; > @@ -6880,6 +6900,7 @@ struct extent_map *btrfs_get_extent(struct btrfs_inode *inode, > } > em->start = EXTENT_MAP_HOLE; > em->orig_start = EXTENT_MAP_HOLE; > + em->disk_bytenr = EXTENT_MAP_HOLE; > em->len = (u64)-1; > em->block_len = (u64)-1; > > @@ -7045,7 +7066,8 @@ static struct extent_map *btrfs_create_dio_extent(struct btrfs_inode *inode, > const u64 block_len, > const u64 orig_block_len, > const u64 ram_bytes, > - const int type) > + const int type, > + struct btrfs_file_extent *file_extent) > { > struct extent_map *em = NULL; > struct btrfs_ordered_extent *ordered; > @@ -7054,7 +7076,7 @@ static struct extent_map *btrfs_create_dio_extent(struct btrfs_inode *inode, > em = create_io_em(inode, start, len, orig_start, block_start, > block_len, orig_block_len, ram_bytes, > BTRFS_COMPRESS_NONE, /* compress_type */ > - type); > + file_extent, type); > if (IS_ERR(em)) > goto out; > } > @@ -7085,6 +7107,7 @@ static struct extent_map *btrfs_new_extent_direct(struct btrfs_inode *inode, > { > struct btrfs_root *root = inode->root; > struct btrfs_fs_info *fs_info = root->fs_info; > + struct btrfs_file_extent file_extent; > struct extent_map *em; > struct btrfs_key ins; > u64 alloc_hint; > @@ -7103,9 +7126,16 @@ static struct extent_map *btrfs_new_extent_direct(struct btrfs_inode *inode, > if (ret) > return ERR_PTR(ret); > > + file_extent.disk_bytenr = ins.objectid; > + file_extent.disk_num_bytes = ins.offset; > + file_extent.num_bytes = ins.offset; > + file_extent.ram_bytes = ins.offset; > + file_extent.offset = 0; > + file_extent.compression = BTRFS_COMPRESS_NONE; > em = btrfs_create_dio_extent(inode, dio_data, start, ins.offset, start, > ins.objectid, ins.offset, ins.offset, > - ins.offset, BTRFS_ORDERED_REGULAR); > + ins.offset, BTRFS_ORDERED_REGULAR, > + &file_extent); > btrfs_dec_block_group_reservations(fs_info, ins.objectid); > if (IS_ERR(em)) > btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, > @@ -7348,6 +7378,7 @@ static struct extent_map *create_io_em(struct btrfs_inode *inode, u64 start, > u64 len, u64 orig_start, u64 block_start, > u64 block_len, u64 disk_num_bytes, > u64 ram_bytes, int compress_type, > + struct btrfs_file_extent *file_extent, > int type) > { > struct extent_map *em; > @@ -7405,9 +7436,11 @@ static struct extent_map *create_io_em(struct btrfs_inode *inode, u64 start, > em->len = len; > em->block_len = block_len; > em->block_start = block_start; > + em->disk_bytenr = file_extent->disk_bytenr; > em->disk_num_bytes = disk_num_bytes; > em->ram_bytes = ram_bytes; > em->generation = -1; > + em->offset = file_extent->offset; > em->flags |= EXTENT_FLAG_PINNED; > if (type == BTRFS_ORDERED_COMPRESSED) > extent_map_set_compression(em, compress_type); > @@ -7431,6 +7464,7 @@ static int btrfs_get_blocks_direct_write(struct extent_map **map, > { > const bool nowait = (iomap_flags & IOMAP_NOWAIT); > struct btrfs_fs_info *fs_info = inode_to_fs_info(inode); > + struct btrfs_file_extent file_extent; > struct extent_map *em = *map; > int type; > u64 block_start, orig_start, orig_block_len, ram_bytes; > @@ -7461,7 +7495,8 @@ static int btrfs_get_blocks_direct_write(struct extent_map **map, > block_start = em->block_start + (start - em->start); > > if (can_nocow_extent(inode, start, &len, &orig_start, > - &orig_block_len, &ram_bytes, NULL, false, false) == 1) { > + &orig_block_len, &ram_bytes, > + &file_extent, false, false) == 1) { > bg = btrfs_inc_nocow_writers(fs_info, block_start); > if (bg) > can_nocow = true; > @@ -7489,7 +7524,8 @@ static int btrfs_get_blocks_direct_write(struct extent_map **map, > em2 = btrfs_create_dio_extent(BTRFS_I(inode), dio_data, start, len, > orig_start, block_start, > len, orig_block_len, > - ram_bytes, type); > + ram_bytes, type, > + &file_extent); > btrfs_dec_nocow_writers(bg); > if (type == BTRFS_ORDERED_PREALLOC) { > free_extent_map(em); > @@ -9629,6 +9665,8 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode, > em->orig_start = cur_offset; > em->len = ins.offset; > em->block_start = ins.objectid; > + em->disk_bytenr = ins.objectid; > + em->offset = 0; > em->block_len = ins.offset; > em->disk_num_bytes = ins.offset; > em->ram_bytes = ins.offset; > @@ -10195,6 +10233,7 @@ ssize_t btrfs_do_encoded_write(struct kiocb *iocb, struct iov_iter *from, > struct extent_changeset *data_reserved = NULL; > struct extent_state *cached_state = NULL; > struct btrfs_ordered_extent *ordered; > + struct btrfs_file_extent file_extent; > int compression; > size_t orig_count; > u64 start, end; > @@ -10370,10 +10409,16 @@ ssize_t btrfs_do_encoded_write(struct kiocb *iocb, struct iov_iter *from, > goto out_delalloc_release; > extent_reserved = true; > > + file_extent.disk_bytenr = ins.objectid; > + file_extent.disk_num_bytes = ins.offset; > + file_extent.num_bytes = num_bytes; > + file_extent.ram_bytes = ram_bytes; > + file_extent.offset = encoded->unencoded_offset; > + file_extent.compression = compression; > em = create_io_em(inode, start, num_bytes, > start - encoded->unencoded_offset, ins.objectid, > ins.offset, ins.offset, ram_bytes, compression, > - BTRFS_ORDERED_COMPRESSED); > + &file_extent, BTRFS_ORDERED_COMPRESSED); > if (IS_ERR(em)) { > ret = PTR_ERR(em); > goto out_free_reserved; > -- > 2.45.1 > >
On Thu, May 23, 2024 at 6:04 AM Qu Wenruo <wqu@suse.com> wrote: > > Introduce two new members for extent_map: > > - disk_bytenr > - offset > > Both are matching the members with the same name inside > btrfs_file_extent_items. > > For now this patch only touches those members when: > > - Reading btrfs_file_extent_items from disk > - Inserting new holes > - Merging two extent maps > With the new disk_bytenr and disk_num_bytes, doing merging would be a > little more complex, as we have 3 different cases: > > * Both extent maps are referring to the same data extents > |<----- data extent A ----->| > |<- em 1 ->|<- em 2 ->| > > * Both extent maps are referring to different data extents > |<-- data extent A -->|<-- data extent B -->| > |<- em 1 ->|<- em 2 ->| > > * One of the extent maps is referring to a merged and larger data > extent that covers both extent maps > > This is not really valid case other than some selftests. > So this test case would be removed. > > A new helper merge_ondisk_extents() would be introduced to handle > above valid cases. > > To properly assign values for those new members, a new btrfs_file_extent > parameter is introduced to all the involved call sites. > > - For NOCOW writes the btrfs_file_extent would be exposed from > can_nocow_file_extent(). > > - For other writes, the members can be easily calculated > As most of them have 0 offset and utilizing the whole on-disk data > extent. > The exception is encoded write, but thankfully that interface provided > offset directly and all other needed info. > > For now, both the old members (block_start/block_len/orig_start) are > co-existing with the new members (disk_bytenr/offset), meanwhile all the > critical code is still using the old members only. > > The cleanup would happen later after all the older and newer members are > properly validated. > > Signed-off-by: Qu Wenruo <wqu@suse.com> > Signed-off-by: David Sterba <dsterba@suse.com> > --- > fs/btrfs/defrag.c | 4 +++ > fs/btrfs/extent_map.c | 78 ++++++++++++++++++++++++++++++++++++++++--- > fs/btrfs/extent_map.h | 17 ++++++++++ > fs/btrfs/file-item.c | 9 ++++- > fs/btrfs/file.c | 1 + > fs/btrfs/inode.c | 57 +++++++++++++++++++++++++++---- > 6 files changed, 155 insertions(+), 11 deletions(-) > > diff --git a/fs/btrfs/defrag.c b/fs/btrfs/defrag.c > index 407ccec3e57e..242c5469f4ba 100644 > --- a/fs/btrfs/defrag.c > +++ b/fs/btrfs/defrag.c > @@ -709,6 +709,10 @@ static struct extent_map *defrag_get_extent(struct btrfs_inode *inode, > em->start = start; > em->orig_start = start; > em->block_start = EXTENT_MAP_HOLE; > + em->disk_bytenr = EXTENT_MAP_HOLE; > + em->disk_num_bytes = 0; > + em->ram_bytes = 0; > + em->offset = 0; > em->len = key.offset - start; > break; > } > diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c > index a9d60d1eade9..c7d2393692e6 100644 > --- a/fs/btrfs/extent_map.c > +++ b/fs/btrfs/extent_map.c > @@ -229,6 +229,60 @@ static bool mergeable_maps(const struct extent_map *prev, const struct extent_ma > return next->block_start == prev->block_start; > } > > +/* > + * Handle the ondisk data extents merge for @prev and @next. > + * > + * Only touches disk_bytenr/disk_num_bytes/offset/ram_bytes. > + * For now only uncompressed regular extent can be merged. > + * > + * @prev and @next will be both updated to point to the new merged range. > + * Thus one of them should be removed by the caller. > + */ > +static void merge_ondisk_extents(struct extent_map *prev, struct extent_map *next) > +{ > + u64 new_disk_bytenr; > + u64 new_disk_num_bytes; > + u64 new_offset; > + > + /* @prev and @next should not be compressed. */ > + ASSERT(!extent_map_is_compressed(prev)); > + ASSERT(!extent_map_is_compressed(next)); > + > + /* > + * There are two different cases where @prev and @next can be merged. > + * > + * 1) They are referring to the same data extent > + * |<----- data extent A ----->| > + * |<- prev ->|<- next ->| > + * > + * 2) They are referring to different data extents but still adjacent > + * > + * |<-- data extent A -->|<-- data extent B -->| > + * |<- prev ->|<- next ->| > + * > + * The calculation here always merge the data extents first, then update > + * @offset using the new data extents. > + * > + * For case 1), the merged data extent would be the same. > + * For case 2), we just merge the two data extents into one. > + */ > + new_disk_bytenr = min(prev->disk_bytenr, next->disk_bytenr); > + new_disk_num_bytes = max(prev->disk_bytenr + prev->disk_num_bytes, > + next->disk_bytenr + next->disk_num_bytes) - > + new_disk_bytenr; > + new_offset = prev->disk_bytenr + prev->offset - new_disk_bytenr; > + > + prev->disk_bytenr = new_disk_bytenr; > + prev->disk_num_bytes = new_disk_num_bytes; > + prev->ram_bytes = new_disk_num_bytes; > + prev->offset = new_offset; > + > + next->disk_bytenr = new_disk_bytenr; > + next->disk_num_bytes = new_disk_num_bytes; > + next->ram_bytes = new_disk_num_bytes; > + next->offset = new_offset; > +} > + > static void try_merge_map(struct btrfs_inode *inode, struct extent_map *em) > { > struct extent_map_tree *tree = &inode->extent_tree; > @@ -260,6 +314,9 @@ static void try_merge_map(struct btrfs_inode *inode, struct extent_map *em) > em->block_len += merge->block_len; > em->block_start = merge->block_start; > em->generation = max(em->generation, merge->generation); > + > + if (em->disk_bytenr < EXTENT_MAP_LAST_BYTE) > + merge_ondisk_extents(merge, em); > em->flags |= EXTENT_FLAG_MERGED; > > rb_erase(&merge->rb_node, &tree->root); > @@ -275,6 +332,8 @@ static void try_merge_map(struct btrfs_inode *inode, struct extent_map *em) > if (rb && can_merge_extent_map(merge) && mergeable_maps(em, merge)) { > em->len += merge->len; > em->block_len += merge->block_len; > + if (em->disk_bytenr < EXTENT_MAP_LAST_BYTE) > + merge_ondisk_extents(em, merge); > rb_erase(&merge->rb_node, &tree->root); > RB_CLEAR_NODE(&merge->rb_node); > em->generation = max(em->generation, merge->generation); > @@ -562,6 +621,7 @@ static noinline int merge_extent_mapping(struct btrfs_inode *inode, > !extent_map_is_compressed(em)) { > em->block_start += start_diff; > em->block_len = em->len; > + em->offset += start_diff; > } > return add_extent_mapping(inode, em, 0); > } > @@ -785,14 +845,18 @@ void btrfs_drop_extent_map_range(struct btrfs_inode *inode, u64 start, u64 end, > split->block_len = em->block_len; > else > split->block_len = split->len; > + split->disk_bytenr = em->disk_bytenr; > split->disk_num_bytes = max(split->block_len, > em->disk_num_bytes); > + split->offset = em->offset; > split->ram_bytes = em->ram_bytes; > } else { > split->orig_start = split->start; > split->block_len = 0; > split->block_start = em->block_start; > + split->disk_bytenr = em->disk_bytenr; > split->disk_num_bytes = 0; > + split->offset = 0; > split->ram_bytes = split->len; > } > > @@ -813,13 +877,14 @@ void btrfs_drop_extent_map_range(struct btrfs_inode *inode, u64 start, u64 end, > split->start = end; > split->len = em_end - end; > split->block_start = em->block_start; > + split->disk_bytenr = em->disk_bytenr; > split->flags = flags; > split->generation = gen; > > if (em->block_start < EXTENT_MAP_LAST_BYTE) { > split->disk_num_bytes = max(em->block_len, > em->disk_num_bytes); > - > + split->offset = em->offset + end - em->start; > split->ram_bytes = em->ram_bytes; > if (compressed) { > split->block_len = em->block_len; > @@ -832,10 +897,11 @@ void btrfs_drop_extent_map_range(struct btrfs_inode *inode, u64 start, u64 end, > split->orig_start = em->orig_start; > } > } else { > + split->disk_num_bytes = 0; > + split->offset = 0; > split->ram_bytes = split->len; > split->orig_start = split->start; > split->block_len = 0; > - split->disk_num_bytes = 0; > } > > if (extent_map_in_tree(em)) { > @@ -989,10 +1055,12 @@ int split_extent_map(struct btrfs_inode *inode, u64 start, u64 len, u64 pre, > /* First, replace the em with a new extent_map starting from * em->start */ > split_pre->start = em->start; > split_pre->len = pre; > + split_pre->disk_bytenr = new_logical; > + split_pre->disk_num_bytes = split_pre->len; > + split_pre->offset = 0; > split_pre->orig_start = split_pre->start; > split_pre->block_start = new_logical; > split_pre->block_len = split_pre->len; > - split_pre->disk_num_bytes = split_pre->block_len; > split_pre->ram_bytes = split_pre->len; > split_pre->flags = flags; > split_pre->generation = em->generation; > @@ -1007,10 +1075,12 @@ int split_extent_map(struct btrfs_inode *inode, u64 start, u64 len, u64 pre, > /* Insert the middle extent_map. */ > split_mid->start = em->start + pre; > split_mid->len = em->len - pre; > + split_mid->disk_bytenr = em->block_start + pre; > + split_mid->disk_num_bytes = split_mid->len; > + split_mid->offset = 0; > split_mid->orig_start = split_mid->start; > split_mid->block_start = em->block_start + pre; > split_mid->block_len = split_mid->len; > - split_mid->disk_num_bytes = split_mid->block_len; > split_mid->ram_bytes = split_mid->len; > split_mid->flags = flags; > split_mid->generation = em->generation; > diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h > index 2b7bbffd594b..0b1a8e409377 100644 > --- a/fs/btrfs/extent_map.h > +++ b/fs/btrfs/extent_map.h > @@ -70,12 +70,29 @@ struct extent_map { > */ > u64 orig_start; > > + /* > + * The bytenr of the full on-disk extent. > + * > + * For regular extents it's btrfs_file_extent_item::disk_bytenr. > + * For holes it's EXTENT_MAP_HOLE and for inline extents it's > + * EXTENT_MAP_INLINE. > + */ > + u64 disk_bytenr; > + > /* > * The full on-disk extent length, matching > * btrfs_file_extent_item::disk_num_bytes. > */ > u64 disk_num_bytes; > > + /* > + * Offset inside the decompressed extent. > + * > + * For regular extents it's btrfs_file_extent_item::offset. > + * For holes and inline extents it's 0. > + */ > + u64 offset; > + > /* > * The decompressed size of the whole on-disk extent, matching > * btrfs_file_extent_item::ram_bytes. > diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c > index 430dce44ebd2..1298afea9503 100644 > --- a/fs/btrfs/file-item.c > +++ b/fs/btrfs/file-item.c > @@ -1295,12 +1295,17 @@ void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode, > em->len = btrfs_file_extent_end(path) - extent_start; > em->orig_start = extent_start - > btrfs_file_extent_offset(leaf, fi); > - em->disk_num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi); > bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); > if (bytenr == 0) { > em->block_start = EXTENT_MAP_HOLE; > + em->disk_bytenr = EXTENT_MAP_HOLE; > + em->disk_num_bytes = 0; > + em->offset = 0; > return; > } > + em->disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); > + em->disk_num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi); > + em->offset = btrfs_file_extent_offset(leaf, fi); > if (compress_type != BTRFS_COMPRESS_NONE) { > extent_map_set_compression(em, compress_type); > em->block_start = bytenr; > @@ -1317,8 +1322,10 @@ void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode, > ASSERT(extent_start == 0); > > em->block_start = EXTENT_MAP_INLINE; > + em->disk_bytenr = EXTENT_MAP_INLINE; > em->start = 0; > em->len = fs_info->sectorsize; > + em->offset = 0; > /* > * Initialize orig_start and block_len with the same values > * as in inode.c:btrfs_get_extent(). > diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c > index 7c42565da70c..5133c6705d74 100644 > --- a/fs/btrfs/file.c > +++ b/fs/btrfs/file.c > @@ -2350,6 +2350,7 @@ static int fill_holes(struct btrfs_trans_handle *trans, > hole_em->orig_start = offset; > > hole_em->block_start = EXTENT_MAP_HOLE; > + hole_em->disk_bytenr = EXTENT_MAP_HOLE; > hole_em->block_len = 0; > hole_em->disk_num_bytes = 0; > hole_em->generation = trans->transid; > diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c > index 8ac489fb5e39..7afcdea27782 100644 > --- a/fs/btrfs/inode.c > +++ b/fs/btrfs/inode.c > @@ -141,6 +141,7 @@ static struct extent_map *create_io_em(struct btrfs_inode *inode, u64 start, > u64 len, u64 orig_start, u64 block_start, > u64 block_len, u64 disk_num_bytes, > u64 ram_bytes, int compress_type, > + struct btrfs_file_extent *file_extent, Can be made const. > int type); > > static int data_reloc_print_warning_inode(u64 inum, u64 offset, u64 num_bytes, > @@ -1152,6 +1153,7 @@ static void submit_one_async_extent(struct async_chunk *async_chunk, > struct btrfs_root *root = inode->root; > struct btrfs_fs_info *fs_info = root->fs_info; > struct btrfs_ordered_extent *ordered; > + struct btrfs_file_extent file_extent; > struct btrfs_key ins; > struct page *locked_page = NULL; > struct extent_state *cached = NULL; > @@ -1198,6 +1200,13 @@ static void submit_one_async_extent(struct async_chunk *async_chunk, > lock_extent(io_tree, start, end, &cached); > > /* Here we're doing allocation and writeback of the compressed pages */ > + file_extent.disk_bytenr = ins.objectid; > + file_extent.disk_num_bytes = ins.offset; > + file_extent.ram_bytes = async_extent->ram_size; > + file_extent.num_bytes = async_extent->ram_size; > + file_extent.offset = 0; > + file_extent.compression = async_extent->compress_type; > + > em = create_io_em(inode, start, > async_extent->ram_size, /* len */ > start, /* orig_start */ > @@ -1206,6 +1215,7 @@ static void submit_one_async_extent(struct async_chunk *async_chunk, > ins.offset, /* orig_block_len */ > async_extent->ram_size, /* ram_bytes */ > async_extent->compress_type, > + &file_extent, > BTRFS_ORDERED_COMPRESSED); > if (IS_ERR(em)) { > ret = PTR_ERR(em); > @@ -1395,6 +1405,7 @@ static noinline int cow_file_range(struct btrfs_inode *inode, > > while (num_bytes > 0) { > struct btrfs_ordered_extent *ordered; > + struct btrfs_file_extent file_extent; > > cur_alloc_size = num_bytes; > ret = btrfs_reserve_extent(root, cur_alloc_size, cur_alloc_size, > @@ -1431,6 +1442,12 @@ static noinline int cow_file_range(struct btrfs_inode *inode, > extent_reserved = true; > > ram_size = ins.offset; > + file_extent.disk_bytenr = ins.objectid; > + file_extent.disk_num_bytes = ins.offset; > + file_extent.num_bytes = ins.offset; > + file_extent.ram_bytes = ins.offset; > + file_extent.offset = 0; > + file_extent.compression = BTRFS_COMPRESS_NONE; > > lock_extent(&inode->io_tree, start, start + ram_size - 1, > &cached); > @@ -1442,6 +1459,7 @@ static noinline int cow_file_range(struct btrfs_inode *inode, > ins.offset, /* orig_block_len */ > ram_size, /* ram_bytes */ > BTRFS_COMPRESS_NONE, /* compress_type */ > + &file_extent, > BTRFS_ORDERED_REGULAR /* type */); > if (IS_ERR(em)) { > unlock_extent(&inode->io_tree, start, > @@ -2180,6 +2198,7 @@ static noinline int run_delalloc_nocow(struct btrfs_inode *inode, > nocow_args.num_bytes, /* block_len */ > nocow_args.disk_num_bytes, /* orig_block_len */ > ram_bytes, BTRFS_COMPRESS_NONE, > + &nocow_args.file_extent, > BTRFS_ORDERED_PREALLOC); > if (IS_ERR(em)) { > unlock_extent(&inode->io_tree, cur_offset, > @@ -5012,6 +5031,7 @@ int btrfs_cont_expand(struct btrfs_inode *inode, loff_t oldsize, loff_t size) > hole_em->orig_start = cur_offset; > > hole_em->block_start = EXTENT_MAP_HOLE; > + hole_em->disk_bytenr = EXTENT_MAP_HOLE; > hole_em->block_len = 0; > hole_em->disk_num_bytes = 0; > hole_em->ram_bytes = hole_size; > @@ -6880,6 +6900,7 @@ struct extent_map *btrfs_get_extent(struct btrfs_inode *inode, > } > em->start = EXTENT_MAP_HOLE; > em->orig_start = EXTENT_MAP_HOLE; > + em->disk_bytenr = EXTENT_MAP_HOLE; > em->len = (u64)-1; > em->block_len = (u64)-1; > > @@ -7045,7 +7066,8 @@ static struct extent_map *btrfs_create_dio_extent(struct btrfs_inode *inode, > const u64 block_len, > const u64 orig_block_len, > const u64 ram_bytes, > - const int type) > + const int type, > + struct btrfs_file_extent *file_extent) Can be made const too. > { > struct extent_map *em = NULL; > struct btrfs_ordered_extent *ordered; > @@ -7054,7 +7076,7 @@ static struct extent_map *btrfs_create_dio_extent(struct btrfs_inode *inode, > em = create_io_em(inode, start, len, orig_start, block_start, > block_len, orig_block_len, ram_bytes, > BTRFS_COMPRESS_NONE, /* compress_type */ > - type); > + file_extent, type); > if (IS_ERR(em)) > goto out; > } > @@ -7085,6 +7107,7 @@ static struct extent_map *btrfs_new_extent_direct(struct btrfs_inode *inode, > { > struct btrfs_root *root = inode->root; > struct btrfs_fs_info *fs_info = root->fs_info; > + struct btrfs_file_extent file_extent; > struct extent_map *em; > struct btrfs_key ins; > u64 alloc_hint; > @@ -7103,9 +7126,16 @@ static struct extent_map *btrfs_new_extent_direct(struct btrfs_inode *inode, > if (ret) > return ERR_PTR(ret); > > + file_extent.disk_bytenr = ins.objectid; > + file_extent.disk_num_bytes = ins.offset; > + file_extent.num_bytes = ins.offset; > + file_extent.ram_bytes = ins.offset; > + file_extent.offset = 0; > + file_extent.compression = BTRFS_COMPRESS_NONE; > em = btrfs_create_dio_extent(inode, dio_data, start, ins.offset, start, > ins.objectid, ins.offset, ins.offset, > - ins.offset, BTRFS_ORDERED_REGULAR); > + ins.offset, BTRFS_ORDERED_REGULAR, > + &file_extent); > btrfs_dec_block_group_reservations(fs_info, ins.objectid); > if (IS_ERR(em)) > btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, > @@ -7348,6 +7378,7 @@ static struct extent_map *create_io_em(struct btrfs_inode *inode, u64 start, > u64 len, u64 orig_start, u64 block_start, > u64 block_len, u64 disk_num_bytes, > u64 ram_bytes, int compress_type, > + struct btrfs_file_extent *file_extent, > int type) > { > struct extent_map *em; > @@ -7405,9 +7436,11 @@ static struct extent_map *create_io_em(struct btrfs_inode *inode, u64 start, > em->len = len; > em->block_len = block_len; > em->block_start = block_start; > + em->disk_bytenr = file_extent->disk_bytenr; > em->disk_num_bytes = disk_num_bytes; > em->ram_bytes = ram_bytes; > em->generation = -1; > + em->offset = file_extent->offset; > em->flags |= EXTENT_FLAG_PINNED; > if (type == BTRFS_ORDERED_COMPRESSED) > extent_map_set_compression(em, compress_type); > @@ -7431,6 +7464,7 @@ static int btrfs_get_blocks_direct_write(struct extent_map **map, > { > const bool nowait = (iomap_flags & IOMAP_NOWAIT); > struct btrfs_fs_info *fs_info = inode_to_fs_info(inode); > + struct btrfs_file_extent file_extent; > struct extent_map *em = *map; > int type; > u64 block_start, orig_start, orig_block_len, ram_bytes; > @@ -7461,7 +7495,8 @@ static int btrfs_get_blocks_direct_write(struct extent_map **map, > block_start = em->block_start + (start - em->start); > > if (can_nocow_extent(inode, start, &len, &orig_start, > - &orig_block_len, &ram_bytes, NULL, false, false) == 1) { > + &orig_block_len, &ram_bytes, > + &file_extent, false, false) == 1) { > bg = btrfs_inc_nocow_writers(fs_info, block_start); > if (bg) > can_nocow = true; > @@ -7489,7 +7524,8 @@ static int btrfs_get_blocks_direct_write(struct extent_map **map, > em2 = btrfs_create_dio_extent(BTRFS_I(inode), dio_data, start, len, > orig_start, block_start, > len, orig_block_len, > - ram_bytes, type); > + ram_bytes, type, > + &file_extent); > btrfs_dec_nocow_writers(bg); > if (type == BTRFS_ORDERED_PREALLOC) { > free_extent_map(em); > @@ -9629,6 +9665,8 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode, > em->orig_start = cur_offset; > em->len = ins.offset; > em->block_start = ins.objectid; > + em->disk_bytenr = ins.objectid; > + em->offset = 0; > em->block_len = ins.offset; > em->disk_num_bytes = ins.offset; > em->ram_bytes = ins.offset; > @@ -10195,6 +10233,7 @@ ssize_t btrfs_do_encoded_write(struct kiocb *iocb, struct iov_iter *from, > struct extent_changeset *data_reserved = NULL; > struct extent_state *cached_state = NULL; > struct btrfs_ordered_extent *ordered; > + struct btrfs_file_extent file_extent; > int compression; > size_t orig_count; > u64 start, end; > @@ -10370,10 +10409,16 @@ ssize_t btrfs_do_encoded_write(struct kiocb *iocb, struct iov_iter *from, > goto out_delalloc_release; > extent_reserved = true; > > + file_extent.disk_bytenr = ins.objectid; > + file_extent.disk_num_bytes = ins.offset; > + file_extent.num_bytes = num_bytes; > + file_extent.ram_bytes = ram_bytes; > + file_extent.offset = encoded->unencoded_offset; > + file_extent.compression = compression; > em = create_io_em(inode, start, num_bytes, > start - encoded->unencoded_offset, ins.objectid, > ins.offset, ins.offset, ram_bytes, compression, > - BTRFS_ORDERED_COMPRESSED); > + &file_extent, BTRFS_ORDERED_COMPRESSED); > if (IS_ERR(em)) { > ret = PTR_ERR(em); > goto out_free_reserved; > -- > 2.45.1 > >
在 2024/5/24 02:23, Filipe Manana 写道: [...] >> @@ -832,10 +897,11 @@ void btrfs_drop_extent_map_range(struct btrfs_inode *inode, u64 start, u64 end, >> split->orig_start = em->orig_start; >> } >> } else { >> + split->disk_num_bytes = 0; >> + split->offset = 0; >> split->ram_bytes = split->len; >> split->orig_start = split->start; >> split->block_len = 0; >> - split->disk_num_bytes = 0; > > Why move the assignment of ->disk_num_bytes ? > This is sort of distracting, doing unnecessary changes. It's to group the newer members together, and to follow the new trend to put them in disk_bytenr disk_num_bytes offset ram_bytes order. I know with structures, there is really no need to keep any order between the member assignment, but with fixed ordering, it would be better in the long run. And unfortunately the cost is that, the first patch doing the re-ordering the members would be harder to review. > >> } >> >> if (extent_map_in_tree(em)) { >> @@ -989,10 +1055,12 @@ int split_extent_map(struct btrfs_inode *inode, u64 start, u64 len, u64 pre, >> /* First, replace the em with a new extent_map starting from * em->start */ >> split_pre->start = em->start; >> split_pre->len = pre; >> + split_pre->disk_bytenr = new_logical; > > We are already setting disk_bytenr to the same value a few lines below.' Sorry, I didn't see any location touching disk_bytenr, either inside the patch, nor else where, especially the disk_bytenr is a new member. > >> + split_pre->disk_num_bytes = split_pre->len; >> + split_pre->offset = 0; >> split_pre->orig_start = split_pre->start; >> split_pre->block_start = new_logical; >> split_pre->block_len = split_pre->len; >> - split_pre->disk_num_bytes = split_pre->block_len; > > Here, where slit_pre->block_len has the same value as split->pre_len. > This sort of apparently accidental change makes it harder to review. Again, to keep a consistent order of members. > >> split_pre->ram_bytes = split_pre->len; >> split_pre->flags = flags; >> split_pre->generation = em->generation; >> @@ -1007,10 +1075,12 @@ int split_extent_map(struct btrfs_inode *inode, u64 start, u64 len, u64 pre, >> /* Insert the middle extent_map. */ >> split_mid->start = em->start + pre; >> split_mid->len = em->len - pre; >> + split_mid->disk_bytenr = em->block_start + pre; > > Same here. > >> + split_mid->disk_num_bytes = split_mid->len; >> + split_mid->offset = 0; >> split_mid->orig_start = split_mid->start; >> split_mid->block_start = em->block_start + pre; >> split_mid->block_len = split_mid->len; >> - split_mid->disk_num_bytes = split_mid->block_len; > > Which relates to this. > > Otherwise it looks fine, and could be fixed up when cherry picked to for-next. So although it's indeed harder to review, we would have a very consistent order when assigning those members. Thankfully this is only a one time pain, there should be no more member order related problems. Thanks, Qu > > Reviewed-by: Filipe Manana <fdmanana@suse.com> > > Thanks.
On Fri, May 24, 2024 at 08:49:25AM +0930, Qu Wenruo wrote: > > > 在 2024/5/24 02:23, Filipe Manana 写道: > [...] > >> @@ -832,10 +897,11 @@ void btrfs_drop_extent_map_range(struct btrfs_inode *inode, u64 start, u64 end, > >> split->orig_start = em->orig_start; > >> } > >> } else { > >> + split->disk_num_bytes = 0; > >> + split->offset = 0; > >> split->ram_bytes = split->len; > >> split->orig_start = split->start; > >> split->block_len = 0; > >> - split->disk_num_bytes = 0; > > > > Why move the assignment of ->disk_num_bytes ? > > This is sort of distracting, doing unnecessary changes. > > It's to group the newer members together, and to follow the new trend to > put them in disk_bytenr disk_num_bytes offset ram_bytes order. > > I know with structures, there is really no need to keep any order > between the member assignment, but with fixed ordering, it would be > better in the long run. I agree this pays of in the long run. The most prominent example is ordering of the btrfs_key initialization, if it's always objectid/type/offset it's does not slow down reading, it's enough to read the values. Admittedly for the extent_map it's not the same because there are more members. The important thing is to keep the same order everywhere.
diff --git a/fs/btrfs/defrag.c b/fs/btrfs/defrag.c index 407ccec3e57e..242c5469f4ba 100644 --- a/fs/btrfs/defrag.c +++ b/fs/btrfs/defrag.c @@ -709,6 +709,10 @@ static struct extent_map *defrag_get_extent(struct btrfs_inode *inode, em->start = start; em->orig_start = start; em->block_start = EXTENT_MAP_HOLE; + em->disk_bytenr = EXTENT_MAP_HOLE; + em->disk_num_bytes = 0; + em->ram_bytes = 0; + em->offset = 0; em->len = key.offset - start; break; } diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index a9d60d1eade9..c7d2393692e6 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -229,6 +229,60 @@ static bool mergeable_maps(const struct extent_map *prev, const struct extent_ma return next->block_start == prev->block_start; } +/* + * Handle the ondisk data extents merge for @prev and @next. + * + * Only touches disk_bytenr/disk_num_bytes/offset/ram_bytes. + * For now only uncompressed regular extent can be merged. + * + * @prev and @next will be both updated to point to the new merged range. + * Thus one of them should be removed by the caller. + */ +static void merge_ondisk_extents(struct extent_map *prev, struct extent_map *next) +{ + u64 new_disk_bytenr; + u64 new_disk_num_bytes; + u64 new_offset; + + /* @prev and @next should not be compressed. */ + ASSERT(!extent_map_is_compressed(prev)); + ASSERT(!extent_map_is_compressed(next)); + + /* + * There are two different cases where @prev and @next can be merged. + * + * 1) They are referring to the same data extent + * |<----- data extent A ----->| + * |<- prev ->|<- next ->| + * + * 2) They are referring to different data extents but still adjacent + * + * |<-- data extent A -->|<-- data extent B -->| + * |<- prev ->|<- next ->| + * + * The calculation here always merge the data extents first, then update + * @offset using the new data extents. + * + * For case 1), the merged data extent would be the same. + * For case 2), we just merge the two data extents into one. + */ + new_disk_bytenr = min(prev->disk_bytenr, next->disk_bytenr); + new_disk_num_bytes = max(prev->disk_bytenr + prev->disk_num_bytes, + next->disk_bytenr + next->disk_num_bytes) - + new_disk_bytenr; + new_offset = prev->disk_bytenr + prev->offset - new_disk_bytenr; + + prev->disk_bytenr = new_disk_bytenr; + prev->disk_num_bytes = new_disk_num_bytes; + prev->ram_bytes = new_disk_num_bytes; + prev->offset = new_offset; + + next->disk_bytenr = new_disk_bytenr; + next->disk_num_bytes = new_disk_num_bytes; + next->ram_bytes = new_disk_num_bytes; + next->offset = new_offset; +} + static void try_merge_map(struct btrfs_inode *inode, struct extent_map *em) { struct extent_map_tree *tree = &inode->extent_tree; @@ -260,6 +314,9 @@ static void try_merge_map(struct btrfs_inode *inode, struct extent_map *em) em->block_len += merge->block_len; em->block_start = merge->block_start; em->generation = max(em->generation, merge->generation); + + if (em->disk_bytenr < EXTENT_MAP_LAST_BYTE) + merge_ondisk_extents(merge, em); em->flags |= EXTENT_FLAG_MERGED; rb_erase(&merge->rb_node, &tree->root); @@ -275,6 +332,8 @@ static void try_merge_map(struct btrfs_inode *inode, struct extent_map *em) if (rb && can_merge_extent_map(merge) && mergeable_maps(em, merge)) { em->len += merge->len; em->block_len += merge->block_len; + if (em->disk_bytenr < EXTENT_MAP_LAST_BYTE) + merge_ondisk_extents(em, merge); rb_erase(&merge->rb_node, &tree->root); RB_CLEAR_NODE(&merge->rb_node); em->generation = max(em->generation, merge->generation); @@ -562,6 +621,7 @@ static noinline int merge_extent_mapping(struct btrfs_inode *inode, !extent_map_is_compressed(em)) { em->block_start += start_diff; em->block_len = em->len; + em->offset += start_diff; } return add_extent_mapping(inode, em, 0); } @@ -785,14 +845,18 @@ void btrfs_drop_extent_map_range(struct btrfs_inode *inode, u64 start, u64 end, split->block_len = em->block_len; else split->block_len = split->len; + split->disk_bytenr = em->disk_bytenr; split->disk_num_bytes = max(split->block_len, em->disk_num_bytes); + split->offset = em->offset; split->ram_bytes = em->ram_bytes; } else { split->orig_start = split->start; split->block_len = 0; split->block_start = em->block_start; + split->disk_bytenr = em->disk_bytenr; split->disk_num_bytes = 0; + split->offset = 0; split->ram_bytes = split->len; } @@ -813,13 +877,14 @@ void btrfs_drop_extent_map_range(struct btrfs_inode *inode, u64 start, u64 end, split->start = end; split->len = em_end - end; split->block_start = em->block_start; + split->disk_bytenr = em->disk_bytenr; split->flags = flags; split->generation = gen; if (em->block_start < EXTENT_MAP_LAST_BYTE) { split->disk_num_bytes = max(em->block_len, em->disk_num_bytes); - + split->offset = em->offset + end - em->start; split->ram_bytes = em->ram_bytes; if (compressed) { split->block_len = em->block_len; @@ -832,10 +897,11 @@ void btrfs_drop_extent_map_range(struct btrfs_inode *inode, u64 start, u64 end, split->orig_start = em->orig_start; } } else { + split->disk_num_bytes = 0; + split->offset = 0; split->ram_bytes = split->len; split->orig_start = split->start; split->block_len = 0; - split->disk_num_bytes = 0; } if (extent_map_in_tree(em)) { @@ -989,10 +1055,12 @@ int split_extent_map(struct btrfs_inode *inode, u64 start, u64 len, u64 pre, /* First, replace the em with a new extent_map starting from * em->start */ split_pre->start = em->start; split_pre->len = pre; + split_pre->disk_bytenr = new_logical; + split_pre->disk_num_bytes = split_pre->len; + split_pre->offset = 0; split_pre->orig_start = split_pre->start; split_pre->block_start = new_logical; split_pre->block_len = split_pre->len; - split_pre->disk_num_bytes = split_pre->block_len; split_pre->ram_bytes = split_pre->len; split_pre->flags = flags; split_pre->generation = em->generation; @@ -1007,10 +1075,12 @@ int split_extent_map(struct btrfs_inode *inode, u64 start, u64 len, u64 pre, /* Insert the middle extent_map. */ split_mid->start = em->start + pre; split_mid->len = em->len - pre; + split_mid->disk_bytenr = em->block_start + pre; + split_mid->disk_num_bytes = split_mid->len; + split_mid->offset = 0; split_mid->orig_start = split_mid->start; split_mid->block_start = em->block_start + pre; split_mid->block_len = split_mid->len; - split_mid->disk_num_bytes = split_mid->block_len; split_mid->ram_bytes = split_mid->len; split_mid->flags = flags; split_mid->generation = em->generation; diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index 2b7bbffd594b..0b1a8e409377 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h @@ -70,12 +70,29 @@ struct extent_map { */ u64 orig_start; + /* + * The bytenr of the full on-disk extent. + * + * For regular extents it's btrfs_file_extent_item::disk_bytenr. + * For holes it's EXTENT_MAP_HOLE and for inline extents it's + * EXTENT_MAP_INLINE. + */ + u64 disk_bytenr; + /* * The full on-disk extent length, matching * btrfs_file_extent_item::disk_num_bytes. */ u64 disk_num_bytes; + /* + * Offset inside the decompressed extent. + * + * For regular extents it's btrfs_file_extent_item::offset. + * For holes and inline extents it's 0. + */ + u64 offset; + /* * The decompressed size of the whole on-disk extent, matching * btrfs_file_extent_item::ram_bytes. diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 430dce44ebd2..1298afea9503 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -1295,12 +1295,17 @@ void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode, em->len = btrfs_file_extent_end(path) - extent_start; em->orig_start = extent_start - btrfs_file_extent_offset(leaf, fi); - em->disk_num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi); bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); if (bytenr == 0) { em->block_start = EXTENT_MAP_HOLE; + em->disk_bytenr = EXTENT_MAP_HOLE; + em->disk_num_bytes = 0; + em->offset = 0; return; } + em->disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); + em->disk_num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi); + em->offset = btrfs_file_extent_offset(leaf, fi); if (compress_type != BTRFS_COMPRESS_NONE) { extent_map_set_compression(em, compress_type); em->block_start = bytenr; @@ -1317,8 +1322,10 @@ void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode, ASSERT(extent_start == 0); em->block_start = EXTENT_MAP_INLINE; + em->disk_bytenr = EXTENT_MAP_INLINE; em->start = 0; em->len = fs_info->sectorsize; + em->offset = 0; /* * Initialize orig_start and block_len with the same values * as in inode.c:btrfs_get_extent(). diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 7c42565da70c..5133c6705d74 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -2350,6 +2350,7 @@ static int fill_holes(struct btrfs_trans_handle *trans, hole_em->orig_start = offset; hole_em->block_start = EXTENT_MAP_HOLE; + hole_em->disk_bytenr = EXTENT_MAP_HOLE; hole_em->block_len = 0; hole_em->disk_num_bytes = 0; hole_em->generation = trans->transid; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 8ac489fb5e39..7afcdea27782 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -141,6 +141,7 @@ static struct extent_map *create_io_em(struct btrfs_inode *inode, u64 start, u64 len, u64 orig_start, u64 block_start, u64 block_len, u64 disk_num_bytes, u64 ram_bytes, int compress_type, + struct btrfs_file_extent *file_extent, int type); static int data_reloc_print_warning_inode(u64 inum, u64 offset, u64 num_bytes, @@ -1152,6 +1153,7 @@ static void submit_one_async_extent(struct async_chunk *async_chunk, struct btrfs_root *root = inode->root; struct btrfs_fs_info *fs_info = root->fs_info; struct btrfs_ordered_extent *ordered; + struct btrfs_file_extent file_extent; struct btrfs_key ins; struct page *locked_page = NULL; struct extent_state *cached = NULL; @@ -1198,6 +1200,13 @@ static void submit_one_async_extent(struct async_chunk *async_chunk, lock_extent(io_tree, start, end, &cached); /* Here we're doing allocation and writeback of the compressed pages */ + file_extent.disk_bytenr = ins.objectid; + file_extent.disk_num_bytes = ins.offset; + file_extent.ram_bytes = async_extent->ram_size; + file_extent.num_bytes = async_extent->ram_size; + file_extent.offset = 0; + file_extent.compression = async_extent->compress_type; + em = create_io_em(inode, start, async_extent->ram_size, /* len */ start, /* orig_start */ @@ -1206,6 +1215,7 @@ static void submit_one_async_extent(struct async_chunk *async_chunk, ins.offset, /* orig_block_len */ async_extent->ram_size, /* ram_bytes */ async_extent->compress_type, + &file_extent, BTRFS_ORDERED_COMPRESSED); if (IS_ERR(em)) { ret = PTR_ERR(em); @@ -1395,6 +1405,7 @@ static noinline int cow_file_range(struct btrfs_inode *inode, while (num_bytes > 0) { struct btrfs_ordered_extent *ordered; + struct btrfs_file_extent file_extent; cur_alloc_size = num_bytes; ret = btrfs_reserve_extent(root, cur_alloc_size, cur_alloc_size, @@ -1431,6 +1442,12 @@ static noinline int cow_file_range(struct btrfs_inode *inode, extent_reserved = true; ram_size = ins.offset; + file_extent.disk_bytenr = ins.objectid; + file_extent.disk_num_bytes = ins.offset; + file_extent.num_bytes = ins.offset; + file_extent.ram_bytes = ins.offset; + file_extent.offset = 0; + file_extent.compression = BTRFS_COMPRESS_NONE; lock_extent(&inode->io_tree, start, start + ram_size - 1, &cached); @@ -1442,6 +1459,7 @@ static noinline int cow_file_range(struct btrfs_inode *inode, ins.offset, /* orig_block_len */ ram_size, /* ram_bytes */ BTRFS_COMPRESS_NONE, /* compress_type */ + &file_extent, BTRFS_ORDERED_REGULAR /* type */); if (IS_ERR(em)) { unlock_extent(&inode->io_tree, start, @@ -2180,6 +2198,7 @@ static noinline int run_delalloc_nocow(struct btrfs_inode *inode, nocow_args.num_bytes, /* block_len */ nocow_args.disk_num_bytes, /* orig_block_len */ ram_bytes, BTRFS_COMPRESS_NONE, + &nocow_args.file_extent, BTRFS_ORDERED_PREALLOC); if (IS_ERR(em)) { unlock_extent(&inode->io_tree, cur_offset, @@ -5012,6 +5031,7 @@ int btrfs_cont_expand(struct btrfs_inode *inode, loff_t oldsize, loff_t size) hole_em->orig_start = cur_offset; hole_em->block_start = EXTENT_MAP_HOLE; + hole_em->disk_bytenr = EXTENT_MAP_HOLE; hole_em->block_len = 0; hole_em->disk_num_bytes = 0; hole_em->ram_bytes = hole_size; @@ -6880,6 +6900,7 @@ struct extent_map *btrfs_get_extent(struct btrfs_inode *inode, } em->start = EXTENT_MAP_HOLE; em->orig_start = EXTENT_MAP_HOLE; + em->disk_bytenr = EXTENT_MAP_HOLE; em->len = (u64)-1; em->block_len = (u64)-1; @@ -7045,7 +7066,8 @@ static struct extent_map *btrfs_create_dio_extent(struct btrfs_inode *inode, const u64 block_len, const u64 orig_block_len, const u64 ram_bytes, - const int type) + const int type, + struct btrfs_file_extent *file_extent) { struct extent_map *em = NULL; struct btrfs_ordered_extent *ordered; @@ -7054,7 +7076,7 @@ static struct extent_map *btrfs_create_dio_extent(struct btrfs_inode *inode, em = create_io_em(inode, start, len, orig_start, block_start, block_len, orig_block_len, ram_bytes, BTRFS_COMPRESS_NONE, /* compress_type */ - type); + file_extent, type); if (IS_ERR(em)) goto out; } @@ -7085,6 +7107,7 @@ static struct extent_map *btrfs_new_extent_direct(struct btrfs_inode *inode, { struct btrfs_root *root = inode->root; struct btrfs_fs_info *fs_info = root->fs_info; + struct btrfs_file_extent file_extent; struct extent_map *em; struct btrfs_key ins; u64 alloc_hint; @@ -7103,9 +7126,16 @@ static struct extent_map *btrfs_new_extent_direct(struct btrfs_inode *inode, if (ret) return ERR_PTR(ret); + file_extent.disk_bytenr = ins.objectid; + file_extent.disk_num_bytes = ins.offset; + file_extent.num_bytes = ins.offset; + file_extent.ram_bytes = ins.offset; + file_extent.offset = 0; + file_extent.compression = BTRFS_COMPRESS_NONE; em = btrfs_create_dio_extent(inode, dio_data, start, ins.offset, start, ins.objectid, ins.offset, ins.offset, - ins.offset, BTRFS_ORDERED_REGULAR); + ins.offset, BTRFS_ORDERED_REGULAR, + &file_extent); btrfs_dec_block_group_reservations(fs_info, ins.objectid); if (IS_ERR(em)) btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, @@ -7348,6 +7378,7 @@ static struct extent_map *create_io_em(struct btrfs_inode *inode, u64 start, u64 len, u64 orig_start, u64 block_start, u64 block_len, u64 disk_num_bytes, u64 ram_bytes, int compress_type, + struct btrfs_file_extent *file_extent, int type) { struct extent_map *em; @@ -7405,9 +7436,11 @@ static struct extent_map *create_io_em(struct btrfs_inode *inode, u64 start, em->len = len; em->block_len = block_len; em->block_start = block_start; + em->disk_bytenr = file_extent->disk_bytenr; em->disk_num_bytes = disk_num_bytes; em->ram_bytes = ram_bytes; em->generation = -1; + em->offset = file_extent->offset; em->flags |= EXTENT_FLAG_PINNED; if (type == BTRFS_ORDERED_COMPRESSED) extent_map_set_compression(em, compress_type); @@ -7431,6 +7464,7 @@ static int btrfs_get_blocks_direct_write(struct extent_map **map, { const bool nowait = (iomap_flags & IOMAP_NOWAIT); struct btrfs_fs_info *fs_info = inode_to_fs_info(inode); + struct btrfs_file_extent file_extent; struct extent_map *em = *map; int type; u64 block_start, orig_start, orig_block_len, ram_bytes; @@ -7461,7 +7495,8 @@ static int btrfs_get_blocks_direct_write(struct extent_map **map, block_start = em->block_start + (start - em->start); if (can_nocow_extent(inode, start, &len, &orig_start, - &orig_block_len, &ram_bytes, NULL, false, false) == 1) { + &orig_block_len, &ram_bytes, + &file_extent, false, false) == 1) { bg = btrfs_inc_nocow_writers(fs_info, block_start); if (bg) can_nocow = true; @@ -7489,7 +7524,8 @@ static int btrfs_get_blocks_direct_write(struct extent_map **map, em2 = btrfs_create_dio_extent(BTRFS_I(inode), dio_data, start, len, orig_start, block_start, len, orig_block_len, - ram_bytes, type); + ram_bytes, type, + &file_extent); btrfs_dec_nocow_writers(bg); if (type == BTRFS_ORDERED_PREALLOC) { free_extent_map(em); @@ -9629,6 +9665,8 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode, em->orig_start = cur_offset; em->len = ins.offset; em->block_start = ins.objectid; + em->disk_bytenr = ins.objectid; + em->offset = 0; em->block_len = ins.offset; em->disk_num_bytes = ins.offset; em->ram_bytes = ins.offset; @@ -10195,6 +10233,7 @@ ssize_t btrfs_do_encoded_write(struct kiocb *iocb, struct iov_iter *from, struct extent_changeset *data_reserved = NULL; struct extent_state *cached_state = NULL; struct btrfs_ordered_extent *ordered; + struct btrfs_file_extent file_extent; int compression; size_t orig_count; u64 start, end; @@ -10370,10 +10409,16 @@ ssize_t btrfs_do_encoded_write(struct kiocb *iocb, struct iov_iter *from, goto out_delalloc_release; extent_reserved = true; + file_extent.disk_bytenr = ins.objectid; + file_extent.disk_num_bytes = ins.offset; + file_extent.num_bytes = num_bytes; + file_extent.ram_bytes = ram_bytes; + file_extent.offset = encoded->unencoded_offset; + file_extent.compression = compression; em = create_io_em(inode, start, num_bytes, start - encoded->unencoded_offset, ins.objectid, ins.offset, ins.offset, ram_bytes, compression, - BTRFS_ORDERED_COMPRESSED); + &file_extent, BTRFS_ORDERED_COMPRESSED); if (IS_ERR(em)) { ret = PTR_ERR(em); goto out_free_reserved;