Message ID | 2b5929e96bbede278f63c68a149cb50645b094d6.1712074768.git.fdmanana@suse.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | btrfs: remove not needed mod_start and mod_len from struct extent_map | expand |
在 2024/4/3 03:10, fdmanana@kernel.org 写道: > From: Filipe Manana <fdmanana@suse.com> > > The mod_start and mod_len fields of struct extent_map were introduced by > commit 4e2f84e63dc1 ("Btrfs: improve fsync by filtering extents that we > want") in order to avoid too low performance when fsyncing a file that > keeps getting extent maps merge, because it resulted in each fsync logging > again csum ranges that were already merged before. > > We don't need this anymore as extent maps in the modified list of extents > that get merged with other extents and once we log an extent map we remove > it from the list of modified extent maps. > > So remove the mod_start and mod_len fields from struct extent_map and use > instead the start and len fields when logging checksums in the fast fsync > path. This also makes EXTENT_FLAG_FILLING unused so remove it as well. > > Running the reproducer from the commit mentioned before, with a larger > number of extents and against a null block device, so that IO is fast > and we can better see any impact from searching checkums items and logging > them, gave the following results from dd: > > Before this change: > > 409600000 bytes (410 MB, 391 MiB) copied, 22.948 s, 17.8 MB/s > > After this change: > > 409600000 bytes (410 MB, 391 MiB) copied, 22.9997 s, 17.8 MB/s > > So no changes in throughput. > The test was done in a release kernel (non-debug, Debian's default kernel > config) and its steps are the following: > > $ mkfs.btrfs -f /dev/nullb0 > $ mount /dev/sdb /mnt > $ dd if=/dev/zero of=/mnt/foobar bs=4k count=100000 oflag=sync > $ umount /mnt > > This also reduce the size of struct extent_map from 128 bytes down to 112 > bytes, so now we can have 36 extents maps per 4K page instead of 32. > > Signed-off-by: Filipe Manana <fdmanana@suse.com> Reviewed-by: Qu Wenruo <wqu@suse.com> Great we can start cleaning up the extent map members. Mind this patch to be included in my upcoming extent map cleaning series? Thanks, Qu > --- > fs/btrfs/extent_map.c | 18 ------------------ > fs/btrfs/extent_map.h | 4 ---- > fs/btrfs/inode.c | 4 +--- > fs/btrfs/tree-log.c | 4 ++-- > include/trace/events/btrfs.h | 3 +-- > 5 files changed, 4 insertions(+), 29 deletions(-) > > diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c > index 445f7716f1e2..471654cb65b0 100644 > --- a/fs/btrfs/extent_map.c > +++ b/fs/btrfs/extent_map.c > @@ -252,8 +252,6 @@ static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em) > em->len += merge->len; > em->block_len += merge->block_len; > em->block_start = merge->block_start; > - em->mod_len = (em->mod_len + em->mod_start) - merge->mod_start; > - em->mod_start = merge->mod_start; > em->generation = max(em->generation, merge->generation); > em->flags |= EXTENT_FLAG_MERGED; > > @@ -271,7 +269,6 @@ static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em) > em->block_len += merge->block_len; > rb_erase_cached(&merge->rb_node, &tree->map); > RB_CLEAR_NODE(&merge->rb_node); > - em->mod_len = (merge->mod_start + merge->mod_len) - em->mod_start; > em->generation = max(em->generation, merge->generation); > em->flags |= EXTENT_FLAG_MERGED; > free_extent_map(merge); > @@ -300,7 +297,6 @@ int unpin_extent_cache(struct btrfs_inode *inode, u64 start, u64 len, u64 gen) > struct extent_map_tree *tree = &inode->extent_tree; > int ret = 0; > struct extent_map *em; > - bool prealloc = false; > > write_lock(&tree->lock); > em = lookup_extent_mapping(tree, start, len); > @@ -325,21 +321,9 @@ int unpin_extent_cache(struct btrfs_inode *inode, u64 start, u64 len, u64 gen) > > em->generation = gen; > em->flags &= ~EXTENT_FLAG_PINNED; > - em->mod_start = em->start; > - em->mod_len = em->len; > - > - if (em->flags & EXTENT_FLAG_FILLING) { > - prealloc = true; > - em->flags &= ~EXTENT_FLAG_FILLING; > - } > > try_merge_map(tree, em); > > - if (prealloc) { > - em->mod_start = em->start; > - em->mod_len = em->len; > - } > - > out: > write_unlock(&tree->lock); > free_extent_map(em); > @@ -361,8 +345,6 @@ static inline void setup_extent_mapping(struct extent_map_tree *tree, > int modified) > { > refcount_inc(&em->refs); > - em->mod_start = em->start; > - em->mod_len = em->len; > > ASSERT(list_empty(&em->list)); > > diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h > index c5a098c99cc6..10e9491865c9 100644 > --- a/fs/btrfs/extent_map.h > +++ b/fs/btrfs/extent_map.h > @@ -30,8 +30,6 @@ enum { > ENUM_BIT(EXTENT_FLAG_PREALLOC), > /* Logging this extent */ > ENUM_BIT(EXTENT_FLAG_LOGGING), > - /* Filling in a preallocated extent */ > - ENUM_BIT(EXTENT_FLAG_FILLING), > /* This em is merged from two or more physically adjacent ems */ > ENUM_BIT(EXTENT_FLAG_MERGED), > }; > @@ -46,8 +44,6 @@ struct extent_map { > /* all of these are in bytes */ > u64 start; > u64 len; > - u64 mod_start; > - u64 mod_len; > u64 orig_start; > u64 orig_block_len; > u64 ram_bytes; > diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c > index 3442dedff53d..c6f2b5d1dee1 100644 > --- a/fs/btrfs/inode.c > +++ b/fs/btrfs/inode.c > @@ -7338,9 +7338,7 @@ static struct extent_map *create_io_em(struct btrfs_inode *inode, u64 start, > em->ram_bytes = ram_bytes; > em->generation = -1; > em->flags |= EXTENT_FLAG_PINNED; > - if (type == BTRFS_ORDERED_PREALLOC) > - em->flags |= EXTENT_FLAG_FILLING; > - else if (type == BTRFS_ORDERED_COMPRESSED) > + if (type == BTRFS_ORDERED_COMPRESSED) > extent_map_set_compression(em, compress_type); > > ret = btrfs_replace_extent_map_range(inode, em, true); > diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c > index 472918a5bc73..d9777649e170 100644 > --- a/fs/btrfs/tree-log.c > +++ b/fs/btrfs/tree-log.c > @@ -4574,8 +4574,8 @@ static int log_extent_csums(struct btrfs_trans_handle *trans, > struct btrfs_root *csum_root; > u64 csum_offset; > u64 csum_len; > - u64 mod_start = em->mod_start; > - u64 mod_len = em->mod_len; > + u64 mod_start = em->start; > + u64 mod_len = em->len; > LIST_HEAD(ordered_sums); > int ret = 0; > > diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h > index 90b0222390e5..766cfd48386c 100644 > --- a/include/trace/events/btrfs.h > +++ b/include/trace/events/btrfs.h > @@ -277,8 +277,7 @@ DEFINE_EVENT(btrfs__inode, btrfs_inode_evict, > { EXTENT_FLAG_COMPRESS_LZO, "COMPRESS_LZO" },\ > { EXTENT_FLAG_COMPRESS_ZSTD, "COMPRESS_ZSTD" },\ > { EXTENT_FLAG_PREALLOC, "PREALLOC" },\ > - { EXTENT_FLAG_LOGGING, "LOGGING" },\ > - { EXTENT_FLAG_FILLING, "FILLING" }) > + { EXTENT_FLAG_LOGGING, "LOGGING" }) > > TRACE_EVENT_CONDITION(btrfs_get_extent, >
On Tue, Apr 2, 2024 at 10:18 PM Qu Wenruo <quwenruo.btrfs@gmx.com> wrote: > > > > 在 2024/4/3 03:10, fdmanana@kernel.org 写道: > > From: Filipe Manana <fdmanana@suse.com> > > > > The mod_start and mod_len fields of struct extent_map were introduced by > > commit 4e2f84e63dc1 ("Btrfs: improve fsync by filtering extents that we > > want") in order to avoid too low performance when fsyncing a file that > > keeps getting extent maps merge, because it resulted in each fsync logging > > again csum ranges that were already merged before. > > > > We don't need this anymore as extent maps in the modified list of extents > > that get merged with other extents and once we log an extent map we remove > > it from the list of modified extent maps. > > > > So remove the mod_start and mod_len fields from struct extent_map and use > > instead the start and len fields when logging checksums in the fast fsync > > path. This also makes EXTENT_FLAG_FILLING unused so remove it as well. > > > > Running the reproducer from the commit mentioned before, with a larger > > number of extents and against a null block device, so that IO is fast > > and we can better see any impact from searching checkums items and logging > > them, gave the following results from dd: > > > > Before this change: > > > > 409600000 bytes (410 MB, 391 MiB) copied, 22.948 s, 17.8 MB/s > > > > After this change: > > > > 409600000 bytes (410 MB, 391 MiB) copied, 22.9997 s, 17.8 MB/s > > > > So no changes in throughput. > > The test was done in a release kernel (non-debug, Debian's default kernel > > config) and its steps are the following: > > > > $ mkfs.btrfs -f /dev/nullb0 > > $ mount /dev/sdb /mnt > > $ dd if=/dev/zero of=/mnt/foobar bs=4k count=100000 oflag=sync > > $ umount /mnt > > > > This also reduce the size of struct extent_map from 128 bytes down to 112 > > bytes, so now we can have 36 extents maps per 4K page instead of 32. > > > > Signed-off-by: Filipe Manana <fdmanana@suse.com> > > Reviewed-by: Qu Wenruo <wqu@suse.com> > > Great we can start cleaning up the extent map members. > > Mind this patch to be included in my upcoming extent map cleaning series? Well, the second paragraph needs to be updated to: "We don't need this anymore as extent maps in the modified list of extents are never merged with other extent maps and once we log an extent map we remove it from the list of modified extent maps, so it's never logged twice." Plus s/reduce/reduces/ in the last paragraph of the changelog. Why does it need to be included in your series? Can't I just push this to misc-next (with the updated changelog)? I've also been working on a large patchset for extent maps for quite some time, which I hope it's ready in 1 to 2 weeks. Several refactorings and a shrinker. What kind of cleanups do you have? > > Thanks, > Qu > > --- > > fs/btrfs/extent_map.c | 18 ------------------ > > fs/btrfs/extent_map.h | 4 ---- > > fs/btrfs/inode.c | 4 +--- > > fs/btrfs/tree-log.c | 4 ++-- > > include/trace/events/btrfs.h | 3 +-- > > 5 files changed, 4 insertions(+), 29 deletions(-) > > > > diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c > > index 445f7716f1e2..471654cb65b0 100644 > > --- a/fs/btrfs/extent_map.c > > +++ b/fs/btrfs/extent_map.c > > @@ -252,8 +252,6 @@ static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em) > > em->len += merge->len; > > em->block_len += merge->block_len; > > em->block_start = merge->block_start; > > - em->mod_len = (em->mod_len + em->mod_start) - merge->mod_start; > > - em->mod_start = merge->mod_start; > > em->generation = max(em->generation, merge->generation); > > em->flags |= EXTENT_FLAG_MERGED; > > > > @@ -271,7 +269,6 @@ static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em) > > em->block_len += merge->block_len; > > rb_erase_cached(&merge->rb_node, &tree->map); > > RB_CLEAR_NODE(&merge->rb_node); > > - em->mod_len = (merge->mod_start + merge->mod_len) - em->mod_start; > > em->generation = max(em->generation, merge->generation); > > em->flags |= EXTENT_FLAG_MERGED; > > free_extent_map(merge); > > @@ -300,7 +297,6 @@ int unpin_extent_cache(struct btrfs_inode *inode, u64 start, u64 len, u64 gen) > > struct extent_map_tree *tree = &inode->extent_tree; > > int ret = 0; > > struct extent_map *em; > > - bool prealloc = false; > > > > write_lock(&tree->lock); > > em = lookup_extent_mapping(tree, start, len); > > @@ -325,21 +321,9 @@ int unpin_extent_cache(struct btrfs_inode *inode, u64 start, u64 len, u64 gen) > > > > em->generation = gen; > > em->flags &= ~EXTENT_FLAG_PINNED; > > - em->mod_start = em->start; > > - em->mod_len = em->len; > > - > > - if (em->flags & EXTENT_FLAG_FILLING) { > > - prealloc = true; > > - em->flags &= ~EXTENT_FLAG_FILLING; > > - } > > > > try_merge_map(tree, em); > > > > - if (prealloc) { > > - em->mod_start = em->start; > > - em->mod_len = em->len; > > - } > > - > > out: > > write_unlock(&tree->lock); > > free_extent_map(em); > > @@ -361,8 +345,6 @@ static inline void setup_extent_mapping(struct extent_map_tree *tree, > > int modified) > > { > > refcount_inc(&em->refs); > > - em->mod_start = em->start; > > - em->mod_len = em->len; > > > > ASSERT(list_empty(&em->list)); > > > > diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h > > index c5a098c99cc6..10e9491865c9 100644 > > --- a/fs/btrfs/extent_map.h > > +++ b/fs/btrfs/extent_map.h > > @@ -30,8 +30,6 @@ enum { > > ENUM_BIT(EXTENT_FLAG_PREALLOC), > > /* Logging this extent */ > > ENUM_BIT(EXTENT_FLAG_LOGGING), > > - /* Filling in a preallocated extent */ > > - ENUM_BIT(EXTENT_FLAG_FILLING), > > /* This em is merged from two or more physically adjacent ems */ > > ENUM_BIT(EXTENT_FLAG_MERGED), > > }; > > @@ -46,8 +44,6 @@ struct extent_map { > > /* all of these are in bytes */ > > u64 start; > > u64 len; > > - u64 mod_start; > > - u64 mod_len; > > u64 orig_start; > > u64 orig_block_len; > > u64 ram_bytes; > > diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c > > index 3442dedff53d..c6f2b5d1dee1 100644 > > --- a/fs/btrfs/inode.c > > +++ b/fs/btrfs/inode.c > > @@ -7338,9 +7338,7 @@ static struct extent_map *create_io_em(struct btrfs_inode *inode, u64 start, > > em->ram_bytes = ram_bytes; > > em->generation = -1; > > em->flags |= EXTENT_FLAG_PINNED; > > - if (type == BTRFS_ORDERED_PREALLOC) > > - em->flags |= EXTENT_FLAG_FILLING; > > - else if (type == BTRFS_ORDERED_COMPRESSED) > > + if (type == BTRFS_ORDERED_COMPRESSED) > > extent_map_set_compression(em, compress_type); > > > > ret = btrfs_replace_extent_map_range(inode, em, true); > > diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c > > index 472918a5bc73..d9777649e170 100644 > > --- a/fs/btrfs/tree-log.c > > +++ b/fs/btrfs/tree-log.c > > @@ -4574,8 +4574,8 @@ static int log_extent_csums(struct btrfs_trans_handle *trans, > > struct btrfs_root *csum_root; > > u64 csum_offset; > > u64 csum_len; > > - u64 mod_start = em->mod_start; > > - u64 mod_len = em->mod_len; > > + u64 mod_start = em->start; > > + u64 mod_len = em->len; > > LIST_HEAD(ordered_sums); > > int ret = 0; > > > > diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h > > index 90b0222390e5..766cfd48386c 100644 > > --- a/include/trace/events/btrfs.h > > +++ b/include/trace/events/btrfs.h > > @@ -277,8 +277,7 @@ DEFINE_EVENT(btrfs__inode, btrfs_inode_evict, > > { EXTENT_FLAG_COMPRESS_LZO, "COMPRESS_LZO" },\ > > { EXTENT_FLAG_COMPRESS_ZSTD, "COMPRESS_ZSTD" },\ > > { EXTENT_FLAG_PREALLOC, "PREALLOC" },\ > > - { EXTENT_FLAG_LOGGING, "LOGGING" },\ > > - { EXTENT_FLAG_FILLING, "FILLING" }) > > + { EXTENT_FLAG_LOGGING, "LOGGING" }) > > > > TRACE_EVENT_CONDITION(btrfs_get_extent, > >
在 2024/4/3 08:13, Filipe Manana 写道: > On Tue, Apr 2, 2024 at 10:18 PM Qu Wenruo <quwenruo.btrfs@gmx.com> wrote: >> >> >> >> 在 2024/4/3 03:10, fdmanana@kernel.org 写道: >>> From: Filipe Manana <fdmanana@suse.com> >>> >>> The mod_start and mod_len fields of struct extent_map were introduced by >>> commit 4e2f84e63dc1 ("Btrfs: improve fsync by filtering extents that we >>> want") in order to avoid too low performance when fsyncing a file that >>> keeps getting extent maps merge, because it resulted in each fsync logging >>> again csum ranges that were already merged before. >>> >>> We don't need this anymore as extent maps in the modified list of extents >>> that get merged with other extents and once we log an extent map we remove >>> it from the list of modified extent maps. >>> >>> So remove the mod_start and mod_len fields from struct extent_map and use >>> instead the start and len fields when logging checksums in the fast fsync >>> path. This also makes EXTENT_FLAG_FILLING unused so remove it as well. >>> >>> Running the reproducer from the commit mentioned before, with a larger >>> number of extents and against a null block device, so that IO is fast >>> and we can better see any impact from searching checkums items and logging >>> them, gave the following results from dd: >>> >>> Before this change: >>> >>> 409600000 bytes (410 MB, 391 MiB) copied, 22.948 s, 17.8 MB/s >>> >>> After this change: >>> >>> 409600000 bytes (410 MB, 391 MiB) copied, 22.9997 s, 17.8 MB/s >>> >>> So no changes in throughput. >>> The test was done in a release kernel (non-debug, Debian's default kernel >>> config) and its steps are the following: >>> >>> $ mkfs.btrfs -f /dev/nullb0 >>> $ mount /dev/sdb /mnt >>> $ dd if=/dev/zero of=/mnt/foobar bs=4k count=100000 oflag=sync >>> $ umount /mnt >>> >>> This also reduce the size of struct extent_map from 128 bytes down to 112 >>> bytes, so now we can have 36 extents maps per 4K page instead of 32. >>> >>> Signed-off-by: Filipe Manana <fdmanana@suse.com> >> >> Reviewed-by: Qu Wenruo <wqu@suse.com> >> >> Great we can start cleaning up the extent map members. >> >> Mind this patch to be included in my upcoming extent map cleaning series? > > Well, the second paragraph needs to be updated to: > > "We don't need this anymore as extent maps in the modified list of extents > are never merged with other extent maps and once we log an extent map we > remove it from the list of modified extent maps, so it's never logged twice." > > Plus s/reduce/reduces/ in the last paragraph of the changelog. > > Why does it need to be included in your series? Can't I just push this > to misc-next (with the updated changelog)? > > I've also been working on a large patchset for extent maps for quite > some time, which I hope it's ready in 1 to 2 weeks. > Several refactorings and a shrinker. > > What kind of cleanups do you have? My plan is to sync the em members with the file extent item members, so that there is no/less confusion on how to convert a file extent to extent map. Orig_start/block_start/block_len/orig_block_len to be replaced by disk_bytenr/disk_num_bytes/offset, and keep the old members which are already the more-or-less the same as the file extent items. (ram_bytes, start, len, generation) Unfortunately this would result quite some name changes, and would definitely going to conflict with your changes. My bad as I'm not aware of your em work, I'm totally fine to wait for your patch and re-base my work then. Thanks, Qu > > >> >> Thanks, >> Qu >>> --- >>> fs/btrfs/extent_map.c | 18 ------------------ >>> fs/btrfs/extent_map.h | 4 ---- >>> fs/btrfs/inode.c | 4 +--- >>> fs/btrfs/tree-log.c | 4 ++-- >>> include/trace/events/btrfs.h | 3 +-- >>> 5 files changed, 4 insertions(+), 29 deletions(-) >>> >>> diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c >>> index 445f7716f1e2..471654cb65b0 100644 >>> --- a/fs/btrfs/extent_map.c >>> +++ b/fs/btrfs/extent_map.c >>> @@ -252,8 +252,6 @@ static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em) >>> em->len += merge->len; >>> em->block_len += merge->block_len; >>> em->block_start = merge->block_start; >>> - em->mod_len = (em->mod_len + em->mod_start) - merge->mod_start; >>> - em->mod_start = merge->mod_start; >>> em->generation = max(em->generation, merge->generation); >>> em->flags |= EXTENT_FLAG_MERGED; >>> >>> @@ -271,7 +269,6 @@ static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em) >>> em->block_len += merge->block_len; >>> rb_erase_cached(&merge->rb_node, &tree->map); >>> RB_CLEAR_NODE(&merge->rb_node); >>> - em->mod_len = (merge->mod_start + merge->mod_len) - em->mod_start; >>> em->generation = max(em->generation, merge->generation); >>> em->flags |= EXTENT_FLAG_MERGED; >>> free_extent_map(merge); >>> @@ -300,7 +297,6 @@ int unpin_extent_cache(struct btrfs_inode *inode, u64 start, u64 len, u64 gen) >>> struct extent_map_tree *tree = &inode->extent_tree; >>> int ret = 0; >>> struct extent_map *em; >>> - bool prealloc = false; >>> >>> write_lock(&tree->lock); >>> em = lookup_extent_mapping(tree, start, len); >>> @@ -325,21 +321,9 @@ int unpin_extent_cache(struct btrfs_inode *inode, u64 start, u64 len, u64 gen) >>> >>> em->generation = gen; >>> em->flags &= ~EXTENT_FLAG_PINNED; >>> - em->mod_start = em->start; >>> - em->mod_len = em->len; >>> - >>> - if (em->flags & EXTENT_FLAG_FILLING) { >>> - prealloc = true; >>> - em->flags &= ~EXTENT_FLAG_FILLING; >>> - } >>> >>> try_merge_map(tree, em); >>> >>> - if (prealloc) { >>> - em->mod_start = em->start; >>> - em->mod_len = em->len; >>> - } >>> - >>> out: >>> write_unlock(&tree->lock); >>> free_extent_map(em); >>> @@ -361,8 +345,6 @@ static inline void setup_extent_mapping(struct extent_map_tree *tree, >>> int modified) >>> { >>> refcount_inc(&em->refs); >>> - em->mod_start = em->start; >>> - em->mod_len = em->len; >>> >>> ASSERT(list_empty(&em->list)); >>> >>> diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h >>> index c5a098c99cc6..10e9491865c9 100644 >>> --- a/fs/btrfs/extent_map.h >>> +++ b/fs/btrfs/extent_map.h >>> @@ -30,8 +30,6 @@ enum { >>> ENUM_BIT(EXTENT_FLAG_PREALLOC), >>> /* Logging this extent */ >>> ENUM_BIT(EXTENT_FLAG_LOGGING), >>> - /* Filling in a preallocated extent */ >>> - ENUM_BIT(EXTENT_FLAG_FILLING), >>> /* This em is merged from two or more physically adjacent ems */ >>> ENUM_BIT(EXTENT_FLAG_MERGED), >>> }; >>> @@ -46,8 +44,6 @@ struct extent_map { >>> /* all of these are in bytes */ >>> u64 start; >>> u64 len; >>> - u64 mod_start; >>> - u64 mod_len; >>> u64 orig_start; >>> u64 orig_block_len; >>> u64 ram_bytes; >>> diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c >>> index 3442dedff53d..c6f2b5d1dee1 100644 >>> --- a/fs/btrfs/inode.c >>> +++ b/fs/btrfs/inode.c >>> @@ -7338,9 +7338,7 @@ static struct extent_map *create_io_em(struct btrfs_inode *inode, u64 start, >>> em->ram_bytes = ram_bytes; >>> em->generation = -1; >>> em->flags |= EXTENT_FLAG_PINNED; >>> - if (type == BTRFS_ORDERED_PREALLOC) >>> - em->flags |= EXTENT_FLAG_FILLING; >>> - else if (type == BTRFS_ORDERED_COMPRESSED) >>> + if (type == BTRFS_ORDERED_COMPRESSED) >>> extent_map_set_compression(em, compress_type); >>> >>> ret = btrfs_replace_extent_map_range(inode, em, true); >>> diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c >>> index 472918a5bc73..d9777649e170 100644 >>> --- a/fs/btrfs/tree-log.c >>> +++ b/fs/btrfs/tree-log.c >>> @@ -4574,8 +4574,8 @@ static int log_extent_csums(struct btrfs_trans_handle *trans, >>> struct btrfs_root *csum_root; >>> u64 csum_offset; >>> u64 csum_len; >>> - u64 mod_start = em->mod_start; >>> - u64 mod_len = em->mod_len; >>> + u64 mod_start = em->start; >>> + u64 mod_len = em->len; >>> LIST_HEAD(ordered_sums); >>> int ret = 0; >>> >>> diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h >>> index 90b0222390e5..766cfd48386c 100644 >>> --- a/include/trace/events/btrfs.h >>> +++ b/include/trace/events/btrfs.h >>> @@ -277,8 +277,7 @@ DEFINE_EVENT(btrfs__inode, btrfs_inode_evict, >>> { EXTENT_FLAG_COMPRESS_LZO, "COMPRESS_LZO" },\ >>> { EXTENT_FLAG_COMPRESS_ZSTD, "COMPRESS_ZSTD" },\ >>> { EXTENT_FLAG_PREALLOC, "PREALLOC" },\ >>> - { EXTENT_FLAG_LOGGING, "LOGGING" },\ >>> - { EXTENT_FLAG_FILLING, "FILLING" }) >>> + { EXTENT_FLAG_LOGGING, "LOGGING" }) >>> >>> TRACE_EVENT_CONDITION(btrfs_get_extent, >>>
On Tue, Apr 2, 2024 at 10:53 PM Qu Wenruo <quwenruo.btrfs@gmx.com> wrote: > > > > 在 2024/4/3 08:13, Filipe Manana 写道: > > On Tue, Apr 2, 2024 at 10:18 PM Qu Wenruo <quwenruo.btrfs@gmx.com> wrote: > >> > >> > >> > >> 在 2024/4/3 03:10, fdmanana@kernel.org 写道: > >>> From: Filipe Manana <fdmanana@suse.com> > >>> > >>> The mod_start and mod_len fields of struct extent_map were introduced by > >>> commit 4e2f84e63dc1 ("Btrfs: improve fsync by filtering extents that we > >>> want") in order to avoid too low performance when fsyncing a file that > >>> keeps getting extent maps merge, because it resulted in each fsync logging > >>> again csum ranges that were already merged before. > >>> > >>> We don't need this anymore as extent maps in the modified list of extents > >>> that get merged with other extents and once we log an extent map we remove > >>> it from the list of modified extent maps. > >>> > >>> So remove the mod_start and mod_len fields from struct extent_map and use > >>> instead the start and len fields when logging checksums in the fast fsync > >>> path. This also makes EXTENT_FLAG_FILLING unused so remove it as well. > >>> > >>> Running the reproducer from the commit mentioned before, with a larger > >>> number of extents and against a null block device, so that IO is fast > >>> and we can better see any impact from searching checkums items and logging > >>> them, gave the following results from dd: > >>> > >>> Before this change: > >>> > >>> 409600000 bytes (410 MB, 391 MiB) copied, 22.948 s, 17.8 MB/s > >>> > >>> After this change: > >>> > >>> 409600000 bytes (410 MB, 391 MiB) copied, 22.9997 s, 17.8 MB/s > >>> > >>> So no changes in throughput. > >>> The test was done in a release kernel (non-debug, Debian's default kernel > >>> config) and its steps are the following: > >>> > >>> $ mkfs.btrfs -f /dev/nullb0 > >>> $ mount /dev/sdb /mnt > >>> $ dd if=/dev/zero of=/mnt/foobar bs=4k count=100000 oflag=sync > >>> $ umount /mnt > >>> > >>> This also reduce the size of struct extent_map from 128 bytes down to 112 > >>> bytes, so now we can have 36 extents maps per 4K page instead of 32. > >>> > >>> Signed-off-by: Filipe Manana <fdmanana@suse.com> > >> > >> Reviewed-by: Qu Wenruo <wqu@suse.com> > >> > >> Great we can start cleaning up the extent map members. > >> > >> Mind this patch to be included in my upcoming extent map cleaning series? > > > > Well, the second paragraph needs to be updated to: > > > > "We don't need this anymore as extent maps in the modified list of extents > > are never merged with other extent maps and once we log an extent map we > > remove it from the list of modified extent maps, so it's never logged twice." > > > > Plus s/reduce/reduces/ in the last paragraph of the changelog. > > > > Why does it need to be included in your series? Can't I just push this > > to misc-next (with the updated changelog)? > > > > I've also been working on a large patchset for extent maps for quite > > some time, which I hope it's ready in 1 to 2 weeks. > > Several refactorings and a shrinker. > > > > What kind of cleanups do you have? > > My plan is to sync the em members with the file extent item members, so > that there is no/less confusion on how to convert a file extent to > extent map. > Orig_start/block_start/block_len/orig_block_len to be replaced by > disk_bytenr/disk_num_bytes/offset, and keep the old members which are > already the more-or-less the same as the file extent items. > (ram_bytes, start, len, generation) Ok, I see. I'm not completely sure because orig_block_len is a max value across splits. It may work to always set it to block_len of the original extent that was split, this is just from a quick look, so I may be wrong about it. > > Unfortunately this would result quite some name changes, and would > definitely going to conflict with your changes. That would be a huge change, touching lines everywhere due to member renames. > > My bad as I'm not aware of your em work, I'm totally fine to wait for > your patch and re-base my work then. I'm not going to change anything in the extent_map structure itself, but all major operations like adding/removing/replacing/merging/etc are being changed, mostly changing function parameters and updating a percpu counter, and some tiny change for fsync to avoid races and data loss with the shrinker itself. So all potential conflicts are likely simple to resolve. I pushed the patch to for-next with the fixed changelog. Thanks. > > Thanks, > Qu > > > > > > >> > >> Thanks, > >> Qu > >>> --- > >>> fs/btrfs/extent_map.c | 18 ------------------ > >>> fs/btrfs/extent_map.h | 4 ---- > >>> fs/btrfs/inode.c | 4 +--- > >>> fs/btrfs/tree-log.c | 4 ++-- > >>> include/trace/events/btrfs.h | 3 +-- > >>> 5 files changed, 4 insertions(+), 29 deletions(-) > >>> > >>> diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c > >>> index 445f7716f1e2..471654cb65b0 100644 > >>> --- a/fs/btrfs/extent_map.c > >>> +++ b/fs/btrfs/extent_map.c > >>> @@ -252,8 +252,6 @@ static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em) > >>> em->len += merge->len; > >>> em->block_len += merge->block_len; > >>> em->block_start = merge->block_start; > >>> - em->mod_len = (em->mod_len + em->mod_start) - merge->mod_start; > >>> - em->mod_start = merge->mod_start; > >>> em->generation = max(em->generation, merge->generation); > >>> em->flags |= EXTENT_FLAG_MERGED; > >>> > >>> @@ -271,7 +269,6 @@ static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em) > >>> em->block_len += merge->block_len; > >>> rb_erase_cached(&merge->rb_node, &tree->map); > >>> RB_CLEAR_NODE(&merge->rb_node); > >>> - em->mod_len = (merge->mod_start + merge->mod_len) - em->mod_start; > >>> em->generation = max(em->generation, merge->generation); > >>> em->flags |= EXTENT_FLAG_MERGED; > >>> free_extent_map(merge); > >>> @@ -300,7 +297,6 @@ int unpin_extent_cache(struct btrfs_inode *inode, u64 start, u64 len, u64 gen) > >>> struct extent_map_tree *tree = &inode->extent_tree; > >>> int ret = 0; > >>> struct extent_map *em; > >>> - bool prealloc = false; > >>> > >>> write_lock(&tree->lock); > >>> em = lookup_extent_mapping(tree, start, len); > >>> @@ -325,21 +321,9 @@ int unpin_extent_cache(struct btrfs_inode *inode, u64 start, u64 len, u64 gen) > >>> > >>> em->generation = gen; > >>> em->flags &= ~EXTENT_FLAG_PINNED; > >>> - em->mod_start = em->start; > >>> - em->mod_len = em->len; > >>> - > >>> - if (em->flags & EXTENT_FLAG_FILLING) { > >>> - prealloc = true; > >>> - em->flags &= ~EXTENT_FLAG_FILLING; > >>> - } > >>> > >>> try_merge_map(tree, em); > >>> > >>> - if (prealloc) { > >>> - em->mod_start = em->start; > >>> - em->mod_len = em->len; > >>> - } > >>> - > >>> out: > >>> write_unlock(&tree->lock); > >>> free_extent_map(em); > >>> @@ -361,8 +345,6 @@ static inline void setup_extent_mapping(struct extent_map_tree *tree, > >>> int modified) > >>> { > >>> refcount_inc(&em->refs); > >>> - em->mod_start = em->start; > >>> - em->mod_len = em->len; > >>> > >>> ASSERT(list_empty(&em->list)); > >>> > >>> diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h > >>> index c5a098c99cc6..10e9491865c9 100644 > >>> --- a/fs/btrfs/extent_map.h > >>> +++ b/fs/btrfs/extent_map.h > >>> @@ -30,8 +30,6 @@ enum { > >>> ENUM_BIT(EXTENT_FLAG_PREALLOC), > >>> /* Logging this extent */ > >>> ENUM_BIT(EXTENT_FLAG_LOGGING), > >>> - /* Filling in a preallocated extent */ > >>> - ENUM_BIT(EXTENT_FLAG_FILLING), > >>> /* This em is merged from two or more physically adjacent ems */ > >>> ENUM_BIT(EXTENT_FLAG_MERGED), > >>> }; > >>> @@ -46,8 +44,6 @@ struct extent_map { > >>> /* all of these are in bytes */ > >>> u64 start; > >>> u64 len; > >>> - u64 mod_start; > >>> - u64 mod_len; > >>> u64 orig_start; > >>> u64 orig_block_len; > >>> u64 ram_bytes; > >>> diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c > >>> index 3442dedff53d..c6f2b5d1dee1 100644 > >>> --- a/fs/btrfs/inode.c > >>> +++ b/fs/btrfs/inode.c > >>> @@ -7338,9 +7338,7 @@ static struct extent_map *create_io_em(struct btrfs_inode *inode, u64 start, > >>> em->ram_bytes = ram_bytes; > >>> em->generation = -1; > >>> em->flags |= EXTENT_FLAG_PINNED; > >>> - if (type == BTRFS_ORDERED_PREALLOC) > >>> - em->flags |= EXTENT_FLAG_FILLING; > >>> - else if (type == BTRFS_ORDERED_COMPRESSED) > >>> + if (type == BTRFS_ORDERED_COMPRESSED) > >>> extent_map_set_compression(em, compress_type); > >>> > >>> ret = btrfs_replace_extent_map_range(inode, em, true); > >>> diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c > >>> index 472918a5bc73..d9777649e170 100644 > >>> --- a/fs/btrfs/tree-log.c > >>> +++ b/fs/btrfs/tree-log.c > >>> @@ -4574,8 +4574,8 @@ static int log_extent_csums(struct btrfs_trans_handle *trans, > >>> struct btrfs_root *csum_root; > >>> u64 csum_offset; > >>> u64 csum_len; > >>> - u64 mod_start = em->mod_start; > >>> - u64 mod_len = em->mod_len; > >>> + u64 mod_start = em->start; > >>> + u64 mod_len = em->len; > >>> LIST_HEAD(ordered_sums); > >>> int ret = 0; > >>> > >>> diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h > >>> index 90b0222390e5..766cfd48386c 100644 > >>> --- a/include/trace/events/btrfs.h > >>> +++ b/include/trace/events/btrfs.h > >>> @@ -277,8 +277,7 @@ DEFINE_EVENT(btrfs__inode, btrfs_inode_evict, > >>> { EXTENT_FLAG_COMPRESS_LZO, "COMPRESS_LZO" },\ > >>> { EXTENT_FLAG_COMPRESS_ZSTD, "COMPRESS_ZSTD" },\ > >>> { EXTENT_FLAG_PREALLOC, "PREALLOC" },\ > >>> - { EXTENT_FLAG_LOGGING, "LOGGING" },\ > >>> - { EXTENT_FLAG_FILLING, "FILLING" }) > >>> + { EXTENT_FLAG_LOGGING, "LOGGING" }) > >>> > >>> TRACE_EVENT_CONDITION(btrfs_get_extent, > >>>
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 445f7716f1e2..471654cb65b0 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -252,8 +252,6 @@ static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em) em->len += merge->len; em->block_len += merge->block_len; em->block_start = merge->block_start; - em->mod_len = (em->mod_len + em->mod_start) - merge->mod_start; - em->mod_start = merge->mod_start; em->generation = max(em->generation, merge->generation); em->flags |= EXTENT_FLAG_MERGED; @@ -271,7 +269,6 @@ static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em) em->block_len += merge->block_len; rb_erase_cached(&merge->rb_node, &tree->map); RB_CLEAR_NODE(&merge->rb_node); - em->mod_len = (merge->mod_start + merge->mod_len) - em->mod_start; em->generation = max(em->generation, merge->generation); em->flags |= EXTENT_FLAG_MERGED; free_extent_map(merge); @@ -300,7 +297,6 @@ int unpin_extent_cache(struct btrfs_inode *inode, u64 start, u64 len, u64 gen) struct extent_map_tree *tree = &inode->extent_tree; int ret = 0; struct extent_map *em; - bool prealloc = false; write_lock(&tree->lock); em = lookup_extent_mapping(tree, start, len); @@ -325,21 +321,9 @@ int unpin_extent_cache(struct btrfs_inode *inode, u64 start, u64 len, u64 gen) em->generation = gen; em->flags &= ~EXTENT_FLAG_PINNED; - em->mod_start = em->start; - em->mod_len = em->len; - - if (em->flags & EXTENT_FLAG_FILLING) { - prealloc = true; - em->flags &= ~EXTENT_FLAG_FILLING; - } try_merge_map(tree, em); - if (prealloc) { - em->mod_start = em->start; - em->mod_len = em->len; - } - out: write_unlock(&tree->lock); free_extent_map(em); @@ -361,8 +345,6 @@ static inline void setup_extent_mapping(struct extent_map_tree *tree, int modified) { refcount_inc(&em->refs); - em->mod_start = em->start; - em->mod_len = em->len; ASSERT(list_empty(&em->list)); diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index c5a098c99cc6..10e9491865c9 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h @@ -30,8 +30,6 @@ enum { ENUM_BIT(EXTENT_FLAG_PREALLOC), /* Logging this extent */ ENUM_BIT(EXTENT_FLAG_LOGGING), - /* Filling in a preallocated extent */ - ENUM_BIT(EXTENT_FLAG_FILLING), /* This em is merged from two or more physically adjacent ems */ ENUM_BIT(EXTENT_FLAG_MERGED), }; @@ -46,8 +44,6 @@ struct extent_map { /* all of these are in bytes */ u64 start; u64 len; - u64 mod_start; - u64 mod_len; u64 orig_start; u64 orig_block_len; u64 ram_bytes; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 3442dedff53d..c6f2b5d1dee1 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -7338,9 +7338,7 @@ static struct extent_map *create_io_em(struct btrfs_inode *inode, u64 start, em->ram_bytes = ram_bytes; em->generation = -1; em->flags |= EXTENT_FLAG_PINNED; - if (type == BTRFS_ORDERED_PREALLOC) - em->flags |= EXTENT_FLAG_FILLING; - else if (type == BTRFS_ORDERED_COMPRESSED) + if (type == BTRFS_ORDERED_COMPRESSED) extent_map_set_compression(em, compress_type); ret = btrfs_replace_extent_map_range(inode, em, true); diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 472918a5bc73..d9777649e170 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -4574,8 +4574,8 @@ static int log_extent_csums(struct btrfs_trans_handle *trans, struct btrfs_root *csum_root; u64 csum_offset; u64 csum_len; - u64 mod_start = em->mod_start; - u64 mod_len = em->mod_len; + u64 mod_start = em->start; + u64 mod_len = em->len; LIST_HEAD(ordered_sums); int ret = 0; diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index 90b0222390e5..766cfd48386c 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -277,8 +277,7 @@ DEFINE_EVENT(btrfs__inode, btrfs_inode_evict, { EXTENT_FLAG_COMPRESS_LZO, "COMPRESS_LZO" },\ { EXTENT_FLAG_COMPRESS_ZSTD, "COMPRESS_ZSTD" },\ { EXTENT_FLAG_PREALLOC, "PREALLOC" },\ - { EXTENT_FLAG_LOGGING, "LOGGING" },\ - { EXTENT_FLAG_FILLING, "FILLING" }) + { EXTENT_FLAG_LOGGING, "LOGGING" }) TRACE_EVENT_CONDITION(btrfs_get_extent,