| Message ID | 20240521-zoned-gc-v3-1-7db9742454c7@kernel.org (mailing list archive) |
|---|---|
| State | New, archived |
| Series | btrfs: zoned: always set aside a zone for relocation |
On Tue, May 21, 2024 at 3:58 PM Johannes Thumshirn <jth@kernel.org> wrote:
>
> From: Johannes Thumshirn <johannes.thumshirn@wdc.com>
>
> Reserve one zone as a data relocation target on each mount. If we already
> find one empty block group, there's no need to force a chunk allocation,
> but we can use this empty data block group as our relocation target.
>
> Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
> ---
>  fs/btrfs/disk-io.c |  2 ++
>  fs/btrfs/zoned.c   | 65 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  fs/btrfs/zoned.h   |  3 +++
>  3 files changed, 70 insertions(+)
>
> diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
> index a91a8056758a..19e7b4a59a9e 100644
> --- a/fs/btrfs/disk-io.c
> +++ b/fs/btrfs/disk-io.c
> @@ -3558,6 +3558,8 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
>          }
>          btrfs_discard_resume(fs_info);
>
> +        btrfs_reserve_relocation_bg(fs_info);
> +
>          if (fs_info->uuid_root &&
>              (btrfs_test_opt(fs_info, RESCAN_UUID_TREE) ||
>               fs_info->generation != btrfs_super_uuid_tree_generation(disk_super))) {
> diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
> index 4cba80b34387..9404cb32256f 100644
> --- a/fs/btrfs/zoned.c
> +++ b/fs/btrfs/zoned.c
> @@ -17,6 +17,7 @@
>  #include "fs.h"
>  #include "accessors.h"
>  #include "bio.h"
> +#include "transaction.h"
>
>  /* Maximum number of zones to report per blkdev_report_zones() call */
>  #define BTRFS_REPORT_NR_ZONES   4096
> @@ -2634,3 +2635,67 @@ void btrfs_check_active_zone_reservation(struct btrfs_fs_info *fs_info)
>          }
>          spin_unlock(&fs_info->zone_active_bgs_lock);
>  }
> +
> +static u64 find_empty_block_group(struct btrfs_space_info *sinfo, u64 flags)
> +{
> +        struct btrfs_block_group *bg;
> +
> +        for (int i = 0; i < BTRFS_NR_RAID_TYPES; i++) {
> +                list_for_each_entry(bg, &sinfo->block_groups[i], list) {
> +                        if (bg->flags != flags)
> +                                continue;
> +                        if (bg->used == 0)
> +                                return bg->start;
> +                }
> +        }

I believe I commented about this in some previous patchset version,
but here goes again.

This happens at mount time, where we have already loaded all block groups.
When we load them, if we find unused ones, we add them to the list of
empty block groups, so that the next time the cleaner kthread runs it
deletes them.

I don't see any code here removing the selected block group from that
list, or anything at btrfs_delete_unused_bgs() that prevents deleting
a block group if it was selected as the data reloc bg.

Maybe I'm missing something?
How do you ensure the selected block group isn't deleted by the cleaner kthread?

Thanks.

> +
> +        return 0;
> +}
> +
> +void btrfs_reserve_relocation_bg(struct btrfs_fs_info *fs_info)
> +{
> +        struct btrfs_root *tree_root = fs_info->tree_root;
> +        struct btrfs_space_info *sinfo = fs_info->data_sinfo;
> +        struct btrfs_trans_handle *trans;
> +        u64 flags = btrfs_get_alloc_profile(fs_info, sinfo->flags);
> +        u64 bytenr = 0;
> +
> +        lockdep_assert_not_held(&fs_info->relocation_bg_lock);
> +
> +        if (!btrfs_is_zoned(fs_info))
> +                return;
> +
> +        if (fs_info->data_reloc_bg)
> +                return;
> +
> +        bytenr = find_empty_block_group(sinfo, flags);
> +        if (!bytenr) {
> +                int ret;
> +
> +                trans = btrfs_join_transaction(tree_root);
> +                if (IS_ERR(trans))
> +                        return;
> +
> +                ret = btrfs_chunk_alloc(trans, flags, CHUNK_ALLOC_FORCE);
> +                btrfs_end_transaction(trans);
> +
> +                if (!ret) {
> +                        struct btrfs_block_group *bg;
> +
> +                        bytenr = find_empty_block_group(sinfo, flags);
> +                        if (!bytenr)
> +                                goto out;
> +                        bg = btrfs_lookup_block_group(fs_info, bytenr);
> +                        ASSERT(bg);
> +
> +                        if (!btrfs_zone_activate(bg))
> +                                bytenr = 0;
> +                        btrfs_put_block_group(bg);
> +                }
> +        }
> +
> +out:
> +        spin_lock(&fs_info->relocation_bg_lock);
> +        fs_info->data_reloc_bg = bytenr;
> +        spin_unlock(&fs_info->relocation_bg_lock);
> +}
> diff --git a/fs/btrfs/zoned.h b/fs/btrfs/zoned.h
> index 77c4321e331f..b9935222bf7a 100644
> --- a/fs/btrfs/zoned.h
> +++ b/fs/btrfs/zoned.h
> @@ -97,6 +97,7 @@ int btrfs_zone_finish_one_bg(struct btrfs_fs_info *fs_info);
>  int btrfs_zoned_activate_one_bg(struct btrfs_fs_info *fs_info,
>                                  struct btrfs_space_info *space_info, bool do_finish);
>  void btrfs_check_active_zone_reservation(struct btrfs_fs_info *fs_info);
> +void btrfs_reserve_relocation_bg(struct btrfs_fs_info *fs_info);
>  #else /* CONFIG_BLK_DEV_ZONED */
>  static inline int btrfs_get_dev_zone(struct btrfs_device *device, u64 pos,
>                                       struct blk_zone *zone)
> @@ -271,6 +272,8 @@ static inline int btrfs_zoned_activate_one_bg(struct btrfs_fs_info *fs_info,
>
>  static inline void btrfs_check_active_zone_reservation(struct btrfs_fs_info *fs_info) { }
>
> +static inline void btrfs_reserve_relocation_bg(struct btrfs_fs_info *fs_info) { }
> +
>  #endif
>
>  static inline bool btrfs_dev_is_sequential(struct btrfs_device *device, u64 pos)
>
> --
> 2.43.0
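For orientation, here is a minimal sketch of the kind of cleaner-side guard Filipe is asking about. Only the field and lock names (`fs_info->data_reloc_bg`, `relocation_bg_lock`, `bg->start`) come from the patch above; the helper itself and its placement are hypothetical, not part of the posted series:

```c
/*
 * Hypothetical helper, not part of the posted series: report whether
 * @bg is the currently reserved zoned data relocation target, so a
 * cleaner-side caller could skip it instead of deleting it. The
 * data_reloc_bg field and relocation_bg_lock follow the patch above.
 */
static bool bg_is_data_reloc_target(struct btrfs_fs_info *fs_info,
				    struct btrfs_block_group *bg)
{
	bool reserved;

	spin_lock(&fs_info->relocation_bg_lock);
	reserved = (fs_info->data_reloc_bg == bg->start);
	spin_unlock(&fs_info->relocation_bg_lock);

	return reserved;
}
```

A `continue` in btrfs_delete_unused_bgs()'s loop whenever this returns true would keep the cleaner kthread from deleting the reservation.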
On 21.05.24 17:23, Filipe Manana wrote:
>> +static u64 find_empty_block_group(struct btrfs_space_info *sinfo, u64 flags)
>> +{
>> +        struct btrfs_block_group *bg;
>> +
>> +        for (int i = 0; i < BTRFS_NR_RAID_TYPES; i++) {
>> +                list_for_each_entry(bg, &sinfo->block_groups[i], list) {
>> +                        if (bg->flags != flags)
>> +                                continue;
>> +                        if (bg->used == 0)
>> +                                return bg->start;
>> +                }
>> +        }
> I believe I commented about this in some previous patchset version,
> but here goes again.
>
> This happens at mount time, where we have already loaded all block groups.
> When we load them, if we find unused ones, we add them to the list of
> empty block groups, so that the next time the cleaner kthread runs it
> deletes them.
>
> I don't see any code here removing the selected block group from that
> list, or anything at btrfs_delete_unused_bgs() that prevents deleting
> a block group if it was selected as the data reloc bg.
>
> Maybe I'm missing something?
> How do you ensure the selected block group isn't deleted by the cleaner kthread?

Indeed, I forgot about that.
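The complementary shape for the fix Johannes concedes is missing would be to take the chosen group off the cleaner's work list at selection time. The sketch below is equally hypothetical; it assumes the usual btrfs bookkeeping, where unused block groups hang off fs_info->unused_bgs through bg->bg_list and the list holds a block group reference:

```c
/*
 * Sketch of one possible fix, not the posted code: drop the chosen
 * block group from the unused_bgs list so the cleaner kthread never
 * considers it for deletion. Assumes list entries are linked through
 * bg->bg_list, hold a reference, and are protected by unused_bgs_lock.
 */
static void remove_from_unused_bgs(struct btrfs_fs_info *fs_info,
				   struct btrfs_block_group *bg)
{
	spin_lock(&fs_info->unused_bgs_lock);
	if (!list_empty(&bg->bg_list)) {
		list_del_init(&bg->bg_list);
		btrfs_put_block_group(bg);	/* drop the list's reference */
	}
	spin_unlock(&fs_info->unused_bgs_lock);
}
```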
```diff
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index a91a8056758a..19e7b4a59a9e 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3558,6 +3558,8 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
         }
         btrfs_discard_resume(fs_info);
 
+        btrfs_reserve_relocation_bg(fs_info);
+
         if (fs_info->uuid_root &&
             (btrfs_test_opt(fs_info, RESCAN_UUID_TREE) ||
              fs_info->generation != btrfs_super_uuid_tree_generation(disk_super))) {
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index 4cba80b34387..9404cb32256f 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -17,6 +17,7 @@
 #include "fs.h"
 #include "accessors.h"
 #include "bio.h"
+#include "transaction.h"
 
 /* Maximum number of zones to report per blkdev_report_zones() call */
 #define BTRFS_REPORT_NR_ZONES   4096
@@ -2634,3 +2635,67 @@ void btrfs_check_active_zone_reservation(struct btrfs_fs_info *fs_info)
         }
         spin_unlock(&fs_info->zone_active_bgs_lock);
 }
+
+static u64 find_empty_block_group(struct btrfs_space_info *sinfo, u64 flags)
+{
+        struct btrfs_block_group *bg;
+
+        for (int i = 0; i < BTRFS_NR_RAID_TYPES; i++) {
+                list_for_each_entry(bg, &sinfo->block_groups[i], list) {
+                        if (bg->flags != flags)
+                                continue;
+                        if (bg->used == 0)
+                                return bg->start;
+                }
+        }
+
+        return 0;
+}
+
+void btrfs_reserve_relocation_bg(struct btrfs_fs_info *fs_info)
+{
+        struct btrfs_root *tree_root = fs_info->tree_root;
+        struct btrfs_space_info *sinfo = fs_info->data_sinfo;
+        struct btrfs_trans_handle *trans;
+        u64 flags = btrfs_get_alloc_profile(fs_info, sinfo->flags);
+        u64 bytenr = 0;
+
+        lockdep_assert_not_held(&fs_info->relocation_bg_lock);
+
+        if (!btrfs_is_zoned(fs_info))
+                return;
+
+        if (fs_info->data_reloc_bg)
+                return;
+
+        bytenr = find_empty_block_group(sinfo, flags);
+        if (!bytenr) {
+                int ret;
+
+                trans = btrfs_join_transaction(tree_root);
+                if (IS_ERR(trans))
+                        return;
+
+                ret = btrfs_chunk_alloc(trans, flags, CHUNK_ALLOC_FORCE);
+                btrfs_end_transaction(trans);
+
+                if (!ret) {
+                        struct btrfs_block_group *bg;
+
+                        bytenr = find_empty_block_group(sinfo, flags);
+                        if (!bytenr)
+                                goto out;
+                        bg = btrfs_lookup_block_group(fs_info, bytenr);
+                        ASSERT(bg);
+
+                        if (!btrfs_zone_activate(bg))
+                                bytenr = 0;
+                        btrfs_put_block_group(bg);
+                }
+        }
+
+out:
+        spin_lock(&fs_info->relocation_bg_lock);
+        fs_info->data_reloc_bg = bytenr;
+        spin_unlock(&fs_info->relocation_bg_lock);
+}
diff --git a/fs/btrfs/zoned.h b/fs/btrfs/zoned.h
index 77c4321e331f..b9935222bf7a 100644
--- a/fs/btrfs/zoned.h
+++ b/fs/btrfs/zoned.h
@@ -97,6 +97,7 @@ int btrfs_zone_finish_one_bg(struct btrfs_fs_info *fs_info);
 int btrfs_zoned_activate_one_bg(struct btrfs_fs_info *fs_info,
                                 struct btrfs_space_info *space_info, bool do_finish);
 void btrfs_check_active_zone_reservation(struct btrfs_fs_info *fs_info);
+void btrfs_reserve_relocation_bg(struct btrfs_fs_info *fs_info);
 #else /* CONFIG_BLK_DEV_ZONED */
 static inline int btrfs_get_dev_zone(struct btrfs_device *device, u64 pos,
                                      struct blk_zone *zone)
@@ -271,6 +272,8 @@ static inline int btrfs_zoned_activate_one_bg(struct btrfs_fs_info *fs_info,
 
 static inline void btrfs_check_active_zone_reservation(struct btrfs_fs_info *fs_info) { }
 
+static inline void btrfs_reserve_relocation_bg(struct btrfs_fs_info *fs_info) { }
+
 #endif
 
 static inline bool btrfs_dev_is_sequential(struct btrfs_device *device, u64 pos)
```
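As a standalone illustration of the selection logic, here is a compilable mock of the find_empty_block_group() scan using made-up types and data in place of the kernel structures. It also shows how the patch overloads a return value of 0 as "not found", presumably safe because no block group starts at logical address 0:

```c
#include <stdint.h>
#include <stdio.h>

/* Made-up stand-ins for the kernel structures; illustration only. */
struct mock_bg {
	uint64_t start;	/* logical bytenr of the block group */
	uint64_t flags;	/* allocation profile bits */
	uint64_t used;	/* bytes in use; 0 means empty */
};

#define MOCK_DATA 1ULL	/* pretend BTRFS_BLOCK_GROUP_DATA */
#define MOCK_META 2ULL	/* pretend BTRFS_BLOCK_GROUP_METADATA */

static struct mock_bg bgs[] = {
	{ .start = 0x100000, .flags = MOCK_DATA, .used = 4096 },	/* in use */
	{ .start = 0x200000, .flags = MOCK_META, .used = 0 },	/* wrong profile */
	{ .start = 0x300000, .flags = MOCK_DATA, .used = 0 },	/* candidate */
};

/* Mirrors find_empty_block_group(): first empty group matching @flags. */
static uint64_t find_empty_bg(uint64_t flags)
{
	for (size_t i = 0; i < sizeof(bgs) / sizeof(bgs[0]); i++) {
		if (bgs[i].flags != flags)
			continue;
		if (bgs[i].used == 0)
			return bgs[i].start;
	}
	return 0;	/* 0 doubles as "not found", as in the patch */
}

int main(void)
{
	uint64_t bytenr = find_empty_bg(MOCK_DATA);

	if (bytenr)
		printf("reserve relocation bg at 0x%llx\n",
		       (unsigned long long)bytenr);
	else
		printf("no empty data bg; force a chunk allocation and retry\n");
	return 0;
}
```

Running the mock prints the third entry (0x300000): the first group is skipped because it holds data, the second because its profile does not match, mirroring the two `continue`/`used == 0` checks in the patch.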