[08/13] btrfs: zoned: introduce space_info->active_total_bytes

Message ID	b8b9efd1c21d28dbcda5c2da0080c266455f8ae9.1656909695.git.naohiro.aota@wdc.com (mailing list archive)
State	New, archived
Headers	show Return-Path: <linux-btrfs-owner@kernel.org> IronPort-SDR: /euiknmKQeduV5zViIvV0DdPnDH/eKwNdK1tiLOajGX7yhFJj0sZnukDYd/lfZ23f4BiFM8oGI w7HO00fCzxAOD1zO508v9boV9pWl7kEiivmB2bxGjLeLOp7zBDW1ARWSyJvyUJGh3O/nKvuoCl +UuQwSK4AEYdR6QcbAjfLyh+1WBW2UszcR4l+2o97r+XB1jQ+hVxDCSuphWYXOyGN4FosPmxVB cYXeGoKxaYdP2qLMnCIq6+pOWQMS3QYFXkxUZe+nbWeZ8E2Y1v+GUPPamenFpOtUVeCemGl+Ik 2xhfCVBQkbe1EgnjS7EErr+t IronPort-SDR: uFXXbNTBppuWZ1W9qgSdSGHoPoXPY/kqHrsE8cZOXhgtyjefHKuZ52jwPvuS1/0/9meFc0rFxn Fz+ld5Xkkz73LlTOIBPvoS85wy+7zWza+Qw3QQ0iCbvF9v2oKW2DiSX/oeNEhsFoD6fPwjrPr6 Nn9zP2MMnvsw1Trij5cjUlV+pwdvYy49GLNxabf0IcNI0ZliWj4I+p/I8SgvhKuokNnCozT88B p7jjbHA48TFvtn3jPoBJCbc48l1pBTzY6CbeDUFoUBI+5n6Wc69yY7jPOakq5ZQGsoSuphkxU0 4g4= WDCIronportException: Internal From: Naohiro Aota <naohiro.aota@wdc.com> To: linux-btrfs@vger.kernel.org Cc: Naohiro Aota <naohiro.aota@wdc.com> Subject: [PATCH 08/13] btrfs: zoned: introduce space_info->active_total_bytes Date: Mon, 4 Jul 2022 13:58:12 +0900 Message-Id: <b8b9efd1c21d28dbcda5c2da0080c266455f8ae9.1656909695.git.naohiro.aota@wdc.com> In-Reply-To: <cover.1656909695.git.naohiro.aota@wdc.com> References: <cover.1656909695.git.naohiro.aota@wdc.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Precedence: bulk
Series	btrfs: zoned: fix active zone tracking issues \| expand [00/13] btrfs: zoned: fix active zone tracking issues [01/13] block: add bdev_max_segments() helper [02/13] btrfs: zoned: revive max_zone_append_bytes [03/13] btrfs: replace BTRFS_MAX_EXTENT_SIZE with fs_info->max_extent_size [04/13] btrfs: convert count_max_extents() to use fs_info->max_extent_size [05/13] btrfs: use fs_info->max_extent_size in get_extent_max_capacity() [06/13] btrfs: let can_allocate_chunk return int [07/13] btrfs: zoned: finish least available block group on data BG allocation [08/13] btrfs: zoned: introduce space_info->active_total_bytes [09/13] btrfs: zoned: disable metadata overcommit for zoned [10/13] btrfs: zoned: activate metadata BG on flush_space [11/13] btrfs: zoned: activate necessary block group [12/13] btrfs: zoned: write out partially allocated region [13/13] btrfs: zoned: wait until zone is finished when allocation didn't progress

diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index e930749770ac..51e7c1f1d93f 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -1051,8 +1051,13 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, < block_group->zone_unusable); WARN_ON(block_group->space_info->disk_total < block_group->length * factor); + WARN_ON(block_group->zone_is_active && + block_group->space_info->active_total_bytes + < block_group->length); } block_group->space_info->total_bytes -= block_group->length; + if (block_group->zone_is_active) + block_group->space_info->active_total_bytes -= block_group->length; block_group->space_info->bytes_readonly -= (block_group->length - block_group->zone_unusable); block_group->space_info->bytes_zone_unusable -= @@ -2107,7 +2112,8 @@ static int read_one_block_group(struct btrfs_fs_info *info, trace_btrfs_add_block_group(info, cache, 0); btrfs_update_space_info(info, cache->flags, cache->length, cache->used, cache->bytes_super, - cache->zone_unusable, &space_info); + cache->zone_unusable, cache->zone_is_active, + &space_info); cache->space_info = space_info; @@ -2177,7 +2183,7 @@ static int fill_dummy_bgs(struct btrfs_fs_info *fs_info) } btrfs_update_space_info(fs_info, bg->flags, em->len, em->len, - 0, 0, &space_info); + 0, 0, false, &space_info); bg->space_info = space_info; link_block_group(bg); @@ -2558,7 +2564,7 @@ struct btrfs_block_group *btrfs_make_block_group(struct btrfs_trans_handle *tran trace_btrfs_add_block_group(fs_info, cache, 1); btrfs_update_space_info(fs_info, cache->flags, size, bytes_used, cache->bytes_super, cache->zone_unusable, - &cache->space_info); + cache->zone_is_active, &cache->space_info); btrfs_update_global_block_rsv(fs_info); link_block_group(cache); diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c index 62d25112310d..c7a60341b2d2 100644 --- a/fs/btrfs/space-info.c +++ b/fs/btrfs/space-info.c @@ -295,7 +295,7 @@ int btrfs_init_space_info(struct btrfs_fs_info *fs_info) 
void btrfs_update_space_info(struct btrfs_fs_info *info, u64 flags, u64 total_bytes, u64 bytes_used, u64 bytes_readonly, u64 bytes_zone_unusable, - struct btrfs_space_info **space_info) + bool active, struct btrfs_space_info **space_info) { struct btrfs_space_info *found; int factor; @@ -306,6 +306,8 @@ void btrfs_update_space_info(struct btrfs_fs_info *info, u64 flags, ASSERT(found); spin_lock(&found->lock); found->total_bytes += total_bytes; + if (active) + found->active_total_bytes += total_bytes; found->disk_total += total_bytes * factor; found->bytes_used += bytes_used; found->disk_used += bytes_used * factor; @@ -369,6 +371,22 @@ static u64 calc_available_free_space(struct btrfs_fs_info *fs_info, return avail; } +static inline u64 writable_total_bytes(struct btrfs_fs_info *fs_info, + struct btrfs_space_info *space_info) +{ + /* + * On regular btrfs, all total_bytes are always writable. On zoned + * btrfs, there may be a limitation imposed by max_active_zones. For + * metadata allocation, we cannot finish an existing active block group + * to avoid a deadlock. Thus, we need to consider only the active groups + * to be writable for metadata space. 
+ */ + if (!btrfs_is_zoned(fs_info) || (space_info->flags & BTRFS_BLOCK_GROUP_DATA)) + return space_info->total_bytes; + + return space_info->active_total_bytes; +} + int btrfs_can_overcommit(struct btrfs_fs_info *fs_info, struct btrfs_space_info *space_info, u64 bytes, enum btrfs_reserve_flush_enum flush) @@ -383,7 +401,7 @@ int btrfs_can_overcommit(struct btrfs_fs_info *fs_info, used = btrfs_space_info_used(space_info, true); avail = calc_available_free_space(fs_info, space_info, flush); - if (used + bytes < space_info->total_bytes + avail) + if (used + bytes < writable_total_bytes(fs_info, space_info) + avail) return 1; return 0; } @@ -419,7 +437,7 @@ void btrfs_try_granting_tickets(struct btrfs_fs_info *fs_info, ticket = list_first_entry(head, struct reserve_ticket, list); /* Check and see if our ticket can be satisfied now. */ - if ((used + ticket->bytes <= space_info->total_bytes) || + if ((used + ticket->bytes <= writable_total_bytes(fs_info, space_info)) || btrfs_can_overcommit(fs_info, space_info, ticket->bytes, flush)) { btrfs_space_info_update_bytes_may_use(fs_info, @@ -750,6 +768,7 @@ btrfs_calc_reclaim_metadata_size(struct btrfs_fs_info *fs_info, { u64 used; u64 avail; + u64 total; u64 to_reclaim = space_info->reclaim_size; lockdep_assert_held(&space_info->lock); @@ -764,8 +783,9 @@ btrfs_calc_reclaim_metadata_size(struct btrfs_fs_info *fs_info, * space. If that's the case add in our overage so we make sure to put * appropriate pressure on the flushing state machine. 
*/ - if (space_info->total_bytes + avail < used) - to_reclaim += used - (space_info->total_bytes + avail); + total = writable_total_bytes(fs_info, space_info); + if (total + avail < used) + to_reclaim += used - (total + avail); return to_reclaim; } @@ -775,9 +795,12 @@ static bool need_preemptive_reclaim(struct btrfs_fs_info *fs_info, { u64 global_rsv_size = fs_info->global_block_rsv.reserved; u64 ordered, delalloc; - u64 thresh = div_factor_fine(space_info->total_bytes, 90); + u64 total = writable_total_bytes(fs_info, space_info); + u64 thresh; u64 used; + thresh = div_factor_fine(total, 90); + lockdep_assert_held(&space_info->lock); /* If we're just plain full then async reclaim just slows us down. */ @@ -839,8 +862,8 @@ static bool need_preemptive_reclaim(struct btrfs_fs_info *fs_info, BTRFS_RESERVE_FLUSH_ALL); used = space_info->bytes_used + space_info->bytes_reserved + space_info->bytes_readonly + global_rsv_size; - if (used < space_info->total_bytes) - thresh += space_info->total_bytes - used; + if (used < total) + thresh += total - used; thresh >>= space_info->clamp; used = space_info->bytes_pinned; @@ -1557,7 +1580,7 @@ static int __reserve_bytes(struct btrfs_fs_info *fs_info, * can_overcommit() to ensure we can overcommit to continue. */ if (!pending_tickets && - ((used + orig_bytes <= space_info->total_bytes) || + ((used + orig_bytes <= writable_total_bytes(fs_info, space_info)) || btrfs_can_overcommit(fs_info, space_info, orig_bytes, flush))) { btrfs_space_info_update_bytes_may_use(fs_info, space_info, orig_bytes); diff --git a/fs/btrfs/space-info.h b/fs/btrfs/space-info.h index e7de24a529cf..3cc356a55c53 100644 --- a/fs/btrfs/space-info.h +++ b/fs/btrfs/space-info.h @@ -19,6 +19,8 @@ struct btrfs_space_info { u64 bytes_may_use; /* number of bytes that may be used for delalloc/allocations */ u64 bytes_readonly; /* total bytes that are read only */ + u64 active_total_bytes; /* total bytes in the space, but only accounts + active block groups. 
*/ u64 bytes_zone_unusable; /* total bytes that are unusable until resetting the device zone */ @@ -124,7 +126,7 @@ int btrfs_init_space_info(struct btrfs_fs_info *fs_info); void btrfs_update_space_info(struct btrfs_fs_info *info, u64 flags, u64 total_bytes, u64 bytes_used, u64 bytes_readonly, u64 bytes_zone_unusable, - struct btrfs_space_info **space_info); + bool active, struct btrfs_space_info **space_info); void btrfs_update_space_info_chunk_size(struct btrfs_space_info *space_info, u64 chunk_size); struct btrfs_space_info *btrfs_find_space_info(struct btrfs_fs_info *info, diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index 4a69e8492177..9cabf088b800 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -1838,6 +1838,7 @@ struct btrfs_device *btrfs_zoned_get_device(struct btrfs_fs_info *fs_info, bool btrfs_zone_activate(struct btrfs_block_group *block_group) { struct btrfs_fs_info *fs_info = block_group->fs_info; + struct btrfs_space_info *space_info = block_group->space_info; struct map_lookup *map; struct btrfs_device *device; u64 physical; @@ -1849,6 +1850,7 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group) map = block_group->physical_map; + spin_lock(&space_info->lock); spin_lock(&block_group->lock); if (block_group->zone_is_active) { ret = true; @@ -1877,7 +1879,10 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group) /* Successfully activated all the zones */ block_group->zone_is_active = 1; + space_info->active_total_bytes += block_group->length; spin_unlock(&block_group->lock); + btrfs_try_granting_tickets(fs_info, space_info); + spin_unlock(&space_info->lock); /* For the active block group list */ btrfs_get_block_group(block_group); @@ -1890,20 +1895,24 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group) out_unlock: spin_unlock(&block_group->lock); + spin_unlock(&space_info->lock); return ret; } static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_written) { struct btrfs_fs_info 
*fs_info = block_group->fs_info; + struct btrfs_space_info *space_info = block_group->space_info; struct map_lookup *map; bool need_zone_finish; int ret = 0; int i; + spin_lock(&space_info->lock); spin_lock(&block_group->lock); if (!block_group->zone_is_active) { spin_unlock(&block_group->lock); + spin_unlock(&space_info->lock); return 0; } @@ -1912,6 +1921,7 @@ static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_writ (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_SYSTEM)) && block_group->start + block_group->alloc_offset > block_group->meta_write_pointer) { spin_unlock(&block_group->lock); + spin_unlock(&space_info->lock); return -EAGAIN; } @@ -1924,6 +1934,7 @@ static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_writ */ if (!fully_written) { spin_unlock(&block_group->lock); + spin_unlock(&space_info->lock); ret = btrfs_inc_block_group_ro(block_group, false); if (ret) @@ -1935,6 +1946,7 @@ static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_writ btrfs_wait_ordered_roots(fs_info, U64_MAX, block_group->start, block_group->length); + spin_lock(&space_info->lock); spin_lock(&block_group->lock); /* @@ -1943,12 +1955,14 @@ static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_writ */ if (!block_group->zone_is_active) { spin_unlock(&block_group->lock); + spin_unlock(&space_info->lock); btrfs_dec_block_group_ro(block_group); return 0; } if (block_group->reserved) { spin_unlock(&block_group->lock); + spin_unlock(&space_info->lock); btrfs_dec_block_group_ro(block_group); return -EAGAIN; } @@ -1965,7 +1979,9 @@ static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_writ block_group->free_space_ctl->free_space = 0; btrfs_clear_treelog_bg(block_group); btrfs_clear_data_reloc_bg(block_group); + space_info->active_total_bytes -= block_group->length; spin_unlock(&block_group->lock); + spin_unlock(&space_info->lock); map = block_group->physical_map; for (i = 0; i < 
map->num_stripes; i++) {

[08/13] btrfs: zoned: introduce space_info->active_total_bytes

Commit Message

Patch