diff mbox series

[v2,2/2] btrfs: zoned: activate block group only for extent allocation

Message ID 9d491b6ea2d2628eb9632f4dfc43e52b8bfb7057.1647936783.git.naohiro.aota@wdc.com (mailing list archive)
State New, archived
Headers show
Series btrfs: zoned: activate new BG only from extent allocation context | expand

Commit Message

Naohiro Aota March 22, 2022, 9:11 a.m. UTC
In btrfs_make_block_group(), we activate the allocated block group,
expecting that the block group is soon used for allocation. However, the
chunk allocation from flush_space() context broke the assumption. There can
be a large time gap between the chunk allocation time and the extent
allocation time from the chunk.

Activating the empty block groups pre-allocated from flush_space() context
can exhaust the active zone counter of a device. Once we use all the active
zone counts for empty pre-allocated BGs, we cannot activate new BG for the
other things: metadata, tree-log, or data relocation BG. That failure
results in a fake -ENOSPC.

This patch introduces CHUNK_ALLOC_FORCE_FOR_EXTENT to distinguish the chunk
allocation from find_free_extent(). Now, the new block group is activated
only in that context.

Fixes: eb66a010d518 ("btrfs: zoned: activate new block group")
Cc: stable@vger.kernel.org # 5.16+: c7d1b9109dd0: btrfs: return allocated block group from do_chunk_alloc()
Cc: stable@vger.kernel.org # 5.16+
Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
---
 fs/btrfs/block-group.c | 24 ++++++++++++++++--------
 fs/btrfs/block-group.h |  1 +
 fs/btrfs/extent-tree.c |  2 +-
 3 files changed, 18 insertions(+), 9 deletions(-)

Comments

David Sterba April 1, 2022, 2:12 p.m. UTC | #1
On Tue, Mar 22, 2022 at 06:11:34PM +0900, Naohiro Aota wrote:
> In btrfs_make_block_group(), we activate the allocated block group,
> expecting that the block group is soon used for allocation. However, the
> chunk allocation from flush_space() context broke the assumption. There can
> be a large time gap between the chunk allocation time and the extent
> allocation time from the chunk.
> 
> Activating the empty block groups pre-allocated from flush_space() context
> can exhaust the active zone counter of a device. Once we use all the active
> zone counts for empty pre-allocated BGs, we cannot activate new BG for the
> other things: metadata, tree-log, or data relocation BG. That failure
> results in a fake -ENOSPC.
> 
> This patch introduces CHUNK_ALLOC_FORCE_FOR_EXTENT to distinguish the chunk
> allocation from find_free_extent(). Now, the new block group is activated
> only in that context.
> 
> Fixes: eb66a010d518 ("btrfs: zoned: activate new block group")
> Cc: stable@vger.kernel.org # 5.16+: c7d1b9109dd0: btrfs: return allocated block group from do_chunk_alloc()
> Cc: stable@vger.kernel.org # 5.16+
> Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
> ---
>  fs/btrfs/block-group.c | 24 ++++++++++++++++--------
>  fs/btrfs/block-group.h |  1 +
>  fs/btrfs/extent-tree.c |  2 +-
>  3 files changed, 18 insertions(+), 9 deletions(-)
> 
> diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
> index d4ac1c76f539..84c97d76de92 100644
> --- a/fs/btrfs/block-group.c
> +++ b/fs/btrfs/block-group.c
> @@ -2481,12 +2481,6 @@ struct btrfs_block_group *btrfs_make_block_group(struct btrfs_trans_handle *tran
>  		return ERR_PTR(ret);
>  	}
>  
> -	/*
> -	 * New block group is likely to be used soon. Try to activate it now.
> -	 * Failure is OK for now.
> -	 */
> -	btrfs_zone_activate(cache);
> -
>  	ret = exclude_super_stripes(cache);
>  	if (ret) {
>  		/* We may have excluded something, so call this just in case */
> @@ -3642,8 +3636,14 @@ int btrfs_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags,
>  	struct btrfs_block_group *ret_bg;
>  	bool wait_for_alloc = false;
>  	bool should_alloc = false;
> +	bool from_extent_allocation = false;
>  	int ret = 0;
>  
> +	if (force == CHUNK_ALLOC_FORCE_FOR_EXTENT) {
> +		from_extent_allocation = true;
> +		force = CHUNK_ALLOC_FORCE;
> +	}
> +
>  	/* Don't re-enter if we're already allocating a chunk */
>  	if (trans->allocating_chunk)
>  		return -ENOSPC;
> @@ -3736,9 +3736,17 @@ int btrfs_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags,
>  	ret_bg = do_chunk_alloc(trans, flags);
>  	trans->allocating_chunk = false;
>  
> -	if (IS_ERR(ret_bg))
> +	if (IS_ERR(ret_bg)) {
>  		ret = PTR_ERR(ret_bg);
> -	else
> +	} else if (from_extent_allocation) {
> +		/*
> +		 * New block group is likely to be used soon. Try to activate
> +		 * it now. Failure is OK for now.
> +		 */
> +		btrfs_zone_activate(ret_bg);
> +	}
> +
> +	if (!ret)
>  		btrfs_put_block_group(ret_bg);
>  
>  	spin_lock(&space_info->lock);
> diff --git a/fs/btrfs/block-group.h b/fs/btrfs/block-group.h
> index 93aabc68bb6a..9c822367c432 100644
> --- a/fs/btrfs/block-group.h
> +++ b/fs/btrfs/block-group.h
> @@ -40,6 +40,7 @@ enum btrfs_chunk_alloc_enum {
>  	CHUNK_ALLOC_NO_FORCE,
>  	CHUNK_ALLOC_LIMITED,
>  	CHUNK_ALLOC_FORCE,
> +	CHUNK_ALLOC_FORCE_FOR_EXTENT,

There's a comment documenting what it means, I've written

	CHUNK_ALLOC_FORCE_FOR_EXTENT like CHUNK_ALLOC_FORCE but called from
	find_free_extent() that also activaes the zone

based on the changelog.
diff mbox series

Patch

diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index d4ac1c76f539..84c97d76de92 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -2481,12 +2481,6 @@  struct btrfs_block_group *btrfs_make_block_group(struct btrfs_trans_handle *tran
 		return ERR_PTR(ret);
 	}
 
-	/*
-	 * New block group is likely to be used soon. Try to activate it now.
-	 * Failure is OK for now.
-	 */
-	btrfs_zone_activate(cache);
-
 	ret = exclude_super_stripes(cache);
 	if (ret) {
 		/* We may have excluded something, so call this just in case */
@@ -3642,8 +3636,14 @@  int btrfs_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags,
 	struct btrfs_block_group *ret_bg;
 	bool wait_for_alloc = false;
 	bool should_alloc = false;
+	bool from_extent_allocation = false;
 	int ret = 0;
 
+	if (force == CHUNK_ALLOC_FORCE_FOR_EXTENT) {
+		from_extent_allocation = true;
+		force = CHUNK_ALLOC_FORCE;
+	}
+
 	/* Don't re-enter if we're already allocating a chunk */
 	if (trans->allocating_chunk)
 		return -ENOSPC;
@@ -3736,9 +3736,17 @@  int btrfs_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags,
 	ret_bg = do_chunk_alloc(trans, flags);
 	trans->allocating_chunk = false;
 
-	if (IS_ERR(ret_bg))
+	if (IS_ERR(ret_bg)) {
 		ret = PTR_ERR(ret_bg);
-	else
+	} else if (from_extent_allocation) {
+		/*
+		 * New block group is likely to be used soon. Try to activate
+		 * it now. Failure is OK for now.
+		 */
+		btrfs_zone_activate(ret_bg);
+	}
+
+	if (!ret)
 		btrfs_put_block_group(ret_bg);
 
 	spin_lock(&space_info->lock);
diff --git a/fs/btrfs/block-group.h b/fs/btrfs/block-group.h
index 93aabc68bb6a..9c822367c432 100644
--- a/fs/btrfs/block-group.h
+++ b/fs/btrfs/block-group.h
@@ -40,6 +40,7 @@  enum btrfs_chunk_alloc_enum {
 	CHUNK_ALLOC_NO_FORCE,
 	CHUNK_ALLOC_LIMITED,
 	CHUNK_ALLOC_FORCE,
+	CHUNK_ALLOC_FORCE_FOR_EXTENT,
 };
 
 struct btrfs_caching_control {
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index f477035a2ac2..6aa92f84f465 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -4082,7 +4082,7 @@  static int find_free_extent_update_loop(struct btrfs_fs_info *fs_info,
 			}
 
 			ret = btrfs_chunk_alloc(trans, ffe_ctl->flags,
-						CHUNK_ALLOC_FORCE);
+						CHUNK_ALLOC_FORCE_FOR_EXTENT);
 
 			/* Do not bail out on ENOSPC since we can do more. */
 			if (ret == -ENOSPC)