diff mbox series

btrfs: avoid allocating and running pointless delayed extent operations

Message ID 87618653fb07d2f6307babd128b626da36dd33e8.1718964587.git.fdmanana@suse.com (mailing list archive)
State New, archived
Headers show
Series btrfs: avoid allocating and running pointless delayed extent operations | expand

Commit Message

Filipe Manana June 21, 2024, 10:16 a.m. UTC
From: Filipe Manana <fdmanana@suse.com>

We always allocate a delayed extent op structure when allocating a tree
block (except for log trees), but most of the time we don't need it as
we only need to set the BTRFS_BLOCK_FLAG_FULL_BACKREF if we're dealing
with a relocation tree and we only need to set the key of a tree block
in a btrfs_tree_block_info structure if we are not using skinny metadata
(feature enabled by default since btrfs-progs 3.18 and available as of
kernel 3.10).

In these cases, where we don't need neither to update flags nor to set
the key, we only use the delayed extent op structure to set the tree
block's level. This is a waste of memory and besides that, the memory
allocation can fail and can add additional latency.

Instead of using a delayed extent op structure to store the level of
the tree block, use the delayed ref head to store it. This doesn't
change the size of neither structure and helps us avoid allocating
delayed extent ops structures when using the skinny metadata feature
and there's no relocation going on. This also gets rid of a BUG_ON().

For example, for a fs_mark run, with 5 iterations, 8 threads and 100K
files per iteration, before this patch there were 118109 allocations
of delayed extent op structures and after it there were none.

Signed-off-by: Filipe Manana <fdmanana@suse.com>
---
 fs/btrfs/delayed-ref.c |  9 +++++-
 fs/btrfs/delayed-ref.h |  6 ++--
 fs/btrfs/extent-tree.c | 63 ++++++++++++++++++------------------------
 3 files changed, 39 insertions(+), 39 deletions(-)

Comments

Boris Burkov June 21, 2024, 5:44 p.m. UTC | #1
On Fri, Jun 21, 2024 at 11:16:29AM +0100, fdmanana@kernel.org wrote:
> From: Filipe Manana <fdmanana@suse.com>
> 
> We always allocate a delayed extent op structure when allocating a tree
> block (except for log trees), but most of the time we don't need it as
> we only need to set the BTRFS_BLOCK_FLAG_FULL_BACKREF if we're dealing
> with a relocation tree and we only need to set the key of a tree block
> in a btrfs_tree_block_info structure if we are not using skinny metadata
> (feature enabled by default since btrfs-progs 3.18 and available as of
> kernel 3.10).
> 
> In these cases, where we don't need neither to update flags nor to set
> the key, we only use the delayed extent op structure to set the tree
> block's level. This is a waste of memory and besides that, the memory
> allocation can fail and can add additional latency.
> 
> Instead of using a delayed extent op structure to store the level of
> the tree block, use the delayed ref head to store it. This doesn't
> change the size of neither structure and helps us avoid allocating
> delayed extent ops structures when using the skinny metadata feature
> and there's no relocation going on. This also gets rid of a BUG_ON().
> 
> For example, for a fs_mark run, with 5 iterations, 8 threads and 100K
> files per iteration, before this patch there were 118109 allocations
> of delayed extent op structures and after it there were none.
> 

Reviewed-by: Boris Burkov <boris@bur.io>

> Signed-off-by: Filipe Manana <fdmanana@suse.com>
> ---
>  fs/btrfs/delayed-ref.c |  9 +++++-
>  fs/btrfs/delayed-ref.h |  6 ++--
>  fs/btrfs/extent-tree.c | 63 ++++++++++++++++++------------------------
>  3 files changed, 39 insertions(+), 39 deletions(-)
> 
> diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
> index 6b4296ab651f..2ac9296edccb 100644
> --- a/fs/btrfs/delayed-ref.c
> +++ b/fs/btrfs/delayed-ref.c
> @@ -819,6 +819,12 @@ static void init_delayed_ref_head(struct btrfs_delayed_ref_head *head_ref,
>  	spin_lock_init(&head_ref->lock);
>  	mutex_init(&head_ref->mutex);
>  
> +	/* If not metadata set an impossible level to help debugging. */
> +	if (generic_ref->type == BTRFS_REF_METADATA)
> +		head_ref->level = generic_ref->tree_ref.level;
> +	else
> +		head_ref->level = U8_MAX;
> +
>  	if (qrecord) {
>  		if (generic_ref->ref_root && reserved) {
>  			qrecord->data_rsv = reserved;
> @@ -1072,7 +1078,7 @@ int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
>  }
>  
>  int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans,
> -				u64 bytenr, u64 num_bytes,
> +				u64 bytenr, u64 num_bytes, u8 level,
>  				struct btrfs_delayed_extent_op *extent_op)
>  {
>  	struct btrfs_delayed_ref_head *head_ref;
> @@ -1082,6 +1088,7 @@ int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans,
>  		.action = BTRFS_UPDATE_DELAYED_HEAD,
>  		.bytenr = bytenr,
>  		.num_bytes = num_bytes,
> +		.tree_ref.level = level,
>  	};
>  
>  	head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
> diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
> index 405be46c420f..ef15e998be03 100644
> --- a/fs/btrfs/delayed-ref.h
> +++ b/fs/btrfs/delayed-ref.h
> @@ -108,7 +108,6 @@ struct btrfs_delayed_ref_node {
>  
>  struct btrfs_delayed_extent_op {
>  	struct btrfs_disk_key key;
> -	u8 level;
>  	bool update_key;
>  	bool update_flags;
>  	u64 flags_to_set;
> @@ -172,6 +171,9 @@ struct btrfs_delayed_ref_head {
>  	 */
>  	u64 reserved_bytes;
>  
> +	/* Tree block level, for metadata only. */
> +	u8 level;
> +
>  	/*
>  	 * when a new extent is allocated, it is just reserved in memory
>  	 * The actual extent isn't inserted into the extent allocation tree
> @@ -355,7 +357,7 @@ int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
>  			       struct btrfs_ref *generic_ref,
>  			       u64 reserved);
>  int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans,
> -				u64 bytenr, u64 num_bytes,
> +				u64 bytenr, u64 num_bytes, u8 level,
>  				struct btrfs_delayed_extent_op *extent_op);
>  void btrfs_merge_delayed_refs(struct btrfs_fs_info *fs_info,
>  			      struct btrfs_delayed_ref_root *delayed_refs,
> diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
> index 23a7cac108eb..250623f3b094 100644
> --- a/fs/btrfs/extent-tree.c
> +++ b/fs/btrfs/extent-tree.c
> @@ -200,19 +200,8 @@ int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
>  			goto search_again;
>  		}
>  		spin_lock(&head->lock);
> -		if (head->extent_op && head->extent_op->update_flags) {
> +		if (head->extent_op && head->extent_op->update_flags)
>  			extent_flags |= head->extent_op->flags_to_set;
> -		} else if (unlikely(num_refs == 0)) {
> -			spin_unlock(&head->lock);
> -			mutex_unlock(&head->mutex);
> -			spin_unlock(&delayed_refs->lock);
> -			ret = -EUCLEAN;
> -			btrfs_err(fs_info,
> -			  "unexpected zero reference count for extent %llu (%s)",
> -				  bytenr, metadata ? "metadata" : "data");
> -			btrfs_abort_transaction(trans, ret);
> -			goto out_free;
> -		}
>  
>  		num_refs += head->ref_mod;
>  		spin_unlock(&head->lock);
> @@ -1632,7 +1621,7 @@ static int run_delayed_extent_op(struct btrfs_trans_handle *trans,
>  
>  	if (metadata) {
>  		key.type = BTRFS_METADATA_ITEM_KEY;
> -		key.offset = extent_op->level;
> +		key.offset = head->level;
>  	} else {
>  		key.type = BTRFS_EXTENT_ITEM_KEY;
>  		key.offset = head->num_bytes;
> @@ -1667,7 +1656,7 @@ static int run_delayed_extent_op(struct btrfs_trans_handle *trans,
>  			ret = -EUCLEAN;
>  			btrfs_err(fs_info,
>  		  "missing extent item for extent %llu num_bytes %llu level %d",
> -				  head->bytenr, head->num_bytes, extent_op->level);
> +				  head->bytenr, head->num_bytes, head->level);
>  			goto out;
>  		}
>  	}
> @@ -1726,7 +1715,6 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
>  			.generation = trans->transid,
>  		};
>  
> -		BUG_ON(!extent_op || !extent_op->update_flags);
>  		ret = alloc_reserved_tree_block(trans, node, extent_op);
>  		if (!ret)
>  			btrfs_record_squota_delta(fs_info, &delta);
> @@ -2233,7 +2221,6 @@ int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
>  				struct extent_buffer *eb, u64 flags)
>  {
>  	struct btrfs_delayed_extent_op *extent_op;
> -	int level = btrfs_header_level(eb);
>  	int ret;
>  
>  	extent_op = btrfs_alloc_delayed_extent_op();
> @@ -2243,9 +2230,9 @@ int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
>  	extent_op->flags_to_set = flags;
>  	extent_op->update_flags = true;
>  	extent_op->update_key = false;
> -	extent_op->level = level;
>  
> -	ret = btrfs_add_delayed_extent_op(trans, eb->start, eb->len, extent_op);
> +	ret = btrfs_add_delayed_extent_op(trans, eb->start, eb->len,
> +					  btrfs_header_level(eb), extent_op);
>  	if (ret)
>  		btrfs_free_delayed_extent_op(extent_op);
>  	return ret;
> @@ -4866,7 +4853,7 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
>  	struct btrfs_path *path;
>  	struct extent_buffer *leaf;
>  	u32 size = sizeof(*extent_item) + sizeof(*iref);
> -	u64 flags = extent_op->flags_to_set;
> +	const u64 flags = (extent_op ? extent_op->flags_to_set : 0);
>  	/* The owner of a tree block is the level. */
>  	int level = btrfs_delayed_ref_owner(node);
>  	bool skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
> @@ -5123,7 +5110,6 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
>  	struct btrfs_key ins;
>  	struct btrfs_block_rsv *block_rsv;
>  	struct extent_buffer *buf;
> -	struct btrfs_delayed_extent_op *extent_op;
>  	u64 flags = 0;
>  	int ret;
>  	u32 blocksize = fs_info->nodesize;
> @@ -5166,6 +5152,7 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
>  		BUG_ON(parent > 0);
>  
>  	if (root_objectid != BTRFS_TREE_LOG_OBJECTID) {
> +		struct btrfs_delayed_extent_op *extent_op;
>  		struct btrfs_ref generic_ref = {
>  			.action = BTRFS_ADD_DELAYED_EXTENT,
>  			.bytenr = ins.objectid,
> @@ -5174,30 +5161,34 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
>  			.owning_root = owning_root,
>  			.ref_root = root_objectid,
>  		};
> -		extent_op = btrfs_alloc_delayed_extent_op();
> -		if (!extent_op) {
> -			ret = -ENOMEM;
> -			goto out_free_buf;
> +
> +		if (!skinny_metadata || flags != 0) {
> +			extent_op = btrfs_alloc_delayed_extent_op();
> +			if (!extent_op) {
> +				ret = -ENOMEM;
> +				goto out_free_buf;
> +			}
> +			if (key)
> +				memcpy(&extent_op->key, key, sizeof(extent_op->key));
> +			else
> +				memset(&extent_op->key, 0, sizeof(extent_op->key));
> +			extent_op->flags_to_set = flags;
> +			extent_op->update_key = skinny_metadata ? false : true;
> +			extent_op->update_flags = (flags != 0);
> +		} else {
> +			extent_op = NULL;
>  		}
> -		if (key)
> -			memcpy(&extent_op->key, key, sizeof(extent_op->key));
> -		else
> -			memset(&extent_op->key, 0, sizeof(extent_op->key));
> -		extent_op->flags_to_set = flags;
> -		extent_op->update_key = skinny_metadata ? false : true;
> -		extent_op->update_flags = true;
> -		extent_op->level = level;
>  
>  		btrfs_init_tree_ref(&generic_ref, level, btrfs_root_id(root), false);
>  		btrfs_ref_tree_mod(fs_info, &generic_ref);
>  		ret = btrfs_add_delayed_tree_ref(trans, &generic_ref, extent_op);
> -		if (ret)
> -			goto out_free_delayed;
> +		if (ret) {
> +			btrfs_free_delayed_extent_op(extent_op);
> +			goto out_free_buf;
> +		}
>  	}
>  	return buf;
>  
> -out_free_delayed:
> -	btrfs_free_delayed_extent_op(extent_op);
>  out_free_buf:
>  	btrfs_tree_unlock(buf);
>  	free_extent_buffer(buf);
> -- 
> 2.43.0
>
diff mbox series

Patch

diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index 6b4296ab651f..2ac9296edccb 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -819,6 +819,12 @@  static void init_delayed_ref_head(struct btrfs_delayed_ref_head *head_ref,
 	spin_lock_init(&head_ref->lock);
 	mutex_init(&head_ref->mutex);
 
+	/* If not metadata set an impossible level to help debugging. */
+	if (generic_ref->type == BTRFS_REF_METADATA)
+		head_ref->level = generic_ref->tree_ref.level;
+	else
+		head_ref->level = U8_MAX;
+
 	if (qrecord) {
 		if (generic_ref->ref_root && reserved) {
 			qrecord->data_rsv = reserved;
@@ -1072,7 +1078,7 @@  int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
 }
 
 int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans,
-				u64 bytenr, u64 num_bytes,
+				u64 bytenr, u64 num_bytes, u8 level,
 				struct btrfs_delayed_extent_op *extent_op)
 {
 	struct btrfs_delayed_ref_head *head_ref;
@@ -1082,6 +1088,7 @@  int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans,
 		.action = BTRFS_UPDATE_DELAYED_HEAD,
 		.bytenr = bytenr,
 		.num_bytes = num_bytes,
+		.tree_ref.level = level,
 	};
 
 	head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
index 405be46c420f..ef15e998be03 100644
--- a/fs/btrfs/delayed-ref.h
+++ b/fs/btrfs/delayed-ref.h
@@ -108,7 +108,6 @@  struct btrfs_delayed_ref_node {
 
 struct btrfs_delayed_extent_op {
 	struct btrfs_disk_key key;
-	u8 level;
 	bool update_key;
 	bool update_flags;
 	u64 flags_to_set;
@@ -172,6 +171,9 @@  struct btrfs_delayed_ref_head {
 	 */
 	u64 reserved_bytes;
 
+	/* Tree block level, for metadata only. */
+	u8 level;
+
 	/*
 	 * when a new extent is allocated, it is just reserved in memory
 	 * The actual extent isn't inserted into the extent allocation tree
@@ -355,7 +357,7 @@  int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
 			       struct btrfs_ref *generic_ref,
 			       u64 reserved);
 int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans,
-				u64 bytenr, u64 num_bytes,
+				u64 bytenr, u64 num_bytes, u8 level,
 				struct btrfs_delayed_extent_op *extent_op);
 void btrfs_merge_delayed_refs(struct btrfs_fs_info *fs_info,
 			      struct btrfs_delayed_ref_root *delayed_refs,
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 23a7cac108eb..250623f3b094 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -200,19 +200,8 @@  int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
 			goto search_again;
 		}
 		spin_lock(&head->lock);
-		if (head->extent_op && head->extent_op->update_flags) {
+		if (head->extent_op && head->extent_op->update_flags)
 			extent_flags |= head->extent_op->flags_to_set;
-		} else if (unlikely(num_refs == 0)) {
-			spin_unlock(&head->lock);
-			mutex_unlock(&head->mutex);
-			spin_unlock(&delayed_refs->lock);
-			ret = -EUCLEAN;
-			btrfs_err(fs_info,
-			  "unexpected zero reference count for extent %llu (%s)",
-				  bytenr, metadata ? "metadata" : "data");
-			btrfs_abort_transaction(trans, ret);
-			goto out_free;
-		}
 
 		num_refs += head->ref_mod;
 		spin_unlock(&head->lock);
@@ -1632,7 +1621,7 @@  static int run_delayed_extent_op(struct btrfs_trans_handle *trans,
 
 	if (metadata) {
 		key.type = BTRFS_METADATA_ITEM_KEY;
-		key.offset = extent_op->level;
+		key.offset = head->level;
 	} else {
 		key.type = BTRFS_EXTENT_ITEM_KEY;
 		key.offset = head->num_bytes;
@@ -1667,7 +1656,7 @@  static int run_delayed_extent_op(struct btrfs_trans_handle *trans,
 			ret = -EUCLEAN;
 			btrfs_err(fs_info,
 		  "missing extent item for extent %llu num_bytes %llu level %d",
-				  head->bytenr, head->num_bytes, extent_op->level);
+				  head->bytenr, head->num_bytes, head->level);
 			goto out;
 		}
 	}
@@ -1726,7 +1715,6 @@  static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
 			.generation = trans->transid,
 		};
 
-		BUG_ON(!extent_op || !extent_op->update_flags);
 		ret = alloc_reserved_tree_block(trans, node, extent_op);
 		if (!ret)
 			btrfs_record_squota_delta(fs_info, &delta);
@@ -2233,7 +2221,6 @@  int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
 				struct extent_buffer *eb, u64 flags)
 {
 	struct btrfs_delayed_extent_op *extent_op;
-	int level = btrfs_header_level(eb);
 	int ret;
 
 	extent_op = btrfs_alloc_delayed_extent_op();
@@ -2243,9 +2230,9 @@  int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
 	extent_op->flags_to_set = flags;
 	extent_op->update_flags = true;
 	extent_op->update_key = false;
-	extent_op->level = level;
 
-	ret = btrfs_add_delayed_extent_op(trans, eb->start, eb->len, extent_op);
+	ret = btrfs_add_delayed_extent_op(trans, eb->start, eb->len,
+					  btrfs_header_level(eb), extent_op);
 	if (ret)
 		btrfs_free_delayed_extent_op(extent_op);
 	return ret;
@@ -4866,7 +4853,7 @@  static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
 	struct btrfs_path *path;
 	struct extent_buffer *leaf;
 	u32 size = sizeof(*extent_item) + sizeof(*iref);
-	u64 flags = extent_op->flags_to_set;
+	const u64 flags = (extent_op ? extent_op->flags_to_set : 0);
 	/* The owner of a tree block is the level. */
 	int level = btrfs_delayed_ref_owner(node);
 	bool skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
@@ -5123,7 +5110,6 @@  struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
 	struct btrfs_key ins;
 	struct btrfs_block_rsv *block_rsv;
 	struct extent_buffer *buf;
-	struct btrfs_delayed_extent_op *extent_op;
 	u64 flags = 0;
 	int ret;
 	u32 blocksize = fs_info->nodesize;
@@ -5166,6 +5152,7 @@  struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
 		BUG_ON(parent > 0);
 
 	if (root_objectid != BTRFS_TREE_LOG_OBJECTID) {
+		struct btrfs_delayed_extent_op *extent_op;
 		struct btrfs_ref generic_ref = {
 			.action = BTRFS_ADD_DELAYED_EXTENT,
 			.bytenr = ins.objectid,
@@ -5174,30 +5161,34 @@  struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
 			.owning_root = owning_root,
 			.ref_root = root_objectid,
 		};
-		extent_op = btrfs_alloc_delayed_extent_op();
-		if (!extent_op) {
-			ret = -ENOMEM;
-			goto out_free_buf;
+
+		if (!skinny_metadata || flags != 0) {
+			extent_op = btrfs_alloc_delayed_extent_op();
+			if (!extent_op) {
+				ret = -ENOMEM;
+				goto out_free_buf;
+			}
+			if (key)
+				memcpy(&extent_op->key, key, sizeof(extent_op->key));
+			else
+				memset(&extent_op->key, 0, sizeof(extent_op->key));
+			extent_op->flags_to_set = flags;
+			extent_op->update_key = skinny_metadata ? false : true;
+			extent_op->update_flags = (flags != 0);
+		} else {
+			extent_op = NULL;
 		}
-		if (key)
-			memcpy(&extent_op->key, key, sizeof(extent_op->key));
-		else
-			memset(&extent_op->key, 0, sizeof(extent_op->key));
-		extent_op->flags_to_set = flags;
-		extent_op->update_key = skinny_metadata ? false : true;
-		extent_op->update_flags = true;
-		extent_op->level = level;
 
 		btrfs_init_tree_ref(&generic_ref, level, btrfs_root_id(root), false);
 		btrfs_ref_tree_mod(fs_info, &generic_ref);
 		ret = btrfs_add_delayed_tree_ref(trans, &generic_ref, extent_op);
-		if (ret)
-			goto out_free_delayed;
+		if (ret) {
+			btrfs_free_delayed_extent_op(extent_op);
+			goto out_free_buf;
+		}
 	}
 	return buf;
 
-out_free_delayed:
-	btrfs_free_delayed_extent_op(extent_op);
 out_free_buf:
 	btrfs_tree_unlock(buf);
 	free_extent_buffer(buf);