[1/5] btrfs: qgroup: iterate qgroups without memory allocation for qgroup_reserve()

Message ID af338549020e57415b5e4079f37e05b5655991f8.1693391268.git.wqu@suse.com
State New, archived
Series btrfs: qgroup: reduce GFP_ATOMIC usage for ulist

Commit Message

Qu Wenruo Aug. 30, 2023, 10:37 a.m. UTC
Qgroup heavily relies on ulist to go through all the involved
qgroups, but since we use the ulist while holding the
fs_info->qgroup_lock spinlock, we end up doing a lot of GFP_ATOMIC
allocations.

This patch reduces the GFP_ATOMIC usage in qgroup_reserve() by
eliminating the memory allocation completely.

This is done by moving the needed memory into a new
btrfs_qgroup::iterator list_head, so that we can put all the involved
qgroups onto an on-stack list, thus eliminating the need to allocate
memory while holding the spinlock.

The only cost is slightly higher memory usage, but considering the
reduced GFP_ATOMIC usage in a hot path, it should still be acceptable.

Function qgroup_reserve() is the perfect starting point for this
conversion.

Signed-off-by: Qu Wenruo <wqu@suse.com>
---
 fs/btrfs/qgroup.c | 68 +++++++++++++++++++++++------------------------
 fs/btrfs/qgroup.h |  9 +++++++
 2 files changed, 42 insertions(+), 35 deletions(-)
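
[Editor's note: for readers less familiar with the kernel's intrusive-list
idiom, below is a minimal, self-contained userspace sketch of the pattern the
patch relies on: an embedded list_head doubles as a pre-allocated queue slot
and as an O(1) "already visited" marker. All types and helpers here are
illustrative stand-ins (hand-rolled equivalents of <linux/list.h>), not the
btrfs code.]

#include <stdio.h>
#include <stddef.h>

struct list_head {
	struct list_head *next, *prev;
};

static void INIT_LIST_HEAD(struct list_head *h)
{
	h->next = h;
	h->prev = h;
}

static int list_empty(const struct list_head *h)
{
	return h->next == h;
}

static void list_add_tail(struct list_head *new, struct list_head *head)
{
	new->prev = head->prev;
	new->next = head;
	head->prev->next = new;
	head->prev = new;
}

static void list_del_init(struct list_head *entry)
{
	entry->prev->next = entry->next;
	entry->next->prev = entry->prev;
	INIT_LIST_HEAD(entry);
}

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

/* A stand-in for struct btrfs_qgroup. */
struct qgroup {
	unsigned long long id;
	struct list_head iterator;	/* embedded, pre-allocated slot */
};

/*
 * Because the list_head is embedded in the qgroup itself, queuing a
 * qgroup needs no allocation, and list_empty() doubles as an O(1)
 * "already queued" check. The embedded head is one shared slot per
 * qgroup, which is why callers must serialize against each other.
 */
static void iterator_add(struct list_head *head, struct qgroup *qg)
{
	if (!list_empty(&qg->iterator))
		return;		/* already queued, avoid duplicates */
	list_add_tail(&qg->iterator, head);
}

int main(void)
{
	struct qgroup a = { .id = 5 }, b = { .id = 256 };
	struct list_head queue;
	struct list_head *pos;

	INIT_LIST_HEAD(&queue);
	INIT_LIST_HEAD(&a.iterator);
	INIT_LIST_HEAD(&b.iterator);

	iterator_add(&queue, &a);
	iterator_add(&queue, &b);
	iterator_add(&queue, &a);	/* deduplicated, no effect */

	for (pos = queue.next; pos != &queue; pos = pos->next)
		printf("qgroup %llu\n",
		       container_of(pos, struct qgroup, iterator)->id);

	/* Reset each embedded head so the next traversal starts clean. */
	while (!list_empty(&queue))
		list_del_init(queue.next);
	return 0;
}

This prints each qgroup once even though qgroup 5 is queued twice: membership
in the traversal list is encoded in the object itself, so adding is
allocation-free and duplicates are rejected in constant time.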

Comments

Boris Burkov Aug. 30, 2023, 9:58 p.m. UTC | #1
On Wed, Aug 30, 2023 at 06:37:23PM +0800, Qu Wenruo wrote:
> Qgroup heavily relies on ulist to go through all the involved
> qgroups, but since we use the ulist while holding the
> fs_info->qgroup_lock spinlock, we end up doing a lot of GFP_ATOMIC
> allocations.
> 
> This patch reduces the GFP_ATOMIC usage in qgroup_reserve() by
> eliminating the memory allocation completely.
> 
> This is done by moving the needed memory into a new
> btrfs_qgroup::iterator list_head, so that we can put all the involved
> qgroups onto an on-stack list, thus eliminating the need to allocate
> memory while holding the spinlock.
> 
> The only cost is slightly higher memory usage, but considering the
> reduced GFP_ATOMIC usage in a hot path, it should still be acceptable.
> 
> Function qgroup_reserve() is the perfect starting point for this
> conversion.

This looks great, thanks! I like it more than my array/ulist hybrid
since it never allocates :)

Reviewed-by: Boris Burkov <boris@bur.io>

> 
> Signed-off-by: Qu Wenruo <wqu@suse.com>
> ---
>  fs/btrfs/qgroup.c | 68 +++++++++++++++++++++++------------------------
>  fs/btrfs/qgroup.h |  9 +++++++
>  2 files changed, 42 insertions(+), 35 deletions(-)
> 
> diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
> index 74244b4bb0e9..de34e2aef710 100644
> --- a/fs/btrfs/qgroup.c
> +++ b/fs/btrfs/qgroup.c
> @@ -216,6 +216,7 @@ static struct btrfs_qgroup *add_qgroup_rb(struct btrfs_fs_info *fs_info,
>  	INIT_LIST_HEAD(&qgroup->groups);
>  	INIT_LIST_HEAD(&qgroup->members);
>  	INIT_LIST_HEAD(&qgroup->dirty);
> +	INIT_LIST_HEAD(&qgroup->iterator);
>  
>  	rb_link_node(&qgroup->node, parent, p);
>  	rb_insert_color(&qgroup->node, &fs_info->qgroup_tree);
> @@ -1367,6 +1368,25 @@ static void qgroup_dirty(struct btrfs_fs_info *fs_info,
>  		list_add(&qgroup->dirty, &fs_info->dirty_qgroups);
>  }
>  
> +static void qgroup_iterator_add(struct list_head *head, struct btrfs_qgroup *qgroup)
> +{
> +	if (!list_empty(&qgroup->iterator))
> +		return;
> +
> +	list_add_tail(&qgroup->iterator, head);
> +}
> +
> +static void qgroup_iterator_clean(struct list_head *head)
> +{
> +
> +	while (!list_empty(head)) {
> +		struct btrfs_qgroup *qgroup;
> +
> +		qgroup = list_first_entry(head, struct btrfs_qgroup, iterator);
> +		list_del_init(&qgroup->iterator);
> +	}
> +}
> +
>  /*
>   * The easy accounting, we're updating qgroup relationship whose child qgroup
>   * only has exclusive extents.
> @@ -3154,12 +3174,11 @@ static bool qgroup_check_limits(const struct btrfs_qgroup *qg, u64 num_bytes)
>  static int qgroup_reserve(struct btrfs_root *root, u64 num_bytes, bool enforce,
>  			  enum btrfs_qgroup_rsv_type type)
>  {
> -	struct btrfs_qgroup *qgroup;
> +	struct btrfs_qgroup *cur;
>  	struct btrfs_fs_info *fs_info = root->fs_info;
>  	u64 ref_root = root->root_key.objectid;
>  	int ret = 0;
> -	struct ulist_node *unode;
> -	struct ulist_iterator uiter;
> +	LIST_HEAD(qgroup_list);
>  
>  	if (!is_fstree(ref_root))
>  		return 0;
> @@ -3175,53 +3194,32 @@ static int qgroup_reserve(struct btrfs_root *root, u64 num_bytes, bool enforce,
>  	if (!fs_info->quota_root)
>  		goto out;
>  
> -	qgroup = find_qgroup_rb(fs_info, ref_root);
> -	if (!qgroup)
> +	cur = find_qgroup_rb(fs_info, ref_root);
> +	if (!cur)
>  		goto out;
>  
> -	/*
> -	 * in a first step, we check all affected qgroups if any limits would
> -	 * be exceeded
> -	 */
> -	ulist_reinit(fs_info->qgroup_ulist);
> -	ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid,
> -			qgroup_to_aux(qgroup), GFP_ATOMIC);
> -	if (ret < 0)
> -		goto out;
> -	ULIST_ITER_INIT(&uiter);
> -	while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
> -		struct btrfs_qgroup *qg;
> +	qgroup_iterator_add(&qgroup_list, cur);
> +	list_for_each_entry(cur, &qgroup_list, iterator) {
>  		struct btrfs_qgroup_list *glist;
>  
> -		qg = unode_aux_to_qgroup(unode);
> -
> -		if (enforce && !qgroup_check_limits(qg, num_bytes)) {
> +		if (enforce && !qgroup_check_limits(cur, num_bytes)) {
>  			ret = -EDQUOT;
>  			goto out;
>  		}
>  
> -		list_for_each_entry(glist, &qg->groups, next_group) {
> -			ret = ulist_add(fs_info->qgroup_ulist,
> -					glist->group->qgroupid,
> -					qgroup_to_aux(glist->group), GFP_ATOMIC);
> -			if (ret < 0)
> -				goto out;
> -		}
> +		list_for_each_entry(glist, &cur->groups, next_group)
> +			qgroup_iterator_add(&qgroup_list, glist->group);
>  	}
> +
>  	ret = 0;
>  	/*
>  	 * no limits exceeded, now record the reservation into all qgroups
>  	 */
> -	ULIST_ITER_INIT(&uiter);
> -	while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
> -		struct btrfs_qgroup *qg;
> -
> -		qg = unode_aux_to_qgroup(unode);
> -
> -		qgroup_rsv_add(fs_info, qg, num_bytes, type);
> -	}
> +	list_for_each_entry(cur, &qgroup_list, iterator)
> +		qgroup_rsv_add(fs_info, cur, num_bytes, type);
>  
>  out:
> +	qgroup_iterator_clean(&qgroup_list);
>  	spin_unlock(&fs_info->qgroup_lock);
>  	return ret;
>  }
> diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h
> index 7bffa10589d6..5dc0583622c3 100644
> --- a/fs/btrfs/qgroup.h
> +++ b/fs/btrfs/qgroup.h
> @@ -220,6 +220,15 @@ struct btrfs_qgroup {
>  	struct list_head groups;  /* groups this group is member of */
>  	struct list_head members; /* groups that are members of this group */
>  	struct list_head dirty;   /* dirty groups */
> +
> +	/*
> +	 * For qgroup iteration usage.
> +	 *
> +	 * The iteration list should always be empty until
> +	 * qgroup_iterator_add() is called.
> +	 * And should be reset to empty after the iteration is finished.

Can you also add a note that this relies on global exclusion under the
qgroup spin lock?

> +	 */
> +	struct list_head iterator;
>  	struct rb_node node;	  /* tree of qgroups */
>  
>  	/*
> -- 
> 2.41.0
>
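
[Editor's note: one way the locking requirement Boris asks about could be made
explicit, beyond a comment, is a lockdep assertion in the helper. The sketch
below is purely illustrative and not part of the posted patch; in particular,
it assumes passing fs_info into the helper, which the patch does not do.]

static void qgroup_iterator_add(struct btrfs_fs_info *fs_info,
				struct list_head *head,
				struct btrfs_qgroup *qgroup)
{
	/*
	 * The embedded iterator list_head is a single shared slot per
	 * qgroup, so all users rely on global exclusion under the
	 * qgroup spinlock.
	 */
	lockdep_assert_held(&fs_info->qgroup_lock);

	if (!list_empty(&qgroup->iterator))
		return;

	list_add_tail(&qgroup->iterator, head);
}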

Patch

diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 74244b4bb0e9..de34e2aef710 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -216,6 +216,7 @@  static struct btrfs_qgroup *add_qgroup_rb(struct btrfs_fs_info *fs_info,
 	INIT_LIST_HEAD(&qgroup->groups);
 	INIT_LIST_HEAD(&qgroup->members);
 	INIT_LIST_HEAD(&qgroup->dirty);
+	INIT_LIST_HEAD(&qgroup->iterator);
 
 	rb_link_node(&qgroup->node, parent, p);
 	rb_insert_color(&qgroup->node, &fs_info->qgroup_tree);
@@ -1367,6 +1368,25 @@  static void qgroup_dirty(struct btrfs_fs_info *fs_info,
 		list_add(&qgroup->dirty, &fs_info->dirty_qgroups);
 }
 
+static void qgroup_iterator_add(struct list_head *head, struct btrfs_qgroup *qgroup)
+{
+	if (!list_empty(&qgroup->iterator))
+		return;
+
+	list_add_tail(&qgroup->iterator, head);
+}
+
+static void qgroup_iterator_clean(struct list_head *head)
+{
+
+	while (!list_empty(head)) {
+		struct btrfs_qgroup *qgroup;
+
+		qgroup = list_first_entry(head, struct btrfs_qgroup, iterator);
+		list_del_init(&qgroup->iterator);
+	}
+}
+
 /*
  * The easy accounting, we're updating qgroup relationship whose child qgroup
  * only has exclusive extents.
@@ -3154,12 +3174,11 @@  static bool qgroup_check_limits(const struct btrfs_qgroup *qg, u64 num_bytes)
 static int qgroup_reserve(struct btrfs_root *root, u64 num_bytes, bool enforce,
 			  enum btrfs_qgroup_rsv_type type)
 {
-	struct btrfs_qgroup *qgroup;
+	struct btrfs_qgroup *cur;
 	struct btrfs_fs_info *fs_info = root->fs_info;
 	u64 ref_root = root->root_key.objectid;
 	int ret = 0;
-	struct ulist_node *unode;
-	struct ulist_iterator uiter;
+	LIST_HEAD(qgroup_list);
 
 	if (!is_fstree(ref_root))
 		return 0;
@@ -3175,53 +3194,32 @@  static int qgroup_reserve(struct btrfs_root *root, u64 num_bytes, bool enforce,
 	if (!fs_info->quota_root)
 		goto out;
 
-	qgroup = find_qgroup_rb(fs_info, ref_root);
-	if (!qgroup)
+	cur = find_qgroup_rb(fs_info, ref_root);
+	if (!cur)
 		goto out;
 
-	/*
-	 * in a first step, we check all affected qgroups if any limits would
-	 * be exceeded
-	 */
-	ulist_reinit(fs_info->qgroup_ulist);
-	ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid,
-			qgroup_to_aux(qgroup), GFP_ATOMIC);
-	if (ret < 0)
-		goto out;
-	ULIST_ITER_INIT(&uiter);
-	while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
-		struct btrfs_qgroup *qg;
+	qgroup_iterator_add(&qgroup_list, cur);
+	list_for_each_entry(cur, &qgroup_list, iterator) {
 		struct btrfs_qgroup_list *glist;
 
-		qg = unode_aux_to_qgroup(unode);
-
-		if (enforce && !qgroup_check_limits(qg, num_bytes)) {
+		if (enforce && !qgroup_check_limits(cur, num_bytes)) {
 			ret = -EDQUOT;
 			goto out;
 		}
 
-		list_for_each_entry(glist, &qg->groups, next_group) {
-			ret = ulist_add(fs_info->qgroup_ulist,
-					glist->group->qgroupid,
-					qgroup_to_aux(glist->group), GFP_ATOMIC);
-			if (ret < 0)
-				goto out;
-		}
+		list_for_each_entry(glist, &cur->groups, next_group)
+			qgroup_iterator_add(&qgroup_list, glist->group);
 	}
+
 	ret = 0;
 	/*
 	 * no limits exceeded, now record the reservation into all qgroups
 	 */
-	ULIST_ITER_INIT(&uiter);
-	while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
-		struct btrfs_qgroup *qg;
-
-		qg = unode_aux_to_qgroup(unode);
-
-		qgroup_rsv_add(fs_info, qg, num_bytes, type);
-	}
+	list_for_each_entry(cur, &qgroup_list, iterator)
+		qgroup_rsv_add(fs_info, cur, num_bytes, type);
 
 out:
+	qgroup_iterator_clean(&qgroup_list);
 	spin_unlock(&fs_info->qgroup_lock);
 	return ret;
 }
diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h
index 7bffa10589d6..5dc0583622c3 100644
--- a/fs/btrfs/qgroup.h
+++ b/fs/btrfs/qgroup.h
@@ -220,6 +220,15 @@  struct btrfs_qgroup {
 	struct list_head groups;  /* groups this group is member of */
 	struct list_head members; /* groups that are members of this group */
 	struct list_head dirty;   /* dirty groups */
+
+	/*
+	 * For qgroup iteration usage.
+	 *
+	 * The iteration list should always be empty until
+	 * qgroup_iterator_add() is called.
+	 * And should be reset to empty after the iteration is finished.
+	 */
+	struct list_head iterator;
 	struct rb_node node;	  /* tree of qgroups */
 
 	/*
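
[Editor's note: a subtle point worth spelling out is that the new
list_for_each_entry() loop in qgroup_reserve() appends parents to qgroup_list
while the list is being walked. This is safe because the iterator re-reads the
->next pointer on every step, so the loop becomes a breadth-first walk of the
parent hierarchy that terminates once no new qgroups are queued. A compact
illustration, reusing the hypothetical struct qgroup and helpers from the
userspace sketch near the top of this page:]

/* Reuses struct qgroup, iterator_add() and container_of() from above. */
static void walk(struct list_head *queue, struct qgroup *start)
{
	struct list_head *pos;

	iterator_add(queue, start);
	/*
	 * Appending while walking is fine: each step re-reads ->next,
	 * so entries queued by earlier iterations are visited too, and
	 * the dedup check in iterator_add() guarantees termination.
	 */
	for (pos = queue->next; pos != queue; pos = pos->next) {
		struct qgroup *qg =
			container_of(pos, struct qgroup, iterator);

		/* ... check limits for qg, then queue each parent via
		 * iterator_add(queue, parent) ... */
		(void)qg;
	}
}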