
[01/11] cgroup: move rstat pointers into struct of their own

Message ID 20250218031448.46951-2-inwardvessel@gmail.com (mailing list archive)
State New
Series cgroup: separate rstat trees

Commit Message

JP Kobryn Feb. 18, 2025, 3:14 a.m. UTC
The rstat infrastructure makes use of pointers for list management.
These pointers only exist as fields in the cgroup struct, so moving them
into their own struct will allow them to be used elsewhere. The base
stat entities are included with them for now.

Signed-off-by: JP Kobryn <inwardvessel@gmail.com>
---
 include/linux/cgroup-defs.h                   | 90 +-----------------
 include/linux/cgroup_rstat.h                  | 92 +++++++++++++++++++
 kernel/cgroup/cgroup.c                        |  3 +-
 kernel/cgroup/rstat.c                         | 27 +++---
 .../selftests/bpf/progs/btf_type_tag_percpu.c |  4 +-
 5 files changed, 112 insertions(+), 104 deletions(-)
 create mode 100644 include/linux/cgroup_rstat.h
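
In short, the per-cpu pointer and the flush scratch pointer move into a new
struct cgroup_rstat that is embedded in struct cgroup, and call sites switch
from cgrp->rstat_cpu to cgrp->rstat.rstat_cpu. Abridged from the patch below:

struct cgroup_rstat {
	struct cgroup_rstat_cpu __percpu *rstat_cpu;

	CACHELINE_PADDING(_pad_);
	struct cgroup *rstat_flush_next;
};

struct cgroup {
	/* ... */
	struct cgroup_rstat rstat;	/* was: struct cgroup_rstat_cpu __percpu *rstat_cpu */
	struct list_head rstat_css_list;
	/* ... */
};

/* the accessor in kernel/cgroup/rstat.c now goes through the wrapper */
static struct cgroup_rstat_cpu *cgroup_rstat_cpu(struct cgroup *cgrp, int cpu)
{
	return per_cpu_ptr(cgrp->rstat.rstat_cpu, cpu);
}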

Comments

Shakeel Butt Feb. 19, 2025, 1:05 a.m. UTC | #1
Thanks JP for the awesome work. I am doing a quick first pass now and
will do a deeper review later.

On Mon, Feb 17, 2025 at 07:14:38PM -0800, JP Kobryn wrote:
>  struct cgroup_freezer_state {
>  	/* Should the cgroup and its descendants be frozen. */
>  	bool freeze;
> @@ -517,23 +445,9 @@ struct cgroup {
>  	struct cgroup *old_dom_cgrp;		/* used while enabling threaded */
>  
>  	/* per-cpu recursive resource statistics */
> -	struct cgroup_rstat_cpu __percpu *rstat_cpu;
> +	struct cgroup_rstat rstat;
>  	struct list_head rstat_css_list;

You might want to place rstat after rstat_css_list, just to keep
rstat_css_list (hopefully) on the same cacheline as before; otherwise this
will put rstat_css_list next to rstat_flush_next, which the current padding
is trying to avoid. This is just to be safe. Later we might want to
reevaluate the padding and the cacheline alignment of the fields of
struct cgroup.
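
Something along these lines, just as a sketch with the field names from this
patch (the _pad_ inside struct cgroup_rstat would then separate rstat_css_list
and rstat_cpu from rstat_flush_next and the bstat fields; worth confirming the
resulting layout with pahole):

	/* per-cpu recursive resource statistics */
	struct list_head rstat_css_list;	/* read mostly, as before */
	struct cgroup_rstat rstat;		/* rstat_cpu, then _pad_,
						 * then rstat_flush_next */

	/* cgroup basic resource statistics */
	struct cgroup_base_stat last_bstat;
	struct cgroup_base_stat bstat;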

>  
> -	/*
> -	 * Add padding to separate the read mostly rstat_cpu and
> -	 * rstat_css_list into a different cacheline from the following
> -	 * rstat_flush_next and *bstat fields which can have frequent updates.
> -	 */
> -	CACHELINE_PADDING(_pad_);
> -
> -	/*
> -	 * A singly-linked list of cgroup structures to be rstat flushed.
> -	 * This is a scratch field to be used exclusively by
> -	 * cgroup_rstat_flush_locked() and protected by cgroup_rstat_lock.
> -	 */
> -	struct cgroup	*rstat_flush_next;
> -
>  	/* cgroup basic resource statistics */
>  	struct cgroup_base_stat last_bstat;
>  	struct cgroup_base_stat bstat;
> diff --git a/include/linux/cgroup_rstat.h b/include/linux/cgroup_rstat.h
> new file mode 100644
> index 000000000000..f95474d6f8ab
> --- /dev/null
> +++ b/include/linux/cgroup_rstat.h
> @@ -0,0 +1,92 @@
[...]
> +struct cgroup_rstat {
> +	struct cgroup_rstat_cpu __percpu *rstat_cpu;
> +
> +	/*
> +	 * Add padding to separate the read mostly rstat_cpu and
> +	 * rstat_css_list into a different cacheline from the following
> +	 * rstat_flush_next and containing struct fields which can have
> +	 * frequent updates.
> +	 */
> +	CACHELINE_PADDING(_pad_);
> +	struct cgroup *rstat_flush_next;
> +};
Shakeel Butt Feb. 19, 2025, 1:23 a.m. UTC | #2
On Tue, Feb 18, 2025 at 05:05:52PM -0800, Shakeel Butt wrote:
> Thanks JP for the awesome work. I am doing a quick first pass now and
> will do a deeper review later.
> 
> On Mon, Feb 17, 2025 at 07:14:38PM -0800, JP Kobryn wrote:
> >  struct cgroup_freezer_state {
> >  	/* Should the cgroup and its descendants be frozen. */
> >  	bool freeze;
> > @@ -517,23 +445,9 @@ struct cgroup {
> >  	struct cgroup *old_dom_cgrp;		/* used while enabling threaded */
> >  
> >  	/* per-cpu recursive resource statistics */
> > -	struct cgroup_rstat_cpu __percpu *rstat_cpu;
> > +	struct cgroup_rstat rstat;
> >  	struct list_head rstat_css_list;
> 
> You might want to place rstat after rstat_css_list, just to keep
> rstat_css_list (hopefully) on the same cacheline as before; otherwise this
> will put rstat_css_list next to rstat_flush_next, which the current padding
> is trying to avoid. This is just to be safe. Later we might want to
> reevaluate the padding and the cacheline alignment of the fields of
> struct cgroup.
> 

Ah, I see that later in the series you remove rstat_css_list, since you
move the rstat state from cgroup to css and don't need rstat_css_list
anymore.
Yosry Ahmed Feb. 20, 2025, 4:53 p.m. UTC | #3
On Mon, Feb 17, 2025 at 07:14:38PM -0800, JP Kobryn wrote:
> The rstat infrastructure makes use of pointers for list management.
> These pointers only exist as fields in the cgroup struct, so moving them
> into their own struct will allow them to be used elsewhere. The base
> stat entities are included with them for now.
> 
> Signed-off-by: JP Kobryn <inwardvessel@gmail.com>
> ---
>  include/linux/cgroup-defs.h                   | 90 +-----------------
>  include/linux/cgroup_rstat.h                  | 92 +++++++++++++++++++
>  kernel/cgroup/cgroup.c                        |  3 +-
>  kernel/cgroup/rstat.c                         | 27 +++---
>  .../selftests/bpf/progs/btf_type_tag_percpu.c |  4 +-
>  5 files changed, 112 insertions(+), 104 deletions(-)
>  create mode 100644 include/linux/cgroup_rstat.h
> 
> diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
> index 1b20d2d8ef7c..6b6cc027fe70 100644
> --- a/include/linux/cgroup-defs.h
> +++ b/include/linux/cgroup-defs.h
> @@ -17,7 +17,7 @@
>  #include <linux/refcount.h>
>  #include <linux/percpu-refcount.h>
>  #include <linux/percpu-rwsem.h>
> -#include <linux/u64_stats_sync.h>
> +#include <linux/cgroup_rstat.h>
>  #include <linux/workqueue.h>
>  #include <linux/bpf-cgroup-defs.h>
>  #include <linux/psi_types.h>
> @@ -321,78 +321,6 @@ struct css_set {
>  	struct rcu_head rcu_head;
>  };
>  
> -struct cgroup_base_stat {
> -	struct task_cputime cputime;
> -
> -#ifdef CONFIG_SCHED_CORE
> -	u64 forceidle_sum;
> -#endif
> -	u64 ntime;
> -};
> -
> -/*
> - * rstat - cgroup scalable recursive statistics.  Accounting is done
> - * per-cpu in cgroup_rstat_cpu which is then lazily propagated up the
> - * hierarchy on reads.
> - *
> - * When a stat gets updated, the cgroup_rstat_cpu and its ancestors are
> - * linked into the updated tree.  On the following read, propagation only
> - * considers and consumes the updated tree.  This makes reading O(the
> - * number of descendants which have been active since last read) instead of
> - * O(the total number of descendants).
> - *
> - * This is important because there can be a lot of (draining) cgroups which
> - * aren't active and stat may be read frequently.  The combination can
> - * become very expensive.  By propagating selectively, increasing reading
> - * frequency decreases the cost of each read.
> - *
> - * This struct hosts both the fields which implement the above -
> - * updated_children and updated_next - and the fields which track basic
> - * resource statistics on top of it - bsync, bstat and last_bstat.
> - */
> -struct cgroup_rstat_cpu {
> -	/*
> -	 * ->bsync protects ->bstat.  These are the only fields which get
> -	 * updated in the hot path.
> -	 */
> -	struct u64_stats_sync bsync;
> -	struct cgroup_base_stat bstat;
> -
> -	/*
> -	 * Snapshots at the last reading.  These are used to calculate the
> -	 * deltas to propagate to the global counters.
> -	 */
> -	struct cgroup_base_stat last_bstat;
> -
> -	/*
> -	 * This field is used to record the cumulative per-cpu time of
> -	 * the cgroup and its descendants. Currently it can be read via
> -	 * eBPF/drgn etc, and we are still trying to determine how to
> -	 * expose it in the cgroupfs interface.
> -	 */
> -	struct cgroup_base_stat subtree_bstat;
> -
> -	/*
> -	 * Snapshots at the last reading. These are used to calculate the
> -	 * deltas to propagate to the per-cpu subtree_bstat.
> -	 */
> -	struct cgroup_base_stat last_subtree_bstat;
> -
> -	/*
> -	 * Child cgroups with stat updates on this cpu since the last read
> -	 * are linked on the parent's ->updated_children through
> -	 * ->updated_next.
> -	 *
> -	 * In addition to being more compact, singly-linked list pointing
> -	 * to the cgroup makes it unnecessary for each per-cpu struct to
> -	 * point back to the associated cgroup.
> -	 *
> -	 * Protected by per-cpu cgroup_rstat_cpu_lock.
> -	 */
> -	struct cgroup *updated_children;	/* terminated by self cgroup */
> -	struct cgroup *updated_next;		/* NULL iff not on the list */
> -};
> -
>  struct cgroup_freezer_state {
>  	/* Should the cgroup and its descendants be frozen. */
>  	bool freeze;
> @@ -517,23 +445,9 @@ struct cgroup {
>  	struct cgroup *old_dom_cgrp;		/* used while enabling threaded */
>  
>  	/* per-cpu recursive resource statistics */
> -	struct cgroup_rstat_cpu __percpu *rstat_cpu;
> +	struct cgroup_rstat rstat;
>  	struct list_head rstat_css_list;
>  
> -	/*
> -	 * Add padding to separate the read mostly rstat_cpu and
> -	 * rstat_css_list into a different cacheline from the following
> -	 * rstat_flush_next and *bstat fields which can have frequent updates.
> -	 */
> -	CACHELINE_PADDING(_pad_);
> -
> -	/*
> -	 * A singly-linked list of cgroup structures to be rstat flushed.
> -	 * This is a scratch field to be used exclusively by
> -	 * cgroup_rstat_flush_locked() and protected by cgroup_rstat_lock.
> -	 */
> -	struct cgroup	*rstat_flush_next;
> -
>  	/* cgroup basic resource statistics */
>  	struct cgroup_base_stat last_bstat;
>  	struct cgroup_base_stat bstat;
> diff --git a/include/linux/cgroup_rstat.h b/include/linux/cgroup_rstat.h
> new file mode 100644
> index 000000000000..f95474d6f8ab
> --- /dev/null
> +++ b/include/linux/cgroup_rstat.h
> @@ -0,0 +1,92 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +#ifndef _LINUX_RSTAT_H
> +#define _LINUX_RSTAT_H
> +
> +#include <linux/u64_stats_sync.h>
> +
> +struct cgroup_rstat_cpu;

Why do we need the forward declaration instead of just defining struct
cgroup_rstat_cpu first? Also, why do we need a new header for these
definitions rather than just adding struct cgroup_rstat to
cgroup-defs.h?
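
For reference, defining the per-cpu struct first would look something like
this (sketch only, reusing the definitions from this patch):

struct cgroup_rstat_cpu {
	struct cgroup *updated_children;	/* terminated by self */
	struct cgroup *updated_next;		/* NULL if not on the list */
	/* ... bsync, bstat and the other fields as in the patch ... */
};

struct cgroup_rstat {
	/* cgroup_rstat_cpu is defined above, so no forward declaration needed */
	struct cgroup_rstat_cpu __percpu *rstat_cpu;

	CACHELINE_PADDING(_pad_);
	struct cgroup *rstat_flush_next;
};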

> +
> +/*
> + * rstat - cgroup scalable recursive statistics.  Accounting is done
> + * per-cpu in cgroup_rstat_cpu which is then lazily propagated up the
> + * hierarchy on reads.
> + *
> + * When a stat gets updated, the cgroup_rstat_cpu and its ancestors are
> + * linked into the updated tree.  On the following read, propagation only
> + * considers and consumes the updated tree.  This makes reading O(the
> + * number of descendants which have been active since last read) instead of
> + * O(the total number of descendants).
> + *
> + * This is important because there can be a lot of (draining) cgroups which
> + * aren't active and stat may be read frequently.  The combination can
> + * become very expensive.  By propagating selectively, increasing reading
> + * frequency decreases the cost of each read.
> + *
> + * This struct hosts both the fields which implement the above -
> + * updated_children and updated_next - and the fields which track basic
> + * resource statistics on top of it - bsync, bstat and last_bstat.
> + */
> +struct cgroup_rstat {
> +	struct cgroup_rstat_cpu __percpu *rstat_cpu;
> +
> +	/*
> +	 * Add padding to separate the read mostly rstat_cpu and
> +	 * rstat_css_list into a different cacheline from the following
> +	 * rstat_flush_next and containing struct fields which can have
> +	 * frequent updates.
> +	 */
> +	CACHELINE_PADDING(_pad_);
> +	struct cgroup *rstat_flush_next;
> +};
> +
> +struct cgroup_base_stat {
> +	struct task_cputime cputime;
> +
> +#ifdef CONFIG_SCHED_CORE
> +	u64 forceidle_sum;
> +#endif
> +	u64 ntime;
> +};
> +
> +struct cgroup_rstat_cpu {
> +	/*
> +	 * Child cgroups with stat updates on this cpu since the last read
> +	 * are linked on the parent's ->updated_children through
> +	 * ->updated_next.
> +	 *
> +	 * In addition to being more compact, singly-linked list pointing
> +	 * to the cgroup makes it unnecessary for each per-cpu struct to
> +	 * point back to the associated cgroup.
> +	 */
> +	struct cgroup *updated_children;	/* terminated by self */
> +	struct cgroup *updated_next;		/* NULL if not on the list */
> +
> +	/*
> +	 * ->bsync protects ->bstat.  These are the only fields which get
> +	 * updated in the hot path.
> +	 */
> +	struct u64_stats_sync bsync;
> +	struct cgroup_base_stat bstat;
> +
> +	/*
> +	 * Snapshots at the last reading.  These are used to calculate the
> +	 * deltas to propagate to the global counters.
> +	 */
> +	struct cgroup_base_stat last_bstat;
> +
> +	/*
> +	 * This field is used to record the cumulative per-cpu time of
> +	 * the cgroup and its descendants. Currently it can be read via
> +	 * eBPF/drgn etc, and we are still trying to determine how to
> +	 * expose it in the cgroupfs interface.
> +	 */
> +	struct cgroup_base_stat subtree_bstat;
> +
> +	/*
> +	 * Snapshots at the last reading. These are used to calculate the
> +	 * deltas to propagate to the per-cpu subtree_bstat.
> +	 */
> +	struct cgroup_base_stat last_subtree_bstat;
> +};
> +
> +#endif	/* _LINUX_RSTAT_H */

Patch

diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index 1b20d2d8ef7c..6b6cc027fe70 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -17,7 +17,7 @@ 
 #include <linux/refcount.h>
 #include <linux/percpu-refcount.h>
 #include <linux/percpu-rwsem.h>
-#include <linux/u64_stats_sync.h>
+#include <linux/cgroup_rstat.h>
 #include <linux/workqueue.h>
 #include <linux/bpf-cgroup-defs.h>
 #include <linux/psi_types.h>
@@ -321,78 +321,6 @@  struct css_set {
 	struct rcu_head rcu_head;
 };
 
-struct cgroup_base_stat {
-	struct task_cputime cputime;
-
-#ifdef CONFIG_SCHED_CORE
-	u64 forceidle_sum;
-#endif
-	u64 ntime;
-};
-
-/*
- * rstat - cgroup scalable recursive statistics.  Accounting is done
- * per-cpu in cgroup_rstat_cpu which is then lazily propagated up the
- * hierarchy on reads.
- *
- * When a stat gets updated, the cgroup_rstat_cpu and its ancestors are
- * linked into the updated tree.  On the following read, propagation only
- * considers and consumes the updated tree.  This makes reading O(the
- * number of descendants which have been active since last read) instead of
- * O(the total number of descendants).
- *
- * This is important because there can be a lot of (draining) cgroups which
- * aren't active and stat may be read frequently.  The combination can
- * become very expensive.  By propagating selectively, increasing reading
- * frequency decreases the cost of each read.
- *
- * This struct hosts both the fields which implement the above -
- * updated_children and updated_next - and the fields which track basic
- * resource statistics on top of it - bsync, bstat and last_bstat.
- */
-struct cgroup_rstat_cpu {
-	/*
-	 * ->bsync protects ->bstat.  These are the only fields which get
-	 * updated in the hot path.
-	 */
-	struct u64_stats_sync bsync;
-	struct cgroup_base_stat bstat;
-
-	/*
-	 * Snapshots at the last reading.  These are used to calculate the
-	 * deltas to propagate to the global counters.
-	 */
-	struct cgroup_base_stat last_bstat;
-
-	/*
-	 * This field is used to record the cumulative per-cpu time of
-	 * the cgroup and its descendants. Currently it can be read via
-	 * eBPF/drgn etc, and we are still trying to determine how to
-	 * expose it in the cgroupfs interface.
-	 */
-	struct cgroup_base_stat subtree_bstat;
-
-	/*
-	 * Snapshots at the last reading. These are used to calculate the
-	 * deltas to propagate to the per-cpu subtree_bstat.
-	 */
-	struct cgroup_base_stat last_subtree_bstat;
-
-	/*
-	 * Child cgroups with stat updates on this cpu since the last read
-	 * are linked on the parent's ->updated_children through
-	 * ->updated_next.
-	 *
-	 * In addition to being more compact, singly-linked list pointing
-	 * to the cgroup makes it unnecessary for each per-cpu struct to
-	 * point back to the associated cgroup.
-	 *
-	 * Protected by per-cpu cgroup_rstat_cpu_lock.
-	 */
-	struct cgroup *updated_children;	/* terminated by self cgroup */
-	struct cgroup *updated_next;		/* NULL iff not on the list */
-};
-
 struct cgroup_freezer_state {
 	/* Should the cgroup and its descendants be frozen. */
 	bool freeze;
@@ -517,23 +445,9 @@  struct cgroup {
 	struct cgroup *old_dom_cgrp;		/* used while enabling threaded */
 
 	/* per-cpu recursive resource statistics */
-	struct cgroup_rstat_cpu __percpu *rstat_cpu;
+	struct cgroup_rstat rstat;
 	struct list_head rstat_css_list;
 
-	/*
-	 * Add padding to separate the read mostly rstat_cpu and
-	 * rstat_css_list into a different cacheline from the following
-	 * rstat_flush_next and *bstat fields which can have frequent updates.
-	 */
-	CACHELINE_PADDING(_pad_);
-
-	/*
-	 * A singly-linked list of cgroup structures to be rstat flushed.
-	 * This is a scratch field to be used exclusively by
-	 * cgroup_rstat_flush_locked() and protected by cgroup_rstat_lock.
-	 */
-	struct cgroup	*rstat_flush_next;
-
 	/* cgroup basic resource statistics */
 	struct cgroup_base_stat last_bstat;
 	struct cgroup_base_stat bstat;
diff --git a/include/linux/cgroup_rstat.h b/include/linux/cgroup_rstat.h
new file mode 100644
index 000000000000..f95474d6f8ab
--- /dev/null
+++ b/include/linux/cgroup_rstat.h
@@ -0,0 +1,92 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_RSTAT_H
+#define _LINUX_RSTAT_H
+
+#include <linux/u64_stats_sync.h>
+
+struct cgroup_rstat_cpu;
+
+/*
+ * rstat - cgroup scalable recursive statistics.  Accounting is done
+ * per-cpu in cgroup_rstat_cpu which is then lazily propagated up the
+ * hierarchy on reads.
+ *
+ * When a stat gets updated, the cgroup_rstat_cpu and its ancestors are
+ * linked into the updated tree.  On the following read, propagation only
+ * considers and consumes the updated tree.  This makes reading O(the
+ * number of descendants which have been active since last read) instead of
+ * O(the total number of descendants).
+ *
+ * This is important because there can be a lot of (draining) cgroups which
+ * aren't active and stat may be read frequently.  The combination can
+ * become very expensive.  By propagating selectively, increasing reading
+ * frequency decreases the cost of each read.
+ *
+ * This struct hosts both the fields which implement the above -
+ * updated_children and updated_next - and the fields which track basic
+ * resource statistics on top of it - bsync, bstat and last_bstat.
+ */
+struct cgroup_rstat {
+	struct cgroup_rstat_cpu __percpu *rstat_cpu;
+
+	/*
+	 * Add padding to separate the read mostly rstat_cpu and
+	 * rstat_css_list into a different cacheline from the following
+	 * rstat_flush_next and containing struct fields which can have
+	 * frequent updates.
+	 */
+	CACHELINE_PADDING(_pad_);
+	struct cgroup *rstat_flush_next;
+};
+
+struct cgroup_base_stat {
+	struct task_cputime cputime;
+
+#ifdef CONFIG_SCHED_CORE
+	u64 forceidle_sum;
+#endif
+	u64 ntime;
+};
+
+struct cgroup_rstat_cpu {
+	/*
+	 * Child cgroups with stat updates on this cpu since the last read
+	 * are linked on the parent's ->updated_children through
+	 * ->updated_next.
+	 *
+	 * In addition to being more compact, singly-linked list pointing
+	 * to the cgroup makes it unnecessary for each per-cpu struct to
+	 * point back to the associated cgroup.
+	 */
+	struct cgroup *updated_children;	/* terminated by self */
+	struct cgroup *updated_next;		/* NULL if not on the list */
+
+	/*
+	 * ->bsync protects ->bstat.  These are the only fields which get
+	 * updated in the hot path.
+	 */
+	struct u64_stats_sync bsync;
+	struct cgroup_base_stat bstat;
+
+	/*
+	 * Snapshots at the last reading.  These are used to calculate the
+	 * deltas to propagate to the global counters.
+	 */
+	struct cgroup_base_stat last_bstat;
+
+	/*
+	 * This field is used to record the cumulative per-cpu time of
+	 * the cgroup and its descendants. Currently it can be read via
+	 * eBPF/drgn etc, and we are still trying to determine how to
+	 * expose it in the cgroupfs interface.
+	 */
+	struct cgroup_base_stat subtree_bstat;
+
+	/*
+	 * Snapshots at the last reading. These are used to calculate the
+	 * deltas to propagate to the per-cpu subtree_bstat.
+	 */
+	struct cgroup_base_stat last_subtree_bstat;
+};
+
+#endif	/* _LINUX_RSTAT_H */
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index d9061bd55436..03a3a4da49f1 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -31,6 +31,7 @@ 
 #include "cgroup-internal.h"
 
 #include <linux/bpf-cgroup.h>
+#include <linux/cgroup_rstat.h>
 #include <linux/cred.h>
 #include <linux/errno.h>
 #include <linux/init_task.h>
@@ -164,7 +165,7 @@  static struct static_key_true *cgroup_subsys_on_dfl_key[] = {
 static DEFINE_PER_CPU(struct cgroup_rstat_cpu, cgrp_dfl_root_rstat_cpu);
 
 /* the default hierarchy */
-struct cgroup_root cgrp_dfl_root = { .cgrp.rstat_cpu = &cgrp_dfl_root_rstat_cpu };
+struct cgroup_root cgrp_dfl_root = { .cgrp.rstat.rstat_cpu = &cgrp_dfl_root_rstat_cpu };
 EXPORT_SYMBOL_GPL(cgrp_dfl_root);
 
 /*
diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c
index 5877974ece92..7e7879d88c38 100644
--- a/kernel/cgroup/rstat.c
+++ b/kernel/cgroup/rstat.c
@@ -16,7 +16,7 @@  static void cgroup_base_stat_flush(struct cgroup *cgrp, int cpu);
 
 static struct cgroup_rstat_cpu *cgroup_rstat_cpu(struct cgroup *cgrp, int cpu)
 {
-	return per_cpu_ptr(cgrp->rstat_cpu, cpu);
+	return per_cpu_ptr(cgrp->rstat.rstat_cpu, cpu);
 }
 
 /*
@@ -149,24 +149,24 @@  static struct cgroup *cgroup_rstat_push_children(struct cgroup *head,
 	struct cgroup *parent, *grandchild;
 	struct cgroup_rstat_cpu *crstatc;
 
-	child->rstat_flush_next = NULL;
+	child->rstat.rstat_flush_next = NULL;
 
 next_level:
 	while (chead) {
 		child = chead;
-		chead = child->rstat_flush_next;
+		chead = child->rstat.rstat_flush_next;
 		parent = cgroup_parent(child);
 
 		/* updated_next is parent cgroup terminated */
 		while (child != parent) {
-			child->rstat_flush_next = head;
+			child->rstat.rstat_flush_next = head;
 			head = child;
 			crstatc = cgroup_rstat_cpu(child, cpu);
 			grandchild = crstatc->updated_children;
 			if (grandchild != child) {
 				/* Push the grand child to the next level */
 				crstatc->updated_children = child;
-				grandchild->rstat_flush_next = ghead;
+				grandchild->rstat.rstat_flush_next = ghead;
 				ghead = grandchild;
 			}
 			child = crstatc->updated_next;
@@ -238,7 +238,7 @@  static struct cgroup *cgroup_rstat_updated_list(struct cgroup *root, int cpu)
 
 	/* Push @root to the list first before pushing the children */
 	head = root;
-	root->rstat_flush_next = NULL;
+	root->rstat.rstat_flush_next = NULL;
 	child = rstatc->updated_children;
 	rstatc->updated_children = root;
 	if (child != root)
@@ -310,7 +310,7 @@  static void cgroup_rstat_flush_locked(struct cgroup *cgrp)
 	for_each_possible_cpu(cpu) {
 		struct cgroup *pos = cgroup_rstat_updated_list(cgrp, cpu);
 
-		for (; pos; pos = pos->rstat_flush_next) {
+		for (; pos; pos = pos->rstat.rstat_flush_next) {
 			struct cgroup_subsys_state *css;
 
 			cgroup_base_stat_flush(pos, cpu);
@@ -387,9 +387,10 @@  int cgroup_rstat_init(struct cgroup *cgrp)
 	int cpu;
 
 	/* the root cgrp has rstat_cpu preallocated */
-	if (!cgrp->rstat_cpu) {
-		cgrp->rstat_cpu = alloc_percpu(struct cgroup_rstat_cpu);
-		if (!cgrp->rstat_cpu)
+	if (!cgrp->rstat.rstat_cpu) {
+		cgrp->rstat.rstat_cpu = alloc_percpu(
+				struct cgroup_rstat_cpu);
+		if (!cgrp->rstat.rstat_cpu)
 			return -ENOMEM;
 	}
 
@@ -419,8 +420,8 @@  void cgroup_rstat_exit(struct cgroup *cgrp)
 			return;
 	}
 
-	free_percpu(cgrp->rstat_cpu);
-	cgrp->rstat_cpu = NULL;
+	free_percpu(cgrp->rstat.rstat_cpu);
+	cgrp->rstat.rstat_cpu = NULL;
 }
 
 void __init cgroup_rstat_boot(void)
@@ -503,7 +504,7 @@  cgroup_base_stat_cputime_account_begin(struct cgroup *cgrp, unsigned long *flags
 {
 	struct cgroup_rstat_cpu *rstatc;
 
-	rstatc = get_cpu_ptr(cgrp->rstat_cpu);
+	rstatc = get_cpu_ptr(cgrp->rstat.rstat_cpu);
 	*flags = u64_stats_update_begin_irqsave(&rstatc->bsync);
 	return rstatc;
 }
diff --git a/tools/testing/selftests/bpf/progs/btf_type_tag_percpu.c b/tools/testing/selftests/bpf/progs/btf_type_tag_percpu.c
index 38f78d9345de..035412265c3c 100644
--- a/tools/testing/selftests/bpf/progs/btf_type_tag_percpu.c
+++ b/tools/testing/selftests/bpf/progs/btf_type_tag_percpu.c
@@ -45,7 +45,7 @@  int BPF_PROG(test_percpu2, struct bpf_testmod_btf_type_tag_2 *arg)
 SEC("tp_btf/cgroup_mkdir")
 int BPF_PROG(test_percpu_load, struct cgroup *cgrp, const char *path)
 {
-	g = (__u64)cgrp->rstat_cpu->updated_children;
+	g = (__u64)cgrp->rstat.rstat_cpu->updated_children;
 	return 0;
 }
 
@@ -56,7 +56,7 @@  int BPF_PROG(test_percpu_helper, struct cgroup *cgrp, const char *path)
 	__u32 cpu;
 
 	cpu = bpf_get_smp_processor_id();
-	rstat = (struct cgroup_rstat_cpu *)bpf_per_cpu_ptr(cgrp->rstat_cpu, cpu);
+	rstat = (struct cgroup_rstat_cpu *)bpf_per_cpu_ptr(cgrp->rstat.rstat_cpu, cpu);
 	if (rstat) {
 		/* READ_ONCE */
 		*(volatile int *)rstat;