@@ -75,6 +75,9 @@ struct cgroup_bpf {
/* cgroup_bpf is released using a work queue */
struct work_struct release_work;
+
+ /* per-cpu recursive resource statistics */
+ struct cgroup_rstat rstat;
};
#else /* CONFIG_CGROUP_BPF */
@@ -836,6 +836,9 @@ static inline bool cgroup_task_frozen(struct task_struct *task)
#endif /* !CONFIG_CGROUPS */
#ifdef CONFIG_CGROUP_BPF
+void bpf_cgroup_rstat_updated(struct cgroup *cgrp, int cpu);
+void bpf_cgroup_rstat_flush(struct cgroup *cgrp);
+
static inline void cgroup_bpf_get(struct cgroup *cgrp)
{
percpu_ref_get(&cgrp->bpf.refcnt);
@@ -210,6 +210,7 @@ void cgroup_bpf_offline(struct cgroup *cgrp)
{
cgroup_get(cgrp);
percpu_ref_kill(&cgrp->bpf.refcnt);
+ bpf_cgroup_rstat_exit(&cgrp->bpf);
}
static void bpf_cgroup_storages_free(struct bpf_cgroup_storage *storages[])
@@ -490,6 +491,10 @@ int cgroup_bpf_inherit(struct cgroup *cgrp)
if (ret)
return ret;
+ ret = bpf_cgroup_rstat_init(&cgrp->bpf);
+ if (ret)
+ goto cleanup_ref;
+
for (p = cgroup_parent(cgrp); p; p = cgroup_parent(p))
cgroup_bpf_get(p);
@@ -513,6 +518,7 @@ int cgroup_bpf_inherit(struct cgroup *cgrp)
for (p = cgroup_parent(cgrp); p; p = cgroup_parent(p))
cgroup_bpf_put(p);
+cleanup_ref:
percpu_ref_exit(&cgrp->bpf.refcnt);
return -ENOMEM;
@@ -274,6 +274,11 @@ void cgroup_rstat_exit(struct cgroup_subsys_state *css);
void cgroup_rstat_boot(void);
void cgroup_base_stat_cputime_show(struct seq_file *seq);
+#ifdef CONFIG_CGROUP_BPF
+int bpf_cgroup_rstat_init(struct cgroup_bpf *bpf);
+void bpf_cgroup_rstat_exit(struct cgroup_bpf *bpf);
+#endif /* CONFIG_CGROUP_BPF */
+
/*
* namespace.c
*/
@@ -73,6 +73,47 @@ static struct cgroup_rstat_ops rstat_css_ops = {
.flush_fn = rstat_flush_via_css,
};
+#ifdef CONFIG_CGROUP_BPF
+__weak noinline void bpf_rstat_flush(struct cgroup *cgrp,
+ struct cgroup *parent, int cpu);
+
+static struct cgroup *rstat_cgroup_via_bpf(struct cgroup_rstat *rstat)
+{
+ struct cgroup_bpf *bpf = container_of(rstat, typeof(*bpf), rstat);
+ struct cgroup *cgrp = container_of(bpf, typeof(*cgrp), bpf);
+
+ return cgrp;
+}
+
+static struct cgroup_rstat *rstat_parent_via_bpf(
+ struct cgroup_rstat *rstat)
+{
+ struct cgroup *cgrp, *cgrp_parent;
+
+ cgrp = rstat_cgroup_via_bpf(rstat);
+ cgrp_parent = cgroup_parent(cgrp);
+ if (!cgrp_parent)
+ return NULL;
+
+ return &(cgrp_parent->bpf.rstat);
+}
+
+static void rstat_flush_via_bpf(struct cgroup_rstat *rstat, int cpu)
+{
+ struct cgroup *cgrp, *cgrp_parent;
+
+ cgrp = rstat_cgroup_via_bpf(rstat);
+ cgrp_parent = cgroup_parent(cgrp);
+ bpf_rstat_flush(cgrp, cgrp_parent, cpu);
+}
+
+static struct cgroup_rstat_ops rstat_bpf_ops = {
+ .parent_fn = rstat_parent_via_bpf,
+ .cgroup_fn = rstat_cgroup_via_bpf,
+ .flush_fn = rstat_flush_via_bpf,
+};
+#endif /* CONFIG_CGROUP_BPF */
+
/*
* Helper functions for rstat per CPU lock (cgroup_rstat_cpu_lock).
*
@@ -187,11 +228,18 @@ static void __cgroup_rstat_updated(struct cgroup_rstat *rstat, int cpu,
* rstat_cpu->updated_children list. See the comment on top of
* cgroup_rstat_cpu definition for details.
*/
-__bpf_kfunc void cgroup_rstat_updated(struct cgroup_subsys_state *css, int cpu)
+void cgroup_rstat_updated(struct cgroup_subsys_state *css, int cpu)
{
__cgroup_rstat_updated(&css->rstat, cpu, &rstat_css_ops);
}
+#ifdef CONFIG_CGROUP_BPF
+__bpf_kfunc void bpf_cgroup_rstat_updated(struct cgroup *cgroup, int cpu)
+{
+ __cgroup_rstat_updated(&(cgroup->bpf.rstat), cpu, &rstat_bpf_ops);
+}
+#endif /* CONFIG_CGROUP_BPF */
+
/**
* cgroup_rstat_push_children - push children cgroups into the given list
* @head: current head of the list (= subtree root)
@@ -330,8 +378,7 @@ static struct cgroup_rstat *cgroup_rstat_updated_list(
__bpf_hook_start();
-__weak noinline void bpf_rstat_flush(struct cgroup *cgrp,
- struct cgroup *parent, int cpu)
+void bpf_rstat_flush(struct cgroup *cgrp, struct cgroup *parent, int cpu)
{
}
@@ -379,12 +426,8 @@ static void cgroup_rstat_flush_locked(struct cgroup_rstat *rstat,
struct cgroup_rstat *pos = cgroup_rstat_updated_list(
rstat, cpu, ops);
- for (; pos; pos = pos->rstat_flush_next) {
- struct cgroup *pos_cgroup = ops->cgroup_fn(pos);
-
+ for (; pos; pos = pos->rstat_flush_next)
ops->flush_fn(pos, cpu);
- bpf_rstat_flush(pos_cgroup, cgroup_parent(pos_cgroup), cpu);
- }
/* play nice and yield if necessary */
if (need_resched() || spin_needbreak(&cgroup_rstat_lock)) {
@@ -424,11 +467,18 @@ static void __cgroup_rstat_flush(struct cgroup_rstat *rstat,
*
* This function may block.
*/
-__bpf_kfunc void cgroup_rstat_flush(struct cgroup_subsys_state *css)
+void cgroup_rstat_flush(struct cgroup_subsys_state *css)
{
__cgroup_rstat_flush(&css->rstat, &rstat_css_ops);
}
+#ifdef CONFIG_CGROUP_BPF
+__bpf_kfunc void bpf_cgroup_rstat_flush(struct cgroup *cgroup)
+{
+ __cgroup_rstat_flush(&(cgroup->bpf.rstat), &rstat_bpf_ops);
+}
+#endif /* CONFIG_CGROUP_BPF */
+
static void __cgroup_rstat_flush_hold(struct cgroup_rstat *rstat,
struct cgroup_rstat_ops *ops)
__acquires(&cgroup_rstat_lock)
@@ -532,6 +582,27 @@ void cgroup_rstat_exit(struct cgroup_subsys_state *css)
__cgroup_rstat_exit(rstat);
}
+#ifdef CONFIG_CGROUP_BPF
+int bpf_cgroup_rstat_init(struct cgroup_bpf *bpf)
+{
+ struct cgroup_rstat *rstat = &bpf->rstat;
+
+ rstat->rstat_cpu = alloc_percpu(struct cgroup_rstat_cpu);
+ if (!rstat->rstat_cpu)
+ return -ENOMEM;
+
+ __cgroup_rstat_init(rstat);
+
+ return 0;
+}
+
+void bpf_cgroup_rstat_exit(struct cgroup_bpf *bpf)
+{
+ __cgroup_rstat_flush(&bpf->rstat, &rstat_bpf_ops);
+ __cgroup_rstat_exit(&bpf->rstat);
+}
+#endif /* CONFIG_CGROUP_BPF */
+
void __init cgroup_rstat_boot(void)
{
int cpu;
@@ -754,10 +825,11 @@ void cgroup_base_stat_cputime_show(struct seq_file *seq)
cgroup_force_idle_show(seq, &cgrp->bstat);
}
+#ifdef CONFIG_CGROUP_BPF
/* Add bpf kfuncs for cgroup_rstat_updated() and cgroup_rstat_flush() */
BTF_KFUNCS_START(bpf_rstat_kfunc_ids)
-BTF_ID_FLAGS(func, cgroup_rstat_updated)
-BTF_ID_FLAGS(func, cgroup_rstat_flush, KF_SLEEPABLE)
+BTF_ID_FLAGS(func, bpf_cgroup_rstat_updated)
+BTF_ID_FLAGS(func, bpf_cgroup_rstat_flush, KF_SLEEPABLE)
BTF_KFUNCS_END(bpf_rstat_kfunc_ids)
static const struct btf_kfunc_id_set bpf_rstat_kfunc_set = {
@@ -771,3 +843,4 @@ static int __init bpf_rstat_kfunc_init(void)
&bpf_rstat_kfunc_set);
}
late_initcall(bpf_rstat_kfunc_init);
+#endif /* CONFIG_CGROUP_BPF */
@@ -45,7 +45,7 @@ int BPF_PROG(test_percpu2, struct bpf_testmod_btf_type_tag_2 *arg)
SEC("tp_btf/cgroup_mkdir")
int BPF_PROG(test_percpu_load, struct cgroup *cgrp, const char *path)
{
- g = (__u64)cgrp->self.rstat.rstat_cpu->updated_children;
+ g = (__u64)cgrp->bpf.rstat.rstat_cpu->updated_children;
return 0;
}
@@ -57,7 +57,7 @@ int BPF_PROG(test_percpu_helper, struct cgroup *cgrp, const char *path)
cpu = bpf_get_smp_processor_id();
rstat = (struct cgroup_rstat_cpu *)bpf_per_cpu_ptr(
- cgrp->self.rstat.rstat_cpu, cpu);
+ cgrp->bpf.rstat.rstat_cpu, cpu);
if (rstat) {
/* READ_ONCE */
*(volatile int *)rstat;
@@ -37,8 +37,8 @@ struct {
__type(value, struct attach_counter);
} attach_counters SEC(".maps");
-extern void cgroup_rstat_updated(struct cgroup_subsys_state *css, int cpu) __ksym;
-extern void cgroup_rstat_flush(struct cgroup_subsys_state *css) __ksym;
+extern void bpf_cgroup_rstat_updated(struct cgroup *cgrp, int cpu) __ksym;
+extern void bpf_cgroup_rstat_flush(struct cgroup *cgrp) __ksym;
static uint64_t cgroup_id(struct cgroup *cgrp)
{
@@ -75,7 +75,7 @@ int BPF_PROG(counter, struct cgroup *dst_cgrp, struct task_struct *leader,
else if (create_percpu_attach_counter(cg_id, 1))
return 0;
- cgroup_rstat_updated(&dst_cgrp->self, bpf_get_smp_processor_id());
+ bpf_cgroup_rstat_updated(dst_cgrp, bpf_get_smp_processor_id());
return 0;
}
@@ -141,7 +141,7 @@ int BPF_PROG(dumper, struct bpf_iter_meta *meta, struct cgroup *cgrp)
return 1;
/* Flush the stats to make sure we get the most updated numbers */
- cgroup_rstat_flush(&cgrp->self);
+ bpf_cgroup_rstat_flush(cgrp);
total_counter = bpf_map_lookup_elem(&attach_counters, &cg_id);
if (!total_counter) {
The processing of bpf cgroup stats is currently tied to the rstat actions
of other subsystems. Change this so that bpf cgroup stats are updated and
flushed independently.

Give the cgroup_bpf struct its own cgroup_rstat instance and define a new
cgroup_rstat_ops instance specifically for cgroup_bpf. Then remove the
kfunc status from the existing updated/flush API calls and, in their place,
create new updated/flush kfuncs dedicated to bpf cgroups. In these new
kfuncs, use the bpf-specific rstat ops to plumb back into the existing
rstat routines. Where applicable, guard the bpf rstat code with
CONFIG_CGROUP_BPF preprocessor conditionals.

Signed-off-by: JP Kobryn <inwardvessel@gmail.com>
---
 include/linux/bpf-cgroup-defs.h               |  3 +
 include/linux/cgroup.h                        |  3 +
 kernel/bpf/cgroup.c                           |  6 ++
 kernel/cgroup/cgroup-internal.h               |  5 +
 kernel/cgroup/rstat.c                         | 95 ++++++++++++++++---
 .../selftests/bpf/progs/btf_type_tag_percpu.c |  4 +-
 .../bpf/progs/cgroup_hierarchical_stats.c     |  8 +-
 7 files changed, 107 insertions(+), 17 deletions(-)
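
For reference, below is a minimal sketch of how a BPF program would consume
the new kfuncs after this change. The program, map, and counter names are
illustrative assumptions and not part of the patch; the updated selftest
cgroup_hierarchical_stats.c above remains the authoritative example. The
update side calls bpf_cgroup_rstat_updated() on the hot path, the
bpf_rstat_flush() hook folds per-cpu deltas during a flush, and the
sleepable read side calls bpf_cgroup_rstat_flush() before reporting.

// SPDX-License-Identifier: GPL-2.0
/* Illustrative sketch only; names and map layout are assumptions. */
#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

char _license[] SEC("license") = "GPL";

/* kfuncs introduced by this patch */
extern void bpf_cgroup_rstat_updated(struct cgroup *cgrp, int cpu) __ksym;
extern void bpf_cgroup_rstat_flush(struct cgroup *cgrp) __ksym;

/* per-cpu counters keyed by cgroup id (illustrative layout) */
struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_HASH);
	__uint(max_entries, 1024);
	__type(key, __u64);
	__type(value, __u64);
} percpu_counters SEC(".maps");

SEC("fentry/cgroup_attach_task")
int BPF_PROG(count_attach, struct cgroup *dst_cgrp, struct task_struct *leader,
	     bool threadgroup)
{
	__u64 id = dst_cgrp->kn->id, one = 1, *cnt;

	cnt = bpf_map_lookup_elem(&percpu_counters, &id);
	if (cnt)
		*cnt += 1;
	else
		bpf_map_update_elem(&percpu_counters, &id, &one, BPF_NOEXIST);

	/* mark this cgroup as having pending bpf stats on this CPU */
	bpf_cgroup_rstat_updated(dst_cgrp, bpf_get_smp_processor_id());
	return 0;
}

/* invoked for each (cgroup, cpu) with pending updates during a flush */
SEC("fentry/bpf_rstat_flush")
int BPF_PROG(flusher, struct cgroup *cgrp, struct cgroup *parent, int cpu)
{
	/* fold per-cpu deltas into totals / propagate to parent (omitted) */
	return 0;
}

SEC("iter.s/cgroup")
int BPF_PROG(dumper, struct bpf_iter_meta *meta, struct cgroup *cgrp)
{
	if (!cgrp)
		return 1;

	/* sleepable context: flush pending per-cpu updates before reading */
	bpf_cgroup_rstat_flush(cgrp);
	BPF_SEQ_PRINTF(meta->seq, "cgroup %llu flushed\n", cgrp->kn->id);
	return 0;
}

Note that bpf_cgroup_rstat_flush() is registered with KF_SLEEPABLE (it may
block, like cgroup_rstat_flush()), so it may only be called from sleepable
program contexts such as iter.s above.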