
[RFC,bpf-next,v2,2/7] cgroup: bpf: flush bpf stats on rstat flush

Message ID 20220515023504.1823463-3-yosryahmed@google.com (mailing list archive)
State Superseded
Delegated to: BPF
Series bpf: rstat: cgroup hierarchical stats

Checks

Context Check Description
bpf/vmtest-bpf-next-VM_Test-3 fail Logs for Kernel LATEST on z15 with gcc
bpf/vmtest-bpf-next-PR fail PR summary
bpf/vmtest-bpf-next-VM_Test-1 fail Logs for Kernel LATEST on ubuntu-latest with gcc
bpf/vmtest-bpf-next-VM_Test-2 fail Logs for Kernel LATEST on ubuntu-latest with llvm-15
netdev/tree_selection success Clearly marked for bpf-next, async
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success Link
netdev/cover_letter success Series has a cover letter
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 15 this patch: 15
netdev/cc_maintainers success CCed 14 of 14 maintainers
netdev/build_clang success Errors and warnings before: 11 this patch: 11
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 15 this patch: 15
netdev/checkpatch warning CHECK: No space is necessary after a cast
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Yosry Ahmed May 15, 2022, 2:34 a.m. UTC
When an rstat flush is ongoing for a cgroup, also flush bpf stats by
running any attached rstat flush programs.

Signed-off-by: Yosry Ahmed <yosryahmed@google.com>
---
 include/linux/bpf-rstat.h |  6 ++++++
 kernel/bpf/rstat.c        | 21 +++++++++++++++++++++
 kernel/cgroup/rstat.c     |  2 ++
 3 files changed, 29 insertions(+)

Comments

Alexei Starovoitov May 17, 2022, 2:08 a.m. UTC | #1
On Sun, May 15, 2022 at 02:34:59AM +0000, Yosry Ahmed wrote:
> +
> +void bpf_rstat_flush(struct cgroup *cgrp, int cpu)
> +{
> +	struct bpf_rstat_flusher *flusher;
> +	struct bpf_rstat_flush_ctx ctx = {
> +		.cgrp = cgrp,
> +		.parent = cgroup_parent(cgrp),
> +		.cpu = cpu,
> +	};
> +
> +	rcu_read_lock();
> +	migrate_disable();
> +	spin_lock(&bpf_rstat_flushers_lock);
> +
> +	list_for_each_entry(flusher, &bpf_rstat_flushers, list)
> +		(void) bpf_prog_run(flusher->prog, &ctx);
> +
> +	spin_unlock(&bpf_rstat_flushers_lock);
> +	migrate_enable();
> +	rcu_read_unlock();
> +}
> diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c
> index 24b5c2ab5598..0285d496e807 100644
> --- a/kernel/cgroup/rstat.c
> +++ b/kernel/cgroup/rstat.c
> @@ -2,6 +2,7 @@
>  #include "cgroup-internal.h"
>  
>  #include <linux/sched/cputime.h>
> +#include <linux/bpf-rstat.h>
>  
>  static DEFINE_SPINLOCK(cgroup_rstat_lock);
>  static DEFINE_PER_CPU(raw_spinlock_t, cgroup_rstat_cpu_lock);
> @@ -168,6 +169,7 @@ static void cgroup_rstat_flush_locked(struct cgroup *cgrp, bool may_sleep)
>  			struct cgroup_subsys_state *css;
>  
>  			cgroup_base_stat_flush(pos, cpu);
> +			bpf_rstat_flush(pos, cpu);

Please use the following approach instead:

__weak noinline void bpf_rstat_flush(struct cgroup *cgrp, struct cgroup *parent, int cpu)
{
}

and change above line to:
  bpf_rstat_flush(pos, cgroup_parent(pos), cpu);

Then tracing bpf fentry progs will be able to attach to bpf_rstat_flush.
Pretty much the patches 1, 2, 3 are not necessary.
In patch 4 add bpf_cgroup_rstat_updated/flush as two kfuncs instead of stable helpers.

This way patches 1,2,3,4 will become 2 trivial patches and we will be
able to extend the interface between cgroup rstat and bpf whenever we need
without worrying about uapi stability.

We had a similar discussion with the HID subsystem, which plans to use bpf in HID
with the same approach.
See this patch set:
https://lore.kernel.org/bpf/20220421140740.459558-2-benjamin.tissoires@redhat.com/
You'd need patch 1 from it to enable kfuncs for tracing.
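
For illustration, with that enabler in place, registering the two rstat entry
points as kfuncs could look roughly like the sketch below. This is a minimal
sketch, not the series' actual code: it assumes the BTF kfunc registration API
as it existed around this time and that kfuncs are enabled for
BPF_PROG_TYPE_TRACING (patch 1 of the HID set above); the set and initcall
names are illustrative, and it registers the existing
cgroup_rstat_updated()/cgroup_rstat_flush() functions rather than new wrappers:

#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/btf_ids.h>
#include <linux/init.h>
#include <linux/module.h>

/* Expose the existing rstat entry points to tracing BPF programs as
 * unstable kfuncs instead of stable UAPI helpers.
 */
BTF_SET_START(bpf_rstat_kfunc_ids)
BTF_ID(func, cgroup_rstat_updated)
BTF_ID(func, cgroup_rstat_flush)
BTF_SET_END(bpf_rstat_kfunc_ids)

static const struct btf_kfunc_id_set bpf_rstat_kfunc_set = {
	.owner		= THIS_MODULE,
	.check_set	= &bpf_rstat_kfunc_ids,
};

static int __init bpf_rstat_kfunc_init(void)
{
	return register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING,
					 &bpf_rstat_kfunc_set);
}
late_initcall(bpf_rstat_kfunc_init);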

Your patch 5 is needed as-is.
Yonghong,
please review it.
Different approach for patch 1-4 won't affect patch 5.
Patches 6 and 7 look good.

With this approach, patch 7 will mostly stay as-is. Instead of:
+SEC("rstat/flush")
+int vmscan_flush(struct bpf_rstat_flush_ctx *ctx)
+{
+       struct vmscan_percpu *pcpu_stat;
+       struct vmscan *total_stat, *parent_stat;
+       struct cgroup *cgrp = ctx->cgrp, *parent = ctx->parent;

it will become

SEC("fentry/bpf_rstat_flush")
int BPF_PROG(vmscan_flush, struct cgroup *cgrp, struct cgroup *parent, int cpu)
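
Because the __weak noinline stub survives as a real symbol, fentry programs
can attach to it by name. A fleshed-out flusher along those lines might look
like the following sketch; the map layout, struct vmscan, and the placeholder
increment are illustrative (loosely following patch 7's naming), not the
series' actual program:

#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

char _license[] SEC("license") = "GPL";

/* Illustrative per-cgroup aggregate, keyed by cgroup id; the real
 * series tracks vmscan latency collected on the update path.
 */
struct vmscan {
	__u64 state;
};

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, 1024);
	__type(key, __u64);
	__type(value, struct vmscan);
} vmscan_stats SEC(".maps");

SEC("fentry/bpf_rstat_flush")
int BPF_PROG(vmscan_flush, struct cgroup *cgrp, struct cgroup *parent, int cpu)
{
	/* fentry programs can dereference BTF pointers directly */
	__u64 id = cgrp->kn->id;
	struct vmscan *stat;

	stat = bpf_map_lookup_elem(&vmscan_stats, &id);
	if (!stat)
		return 0;
	/* The real flusher folds this cgroup's per-cpu delta into its
	 * total and propagates it to @parent; a plain increment stands
	 * in for that logic here.
	 */
	__sync_fetch_and_add(&stat->state, 1);
	return 0;
}
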
Yosry Ahmed May 17, 2022, 9:51 p.m. UTC | #2
On Mon, May 16, 2022 at 7:08 PM Alexei Starovoitov
<alexei.starovoitov@gmail.com> wrote:
>
> On Sun, May 15, 2022 at 02:34:59AM +0000, Yosry Ahmed wrote:
> > [...]
>
> Please use the following approach instead:
>
> __weak noinline void bpf_rstat_flush(struct cgroup *cgrp, struct cgroup *parent, int cpu)
> {
> }
>
> and change above line to:
>   bpf_rstat_flush(pos, cgroup_parent(pos), cpu);
>
> [...]

Thanks so much for taking the time to look into this.

Indeed, this approach looks cleaner and simpler. I will incorporate
that into a V1 and send it. Thanks!

Patch

diff --git a/include/linux/bpf-rstat.h b/include/linux/bpf-rstat.h
index 23cad23b5fc2..55e000fe0f47 100644
--- a/include/linux/bpf-rstat.h
+++ b/include/linux/bpf-rstat.h
@@ -12,6 +12,8 @@
 int bpf_rstat_link_attach(const union bpf_attr *attr,
 				 struct bpf_prog *prog);
 
+void bpf_rstat_flush(struct cgroup *cgrp, int cpu);
+
 #else /* defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_CGROUPS) */
 
 static inline int bpf_rstat_link_attach(const union bpf_attr *attr,
@@ -20,6 +22,10 @@ static inline int bpf_rstat_link_attach(const union bpf_attr *attr,
 	return -ENOTSUPP;
 }
 
+static inline void bpf_rstat_flush(struct cgroup *cgrp, int cpu)
+{
+}
+
 #endif /* defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_CGROUPS) */
 
 #endif  /* _BPF_RSTAT */
diff --git a/kernel/bpf/rstat.c b/kernel/bpf/rstat.c
index 5f529002d4b9..e96bc080f4b9 100644
--- a/kernel/bpf/rstat.c
+++ b/kernel/bpf/rstat.c
@@ -164,3 +164,24 @@ int bpf_rstat_link_attach(const union bpf_attr *attr,
 
 	return bpf_link_settle(&link_primer);
 }
+
+void bpf_rstat_flush(struct cgroup *cgrp, int cpu)
+{
+	struct bpf_rstat_flusher *flusher;
+	struct bpf_rstat_flush_ctx ctx = {
+		.cgrp = cgrp,
+		.parent = cgroup_parent(cgrp),
+		.cpu = cpu,
+	};
+
+	rcu_read_lock();
+	migrate_disable();
+	spin_lock(&bpf_rstat_flushers_lock);
+
+	list_for_each_entry(flusher, &bpf_rstat_flushers, list)
+		(void) bpf_prog_run(flusher->prog, &ctx);
+
+	spin_unlock(&bpf_rstat_flushers_lock);
+	migrate_enable();
+	rcu_read_unlock();
+}
diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c
index 24b5c2ab5598..0285d496e807 100644
--- a/kernel/cgroup/rstat.c
+++ b/kernel/cgroup/rstat.c
@@ -2,6 +2,7 @@
 #include "cgroup-internal.h"
 
 #include <linux/sched/cputime.h>
+#include <linux/bpf-rstat.h>
 
 static DEFINE_SPINLOCK(cgroup_rstat_lock);
 static DEFINE_PER_CPU(raw_spinlock_t, cgroup_rstat_cpu_lock);
@@ -168,6 +169,7 @@ static void cgroup_rstat_flush_locked(struct cgroup *cgrp, bool may_sleep)
 			struct cgroup_subsys_state *css;
 
 			cgroup_base_stat_flush(pos, cpu);
+			bpf_rstat_flush(pos, cpu);
 
 			rcu_read_lock();
 			list_for_each_entry_rcu(css, &pos->rstat_css_list,