
[RFC,bpf-next,v2,1/7] bpf: introduce RSTAT_FLUSH program type

Message ID 20220515023504.1823463-2-yosryahmed@google.com (mailing list archive)
State Superseded
Delegated to: BPF
Headers show
Series bpf: rstat: cgroup hierarchical stats

Checks

Context Check Description
bpf/vmtest-bpf-next-VM_Test-3 fail Logs for Kernel LATEST on z15 with gcc
bpf/vmtest-bpf-next-PR fail PR summary
bpf/vmtest-bpf-next-VM_Test-1 fail Logs for Kernel LATEST on ubuntu-latest with gcc
bpf/vmtest-bpf-next-VM_Test-2 fail Logs for Kernel LATEST on ubuntu-latest with llvm-15
netdev/tree_selection success Clearly marked for bpf-next, async
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success Link
netdev/cover_letter success Series has a cover letter
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit fail Errors and warnings before: 1820 this patch: 1822
netdev/cc_maintainers success CCed 10 of 10 maintainers
netdev/build_clang fail Errors and warnings before: 196 this patch: 198
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn fail Errors and warnings before: 1830 this patch: 1832
netdev/checkpatch warning CHECK: Alignment should match open parenthesis CHECK: Please don't use multiple blank lines WARNING: ENOTSUPP is not a SUSV4 error code, prefer EOPNOTSUPP WARNING: added, moved or deleted file(s), does MAINTAINERS need updating?
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Yosry Ahmed May 15, 2022, 2:34 a.m. UTC
This patch introduces a new BPF program type, RSTAT_FLUSH, along with
new corresponding link and attach types.

These programs act as callbacks that the rstat framework invokes while a
stats flush is in progress. This allows BPF programs to collect and
maintain hierarchical cgroup stats efficiently by integrating with the
rstat framework.
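
As a rough illustration of the BPF side (a minimal sketch only: the
SEC() name, map layout, and the stat being counted are placeholders and
not part of this patch), a flusher program could look like:

  /* Sketch: count how many times each cgroup is flushed, keyed by
   * cgroup id. Assumes a vmlinux.h generated from a kernel that
   * carries this patch, so struct bpf_rstat_flush_ctx is available.
   */
  #include "vmlinux.h"
  #include <bpf/bpf_helpers.h>

  char _license[] SEC("license") = "GPL";

  struct {
  	__uint(type, BPF_MAP_TYPE_HASH);
  	__uint(max_entries, 1024);
  	__type(key, __u64);	/* cgroup id */
  	__type(value, __u64);	/* flush count */
  } flush_counts SEC(".maps");

  SEC("rstat/flush")	/* assumed section name for BPF_PROG_TYPE_RSTAT_FLUSH */
  int count_flushes(struct bpf_rstat_flush_ctx *ctx)
  {
  	struct cgroup *cgrp = ctx->cgrp;	/* PTR_TO_BTF_ID */
  	__u64 key = cgrp->kn->id;
  	__u64 *cnt, one = 1;

  	/* ctx->parent is NULL for the root cgroup and ctx->cpu is the
  	 * cpu being flushed; a real flusher would aggregate per-cpu
  	 * deltas into the parent here.
  	 */
  	cnt = bpf_map_lookup_elem(&flush_counts, &key);
  	if (cnt)
  		__sync_fetch_and_add(cnt, 1);
  	else
  		bpf_map_update_elem(&flush_counts, &key, &one, BPF_NOEXIST);
  	return 0;
  }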

See the selftest in the final patch for a practical example.
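
For orientation, user space attaches such a program by creating a BPF
link with the new BPF_RSTAT_FLUSH attach type; bpf_rstat_link_attach()
requires target_fd and flags to be zero. A rough libbpf sketch (file and
program names are placeholders, not part of this series):

  #include <bpf/bpf.h>
  #include <bpf/libbpf.h>

  /* Returns a link fd on success, -1 on failure. */
  int attach_rstat_flusher(void)
  {
  	struct bpf_object *obj;
  	struct bpf_program *prog;
  	int prog_fd, link_fd;

  	obj = bpf_object__open_file("rstat_flusher.bpf.o", NULL);
  	if (!obj)
  		return -1;

  	prog = bpf_object__find_program_by_name(obj, "count_flushes");
  	if (!prog)
  		goto err;

  	/* Set the type explicitly in case libbpf does not (yet) map a
  	 * section name to BPF_PROG_TYPE_RSTAT_FLUSH.
  	 */
  	bpf_program__set_type(prog, BPF_PROG_TYPE_RSTAT_FLUSH);

  	if (bpf_object__load(obj))
  		goto err;

  	prog_fd = bpf_program__fd(prog);
  	/* bpf_rstat_link_attach() rejects non-zero target_fd or flags */
  	link_fd = bpf_link_create(prog_fd, 0, BPF_RSTAT_FLUSH, NULL);
  	if (link_fd < 0)
  		goto err;

  	return link_fd;
  err:
  	bpf_object__close(obj);
  	return -1;
  }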

Signed-off-by: Yosry Ahmed <yosryahmed@google.com>
---
 include/linux/bpf-rstat.h      |  25 +++++
 include/linux/bpf_types.h      |   4 +
 include/uapi/linux/bpf.h       |   9 ++
 kernel/bpf/Makefile            |   3 +
 kernel/bpf/rstat.c             | 166 +++++++++++++++++++++++++++++++++
 kernel/bpf/syscall.c           |   6 ++
 tools/include/uapi/linux/bpf.h |   9 ++
 7 files changed, 222 insertions(+)
 create mode 100644 include/linux/bpf-rstat.h
 create mode 100644 kernel/bpf/rstat.c

Patch

diff --git a/include/linux/bpf-rstat.h b/include/linux/bpf-rstat.h
new file mode 100644
index 000000000000..23cad23b5fc2
--- /dev/null
+++ b/include/linux/bpf-rstat.h
@@ -0,0 +1,25 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright 2022 Google LLC.
+ */
+#ifndef _BPF_RSTAT_H_
+#define _BPF_RSTAT_H_
+
+#include <linux/bpf.h>
+
+#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_CGROUPS)
+
+int bpf_rstat_link_attach(const union bpf_attr *attr,
+				 struct bpf_prog *prog);
+
+#else /* defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_CGROUPS) */
+
+static inline int bpf_rstat_link_attach(const union bpf_attr *attr,
+					struct bpf_prog *prog)
+{
+	return -ENOTSUPP;
+}
+
+#endif /* defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_CGROUPS) */
+
+#endif /* _BPF_RSTAT_H_ */
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index 2b9112b80171..ff92299f76a9 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -77,6 +77,10 @@  BPF_PROG_TYPE(BPF_PROG_TYPE_LSM, lsm,
 	       void *, void *)
 #endif /* CONFIG_BPF_LSM */
 #endif
+#ifdef CONFIG_CGROUPS
+BPF_PROG_TYPE(BPF_PROG_TYPE_RSTAT_FLUSH, rstat_flush,
+	      struct bpf_rstat_flush_ctx, struct bpf_rstat_flush_ctx)
+#endif /* CONFIG_CGROUPS */
 BPF_PROG_TYPE(BPF_PROG_TYPE_SYSCALL, bpf_syscall,
 	      void *, void *)
 
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 0210f85131b3..968e3cb02580 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -952,6 +952,7 @@  enum bpf_prog_type {
 	BPF_PROG_TYPE_LSM,
 	BPF_PROG_TYPE_SK_LOOKUP,
 	BPF_PROG_TYPE_SYSCALL, /* a program that can execute syscalls */
+	BPF_PROG_TYPE_RSTAT_FLUSH,
 };
 
 enum bpf_attach_type {
@@ -998,6 +999,7 @@  enum bpf_attach_type {
 	BPF_SK_REUSEPORT_SELECT_OR_MIGRATE,
 	BPF_PERF_EVENT,
 	BPF_TRACE_KPROBE_MULTI,
+	BPF_RSTAT_FLUSH,
 	__MAX_BPF_ATTACH_TYPE
 };
 
@@ -1014,6 +1016,7 @@  enum bpf_link_type {
 	BPF_LINK_TYPE_PERF_EVENT = 7,
 	BPF_LINK_TYPE_KPROBE_MULTI = 8,
 	BPF_LINK_TYPE_STRUCT_OPS = 9,
+	BPF_LINK_TYPE_RSTAT = 10,
 
 	MAX_BPF_LINK_TYPE,
 };
@@ -6359,6 +6362,12 @@  struct bpf_cgroup_dev_ctx {
 	__u32 minor;
 };
 
+struct bpf_rstat_flush_ctx {
+	__bpf_md_ptr(struct cgroup *, cgrp);
+	__bpf_md_ptr(struct cgroup *, parent);
+	__s32 cpu;
+};
+
 struct bpf_raw_tracepoint_args {
 	__u64 args[0];
 };
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index 057ba8e01e70..0487133b799f 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -36,6 +36,9 @@  obj-$(CONFIG_BPF_SYSCALL) += bpf_struct_ops.o
 obj-${CONFIG_BPF_LSM} += bpf_lsm.o
 endif
 obj-$(CONFIG_BPF_PRELOAD) += preload/
+ifeq ($(CONFIG_CGROUPS),y)
+obj-$(CONFIG_BPF_SYSCALL) += rstat.o
+endif
 
 obj-$(CONFIG_BPF_SYSCALL) += relo_core.o
 $(obj)/relo_core.o: $(srctree)/tools/lib/bpf/relo_core.c FORCE
diff --git a/kernel/bpf/rstat.c b/kernel/bpf/rstat.c
new file mode 100644
index 000000000000..5f529002d4b9
--- /dev/null
+++ b/kernel/bpf/rstat.c
@@ -0,0 +1,166 @@ 
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Functions to manage eBPF programs attached to cgroup subsystems
+ *
+ * Copyright 2022 Google LLC.
+ */
+
+#include <linux/bpf-rstat.h>
+#include <linux/btf_ids.h>
+#include <linux/cgroup.h>
+#include <linux/filter.h>
+
+static LIST_HEAD(bpf_rstat_flushers);
+static DEFINE_SPINLOCK(bpf_rstat_flushers_lock);
+
+
+struct bpf_rstat_flusher {
+	struct bpf_prog *prog;
+	/* List of BPF rstat flushers, anchored at bpf_rstat_flushers */
+	struct list_head list;
+};
+
+struct bpf_rstat_link {
+	struct bpf_link link;
+	struct bpf_rstat_flusher *flusher;
+};
+
+static int bpf_rstat_flush_attach(struct bpf_prog *prog,
+				  struct bpf_rstat_link *rlink)
+{
+	struct bpf_rstat_flusher *flusher;
+
+	flusher = kmalloc(sizeof(*flusher), GFP_KERNEL);
+	if (!flusher)
+		return -ENOMEM;
+
+	flusher->prog = prog;
+	rlink->flusher = flusher;
+
+	spin_lock(&bpf_rstat_flushers_lock);
+	list_add(&flusher->list, &bpf_rstat_flushers);
+	spin_unlock(&bpf_rstat_flushers_lock);
+
+	return 0;
+}
+
+static void bpf_rstat_flush_detach(struct bpf_rstat_link *rstat_link)
+{
+	struct bpf_rstat_flusher *flusher = rstat_link->flusher;
+
+	if (!flusher)
+		return;
+
+	spin_lock(&bpf_rstat_flushers_lock);
+	list_del(&flusher->list);
+	bpf_prog_put(flusher->prog);
+	kfree(flusher);
+	spin_unlock(&bpf_rstat_flushers_lock);
+}
+
+static const struct bpf_func_proto *
+bpf_rstat_flush_func_proto(enum bpf_func_id func_id,
+			   const struct bpf_prog *prog)
+{
+	return bpf_base_func_proto(func_id);
+}
+
+BTF_ID_LIST_SINGLE(bpf_cgroup_btf_ids, struct, cgroup)
+
+static bool bpf_rstat_flush_is_valid_access(int off, int size,
+					    enum bpf_access_type type,
+					    const struct bpf_prog *prog,
+					    struct bpf_insn_access_aux *info)
+{
+	if (type == BPF_WRITE)
+		return false;
+
+	if (off < 0 || off + size > sizeof(struct bpf_rstat_flush_ctx))
+		return false;
+	/* The verifier guarantees that size > 0 */
+	if (off % size != 0)
+		return false;
+
+	switch (off) {
+	case bpf_ctx_range_ptr(struct bpf_rstat_flush_ctx, cgrp):
+		info->reg_type = PTR_TO_BTF_ID;
+		info->btf_id = bpf_cgroup_btf_ids[0];
+		info->btf = bpf_get_btf_vmlinux();
+		return !IS_ERR(info->btf) && info->btf && size == sizeof(__u64);
+	case bpf_ctx_range_ptr(struct bpf_rstat_flush_ctx, parent):
+		info->reg_type = PTR_TO_BTF_ID_OR_NULL;
+		info->btf_id = bpf_cgroup_btf_ids[0];
+		info->btf = bpf_get_btf_vmlinux();
+		return !IS_ERR(info->btf) && info->btf && size == sizeof(__u64);
+	case bpf_ctx_range(struct bpf_rstat_flush_ctx, cpu):
+		return size == sizeof(__s32);
+	default:
+		return false;
+	}
+}
+
+const struct bpf_prog_ops rstat_flush_prog_ops = {
+};
+
+const struct bpf_verifier_ops rstat_flush_verifier_ops = {
+	.get_func_proto         = bpf_rstat_flush_func_proto,
+	.is_valid_access        = bpf_rstat_flush_is_valid_access,
+};
+
+static void bpf_rstat_link_release(struct bpf_link *link)
+{
+	struct bpf_rstat_link *rlink;
+
+	rlink = container_of(link,
+			     struct bpf_rstat_link,
+			     link);
+
+	/* rstat flushers are currently the only supported rstat programs */
+	bpf_rstat_flush_detach(rlink);
+}
+
+static void bpf_rstat_link_dealloc(struct bpf_link *link)
+{
+	struct bpf_rstat_link *rlink = container_of(link,
+						    struct bpf_rstat_link,
+						    link);
+	kfree(rlink);
+}
+
+static const struct bpf_link_ops bpf_rstat_link_lops = {
+	.release = bpf_rstat_link_release,
+	.dealloc = bpf_rstat_link_dealloc,
+};
+
+int bpf_rstat_link_attach(const union bpf_attr *attr,
+			  struct bpf_prog *prog)
+{
+	struct bpf_link_primer link_primer;
+	struct bpf_rstat_link *link;
+	int err;
+
+	if (attr->link_create.target_fd || attr->link_create.flags)
+		return -EINVAL;
+
+	link = kzalloc(sizeof(*link), GFP_USER);
+	if (!link)
+		return -ENOMEM;
+
+	bpf_link_init(&link->link, BPF_LINK_TYPE_RSTAT,
+		      &bpf_rstat_link_lops, prog);
+
+	err = bpf_link_prime(&link->link, &link_primer);
+	if (err) {
+		kfree(link);
+		return err;
+	}
+
+	/* rstat flushers are currently the only supported rstat programs */
+	err = bpf_rstat_flush_attach(prog, link);
+	if (err) {
+		bpf_link_cleanup(&link_primer);
+		return err;
+	}
+
+	return bpf_link_settle(&link_primer);
+}
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 72e53489165d..ffeed8379b35 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -3,6 +3,7 @@ 
  */
 #include <linux/bpf.h>
 #include <linux/bpf-cgroup.h>
+#include <linux/bpf-rstat.h>
 #include <linux/bpf_trace.h>
 #include <linux/bpf_lirc.h>
 #include <linux/bpf_verifier.h>
@@ -3416,6 +3417,8 @@  attach_type_to_prog_type(enum bpf_attach_type attach_type)
 		return BPF_PROG_TYPE_SK_LOOKUP;
 	case BPF_XDP:
 		return BPF_PROG_TYPE_XDP;
+	case BPF_RSTAT_FLUSH:
+		return BPF_PROG_TYPE_RSTAT_FLUSH;
 	default:
 		return BPF_PROG_TYPE_UNSPEC;
 	}
@@ -4564,6 +4567,9 @@  static int link_create(union bpf_attr *attr, bpfptr_t uattr)
 		else
 			ret = bpf_kprobe_multi_link_attach(attr, prog);
 		break;
+	case BPF_PROG_TYPE_RSTAT_FLUSH:
+		ret = bpf_rstat_link_attach(attr, prog);
+		break;
 	default:
 		ret = -EINVAL;
 	}
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 0210f85131b3..968e3cb02580 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -952,6 +952,7 @@  enum bpf_prog_type {
 	BPF_PROG_TYPE_LSM,
 	BPF_PROG_TYPE_SK_LOOKUP,
 	BPF_PROG_TYPE_SYSCALL, /* a program that can execute syscalls */
+	BPF_PROG_TYPE_RSTAT_FLUSH,
 };
 
 enum bpf_attach_type {
@@ -998,6 +999,7 @@  enum bpf_attach_type {
 	BPF_SK_REUSEPORT_SELECT_OR_MIGRATE,
 	BPF_PERF_EVENT,
 	BPF_TRACE_KPROBE_MULTI,
+	BPF_RSTAT_FLUSH,
 	__MAX_BPF_ATTACH_TYPE
 };
 
@@ -1014,6 +1016,7 @@  enum bpf_link_type {
 	BPF_LINK_TYPE_PERF_EVENT = 7,
 	BPF_LINK_TYPE_KPROBE_MULTI = 8,
 	BPF_LINK_TYPE_STRUCT_OPS = 9,
+	BPF_LINK_TYPE_RSTAT = 10,
 
 	MAX_BPF_LINK_TYPE,
 };
@@ -6359,6 +6362,12 @@  struct bpf_cgroup_dev_ctx {
 	__u32 minor;
 };
 
+struct bpf_rstat_flush_ctx {
+	__bpf_md_ptr(struct cgroup *, cgrp);
+	__bpf_md_ptr(struct cgroup *, parent);
+	__s32 cpu;
+};
+
 struct bpf_raw_tracepoint_args {
 	__u64 args[0];
 };