Message ID | 20210210033634.62081-2-alexei.starovoitov@gmail.com (mailing list archive) |
---|---|
State | Accepted |
Commit | 700d4796ef59f5faf240d307839bd419e2b6bdff |
Delegated to: | BPF |
Headers | show |
Series | bpf: Misc improvements | expand |
Context | Check | Description |
---|---|---|
netdev/cover_letter | success | Link |
netdev/fixes_present | success | Link |
netdev/patch_count | success | Link |
netdev/tree_selection | success | Clearly marked for bpf-next |
netdev/subject_prefix | success | Link |
netdev/cc_maintainers | warning | 8 maintainers not CCed: yhs@fb.com kafai@fb.com netdev@vger.kernel.org ast@kernel.org songliubraving@fb.com john.fastabend@gmail.com kpsingh@kernel.org andrii@kernel.org |
netdev/source_inline | success | Was 0 now: 0 |
netdev/verify_signedoff | success | Link |
netdev/module_param | success | Was 0 now: 0 |
netdev/build_32bit | success | Errors and warnings before: 12175 this patch: 12175 |
netdev/kdoc | success | Errors and warnings before: 0 this patch: 0 |
netdev/verify_fixes | success | Link |
netdev/checkpatch | success | total: 0 errors, 0 warnings, 0 checks, 119 lines checked |
netdev/build_allmodconfig_warn | success | Errors and warnings before: 12823 this patch: 12823 |
netdev/header_inline | success | Link |
netdev/stable | success | Stable not CCed |
On Tue, 2021-02-09 at 19:36 -0800, Alexei Starovoitov wrote: > From: Alexei Starovoitov <ast@kernel.org> > > Move bpf_prog_stats from prog->aux into prog to avoid one extra load > in critical path of program execution. > > Signed-off-by: Alexei Starovoitov <ast@kernel.org> > Acked-by: Andrii Nakryiko <andrii@kernel.org> > --- > include/linux/bpf.h | 8 -------- > include/linux/filter.h | 14 +++++++++++--- > kernel/bpf/core.c | 8 ++++---- > kernel/bpf/syscall.c | 2 +- > kernel/bpf/trampoline.c | 2 +- > kernel/bpf/verifier.c | 2 +- > 6 files changed, 18 insertions(+), 18 deletions(-) ... > @@ -249,10 +249,10 @@ void __bpf_prog_free(struct bpf_prog *fp) > if (fp->aux) { > mutex_destroy(&fp->aux->used_maps_mutex); > mutex_destroy(&fp->aux->dst_mutex); > - free_percpu(fp->aux->stats); > kfree(fp->aux->poke_tab); > kfree(fp->aux); > } > + free_percpu(fp->stats); On s390 this line causes the following in "ld_abs: vlan + abs, test 1" with the latest bpf-next: Unable to handle kernel pointer dereference in virtual kernel address space Failing address: 0000000000000000 TEID: 0000000000000483 Fault in home space mode while using kernel ASCE. AS:0000000001bd0007 R3:00000001ffff0007 S:00000001ffffd000 P:000000000000003d Oops: 0004 ilc:2 [#1] SMP Modules linked in: CPU: 0 PID: 184 Comm: test_verifier Not tainted 5.11.0-rc4-00952- g6fdd671baaf5 #7 Hardware name: IBM 3906 M03 703 (KVM/Linux) Krnl PSW : 0404c00180000000 000000000042707a (refill_obj_stock+0x11a/0x1e0) R:0 T:1 IO:0 EX:0 Key:0 M:1 W:0 P:0 AS:3 CC:0 PM:0 RI:0 EA:3 Krnl GPRS: 0000000000000000 0000000000000000 0000000000000018 0000000100000000 0000000000000000 000000008764ca88 00000000013d3ff8 000000000141d140 0000000000000080 0000000000000000 0000000000000000 00000001ff61c8f0 000000008764c000 00000000012eb678 0000000000427066 00000380001bb888 Krnl Code: 0000000000427070: a7380000 lhi %r3,0 0000000000427074: 5810a018 l %r1,24(%r10) #0000000000427078: 1841 lr %r4,%r1 >000000000042707a: ba432000 cs %r4,%r3,0(%r2) 000000000042707e: a774fffb brc 7,0000000000427074 0000000000427082: 1a18 ar %r1,%r8 0000000000427084: 5010b018 st %r1,24(%r11) 0000000000427088: c21f00001000 clfi %r1,4096 Call Trace: [<000000000042707a>] refill_obj_stock+0x11a/0x1e0 ([<0000000000427066>] refill_obj_stock+0x106/0x1e0) [<000000000039bd86>] free_percpu.part.0+0xd6/0x428 [<00000000002ef738>] bpf_prog_realloc+0xa0/0xd8 [<00000000002efae8>] bpf_patch_insn_single+0x88/0x208 [<000000000030762e>] bpf_patch_insn_data+0x36/0x290 [<00000000003086ca>] fixup_bpf_calls+0x572/0xa28 [<000000000031045c>] bpf_check+0xb44/0xcb8 [<00000000002f747a>] bpf_prog_load+0x5fa/0x968 [<00000000002fa25c>] __do_sys_bpf+0x634/0x700 [<0000000000a2f3ca>] system_call+0xe2/0x28c INFO: lockdep is turned off. Last Breaking-Event-Address: [<0000000000203f76>] lock_release+0x6e/0x218 Kernel panic - not syncing: Fatal exception: panic_on_oops Here is the better backtrace (line numbers correspond to commit 6fdd671baaf5): #0 refill_obj_stock (objcg=objcg@entry=0x0, nr_bytes=<optimized out>) at mm/memcontrol.c:3248 #1 0x0000000000427a08 in obj_cgroup_uncharge (objcg=objcg@entry=0x0, size=<optimized out>) at mm/memcontrol.c:3300 #2 0x000000000039bd86 in pcpu_memcg_free_hook (size=32, off=<optimized out>, chunk=0x82d4fa00) at ./include/linux/bitmap.h:400 #3 free_percpu (ptr=0x3fd813b5960) at mm/percpu.c:2105 #4 0x000000000039c0ec in free_percpu (ptr=<optimized out>) at mm/percpu.c:2089 #5 0x00000000002ef738 in __bpf_prog_free (fp=0x380001ce000) at kernel/bpf/core.c:262 #6 bpf_prog_realloc (fp_old=fp_old@entry=0x380001ce000, size=249856, size@entry=245776, gfp_extra_flags=gfp_extra_flags@entry=1051840) at kernel/bpf/core.c:248 #7 0x00000000002efae8 in bpf_patch_insn_single (prog=0x380001ce000, off=off@entry=2205, patch=patch@entry=0x380001bbba0, len=len@entry=6) at ./include/linux/filter.h:788 #8 0x000000000030762e in bpf_patch_insn_data (env=env@entry=0x87566000, off=off@entry=2205, patch=patch@entry=0x380001bbba0, len=<optimized out>) at kernel/bpf/verifier.c:10669 #9 0x00000000003086ca in fixup_bpf_calls (env=env@entry=0x87566000) at kernel/bpf/verifier.c:11539 #10 0x000000000031045c in bpf_check (prog=prog@entry=0x380001bbda0, attr=attr@entry=0x380001bbe80, uattr=uattr@entry=0x3ffe66fe9d0) at kernel/bpf/verifier.c:12573 #11 0x00000000002f747a in bpf_prog_load (attr=attr@entry=0x380001bbe80, uattr=uattr@entry=0x3ffe66fe9d0) at kernel/bpf/syscall.c:2209 #12 0x00000000002fa25c in __do_sys_bpf (cmd=<optimized out>, uattr=0x3ffe66fe9d0, size=120) at kernel/bpf/syscall.c:4388 #13 0x0000000000a2f3ca in system_call () at arch/s390/kernel/entry.S:439 So we end up with objcg=NULL, but I'm not sure why this happens. Please let me know if you need more info.
On Thu, Feb 11, 2021 at 7:26 PM Ilya Leoshkevich <iii@linux.ibm.com> wrote: > mm/percpu.c:2089 > #5 0x00000000002ef738 in __bpf_prog_free (fp=0x380001ce000) at > kernel/bpf/core.c:262 > #6 bpf_prog_realloc (fp_old=fp_old@entry=0x380001ce000, size=249856, > > So we end up with objcg=NULL, but I'm not sure why this happens. > Please let me know if you need more info. Argh. Thanks for reporting! Pushed the obvious fix: https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/commit/?id=1336c662474edec3966c96c8de026f794d16b804 Pls pull bpf-next and give it a spin.
On Thu, 2021-02-11 at 19:43 -0800, Alexei Starovoitov wrote: > On Thu, Feb 11, 2021 at 7:26 PM Ilya Leoshkevich <iii@linux.ibm.com> > wrote: > > mm/percpu.c:2089 > > #5 0x00000000002ef738 in __bpf_prog_free (fp=0x380001ce000) at > > kernel/bpf/core.c:262 > > #6 bpf_prog_realloc (fp_old=fp_old@entry=0x380001ce000, > > size=249856, > > > > So we end up with objcg=NULL, but I'm not sure why this happens. > > Please let me know if you need more info. > > Argh. Thanks for reporting! > Pushed the obvious fix: > https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/commit/?id=1336c662474edec3966c96c8de026f794d16b804 > Pls pull bpf-next and give it a spin. Works now, thanks for the very quick fix!
diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 321966fc35db..026fa8873c5d 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -14,7 +14,6 @@ #include <linux/numa.h> #include <linux/mm_types.h> #include <linux/wait.h> -#include <linux/u64_stats_sync.h> #include <linux/refcount.h> #include <linux/mutex.h> #include <linux/module.h> @@ -507,12 +506,6 @@ enum bpf_cgroup_storage_type { */ #define MAX_BPF_FUNC_ARGS 12 -struct bpf_prog_stats { - u64 cnt; - u64 nsecs; - struct u64_stats_sync syncp; -} __aligned(2 * sizeof(u64)); - struct btf_func_model { u8 ret_size; u8 nr_args; @@ -845,7 +838,6 @@ struct bpf_prog_aux { u32 linfo_idx; u32 num_exentries; struct exception_table_entry *extable; - struct bpf_prog_stats __percpu *stats; union { struct work_struct work; struct rcu_head rcu; diff --git a/include/linux/filter.h b/include/linux/filter.h index 5b3137d7b690..cecb03c9d251 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -22,6 +22,7 @@ #include <linux/vmalloc.h> #include <linux/sockptr.h> #include <crypto/sha1.h> +#include <linux/u64_stats_sync.h> #include <net/sch_generic.h> @@ -539,6 +540,12 @@ struct bpf_binary_header { u8 image[] __aligned(BPF_IMAGE_ALIGNMENT); }; +struct bpf_prog_stats { + u64 cnt; + u64 nsecs; + struct u64_stats_sync syncp; +} __aligned(2 * sizeof(u64)); + struct bpf_prog { u16 pages; /* Number of allocated pages */ u16 jited:1, /* Is our filter JIT'ed? */ @@ -557,10 +564,11 @@ struct bpf_prog { u32 len; /* Number of filter blocks */ u32 jited_len; /* Size of jited insns in bytes */ u8 tag[BPF_TAG_SIZE]; - struct bpf_prog_aux *aux; /* Auxiliary fields */ - struct sock_fprog_kern *orig_prog; /* Original BPF program */ + struct bpf_prog_stats __percpu *stats; unsigned int (*bpf_func)(const void *ctx, const struct bpf_insn *insn); + struct bpf_prog_aux *aux; /* Auxiliary fields */ + struct sock_fprog_kern *orig_prog; /* Original BPF program */ /* Instructions for interpreter */ struct sock_filter insns[0]; struct bpf_insn insnsi[]; @@ -581,7 +589,7 @@ DECLARE_STATIC_KEY_FALSE(bpf_stats_enabled_key); struct bpf_prog_stats *__stats; \ u64 __start = sched_clock(); \ __ret = dfunc(ctx, (prog)->insnsi, (prog)->bpf_func); \ - __stats = this_cpu_ptr(prog->aux->stats); \ + __stats = this_cpu_ptr(prog->stats); \ u64_stats_update_begin(&__stats->syncp); \ __stats->cnt++; \ __stats->nsecs += sched_clock() - __start; \ diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 5bbd4884ff7a..2cf71fd39c22 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -114,8 +114,8 @@ struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags) if (!prog) return NULL; - prog->aux->stats = alloc_percpu_gfp(struct bpf_prog_stats, gfp_flags); - if (!prog->aux->stats) { + prog->stats = alloc_percpu_gfp(struct bpf_prog_stats, gfp_flags); + if (!prog->stats) { kfree(prog->aux); vfree(prog); return NULL; @@ -124,7 +124,7 @@ struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags) for_each_possible_cpu(cpu) { struct bpf_prog_stats *pstats; - pstats = per_cpu_ptr(prog->aux->stats, cpu); + pstats = per_cpu_ptr(prog->stats, cpu); u64_stats_init(&pstats->syncp); } return prog; @@ -249,10 +249,10 @@ void __bpf_prog_free(struct bpf_prog *fp) if (fp->aux) { mutex_destroy(&fp->aux->used_maps_mutex); mutex_destroy(&fp->aux->dst_mutex); - free_percpu(fp->aux->stats); kfree(fp->aux->poke_tab); kfree(fp->aux); } + free_percpu(fp->stats); vfree(fp); } diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index e5999d86c76e..f7df56a704de 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1739,7 +1739,7 @@ static void bpf_prog_get_stats(const struct bpf_prog *prog, unsigned int start; u64 tnsecs, tcnt; - st = per_cpu_ptr(prog->aux->stats, cpu); + st = per_cpu_ptr(prog->stats, cpu); do { start = u64_stats_fetch_begin_irq(&st->syncp); tnsecs = st->nsecs; diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c index 35c5887d82ff..5be3beeedd74 100644 --- a/kernel/bpf/trampoline.c +++ b/kernel/bpf/trampoline.c @@ -412,7 +412,7 @@ void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start) * Hence check that 'start' is not zero. */ start) { - stats = this_cpu_ptr(prog->aux->stats); + stats = this_cpu_ptr(prog->stats); u64_stats_update_begin(&stats->syncp); stats->cnt++; stats->nsecs += sched_clock() - start; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 15694246f854..4189edb41b73 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -10889,7 +10889,7 @@ static int jit_subprogs(struct bpf_verifier_env *env) /* BPF_PROG_RUN doesn't call subprogs directly, * hence main prog stats include the runtime of subprogs. * subprogs don't have IDs and not reachable via prog_get_next_id - * func[i]->aux->stats will never be accessed and stays NULL + * func[i]->stats will never be accessed and stays NULL */ func[i] = bpf_prog_alloc_no_stats(bpf_prog_size(len), GFP_USER); if (!func[i])