diff mbox series

[v4,bpf-next,1/9] bpf: Optimize program stats

Message ID 20210210033634.62081-2-alexei.starovoitov@gmail.com (mailing list archive)
State Accepted
Commit 700d4796ef59f5faf240d307839bd419e2b6bdff
Delegated to: BPF
Headers show
Series bpf: Misc improvements | expand

Checks

Context Check Description
netdev/cover_letter success Link
netdev/fixes_present success Link
netdev/patch_count success Link
netdev/tree_selection success Clearly marked for bpf-next
netdev/subject_prefix success Link
netdev/cc_maintainers warning 8 maintainers not CCed: yhs@fb.com kafai@fb.com netdev@vger.kernel.org ast@kernel.org songliubraving@fb.com john.fastabend@gmail.com kpsingh@kernel.org andrii@kernel.org
netdev/source_inline success Was 0 now: 0
netdev/verify_signedoff success Link
netdev/module_param success Was 0 now: 0
netdev/build_32bit success Errors and warnings before: 12175 this patch: 12175
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/verify_fixes success Link
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 119 lines checked
netdev/build_allmodconfig_warn success Errors and warnings before: 12823 this patch: 12823
netdev/header_inline success Link
netdev/stable success Stable not CCed

Commit Message

Alexei Starovoitov Feb. 10, 2021, 3:36 a.m. UTC
From: Alexei Starovoitov <ast@kernel.org>

Move bpf_prog_stats from prog->aux into prog to avoid one extra load
in critical path of program execution.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
---
 include/linux/bpf.h     |  8 --------
 include/linux/filter.h  | 14 +++++++++++---
 kernel/bpf/core.c       |  8 ++++----
 kernel/bpf/syscall.c    |  2 +-
 kernel/bpf/trampoline.c |  2 +-
 kernel/bpf/verifier.c   |  2 +-
 6 files changed, 18 insertions(+), 18 deletions(-)

Comments

Ilya Leoshkevich Feb. 12, 2021, 3:26 a.m. UTC | #1
On Tue, 2021-02-09 at 19:36 -0800, Alexei Starovoitov wrote:
> From: Alexei Starovoitov <ast@kernel.org>
> 
> Move bpf_prog_stats from prog->aux into prog to avoid one extra load
> in critical path of program execution.
> 
> Signed-off-by: Alexei Starovoitov <ast@kernel.org>
> Acked-by: Andrii Nakryiko <andrii@kernel.org>
> ---
>  include/linux/bpf.h     |  8 --------
>  include/linux/filter.h  | 14 +++++++++++---
>  kernel/bpf/core.c       |  8 ++++----
>  kernel/bpf/syscall.c    |  2 +-
>  kernel/bpf/trampoline.c |  2 +-
>  kernel/bpf/verifier.c   |  2 +-
>  6 files changed, 18 insertions(+), 18 deletions(-)

...

> @@ -249,10 +249,10 @@ void __bpf_prog_free(struct bpf_prog *fp)
>         if (fp->aux) {
>                 mutex_destroy(&fp->aux->used_maps_mutex);
>                 mutex_destroy(&fp->aux->dst_mutex);
> -               free_percpu(fp->aux->stats);
>                 kfree(fp->aux->poke_tab);
>                 kfree(fp->aux);
>         }
> +       free_percpu(fp->stats);

On s390 this line causes the following in "ld_abs: vlan + abs, test 1"
with the latest bpf-next:

Unable to handle kernel pointer dereference in virtual kernel address
space
Failing address: 0000000000000000 TEID: 0000000000000483
Fault in home space mode while using kernel ASCE.
AS:0000000001bd0007 R3:00000001ffff0007 S:00000001ffffd000
P:000000000000003d 
Oops: 0004 ilc:2 [#1] SMP 
Modules linked in:
CPU: 0 PID: 184 Comm: test_verifier Not tainted 5.11.0-rc4-00952-
g6fdd671baaf5 #7
Hardware name: IBM 3906 M03 703 (KVM/Linux)
Krnl PSW : 0404c00180000000 000000000042707a
(refill_obj_stock+0x11a/0x1e0)
           R:0 T:1 IO:0 EX:0 Key:0 M:1 W:0 P:0 AS:3 CC:0 PM:0 RI:0 EA:3
Krnl GPRS: 0000000000000000 0000000000000000 0000000000000018
0000000100000000
           0000000000000000 000000008764ca88 00000000013d3ff8
000000000141d140
           0000000000000080 0000000000000000 0000000000000000
00000001ff61c8f0
           000000008764c000 00000000012eb678 0000000000427066
00000380001bb888
Krnl Code: 0000000000427070: a7380000           lhi     %r3,0
           0000000000427074: 5810a018           l       %r1,24(%r10)
          #0000000000427078: 1841               lr      %r4,%r1
          >000000000042707a: ba432000           cs      %r4,%r3,0(%r2)
           000000000042707e: a774fffb           brc    
7,0000000000427074
           0000000000427082: 1a18               ar      %r1,%r8
           0000000000427084: 5010b018           st      %r1,24(%r11)
           0000000000427088: c21f00001000       clfi    %r1,4096
Call Trace:
 [<000000000042707a>] refill_obj_stock+0x11a/0x1e0 
([<0000000000427066>] refill_obj_stock+0x106/0x1e0)
 [<000000000039bd86>] free_percpu.part.0+0xd6/0x428 
 [<00000000002ef738>] bpf_prog_realloc+0xa0/0xd8 
 [<00000000002efae8>] bpf_patch_insn_single+0x88/0x208 
 [<000000000030762e>] bpf_patch_insn_data+0x36/0x290 
 [<00000000003086ca>] fixup_bpf_calls+0x572/0xa28 
 [<000000000031045c>] bpf_check+0xb44/0xcb8 
 [<00000000002f747a>] bpf_prog_load+0x5fa/0x968 
 [<00000000002fa25c>] __do_sys_bpf+0x634/0x700 
 [<0000000000a2f3ca>] system_call+0xe2/0x28c 
INFO: lockdep is turned off.
Last Breaking-Event-Address:
 [<0000000000203f76>] lock_release+0x6e/0x218
Kernel panic - not syncing: Fatal exception: panic_on_oops

Here is the better backtrace (line numbers correspond to commit
6fdd671baaf5):

#0  refill_obj_stock (objcg=objcg@entry=0x0, nr_bytes=<optimized out>)
at mm/memcontrol.c:3248
#1  0x0000000000427a08 in obj_cgroup_uncharge (objcg=objcg@entry=0x0,
size=<optimized out>) at mm/memcontrol.c:3300
#2  0x000000000039bd86 in pcpu_memcg_free_hook (size=32, off=<optimized
out>, chunk=0x82d4fa00) at ./include/linux/bitmap.h:400
#3  free_percpu (ptr=0x3fd813b5960) at mm/percpu.c:2105
#4  0x000000000039c0ec in free_percpu (ptr=<optimized out>) at
mm/percpu.c:2089
#5  0x00000000002ef738 in __bpf_prog_free (fp=0x380001ce000) at
kernel/bpf/core.c:262
#6  bpf_prog_realloc (fp_old=fp_old@entry=0x380001ce000, size=249856,
size@entry=245776, gfp_extra_flags=gfp_extra_flags@entry=1051840) at
kernel/bpf/core.c:248
#7  0x00000000002efae8 in bpf_patch_insn_single (prog=0x380001ce000,
off=off@entry=2205, patch=patch@entry=0x380001bbba0, len=len@entry=6)
at ./include/linux/filter.h:788
#8  0x000000000030762e in bpf_patch_insn_data
(env=env@entry=0x87566000, off=off@entry=2205,
patch=patch@entry=0x380001bbba0, len=<optimized out>) at
kernel/bpf/verifier.c:10669
#9  0x00000000003086ca in fixup_bpf_calls (env=env@entry=0x87566000) at
kernel/bpf/verifier.c:11539
#10 0x000000000031045c in bpf_check (prog=prog@entry=0x380001bbda0,
attr=attr@entry=0x380001bbe80, uattr=uattr@entry=0x3ffe66fe9d0) at
kernel/bpf/verifier.c:12573
#11 0x00000000002f747a in bpf_prog_load (attr=attr@entry=0x380001bbe80,
uattr=uattr@entry=0x3ffe66fe9d0) at kernel/bpf/syscall.c:2209
#12 0x00000000002fa25c in __do_sys_bpf (cmd=<optimized out>,
uattr=0x3ffe66fe9d0, size=120) at kernel/bpf/syscall.c:4388
#13 0x0000000000a2f3ca in system_call () at
arch/s390/kernel/entry.S:439

So we end up with objcg=NULL, but I'm not sure why this happens.
Please let me know if you need more info.
Alexei Starovoitov Feb. 12, 2021, 3:43 a.m. UTC | #2
On Thu, Feb 11, 2021 at 7:26 PM Ilya Leoshkevich <iii@linux.ibm.com> wrote:
> mm/percpu.c:2089
> #5  0x00000000002ef738 in __bpf_prog_free (fp=0x380001ce000) at
> kernel/bpf/core.c:262
> #6  bpf_prog_realloc (fp_old=fp_old@entry=0x380001ce000, size=249856,
>
> So we end up with objcg=NULL, but I'm not sure why this happens.
> Please let me know if you need more info.

Argh. Thanks for reporting!
Pushed the obvious fix:
https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/commit/?id=1336c662474edec3966c96c8de026f794d16b804
Pls pull bpf-next and give it a spin.
Ilya Leoshkevich Feb. 12, 2021, 3:51 a.m. UTC | #3
On Thu, 2021-02-11 at 19:43 -0800, Alexei Starovoitov wrote:
> On Thu, Feb 11, 2021 at 7:26 PM Ilya Leoshkevich <iii@linux.ibm.com>
> wrote:
> > mm/percpu.c:2089
> > #5  0x00000000002ef738 in __bpf_prog_free (fp=0x380001ce000) at
> > kernel/bpf/core.c:262
> > #6  bpf_prog_realloc (fp_old=fp_old@entry=0x380001ce000,
> > size=249856,
> > 
> > So we end up with objcg=NULL, but I'm not sure why this happens.
> > Please let me know if you need more info.
> 
> Argh. Thanks for reporting!
> Pushed the obvious fix:
> https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/commit/?id=1336c662474edec3966c96c8de026f794d16b804
> Pls pull bpf-next and give it a spin.

Works now, thanks for the very quick fix!
diff mbox series

Patch

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 321966fc35db..026fa8873c5d 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -14,7 +14,6 @@ 
 #include <linux/numa.h>
 #include <linux/mm_types.h>
 #include <linux/wait.h>
-#include <linux/u64_stats_sync.h>
 #include <linux/refcount.h>
 #include <linux/mutex.h>
 #include <linux/module.h>
@@ -507,12 +506,6 @@  enum bpf_cgroup_storage_type {
  */
 #define MAX_BPF_FUNC_ARGS 12
 
-struct bpf_prog_stats {
-	u64 cnt;
-	u64 nsecs;
-	struct u64_stats_sync syncp;
-} __aligned(2 * sizeof(u64));
-
 struct btf_func_model {
 	u8 ret_size;
 	u8 nr_args;
@@ -845,7 +838,6 @@  struct bpf_prog_aux {
 	u32 linfo_idx;
 	u32 num_exentries;
 	struct exception_table_entry *extable;
-	struct bpf_prog_stats __percpu *stats;
 	union {
 		struct work_struct work;
 		struct rcu_head	rcu;
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 5b3137d7b690..cecb03c9d251 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -22,6 +22,7 @@ 
 #include <linux/vmalloc.h>
 #include <linux/sockptr.h>
 #include <crypto/sha1.h>
+#include <linux/u64_stats_sync.h>
 
 #include <net/sch_generic.h>
 
@@ -539,6 +540,12 @@  struct bpf_binary_header {
 	u8 image[] __aligned(BPF_IMAGE_ALIGNMENT);
 };
 
+struct bpf_prog_stats {
+	u64 cnt;
+	u64 nsecs;
+	struct u64_stats_sync syncp;
+} __aligned(2 * sizeof(u64));
+
 struct bpf_prog {
 	u16			pages;		/* Number of allocated pages */
 	u16			jited:1,	/* Is our filter JIT'ed? */
@@ -557,10 +564,11 @@  struct bpf_prog {
 	u32			len;		/* Number of filter blocks */
 	u32			jited_len;	/* Size of jited insns in bytes */
 	u8			tag[BPF_TAG_SIZE];
-	struct bpf_prog_aux	*aux;		/* Auxiliary fields */
-	struct sock_fprog_kern	*orig_prog;	/* Original BPF program */
+	struct bpf_prog_stats __percpu *stats;
 	unsigned int		(*bpf_func)(const void *ctx,
 					    const struct bpf_insn *insn);
+	struct bpf_prog_aux	*aux;		/* Auxiliary fields */
+	struct sock_fprog_kern	*orig_prog;	/* Original BPF program */
 	/* Instructions for interpreter */
 	struct sock_filter	insns[0];
 	struct bpf_insn		insnsi[];
@@ -581,7 +589,7 @@  DECLARE_STATIC_KEY_FALSE(bpf_stats_enabled_key);
 		struct bpf_prog_stats *__stats;				\
 		u64 __start = sched_clock();				\
 		__ret = dfunc(ctx, (prog)->insnsi, (prog)->bpf_func);	\
-		__stats = this_cpu_ptr(prog->aux->stats);		\
+		__stats = this_cpu_ptr(prog->stats);			\
 		u64_stats_update_begin(&__stats->syncp);		\
 		__stats->cnt++;						\
 		__stats->nsecs += sched_clock() - __start;		\
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 5bbd4884ff7a..2cf71fd39c22 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -114,8 +114,8 @@  struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags)
 	if (!prog)
 		return NULL;
 
-	prog->aux->stats = alloc_percpu_gfp(struct bpf_prog_stats, gfp_flags);
-	if (!prog->aux->stats) {
+	prog->stats = alloc_percpu_gfp(struct bpf_prog_stats, gfp_flags);
+	if (!prog->stats) {
 		kfree(prog->aux);
 		vfree(prog);
 		return NULL;
@@ -124,7 +124,7 @@  struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags)
 	for_each_possible_cpu(cpu) {
 		struct bpf_prog_stats *pstats;
 
-		pstats = per_cpu_ptr(prog->aux->stats, cpu);
+		pstats = per_cpu_ptr(prog->stats, cpu);
 		u64_stats_init(&pstats->syncp);
 	}
 	return prog;
@@ -249,10 +249,10 @@  void __bpf_prog_free(struct bpf_prog *fp)
 	if (fp->aux) {
 		mutex_destroy(&fp->aux->used_maps_mutex);
 		mutex_destroy(&fp->aux->dst_mutex);
-		free_percpu(fp->aux->stats);
 		kfree(fp->aux->poke_tab);
 		kfree(fp->aux);
 	}
+	free_percpu(fp->stats);
 	vfree(fp);
 }
 
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index e5999d86c76e..f7df56a704de 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1739,7 +1739,7 @@  static void bpf_prog_get_stats(const struct bpf_prog *prog,
 		unsigned int start;
 		u64 tnsecs, tcnt;
 
-		st = per_cpu_ptr(prog->aux->stats, cpu);
+		st = per_cpu_ptr(prog->stats, cpu);
 		do {
 			start = u64_stats_fetch_begin_irq(&st->syncp);
 			tnsecs = st->nsecs;
diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
index 35c5887d82ff..5be3beeedd74 100644
--- a/kernel/bpf/trampoline.c
+++ b/kernel/bpf/trampoline.c
@@ -412,7 +412,7 @@  void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start)
 	     * Hence check that 'start' is not zero.
 	     */
 	    start) {
-		stats = this_cpu_ptr(prog->aux->stats);
+		stats = this_cpu_ptr(prog->stats);
 		u64_stats_update_begin(&stats->syncp);
 		stats->cnt++;
 		stats->nsecs += sched_clock() - start;
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 15694246f854..4189edb41b73 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -10889,7 +10889,7 @@  static int jit_subprogs(struct bpf_verifier_env *env)
 		/* BPF_PROG_RUN doesn't call subprogs directly,
 		 * hence main prog stats include the runtime of subprogs.
 		 * subprogs don't have IDs and not reachable via prog_get_next_id
-		 * func[i]->aux->stats will never be accessed and stays NULL
+		 * func[i]->stats will never be accessed and stays NULL
 		 */
 		func[i] = bpf_prog_alloc_no_stats(bpf_prog_size(len), GFP_USER);
 		if (!func[i])