diff mbox series

[dwarves] btf_encoder: always initialize func_state to 0

Message ID 20250110023138.659519-1-ihor.solodrai@pm.me (mailing list archive)
State New
Headers show
Series [dwarves] btf_encoder: always initialize func_state to 0 | expand

Checks

Context Check Description
netdev/tree_selection success Not a local patch

Commit Message

Ihor Solodrai Jan. 10, 2025, 2:31 a.m. UTC
BPF CI caught a segfault on aarch64 and s390x [1] after recent merges
into the master branch.

The segfault happened at free(func_state->annots) in
btf_encoder__delete_saved_funcs().

func_state->annots arrived there uninitialized because after patch [2]
in some cases func_state may be allocated with a realloc, but was not
zeroed out.

Fix this bug by always memset-ing a func_state to zero in
btf_encoder__alloc_func_state().

[1] https://github.com/kernel-patches/bpf/actions/runs/12700574327
[2] https://lore.kernel.org/dwarves/20250109185950.653110-11-ihor.solodrai@pm.me/
---
 btf_encoder.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

Comments

Alan Maguire Jan. 10, 2025, 10:39 a.m. UTC | #1
On 10/01/2025 02:31, Ihor Solodrai wrote:
> BPF CI caught a segfault on aarch64 and s390x [1] after recent merges
> into the master branch.
> 
> The segfault happened at free(func_state->annots) in
> btf_encoder__delete_saved_funcs().
> 
> func_state->annots arrived there uninitialized because after patch [2]
> in some cases func_state may be allocated with a realloc, but was not
> zeroed out.
> 
> Fix this bug by always memset-ing a func_state to zero in
> btf_encoder__alloc_func_state().
> 
> [1] https://github.com/kernel-patches/bpf/actions/runs/12700574327
> [2] https://lore.kernel.org/dwarves/20250109185950.653110-11-ihor.solodrai@pm.me/


Thanks for the quick fix! Reproduced this on an aarch64 system:

 BTF [M] kernel/resource_kunit.ko
/bin/sh: line 1: 630875 Segmentation fault      (core dumped)
LLVM_OBJCOPY="objcopy" pahole -J -j
--btf_features=encode_force,var,float,enum64,decl_tag,type_tag,optimized_func,consistent_func,decl_tag_kfuncs
--lang_exclude=rust --btf_base ./vmlinux kernel/kcsan/kcsan_test.ko
make[2]: *** [scripts/Makefile.modfinal:57: kernel/kcsan/kcsan_test.ko]
Error 139
make[2]: *** Deleting file 'kernel/kcsan/kcsan_test.ko'
make[2]: *** Waiting for unfinished jobs....
/bin/sh: line 1: 630907 Segmentation fault      (core dumped)
LLVM_OBJCOPY="objcopy" pahole -J -j
--btf_features=encode_force,var,float,enum64,decl_tag,type_tag,optimized_func,consistent_func,decl_tag_kfuncs
--lang_exclude=rust --btf_base ./vmlinux kernel/torture.ko
make[2]: *** [scripts/Makefile.modfinal:56: kernel/torture.ko] Error 139
make[2]: *** Deleting file 'kernel/torture.ko'

...and verified that with the fix all works well.

Nit: missing Signed-off-by

Tested-by: Alan Maguire <alan.maguire@oracle.com>

> ---
>  btf_encoder.c | 7 +++++--
>  1 file changed, 5 insertions(+), 2 deletions(-)
> 
> diff --git a/btf_encoder.c b/btf_encoder.c
> index 78efd70..511c1ea 100644
> --- a/btf_encoder.c
> +++ b/btf_encoder.c
> @@ -1083,7 +1083,7 @@ static bool funcs__match(struct btf_encoder_func_state *s1,
>  
>  static struct btf_encoder_func_state *btf_encoder__alloc_func_state(struct btf_encoder *encoder)
>  {
> -	struct btf_encoder_func_state *tmp;
> +	struct btf_encoder_func_state *state, *tmp;
>  
>  	if (encoder->func_states.cnt >= encoder->func_states.cap) {
>  
> @@ -1100,7 +1100,10 @@ static struct btf_encoder_func_state *btf_encoder__alloc_func_state(struct btf_e
>  		encoder->func_states.array = tmp;
>  	}
>  
> -	return &encoder->func_states.array[encoder->func_states.cnt++];
> +	state = &encoder->func_states.array[encoder->func_states.cnt++];
> +	memset(state, 0, sizeof(*state));
> +
> +	return state;
>  }
>  
>  static int32_t btf_encoder__save_func(struct btf_encoder *encoder, struct function *fn, struct elf_function *func)
Arnaldo Carvalho de Melo Jan. 10, 2025, 1:48 p.m. UTC | #2
On Fri, Jan 10, 2025 at 10:39:50AM +0000, Alan Maguire wrote:
> On 10/01/2025 02:31, Ihor Solodrai wrote:
> > BPF CI caught a segfault on aarch64 and s390x [1] after recent merges
> > into the master branch.
> > 
> > The segfault happened at free(func_state->annots) in
> > btf_encoder__delete_saved_funcs().
> > 
> > func_state->annots arrived there uninitialized because after patch [2]
> > in some cases func_state may be allocated with a realloc, but was not
> > zeroed out.
> > 
> > Fix this bug by always memset-ing a func_state to zero in
> > btf_encoder__alloc_func_state().
> > 
> > [1] https://github.com/kernel-patches/bpf/actions/runs/12700574327
> > [2] https://lore.kernel.org/dwarves/20250109185950.653110-11-ihor.solodrai@pm.me/
> 
> 
> Thanks for the quick fix! Reproduced this on an aarch64 system:
> 
>  BTF [M] kernel/resource_kunit.ko
> /bin/sh: line 1: 630875 Segmentation fault      (core dumped)
> LLVM_OBJCOPY="objcopy" pahole -J -j
> --btf_features=encode_force,var,float,enum64,decl_tag,type_tag,optimized_func,consistent_func,decl_tag_kfuncs
> --lang_exclude=rust --btf_base ./vmlinux kernel/kcsan/kcsan_test.ko
> make[2]: *** [scripts/Makefile.modfinal:57: kernel/kcsan/kcsan_test.ko]
> Error 139
> make[2]: *** Deleting file 'kernel/kcsan/kcsan_test.ko'
> make[2]: *** Waiting for unfinished jobs....
> /bin/sh: line 1: 630907 Segmentation fault      (core dumped)
> LLVM_OBJCOPY="objcopy" pahole -J -j
> --btf_features=encode_force,var,float,enum64,decl_tag,type_tag,optimized_func,consistent_func,decl_tag_kfuncs
> --lang_exclude=rust --btf_base ./vmlinux kernel/torture.ko
> make[2]: *** [scripts/Makefile.modfinal:56: kernel/torture.ko] Error 139
> make[2]: *** Deleting file 'kernel/torture.ko'
> 
> ...and verified that with the fix all works well.
> 
> Nit: missing Signed-off-by

I'm adding it, ok?

- Arnaldo
 
> Tested-by: Alan Maguire <alan.maguire@oracle.com>

Thanks!
 
> > ---
> >  btf_encoder.c | 7 +++++--
> >  1 file changed, 5 insertions(+), 2 deletions(-)
> > 
> > diff --git a/btf_encoder.c b/btf_encoder.c
> > index 78efd70..511c1ea 100644
> > --- a/btf_encoder.c
> > +++ b/btf_encoder.c
> > @@ -1083,7 +1083,7 @@ static bool funcs__match(struct btf_encoder_func_state *s1,
> >  
> >  static struct btf_encoder_func_state *btf_encoder__alloc_func_state(struct btf_encoder *encoder)
> >  {
> > -	struct btf_encoder_func_state *tmp;
> > +	struct btf_encoder_func_state *state, *tmp;
> >  
> >  	if (encoder->func_states.cnt >= encoder->func_states.cap) {
> >  
> > @@ -1100,7 +1100,10 @@ static struct btf_encoder_func_state *btf_encoder__alloc_func_state(struct btf_e
> >  		encoder->func_states.array = tmp;
> >  	}
> >  
> > -	return &encoder->func_states.array[encoder->func_states.cnt++];
> > +	state = &encoder->func_states.array[encoder->func_states.cnt++];
> > +	memset(state, 0, sizeof(*state));
> > +
> > +	return state;
> >  }
> >  
> >  static int32_t btf_encoder__save_func(struct btf_encoder *encoder, struct function *fn, struct elf_function *func)
Arnaldo Carvalho de Melo Jan. 10, 2025, 1:51 p.m. UTC | #3
On Fri, Jan 10, 2025 at 02:31:41AM +0000, Ihor Solodrai wrote:
> BPF CI caught a segfault on aarch64 and s390x [1] after recent merges
> into the master branch.

In the past the libbpf github actions was tracking the tmp.master (it would
be better to track "next") branch and I was looking at when it passed to
then move "next" to master, that would be great to have so that we
wouldn't be having these bugs in the git history, avoiding force pushes.

Anyhway, thanks for the fix, I'll add it and push it out.

- Arnaldo
 
> The segfault happened at free(func_state->annots) in
> btf_encoder__delete_saved_funcs().
> 
> func_state->annots arrived there uninitialized because after patch [2]
> in some cases func_state may be allocated with a realloc, but was not
> zeroed out.
> 
> Fix this bug by always memset-ing a func_state to zero in
> btf_encoder__alloc_func_state().
> 
> [1] https://github.com/kernel-patches/bpf/actions/runs/12700574327
> [2] https://lore.kernel.org/dwarves/20250109185950.653110-11-ihor.solodrai@pm.me/
> ---
>  btf_encoder.c | 7 +++++--
>  1 file changed, 5 insertions(+), 2 deletions(-)
> 
> diff --git a/btf_encoder.c b/btf_encoder.c
> index 78efd70..511c1ea 100644
> --- a/btf_encoder.c
> +++ b/btf_encoder.c
> @@ -1083,7 +1083,7 @@ static bool funcs__match(struct btf_encoder_func_state *s1,
>  
>  static struct btf_encoder_func_state *btf_encoder__alloc_func_state(struct btf_encoder *encoder)
>  {
> -	struct btf_encoder_func_state *tmp;
> +	struct btf_encoder_func_state *state, *tmp;
>  
>  	if (encoder->func_states.cnt >= encoder->func_states.cap) {
>  
> @@ -1100,7 +1100,10 @@ static struct btf_encoder_func_state *btf_encoder__alloc_func_state(struct btf_e
>  		encoder->func_states.array = tmp;
>  	}
>  
> -	return &encoder->func_states.array[encoder->func_states.cnt++];
> +	state = &encoder->func_states.array[encoder->func_states.cnt++];
> +	memset(state, 0, sizeof(*state));
> +
> +	return state;
>  }
>  
>  static int32_t btf_encoder__save_func(struct btf_encoder *encoder, struct function *fn, struct elf_function *func)
> -- 
> 2.47.1
>
Arnaldo Carvalho de Melo Jan. 10, 2025, 1:55 p.m. UTC | #4
On Fri, Jan 10, 2025 at 02:31:41AM +0000, Ihor Solodrai wrote:
> @@ -1100,7 +1100,10 @@ static struct btf_encoder_func_state *btf_encoder__alloc_func_state(struct btf_e
>  		encoder->func_states.array = tmp;
>  	}
  
> -	return &encoder->func_states.array[encoder->func_states.cnt++];
> +	state = &encoder->func_states.array[encoder->func_states.cnt++];
> +	memset(state, 0, sizeof(*state));
> +
> +	return state;

Just a super nit, the following is equivalent and shorter:

	state = &encoder->func_states.array[encoder->func_states.cnt++];
	return memset(state, 0, sizeof(*state));

:-)

But nah, I'm appling your patch as-is.

Thanks!

- Arnaldo
Ihor Solodrai Jan. 10, 2025, 3:46 p.m. UTC | #5
On Friday, January 10th, 2025 at 5:48 AM, Arnaldo Carvalho de Melo <acme@kernel.org> wrote:

> 
> 
> On Fri, Jan 10, 2025 at 10:39:50AM +0000, Alan Maguire wrote:
> 
> > On 10/01/2025 02:31, Ihor Solodrai wrote:
> > 
> > > BPF CI caught a segfault on aarch64 and s390x [1] after recent merges
> > > into the master branch.
> > > 
> > > The segfault happened at free(func_state->annots) in
> > > btf_encoder__delete_saved_funcs().
> > > 
> > > func_state->annots arrived there uninitialized because after patch [2]
> > > in some cases func_state may be allocated with a realloc, but was not
> > > zeroed out.
> > > 
> > > Fix this bug by always memset-ing a func_state to zero in
> > > btf_encoder__alloc_func_state().
> > > 
> > > [1] https://github.com/kernel-patches/bpf/actions/runs/12700574327
> > > [2] https://lore.kernel.org/dwarves/20250109185950.653110-11-ihor.solodrai@pm.me/
> > 
> > Thanks for the quick fix! Reproduced this on an aarch64 system:
> > 
> > BTF [M] kernel/resource_kunit.ko
> > /bin/sh: line 1: 630875 Segmentation fault (core dumped)
> > LLVM_OBJCOPY="objcopy" pahole -J -j
> > --btf_features=encode_force,var,float,enum64,decl_tag,type_tag,optimized_func,consistent_func,decl_tag_kfuncs
> > --lang_exclude=rust --btf_base ./vmlinux kernel/kcsan/kcsan_test.ko
> > make[2]: *** [scripts/Makefile.modfinal:57: kernel/kcsan/kcsan_test.ko]
> > Error 139
> > make[2]: *** Deleting file 'kernel/kcsan/kcsan_test.ko'
> > make[2]: *** Waiting for unfinished jobs....
> > /bin/sh: line 1: 630907 Segmentation fault (core dumped)
> > LLVM_OBJCOPY="objcopy" pahole -J -j
> > --btf_features=encode_force,var,float,enum64,decl_tag,type_tag,optimized_func,consistent_func,decl_tag_kfuncs
> > --lang_exclude=rust --btf_base ./vmlinux kernel/torture.ko
> > make[2]: *** [scripts/Makefile.modfinal:56: kernel/torture.ko] Error 139
> > make[2]: *** Deleting file 'kernel/torture.ko'
> > 
> > ...and verified that with the fix all works well.
> > 
> > Nit: missing Signed-off-by
> 
> 
> I'm adding it, ok?

Yes, thanks!

Signed-off-by: Ihor Solodrai <ihor.solodrai@pm.me>

> 
> - Arnaldo
> 
> > Tested-by: Alan Maguire alan.maguire@oracle.com
> 
> 
> Thanks!
> 
> > > ---
> > > btf_encoder.c | 7 +++++--
> > > 1 file changed, 5 insertions(+), 2 deletions(-)
> > > 
> > > diff --git a/btf_encoder.c b/btf_encoder.c
> > > index 78efd70..511c1ea 100644
> > > --- a/btf_encoder.c
> > > +++ b/btf_encoder.c
> > > @@ -1083,7 +1083,7 @@ static bool funcs__match(struct btf_encoder_func_state *s1,
> > > 
> > > static struct btf_encoder_func_state *btf_encoder__alloc_func_state(struct btf_encoder *encoder)
> > > {
> > > - struct btf_encoder_func_state *tmp;
> > > + struct btf_encoder_func_state *state, *tmp;
> > > 
> > > if (encoder->func_states.cnt >= encoder->func_states.cap) {
> > > 
> > > @@ -1100,7 +1100,10 @@ static struct btf_encoder_func_state *btf_encoder__alloc_func_state(struct btf_e
> > > encoder->func_states.array = tmp;
> > > }
> > > 
> > > - return &encoder->func_states.array[encoder->func_states.cnt++];
> > > + state = &encoder->func_states.array[encoder->func_states.cnt++];
> > > + memset(state, 0, sizeof(*state));
> > > +
> > > + return state;
> > > }
> > > 
> > > static int32_t btf_encoder__save_func(struct btf_encoder *encoder, struct function *fn, struct elf_function *func)
Ihor Solodrai Jan. 10, 2025, 3:58 p.m. UTC | #6
On Friday, January 10th, 2025 at 5:51 AM, Arnaldo Carvalho de Melo <acme@kernel.org> wrote:

> 
> 
> On Fri, Jan 10, 2025 at 02:31:41AM +0000, Ihor Solodrai wrote:
> 
> > BPF CI caught a segfault on aarch64 and s390x [1] after recent merges
> > into the master branch.
> 
> 
> In the past the libbpf github actions was tracking the tmp.master (it would
> be better to track "next") branch and I was looking at when it passed to
> then move "next" to master, that would be great to have so that we
> wouldn't be having these bugs in the git history, avoiding force pushes.

libbpf CI is still tracking tmp.master:

https://github.com/libbpf/libbpf/actions/runs/12696027660/job/35389206487

However it only runs once a day. BPF CI runs more frequently due to the
volume of incoming patches. As of recently, BPF CI has been using "master".
Yesterday, when I saw the failures, I switched BPF CI to v1.28.

I think the right way to approach this is for libbpf/libbpf to track "next",
and BPF CI use "master". Then, most importantly, only merge next into master
after libbpf CI has passed.

This can potentially be automated, but would require push access to the
pahole repo. Until then, a maintainer would need to manually check the
libbpf CI status here:

https://github.com/libbpf/libbpf/actions/workflows/test.yml

Another thing is that libbpf CI only tests x86_64 currently.
We could add aarch64 to libbpf, or migrate pahole staging job to
kernel-patches/vmtest (which is almost identical to BPF CI).

Andrii, what do you think?

> 
> Anyhway, thanks for the fix, I'll add it and push it out.
> 
> - Arnaldo
> 
> > The segfault happened at free(func_state->annots) in
> > btf_encoder__delete_saved_funcs().
> > 
> > func_state->annots arrived there uninitialized because after patch [2]
> > in some cases func_state may be allocated with a realloc, but was not
> > zeroed out.
> > 
> > Fix this bug by always memset-ing a func_state to zero in
> > btf_encoder__alloc_func_state().
> > 
> > [1] https://github.com/kernel-patches/bpf/actions/runs/12700574327
> > [2] https://lore.kernel.org/dwarves/20250109185950.653110-11-ihor.solodrai@pm.me/
> > ---
> > btf_encoder.c | 7 +++++--
> > 1 file changed, 5 insertions(+), 2 deletions(-)
> > 
> > diff --git a/btf_encoder.c b/btf_encoder.c
> > index 78efd70..511c1ea 100644
> > --- a/btf_encoder.c
> > +++ b/btf_encoder.c
> > @@ -1083,7 +1083,7 @@ static bool funcs__match(struct btf_encoder_func_state *s1,
> > 
> > static struct btf_encoder_func_state *btf_encoder__alloc_func_state(struct btf_encoder *encoder)
> > {
> > - struct btf_encoder_func_state *tmp;
> > + struct btf_encoder_func_state *state, *tmp;
> > 
> > if (encoder->func_states.cnt >= encoder->func_states.cap) {
> > 
> > @@ -1100,7 +1100,10 @@ static struct btf_encoder_func_state *btf_encoder__alloc_func_state(struct btf_e
> > encoder->func_states.array = tmp;
> > }
> > 
> > - return &encoder->func_states.array[encoder->func_states.cnt++];
> > + state = &encoder->func_states.array[encoder->func_states.cnt++];
> > + memset(state, 0, sizeof(*state));
> > +
> > + return state;
> > }
> > 
> > static int32_t btf_encoder__save_func(struct btf_encoder *encoder, struct function *fn, struct elf_function *func)
> > --
> > 2.47.1
diff mbox series

Patch

diff --git a/btf_encoder.c b/btf_encoder.c
index 78efd70..511c1ea 100644
--- a/btf_encoder.c
+++ b/btf_encoder.c
@@ -1083,7 +1083,7 @@  static bool funcs__match(struct btf_encoder_func_state *s1,
 
 static struct btf_encoder_func_state *btf_encoder__alloc_func_state(struct btf_encoder *encoder)
 {
-	struct btf_encoder_func_state *tmp;
+	struct btf_encoder_func_state *state, *tmp;
 
 	if (encoder->func_states.cnt >= encoder->func_states.cap) {
 
@@ -1100,7 +1100,10 @@  static struct btf_encoder_func_state *btf_encoder__alloc_func_state(struct btf_e
 		encoder->func_states.array = tmp;
 	}
 
-	return &encoder->func_states.array[encoder->func_states.cnt++];
+	state = &encoder->func_states.array[encoder->func_states.cnt++];
+	memset(state, 0, sizeof(*state));
+
+	return state;
 }
 
 static int32_t btf_encoder__save_func(struct btf_encoder *encoder, struct function *fn, struct elf_function *func)