Message ID | 20230524001825.2688661-1-eddyz87@gmail.com (mailing list archive) |
---|---|
Headers | show |
Series | Support for new btf_type_tag encoding | expand |
On Wed, 2023-05-24 at 03:18 +0300, Eduard Zingerman wrote: [...] > - gcc / LLVM-main / pahole-new > - kernel build : ok > - bpf tests : ok > - btfdiff : ok but dwarf dump sometimes segfaults Regarding this segfault. I can reproduce it using pahole 'next' branch if btfdiff is executed several times. So, the proposed patch-set is not the cause. Specifically, it happens when the following command is executed: pahole -F dwarf --flat_arrays --sort ... vmlinux With the following stack trace: Thread 1 "pahole" received signal SIGSEGV, Segmentation fault. 0x00007ffff7f819ad in __rb_erase_color (node=0x7fffd4045830, parent=0x0, root=0x5555555672d8 <structures.tree>) at /home/eddy/work/dwarves-fork/rbtree.c:134 134 if (parent->rb_left == node) (gdb) bt #0 0x00007ffff7f819ad in __rb_erase_color (node=0x7fffd4045830, parent=0x0, root=0x5555555672d8 <structures.tree>) at /home/eddy/work/dwarves-fork/rbtree.c:134 #1 0x00007ffff7f82014 in rb_erase (node=0x7fff21ae5b80, root=0x5555555672d8 <structures.tree>) at /home/eddy/work/dwarves-fork/rbtree.c:275 #2 0x0000555555559c3d in __structures__delete () at /home/eddy/work/dwarves-fork/pahole.c:440 #3 0x0000555555559c70 in structures__delete () at /home/eddy/work/dwarves-fork/pahole.c:448 #4 0x0000555555560bb6 in main (argc=13, argv=0x7fffffffdcd8) at /home/eddy/work/dwarves-fork/pahole.c:3584 I tried random testing the rb_tree implementation and can reliably reproduce the error using the diff at the end of the email. E.g. it happens for test sequence (using add/delete functions adapted to work on int values instead of 'struct structure'): 60 4 24 34 15 2 80 26 82 67 92 77 9. Will figure out a fix for rb_erase code tomorrow. --- testing code for rb_erase --- diff --git a/pahole.c b/pahole.c index 6fc4ed6..6c60c9e 100644 --- a/pahole.c +++ b/pahole.c @@ -8,6 +8,7 @@ #include <argp.h> #include <assert.h> +#include <signal.h> #include <stdio.h> #include <dwarf.h> #include <elfutils/version.h> @@ -3408,10 +3409,97 @@ out_free: return ret; } +struct test_struct { + struct rb_node rb_node; + int val; +}; + +static struct rb_root test_tree = RB_ROOT; +static struct test_struct structs[100] = {}; +static int free_struct_idx = 0; +static int num_tries = 0; + +static void test_add(int val) +{ + struct rb_node **p = &test_tree.rb_node; + struct rb_node *parent = NULL; + struct test_struct *str; + + while (*p != NULL) { + int rc; + + parent = *p; + str = rb_entry(parent, struct test_struct, rb_node); + rc = str->val - val; + + if (rc > 0) + p = &(*p)->rb_left; + else if (rc < 0) + p = &(*p)->rb_right; + else { + return; + } + } + + str = &structs[free_struct_idx++]; + str->val = val; + + rb_link_node(&str->rb_node, parent, p); + rb_insert_color(&str->rb_node, &test_tree); + + return; +} + +static void test_delete() +{ + struct rb_node *next = rb_first(&test_tree); + + while (next) { + struct test_struct *pos = rb_entry(next, struct test_struct, rb_node); + next = rb_next(&pos->rb_node); + rb_erase(&pos->rb_node, &structures__tree); + } +} + +void test_erase_once() +{ + test_tree = RB_ROOT; + free_struct_idx = 0; + bzero(structs, sizeof(structs)); + int size = rand() % 16; + for (int i = 0; i < size; ++i) { + int val = rand() % 100; + test_add(val); + } + test_delete(); + ++num_tries; +} + +static void sigsegv_handler(int nSignum, siginfo_t* si, void* vcontext) { + fprintf(stderr, "SIGSEGV after %d iters: [%d]", num_tries, free_struct_idx); + for (int i = 0; i < free_struct_idx; ++i) + fprintf(stderr, " %d", structs[i].val); + fprintf(stderr, "\n"); + exit(1); +} + int main(int argc, char *argv[]) { int err, remaining, rc = EXIT_FAILURE; + int seed = clock(); + fprintf(stderr, "seed = %d\n", seed); + srand(seed); + struct sigaction action = {}; + action.sa_flags = SA_SIGINFO; + action.sa_sigaction = sigsegv_handler; + sigaction(SIGSEGV, &action, NULL); + for (int i = 0; i < 1000 * 1000; ++i) { + test_erase_once(); + } + fprintf(stderr, "done\n"); + return 0; + if (argp_parse(&pahole__argp, argc, argv, 0, &remaining, NULL)) { argp_help(&pahole__argp, stderr, ARGP_HELP_SEE, argv[0]); goto out;
On Thu, 2023-05-25 at 03:13 +0300, Eduard Zingerman wrote: [...] > +static void test_delete() > +{ > + struct rb_node *next = rb_first(&test_tree); > + > + while (next) { > + struct test_struct *pos = rb_entry(next, struct test_struct, rb_node); > + next = rb_next(&pos->rb_node); > + rb_erase(&pos->rb_node, &structures__tree); ^^^^^^^ Should be &test_tree I made a mistake in this test code. When correct tree is passed to rb_erase() test passes w/o segfaults. The original issue is caused by the same 'struct structures' instance being added to an rb_tree twice, posted a separate patch with a fix to the mailing list. > + } > +} [...]