diff mbox series

[bpf-next,1/2] libbpf: __attribute__((btf_decl_tag("..."))) for btf dump in C format

Message ID 20221103134522.2764601-1-eddyz87@gmail.com (mailing list archive)
State Changes Requested
Delegated to: BPF
Headers show
Series [bpf-next,1/2] libbpf: __attribute__((btf_decl_tag("..."))) for btf dump in C format | expand

Checks

Context Check Description
netdev/tree_selection success Clearly marked for bpf-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success Link
netdev/cover_letter success Single patches do not need cover letters
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 0 this patch: 0
netdev/cc_maintainers warning 11 maintainers not CCed: sdf@google.com john.fastabend@gmail.com trix@redhat.com haoluo@google.com jolsa@kernel.org kpsingh@kernel.org song@kernel.org ndesaulniers@google.com llvm@lists.linux.dev martin.lau@linux.dev nathan@kernel.org
netdev/build_clang success Errors and warnings before: 0 this patch: 0
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 0 this patch: 0
netdev/checkpatch warning WARNING: line length of 82 exceeds 80 columns WARNING: line length of 83 exceeds 80 columns WARNING: line length of 87 exceeds 80 columns WARNING: line length of 90 exceeds 80 columns WARNING: line length of 93 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-next-PR fail PR summary
bpf/vmtest-bpf-next-VM_Test-9 success Logs for test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-11 success Logs for test_progs on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-28 success Logs for test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-8 success Logs for llvm-toolchain
bpf/vmtest-bpf-next-VM_Test-10 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-12 success Logs for test_maps on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-18 success Logs for test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-27 success Logs for test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-15 success Logs for test_progs on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-21 success Logs for test_progs_no_alu32_parallel on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-24 success Logs for test_progs_parallel on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-13 success Logs for test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-14 success Logs for test_maps on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-16 success Logs for test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-19 success Logs for test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-22 success Logs for test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-25 success Logs for test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-29 success Logs for test_verifier on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-17 success Logs for test_progs on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-20 success Logs for test_progs_no_alu32 on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-23 success Logs for test_progs_no_alu32_parallel on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-26 success Logs for test_progs_parallel on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-3 success Logs for build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-1 pending Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-2 pending Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-6 success Logs for llvm-toolchain
bpf/vmtest-bpf-next-VM_Test-7 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-4 success Logs for build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-5 success Logs for build for x86_64 with llvm-16

Commit Message

Eduard Zingerman Nov. 3, 2022, 1:45 p.m. UTC
Clang's `__attribute__((btf_decl_tag("...")))` is represented in BTF
as a record of kind BTF_KIND_DECL_TAG with `type` field pointing to
the type annotated with this attribute. This commit adds
reconstitution of such attributes for BTF dump in C format.

BTF doc says that BTF_KIND_DECL_TAGs should follow a target type but
this is not enforced and tests don't honor this restriction.
This commit uses hashmap to map types to the list of decl tags.
The hashmap is filled by `btf_dump_assign_decl_tags` function called
from `btf_dump__new`.

It is assumed that total number of types annotated with decl tags is
relatively small, thus some space is saved by using hashmap instead of
adding a new field to `struct btf_dump_type_aux_state`.

It is assumed that list of decl tags associated with a single type is
small. Thus the list is represented by an array which grows linearly.

To accommodate older Clang versions decl tags are dumped using the
following macro:

 #if __has_attribute(btf_decl_tag)
 #  define __btf_decl_tag(x) __attribute__((btf_decl_tag(x)))
 #else
 #  define __btf_decl_tag(x)
 #endif

The macro definition is emitted upon first call to `btf_dump__dump_type`.

Clang allows to attach btf_decl_tag attributes to the following kinds
of items:
- struct/union         supported
- struct/union field   supported
- typedef              supported
- function             not applicable
- function parameter   not applicable
- variable             not applicable

Signed-off-by: Eduard Zingerman <eddyz87@gmail.com>
---
 tools/lib/bpf/btf_dump.c | 163 ++++++++++++++++++++++++++++++++++++++-
 1 file changed, 160 insertions(+), 3 deletions(-)

Comments

Andrii Nakryiko Nov. 4, 2022, 8:54 p.m. UTC | #1
On Thu, Nov 3, 2022 at 6:45 AM Eduard Zingerman <eddyz87@gmail.com> wrote:
>
> Clang's `__attribute__((btf_decl_tag("...")))` is represented in BTF
> as a record of kind BTF_KIND_DECL_TAG with `type` field pointing to
> the type annotated with this attribute. This commit adds
> reconstitution of such attributes for BTF dump in C format.
>
> BTF doc says that BTF_KIND_DECL_TAGs should follow a target type but
> this is not enforced and tests don't honor this restriction.
> This commit uses hashmap to map types to the list of decl tags.
> The hashmap is filled by `btf_dump_assign_decl_tags` function called
> from `btf_dump__new`.
>
> It is assumed that total number of types annotated with decl tags is
> relatively small, thus some space is saved by using hashmap instead of
> adding a new field to `struct btf_dump_type_aux_state`.
>
> It is assumed that list of decl tags associated with a single type is
> small. Thus the list is represented by an array which grows linearly.
>
> To accommodate older Clang versions decl tags are dumped using the
> following macro:
>
>  #if __has_attribute(btf_decl_tag)
>  #  define __btf_decl_tag(x) __attribute__((btf_decl_tag(x)))
>  #else
>  #  define __btf_decl_tag(x)
>  #endif
>
> The macro definition is emitted upon first call to `btf_dump__dump_type`.
>
> Clang allows to attach btf_decl_tag attributes to the following kinds
> of items:
> - struct/union         supported
> - struct/union field   supported
> - typedef              supported
> - function             not applicable
> - function parameter   not applicable
> - variable             not applicable
>
> Signed-off-by: Eduard Zingerman <eddyz87@gmail.com>
> ---
>  tools/lib/bpf/btf_dump.c | 163 ++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 160 insertions(+), 3 deletions(-)
>

Functions and their args can also have tags. This works:

diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_decl_tag.c
b/tools/testing/selftests/bpf/progs/btf_dump_test_case_decl_tag.c
index 7a5af8b86065..75fcabe700cd 100644
--- a/tools/testing/selftests/bpf/progs/btf_dump_test_case_decl_tag.c
+++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_decl_tag.c
@@ -54,7 +54,7 @@ struct root_struct {

 /* ------ END-EXPECTED-OUTPUT ------ */

-int f(struct root_struct *s)
+int f(struct root_struct *s __btf_decl_tag("func_arg_tag"))
__btf_decl_tag("func_tag")
 {
        return 0;
 }

And I see correct BTF:

[26] FUNC 'f' type_id=25 linkage=global
[27] DECL_TAG 'func_arg_tag' type_id=26 component_idx=0
[28] DECL_TAG 'func_tag' type_id=26 component_idx=-1

So let's add support and test for that case as well. btf_dump
shouldn't assume vmlinux.h-only case.

Also, please check if DATASEC and VARs can have decl_tags associated with them.

[...]

> @@ -143,6 +174,7 @@ static void btf_dump_printf(const struct btf_dump *d, const char *fmt, ...)
>
>  static int btf_dump_mark_referenced(struct btf_dump *d);
>  static int btf_dump_resize(struct btf_dump *d);
> +static int btf_dump_assign_decl_tags(struct btf_dump *d);
>
>  struct btf_dump *btf_dump__new(const struct btf *btf,
>                                btf_dump_printf_fn_t printf_fn,
> @@ -179,11 +211,21 @@ struct btf_dump *btf_dump__new(const struct btf *btf,
>                 d->ident_names = NULL;
>                 goto err;
>         }
> +       d->decl_tags = hashmap__new(identity_hash_fn, identity_equal_fn, NULL);
> +       if (IS_ERR(d->decl_tags)) {
> +               err = PTR_ERR(d->decl_tags);
> +               d->decl_tags = NULL;

nit: no need to clear out ERR pointer, hashmap__free() handles that properly

> +               goto err;
> +       }
>
>         err = btf_dump_resize(d);
>         if (err)
>                 goto err;
>
> +       err = btf_dump_assign_decl_tags(d);
> +       if (err)
> +               goto err;
> +
>         return d;
>  err:
>         btf_dump__free(d);
> @@ -232,7 +274,8 @@ static void btf_dump_free_names(struct hashmap *map)
>
>  void btf_dump__free(struct btf_dump *d)
>  {
> -       int i;
> +       int i, bkt;
> +       struct hashmap_entry *cur;
>
>         if (IS_ERR_OR_NULL(d))
>                 return;
> @@ -248,14 +291,22 @@ void btf_dump__free(struct btf_dump *d)
>         free(d->cached_names);
>         free(d->emit_queue);
>         free(d->decl_stack);
> -       btf_dump_free_names(d->type_names);
> -       btf_dump_free_names(d->ident_names);
> +       if (d->type_names)
> +               btf_dump_free_names(d->type_names);
> +       if (d->ident_names)
> +               btf_dump_free_names(d->ident_names);
> +       if (d->decl_tags) {
> +               hashmap__for_each_entry(d->decl_tags, cur, bkt)
> +                       free(cur->value);
> +               hashmap__free(d->decl_tags);

generalize btf_dump_free_names() to btf_dump_free_strs_map() and
handle IS_ERR_OR_NULL call internally?

> +       }
>
>         free(d);
>  }
>
>  static int btf_dump_order_type(struct btf_dump *d, __u32 id, bool through_ptr);
>  static void btf_dump_emit_type(struct btf_dump *d, __u32 id, __u32 cont_id);
> +static void btf_dump_maybe_define_btf_decl_tag(struct btf_dump *d);

naming nit: btf_dump_ensure_btf_decl_tag_macro() ?

>
>  /*
>   * Dump BTF type in a compilable C syntax, including all the necessary
> @@ -284,6 +335,8 @@ int btf_dump__dump_type(struct btf_dump *d, __u32 id)
>         if (err)
>                 return libbpf_err(err);
>
> +       btf_dump_maybe_define_btf_decl_tag(d);
> +
>         d->emit_queue_cnt = 0;
>         err = btf_dump_order_type(d, id, false);
>         if (err < 0)
> @@ -373,6 +426,61 @@ static int btf_dump_mark_referenced(struct btf_dump *d)
>         return 0;
>  }
>
> +/*
> + * This hashmap lookup is used in several places, so extract it as a
> + * function to hide all the ceremony with casts and NULL assignment.
> + */
> +static struct decl_tag_array *btf_dump_find_decl_tags(struct btf_dump *d, __u32 id)
> +{
> +       struct decl_tag_array *decl_tags = NULL;
> +
> +       hashmap__find(d->decl_tags, (void *)(uintptr_t)id, (void **)&decl_tags);
> +
> +       return decl_tags;
> +}
> +

with your hashmap void * -> long refactoring this is not necessary,
though, right?

> +/*
> + * Scans all BTF objects looking for BTF_KIND_DECL_TAG entries.
> + * The id's of the entries are stored in the `btf_dump.decl_tags` table,
> + * grouped by a target type.
> + */
> +static int btf_dump_assign_decl_tags(struct btf_dump *d)
> +{
> +       __u32 id, new_cnt, type_cnt = btf__type_cnt(d->btf);
> +       struct decl_tag_array *decl_tags;
> +       const struct btf_type *t;
> +       int err;
> +
> +       for (id = 1; id < type_cnt; id++) {
> +               t = btf__type_by_id(d->btf, id);
> +               if (!btf_is_decl_tag(t))
> +                       continue;
> +
> +               decl_tags = btf_dump_find_decl_tags(d, t->type);
> +               /* Assume small number of decl tags per id, increase array size by 1 */
> +               new_cnt = decl_tags ? decl_tags->cnt + 1 : 1;
> +               if (new_cnt > MAX_DECL_TAGS_PER_ID)
> +                       return -ERANGE;

why artificial limitations? user will pay the price proportional to
its BTF, and we don't really care as the memory is allocated
dynamically anyway

> +
> +               /* Allocate new_cnt + 1 to account for decl_tag_array header */
> +               decl_tags = libbpf_reallocarray(decl_tags, new_cnt + 1, sizeof(__u32));

oh, this new_cnt + 1 looks weird and error prone. we are reallocating
entire struct, not just an array, so realloc() makes more sense here.
How about:

decl_tags = realloc(decl_tags, sizeof(decl_tags) + new_cnt *
sizeof(decl_tags->tag_ids[0]));

?

> +               if (!decl_tags)
> +                       return -ENOMEM;
> +
> +               err = hashmap__insert(d->decl_tags, (void *)(uintptr_t)t->type, decl_tags,
> +                                     HASHMAP_SET, NULL, NULL);

why not using hashmap__set()?

> +               if (err) {
> +                       free(decl_tags);

hm... as this is written, it makes it look like double free can happen
if previous version of this pointer stays in d->decl_tags.

I think error shouldn't ever be returned because hashmap__insert()
won't be allocating any new memory, so I think it's best to leave a
small comment about this and just do:

(void)hashmap__set(d->decl_tag, t->type, (long)decl_tags, NULL, NULL);

and no error checking because we don't expect it to ever fail

> +                       return err;
> +               }
> +
> +               decl_tags->tag_ids[new_cnt - 1] = id;
> +               decl_tags->cnt = new_cnt;
> +       }
> +
> +       return 0;
> +}
> +
>  static int btf_dump_add_emit_queue_id(struct btf_dump *d, __u32 id)
>  {
>         __u32 *new_queue;
> @@ -899,6 +1007,51 @@ static void btf_dump_emit_bit_padding(const struct btf_dump *d,
>         }
>  }
>
> +/*
> + * Define __btf_decl_tag to be either __attribute__ or noop.
> + */
> +static void btf_dump_maybe_define_btf_decl_tag(struct btf_dump *d)
> +{
> +       if (d->btf_decl_tag_is_defined || !hashmap__size(d->decl_tags))
> +               return;
> +
> +       d->btf_decl_tag_is_defined = true;
> +       btf_dump_printf(d, "#if __has_attribute(btf_decl_tag)\n");
> +       btf_dump_printf(d, "#  define __btf_decl_tag(x) __attribute__((btf_decl_tag(x)))\n");
> +       btf_dump_printf(d, "#else\n");
> +       btf_dump_printf(d, "#  define __btf_decl_tag(x)\n");
> +       btf_dump_printf(d, "#endif\n\n");
> +}
> +

$ rg '#\s+define' | wc -l
44
$ rg '#define' | wc -l
696

not a big fan of this cuteness, #define is better IMO (more grep'able
as well, if anything)

> +/*
> + * Emits a list of __btf_decl_tag(...) attributes attached to some type.
> + * Decl tags attached to a type and to it's fields reside in a same
> + * list, thus use component_idx to filter out relevant tags:
> + * - component_idx == -1 designates the type itself;
> + * - component_idx >=  0 designates specific field.
> + */
> +static void btf_dump_emit_decl_tags(struct btf_dump *d,
> +                                   struct decl_tag_array *decl_tags,
> +                                   int component_idx)
> +{
> +       struct btf_type *decl_tag_t;

is there any ambiguity to justify verbose name? maybe just "t"?

> +       const char *decl_tag_text;
> +       struct btf_decl_tag *tag;
> +       __u32 i;
> +
> +       if (!decl_tags)
> +               return;
> +
> +       for (i = 0; i < decl_tags->cnt; ++i) {
> +               decl_tag_t = btf_type_by_id(d->btf, decl_tags->tag_ids[i]);
> +               tag = btf_decl_tag(decl_tag_t);
> +               if (tag->component_idx != component_idx)
> +                       continue;
> +               decl_tag_text = btf__name_by_offset(d->btf, decl_tag_t->name_off);
> +               btf_dump_printf(d, " __btf_decl_tag(\"%s\")", decl_tag_text);
> +       }
> +}
> +
>  static void btf_dump_emit_struct_fwd(struct btf_dump *d, __u32 id,
>                                      const struct btf_type *t)
>  {
> @@ -913,6 +1066,7 @@ static void btf_dump_emit_struct_def(struct btf_dump *d,
>                                      const struct btf_type *t,
>                                      int lvl)
>  {
> +       struct decl_tag_array *decl_tags = btf_dump_find_decl_tags(d, id);
>         const struct btf_member *m = btf_members(t);
>         bool is_struct = btf_is_struct(t);
>         int align, i, packed, off = 0;
> @@ -945,6 +1099,7 @@ static void btf_dump_emit_struct_def(struct btf_dump *d,
>                         m_sz = max((__s64)0, btf__resolve_size(d->btf, m->type));
>                         off = m_off + m_sz * 8;
>                 }
> +               btf_dump_emit_decl_tags(d, decl_tags, i);
>                 btf_dump_printf(d, ";");
>         }
>
> @@ -964,6 +1119,7 @@ static void btf_dump_emit_struct_def(struct btf_dump *d,
>         btf_dump_printf(d, "%s}", pfx(lvl));
>         if (packed)
>                 btf_dump_printf(d, " __attribute__((packed))");
> +       btf_dump_emit_decl_tags(d, decl_tags, -1);
>  }
>
>  static const char *missing_base_types[][2] = {
> @@ -1104,6 +1260,7 @@ static void btf_dump_emit_typedef_def(struct btf_dump *d, __u32 id,
>
>         btf_dump_printf(d, "typedef ");
>         btf_dump_emit_type_decl(d, t->type, name, lvl);
> +       btf_dump_emit_decl_tags(d, btf_dump_find_decl_tags(d, id), -1);
>  }
>
>  static int btf_dump_push_decl_stack_id(struct btf_dump *d, __u32 id)
> --
> 2.34.1
>
Eduard Zingerman Nov. 6, 2022, 9:40 p.m. UTC | #2
On Fri, 2022-11-04 at 13:54 -0700, Andrii Nakryiko wrote:
> On Thu, Nov 3, 2022 at 6:45 AM Eduard Zingerman <eddyz87@gmail.com> wrote:
> > 
> > Clang's `__attribute__((btf_decl_tag("...")))` is represented in BTF
> > as a record of kind BTF_KIND_DECL_TAG with `type` field pointing to
> > the type annotated with this attribute. This commit adds
> > reconstitution of such attributes for BTF dump in C format.
> > 
> > BTF doc says that BTF_KIND_DECL_TAGs should follow a target type but
> > this is not enforced and tests don't honor this restriction.
> > This commit uses hashmap to map types to the list of decl tags.
> > The hashmap is filled by `btf_dump_assign_decl_tags` function called
> > from `btf_dump__new`.
> > 
> > It is assumed that total number of types annotated with decl tags is
> > relatively small, thus some space is saved by using hashmap instead of
> > adding a new field to `struct btf_dump_type_aux_state`.
> > 
> > It is assumed that list of decl tags associated with a single type is
> > small. Thus the list is represented by an array which grows linearly.
> > 
> > To accommodate older Clang versions decl tags are dumped using the
> > following macro:
> > 
> >  #if __has_attribute(btf_decl_tag)
> >  #  define __btf_decl_tag(x) __attribute__((btf_decl_tag(x)))
> >  #else
> >  #  define __btf_decl_tag(x)
> >  #endif
> > 
> > The macro definition is emitted upon first call to `btf_dump__dump_type`.
> > 
> > Clang allows to attach btf_decl_tag attributes to the following kinds
> > of items:
> > - struct/union         supported
> > - struct/union field   supported
> > - typedef              supported
> > - function             not applicable
> > - function parameter   not applicable
> > - variable             not applicable
> > 
> > Signed-off-by: Eduard Zingerman <eddyz87@gmail.com>
> > ---
> >  tools/lib/bpf/btf_dump.c | 163 ++++++++++++++++++++++++++++++++++++++-
> >  1 file changed, 160 insertions(+), 3 deletions(-)
> > 
> 
> Functions and their args can also have tags. This works:
> 
> diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_decl_tag.c
> b/tools/testing/selftests/bpf/progs/btf_dump_test_case_decl_tag.c
> index 7a5af8b86065..75fcabe700cd 100644
> --- a/tools/testing/selftests/bpf/progs/btf_dump_test_case_decl_tag.c
> +++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_decl_tag.c
> @@ -54,7 +54,7 @@ struct root_struct {
> 
>  /* ------ END-EXPECTED-OUTPUT ------ */
> 
> -int f(struct root_struct *s)
> +int f(struct root_struct *s __btf_decl_tag("func_arg_tag"))
> __btf_decl_tag("func_tag")
>  {
>         return 0;
>  }
> 
> And I see correct BTF:
> 
> [26] FUNC 'f' type_id=25 linkage=global
> [27] DECL_TAG 'func_arg_tag' type_id=26 component_idx=0
> [28] DECL_TAG 'func_tag' type_id=26 component_idx=-1
> 
> So let's add support and test for that case as well. btf_dump
> shouldn't assume vmlinux.h-only case.
> 
> Also, please check if DATASEC and VARs can have decl_tags associated with them.
> 
> [...]
> 
> > @@ -143,6 +174,7 @@ static void btf_dump_printf(const struct btf_dump *d, const char *fmt, ...)
> > 
> >  static int btf_dump_mark_referenced(struct btf_dump *d);
> >  static int btf_dump_resize(struct btf_dump *d);
> > +static int btf_dump_assign_decl_tags(struct btf_dump *d);
> > 
> >  struct btf_dump *btf_dump__new(const struct btf *btf,
> >                                btf_dump_printf_fn_t printf_fn,
> > @@ -179,11 +211,21 @@ struct btf_dump *btf_dump__new(const struct btf *btf,
> >                 d->ident_names = NULL;
> >                 goto err;
> >         }
> > +       d->decl_tags = hashmap__new(identity_hash_fn, identity_equal_fn, NULL);
> > +       if (IS_ERR(d->decl_tags)) {
> > +               err = PTR_ERR(d->decl_tags);
> > +               d->decl_tags = NULL;
> 
> nit: no need to clear out ERR pointer, hashmap__free() handles that properly

The `err` is passed to `libbpf_err_ptr` at the end of the function:

struct btf_dump *btf_dump__new(...)
{
	...
err:
	btf_dump__free(d);
	return libbpf_err_ptr(err);
}

The `libbpf_err_ptr` uses it to update the `errno` global. So I think
that PTR_ERR call is not redundant in this case.

> 
> > +               goto err;
> > +       }
> > 
> >         err = btf_dump_resize(d);
> >         if (err)
> >                 goto err;
> > 
> > +       err = btf_dump_assign_decl_tags(d);
> > +       if (err)
> > +               goto err;
> > +
> >         return d;
> >  err:
> >         btf_dump__free(d);
> > @@ -232,7 +274,8 @@ static void btf_dump_free_names(struct hashmap *map)
> > 
> >  void btf_dump__free(struct btf_dump *d)
> >  {
> > -       int i;
> > +       int i, bkt;
> > +       struct hashmap_entry *cur;
> > 
> >         if (IS_ERR_OR_NULL(d))
> >                 return;
> > @@ -248,14 +291,22 @@ void btf_dump__free(struct btf_dump *d)
> >         free(d->cached_names);
> >         free(d->emit_queue);
> >         free(d->decl_stack);
> > -       btf_dump_free_names(d->type_names);
> > -       btf_dump_free_names(d->ident_names);
> > +       if (d->type_names)
> > +               btf_dump_free_names(d->type_names);
> > +       if (d->ident_names)
> > +               btf_dump_free_names(d->ident_names);
> > +       if (d->decl_tags) {
> > +               hashmap__for_each_entry(d->decl_tags, cur, bkt)
> > +                       free(cur->value);
> > +               hashmap__free(d->decl_tags);
> 
> generalize btf_dump_free_names() to btf_dump_free_strs_map() and
> handle IS_ERR_OR_NULL call internally?
> 
> > +       }
> > 
> >         free(d);
> >  }
> > 
> >  static int btf_dump_order_type(struct btf_dump *d, __u32 id, bool through_ptr);
> >  static void btf_dump_emit_type(struct btf_dump *d, __u32 id, __u32 cont_id);
> > +static void btf_dump_maybe_define_btf_decl_tag(struct btf_dump *d);
> 
> naming nit: btf_dump_ensure_btf_decl_tag_macro() ?
> 
> > 
> >  /*
> >   * Dump BTF type in a compilable C syntax, including all the necessary
> > @@ -284,6 +335,8 @@ int btf_dump__dump_type(struct btf_dump *d, __u32 id)
> >         if (err)
> >                 return libbpf_err(err);
> > 
> > +       btf_dump_maybe_define_btf_decl_tag(d);
> > +
> >         d->emit_queue_cnt = 0;
> >         err = btf_dump_order_type(d, id, false);
> >         if (err < 0)
> > @@ -373,6 +426,61 @@ static int btf_dump_mark_referenced(struct btf_dump *d)
> >         return 0;
> >  }
> > 
> > +/*
> > + * This hashmap lookup is used in several places, so extract it as a
> > + * function to hide all the ceremony with casts and NULL assignment.
> > + */
> > +static struct decl_tag_array *btf_dump_find_decl_tags(struct btf_dump *d, __u32 id)
> > +{
> > +       struct decl_tag_array *decl_tags = NULL;
> > +
> > +       hashmap__find(d->decl_tags, (void *)(uintptr_t)id, (void **)&decl_tags);
> > +
> > +       return decl_tags;
> > +}
> > +
> 
> with your hashmap void * -> long refactoring this is not necessary,
> though, right?

If that refactoring is accepted the casts would go away, but it is
still convenient for me to have a function returning pointer for the
in the btf_dump_emit_typedef_def. I can inline it in all three call
locations, but I think it is a bit cleaner this way.

> 
> > +/*
> > + * Scans all BTF objects looking for BTF_KIND_DECL_TAG entries.
> > + * The id's of the entries are stored in the `btf_dump.decl_tags` table,
> > + * grouped by a target type.
> > + */
> > +static int btf_dump_assign_decl_tags(struct btf_dump *d)
> > +{
> > +       __u32 id, new_cnt, type_cnt = btf__type_cnt(d->btf);
> > +       struct decl_tag_array *decl_tags;
> > +       const struct btf_type *t;
> > +       int err;
> > +
> > +       for (id = 1; id < type_cnt; id++) {
> > +               t = btf__type_by_id(d->btf, id);
> > +               if (!btf_is_decl_tag(t))
> > +                       continue;
> > +
> > +               decl_tags = btf_dump_find_decl_tags(d, t->type);
> > +               /* Assume small number of decl tags per id, increase array size by 1 */
> > +               new_cnt = decl_tags ? decl_tags->cnt + 1 : 1;
> > +               if (new_cnt > MAX_DECL_TAGS_PER_ID)
> > +                       return -ERANGE;
> 
> why artificial limitations? user will pay the price proportional to
> its BTF, and we don't really care as the memory is allocated
> dynamically anyway

Since you requested to change allocation strategy from buffer doubling
to +1 I figured that this point would get unusably slow for some large
enough value. I'll remove this limitation.

> 
> > +
> > +               /* Allocate new_cnt + 1 to account for decl_tag_array header */
> > +               decl_tags = libbpf_reallocarray(decl_tags, new_cnt + 1, sizeof(__u32));
> 
> oh, this new_cnt + 1 looks weird and error prone. we are reallocating
> entire struct, not just an array, so realloc() makes more sense here.
> How about:
> 
> decl_tags = realloc(decl_tags, sizeof(decl_tags) + new_cnt *
> sizeof(decl_tags->tag_ids[0]));
> 
> ?

Ok, will replace with realloc.

> 
> > +               if (!decl_tags)
> > +                       return -ENOMEM;
> > +
> > +               err = hashmap__insert(d->decl_tags, (void *)(uintptr_t)t->type, decl_tags,
> > +                                     HASHMAP_SET, NULL, NULL);
> 
> why not using hashmap__set()?
> 
> > +               if (err) {
> > +                       free(decl_tags);
> 
> hm... as this is written, it makes it look like double free can happen
> if previous version of this pointer stays in d->decl_tags.

It can indeed, thank you for catching this.

> 
> I think error shouldn't ever be returned because hashmap__insert()
> won't be allocating any new memory, so I think it's best to leave a
> small comment about this and just do:
> 
> (void)hashmap__set(d->decl_tag, t->type, (long)decl_tags, NULL, NULL);
> 
> and no error checking because we don't expect it to ever fail
> 
> > +                       return err;
> > +               }
> > +
> > +               decl_tags->tag_ids[new_cnt - 1] = id;
> > +               decl_tags->cnt = new_cnt;
> > +       }
> > +
> > +       return 0;
> > +}
> > +
> >  static int btf_dump_add_emit_queue_id(struct btf_dump *d, __u32 id)
> >  {
> >         __u32 *new_queue;
> > @@ -899,6 +1007,51 @@ static void btf_dump_emit_bit_padding(const struct btf_dump *d,
> >         }
> >  }
> > 
> > +/*
> > + * Define __btf_decl_tag to be either __attribute__ or noop.
> > + */
> > +static void btf_dump_maybe_define_btf_decl_tag(struct btf_dump *d)
> > +{
> > +       if (d->btf_decl_tag_is_defined || !hashmap__size(d->decl_tags))
> > +               return;
> > +
> > +       d->btf_decl_tag_is_defined = true;
> > +       btf_dump_printf(d, "#if __has_attribute(btf_decl_tag)\n");
> > +       btf_dump_printf(d, "#  define __btf_decl_tag(x) __attribute__((btf_decl_tag(x)))\n");
> > +       btf_dump_printf(d, "#else\n");
> > +       btf_dump_printf(d, "#  define __btf_decl_tag(x)\n");
> > +       btf_dump_printf(d, "#endif\n\n");
> > +}
> > +
> 
> $ rg '#\s+define' | wc -l
> 44
> $ rg '#define' | wc -l
> 696
> 
> not a big fan of this cuteness, #define is better IMO (more grep'able
> as well, if anything)
> 
> > +/*
> > + * Emits a list of __btf_decl_tag(...) attributes attached to some type.
> > + * Decl tags attached to a type and to it's fields reside in a same
> > + * list, thus use component_idx to filter out relevant tags:
> > + * - component_idx == -1 designates the type itself;
> > + * - component_idx >=  0 designates specific field.
> > + */
> > +static void btf_dump_emit_decl_tags(struct btf_dump *d,
> > +                                   struct decl_tag_array *decl_tags,
> > +                                   int component_idx)
> > +{
> > +       struct btf_type *decl_tag_t;
> 
> is there any ambiguity to justify verbose name? maybe just "t"?
> 
> > +       const char *decl_tag_text;
> > +       struct btf_decl_tag *tag;
> > +       __u32 i;
> > +
> > +       if (!decl_tags)
> > +               return;
> > +
> > +       for (i = 0; i < decl_tags->cnt; ++i) {
> > +               decl_tag_t = btf_type_by_id(d->btf, decl_tags->tag_ids[i]);
> > +               tag = btf_decl_tag(decl_tag_t);
> > +               if (tag->component_idx != component_idx)
> > +                       continue;
> > +               decl_tag_text = btf__name_by_offset(d->btf, decl_tag_t->name_off);
> > +               btf_dump_printf(d, " __btf_decl_tag(\"%s\")", decl_tag_text);
> > +       }
> > +}
> > +
> >  static void btf_dump_emit_struct_fwd(struct btf_dump *d, __u32 id,
> >                                      const struct btf_type *t)
> >  {
> > @@ -913,6 +1066,7 @@ static void btf_dump_emit_struct_def(struct btf_dump *d,
> >                                      const struct btf_type *t,
> >                                      int lvl)
> >  {
> > +       struct decl_tag_array *decl_tags = btf_dump_find_decl_tags(d, id);
> >         const struct btf_member *m = btf_members(t);
> >         bool is_struct = btf_is_struct(t);
> >         int align, i, packed, off = 0;
> > @@ -945,6 +1099,7 @@ static void btf_dump_emit_struct_def(struct btf_dump *d,
> >                         m_sz = max((__s64)0, btf__resolve_size(d->btf, m->type));
> >                         off = m_off + m_sz * 8;
> >                 }
> > +               btf_dump_emit_decl_tags(d, decl_tags, i);
> >                 btf_dump_printf(d, ";");
> >         }
> > 
> > @@ -964,6 +1119,7 @@ static void btf_dump_emit_struct_def(struct btf_dump *d,
> >         btf_dump_printf(d, "%s}", pfx(lvl));
> >         if (packed)
> >                 btf_dump_printf(d, " __attribute__((packed))");
> > +       btf_dump_emit_decl_tags(d, decl_tags, -1);
> >  }
> > 
> >  static const char *missing_base_types[][2] = {
> > @@ -1104,6 +1260,7 @@ static void btf_dump_emit_typedef_def(struct btf_dump *d, __u32 id,
> > 
> >         btf_dump_printf(d, "typedef ");
> >         btf_dump_emit_type_decl(d, t->type, name, lvl);
> > +       btf_dump_emit_decl_tags(d, btf_dump_find_decl_tags(d, id), -1);
> >  }
> > 
> >  static int btf_dump_push_decl_stack_id(struct btf_dump *d, __u32 id)
> > --
> > 2.34.1
> >
Eduard Zingerman Nov. 7, 2022, 3:35 p.m. UTC | #3
On Fri, 2022-11-04 at 13:54 -0700, Andrii Nakryiko wrote:
> On Thu, Nov 3, 2022 at 6:45 AM Eduard Zingerman <eddyz87@gmail.com> wrote:
> > 
> > Clang's `__attribute__((btf_decl_tag("...")))` is represented in BTF
> > as a record of kind BTF_KIND_DECL_TAG with `type` field pointing to
> > the type annotated with this attribute. This commit adds
> > reconstitution of such attributes for BTF dump in C format.
> > 
> > BTF doc says that BTF_KIND_DECL_TAGs should follow a target type but
> > this is not enforced and tests don't honor this restriction.
> > This commit uses hashmap to map types to the list of decl tags.
> > The hashmap is filled by `btf_dump_assign_decl_tags` function called
> > from `btf_dump__new`.
> > 
> > It is assumed that total number of types annotated with decl tags is
> > relatively small, thus some space is saved by using hashmap instead of
> > adding a new field to `struct btf_dump_type_aux_state`.
> > 
> > It is assumed that list of decl tags associated with a single type is
> > small. Thus the list is represented by an array which grows linearly.
> > 
> > To accommodate older Clang versions decl tags are dumped using the
> > following macro:
> > 
> >  #if __has_attribute(btf_decl_tag)
> >  #  define __btf_decl_tag(x) __attribute__((btf_decl_tag(x)))
> >  #else
> >  #  define __btf_decl_tag(x)
> >  #endif
> > 
> > The macro definition is emitted upon first call to `btf_dump__dump_type`.
> > 
> > Clang allows to attach btf_decl_tag attributes to the following kinds
> > of items:
> > - struct/union         supported
> > - struct/union field   supported
> > - typedef              supported
> > - function             not applicable
> > - function parameter   not applicable
> > - variable             not applicable
> > 
> > Signed-off-by: Eduard Zingerman <eddyz87@gmail.com>
> > ---
> >  tools/lib/bpf/btf_dump.c | 163 ++++++++++++++++++++++++++++++++++++++-
> >  1 file changed, 160 insertions(+), 3 deletions(-)
> > 
> 
> Functions and their args can also have tags. This works:
> 
> diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_decl_tag.c
> b/tools/testing/selftests/bpf/progs/btf_dump_test_case_decl_tag.c
> index 7a5af8b86065..75fcabe700cd 100644
> --- a/tools/testing/selftests/bpf/progs/btf_dump_test_case_decl_tag.c
> +++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_decl_tag.c
> @@ -54,7 +54,7 @@ struct root_struct {
> 
>  /* ------ END-EXPECTED-OUTPUT ------ */
> 
> -int f(struct root_struct *s)
> +int f(struct root_struct *s __btf_decl_tag("func_arg_tag"))
> __btf_decl_tag("func_tag")
>  {
>         return 0;
>  }
> 
> And I see correct BTF:
> 
> [26] FUNC 'f' type_id=25 linkage=global
> [27] DECL_TAG 'func_arg_tag' type_id=26 component_idx=0
> [28] DECL_TAG 'func_tag' type_id=26 component_idx=-1
> 
> So let's add support and test for that case as well. btf_dump
> shouldn't assume vmlinux.h-only case.
> 
> Also, please check if DATASEC and VARs can have decl_tags associated with them.

I see that right now decl tags are saved for:
- BTF_KIND_VAR
- BTF_KIND_FUNC
- BTF_KIND_FUNC arguments

Decl tags are lost but legal for:
- BTF_KIND_FUNC_PROTO arguments

I have not found a way to attach decl tag to DATASEC.

For BTF_KIND_FUNC_PROTO  arguments it would  be great to  update clang
first. Then  it would be  possible to keep all  decl tags checks  as a
single  `btf_dump_test_case`.  On  the   other  hand  this  will  make
testsuite dependent on the latest clang version, which is not great. I
can add a test with hand-crafted BTF instead. Which way is preferable?

BTF_KIND_FUNC is ignored by `btf_dump__dump_type_data`
(via `btf_dump_unsupported_data`).

BTF_KIND_VAR is dumped but current  testing infrastructure is not very
convenient, it only checks for  some variables defined in vmlinux BTF.
I can write a  test that accepts a custom built BTF  but this is still
inferior   to  what   `test_btf_dump_case`  provides.   I've  extended
`test_btf_dump_case` to print DATASEC  with subordinate vars alongside
the type definitions instead.

------

$ cat test.c 
#define __btf_decl_tag(x) __attribute__((btf_decl_tag(x)))

int var __btf_decl_tag("var_tag");

struct root {
  int a;
  int (*b)(int x __btf_decl_tag("arg_tag_proto")) __btf_decl_tag("field_tag");
};

int foo(struct root *x __btf_decl_tag("arg_tag_fn")) __btf_decl_tag("func_tag_fn") {
  return 0;
}
$ clang -g -O2 -mcpu=v3 -target bpf -c test.c -o test.o
$ bpftool btf dump file test.o
[1] PTR '(anon)' type_id=2
[2] STRUCT 'root' size=16 vlen=2
	'a' type_id=3 bits_offset=0
	'b' type_id=4 bits_offset=64
[3] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED
[4] PTR '(anon)' type_id=5
[5] FUNC_PROTO '(anon)' ret_type_id=3 vlen=1
	'(anon)' type_id=3
[6] DECL_TAG 'field_tag' type_id=2 component_idx=1
[7] FUNC_PROTO '(anon)' ret_type_id=3 vlen=1
	'x' type_id=1
[8] FUNC 'foo' type_id=7 linkage=global
[9] DECL_TAG 'arg_tag_fn' type_id=8 component_idx=0
[10] DECL_TAG 'func_tag_fn' type_id=8 component_idx=-1
[11] VAR 'var' type_id=3, linkage=global
[12] DECL_TAG 'var_tag' type_id=11 component_idx=-1
[13] DATASEC '.bss' size=0 vlen=1
	type_id=11 offset=0 size=4 (VAR 'var')

> [...]
> 
> > @@ -143,6 +174,7 @@ static void btf_dump_printf(const struct btf_dump *d, const char *fmt, ...)
> > 
> >  static int btf_dump_mark_referenced(struct btf_dump *d);
> >  static int btf_dump_resize(struct btf_dump *d);
> > +static int btf_dump_assign_decl_tags(struct btf_dump *d);
> > 
> >  struct btf_dump *btf_dump__new(const struct btf *btf,
> >                                btf_dump_printf_fn_t printf_fn,
> > @@ -179,11 +211,21 @@ struct btf_dump *btf_dump__new(const struct btf *btf,
> >                 d->ident_names = NULL;
> >                 goto err;
> >         }
> > +       d->decl_tags = hashmap__new(identity_hash_fn, identity_equal_fn, NULL);
> > +       if (IS_ERR(d->decl_tags)) {
> > +               err = PTR_ERR(d->decl_tags);
> > +               d->decl_tags = NULL;
> 
> nit: no need to clear out ERR pointer, hashmap__free() handles that properly
> 
> > +               goto err;
> > +       }
> > 
> >         err = btf_dump_resize(d);
> >         if (err)
> >                 goto err;
> > 
> > +       err = btf_dump_assign_decl_tags(d);
> > +       if (err)
> > +               goto err;
> > +
> >         return d;
> >  err:
> >         btf_dump__free(d);
> > @@ -232,7 +274,8 @@ static void btf_dump_free_names(struct hashmap *map)
> > 
> >  void btf_dump__free(struct btf_dump *d)
> >  {
> > -       int i;
> > +       int i, bkt;
> > +       struct hashmap_entry *cur;
> > 
> >         if (IS_ERR_OR_NULL(d))
> >                 return;
> > @@ -248,14 +291,22 @@ void btf_dump__free(struct btf_dump *d)
> >         free(d->cached_names);
> >         free(d->emit_queue);
> >         free(d->decl_stack);
> > -       btf_dump_free_names(d->type_names);
> > -       btf_dump_free_names(d->ident_names);
> > +       if (d->type_names)
> > +               btf_dump_free_names(d->type_names);
> > +       if (d->ident_names)
> > +               btf_dump_free_names(d->ident_names);
> > +       if (d->decl_tags) {
> > +               hashmap__for_each_entry(d->decl_tags, cur, bkt)
> > +                       free(cur->value);
> > +               hashmap__free(d->decl_tags);
> 
> generalize btf_dump_free_names() to btf_dump_free_strs_map() and
> handle IS_ERR_OR_NULL call internally?
> 
> > +       }
> > 
> >         free(d);
> >  }
> > 
> >  static int btf_dump_order_type(struct btf_dump *d, __u32 id, bool through_ptr);
> >  static void btf_dump_emit_type(struct btf_dump *d, __u32 id, __u32 cont_id);
> > +static void btf_dump_maybe_define_btf_decl_tag(struct btf_dump *d);
> 
> naming nit: btf_dump_ensure_btf_decl_tag_macro() ?
> 
> > 
> >  /*
> >   * Dump BTF type in a compilable C syntax, including all the necessary
> > @@ -284,6 +335,8 @@ int btf_dump__dump_type(struct btf_dump *d, __u32 id)
> >         if (err)
> >                 return libbpf_err(err);
> > 
> > +       btf_dump_maybe_define_btf_decl_tag(d);
> > +
> >         d->emit_queue_cnt = 0;
> >         err = btf_dump_order_type(d, id, false);
> >         if (err < 0)
> > @@ -373,6 +426,61 @@ static int btf_dump_mark_referenced(struct btf_dump *d)
> >         return 0;
> >  }
> > 
> > +/*
> > + * This hashmap lookup is used in several places, so extract it as a
> > + * function to hide all the ceremony with casts and NULL assignment.
> > + */
> > +static struct decl_tag_array *btf_dump_find_decl_tags(struct btf_dump *d, __u32 id)
> > +{
> > +       struct decl_tag_array *decl_tags = NULL;
> > +
> > +       hashmap__find(d->decl_tags, (void *)(uintptr_t)id, (void **)&decl_tags);
> > +
> > +       return decl_tags;
> > +}
> > +
> 
> with your hashmap void * -> long refactoring this is not necessary,
> though, right?
> 
> > +/*
> > + * Scans all BTF objects looking for BTF_KIND_DECL_TAG entries.
> > + * The id's of the entries are stored in the `btf_dump.decl_tags` table,
> > + * grouped by a target type.
> > + */
> > +static int btf_dump_assign_decl_tags(struct btf_dump *d)
> > +{
> > +       __u32 id, new_cnt, type_cnt = btf__type_cnt(d->btf);
> > +       struct decl_tag_array *decl_tags;
> > +       const struct btf_type *t;
> > +       int err;
> > +
> > +       for (id = 1; id < type_cnt; id++) {
> > +               t = btf__type_by_id(d->btf, id);
> > +               if (!btf_is_decl_tag(t))
> > +                       continue;
> > +
> > +               decl_tags = btf_dump_find_decl_tags(d, t->type);
> > +               /* Assume small number of decl tags per id, increase array size by 1 */
> > +               new_cnt = decl_tags ? decl_tags->cnt + 1 : 1;
> > +               if (new_cnt > MAX_DECL_TAGS_PER_ID)
> > +                       return -ERANGE;
> 
> why artificial limitations? user will pay the price proportional to
> its BTF, and we don't really care as the memory is allocated
> dynamically anyway
> 
> > +
> > +               /* Allocate new_cnt + 1 to account for decl_tag_array header */
> > +               decl_tags = libbpf_reallocarray(decl_tags, new_cnt + 1, sizeof(__u32));
> 
> oh, this new_cnt + 1 looks weird and error prone. we are reallocating
> entire struct, not just an array, so realloc() makes more sense here.
> How about:
> 
> decl_tags = realloc(decl_tags, sizeof(decl_tags) + new_cnt *
> sizeof(decl_tags->tag_ids[0]));
> 
> ?
> 
> > +               if (!decl_tags)
> > +                       return -ENOMEM;
> > +
> > +               err = hashmap__insert(d->decl_tags, (void *)(uintptr_t)t->type, decl_tags,
> > +                                     HASHMAP_SET, NULL, NULL);
> 
> why not using hashmap__set()?
> 
> > +               if (err) {
> > +                       free(decl_tags);
> 
> hm... as this is written, it makes it look like double free can happen
> if previous version of this pointer stays in d->decl_tags.
> 
> I think error shouldn't ever be returned because hashmap__insert()
> won't be allocating any new memory, so I think it's best to leave a
> small comment about this and just do:
> 
> (void)hashmap__set(d->decl_tag, t->type, (long)decl_tags, NULL, NULL);
> 
> and no error checking because we don't expect it to ever fail
> 
> > +                       return err;
> > +               }
> > +
> > +               decl_tags->tag_ids[new_cnt - 1] = id;
> > +               decl_tags->cnt = new_cnt;
> > +       }
> > +
> > +       return 0;
> > +}
> > +
> >  static int btf_dump_add_emit_queue_id(struct btf_dump *d, __u32 id)
> >  {
> >         __u32 *new_queue;
> > @@ -899,6 +1007,51 @@ static void btf_dump_emit_bit_padding(const struct btf_dump *d,
> >         }
> >  }
> > 
> > +/*
> > + * Define __btf_decl_tag to be either __attribute__ or noop.
> > + */
> > +static void btf_dump_maybe_define_btf_decl_tag(struct btf_dump *d)
> > +{
> > +       if (d->btf_decl_tag_is_defined || !hashmap__size(d->decl_tags))
> > +               return;
> > +
> > +       d->btf_decl_tag_is_defined = true;
> > +       btf_dump_printf(d, "#if __has_attribute(btf_decl_tag)\n");
> > +       btf_dump_printf(d, "#  define __btf_decl_tag(x) __attribute__((btf_decl_tag(x)))\n");
> > +       btf_dump_printf(d, "#else\n");
> > +       btf_dump_printf(d, "#  define __btf_decl_tag(x)\n");
> > +       btf_dump_printf(d, "#endif\n\n");
> > +}
> > +
> 
> $ rg '#\s+define' | wc -l
> 44
> $ rg '#define' | wc -l
> 696
> 
> not a big fan of this cuteness, #define is better IMO (more grep'able
> as well, if anything)
> 
> > +/*
> > + * Emits a list of __btf_decl_tag(...) attributes attached to some type.
> > + * Decl tags attached to a type and to it's fields reside in a same
> > + * list, thus use component_idx to filter out relevant tags:
> > + * - component_idx == -1 designates the type itself;
> > + * - component_idx >=  0 designates specific field.
> > + */
> > +static void btf_dump_emit_decl_tags(struct btf_dump *d,
> > +                                   struct decl_tag_array *decl_tags,
> > +                                   int component_idx)
> > +{
> > +       struct btf_type *decl_tag_t;
> 
> is there any ambiguity to justify verbose name? maybe just "t"?
> 
> > +       const char *decl_tag_text;
> > +       struct btf_decl_tag *tag;
> > +       __u32 i;
> > +
> > +       if (!decl_tags)
> > +               return;
> > +
> > +       for (i = 0; i < decl_tags->cnt; ++i) {
> > +               decl_tag_t = btf_type_by_id(d->btf, decl_tags->tag_ids[i]);
> > +               tag = btf_decl_tag(decl_tag_t);
> > +               if (tag->component_idx != component_idx)
> > +                       continue;
> > +               decl_tag_text = btf__name_by_offset(d->btf, decl_tag_t->name_off);
> > +               btf_dump_printf(d, " __btf_decl_tag(\"%s\")", decl_tag_text);
> > +       }
> > +}
> > +
> >  static void btf_dump_emit_struct_fwd(struct btf_dump *d, __u32 id,
> >                                      const struct btf_type *t)
> >  {
> > @@ -913,6 +1066,7 @@ static void btf_dump_emit_struct_def(struct btf_dump *d,
> >                                      const struct btf_type *t,
> >                                      int lvl)
> >  {
> > +       struct decl_tag_array *decl_tags = btf_dump_find_decl_tags(d, id);
> >         const struct btf_member *m = btf_members(t);
> >         bool is_struct = btf_is_struct(t);
> >         int align, i, packed, off = 0;
> > @@ -945,6 +1099,7 @@ static void btf_dump_emit_struct_def(struct btf_dump *d,
> >                         m_sz = max((__s64)0, btf__resolve_size(d->btf, m->type));
> >                         off = m_off + m_sz * 8;
> >                 }
> > +               btf_dump_emit_decl_tags(d, decl_tags, i);
> >                 btf_dump_printf(d, ";");
> >         }
> > 
> > @@ -964,6 +1119,7 @@ static void btf_dump_emit_struct_def(struct btf_dump *d,
> >         btf_dump_printf(d, "%s}", pfx(lvl));
> >         if (packed)
> >                 btf_dump_printf(d, " __attribute__((packed))");
> > +       btf_dump_emit_decl_tags(d, decl_tags, -1);
> >  }
> > 
> >  static const char *missing_base_types[][2] = {
> > @@ -1104,6 +1260,7 @@ static void btf_dump_emit_typedef_def(struct btf_dump *d, __u32 id,
> > 
> >         btf_dump_printf(d, "typedef ");
> >         btf_dump_emit_type_decl(d, t->type, name, lvl);
> > +       btf_dump_emit_decl_tags(d, btf_dump_find_decl_tags(d, id), -1);
> >  }
> > 
> >  static int btf_dump_push_decl_stack_id(struct btf_dump *d, __u32 id)
> > --
> > 2.34.1
> >
Andrii Nakryiko Nov. 7, 2022, 10:52 p.m. UTC | #4
On Sun, Nov 6, 2022 at 1:40 PM Eduard Zingerman <eddyz87@gmail.com> wrote:
>
> On Fri, 2022-11-04 at 13:54 -0700, Andrii Nakryiko wrote:
> > On Thu, Nov 3, 2022 at 6:45 AM Eduard Zingerman <eddyz87@gmail.com> wrote:
> > >
> > > Clang's `__attribute__((btf_decl_tag("...")))` is represented in BTF
> > > as a record of kind BTF_KIND_DECL_TAG with `type` field pointing to
> > > the type annotated with this attribute. This commit adds
> > > reconstitution of such attributes for BTF dump in C format.
> > >
> > > BTF doc says that BTF_KIND_DECL_TAGs should follow a target type but
> > > this is not enforced and tests don't honor this restriction.
> > > This commit uses hashmap to map types to the list of decl tags.
> > > The hashmap is filled by `btf_dump_assign_decl_tags` function called
> > > from `btf_dump__new`.
> > >
> > > It is assumed that total number of types annotated with decl tags is
> > > relatively small, thus some space is saved by using hashmap instead of
> > > adding a new field to `struct btf_dump_type_aux_state`.
> > >
> > > It is assumed that list of decl tags associated with a single type is
> > > small. Thus the list is represented by an array which grows linearly.
> > >
> > > To accommodate older Clang versions decl tags are dumped using the
> > > following macro:
> > >
> > >  #if __has_attribute(btf_decl_tag)
> > >  #  define __btf_decl_tag(x) __attribute__((btf_decl_tag(x)))
> > >  #else
> > >  #  define __btf_decl_tag(x)
> > >  #endif
> > >
> > > The macro definition is emitted upon first call to `btf_dump__dump_type`.
> > >
> > > Clang allows to attach btf_decl_tag attributes to the following kinds
> > > of items:
> > > - struct/union         supported
> > > - struct/union field   supported
> > > - typedef              supported
> > > - function             not applicable
> > > - function parameter   not applicable
> > > - variable             not applicable
> > >
> > > Signed-off-by: Eduard Zingerman <eddyz87@gmail.com>
> > > ---
> > >  tools/lib/bpf/btf_dump.c | 163 ++++++++++++++++++++++++++++++++++++++-
> > >  1 file changed, 160 insertions(+), 3 deletions(-)
> > >
> >

[...]

> > >  struct btf_dump *btf_dump__new(const struct btf *btf,
> > >                                btf_dump_printf_fn_t printf_fn,
> > > @@ -179,11 +211,21 @@ struct btf_dump *btf_dump__new(const struct btf *btf,
> > >                 d->ident_names = NULL;
> > >                 goto err;
> > >         }
> > > +       d->decl_tags = hashmap__new(identity_hash_fn, identity_equal_fn, NULL);
> > > +       if (IS_ERR(d->decl_tags)) {
> > > +               err = PTR_ERR(d->decl_tags);
> > > +               d->decl_tags = NULL;
> >
> > nit: no need to clear out ERR pointer, hashmap__free() handles that properly
>
> The `err` is passed to `libbpf_err_ptr` at the end of the function:
>
> struct btf_dump *btf_dump__new(...)
> {
>         ...
> err:
>         btf_dump__free(d);
>         return libbpf_err_ptr(err);
> }
>
> The `libbpf_err_ptr` uses it to update the `errno` global. So I think
> that PTR_ERR call is not redundant in this case.
>

I was talking about `d->decl_tags = NULL;`, you don't need to do that,
hashmap__free() handles such non-NULL error pointer just fine.


> >
> > > +               goto err;
> > > +       }
> > >
> > >         err = btf_dump_resize(d);
> > >         if (err)
> > >                 goto err;
> > >
> > > +       err = btf_dump_assign_decl_tags(d);
> > > +       if (err)
> > > +               goto err;
> > > +
> > >         return d;
> > >  err:
> > >         btf_dump__free(d);

[...]

> > >
> > > +/*
> > > + * This hashmap lookup is used in several places, so extract it as a
> > > + * function to hide all the ceremony with casts and NULL assignment.
> > > + */
> > > +static struct decl_tag_array *btf_dump_find_decl_tags(struct btf_dump *d, __u32 id)
> > > +{
> > > +       struct decl_tag_array *decl_tags = NULL;
> > > +
> > > +       hashmap__find(d->decl_tags, (void *)(uintptr_t)id, (void **)&decl_tags);
> > > +
> > > +       return decl_tags;
> > > +}
> > > +
> >
> > with your hashmap void * -> long refactoring this is not necessary,
> > though, right?
>
> If that refactoring is accepted the casts would go away, but it is
> still convenient for me to have a function returning pointer for the
> in the btf_dump_emit_typedef_def. I can inline it in all three call
> locations, but I think it is a bit cleaner this way.
>

I think

if (!hashmap__find(d->decl_tags, id, (void **)&decl_tags)) {
  /* handle NULL case */
}

is just as readable. Let's not add unnecessary helpers.

> >
> > > +/*
> > > + * Scans all BTF objects looking for BTF_KIND_DECL_TAG entries.
> > > + * The id's of the entries are stored in the `btf_dump.decl_tags` table,
> > > + * grouped by a target type.
> > > + */
> > > +static int btf_dump_assign_decl_tags(struct btf_dump *d)
> > > +{
> > > +       __u32 id, new_cnt, type_cnt = btf__type_cnt(d->btf);
> > > +       struct decl_tag_array *decl_tags;
> > > +       const struct btf_type *t;
> > > +       int err;
> > > +
> > > +       for (id = 1; id < type_cnt; id++) {
> > > +               t = btf__type_by_id(d->btf, id);
> > > +               if (!btf_is_decl_tag(t))
> > > +                       continue;
> > > +
> > > +               decl_tags = btf_dump_find_decl_tags(d, t->type);
> > > +               /* Assume small number of decl tags per id, increase array size by 1 */
> > > +               new_cnt = decl_tags ? decl_tags->cnt + 1 : 1;
> > > +               if (new_cnt > MAX_DECL_TAGS_PER_ID)
> > > +                       return -ERANGE;
> >
> > why artificial limitations? user will pay the price proportional to
> > its BTF, and we don't really care as the memory is allocated
> > dynamically anyway
>
> Since you requested to change allocation strategy from buffer doubling
> to +1 I figured that this point would get unusably slow for some large
> enough value. I'll remove this limitation.

allocators like jemalloc don't actually reallocate internally on every
realloc() call, they just adjust size if the value stays within the
same size bucket, so while you can micro-optimize to avoid unnecessary
calls to realloc() (but not necessarily reallocations themselves),
it's not that critical in practice, especially somewhere where we
don't expects many thousands of calls

>
> >
> > > +
> > > +               /* Allocate new_cnt + 1 to account for decl_tag_array header */
> > > +               decl_tags = libbpf_reallocarray(decl_tags, new_cnt + 1, sizeof(__u32));
> >

[...]
Andrii Nakryiko Nov. 7, 2022, 11:09 p.m. UTC | #5
On Mon, Nov 7, 2022 at 7:35 AM Eduard Zingerman <eddyz87@gmail.com> wrote:
>
> On Fri, 2022-11-04 at 13:54 -0700, Andrii Nakryiko wrote:
> > On Thu, Nov 3, 2022 at 6:45 AM Eduard Zingerman <eddyz87@gmail.com> wrote:
> > >
> > > Clang's `__attribute__((btf_decl_tag("...")))` is represented in BTF
> > > as a record of kind BTF_KIND_DECL_TAG with `type` field pointing to
> > > the type annotated with this attribute. This commit adds
> > > reconstitution of such attributes for BTF dump in C format.
> > >
> > > BTF doc says that BTF_KIND_DECL_TAGs should follow a target type but
> > > this is not enforced and tests don't honor this restriction.
> > > This commit uses hashmap to map types to the list of decl tags.
> > > The hashmap is filled by `btf_dump_assign_decl_tags` function called
> > > from `btf_dump__new`.
> > >
> > > It is assumed that total number of types annotated with decl tags is
> > > relatively small, thus some space is saved by using hashmap instead of
> > > adding a new field to `struct btf_dump_type_aux_state`.
> > >
> > > It is assumed that list of decl tags associated with a single type is
> > > small. Thus the list is represented by an array which grows linearly.
> > >
> > > To accommodate older Clang versions decl tags are dumped using the
> > > following macro:
> > >
> > >  #if __has_attribute(btf_decl_tag)
> > >  #  define __btf_decl_tag(x) __attribute__((btf_decl_tag(x)))
> > >  #else
> > >  #  define __btf_decl_tag(x)
> > >  #endif
> > >
> > > The macro definition is emitted upon first call to `btf_dump__dump_type`.
> > >
> > > Clang allows to attach btf_decl_tag attributes to the following kinds
> > > of items:
> > > - struct/union         supported
> > > - struct/union field   supported
> > > - typedef              supported
> > > - function             not applicable
> > > - function parameter   not applicable
> > > - variable             not applicable
> > >
> > > Signed-off-by: Eduard Zingerman <eddyz87@gmail.com>
> > > ---
> > >  tools/lib/bpf/btf_dump.c | 163 ++++++++++++++++++++++++++++++++++++++-
> > >  1 file changed, 160 insertions(+), 3 deletions(-)
> > >
> >
> > Functions and their args can also have tags. This works:
> >
> > diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_decl_tag.c
> > b/tools/testing/selftests/bpf/progs/btf_dump_test_case_decl_tag.c
> > index 7a5af8b86065..75fcabe700cd 100644
> > --- a/tools/testing/selftests/bpf/progs/btf_dump_test_case_decl_tag.c
> > +++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_decl_tag.c
> > @@ -54,7 +54,7 @@ struct root_struct {
> >
> >  /* ------ END-EXPECTED-OUTPUT ------ */
> >
> > -int f(struct root_struct *s)
> > +int f(struct root_struct *s __btf_decl_tag("func_arg_tag"))
> > __btf_decl_tag("func_tag")
> >  {
> >         return 0;
> >  }
> >
> > And I see correct BTF:
> >
> > [26] FUNC 'f' type_id=25 linkage=global
> > [27] DECL_TAG 'func_arg_tag' type_id=26 component_idx=0
> > [28] DECL_TAG 'func_tag' type_id=26 component_idx=-1
> >
> > So let's add support and test for that case as well. btf_dump
> > shouldn't assume vmlinux.h-only case.
> >
> > Also, please check if DATASEC and VARs can have decl_tags associated with them.
>
> I see that right now decl tags are saved for:
> - BTF_KIND_VAR
> - BTF_KIND_FUNC
> - BTF_KIND_FUNC arguments
>
> Decl tags are lost but legal for:
> - BTF_KIND_FUNC_PROTO arguments

ah, that's unfortunate and even DECL_TAGS example I showed above seems
like a bug. FUNC itself doesn't have args, I implicitly assumed that
all DECL_TAG will be actually associated with underlying FUNC_PROTO.

Yonghong, is this by design or a bug?

>
> I have not found a way to attach decl tag to DATASEC.
>
> For BTF_KIND_FUNC_PROTO  arguments it would  be great to  update clang
> first. Then  it would be  possible to keep all  decl tags checks  as a
> single  `btf_dump_test_case`.  On  the   other  hand  this  will  make
> testsuite dependent on the latest clang version, which is not great. I
> can add a test with hand-crafted BTF instead. Which way is preferable?

let's figure out if current state is accidental or by design.

From practical standpoint, I'd still implement the code for FUNC_PROTO
and its args, but I wouldn't go all the way to hand-craft BTF
programmatically. As you said, btf_dump tests are way more ergonomic
because we rely on compiler to do the heavy lifting.

As for the dependency on latest clang for some tests, I think that's
totally fine and unavoidable. Worst case some subtests will fail on
old kernels, they can be denylisted on systems with old compiler. All
that won't break the build (which is much worse and inconvenient).

>
> BTF_KIND_FUNC is ignored by `btf_dump__dump_type_data`
> (via `btf_dump_unsupported_data`).
>
> BTF_KIND_VAR is dumped but current  testing infrastructure is not very
> convenient, it only checks for  some variables defined in vmlinux BTF.
> I can write a  test that accepts a custom built BTF  but this is still
> inferior   to  what   `test_btf_dump_case`  provides.   I've  extended
> `test_btf_dump_case` to print DATASEC  with subordinate vars alongside
> the type definitions instead.
>

dumping DATASEC/VAR and FUNC is something that seems useful in
general, but we should treat it as a separate problem. Seeing DATASEC
variables and FUNCs in a familiar C syntax would be nice, but it
probably should be guarded behind a bpftool option or something.

So in summary, let's figure out the situation with FUNC and FUNC_PROTO
first, and let's not due too laborious selftests yet

> ------
>
> $ cat test.c
> #define __btf_decl_tag(x) __attribute__((btf_decl_tag(x)))
>
> int var __btf_decl_tag("var_tag");
>
> struct root {
>   int a;
>   int (*b)(int x __btf_decl_tag("arg_tag_proto")) __btf_decl_tag("field_tag");
> };
>
> int foo(struct root *x __btf_decl_tag("arg_tag_fn")) __btf_decl_tag("func_tag_fn") {
>   return 0;
> }
> $ clang -g -O2 -mcpu=v3 -target bpf -c test.c -o test.o
> $ bpftool btf dump file test.o
> [1] PTR '(anon)' type_id=2
> [2] STRUCT 'root' size=16 vlen=2
>         'a' type_id=3 bits_offset=0
>         'b' type_id=4 bits_offset=64
> [3] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED
> [4] PTR '(anon)' type_id=5
> [5] FUNC_PROTO '(anon)' ret_type_id=3 vlen=1
>         '(anon)' type_id=3
> [6] DECL_TAG 'field_tag' type_id=2 component_idx=1
> [7] FUNC_PROTO '(anon)' ret_type_id=3 vlen=1
>         'x' type_id=1
> [8] FUNC 'foo' type_id=7 linkage=global
> [9] DECL_TAG 'arg_tag_fn' type_id=8 component_idx=0
> [10] DECL_TAG 'func_tag_fn' type_id=8 component_idx=-1
> [11] VAR 'var' type_id=3, linkage=global
> [12] DECL_TAG 'var_tag' type_id=11 component_idx=-1
> [13] DATASEC '.bss' size=0 vlen=1
>         type_id=11 offset=0 size=4 (VAR 'var')
>
> > [...]
> >

[...]
Eduard Zingerman Nov. 7, 2022, 11:17 p.m. UTC | #6
On Mon, 2022-11-07 at 15:09 -0800, Andrii Nakryiko wrote:
> On Mon, Nov 7, 2022 at 7:35 AM Eduard Zingerman <eddyz87@gmail.com> wrote:
> > 
> > On Fri, 2022-11-04 at 13:54 -0700, Andrii Nakryiko wrote:
> > > On Thu, Nov 3, 2022 at 6:45 AM Eduard Zingerman <eddyz87@gmail.com> wrote:
> > > > 
> > > > Clang's `__attribute__((btf_decl_tag("...")))` is represented in BTF
> > > > as a record of kind BTF_KIND_DECL_TAG with `type` field pointing to
> > > > the type annotated with this attribute. This commit adds
> > > > reconstitution of such attributes for BTF dump in C format.
> > > > 
> > > > BTF doc says that BTF_KIND_DECL_TAGs should follow a target type but
> > > > this is not enforced and tests don't honor this restriction.
> > > > This commit uses hashmap to map types to the list of decl tags.
> > > > The hashmap is filled by `btf_dump_assign_decl_tags` function called
> > > > from `btf_dump__new`.
> > > > 
> > > > It is assumed that total number of types annotated with decl tags is
> > > > relatively small, thus some space is saved by using hashmap instead of
> > > > adding a new field to `struct btf_dump_type_aux_state`.
> > > > 
> > > > It is assumed that list of decl tags associated with a single type is
> > > > small. Thus the list is represented by an array which grows linearly.
> > > > 
> > > > To accommodate older Clang versions decl tags are dumped using the
> > > > following macro:
> > > > 
> > > >  #if __has_attribute(btf_decl_tag)
> > > >  #  define __btf_decl_tag(x) __attribute__((btf_decl_tag(x)))
> > > >  #else
> > > >  #  define __btf_decl_tag(x)
> > > >  #endif
> > > > 
> > > > The macro definition is emitted upon first call to `btf_dump__dump_type`.
> > > > 
> > > > Clang allows to attach btf_decl_tag attributes to the following kinds
> > > > of items:
> > > > - struct/union         supported
> > > > - struct/union field   supported
> > > > - typedef              supported
> > > > - function             not applicable
> > > > - function parameter   not applicable
> > > > - variable             not applicable
> > > > 
> > > > Signed-off-by: Eduard Zingerman <eddyz87@gmail.com>
> > > > ---
> > > >  tools/lib/bpf/btf_dump.c | 163 ++++++++++++++++++++++++++++++++++++++-
> > > >  1 file changed, 160 insertions(+), 3 deletions(-)
> > > > 
> > > 
> > > Functions and their args can also have tags. This works:
> > > 
> > > diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_decl_tag.c
> > > b/tools/testing/selftests/bpf/progs/btf_dump_test_case_decl_tag.c
> > > index 7a5af8b86065..75fcabe700cd 100644
> > > --- a/tools/testing/selftests/bpf/progs/btf_dump_test_case_decl_tag.c
> > > +++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_decl_tag.c
> > > @@ -54,7 +54,7 @@ struct root_struct {
> > > 
> > >  /* ------ END-EXPECTED-OUTPUT ------ */
> > > 
> > > -int f(struct root_struct *s)
> > > +int f(struct root_struct *s __btf_decl_tag("func_arg_tag"))
> > > __btf_decl_tag("func_tag")
> > >  {
> > >         return 0;
> > >  }
> > > 
> > > And I see correct BTF:
> > > 
> > > [26] FUNC 'f' type_id=25 linkage=global
> > > [27] DECL_TAG 'func_arg_tag' type_id=26 component_idx=0
> > > [28] DECL_TAG 'func_tag' type_id=26 component_idx=-1
> > > 
> > > So let's add support and test for that case as well. btf_dump
> > > shouldn't assume vmlinux.h-only case.
> > > 
> > > Also, please check if DATASEC and VARs can have decl_tags associated with them.
> > 
> > I see that right now decl tags are saved for:
> > - BTF_KIND_VAR
> > - BTF_KIND_FUNC
> > - BTF_KIND_FUNC arguments
> > 
> > Decl tags are lost but legal for:
> > - BTF_KIND_FUNC_PROTO arguments
> 
> ah, that's unfortunate and even DECL_TAGS example I showed above seems
> like a bug. FUNC itself doesn't have args, I implicitly assumed that
> all DECL_TAG will be actually associated with underlying FUNC_PROTO.
> 
> Yonghong, is this by design or a bug?
> 
> > 
> > I have not found a way to attach decl tag to DATASEC.
> > 
> > For BTF_KIND_FUNC_PROTO  arguments it would  be great to  update clang
> > first. Then  it would be  possible to keep all  decl tags checks  as a
> > single  `btf_dump_test_case`.  On  the   other  hand  this  will  make
> > testsuite dependent on the latest clang version, which is not great. I
> > can add a test with hand-crafted BTF instead. Which way is preferable?
> 
> let's figure out if current state is accidental or by design.
> 
> From practical standpoint, I'd still implement the code for FUNC_PROTO
> and its args, but I wouldn't go all the way to hand-craft BTF
> programmatically. As you said, btf_dump tests are way more ergonomic
> because we rely on compiler to do the heavy lifting.
> 
> As for the dependency on latest clang for some tests, I think that's
> totally fine and unavoidable. Worst case some subtests will fail on
> old kernels, they can be denylisted on systems with old compiler. All
> that won't break the build (which is much worse and inconvenient).
> 
> > 
> > BTF_KIND_FUNC is ignored by `btf_dump__dump_type_data`
> > (via `btf_dump_unsupported_data`).
> > 
> > BTF_KIND_VAR is dumped but current  testing infrastructure is not very
> > convenient, it only checks for  some variables defined in vmlinux BTF.
> > I can write a  test that accepts a custom built BTF  but this is still
> > inferior   to  what   `test_btf_dump_case`  provides.   I've  extended
> > `test_btf_dump_case` to print DATASEC  with subordinate vars alongside
> > the type definitions instead.
> > 
> 
> dumping DATASEC/VAR and FUNC is something that seems useful in
> general, but we should treat it as a separate problem. Seeing DATASEC
> variables and FUNCs in a familiar C syntax would be nice, but it
> probably should be guarded behind a bpftool option or something.
> 
> So in summary, let's figure out the situation with FUNC and FUNC_PROTO
> first, and let's not due too laborious selftests yet

Actually, to test the decl tag attachment for VAR I already implemented
a change to the `test_btf_dump_case`. It can be separated as a different
kind of tests but I don't see a point as it would be very similar to
`test_btf_dump_case`.

And manually creating BTF to attach decl tag to function proto parameter
highlighted an issue that `btf_dump_assign_decl_tags` is only called once.
So, the incremental scenario is not supported.

I can post v2 with the change to `test_btf_dump_case`, support for
decl tags on VARs and a fix for incremental dump behavior.

> 
> > ------
> > 
> > $ cat test.c
> > #define __btf_decl_tag(x) __attribute__((btf_decl_tag(x)))
> > 
> > int var __btf_decl_tag("var_tag");
> > 
> > struct root {
> >   int a;
> >   int (*b)(int x __btf_decl_tag("arg_tag_proto")) __btf_decl_tag("field_tag");
> > };
> > 
> > int foo(struct root *x __btf_decl_tag("arg_tag_fn")) __btf_decl_tag("func_tag_fn") {
> >   return 0;
> > }
> > $ clang -g -O2 -mcpu=v3 -target bpf -c test.c -o test.o
> > $ bpftool btf dump file test.o
> > [1] PTR '(anon)' type_id=2
> > [2] STRUCT 'root' size=16 vlen=2
> >         'a' type_id=3 bits_offset=0
> >         'b' type_id=4 bits_offset=64
> > [3] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED
> > [4] PTR '(anon)' type_id=5
> > [5] FUNC_PROTO '(anon)' ret_type_id=3 vlen=1
> >         '(anon)' type_id=3
> > [6] DECL_TAG 'field_tag' type_id=2 component_idx=1
> > [7] FUNC_PROTO '(anon)' ret_type_id=3 vlen=1
> >         'x' type_id=1
> > [8] FUNC 'foo' type_id=7 linkage=global
> > [9] DECL_TAG 'arg_tag_fn' type_id=8 component_idx=0
> > [10] DECL_TAG 'func_tag_fn' type_id=8 component_idx=-1
> > [11] VAR 'var' type_id=3, linkage=global
> > [12] DECL_TAG 'var_tag' type_id=11 component_idx=-1
> > [13] DATASEC '.bss' size=0 vlen=1
> >         type_id=11 offset=0 size=4 (VAR 'var')
> > 
> > > [...]
> > > 
> 
> [...]
Andrii Nakryiko Nov. 8, 2022, 12:54 a.m. UTC | #7
On Mon, Nov 7, 2022 at 3:17 PM Eduard Zingerman <eddyz87@gmail.com> wrote:
>
> On Mon, 2022-11-07 at 15:09 -0800, Andrii Nakryiko wrote:
> > On Mon, Nov 7, 2022 at 7:35 AM Eduard Zingerman <eddyz87@gmail.com> wrote:
> > >
> > > On Fri, 2022-11-04 at 13:54 -0700, Andrii Nakryiko wrote:
> > > > On Thu, Nov 3, 2022 at 6:45 AM Eduard Zingerman <eddyz87@gmail.com> wrote:
> > > > >
> > > > > Clang's `__attribute__((btf_decl_tag("...")))` is represented in BTF
> > > > > as a record of kind BTF_KIND_DECL_TAG with `type` field pointing to
> > > > > the type annotated with this attribute. This commit adds
> > > > > reconstitution of such attributes for BTF dump in C format.
> > > > >
> > > > > BTF doc says that BTF_KIND_DECL_TAGs should follow a target type but
> > > > > this is not enforced and tests don't honor this restriction.
> > > > > This commit uses hashmap to map types to the list of decl tags.
> > > > > The hashmap is filled by `btf_dump_assign_decl_tags` function called
> > > > > from `btf_dump__new`.
> > > > >
> > > > > It is assumed that total number of types annotated with decl tags is
> > > > > relatively small, thus some space is saved by using hashmap instead of
> > > > > adding a new field to `struct btf_dump_type_aux_state`.
> > > > >
> > > > > It is assumed that list of decl tags associated with a single type is
> > > > > small. Thus the list is represented by an array which grows linearly.
> > > > >
> > > > > To accommodate older Clang versions decl tags are dumped using the
> > > > > following macro:
> > > > >
> > > > >  #if __has_attribute(btf_decl_tag)
> > > > >  #  define __btf_decl_tag(x) __attribute__((btf_decl_tag(x)))
> > > > >  #else
> > > > >  #  define __btf_decl_tag(x)
> > > > >  #endif
> > > > >
> > > > > The macro definition is emitted upon first call to `btf_dump__dump_type`.
> > > > >
> > > > > Clang allows to attach btf_decl_tag attributes to the following kinds
> > > > > of items:
> > > > > - struct/union         supported
> > > > > - struct/union field   supported
> > > > > - typedef              supported
> > > > > - function             not applicable
> > > > > - function parameter   not applicable
> > > > > - variable             not applicable
> > > > >
> > > > > Signed-off-by: Eduard Zingerman <eddyz87@gmail.com>
> > > > > ---
> > > > >  tools/lib/bpf/btf_dump.c | 163 ++++++++++++++++++++++++++++++++++++++-
> > > > >  1 file changed, 160 insertions(+), 3 deletions(-)
> > > > >
> > > >
> > > > Functions and their args can also have tags. This works:
> > > >
> > > > diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_decl_tag.c
> > > > b/tools/testing/selftests/bpf/progs/btf_dump_test_case_decl_tag.c
> > > > index 7a5af8b86065..75fcabe700cd 100644
> > > > --- a/tools/testing/selftests/bpf/progs/btf_dump_test_case_decl_tag.c
> > > > +++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_decl_tag.c
> > > > @@ -54,7 +54,7 @@ struct root_struct {
> > > >
> > > >  /* ------ END-EXPECTED-OUTPUT ------ */
> > > >
> > > > -int f(struct root_struct *s)
> > > > +int f(struct root_struct *s __btf_decl_tag("func_arg_tag"))
> > > > __btf_decl_tag("func_tag")
> > > >  {
> > > >         return 0;
> > > >  }
> > > >
> > > > And I see correct BTF:
> > > >
> > > > [26] FUNC 'f' type_id=25 linkage=global
> > > > [27] DECL_TAG 'func_arg_tag' type_id=26 component_idx=0
> > > > [28] DECL_TAG 'func_tag' type_id=26 component_idx=-1
> > > >
> > > > So let's add support and test for that case as well. btf_dump
> > > > shouldn't assume vmlinux.h-only case.
> > > >
> > > > Also, please check if DATASEC and VARs can have decl_tags associated with them.
> > >
> > > I see that right now decl tags are saved for:
> > > - BTF_KIND_VAR
> > > - BTF_KIND_FUNC
> > > - BTF_KIND_FUNC arguments
> > >
> > > Decl tags are lost but legal for:
> > > - BTF_KIND_FUNC_PROTO arguments
> >
> > ah, that's unfortunate and even DECL_TAGS example I showed above seems
> > like a bug. FUNC itself doesn't have args, I implicitly assumed that
> > all DECL_TAG will be actually associated with underlying FUNC_PROTO.
> >
> > Yonghong, is this by design or a bug?
> >
> > >
> > > I have not found a way to attach decl tag to DATASEC.
> > >
> > > For BTF_KIND_FUNC_PROTO  arguments it would  be great to  update clang
> > > first. Then  it would be  possible to keep all  decl tags checks  as a
> > > single  `btf_dump_test_case`.  On  the   other  hand  this  will  make
> > > testsuite dependent on the latest clang version, which is not great. I
> > > can add a test with hand-crafted BTF instead. Which way is preferable?
> >
> > let's figure out if current state is accidental or by design.
> >
> > From practical standpoint, I'd still implement the code for FUNC_PROTO
> > and its args, but I wouldn't go all the way to hand-craft BTF
> > programmatically. As you said, btf_dump tests are way more ergonomic
> > because we rely on compiler to do the heavy lifting.
> >
> > As for the dependency on latest clang for some tests, I think that's
> > totally fine and unavoidable. Worst case some subtests will fail on
> > old kernels, they can be denylisted on systems with old compiler. All
> > that won't break the build (which is much worse and inconvenient).
> >
> > >
> > > BTF_KIND_FUNC is ignored by `btf_dump__dump_type_data`
> > > (via `btf_dump_unsupported_data`).
> > >
> > > BTF_KIND_VAR is dumped but current  testing infrastructure is not very
> > > convenient, it only checks for  some variables defined in vmlinux BTF.
> > > I can write a  test that accepts a custom built BTF  but this is still
> > > inferior   to  what   `test_btf_dump_case`  provides.   I've  extended
> > > `test_btf_dump_case` to print DATASEC  with subordinate vars alongside
> > > the type definitions instead.
> > >
> >
> > dumping DATASEC/VAR and FUNC is something that seems useful in
> > general, but we should treat it as a separate problem. Seeing DATASEC
> > variables and FUNCs in a familiar C syntax would be nice, but it
> > probably should be guarded behind a bpftool option or something.
> >
> > So in summary, let's figure out the situation with FUNC and FUNC_PROTO
> > first, and let's not due too laborious selftests yet
>
> Actually, to test the decl tag attachment for VAR I already implemented
> a change to the `test_btf_dump_case`. It can be separated as a different
> kind of tests but I don't see a point as it would be very similar to
> `test_btf_dump_case`.
>
> And manually creating BTF to attach decl tag to function proto parameter
> highlighted an issue that `btf_dump_assign_decl_tags` is only called once.
> So, the incremental scenario is not supported.
>
> I can post v2 with the change to `test_btf_dump_case`, support for
> decl tags on VARs and a fix for incremental dump behavior.

sounds good, just didn't want you to spend too much time on something
that shouldn't be needed once compiler gets fixed

>
> >
> > > ------
> > >
> > > $ cat test.c
> > > #define __btf_decl_tag(x) __attribute__((btf_decl_tag(x)))
> > >
> > > int var __btf_decl_tag("var_tag");
> > >
> > > struct root {
> > >   int a;
> > >   int (*b)(int x __btf_decl_tag("arg_tag_proto")) __btf_decl_tag("field_tag");
> > > };
> > >
> > > int foo(struct root *x __btf_decl_tag("arg_tag_fn")) __btf_decl_tag("func_tag_fn") {
> > >   return 0;
> > > }
> > > $ clang -g -O2 -mcpu=v3 -target bpf -c test.c -o test.o
> > > $ bpftool btf dump file test.o
> > > [1] PTR '(anon)' type_id=2
> > > [2] STRUCT 'root' size=16 vlen=2
> > >         'a' type_id=3 bits_offset=0
> > >         'b' type_id=4 bits_offset=64
> > > [3] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED
> > > [4] PTR '(anon)' type_id=5
> > > [5] FUNC_PROTO '(anon)' ret_type_id=3 vlen=1
> > >         '(anon)' type_id=3
> > > [6] DECL_TAG 'field_tag' type_id=2 component_idx=1
> > > [7] FUNC_PROTO '(anon)' ret_type_id=3 vlen=1
> > >         'x' type_id=1
> > > [8] FUNC 'foo' type_id=7 linkage=global
> > > [9] DECL_TAG 'arg_tag_fn' type_id=8 component_idx=0
> > > [10] DECL_TAG 'func_tag_fn' type_id=8 component_idx=-1
> > > [11] VAR 'var' type_id=3, linkage=global
> > > [12] DECL_TAG 'var_tag' type_id=11 component_idx=-1
> > > [13] DATASEC '.bss' size=0 vlen=1
> > >         type_id=11 offset=0 size=4 (VAR 'var')
> > >
> > > > [...]
> > > >
> >
> > [...]
>
diff mbox series

Patch

diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c
index bf0cc0e986dd..7bf14e2ed910 100644
--- a/tools/lib/bpf/btf_dump.c
+++ b/tools/lib/bpf/btf_dump.c
@@ -75,6 +75,20 @@  struct btf_dump_data {
 	bool is_array_char;
 };
 
+/*
+ * An arbitrary limit for a number of decl tags that could be attached
+ * to a type and it's fields combined.
+ */
+#define MAX_DECL_TAGS_PER_ID			256
+
+/*
+ * An array of ids of BTF_DECL_TAG objects associated with a specific type.
+ */
+struct decl_tag_array {
+	__u32 cnt;
+	__u32 tag_ids[0];
+};
+
 struct btf_dump {
 	const struct btf *btf;
 	btf_dump_printf_fn_t printf_fn;
@@ -111,6 +125,13 @@  struct btf_dump {
 	 * name occurrences
 	 */
 	struct hashmap *ident_names;
+	/*
+	 * maps type id to decl_tag_array, assume that relatively small
+	 * fraction of types has btf_decl_tag's attached
+	 */
+	struct hashmap *decl_tags;
+	/* indicates whether '#define __btf_decl_tag ...' was printed */
+	bool btf_decl_tag_is_defined;
 	/*
 	 * data for typed display; allocated if needed.
 	 */
@@ -127,6 +148,16 @@  static bool str_equal_fn(const void *a, const void *b, void *ctx)
 	return strcmp(a, b) == 0;
 }
 
+static size_t identity_hash_fn(const void *key, void *ctx)
+{
+	return (size_t)key;
+}
+
+static bool identity_equal_fn(const void *k1, const void *k2, void *ctx)
+{
+	return k1 == k2;
+}
+
 static const char *btf_name_of(const struct btf_dump *d, __u32 name_off)
 {
 	return btf__name_by_offset(d->btf, name_off);
@@ -143,6 +174,7 @@  static void btf_dump_printf(const struct btf_dump *d, const char *fmt, ...)
 
 static int btf_dump_mark_referenced(struct btf_dump *d);
 static int btf_dump_resize(struct btf_dump *d);
+static int btf_dump_assign_decl_tags(struct btf_dump *d);
 
 struct btf_dump *btf_dump__new(const struct btf *btf,
 			       btf_dump_printf_fn_t printf_fn,
@@ -179,11 +211,21 @@  struct btf_dump *btf_dump__new(const struct btf *btf,
 		d->ident_names = NULL;
 		goto err;
 	}
+	d->decl_tags = hashmap__new(identity_hash_fn, identity_equal_fn, NULL);
+	if (IS_ERR(d->decl_tags)) {
+		err = PTR_ERR(d->decl_tags);
+		d->decl_tags = NULL;
+		goto err;
+	}
 
 	err = btf_dump_resize(d);
 	if (err)
 		goto err;
 
+	err = btf_dump_assign_decl_tags(d);
+	if (err)
+		goto err;
+
 	return d;
 err:
 	btf_dump__free(d);
@@ -232,7 +274,8 @@  static void btf_dump_free_names(struct hashmap *map)
 
 void btf_dump__free(struct btf_dump *d)
 {
-	int i;
+	int i, bkt;
+	struct hashmap_entry *cur;
 
 	if (IS_ERR_OR_NULL(d))
 		return;
@@ -248,14 +291,22 @@  void btf_dump__free(struct btf_dump *d)
 	free(d->cached_names);
 	free(d->emit_queue);
 	free(d->decl_stack);
-	btf_dump_free_names(d->type_names);
-	btf_dump_free_names(d->ident_names);
+	if (d->type_names)
+		btf_dump_free_names(d->type_names);
+	if (d->ident_names)
+		btf_dump_free_names(d->ident_names);
+	if (d->decl_tags) {
+		hashmap__for_each_entry(d->decl_tags, cur, bkt)
+			free(cur->value);
+		hashmap__free(d->decl_tags);
+	}
 
 	free(d);
 }
 
 static int btf_dump_order_type(struct btf_dump *d, __u32 id, bool through_ptr);
 static void btf_dump_emit_type(struct btf_dump *d, __u32 id, __u32 cont_id);
+static void btf_dump_maybe_define_btf_decl_tag(struct btf_dump *d);
 
 /*
  * Dump BTF type in a compilable C syntax, including all the necessary
@@ -284,6 +335,8 @@  int btf_dump__dump_type(struct btf_dump *d, __u32 id)
 	if (err)
 		return libbpf_err(err);
 
+	btf_dump_maybe_define_btf_decl_tag(d);
+
 	d->emit_queue_cnt = 0;
 	err = btf_dump_order_type(d, id, false);
 	if (err < 0)
@@ -373,6 +426,61 @@  static int btf_dump_mark_referenced(struct btf_dump *d)
 	return 0;
 }
 
+/*
+ * This hashmap lookup is used in several places, so extract it as a
+ * function to hide all the ceremony with casts and NULL assignment.
+ */
+static struct decl_tag_array *btf_dump_find_decl_tags(struct btf_dump *d, __u32 id)
+{
+	struct decl_tag_array *decl_tags = NULL;
+
+	hashmap__find(d->decl_tags, (void *)(uintptr_t)id, (void **)&decl_tags);
+
+	return decl_tags;
+}
+
+/*
+ * Scans all BTF objects looking for BTF_KIND_DECL_TAG entries.
+ * The id's of the entries are stored in the `btf_dump.decl_tags` table,
+ * grouped by a target type.
+ */
+static int btf_dump_assign_decl_tags(struct btf_dump *d)
+{
+	__u32 id, new_cnt, type_cnt = btf__type_cnt(d->btf);
+	struct decl_tag_array *decl_tags;
+	const struct btf_type *t;
+	int err;
+
+	for (id = 1; id < type_cnt; id++) {
+		t = btf__type_by_id(d->btf, id);
+		if (!btf_is_decl_tag(t))
+			continue;
+
+		decl_tags = btf_dump_find_decl_tags(d, t->type);
+		/* Assume small number of decl tags per id, increase array size by 1 */
+		new_cnt = decl_tags ? decl_tags->cnt + 1 : 1;
+		if (new_cnt > MAX_DECL_TAGS_PER_ID)
+			return -ERANGE;
+
+		/* Allocate new_cnt + 1 to account for decl_tag_array header */
+		decl_tags = libbpf_reallocarray(decl_tags, new_cnt + 1, sizeof(__u32));
+		if (!decl_tags)
+			return -ENOMEM;
+
+		err = hashmap__insert(d->decl_tags, (void *)(uintptr_t)t->type, decl_tags,
+				      HASHMAP_SET, NULL, NULL);
+		if (err) {
+			free(decl_tags);
+			return err;
+		}
+
+		decl_tags->tag_ids[new_cnt - 1] = id;
+		decl_tags->cnt = new_cnt;
+	}
+
+	return 0;
+}
+
 static int btf_dump_add_emit_queue_id(struct btf_dump *d, __u32 id)
 {
 	__u32 *new_queue;
@@ -899,6 +1007,51 @@  static void btf_dump_emit_bit_padding(const struct btf_dump *d,
 	}
 }
 
+/*
+ * Define __btf_decl_tag to be either __attribute__ or noop.
+ */
+static void btf_dump_maybe_define_btf_decl_tag(struct btf_dump *d)
+{
+	if (d->btf_decl_tag_is_defined || !hashmap__size(d->decl_tags))
+		return;
+
+	d->btf_decl_tag_is_defined = true;
+	btf_dump_printf(d, "#if __has_attribute(btf_decl_tag)\n");
+	btf_dump_printf(d, "#  define __btf_decl_tag(x) __attribute__((btf_decl_tag(x)))\n");
+	btf_dump_printf(d, "#else\n");
+	btf_dump_printf(d, "#  define __btf_decl_tag(x)\n");
+	btf_dump_printf(d, "#endif\n\n");
+}
+
+/*
+ * Emits a list of __btf_decl_tag(...) attributes attached to some type.
+ * Decl tags attached to a type and to it's fields reside in a same
+ * list, thus use component_idx to filter out relevant tags:
+ * - component_idx == -1 designates the type itself;
+ * - component_idx >=  0 designates specific field.
+ */
+static void btf_dump_emit_decl_tags(struct btf_dump *d,
+				    struct decl_tag_array *decl_tags,
+				    int component_idx)
+{
+	struct btf_type *decl_tag_t;
+	const char *decl_tag_text;
+	struct btf_decl_tag *tag;
+	__u32 i;
+
+	if (!decl_tags)
+		return;
+
+	for (i = 0; i < decl_tags->cnt; ++i) {
+		decl_tag_t = btf_type_by_id(d->btf, decl_tags->tag_ids[i]);
+		tag = btf_decl_tag(decl_tag_t);
+		if (tag->component_idx != component_idx)
+			continue;
+		decl_tag_text = btf__name_by_offset(d->btf, decl_tag_t->name_off);
+		btf_dump_printf(d, " __btf_decl_tag(\"%s\")", decl_tag_text);
+	}
+}
+
 static void btf_dump_emit_struct_fwd(struct btf_dump *d, __u32 id,
 				     const struct btf_type *t)
 {
@@ -913,6 +1066,7 @@  static void btf_dump_emit_struct_def(struct btf_dump *d,
 				     const struct btf_type *t,
 				     int lvl)
 {
+	struct decl_tag_array *decl_tags = btf_dump_find_decl_tags(d, id);
 	const struct btf_member *m = btf_members(t);
 	bool is_struct = btf_is_struct(t);
 	int align, i, packed, off = 0;
@@ -945,6 +1099,7 @@  static void btf_dump_emit_struct_def(struct btf_dump *d,
 			m_sz = max((__s64)0, btf__resolve_size(d->btf, m->type));
 			off = m_off + m_sz * 8;
 		}
+		btf_dump_emit_decl_tags(d, decl_tags, i);
 		btf_dump_printf(d, ";");
 	}
 
@@ -964,6 +1119,7 @@  static void btf_dump_emit_struct_def(struct btf_dump *d,
 	btf_dump_printf(d, "%s}", pfx(lvl));
 	if (packed)
 		btf_dump_printf(d, " __attribute__((packed))");
+	btf_dump_emit_decl_tags(d, decl_tags, -1);
 }
 
 static const char *missing_base_types[][2] = {
@@ -1104,6 +1260,7 @@  static void btf_dump_emit_typedef_def(struct btf_dump *d, __u32 id,
 
 	btf_dump_printf(d, "typedef ");
 	btf_dump_emit_type_decl(d, t->type, name, lvl);
+	btf_dump_emit_decl_tags(d, btf_dump_find_decl_tags(d, id), -1);
 }
 
 static int btf_dump_push_decl_stack_id(struct btf_dump *d, __u32 id)