diff mbox series

[bpf-next,v2,1/4] libbpf: put forward declarations to btf_dump->emit_queue

Message ID 20240517190555.4032078-2-eddyz87@gmail.com (mailing list archive)
State Changes Requested
Delegated to: BPF
Headers show
Series API to access btf_dump emit queue and print single type | expand

Checks

Context Check Description
bpf/vmtest-bpf-next-VM_Test-5 success Logs for aarch64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-2 success Logs for Unittests
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-0 success Logs for Lint
bpf/vmtest-bpf-next-VM_Test-3 success Logs for Validate matrix.py
bpf/vmtest-bpf-next-VM_Test-8 fail Logs for aarch64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-7 fail Logs for aarch64-gcc / test (test_progs, false, 360) / test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-10 success Logs for aarch64-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-17 success Logs for s390x-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-11 success Logs for s390x-gcc / build / build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-12 success Logs for s390x-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-16 success Logs for s390x-gcc / test (test_verifier, false, 360) / test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-20 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-18 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-4 success Logs for aarch64-gcc / build / build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-13 success Logs for s390x-gcc / test (test_maps, false, 360) / test_maps on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-6 success Logs for aarch64-gcc / test (test_maps, false, 360) / test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-19 success Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-9 success Logs for aarch64-gcc / test (test_verifier, false, 360) / test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-28 success Logs for x86_64-llvm-17 / build / build for x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-29 success Logs for x86_64-llvm-17 / build-release / build for x86_64 with llvm-17 and -O2 optimization
bpf/vmtest-bpf-next-VM_Test-34 success Logs for x86_64-llvm-17 / veristat
bpf/vmtest-bpf-next-VM_Test-35 success Logs for x86_64-llvm-18 / build / build for x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-36 success Logs for x86_64-llvm-18 / build-release / build for x86_64 with llvm-18 and -O2 optimization
bpf/vmtest-bpf-next-VM_Test-42 success Logs for x86_64-llvm-18 / veristat
bpf/vmtest-bpf-next-VM_Test-15 fail Logs for s390x-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-14 fail Logs for s390x-gcc / test (test_progs, false, 360) / test_progs on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-26 success Logs for x86_64-gcc / test (test_verifier, false, 360) / test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-33 success Logs for x86_64-llvm-17 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-41 success Logs for x86_64-llvm-18 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-18
bpf/vmtest-bpf-next-PR fail PR summary
bpf/vmtest-bpf-next-VM_Test-24 success Logs for x86_64-gcc / test (test_progs_no_alu32_parallel, true, 30) / test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-22 fail Logs for x86_64-gcc / test (test_progs, false, 360) / test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-23 fail Logs for x86_64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-21 success Logs for x86_64-gcc / test (test_maps, false, 360) / test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-25 success Logs for x86_64-gcc / test (test_progs_parallel, true, 30) / test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-27 success Logs for x86_64-gcc / veristat / veristat on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-30 success Logs for x86_64-llvm-17 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-31 fail Logs for x86_64-llvm-17 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-32 fail Logs for x86_64-llvm-17 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-37 success Logs for x86_64-llvm-18 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-38 fail Logs for x86_64-llvm-18 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-39 fail Logs for x86_64-llvm-18 / test (test_progs_cpuv4, false, 360) / test_progs_cpuv4 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-40 fail Logs for x86_64-llvm-18 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-18
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for bpf-next
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 8 this patch: 8
netdev/build_tools success Errors and warnings before: 1 this patch: 1
netdev/cc_maintainers warning 11 maintainers not CCed: jolsa@kernel.org john.fastabend@gmail.com morbo@google.com haoluo@google.com song@kernel.org llvm@lists.linux.dev nathan@kernel.org kpsingh@kernel.org justinstitt@google.com ndesaulniers@google.com sdf@google.com
netdev/build_clang success Errors and warnings before: 8 this patch: 8
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 8 this patch: 8
netdev/checkpatch warning WARNING: line length of 81 exceeds 80 columns WARNING: line length of 82 exceeds 80 columns WARNING: line length of 84 exceeds 80 columns WARNING: line length of 85 exceeds 80 columns WARNING: line length of 86 exceeds 80 columns WARNING: line length of 88 exceeds 80 columns WARNING: line length of 93 exceeds 80 columns WARNING: line length of 94 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Eduard Zingerman May 17, 2024, 7:05 p.m. UTC
As a preparatory step for introducing public API for BTF topological
ordering / single type printing, this commit removes ordering logic
from btf_dump_emit_type() and moves parts necessary to compute forward
declarations to btf_dump_order_type().

Before this commit the topological ordering of types was effectively
done twice:
- first time in btf_dump_order_type(), which ordered types using only
  "strong" links (one structure embedded in another);
- second time in btf_dump_emit_type() to emit forward declarations.

After this commit:
- btf_dump_emit_type() is responsible only for printing
  declaration / forward declaration of a single type;
- btf_dump->emit_queue now contains pairs of form
  {id, forward declaration flag};
- btf_dump->emit_state is no longer necessary,
  as EMITTED state is effectively replaced by ORDERED state
  in btf_dump->order_state;

Notable changes to btf_dump_order_type() logic:
- no need to return strong / weak result, emit forward declaration to
  the queue for weak links instead;
- track containing type id ('cont_id'), as btf_dump_emit_type() did,
  to avoid unnecessary forward declarations in recursive structs;
- PTRs can no longer be marked ORDERED (see comment in the code);
- incorporate btf_dump_emit_missing_aliases() and
  btf_dump_is_blacklisted() checks from btf_dump_emit_type().

When called for e.g. PTR type pointing to a struct
btf_dump__dump_type() would previously result in an empty emit queue:
btf_dump_order_type() would have reached struct with
'through_ptr' == true, thus not adding it to the queue.
To mimic such behavior this patch adds a type filter to
btf_dump__dump_type(): only STRUCT, UNION, TYPEDEF, ENUM, ENUM64, FWD
types are ordered.

The downside of a single pass algorithm is that for the following
situation old logic would have avoided extra forward declaration:

	struct bar {};

	struct foo {		/* Suppose btf_dump__dump_type(foo) */
	    struct bar *a;	/* is called first.                 */
	    struct bar b;
	};

The btf_dump_order_type() would have ordered 'bar' before 'foo',
btf_dump_emit_type() would have been first called for 'bar' thus
avoiding forward declaration for 'sturct bar' when processing 'foo'.

In contrast, new logic would act as follows:
- when processing foo->a forward declaration for 'bar' would be enqueued;
- when processing foo->b full declaration for 'bar' would be enqueued;

In practice this does not seem to be a big issue, number of forward
declarations in vmlinux.h (for BPF selftests config) compared:
                     llvm  gcc
- before this patch: 1772  1235
- after  this patch: 1786  1249

Signed-off-by: Eduard Zingerman <eddyz87@gmail.com>
---
 tools/lib/bpf/btf_dump.c | 351 ++++++++++++++++++---------------------
 1 file changed, 161 insertions(+), 190 deletions(-)

Comments

Andrii Nakryiko May 28, 2024, 10:05 p.m. UTC | #1
On Fri, May 17, 2024 at 12:06 PM Eduard Zingerman <eddyz87@gmail.com> wrote:
>
> As a preparatory step for introducing public API for BTF topological
> ordering / single type printing, this commit removes ordering logic
> from btf_dump_emit_type() and moves parts necessary to compute forward
> declarations to btf_dump_order_type().
>
> Before this commit the topological ordering of types was effectively
> done twice:
> - first time in btf_dump_order_type(), which ordered types using only
>   "strong" links (one structure embedded in another);
> - second time in btf_dump_emit_type() to emit forward declarations.
>
> After this commit:
> - btf_dump_emit_type() is responsible only for printing
>   declaration / forward declaration of a single type;
> - btf_dump->emit_queue now contains pairs of form
>   {id, forward declaration flag};
> - btf_dump->emit_state is no longer necessary,
>   as EMITTED state is effectively replaced by ORDERED state
>   in btf_dump->order_state;
>
> Notable changes to btf_dump_order_type() logic:
> - no need to return strong / weak result, emit forward declaration to
>   the queue for weak links instead;
> - track containing type id ('cont_id'), as btf_dump_emit_type() did,
>   to avoid unnecessary forward declarations in recursive structs;
> - PTRs can no longer be marked ORDERED (see comment in the code);
> - incorporate btf_dump_emit_missing_aliases() and
>   btf_dump_is_blacklisted() checks from btf_dump_emit_type().
>
> When called for e.g. PTR type pointing to a struct
> btf_dump__dump_type() would previously result in an empty emit queue:
> btf_dump_order_type() would have reached struct with
> 'through_ptr' == true, thus not adding it to the queue.
> To mimic such behavior this patch adds a type filter to
> btf_dump__dump_type(): only STRUCT, UNION, TYPEDEF, ENUM, ENUM64, FWD
> types are ordered.
>
> The downside of a single pass algorithm is that for the following
> situation old logic would have avoided extra forward declaration:
>
>         struct bar {};
>
>         struct foo {            /* Suppose btf_dump__dump_type(foo) */
>             struct bar *a;      /* is called first.                 */
>             struct bar b;
>         };
>
> The btf_dump_order_type() would have ordered 'bar' before 'foo',
> btf_dump_emit_type() would have been first called for 'bar' thus
> avoiding forward declaration for 'sturct bar' when processing 'foo'.
>
> In contrast, new logic would act as follows:
> - when processing foo->a forward declaration for 'bar' would be enqueued;
> - when processing foo->b full declaration for 'bar' would be enqueued;
>
> In practice this does not seem to be a big issue, number of forward
> declarations in vmlinux.h (for BPF selftests config) compared:
>                      llvm  gcc
> - before this patch: 1772  1235
> - after  this patch: 1786  1249
>
> Signed-off-by: Eduard Zingerman <eddyz87@gmail.com>
> ---
>  tools/lib/bpf/btf_dump.c | 351 ++++++++++++++++++---------------------
>  1 file changed, 161 insertions(+), 190 deletions(-)
>
> diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c
> index 5dbca76b953f..10532ae9ff14 100644
> --- a/tools/lib/bpf/btf_dump.c
> +++ b/tools/lib/bpf/btf_dump.c
> @@ -36,24 +36,16 @@ enum btf_dump_type_order_state {
>         ORDERED,
>  };
>
> -enum btf_dump_type_emit_state {
> -       NOT_EMITTED,
> -       EMITTING,
> -       EMITTED,
> -};
> -
>  /* per-type auxiliary state */
>  struct btf_dump_type_aux_state {
>         /* topological sorting state */
>         enum btf_dump_type_order_state order_state: 2;
> -       /* emitting state used to determine the need for forward declaration */
> -       enum btf_dump_type_emit_state emit_state: 2;
> -       /* whether forward declaration was already emitted */
> -       __u8 fwd_emitted: 1;
>         /* whether unique non-duplicate name was already assigned */
>         __u8 name_resolved: 1;
>         /* whether type is referenced from any other type */
>         __u8 referenced: 1;
> +       /* whether forward declaration was already ordered */
> +       __u8 fwd_ordered: 1;
>  };
>
>  /* indent string length; one indent string is added for each indent level */
> @@ -93,7 +85,10 @@ struct btf_dump {
>         size_t cached_names_cap;
>
>         /* topo-sorted list of dependent type definitions */
> -       __u32 *emit_queue;
> +       struct {
> +               __u32 id:31;
> +               __u32 fwd:1;
> +       } *emit_queue;

let's define the named type right in this patch, no need to use
typeof() hack just to remove it later.

Also, let's maybe have

struct <whatever> {
    __u32 id;
    __u32 flags;
};

and define

enum btf_dump_emit_flags {
    BTF_DUMP_FWD_DECL = 0x1,
};

Or something along those lines? Having a few more flags available will
make it less like that we'll need to add a new set of APIs just to
accommodate one extra flag. (Though, if we add another field, we'll
end up adding another API anyways, but I really hope we will never
have to do this).

>         int emit_queue_cap;
>         int emit_queue_cnt;
>
> @@ -208,7 +203,6 @@ static int btf_dump_resize(struct btf_dump *d)
>         if (d->last_id == 0) {
>                 /* VOID is special */
>                 d->type_states[0].order_state = ORDERED;
> -               d->type_states[0].emit_state = EMITTED;
>         }
>
>         /* eagerly determine referenced types for anon enums */
> @@ -255,8 +249,8 @@ void btf_dump__free(struct btf_dump *d)
>         free(d);
>  }
>
> -static int btf_dump_order_type(struct btf_dump *d, __u32 id, bool through_ptr);
> -static void btf_dump_emit_type(struct btf_dump *d, __u32 id, __u32 cont_id);
> +static int btf_dump_order_type(struct btf_dump *d, __u32 id, __u32 cont_id, bool through_ptr);
> +static void btf_dump_emit_type(struct btf_dump *d, __u32 id, bool fwd);
>
>  /*
>   * Dump BTF type in a compilable C syntax, including all the necessary
> @@ -276,6 +270,7 @@ static void btf_dump_emit_type(struct btf_dump *d, __u32 id, __u32 cont_id);
>   */
>  int btf_dump__dump_type(struct btf_dump *d, __u32 id)
>  {
> +       const struct btf_type *t;
>         int err, i;
>
>         if (id >= btf__type_cnt(d->btf))
> @@ -286,12 +281,23 @@ int btf_dump__dump_type(struct btf_dump *d, __u32 id)
>                 return libbpf_err(err);
>
>         d->emit_queue_cnt = 0;
> -       err = btf_dump_order_type(d, id, false);
> -       if (err < 0)
> -               return libbpf_err(err);
> +       t = btf_type_by_id(d->btf, id);
> +       switch (btf_kind(t)) {
> +       case BTF_KIND_STRUCT:
> +       case BTF_KIND_UNION:
> +       case BTF_KIND_TYPEDEF:
> +       case BTF_KIND_ENUM:
> +       case BTF_KIND_ENUM64:
> +       case BTF_KIND_FWD:
> +               err = btf_dump_order_type(d, id, id, false);
> +               if (err < 0)
> +                       return libbpf_err(err);
> +       default:
> +               break;
> +       };
>
>         for (i = 0; i < d->emit_queue_cnt; i++)
> -               btf_dump_emit_type(d, d->emit_queue[i], 0 /*top-level*/);
> +               btf_dump_emit_type(d, d->emit_queue[i].id, d->emit_queue[i].fwd);
>
>         return 0;
>  }
> @@ -374,9 +380,9 @@ static int btf_dump_mark_referenced(struct btf_dump *d)
>         return 0;
>  }
>
> -static int btf_dump_add_emit_queue_id(struct btf_dump *d, __u32 id)
> +static int __btf_dump_add_emit_queue_id(struct btf_dump *d, __u32 id, bool fwd)

I don't like those underscored functions in libbpf code base, please
don't add them. But I'm also not sure we need to have it, there are
only a few calles of the original btf_dump_add_emit_queue_id(), so we
can just update them to pass true/false as appropriate.

>  {
> -       __u32 *new_queue;
> +       typeof(d->emit_queue[0]) *new_queue = NULL;
>         size_t new_cap;
>
>         if (d->emit_queue_cnt >= d->emit_queue_cap) {
> @@ -388,10 +394,45 @@ static int btf_dump_add_emit_queue_id(struct btf_dump *d, __u32 id)
>                 d->emit_queue_cap = new_cap;
>         }
>
> -       d->emit_queue[d->emit_queue_cnt++] = id;
> +       d->emit_queue[d->emit_queue_cnt].id = id;
> +       d->emit_queue[d->emit_queue_cnt].fwd = fwd;
> +       d->emit_queue_cnt++;
>         return 0;
>  }
>
> +static int btf_dump_add_emit_queue_id(struct btf_dump *d, __u32 id)
> +{
> +       return __btf_dump_add_emit_queue_id(d, id, false);
> +}
> +
> +static int btf_dump_add_emit_queue_fwd(struct btf_dump *d, __u32 id)
> +{
> +       struct btf_dump_type_aux_state *tstate = &d->type_states[id];
> +
> +       if (tstate->fwd_ordered)
> +               return 0;
> +
> +       tstate->fwd_ordered = 1;
> +       return __btf_dump_add_emit_queue_id(d, id, true);
> +}
> +

see above, do we really need these wrappers, passing true/false in a
few places doesn't seem to be a big deal

> +static bool btf_dump_emit_missing_aliases(struct btf_dump *d, __u32 id, bool dry_run);
> +
> +static bool btf_dump_is_blacklisted(struct btf_dump *d, __u32 id)
> +{
> +       const struct btf_type *t = btf__type_by_id(d->btf, id);
> +
> +       /* __builtin_va_list is a compiler built-in, which causes compilation
> +        * errors, when compiling w/ different compiler, then used to compile
> +        * original code (e.g., GCC to compile kernel, Clang to use generated
> +        * C header from BTF). As it is built-in, it should be already defined
> +        * properly internally in compiler.
> +        */
> +       if (t->name_off == 0)
> +               return false;
> +       return strcmp(btf_name_of(d, t->name_off), "__builtin_va_list") == 0;
> +}
> +

why moving btf_dump_is_blacklisted() but forward declaring
btf_dump_emit_missing_aliases()? Let's do the same to both, whichever
it is (forward declaring probably is least distracting here)

>  /*
>   * Determine order of emitting dependent types and specified type to satisfy
>   * C compilation rules.  This is done through topological sorting with an
> @@ -441,32 +482,33 @@ static int btf_dump_add_emit_queue_id(struct btf_dump *d, __u32 id)
>   * The rule is as follows. Given a chain of BTF types from X to Y, if there is
>   * BTF_KIND_PTR type in the chain and at least one non-anonymous type
>   * Z (excluding X, including Y), then link is weak. Otherwise, it's strong.
> - * Weak/strong relationship is determined recursively during DFS traversal and
> - * is returned as a result from btf_dump_order_type().
> + * Weak/strong relationship is determined recursively during DFS traversal.
> + *
> + * When type id is reached via a weak link a forward declaration for
> + * that type is added to the emit queue, otherwise "full" declaration
> + * is added to the emit queue.
> + *
> + * We also keep track of "containing struct/union type ID" to determine when
> + * we reference it from inside and thus can avoid emitting unnecessary forward
> + * declaration.
>   *
>   * btf_dump_order_type() is trying to avoid unnecessary forward declarations,
>   * but it is not guaranteeing that no extraneous forward declarations will be
>   * emitted.
>   *
>   * To avoid extra work, algorithm marks some of BTF types as ORDERED, when
> - * it's done with them, but not for all (e.g., VOLATILE, CONST, RESTRICT,
> - * ARRAY, FUNC_PROTO), as weak/strong semantics for those depends on the
> + * it's done with them, but not for all (e.g., PTR, VOLATILE, CONST, RESTRICT,
> + * ARRAY, FUNC_PROTO, TYPEDEF), as weak/strong semantics for those depends on the
>   * entire graph path, so depending where from one came to that BTF type, it
>   * might cause weak or strong ordering. For types like STRUCT/UNION/INT/ENUM,
>   * once they are processed, there is no need to do it again, so they are
> - * marked as ORDERED. We can mark PTR as ORDERED as well, as it semi-forces
> - * weak link, unless subsequent referenced STRUCT/UNION/ENUM is anonymous. But
> - * in any case, once those are processed, no need to do it again, as the
> - * result won't change.
> + * marked as ORDERED.
>   *
>   * Returns:
> - *   - 1, if type is part of strong link (so there is strong topological
> - *   ordering requirements);
> - *   - 0, if type is part of weak link (so can be satisfied through forward
> - *   declaration);
> + *   - 0, on success;
>   *   - <0, on error (e.g., unsatisfiable type loop detected).
>   */
> -static int btf_dump_order_type(struct btf_dump *d, __u32 id, bool through_ptr)
> +static int btf_dump_order_type(struct btf_dump *d, __u32 id, __u32 cont_id, bool through_ptr)
>  {
>         /*
>          * Order state is used to detect strong link cycles, but only for BTF
> @@ -486,48 +528,78 @@ static int btf_dump_order_type(struct btf_dump *d, __u32 id, bool through_ptr)
>
>         /* return true, letting typedefs know that it's ok to be emitted */
>         if (tstate->order_state == ORDERED)
> -               return 1;
> +               return 0;
>
>         t = btf__type_by_id(d->btf, id);
>
>         if (tstate->order_state == ORDERING) {
>                 /* type loop, but resolvable through fwd declaration */
> -               if (btf_is_composite(t) && through_ptr && t->name_off != 0)
> -                       return 0;
> +               if (btf_is_composite(t) && through_ptr && t->name_off != 0) {
> +                       if (id != cont_id)
> +                               return btf_dump_add_emit_queue_fwd(d, id);
> +                       else
> +                               return 0;
> +               }
>                 pr_warn("unsatisfiable type cycle, id:[%u]\n", id);
>                 return -ELOOP;
>         }
>
>         switch (btf_kind(t)) {
>         case BTF_KIND_INT:
> +               tstate->order_state = ORDERED;
> +               if (btf_dump_emit_missing_aliases(d, id, true))
> +                       return btf_dump_add_emit_queue_id(d, id);
> +               else
> +                       return 0;
>         case BTF_KIND_FLOAT:
>                 tstate->order_state = ORDERED;
>                 return 0;
>
>         case BTF_KIND_PTR:
> -               err = btf_dump_order_type(d, t->type, true);
> -               tstate->order_state = ORDERED;
> +               /* Depending on whether pointer is a part of a recursive struct
> +                * declaration, it might not necessitate generation of a forward
> +                * declaration for the target type, e.g.:
> +                *
> +                * struct foo {
> +                *      struct foo *p; // no need for forward declaration
> +                * }
> +                *
> +                * struct bar {
> +                *      struct foo *p; // forward declaration is needed
> +                * }
> +                *
> +                * Hence, don't mark pointer as ORDERED, to allow traversal
> +                * to the target type and comparison with 'cont_id'.
> +                */
> +               err = btf_dump_order_type(d, t->type, cont_id, true);
>                 return err;
>
>         case BTF_KIND_ARRAY:
> -               return btf_dump_order_type(d, btf_array(t)->type, false);
> +               return btf_dump_order_type(d, btf_array(t)->type, cont_id, false);
>
>         case BTF_KIND_STRUCT:
>         case BTF_KIND_UNION: {
>                 const struct btf_member *m = btf_members(t);
> +               __u32 new_cont_id;
> +
>                 /*
>                  * struct/union is part of strong link, only if it's embedded
>                  * (so no ptr in a path) or it's anonymous (so has to be
>                  * defined inline, even if declared through ptr)
>                  */
> -               if (through_ptr && t->name_off != 0)
> -                       return 0;
> +               if (through_ptr && t->name_off != 0) {
> +                       if (id != cont_id)
> +                               return btf_dump_add_emit_queue_fwd(d, id);
> +                       else
> +                               return 0;

very subjective nit, but this "else return 0;" just doesn't feel right
here. Let's do:

if (id == cont_id)
    return 0;
return btf_dump_add_emit_queue_fwd();

It feels a bit more natural as "if it's a special nice case, we are
done (return 0); otherwise we need to emit extra fwd decl."


> +               }
>
>                 tstate->order_state = ORDERING;
>
> +               new_cont_id = t->name_off == 0 ? cont_id : id;
>                 vlen = btf_vlen(t);
>                 for (i = 0; i < vlen; i++, m++) {
> -                       err = btf_dump_order_type(d, m->type, false);
> +                       err = btf_dump_order_type(d, m->type, new_cont_id, false);

just inline `t->name_off ? id : cont_id` here? It's short and
straightforward enough, I suppose (named type defines new containing
"scope", anonymous type continues existing scope)

>                         if (err < 0)
>                                 return err;
>                 }
> @@ -539,7 +611,7 @@ static int btf_dump_order_type(struct btf_dump *d, __u32 id, bool through_ptr)
>                 }
>
>                 tstate->order_state = ORDERED;
> -               return 1;
> +               return 0;
>         }
>         case BTF_KIND_ENUM:
>         case BTF_KIND_ENUM64:
> @@ -555,51 +627,53 @@ static int btf_dump_order_type(struct btf_dump *d, __u32 id, bool through_ptr)
>                                 return err;
>                 }
>                 tstate->order_state = ORDERED;
> -               return 1;
> -
> -       case BTF_KIND_TYPEDEF: {
> -               int is_strong;
> -
> -               is_strong = btf_dump_order_type(d, t->type, through_ptr);
> -               if (is_strong < 0)
> -                       return is_strong;
> +               return 0;
>
> -               /* typedef is similar to struct/union w.r.t. fwd-decls */
> -               if (through_ptr && !is_strong)
> +       case BTF_KIND_TYPEDEF:
> +               /* Do not mark typedef as ORDERED, always emit a forward declaration for
> +                * it instead. Otherwise the following situation would be troublesome:
> +                *
> +                *   typedef struct foo foo_alias;
> +                *
> +                *   struct foo {};
> +                *
> +                *   struct root {
> +                *      foo_alias *a;
> +                *      foo_alias b;
> +                *   };
> +                *
> +                */
> +               if (btf_dump_is_blacklisted(d, id))
>                         return 0;
>
> -               /* typedef is always a named definition */
> -               err = btf_dump_add_emit_queue_id(d, id);
> -               if (err)
> +               err = btf_dump_order_type(d, t->type, t->type, through_ptr);
> +               if (err < 0)
>                         return err;
>
> -               d->type_states[id].order_state = ORDERED;
> -               return 1;
> -       }
> +               err = btf_dump_add_emit_queue_fwd(d, id);
> +               if (err)
> +                       return err;
> +               return 0;

return btf_dump_add_emit_queue_fwd(...); ? this is the last step, so
seems appropriate

>         case BTF_KIND_VOLATILE:
>         case BTF_KIND_CONST:
>         case BTF_KIND_RESTRICT:
>         case BTF_KIND_TYPE_TAG:
> -               return btf_dump_order_type(d, t->type, through_ptr);
> +               return btf_dump_order_type(d, t->type, cont_id, through_ptr);
>
>         case BTF_KIND_FUNC_PROTO: {
>                 const struct btf_param *p = btf_params(t);
> -               bool is_strong;
>
> -               err = btf_dump_order_type(d, t->type, through_ptr);
> +               err = btf_dump_order_type(d, t->type, cont_id, through_ptr);
>                 if (err < 0)
>                         return err;
> -               is_strong = err > 0;
>
>                 vlen = btf_vlen(t);
>                 for (i = 0; i < vlen; i++, p++) {
> -                       err = btf_dump_order_type(d, p->type, through_ptr);
> +                       err = btf_dump_order_type(d, p->type, cont_id, through_ptr);
>                         if (err < 0)
>                                 return err;
> -                       if (err > 0)
> -                               is_strong = true;
>                 }
> -               return is_strong;
> +               return err;

this should always be zero, right? Just return zero explicit, don't
make reader to guess

>         }
>         case BTF_KIND_FUNC:
>         case BTF_KIND_VAR:
> @@ -613,9 +687,6 @@ static int btf_dump_order_type(struct btf_dump *d, __u32 id, bool through_ptr)
>         }
>  }
>
> -static void btf_dump_emit_missing_aliases(struct btf_dump *d, __u32 id,
> -                                         const struct btf_type *t);
> -
>  static void btf_dump_emit_struct_fwd(struct btf_dump *d, __u32 id,
>                                      const struct btf_type *t);
>  static void btf_dump_emit_struct_def(struct btf_dump *d, __u32 id,
> @@ -649,73 +720,33 @@ static const char *btf_dump_ident_name(struct btf_dump *d, __u32 id);
>  static size_t btf_dump_name_dups(struct btf_dump *d, struct hashmap *name_map,
>                                  const char *orig_name);
>
> -static bool btf_dump_is_blacklisted(struct btf_dump *d, __u32 id)
> -{
> -       const struct btf_type *t = btf__type_by_id(d->btf, id);
> -
> -       /* __builtin_va_list is a compiler built-in, which causes compilation
> -        * errors, when compiling w/ different compiler, then used to compile
> -        * original code (e.g., GCC to compile kernel, Clang to use generated
> -        * C header from BTF). As it is built-in, it should be already defined
> -        * properly internally in compiler.
> -        */
> -       if (t->name_off == 0)
> -               return false;
> -       return strcmp(btf_name_of(d, t->name_off), "__builtin_va_list") == 0;
> -}
> -
>  /*
>   * Emit C-syntax definitions of types from chains of BTF types.
>   *
> - * High-level handling of determining necessary forward declarations are handled
> - * by btf_dump_emit_type() itself, but all nitty-gritty details of emitting type
> + * All nitty-gritty details of emitting type
>   * declarations/definitions in C syntax  are handled by a combo of
>   * btf_dump_emit_type_decl()/btf_dump_emit_type_chain() w/ delegation to
>   * corresponding btf_dump_emit_*_{def,fwd}() functions.
>   *
> - * We also keep track of "containing struct/union type ID" to determine when
> - * we reference it from inside and thus can avoid emitting unnecessary forward
> - * declaration.
> - *
>   * This algorithm is designed in such a way, that even if some error occurs
>   * (either technical, e.g., out of memory, or logical, i.e., malformed BTF
>   * that doesn't comply to C rules completely), algorithm will try to proceed
>   * and produce as much meaningful output as possible.
>   */
> -static void btf_dump_emit_type(struct btf_dump *d, __u32 id, __u32 cont_id)
> +static void btf_dump_emit_type(struct btf_dump *d, __u32 id, bool fwd)
>  {
> -       struct btf_dump_type_aux_state *tstate = &d->type_states[id];
> -       bool top_level_def = cont_id == 0;
>         const struct btf_type *t;
>         __u16 kind;
>
> -       if (tstate->emit_state == EMITTED)
> -               return;
> -
>         t = btf__type_by_id(d->btf, id);
>         kind = btf_kind(t);
>
> -       if (tstate->emit_state == EMITTING) {
> -               if (tstate->fwd_emitted)
> -                       return;
> -
> +       if (fwd) {
>                 switch (kind) {
>                 case BTF_KIND_STRUCT:
>                 case BTF_KIND_UNION:
> -                       /*
> -                        * if we are referencing a struct/union that we are
> -                        * part of - then no need for fwd declaration
> -                        */
> -                       if (id == cont_id)
> -                               return;
> -                       if (t->name_off == 0) {
> -                               pr_warn("anonymous struct/union loop, id:[%u]\n",
> -                                       id);
> -                               return;
> -                       }
>                         btf_dump_emit_struct_fwd(d, id, t);
>                         btf_dump_printf(d, ";\n\n");
> -                       tstate->fwd_emitted = 1;
>                         break;
>                 case BTF_KIND_TYPEDEF:
>                         /*
> @@ -723,11 +754,8 @@ static void btf_dump_emit_type(struct btf_dump *d, __u32 id, __u32 cont_id)
>                          * was emitted, but it can be used only for "weak"
>                          * references through pointer only, not for embedding
>                          */
> -                       if (!btf_dump_is_blacklisted(d, id)) {
> -                               btf_dump_emit_typedef_def(d, id, t, 0);
> -                               btf_dump_printf(d, ";\n\n");
> -                       }
> -                       tstate->fwd_emitted = 1;
> +                       btf_dump_emit_typedef_def(d, id, t, 0);
> +                       btf_dump_printf(d, ";\n\n");
>                         break;
>                 default:
>                         break;
> @@ -739,36 +767,18 @@ static void btf_dump_emit_type(struct btf_dump *d, __u32 id, __u32 cont_id)
>         switch (kind) {
>         case BTF_KIND_INT:
>                 /* Emit type alias definitions if necessary */
> -               btf_dump_emit_missing_aliases(d, id, t);
> -
> -               tstate->emit_state = EMITTED;
> +               btf_dump_emit_missing_aliases(d, id, false);
>                 break;
>         case BTF_KIND_ENUM:
>         case BTF_KIND_ENUM64:
> -               if (top_level_def) {
> -                       btf_dump_emit_enum_def(d, id, t, 0);
> -                       btf_dump_printf(d, ";\n\n");
> -               }
> -               tstate->emit_state = EMITTED;
> -               break;
> -       case BTF_KIND_PTR:
> -       case BTF_KIND_VOLATILE:
> -       case BTF_KIND_CONST:
> -       case BTF_KIND_RESTRICT:
> -       case BTF_KIND_TYPE_TAG:
> -               btf_dump_emit_type(d, t->type, cont_id);
> -               break;
> -       case BTF_KIND_ARRAY:
> -               btf_dump_emit_type(d, btf_array(t)->type, cont_id);
> +               btf_dump_emit_enum_def(d, id, t, 0);
> +               btf_dump_printf(d, ";\n\n");
>                 break;
>         case BTF_KIND_FWD:
>                 btf_dump_emit_fwd_def(d, id, t);
>                 btf_dump_printf(d, ";\n\n");
> -               tstate->emit_state = EMITTED;
>                 break;
>         case BTF_KIND_TYPEDEF:
> -               tstate->emit_state = EMITTING;
> -               btf_dump_emit_type(d, t->type, id);
>                 /*
>                  * typedef can server as both definition and forward
>                  * declaration; at this stage someone depends on
> @@ -776,55 +786,14 @@ static void btf_dump_emit_type(struct btf_dump *d, __u32 id, __u32 cont_id)
>                  * through pointer), so unless we already did it,
>                  * emit typedef as a forward declaration
>                  */
> -               if (!tstate->fwd_emitted && !btf_dump_is_blacklisted(d, id)) {
> -                       btf_dump_emit_typedef_def(d, id, t, 0);
> -                       btf_dump_printf(d, ";\n\n");
> -               }
> -               tstate->emit_state = EMITTED;
> +               btf_dump_emit_typedef_def(d, id, t, 0);
> +               btf_dump_printf(d, ";\n\n");
>                 break;
>         case BTF_KIND_STRUCT:
>         case BTF_KIND_UNION:
> -               tstate->emit_state = EMITTING;
> -               /* if it's a top-level struct/union definition or struct/union
> -                * is anonymous, then in C we'll be emitting all fields and
> -                * their types (as opposed to just `struct X`), so we need to
> -                * make sure that all types, referenced from struct/union
> -                * members have necessary forward-declarations, where
> -                * applicable
> -                */
> -               if (top_level_def || t->name_off == 0) {
> -                       const struct btf_member *m = btf_members(t);
> -                       __u16 vlen = btf_vlen(t);
> -                       int i, new_cont_id;
> -
> -                       new_cont_id = t->name_off == 0 ? cont_id : id;
> -                       for (i = 0; i < vlen; i++, m++)
> -                               btf_dump_emit_type(d, m->type, new_cont_id);
> -               } else if (!tstate->fwd_emitted && id != cont_id) {
> -                       btf_dump_emit_struct_fwd(d, id, t);
> -                       btf_dump_printf(d, ";\n\n");
> -                       tstate->fwd_emitted = 1;
> -               }
> -
> -               if (top_level_def) {
> -                       btf_dump_emit_struct_def(d, id, t, 0);
> -                       btf_dump_printf(d, ";\n\n");
> -                       tstate->emit_state = EMITTED;
> -               } else {
> -                       tstate->emit_state = NOT_EMITTED;
> -               }
> -               break;
> -       case BTF_KIND_FUNC_PROTO: {
> -               const struct btf_param *p = btf_params(t);
> -               __u16 n = btf_vlen(t);
> -               int i;
> -
> -               btf_dump_emit_type(d, t->type, cont_id);
> -               for (i = 0; i < n; i++, p++)
> -                       btf_dump_emit_type(d, p->type, cont_id);
> -
> +               btf_dump_emit_struct_def(d, id, t, 0);
> +               btf_dump_printf(d, ";\n\n");
>                 break;
> -       }
>         default:
>                 break;
>         }
> @@ -1037,19 +1006,21 @@ static const char *missing_base_types[][2] = {
>         { "__Poly128_t",        "unsigned __int128" },
>  };
>
> -static void btf_dump_emit_missing_aliases(struct btf_dump *d, __u32 id,
> -                                         const struct btf_type *t)
> +static bool btf_dump_emit_missing_aliases(struct btf_dump *d, __u32 id, bool dry_run)

this dry_run approach look like a sloppy hack, tbh. Maybe just return
`const char *` of aliased name, and let caller handle whatever is
necessary (either making decision about the need for alias or actually
emitting `typedef %s %s`?

>  {
>         const char *name = btf_dump_type_name(d, id);
>         int i;
>
>         for (i = 0; i < ARRAY_SIZE(missing_base_types); i++) {
> -               if (strcmp(name, missing_base_types[i][0]) == 0) {
> +               if (strcmp(name, missing_base_types[i][0]) != 0)
> +                       continue;
> +               if (!dry_run)
>                         btf_dump_printf(d, "typedef %s %s;\n\n",
>                                         missing_base_types[i][1], name);
> -                       break;
> -               }
> +               return true;
>         }
> +
> +       return false;
>  }
>
>  static void btf_dump_emit_enum_fwd(struct btf_dump *d, __u32 id,
> --
> 2.34.1
>
Eduard Zingerman May 28, 2024, 10:25 p.m. UTC | #2
On Tue, 2024-05-28 at 15:05 -0700, Andrii Nakryiko wrote:

[...]

> > @@ -93,7 +85,10 @@ struct btf_dump {
> >         size_t cached_names_cap;
> > 
> >         /* topo-sorted list of dependent type definitions */
> > -       __u32 *emit_queue;
> > +       struct {
> > +               __u32 id:31;
> > +               __u32 fwd:1;
> > +       } *emit_queue;
> 
> let's define the named type right in this patch, no need to use
> typeof() hack just to remove it later.
> 
> Also, let's maybe have
> 
> struct <whatever> {
>     __u32 id;
>     __u32 flags;
> };
> 
> and define
> 
> enum btf_dump_emit_flags {
>     BTF_DUMP_FWD_DECL = 0x1,
> };
> 
> Or something along those lines? Having a few more flags available will
> make it less like that we'll need to add a new set of APIs just to
> accommodate one extra flag. (Though, if we add another field, we'll
> end up adding another API anyways, but I really hope we will never
> have to do this).

Ok, will do

[...]

> > -static int btf_dump_add_emit_queue_id(struct btf_dump *d, __u32 id)
> > +static int __btf_dump_add_emit_queue_id(struct btf_dump *d, __u32 id, bool fwd)
> 
> I don't like those underscored functions in libbpf code base, please
> don't add them. But I'm also not sure we need to have it, there are
> only a few calles of the original btf_dump_add_emit_queue_id(), so we
> can just update them to pass true/false as appropriate.

Will do

[...]

> > +static bool btf_dump_emit_missing_aliases(struct btf_dump *d, __u32 id, bool dry_run);
> > +
> > +static bool btf_dump_is_blacklisted(struct btf_dump *d, __u32 id)
> > +{
> > +       const struct btf_type *t = btf__type_by_id(d->btf, id);
> > +
> > +       /* __builtin_va_list is a compiler built-in, which causes compilation
> > +        * errors, when compiling w/ different compiler, then used to compile
> > +        * original code (e.g., GCC to compile kernel, Clang to use generated
> > +        * C header from BTF). As it is built-in, it should be already defined
> > +        * properly internally in compiler.
> > +        */
> > +       if (t->name_off == 0)
> > +               return false;
> > +       return strcmp(btf_name_of(d, t->name_off), "__builtin_va_list") == 0;
> > +}
> > +
> 
> why moving btf_dump_is_blacklisted() but forward declaring
> btf_dump_emit_missing_aliases()? Let's do the same to both, whichever
> it is (forward declaring probably is least distracting here)

Sure, will add forward declarations for both.

[...]

> >         case BTF_KIND_UNION: {
> >                 const struct btf_member *m = btf_members(t);
> > +               __u32 new_cont_id;
> > +
> >                 /*
> >                  * struct/union is part of strong link, only if it's embedded
> >                  * (so no ptr in a path) or it's anonymous (so has to be
> >                  * defined inline, even if declared through ptr)
> >                  */
> > -               if (through_ptr && t->name_off != 0)
> > -                       return 0;
> > +               if (through_ptr && t->name_off != 0) {
> > +                       if (id != cont_id)
> > +                               return btf_dump_add_emit_queue_fwd(d, id);
> > +                       else
> > +                               return 0;
> 
> very subjective nit, but this "else return 0;" just doesn't feel right
> here. Let's do:
> 
> if (id == cont_id)
>     return 0;
> return btf_dump_add_emit_queue_fwd();
> 
> It feels a bit more natural as "if it's a special nice case, we are
> done (return 0); otherwise we need to emit extra fwd decl."

Will do

> 
> > +               }
> > 
> >                 tstate->order_state = ORDERING;
> > 
> > +               new_cont_id = t->name_off == 0 ? cont_id : id;
> >                 vlen = btf_vlen(t);
> >                 for (i = 0; i < vlen; i++, m++) {
> > -                       err = btf_dump_order_type(d, m->type, false);
> > +                       err = btf_dump_order_type(d, m->type, new_cont_id, false);
> 
> just inline `t->name_off ? id : cont_id` here? It's short and
> straightforward enough, I suppose (named type defines new containing
> "scope", anonymous type continues existing scope)

Will do

[...]

> > +               err = btf_dump_add_emit_queue_fwd(d, id);
> > +               if (err)
> > +                       return err;
> > +               return 0;
> 
> return btf_dump_add_emit_queue_fwd(...); ? this is the last step, so
> seems appropriate

Will do

> >         case BTF_KIND_VOLATILE:
> >         case BTF_KIND_CONST:
> >         case BTF_KIND_RESTRICT:
> >         case BTF_KIND_TYPE_TAG:
> > -               return btf_dump_order_type(d, t->type, through_ptr);
> > +               return btf_dump_order_type(d, t->type, cont_id, through_ptr);
> > 
> >         case BTF_KIND_FUNC_PROTO: {
> >                 const struct btf_param *p = btf_params(t);
> > -               bool is_strong;
> > 
> > -               err = btf_dump_order_type(d, t->type, through_ptr);
> > +               err = btf_dump_order_type(d, t->type, cont_id, through_ptr);
> >                 if (err < 0)
> >                         return err;
> > -               is_strong = err > 0;
> > 
> >                 vlen = btf_vlen(t);
> >                 for (i = 0; i < vlen; i++, p++) {
> > -                       err = btf_dump_order_type(d, p->type, through_ptr);
> > +                       err = btf_dump_order_type(d, p->type, cont_id, through_ptr);
> >                         if (err < 0)
> >                                 return err;
> > -                       if (err > 0)
> > -                               is_strong = true;
> >                 }
> > -               return is_strong;
> > +               return err;
> 
> this should always be zero, right? Just return zero explicit, don't
> make reader to guess

Ok

[...]

> > @@ -1037,19 +1006,21 @@ static const char *missing_base_types[][2] = {
> >         { "__Poly128_t",        "unsigned __int128" },
> >  };
> > 
> > -static void btf_dump_emit_missing_aliases(struct btf_dump *d, __u32 id,
> > -                                         const struct btf_type *t)
> > +static bool btf_dump_emit_missing_aliases(struct btf_dump *d, __u32 id, bool dry_run)
> 
> this dry_run approach look like a sloppy hack, tbh. Maybe just return
> `const char *` of aliased name, and let caller handle whatever is
> necessary (either making decision about the need for alias or actually
> emitting `typedef %s %s`?

I had a version w/o dry run but it seemed to heavy-handed for the purpose:

static int btf_dump_missing_alias_idx(struct btf_dump *d, __u32 id)
{
	const char *name = btf_dump_type_name(d, id);
	int i;

	for (i = 0; i < ARRAY_SIZE(missing_base_types); i++) {
		if (strcmp(name, missing_base_types[i][0]) == 0)
			return i;
	}

        return -1;
}

static bool btf_dump_is_missing_alias(struct btf_dump *d, __u32 id)
{
	return btf_dump_missing_alias_idx(d, id) >= 0;
}

static bool btf_dump_emit_missing_aliases(struct btf_dump *d, __u32 id)
{
	const char *name = btf_dump_type_name(d, id);
	int i;

	i = btf_dump_missing_alias_idx(d, id);
	if (i < 0)
		return false;

	btf_dump_printf(d, "typedef %s %s;\n\n",
			missing_base_types[i][1], name);
	return true;
}

I can use the above if you think it is better.
Andrii Nakryiko May 28, 2024, 10:39 p.m. UTC | #3
On Tue, May 28, 2024 at 3:25 PM Eduard Zingerman <eddyz87@gmail.com> wrote:
>
> On Tue, 2024-05-28 at 15:05 -0700, Andrii Nakryiko wrote:
>
> [...]
>
> > > @@ -93,7 +85,10 @@ struct btf_dump {
> > >         size_t cached_names_cap;
> > >
> > >         /* topo-sorted list of dependent type definitions */
> > > -       __u32 *emit_queue;
> > > +       struct {
> > > +               __u32 id:31;
> > > +               __u32 fwd:1;
> > > +       } *emit_queue;
> >
> > let's define the named type right in this patch, no need to use
> > typeof() hack just to remove it later.
> >
> > Also, let's maybe have
> >
> > struct <whatever> {
> >     __u32 id;
> >     __u32 flags;
> > };
> >
> > and define
> >
> > enum btf_dump_emit_flags {
> >     BTF_DUMP_FWD_DECL = 0x1,
> > };
> >
> > Or something along those lines? Having a few more flags available will
> > make it less like that we'll need to add a new set of APIs just to
> > accommodate one extra flag. (Though, if we add another field, we'll
> > end up adding another API anyways, but I really hope we will never
> > have to do this).
>
> Ok, will do
>
> [...]
>
> > > -static int btf_dump_add_emit_queue_id(struct btf_dump *d, __u32 id)
> > > +static int __btf_dump_add_emit_queue_id(struct btf_dump *d, __u32 id, bool fwd)
> >
> > I don't like those underscored functions in libbpf code base, please
> > don't add them. But I'm also not sure we need to have it, there are
> > only a few calles of the original btf_dump_add_emit_queue_id(), so we
> > can just update them to pass true/false as appropriate.
>
> Will do
>
> [...]
>
> > > +static bool btf_dump_emit_missing_aliases(struct btf_dump *d, __u32 id, bool dry_run);
> > > +
> > > +static bool btf_dump_is_blacklisted(struct btf_dump *d, __u32 id)
> > > +{
> > > +       const struct btf_type *t = btf__type_by_id(d->btf, id);
> > > +
> > > +       /* __builtin_va_list is a compiler built-in, which causes compilation
> > > +        * errors, when compiling w/ different compiler, then used to compile
> > > +        * original code (e.g., GCC to compile kernel, Clang to use generated
> > > +        * C header from BTF). As it is built-in, it should be already defined
> > > +        * properly internally in compiler.
> > > +        */
> > > +       if (t->name_off == 0)
> > > +               return false;
> > > +       return strcmp(btf_name_of(d, t->name_off), "__builtin_va_list") == 0;
> > > +}
> > > +
> >
> > why moving btf_dump_is_blacklisted() but forward declaring
> > btf_dump_emit_missing_aliases()? Let's do the same to both, whichever
> > it is (forward declaring probably is least distracting here)
>
> Sure, will add forward declarations for both.
>
> [...]
>
> > >         case BTF_KIND_UNION: {
> > >                 const struct btf_member *m = btf_members(t);
> > > +               __u32 new_cont_id;
> > > +
> > >                 /*
> > >                  * struct/union is part of strong link, only if it's embedded
> > >                  * (so no ptr in a path) or it's anonymous (so has to be
> > >                  * defined inline, even if declared through ptr)
> > >                  */
> > > -               if (through_ptr && t->name_off != 0)
> > > -                       return 0;
> > > +               if (through_ptr && t->name_off != 0) {
> > > +                       if (id != cont_id)
> > > +                               return btf_dump_add_emit_queue_fwd(d, id);
> > > +                       else
> > > +                               return 0;
> >
> > very subjective nit, but this "else return 0;" just doesn't feel right
> > here. Let's do:
> >
> > if (id == cont_id)
> >     return 0;
> > return btf_dump_add_emit_queue_fwd();
> >
> > It feels a bit more natural as "if it's a special nice case, we are
> > done (return 0); otherwise we need to emit extra fwd decl."
>
> Will do
>
> >
> > > +               }
> > >
> > >                 tstate->order_state = ORDERING;
> > >
> > > +               new_cont_id = t->name_off == 0 ? cont_id : id;
> > >                 vlen = btf_vlen(t);
> > >                 for (i = 0; i < vlen; i++, m++) {
> > > -                       err = btf_dump_order_type(d, m->type, false);
> > > +                       err = btf_dump_order_type(d, m->type, new_cont_id, false);
> >
> > just inline `t->name_off ? id : cont_id` here? It's short and
> > straightforward enough, I suppose (named type defines new containing
> > "scope", anonymous type continues existing scope)
>
> Will do
>
> [...]
>
> > > +               err = btf_dump_add_emit_queue_fwd(d, id);
> > > +               if (err)
> > > +                       return err;
> > > +               return 0;
> >
> > return btf_dump_add_emit_queue_fwd(...); ? this is the last step, so
> > seems appropriate
>
> Will do
>
> > >         case BTF_KIND_VOLATILE:
> > >         case BTF_KIND_CONST:
> > >         case BTF_KIND_RESTRICT:
> > >         case BTF_KIND_TYPE_TAG:
> > > -               return btf_dump_order_type(d, t->type, through_ptr);
> > > +               return btf_dump_order_type(d, t->type, cont_id, through_ptr);
> > >
> > >         case BTF_KIND_FUNC_PROTO: {
> > >                 const struct btf_param *p = btf_params(t);
> > > -               bool is_strong;
> > >
> > > -               err = btf_dump_order_type(d, t->type, through_ptr);
> > > +               err = btf_dump_order_type(d, t->type, cont_id, through_ptr);
> > >                 if (err < 0)
> > >                         return err;
> > > -               is_strong = err > 0;
> > >
> > >                 vlen = btf_vlen(t);
> > >                 for (i = 0; i < vlen; i++, p++) {
> > > -                       err = btf_dump_order_type(d, p->type, through_ptr);
> > > +                       err = btf_dump_order_type(d, p->type, cont_id, through_ptr);
> > >                         if (err < 0)
> > >                                 return err;
> > > -                       if (err > 0)
> > > -                               is_strong = true;
> > >                 }
> > > -               return is_strong;
> > > +               return err;
> >
> > this should always be zero, right? Just return zero explicit, don't
> > make reader to guess
>
> Ok
>
> [...]
>
> > > @@ -1037,19 +1006,21 @@ static const char *missing_base_types[][2] = {
> > >         { "__Poly128_t",        "unsigned __int128" },
> > >  };
> > >
> > > -static void btf_dump_emit_missing_aliases(struct btf_dump *d, __u32 id,
> > > -                                         const struct btf_type *t)
> > > +static bool btf_dump_emit_missing_aliases(struct btf_dump *d, __u32 id, bool dry_run)
> >
> > this dry_run approach look like a sloppy hack, tbh. Maybe just return
> > `const char *` of aliased name, and let caller handle whatever is
> > necessary (either making decision about the need for alias or actually
> > emitting `typedef %s %s`?
>
> I had a version w/o dry run but it seemed to heavy-handed for the purpose:
>
> static int btf_dump_missing_alias_idx(struct btf_dump *d, __u32 id)
> {
>         const char *name = btf_dump_type_name(d, id);
>         int i;
>
>         for (i = 0; i < ARRAY_SIZE(missing_base_types); i++) {
>                 if (strcmp(name, missing_base_types[i][0]) == 0)
>                         return i;
>         }
>
>         return -1;
> }
>
> static bool btf_dump_is_missing_alias(struct btf_dump *d, __u32 id)
> {
>         return btf_dump_missing_alias_idx(d, id) >= 0;
> }
>
> static bool btf_dump_emit_missing_aliases(struct btf_dump *d, __u32 id)
> {
>         const char *name = btf_dump_type_name(d, id);
>         int i;
>
>         i = btf_dump_missing_alias_idx(d, id);
>         if (i < 0)
>                 return false;
>
>         btf_dump_printf(d, "typedef %s %s;\n\n",
>                         missing_base_types[i][1], name);
>         return true;
> }
>
> I can use the above if you think it is better.

I meant something less heavy-handed:

static const char *btf_dump_missing_alias(struct btf_dump *d, __u32 id)
{
        const char *name = btf_dump_type_name(d, id);
        int i;

        for (i = 0; i < ARRAY_SIZE(missing_base_types); i++) {
                if (strcmp(name, missing_base_types[i][0]) == 0)
                        return missing_base_types[i][1];
        }
        return NULL;
}

And we actually don't need to use btf_dump_type_name(), btf_name_of()
should be more than adequate for this.

Then if you get NULL from this function, there is no aliasing
required. If you got non-NULL, you have the name you should alias to
(btf_name_of() will give you original name).
Eduard Zingerman May 28, 2024, 10:41 p.m. UTC | #4
On Tue, 2024-05-28 at 15:39 -0700, Andrii Nakryiko wrote:


[...]

> I meant something less heavy-handed:
> 
> static const char *btf_dump_missing_alias(struct btf_dump *d, __u32 id)
> {
>         const char *name = btf_dump_type_name(d, id);
>         int i;
> 
>         for (i = 0; i < ARRAY_SIZE(missing_base_types); i++) {
>                 if (strcmp(name, missing_base_types[i][0]) == 0)
>                         return missing_base_types[i][1];
>         }
>         return NULL;
> }
> 
> And we actually don't need to use btf_dump_type_name(), btf_name_of()
> should be more than adequate for this.
> 
> Then if you get NULL from this function, there is no aliasing
> required. If you got non-NULL, you have the name you should alias to
> (btf_name_of() will give you original name).

This should do it, thank you.
diff mbox series

Patch

diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c
index 5dbca76b953f..10532ae9ff14 100644
--- a/tools/lib/bpf/btf_dump.c
+++ b/tools/lib/bpf/btf_dump.c
@@ -36,24 +36,16 @@  enum btf_dump_type_order_state {
 	ORDERED,
 };
 
-enum btf_dump_type_emit_state {
-	NOT_EMITTED,
-	EMITTING,
-	EMITTED,
-};
-
 /* per-type auxiliary state */
 struct btf_dump_type_aux_state {
 	/* topological sorting state */
 	enum btf_dump_type_order_state order_state: 2;
-	/* emitting state used to determine the need for forward declaration */
-	enum btf_dump_type_emit_state emit_state: 2;
-	/* whether forward declaration was already emitted */
-	__u8 fwd_emitted: 1;
 	/* whether unique non-duplicate name was already assigned */
 	__u8 name_resolved: 1;
 	/* whether type is referenced from any other type */
 	__u8 referenced: 1;
+	/* whether forward declaration was already ordered */
+	__u8 fwd_ordered: 1;
 };
 
 /* indent string length; one indent string is added for each indent level */
@@ -93,7 +85,10 @@  struct btf_dump {
 	size_t cached_names_cap;
 
 	/* topo-sorted list of dependent type definitions */
-	__u32 *emit_queue;
+	struct {
+		__u32 id:31;
+		__u32 fwd:1;
+	} *emit_queue;
 	int emit_queue_cap;
 	int emit_queue_cnt;
 
@@ -208,7 +203,6 @@  static int btf_dump_resize(struct btf_dump *d)
 	if (d->last_id == 0) {
 		/* VOID is special */
 		d->type_states[0].order_state = ORDERED;
-		d->type_states[0].emit_state = EMITTED;
 	}
 
 	/* eagerly determine referenced types for anon enums */
@@ -255,8 +249,8 @@  void btf_dump__free(struct btf_dump *d)
 	free(d);
 }
 
-static int btf_dump_order_type(struct btf_dump *d, __u32 id, bool through_ptr);
-static void btf_dump_emit_type(struct btf_dump *d, __u32 id, __u32 cont_id);
+static int btf_dump_order_type(struct btf_dump *d, __u32 id, __u32 cont_id, bool through_ptr);
+static void btf_dump_emit_type(struct btf_dump *d, __u32 id, bool fwd);
 
 /*
  * Dump BTF type in a compilable C syntax, including all the necessary
@@ -276,6 +270,7 @@  static void btf_dump_emit_type(struct btf_dump *d, __u32 id, __u32 cont_id);
  */
 int btf_dump__dump_type(struct btf_dump *d, __u32 id)
 {
+	const struct btf_type *t;
 	int err, i;
 
 	if (id >= btf__type_cnt(d->btf))
@@ -286,12 +281,23 @@  int btf_dump__dump_type(struct btf_dump *d, __u32 id)
 		return libbpf_err(err);
 
 	d->emit_queue_cnt = 0;
-	err = btf_dump_order_type(d, id, false);
-	if (err < 0)
-		return libbpf_err(err);
+	t = btf_type_by_id(d->btf, id);
+	switch (btf_kind(t)) {
+	case BTF_KIND_STRUCT:
+	case BTF_KIND_UNION:
+	case BTF_KIND_TYPEDEF:
+	case BTF_KIND_ENUM:
+	case BTF_KIND_ENUM64:
+	case BTF_KIND_FWD:
+		err = btf_dump_order_type(d, id, id, false);
+		if (err < 0)
+			return libbpf_err(err);
+	default:
+		break;
+	};
 
 	for (i = 0; i < d->emit_queue_cnt; i++)
-		btf_dump_emit_type(d, d->emit_queue[i], 0 /*top-level*/);
+		btf_dump_emit_type(d, d->emit_queue[i].id, d->emit_queue[i].fwd);
 
 	return 0;
 }
@@ -374,9 +380,9 @@  static int btf_dump_mark_referenced(struct btf_dump *d)
 	return 0;
 }
 
-static int btf_dump_add_emit_queue_id(struct btf_dump *d, __u32 id)
+static int __btf_dump_add_emit_queue_id(struct btf_dump *d, __u32 id, bool fwd)
 {
-	__u32 *new_queue;
+	typeof(d->emit_queue[0]) *new_queue = NULL;
 	size_t new_cap;
 
 	if (d->emit_queue_cnt >= d->emit_queue_cap) {
@@ -388,10 +394,45 @@  static int btf_dump_add_emit_queue_id(struct btf_dump *d, __u32 id)
 		d->emit_queue_cap = new_cap;
 	}
 
-	d->emit_queue[d->emit_queue_cnt++] = id;
+	d->emit_queue[d->emit_queue_cnt].id = id;
+	d->emit_queue[d->emit_queue_cnt].fwd = fwd;
+	d->emit_queue_cnt++;
 	return 0;
 }
 
+static int btf_dump_add_emit_queue_id(struct btf_dump *d, __u32 id)
+{
+	return __btf_dump_add_emit_queue_id(d, id, false);
+}
+
+static int btf_dump_add_emit_queue_fwd(struct btf_dump *d, __u32 id)
+{
+	struct btf_dump_type_aux_state *tstate = &d->type_states[id];
+
+	if (tstate->fwd_ordered)
+		return 0;
+
+	tstate->fwd_ordered = 1;
+	return __btf_dump_add_emit_queue_id(d, id, true);
+}
+
+static bool btf_dump_emit_missing_aliases(struct btf_dump *d, __u32 id, bool dry_run);
+
+static bool btf_dump_is_blacklisted(struct btf_dump *d, __u32 id)
+{
+	const struct btf_type *t = btf__type_by_id(d->btf, id);
+
+	/* __builtin_va_list is a compiler built-in, which causes compilation
+	 * errors, when compiling w/ different compiler, then used to compile
+	 * original code (e.g., GCC to compile kernel, Clang to use generated
+	 * C header from BTF). As it is built-in, it should be already defined
+	 * properly internally in compiler.
+	 */
+	if (t->name_off == 0)
+		return false;
+	return strcmp(btf_name_of(d, t->name_off), "__builtin_va_list") == 0;
+}
+
 /*
  * Determine order of emitting dependent types and specified type to satisfy
  * C compilation rules.  This is done through topological sorting with an
@@ -441,32 +482,33 @@  static int btf_dump_add_emit_queue_id(struct btf_dump *d, __u32 id)
  * The rule is as follows. Given a chain of BTF types from X to Y, if there is
  * BTF_KIND_PTR type in the chain and at least one non-anonymous type
  * Z (excluding X, including Y), then link is weak. Otherwise, it's strong.
- * Weak/strong relationship is determined recursively during DFS traversal and
- * is returned as a result from btf_dump_order_type().
+ * Weak/strong relationship is determined recursively during DFS traversal.
+ *
+ * When type id is reached via a weak link a forward declaration for
+ * that type is added to the emit queue, otherwise "full" declaration
+ * is added to the emit queue.
+ *
+ * We also keep track of "containing struct/union type ID" to determine when
+ * we reference it from inside and thus can avoid emitting unnecessary forward
+ * declaration.
  *
  * btf_dump_order_type() is trying to avoid unnecessary forward declarations,
  * but it is not guaranteeing that no extraneous forward declarations will be
  * emitted.
  *
  * To avoid extra work, algorithm marks some of BTF types as ORDERED, when
- * it's done with them, but not for all (e.g., VOLATILE, CONST, RESTRICT,
- * ARRAY, FUNC_PROTO), as weak/strong semantics for those depends on the
+ * it's done with them, but not for all (e.g., PTR, VOLATILE, CONST, RESTRICT,
+ * ARRAY, FUNC_PROTO, TYPEDEF), as weak/strong semantics for those depends on the
  * entire graph path, so depending where from one came to that BTF type, it
  * might cause weak or strong ordering. For types like STRUCT/UNION/INT/ENUM,
  * once they are processed, there is no need to do it again, so they are
- * marked as ORDERED. We can mark PTR as ORDERED as well, as it semi-forces
- * weak link, unless subsequent referenced STRUCT/UNION/ENUM is anonymous. But
- * in any case, once those are processed, no need to do it again, as the
- * result won't change.
+ * marked as ORDERED.
  *
  * Returns:
- *   - 1, if type is part of strong link (so there is strong topological
- *   ordering requirements);
- *   - 0, if type is part of weak link (so can be satisfied through forward
- *   declaration);
+ *   - 0, on success;
  *   - <0, on error (e.g., unsatisfiable type loop detected).
  */
-static int btf_dump_order_type(struct btf_dump *d, __u32 id, bool through_ptr)
+static int btf_dump_order_type(struct btf_dump *d, __u32 id, __u32 cont_id, bool through_ptr)
 {
 	/*
 	 * Order state is used to detect strong link cycles, but only for BTF
@@ -486,48 +528,78 @@  static int btf_dump_order_type(struct btf_dump *d, __u32 id, bool through_ptr)
 
 	/* return true, letting typedefs know that it's ok to be emitted */
 	if (tstate->order_state == ORDERED)
-		return 1;
+		return 0;
 
 	t = btf__type_by_id(d->btf, id);
 
 	if (tstate->order_state == ORDERING) {
 		/* type loop, but resolvable through fwd declaration */
-		if (btf_is_composite(t) && through_ptr && t->name_off != 0)
-			return 0;
+		if (btf_is_composite(t) && through_ptr && t->name_off != 0) {
+			if (id != cont_id)
+				return btf_dump_add_emit_queue_fwd(d, id);
+			else
+				return 0;
+		}
 		pr_warn("unsatisfiable type cycle, id:[%u]\n", id);
 		return -ELOOP;
 	}
 
 	switch (btf_kind(t)) {
 	case BTF_KIND_INT:
+		tstate->order_state = ORDERED;
+		if (btf_dump_emit_missing_aliases(d, id, true))
+			return btf_dump_add_emit_queue_id(d, id);
+		else
+			return 0;
 	case BTF_KIND_FLOAT:
 		tstate->order_state = ORDERED;
 		return 0;
 
 	case BTF_KIND_PTR:
-		err = btf_dump_order_type(d, t->type, true);
-		tstate->order_state = ORDERED;
+		/* Depending on whether pointer is a part of a recursive struct
+		 * declaration, it might not necessitate generation of a forward
+		 * declaration for the target type, e.g.:
+		 *
+		 * struct foo {
+		 *	struct foo *p; // no need for forward declaration
+		 * }
+		 *
+		 * struct bar {
+		 *	struct foo *p; // forward declaration is needed
+		 * }
+		 *
+		 * Hence, don't mark pointer as ORDERED, to allow traversal
+		 * to the target type and comparison with 'cont_id'.
+		 */
+		err = btf_dump_order_type(d, t->type, cont_id, true);
 		return err;
 
 	case BTF_KIND_ARRAY:
-		return btf_dump_order_type(d, btf_array(t)->type, false);
+		return btf_dump_order_type(d, btf_array(t)->type, cont_id, false);
 
 	case BTF_KIND_STRUCT:
 	case BTF_KIND_UNION: {
 		const struct btf_member *m = btf_members(t);
+		__u32 new_cont_id;
+
 		/*
 		 * struct/union is part of strong link, only if it's embedded
 		 * (so no ptr in a path) or it's anonymous (so has to be
 		 * defined inline, even if declared through ptr)
 		 */
-		if (through_ptr && t->name_off != 0)
-			return 0;
+		if (through_ptr && t->name_off != 0) {
+			if (id != cont_id)
+				return btf_dump_add_emit_queue_fwd(d, id);
+			else
+				return 0;
+		}
 
 		tstate->order_state = ORDERING;
 
+		new_cont_id = t->name_off == 0 ? cont_id : id;
 		vlen = btf_vlen(t);
 		for (i = 0; i < vlen; i++, m++) {
-			err = btf_dump_order_type(d, m->type, false);
+			err = btf_dump_order_type(d, m->type, new_cont_id, false);
 			if (err < 0)
 				return err;
 		}
@@ -539,7 +611,7 @@  static int btf_dump_order_type(struct btf_dump *d, __u32 id, bool through_ptr)
 		}
 
 		tstate->order_state = ORDERED;
-		return 1;
+		return 0;
 	}
 	case BTF_KIND_ENUM:
 	case BTF_KIND_ENUM64:
@@ -555,51 +627,53 @@  static int btf_dump_order_type(struct btf_dump *d, __u32 id, bool through_ptr)
 				return err;
 		}
 		tstate->order_state = ORDERED;
-		return 1;
-
-	case BTF_KIND_TYPEDEF: {
-		int is_strong;
-
-		is_strong = btf_dump_order_type(d, t->type, through_ptr);
-		if (is_strong < 0)
-			return is_strong;
+		return 0;
 
-		/* typedef is similar to struct/union w.r.t. fwd-decls */
-		if (through_ptr && !is_strong)
+	case BTF_KIND_TYPEDEF:
+		/* Do not mark typedef as ORDERED, always emit a forward declaration for
+		 * it instead. Otherwise the following situation would be troublesome:
+		 *
+		 *   typedef struct foo foo_alias;
+		 *
+		 *   struct foo {};
+		 *
+		 *   struct root {
+		 *      foo_alias *a;
+		 *      foo_alias b;
+		 *   };
+		 *
+		 */
+		if (btf_dump_is_blacklisted(d, id))
 			return 0;
 
-		/* typedef is always a named definition */
-		err = btf_dump_add_emit_queue_id(d, id);
-		if (err)
+		err = btf_dump_order_type(d, t->type, t->type, through_ptr);
+		if (err < 0)
 			return err;
 
-		d->type_states[id].order_state = ORDERED;
-		return 1;
-	}
+		err = btf_dump_add_emit_queue_fwd(d, id);
+		if (err)
+			return err;
+		return 0;
 	case BTF_KIND_VOLATILE:
 	case BTF_KIND_CONST:
 	case BTF_KIND_RESTRICT:
 	case BTF_KIND_TYPE_TAG:
-		return btf_dump_order_type(d, t->type, through_ptr);
+		return btf_dump_order_type(d, t->type, cont_id, through_ptr);
 
 	case BTF_KIND_FUNC_PROTO: {
 		const struct btf_param *p = btf_params(t);
-		bool is_strong;
 
-		err = btf_dump_order_type(d, t->type, through_ptr);
+		err = btf_dump_order_type(d, t->type, cont_id, through_ptr);
 		if (err < 0)
 			return err;
-		is_strong = err > 0;
 
 		vlen = btf_vlen(t);
 		for (i = 0; i < vlen; i++, p++) {
-			err = btf_dump_order_type(d, p->type, through_ptr);
+			err = btf_dump_order_type(d, p->type, cont_id, through_ptr);
 			if (err < 0)
 				return err;
-			if (err > 0)
-				is_strong = true;
 		}
-		return is_strong;
+		return err;
 	}
 	case BTF_KIND_FUNC:
 	case BTF_KIND_VAR:
@@ -613,9 +687,6 @@  static int btf_dump_order_type(struct btf_dump *d, __u32 id, bool through_ptr)
 	}
 }
 
-static void btf_dump_emit_missing_aliases(struct btf_dump *d, __u32 id,
-					  const struct btf_type *t);
-
 static void btf_dump_emit_struct_fwd(struct btf_dump *d, __u32 id,
 				     const struct btf_type *t);
 static void btf_dump_emit_struct_def(struct btf_dump *d, __u32 id,
@@ -649,73 +720,33 @@  static const char *btf_dump_ident_name(struct btf_dump *d, __u32 id);
 static size_t btf_dump_name_dups(struct btf_dump *d, struct hashmap *name_map,
 				 const char *orig_name);
 
-static bool btf_dump_is_blacklisted(struct btf_dump *d, __u32 id)
-{
-	const struct btf_type *t = btf__type_by_id(d->btf, id);
-
-	/* __builtin_va_list is a compiler built-in, which causes compilation
-	 * errors, when compiling w/ different compiler, then used to compile
-	 * original code (e.g., GCC to compile kernel, Clang to use generated
-	 * C header from BTF). As it is built-in, it should be already defined
-	 * properly internally in compiler.
-	 */
-	if (t->name_off == 0)
-		return false;
-	return strcmp(btf_name_of(d, t->name_off), "__builtin_va_list") == 0;
-}
-
 /*
  * Emit C-syntax definitions of types from chains of BTF types.
  *
- * High-level handling of determining necessary forward declarations are handled
- * by btf_dump_emit_type() itself, but all nitty-gritty details of emitting type
+ * All nitty-gritty details of emitting type
  * declarations/definitions in C syntax  are handled by a combo of
  * btf_dump_emit_type_decl()/btf_dump_emit_type_chain() w/ delegation to
  * corresponding btf_dump_emit_*_{def,fwd}() functions.
  *
- * We also keep track of "containing struct/union type ID" to determine when
- * we reference it from inside and thus can avoid emitting unnecessary forward
- * declaration.
- *
  * This algorithm is designed in such a way, that even if some error occurs
  * (either technical, e.g., out of memory, or logical, i.e., malformed BTF
  * that doesn't comply to C rules completely), algorithm will try to proceed
  * and produce as much meaningful output as possible.
  */
-static void btf_dump_emit_type(struct btf_dump *d, __u32 id, __u32 cont_id)
+static void btf_dump_emit_type(struct btf_dump *d, __u32 id, bool fwd)
 {
-	struct btf_dump_type_aux_state *tstate = &d->type_states[id];
-	bool top_level_def = cont_id == 0;
 	const struct btf_type *t;
 	__u16 kind;
 
-	if (tstate->emit_state == EMITTED)
-		return;
-
 	t = btf__type_by_id(d->btf, id);
 	kind = btf_kind(t);
 
-	if (tstate->emit_state == EMITTING) {
-		if (tstate->fwd_emitted)
-			return;
-
+	if (fwd) {
 		switch (kind) {
 		case BTF_KIND_STRUCT:
 		case BTF_KIND_UNION:
-			/*
-			 * if we are referencing a struct/union that we are
-			 * part of - then no need for fwd declaration
-			 */
-			if (id == cont_id)
-				return;
-			if (t->name_off == 0) {
-				pr_warn("anonymous struct/union loop, id:[%u]\n",
-					id);
-				return;
-			}
 			btf_dump_emit_struct_fwd(d, id, t);
 			btf_dump_printf(d, ";\n\n");
-			tstate->fwd_emitted = 1;
 			break;
 		case BTF_KIND_TYPEDEF:
 			/*
@@ -723,11 +754,8 @@  static void btf_dump_emit_type(struct btf_dump *d, __u32 id, __u32 cont_id)
 			 * was emitted, but it can be used only for "weak"
 			 * references through pointer only, not for embedding
 			 */
-			if (!btf_dump_is_blacklisted(d, id)) {
-				btf_dump_emit_typedef_def(d, id, t, 0);
-				btf_dump_printf(d, ";\n\n");
-			}
-			tstate->fwd_emitted = 1;
+			btf_dump_emit_typedef_def(d, id, t, 0);
+			btf_dump_printf(d, ";\n\n");
 			break;
 		default:
 			break;
@@ -739,36 +767,18 @@  static void btf_dump_emit_type(struct btf_dump *d, __u32 id, __u32 cont_id)
 	switch (kind) {
 	case BTF_KIND_INT:
 		/* Emit type alias definitions if necessary */
-		btf_dump_emit_missing_aliases(d, id, t);
-
-		tstate->emit_state = EMITTED;
+		btf_dump_emit_missing_aliases(d, id, false);
 		break;
 	case BTF_KIND_ENUM:
 	case BTF_KIND_ENUM64:
-		if (top_level_def) {
-			btf_dump_emit_enum_def(d, id, t, 0);
-			btf_dump_printf(d, ";\n\n");
-		}
-		tstate->emit_state = EMITTED;
-		break;
-	case BTF_KIND_PTR:
-	case BTF_KIND_VOLATILE:
-	case BTF_KIND_CONST:
-	case BTF_KIND_RESTRICT:
-	case BTF_KIND_TYPE_TAG:
-		btf_dump_emit_type(d, t->type, cont_id);
-		break;
-	case BTF_KIND_ARRAY:
-		btf_dump_emit_type(d, btf_array(t)->type, cont_id);
+		btf_dump_emit_enum_def(d, id, t, 0);
+		btf_dump_printf(d, ";\n\n");
 		break;
 	case BTF_KIND_FWD:
 		btf_dump_emit_fwd_def(d, id, t);
 		btf_dump_printf(d, ";\n\n");
-		tstate->emit_state = EMITTED;
 		break;
 	case BTF_KIND_TYPEDEF:
-		tstate->emit_state = EMITTING;
-		btf_dump_emit_type(d, t->type, id);
 		/*
 		 * typedef can server as both definition and forward
 		 * declaration; at this stage someone depends on
@@ -776,55 +786,14 @@  static void btf_dump_emit_type(struct btf_dump *d, __u32 id, __u32 cont_id)
 		 * through pointer), so unless we already did it,
 		 * emit typedef as a forward declaration
 		 */
-		if (!tstate->fwd_emitted && !btf_dump_is_blacklisted(d, id)) {
-			btf_dump_emit_typedef_def(d, id, t, 0);
-			btf_dump_printf(d, ";\n\n");
-		}
-		tstate->emit_state = EMITTED;
+		btf_dump_emit_typedef_def(d, id, t, 0);
+		btf_dump_printf(d, ";\n\n");
 		break;
 	case BTF_KIND_STRUCT:
 	case BTF_KIND_UNION:
-		tstate->emit_state = EMITTING;
-		/* if it's a top-level struct/union definition or struct/union
-		 * is anonymous, then in C we'll be emitting all fields and
-		 * their types (as opposed to just `struct X`), so we need to
-		 * make sure that all types, referenced from struct/union
-		 * members have necessary forward-declarations, where
-		 * applicable
-		 */
-		if (top_level_def || t->name_off == 0) {
-			const struct btf_member *m = btf_members(t);
-			__u16 vlen = btf_vlen(t);
-			int i, new_cont_id;
-
-			new_cont_id = t->name_off == 0 ? cont_id : id;
-			for (i = 0; i < vlen; i++, m++)
-				btf_dump_emit_type(d, m->type, new_cont_id);
-		} else if (!tstate->fwd_emitted && id != cont_id) {
-			btf_dump_emit_struct_fwd(d, id, t);
-			btf_dump_printf(d, ";\n\n");
-			tstate->fwd_emitted = 1;
-		}
-
-		if (top_level_def) {
-			btf_dump_emit_struct_def(d, id, t, 0);
-			btf_dump_printf(d, ";\n\n");
-			tstate->emit_state = EMITTED;
-		} else {
-			tstate->emit_state = NOT_EMITTED;
-		}
-		break;
-	case BTF_KIND_FUNC_PROTO: {
-		const struct btf_param *p = btf_params(t);
-		__u16 n = btf_vlen(t);
-		int i;
-
-		btf_dump_emit_type(d, t->type, cont_id);
-		for (i = 0; i < n; i++, p++)
-			btf_dump_emit_type(d, p->type, cont_id);
-
+		btf_dump_emit_struct_def(d, id, t, 0);
+		btf_dump_printf(d, ";\n\n");
 		break;
-	}
 	default:
 		break;
 	}
@@ -1037,19 +1006,21 @@  static const char *missing_base_types[][2] = {
 	{ "__Poly128_t",	"unsigned __int128" },
 };
 
-static void btf_dump_emit_missing_aliases(struct btf_dump *d, __u32 id,
-					  const struct btf_type *t)
+static bool btf_dump_emit_missing_aliases(struct btf_dump *d, __u32 id, bool dry_run)
 {
 	const char *name = btf_dump_type_name(d, id);
 	int i;
 
 	for (i = 0; i < ARRAY_SIZE(missing_base_types); i++) {
-		if (strcmp(name, missing_base_types[i][0]) == 0) {
+		if (strcmp(name, missing_base_types[i][0]) != 0)
+			continue;
+		if (!dry_run)
 			btf_dump_printf(d, "typedef %s %s;\n\n",
 					missing_base_types[i][1], name);
-			break;
-		}
+		return true;
 	}
+
+	return false;
 }
 
 static void btf_dump_emit_enum_fwd(struct btf_dump *d, __u32 id,