diff mbox series

[v4,bpf-next,01/11] libbpf: add btf__distill_base() creating split BTF with distilled base BTF

Message ID 20240517102246.4070184-2-alan.maguire@oracle.com (mailing list archive)
State Superseded
Delegated to: BPF
Headers show
Series bpf: support resilient split BTF | expand

Checks

Context Check Description
bpf/vmtest-bpf-next-PR fail PR summary
bpf/vmtest-bpf-next-VM_Test-36 success Logs for x86_64-llvm-18 / build-release / build for x86_64 with llvm-18 and -O2 optimization
bpf/vmtest-bpf-next-VM_Test-37 fail Logs for x86_64-llvm-18 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-38 fail Logs for x86_64-llvm-18 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-39 fail Logs for x86_64-llvm-18 / test (test_progs_cpuv4, false, 360) / test_progs_cpuv4 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-40 fail Logs for x86_64-llvm-18 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-41 fail Logs for x86_64-llvm-18 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-42 success Logs for x86_64-llvm-18 / veristat
bpf/vmtest-bpf-next-VM_Test-0 success Logs for Lint
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-2 success Logs for Unittests
bpf/vmtest-bpf-next-VM_Test-3 success Logs for Validate matrix.py
bpf/vmtest-bpf-next-VM_Test-5 success Logs for aarch64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-10 success Logs for aarch64-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-12 success Logs for s390x-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-4 success Logs for aarch64-gcc / build / build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-11 success Logs for s390x-gcc / build / build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-9 fail Logs for aarch64-gcc / test (test_verifier, false, 360) / test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-6 fail Logs for aarch64-gcc / test (test_maps, false, 360) / test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-17 success Logs for s390x-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-18 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-19 success Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-20 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-28 fail Logs for x86_64-llvm-17 / build / build for x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-29 fail Logs for x86_64-llvm-17 / build-release / build for x86_64 with llvm-17 and -O2 optimization
bpf/vmtest-bpf-next-VM_Test-30 success Logs for x86_64-llvm-17 / test
bpf/vmtest-bpf-next-VM_Test-31 success Logs for x86_64-llvm-17 / veristat
bpf/vmtest-bpf-next-VM_Test-32 fail Logs for x86_64-llvm-18 / build / build for x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-33 fail Logs for x86_64-llvm-18 / build-release / build for x86_64 with llvm-18 and -O2 optimization
bpf/vmtest-bpf-next-VM_Test-34 success Logs for x86_64-llvm-18 / test
bpf/vmtest-bpf-next-VM_Test-35 success Logs for x86_64-llvm-18 / veristat
bpf/vmtest-bpf-next-VM_Test-13 fail Logs for s390x-gcc / test (test_maps, false, 360) / test_maps on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-15 fail Logs for s390x-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-16 fail Logs for s390x-gcc / test (test_verifier, false, 360) / test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-7 fail Logs for aarch64-gcc / test (test_progs, false, 360) / test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-8 fail Logs for aarch64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-14 fail Logs for s390x-gcc / test (test_progs, false, 360) / test_progs on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-21 fail Logs for x86_64-gcc / test (test_maps, false, 360) / test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-22 fail Logs for x86_64-gcc / test (test_progs, false, 360) / test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-23 fail Logs for x86_64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-24 success Logs for x86_64-gcc / test (test_progs_no_alu32_parallel, true, 30) / test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-25 success Logs for x86_64-gcc / test (test_progs_parallel, true, 30) / test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-26 fail Logs for x86_64-gcc / test (test_verifier, false, 360) / test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-27 fail Logs for x86_64-gcc / veristat / veristat on x86_64 with gcc
netdev/tree_selection success Clearly marked for bpf-next, async
netdev/apply fail Patch does not apply to bpf-next-0

Commit Message

Alan Maguire May 17, 2024, 10:22 a.m. UTC
To support more robust split BTF, adding supplemental context for the
base BTF type ids that split BTF refers to is required.  Without such
references, a simple shuffling of base BTF type ids (without any other
significant change) invalidates the split BTF.  Here the attempt is made
to store additional context to make split BTF more robust.

This context comes in the form of distilled base BTF providing minimal
information (name and - in some cases - size) for base INTs, FLOATs,
STRUCTs, UNIONs, ENUMs and ENUM64s along with modified split BTF that
points at that base and contains any additional types needed (such as
TYPEDEF, PTR and anonymous STRUCT/UNION declarations).  This
information constitutes the minimal BTF representation needed to
disambiguate or remove split BTF references to base BTF.  The rules
are as follows:

- INT, FLOAT are recorded in full.
- if a named base BTF STRUCT or UNION is referred to from split BTF, it
  will be encoded either as a zero-member sized STRUCT/UNION (preserving
  size for later relocation checks) or as a named FWD.  Only base BTF
  STRUCT/UNIONs that are either embedded in split BTF STRUCT/UNIONs or
  that have multiple STRUCT/UNION instances of the same name need to
  preserve size information, so a FWD representation will be used in
  most cases.
- if an ENUM[64] is named, a ENUM forward representation (an ENUM
  with no values) is used.
- in all other cases, the type is added to the new split BTF.

Avoiding struct/union/enum/enum64 expansion is important to keep the
distilled base BTF representation to a minimum size.

When successful, new representations of the distilled base BTF and new
split BTF that refers to it are returned.  Both need to be freed by the
caller.

So to take a simple example, with split BTF with a type referring
to "struct sk_buff", we will generate distilled base BTF with a
FWD struct sk_buff, and the split BTF will refer to it instead.

Tools like pahole can utilize such split BTF to populate the .BTF
section (split BTF) and an additional .BTF.base section.  Then
when the split BTF is loaded, the distilled base BTF can be used
to relocate split BTF to reference the current (and possibly changed)
base BTF.

So for example if "struct sk_buff" was id 502 when the split BTF was
originally generated,  we can use the distilled base BTF to see that
id 502 refers to a "struct sk_buff" and replace instances of id 502
with the current (relocated) base BTF sk_buff type id.

Distilled base BTF is small; when building a kernel with all modules
using distilled base BTF as a test, ovreall module size grew by only
5.3Mb total across ~2700 modules.

Signed-off-by: Alan Maguire <alan.maguire@oracle.com>
---
 tools/lib/bpf/btf.c      | 409 ++++++++++++++++++++++++++++++++++++++-
 tools/lib/bpf/btf.h      |  20 ++
 tools/lib/bpf/libbpf.map |   1 +
 3 files changed, 424 insertions(+), 6 deletions(-)

Comments

Andrii Nakryiko May 21, 2024, 9:48 p.m. UTC | #1
On Fri, May 17, 2024 at 3:23 AM Alan Maguire <alan.maguire@oracle.com> wrote:
>
> To support more robust split BTF, adding supplemental context for the
> base BTF type ids that split BTF refers to is required.  Without such
> references, a simple shuffling of base BTF type ids (without any other
> significant change) invalidates the split BTF.  Here the attempt is made
> to store additional context to make split BTF more robust.
>
> This context comes in the form of distilled base BTF providing minimal
> information (name and - in some cases - size) for base INTs, FLOATs,
> STRUCTs, UNIONs, ENUMs and ENUM64s along with modified split BTF that
> points at that base and contains any additional types needed (such as
> TYPEDEF, PTR and anonymous STRUCT/UNION declarations).  This
> information constitutes the minimal BTF representation needed to
> disambiguate or remove split BTF references to base BTF.  The rules
> are as follows:
>
> - INT, FLOAT are recorded in full.
> - if a named base BTF STRUCT or UNION is referred to from split BTF, it
>   will be encoded either as a zero-member sized STRUCT/UNION (preserving
>   size for later relocation checks) or as a named FWD.  Only base BTF
>   STRUCT/UNIONs that are either embedded in split BTF STRUCT/UNIONs or
>   that have multiple STRUCT/UNION instances of the same name need to
>   preserve size information, so a FWD representation will be used in
>   most cases.
> - if an ENUM[64] is named, a ENUM forward representation (an ENUM
>   with no values) is used.
> - in all other cases, the type is added to the new split BTF.
>
> Avoiding struct/union/enum/enum64 expansion is important to keep the
> distilled base BTF representation to a minimum size.
>
> When successful, new representations of the distilled base BTF and new
> split BTF that refers to it are returned.  Both need to be freed by the
> caller.
>
> So to take a simple example, with split BTF with a type referring
> to "struct sk_buff", we will generate distilled base BTF with a
> FWD struct sk_buff, and the split BTF will refer to it instead.
>
> Tools like pahole can utilize such split BTF to populate the .BTF
> section (split BTF) and an additional .BTF.base section.  Then
> when the split BTF is loaded, the distilled base BTF can be used
> to relocate split BTF to reference the current (and possibly changed)
> base BTF.
>
> So for example if "struct sk_buff" was id 502 when the split BTF was
> originally generated,  we can use the distilled base BTF to see that
> id 502 refers to a "struct sk_buff" and replace instances of id 502
> with the current (relocated) base BTF sk_buff type id.
>
> Distilled base BTF is small; when building a kernel with all modules
> using distilled base BTF as a test, ovreall module size grew by only

typo: overall

> 5.3Mb total across ~2700 modules.
>
> Signed-off-by: Alan Maguire <alan.maguire@oracle.com>
> ---
>  tools/lib/bpf/btf.c      | 409 ++++++++++++++++++++++++++++++++++++++-
>  tools/lib/bpf/btf.h      |  20 ++
>  tools/lib/bpf/libbpf.map |   1 +
>  3 files changed, 424 insertions(+), 6 deletions(-)
>
> diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
> index 2d0840ef599a..953929d196c3 100644
> --- a/tools/lib/bpf/btf.c
> +++ b/tools/lib/bpf/btf.c
> @@ -1771,9 +1771,8 @@ static int btf_rewrite_str(__u32 *str_off, void *ctx)
>         return 0;
>  }
>
> -int btf__add_type(struct btf *btf, const struct btf *src_btf, const struct btf_type *src_type)
> +static int btf_add_type(struct btf_pipe *p, const struct btf_type *src_type)
>  {
> -       struct btf_pipe p = { .src = src_btf, .dst = btf };
>         struct btf_type *t;
>         int sz, err;
>
> @@ -1782,20 +1781,27 @@ int btf__add_type(struct btf *btf, const struct btf *src_btf, const struct btf_t
>                 return libbpf_err(sz);
>
>         /* deconstruct BTF, if necessary, and invalidate raw_data */
> -       if (btf_ensure_modifiable(btf))
> +       if (btf_ensure_modifiable(p->dst))
>                 return libbpf_err(-ENOMEM);
>
> -       t = btf_add_type_mem(btf, sz);
> +       t = btf_add_type_mem(p->dst, sz);
>         if (!t)
>                 return libbpf_err(-ENOMEM);
>
>         memcpy(t, src_type, sz);
>
> -       err = btf_type_visit_str_offs(t, btf_rewrite_str, &p);
> +       err = btf_type_visit_str_offs(t, btf_rewrite_str, p);
>         if (err)
>                 return libbpf_err(err);
>
> -       return btf_commit_type(btf, sz);
> +       return btf_commit_type(p->dst, sz);
> +}
> +
> +int btf__add_type(struct btf *btf, const struct btf *src_btf, const struct btf_type *src_type)
> +{
> +       struct btf_pipe p = { .src = src_btf, .dst = btf };
> +
> +       return btf_add_type(&p, src_type);
>  }
>
>  static int btf_rewrite_type_ids(__u32 *type_id, void *ctx)
> @@ -5212,3 +5218,394 @@ int btf_ext_visit_str_offs(struct btf_ext *btf_ext, str_off_visit_fn visit, void
>
>         return 0;
>  }
> +
> +#define BTF_NEEDS_SIZE (1 << 31)       /* flag set if either struct/union is
> +                                        * embedded - and thus size info must
> +                                        * be preserved - or if there are
> +                                        * multiple instances of the same
> +                                        * struct/union - where size can be
> +                                        * used to clarify which is wanted.
> +                                        */

please use full line comment in front of #define

> +#define BTF_ID(id)             (id & ~BTF_NEEDS_SIZE)
> +
> +struct btf_distill {
> +       struct btf_pipe pipe;
> +       int *ids;

reading the rest of the code, this BTF_NEEDS_SIZE and BTF_ID() macro
use was quite distracting. I'm wondering if it would be better to use
a simple struct with bitfields here, e.g.,

struct type_state {
    int id: 31;
    bool needs_size;
};

struct dist_state *states;

Same memory usage, same efficiency, but more readable code. WDYT?

> +       unsigned int split_start_id;
> +       unsigned int split_start_str;
> +       unsigned int diff_id;
> +};
> +
> +/* Check if a member of a split BTF struct/union refers to a base BTF

nit: comments uses "check" terminology, function name uses "find",
while really it "marks" some time as embedded... Let's use consistent
terminology (where mark seems like the most fitting, IMO)

> + * struct/union.  Members can be const/restrict/volatile/typedef
> + * reference types, but if a pointer is encountered, type is no longer
> + * considered embedded.
> + */
> +static int btf_find_embedded_composite_type_ids(__u32 *id, void *ctx)
> +{
> +       struct btf_distill *dist = ctx;
> +       const struct btf_type *t;
> +       __u32 next_id = *id;
> +
> +       do {
> +               if (next_id == 0)
> +                       return 0;
> +               t = btf_type_by_id(dist->pipe.src, next_id);
> +               switch (btf_kind(t)) {
> +               case BTF_KIND_CONST:
> +               case BTF_KIND_RESTRICT:
> +               case BTF_KIND_VOLATILE:
> +               case BTF_KIND_TYPEDEF:
> +               case BTF_KIND_TYPE_TAG:
> +                       next_id = t->type;
> +                       break;
> +               case BTF_KIND_ARRAY: {
> +                       struct btf_array *a = btf_array(t);
> +
> +                       next_id = a->type;
> +                       break;
> +               }
> +               case BTF_KIND_STRUCT:
> +               case BTF_KIND_UNION:
> +                       dist->ids[next_id] |= BTF_NEEDS_SIZE;
> +                       return 0;
> +               default:
> +                       return 0;
> +               }
> +
> +       } while (1);

nit: while (true)

> +
> +       return 0;
> +}
> +
> +/* Check if composite type has a duplicate-named type; if it does, retain

see above about check vs mark, here at least the function name uses "mark" :)

> + * size information to help guide later relocation towards the correct type.
> + * For example there are duplicate 'dma_chan' structs in vmlinux BTF;
> + * one is size 112, the other 16.  Having size information allows relocation
> + * to choose the right one.

re: this dma_chan and similar cases. Is it ever a problem where a
module actually needs two dma_chan in distilled base BTF? Name
conflicts do happen, but intuitively I'd expect this to happen between
some vmlinux-internal (so to speak, i.e., not the type used in
exported functions/types) and kernel module-specific types. It would
be awkward for the same module to use two different types that are
named the same.

Have you seen this as a problem in practice? What if for now we just
error out if there are two conflicting types required in distilled
BTF?

> + */
> +static int btf_mark_composite_dups(struct btf_distill *dist, __u32 id)
> +{
> +       __u8 *cnt = calloc(dist->split_start_str, sizeof(__u8));

nit: we generally follow that initialization of variable shouldn't be
doing non-trivial operations. So let's do calloc() as a separate
statement outside of variable declaration.

> +       struct btf_type *t;
> +       int i;
> +
> +       if (!cnt)
> +               return -ENOMEM;
> +
> +       /* First pass; collect name counts for composite types. */
> +       for (i = 1; i < dist->split_start_id; i++) {
> +               t = btf_type_by_id(dist->pipe.src, i);
> +               if (!btf_is_composite(t) || !t->name_off)
> +                       continue;
> +               if (cnt[t->name_off] < 255)
> +                       cnt[t->name_off]++;
> +       }
> +       /* Second pass; mark composite types with multiple instances of the
> +        * same name as needing size information.
> +        */
> +       for (i = 1; i < dist->split_start_id; i++) {
> +               /* id not needed or is already preserving size information */
> +               if (!dist->ids[i] || (dist->ids[i] & BTF_NEEDS_SIZE))
> +                       continue;
> +               t = btf_type_by_id(dist->pipe.src, i);
> +               if (!btf_is_composite(t) || !t->name_off)
> +                       continue;
> +               if (cnt[t->name_off] > 1)
> +                       dist->ids[i] |= BTF_NEEDS_SIZE;
> +       }
> +       free(cnt);
> +
> +       return 0;
> +}
> +
> +static bool btf_is_eligible_named_fwd(const struct btf_type *t)
> +{
> +       return (btf_is_composite(t) || btf_is_any_enum(t)) && t->name_off != 0;
> +}
> +
> +static int btf_add_distilled_type_ids(__u32 *id, void *ctx)
> +{
> +       struct btf_distill *dist = ctx;
> +       struct btf_type *t = btf_type_by_id(dist->pipe.src, *id);
> +       int err;
> +
> +       if (!*id)
> +               return 0;
> +       /* split BTF id, not needed */
> +       if (*id >= dist->split_start_id)
> +               return 0;
> +       /* already added ? */
> +       if (BTF_ID(dist->ids[*id]) > 0)
> +               return 0;
> +
> +       /* only a subset of base BTF types should be referenced from split
> +        * BTF; ensure nothing unexpected is referenced.
> +        */
> +       switch (btf_kind(t)) {
> +       case BTF_KIND_INT:
> +       case BTF_KIND_FLOAT:
> +       case BTF_KIND_FWD:
> +       case BTF_KIND_ARRAY:
> +       case BTF_KIND_STRUCT:
> +       case BTF_KIND_UNION:
> +       case BTF_KIND_TYPEDEF:
> +       case BTF_KIND_ENUM:
> +       case BTF_KIND_ENUM64:
> +       case BTF_KIND_PTR:
> +       case BTF_KIND_CONST:
> +       case BTF_KIND_RESTRICT:
> +       case BTF_KIND_VOLATILE:
> +       case BTF_KIND_FUNC_PROTO:
> +       case BTF_KIND_TYPE_TAG:
> +               dist->ids[*id] |= *id;
> +               break;
> +       default:
> +               pr_warn("unexpected reference to base type[%u] of kind [%u] when creating distilled base BTF.\n",
> +                       *id, btf_kind(t));
> +               return -EINVAL;
> +       }
> +
> +       /* struct/union members not needed, except for anonymous structs
> +        * and unions, which we need since name won't help us determine
> +        * matches; so if a named struct/union, no need to recurse
> +        * into members.
> +        */

is this an outdated comment? we shouldn't have anonymous types in the
distilled base, right?

> +       if (btf_is_eligible_named_fwd(t))
> +               return 0;
> +
> +       /* ensure references in type are added also. */
> +       err = btf_type_visit_type_ids(t, btf_add_distilled_type_ids, ctx);
> +       if (err < 0)
> +               return err;
> +       return 0;

nit: could be just `return btf_type_visit_type_ids(...);`?

> +}
> +
> +static int btf_add_distilled_types(struct btf_distill *dist)
> +{
> +       bool adding_to_base = dist->pipe.dst->start_id == 1;
> +       int id = btf__type_cnt(dist->pipe.dst);
> +       struct btf_type *t;
> +       int i, err = 0;
> +
> +       /* Add types for each of the required references to either distilled
> +        * base or split BTF, depending on type characteristics.
> +        */
> +       for (i = 1; i < dist->split_start_id; i++) {
> +               const char *name;
> +               int kind;
> +
> +               if (!BTF_ID(dist->ids[i]))
> +                       continue;
> +               t = btf_type_by_id(dist->pipe.src, i);
> +               kind = btf_kind(t);
> +               name = btf__name_by_offset(dist->pipe.src, t->name_off);
> +
> +               /* Named int, float, fwd struct, union, enum[64] are added to
> +                * base; everything else is added to split BTF.
> +                */
> +               switch (kind) {
> +               case BTF_KIND_INT:
> +               case BTF_KIND_FLOAT:
> +               case BTF_KIND_FWD:
> +               case BTF_KIND_STRUCT:
> +               case BTF_KIND_UNION:
> +               case BTF_KIND_ENUM:
> +               case BTF_KIND_ENUM64:
> +                       if ((adding_to_base && !t->name_off) || (!adding_to_base && t->name_off))
> +                               continue;
> +                       break;
> +               default:
> +                       if (adding_to_base)
> +                               continue;
> +                       break;
> +               }
> +               if (dist->ids[i] & BTF_NEEDS_SIZE) {
> +                       /* If a named struct/union in base BTF is referenced as a type
> +                        * in split BTF without use of a pointer - i.e. as an embedded
> +                        * struct/union - add an empty struct/union preserving size
> +                        * since size must be consistent when relocating split and
> +                        * possibly changed base BTF.  Similarly, when a struct/union
> +                        * has multiple instances of the same name in the original
> +                        * base BTF, retain size to help relocation later pick the
> +                        * right struct/union.
> +                        */
> +                       err = btf_add_composite(dist->pipe.dst, kind, name, t->size);
> +               } else if (btf_is_eligible_named_fwd(t)) {
> +                       /* If not embedded, use a fwd for named struct/unions since we
> +                        * can match via name without any other details.
> +                        */
> +                       switch (kind) {
> +                       case BTF_KIND_STRUCT:
> +                               err = btf__add_fwd(dist->pipe.dst, name, BTF_FWD_STRUCT);
> +                               break;
> +                       case BTF_KIND_UNION:
> +                               err = btf__add_fwd(dist->pipe.dst, name, BTF_FWD_UNION);
> +                               break;
> +                       case BTF_KIND_ENUM:
> +                               err = btf__add_enum(dist->pipe.dst, name, t->size);
> +                               break;
> +                       case BTF_KIND_ENUM64:

nit: combine ENUM/ENUM64 cases?

> +                               err = btf__add_enum(dist->pipe.dst, name, t->size);
> +                               break;
> +                       default:
> +                               pr_warn("unexpected kind [%u] when creating distilled base BTF.\n",
> +                                       btf_kind(t));
> +                               return -EINVAL;
> +                       }
> +               } else {
> +                       err = btf_add_type(&dist->pipe, t);

So this should never happen if adding_to_base == true, is that right?
Can we check this? Or am I missing something as usual?

> +               }
> +               if (err < 0)
> +                       break;
> +               dist->ids[i] = id++;
> +       }
> +       return err;
> +}
> +

[...]

> +       n = btf__type_cnt(new_split);
> +       /* Now update base/split BTF ids. */
> +       for (i = 1; i < n; i++) {
> +               t = btf_type_by_id(new_split, i);
> +
> +               err = btf_type_visit_type_ids(t, btf_update_distilled_type_ids, &dist);
> +               if (err < 0)
> +                       goto err_out;
> +       }
> +       free(dist.ids);
> +       hashmap__free(dist.pipe.str_off_map);
> +       *new_base_btf = new_base;
> +       *new_split_btf = new_split;
> +       return 0;
> +err_out:
> +       free(dist.ids);
> +       if (!IS_ERR(dist.pipe.str_off_map))

you don't need to check this, hashmap__free() works for IS_ERR()
pointers as well

> +               hashmap__free(dist.pipe.str_off_map);
> +       btf__free(new_split);
> +       btf__free(new_base);
> +       return libbpf_err(err);
> +}

[...]
Alan Maguire May 22, 2024, 4:42 p.m. UTC | #2
On 21/05/2024 22:48, Andrii Nakryiko wrote:
> On Fri, May 17, 2024 at 3:23 AM Alan Maguire <alan.maguire@oracle.com> wrote:
>>
>> To support more robust split BTF, adding supplemental context for the
>> base BTF type ids that split BTF refers to is required.  Without such
>> references, a simple shuffling of base BTF type ids (without any other
>> significant change) invalidates the split BTF.  Here the attempt is made
>> to store additional context to make split BTF more robust.
>>
>> This context comes in the form of distilled base BTF providing minimal
>> information (name and - in some cases - size) for base INTs, FLOATs,
>> STRUCTs, UNIONs, ENUMs and ENUM64s along with modified split BTF that
>> points at that base and contains any additional types needed (such as
>> TYPEDEF, PTR and anonymous STRUCT/UNION declarations).  This
>> information constitutes the minimal BTF representation needed to
>> disambiguate or remove split BTF references to base BTF.  The rules
>> are as follows:
>>
>> - INT, FLOAT are recorded in full.
>> - if a named base BTF STRUCT or UNION is referred to from split BTF, it
>>   will be encoded either as a zero-member sized STRUCT/UNION (preserving
>>   size for later relocation checks) or as a named FWD.  Only base BTF
>>   STRUCT/UNIONs that are either embedded in split BTF STRUCT/UNIONs or
>>   that have multiple STRUCT/UNION instances of the same name need to
>>   preserve size information, so a FWD representation will be used in
>>   most cases.
>> - if an ENUM[64] is named, a ENUM forward representation (an ENUM
>>   with no values) is used.
>> - in all other cases, the type is added to the new split BTF.
>>
>> Avoiding struct/union/enum/enum64 expansion is important to keep the
>> distilled base BTF representation to a minimum size.
>>
>> When successful, new representations of the distilled base BTF and new
>> split BTF that refers to it are returned.  Both need to be freed by the
>> caller.
>>
>> So to take a simple example, with split BTF with a type referring
>> to "struct sk_buff", we will generate distilled base BTF with a
>> FWD struct sk_buff, and the split BTF will refer to it instead.
>>
>> Tools like pahole can utilize such split BTF to populate the .BTF
>> section (split BTF) and an additional .BTF.base section.  Then
>> when the split BTF is loaded, the distilled base BTF can be used
>> to relocate split BTF to reference the current (and possibly changed)
>> base BTF.
>>
>> So for example if "struct sk_buff" was id 502 when the split BTF was
>> originally generated,  we can use the distilled base BTF to see that
>> id 502 refers to a "struct sk_buff" and replace instances of id 502
>> with the current (relocated) base BTF sk_buff type id.
>>
>> Distilled base BTF is small; when building a kernel with all modules
>> using distilled base BTF as a test, ovreall module size grew by only
> 
> typo: overall
> 
>> 5.3Mb total across ~2700 modules.
>>
>> Signed-off-by: Alan Maguire <alan.maguire@oracle.com>
>> ---
>>  tools/lib/bpf/btf.c      | 409 ++++++++++++++++++++++++++++++++++++++-
>>  tools/lib/bpf/btf.h      |  20 ++
>>  tools/lib/bpf/libbpf.map |   1 +
>>  3 files changed, 424 insertions(+), 6 deletions(-)
>>
>> diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
>> index 2d0840ef599a..953929d196c3 100644
>> --- a/tools/lib/bpf/btf.c
>> +++ b/tools/lib/bpf/btf.c
>> @@ -1771,9 +1771,8 @@ static int btf_rewrite_str(__u32 *str_off, void *ctx)
>>         return 0;
>>  }
>>
>> -int btf__add_type(struct btf *btf, const struct btf *src_btf, const struct btf_type *src_type)
>> +static int btf_add_type(struct btf_pipe *p, const struct btf_type *src_type)
>>  {
>> -       struct btf_pipe p = { .src = src_btf, .dst = btf };
>>         struct btf_type *t;
>>         int sz, err;
>>
>> @@ -1782,20 +1781,27 @@ int btf__add_type(struct btf *btf, const struct btf *src_btf, const struct btf_t
>>                 return libbpf_err(sz);
>>
>>         /* deconstruct BTF, if necessary, and invalidate raw_data */
>> -       if (btf_ensure_modifiable(btf))
>> +       if (btf_ensure_modifiable(p->dst))
>>                 return libbpf_err(-ENOMEM);
>>
>> -       t = btf_add_type_mem(btf, sz);
>> +       t = btf_add_type_mem(p->dst, sz);
>>         if (!t)
>>                 return libbpf_err(-ENOMEM);
>>
>>         memcpy(t, src_type, sz);
>>
>> -       err = btf_type_visit_str_offs(t, btf_rewrite_str, &p);
>> +       err = btf_type_visit_str_offs(t, btf_rewrite_str, p);
>>         if (err)
>>                 return libbpf_err(err);
>>
>> -       return btf_commit_type(btf, sz);
>> +       return btf_commit_type(p->dst, sz);
>> +}
>> +
>> +int btf__add_type(struct btf *btf, const struct btf *src_btf, const struct btf_type *src_type)
>> +{
>> +       struct btf_pipe p = { .src = src_btf, .dst = btf };
>> +
>> +       return btf_add_type(&p, src_type);
>>  }
>>
>>  static int btf_rewrite_type_ids(__u32 *type_id, void *ctx)
>> @@ -5212,3 +5218,394 @@ int btf_ext_visit_str_offs(struct btf_ext *btf_ext, str_off_visit_fn visit, void
>>
>>         return 0;
>>  }
>> +
>> +#define BTF_NEEDS_SIZE (1 << 31)       /* flag set if either struct/union is
>> +                                        * embedded - and thus size info must
>> +                                        * be preserved - or if there are
>> +                                        * multiple instances of the same
>> +                                        * struct/union - where size can be
>> +                                        * used to clarify which is wanted.
>> +                                        */
> 
> please use full line comment in front of #define
>

yep, will do.

>> +#define BTF_ID(id)             (id & ~BTF_NEEDS_SIZE)
>> +
>> +struct btf_distill {
>> +       struct btf_pipe pipe;
>> +       int *ids;
> 
> reading the rest of the code, this BTF_NEEDS_SIZE and BTF_ID() macro
> use was quite distracting. I'm wondering if it would be better to use
> a simple struct with bitfields here, e.g.,
> 
> struct type_state {
>     int id: 31;
>     bool needs_size;
> };
> 
> struct dist_state *states;
> 
> Same memory usage, same efficiency, but more readable code. WDYT?
>

Yeah, I saw Eduard used that approach elsewhere, it's much neater.
However in other discussion with Eduard we talked about moving the
embedded/duplicate validation to relocation time. The motivation for
that is that if we record sizes for all STRUCTs and UNIONs, we can then
be selective about size checks at relocation time.  This is particularly
relevant for the duplicate name case since it's possible a name either
is no longer duplicate in the new vmlinux, or in the new vmlinux has a
duplicate where the original vmlinux did not. In other words it's the
"new world" of the vmlinux we're trying to relocate with that we're
really concerned with, so preserving all size information until we see
that new vmlinux and can spot duplicates and embedded types where the
size checks need to be enforced makes sense I think.

In that context, we mark embedded types prior to assigning mappings, and
don't mark duplicate names at all in the map since it is duplicates in
the new vmlinux we're interested in. So the way I'd approached it is to
have a BTF_IS_EMBEDDED (casting -1) value that the map values would set
when they found a split BTF struct/union with an embedded base BTF type.
That later gets overwritten when we do the map assignments so it never
gets exposed to the user and we can still return a __u32 array of BTF
ids to the caller.
>> +       unsigned int split_start_id;
>> +       unsigned int split_start_str;
>> +       unsigned int diff_id;
>> +};
>> +
>> +/* Check if a member of a split BTF struct/union refers to a base BTF
> 
> nit: comments uses "check" terminology, function name uses "find",
> while really it "marks" some time as embedded... Let's use consistent
> terminology (where mark seems like the most fitting, IMO)
> 

Sure, I'll try and be more consistent around naming here.

>> + * struct/union.  Members can be const/restrict/volatile/typedef
>> + * reference types, but if a pointer is encountered, type is no longer
>> + * considered embedded.
>> + */
>> +static int btf_find_embedded_composite_type_ids(__u32 *id, void *ctx)
>> +{
>> +       struct btf_distill *dist = ctx;
>> +       const struct btf_type *t;
>> +       __u32 next_id = *id;
>> +
>> +       do {
>> +               if (next_id == 0)
>> +                       return 0;
>> +               t = btf_type_by_id(dist->pipe.src, next_id);
>> +               switch (btf_kind(t)) {
>> +               case BTF_KIND_CONST:
>> +               case BTF_KIND_RESTRICT:
>> +               case BTF_KIND_VOLATILE:
>> +               case BTF_KIND_TYPEDEF:
>> +               case BTF_KIND_TYPE_TAG:
>> +                       next_id = t->type;
>> +                       break;
>> +               case BTF_KIND_ARRAY: {
>> +                       struct btf_array *a = btf_array(t);
>> +
>> +                       next_id = a->type;
>> +                       break;
>> +               }
>> +               case BTF_KIND_STRUCT:
>> +               case BTF_KIND_UNION:
>> +                       dist->ids[next_id] |= BTF_NEEDS_SIZE;
>> +                       return 0;
>> +               default:
>> +                       return 0;
>> +               }
>> +
>> +       } while (1);
> 
> nit: while (true)
> 
>> +
>> +       return 0;
>> +}
>> +
>> +/* Check if composite type has a duplicate-named type; if it does, retain
> 
> see above about check vs mark, here at least the function name uses "mark" :)
> 
>> + * size information to help guide later relocation towards the correct type.
>> + * For example there are duplicate 'dma_chan' structs in vmlinux BTF;
>> + * one is size 112, the other 16.  Having size information allows relocation
>> + * to choose the right one.
> 
> re: this dma_chan and similar cases. Is it ever a problem where a
> module actually needs two dma_chan in distilled base BTF? Name
> conflicts do happen, but intuitively I'd expect this to happen between
> some vmlinux-internal (so to speak, i.e., not the type used in
> exported functions/types) and kernel module-specific types. It would
> be awkward for the same module to use two different types that are
> named the same.
> 
> Have you seen this as a problem in practice? What if for now we just
> error out if there are two conflicting types required in distilled
> BTF?

Funnily enough I ran into it with "dma_chan" itself when trying to load
an in-tree module which I'd forced (along with the rest of the tree) to
be built with distilled base BTF.  And it was also an embedded struct
too, so we got 2 for 1 there ;-)

But it does seem like it's a legitimate problem, so if we can address it
I think it'd be worth trying. Even the size checks (applied at
relocation time) would be better than nothing I think.

The only other way I could think that we could disambiguate things would
be to add the duplicate-named type to the split BTF (as we do with the
anonymous structs).  That would remove the ambiguity, but expose us to
the risk of having to pull in a lot more types. I can't imagine a
duplicate-named type would ever figure in a kfunc signature, so it
should be safe to do. But I think either way we probably have to have
some sort of answer for this problem.

> 
>> + */
>> +static int btf_mark_composite_dups(struct btf_distill *dist, __u32 id)
>> +{
>> +       __u8 *cnt = calloc(dist->split_start_str, sizeof(__u8));
> 
> nit: we generally follow that initialization of variable shouldn't be
> doing non-trivial operations. So let's do calloc() as a separate
> statement outside of variable declaration.
> 

Sure, will do.

>> +       struct btf_type *t;
>> +       int i;
>> +
>> +       if (!cnt)
>> +               return -ENOMEM;
>> +
>> +       /* First pass; collect name counts for composite types. */
>> +       for (i = 1; i < dist->split_start_id; i++) {
>> +               t = btf_type_by_id(dist->pipe.src, i);
>> +               if (!btf_is_composite(t) || !t->name_off)
>> +                       continue;
>> +               if (cnt[t->name_off] < 255)
>> +                       cnt[t->name_off]++;
>> +       }
>> +       /* Second pass; mark composite types with multiple instances of the
>> +        * same name as needing size information.
>> +        */
>> +       for (i = 1; i < dist->split_start_id; i++) {
>> +               /* id not needed or is already preserving size information */
>> +               if (!dist->ids[i] || (dist->ids[i] & BTF_NEEDS_SIZE))
>> +                       continue;
>> +               t = btf_type_by_id(dist->pipe.src, i);
>> +               if (!btf_is_composite(t) || !t->name_off)
>> +                       continue;
>> +               if (cnt[t->name_off] > 1)
>> +                       dist->ids[i] |= BTF_NEEDS_SIZE;
>> +       }
>> +       free(cnt);
>> +
>> +       return 0;
>> +}
>> +
>> +static bool btf_is_eligible_named_fwd(const struct btf_type *t)
>> +{
>> +       return (btf_is_composite(t) || btf_is_any_enum(t)) && t->name_off != 0;
>> +}
>> +
>> +static int btf_add_distilled_type_ids(__u32 *id, void *ctx)
>> +{
>> +       struct btf_distill *dist = ctx;
>> +       struct btf_type *t = btf_type_by_id(dist->pipe.src, *id);
>> +       int err;
>> +
>> +       if (!*id)
>> +               return 0;
>> +       /* split BTF id, not needed */
>> +       if (*id >= dist->split_start_id)
>> +               return 0;
>> +       /* already added ? */
>> +       if (BTF_ID(dist->ids[*id]) > 0)
>> +               return 0;
>> +
>> +       /* only a subset of base BTF types should be referenced from split
>> +        * BTF; ensure nothing unexpected is referenced.
>> +        */
>> +       switch (btf_kind(t)) {
>> +       case BTF_KIND_INT:
>> +       case BTF_KIND_FLOAT:
>> +       case BTF_KIND_FWD:
>> +       case BTF_KIND_ARRAY:
>> +       case BTF_KIND_STRUCT:
>> +       case BTF_KIND_UNION:
>> +       case BTF_KIND_TYPEDEF:
>> +       case BTF_KIND_ENUM:
>> +       case BTF_KIND_ENUM64:
>> +       case BTF_KIND_PTR:
>> +       case BTF_KIND_CONST:
>> +       case BTF_KIND_RESTRICT:
>> +       case BTF_KIND_VOLATILE:
>> +       case BTF_KIND_FUNC_PROTO:
>> +       case BTF_KIND_TYPE_TAG:
>> +               dist->ids[*id] |= *id;
>> +               break;
>> +       default:
>> +               pr_warn("unexpected reference to base type[%u] of kind [%u] when creating distilled base BTF.\n",
>> +                       *id, btf_kind(t));
>> +               return -EINVAL;
>> +       }
>> +
>> +       /* struct/union members not needed, except for anonymous structs
>> +        * and unions, which we need since name won't help us determine
>> +        * matches; so if a named struct/union, no need to recurse
>> +        * into members.
>> +        */
> 
> is this an outdated comment? we shouldn't have anonymous types in the
> distilled base, right?

Yep, they go into split BTF. However, we might need to add their members
to our distilled base; for example if we hadn't yet added an int and had

	struct {
		int field;
	};

...we'd want to make sure we added an INT to our distilled base. I'll
try and clarify the comment a bit more.

> 
>> +       if (btf_is_eligible_named_fwd(t))
>> +               return 0;
>> +
>> +       /* ensure references in type are added also. */
>> +       err = btf_type_visit_type_ids(t, btf_add_distilled_type_ids, ctx);
>> +       if (err < 0)
>> +               return err;
>> +       return 0;
> 
> nit: could be just `return btf_type_visit_type_ids(...);`?
> 

good idea.

>> +}
>> +
>> +static int btf_add_distilled_types(struct btf_distill *dist)
>> +{
>> +       bool adding_to_base = dist->pipe.dst->start_id == 1;
>> +       int id = btf__type_cnt(dist->pipe.dst);
>> +       struct btf_type *t;
>> +       int i, err = 0;
>> +
>> +       /* Add types for each of the required references to either distilled
>> +        * base or split BTF, depending on type characteristics.
>> +        */
>> +       for (i = 1; i < dist->split_start_id; i++) {
>> +               const char *name;
>> +               int kind;
>> +
>> +               if (!BTF_ID(dist->ids[i]))
>> +                       continue;
>> +               t = btf_type_by_id(dist->pipe.src, i);
>> +               kind = btf_kind(t);
>> +               name = btf__name_by_offset(dist->pipe.src, t->name_off);
>> +
>> +               /* Named int, float, fwd struct, union, enum[64] are added to
>> +                * base; everything else is added to split BTF.
>> +                */
>> +               switch (kind) {
>> +               case BTF_KIND_INT:
>> +               case BTF_KIND_FLOAT:
>> +               case BTF_KIND_FWD:
>> +               case BTF_KIND_STRUCT:
>> +               case BTF_KIND_UNION:
>> +               case BTF_KIND_ENUM:
>> +               case BTF_KIND_ENUM64:
>> +                       if ((adding_to_base && !t->name_off) || (!adding_to_base && t->name_off))
>> +                               continue;
>> +                       break;
>> +               default:
>> +                       if (adding_to_base)
>> +                               continue;
>> +                       break;
>> +               }
>> +               if (dist->ids[i] & BTF_NEEDS_SIZE) {
>> +                       /* If a named struct/union in base BTF is referenced as a type
>> +                        * in split BTF without use of a pointer - i.e. as an embedded
>> +                        * struct/union - add an empty struct/union preserving size
>> +                        * since size must be consistent when relocating split and
>> +                        * possibly changed base BTF.  Similarly, when a struct/union
>> +                        * has multiple instances of the same name in the original
>> +                        * base BTF, retain size to help relocation later pick the
>> +                        * right struct/union.
>> +                        */
>> +                       err = btf_add_composite(dist->pipe.dst, kind, name, t->size);
>> +               } else if (btf_is_eligible_named_fwd(t)) {
>> +                       /* If not embedded, use a fwd for named struct/unions since we
>> +                        * can match via name without any other details.
>> +                        */
>> +                       switch (kind) {
>> +                       case BTF_KIND_STRUCT:
>> +                               err = btf__add_fwd(dist->pipe.dst, name, BTF_FWD_STRUCT);
>> +                               break;
>> +                       case BTF_KIND_UNION:
>> +                               err = btf__add_fwd(dist->pipe.dst, name, BTF_FWD_UNION);
>> +                               break;
>> +                       case BTF_KIND_ENUM:
>> +                               err = btf__add_enum(dist->pipe.dst, name, t->size);
>> +                               break;
>> +                       case BTF_KIND_ENUM64:
> 
> nit: combine ENUM/ENUM64 cases?
>

yep, good catch.

>> +                               err = btf__add_enum(dist->pipe.dst, name, t->size);
>> +                               break;
>> +                       default:
>> +                               pr_warn("unexpected kind [%u] when creating distilled base BTF.\n",
>> +                                       btf_kind(t));
>> +                               return -EINVAL;
>> +                       }
>> +               } else {
>> +                       err = btf_add_type(&dist->pipe, t);
> 
> So this should never happen if adding_to_base == true, is that right?
> Can we check this? Or am I missing something as usual?
> 

We're currently adding INTs and FLOATs to the base also, so they are two
cases where we end up here I think.

>> +               }
>> +               if (err < 0)
>> +                       break;
>> +               dist->ids[i] = id++;
>> +       }
>> +       return err;
>> +}
>> +
> 
> [...]
> 
>> +       n = btf__type_cnt(new_split);
>> +       /* Now update base/split BTF ids. */
>> +       for (i = 1; i < n; i++) {
>> +               t = btf_type_by_id(new_split, i);
>> +
>> +               err = btf_type_visit_type_ids(t, btf_update_distilled_type_ids, &dist);
>> +               if (err < 0)
>> +                       goto err_out;
>> +       }
>> +       free(dist.ids);
>> +       hashmap__free(dist.pipe.str_off_map);
>> +       *new_base_btf = new_base;
>> +       *new_split_btf = new_split;
>> +       return 0;
>> +err_out:
>> +       free(dist.ids);
>> +       if (!IS_ERR(dist.pipe.str_off_map))
> 
> you don't need to check this, hashmap__free() works for IS_ERR()
> pointers as well
>

ah, good to know, thanks!


>> +               hashmap__free(dist.pipe.str_off_map);
>> +       btf__free(new_split);
>> +       btf__free(new_base);
>> +       return libbpf_err(err);
>> +}
> 
> [...]
Andrii Nakryiko May 22, 2024, 4:57 p.m. UTC | #3
On Wed, May 22, 2024 at 9:42 AM Alan Maguire <alan.maguire@oracle.com> wrote:
>
> On 21/05/2024 22:48, Andrii Nakryiko wrote:
> > On Fri, May 17, 2024 at 3:23 AM Alan Maguire <alan.maguire@oracle.com> wrote:
> >>
> >> To support more robust split BTF, adding supplemental context for the
> >> base BTF type ids that split BTF refers to is required.  Without such
> >> references, a simple shuffling of base BTF type ids (without any other
> >> significant change) invalidates the split BTF.  Here the attempt is made
> >> to store additional context to make split BTF more robust.
> >>
> >> This context comes in the form of distilled base BTF providing minimal
> >> information (name and - in some cases - size) for base INTs, FLOATs,
> >> STRUCTs, UNIONs, ENUMs and ENUM64s along with modified split BTF that
> >> points at that base and contains any additional types needed (such as
> >> TYPEDEF, PTR and anonymous STRUCT/UNION declarations).  This
> >> information constitutes the minimal BTF representation needed to
> >> disambiguate or remove split BTF references to base BTF.  The rules
> >> are as follows:
> >>
> >> - INT, FLOAT are recorded in full.
> >> - if a named base BTF STRUCT or UNION is referred to from split BTF, it
> >>   will be encoded either as a zero-member sized STRUCT/UNION (preserving
> >>   size for later relocation checks) or as a named FWD.  Only base BTF
> >>   STRUCT/UNIONs that are either embedded in split BTF STRUCT/UNIONs or
> >>   that have multiple STRUCT/UNION instances of the same name need to
> >>   preserve size information, so a FWD representation will be used in
> >>   most cases.
> >> - if an ENUM[64] is named, a ENUM forward representation (an ENUM
> >>   with no values) is used.
> >> - in all other cases, the type is added to the new split BTF.
> >>
> >> Avoiding struct/union/enum/enum64 expansion is important to keep the
> >> distilled base BTF representation to a minimum size.
> >>
> >> When successful, new representations of the distilled base BTF and new
> >> split BTF that refers to it are returned.  Both need to be freed by the
> >> caller.
> >>
> >> So to take a simple example, with split BTF with a type referring
> >> to "struct sk_buff", we will generate distilled base BTF with a
> >> FWD struct sk_buff, and the split BTF will refer to it instead.
> >>
> >> Tools like pahole can utilize such split BTF to populate the .BTF
> >> section (split BTF) and an additional .BTF.base section.  Then
> >> when the split BTF is loaded, the distilled base BTF can be used
> >> to relocate split BTF to reference the current (and possibly changed)
> >> base BTF.
> >>
> >> So for example if "struct sk_buff" was id 502 when the split BTF was
> >> originally generated,  we can use the distilled base BTF to see that
> >> id 502 refers to a "struct sk_buff" and replace instances of id 502
> >> with the current (relocated) base BTF sk_buff type id.
> >>
> >> Distilled base BTF is small; when building a kernel with all modules
> >> using distilled base BTF as a test, ovreall module size grew by only
> >
> > typo: overall
> >
> >> 5.3Mb total across ~2700 modules.
> >>
> >> Signed-off-by: Alan Maguire <alan.maguire@oracle.com>
> >> ---
> >>  tools/lib/bpf/btf.c      | 409 ++++++++++++++++++++++++++++++++++++++-
> >>  tools/lib/bpf/btf.h      |  20 ++
> >>  tools/lib/bpf/libbpf.map |   1 +
> >>  3 files changed, 424 insertions(+), 6 deletions(-)
> >>

[...]

> >> +#define BTF_ID(id)             (id & ~BTF_NEEDS_SIZE)
> >> +
> >> +struct btf_distill {
> >> +       struct btf_pipe pipe;
> >> +       int *ids;
> >
> > reading the rest of the code, this BTF_NEEDS_SIZE and BTF_ID() macro
> > use was quite distracting. I'm wondering if it would be better to use
> > a simple struct with bitfields here, e.g.,
> >
> > struct type_state {
> >     int id: 31;
> >     bool needs_size;
> > };
> >
> > struct dist_state *states;
> >
> > Same memory usage, same efficiency, but more readable code. WDYT?
> >
>
> Yeah, I saw Eduard used that approach elsewhere, it's much neater.
> However in other discussion with Eduard we talked about moving the
> embedded/duplicate validation to relocation time. The motivation for
> that is that if we record sizes for all STRUCTs and UNIONs, we can then
> be selective about size checks at relocation time.  This is particularly
> relevant for the duplicate name case since it's possible a name either
> is no longer duplicate in the new vmlinux, or in the new vmlinux has a
> duplicate where the original vmlinux did not. In other words it's the
> "new world" of the vmlinux we're trying to relocate with that we're
> really concerned with, so preserving all size information until we see
> that new vmlinux and can spot duplicates and embedded types where the
> size checks need to be enforced makes sense I think.

Sure, I think it makes sense (though let's see how much more
complexity we add to relocation code).

>
> In that context, we mark embedded types prior to assigning mappings, and
> don't mark duplicate names at all in the map since it is duplicates in
> the new vmlinux we're interested in. So the way I'd approached it is to
> have a BTF_IS_EMBEDDED (casting -1) value that the map values would set
> when they found a split BTF struct/union with an embedded base BTF type.
> That later gets overwritten when we do the map assignments so it never
> gets exposed to the user and we can still return a __u32 array of BTF
> ids to the caller.

My only question/concern is duplicate named entries in distilled base.
How will that work with binary search (at least the exact variant you
are using). But I'm not sure I understand all the nuances of what you
agreed on.

[...]

> >> +
> >> +       return 0;
> >> +}
> >> +
> >> +/* Check if composite type has a duplicate-named type; if it does, retain
> >
> > see above about check vs mark, here at least the function name uses "mark" :)
> >
> >> + * size information to help guide later relocation towards the correct type.
> >> + * For example there are duplicate 'dma_chan' structs in vmlinux BTF;
> >> + * one is size 112, the other 16.  Having size information allows relocation
> >> + * to choose the right one.
> >
> > re: this dma_chan and similar cases. Is it ever a problem where a
> > module actually needs two dma_chan in distilled base BTF? Name
> > conflicts do happen, but intuitively I'd expect this to happen between
> > some vmlinux-internal (so to speak, i.e., not the type used in
> > exported functions/types) and kernel module-specific types. It would
> > be awkward for the same module to use two different types that are
> > named the same.
> >
> > Have you seen this as a problem in practice? What if for now we just
> > error out if there are two conflicting types required in distilled
> > BTF?
>
> Funnily enough I ran into it with "dma_chan" itself when trying to load
> an in-tree module which I'd forced (along with the rest of the tree) to
> be built with distilled base BTF.  And it was also an embedded struct
> too, so we got 2 for 1 there ;-)
>
> But it does seem like it's a legitimate problem, so if we can address it
> I think it'd be worth trying. Even the size checks (applied at
> relocation time) would be better than nothing I think.
>
> The only other way I could think that we could disambiguate things would
> be to add the duplicate-named type to the split BTF (as we do with the
> anonymous structs).  That would remove the ambiguity, but expose us to
> the risk of having to pull in a lot more types. I can't imagine a
> duplicate-named type would ever figure in a kfunc signature, so it
> should be safe to do. But I think either way we probably have to have
> some sort of answer for this problem.
>

Ok, just keep in mind that duplicated names in distilled base BTF do
cause issues when "joining" actual vmlinux BTF and distilled base BTF.

[...]

> >> +
> >> +       /* struct/union members not needed, except for anonymous structs
> >> +        * and unions, which we need since name won't help us determine
> >> +        * matches; so if a named struct/union, no need to recurse
> >> +        * into members.
> >> +        */
> >
> > is this an outdated comment? we shouldn't have anonymous types in the
> > distilled base, right?
>
> Yep, they go into split BTF. However, we might need to add their members
> to our distilled base; for example if we hadn't yet added an int and had
>
>         struct {
>                 int field;
>         };
>
> ...we'd want to make sure we added an INT to our distilled base. I'll
> try and clarify the comment a bit more.

I see, it makes sense. Yes, let's add a comment, it's a bit subtle.

>
> >
> >> +       if (btf_is_eligible_named_fwd(t))
> >> +               return 0;
> >> +

[...]

> >> +                               err = btf__add_enum(dist->pipe.dst, name, t->size);
> >> +                               break;
> >> +                       default:
> >> +                               pr_warn("unexpected kind [%u] when creating distilled base BTF.\n",
> >> +                                       btf_kind(t));
> >> +                               return -EINVAL;
> >> +                       }
> >> +               } else {
> >> +                       err = btf_add_type(&dist->pipe, t);
> >
> > So this should never happen if adding_to_base == true, is that right?
> > Can we check this? Or am I missing something as usual?
> >
>
> We're currently adding INTs and FLOATs to the base also, so they are two
> cases where we end up here I think.

let's have them as another explicit case and then warn for everything
we don't expect to be added? I'm trying to prevent silent problems we
haven't thought about or that will appear in the future due to BTF
extension, so let's be conservative everywhere.

>
> >> +               }
> >> +               if (err < 0)
> >> +                       break;
> >> +               dist->ids[i] = id++;
> >> +       }
> >> +       return err;
> >> +}
> >> +
> >

[...]
Kui-Feng Lee May 22, 2024, 6 p.m. UTC | #4
On 5/17/24 03:22, Alan Maguire wrote:
> To support more robust split BTF, adding supplemental context for the
> base BTF type ids that split BTF refers to is required.  Without such
> references, a simple shuffling of base BTF type ids (without any other
> significant change) invalidates the split BTF.  Here the attempt is made
> to store additional context to make split BTF more robust.
> 
> This context comes in the form of distilled base BTF providing minimal
> information (name and - in some cases - size) for base INTs, FLOATs,
> STRUCTs, UNIONs, ENUMs and ENUM64s along with modified split BTF that
> points at that base and contains any additional types needed (such as
> TYPEDEF, PTR and anonymous STRUCT/UNION declarations).  This
> information constitutes the minimal BTF representation needed to
> disambiguate or remove split BTF references to base BTF.  The rules
> are as follows:
> 
> - INT, FLOAT are recorded in full.
> - if a named base BTF STRUCT or UNION is referred to from split BTF, it
>    will be encoded either as a zero-member sized STRUCT/UNION (preserving
>    size for later relocation checks) or as a named FWD.  Only base BTF
>    STRUCT/UNIONs that are either embedded in split BTF STRUCT/UNIONs or
>    that have multiple STRUCT/UNION instances of the same name need to
>    preserve size information, so a FWD representation will be used in
>    most cases.
> - if an ENUM[64] is named, a ENUM forward representation (an ENUM
>    with no values) is used.
> - in all other cases, the type is added to the new split BTF.
> 
> Avoiding struct/union/enum/enum64 expansion is important to keep the
> distilled base BTF representation to a minimum size.
> 
> When successful, new representations of the distilled base BTF and new
> split BTF that refers to it are returned.  Both need to be freed by the
> caller.
> 
> So to take a simple example, with split BTF with a type referring
> to "struct sk_buff", we will generate distilled base BTF with a
> FWD struct sk_buff, and the split BTF will refer to it instead.
> 
> Tools like pahole can utilize such split BTF to populate the .BTF
> section (split BTF) and an additional .BTF.base section.  Then
> when the split BTF is loaded, the distilled base BTF can be used
> to relocate split BTF to reference the current (and possibly changed)
> base BTF.
> 
> So for example if "struct sk_buff" was id 502 when the split BTF was
> originally generated,  we can use the distilled base BTF to see that
> id 502 refers to a "struct sk_buff" and replace instances of id 502
> with the current (relocated) base BTF sk_buff type id.
> 
> Distilled base BTF is small; when building a kernel with all modules
> using distilled base BTF as a test, ovreall module size grew by only
> 5.3Mb total across ~2700 modules.
> 
> Signed-off-by: Alan Maguire <alan.maguire@oracle.com>
> ---
>   tools/lib/bpf/btf.c      | 409 ++++++++++++++++++++++++++++++++++++++-
>   tools/lib/bpf/btf.h      |  20 ++
>   tools/lib/bpf/libbpf.map |   1 +
>   3 files changed, 424 insertions(+), 6 deletions(-)
> 
> diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
> index 2d0840ef599a..953929d196c3 100644
> --- a/tools/lib/bpf/btf.c
> +++ b/tools/lib/bpf/btf.c
> @@ -1771,9 +1771,8 @@ static int btf_rewrite_str(__u32 *str_off, void *ctx)
>   	return 0;
>   }
>   
> -int btf__add_type(struct btf *btf, const struct btf *src_btf, const struct btf_type *src_type)
> +static int btf_add_type(struct btf_pipe *p, const struct btf_type *src_type)
>   {
> -	struct btf_pipe p = { .src = src_btf, .dst = btf };
>   	struct btf_type *t;
>   	int sz, err;
>   
> @@ -1782,20 +1781,27 @@ int btf__add_type(struct btf *btf, const struct btf *src_btf, const struct btf_t
>   		return libbpf_err(sz);
>   
>   	/* deconstruct BTF, if necessary, and invalidate raw_data */
> -	if (btf_ensure_modifiable(btf))
> +	if (btf_ensure_modifiable(p->dst))
>   		return libbpf_err(-ENOMEM);
>   
> -	t = btf_add_type_mem(btf, sz);
> +	t = btf_add_type_mem(p->dst, sz);
>   	if (!t)
>   		return libbpf_err(-ENOMEM);
>   
>   	memcpy(t, src_type, sz);
>   
> -	err = btf_type_visit_str_offs(t, btf_rewrite_str, &p);
> +	err = btf_type_visit_str_offs(t, btf_rewrite_str, p);
>   	if (err)
>   		return libbpf_err(err);
>   
> -	return btf_commit_type(btf, sz);
> +	return btf_commit_type(p->dst, sz);
> +}
> +
> +int btf__add_type(struct btf *btf, const struct btf *src_btf, const struct btf_type *src_type)
> +{
> +	struct btf_pipe p = { .src = src_btf, .dst = btf };
> +
> +	return btf_add_type(&p, src_type);
>   }
>   
>   static int btf_rewrite_type_ids(__u32 *type_id, void *ctx)
> @@ -5212,3 +5218,394 @@ int btf_ext_visit_str_offs(struct btf_ext *btf_ext, str_off_visit_fn visit, void
>   
>   	return 0;
>   }
> +
> +#define BTF_NEEDS_SIZE	(1 << 31)	/* flag set if either struct/union is
> +					 * embedded - and thus size info must
> +					 * be preserved - or if there are
> +					 * multiple instances of the same
> +					 * struct/union - where size can be
> +					 * used to clarify which is wanted.
> +					 */
> +#define BTF_ID(id)		(id & ~BTF_NEEDS_SIZE)
> +
> +struct btf_distill {
> +	struct btf_pipe pipe;
> +	int *ids;
> +	unsigned int split_start_id;
> +	unsigned int split_start_str;
> +	unsigned int diff_id;
> +};
> +
> +/* Check if a member of a split BTF struct/union refers to a base BTF
> + * struct/union.  Members can be const/restrict/volatile/typedef
> + * reference types, but if a pointer is encountered, type is no longer
> + * considered embedded.
> + */
> +static int btf_find_embedded_composite_type_ids(__u32 *id, void *ctx)
> +{
> +	struct btf_distill *dist = ctx;
> +	const struct btf_type *t;
> +	__u32 next_id = *id;
> +
> +	do {
> +		if (next_id == 0)
> +			return 0;
> +		t = btf_type_by_id(dist->pipe.src, next_id);
> +		switch (btf_kind(t)) {
> +		case BTF_KIND_CONST:
> +		case BTF_KIND_RESTRICT:
> +		case BTF_KIND_VOLATILE:
> +		case BTF_KIND_TYPEDEF:
> +		case BTF_KIND_TYPE_TAG:
> +			next_id = t->type;
> +			break;
> +		case BTF_KIND_ARRAY: {
> +			struct btf_array *a = btf_array(t);
> +
> +			next_id = a->type;
> +			break;
> +		}
> +		case BTF_KIND_STRUCT:
> +		case BTF_KIND_UNION:
> +			dist->ids[next_id] |= BTF_NEEDS_SIZE;
> +			return 0;
> +		default:
> +			return 0;
> +		}
> +
> +	} while (1);
> +
> +	return 0;
> +}
> +
> +/* Check if composite type has a duplicate-named type; if it does, retain
> + * size information to help guide later relocation towards the correct type.
> + * For example there are duplicate 'dma_chan' structs in vmlinux BTF;
> + * one is size 112, the other 16.  Having size information allows relocation
> + * to choose the right one.
> + */
> +static int btf_mark_composite_dups(struct btf_distill *dist, __u32 id)
> +{
> +	__u8 *cnt = calloc(dist->split_start_str, sizeof(__u8));
> +	struct btf_type *t;
> +	int i;
> +
> +	if (!cnt)
> +		return -ENOMEM;
> +
> +	/* First pass; collect name counts for composite types. */
> +	for (i = 1; i < dist->split_start_id; i++) {
> +		t = btf_type_by_id(dist->pipe.src, i);
> +		if (!btf_is_composite(t) || !t->name_off)
> +			continue;
> +		if (cnt[t->name_off] < 255)
> +			cnt[t->name_off]++;
> +	}
> +	/* Second pass; mark composite types with multiple instances of the
> +	 * same name as needing size information.
> +	 */
> +	for (i = 1; i < dist->split_start_id; i++) {
> +		/* id not needed or is already preserving size information */
> +		if (!dist->ids[i] || (dist->ids[i] & BTF_NEEDS_SIZE))
> +			continue;
> +		t = btf_type_by_id(dist->pipe.src, i);
> +		if (!btf_is_composite(t) || !t->name_off)
> +			continue;
> +		if (cnt[t->name_off] > 1)
> +			dist->ids[i] |= BTF_NEEDS_SIZE;
> +	}
> +	free(cnt);
> +
> +	return 0;
> +}
> +
> +static bool btf_is_eligible_named_fwd(const struct btf_type *t)
> +{
> +	return (btf_is_composite(t) || btf_is_any_enum(t)) && t->name_off != 0;
> +}
> +
> +static int btf_add_distilled_type_ids(__u32 *id, void *ctx)
> +{
> +	struct btf_distill *dist = ctx;
> +	struct btf_type *t = btf_type_by_id(dist->pipe.src, *id);
> +	int err;
> +
> +	if (!*id)
> +		return 0;
> +	/* split BTF id, not needed */
> +	if (*id >= dist->split_start_id)
> +		return 0;
> +	/* already added ? */
> +	if (BTF_ID(dist->ids[*id]) > 0)
> +		return 0;
> +
> +	/* only a subset of base BTF types should be referenced from split
> +	 * BTF; ensure nothing unexpected is referenced.
> +	 */
> +	switch (btf_kind(t)) {
> +	case BTF_KIND_INT:
> +	case BTF_KIND_FLOAT:
> +	case BTF_KIND_FWD:
> +	case BTF_KIND_ARRAY:
> +	case BTF_KIND_STRUCT:
> +	case BTF_KIND_UNION:
> +	case BTF_KIND_TYPEDEF:
> +	case BTF_KIND_ENUM:
> +	case BTF_KIND_ENUM64:
> +	case BTF_KIND_PTR:
> +	case BTF_KIND_CONST:
> +	case BTF_KIND_RESTRICT:
> +	case BTF_KIND_VOLATILE:
> +	case BTF_KIND_FUNC_PROTO:
> +	case BTF_KIND_TYPE_TAG:
> +		dist->ids[*id] |= *id;
> +		break;
> +	default:
> +		pr_warn("unexpected reference to base type[%u] of kind [%u] when creating distilled base BTF.\n",
> +			*id, btf_kind(t));
> +		return -EINVAL;
> +	}
> +
> +	/* struct/union members not needed, except for anonymous structs
> +	 * and unions, which we need since name won't help us determine
> +	 * matches; so if a named struct/union, no need to recurse
> +	 * into members.
> +	 */
> +	if (btf_is_eligible_named_fwd(t))
> +		return 0;
> +
> +	/* ensure references in type are added also. */
> +	err = btf_type_visit_type_ids(t, btf_add_distilled_type_ids, ctx);
> +	if (err < 0)
> +		return err;
> +	return 0;
> +}
> +
> +static int btf_add_distilled_types(struct btf_distill *dist)
> +{
> +	bool adding_to_base = dist->pipe.dst->start_id == 1;
> +	int id = btf__type_cnt(dist->pipe.dst);
> +	struct btf_type *t;
> +	int i, err = 0;
> +
> +	/* Add types for each of the required references to either distilled
> +	 * base or split BTF, depending on type characteristics.
> +	 */
> +	for (i = 1; i < dist->split_start_id; i++) {
> +		const char *name;
> +		int kind;
> +
> +		if (!BTF_ID(dist->ids[i]))
> +			continue;
> +		t = btf_type_by_id(dist->pipe.src, i);
> +		kind = btf_kind(t);
> +		name = btf__name_by_offset(dist->pipe.src, t->name_off);
> +
> +		/* Named int, float, fwd struct, union, enum[64] are added to
> +		 * base; everything else is added to split BTF.
> +		 */
> +		switch (kind) {
> +		case BTF_KIND_INT:
> +		case BTF_KIND_FLOAT:
> +		case BTF_KIND_FWD:
> +		case BTF_KIND_STRUCT:
> +		case BTF_KIND_UNION:
> +		case BTF_KIND_ENUM:
> +		case BTF_KIND_ENUM64:
> +			if ((adding_to_base && !t->name_off) || (!adding_to_base && t->name_off))
> +				continue;
> +			break;
> +		default:
> +			if (adding_to_base)
> +				continue;
> +			break;
> +		}
> +		if (dist->ids[i] & BTF_NEEDS_SIZE) {
> +			/* If a named struct/union in base BTF is referenced as a type
> +			 * in split BTF without use of a pointer - i.e. as an embedded
> +			 * struct/union - add an empty struct/union preserving size
> +			 * since size must be consistent when relocating split and
> +			 * possibly changed base BTF.  Similarly, when a struct/union
> +			 * has multiple instances of the same name in the original
> +			 * base BTF, retain size to help relocation later pick the
> +			 * right struct/union.
> +			 */
> +			err = btf_add_composite(dist->pipe.dst, kind, name, t->size);
> +		} else if (btf_is_eligible_named_fwd(t)) {
> +			/* If not embedded, use a fwd for named struct/unions since we
> +			 * can match via name without any other details.
> +			 */
> +			switch (kind) {
> +			case BTF_KIND_STRUCT:
> +				err = btf__add_fwd(dist->pipe.dst, name, BTF_FWD_STRUCT);
> +				break;
> +			case BTF_KIND_UNION:
> +				err = btf__add_fwd(dist->pipe.dst, name, BTF_FWD_UNION);
> +				break;
> +			case BTF_KIND_ENUM:
> +				err = btf__add_enum(dist->pipe.dst, name, t->size);
> +				break;
> +			case BTF_KIND_ENUM64:
> +				err = btf__add_enum(dist->pipe.dst, name, t->size);

Should this be btf__add_enum64() instead of btf__add_enum()?

> +				break;
> +			default:
> +				pr_warn("unexpected kind [%u] when creating distilled base BTF.\n",
> +					btf_kind(t));
> +				return -EINVAL;
> +			}
> +		} else {
> +			err = btf_add_type(&dist->pipe, t);
> +		}
> +		if (err < 0)
> +			break;
> +		dist->ids[i] = id++;
> +	}
> +	return err;
> +}
> +

[...]
diff mbox series

Patch

diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index 2d0840ef599a..953929d196c3 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -1771,9 +1771,8 @@  static int btf_rewrite_str(__u32 *str_off, void *ctx)
 	return 0;
 }
 
-int btf__add_type(struct btf *btf, const struct btf *src_btf, const struct btf_type *src_type)
+static int btf_add_type(struct btf_pipe *p, const struct btf_type *src_type)
 {
-	struct btf_pipe p = { .src = src_btf, .dst = btf };
 	struct btf_type *t;
 	int sz, err;
 
@@ -1782,20 +1781,27 @@  int btf__add_type(struct btf *btf, const struct btf *src_btf, const struct btf_t
 		return libbpf_err(sz);
 
 	/* deconstruct BTF, if necessary, and invalidate raw_data */
-	if (btf_ensure_modifiable(btf))
+	if (btf_ensure_modifiable(p->dst))
 		return libbpf_err(-ENOMEM);
 
-	t = btf_add_type_mem(btf, sz);
+	t = btf_add_type_mem(p->dst, sz);
 	if (!t)
 		return libbpf_err(-ENOMEM);
 
 	memcpy(t, src_type, sz);
 
-	err = btf_type_visit_str_offs(t, btf_rewrite_str, &p);
+	err = btf_type_visit_str_offs(t, btf_rewrite_str, p);
 	if (err)
 		return libbpf_err(err);
 
-	return btf_commit_type(btf, sz);
+	return btf_commit_type(p->dst, sz);
+}
+
+int btf__add_type(struct btf *btf, const struct btf *src_btf, const struct btf_type *src_type)
+{
+	struct btf_pipe p = { .src = src_btf, .dst = btf };
+
+	return btf_add_type(&p, src_type);
 }
 
 static int btf_rewrite_type_ids(__u32 *type_id, void *ctx)
@@ -5212,3 +5218,394 @@  int btf_ext_visit_str_offs(struct btf_ext *btf_ext, str_off_visit_fn visit, void
 
 	return 0;
 }
+
+#define BTF_NEEDS_SIZE	(1 << 31)	/* flag set if either struct/union is
+					 * embedded - and thus size info must
+					 * be preserved - or if there are
+					 * multiple instances of the same
+					 * struct/union - where size can be
+					 * used to clarify which is wanted.
+					 */
+#define BTF_ID(id)		(id & ~BTF_NEEDS_SIZE)
+
+struct btf_distill {
+	struct btf_pipe pipe;
+	int *ids;
+	unsigned int split_start_id;
+	unsigned int split_start_str;
+	unsigned int diff_id;
+};
+
+/* Check if a member of a split BTF struct/union refers to a base BTF
+ * struct/union.  Members can be const/restrict/volatile/typedef
+ * reference types, but if a pointer is encountered, type is no longer
+ * considered embedded.
+ */
+static int btf_find_embedded_composite_type_ids(__u32 *id, void *ctx)
+{
+	struct btf_distill *dist = ctx;
+	const struct btf_type *t;
+	__u32 next_id = *id;
+
+	do {
+		if (next_id == 0)
+			return 0;
+		t = btf_type_by_id(dist->pipe.src, next_id);
+		switch (btf_kind(t)) {
+		case BTF_KIND_CONST:
+		case BTF_KIND_RESTRICT:
+		case BTF_KIND_VOLATILE:
+		case BTF_KIND_TYPEDEF:
+		case BTF_KIND_TYPE_TAG:
+			next_id = t->type;
+			break;
+		case BTF_KIND_ARRAY: {
+			struct btf_array *a = btf_array(t);
+
+			next_id = a->type;
+			break;
+		}
+		case BTF_KIND_STRUCT:
+		case BTF_KIND_UNION:
+			dist->ids[next_id] |= BTF_NEEDS_SIZE;
+			return 0;
+		default:
+			return 0;
+		}
+
+	} while (1);
+
+	return 0;
+}
+
+/* Check if composite type has a duplicate-named type; if it does, retain
+ * size information to help guide later relocation towards the correct type.
+ * For example there are duplicate 'dma_chan' structs in vmlinux BTF;
+ * one is size 112, the other 16.  Having size information allows relocation
+ * to choose the right one.
+ */
+static int btf_mark_composite_dups(struct btf_distill *dist, __u32 id)
+{
+	__u8 *cnt = calloc(dist->split_start_str, sizeof(__u8));
+	struct btf_type *t;
+	int i;
+
+	if (!cnt)
+		return -ENOMEM;
+
+	/* First pass; collect name counts for composite types. */
+	for (i = 1; i < dist->split_start_id; i++) {
+		t = btf_type_by_id(dist->pipe.src, i);
+		if (!btf_is_composite(t) || !t->name_off)
+			continue;
+		if (cnt[t->name_off] < 255)
+			cnt[t->name_off]++;
+	}
+	/* Second pass; mark composite types with multiple instances of the
+	 * same name as needing size information.
+	 */
+	for (i = 1; i < dist->split_start_id; i++) {
+		/* id not needed or is already preserving size information */
+		if (!dist->ids[i] || (dist->ids[i] & BTF_NEEDS_SIZE))
+			continue;
+		t = btf_type_by_id(dist->pipe.src, i);
+		if (!btf_is_composite(t) || !t->name_off)
+			continue;
+		if (cnt[t->name_off] > 1)
+			dist->ids[i] |= BTF_NEEDS_SIZE;
+	}
+	free(cnt);
+
+	return 0;
+}
+
+static bool btf_is_eligible_named_fwd(const struct btf_type *t)
+{
+	return (btf_is_composite(t) || btf_is_any_enum(t)) && t->name_off != 0;
+}
+
+static int btf_add_distilled_type_ids(__u32 *id, void *ctx)
+{
+	struct btf_distill *dist = ctx;
+	struct btf_type *t = btf_type_by_id(dist->pipe.src, *id);
+	int err;
+
+	if (!*id)
+		return 0;
+	/* split BTF id, not needed */
+	if (*id >= dist->split_start_id)
+		return 0;
+	/* already added ? */
+	if (BTF_ID(dist->ids[*id]) > 0)
+		return 0;
+
+	/* only a subset of base BTF types should be referenced from split
+	 * BTF; ensure nothing unexpected is referenced.
+	 */
+	switch (btf_kind(t)) {
+	case BTF_KIND_INT:
+	case BTF_KIND_FLOAT:
+	case BTF_KIND_FWD:
+	case BTF_KIND_ARRAY:
+	case BTF_KIND_STRUCT:
+	case BTF_KIND_UNION:
+	case BTF_KIND_TYPEDEF:
+	case BTF_KIND_ENUM:
+	case BTF_KIND_ENUM64:
+	case BTF_KIND_PTR:
+	case BTF_KIND_CONST:
+	case BTF_KIND_RESTRICT:
+	case BTF_KIND_VOLATILE:
+	case BTF_KIND_FUNC_PROTO:
+	case BTF_KIND_TYPE_TAG:
+		dist->ids[*id] |= *id;
+		break;
+	default:
+		pr_warn("unexpected reference to base type[%u] of kind [%u] when creating distilled base BTF.\n",
+			*id, btf_kind(t));
+		return -EINVAL;
+	}
+
+	/* struct/union members not needed, except for anonymous structs
+	 * and unions, which we need since name won't help us determine
+	 * matches; so if a named struct/union, no need to recurse
+	 * into members.
+	 */
+	if (btf_is_eligible_named_fwd(t))
+		return 0;
+
+	/* ensure references in type are added also. */
+	err = btf_type_visit_type_ids(t, btf_add_distilled_type_ids, ctx);
+	if (err < 0)
+		return err;
+	return 0;
+}
+
+static int btf_add_distilled_types(struct btf_distill *dist)
+{
+	bool adding_to_base = dist->pipe.dst->start_id == 1;
+	int id = btf__type_cnt(dist->pipe.dst);
+	struct btf_type *t;
+	int i, err = 0;
+
+	/* Add types for each of the required references to either distilled
+	 * base or split BTF, depending on type characteristics.
+	 */
+	for (i = 1; i < dist->split_start_id; i++) {
+		const char *name;
+		int kind;
+
+		if (!BTF_ID(dist->ids[i]))
+			continue;
+		t = btf_type_by_id(dist->pipe.src, i);
+		kind = btf_kind(t);
+		name = btf__name_by_offset(dist->pipe.src, t->name_off);
+
+		/* Named int, float, fwd struct, union, enum[64] are added to
+		 * base; everything else is added to split BTF.
+		 */
+		switch (kind) {
+		case BTF_KIND_INT:
+		case BTF_KIND_FLOAT:
+		case BTF_KIND_FWD:
+		case BTF_KIND_STRUCT:
+		case BTF_KIND_UNION:
+		case BTF_KIND_ENUM:
+		case BTF_KIND_ENUM64:
+			if ((adding_to_base && !t->name_off) || (!adding_to_base && t->name_off))
+				continue;
+			break;
+		default:
+			if (adding_to_base)
+				continue;
+			break;
+		}
+		if (dist->ids[i] & BTF_NEEDS_SIZE) {
+			/* If a named struct/union in base BTF is referenced as a type
+			 * in split BTF without use of a pointer - i.e. as an embedded
+			 * struct/union - add an empty struct/union preserving size
+			 * since size must be consistent when relocating split and
+			 * possibly changed base BTF.  Similarly, when a struct/union
+			 * has multiple instances of the same name in the original
+			 * base BTF, retain size to help relocation later pick the
+			 * right struct/union.
+			 */
+			err = btf_add_composite(dist->pipe.dst, kind, name, t->size);
+		} else if (btf_is_eligible_named_fwd(t)) {
+			/* If not embedded, use a fwd for named struct/unions since we
+			 * can match via name without any other details.
+			 */
+			switch (kind) {
+			case BTF_KIND_STRUCT:
+				err = btf__add_fwd(dist->pipe.dst, name, BTF_FWD_STRUCT);
+				break;
+			case BTF_KIND_UNION:
+				err = btf__add_fwd(dist->pipe.dst, name, BTF_FWD_UNION);
+				break;
+			case BTF_KIND_ENUM:
+				err = btf__add_enum(dist->pipe.dst, name, t->size);
+				break;
+			case BTF_KIND_ENUM64:
+				err = btf__add_enum(dist->pipe.dst, name, t->size);
+				break;
+			default:
+				pr_warn("unexpected kind [%u] when creating distilled base BTF.\n",
+					btf_kind(t));
+				return -EINVAL;
+			}
+		} else {
+			err = btf_add_type(&dist->pipe, t);
+		}
+		if (err < 0)
+			break;
+		dist->ids[i] = id++;
+	}
+	return err;
+}
+
+/* Split BTF ids without a mapping will be shifted downwards since distilled
+ * base BTF is smaller than the original base BTF.  For those that have a
+ * mapping (either to base or updated split BTF), update the id based on
+ * that mapping.
+ */
+static int btf_update_distilled_type_ids(__u32 *id, void *ctx)
+{
+	struct btf_distill *dist = ctx;
+
+	if (BTF_ID(dist->ids[*id]))
+		*id = BTF_ID(dist->ids[*id]);
+	else if (*id >= dist->split_start_id)
+		*id -= dist->diff_id;
+	return 0;
+}
+
+/* Create updated split BTF with distilled base BTF; distilled base BTF
+ * consists of BTF information required to clarify the types that split
+ * BTF refers to, omitting unneeded details.  Specifically it will contain
+ * base types and forward declarations of named structs, unions and enumerated
+ * types. Associated reference types like pointers, arrays and anonymous
+ * structs, unions and enumerated types will be added to split BTF.
+ *
+ * The only case where structs, unions or enumerated types are fully represented
+ * is when they are anonymous; in such cases, the anonymous type is added to
+ * split BTF in full.
+ *
+ * We return newly-created split BTF where the split BTF refers to a newly-created
+ * distilled base BTF. Both must be freed separately by the caller.
+ */
+int btf__distill_base(const struct btf *src_btf, struct btf **new_base_btf,
+		      struct btf **new_split_btf)
+{
+	struct btf *new_base = NULL, *new_split = NULL;
+	const struct btf *old_base;
+	unsigned int n = btf__type_cnt(src_btf);
+	struct btf_distill dist = {};
+	struct btf_type *t;
+	int i, err = 0;
+
+	/* src BTF must be split BTF. */
+	old_base = btf__base_btf(src_btf);
+	if (!new_base_btf || !new_split_btf || !old_base)
+		return libbpf_err(-EINVAL);
+
+	new_base = btf__new_empty();
+	if (!new_base)
+		return libbpf_err(-ENOMEM);
+	dist.ids = calloc(n, sizeof(*dist.ids));
+	if (!dist.ids) {
+		err = -ENOMEM;
+		goto err_out;
+	}
+	dist.pipe.src = src_btf;
+	dist.pipe.dst = new_base;
+	dist.pipe.str_off_map = hashmap__new(btf_dedup_identity_hash_fn, btf_dedup_equal_fn, NULL);
+	if (IS_ERR(dist.pipe.str_off_map)) {
+		err = -ENOMEM;
+		goto err_out;
+	}
+	dist.split_start_id = btf__type_cnt(old_base);
+	dist.split_start_str = old_base->hdr->str_len;
+
+	/* Pass over src split BTF; generate the list of base BTF
+	 * type ids it references; these will constitute our distilled
+	 * BTF set to be distributed over base and split BTF as appropriate.
+	 */
+	for (i = src_btf->start_id; i < n; i++) {
+		t = btf_type_by_id(src_btf, i);
+
+		/* check if members of struct/union in split BTF refer to base BTF
+		 * struct/union; if so, we will use an empty sized struct to represent
+		 * it rather than a FWD because its size must match on later BTF
+		 * relocation.
+		 */
+		if (btf_is_composite(t)) {
+			err = btf_type_visit_type_ids(t, btf_find_embedded_composite_type_ids,
+						      &dist);
+			if (err < 0)
+				goto err_out;
+		}
+		err = btf_type_visit_type_ids(t,  btf_add_distilled_type_ids, &dist);
+		if (err < 0)
+			goto err_out;
+	}
+	/* Next check the distilled type id list for any struct/unions that
+	 * have multiple instances of the same name in base BTF; size must be
+	 * preserved for those cases also to guide relocation matching.
+	 */
+	err = btf_mark_composite_dups(&dist, i);
+	if (err < 0)
+		goto err_out;
+
+	/* Next add types for each of the required references to base BTF and split BTF in turn. */
+	err = btf_add_distilled_types(&dist);
+	if (err < 0)
+		goto err_out;
+	/* now create new split BTF with distilled base BTF as its base; we end up with
+	 * split BTF that has base BTF that represents enough about its base references
+	 * to allow it to be relocated with the base BTF available.
+	 */
+	new_split = btf__new_empty_split(new_base);
+	if (!new_split_btf) {
+		err = -errno;
+		goto err_out;
+	}
+	dist.pipe.dst = new_split;
+	/* First add all split types */
+	for (i = src_btf->start_id; i < n; i++) {
+		t = btf_type_by_id(src_btf, i);
+		err = btf_add_type(&dist.pipe, t);
+		if (err < 0)
+			goto err_out;
+	}
+	/* Now add distilled types to split BTF that are not added to base. */
+	err = btf_add_distilled_types(&dist);
+	if (err < 0)
+		goto err_out;
+
+	/* all split BTF ids will be shifted downwards since there are less base BTF ids
+	 * in distilled base BTF.
+	 */
+	dist.diff_id = dist.split_start_id - btf__type_cnt(new_base);
+
+	n = btf__type_cnt(new_split);
+	/* Now update base/split BTF ids. */
+	for (i = 1; i < n; i++) {
+		t = btf_type_by_id(new_split, i);
+
+		err = btf_type_visit_type_ids(t, btf_update_distilled_type_ids, &dist);
+		if (err < 0)
+			goto err_out;
+	}
+	free(dist.ids);
+	hashmap__free(dist.pipe.str_off_map);
+	*new_base_btf = new_base;
+	*new_split_btf = new_split;
+	return 0;
+err_out:
+	free(dist.ids);
+	if (!IS_ERR(dist.pipe.str_off_map))
+		hashmap__free(dist.pipe.str_off_map);
+	btf__free(new_split);
+	btf__free(new_base);
+	return libbpf_err(err);
+}
diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
index 8e6880d91c84..f3f149a09088 100644
--- a/tools/lib/bpf/btf.h
+++ b/tools/lib/bpf/btf.h
@@ -107,6 +107,26 @@  LIBBPF_API struct btf *btf__new_empty(void);
  */
 LIBBPF_API struct btf *btf__new_empty_split(struct btf *base_btf);
 
+/**
+ * @brief **btf__distill_base()** creates new versions of the split BTF
+ * *src_btf* and its base BTF. The new base BTF will only contain the types
+ * needed to improve robustness of the split BTF to small changes in base BTF.
+ * When that split BTF is loaded against a (possibly changed) base, this
+ * distilled base BTF will help update references to that (possibly changed)
+ * base BTF.
+ *
+ * Both the new split and its associated new base BTF must be freed by
+ * the caller.
+ *
+ * If successful, 0 is returned and **new_base_btf** and **new_split_btf**
+ * will point at new base/split BTF. Both the new split and its associated
+ * new base BTF must be freed by the caller.
+ *
+ * A negative value is returned on error.
+ */
+LIBBPF_API int btf__distill_base(const struct btf *src_btf, struct btf **new_base_btf,
+				 struct btf **new_split_btf);
+
 LIBBPF_API struct btf *btf__parse(const char *path, struct btf_ext **btf_ext);
 LIBBPF_API struct btf *btf__parse_split(const char *path, struct btf *base_btf);
 LIBBPF_API struct btf *btf__parse_elf(const char *path, struct btf_ext **btf_ext);
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index c1ce8aa3520b..9e69d6e2a512 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -419,6 +419,7 @@  LIBBPF_1.4.0 {
 
 LIBBPF_1.5.0 {
 	global:
+		btf__distill_base;
 		bpf_program__attach_sockmap;
 		ring__consume_n;
 		ring_buffer__consume_n;