diff mbox series

[v4,bpf-next,07/11] libbpf: split BTF relocation

Message ID 20240517102246.4070184-8-alan.maguire@oracle.com (mailing list archive)
State Superseded
Delegated to: BPF
Headers show
Series bpf: support resilient split BTF | expand

Checks

Context Check Description
bpf/vmtest-bpf-next-PR fail PR summary
bpf/vmtest-bpf-next-VM_Test-36 success Logs for x86_64-llvm-18 / build-release / build for x86_64 with llvm-18 and -O2 optimization
bpf/vmtest-bpf-next-VM_Test-37 fail Logs for x86_64-llvm-18 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-38 fail Logs for x86_64-llvm-18 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-39 fail Logs for x86_64-llvm-18 / test (test_progs_cpuv4, false, 360) / test_progs_cpuv4 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-40 fail Logs for x86_64-llvm-18 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-41 fail Logs for x86_64-llvm-18 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-42 success Logs for x86_64-llvm-18 / veristat
bpf/vmtest-bpf-next-VM_Test-0 success Logs for Lint
bpf/vmtest-bpf-next-VM_Test-2 success Logs for Unittests
bpf/vmtest-bpf-next-VM_Test-3 success Logs for Validate matrix.py
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-5 success Logs for aarch64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-10 success Logs for aarch64-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-12 success Logs for s390x-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-4 success Logs for aarch64-gcc / build / build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-9 fail Logs for aarch64-gcc / test (test_verifier, false, 360) / test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-11 success Logs for s390x-gcc / build / build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-6 fail Logs for aarch64-gcc / test (test_maps, false, 360) / test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-17 success Logs for s390x-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-18 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-19 success Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-20 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-28 fail Logs for x86_64-llvm-17 / build / build for x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-29 fail Logs for x86_64-llvm-17 / build-release / build for x86_64 with llvm-17 and -O2 optimization
bpf/vmtest-bpf-next-VM_Test-30 success Logs for x86_64-llvm-17 / test
bpf/vmtest-bpf-next-VM_Test-31 success Logs for x86_64-llvm-17 / veristat
bpf/vmtest-bpf-next-VM_Test-32 fail Logs for x86_64-llvm-18 / build / build for x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-33 fail Logs for x86_64-llvm-18 / build-release / build for x86_64 with llvm-18 and -O2 optimization
bpf/vmtest-bpf-next-VM_Test-34 success Logs for x86_64-llvm-18 / test
bpf/vmtest-bpf-next-VM_Test-35 success Logs for x86_64-llvm-18 / veristat
bpf/vmtest-bpf-next-VM_Test-8 fail Logs for aarch64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-13 fail Logs for s390x-gcc / test (test_maps, false, 360) / test_maps on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-7 fail Logs for aarch64-gcc / test (test_progs, false, 360) / test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-15 fail Logs for s390x-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-16 fail Logs for s390x-gcc / test (test_verifier, false, 360) / test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-14 fail Logs for s390x-gcc / test (test_progs, false, 360) / test_progs on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-21 fail Logs for x86_64-gcc / test (test_maps, false, 360) / test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-22 fail Logs for x86_64-gcc / test (test_progs, false, 360) / test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-23 fail Logs for x86_64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-24 success Logs for x86_64-gcc / test (test_progs_no_alu32_parallel, true, 30) / test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-25 success Logs for x86_64-gcc / test (test_progs_parallel, true, 30) / test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-26 fail Logs for x86_64-gcc / test (test_verifier, false, 360) / test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-27 fail Logs for x86_64-gcc / veristat / veristat on x86_64 with gcc
netdev/tree_selection success Clearly marked for bpf-next, async
netdev/apply fail Patch does not apply to bpf-next-0

Commit Message

Alan Maguire May 17, 2024, 10:22 a.m. UTC
Map distilled base BTF type ids referenced in split BTF and their
references to the base BTF passed in, and if the mapping succeeds,
reparent the split BTF to the base BTF.

Relocation is done by first verifying that distilled base BTF
only consists of named INT, FLOAT, ENUM, FWD, STRUCT and
UNION kinds; then we sort these to speed lookups.  Once sorted,
the base BTF is iterated, and for each relevant kind we check
for an equivalent in distilled base BTF.  When found, the
mapping from distilled -> base BTF id and string offset is recorded.

Once all mappings are established, we can update type ids
and string offsets in split BTF and reparent it to the new base.

Signed-off-by: Alan Maguire <alan.maguire@oracle.com>
---
 tools/lib/bpf/Build             |   2 +-
 tools/lib/bpf/btf.c             |  17 ++
 tools/lib/bpf/btf.h             |   8 +
 tools/lib/bpf/btf_relocate.c    | 318 ++++++++++++++++++++++++++++++++
 tools/lib/bpf/libbpf.map        |   1 +
 tools/lib/bpf/libbpf_internal.h |   3 +
 6 files changed, 348 insertions(+), 1 deletion(-)
 create mode 100644 tools/lib/bpf/btf_relocate.c

Comments

Andrii Nakryiko May 21, 2024, 10:34 p.m. UTC | #1
On Fri, May 17, 2024 at 3:23 AM Alan Maguire <alan.maguire@oracle.com> wrote:
>
> Map distilled base BTF type ids referenced in split BTF and their
> references to the base BTF passed in, and if the mapping succeeds,
> reparent the split BTF to the base BTF.
>
> Relocation is done by first verifying that distilled base BTF
> only consists of named INT, FLOAT, ENUM, FWD, STRUCT and
> UNION kinds; then we sort these to speed lookups.  Once sorted,
> the base BTF is iterated, and for each relevant kind we check
> for an equivalent in distilled base BTF.  When found, the
> mapping from distilled -> base BTF id and string offset is recorded.
>
> Once all mappings are established, we can update type ids
> and string offsets in split BTF and reparent it to the new base.
>
> Signed-off-by: Alan Maguire <alan.maguire@oracle.com>
> ---
>  tools/lib/bpf/Build             |   2 +-
>  tools/lib/bpf/btf.c             |  17 ++
>  tools/lib/bpf/btf.h             |   8 +
>  tools/lib/bpf/btf_relocate.c    | 318 ++++++++++++++++++++++++++++++++
>  tools/lib/bpf/libbpf.map        |   1 +
>  tools/lib/bpf/libbpf_internal.h |   3 +
>  6 files changed, 348 insertions(+), 1 deletion(-)
>  create mode 100644 tools/lib/bpf/btf_relocate.c

[...]

>  LIBBPF_API int btf__dedup(struct btf *btf, const struct btf_dedup_opts *opts);
>
> +/**
> + * @brief **btf__relocate()** will check the split BTF *btf* for references
> + * to base BTF kinds, and verify those references are compatible with
> + * *base_btf*; if they are, *btf* is adjusted such that is re-parented to
> + * *base_btf* and type ids and strings are adjusted to accommodate this.
> + */

add boilerplate regarding return results?..

> +LIBBPF_API int btf__relocate(struct btf *btf, const struct btf *base_btf);
> +
>  struct btf_dump;
>
>  struct btf_dump_opts {
> diff --git a/tools/lib/bpf/btf_relocate.c b/tools/lib/bpf/btf_relocate.c
> new file mode 100644
> index 000000000000..c06851f05472
> --- /dev/null
> +++ b/tools/lib/bpf/btf_relocate.c
> @@ -0,0 +1,318 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/* Copyright (c) 2024, Oracle and/or its affiliates. */
> +
> +#ifndef _GNU_SOURCE
> +#define _GNU_SOURCE
> +#endif
> +
> +#include "btf.h"
> +#include "bpf.h"
> +#include "libbpf.h"
> +#include "libbpf_internal.h"
> +
> +struct btf;
> +
> +struct btf_relocate {
> +       __u32 search_id;                                /* must be first field; see search below */

just put that comment before the field, why this horizontal placement?

[...]

> +
> +/* Comparison between base BTF type (search type) and distilled base types (target).
> + * Because there is no bsearch_r() we need to use the search key - which also is
> + * the first element of struct btf_relocate * - as a means to retrieve the
> + * struct btf_relocate *.
> + */
> +static int cmp_base_and_distilled_btf_types(const void *idbase, const void *iddist)
> +{
> +       struct btf_relocate *r = (struct btf_relocate *)idbase;
> +       const struct btf_type *tbase = btf_type_by_id(r->base_btf, *(__u32 *)idbase);
> +       const struct btf_type *tdist = btf_type_by_id(r->dist_base_btf, *(__u32 *)iddist);

boo, id_base or base_id, id_dist or dist_id, we went through such
naming already, I believe :)

I'd also use base_t and dist_t, like you do below with dist_t already

> +
> +       return strcmp(btf__name_by_offset(r->base_btf, tbase->name_off),
> +                     btf__name_by_offset(r->dist_base_btf, tdist->name_off));
> +}
> +
> +/* Build a map from distilled base BTF ids to base BTF ids. To do so, iterate
> + * through base BTF looking up distilled type (using binary search) equivalents.
> + */
> +static int btf_relocate_map_distilled_base(struct btf_relocate *r)
> +{
> +       struct btf_type *t;
> +       const char *name;
> +       __u32 id;
> +
> +       /* generate a sort index array of type ids sorted by name for distilled
> +        * base BTF to speed lookups.
> +        */
> +       for (id = 1; id < r->nr_dist_base_types; id++)
> +               r->dist_base_index[id] = id;
> +       qsort_r(r->dist_base_index, r->nr_dist_base_types, sizeof(__u32), cmp_btf_types,
> +               (struct btf *)r->dist_base_btf);

Is qsort_r() supported in musl and in Android's libc implementation?
I'd rather not have to scramble to fix the build for them after
release.

> +

[...]

> +               r->search_id = id;
> +               dist_id = bsearch(&r->search_id, r->dist_base_index, r->nr_dist_base_types,
> +                                 sizeof(__u32), cmp_base_and_distilled_btf_types);
> +               if (!dist_id)
> +                       continue;
> +               if (!*dist_id || *dist_id > r->nr_dist_base_types) {

>=

> +                       pr_warn("base BTF id [%d] maps to invalid distilled base BTF id [%d]\n",
> +                               id, *dist_id);
> +                       return -EINVAL;
> +               }
> +               /* validate that kinds are compatible */
> +               dist_t = btf_type_by_id(r->dist_base_btf, *dist_id);
> +               dist_kind = btf_kind(dist_t);
> +               name = btf__name_by_offset(r->dist_base_btf, dist_t->name_off);
> +               compat_kind = dist_kind == kind;
> +               if (!compat_kind) {
> +                       switch (dist_kind) {
> +                       case BTF_KIND_FWD:
> +                               compat_kind = kind == BTF_KIND_STRUCT || kind == BTF_KIND_UNION;

well, not quite. If we have FWD with kflag, then we should match it to
BTF_KIND_UNION, and otherwise to STRUCT. We shouldn't mix them.

also do we match FWD in *base BTF* with FWD in *distilled base BTF*?
That seems a bit wrong, no?


> +                               break;
> +                       case BTF_KIND_ENUM:
> +                               compat_kind = kind == BTF_KIND_ENUM64;
> +                               break;
> +                       default:
> +                               break;
> +                       }
> +                       if (!compat_kind) {
> +                               pr_warn("kind incompatibility (%d != %d) between distilled base type '%s'[%d] and base type [%d]\n",
> +                                       dist_kind, kind, name, *dist_id, id);
> +                               return -EINVAL;
> +                       }
> +               }

umm, what if we are !compat_kind here? go to next or error out, but
there has to be a check


> +               /* validate that int, float struct, union sizes are compatible;
> +                * distilled base BTF encodes an empty STRUCT/UNION with
> +                * specific size for cases where a type is embedded in a split
> +                * type (so has to preserve size info).  Do not error out
> +                * on mismatch as another size match may occur for an
> +                * identically-named type.
> +                */
> +               switch (btf_kind(dist_t)) {
> +               case BTF_KIND_INT:
> +                       if (*(__u32 *)(t + 1) != *(__u32 *)(dist_t + 1))
> +                               continue;
> +                       if (t->size != dist_t->size)
> +                               continue;
> +                       break;
> +               case BTF_KIND_FLOAT:
> +               case BTF_KIND_STRUCT:
> +               case BTF_KIND_UNION:
> +                       if (t->size != dist_t->size)
> +                               continue;
> +                       break;
> +               default:
> +                       break;
> +               }

I don't know, I feel like all these compatibility checks would be
cleaner to handle as part of single switch based on btf_kind(dist_t).
This split between that big if and switch is error-prone and hard to
follow

> +               /* map id and name */
> +               r->map[*dist_id] = id;
> +               r->str_map[dist_t->name_off] = t->name_off;
> +       }
> +       /* ensure all distilled BTF ids have a mapping... */
> +       for (id = 1; id < r->nr_dist_base_types; id++) {
> +               if (r->map[id])
> +                       continue;
> +               t = btf_type_by_id(r->dist_base_btf, id);
> +               name = btf__name_by_offset(r->dist_base_btf, t->name_off);
> +               pr_warn("distilled base BTF type '%s' [%d] is not mapped to base BTF id\n",
> +                       name, id);
> +               return -EINVAL;
> +       }
> +       return 0;
> +}
> +

[...]

> +static int btf_rewrite_strs(__u32 *str_off, void *ctx)
> +{
> +       struct btf_relocate *r = ctx;
> +       int off;
> +
> +       if (!*str_off)
> +               return 0;
> +       if (*str_off >= r->str_start) {
> +               *str_off += r->str_diff;
> +       } else {
> +               off = r->str_map[*str_off];
> +               if (!off) {
> +                       pr_warn("string '%s' [offset %d] is not mapped to base BTF",
> +                               btf__str_by_offset(r->btf, off), *str_off);

str_off is __u32, but you are using %d

> +                       return -ENOENT;
> +               }
> +               *str_off = off;
> +       }
> +       return 0;
> +}
> +
> +static int btf_relocate_finalize(struct btf_relocate *r)
> +{
> +       const struct btf_header *dist_base_hdr;
> +       const struct btf_header *base_hdr;
> +       struct btf_type *t;
> +       int i, err;
> +
> +       dist_base_hdr = btf_header(r->dist_base_btf);
> +       base_hdr = btf_header(r->base_btf);
> +       r->str_start = dist_base_hdr->str_len;
> +       r->str_diff = base_hdr->str_len - dist_base_hdr->str_len;

it's subjective, but I find str_diff a bit harder to follow compared
to just storing str_old_start and str_new_start, and then doing
obvious translation

str_off = str_off - str_old_start + str_new_start;

This is obvious and will work for any condition, whether old_start is
smaller or bigger than new_start. Same idea for ID translation.

Not a big deal, but I thought I'd call this out.

> +       for (i = 0; i < r->nr_split_types; i++) {
> +               t = btf_type_by_id(r->btf, i + r->nr_dist_base_types);
> +               err = btf_type_visit_str_offs(t, btf_rewrite_strs, r);
> +               if (err)
> +                       break;

return err? Why do we want to set btf_set_base_btf() in case of an error?

> +       }
> +       btf_set_base_btf(r->btf, r->base_btf);
> +
> +       return err;
> +}
> +
> +/* If successful, output of relocation is updated BTF with base BTF pointing
> + * at base_btf, and type ids, strings adjusted accordingly
> + */
> +int btf_relocate(struct btf *btf, const struct btf *base_btf, __u32 **map_ids)
> +{
> +       unsigned int nr_types = btf__type_cnt(btf);
> +       struct btf_relocate r = {};
> +       struct btf_type *t;
> +       int diff_id, err = 0;
> +       __u32 id, i;
> +
> +       r.dist_base_btf = btf__base_btf(btf);
> +       if (!base_btf || r.dist_base_btf == base_btf)
> +               return 0;

Why is this not an error condition? Users shouldn't be calling
relocate on something that shouldn't be relocated.

> +
> +       r.nr_dist_base_types = btf__type_cnt(r.dist_base_btf);
> +       r.nr_base_types = btf__type_cnt(base_btf);
> +       r.nr_split_types = nr_types - r.nr_dist_base_types;
> +       r.btf = btf;
> +       r.base_btf = base_btf;
> +
> +       r.map = calloc(nr_types, sizeof(*r.map));

Is this an ID map? Then maybe call it id_map to be symmetrical to str_map?

> +       r.str_map = calloc(btf_header(r.dist_base_btf)->str_len, sizeof(*r.str_map));
> +       r.dist_base_index = calloc(r.nr_dist_base_types, sizeof(*r.dist_base_index));
> +       if (!r.map || !r.str_map || !r.dist_base_index) {
> +               err = -ENOMEM;
> +               goto err_out;
> +       }
> +
> +       err = btf_relocate_validate_distilled_base(&r);
> +       if (err)
> +               goto err_out;
> +
> +       diff_id = r.nr_base_types - r.nr_dist_base_types;
> +       /* Split BTF ids will start from after last base BTF id. */
> +       for (id = r.nr_dist_base_types; id < nr_types; id++)
> +               r.map[id] = id + diff_id;
> +
> +       /* Build a map from distilled base ids to actual base BTF ids; it is used
> +        * to update split BTF id references.
> +        */
> +       err = btf_relocate_map_distilled_base(&r);
> +       if (err)
> +               goto err_out;
> +
> +       /* Next, rewrite type ids in split BTF, replacing split ids with updated
> +        * ids based on number of types in base BTF, and base ids with
> +        * relocated ids from base_btf.
> +        */
> +       for (i = 0, id = r.nr_dist_base_types; i < r.nr_split_types; i++, id++) {
> +               t = btf_type_by_id(btf, id);
> +               err = btf_type_visit_type_ids(t, btf_relocate_rewrite_type_id, &r);
> +               if (err)
> +                       goto err_out;
> +       }
> +       /* Finally reset base BTF to base_btf; as part of this operation, string
> +        * offsets are also updated, and we are done.
> +        */
> +       err = btf_relocate_finalize(&r);
> +err_out:
> +       if (!err && map_ids)
> +               *map_ids = r.map;
> +       else
> +               free(r.map);

this is a bit convoluted. maybe something like


    err = btf_relocate_finalize(&r);
    if (err)
        goto err_out;

    if (map_ids) {
        *map_ids = r.map;
        r.map = NULL;
    }

err_out:
    ... all the free()s unconditionally ...


(even just doing only error case for err_out and duplicating a few
free()'s in success path seems nicer)

> +       free(r.str_map);
> +       free(r.dist_base_index);
> +       return err;
> +}

[...]
Kui-Feng Lee May 23, 2024, 1:06 a.m. UTC | #2
On 5/17/24 03:22, Alan Maguire wrote:
> Map distilled base BTF type ids referenced in split BTF and their
> references to the base BTF passed in, and if the mapping succeeds,
> reparent the split BTF to the base BTF.
> 
> Relocation is done by first verifying that distilled base BTF
> only consists of named INT, FLOAT, ENUM, FWD, STRUCT and
> UNION kinds; then we sort these to speed lookups.  Once sorted,
> the base BTF is iterated, and for each relevant kind we check
> for an equivalent in distilled base BTF.  When found, the
> mapping from distilled -> base BTF id and string offset is recorded.
> 
> Once all mappings are established, we can update type ids
> and string offsets in split BTF and reparent it to the new base.
> 
> Signed-off-by: Alan Maguire <alan.maguire@oracle.com>
> ---
[...]
> +/* Comparison between base BTF type (search type) and distilled base types (target).
> + * Because there is no bsearch_r() we need to use the search key - which also is
> + * the first element of struct btf_relocate * - as a means to retrieve the
> + * struct btf_relocate *.
> + */
> +static int cmp_base_and_distilled_btf_types(const void *idbase, const void *iddist)
> +{
> +	struct btf_relocate *r = (struct btf_relocate *)idbase;
> +	const struct btf_type *tbase = btf_type_by_id(r->base_btf, *(__u32 *)idbase);

"*(__u32 *)idbase" together with the previous line is a little difficult
to decipher. Using "r->search_id" here is more intuitive and easier to read.

> +	const struct btf_type *tdist = btf_type_by_id(r->dist_base_btf, *(__u32 *)iddist);
> +
> +	return strcmp(btf__name_by_offset(r->base_btf, tbase->name_off),
> +		      btf__name_by_offset(r->dist_base_btf, tdist->name_off));
> +}
> +
> +/* Build a map from distilled base BTF ids to base BTF ids. To do so, iterate
> + * through base BTF looking up distilled type (using binary search) equivalents.
> + */
> +static int btf_relocate_map_distilled_base(struct btf_relocate *r)
> +{
> +	struct btf_type *t;
> +	const char *name;
> +	__u32 id;
> +
> +	/* generate a sort index array of type ids sorted by name for distilled
> +	 * base BTF to speed lookups.
> +	 */
> +	for (id = 1; id < r->nr_dist_base_types; id++)
> +		r->dist_base_index[id] = id;
> +	qsort_r(r->dist_base_index, r->nr_dist_base_types, sizeof(__u32), cmp_btf_types,
> +		(struct btf *)r->dist_base_btf);
> +
> +	for (id = 1; id < r->nr_base_types; id++) {
> +		struct btf_type *dist_t;
> +		int dist_kind, kind;
> +		bool compat_kind;
> +		__u32 *dist_id;
> +
> +		t = btf_type_by_id(r->base_btf, id);
> +		kind = btf_kind(t);
> +		/* distilled base consists of named types only. */
> +		if (!t->name_off)
> +			continue;
> +		switch (kind) {
> +		case BTF_KIND_INT:
> +		case BTF_KIND_FLOAT:
> +		case BTF_KIND_ENUM:
> +		case BTF_KIND_ENUM64:
> +		case BTF_KIND_FWD:
> +		case BTF_KIND_STRUCT:
> +		case BTF_KIND_UNION:
> +			break;
> +		default:
> +			continue;
> +		}
> +		r->search_id = id;
> +		dist_id = bsearch(&r->search_id, r->dist_base_index, r->nr_dist_base_types,
> +				  sizeof(__u32), cmp_base_and_distilled_btf_types);
> +		if (!dist_id)
> +			continue;
> +		if (!*dist_id || *dist_id > r->nr_dist_base_types) {
> +			pr_warn("base BTF id [%d] maps to invalid distilled base BTF id [%d]\n",
> +				id, *dist_id);
> +			return -EINVAL;
> +		}
> +		/* validate that kinds are compatible */
> +		dist_t = btf_type_by_id(r->dist_base_btf, *dist_id);
> +		dist_kind = btf_kind(dist_t);
> +		name = btf__name_by_offset(r->dist_base_btf, dist_t->name_off);
> +		compat_kind = dist_kind == kind;
> +		if (!compat_kind) {
> +			switch (dist_kind) {
> +			case BTF_KIND_FWD:
> +				compat_kind = kind == BTF_KIND_STRUCT || kind == BTF_KIND_UNION;
> +				break;
> +			case BTF_KIND_ENUM:
> +				compat_kind = kind == BTF_KIND_ENUM64;
> +				break;
> +			default:
> +				break;
> +			}
> +			if (!compat_kind) {
> +				pr_warn("kind incompatibility (%d != %d) between distilled base type '%s'[%d] and base type [%d]\n",
> +					dist_kind, kind, name, *dist_id, id);
> +				return -EINVAL;
> +			}
> +		}
> +		/* validate that int, float struct, union sizes are compatible;
> +		 * distilled base BTF encodes an empty STRUCT/UNION with
> +		 * specific size for cases where a type is embedded in a split
> +		 * type (so has to preserve size info).  Do not error out
> +		 * on mismatch as another size match may occur for an
> +		 * identically-named type.
> +		 */
> +		switch (btf_kind(dist_t)) {
> +		case BTF_KIND_INT:
> +			if (*(__u32 *)(t + 1) != *(__u32 *)(dist_t + 1))
> +				continue;

I know we have code like this here and there. But, could we just use
btf_int_encoding() and btf_int_offset() or invent another function to
return this value and make this comparison more meaningful?
Or just a line of comment to explain what it is.

> +			if (t->size != dist_t->size)
> +				continue;
> +			break;
> +		case BTF_KIND_FLOAT:
> +		case BTF_KIND_STRUCT:
> +		case BTF_KIND_UNION:
> +			if (t->size != dist_t->size)
> +				continue;
> +			break;
> +		default:
> +			break;
> +		}
> +		/* map id and name */
> +		r->map[*dist_id] = id;
> +		r->str_map[dist_t->name_off] = t->name_off;
> +	}
> +	/* ensure all distilled BTF ids have a mapping... */
> +	for (id = 1; id < r->nr_dist_base_types; id++) {
> +		if (r->map[id])
> +			continue;
> +		t = btf_type_by_id(r->dist_base_btf, id);
> +		name = btf__name_by_offset(r->dist_base_btf, t->name_off);
> +		pr_warn("distilled base BTF type '%s' [%d] is not mapped to base BTF id\n",
> +			name, id);
> +		return -EINVAL;
> +	}
> +	return 0;
> +}
[...]
diff mbox series

Patch

diff --git a/tools/lib/bpf/Build b/tools/lib/bpf/Build
index b6619199a706..336da6844d42 100644
--- a/tools/lib/bpf/Build
+++ b/tools/lib/bpf/Build
@@ -1,4 +1,4 @@ 
 libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o \
 	    netlink.o bpf_prog_linfo.o libbpf_probes.o hashmap.o \
 	    btf_dump.o ringbuf.o strset.o linker.o gen_loader.o relo_core.o \
-	    usdt.o zip.o elf.o features.o
+	    usdt.o zip.o elf.o features.o btf_relocate.o
diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index feba071087a5..a4d6c46cc251 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -5626,3 +5626,20 @@  int btf__distill_base(const struct btf *src_btf, struct btf **new_base_btf,
 	btf__free(new_base);
 	return libbpf_err(err);
 }
+
+const struct btf_header *btf_header(const struct btf *btf)
+{
+	return btf->hdr;	/* hands back the hdr pointer held by *btf*; *btf* must be non-NULL */
+}
+
+void btf_set_base_btf(struct btf *btf, const struct btf *base_btf)
+{
+	btf->base_btf = (struct btf *)base_btf;	/* cast drops the const qualifier */
+	btf->start_id = btf__type_cnt(base_btf);	/* set from base_btf's type count */
+	btf->start_str_off = base_btf->hdr->str_len;	/* base_btf's str_len; base_btf must be non-NULL */
+}
+
+int btf__relocate(struct btf *btf, const struct btf *base_btf)
+{
+	return btf_relocate(btf, base_btf, NULL);	/* NULL: the map_ids output is not requested */
+}
diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
index 8e1702ad5ef4..f75db650e426 100644
--- a/tools/lib/bpf/btf.h
+++ b/tools/lib/bpf/btf.h
@@ -282,6 +282,14 @@  struct btf_dedup_opts {
 
 LIBBPF_API int btf__dedup(struct btf *btf, const struct btf_dedup_opts *opts);
 
+/**
+ * @brief **btf__relocate()** will check the split BTF *btf* for references
+ * to base BTF kinds, and verify those references are compatible with
+ * *base_btf*; if they are, *btf* is adjusted such that it is re-parented to
+ * *base_btf* and type ids and strings are adjusted to accommodate this.
+ */
+LIBBPF_API int btf__relocate(struct btf *btf, const struct btf *base_btf);
+
 struct btf_dump;
 
 struct btf_dump_opts {
diff --git a/tools/lib/bpf/btf_relocate.c b/tools/lib/bpf/btf_relocate.c
new file mode 100644
index 000000000000..c06851f05472
--- /dev/null
+++ b/tools/lib/bpf/btf_relocate.c
@@ -0,0 +1,318 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024, Oracle and/or its affiliates. */
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+
+#include "btf.h"
+#include "bpf.h"
+#include "libbpf.h"
+#include "libbpf_internal.h"
+
+struct btf;
+
+/* Working state for one btf_relocate() call, shared by all relocation steps. */
+struct btf_relocate {
+	__u32 search_id;				/* must be first field; see search below */
+	/* split BTF being relocated; modified in place */
+	struct btf *btf;
+	/* base BTF that the split BTF is being re-parented to */
+	const struct btf *base_btf;
+	/* base that the split BTF currently points at (btf__base_btf(btf)) */
+	const struct btf *dist_base_btf;
+	/* type counts as returned by btf__type_cnt() for base_btf ... */
+	unsigned int nr_base_types;
+	/* ... number of types in btf beyond those of dist_base_btf ... */
+	unsigned int nr_split_types;
+	/* ... and type count of dist_base_btf */
+	unsigned int nr_dist_base_types;
+	/* string section length of dist_base_btf; string offsets at or above
+	 * this value are shifted by str_diff, lower ones go through str_map
+	 */
+	int str_start;
+	/* base_btf string section length minus that of dist_base_btf */
+	int str_diff;
+	/* indexed by pre-relocation type id, yields post-relocation type id */
+	__u32 *map;
+	/* indexed by dist_base_btf string offset, yields base_btf string
+	 * offset; 0 means "no mapping"
+	 */
+	__u32 *str_map;
+	/* dist_base_btf type ids sorted by type name, used for bsearch() */
+	__u32 *dist_base_index;
+};
+
+/* btf_type_visit_type_ids() callback: overwrite the type id at @id with the
+ * value stored for it in the id map of the struct btf_relocate passed as @ctx.
+ * Always returns 0.
+ */
+static int btf_relocate_rewrite_type_id(__u32 *id, void *ctx)
+{
+	const struct btf_relocate *relo = ctx;
+	__u32 old_id = *id;
+
+	*id = relo->map[old_id];
+	return 0;
+}
+
+/* qsort_r() comparator: order two type ids by the names of the types they
+ * refer to in the BTF passed via @priv. A plain strcmp() is enough here since
+ * all distilled types are named.
+ */
+static int cmp_btf_types(const void *id1, const void *id2, void *priv)
+{
+	const struct btf *btf = priv;
+	__u32 lhs_id = *(const __u32 *)id1;
+	__u32 rhs_id = *(const __u32 *)id2;
+	const char *lhs_name, *rhs_name;
+
+	lhs_name = btf__name_by_offset(btf, btf_type_by_id(btf, lhs_id)->name_off);
+	rhs_name = btf__name_by_offset(btf, btf_type_by_id(btf, rhs_id)->name_off);
+
+	return strcmp(lhs_name, rhs_name);
+}
+
+/* bsearch() comparator: compare the name of a base BTF type (the key) with
+ * the name of a distilled base BTF type (an element of the sorted index).
+ *
+ * bsearch() has no context argument (there is no bsearch_r()), so the key
+ * passed in is the address of search_id, the first member of
+ * struct btf_relocate; casting that address recovers the enclosing struct.
+ */
+static int cmp_base_and_distilled_btf_types(const void *idbase, const void *iddist)
+{
+	struct btf_relocate *relo = (struct btf_relocate *)idbase;
+	__u32 base_id = relo->search_id;
+	__u32 dist_id = *(const __u32 *)iddist;
+	const struct btf_type *base_t, *dist_t;
+	const char *base_name, *dist_name;
+
+	base_t = btf_type_by_id(relo->base_btf, base_id);
+	dist_t = btf_type_by_id(relo->dist_base_btf, dist_id);
+	base_name = btf__name_by_offset(relo->base_btf, base_t->name_off);
+	dist_name = btf__name_by_offset(relo->dist_base_btf, dist_t->name_off);
+
+	return strcmp(base_name, dist_name);
+}
+
+/* Build a map from distilled base BTF ids to base BTF ids. To do so, iterate
+ * through base BTF looking up distilled type (using binary search) equivalents.
+ *
+ * For every match, r->map[] records the base BTF id for the distilled id and
+ * r->str_map[] records the base BTF name offset for the distilled name offset.
+ *
+ * Returns 0 on success, -EINVAL if a lookup yields an out-of-range distilled
+ * id, if a name match has an incompatible kind, or if some distilled base
+ * type ends up without a mapping.
+ */
+static int btf_relocate_map_distilled_base(struct btf_relocate *r)
+{
+	struct btf_type *t;
+	const char *name;
+	__u32 id;
+
+	/* generate a sort index array of type ids sorted by name for distilled
+	 * base BTF to speed lookups.
+	 */
+	for (id = 1; id < r->nr_dist_base_types; id++)
+		r->dist_base_index[id] = id;
+	qsort_r(r->dist_base_index, r->nr_dist_base_types, sizeof(__u32), cmp_btf_types,
+		(struct btf *)r->dist_base_btf);
+
+	for (id = 1; id < r->nr_base_types; id++) {
+		struct btf_type *dist_t;
+		int dist_kind, kind;
+		bool compat_kind;
+		__u32 *dist_id;
+
+		t = btf_type_by_id(r->base_btf, id);
+		kind = btf_kind(t);
+		/* distilled base consists of named types only. */
+		if (!t->name_off)
+			continue;
+		switch (kind) {
+		case BTF_KIND_INT:
+		case BTF_KIND_FLOAT:
+		case BTF_KIND_ENUM:
+		case BTF_KIND_ENUM64:
+		case BTF_KIND_FWD:
+		case BTF_KIND_STRUCT:
+		case BTF_KIND_UNION:
+			break;
+		default:
+			continue;
+		}
+		/* the comparator recovers r from the address of r->search_id */
+		r->search_id = id;
+		dist_id = bsearch(&r->search_id, r->dist_base_index, r->nr_dist_base_types,
+				  sizeof(__u32), cmp_base_and_distilled_btf_types);
+		if (!dist_id)
+			continue;
+		/* valid distilled ids are 1 .. nr_dist_base_types - 1, so an id
+		 * equal to nr_dist_base_types is already out of range.
+		 */
+		if (!*dist_id || *dist_id >= r->nr_dist_base_types) {
+			pr_warn("base BTF id [%d] maps to invalid distilled base BTF id [%d]\n",
+				id, *dist_id);
+			return -EINVAL;
+		}
+		/* validate that kinds are compatible */
+		dist_t = btf_type_by_id(r->dist_base_btf, *dist_id);
+		dist_kind = btf_kind(dist_t);
+		name = btf__name_by_offset(r->dist_base_btf, dist_t->name_off);
+		compat_kind = dist_kind == kind;
+		if (!compat_kind) {
+			switch (dist_kind) {
+			case BTF_KIND_FWD:
+				compat_kind = kind == BTF_KIND_STRUCT || kind == BTF_KIND_UNION;
+				break;
+			case BTF_KIND_ENUM:
+				compat_kind = kind == BTF_KIND_ENUM64;
+				break;
+			default:
+				break;
+			}
+			if (!compat_kind) {
+				pr_warn("kind incompatibility (%d != %d) between distilled base type '%s'[%d] and base type [%d]\n",
+					dist_kind, kind, name, *dist_id, id);
+				return -EINVAL;
+			}
+		}
+		/* validate that int, float struct, union sizes are compatible;
+		 * distilled base BTF encodes an empty STRUCT/UNION with
+		 * specific size for cases where a type is embedded in a split
+		 * type (so has to preserve size info).  Do not error out
+		 * on mismatch as another size match may occur for an
+		 * identically-named type.
+		 */
+		switch (btf_kind(dist_t)) {
+		case BTF_KIND_INT:
+			/* compare the __u32 that immediately follows each btf_type */
+			if (*(__u32 *)(t + 1) != *(__u32 *)(dist_t + 1))
+				continue;
+			if (t->size != dist_t->size)
+				continue;
+			break;
+		case BTF_KIND_FLOAT:
+		case BTF_KIND_STRUCT:
+		case BTF_KIND_UNION:
+			if (t->size != dist_t->size)
+				continue;
+			break;
+		default:
+			break;
+		}
+		/* map id and name */
+		r->map[*dist_id] = id;
+		r->str_map[dist_t->name_off] = t->name_off;
+	}
+	/* ensure all distilled BTF ids have a mapping... */
+	for (id = 1; id < r->nr_dist_base_types; id++) {
+		if (r->map[id])
+			continue;
+		t = btf_type_by_id(r->dist_base_btf, id);
+		name = btf__name_by_offset(r->dist_base_btf, t->name_off);
+		pr_warn("distilled base BTF type '%s' [%d] is not mapped to base BTF id\n",
+			name, id);
+		return -EINVAL;
+	}
+	return 0;
+}
+
+/* Check that the distilled base holds only what is expected of it: named
+ * INT, FLOAT, ENUM, STRUCT, UNION and FWD types. Returns 0 if every type
+ * passes, or -EINVAL (after logging the first offending type) otherwise.
+ */
+static int btf_relocate_validate_distilled_base(struct btf_relocate *r)
+{
+	unsigned int id;
+
+	for (id = 1; id < r->nr_dist_base_types; id++) {
+		const struct btf_type *type = btf_type_by_id(r->dist_base_btf, id);
+		int kind = btf_kind(type);
+		bool expected_kind;
+
+		expected_kind = kind == BTF_KIND_INT || kind == BTF_KIND_FLOAT ||
+				kind == BTF_KIND_ENUM || kind == BTF_KIND_STRUCT ||
+				kind == BTF_KIND_UNION || kind == BTF_KIND_FWD;
+		if (!expected_kind) {
+			pr_warn("type [%d] in distilled based BTF has unexpected kind [%d]\n",
+				id, kind);
+			return -EINVAL;
+		}
+		/* the kind is acceptable, but the type must also carry a name */
+		if (!type->name_off) {
+			pr_warn("type [%d], kind [%d] is invalid for distilled base BTF; it is anonymous\n",
+				id, kind);
+			return -EINVAL;
+		}
+	}
+	return 0;
+}
+
+/* btf_type_visit_str_offs() callback: translate one string offset of a split
+ * BTF type so that it is valid relative to the new base BTF.
+ *
+ * - offset 0 is left alone;
+ * - offsets at or beyond r->str_start are shifted by r->str_diff;
+ * - lower offsets are looked up in r->str_map[].
+ *
+ * Returns 0 on success, -ENOENT if an offset below r->str_start has no
+ * mapping.
+ */
+static int btf_rewrite_strs(__u32 *str_off, void *ctx)
+{
+	struct btf_relocate *r = ctx;
+	__u32 off;
+
+	if (!*str_off)
+		return 0;
+	if (*str_off >= r->str_start) {
+		*str_off += r->str_diff;
+		return 0;
+	}
+	off = r->str_map[*str_off];
+	if (!off) {
+		/* report the unmapped string itself: off is 0 on this path, so
+		 * the lookup must use the original offset.
+		 */
+		pr_warn("string '%s' [offset %u] is not mapped to base BTF\n",
+			btf__str_by_offset(r->btf, *str_off), *str_off);
+		return -ENOENT;
+	}
+	*str_off = off;
+	return 0;
+}
+
+/* Last relocation step: rewrite the string offsets of every split type and,
+ * once that has succeeded, re-parent r->btf to r->base_btf.
+ *
+ * Returns 0 on success or the first error reported while rewriting string
+ * offsets; in the error case the BTF is not re-parented.
+ */
+static int btf_relocate_finalize(struct btf_relocate *r)
+{
+	const struct btf_header *dist_base_hdr = btf_header(r->dist_base_btf);
+	const struct btf_header *base_hdr = btf_header(r->base_btf);
+	unsigned int i;
+	int err;
+
+	r->str_start = dist_base_hdr->str_len;
+	r->str_diff = base_hdr->str_len - dist_base_hdr->str_len;
+	for (i = 0; i < r->nr_split_types; i++) {
+		struct btf_type *t = btf_type_by_id(r->btf, i + r->nr_dist_base_types);
+
+		err = btf_type_visit_str_offs(t, btf_rewrite_strs, r);
+		if (err)
+			return err;
+	}
+	btf_set_base_btf(r->btf, r->base_btf);
+
+	/* explicit 0: there may have been no split types to visit at all */
+	return 0;
+}
+
+/* If successful, output of relocation is updated BTF with base BTF pointing
+ * at base_btf, and type ids, strings adjusted accordingly
+ *
+ * @btf: split BTF to relocate; modified in place.
+ * @base_btf: base BTF to re-parent @btf to. If it is NULL or already the base
+ *	of @btf, nothing is done and 0 is returned.
+ * @map_ids: optional; on success *@map_ids receives the array mapping
+ *	pre-relocation type ids to post-relocation type ids. The array is
+ *	heap-allocated and is not freed here, so the caller must free() it.
+ *
+ * Returns 0 on success; -EINVAL if @btf has no base BTF or validation/mapping
+ * of the distilled base fails; -ENOMEM on allocation failure; otherwise the
+ * error reported by a relocation step.
+ *
+ * NOTE: type ids and strings are rewritten in place with no rollback, so
+ * @btf may be left partially modified if a later step fails.
+ */
+int btf_relocate(struct btf *btf, const struct btf *base_btf, __u32 **map_ids)
+{
+	unsigned int nr_types = btf__type_cnt(btf);
+	struct btf_relocate r = {};
+	struct btf_type *t;
+	int diff_id, err = 0;
+	__u32 id, i;
+
+	r.dist_base_btf = btf__base_btf(btf);
+	if (!base_btf || r.dist_base_btf == base_btf)
+		return 0;
+	/* only split BTF can be relocated; without a current base there is
+	 * nothing to map from, and the calls below would dereference NULL.
+	 */
+	if (!r.dist_base_btf)
+		return -EINVAL;
+
+	r.nr_dist_base_types = btf__type_cnt(r.dist_base_btf);
+	r.nr_base_types = btf__type_cnt(base_btf);
+	r.nr_split_types = nr_types - r.nr_dist_base_types;
+	r.btf = btf;
+	r.base_btf = base_btf;
+
+	r.map = calloc(nr_types, sizeof(*r.map));
+	r.str_map = calloc(btf_header(r.dist_base_btf)->str_len, sizeof(*r.str_map));
+	r.dist_base_index = calloc(r.nr_dist_base_types, sizeof(*r.dist_base_index));
+	if (!r.map || !r.str_map || !r.dist_base_index) {
+		err = -ENOMEM;
+		goto err_out;
+	}
+
+	err = btf_relocate_validate_distilled_base(&r);
+	if (err)
+		goto err_out;
+
+	diff_id = r.nr_base_types - r.nr_dist_base_types;
+	/* Split BTF ids will start from after last base BTF id. */
+	for (id = r.nr_dist_base_types; id < nr_types; id++)
+		r.map[id] = id + diff_id;
+
+	/* Build a map from distilled base ids to actual base BTF ids; it is used
+	 * to update split BTF id references.
+	 */
+	err = btf_relocate_map_distilled_base(&r);
+	if (err)
+		goto err_out;
+
+	/* Next, rewrite type ids in split BTF, replacing split ids with updated
+	 * ids based on number of types in base BTF, and base ids with
+	 * relocated ids from base_btf.
+	 */
+	for (i = 0, id = r.nr_dist_base_types; i < r.nr_split_types; i++, id++) {
+		t = btf_type_by_id(btf, id);
+		err = btf_type_visit_type_ids(t, btf_relocate_rewrite_type_id, &r);
+		if (err)
+			goto err_out;
+	}
+	/* Finally reset base BTF to base_btf; as part of this operation, string
+	 * offsets are also updated, and we are done.
+	 */
+	err = btf_relocate_finalize(&r);
+err_out:
+	/* ownership of the id map moves to the caller only on success */
+	if (!err && map_ids)
+		*map_ids = r.map;
+	else
+		free(r.map);
+	free(r.str_map);
+	free(r.dist_base_index);
+	return err;
+}
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index fd7bfeaba542..849cbe0def00 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -421,6 +421,7 @@  LIBBPF_1.5.0 {
 	global:
 		btf__distill_base;
 		btf__parse_opts;
+		btf__relocate;
 		bpf_program__attach_sockmap;
 		ring__consume_n;
 		ring_buffer__consume_n;
diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h
index a0dcfb82e455..3b98edae254f 100644
--- a/tools/lib/bpf/libbpf_internal.h
+++ b/tools/lib/bpf/libbpf_internal.h
@@ -234,6 +234,9 @@  struct btf_type;
 struct btf_type *btf_type_by_id(const struct btf *btf, __u32 type_id);
 const char *btf_kind_str(const struct btf_type *t);
 const struct btf_type *skip_mods_and_typedefs(const struct btf *btf, __u32 id, __u32 *res_id);
+/* return the header pointer stored in @btf */
+const struct btf_header *btf_header(const struct btf *btf);
+/* re-parent @btf to @base_btf, resetting its starting type id and string offset */
+void btf_set_base_btf(struct btf *btf, const struct btf *base_btf);
+/* relocate split @btf against @base_btf; if @map_ids is non-NULL and relocation
+ * succeeds, *@map_ids receives the old->new type id map, which the caller frees
+ */
+int btf_relocate(struct btf *btf, const struct btf *base_btf, __u32 **map_ids);
 
 static inline enum btf_func_linkage btf_func_linkage(const struct btf_type *t)
 {