diff mbox series

[bpf-next] bpf: limit bpf_core_types_are_compat() recursion

Message ID 20211210172034.13614-1-mcroce@linux.microsoft.com (mailing list archive)
State Changes Requested
Delegated to: BPF
Headers show
Series [bpf-next] bpf: limit bpf_core_types_are_compat() recursion | expand

Checks

Context Check Description
netdev/tree_selection success Clearly marked for bpf-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success Link
netdev/cover_letter success Single patches do not need cover letters
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 107 this patch: 107
netdev/cc_maintainers warning 6 maintainers not CCed: netdev@vger.kernel.org kafai@fb.com songliubraving@fb.com john.fastabend@gmail.com kpsingh@kernel.org yhs@fb.com
netdev/build_clang success Errors and warnings before: 39 this patch: 39
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 111 this patch: 111
netdev/checkpatch warning WARNING: line length of 82 exceeds 80 columns WARNING: line length of 85 exceeds 80 columns WARNING: line length of 89 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-next success VM_Test
bpf/vmtest-bpf-next-PR success PR summary

Commit Message

Matteo Croce Dec. 10, 2021, 5:20 p.m. UTC
From: Matteo Croce <mcroce@microsoft.com>

In userspace, bpf_core_types_are_compat() is a recursive function which
can't be put in the kernel as is.
Limit the recursion depth to 2, to avoid potential stack overflows
in kernel.

Signed-off-by: Matteo Croce <mcroce@microsoft.com>
---
 include/linux/btf.h |   5 +++
 kernel/bpf/btf.c    | 107 +++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 111 insertions(+), 1 deletion(-)

Comments

Alexei Starovoitov Dec. 15, 2021, 5:56 a.m. UTC | #1
On Fri, Dec 10, 2021 at 9:20 AM Matteo Croce <mcroce@linux.microsoft.com> wrote:
>
> From: Matteo Croce <mcroce@microsoft.com>
>
> In userspace, bpf_core_types_are_compat() is a recursive function which
> can't be put in the kernel as is.
> Limit the recursion depth to 2, to avoid potential stack overflows
> in kernel.
>
> Signed-off-by: Matteo Croce <mcroce@microsoft.com>

Thank you for taking a stab at it!

> +#define MAX_TYPES_ARE_COMPAT_DEPTH 2
> +
> +static
> +int __bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id,
> +                               const struct btf *targ_btf, __u32 targ_id,
> +                               int level)
> +{
> +       const struct btf_type *local_type, *targ_type;
> +       int depth = 32; /* max recursion depth */
> +
> +       if (level <= 0)
> +               return -EINVAL;
> +
> +       /* caller made sure that names match (ignoring flavor suffix) */
> +       local_type = btf_type_by_id(local_btf, local_id);
> +       targ_type = btf_type_by_id(targ_btf, targ_id);
> +       if (btf_kind(local_type) != btf_kind(targ_type))
> +               return 0;
> +
> +recur:
> +       depth--;
> +       if (depth < 0)
> +               return -EINVAL;
> +
> +       local_type = btf_type_skip_modifiers(local_btf, local_id, &local_id);
> +       targ_type = btf_type_skip_modifiers(targ_btf, targ_id, &targ_id);
> +       if (!local_type || !targ_type)
> +               return -EINVAL;
> +
> +       if (btf_kind(local_type) != btf_kind(targ_type))
> +               return 0;
> +
> +       switch (btf_kind(local_type)) {
> +       case BTF_KIND_UNKN:
> +       case BTF_KIND_STRUCT:
> +       case BTF_KIND_UNION:
> +       case BTF_KIND_ENUM:
> +       case BTF_KIND_FWD:
> +               return 1;
> +       case BTF_KIND_INT:
> +               /* just reject deprecated bitfield-like integers; all other
> +                * integers are by default compatible between each other
> +                */
> +               return btf_int_offset(local_type) == 0 && btf_int_offset(targ_type) == 0;
> +       case BTF_KIND_PTR:
> +               local_id = local_type->type;
> +               targ_id = targ_type->type;
> +               goto recur;
> +       case BTF_KIND_ARRAY:
> +               local_id = btf_array(local_type)->type;
> +               targ_id = btf_array(targ_type)->type;
> +               goto recur;
> +       case BTF_KIND_FUNC_PROTO: {
> +               struct btf_param *local_p = btf_params(local_type);
> +               struct btf_param *targ_p = btf_params(targ_type);
> +               __u16 local_vlen = btf_vlen(local_type);
> +               __u16 targ_vlen = btf_vlen(targ_type);
> +               int i, err;
> +
> +               if (local_vlen != targ_vlen)
> +                       return 0;
> +
> +               for (i = 0; i < local_vlen; i++, local_p++, targ_p++) {
> +                       btf_type_skip_modifiers(local_btf, local_p->type, &local_id);
> +                       btf_type_skip_modifiers(targ_btf, targ_p->type, &targ_id);


Maybe do a level check here?
Since calling it and immediately returning doesn't conserve
the stack.
If it gets called it can finish fine, but
calling it again would be too much.
In other words checking the level here gives us
room for one more frame.

> +                       err = __bpf_core_types_are_compat(local_btf, local_id,
> +                                                         targ_btf, targ_id,
> +                                                         level - 1);
> +                       if (err <= 0)
> +                               return err;
> +               }
> +
> +               /* tail recurse for return type check */
> +               btf_type_skip_modifiers(local_btf, local_type->type, &local_id);
> +               btf_type_skip_modifiers(targ_btf, targ_type->type, &targ_id);
> +               goto recur;
> +       }
> +       default:
> +               pr_warn("unexpected kind %s relocated, local [%d], target [%d]\n",
> +                       btf_type_str(local_type), local_id, targ_id);

That should be bpf_log() instead.

> +               return 0;
> +       }
> +}

Please add tests that exercise this logic by enabling
additional lskels and a new test that hits the recursion limit.
I suspect we don't have such case in selftests.

Thanks!
Matteo Croce Dec. 15, 2021, 2:53 p.m. UTC | #2
On Wed, Dec 15, 2021 at 6:57 AM Alexei Starovoitov
<alexei.starovoitov@gmail.com> wrote:
>
> On Fri, Dec 10, 2021 at 9:20 AM Matteo Croce <mcroce@linux.microsoft.com> wrote:
> >
> > From: Matteo Croce <mcroce@microsoft.com>
> >
> > In userspace, bpf_core_types_are_compat() is a recursive function which
> > can't be put in the kernel as is.
> > Limit the recursion depth to 2, to avoid potential stack overflows
> > in kernel.
> >
> > Signed-off-by: Matteo Croce <mcroce@microsoft.com>
>
> Thank you for taking a stab at it!
>
> > +#define MAX_TYPES_ARE_COMPAT_DEPTH 2
> > +
> > +static
> > +int __bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id,
> > +                               const struct btf *targ_btf, __u32 targ_id,
> > +                               int level)
> > +{
> > +       const struct btf_type *local_type, *targ_type;
> > +       int depth = 32; /* max recursion depth */
> > +
> > +       if (level <= 0)
> > +               return -EINVAL;
> > +
> > +       /* caller made sure that names match (ignoring flavor suffix) */
> > +       local_type = btf_type_by_id(local_btf, local_id);
> > +       targ_type = btf_type_by_id(targ_btf, targ_id);
> > +       if (btf_kind(local_type) != btf_kind(targ_type))
> > +               return 0;
> > +
> > +recur:
> > +       depth--;
> > +       if (depth < 0)
> > +               return -EINVAL;
> > +
> > +       local_type = btf_type_skip_modifiers(local_btf, local_id, &local_id);
> > +       targ_type = btf_type_skip_modifiers(targ_btf, targ_id, &targ_id);
> > +       if (!local_type || !targ_type)
> > +               return -EINVAL;
> > +
> > +       if (btf_kind(local_type) != btf_kind(targ_type))
> > +               return 0;
> > +
> > +       switch (btf_kind(local_type)) {
> > +       case BTF_KIND_UNKN:
> > +       case BTF_KIND_STRUCT:
> > +       case BTF_KIND_UNION:
> > +       case BTF_KIND_ENUM:
> > +       case BTF_KIND_FWD:
> > +               return 1;
> > +       case BTF_KIND_INT:
> > +               /* just reject deprecated bitfield-like integers; all other
> > +                * integers are by default compatible between each other
> > +                */
> > +               return btf_int_offset(local_type) == 0 && btf_int_offset(targ_type) == 0;
> > +       case BTF_KIND_PTR:
> > +               local_id = local_type->type;
> > +               targ_id = targ_type->type;
> > +               goto recur;
> > +       case BTF_KIND_ARRAY:
> > +               local_id = btf_array(local_type)->type;
> > +               targ_id = btf_array(targ_type)->type;
> > +               goto recur;
> > +       case BTF_KIND_FUNC_PROTO: {
> > +               struct btf_param *local_p = btf_params(local_type);
> > +               struct btf_param *targ_p = btf_params(targ_type);
> > +               __u16 local_vlen = btf_vlen(local_type);
> > +               __u16 targ_vlen = btf_vlen(targ_type);
> > +               int i, err;
> > +
> > +               if (local_vlen != targ_vlen)
> > +                       return 0;
> > +
> > +               for (i = 0; i < local_vlen; i++, local_p++, targ_p++) {
> > +                       btf_type_skip_modifiers(local_btf, local_p->type, &local_id);
> > +                       btf_type_skip_modifiers(targ_btf, targ_p->type, &targ_id);
>
>
> Maybe do a level check here?
> Since calling it and immediately returning doesn't conserve
> the stack.
> If it gets called it can finish fine, but
> calling it again would be too much.
> In other words checking the level here gives us
> room for one more frame.
>

I thought that the compiler was smart enough to return before
allocating most of the frame.
I tried and this is true only with gcc, not with clang.

> > +                       err = __bpf_core_types_are_compat(local_btf, local_id,
> > +                                                         targ_btf, targ_id,
> > +                                                         level - 1);
> > +                       if (err <= 0)
> > +                               return err;
> > +               }
> > +
> > +               /* tail recurse for return type check */
> > +               btf_type_skip_modifiers(local_btf, local_type->type, &local_id);
> > +               btf_type_skip_modifiers(targ_btf, targ_type->type, &targ_id);
> > +               goto recur;
> > +       }
> > +       default:
> > +               pr_warn("unexpected kind %s relocated, local [%d], target [%d]\n",
> > +                       btf_type_str(local_type), local_id, targ_id);
>
> That should be bpf_log() instead.
>

To do that I need a struct bpf_verifier_log, which is not present
there, neither in bpf_core_spec_match() or bpf_core_apply_relo_insn().
Should we drop the message at all?

> > +               return 0;
> > +       }
> > +}
>
> Please add tests that exercise this logic by enabling
> additional lskels and a new test that hits the recursion limit.
> I suspect we don't have such case in selftests.
>
> Thanks!

Will do!

Regards,
Alexei Starovoitov Dec. 15, 2021, 5:29 p.m. UTC | #3
On Wed, Dec 15, 2021 at 6:54 AM Matteo Croce <mcroce@linux.microsoft.com> wrote:
> >
> > Maybe do a level check here?
> > Since calling it and immediately returning doesn't conserve
> > the stack.
> > If it gets called it can finish fine, but
> > calling it again would be too much.
> > In other words checking the level here gives us
> > room for one more frame.
> >
>
> I thought that the compiler was smart enough to return before
> allocating most of the frame.
> I tried and this is true only with gcc, not with clang.

Interesting. That's a surprise.
Could you share the asm that gcc generates?

> > > +                       err = __bpf_core_types_are_compat(local_btf, local_id,
> > > +                                                         targ_btf, targ_id,
> > > +                                                         level - 1);
> > > +                       if (err <= 0)
> > > +                               return err;
> > > +               }
> > > +
> > > +               /* tail recurse for return type check */
> > > +               btf_type_skip_modifiers(local_btf, local_type->type, &local_id);
> > > +               btf_type_skip_modifiers(targ_btf, targ_type->type, &targ_id);
> > > +               goto recur;
> > > +       }
> > > +       default:
> > > +               pr_warn("unexpected kind %s relocated, local [%d], target [%d]\n",
> > > +                       btf_type_str(local_type), local_id, targ_id);
> >
> > That should be bpf_log() instead.
> >
>
> To do that I need a struct bpf_verifier_log, which is not present
> there, neither in bpf_core_spec_match() or bpf_core_apply_relo_insn().

It is there. See:
        err = bpf_core_apply_relo_insn((void *)ctx->log, insn, ...

> Should we drop the message at all?

Passing it into bpf_core_spec_match() and further into
bpf_core_types_are_compat() is probably unnecessary.
All callers have an error check with a log right after.
So I think we won't lose anything if we drop this log.

>
> > > +               return 0;
> > > +       }
> > > +}
> >
> > Please add tests that exercise this logic by enabling
> > additional lskels and a new test that hits the recursion limit.
> > I suspect we don't have such case in selftests.
> >
> > Thanks!
>
> Will do!

Thanks!
Matteo Croce Dec. 15, 2021, 6:21 p.m. UTC | #4
On Wed, Dec 15, 2021 at 6:29 PM Alexei Starovoitov
<alexei.starovoitov@gmail.com> wrote:
>
> On Wed, Dec 15, 2021 at 6:54 AM Matteo Croce <mcroce@linux.microsoft.com> wrote:
> > >
> > > Maybe do a level check here?
> > > Since calling it and immediately returning doesn't conserve
> > > the stack.
> > > If it gets called it can finish fine, but
> > > calling it again would be too much.
> > > In other words checking the level here gives us
> > > room for one more frame.
> > >
> >
> > I thought that the compiler was smart enough to return before
> > allocating most of the frame.
> > I tried and this is true only with gcc, not with clang.
>
> Interesting. That's a surprise.
> Could you share the asm that gcc generates?
>

Sure,

This is the gcc x86_64 asm on a stripped down
bpf_core_types_are_compat() with a 1k struct on the stack:

bpf_core_types_are_compat:
test esi, esi
jle .L69
push r15
push r14
push r13
push r12
push rbp
mov rbp, rdi
push rbx
mov ebx, esi
sub rsp, 9112
[...]
.L69:
or eax, -1
ret

This latest clang:

bpf_core_types_are_compat: # @bpf_core_types_are_compat
push rbp
push r15
push r14
push rbx
sub rsp, 1000
mov r14d, -1
test esi, esi
jle .LBB0_7
[...]
.LBB0_7:
mov eax, r14d
add rsp, 1000
pop rbx
pop r14
pop r15
pop rbp
ret

> > > > +                       err = __bpf_core_types_are_compat(local_btf, local_id,
> > > > +                                                         targ_btf, targ_id,
> > > > +                                                         level - 1);
> > > > +                       if (err <= 0)
> > > > +                               return err;
> > > > +               }
> > > > +
> > > > +               /* tail recurse for return type check */
> > > > +               btf_type_skip_modifiers(local_btf, local_type->type, &local_id);
> > > > +               btf_type_skip_modifiers(targ_btf, targ_type->type, &targ_id);
> > > > +               goto recur;
> > > > +       }
> > > > +       default:
> > > > +               pr_warn("unexpected kind %s relocated, local [%d], target [%d]\n",
> > > > +                       btf_type_str(local_type), local_id, targ_id);
> > >
> > > That should be bpf_log() instead.
> > >
> >
> > To do that I need a struct bpf_verifier_log, which is not present
> > there, neither in bpf_core_spec_match() or bpf_core_apply_relo_insn().
>
> It is there. See:
>         err = bpf_core_apply_relo_insn((void *)ctx->log, insn, ...
>
> > Should we drop the message at all?
>
> Passing it into bpf_core_spec_match() and further into
> bpf_core_types_are_compat() is probably unnecessary.
> All callers have an error check with a log right after.
> So I think we won't lose anything if we drop this log.
>

Nice.

> >
> > > > +               return 0;
> > > > +       }
> > > > +}
> > >
> > > Please add tests that exercise this logic by enabling
> > > additional lskels and a new test that hits the recursion limit.
> > > I suspect we don't have such case in selftests.
> > >
> > > Thanks!
> >
> > Will do!
>
> Thanks!
Matteo Croce Dec. 17, 2021, 7:31 p.m. UTC | #5
On Wed, Dec 15, 2021 at 7:21 PM Matteo Croce
<mcroce@linux.microsoft.com> wrote:
>
> On Wed, Dec 15, 2021 at 6:29 PM Alexei Starovoitov
> <alexei.starovoitov@gmail.com> wrote:
> >
> > On Wed, Dec 15, 2021 at 6:54 AM Matteo Croce <mcroce@linux.microsoft.com> wrote:
> > > >
> > > > Maybe do a level check here?
> > > > Since calling it and immediately returning doesn't conserve
> > > > the stack.
> > > > If it gets called it can finish fine, but
> > > > calling it again would be too much.
> > > > In other words checking the level here gives us
> > > > room for one more frame.
> > > >
> > >
> > > I thought that the compiler was smart enough to return before
> > > allocating most of the frame.
> > > I tried and this is true only with gcc, not with clang.
> >
> > Interesting. That's a surprise.
> > Could you share the asm that gcc generates?
> >
>
> Sure,
>
> This is the gcc x86_64 asm on a stripped down
> bpf_core_types_are_compat() with a 1k struct on the stack:
>
> bpf_core_types_are_compat:
> test esi, esi
> jle .L69
> push r15
> push r14
> push r13
> push r12
> push rbp
> mov rbp, rdi
> push rbx
> mov ebx, esi
> sub rsp, 9112
> [...]
> .L69:
> or eax, -1
> ret
>
> This latest clang:
>
> bpf_core_types_are_compat: # @bpf_core_types_are_compat
> push rbp
> push r15
> push r14
> push rbx
> sub rsp, 1000
> mov r14d, -1
> test esi, esi
> jle .LBB0_7
> [...]
> .LBB0_7:
> mov eax, r14d
> add rsp, 1000
> pop rbx
> pop r14
> pop r15
> pop rbp
> ret
>
> > > > > +                       err = __bpf_core_types_are_compat(local_btf, local_id,
> > > > > +                                                         targ_btf, targ_id,
> > > > > +                                                         level - 1);
> > > > > +                       if (err <= 0)
> > > > > +                               return err;
> > > > > +               }
> > > > > +
> > > > > +               /* tail recurse for return type check */
> > > > > +               btf_type_skip_modifiers(local_btf, local_type->type, &local_id);
> > > > > +               btf_type_skip_modifiers(targ_btf, targ_type->type, &targ_id);
> > > > > +               goto recur;
> > > > > +       }
> > > > > +       default:
> > > > > +               pr_warn("unexpected kind %s relocated, local [%d], target [%d]\n",
> > > > > +                       btf_type_str(local_type), local_id, targ_id);
> > > >
> > > > That should be bpf_log() instead.
> > > >
> > >
> > > To do that I need a struct bpf_verifier_log, which is not present
> > > there, neither in bpf_core_spec_match() or bpf_core_apply_relo_insn().
> >
> > It is there. See:
> >         err = bpf_core_apply_relo_insn((void *)ctx->log, insn, ...
> >
> > > Should we drop the message at all?
> >
> > Passing it into bpf_core_spec_match() and further into
> > bpf_core_types_are_compat() is probably unnecessary.
> > All callers have an error check with a log right after.
> > So I think we won't lose anything if we drop this log.
> >
>
> Nice.
>
> > >
> > > > > +               return 0;
> > > > > +       }
> > > > > +}
> > > >
> > > > Please add tests that exercise this logic by enabling
> > > > additional lskels and a new test that hits the recursion limit.
> > > > I suspect we don't have such case in selftests.
> > > >
> > > > Thanks!
> > >
> > > Will do!
> >
> > Thanks!
>

Hi,

I'm writing a test which exercise that function.
I can succesfully trigger a call to __bpf_core_types_are_compat() with
these  calls:

bpf_core_type_id_kernel(struct sk_buff);
bpf_core_type_exists(struct sk_buff);
bpf_core_type_size(struct sk_buff);

but the kind will obviously be BTF_KIND_STRUCT.
I'm trying to do the same with kind BTF_KIND_FUNC_PROTO instead with:

void func_proto(int, unsigned int);
bpf_core_type_id_kernel(func_proto);

but I get a clang crash[1], while just checking the existence with:

typedef int (*func_proto_typedef)(struct sk_buff *);
bpf_core_type_exists(func_proto_typedef);

I don't reach even bpf_core_spec_match().

Which is a simple way to generate a BTF_KIND_FUNC_PROTO BTF field?

[1] https://github.com/llvm/llvm-project/issues/52779

Regards,
Yonghong Song Dec. 21, 2021, 6:33 a.m. UTC | #6
On 12/17/21 11:31 AM, Matteo Croce wrote:
>   On Wed, Dec 15, 2021 at 7:21 PM Matteo Croce
> <mcroce@linux.microsoft.com> wrote:
>>
>> On Wed, Dec 15, 2021 at 6:29 PM Alexei Starovoitov
>> <alexei.starovoitov@gmail.com> wrote:
>>>
>>> On Wed, Dec 15, 2021 at 6:54 AM Matteo Croce <mcroce@linux.microsoft.com> wrote:
>>>>>
>>>>> Maybe do a level check here?
>>>>> Since calling it and immediately returning doesn't conserve
>>>>> the stack.
>>>>> If it gets called it can finish fine, but
>>>>> calling it again would be too much.
>>>>> In other words checking the level here gives us
>>>>> room for one more frame.
>>>>>
>>>>
>>>> I thought that the compiler was smart enough to return before
>>>> allocating most of the frame.
>>>> I tried and this is true only with gcc, not with clang.
>>>
>>> Interesting. That's a surprise.
>>> Could you share the asm that gcc generates?
>>>
>>
>> Sure,
>>
>> This is the gcc x86_64 asm on a stripped down
>> bpf_core_types_are_compat() with a 1k struct on the stack:
>>
>> bpf_core_types_are_compat:
>> test esi, esi
>> jle .L69
>> push r15
>> push r14
>> push r13
>> push r12
>> push rbp
>> mov rbp, rdi
>> push rbx
>> mov ebx, esi
>> sub rsp, 9112
>> [...]
>> .L69:
>> or eax, -1
>> ret
>>
>> This latest clang:
>>
>> bpf_core_types_are_compat: # @bpf_core_types_are_compat
>> push rbp
>> push r15
>> push r14
>> push rbx
>> sub rsp, 1000
>> mov r14d, -1
>> test esi, esi
>> jle .LBB0_7
>> [...]
>> .LBB0_7:
>> mov eax, r14d
>> add rsp, 1000
>> pop rbx
>> pop r14
>> pop r15
>> pop rbp
>> ret
>>
>>>>>> +                       err = __bpf_core_types_are_compat(local_btf, local_id,
>>>>>> +                                                         targ_btf, targ_id,
>>>>>> +                                                         level - 1);
>>>>>> +                       if (err <= 0)
>>>>>> +                               return err;
>>>>>> +               }
>>>>>> +
>>>>>> +               /* tail recurse for return type check */
>>>>>> +               btf_type_skip_modifiers(local_btf, local_type->type, &local_id);
>>>>>> +               btf_type_skip_modifiers(targ_btf, targ_type->type, &targ_id);
>>>>>> +               goto recur;
>>>>>> +       }
>>>>>> +       default:
>>>>>> +               pr_warn("unexpected kind %s relocated, local [%d], target [%d]\n",
>>>>>> +                       btf_type_str(local_type), local_id, targ_id);
>>>>>
>>>>> That should be bpf_log() instead.
>>>>>
>>>>
>>>> To do that I need a struct bpf_verifier_log, which is not present
>>>> there, neither in bpf_core_spec_match() or bpf_core_apply_relo_insn().
>>>
>>> It is there. See:
>>>          err = bpf_core_apply_relo_insn((void *)ctx->log, insn, ...
>>>
>>>> Should we drop the message at all?
>>>
>>> Passing it into bpf_core_spec_match() and further into
>>> bpf_core_types_are_compat() is probably unnecessary.
>>> All callers have an error check with a log right after.
>>> So I think we won't lose anything if we drop this log.
>>>
>>
>> Nice.
>>
>>>>
>>>>>> +               return 0;
>>>>>> +       }
>>>>>> +}
>>>>>
>>>>> Please add tests that exercise this logic by enabling
>>>>> additional lskels and a new test that hits the recursion limit.
>>>>> I suspect we don't have such case in selftests.
>>>>>
>>>>> Thanks!
>>>>
>>>> Will do!
>>>
>>> Thanks!
>>
> 
> Hi,
> 
> I'm writing a test which exercise that function.
> I can succesfully trigger a call to __bpf_core_types_are_compat() with
> these  calls:
> 
> bpf_core_type_id_kernel(struct sk_buff);
> bpf_core_type_exists(struct sk_buff);
> bpf_core_type_size(struct sk_buff);
> 
> but the kind will obviously be BTF_KIND_STRUCT.
> I'm trying to do the same with kind BTF_KIND_FUNC_PROTO instead with:
> 
> void func_proto(int, unsigned int);
> bpf_core_type_id_kernel(func_proto);
> 
> but I get a clang crash[1], while just checking the existence with:
> 
> typedef int (*func_proto_typedef)(struct sk_buff *);
> bpf_core_type_exists(func_proto_typedef);
> 
> I don't reach even bpf_core_spec_match().
> 
> Which is a simple way to generate a BTF_KIND_FUNC_PROTO BTF field?
> 
> [1] https://github.com/llvm/llvm-project/issues/52779

Thanks for Matteo. The error message is improved in 
https://reviews.llvm.org/D116063 to make it easy to understand the 
problem. I also posted the explanation here so other people, if hitting
a similar issue, can be aware of what is going on.

The following is a simple reproducible test case:

$ cat bug.c 

extern int do_smth(int); 
 

int test() { 
 

   return __builtin_btf_type_id(*(typeof(do_smth) *)do_smth, 1); 
 

} 
 

$ clang -target bpf -O2 -g -c bug.c 

fatal error: error in backend: Empty type name for BTF_TYPE_ID_REMOTE reloc
...
Let us try to reproduce the IR to see what is really going on with command,

clang -target bpf -O2 -g bug.c -emit-llvm -S -Xclang -disable-llvm-passes
The IR,

define dso_local i32 @test() #0 !dbg !7 {
entry:
   %0 = call i64 @llvm.bpf.btf.type.id(i32 0, i64 1), !dbg !12, 
!llvm.preserve.access.index !13
   %conv = trunc i64 %0 to i32, !dbg !12
   ret i32 %conv, !dbg !15
}
...
!7 = distinct !DISubprogram(name: "test", scope: !1, file: !1, line: 2, 
type: !8, scopeLine: 2, flags: DIFlagAllCallsDescribed, spFlags: 
DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !11)
!8 = !DISubroutineType(types: !9)
!9 = !{!10}
!10 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
!11 = !{}
!12 = !DILocation(line: 3, column: 10, scope: !7)
!13 = !DISubroutineType(types: !14)
!14 = !{!10, !10}
In the above, we really try to relocate a 'subroutine' (func pointer) 
type with debuginfo id 13 which is actually "int ()(int)". There are no 
actually name for type 13 and libbpf is not able to relocate for a 
function "int ()(int)" as it could have many matches.

https://reviews.llvm.org/D116063 improved the error message as below
to make it a little bit more evident what is the problem:

$ clang -target bpf -O2 -g -c bug.c 

fatal error: error in backend: SubroutineType not supported for 
BTF_TYPE_ID_REMOTE reloc


> 
> Regards,
Alexei Starovoitov Jan. 28, 2022, 5:31 a.m. UTC | #7
On Mon, Dec 20, 2021 at 10:34 PM Yonghong Song <yhs@fb.com> wrote:
>
>
> https://reviews.llvm.org/D116063 improved the error message as below
> to make it a little bit more evident what is the problem:
>
> $ clang -target bpf -O2 -g -c bug.c
>
> fatal error: error in backend: SubroutineType not supported for
> BTF_TYPE_ID_REMOTE reloc

Hi Matteo,

Are you still working on a test?
What's a timeline to repost the patch set?

Thanks!
Matteo Croce Jan. 28, 2022, 6:51 p.m. UTC | #8
On Fri, Jan 28, 2022 at 6:31 AM Alexei Starovoitov
<alexei.starovoitov@gmail.com> wrote:
>
> On Mon, Dec 20, 2021 at 10:34 PM Yonghong Song <yhs@fb.com> wrote:
> >
> >
> > https://reviews.llvm.org/D116063 improved the error message as below
> > to make it a little bit more evident what is the problem:
> >
> > $ clang -target bpf -O2 -g -c bug.c
> >
> > fatal error: error in backend: SubroutineType not supported for
> > BTF_TYPE_ID_REMOTE reloc
>
> Hi Matteo,
>
> Are you still working on a test?
> What's a timeline to repost the patch set?
>
> Thanks!

Hi Alexei,

The change itself is ready, I'm just stuck at writing a test which
will effectively calls __bpf_core_types_are_compat() with some
recursion.
I guess that I have to generate a BTF_KIND_FUNC_PROTO type somehow, so
__bpf_core_types_are_compat() is called again to check the prototipe
arguments type.
I tried with these two, with no luck:

// 1
typedef int (*func_proto_typedef)(struct sk_buff *);
bpf_core_type_exists(func_proto_typedef);

// 2
void func_proto(int, unsigned int);
bpf_core_type_id_kernel(func_proto);

Which is a simple way to generate a BTF_KIND_FUNC_PROTO BTF field?

Regards,
Alexei Starovoitov Jan. 28, 2022, 8:08 p.m. UTC | #9
On Fri, Jan 28, 2022 at 10:51 AM Matteo Croce
<mcroce@linux.microsoft.com> wrote:
>
> On Fri, Jan 28, 2022 at 6:31 AM Alexei Starovoitov
> <alexei.starovoitov@gmail.com> wrote:
> >
> > On Mon, Dec 20, 2021 at 10:34 PM Yonghong Song <yhs@fb.com> wrote:
> > >
> > >
> > > https://reviews.llvm.org/D116063 improved the error message as below
> > > to make it a little bit more evident what is the problem:
> > >
> > > $ clang -target bpf -O2 -g -c bug.c
> > >
> > > fatal error: error in backend: SubroutineType not supported for
> > > BTF_TYPE_ID_REMOTE reloc
> >
> > Hi Matteo,
> >
> > Are you still working on a test?
> > What's a timeline to repost the patch set?
> >
> > Thanks!
>
> Hi Alexei,
>
> The change itself is ready, I'm just stuck at writing a test which
> will effectively calls __bpf_core_types_are_compat() with some
> recursion.
> I guess that I have to generate a BTF_KIND_FUNC_PROTO type somehow, so
> __bpf_core_types_are_compat() is called again to check the prototipe
> arguments type.
> I tried with these two, with no luck:
>
> // 1
> typedef int (*func_proto_typedef)(struct sk_buff *);
> bpf_core_type_exists(func_proto_typedef);
>
> // 2
> void func_proto(int, unsigned int);
> bpf_core_type_id_kernel(func_proto);
>
> Which is a simple way to generate a BTF_KIND_FUNC_PROTO BTF field?

What do you mean 'no luck'?
Have you tried what progs/test_core_reloc_type_id.c is doing?
typedef int (*func_proto_typedef)(long);
bpf_core_type_id_kernel(func_proto_typedef);

Without macros:
typedef int (*func_proto_typedef)(long);

int test() {
   return __builtin_btf_type_id(*(typeof(func_proto_typedef) *)0, 1);
}
int test2() {
   return __builtin_preserve_type_info(*(typeof(func_proto_typedef) *)0, 0);
}


compiles fine and generates relos.
Matteo Croce Jan. 29, 2022, 12:35 a.m. UTC | #10
On Fri, Jan 28, 2022 at 9:09 PM Alexei Starovoitov
<alexei.starovoitov@gmail.com> wrote:
>
> On Fri, Jan 28, 2022 at 10:51 AM Matteo Croce
> <mcroce@linux.microsoft.com> wrote:
> >
> > On Fri, Jan 28, 2022 at 6:31 AM Alexei Starovoitov
> > <alexei.starovoitov@gmail.com> wrote:
> > >
> > > On Mon, Dec 20, 2021 at 10:34 PM Yonghong Song <yhs@fb.com> wrote:
> > > >
> > > >
> > > > https://reviews.llvm.org/D116063 improved the error message as below
> > > > to make it a little bit more evident what is the problem:
> > > >
> > > > $ clang -target bpf -O2 -g -c bug.c
> > > >
> > > > fatal error: error in backend: SubroutineType not supported for
> > > > BTF_TYPE_ID_REMOTE reloc
> > >
> > > Hi Matteo,
> > >
> > > Are you still working on a test?
> > > What's a timeline to repost the patch set?
> > >
> > > Thanks!
> >
> > Hi Alexei,
> >
> > The change itself is ready, I'm just stuck at writing a test which
> > will effectively calls __bpf_core_types_are_compat() with some
> > recursion.
> > I guess that I have to generate a BTF_KIND_FUNC_PROTO type somehow, so
> > __bpf_core_types_are_compat() is called again to check the prototipe
> > arguments type.
> > I tried with these two, with no luck:
> >
> > // 1
> > typedef int (*func_proto_typedef)(struct sk_buff *);
> > bpf_core_type_exists(func_proto_typedef);
> >
> > // 2
> > void func_proto(int, unsigned int);
> > bpf_core_type_id_kernel(func_proto);
> >
> > Which is a simple way to generate a BTF_KIND_FUNC_PROTO BTF field?
>
> What do you mean 'no luck'?
> Have you tried what progs/test_core_reloc_type_id.c is doing?
> typedef int (*func_proto_typedef)(long);
> bpf_core_type_id_kernel(func_proto_typedef);
>
> Without macros:
> typedef int (*func_proto_typedef)(long);
>
> int test() {
>    return __builtin_btf_type_id(*(typeof(func_proto_typedef) *)0, 1);
> }
> int test2() {
>    return __builtin_preserve_type_info(*(typeof(func_proto_typedef) *)0, 0);
> }
>
>
> compiles fine and generates relos.

Yes, I tried that one.
We reach bpf_core_apply_relo_insn() but not bpf_core_spec_match(),
since cands->len is 0.

[   16.424821] bpf_core_apply_relo_insn:1202 cands->len: 0

That's a very simple raw_tracepoint/sys_enter program:
Alexei Starovoitov Jan. 29, 2022, 1:11 a.m. UTC | #11
On Fri, Jan 28, 2022 at 4:36 PM Matteo Croce <mcroce@linux.microsoft.com> wrote:
>
> On Fri, Jan 28, 2022 at 9:09 PM Alexei Starovoitov
> <alexei.starovoitov@gmail.com> wrote:
> >
> > On Fri, Jan 28, 2022 at 10:51 AM Matteo Croce
> > <mcroce@linux.microsoft.com> wrote:
> > >
> > > On Fri, Jan 28, 2022 at 6:31 AM Alexei Starovoitov
> > > <alexei.starovoitov@gmail.com> wrote:
> > > >
> > > > On Mon, Dec 20, 2021 at 10:34 PM Yonghong Song <yhs@fb.com> wrote:
> > > > >
> > > > >
> > > > > https://reviews.llvm.org/D116063 improved the error message as below
> > > > > to make it a little bit more evident what is the problem:
> > > > >
> > > > > $ clang -target bpf -O2 -g -c bug.c
> > > > >
> > > > > fatal error: error in backend: SubroutineType not supported for
> > > > > BTF_TYPE_ID_REMOTE reloc
> > > >
> > > > Hi Matteo,
> > > >
> > > > Are you still working on a test?
> > > > What's a timeline to repost the patch set?
> > > >
> > > > Thanks!
> > >
> > > Hi Alexei,
> > >
> > > The change itself is ready, I'm just stuck at writing a test which
> > > will effectively calls __bpf_core_types_are_compat() with some
> > > recursion.
> > > I guess that I have to generate a BTF_KIND_FUNC_PROTO type somehow, so
> > > __bpf_core_types_are_compat() is called again to check the prototipe
> > > arguments type.
> > > I tried with these two, with no luck:
> > >
> > > // 1
> > > typedef int (*func_proto_typedef)(struct sk_buff *);
> > > bpf_core_type_exists(func_proto_typedef);
> > >
> > > // 2
> > > void func_proto(int, unsigned int);
> > > bpf_core_type_id_kernel(func_proto);
> > >
> > > Which is a simple way to generate a BTF_KIND_FUNC_PROTO BTF field?
> >
> > What do you mean 'no luck'?
> > Have you tried what progs/test_core_reloc_type_id.c is doing?
> > typedef int (*func_proto_typedef)(long);
> > bpf_core_type_id_kernel(func_proto_typedef);
> >
> > Without macros:
> > typedef int (*func_proto_typedef)(long);
> >
> > int test() {
> >    return __builtin_btf_type_id(*(typeof(func_proto_typedef) *)0, 1);
> > }
> > int test2() {
> >    return __builtin_preserve_type_info(*(typeof(func_proto_typedef) *)0, 0);
> > }
> >
> >
> > compiles fine and generates relos.
>
> Yes, I tried that one.
> We reach bpf_core_apply_relo_insn() but not bpf_core_spec_match(),
> since cands->len is 0.
>
> [   16.424821] bpf_core_apply_relo_insn:1202 cands->len: 0
>
> That's a very simple raw_tracepoint/sys_enter program:

Did you forget to attach it ?

If it's doing bpf_core_type_id_kernel(func_proto_typedef)
then, of course, cands->len will be zero.
You need to add this typedef to bpf_testmod first.
Then use two typedef flavors: func_proto_typedef___match
and func_proto_typedef___doesnt_match
with matching and mismatching prototypes, so
both can call into bpf_core_types_are_compat() and
return different results.
Then build on top to test recursion.
Matteo Croce Feb. 2, 2022, 6:30 p.m. UTC | #12
On Sat, Jan 29, 2022 at 2:11 AM Alexei Starovoitov
<alexei.starovoitov@gmail.com> wrote:
>
> On Fri, Jan 28, 2022 at 4:36 PM Matteo Croce <mcroce@linux.microsoft.com> wrote:
> >
> > On Fri, Jan 28, 2022 at 9:09 PM Alexei Starovoitov
> > <alexei.starovoitov@gmail.com> wrote:
> > >
> > > On Fri, Jan 28, 2022 at 10:51 AM Matteo Croce
> > > <mcroce@linux.microsoft.com> wrote:
> > > >
> > > > On Fri, Jan 28, 2022 at 6:31 AM Alexei Starovoitov
> > > > <alexei.starovoitov@gmail.com> wrote:
> > > > >
> > > > > On Mon, Dec 20, 2021 at 10:34 PM Yonghong Song <yhs@fb.com> wrote:
> > > > > >
> > > > > >
> > > > > > https://reviews.llvm.org/D116063 improved the error message as below
> > > > > > to make it a little bit more evident what is the problem:
> > > > > >
> > > > > > $ clang -target bpf -O2 -g -c bug.c
> > > > > >
> > > > > > fatal error: error in backend: SubroutineType not supported for
> > > > > > BTF_TYPE_ID_REMOTE reloc
> > > > >
> > > > > Hi Matteo,
> > > > >
> > > > > Are you still working on a test?
> > > > > What's a timeline to repost the patch set?
> > > > >
> > > > > Thanks!
> > > >
> > > > Hi Alexei,
> > > >
> > > > The change itself is ready, I'm just stuck at writing a test which
> > > > will effectively calls __bpf_core_types_are_compat() with some
> > > > recursion.
> > > > I guess that I have to generate a BTF_KIND_FUNC_PROTO type somehow, so
> > > > __bpf_core_types_are_compat() is called again to check the prototipe
> > > > arguments type.
> > > > I tried with these two, with no luck:
> > > >
> > > > // 1
> > > > typedef int (*func_proto_typedef)(struct sk_buff *);
> > > > bpf_core_type_exists(func_proto_typedef);
> > > >
> > > > // 2
> > > > void func_proto(int, unsigned int);
> > > > bpf_core_type_id_kernel(func_proto);
> > > >
> > > > Which is a simple way to generate a BTF_KIND_FUNC_PROTO BTF field?
> > >
> > > What do you mean 'no luck'?
> > > Have you tried what progs/test_core_reloc_type_id.c is doing?
> > > typedef int (*func_proto_typedef)(long);
> > > bpf_core_type_id_kernel(func_proto_typedef);
> > >
> > > Without macros:
> > > typedef int (*func_proto_typedef)(long);
> > >
> > > int test() {
> > >    return __builtin_btf_type_id(*(typeof(func_proto_typedef) *)0, 1);
> > > }
> > > int test2() {
> > >    return __builtin_preserve_type_info(*(typeof(func_proto_typedef) *)0, 0);
> > > }
> > >
> > >
> > > compiles fine and generates relos.
> >
> > Yes, I tried that one.
> > We reach bpf_core_apply_relo_insn() but not bpf_core_spec_match(),
> > since cands->len is 0.
> >
> > [   16.424821] bpf_core_apply_relo_insn:1202 cands->len: 0
> >
> > That's a very simple raw_tracepoint/sys_enter program:
>
> Did you forget to attach it ?
>
> If it's doing bpf_core_type_id_kernel(func_proto_typedef)
> then, of course, cands->len will be zero.
> You need to add this typedef to bpf_testmod first.
> Then use two typedef flavors: func_proto_typedef___match
> and func_proto_typedef___doesnt_match
> with matching and mismatching prototypes, so
> both can call into bpf_core_types_are_compat() and
> return different results.
> Then build on top to test recursion.

Hi,

I'm able to trigger __bpf_core_types_are_compat() recursion now.
What do you think to generate also a prototype which needs 3 recursion
calls, thus invalid, and check that it returns error?
e.g.

typedef int (*func_proto_typedef)(long);
typedef int (*func_proto_typedef___of)(func_proto_typedef);

func_proto_typedef funcp = NULL;
func_proto_typedef___of funcp_of = NULL;

this gives:

[  190.875387] bpf_core_apply_relo_insn:1200 cands->len: 3
[  190.875435] __bpf_core_types_are_compat:6798 level: 2
[  190.875479] __bpf_core_types_are_compat:6798 level: 1
[  190.875506] bpf_core_types_are_compat:6896: ret: 0
[  190.875541] __bpf_core_types_are_compat:6798 level: 2
[  190.875570] __bpf_core_types_are_compat:6798 level: 1
[  190.875599] bpf_core_types_are_compat:6896: ret: 0
[  190.875629] __bpf_core_types_are_compat:6798 level: 2
[  190.875659] __bpf_core_types_are_compat:6798 level: 1
[  190.875686] bpf_core_types_are_compat:6896: ret: -22
failed to open and/or load BPF object
diff mbox series

Patch

diff --git a/include/linux/btf.h b/include/linux/btf.h
index acef6ef28768..03162ae04ace 100644
--- a/include/linux/btf.h
+++ b/include/linux/btf.h
@@ -300,6 +300,11 @@  static inline const struct btf_var_secinfo *btf_type_var_secinfo(
 	return (const struct btf_var_secinfo *)(t + 1);
 }
 
+static inline struct btf_param *btf_params(const struct btf_type *t)
+{
+	return (struct btf_param *)(t + 1);
+}
+
 #ifdef CONFIG_BPF_SYSCALL
 struct bpf_prog;
 
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 27b7de538697..dcbf127ad6d2 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -6418,10 +6418,115 @@  bool bpf_check_mod_kfunc_call(struct kfunc_btf_id_list *klist, u32 kfunc_id,
 DEFINE_KFUNC_BTF_ID_LIST(bpf_tcp_ca_kfunc_list);
 DEFINE_KFUNC_BTF_ID_LIST(prog_test_kfunc_list);
 
+#define MAX_TYPES_ARE_COMPAT_DEPTH 2
+
+static
+int __bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id,
+				const struct btf *targ_btf, __u32 targ_id,
+				int level)
+{
+	const struct btf_type *local_type, *targ_type;
+	int depth = 32; /* max recursion depth */
+
+	if (level <= 0)
+		return -EINVAL;
+
+	/* caller made sure that names match (ignoring flavor suffix) */
+	local_type = btf_type_by_id(local_btf, local_id);
+	targ_type = btf_type_by_id(targ_btf, targ_id);
+	if (btf_kind(local_type) != btf_kind(targ_type))
+		return 0;
+
+recur:
+	depth--;
+	if (depth < 0)
+		return -EINVAL;
+
+	local_type = btf_type_skip_modifiers(local_btf, local_id, &local_id);
+	targ_type = btf_type_skip_modifiers(targ_btf, targ_id, &targ_id);
+	if (!local_type || !targ_type)
+		return -EINVAL;
+
+	if (btf_kind(local_type) != btf_kind(targ_type))
+		return 0;
+
+	switch (btf_kind(local_type)) {
+	case BTF_KIND_UNKN:
+	case BTF_KIND_STRUCT:
+	case BTF_KIND_UNION:
+	case BTF_KIND_ENUM:
+	case BTF_KIND_FWD:
+		return 1;
+	case BTF_KIND_INT:
+		/* just reject deprecated bitfield-like integers; all other
+		 * integers are by default compatible between each other
+		 */
+		return btf_int_offset(local_type) == 0 && btf_int_offset(targ_type) == 0;
+	case BTF_KIND_PTR:
+		local_id = local_type->type;
+		targ_id = targ_type->type;
+		goto recur;
+	case BTF_KIND_ARRAY:
+		local_id = btf_array(local_type)->type;
+		targ_id = btf_array(targ_type)->type;
+		goto recur;
+	case BTF_KIND_FUNC_PROTO: {
+		struct btf_param *local_p = btf_params(local_type);
+		struct btf_param *targ_p = btf_params(targ_type);
+		__u16 local_vlen = btf_vlen(local_type);
+		__u16 targ_vlen = btf_vlen(targ_type);
+		int i, err;
+
+		if (local_vlen != targ_vlen)
+			return 0;
+
+		for (i = 0; i < local_vlen; i++, local_p++, targ_p++) {
+			btf_type_skip_modifiers(local_btf, local_p->type, &local_id);
+			btf_type_skip_modifiers(targ_btf, targ_p->type, &targ_id);
+			err = __bpf_core_types_are_compat(local_btf, local_id,
+							  targ_btf, targ_id,
+							  level - 1);
+			if (err <= 0)
+				return err;
+		}
+
+		/* tail recurse for return type check */
+		btf_type_skip_modifiers(local_btf, local_type->type, &local_id);
+		btf_type_skip_modifiers(targ_btf, targ_type->type, &targ_id);
+		goto recur;
+	}
+	default:
+		pr_warn("unexpected kind %s relocated, local [%d], target [%d]\n",
+			btf_type_str(local_type), local_id, targ_id);
+		return 0;
+	}
+}
+
+/* Check local and target types for compatibility. This check is used for
+ * type-based CO-RE relocations and follow slightly different rules than
+ * field-based relocations. This function assumes that root types were already
+ * checked for name match. Beyond that initial root-level name check, names
+ * are completely ignored. Compatibility rules are as follows:
+ *   - any two STRUCTs/UNIONs/FWDs/ENUMs/INTs are considered compatible, but
+ *     kind should match for local and target types (i.e., STRUCT is not
+ *     compatible with UNION);
+ *   - for ENUMs, the size is ignored;
+ *   - for INT, size and signedness are ignored;
+ *   - for ARRAY, dimensionality is ignored, element types are checked for
+ *     compatibility recursively;
+ *   - CONST/VOLATILE/RESTRICT modifiers are ignored;
+ *   - TYPEDEFs/PTRs are compatible if types they pointing to are compatible;
+ *   - FUNC_PROTOs are compatible if they have compatible signature: same
+ *     number of input args and compatible return and argument types.
+ * These rules are not set in stone and probably will be adjusted as we get
+ * more experience with using BPF CO-RE relocations.
+ */
 int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id,
 			      const struct btf *targ_btf, __u32 targ_id)
 {
-	return -EOPNOTSUPP;
+	return __bpf_core_types_are_compat(local_btf, local_id,
+					   targ_btf, targ_id,
+					   MAX_TYPES_ARE_COMPAT_DEPTH);
 }
 
 static bool bpf_core_is_flavor_sep(const char *s)