diff mbox series

[v4,bpf-next,08/11] bpf: Special verifier handling for bpf_rbtree_{remove, first}

Message ID 20230209174144.3280955-9-davemarchevsky@fb.com (mailing list archive)
State Superseded
Delegated to: BPF
Headers show
Series BPF rbtree next-gen datastructure | expand

Checks

Context Check Description
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-7 success Logs for llvm-toolchain
bpf/vmtest-bpf-next-VM_Test-8 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-2 success Logs for build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-3 success Logs for build for aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-5 success Logs for build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-6 success Logs for build for x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-4 success Logs for build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-9 success Logs for test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-10 success Logs for test_maps on aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-12 success Logs for test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-13 success Logs for test_maps on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-14 success Logs for test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-15 success Logs for test_progs on aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-17 success Logs for test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-18 success Logs for test_progs on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-19 success Logs for test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-20 success Logs for test_progs_no_alu32 on aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-22 fail Logs for test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-23 success Logs for test_progs_no_alu32 on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-24 success Logs for test_progs_no_alu32_parallel on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-25 success Logs for test_progs_no_alu32_parallel on aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-27 success Logs for test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-28 success Logs for test_progs_no_alu32_parallel on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-29 success Logs for test_progs_parallel on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-30 success Logs for test_progs_parallel on aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-32 success Logs for test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-33 success Logs for test_progs_parallel on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-34 success Logs for test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-35 success Logs for test_verifier on aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-36 success Logs for test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-37 success Logs for test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-38 success Logs for test_verifier on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-21 success Logs for test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-26 success Logs for test_progs_no_alu32_parallel on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-16 success Logs for test_progs on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-31 success Logs for test_progs_parallel on s390x with gcc
bpf/vmtest-bpf-next-PR fail PR summary
bpf/vmtest-bpf-next-VM_Test-11 success Logs for test_maps on s390x with gcc
netdev/tree_selection success Clearly marked for bpf-next, async
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success Link
netdev/cover_letter success Series has a cover letter
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 10 this patch: 10
netdev/cc_maintainers warning 8 maintainers not CCed: john.fastabend@gmail.com sdf@google.com jolsa@kernel.org song@kernel.org martin.lau@linux.dev haoluo@google.com yhs@fb.com kpsingh@kernel.org
netdev/build_clang success Errors and warnings before: 1 this patch: 1
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 10 this patch: 10
netdev/checkpatch warning WARNING: line length of 106 exceeds 80 columns WARNING: line length of 85 exceeds 80 columns WARNING: line length of 88 exceeds 80 columns WARNING: line length of 89 exceeds 80 columns WARNING: line length of 93 exceeds 80 columns WARNING: line length of 94 exceeds 80 columns WARNING: line length of 97 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Dave Marchevsky Feb. 9, 2023, 5:41 p.m. UTC
Newly-added bpf_rbtree_{remove,first} kfuncs have some special properties
that require handling in the verifier:

  * both bpf_rbtree_remove and bpf_rbtree_first return the type containing
    the bpf_rb_node field, with the offset set to that field's offset,
    instead of a struct bpf_rb_node *
    * mark_reg_graph_node helper added in previous patch generalizes
      this logic, use it

  * bpf_rbtree_remove's node input is a node that's been inserted
    in the tree - a non-owning reference.

  * bpf_rbtree_remove must invalidate non-owning references in order to
    avoid aliasing issues. Use the previously-added
    invalidate_non_owning_refs helper to mark this function as a
    non-owning ref invalidation point.

  * Unlike other functions, which convert one of their input arg regs to
    non-owning reference, bpf_rbtree_first takes no arguments and just
    returns a non-owning reference (possibly null)
    * For now verifier logic for this is special-cased instead of
      adding new kfunc flag.

This patch, along with the previous one, completes special verifier
handling for all rbtree API functions added in this series.

Signed-off-by: Dave Marchevsky <davemarchevsky@fb.com>
---
 kernel/bpf/verifier.c | 29 +++++++++++++++++++++++------
 1 file changed, 23 insertions(+), 6 deletions(-)

Comments

Alexei Starovoitov Feb. 10, 2023, 3:11 a.m. UTC | #1
On Thu, Feb 09, 2023 at 09:41:41AM -0800, Dave Marchevsky wrote:
> @@ -9924,11 +9934,12 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
>  				   meta.func_id == special_kfunc_list[KF_bpf_list_pop_back]) {
>  				struct btf_field *field = meta.arg_list_head.field;
>  
> -				mark_reg_known_zero(env, regs, BPF_REG_0);
> -				regs[BPF_REG_0].type = PTR_TO_BTF_ID | MEM_ALLOC;
> -				regs[BPF_REG_0].btf = field->graph_root.btf;
> -				regs[BPF_REG_0].btf_id = field->graph_root.value_btf_id;
> -				regs[BPF_REG_0].off = field->graph_root.node_offset;
> +				mark_reg_graph_node(regs, BPF_REG_0, &field->graph_root);
> +			} else if (meta.func_id == special_kfunc_list[KF_bpf_rbtree_remove] ||
> +				   meta.func_id == special_kfunc_list[KF_bpf_rbtree_first]) {
> +				struct btf_field *field = meta.arg_rbtree_root.field;
> +
> +				mark_reg_graph_node(regs, BPF_REG_0, &field->graph_root);
>  			} else if (meta.func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx]) {
>  				mark_reg_known_zero(env, regs, BPF_REG_0);
>  				regs[BPF_REG_0].type = PTR_TO_BTF_ID | PTR_TRUSTED;
> @@ -9994,7 +10005,13 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
>  			if (is_kfunc_ret_null(&meta))
>  				regs[BPF_REG_0].id = id;
>  			regs[BPF_REG_0].ref_obj_id = id;
> +		} else if (meta.func_id == special_kfunc_list[KF_bpf_rbtree_first]) {
> +			ref_set_non_owning_lock(env, &regs[BPF_REG_0]);
>  		}

Looking at the above code where R0 state is set across two different if-s
it feels that bool non_owning_ref_lock from patch 2 shouldn't be a bool.

Patch 7 also has this split initialization of the reg state.
First it does mark_reg_graph_node() which sets regs[regno].type = PTR_TO_BTF_ID | MEM_ALLOC
and then it does ref_set_non_owning_lock() that sets that bool flag.
Setting PTR_TO_BTF_ID | MEM_ALLOC in the helper without setting ref_obj_id > 0
at the same time feels error prone.

This non_owning_ref_lock bool flag is actually just a flag.
I think it would be cleaner to make it similar to MEM_ALLOC and call it
NON_OWN_REF = BIT(14 + BPF_BASE_TYPE_BITS).

Then we can set it at once in this patch and in patch 7 and avoid this split init.
The check in patch 2 also will become cleaner.
Instead of:
if (type_is_ptr_alloc_obj(reg->type) && reg->non_owning_ref_lock)
it will be
if (reg->type == PTR_TO_BTF_ID | MEM_ALLOC | NON_OWN_REF)

the transition from owning to non-owning would be easier to follow as well:
PTR_TO_BTF_ID | MEM_ALLOC with ref_obj_id > 0
 -> 
   PTR_TO_BTF_ID | MEM_ALLOC | NON_OWN_REF with ref_obj_id == 0

and it will probably help to avoid bugs where PTR_TO_BTF_ID | MEM_ALLOC is accepted
but we forgot to check ref_obj_id. There are no such places now, but it feels
less error prone with proper flag instead of bool.

I would also squash patches 1 and 2. Since we've analyzed correctness of patch 2 well enough
it doesn't make sense to go through the churn in patch 1 just to delete it in patch 2.

wdyt?
Dave Marchevsky Feb. 10, 2023, 8:22 a.m. UTC | #2
On 2/9/23 10:11 PM, Alexei Starovoitov wrote:
> On Thu, Feb 09, 2023 at 09:41:41AM -0800, Dave Marchevsky wrote:
>> @@ -9924,11 +9934,12 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
>>  				   meta.func_id == special_kfunc_list[KF_bpf_list_pop_back]) {
>>  				struct btf_field *field = meta.arg_list_head.field;
>>  
>> -				mark_reg_known_zero(env, regs, BPF_REG_0);
>> -				regs[BPF_REG_0].type = PTR_TO_BTF_ID | MEM_ALLOC;
>> -				regs[BPF_REG_0].btf = field->graph_root.btf;
>> -				regs[BPF_REG_0].btf_id = field->graph_root.value_btf_id;
>> -				regs[BPF_REG_0].off = field->graph_root.node_offset;
>> +				mark_reg_graph_node(regs, BPF_REG_0, &field->graph_root);
>> +			} else if (meta.func_id == special_kfunc_list[KF_bpf_rbtree_remove] ||
>> +				   meta.func_id == special_kfunc_list[KF_bpf_rbtree_first]) {
>> +				struct btf_field *field = meta.arg_rbtree_root.field;
>> +
>> +				mark_reg_graph_node(regs, BPF_REG_0, &field->graph_root);
>>  			} else if (meta.func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx]) {
>>  				mark_reg_known_zero(env, regs, BPF_REG_0);
>>  				regs[BPF_REG_0].type = PTR_TO_BTF_ID | PTR_TRUSTED;
>> @@ -9994,7 +10005,13 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
>>  			if (is_kfunc_ret_null(&meta))
>>  				regs[BPF_REG_0].id = id;
>>  			regs[BPF_REG_0].ref_obj_id = id;
>> +		} else if (meta.func_id == special_kfunc_list[KF_bpf_rbtree_first]) {
>> +			ref_set_non_owning_lock(env, &regs[BPF_REG_0]);
>>  		}
> 
> Looking at the above code where R0 state is set across two different if-s
> it feels that bool non_owning_ref_lock from patch 2 shouldn't be a bool.

Re: "set across two different if-s" - I see what you mean, and the fact that
both are doing 'meta.func_id == whatever' checks doesn't make it clear why
they're separate. But note that above the else if that the second check
is adding is "if (is_kfunc_acquire(&meta))" check, acquire_reference_state, etc.

"Is function acquire" is a function-level property and, as the kfunc flags I
tried to add in previous versions of this series indicate, I think that
"returns a non-owning reference" and "need to invalidate non-owning refs"
are function-level properties as well.

As a contrast, the first addition - with mark_reg_graph_node - is more of a
return-type-level property. Instead of doing meta.func_id checks in that change,
we could instead assume that any kfunc returning "bpf_rb_node *" is actually
returning a graph node type w/ bpf_rb_node field. It's certainly a blurry line
in this case since it's necessary to peek at the bpf_rb_root arg in order to
provide info about the node type to mark_reg_graph_node. But this is similar
to RET_PTR_TO_MAP_VALUE logic which requires a bpf_map * to provide info about
the map value being returned.

Why does this distinction matter at all? Because I'd like to eventually merge
helper and kfunc verification as much as possible / reasonable, especially the
different approaches to func_proto-like logic. Currently, the bpf_func_proto
approach used by bpf helpers is better at expressing 
{arg,return}-type level properties. A helper func_proto can do

  .arg2_type = ARG_PTR_TO_BTF_ID_OR_NULL | OBJ_RELEASE,

and it's obvious which arg is being released, whereas kfunc equivalent is
KF_RELEASE flag on the kfunc itself and verifier needs to assume that there's
a single arg w/ ref_obj_id which is being released. Sure, kfunc annotations
(e.g. __sz, __alloc) could be extended to support all of this, but that's
not the current state of things, and such name suffixes wouldn't work for
retval.

Similarly, current kfunc definition scheme is better at expressing function-
level properties:

  BTF_ID_FLAGS(func, whatever, KF_ACQUIRE)

There's no func_proto equivalent, the is_acquire_function helper used in
check_helper_call resorts to "func_id ==" checks. For acquire specifically
it could be faked with a OBJ_ACQUIRE flag on retval in the proto, but I
don't know if the same would make sense for "need to invalidate non-owning
refs" or something like KF_TRUSTED_ARGS.

Anyways, this was a long-winded way of saying that separating this logic across
two different if-s was intentional and will help with future refactoring.

> Patch 7 also has this split initialization of the reg state.
> First it does mark_reg_graph_node() which sets regs[regno].type = PTR_TO_BTF_ID | MEM_ALLOC
> and then it does ref_set_non_owning_lock() that sets that bool flag.
> Setting PTR_TO_BTF_ID | MEM_ALLOC in the helper without setting ref_obj_id > 0
> at the same time feels error prone.

It's unfortunate that the reg type isn't really complete for rbtree_first
until after the second chunk of code, but this was already happening with
bpf_list_pop_{front,back}, which rely on KF_ACQUIRE flag and
is_kfunc_acquire check to set ref_obj_id on the popped owning reference.

Maybe to assuage your 'error prone' concern some check can be added at
the end of check_kfunc_call which ensures that PTR_TO_BTF_ID | MEM_ALLOC
types are properly configured, and dies with 'verifier internal error'
if not. I'm not convinced it's necessary, but regardless it would be
similar to commit 47e34cb74d37 ("bpf: Add verifier check for BPF_PTR_POISON retval and arg")
which I added a few months ago.

> This non_owning_ref_lock bool flag is actually a just flag.
> I think it would be cleaner to make it similar to MEM_ALLOC and call it
> NON_OWN_REF = BIT(14 + BPF_BASE_TYPE_BITS).
> 
> Then we can set it at once in this patch and in patch 7 and avoid this split init.
> The check in patch 2 also will become cleaner.
> Instead of:
> if (type_is_ptr_alloc_obj(reg->type) && reg->non_owning_ref_lock)
> it will be
> if (reg->type == PTR_TO_BTF_ID | MEM_ALLOC | NON_OWN_REF)

Minor tangent: this is why I like helpers like type_is_ptr_alloc_obj - they make
it obvious what properties a reg should have to be considered a certain type by
the verifier, and provide more context as to what specific type check is
happening vs a raw check.

IMO the cleanest version of that check would be if(reg_is_non_owning_ref(reg))
with the newly-added helper doing what you'd expect.

> 
> the transition from owning to non-owning would be easier to follow as well:
> PTR_TO_BTF_ID | MEM_ALLOC with ref_obj_id > 0
>  -> 
>    PTR_TO_BTF_ID | MEM_ALLOC | NON_OWN_REF with ref_obj_id == 0
> 
> and it will probably help to avoid bugs where PTR_TO_BTF_ID | MEM_ALLOC is accepted
> but we forgot to check ref_obj_id. There are no such places now, but it feels
> less error prone with proper flag instead of bool.

I'm not strongly opposed to a NON_OWN_REF type flag. It's a more granular
version of the KF_RELEASE_NON_OWN flag which I tried to add in a previous
version of this series. But some comments:

* Such a flag would eliminate the need to change bpf_reg_state in this
  series, but I think this will be temporary. If we add support for nested
  locks we'll need to reintroduce some "lock identity" again. If we want
  to improve UX for non-owning reference invalidation in the case where
  a list_head and rb_root share the same lock, we'll need to introduce some
  "datastructure root identity" to allow invalidation of only the list's
  non-owning refs on list_pop.

* Sure, we could have both the NON_OWN_REF flag and additional {lock,root}
  identity structures. But there isn't infinite room for type flags and
  currently the non-owning ref concept is contained to just two data structures.
  IMO in terms of generality this flag is closer to MEM_RINGBUF than
  PTR_MAYBE_NULL. If we're going to need {lock,root} identity structs
  and can use them to disambiguate between owning/non-owning refs quickly,
  why bother with an extra flag?

* Followup to earlier func_proto rant: I can't use NON_OWN_REF flag to tag
  a kfunc retval currently. So it'll really only be a verifier-internal flag.
  At that point, might as well add reg_is_non_owning_ref for canonical way
  of checking this.

> I would also squash patches 1 and 2. Since we've analyzed correctness of patch 2 well enough
> it doesn't make sense to go through the churn in patch 1 just to delete it in patch 2.
> 

Ack.

> wdyt?
Kumar Kartikeya Dwivedi Feb. 10, 2023, 1:55 p.m. UTC | #3
On Thu, Feb 09, 2023 at 06:41:41PM CET, Dave Marchevsky wrote:
> Newly-added bpf_rbtree_{remove,first} kfuncs have some special properties
> that require handling in the verifier:
>
>   * both bpf_rbtree_remove and bpf_rbtree_first return the type containing
>     the bpf_rb_node field, with the offset set to that field's offset,
>     instead of a struct bpf_rb_node *
>     * mark_reg_graph_node helper added in previous patch generalizes
>       this logic, use it
>
>   * bpf_rbtree_remove's node input is a node that's been inserted
>     in the tree - a non-owning reference.
>
>   * bpf_rbtree_remove must invalidate non-owning references in order to
>     avoid aliasing issue. Use previously-added
>     invalidate_non_owning_refs helper to mark this function as a
>     non-owning ref invalidation point.
>
>   * Unlike other functions, which convert one of their input arg regs to
>     non-owning reference, bpf_rbtree_first takes no arguments and just
>     returns a non-owning reference (possibly null)
>     * For now verifier logic for this is special-cased instead of
>       adding new kfunc flag.
>
> This patch, along with the previous one, complete special verifier
> handling for all rbtree API functions added in this series.
>

I think there are two issues with the current approach. The fundamental
assumption with non-owning references is that it is part of the collection. So
bpf_rbtree_{add,first}, bpf_list_push_{front,back} will create them, as no node
is being removed from collection. Marking bpf_rbtree_remove (and in the future
bpf_list_del) as invalidation points is also right, since once a node has been
removed it is going to be unclear whether existing non-owning references have
the same value, and thus the property of 'part of the collection' will be
broken.

The first issue relates to usability. If I have non-owning references to nodes
inserted into both a list and an rbtree, bpf_rbtree_remove should only
invalidate the ones that are part of the particular rbtree. It should have no
effect on others. Likewise for the bpf_list_del operation in the future.
Therefore, we need to track the collection identity associated with each
non-owning reference, then only invalidate non-owning references associated with
the same collection.

The case of bpf_spin_unlock is different, which should invalidate all non-owning
references.

The second issue is more serious. By not tracking the collection identity, we
will currently allow a non-owning reference for an object inserted into a list
to be passed to bpf_rbtree_remove, because the verifier cannot discern between
'inserted into rbtree' vs 'inserted into list'. For it, both are currently
equivalent in the verifier state. An object is allowed to have both
bpf_list_node and bpf_rb_node, but it can only be part of one collection at a
time (because of no shared ownership).

	struct obj {
		bpf_list_node ln;
		bpf_rb_node rn;
	};

	bpf_list_push_front(head, &obj->ln); // node is non-own-ref
	bpf_rbtree_remove(&obj->rn); // should not work, but does

So some notion of a collection identity needs to be constructed, the amount of
data which needs to be remembered in each non-owning reference's register state
depends on our requirements.

The first sanity check is that bpf_rbtree_remove only removes something in an
rbtree, so probably an enum member indicating whether collection is a list or
rbtree. To ensure proper scoped invalidation, we will unfortunately need more
than just the reg->id of the reg holding the graph root, since map values of
different maps may have same id (0). Hence, we need id and ptr similar to the
active lock case for proper matching. Even this won't be enough, as there can be
multiple list or rbtree roots in a particular memory region, therefore the
offset also needs to be part of the collection identity.

So it seems it will amount to:

	struct bpf_collection_id {
		enum bpf_collection_type type;
		void *ptr;
		int id;
		int off;
	};

There might be ways to optimize the memory footprint of this struct, but I'm
just trying to state why we'll need to include all four, so we don't miss out on
a corner case again.

> Signed-off-by: Dave Marchevsky <davemarchevsky@fb.com>
> ---
> [...]
Kumar Kartikeya Dwivedi Feb. 10, 2023, 2:15 p.m. UTC | #4
On Fri, Feb 10, 2023 at 04:11:25AM CET, Alexei Starovoitov wrote:
> On Thu, Feb 09, 2023 at 09:41:41AM -0800, Dave Marchevsky wrote:
> > @@ -9924,11 +9934,12 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
> >  				   meta.func_id == special_kfunc_list[KF_bpf_list_pop_back]) {
> >  				struct btf_field *field = meta.arg_list_head.field;
> >
> > -				mark_reg_known_zero(env, regs, BPF_REG_0);
> > -				regs[BPF_REG_0].type = PTR_TO_BTF_ID | MEM_ALLOC;
> > -				regs[BPF_REG_0].btf = field->graph_root.btf;
> > -				regs[BPF_REG_0].btf_id = field->graph_root.value_btf_id;
> > -				regs[BPF_REG_0].off = field->graph_root.node_offset;
> > +				mark_reg_graph_node(regs, BPF_REG_0, &field->graph_root);
> > +			} else if (meta.func_id == special_kfunc_list[KF_bpf_rbtree_remove] ||
> > +				   meta.func_id == special_kfunc_list[KF_bpf_rbtree_first]) {
> > +				struct btf_field *field = meta.arg_rbtree_root.field;
> > +
> > +				mark_reg_graph_node(regs, BPF_REG_0, &field->graph_root);
> >  			} else if (meta.func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx]) {
> >  				mark_reg_known_zero(env, regs, BPF_REG_0);
> >  				regs[BPF_REG_0].type = PTR_TO_BTF_ID | PTR_TRUSTED;
> > @@ -9994,7 +10005,13 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
> >  			if (is_kfunc_ret_null(&meta))
> >  				regs[BPF_REG_0].id = id;
> >  			regs[BPF_REG_0].ref_obj_id = id;
> > +		} else if (meta.func_id == special_kfunc_list[KF_bpf_rbtree_first]) {
> > +			ref_set_non_owning_lock(env, &regs[BPF_REG_0]);
> >  		}
>
> Looking at the above code where R0 state is set across two different if-s
> it feels that bool non_owning_ref_lock from patch 2 shouldn't be a bool.
>
> Patch 7 also has this split initialization of the reg state.
> First it does mark_reg_graph_node() which sets regs[regno].type = PTR_TO_BTF_ID | MEM_ALLOC
> and then it does ref_set_non_owning_lock() that sets that bool flag.
> Setting PTR_TO_BTF_ID | MEM_ALLOC in the helper without setting ref_obj_id > 0
> at the same time feels error prone.
>
> This non_owning_ref_lock bool flag is actually a just flag.
> I think it would be cleaner to make it similar to MEM_ALLOC and call it
> NON_OWN_REF = BIT(14 + BPF_BASE_TYPE_BITS).
>
> Then we can set it at once in this patch and in patch 7 and avoid this split init.
> The check in patch 2 also will become cleaner.
> Instead of:
> if (type_is_ptr_alloc_obj(reg->type) && reg->non_owning_ref_lock)
> it will be
> if (reg->type == PTR_TO_BTF_ID | MEM_ALLOC | NON_OWN_REF)
>
> the transition from owning to non-owning would be easier to follow as well:
> PTR_TO_BTF_ID | MEM_ALLOC with ref_obj_id > 0
>  ->
>    PTR_TO_BTF_ID | MEM_ALLOC | NON_OWN_REF with ref_obj_id == 0
>

Separate type flag looks cleaner to me too, especially now that such non-owning
references have concrete semantics and context associated with them.

> and it will probably help to avoid bugs where PTR_TO_BTF_ID | MEM_ALLOC is accepted
> but we forgot to check ref_obj_id. There are no such places now, but it feels
> less error prone with proper flag instead of bool.
>
> I would also squash patches 1 and 2. Since we've analyzed correctness of patch 2 well enough
> it doesn't make sense to go through the churn in patch 1 just to delete it in patch 2.
>

+1

> wdyt?
Alexei Starovoitov Feb. 10, 2023, 5:21 p.m. UTC | #5
On Fri, Feb 10, 2023 at 02:55:41PM +0100, Kumar Kartikeya Dwivedi wrote:
> On Thu, Feb 09, 2023 at 06:41:41PM CET, Dave Marchevsky wrote:
> > Newly-added bpf_rbtree_{remove,first} kfuncs have some special properties
> > that require handling in the verifier:
> >
> >   * both bpf_rbtree_remove and bpf_rbtree_first return the type containing
> >     the bpf_rb_node field, with the offset set to that field's offset,
> >     instead of a struct bpf_rb_node *
> >     * mark_reg_graph_node helper added in previous patch generalizes
> >       this logic, use it
> >
> >   * bpf_rbtree_remove's node input is a node that's been inserted
> >     in the tree - a non-owning reference.
> >
> >   * bpf_rbtree_remove must invalidate non-owning references in order to
> >     avoid aliasing issue. Use previously-added
> >     invalidate_non_owning_refs helper to mark this function as a
> >     non-owning ref invalidation point.
> >
> >   * Unlike other functions, which convert one of their input arg regs to
> >     non-owning reference, bpf_rbtree_first takes no arguments and just
> >     returns a non-owning reference (possibly null)
> >     * For now verifier logic for this is special-cased instead of
> >       adding new kfunc flag.
> >
> > This patch, along with the previous one, complete special verifier
> > handling for all rbtree API functions added in this series.
> >
> 
> I think there are two issues with the current approach. The fundamental
> assumption with non-owning references is that it is part of the collection. So
> bpf_rbtree_{add,first}, bpf_list_push_{front,back} will create them, as no node
> is being removed from collection. Marking bpf_rbtree_remove (and in the future
> bpf_list_del) as invalidation points is also right, since once a node has been
> removed it is going to be unclear whether existing non-owning references have
> the same value, and thus the property of 'part of the collection' will be
> broken.

correct, but the patch set does invalidate after bpf_rbtree_remove(),
so it's not an issue.

> The first issue relates to usability. If I have non-owning references to nodes
> inserted into both a list and an rbtree, bpf_rbtree_remove should only
> invalidate the ones that are part of the particular rbtree. It should have no
> effect on others. Likewise for the bpf_list_del operation in the future.
> Therefore, we need to track the collection identity associated with each
> non-owning reference, then only invalidate non-owning references associated with
> the same collection.
> 
> The case of bpf_spin_unlock is different, which should invalidate all non-owning
> references.
> 
> The second issue is more serious. By not tracking the collection identity, we
> will currently allow a non-owning reference for an object inserted into a list
> to be passed to bpf_rbtree_remove, because the verifier cannot discern between
> 'inserted into rbtree' vs 'inserted into list'. For it, both are currently
> equivalent in the verifier state. An object is allowed to have both
> bpf_list_node and bpf_rb_node, but it can only be part of one collection at a
> time (because of no shared ownership).
> 
> 	struct obj {
> 		bpf_list_node ln;
> 		bpf_rb_node rn;
> 	};
> 
> 	bpf_list_push_front(head, &obj->ln); // node is non-own-ref
> 	bpf_rbtree_remove(&obj->rn); // should not work, but does

Also correct, but inserting the same single owner node into rbtree and link list
is not supported. Only 'shared ownership' node can be inserted into
two collections.
The check to disallow bpf_list_node and bpf_rb_node in the same obj
can be a follow up patch to close this hole.

> So some notion of a collection identity needs to be constructed, the amount of
> data which needs to be remembered in each non-owning reference's register state
> depends on our requirements.
> 
> The first sanity check is that bpf_rbtree_remove only removes something in an
> rbtree, so probably an enum member indicating whether collection is a list or
> rbtree. To ensure proper scoped invalidation, we will unfortunately need more
> than just the reg->id of the reg holding the graph root, since map values of
> different maps may have same id (0). Hence, we need id and ptr similar to the
> active lock case for proper matching. Even this won't be enough, as there can be
> multiple list or rbtree roots in a particular memory region, therefore the
> offset also needs to be part of the collection identity.
> 
> So it seems it will amount to:
> 
> 	struct bpf_collection_id {
> 		enum bpf_collection_type type;
> 		void *ptr;
> 		int id;
> 		int off;
> 	};
> 
> There might be ways to optimize the memory footprint of this struct, but I'm
> just trying to state why we'll need to include all four, so we don't miss out on
> a corner case again.

The trade-off doesn't feel right here. Tracking collection id complexity in
the verifier for the single-owner case is not worth it imo.
We should focus on adding support for 'shared ownership' with explicit refcount in the obj.
Then the same obj can be inserted into two rbtrees or into rbtree and link list.

Single owner rb-tree is a big step and we've been trying to make this step for the last ~6 months.
I prefer to do it now and worry about UX, shared owner, etc in the follow ups.
We need to start using lists and rbtree in bpf progs that do real work to get a feel
on whether UX is right or unusable or somewhere in-between.
Alexei Starovoitov Feb. 10, 2023, 5:30 p.m. UTC | #6
On Fri, Feb 10, 2023 at 03:22:43AM -0500, Dave Marchevsky wrote:
> On 2/9/23 10:11 PM, Alexei Starovoitov wrote:
> > On Thu, Feb 09, 2023 at 09:41:41AM -0800, Dave Marchevsky wrote:
> >> @@ -9924,11 +9934,12 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
> >>  				   meta.func_id == special_kfunc_list[KF_bpf_list_pop_back]) {
> >>  				struct btf_field *field = meta.arg_list_head.field;
> >>  
> >> -				mark_reg_known_zero(env, regs, BPF_REG_0);
> >> -				regs[BPF_REG_0].type = PTR_TO_BTF_ID | MEM_ALLOC;
> >> -				regs[BPF_REG_0].btf = field->graph_root.btf;
> >> -				regs[BPF_REG_0].btf_id = field->graph_root.value_btf_id;
> >> -				regs[BPF_REG_0].off = field->graph_root.node_offset;
> >> +				mark_reg_graph_node(regs, BPF_REG_0, &field->graph_root);
> >> +			} else if (meta.func_id == special_kfunc_list[KF_bpf_rbtree_remove] ||
> >> +				   meta.func_id == special_kfunc_list[KF_bpf_rbtree_first]) {
> >> +				struct btf_field *field = meta.arg_rbtree_root.field;
> >> +
> >> +				mark_reg_graph_node(regs, BPF_REG_0, &field->graph_root);
> >>  			} else if (meta.func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx]) {
> >>  				mark_reg_known_zero(env, regs, BPF_REG_0);
> >>  				regs[BPF_REG_0].type = PTR_TO_BTF_ID | PTR_TRUSTED;
> >> @@ -9994,7 +10005,13 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
> >>  			if (is_kfunc_ret_null(&meta))
> >>  				regs[BPF_REG_0].id = id;
> >>  			regs[BPF_REG_0].ref_obj_id = id;
> >> +		} else if (meta.func_id == special_kfunc_list[KF_bpf_rbtree_first]) {
> >> +			ref_set_non_owning_lock(env, &regs[BPF_REG_0]);
> >>  		}
> > 
> > Looking at the above code where R0 state is set across two different if-s
> > it feels that bool non_owning_ref_lock from patch 2 shouldn't be a bool.
> 
> Re: "set across two different if-s" - I see what you mean, and the fact that
> both are doing 'meta.func_id == whatever' checks doesn't make it clear why
> they're separate. But note that above the else if that the second check
> is adding is "if (is_kfunc_acquire(&meta))" check, acquire_reference_state, etc.

fair enough. let's keep it split.

> "Is function acquire" is a function-level property and, as the kfunc flags I
> tried to add in previous versions of this series indicate, I think that
> "returns a non-owning reference" and "need to invalidate non-owning refs"
> are function-level properties as well.

agree

> As a contrast, the first addition - with mark_reg_graph_node - is more of a
> return-type-level property. Instead of doing meta.func_id checks in that change,
> we could instead assume that any kfunc returning "bpf_rb_node *" is actually
> returning a graph node type w/ bpf_rb_node field. It's certainly a blurry line
> in this case since it's necessary to peek at the bpf_rb_root arg in order to
> provide info about the node type to mark_reg_graph_node. But this is similar
> to RET_PTR_TO_MAP_VALUE logic which requires a bpf_map * to provide info about
> the map value being returned.
> 
> Why does this distinction matter at all? Because I'd like to eventually merge
> helper and kfunc verification as much as possible / reasonable, especially the
> different approaches to func_proto-like logic. Currently, the bpf_func_proto
> approach used by bpf helpers is better at expressing 
> {arg,return}-type level properties. A helper func_proto can do
> 
>   .arg2_type = ARG_PTR_TO_BTF_ID_OR_NULL | OBJ_RELEASE,
> 
> and it's obvious which arg is being released, whereas kfunc equivalent is
> KF_RELEASE flag on the kfunc itself and verifier needs to assume that there's
> a single arg w/ ref_obj_id which is being released. Sure, kfunc annotations
> (e.g. __sz, __alloc) could be extended to support all of this, but that's
> not the current state of things, and such name suffixes wouldn't work for
> retval.
> 
> Similarly, current kfunc definition scheme is better at expressing function-
> level properties:
> 
>   BTF_ID_FLAGS(func, whatever, KF_ACQUIRE)
> 
> There's no func_proto equivalent, the is_acquire_function helper used in
> check_helper_call resorts to "func_id ==" checks. For acquire specifically
> it could be faked with a OBJ_ACQUIRE flag on retval in the proto, but I
> don't know if the same would make sense for "need to invalidate non-owning
> refs" or something like KF_TRUSTED_ARGS.
> 
> Anyways, this was a long-winded way of saying that separating this logic across
> two different if-s was intentional and will help with future refactoring.

Agree with all of the above. We need to address this tech debt and merge
kfunc and helper validation, but this is orthogonal to this patch set.
It's a separate discussion to have with lots of bike shedding ahead.

> > Patch 7 also has this split initialization of the reg state.
> > First it does mark_reg_graph_node() which sets regs[regno].type = PTR_TO_BTF_ID | MEM_ALLOC
> > and then it does ref_set_non_owning_lock() that sets that bool flag.
> > Setting PTR_TO_BTF_ID | MEM_ALLOC in the helper without setting ref_obj_id > 0
> > at the same time feels error prone.
> 
> It's unfortunate that the reg type isn't really complete for rbtree_first
> until after the second chunk of code, but this was already happening with
> bpf_list_pop_{front,back}, which rely on KF_ACQUIRE flag and
> is_kfunc_acquire check to set ref_obj_id on the popped owning reference.
> 
> Maybe to assuage your 'error prone' concern some check can be added at
> the end of check_kfunc_call which ensures that PTR_TO_BTF_ID | MEM_ALLOC
> types are properly configured, and dies with 'verifier internal error'
> if not. I'm not convinced it's necessary, but regardless it would be
> similar to commit 47e34cb74d37 ("bpf: Add verifier check for BPF_PTR_POISON retval and arg")
> which I added a few months ago.

I think it's a good idea to add such safety check, but let's do it in the follow up.

> > This non_owning_ref_lock bool flag is actually a just flag.
> > I think it would be cleaner to make it similar to MEM_ALLOC and call it
> > NON_OWN_REF = BIT(14 + BPF_BASE_TYPE_BITS).
> > 
> > Then we can set it at once in this patch and in patch 7 and avoid this split init.
> > The check in patch 2 also will become cleaner.
> > Instead of:
> > if (type_is_ptr_alloc_obj(reg->type) && reg->non_owning_ref_lock)
> > it will be
> > if (reg->type == PTR_TO_BTF_ID | MEM_ALLOC | NON_OWN_REF)
> 
> Minor tangent: this is why I like helpers like type_is_ptr_alloc_obj - they make
> it obvious what properties a reg should have to be considered a certain type by
> the verifier, and provide more context as to what specific type check is
> happening vs a raw check.
> 
> IMO the cleanest version of that check would be if(reg_is_non_owning_ref(reg))
> with the newly-added helper doing what you'd expect.

Like
static bool type_is_non_owning_ref(u32 type)
{
        return base_type(type) == PTR_TO_BTF_ID &&  type_flag(type) & (MEM_ALLOC | NON_OWN_REF);
}

?
If so that makes sense.

> > 
> > the transition from owning to non-owning would be easier to follow as well:
> > PTR_TO_BTF_ID | MEM_ALLOC with ref_obj_id > 0
> >  -> 
> >    PTR_TO_BTF_ID | MEM_ALLOC | NON_OWN_REF with ref_obj_id == 0
> > 
> > and it will probably help to avoid bugs where PTR_TO_BTF_ID | MEM_ALLOC is accepted
> > but we forgot to check ref_obj_id. There are no such places now, but it feels
> > less error prone with proper flag instead of bool.
> 
> I'm not strongly opposed to a NON_OWN_REF type flag. It's a more granular
> version of the KF_RELEASE_NON_OWN flag which I tried to add in a previous
> version of this series. But some comments:
> 
> * Such a flag would eliminate the need to change bpf_reg_state in this
>   series, but I think this will be temporary. If we add support for nested
>   locks we'll need to reintroduce some "lock identity" again. If we want
>   to improve UX for non-owning reference invalidation in the case where
>   a list_head and rb_root share the same lock, we'll need to introduce some
>   "datastructure root identity" to allow invalidation of only the list's
>   non-owning refs on list_pop.

As replied to Kumar I prefer to minimize the complexity now and worry about
list_head and rb_root under the same lock later.

> * Sure, we could have both the NON_OWN_REF flag and additional {lock,root}
>   identity structures. But there isn't infinite room for type flags and
>   currently non-owning ref concept is contained to just two data structures.
>   IMO in terms of generality this flag is closer to MEM_RINGBUF than
>   PTR_MAYBE_NULL. If we're going to need {lock,root} identity structs
>   and can use them to disambiguate between owning/non-owning refs quickly,
>   why bother with an extra flag?

Let's cross that bridge when we get there.
Currently NON_OWN_REF flag looks strictly cleaner to me than extra 'bool' in reg_state.
If we need to refactor it later we can certainly do that.
I really want to move this feature forward.
Kumar Kartikeya Dwivedi Feb. 10, 2023, 6:03 p.m. UTC | #7
On Fri, Feb 10, 2023 at 06:21:37PM CET, Alexei Starovoitov wrote:
> On Fri, Feb 10, 2023 at 02:55:41PM +0100, Kumar Kartikeya Dwivedi wrote:
> > On Thu, Feb 09, 2023 at 06:41:41PM CET, Dave Marchevsky wrote:
> > > Newly-added bpf_rbtree_{remove,first} kfuncs have some special properties
> > > that require handling in the verifier:
> > >
> > >   * both bpf_rbtree_remove and bpf_rbtree_first return the type containing
> > >     the bpf_rb_node field, with the offset set to that field's offset,
> > >     instead of a struct bpf_rb_node *
> > >     * mark_reg_graph_node helper added in previous patch generalizes
> > >       this logic, use it
> > >
> > >   * bpf_rbtree_remove's node input is a node that's been inserted
> > >     in the tree - a non-owning reference.
> > >
> > >   * bpf_rbtree_remove must invalidate non-owning references in order to
> > >     avoid aliasing issue. Use previously-added
> > >     invalidate_non_owning_refs helper to mark this function as a
> > >     non-owning ref invalidation point.
> > >
> > >   * Unlike other functions, which convert one of their input arg regs to
> > >     non-owning reference, bpf_rbtree_first takes no arguments and just
> > >     returns a non-owning reference (possibly null)
> > >     * For now verifier logic for this is special-cased instead of
> > >       adding new kfunc flag.
> > >
> > > This patch, along with the previous one, complete special verifier
> > > handling for all rbtree API functions added in this series.
> > >
> >
> > I think there are two issues with the current approach. The fundamental
> > assumption with non-owning references is that it is part of the collection. So
> > bpf_rbtree_{add,first}, bpf_list_push_{front,back} will create them, as no node
> > is being removed from collection. Marking bpf_rbtree_remove (and in the future
> > bpf_list_del) as invalidation points is also right, since once a node has been
> > removed it is going to be unclear whether existing non-owning references have
> > the same value, and thus the property of 'part of the collection' will be
> > broken.
>
> correct, but the patch set does invalidate after bpf_rbtree_remove(),
> so it's not an issue.
>
> > The first issue relates to usability. If I have non-owning references to nodes
> > inserted into both a list and an rbtree, bpf_rbtree_remove should only
> > invalidate the ones that are part of the particular rbtree. It should have no
> > effect on others. Likewise for the bpf_list_del operation in the future.
> > Therefore, we need to track the collection identity associated with each
> > non-owning reference, then only invalidate non-owning references associated with
> > the same collection.
> >
> > The case of bpf_spin_unlock is different, which should invalidate all non-owning
> > references.
> >
> > The second issue is more serious. By not tracking the collection identity, we
> > will currently allow a non-owning reference for an object inserted into a list
> > to be passed to bpf_rbtree_remove, because the verifier cannot discern between
> > 'inserted into rbtree' vs 'inserted into list'. For it, both are currently
> > equivalent in the verifier state. An object is allowed to have both
> > bpf_list_node and bpf_rb_node, but it can only be part of one collection at a
> > time (because of no shared ownership).
> >
> > 	struct obj {
> > 		bpf_list_node ln;
> > 		bpf_rb_node rn;
> > 	};
> >
> > 	bpf_list_push_front(head, &obj->ln); // node is non-own-ref
> > 	bpf_rbtree_remove(&obj->rn); // should not work, but does
>
> Also correct, but inserting the same single owner node into rbtree and link list
> is not supported. Only 'shared ownership' node can be inserted into
> two collections.

What is supported is having an object be part of a list and an rbtree one at a
time, which is what I'm talking about here. Shared ownership has nothing to do
with this.

> The check to disallow bpf_list_node and bpf_rb_node in the same obj
> can be a follow up patch to close this hole.
>

Fine, that would also 'fix' this problem, where a non-owning reference part of a
list could be passed to bpf_rbtree_remove etc. If there can only be a list node
or an rbtree node in an object, such a case cannot be constructed. But I think
it's an awkward limitation.

> > So some notion of a collection identity needs to be constructed, the amount of
> > data which needs to be remembered in each non-owning reference's register state
> > depends on our requirements.
> >
> > The first sanity check is that bpf_rbtree_remove only removes something in an
> > rbtree, so probably an enum member indicating whether collection is a list or
> > rbtree. To ensure proper scoped invalidation, we will unfortunately need more
> > than just the reg->id of the reg holding the graph root, since map values of
> > different maps may have same id (0). Hence, we need id and ptr similar to the
> > active lock case for proper matching. Even this won't be enough, as there can be
> > multiple list or rbtree roots in a particular memory region, therefore the
> > offset also needs to be part of the collection identity.
> >
> > So it seems it will amount to:
> >
> > 	struct bpf_collection_id {
> > 		enum bpf_collection_type type;
> > 		void *ptr;
> > 		int id;
> > 		int off;
> > 	};
> >
> > There might be ways to optimize the memory footprint of this struct, but I'm
> > just trying to state why we'll need to include all four, so we don't miss out on
> > a corner case again.
>
> The trade-off doesn't feel right here. Tracking collection id complexity in
> the verifier for single owner case is not worth it imo.

It was more about correctness after this set is applied than being worth it. We
could argue it's not worth it for the first issue which relates to usability.
Dave has already mentioned that point. But the second one is simply incorrect to
allow. As soon as you want an object which can be part of both a list and rbtree
at different times (e.g., an item usually part of an rbtree but which is popped
out and moved to a list for some processing), people will be able to trigger it.

Simply disallowing that (as you said above) is also an option. But whenever you
do allow it, you will need to add something like this. It has little to do with
whether shared ownership is supported or not.

> We should focus on adding support for 'shared ownership' with explicit refcount in the obj.
> Then the same obj can be inserted into two rbtrees or into rbtree and link list.
>
> Single owner rb-tree is a big step and we've been trying to make this step for the last ~6 month.
> I prefer to do it now and worry about UX, shared owner, etc in the follow ups.
> We need to start using lists and rbtree in bpf progs that do real work to get a feel
> on whether UX is right or unusable or somewhere in-between.
Alexei Starovoitov Feb. 10, 2023, 6:58 p.m. UTC | #8
On Fri, Feb 10, 2023 at 07:03:46PM +0100, Kumar Kartikeya Dwivedi wrote:
> On Fri, Feb 10, 2023 at 06:21:37PM CET, Alexei Starovoitov wrote:
> > On Fri, Feb 10, 2023 at 02:55:41PM +0100, Kumar Kartikeya Dwivedi wrote:
> > > On Thu, Feb 09, 2023 at 06:41:41PM CET, Dave Marchevsky wrote:
> > > > Newly-added bpf_rbtree_{remove,first} kfuncs have some special properties
> > > > that require handling in the verifier:
> > > >
> > > >   * both bpf_rbtree_remove and bpf_rbtree_first return the type containing
> > > >     the bpf_rb_node field, with the offset set to that field's offset,
> > > >     instead of a struct bpf_rb_node *
> > > >     * mark_reg_graph_node helper added in previous patch generalizes
> > > >       this logic, use it
> > > >
> > > >   * bpf_rbtree_remove's node input is a node that's been inserted
> > > >     in the tree - a non-owning reference.
> > > >
> > > >   * bpf_rbtree_remove must invalidate non-owning references in order to
> > > >     avoid aliasing issue. Use previously-added
> > > >     invalidate_non_owning_refs helper to mark this function as a
> > > >     non-owning ref invalidation point.
> > > >
> > > >   * Unlike other functions, which convert one of their input arg regs to
> > > >     non-owning reference, bpf_rbtree_first takes no arguments and just
> > > >     returns a non-owning reference (possibly null)
> > > >     * For now verifier logic for this is special-cased instead of
> > > >       adding new kfunc flag.
> > > >
> > > > This patch, along with the previous one, complete special verifier
> > > > handling for all rbtree API functions added in this series.
> > > >
> > >
> > > I think there are two issues with the current approach. The fundamental
> > > assumption with non-owning references is that it is part of the collection. So
> > > bpf_rbtree_{add,first}, bpf_list_push_{front,back} will create them, as no node
> > > is being removed from collection. Marking bpf_rbtree_remove (and in the future
> > > bpf_list_del) as invalidation points is also right, since once a node has been
> > > removed it is going to be unclear whether existing non-owning references have
> > > the same value, and thus the property of 'part of the collection' will be
> > > broken.
> >
> > correct, but the patch set does invalidate after bpf_rbtree_remove(),
> > so it's not an issue.
> >
> > > The first issue relates to usability. If I have non-owning references to nodes
> > > inserted into both a list and an rbtree, bpf_rbtree_remove should only
> > > invalidate the ones that are part of the particular rbtree. It should have no
> > > effect on others. Likewise for the bpf_list_del operation in the future.
> > > Therefore, we need to track the collection identity associated with each
> > > non-owning reference, then only invalidate non-owning references associated with
> > > the same collection.
> > >
> > > The case of bpf_spin_unlock is different, which should invalidate all non-owning
> > > references.
> > >
> > > The second issue is more serious. By not tracking the collection identity, we
> > > will currently allow a non-owning reference for an object inserted into a list
> > > to be passed to bpf_rbtree_remove, because the verifier cannot discern between
> > > 'inserted into rbtree' vs 'inserted into list'. For it, both are currently
> > > equivalent in the verifier state. An object is allowed to have both
> > > bpf_list_node and bpf_rb_node, but it can only be part of one collection at a
> > > time (because of no shared ownership).
> > >
> > > 	struct obj {
> > > 		bpf_list_node ln;
> > > 		bpf_rb_node rn;
> > > 	};
> > >
> > > 	bpf_list_push_front(head, &obj->ln); // node is non-own-ref
> > > 	bpf_rbtree_remove(&obj->rn); // should not work, but does
> >
> > Also correct, but inserting the same single owner node into rbtree and link list
> > is not supported. Only 'shared ownership' node can be inserted into
> > two collections.
> 
> What is supported is having an object be part of a list and an rbtree one at a
> time, which is what I'm talking about here. Shared ownership has nothing to do
> with this.

I wouldn't call it "supported".
btf_parse_struct_metas() doesn't error on structs with bpf_list_node and bpf_rb_node in them.
It allows two bpf_list_node-s as well.
list_node+rb_node is broken as you said.
I'm not sure two list_node-s is correct either. For the same reasons.
So it's an omission rather than support.

> > The check to disallow bpf_list_node and bpf_rb_node in the same obj
> > can be a follow up patch to close this hole.
> >
> 
> Fine, that would also 'fix' this problem, where a non-owning reference part of a
> list could be passed to bpf_rbtree_remove etc. If there can only be a list node
> or an rbtree node in an object, such a case cannot be constructed. But I think
> it's an awkward limitation.

Regardless whether non-own refs exist or not. Two list_node-s are unusable
with single owner model.
struct obj {
	bpf_list_node ln;
	bpf_list_node ln2;
};

bpf_list_push_front(head, &obj->ln); // doesn't matter what we do with obj here
bpf_list_push_front(head2, &obj->ln2); // cannot be made to work correctly 
The only way to actually support this case is to have refcount in obj.

It can "work" when obj is a part of only one list at a time, but one can reasonably
argue that a requirement to have two 'bpf_list_node'-s in an obj just to insert obj
in one list, pop it, and then insert into another list through a different list_node
is nothing but a waste of memory.
The user should be able to use the same 'bpf_list_node' to insert into one
list_head, remove it from there, and then insert into another list_head.

> > > So some notion of a collection identity needs to be constructed, the amount of
> > > data which needs to be remembered in each non-owning reference's register state
> > > depends on our requirements.
> > >
> > > The first sanity check is that bpf_rbtree_remove only removes something in an
> > > rbtree, so probably an enum member indicating whether collection is a list or
> > > rbtree. To ensure proper scoped invalidation, we will unfortunately need more
> > > than just the reg->id of the reg holding the graph root, since map values of
> > > different maps may have same id (0). Hence, we need id and ptr similar to the
> > > active lock case for proper matching. Even this won't be enough, as there can be
> > > multiple list or rbtree roots in a particular memory region, therefore the
> > > offset also needs to be part of the collection identity.
> > >
> > > So it seems it will amount to:
> > >
> > > 	struct bpf_collection_id {
> > > 		enum bpf_collection_type type;
> > > 		void *ptr;
> > > 		int id;
> > > 		int off;
> > > 	};
> > >
> > > There might be ways to optimize the memory footprint of this struct, but I'm
> > > just trying to state why we'll need to include all four, so we don't miss out on
> > > a corner case again.
> >
> > The trade-off doesn't feel right here. Tracking collection id complexity in
> > the verifier for single owner case is not worth it imo.
> 
> It was more about correctness after this set is applied than being worth it. We
> could argue it's not worth it for the first issue which relates to usability.
> Dave has already mentioned that point. But the second one is simply incorrect to
> allow. As soon as you want an object which can be part of both a list and rbtree
> at different times (e.g., an item usually part of an rbtree but which is popped
> out and moved to a list for some processing), people will be able to trigger it.

obj-in-a-list or obj-in-a-rbtree is technically possible, but I argue it's an equally
weird corner case to support. See above why two list_node-s for head-at-a-time are awkward.

> Simply disallowing that (as you said above) is also an option. But whenever you
> do allow it, you will need to add something like this. It has little to do with
> whether shared ownership is supported or not.

I think we will allow two list_nodes or list_node+rb_node or ten rb_nodes
only when refcount is present as well and that would be 'shared ownership' case.
Dave Marchevsky Feb. 10, 2023, 7:01 p.m. UTC | #9
On 2/10/23 8:55 AM, Kumar Kartikeya Dwivedi wrote:
> On Thu, Feb 09, 2023 at 06:41:41PM CET, Dave Marchevsky wrote:
>> Newly-added bpf_rbtree_{remove,first} kfuncs have some special properties
>> that require handling in the verifier:
>>
>>   * both bpf_rbtree_remove and bpf_rbtree_first return the type containing
>>     the bpf_rb_node field, with the offset set to that field's offset,
>>     instead of a struct bpf_rb_node *
>>     * mark_reg_graph_node helper added in previous patch generalizes
>>       this logic, use it
>>
>>   * bpf_rbtree_remove's node input is a node that's been inserted
>>     in the tree - a non-owning reference.
>>
>>   * bpf_rbtree_remove must invalidate non-owning references in order to
>>     avoid aliasing issue. Use previously-added
>>     invalidate_non_owning_refs helper to mark this function as a
>>     non-owning ref invalidation point.
>>
>>   * Unlike other functions, which convert one of their input arg regs to
>>     non-owning reference, bpf_rbtree_first takes no arguments and just
>>     returns a non-owning reference (possibly null)
>>     * For now verifier logic for this is special-cased instead of
>>       adding new kfunc flag.
>>
>> This patch, along with the previous one, complete special verifier
>> handling for all rbtree API functions added in this series.
>>

As I was writing this response I noticed that Alexei started a subthread
which makes similar points.

> 
> I think there are two issues with the current approach. The fundamental
> assumption with non-owning references is that it is part of the collection. So
> bpf_rbtree_{add,first}, bpf_list_push_{front,back} will create them, as no node
> is being removed from collection. Marking bpf_rbtree_remove (and in the future
> bpf_list_del) as invalidation points is also right, since once a node has been
> removed it is going to be unclear whether existing non-owning references have
> the same value, and thus the property of 'part of the collection' will be
> broken.
> 
> The first issue relates to usability. If I have non-owning references to nodes
> inserted into both a list and an rbtree, bpf_rbtree_remove should only
> invalidate the ones that are part of the particular rbtree. It should have no
> effect on others. Likewise for the bpf_list_del operation in the future.
> Therefore, we need to track the collection identity associated with each
> non-owning reference, then only invalidate non-owning references associated with
> the same collection.
> 
> The case of bpf_spin_unlock is different, which should invalidate all non-owning
> references.
> 

I agree that if we had "data structure" or "collection" identity we would be
able to make this optimization. I also agree that such an optimization is likely
necessary for good usability in nontrivial scenarios. Alexei convinced me to
punt on it for now (over VC, I think, so can't link a thread), with the goal
of keeping complexity of this series under control.

Since "invalidate all non-owning references" is a superset of "invalidate
non-owning references associated with a particular collection", I think it's
fine to proceed with this for now if it means we can avoid adding collection
identity until followup. But I do plan on adding it and was thinking along
similar lines to your struct bpf_collection_id below. Which brings us to
second issue...

> The second issue is more serious. By not tracking the collection identity, we
> will currently allow a non-owning reference for an object inserted into a list
> to be passed to bpf_rbtree_remove, because the verifier cannot discern between
> 'inserted into rbtree' vs 'inserted into list'. For it, both are currently
> equivalent in the verifier state. An object is allowed to have both
> bpf_list_node and bpf_rb_node, but it can only be part of one collection at a
> time (because of no shared ownership).
> 
> 	struct obj {
> 		bpf_list_node ln;
> 		bpf_rb_node rn;
> 	};
> 
> 	bpf_list_push_front(head, &obj->ln); // node is non-own-ref
> 	bpf_rbtree_remove(&obj->rn); // should not work, but does
> 
> So some notion of a collection identity needs to be constructed, the amount of
> data which needs to be remembered in each non-owning reference's register state
> depends on our requirements.
> 
> The first sanity check is that bpf_rbtree_remove only removes something in an
> rbtree, so probably an enum member indicating whether collection is a list or
> rbtree. To ensure proper scoped invalidation, we will unfortunately need more
> than just the reg->id of the reg holding the graph root, since map values of
> different maps may have same id (0). Hence, we need id and ptr similar to the
> active lock case for proper matching. Even this won't be enough, as there can be
> multiple list or rbtree roots in a particular memory region, therefore the
> offset also needs to be part of the collection identity.
> 

I agree with your logic here: the logic in this series will consider your
example code a valid program when it should fail verification.

Since there's no support for "shared ownership" added in this series, we must
prevent an object from being part of both list and rbtree in your above example.
Adding "collection identity" as you propose seems like it would be sufficient,
but would increase complexity of this series. Alternatively, we could add a
small patch which causes btf_parse_fields to fail if a struct has both
bpf_list_node and bpf_rb_node. Then a "collection identity"-focused followup
could remove that kludge in favor of proper logic.

This was an oversight on my part, I wasn't separating "node can be part of both
list and rbtree" from "node can be part of either, but not both". So I see why
you mention this second issue being orthogonal from shared ownership in the
other subthread. But, at least for our current internal usecases, we always want
shared ownership, not "part of either, but not both". If it's possible to
turn both off for now and do "collection identity" and "shared ownership"
followups, I'd rather do that and ship complexity as gradually as possible.

> So it seems it will amount to:
> 
> 	struct bpf_collection_id {
> 		enum bpf_collection_type type;
> 		void *ptr;
> 		int id;
> 		int off;
> 	};
> 
> There might be ways to optimize the memory footprint of this struct, but I'm
> just trying to state why we'll need to include all four, so we don't miss out on
> a corner case again.
> 
>> Signed-off-by: Dave Marchevsky <davemarchevsky@fb.com>
>> ---
>> [...]
Kumar Kartikeya Dwivedi Feb. 10, 2023, 7:38 p.m. UTC | #10
On Fri, Feb 10, 2023 at 07:58:54PM CET, Alexei Starovoitov wrote:
> On Fri, Feb 10, 2023 at 07:03:46PM +0100, Kumar Kartikeya Dwivedi wrote:
> > On Fri, Feb 10, 2023 at 06:21:37PM CET, Alexei Starovoitov wrote:
> > > On Fri, Feb 10, 2023 at 02:55:41PM +0100, Kumar Kartikeya Dwivedi wrote:
> > > > On Thu, Feb 09, 2023 at 06:41:41PM CET, Dave Marchevsky wrote:
> > > > > Newly-added bpf_rbtree_{remove,first} kfuncs have some special properties
> > > > > that require handling in the verifier:
> > > > >
> > > > >   * both bpf_rbtree_remove and bpf_rbtree_first return the type containing
> > > > >     the bpf_rb_node field, with the offset set to that field's offset,
> > > > >     instead of a struct bpf_rb_node *
> > > > >     * mark_reg_graph_node helper added in previous patch generalizes
> > > > >       this logic, use it
> > > > >
> > > > >   * bpf_rbtree_remove's node input is a node that's been inserted
> > > > >     in the tree - a non-owning reference.
> > > > >
> > > > >   * bpf_rbtree_remove must invalidate non-owning references in order to
> > > > >     avoid aliasing issue. Use previously-added
> > > > >     invalidate_non_owning_refs helper to mark this function as a
> > > > >     non-owning ref invalidation point.
> > > > >
> > > > >   * Unlike other functions, which convert one of their input arg regs to
> > > > >     non-owning reference, bpf_rbtree_first takes no arguments and just
> > > > >     returns a non-owning reference (possibly null)
> > > > >     * For now verifier logic for this is special-cased instead of
> > > > >       adding new kfunc flag.
> > > > >
> > > > > This patch, along with the previous one, complete special verifier
> > > > > handling for all rbtree API functions added in this series.
> > > > >
> > > >
> > > > I think there are two issues with the current approach. The fundamental
> > > > assumption with non-owning references is that it is part of the collection. So
> > > > bpf_rbtree_{add,first}, bpf_list_push_{front,back} will create them, as no node
> > > > is being removed from collection. Marking bpf_rbtree_remove (and in the future
> > > > bpf_list_del) as invalidation points is also right, since once a node has been
> > > > removed it is going to be unclear whether existing non-owning references have
> > > > the same value, and thus the property of 'part of the collection' will be
> > > > broken.
> > >
> > > correct, but the patch set does invalidate after bpf_rbtree_remove(),
> > > so it's not an issue.
> > >
> > > > The first issue relates to usability. If I have non-owning references to nodes
> > > > inserted into both a list and an rbtree, bpf_rbtree_remove should only
> > > > invalidate the ones that are part of the particular rbtree. It should have no
> > > > effect on others. Likewise for the bpf_list_del operation in the future.
> > > > Therefore, we need to track the collection identity associated with each
> > > > non-owning reference, then only invalidate non-owning references associated with
> > > > the same collection.
> > > >
> > > > The case of bpf_spin_unlock is different, which should invalidate all non-owning
> > > > references.
> > > >
> > > > The second issue is more serious. By not tracking the collection identity, we
> > > > will currently allow a non-owning reference for an object inserted into a list
> > > > to be passed to bpf_rbtree_remove, because the verifier cannot discern between
> > > > 'inserted into rbtree' vs 'inserted into list'. For it, both are currently
> > > > equivalent in the verifier state. An object is allowed to have both
> > > > bpf_list_node and bpf_rb_node, but it can only be part of one collection at a
> > > > time (because of no shared ownership).
> > > >
> > > > 	struct obj {
> > > > 		bpf_list_node ln;
> > > > 		bpf_rb_node rn;
> > > > 	};
> > > >
> > > > 	bpf_list_push_front(head, &obj->ln); // node is non-own-ref
> > > > 	bpf_rbtree_remove(&obj->rn); // should not work, but does
> > >
> > > Also correct, but inserting the same single owner node into rbtree and link list
> > > is not supported. Only 'shared ownership' node can be inserted into
> > > two collections.
> >
> > What is supported is having an object be part of a list and an rbtree one at a
> > time, which is what I'm talking about here. Shared ownership has nothing to do
> > with this.
>
> I wouldn't call it "supported".
> btf_parse_struct_metas() doesn't error on structs with bpf_list_node and bpf_rb_node in them.
> It allows two bpf_list_node-s as well.
> list_node+rb_node is broken as you said.
> I'm not sure two list_node-s is correct either. For the same reasons.

You mean it's incorrect before or after this series? If before, can you
elaborate? The node isn't usable with kfuncs after push before this series.

> So it's an omission rather than support.
>
> > > The check to disallow bpf_list_node and bpf_rb_node in the same obj
> > > can be a follow up patch to close this hole.
> > >
> >
> > Fine, that would also 'fix' this problem, where a non-owning reference part of a
> > list could be passed to bpf_rbtree_remove etc. If there can only be a list node
> > or an rbtree node in an object, such a case cannot be constructed. But I think
> > it's an awkward limitation.
>
> Regardless whether non-own refs exist or not. Two list_node-s are unusable
> with single owner model.
> struct obj {
> 	bpf_list_node ln;
> 	bpf_list_node ln2;
> };
>
> bpf_list_push_front(head, &obj->ln); // doesn't matter what we do with obj here
> bpf_list_push_front(head2, &obj->ln2); // cannot be made to work correctly
> The only way to actually support this case is to have refcount in obj.
>

Yes, I think we all agree it's not possible currently.

> It can "work" when obj is a part of only one list at a time, but one can reasonably
> argue that a requirement to have two 'bpf_list_node'-s in an obj just to insert obj
> in one list, pop it, and then insert into another list through a different list_node
> is nothing but a waste of memory.

I didn't get this. Unless I'm mistaken, you should be able to move objects
around from one list to another using the same single bpf_list_node. You don't
need two. Just that the list_head needs correct __contains annotation.

> The user should be able to use the same 'bpf_list_node' to insert into one
> list_head, remove it from there, and then insert into another list_head.
>

I think this already works.

> > > > So some notion of a collection identity needs to be constructed, the amount of
> > > > data which needs to be remembered in each non-owning reference's register state
> > > > depends on our requirements.
> > > >
> > > > The first sanity check is that bpf_rbtree_remove only removes something in an
> > > > rbtree, so probably an enum member indicating whether collection is a list or
> > > > rbtree. To ensure proper scoped invalidation, we will unfortunately need more
> > > > than just the reg->id of the reg holding the graph root, since map values of
> > > > different maps may have same id (0). Hence, we need id and ptr similar to the
> > > > active lock case for proper matching. Even this won't be enough, as there can be
> > > > multiple list or rbtree roots in a particular memory region, therefore the
> > > > offset also needs to be part of the collection identity.
> > > >
> > > > So it seems it will amount to:
> > > >
> > > > 	struct bpf_collection_id {
> > > > 		enum bpf_collection_type type;
> > > > 		void *ptr;
> > > > 		int id;
> > > > 		int off;
> > > > 	};
> > > >
> > > > There might be ways to optimize the memory footprint of this struct, but I'm
> > > > just trying to state why we'll need to include all four, so we don't miss out on
> > > > a corner case again.
> > >
> > > The trade-off doesn't feel right here. Tracking collection id complexity in
> > > the verifier for single owner case is not worth it imo.
> >
> > It was more about correctness after this set is applied than being worth it. We
> > could argue it's not worth it for the first issue which relates to usability.
> > Dave has already mentioned that point. But the second one is simply incorrect to
> > allow. As soon as you want an object which can be part of both a list and rbtree
> > at different times (e.g., an item usually part of an rbtree but which is popped
> > out and moved to a list for some processing), people will be able to trigger it.
>
> obj-in-a-list or obj-in-a-rbtree is technically possible, but I argue it's equally
> weird corner case to support. See above why two list_node-s for head-at-a-time are awkward.
>
> > Simply disallowing that (as you said above) is also an option. But whenever you
> > do allow it, you will need to add something like this. It has little to do with
> > whether shared ownership is supported or not.
>
> I think we will allow two list_nodes or list_node+rb_node or ten rb_nodes
> only when refcount is present as well and that would be 'shared ownership' case.

I get that you just want to punt it for now until someone has a use case, and
move forward.

But saying that people will have to use a refcount in their object just to be
able to have single unique ownership while allowing it to be part of multiple
data structures at different times is weird. It's like forcing someone to use a
shared_ptr in C++ (when they can use a unique_ptr) just because they want to
std::move it from one place to another.

I thought the whole point was building composable building blocks and letting
people choose their tradeoffs.
Alexei Starovoitov Feb. 10, 2023, 8:01 p.m. UTC | #11
On Fri, Feb 10, 2023 at 08:38:47PM +0100, Kumar Kartikeya Dwivedi wrote:
> On Fri, Feb 10, 2023 at 07:58:54PM CET, Alexei Starovoitov wrote:
> > On Fri, Feb 10, 2023 at 07:03:46PM +0100, Kumar Kartikeya Dwivedi wrote:
> > > On Fri, Feb 10, 2023 at 06:21:37PM CET, Alexei Starovoitov wrote:
> > > > On Fri, Feb 10, 2023 at 02:55:41PM +0100, Kumar Kartikeya Dwivedi wrote:
> > > > > On Thu, Feb 09, 2023 at 06:41:41PM CET, Dave Marchevsky wrote:
> > > > > > Newly-added bpf_rbtree_{remove,first} kfuncs have some special properties
> > > > > > that require handling in the verifier:
> > > > > >
> > > > > >   * both bpf_rbtree_remove and bpf_rbtree_first return the type containing
> > > > > >     the bpf_rb_node field, with the offset set to that field's offset,
> > > > > >     instead of a struct bpf_rb_node *
> > > > > >     * mark_reg_graph_node helper added in previous patch generalizes
> > > > > >       this logic, use it
> > > > > >
> > > > > >   * bpf_rbtree_remove's node input is a node that's been inserted
> > > > > >     in the tree - a non-owning reference.
> > > > > >
> > > > > >   * bpf_rbtree_remove must invalidate non-owning references in order to
> > > > > >     avoid aliasing issue. Use previously-added
> > > > > >     invalidate_non_owning_refs helper to mark this function as a
> > > > > >     non-owning ref invalidation point.
> > > > > >
> > > > > >   * Unlike other functions, which convert one of their input arg regs to
> > > > > >     non-owning reference, bpf_rbtree_first takes no arguments and just
> > > > > >     returns a non-owning reference (possibly null)
> > > > > >     * For now verifier logic for this is special-cased instead of
> > > > > >       adding new kfunc flag.
> > > > > >
> > > > > > This patch, along with the previous one, completes special verifier
> > > > > > handling for all rbtree API functions added in this series.
> > > > > >
> > > > >
> > > > > I think there are two issues with the current approach. The fundamental
> > > > > assumption with non-owning references is that it is part of the collection. So
> > > > > bpf_rbtree_{add,first}, bpf_list_push_{front,back} will create them, as no node
> > > > > is being removed from collection. Marking bpf_rbtree_remove (and in the future
> > > > > bpf_list_del) as invalidation points is also right, since once a node has been
> > > > > removed it is going to be unclear whether existing non-owning references have
> > > > > the same value, and thus the property of 'part of the collection' will be
> > > > > broken.
> > > >
> > > > correct, but the patch set does invalidate after bpf_rbtree_remove(),
> > > > so it's not an issue.
> > > >
> > > > > The first issue relates to usability. If I have non-owning references to nodes
> > > > > inserted into both a list and an rbtree, bpf_rbtree_remove should only
> > > > > invalidate the ones that are part of the particular rbtree. It should have no
> > > > > effect on others. Likewise for the bpf_list_del operation in the future.
> > > > > Therefore, we need to track the collection identity associated with each
> > > > > non-owning reference, then only invalidate non-owning references associated with
> > > > > the same collection.
> > > > >
> > > > > The case of bpf_spin_unlock is different, which should invalidate all non-owning
> > > > > references.
> > > > >
> > > > > The second issue is more serious. By not tracking the collection identity, we
> > > > > will currently allow a non-owning reference for an object inserted into a list
> > > > > to be passed to bpf_rbtree_remove, because the verifier cannot discern between
> > > > > 'inserted into rbtree' vs 'inserted into list'. For it, both are currently
> > > > > equivalent in the verifier state. An object is allowed to have both
> > > > > bpf_list_node and bpf_rb_node, but it can only be part of one collection at a
> > > > > time (because of no shared ownership).
> > > > >
> > > > > 	struct obj {
> > > > > 		bpf_list_node ln;
> > > > > 		bpf_rb_node rn;
> > > > > 	};
> > > > >
> > > > > 	bpf_list_push_front(head, &obj->ln); // node is non-own-ref
> > > > > 	bpf_rbtree_remove(&obj->rn); // should not work, but does
> > > >
> > > > Also correct, but inserting the same single owner node into rbtree and link list
> > > > is not supported. Only 'shared ownership' node can be inserted into
> > > > two collections.
> > >
> > > What is supported is having an object be part of a list and an rbtree one at a
> > > time, which is what I'm talking about here. Shared ownership has nothing to do
> > > with this.
> >
> > I wouldn't call it "supported".
> > btf_parse_struct_metas() doesn't error on structs with bpf_list_node and bpf_rb_node in them.
> > It allows two bpf_list_node-s as well.
> > list_node+rb_node is broken as you said.
> > I'm not sure two list_node-s is correct either. For the same reasons.
> 
> You mean it's incorrect before or after this series? If before, can you
> elaborate? The node isn't usable with kfuncs after push before this series.

I meant once we add non-own-refs and bpf_list_del. Something like:
struct obj {
	bpf_list_node ln;
	bpf_list_node ln2;
};

bpf_list_push_front(head, &obj->ln); // obj is non-own-ref
bpf_list_del(&obj->ln2); // should not work, but does

...details to fill in...
I'm arguing that bpf_list_node x 2 is the same danger zone as bpf_list_node + bpf_rb_node.
It's safe today only because link list is so limited.
I'm looking at full featured rb tree and link list with similar ops allowed in both.

> 
> > So it's an omission rather than support.
> >
> > > > The check to disallow bpf_list_node and bpf_rb_node in the same obj
> > > > can be a follow up patch to close this hole.
> > > >
> > >
> > > Fine, that would also 'fix' this problem, where a non-owning reference part of a
> > > list could be passed to bpf_rbtree_remove etc. If there can only be a list node
> > > or an rbtree node in an object, such a case cannot be constructed. But I think
> > > it's an awkward limitation.
> >
> > Regardless whether non-own refs exist or not. Two list_node-s are unusable
> > with single owner model.
> > struct obj {
> > 	bpf_list_node ln;
> > 	bpf_list_node ln2;
> > };
> >
> > bpf_list_push_front(head, &obj->ln); // doesn't matter what we do with obj here
> > bpf_list_push_front(head2, &obj->ln2); // cannot be made to work correctly
> > The only way to actually support this case is to have refcount in obj.
> >
> 
> Yes, I think we all agree it's not possible currently.
> 
> > It can "work" when obj is a part of only one list at a time, but one can reasonably
> > argue that a requirement to have two 'bpf_list_node'-s in an obj just to insert obj
> > in one list, pop it, and then insert into another list through a different list_node
> > is nothing but a waste of memory.
> 
> I didn't get this. Unless I'm mistaken, you should be able to move objects
> around from one list to another using the same single bpf_list_node. You don't
> need two. Just that the list_head needs correct __contains annotation.
> 
> > The user should be able to use the same 'bpf_list_node' to insert into one
> > list_head, remove it from there, and then insert into another list_head.
> >
> 
> I think this already works.

It could be. We don't have tests for it, so it's a guess.

> 
> > > > > So some notion of a collection identity needs to be constructed, the amount of
> > > > > data which needs to be remembered in each non-owning reference's register state
> > > > > depends on our requirements.
> > > > >
> > > > > The first sanity check is that bpf_rbtree_remove only removes something in an
> > > > > rbtree, so probably an enum member indicating whether collection is a list or
> > > > > rbtree. To ensure proper scoped invalidation, we will unfortunately need more
> > > > > than just the reg->id of the reg holding the graph root, since map values of
> > > > > different maps may have same id (0). Hence, we need id and ptr similar to the
> > > > > active lock case for proper matching. Even this won't be enough, as there can be
> > > > > multiple list or rbtree roots in a particular memory region, therefore the
> > > > > offset also needs to be part of the collection identity.
> > > > >
> > > > > So it seems it will amount to:
> > > > >
> > > > > 	struct bpf_collection_id {
> > > > > 		enum bpf_collection_type type;
> > > > > 		void *ptr;
> > > > > 		int id;
> > > > > 		int off;
> > > > > 	};
> > > > >
> > > > > There might be ways to optimize the memory footprint of this struct, but I'm
> > > > > just trying to state why we'll need to include all four, so we don't miss out on
> > > > > a corner case again.
> > > >
> > > > The trade-off doesn't feel right here. Tracking collection id complexity in
> > > > the verifier for single owner case is not worth it imo.
> > >
> > > It was more about correctness after this set is applied than being worth it. We
> > > could argue it's not worth it for the first issue which relates to usability.
> > > Dave has already mentioned that point. But the second one is simply incorrect to
> > > allow. As soon as you want an object which can be part of both a list and rbtree
> > > at different times (e.g., an item usually part of an rbtree but which is popped
> > > out and moved to a list for some processing), people will be able to trigger it.
> >
> > obj-in-a-list or obj-in-a-rbtree is technically possible, but I argue it's equally
> > weird corner case to support. See above why two list_node-s for head-at-a-time are awkward.
> >
> > > Simply disallowing that (as you said above) is also an option. But whenever you
> > > do allow it, you will need to add something like this. It has little to do with
> > > whether shared ownership is supported or not.
> >
> > I think we will allow two list_nodes or list_node+rb_node or ten rb_nodes
> > only when refcount is present as well and that would be 'shared ownership' case.
> 
> I get that you just want to punt it for now until someone has a use case, and
> move forward.
> 
> But saying that people will have to use a refcount in their object just to be
> able to have single unique ownership while allowing it to be part of multiple
> data structures at different times is weird. It's like forcing someone to use a
> shared_ptr in C++ (when they can use a unique_ptr) just because they want to
> std::move it from one place to another.
> 
> I thought the whole point was building composable building blocks and letting
> people choose their tradeoffs.

That's exactly what we're doing. rb-tree, link lists are building blocks.
If we can improve their UX we will certainly do it. I'm arguing that we don't
have a real user yet, so whether UX is good or bad is subjective.

For example in this thread and never before we looked critically at kptrs
and kptr_xchg. The first time people tried to use them for real they said it sucks.
That's feedback that we should listen to instead of our own beliefs.
We try to strike a balance between feature being useful while keeping the verifier
complexity to the minimum. Then deliver the feature into the hands of users and
iterate based on their feedback.
diff mbox series

Patch

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 842f8b98c9b4..269853c632f3 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -9670,10 +9670,20 @@  static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
 				verbose(env, "arg#%d expected pointer to allocated object\n", i);
 				return -EINVAL;
 			}
-			if (!reg->ref_obj_id) {
+			if (meta->func_id == special_kfunc_list[KF_bpf_rbtree_remove]) {
+				if (reg->ref_obj_id) {
+					verbose(env, "rbtree_remove node input must be non-owning ref\n");
+					return -EINVAL;
+				}
+				if (in_rbtree_lock_required_cb(env)) {
+					verbose(env, "rbtree_remove not allowed in rbtree cb\n");
+					return -EINVAL;
+				}
+			} else if (!reg->ref_obj_id) {
 				verbose(env, "allocated object must be referenced\n");
 				return -EINVAL;
 			}
+
 			ret = process_kf_arg_ptr_to_rbtree_node(env, reg, regno, meta);
 			if (ret < 0)
 				return ret;
@@ -9924,11 +9934,12 @@  static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 				   meta.func_id == special_kfunc_list[KF_bpf_list_pop_back]) {
 				struct btf_field *field = meta.arg_list_head.field;
 
-				mark_reg_known_zero(env, regs, BPF_REG_0);
-				regs[BPF_REG_0].type = PTR_TO_BTF_ID | MEM_ALLOC;
-				regs[BPF_REG_0].btf = field->graph_root.btf;
-				regs[BPF_REG_0].btf_id = field->graph_root.value_btf_id;
-				regs[BPF_REG_0].off = field->graph_root.node_offset;
+				mark_reg_graph_node(regs, BPF_REG_0, &field->graph_root);
+			} else if (meta.func_id == special_kfunc_list[KF_bpf_rbtree_remove] ||
+				   meta.func_id == special_kfunc_list[KF_bpf_rbtree_first]) {
+				struct btf_field *field = meta.arg_rbtree_root.field;
+
+				mark_reg_graph_node(regs, BPF_REG_0, &field->graph_root);
 			} else if (meta.func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx]) {
 				mark_reg_known_zero(env, regs, BPF_REG_0);
 				regs[BPF_REG_0].type = PTR_TO_BTF_ID | PTR_TRUSTED;
@@ -9994,7 +10005,13 @@  static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 			if (is_kfunc_ret_null(&meta))
 				regs[BPF_REG_0].id = id;
 			regs[BPF_REG_0].ref_obj_id = id;
+		} else if (meta.func_id == special_kfunc_list[KF_bpf_rbtree_first]) {
+			ref_set_non_owning_lock(env, &regs[BPF_REG_0]);
 		}
+
+		if (meta.func_id == special_kfunc_list[KF_bpf_rbtree_remove])
+			invalidate_non_owning_refs(env);
+
 		if (reg_may_point_to_spin_lock(&regs[BPF_REG_0]) && !regs[BPF_REG_0].id)
 			regs[BPF_REG_0].id = ++env->id_gen;
 	} /* else { add_kfunc_call() ensures it is btf_type_is_void(t) } */