diff mbox series

[v1,bpf-next,1/5] bpf: Add bpf_dynptr_trim and bpf_dynptr_advance

Message ID 20230409033431.3992432-2-joannelkoong@gmail.com (mailing list archive)
State Changes Requested
Delegated to: BPF
Headers show
Series Dynptr convenience helpers | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for bpf-next, async
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit fail Errors and warnings before: 53 this patch: 55
netdev/cc_maintainers warning 8 maintainers not CCed: song@kernel.org sdf@google.com haoluo@google.com yhs@fb.com john.fastabend@gmail.com kpsingh@kernel.org jolsa@kernel.org martin.lau@linux.dev
netdev/build_clang success Errors and warnings before: 18 this patch: 18
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn fail Errors and warnings before: 53 this patch: 55
netdev/checkpatch warning WARNING: line length of 82 exceeds 80 columns WARNING: line length of 87 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-next-PR success PR summary
bpf/vmtest-bpf-next-VM_Test-10 success Logs for test_maps on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-33 success Logs for test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-15 fail Logs for test_progs on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-20 fail Logs for test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-7 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-2 success Logs for build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-3 success Logs for build for aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-5 success Logs for build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-6 success Logs for build for x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-4 success Logs for build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-8 success Logs for test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-11 success Logs for test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-12 success Logs for test_maps on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-16 success Logs for test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-18 success Logs for test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-21 success Logs for test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-23 success Logs for test_progs_no_alu32_parallel on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-25 success Logs for test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-26 success Logs for test_progs_no_alu32_parallel on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-27 success Logs for test_progs_parallel on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-29 success Logs for test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-30 success Logs for test_progs_parallel on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-31 success Logs for test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-32 success Logs for test_verifier on aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-34 success Logs for test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-35 success Logs for test_verifier on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-9 success Logs for test_maps on aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-13 success Logs for test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-14 success Logs for test_progs on aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-17 success Logs for test_progs on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-19 success Logs for test_progs_no_alu32 on aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-22 success Logs for test_progs_no_alu32 on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-24 success Logs for test_progs_no_alu32_parallel on aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-28 success Logs for test_progs_parallel on aarch64 with llvm-16

Commit Message

Joanne Koong April 9, 2023, 3:34 a.m. UTC
bpf_dynptr_trim decreases the size of a dynptr by the specified
number of bytes (offset remains the same). bpf_dynptr_advance advances
the offset of the dynptr by the specified number of bytes (size
decreases correspondingly).

Trimming or advancing the dynptr may be useful in certain situations.
For example, when hashing which takes in generic dynptrs, if the dynptr
points to a struct but only a certain memory region inside the struct
should be hashed, advance/trim can be used to narrow in on the
specific region to hash.

Signed-off-by: Joanne Koong <joannelkoong@gmail.com>
---
 kernel/bpf/helpers.c | 49 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 49 insertions(+)

Comments

Andrii Nakryiko April 12, 2023, 9:46 p.m. UTC | #1
On Sat, Apr 8, 2023 at 8:34 PM Joanne Koong <joannelkoong@gmail.com> wrote:
>
> bpf_dynptr_trim decreases the size of a dynptr by the specified
> number of bytes (offset remains the same). bpf_dynptr_advance advances
> the offset of the dynptr by the specified number of bytes (size
> decreases correspondingly).
>
> Trimming or advancing the dynptr may be useful in certain situations.
> For example, when hashing which takes in generic dynptrs, if the dynptr
> points to a struct but only a certain memory region inside the struct
> should be hashed, advance/trim can be used to narrow in on the
> specific region to hash.
>
> Signed-off-by: Joanne Koong <joannelkoong@gmail.com>
> ---
>  kernel/bpf/helpers.c | 49 ++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 49 insertions(+)
>
> diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
> index b6a5cda5bb59..51b4c4b5dbed 100644
> --- a/kernel/bpf/helpers.c
> +++ b/kernel/bpf/helpers.c
> @@ -1448,6 +1448,13 @@ u32 bpf_dynptr_get_size(const struct bpf_dynptr_kern *ptr)
>         return ptr->size & DYNPTR_SIZE_MASK;
>  }
>
> +static void bpf_dynptr_set_size(struct bpf_dynptr_kern *ptr, u32 new_size)
> +{
> +       u32 metadata = ptr->size & ~DYNPTR_SIZE_MASK;
> +
> +       ptr->size = new_size | metadata;
> +}
> +
>  int bpf_dynptr_check_size(u32 size)
>  {
>         return size > DYNPTR_MAX_SIZE ? -E2BIG : 0;
> @@ -2275,6 +2282,46 @@ __bpf_kfunc void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr_kern *ptr, u32 o
>         return bpf_dynptr_slice(ptr, offset, buffer, buffer__szk);
>  }
>
> +/* For dynptrs, the offset may only be advanced and the size may only be decremented */
> +static int bpf_dynptr_adjust(struct bpf_dynptr_kern *ptr, u32 off_inc, u32 sz_dec)

it feels like this helper just makes it a bit harder to follow what's
going on. Half of this function isn't actually executed for
bpf_dynptr_trim, so I don't think we are saving all that much code,
maybe let's code each of advance and trim explicitly?

> +{
> +       u32 size;
> +
> +       if (!ptr->data)
> +               return -EINVAL;
> +
> +       size = bpf_dynptr_get_size(ptr);
> +
> +       if (sz_dec > size)
> +               return -ERANGE;
> +
> +       if (off_inc) {
> +               u32 new_off;
> +
> +               if (off_inc > size)

like here it becomes confusing if off_inc includes sz_dec, or they
should be added to each other. I think it's convoluted as is.


> +                       return -ERANGE;
> +
> +               if (check_add_overflow(ptr->offset, off_inc, &new_off))

why do we need to worry about overflow, we checked all the error
conditions above?..

> +                       return -ERANGE;
> +
> +               ptr->offset = new_off;
> +       }
> +
> +       bpf_dynptr_set_size(ptr, size - sz_dec);
> +
> +       return 0;
> +}
> +
> +__bpf_kfunc int bpf_dynptr_advance(struct bpf_dynptr_kern *ptr, u32 len)
> +{
> +       return bpf_dynptr_adjust(ptr, len, len);
> +}
> +
> +__bpf_kfunc int bpf_dynptr_trim(struct bpf_dynptr_kern *ptr, u32 len)

I'm also wondering if trim operation is a bit unusual for dealing
ranges? Instead of a relative size decrement, maybe it's more
straightforward to have bpf_dynptr_resize() to set new desired size?
So if someone has original dynptr with 100 bytes but wants to have
dynptr for bytes [10, 30), they'd do a pretty natural:

bpf_dynptr_advance(&dynptr, 10);
bpf_dynptr_resize(&dynptr, 20);

?

> +{
> +       return bpf_dynptr_adjust(ptr, 0, len);
> +}
> +
>  __bpf_kfunc void *bpf_cast_to_kern_ctx(void *obj)
>  {
>         return obj;
> @@ -2347,6 +2394,8 @@ BTF_ID_FLAGS(func, bpf_dynptr_slice_rdwr, KF_RET_NULL)
>  BTF_ID_FLAGS(func, bpf_iter_num_new, KF_ITER_NEW)
>  BTF_ID_FLAGS(func, bpf_iter_num_next, KF_ITER_NEXT | KF_RET_NULL)
>  BTF_ID_FLAGS(func, bpf_iter_num_destroy, KF_ITER_DESTROY)
> +BTF_ID_FLAGS(func, bpf_dynptr_trim)
> +BTF_ID_FLAGS(func, bpf_dynptr_advance)
>  BTF_SET8_END(common_btf_ids)
>
>  static const struct btf_kfunc_id_set common_kfunc_set = {
> --
> 2.34.1
>
Joanne Koong April 14, 2023, 5:15 a.m. UTC | #2
On Wed, Apr 12, 2023 at 2:46 PM Andrii Nakryiko
<andrii.nakryiko@gmail.com> wrote:
>
> On Sat, Apr 8, 2023 at 8:34 PM Joanne Koong <joannelkoong@gmail.com> wrote:
> >
> > bpf_dynptr_trim decreases the size of a dynptr by the specified
> > number of bytes (offset remains the same). bpf_dynptr_advance advances
> > the offset of the dynptr by the specified number of bytes (size
> > decreases correspondingly).
> >
> > Trimming or advancing the dynptr may be useful in certain situations.
> > For example, when hashing which takes in generic dynptrs, if the dynptr
> > points to a struct but only a certain memory region inside the struct
> > should be hashed, advance/trim can be used to narrow in on the
> > specific region to hash.
> >
> > Signed-off-by: Joanne Koong <joannelkoong@gmail.com>
> > ---
> >  kernel/bpf/helpers.c | 49 ++++++++++++++++++++++++++++++++++++++++++++
> >  1 file changed, 49 insertions(+)
> >
> > diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
> > index b6a5cda5bb59..51b4c4b5dbed 100644
> > --- a/kernel/bpf/helpers.c
> > +++ b/kernel/bpf/helpers.c
> > @@ -1448,6 +1448,13 @@ u32 bpf_dynptr_get_size(const struct bpf_dynptr_kern *ptr)
> >         return ptr->size & DYNPTR_SIZE_MASK;
> >  }
> >
> > +static void bpf_dynptr_set_size(struct bpf_dynptr_kern *ptr, u32 new_size)
> > +{
> > +       u32 metadata = ptr->size & ~DYNPTR_SIZE_MASK;
> > +
> > +       ptr->size = new_size | metadata;
> > +}
> > +
> >  int bpf_dynptr_check_size(u32 size)
> >  {
> >         return size > DYNPTR_MAX_SIZE ? -E2BIG : 0;
> > @@ -2275,6 +2282,46 @@ __bpf_kfunc void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr_kern *ptr, u32 o
> >         return bpf_dynptr_slice(ptr, offset, buffer, buffer__szk);
> >  }
> >
> > +/* For dynptrs, the offset may only be advanced and the size may only be decremented */
> > +static int bpf_dynptr_adjust(struct bpf_dynptr_kern *ptr, u32 off_inc, u32 sz_dec)
>
> it feels like this helper just makes it a bit harder to follow what's
> going on. Half of this function isn't actually executed for
> bpf_dynptr_trim, so I don't think we are saving all that much code,
> maybe let's code each of advance and trim explicitly?
>

Sounds good, I will change this in v2 to handle advance and trim separately

> > +{
> > +       u32 size;
> > +
> > +       if (!ptr->data)
> > +               return -EINVAL;
> > +
> > +       size = bpf_dynptr_get_size(ptr);
> > +
> > +       if (sz_dec > size)
> > +               return -ERANGE;
> > +
> > +       if (off_inc) {
> > +               u32 new_off;
> > +
> > +               if (off_inc > size)
>
> like here it becomes confusing if off_inc includes sz_dec, or they
> should be added to each other. I think it's convoluted as is.
>
>
> > +                       return -ERANGE;
> > +
> > +               if (check_add_overflow(ptr->offset, off_inc, &new_off))
>
> why do we need to worry about overflow, we checked all the error
> conditions above?..

Ahh you're right, this cant overflow u32. The dynptr max supported
size is 2^24 - 1 as well

>
> > +                       return -ERANGE;
> > +
> > +               ptr->offset = new_off;
> > +       }
> > +
> > +       bpf_dynptr_set_size(ptr, size - sz_dec);
> > +
> > +       return 0;
> > +}
> > +
> > +__bpf_kfunc int bpf_dynptr_advance(struct bpf_dynptr_kern *ptr, u32 len)
> > +{
> > +       return bpf_dynptr_adjust(ptr, len, len);
> > +}
> > +
> > +__bpf_kfunc int bpf_dynptr_trim(struct bpf_dynptr_kern *ptr, u32 len)
>
> I'm also wondering if trim operation is a bit unusual for dealing
> ranges? Instead of a relative size decrement, maybe it's more
> straightforward to have bpf_dynptr_resize() to set new desired size?
> So if someone has original dynptr with 100 bytes but wants to have
> dynptr for bytes [10, 30), they'd do a pretty natural:
>
> bpf_dynptr_advance(&dynptr, 10);
> bpf_dynptr_resize(&dynptr, 20);
>
> ?
>

Yeah! I like this idea a lot, that way they dont' need to know the
current size of the dynptr before they trim. This seems a lot more
ergonomic

> > +{
> > +       return bpf_dynptr_adjust(ptr, 0, len);
> > +}
> > +
> >  __bpf_kfunc void *bpf_cast_to_kern_ctx(void *obj)
> >  {
> >         return obj;
> > @@ -2347,6 +2394,8 @@ BTF_ID_FLAGS(func, bpf_dynptr_slice_rdwr, KF_RET_NULL)
> >  BTF_ID_FLAGS(func, bpf_iter_num_new, KF_ITER_NEW)
> >  BTF_ID_FLAGS(func, bpf_iter_num_next, KF_ITER_NEXT | KF_RET_NULL)
> >  BTF_ID_FLAGS(func, bpf_iter_num_destroy, KF_ITER_DESTROY)
> > +BTF_ID_FLAGS(func, bpf_dynptr_trim)
> > +BTF_ID_FLAGS(func, bpf_dynptr_advance)
> >  BTF_SET8_END(common_btf_ids)
> >
> >  static const struct btf_kfunc_id_set common_kfunc_set = {
> > --
> > 2.34.1
> >
Andrii Nakryiko April 17, 2023, 11:35 p.m. UTC | #3
On Thu, Apr 13, 2023 at 10:15 PM Joanne Koong <joannelkoong@gmail.com> wrote:
>
> On Wed, Apr 12, 2023 at 2:46 PM Andrii Nakryiko
> <andrii.nakryiko@gmail.com> wrote:
> >
> > On Sat, Apr 8, 2023 at 8:34 PM Joanne Koong <joannelkoong@gmail.com> wrote:
> > >
> > > bpf_dynptr_trim decreases the size of a dynptr by the specified
> > > number of bytes (offset remains the same). bpf_dynptr_advance advances
> > > the offset of the dynptr by the specified number of bytes (size
> > > decreases correspondingly).
> > >
> > > Trimming or advancing the dynptr may be useful in certain situations.
> > > For example, when hashing which takes in generic dynptrs, if the dynptr
> > > points to a struct but only a certain memory region inside the struct
> > > should be hashed, advance/trim can be used to narrow in on the
> > > specific region to hash.
> > >
> > > Signed-off-by: Joanne Koong <joannelkoong@gmail.com>
> > > ---
> > >  kernel/bpf/helpers.c | 49 ++++++++++++++++++++++++++++++++++++++++++++
> > >  1 file changed, 49 insertions(+)
> > >
> > > diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
> > > index b6a5cda5bb59..51b4c4b5dbed 100644
> > > --- a/kernel/bpf/helpers.c
> > > +++ b/kernel/bpf/helpers.c
> > > @@ -1448,6 +1448,13 @@ u32 bpf_dynptr_get_size(const struct bpf_dynptr_kern *ptr)
> > >         return ptr->size & DYNPTR_SIZE_MASK;
> > >  }
> > >
> > > +static void bpf_dynptr_set_size(struct bpf_dynptr_kern *ptr, u32 new_size)
> > > +{
> > > +       u32 metadata = ptr->size & ~DYNPTR_SIZE_MASK;
> > > +
> > > +       ptr->size = new_size | metadata;
> > > +}
> > > +
> > >  int bpf_dynptr_check_size(u32 size)
> > >  {
> > >         return size > DYNPTR_MAX_SIZE ? -E2BIG : 0;
> > > @@ -2275,6 +2282,46 @@ __bpf_kfunc void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr_kern *ptr, u32 o
> > >         return bpf_dynptr_slice(ptr, offset, buffer, buffer__szk);
> > >  }
> > >
> > > +/* For dynptrs, the offset may only be advanced and the size may only be decremented */
> > > +static int bpf_dynptr_adjust(struct bpf_dynptr_kern *ptr, u32 off_inc, u32 sz_dec)
> >
> > it feels like this helper just makes it a bit harder to follow what's
> > going on. Half of this function isn't actually executed for
> > bpf_dynptr_trim, so I don't think we are saving all that much code,
> > maybe let's code each of advance and trim explicitly?
> >
>
> Sounds good, I will change this in v2 to handle advance and trim separately
>
> > > +{
> > > +       u32 size;
> > > +
> > > +       if (!ptr->data)
> > > +               return -EINVAL;
> > > +
> > > +       size = bpf_dynptr_get_size(ptr);
> > > +
> > > +       if (sz_dec > size)
> > > +               return -ERANGE;
> > > +
> > > +       if (off_inc) {
> > > +               u32 new_off;
> > > +
> > > +               if (off_inc > size)
> >
> > like here it becomes confusing if off_inc includes sz_dec, or they
> > should be added to each other. I think it's convoluted as is.
> >
> >
> > > +                       return -ERANGE;
> > > +
> > > +               if (check_add_overflow(ptr->offset, off_inc, &new_off))
> >
> > why do we need to worry about overflow, we checked all the error
> > conditions above?..
>
> Ahh you're right, this cant overflow u32. The dynptr max supported
> size is 2^24 - 1 as well
>
> >
> > > +                       return -ERANGE;
> > > +
> > > +               ptr->offset = new_off;
> > > +       }
> > > +
> > > +       bpf_dynptr_set_size(ptr, size - sz_dec);
> > > +
> > > +       return 0;
> > > +}
> > > +
> > > +__bpf_kfunc int bpf_dynptr_advance(struct bpf_dynptr_kern *ptr, u32 len)
> > > +{
> > > +       return bpf_dynptr_adjust(ptr, len, len);
> > > +}
> > > +
> > > +__bpf_kfunc int bpf_dynptr_trim(struct bpf_dynptr_kern *ptr, u32 len)
> >
> > I'm also wondering if trim operation is a bit unusual for dealing
> > ranges? Instead of a relative size decrement, maybe it's more
> > straightforward to have bpf_dynptr_resize() to set new desired size?
> > So if someone has original dynptr with 100 bytes but wants to have
> > dynptr for bytes [10, 30), they'd do a pretty natural:
> >
> > bpf_dynptr_advance(&dynptr, 10);
> > bpf_dynptr_resize(&dynptr, 20);
> >
> > ?
> >
>
> Yeah! I like this idea a lot, that way they dont' need to know the
> current size of the dynptr before they trim. This seems a lot more
> ergonomic

Thinking a bit more, I'm now wondering if we should actually merge
those two into one API to allow adjust both at the same time.
Similarly how langauges like Go and Rust allow to adjust array slices
by specifying new [start, end) offsets, should we have just one:

bpf_dynptr_adjust(&dynptr, 10, 30);

bpf_dynptr_advance() could be expressed as:

bpf_dynptr_adjust(&dynptr, 10, bpf_dynptr_size(&dynptr) - 10);

I suspect full adjust with custom [start, end) will be actually more
common than just advancing offset.

>
> > > +{
> > > +       return bpf_dynptr_adjust(ptr, 0, len);
> > > +}
> > > +
> > >  __bpf_kfunc void *bpf_cast_to_kern_ctx(void *obj)
> > >  {
> > >         return obj;
> > > @@ -2347,6 +2394,8 @@ BTF_ID_FLAGS(func, bpf_dynptr_slice_rdwr, KF_RET_NULL)
> > >  BTF_ID_FLAGS(func, bpf_iter_num_new, KF_ITER_NEW)
> > >  BTF_ID_FLAGS(func, bpf_iter_num_next, KF_ITER_NEXT | KF_RET_NULL)
> > >  BTF_ID_FLAGS(func, bpf_iter_num_destroy, KF_ITER_DESTROY)
> > > +BTF_ID_FLAGS(func, bpf_dynptr_trim)
> > > +BTF_ID_FLAGS(func, bpf_dynptr_advance)
> > >  BTF_SET8_END(common_btf_ids)
> > >
> > >  static const struct btf_kfunc_id_set common_kfunc_set = {
> > > --
> > > 2.34.1
> > >
Joanne Koong April 19, 2023, 6:22 a.m. UTC | #4
On Mon, Apr 17, 2023 at 4:36 PM Andrii Nakryiko
<andrii.nakryiko@gmail.com> wrote:
>
> On Thu, Apr 13, 2023 at 10:15 PM Joanne Koong <joannelkoong@gmail.com> wrote:
> >
> > On Wed, Apr 12, 2023 at 2:46 PM Andrii Nakryiko
> > <andrii.nakryiko@gmail.com> wrote:
> > >
> > > On Sat, Apr 8, 2023 at 8:34 PM Joanne Koong <joannelkoong@gmail.com> wrote:
> > > >
> > > > bpf_dynptr_trim decreases the size of a dynptr by the specified
> > > > number of bytes (offset remains the same). bpf_dynptr_advance advances
> > > > the offset of the dynptr by the specified number of bytes (size
> > > > decreases correspondingly).
> > > >
> > > > Trimming or advancing the dynptr may be useful in certain situations.
> > > > For example, when hashing which takes in generic dynptrs, if the dynptr
> > > > points to a struct but only a certain memory region inside the struct
> > > > should be hashed, advance/trim can be used to narrow in on the
> > > > specific region to hash.
> > > >
> > > > Signed-off-by: Joanne Koong <joannelkoong@gmail.com>
> > > > ---
> > > >  kernel/bpf/helpers.c | 49 ++++++++++++++++++++++++++++++++++++++++++++
> > > >  1 file changed, 49 insertions(+)
> > > >
> > > > diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
> > > > index b6a5cda5bb59..51b4c4b5dbed 100644
> > > > --- a/kernel/bpf/helpers.c
> > > > +++ b/kernel/bpf/helpers.c
> > > > @@ -1448,6 +1448,13 @@ u32 bpf_dynptr_get_size(const struct bpf_dynptr_kern *ptr)
> > > >         return ptr->size & DYNPTR_SIZE_MASK;
> > > >  }
> > > >
> > > > +static void bpf_dynptr_set_size(struct bpf_dynptr_kern *ptr, u32 new_size)
> > > > +{
> > > > +       u32 metadata = ptr->size & ~DYNPTR_SIZE_MASK;
> > > > +
> > > > +       ptr->size = new_size | metadata;
> > > > +}
> > > > +
> > > >  int bpf_dynptr_check_size(u32 size)
> > > >  {
> > > >         return size > DYNPTR_MAX_SIZE ? -E2BIG : 0;
> > > > @@ -2275,6 +2282,46 @@ __bpf_kfunc void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr_kern *ptr, u32 o
> > > >         return bpf_dynptr_slice(ptr, offset, buffer, buffer__szk);
> > > >  }
> > > >
> > > > +/* For dynptrs, the offset may only be advanced and the size may only be decremented */
> > > > +static int bpf_dynptr_adjust(struct bpf_dynptr_kern *ptr, u32 off_inc, u32 sz_dec)
> > >
> > > it feels like this helper just makes it a bit harder to follow what's
> > > going on. Half of this function isn't actually executed for
> > > bpf_dynptr_trim, so I don't think we are saving all that much code,
> > > maybe let's code each of advance and trim explicitly?
> > >
> >
> > Sounds good, I will change this in v2 to handle advance and trim separately
> >
> > > > +{
> > > > +       u32 size;
> > > > +
> > > > +       if (!ptr->data)
> > > > +               return -EINVAL;
> > > > +
> > > > +       size = bpf_dynptr_get_size(ptr);
> > > > +
> > > > +       if (sz_dec > size)
> > > > +               return -ERANGE;
> > > > +
> > > > +       if (off_inc) {
> > > > +               u32 new_off;
> > > > +
> > > > +               if (off_inc > size)
> > >
> > > like here it becomes confusing if off_inc includes sz_dec, or they
> > > should be added to each other. I think it's convoluted as is.
> > >
> > >
> > > > +                       return -ERANGE;
> > > > +
> > > > +               if (check_add_overflow(ptr->offset, off_inc, &new_off))
> > >
> > > why do we need to worry about overflow, we checked all the error
> > > conditions above?..
> >
> > Ahh you're right, this cant overflow u32. The dynptr max supported
> > size is 2^24 - 1 as well
> >
> > >
> > > > +                       return -ERANGE;
> > > > +
> > > > +               ptr->offset = new_off;
> > > > +       }
> > > > +
> > > > +       bpf_dynptr_set_size(ptr, size - sz_dec);
> > > > +
> > > > +       return 0;
> > > > +}
> > > > +
> > > > +__bpf_kfunc int bpf_dynptr_advance(struct bpf_dynptr_kern *ptr, u32 len)
> > > > +{
> > > > +       return bpf_dynptr_adjust(ptr, len, len);
> > > > +}
> > > > +
> > > > +__bpf_kfunc int bpf_dynptr_trim(struct bpf_dynptr_kern *ptr, u32 len)
> > >
> > > I'm also wondering if trim operation is a bit unusual for dealing
> > > ranges? Instead of a relative size decrement, maybe it's more
> > > straightforward to have bpf_dynptr_resize() to set new desired size?
> > > So if someone has original dynptr with 100 bytes but wants to have
> > > dynptr for bytes [10, 30), they'd do a pretty natural:
> > >
> > > bpf_dynptr_advance(&dynptr, 10);
> > > bpf_dynptr_resize(&dynptr, 20);
> > >
> > > ?
> > >
> >
> > Yeah! I like this idea a lot, that way they dont' need to know the
> > current size of the dynptr before they trim. This seems a lot more
> > ergonomic
>
> Thinking a bit more, I'm now wondering if we should actually merge
> those two into one API to allow adjust both at the same time.
> Similarly how langauges like Go and Rust allow to adjust array slices
> by specifying new [start, end) offsets, should we have just one:
>
> bpf_dynptr_adjust(&dynptr, 10, 30);
>
> bpf_dynptr_advance() could be expressed as:
>
> bpf_dynptr_adjust(&dynptr, 10, bpf_dynptr_size(&dynptr) - 10);
>
I think for expressing advance where only start offset changes, end
needs to be "bpf_dynptr_size(&dynptr)" (no minus 10) here?

> I suspect full adjust with custom [start, end) will be actually more
> common than just advancing offset.
>

I think this might get quickly cumbersome for the use cases where the
user just wants to parse through the data with only adjusting start
offset, for example parsing an skb's header options. maybe there's
some way to combine the two?:

bpf_dynptr_adjust(&dynptr, start, end);
where if end is -1 or some #define macro set to u32_max or something
like that then that signifies dont' modify the end offset, just modify
the start? That way the user can just advance instead of needing to
know its size every time. I don't know if that makes the interface
uglier / more confusing though. WDYT?

> >
> > > > +{
> > > > +       return bpf_dynptr_adjust(ptr, 0, len);
> > > > +}
> > > > +
> > > >  __bpf_kfunc void *bpf_cast_to_kern_ctx(void *obj)
> > > >  {
> > > >         return obj;
> > > > @@ -2347,6 +2394,8 @@ BTF_ID_FLAGS(func, bpf_dynptr_slice_rdwr, KF_RET_NULL)
> > > >  BTF_ID_FLAGS(func, bpf_iter_num_new, KF_ITER_NEW)
> > > >  BTF_ID_FLAGS(func, bpf_iter_num_next, KF_ITER_NEXT | KF_RET_NULL)
> > > >  BTF_ID_FLAGS(func, bpf_iter_num_destroy, KF_ITER_DESTROY)
> > > > +BTF_ID_FLAGS(func, bpf_dynptr_trim)
> > > > +BTF_ID_FLAGS(func, bpf_dynptr_advance)
> > > >  BTF_SET8_END(common_btf_ids)
> > > >
> > > >  static const struct btf_kfunc_id_set common_kfunc_set = {
> > > > --
> > > > 2.34.1
> > > >
Andrii Nakryiko April 19, 2023, 4:30 p.m. UTC | #5
On Tue, Apr 18, 2023 at 11:22 PM Joanne Koong <joannelkoong@gmail.com> wrote:
>
> On Mon, Apr 17, 2023 at 4:36 PM Andrii Nakryiko
> <andrii.nakryiko@gmail.com> wrote:
> >
> > On Thu, Apr 13, 2023 at 10:15 PM Joanne Koong <joannelkoong@gmail.com> wrote:
> > >
> > > On Wed, Apr 12, 2023 at 2:46 PM Andrii Nakryiko
> > > <andrii.nakryiko@gmail.com> wrote:
> > > >
> > > > On Sat, Apr 8, 2023 at 8:34 PM Joanne Koong <joannelkoong@gmail.com> wrote:
> > > > >
> > > > > bpf_dynptr_trim decreases the size of a dynptr by the specified
> > > > > number of bytes (offset remains the same). bpf_dynptr_advance advances
> > > > > the offset of the dynptr by the specified number of bytes (size
> > > > > decreases correspondingly).
> > > > >
> > > > > Trimming or advancing the dynptr may be useful in certain situations.
> > > > > For example, when hashing which takes in generic dynptrs, if the dynptr
> > > > > points to a struct but only a certain memory region inside the struct
> > > > > should be hashed, advance/trim can be used to narrow in on the
> > > > > specific region to hash.
> > > > >
> > > > > Signed-off-by: Joanne Koong <joannelkoong@gmail.com>
> > > > > ---
> > > > >  kernel/bpf/helpers.c | 49 ++++++++++++++++++++++++++++++++++++++++++++
> > > > >  1 file changed, 49 insertions(+)
> > > > >
> > > > > diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
> > > > > index b6a5cda5bb59..51b4c4b5dbed 100644
> > > > > --- a/kernel/bpf/helpers.c
> > > > > +++ b/kernel/bpf/helpers.c
> > > > > @@ -1448,6 +1448,13 @@ u32 bpf_dynptr_get_size(const struct bpf_dynptr_kern *ptr)
> > > > >         return ptr->size & DYNPTR_SIZE_MASK;
> > > > >  }
> > > > >
> > > > > +static void bpf_dynptr_set_size(struct bpf_dynptr_kern *ptr, u32 new_size)
> > > > > +{
> > > > > +       u32 metadata = ptr->size & ~DYNPTR_SIZE_MASK;
> > > > > +
> > > > > +       ptr->size = new_size | metadata;
> > > > > +}
> > > > > +
> > > > >  int bpf_dynptr_check_size(u32 size)
> > > > >  {
> > > > >         return size > DYNPTR_MAX_SIZE ? -E2BIG : 0;
> > > > > @@ -2275,6 +2282,46 @@ __bpf_kfunc void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr_kern *ptr, u32 o
> > > > >         return bpf_dynptr_slice(ptr, offset, buffer, buffer__szk);
> > > > >  }
> > > > >
> > > > > +/* For dynptrs, the offset may only be advanced and the size may only be decremented */
> > > > > +static int bpf_dynptr_adjust(struct bpf_dynptr_kern *ptr, u32 off_inc, u32 sz_dec)
> > > >
> > > > it feels like this helper just makes it a bit harder to follow what's
> > > > going on. Half of this function isn't actually executed for
> > > > bpf_dynptr_trim, so I don't think we are saving all that much code,
> > > > maybe let's code each of advance and trim explicitly?
> > > >
> > >
> > > Sounds good, I will change this in v2 to handle advance and trim separately
> > >
> > > > > +{
> > > > > +       u32 size;
> > > > > +
> > > > > +       if (!ptr->data)
> > > > > +               return -EINVAL;
> > > > > +
> > > > > +       size = bpf_dynptr_get_size(ptr);
> > > > > +
> > > > > +       if (sz_dec > size)
> > > > > +               return -ERANGE;
> > > > > +
> > > > > +       if (off_inc) {
> > > > > +               u32 new_off;
> > > > > +
> > > > > +               if (off_inc > size)
> > > >
> > > > like here it becomes confusing if off_inc includes sz_dec, or they
> > > > should be added to each other. I think it's convoluted as is.
> > > >
> > > >
> > > > > +                       return -ERANGE;
> > > > > +
> > > > > +               if (check_add_overflow(ptr->offset, off_inc, &new_off))
> > > >
> > > > why do we need to worry about overflow, we checked all the error
> > > > conditions above?..
> > >
> > > Ahh you're right, this cant overflow u32. The dynptr max supported
> > > size is 2^24 - 1 as well
> > >
> > > >
> > > > > +                       return -ERANGE;
> > > > > +
> > > > > +               ptr->offset = new_off;
> > > > > +       }
> > > > > +
> > > > > +       bpf_dynptr_set_size(ptr, size - sz_dec);
> > > > > +
> > > > > +       return 0;
> > > > > +}
> > > > > +
> > > > > +__bpf_kfunc int bpf_dynptr_advance(struct bpf_dynptr_kern *ptr, u32 len)
> > > > > +{
> > > > > +       return bpf_dynptr_adjust(ptr, len, len);
> > > > > +}
> > > > > +
> > > > > +__bpf_kfunc int bpf_dynptr_trim(struct bpf_dynptr_kern *ptr, u32 len)
> > > >
> > > > I'm also wondering if trim operation is a bit unusual for dealing
> > > > ranges? Instead of a relative size decrement, maybe it's more
> > > > straightforward to have bpf_dynptr_resize() to set new desired size?
> > > > So if someone has original dynptr with 100 bytes but wants to have
> > > > dynptr for bytes [10, 30), they'd do a pretty natural:
> > > >
> > > > bpf_dynptr_advance(&dynptr, 10);
> > > > bpf_dynptr_resize(&dynptr, 20);
> > > >
> > > > ?
> > > >
> > >
> > > Yeah! I like this idea a lot, that way they dont' need to know the
> > > current size of the dynptr before they trim. This seems a lot more
> > > ergonomic
> >
> > Thinking a bit more, I'm now wondering if we should actually merge
> > those two into one API to allow adjust both at the same time.
> > Similarly how langauges like Go and Rust allow to adjust array slices
> > by specifying new [start, end) offsets, should we have just one:
> >
> > bpf_dynptr_adjust(&dynptr, 10, 30);
> >
> > bpf_dynptr_advance() could be expressed as:
> >
> > bpf_dynptr_adjust(&dynptr, 10, bpf_dynptr_size(&dynptr) - 10);
> >
> I think for expressing advance where only start offset changes, end
> needs to be "bpf_dynptr_size(&dynptr)" (no minus 10) here?

yep, you are right! it's end offset, so no need to adjust for 10. So
even better:

bpf_dynptr_adjust(&dynptr, 10, bpf_dynptr_size(&dynptr));

>
> > I suspect full adjust with custom [start, end) will be actually more
> > common than just advancing offset.
> >
>
> I think this might get quickly cumbersome for the use cases where the
> user just wants to parse through the data with only adjusting start
> offset, for example parsing an skb's header options. maybe there's
> some way to combine the two?:
>
> bpf_dynptr_adjust(&dynptr, start, end);
> where if end is -1 or some #define macro set to u32_max or something
> like that then that signifies dont' modify the end offset, just modify
> the start? That way the user can just advance instead of needing to
> know its size every time. I don't know if that makes the interface
> uglier / more confusing though. WDYT?

I think it does make it more cumbersome, I'd keep it as [start, end)
offset always. We can inline bpf_dynptr_size() if there is a
performance concern.

At least I'd start there, and if there is demand we can also add -1 as
a special case later.

>
> > >
> > > > > +{
> > > > > +       return bpf_dynptr_adjust(ptr, 0, len);
> > > > > +}
> > > > > +
> > > > >  __bpf_kfunc void *bpf_cast_to_kern_ctx(void *obj)
> > > > >  {
> > > > >         return obj;
> > > > > @@ -2347,6 +2394,8 @@ BTF_ID_FLAGS(func, bpf_dynptr_slice_rdwr, KF_RET_NULL)
> > > > >  BTF_ID_FLAGS(func, bpf_iter_num_new, KF_ITER_NEW)
> > > > >  BTF_ID_FLAGS(func, bpf_iter_num_next, KF_ITER_NEXT | KF_RET_NULL)
> > > > >  BTF_ID_FLAGS(func, bpf_iter_num_destroy, KF_ITER_DESTROY)
> > > > > +BTF_ID_FLAGS(func, bpf_dynptr_trim)
> > > > > +BTF_ID_FLAGS(func, bpf_dynptr_advance)
> > > > >  BTF_SET8_END(common_btf_ids)
> > > > >
> > > > >  static const struct btf_kfunc_id_set common_kfunc_set = {
> > > > > --
> > > > > 2.34.1
> > > > >
diff mbox series

Patch

diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index b6a5cda5bb59..51b4c4b5dbed 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -1448,6 +1448,13 @@  u32 bpf_dynptr_get_size(const struct bpf_dynptr_kern *ptr)
 	return ptr->size & DYNPTR_SIZE_MASK;
 }
 
+static void bpf_dynptr_set_size(struct bpf_dynptr_kern *ptr, u32 new_size)
+{
+	u32 metadata = ptr->size & ~DYNPTR_SIZE_MASK;
+
+	ptr->size = new_size | metadata;
+}
+
 int bpf_dynptr_check_size(u32 size)
 {
 	return size > DYNPTR_MAX_SIZE ? -E2BIG : 0;
@@ -2275,6 +2282,46 @@  __bpf_kfunc void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr_kern *ptr, u32 o
 	return bpf_dynptr_slice(ptr, offset, buffer, buffer__szk);
 }
 
+/* For dynptrs, the offset may only be advanced and the size may only be decremented */
+static int bpf_dynptr_adjust(struct bpf_dynptr_kern *ptr, u32 off_inc, u32 sz_dec)
+{
+	u32 size;
+
+	if (!ptr->data)
+		return -EINVAL;
+
+	size = bpf_dynptr_get_size(ptr);
+
+	if (sz_dec > size)
+		return -ERANGE;
+
+	if (off_inc) {
+		u32 new_off;
+
+		if (off_inc > size)
+			return -ERANGE;
+
+		if (check_add_overflow(ptr->offset, off_inc, &new_off))
+			return -ERANGE;
+
+		ptr->offset = new_off;
+	}
+
+	bpf_dynptr_set_size(ptr, size - sz_dec);
+
+	return 0;
+}
+
+__bpf_kfunc int bpf_dynptr_advance(struct bpf_dynptr_kern *ptr, u32 len)
+{
+	return bpf_dynptr_adjust(ptr, len, len);
+}
+
+__bpf_kfunc int bpf_dynptr_trim(struct bpf_dynptr_kern *ptr, u32 len)
+{
+	return bpf_dynptr_adjust(ptr, 0, len);
+}
+
 __bpf_kfunc void *bpf_cast_to_kern_ctx(void *obj)
 {
 	return obj;
@@ -2347,6 +2394,8 @@  BTF_ID_FLAGS(func, bpf_dynptr_slice_rdwr, KF_RET_NULL)
 BTF_ID_FLAGS(func, bpf_iter_num_new, KF_ITER_NEW)
 BTF_ID_FLAGS(func, bpf_iter_num_next, KF_ITER_NEXT | KF_RET_NULL)
 BTF_ID_FLAGS(func, bpf_iter_num_destroy, KF_ITER_DESTROY)
+BTF_ID_FLAGS(func, bpf_dynptr_trim)
+BTF_ID_FLAGS(func, bpf_dynptr_advance)
 BTF_SET8_END(common_btf_ids)
 
 static const struct btf_kfunc_id_set common_kfunc_set = {