diff mbox series

[bpf-next,v2,1/7] bpf: Implement bpf_probe_read_kernel_dynptr helper

Message ID 20250125-bpf_dynptr_probe-v2-1-c42c87f97afe@outlook.com (mailing list archive)
State Changes Requested
Delegated to: BPF
Headers show
Series bpf: Add probe_read_{kernel,user}_dynptr and copy_from_user_dynptr | expand

Checks

Context Check Description
bpf/vmtest-bpf-next-PR success PR summary
bpf/vmtest-bpf-next-VM_Test-2 success Logs for Unittests
bpf/vmtest-bpf-next-VM_Test-0 success Logs for Lint
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-3 success Logs for Validate matrix.py
bpf/vmtest-bpf-next-VM_Test-5 success Logs for aarch64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-4 success Logs for aarch64-gcc / build / build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-16 success Logs for s390x-gcc / test (test_verifier, false, 360) / test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-17 success Logs for s390x-gcc / veristat-kernel
bpf/vmtest-bpf-next-VM_Test-18 success Logs for s390x-gcc / veristat-meta
bpf/vmtest-bpf-next-VM_Test-19 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-21 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-11 success Logs for aarch64-gcc / veristat-meta
bpf/vmtest-bpf-next-VM_Test-12 success Logs for s390x-gcc / build / build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-9 success Logs for aarch64-gcc / test (test_verifier, false, 360) / test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-6 success Logs for aarch64-gcc / test (test_maps, false, 360) / test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-10 success Logs for aarch64-gcc / veristat-kernel
bpf/vmtest-bpf-next-VM_Test-13 success Logs for s390x-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-14 success Logs for s390x-gcc / test (test_progs, false, 360) / test_progs on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-20 success Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-22 success Logs for x86_64-gcc / test (test_maps, false, 360) / test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-7 success Logs for aarch64-gcc / test (test_progs, false, 360) / test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-8 success Logs for aarch64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-23 success Logs for x86_64-gcc / test (test_progs, false, 360) / test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-24 success Logs for x86_64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-25 success Logs for x86_64-gcc / test (test_progs_no_alu32_parallel, true, 30) / test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-26 success Logs for x86_64-gcc / test (test_progs_parallel, true, 30) / test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-27 success Logs for x86_64-gcc / test (test_verifier, false, 360) / test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-28 success Logs for x86_64-gcc / veristat-kernel / x86_64-gcc veristat_kernel
bpf/vmtest-bpf-next-VM_Test-29 success Logs for x86_64-gcc / veristat-meta / x86_64-gcc veristat_meta
bpf/vmtest-bpf-next-VM_Test-30 success Logs for x86_64-llvm-17 / build / build for x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-31 success Logs for x86_64-llvm-17 / build-release / build for x86_64 with llvm-17-O2
bpf/vmtest-bpf-next-VM_Test-32 success Logs for x86_64-llvm-17 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-33 success Logs for x86_64-llvm-17 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-34 success Logs for x86_64-llvm-17 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-35 success Logs for x86_64-llvm-17 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-36 success Logs for x86_64-llvm-17 / veristat-kernel
bpf/vmtest-bpf-next-VM_Test-37 success Logs for x86_64-llvm-17 / veristat-meta
bpf/vmtest-bpf-next-VM_Test-38 success Logs for x86_64-llvm-18 / build / build for x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-39 success Logs for x86_64-llvm-18 / build-release / build for x86_64 with llvm-18-O2
bpf/vmtest-bpf-next-VM_Test-40 success Logs for x86_64-llvm-18 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-41 success Logs for x86_64-llvm-18 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-42 success Logs for x86_64-llvm-18 / test (test_progs_cpuv4, false, 360) / test_progs_cpuv4 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-43 success Logs for x86_64-llvm-18 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-44 success Logs for x86_64-llvm-18 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-45 success Logs for x86_64-llvm-18 / veristat-kernel
bpf/vmtest-bpf-next-VM_Test-46 success Logs for x86_64-llvm-18 / veristat-meta
bpf/vmtest-bpf-next-VM_Test-15 success Logs for s390x-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on s390x with gcc
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for bpf-next, async
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 196 this patch: 196
netdev/build_tools success Errors and warnings before: 26 (+1) this patch: 26 (+1)
netdev/cc_maintainers success CCed 18 of 18 maintainers
netdev/build_clang fail Errors and warnings before: 9354 this patch: 9355
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn fail Errors and warnings before: 7001 this patch: 7003
netdev/checkpatch warning CHECK: Alignment should match open parenthesis WARNING: line length of 123 exceeds 80 columns WARNING: line length of 81 exceeds 80 columns WARNING: line length of 84 exceeds 80 columns WARNING: line length of 85 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 17 this patch: 17
netdev/source_inline success Was 0 now: 0

Commit Message

Levi Zim via B4 Relay Jan. 25, 2025, 8:29 a.m. UTC
From: Levi Zim <rsworktech@outlook.com>

This patch add a helper function bpf_probe_read_kernel_dynptr:

long bpf_probe_read_kernel_dynptr(const struct bpf_dynptr *dst,
	u32 offset, u32 size, const void *unsafe_ptr, u64 flags);

It is useful for reading variable-length data from kernel memory into
dynptr.

Link: https://lore.kernel.org/bpf/MEYP282MB2312CFCE5F7712FDE313215AC64D2@MEYP282MB2312.AUSP282.PROD.OUTLOOK.COM/
Signed-off-by: Levi Zim <rsworktech@outlook.com>
---
 include/linux/bpf.h      |  2 ++
 include/uapi/linux/bpf.h | 16 ++++++++++++++++
 kernel/bpf/helpers.c     |  8 ++++++--
 kernel/trace/bpf_trace.c | 42 ++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 66 insertions(+), 2 deletions(-)

Comments

Alexei Starovoitov Jan. 25, 2025, 4:58 p.m. UTC | #1
On Sat, Jan 25, 2025 at 12:30 AM Levi Zim via B4 Relay
<devnull+rsworktech.outlook.com@kernel.org> wrote:
>
> From: Levi Zim <rsworktech@outlook.com>
>
> This patch add a helper function bpf_probe_read_kernel_dynptr:
>
> long bpf_probe_read_kernel_dynptr(const struct bpf_dynptr *dst,
>         u32 offset, u32 size, const void *unsafe_ptr, u64 flags);

We stopped adding helpers years ago.
Only new kfuncs are allowed.

This particular one doesn't look useful as-is.
The same logic can be expressed with
- create dynptr
- dynptr_slice
- copy_from_kernel

pw-bot: cr
Levi Zim Jan. 26, 2025, 1:05 a.m. UTC | #2
On 2025/1/26 00:58, Alexei Starovoitov wrote:
 > On Sat, Jan 25, 2025 at 12:30 AM Levi Zim via B4 Relay
 > <devnull+rsworktech.outlook.com@kernel.org> wrote:
 >> From: Levi Zim <rsworktech@outlook.com>
 >>
 >> This patch add a helper function bpf_probe_read_kernel_dynptr:
 >>
 >> long bpf_probe_read_kernel_dynptr(const struct bpf_dynptr *dst,
 >>          u32 offset, u32 size, const void *unsafe_ptr, u64 flags);
 > We stopped adding helpers years ago.
 > Only new kfuncs are allowed.

Sorry, I didn't know that. Just asking, is there any 
documentation/discussion
about stopping adding helpers?

I will switch the implementation to kfuncs in v3.

 > This particular one doesn't look useful as-is.
 > The same logic can be expressed with
 > - create dynptr
 > - dynptr_slice
 > - copy_from_kernel

By copy_from_kernel I assume you mean bpf_probe_read_kernel. The problem
with dynptr_slice_rdwr and probe_read_kernel is that they only support a
compile-time constant size [1].

But in order to best utilize the space on a BPF ringbuf, it is possible 
to reserve a
variable length of space as dynptr on a ringbuf with 
bpf_ringbuf_reserve_dynptr.

Then currently we have no way to read a variable length of kernel memory
into this dynptr, except doing it chunk by chunk[2], which is kinda awkward.
That's the problem the new helpers trying to solve.
And I am not the only one needing this kind of feature [3].

Andrii said it would be a straightforward addition as it is a super thin 
wrapper
around existing functionality (we are just avoiding fixed buffer size 
restrictions of
existing probe/copy_from APIs)

[1]: 
https://elixir.bootlin.com/linux/v6.12.6/source/kernel/bpf/helpers.c#L2600-L2601
[2]: 
https://github.com/libbpf/libbpf-bootstrap/commit/046fad60df3e39540937b5ec6ee86054f33d3f28
[3]: https://github.com/libbpf/libbpf-rs/issues/1041
[4]: 
https://lore.kernel.org/bpf/CAEf4BzZctXJsR+TwMhmXNWnR0_BV802-3KJw226ZZt8St4xNkw@mail.gmail.com/

 > pw-bot: cr
Alexei Starovoitov Jan. 27, 2025, 10:04 p.m. UTC | #3
On Sat, Jan 25, 2025 at 5:05 PM Levi Zim <rsworktech@outlook.com> wrote:
>
> On 2025/1/26 00:58, Alexei Starovoitov wrote:
>  > On Sat, Jan 25, 2025 at 12:30 AM Levi Zim via B4 Relay
>  > <devnull+rsworktech.outlook.com@kernel.org> wrote:
>  >> From: Levi Zim <rsworktech@outlook.com>
>  >>
>  >> This patch add a helper function bpf_probe_read_kernel_dynptr:
>  >>
>  >> long bpf_probe_read_kernel_dynptr(const struct bpf_dynptr *dst,
>  >>          u32 offset, u32 size, const void *unsafe_ptr, u64 flags);
>  > We stopped adding helpers years ago.
>  > Only new kfuncs are allowed.
>
> Sorry, I didn't know that. Just asking, is there any
> documentation/discussion
> about stopping adding helpers?
>
> I will switch the implementation to kfuncs in v3.
>
>  > This particular one doesn't look useful as-is.
>  > The same logic can be expressed with
>  > - create dynptr
>  > - dynptr_slice
>  > - copy_from_kernel
>
> By copy_from_kernel I assume you mean bpf_probe_read_kernel. The problem
> with dynptr_slice_rdwr and probe_read_kernel is that they only support a
> compile-time constant size [1].
>
> But in order to best utilize the space on a BPF ringbuf, it is possible
> to reserve a
> variable length of space as dynptr on a ringbuf with
> bpf_ringbuf_reserve_dynptr.

That makes sense. The commit log didn't call it out.
Please spell out the motivation clearly.
Also why bpf_probe_read_kernel_common ?
Do we need to memset() it on failure?
Andrii Nakryiko Jan. 27, 2025, 10:05 p.m. UTC | #4
On Sat, Jan 25, 2025 at 5:05 PM Levi Zim <rsworktech@outlook.com> wrote:
>
> On 2025/1/26 00:58, Alexei Starovoitov wrote:
>  > On Sat, Jan 25, 2025 at 12:30 AM Levi Zim via B4 Relay
>  > <devnull+rsworktech.outlook.com@kernel.org> wrote:
>  >> From: Levi Zim <rsworktech@outlook.com>
>  >>
>  >> This patch add a helper function bpf_probe_read_kernel_dynptr:
>  >>
>  >> long bpf_probe_read_kernel_dynptr(const struct bpf_dynptr *dst,
>  >>          u32 offset, u32 size, const void *unsafe_ptr, u64 flags);
>  > We stopped adding helpers years ago.
>  > Only new kfuncs are allowed.
>
> Sorry, I didn't know that. Just asking, is there any
> documentation/discussion
> about stopping adding helpers?
>
> I will switch the implementation to kfuncs in v3.

you might want to look at Documentation/bpf/kfuncs.rst as well

>
>  > This particular one doesn't look useful as-is.
>  > The same logic can be expressed with
>  > - create dynptr
>  > - dynptr_slice
>  > - copy_from_kernel
>
> By copy_from_kernel I assume you mean bpf_probe_read_kernel. The problem
> with dynptr_slice_rdwr and probe_read_kernel is that they only support a
> compile-time constant size [1].
>
> But in order to best utilize the space on a BPF ringbuf, it is possible
> to reserve a
> variable length of space as dynptr on a ringbuf with
> bpf_ringbuf_reserve_dynptr.
>
> Then currently we have no way to read a variable length of kernel memory
> into this dynptr, except doing it chunk by chunk[2], which is kinda awkward.
> That's the problem the new helpers trying to solve.
> And I am not the only one needing this kind of feature [3].
>
> Andrii said it would be a straightforward addition as it is a super thin
> wrapper
> around existing functionality (we are just avoiding fixed buffer size
> restrictions of
> existing probe/copy_from APIs)

Yep, exactly. All the probe/copy APIs we have require statically known
dst buffer and, worst still, statically known read size, which makes
it very hard and cumbersome to request data which size is known at
runtime (e.g., process argv/environ data).

So, in short, I think these new dynptr-based APIs are a good addition,
but yeah, they'll have to be added as kfuncs.

One thing that we need to figure out is how to deal with SKB/XDP
dynptrs, we can't just reject them as destination buffers for all
possible APIs. I think we'll need to develop a set of internal helpers
that would handle skb/xdp dynptr cases almost transparently:

  - if skb/xdp memory is linear, we just memcpy/probe_read into it directly;
  - if it's not linear, we use on-the-stack small buffer (128 or 256
bytes, something like that), to do desired operation (memcpy or probe)
into that buffer, and then using generic dynptr write functionality
from that buffer into skb/xdp.

We probably can start simple with excluding xdp/skb, but this should
be the second step.

>
> [1]:
> https://elixir.bootlin.com/linux/v6.12.6/source/kernel/bpf/helpers.c#L2600-L2601
> [2]:
> https://github.com/libbpf/libbpf-bootstrap/commit/046fad60df3e39540937b5ec6ee86054f33d3f28
> [3]: https://github.com/libbpf/libbpf-rs/issues/1041
> [4]:
> https://lore.kernel.org/bpf/CAEf4BzZctXJsR+TwMhmXNWnR0_BV802-3KJw226ZZt8St4xNkw@mail.gmail.com/
>
>  > pw-bot: cr
Andrei Matei Jan. 27, 2025, 10:53 p.m. UTC | #5
On Mon, Jan 27, 2025 at 5:04 PM Alexei Starovoitov
<alexei.starovoitov@gmail.com> wrote:
>
> On Sat, Jan 25, 2025 at 5:05 PM Levi Zim <rsworktech@outlook.com> wrote:
> >
> > On 2025/1/26 00:58, Alexei Starovoitov wrote:
> >  > On Sat, Jan 25, 2025 at 12:30 AM Levi Zim via B4 Relay
> >  > <devnull+rsworktech.outlook.com@kernel.org> wrote:
> >  >> From: Levi Zim <rsworktech@outlook.com>
> >  >>
> >  >> This patch add a helper function bpf_probe_read_kernel_dynptr:
> >  >>
> >  >> long bpf_probe_read_kernel_dynptr(const struct bpf_dynptr *dst,
> >  >>          u32 offset, u32 size, const void *unsafe_ptr, u64 flags);
> >  > We stopped adding helpers years ago.
> >  > Only new kfuncs are allowed.
> >
> > Sorry, I didn't know that. Just asking, is there any
> > documentation/discussion
> > about stopping adding helpers?
> >
> > I will switch the implementation to kfuncs in v3.
> >
> >  > This particular one doesn't look useful as-is.
> >  > The same logic can be expressed with
> >  > - create dynptr
> >  > - dynptr_slice
> >  > - copy_from_kernel
> >
> > By copy_from_kernel I assume you mean bpf_probe_read_kernel. The problem
> > with dynptr_slice_rdwr and probe_read_kernel is that they only support a
> > compile-time constant size [1].
> >
> > But in order to best utilize the space on a BPF ringbuf, it is possible
> > to reserve a
> > variable length of space as dynptr on a ringbuf with
> > bpf_ringbuf_reserve_dynptr.

For our uprobes, we've run into similar issues around doing variable-sized
bpf_probe_read_user() into ring buffers for our debugger [1]. Our use case
is that we generate uprobes that recursively read data structures until we
fill up a buffer. The verifier's insistence on knowing statically that a read
fits into the buffer makes for awkward code, and makes it hard to pack the
buffer fully; we have to split our reads into a couple of static size classes.

Any chance there'd be interest in taking the opportunity to support
dynamically-sized reads from userspace too? :)

[1] https://side-eye.io

>
> That makes sense. The commit log didn't call it out.
> Please spell out the motivation clearly.
> Also why bpf_probe_read_kernel_common ?
> Do we need to memset() it on failure?
>
Andrii Nakryiko Jan. 27, 2025, 11:09 p.m. UTC | #6
On Mon, Jan 27, 2025 at 2:54 PM Andrei Matei <andreimatei1@gmail.com> wrote:
>
> On Mon, Jan 27, 2025 at 5:04 PM Alexei Starovoitov
> <alexei.starovoitov@gmail.com> wrote:
> >
> > On Sat, Jan 25, 2025 at 5:05 PM Levi Zim <rsworktech@outlook.com> wrote:
> > >
> > > On 2025/1/26 00:58, Alexei Starovoitov wrote:
> > >  > On Sat, Jan 25, 2025 at 12:30 AM Levi Zim via B4 Relay
> > >  > <devnull+rsworktech.outlook.com@kernel.org> wrote:
> > >  >> From: Levi Zim <rsworktech@outlook.com>
> > >  >>
> > >  >> This patch add a helper function bpf_probe_read_kernel_dynptr:
> > >  >>
> > >  >> long bpf_probe_read_kernel_dynptr(const struct bpf_dynptr *dst,
> > >  >>          u32 offset, u32 size, const void *unsafe_ptr, u64 flags);
> > >  > We stopped adding helpers years ago.
> > >  > Only new kfuncs are allowed.
> > >
> > > Sorry, I didn't know that. Just asking, is there any
> > > documentation/discussion
> > > about stopping adding helpers?
> > >
> > > I will switch the implementation to kfuncs in v3.
> > >
> > >  > This particular one doesn't look useful as-is.
> > >  > The same logic can be expressed with
> > >  > - create dynptr
> > >  > - dynptr_slice
> > >  > - copy_from_kernel
> > >
> > > By copy_from_kernel I assume you mean bpf_probe_read_kernel. The problem
> > > with dynptr_slice_rdwr and probe_read_kernel is that they only support a
> > > compile-time constant size [1].
> > >
> > > But in order to best utilize the space on a BPF ringbuf, it is possible
> > > to reserve a
> > > variable length of space as dynptr on a ringbuf with
> > > bpf_ringbuf_reserve_dynptr.
>
> For our uprobes, we've run into similar issues around doing variable-sized
> bpf_probe_read_user() into ring buffers for our debugger [1]. Our use case
> is that we generate uprobes that recursively read data structures until we
> fill up a buffer. The verifier's insistence on knowing statically that a read
> fits into the buffer makes for awkward code, and makes it hard to pack the
> buffer fully; we have to split our reads into a couple of static size classes.
>
> Any chance there'd be interest in taking the opportunity to support
> dynamically-sized reads from userspace too? :)

That's bpf_probe_read_user_dynptr() from patch #2, no?

But generally speaking, here's a list of new APIs that we'd need to
cover all existing fixed buffer versions:

- non-sleepable probe reads:

  bpf_probe_read_kernel_dynptr()
  bpf_probe_read_user_dynptr()
  bpf_probe_read_kernel_str_dynptr()
  bpf_probe_read_user_str_dynptr()

- sleepable probe reads (copy_from_user):

bpf_copy_from_user_dynptr()
bpf_copy_from_user_str_dynptr()

- and then we have complementary task-based APIs for non-current process:

bpf_probe_read_user_task_dynptr()
bpf_probe_read_user_str_task_dynptr()
bpf_copy_from_user_task_dynptr()
bpf_copy_from_user_str_task_dynptr()

Jordan is working on non-dynptr version of
bpf_copy_from_user_str_task(), once he's done with that, we'll add
dynptr version, probably.

>
> [1] https://side-eye.io
>
> >
> > That makes sense. The commit log didn't call it out.
> > Please spell out the motivation clearly.
> > Also why bpf_probe_read_kernel_common ?
> > Do we need to memset() it on failure?
> >
Levi Zim Jan. 28, 2025, 12:31 a.m. UTC | #7
On 2025-01-28 07:09, Andrii Nakryiko wrote:
> On Mon, Jan 27, 2025 at 2:54 PM Andrei Matei <andreimatei1@gmail.com> wrote:
>> On Mon, Jan 27, 2025 at 5:04 PM Alexei Starovoitov
>> <alexei.starovoitov@gmail.com> wrote:
>>> On Sat, Jan 25, 2025 at 5:05 PM Levi Zim <rsworktech@outlook.com> wrote:
>>>> On 2025/1/26 00:58, Alexei Starovoitov wrote:
>>>>   > On Sat, Jan 25, 2025 at 12:30 AM Levi Zim via B4 Relay
>>>>   > <devnull+rsworktech.outlook.com@kernel.org> wrote:
>>>>   >> From: Levi Zim <rsworktech@outlook.com>
>>>>   >>
>>>>   >> This patch add a helper function bpf_probe_read_kernel_dynptr:
>>>>   >>
>>>>   >> long bpf_probe_read_kernel_dynptr(const struct bpf_dynptr *dst,
>>>>   >>          u32 offset, u32 size, const void *unsafe_ptr, u64 flags);
>>>>   > We stopped adding helpers years ago.
>>>>   > Only new kfuncs are allowed.
>>>>
>>>> Sorry, I didn't know that. Just asking, is there any
>>>> documentation/discussion
>>>> about stopping adding helpers?
>>>>
>>>> I will switch the implementation to kfuncs in v3.
>>>>
>>>>   > This particular one doesn't look useful as-is.
>>>>   > The same logic can be expressed with
>>>>   > - create dynptr
>>>>   > - dynptr_slice
>>>>   > - copy_from_kernel
>>>>
>>>> By copy_from_kernel I assume you mean bpf_probe_read_kernel. The problem
>>>> with dynptr_slice_rdwr and probe_read_kernel is that they only support a
>>>> compile-time constant size [1].
>>>>
>>>> But in order to best utilize the space on a BPF ringbuf, it is possible
>>>> to reserve a
>>>> variable length of space as dynptr on a ringbuf with
>>>> bpf_ringbuf_reserve_dynptr.
>> For our uprobes, we've run into similar issues around doing variable-sized
>> bpf_probe_read_user() into ring buffers for our debugger [1]. Our use case
>> is that we generate uprobes that recursively read data structures until we
>> fill up a buffer. The verifier's insistence on knowing statically that a read
>> fits into the buffer makes for awkward code, and makes it hard to pack the
>> buffer fully; we have to split our reads into a couple of static size classes.
>>
>> Any chance there'd be interest in taking the opportunity to support
>> dynamically-sized reads from userspace too? :)
> That's bpf_probe_read_user_dynptr() from patch #2, no?
>
> But generally speaking, here's a list of new APIs that we'd need to
> cover all existing fixed buffer versions:
>
> - non-sleepable probe reads:
>
>    bpf_probe_read_kernel_dynptr()
>    bpf_probe_read_user_dynptr()
>    bpf_probe_read_kernel_str_dynptr()
I think the _str_dynptr versions are probably not worth adding.
For example, when we use probe_read_kernel_str, the length of the str is 
usually
not known and we usually allocate a fixed size buffer for it. If we do 
know the
length of the str beforehand, we can just use probe_read_kernel_dynptr.
>    bpf_probe_read_user_str_dynptr()
>
> - sleepable probe reads (copy_from_user):
>
> bpf_copy_from_user_dynptr()
> bpf_copy_from_user_str_dynptr()
>
> - and then we have complementary task-based APIs for non-current process:
>
> bpf_probe_read_user_task_dynptr()
> bpf_probe_read_user_str_task_dynptr()
> bpf_copy_from_user_task_dynptr()
> bpf_copy_from_user_str_task_dynptr()
>
> Jordan is working on non-dynptr version of
> bpf_copy_from_user_str_task(), once he's done with that, we'll add
> dynptr version, probably.
>
>> [1] https://side-eye.io
>>
>>> That makes sense. The commit log didn't call it out.
>>> Please spell out the motivation clearly.
>>> Also why bpf_probe_read_kernel_common ?
>>> Do we need to memset() it on failure?
>>>
Alexei Starovoitov Jan. 28, 2025, 2:57 a.m. UTC | #8
On Mon, Jan 27, 2025 at 3:09 PM Andrii Nakryiko
<andrii.nakryiko@gmail.com> wrote:
>
> On Mon, Jan 27, 2025 at 2:54 PM Andrei Matei <andreimatei1@gmail.com> wrote:
> >
> > On Mon, Jan 27, 2025 at 5:04 PM Alexei Starovoitov
> > <alexei.starovoitov@gmail.com> wrote:
> > >
> > > On Sat, Jan 25, 2025 at 5:05 PM Levi Zim <rsworktech@outlook.com> wrote:
> > > >
> > > > On 2025/1/26 00:58, Alexei Starovoitov wrote:
> > > >  > On Sat, Jan 25, 2025 at 12:30 AM Levi Zim via B4 Relay
> > > >  > <devnull+rsworktech.outlook.com@kernel.org> wrote:
> > > >  >> From: Levi Zim <rsworktech@outlook.com>
> > > >  >>
> > > >  >> This patch add a helper function bpf_probe_read_kernel_dynptr:
> > > >  >>
> > > >  >> long bpf_probe_read_kernel_dynptr(const struct bpf_dynptr *dst,
> > > >  >>          u32 offset, u32 size, const void *unsafe_ptr, u64 flags);
> > > >  > We stopped adding helpers years ago.
> > > >  > Only new kfuncs are allowed.
> > > >
> > > > Sorry, I didn't know that. Just asking, is there any
> > > > documentation/discussion
> > > > about stopping adding helpers?
> > > >
> > > > I will switch the implementation to kfuncs in v3.
> > > >
> > > >  > This particular one doesn't look useful as-is.
> > > >  > The same logic can be expressed with
> > > >  > - create dynptr
> > > >  > - dynptr_slice
> > > >  > - copy_from_kernel
> > > >
> > > > By copy_from_kernel I assume you mean bpf_probe_read_kernel. The problem
> > > > with dynptr_slice_rdwr and probe_read_kernel is that they only support a
> > > > compile-time constant size [1].
> > > >
> > > > But in order to best utilize the space on a BPF ringbuf, it is possible
> > > > to reserve a
> > > > variable length of space as dynptr on a ringbuf with
> > > > bpf_ringbuf_reserve_dynptr.
> >
> > For our uprobes, we've run into similar issues around doing variable-sized
> > bpf_probe_read_user() into ring buffers for our debugger [1]. Our use case
> > is that we generate uprobes that recursively read data structures until we
> > fill up a buffer. The verifier's insistence on knowing statically that a read
> > fits into the buffer makes for awkward code, and makes it hard to pack the
> > buffer fully; we have to split our reads into a couple of static size classes.
> >
> > Any chance there'd be interest in taking the opportunity to support
> > dynamically-sized reads from userspace too? :)
>
> That's bpf_probe_read_user_dynptr() from patch #2, no?
>
> But generally speaking, here's a list of new APIs that we'd need to
> cover all existing fixed buffer versions:
>
> - non-sleepable probe reads:
>
>   bpf_probe_read_kernel_dynptr()
>   bpf_probe_read_user_dynptr()
>   bpf_probe_read_kernel_str_dynptr()
>   bpf_probe_read_user_str_dynptr()
>
> - sleepable probe reads (copy_from_user):
>
> bpf_copy_from_user_dynptr()
> bpf_copy_from_user_str_dynptr()
>
> - and then we have complementary task-based APIs for non-current process:
>
> bpf_probe_read_user_task_dynptr()
> bpf_probe_read_user_str_task_dynptr()
> bpf_copy_from_user_task_dynptr()
> bpf_copy_from_user_str_task_dynptr()
>
> Jordan is working on non-dynptr version of
> bpf_copy_from_user_str_task(), once he's done with that, we'll add
> dynptr version, probably.

This is quite a bunch of kfuncs.
It doesn't look like adding _dynptr suffix and duplicating
kfuncs approach scales.

Let's make the existing helpers/kfuncs more flexible ?

We can introduce a kfunc bpf_dynptr_buf() that checks that
dynptr is not readonly and type == local or ringbuf and
return dynptr->data as PTR_TO_MEM | dynptr_flag | VERIFIER_ADDS_SIZE_CHECK.

Then allow bpf_probe_read_user/kernel/... all of them to accept
this register type where PTR_TO_MEM is required
while relaxing ARG_CONST_SIZE 2nd argument to ARG_ANYTHING.
Then the verifier will insert an extra check
if (arg1->size < arg2)
before the call.

Not only the bpf_probe_read_kernel/user, _str variants will work
but things like bpf_strtol, bpf_strncmp, bpf_snprintf, bpf_get_stack
will auto-magically work as well.

I think those are quite valuable to make available with non-constant size.
bpf_get_stack_*() directly into the ring buffer sounds very useful.
Levi Zim Jan. 28, 2025, 11:13 a.m. UTC | #9
On 2025/1/28 06:04, Alexei Starovoitov wrote:
> On Sat, Jan 25, 2025 at 5:05 PM Levi Zim <rsworktech@outlook.com> wrote:
>> On 2025/1/26 00:58, Alexei Starovoitov wrote:
>>   > On Sat, Jan 25, 2025 at 12:30 AM Levi Zim via B4 Relay
>>   > <devnull+rsworktech.outlook.com@kernel.org> wrote:
>>   >> From: Levi Zim <rsworktech@outlook.com>
>>   >>
>>   >> This patch add a helper function bpf_probe_read_kernel_dynptr:
>>   >>
>>   >> long bpf_probe_read_kernel_dynptr(const struct bpf_dynptr *dst,
>>   >>          u32 offset, u32 size, const void *unsafe_ptr, u64 flags);
>>   > We stopped adding helpers years ago.
>>   > Only new kfuncs are allowed.
>>
>> Sorry, I didn't know that. Just asking, is there any
>> documentation/discussion
>> about stopping adding helpers?
>>
>> I will switch the implementation to kfuncs in v3.
>>
>>   > This particular one doesn't look useful as-is.
>>   > The same logic can be expressed with
>>   > - create dynptr
>>   > - dynptr_slice
>>   > - copy_from_kernel
>>
>> By copy_from_kernel I assume you mean bpf_probe_read_kernel. The problem
>> with dynptr_slice_rdwr and probe_read_kernel is that they only support a
>> compile-time constant size [1].
>>
>> But in order to best utilize the space on a BPF ringbuf, it is possible
>> to reserve a
>> variable length of space as dynptr on a ringbuf with
>> bpf_ringbuf_reserve_dynptr.
> That makes sense. The commit log didn't call it out.
> Please spell out the motivation clearly.
Thanks for the advice! I will include it in v3.
> Also why bpf_probe_read_kernel_common ?
> Do we need to memset() it on failure?
Since the current patch is basically a thin wrapper around 
bpf_probe_read_kernel,
I think we'd better not deviate from the wrapped function.
Levi Zim Jan. 28, 2025, 11:22 a.m. UTC | #10
On 2025/1/28 10:57, Alexei Starovoitov wrote:
> On Mon, Jan 27, 2025 at 3:09 PM Andrii Nakryiko
> <andrii.nakryiko@gmail.com> wrote:
>> On Mon, Jan 27, 2025 at 2:54 PM Andrei Matei <andreimatei1@gmail.com> wrote:
>>> On Mon, Jan 27, 2025 at 5:04 PM Alexei Starovoitov
>>> <alexei.starovoitov@gmail.com> wrote:
>>>> On Sat, Jan 25, 2025 at 5:05 PM Levi Zim <rsworktech@outlook.com> wrote:
>>>>> On 2025/1/26 00:58, Alexei Starovoitov wrote:
>>>>>   > On Sat, Jan 25, 2025 at 12:30 AM Levi Zim via B4 Relay
>>>>>   > <devnull+rsworktech.outlook.com@kernel.org> wrote:
>>>>>   >> From: Levi Zim <rsworktech@outlook.com>
>>>>>   >>
>>>>>   >> This patch add a helper function bpf_probe_read_kernel_dynptr:
>>>>>   >>
>>>>>   >> long bpf_probe_read_kernel_dynptr(const struct bpf_dynptr *dst,
>>>>>   >>          u32 offset, u32 size, const void *unsafe_ptr, u64 flags);
>>>>>   > We stopped adding helpers years ago.
>>>>>   > Only new kfuncs are allowed.
>>>>>
>>>>> Sorry, I didn't know that. Just asking, is there any
>>>>> documentation/discussion
>>>>> about stopping adding helpers?
>>>>>
>>>>> I will switch the implementation to kfuncs in v3.
>>>>>
>>>>>   > This particular one doesn't look useful as-is.
>>>>>   > The same logic can be expressed with
>>>>>   > - create dynptr
>>>>>   > - dynptr_slice
>>>>>   > - copy_from_kernel
>>>>>
>>>>> By copy_from_kernel I assume you mean bpf_probe_read_kernel. The problem
>>>>> with dynptr_slice_rdwr and probe_read_kernel is that they only support a
>>>>> compile-time constant size [1].
>>>>>
>>>>> But in order to best utilize the space on a BPF ringbuf, it is possible
>>>>> to reserve a
>>>>> variable length of space as dynptr on a ringbuf with
>>>>> bpf_ringbuf_reserve_dynptr.
>>> For our uprobes, we've run into similar issues around doing variable-sized
>>> bpf_probe_read_user() into ring buffers for our debugger [1]. Our use case
>>> is that we generate uprobes that recursively read data structures until we
>>> fill up a buffer. The verifier's insistence on knowing statically that a read
>>> fits into the buffer makes for awkward code, and makes it hard to pack the
>>> buffer fully; we have to split our reads into a couple of static size classes.
>>>
>>> Any chance there'd be interest in taking the opportunity to support
>>> dynamically-sized reads from userspace too? :)
>> That's bpf_probe_read_user_dynptr() from patch #2, no?
>>
>> But generally speaking, here's a list of new APIs that we'd need to
>> cover all existing fixed buffer versions:
>>
>> - non-sleepable probe reads:
>>
>>    bpf_probe_read_kernel_dynptr()
>>    bpf_probe_read_user_dynptr()
>>    bpf_probe_read_kernel_str_dynptr()
>>    bpf_probe_read_user_str_dynptr()
>>
>> - sleepable probe reads (copy_from_user):
>>
>> bpf_copy_from_user_dynptr()
>> bpf_copy_from_user_str_dynptr()
>>
>> - and then we have complementary task-based APIs for non-current process:
>>
>> bpf_probe_read_user_task_dynptr()
>> bpf_probe_read_user_str_task_dynptr()
>> bpf_copy_from_user_task_dynptr()
>> bpf_copy_from_user_str_task_dynptr()
>>
>> Jordan is working on non-dynptr version of
>> bpf_copy_from_user_str_task(), once he's done with that, we'll add
>> dynptr version, probably.
> This is quite a bunch of kfuncs.
> It doesn't look like adding _dynptr suffix and duplicating
> kfuncs approach scales.

The _str_dynptr versions might not worth adding [1].
So only four read_{kernel,user}_dynptr and copy_from_user{,_task}_dynptr 
are needed,
which seems manageable for now.

But taking other helpers like bpf_strtol into account does quickly show 
that this approach
is not scalable.

> Let's make the existing helpers/kfuncs more flexible ?
>
> We can introduce a kfunc bpf_dynptr_buf() that checks that
> dynptr is not readonly and type == local or ringbuf and
> return dynptr->data as PTR_TO_MEM | dynptr_flag | VERIFIER_ADDS_SIZE_CHECK.
>
> Then allow bpf_probe_read_user/kernel/... all of them to accept
> this register type where PTR_TO_MEM is required
> while relaxing ARG_CONST_SIZE 2nd argument to ARG_ANYTHING.
> Then the verifier will insert an extra check
> if (arg1->size < arg2)
> before the call.
Nice idea. I will try this approach first.
>
> Not only the bpf_probe_read_kernel/user, _str variants will work
> but things like bpf_strtol, bpf_strncmp, bpf_snprintf, bpf_get_stack
> will auto-magically work as well.
>
> I think those are quite valuable to make available with non-constant size.
> bpf_get_stack_*() directly into the ring buffer sounds very useful.

[1]: 
https://lore.kernel.org/bpf/20250125-bpf_dynptr_probe-v2-0-c42c87f97afe@outlook.com/T/#m9700146d286a88abc0b25ef47041015ba6c477a3
Levi Zim Jan. 31, 2025, 6:14 a.m. UTC | #11
On 2025-01-28 10:57, Alexei Starovoitov wrote:
> On Mon, Jan 27, 2025 at 3:09 PM Andrii Nakryiko
> <andrii.nakryiko@gmail.com> wrote:
>> On Mon, Jan 27, 2025 at 2:54 PM Andrei Matei <andreimatei1@gmail.com> wrote:
>>> On Mon, Jan 27, 2025 at 5:04 PM Alexei Starovoitov
>>> <alexei.starovoitov@gmail.com> wrote:
>>>> On Sat, Jan 25, 2025 at 5:05 PM Levi Zim <rsworktech@outlook.com> wrote:
>>>>> On 2025/1/26 00:58, Alexei Starovoitov wrote:
>>>>>   > On Sat, Jan 25, 2025 at 12:30 AM Levi Zim via B4 Relay
>>>>>   > <devnull+rsworktech.outlook.com@kernel.org> wrote:
>>>>>   >> From: Levi Zim <rsworktech@outlook.com>
>>>>>   >>
>>>>>   >> This patch add a helper function bpf_probe_read_kernel_dynptr:
>>>>>   >>
>>>>>   >> long bpf_probe_read_kernel_dynptr(const struct bpf_dynptr *dst,
>>>>>   >>          u32 offset, u32 size, const void *unsafe_ptr, u64 flags);
>>>>>   > We stopped adding helpers years ago.
>>>>>   > Only new kfuncs are allowed.
>>>>>
>>>>> Sorry, I didn't know that. Just asking, is there any
>>>>> documentation/discussion
>>>>> about stopping adding helpers?
>>>>>
>>>>> I will switch the implementation to kfuncs in v3.
>>>>>
>>>>>   > This particular one doesn't look useful as-is.
>>>>>   > The same logic can be expressed with
>>>>>   > - create dynptr
>>>>>   > - dynptr_slice
>>>>>   > - copy_from_kernel
>>>>>
>>>>> By copy_from_kernel I assume you mean bpf_probe_read_kernel. The problem
>>>>> with dynptr_slice_rdwr and probe_read_kernel is that they only support a
>>>>> compile-time constant size [1].
>>>>>
>>>>> But in order to best utilize the space on a BPF ringbuf, it is possible
>>>>> to reserve a
>>>>> variable length of space as dynptr on a ringbuf with
>>>>> bpf_ringbuf_reserve_dynptr.
>>> For our uprobes, we've run into similar issues around doing variable-sized
>>> bpf_probe_read_user() into ring buffers for our debugger [1]. Our use case
>>> is that we generate uprobes that recursively read data structures until we
>>> fill up a buffer. The verifier's insistence on knowing statically that a read
>>> fits into the buffer makes for awkward code, and makes it hard to pack the
>>> buffer fully; we have to split our reads into a couple of static size classes.
>>>
>>> Any chance there'd be interest in taking the opportunity to support
>>> dynamically-sized reads from userspace too? :)
>> That's bpf_probe_read_user_dynptr() from patch #2, no?
>>
>> But generally speaking, here's a list of new APIs that we'd need to
>> cover all existing fixed buffer versions:
>>
>> - non-sleepable probe reads:
>>
>>    bpf_probe_read_kernel_dynptr()
>>    bpf_probe_read_user_dynptr()
>>    bpf_probe_read_kernel_str_dynptr()
>>    bpf_probe_read_user_str_dynptr()
>>
>> - sleepable probe reads (copy_from_user):
>>
>> bpf_copy_from_user_dynptr()
>> bpf_copy_from_user_str_dynptr()
>>
>> - and then we have complementary task-based APIs for non-current process:
>>
>> bpf_probe_read_user_task_dynptr()
>> bpf_probe_read_user_str_task_dynptr()
>> bpf_copy_from_user_task_dynptr()
>> bpf_copy_from_user_str_task_dynptr()
>>
>> Jordan is working on non-dynptr version of
>> bpf_copy_from_user_str_task(), once he's done with that, we'll add
>> dynptr version, probably.
> This is quite a bunch of kfuncs.
> It doesn't look like adding _dynptr suffix and duplicating
> kfuncs approach scales.
>
> Let's make the existing helpers/kfuncs more flexible ?
>
> We can introduce a kfunc bpf_dynptr_buf() that checks that
> dynptr is not readonly and type == local or ringbuf and
> return dynptr->data as PTR_TO_MEM | dynptr_flag | VERIFIER_ADDS_SIZE_CHECK.
>
> Then allow bpf_probe_read_user/kernel/... all of them to accept
> this register type where PTR_TO_MEM is required
> while relaxing ARG_CONST_SIZE 2nd argument to ARG_ANYTHING.
> Then the verifier will insert an extra check
> if (arg1->size < arg2)
> before the call.
The verifier does static analysis of the program and I can't find any 
existing
example that modifies the program in the verifier. I don't think I am 
familiar
enough with the verifier to change it to modify the program to insert a 
bound
check. And I think inserting extra instructions into the program while 
verifying
it would add a lot of complexity.

But I think the verifier could instead verify that the user program contains
such a bound check before calling helpers like bpf_probe_read_user on the
buf and otherwise reject the program. That being said, I don't know how
the verifier could verify if there is a bound check in the user program.

Still, there lacks a mechanism to add another prototype for a helper.
I think we need such a mechanism for adding multiple prototypes for a helper
and the verifier will try them one by one.

So that we can have two prototypes for bpf_probe_read_user:

The old one and the new one where size is ARG_ANYTHING and dst is
PTR_TO_MANUAL_BOUND_CHECKED_MEM.
>
> Not only the bpf_probe_read_kernel/user, _str variants will work
> but things like bpf_strtol, bpf_strncmp, bpf_snprintf, bpf_get_stack
> will auto-magically work as well.
>
> I think those are quite valuable to make available with non-constant size.
> bpf_get_stack_*() directly into the ring buffer sounds very useful.
diff mbox series

Patch

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index f3f50e29d63929acaf12c81f8356173f1f5e154b..9d5ae8b4b7d82c4523bf0ab041d4b76bf134a106 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1323,6 +1323,8 @@  u32 __bpf_dynptr_size(const struct bpf_dynptr_kern *ptr);
 const void *__bpf_dynptr_data(const struct bpf_dynptr_kern *ptr, u32 len);
 void *__bpf_dynptr_data_rw(const struct bpf_dynptr_kern *ptr, u32 len);
 bool __bpf_dynptr_is_rdonly(const struct bpf_dynptr_kern *ptr);
+int bpf_dynptr_check_off_len(const struct bpf_dynptr_kern *ptr, u32 offset, u32 len);
+enum bpf_dynptr_type bpf_dynptr_get_type(const struct bpf_dynptr_kern *ptr);
 
 #ifdef CONFIG_BPF_JIT
 int bpf_trampoline_link_prog(struct bpf_tramp_link *link,
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 2acf9b33637174bd16b1d12ccc6410c5f55a7ea9..2e08a59527ecf56732ea14ac34446b5eb25b5690 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -5805,6 +5805,21 @@  union bpf_attr {
  *		0 on success.
  *
  *		**-ENOENT** if the bpf_local_storage cannot be found.
+ *
+ * long bpf_probe_read_kernel_dynptr(const struct bpf_dynptr *dst, u32 offset, u32 size, const void *unsafe_ptr, u64 flags)
+ *	Description
+ *		Safely attempt to read *size* bytes from kernel space address
+ *		*unsafe_ptr* and store the data in *dst* starting from *offset*.
+ *		*flags* is currently unused.
+ *	Return
+ *		0 on success.
+ *
+ *		**-E2BIG** if *offset* + *len* exceeds the length of *src*'s data
+ *
+ *		**-EINVAL** if *src* is an invalid dynptr or doesn't support this
+ *		support this helper, or if *flags* is not 0.
+ *
+ *		Or other negative errors on failure reading kernel memory.
  */
 #define ___BPF_FUNC_MAPPER(FN, ctx...)			\
 	FN(unspec, 0, ##ctx)				\
@@ -6019,6 +6034,7 @@  union bpf_attr {
 	FN(user_ringbuf_drain, 209, ##ctx)		\
 	FN(cgrp_storage_get, 210, ##ctx)		\
 	FN(cgrp_storage_delete, 211, ##ctx)		\
+	FN(probe_read_kernel_dynptr, 212, ##ctx)		\
 	/* */
 
 /* backwards-compatibility macros for users of __BPF_FUNC_MAPPER that don't
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index f27ce162427ab4040d2e2d2eb84a883fe57de59e..a736dc9e7be98571103ba404420be0da4dac4fbe 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -1678,7 +1678,7 @@  static void bpf_dynptr_set_type(struct bpf_dynptr_kern *ptr, enum bpf_dynptr_typ
 	ptr->size |= type << DYNPTR_TYPE_SHIFT;
 }
 
-static enum bpf_dynptr_type bpf_dynptr_get_type(const struct bpf_dynptr_kern *ptr)
+enum bpf_dynptr_type bpf_dynptr_get_type(const struct bpf_dynptr_kern *ptr)
 {
 	return (ptr->size & ~(DYNPTR_RDONLY_BIT)) >> DYNPTR_TYPE_SHIFT;
 }
@@ -1714,7 +1714,7 @@  void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr)
 	memset(ptr, 0, sizeof(*ptr));
 }
 
-static int bpf_dynptr_check_off_len(const struct bpf_dynptr_kern *ptr, u32 offset, u32 len)
+int bpf_dynptr_check_off_len(const struct bpf_dynptr_kern *ptr, u32 offset, u32 len)
 {
 	u32 size = __bpf_dynptr_size(ptr);
 
@@ -1900,6 +1900,7 @@  const struct bpf_func_proto bpf_probe_read_user_proto __weak;
 const struct bpf_func_proto bpf_probe_read_user_str_proto __weak;
 const struct bpf_func_proto bpf_probe_read_kernel_proto __weak;
 const struct bpf_func_proto bpf_probe_read_kernel_str_proto __weak;
+const struct bpf_func_proto bpf_probe_read_kernel_dynptr_proto __weak;
 const struct bpf_func_proto bpf_task_pt_regs_proto __weak;
 
 const struct bpf_func_proto *
@@ -2031,6 +2032,9 @@  bpf_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 	case BPF_FUNC_probe_read_kernel:
 		return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ?
 		       NULL : &bpf_probe_read_kernel_proto;
+	case BPF_FUNC_probe_read_kernel_dynptr:
+		return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ?
+		       NULL : &bpf_probe_read_kernel_dynptr_proto;
 	case BPF_FUNC_probe_read_user_str:
 		return &bpf_probe_read_user_str_proto;
 	case BPF_FUNC_probe_read_kernel_str:
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index adc947587eb8132ebbd54778d2db937b3b8861de..75c9d1e8d04c3b8930ae81345f5586756ce8b5ec 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -248,6 +248,48 @@  const struct bpf_func_proto bpf_probe_read_kernel_proto = {
 	.arg3_type	= ARG_ANYTHING,
 };
 
+BPF_CALL_5(bpf_probe_read_kernel_dynptr, const struct bpf_dynptr_kern *, dst,
+	u32, offset, u32, size, void *, unsafe_ptr, u64, flags)
+{
+	enum bpf_dynptr_type type;
+	int err;
+
+	if (!dst->data || __bpf_dynptr_is_rdonly(dst))
+		return -EINVAL;
+
+	err = bpf_dynptr_check_off_len(dst, offset, size);
+	if (err)
+		return err;
+
+	type = bpf_dynptr_get_type(dst);
+
+	switch (type) {
+	case BPF_DYNPTR_TYPE_LOCAL:
+	case BPF_DYNPTR_TYPE_RINGBUF:
+		if (flags)
+			return -EINVAL;
+		return bpf_probe_read_kernel_common(dst->data + dst->offset + offset,
+				size, unsafe_ptr);
+	case BPF_DYNPTR_TYPE_SKB:
+	case BPF_DYNPTR_TYPE_XDP:
+		return -EINVAL;
+	default:
+		WARN_ONCE(true, "%s: unknown dynptr type %d\n", __func__, type);
+		return -EFAULT;
+	}
+}
+
+const struct bpf_func_proto bpf_probe_read_kernel_dynptr_proto = {
+	.func		= bpf_probe_read_kernel_dynptr,
+	.gpl_only	= true,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_DYNPTR | MEM_RDONLY,
+	.arg2_type	= ARG_ANYTHING,
+	.arg3_type	= ARG_ANYTHING,
+	.arg4_type	= ARG_ANYTHING,
+	.arg5_type	= ARG_ANYTHING,
+};
+
 static __always_inline int
 bpf_probe_read_kernel_str_common(void *dst, u32 size, const void *unsafe_ptr)
 {