
[RFC,bpf-next,5/5] selftests/bpf: test __kptr_user on the value of a task storage map.

Message ID 20240807235755.1435806-6-thinker.li@gmail.com (mailing list archive)
State Superseded
Delegated to: BPF
Series Share user memory to BPF program through task storage map.

Checks

Context Check Description
bpf/vmtest-bpf-next-PR success PR summary
bpf/vmtest-bpf-next-VM_Test-3 success Logs for Validate matrix.py
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-0 success Logs for Lint
bpf/vmtest-bpf-next-VM_Test-2 success Logs for Unittests
bpf/vmtest-bpf-next-VM_Test-5 success Logs for aarch64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-4 success Logs for aarch64-gcc / build / build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-17 success Logs for s390x-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-18 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-13 success Logs for s390x-gcc / test (test_maps, false, 360) / test_maps on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-16 success Logs for s390x-gcc / test (test_verifier, false, 360) / test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-12 success Logs for s390x-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-19 success Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-6 success Logs for aarch64-gcc / test (test_maps, false, 360) / test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-20 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-21 success Logs for x86_64-gcc / test (test_maps, false, 360) / test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-9 success Logs for aarch64-gcc / test (test_verifier, false, 360) / test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-11 success Logs for s390x-gcc / build / build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-10 success Logs for aarch64-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-24 success Logs for x86_64-gcc / test (test_progs_no_alu32_parallel, true, 30) / test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-25 success Logs for x86_64-gcc / test (test_progs_parallel, true, 30) / test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-26 success Logs for x86_64-gcc / test (test_verifier, false, 360) / test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-27 success Logs for x86_64-gcc / veristat / veristat on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-28 success Logs for x86_64-llvm-17 / build / build for x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-29 success Logs for x86_64-llvm-17 / build-release / build for x86_64 with llvm-17-O2
bpf/vmtest-bpf-next-VM_Test-30 success Logs for x86_64-llvm-17 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-31 success Logs for x86_64-llvm-17 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-32 success Logs for x86_64-llvm-17 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-33 success Logs for x86_64-llvm-17 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-34 success Logs for x86_64-llvm-17 / veristat
bpf/vmtest-bpf-next-VM_Test-35 success Logs for x86_64-llvm-18 / build / build for x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-36 success Logs for x86_64-llvm-18 / build-release / build for x86_64 with llvm-18-O2
bpf/vmtest-bpf-next-VM_Test-37 success Logs for x86_64-llvm-18 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-41 success Logs for x86_64-llvm-18 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-42 success Logs for x86_64-llvm-18 / veristat
bpf/vmtest-bpf-next-VM_Test-14 success Logs for s390x-gcc / test (test_progs, false, 360) / test_progs on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-15 success Logs for s390x-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-8 success Logs for aarch64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-7 success Logs for aarch64-gcc / test (test_progs, false, 360) / test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-22 success Logs for x86_64-gcc / test (test_progs, false, 360) / test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-23 success Logs for x86_64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-38 success Logs for x86_64-llvm-18 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-39 success Logs for x86_64-llvm-18 / test (test_progs_cpuv4, false, 360) / test_progs_cpuv4 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-40 success Logs for x86_64-llvm-18 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-18
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for bpf-next, async
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 42 this patch: 42
netdev/build_tools success Errors and warnings before: 11 this patch: 11
netdev/cc_maintainers warning 18 maintainers not CCed: kpsingh@kernel.org shuah@kernel.org haoluo@google.com nathan@kernel.org elver@google.com john.fastabend@gmail.com mykolal@fb.com morbo@google.com sdf@fomichev.me daniel@iogearbox.net llvm@lists.linux.dev justinstitt@google.com jolsa@kernel.org linux-kselftest@vger.kernel.org yonghong.song@linux.dev eddyz87@gmail.com brauner@kernel.org ndesaulniers@google.com
netdev/build_clang success Errors and warnings before: 44 this patch: 44
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 49 this patch: 49
netdev/checkpatch warning CHECK: spaces preferred around that '*' (ctx:WxV) WARNING: added, moved or deleted file(s), does MAINTAINERS need updating? WARNING: line length of 84 exceeds 80 columns WARNING: line length of 85 exceeds 80 columns WARNING: line length of 88 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Kui-Feng Lee Aug. 7, 2024, 11:57 p.m. UTC
Make sure the memory of user kptrs has been mapped to the kernel
properly. Also ensure the in-kernel values of user kptrs are not copied
back to userspace.

Signed-off-by: Kui-Feng Lee <thinker.li@gmail.com>
---
 .../bpf/prog_tests/task_local_storage.c       | 122 ++++++++++++++++++
 .../selftests/bpf/progs/task_ls_kptr_user.c   |  72 +++++++++++
 2 files changed, 194 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/progs/task_ls_kptr_user.c

Comments

Alexei Starovoitov Aug. 12, 2024, 4:58 p.m. UTC | #1
On Wed, Aug 7, 2024 at 4:58 PM Kui-Feng Lee <thinker.li@gmail.com> wrote:
> +
> +       user_data_mmap = mmap(NULL, sizeof(*user_data_mmap), PROT_READ | PROT_WRITE,
> +                             MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
> +       if (!ASSERT_NEQ(user_data_mmap, MAP_FAILED, "mmap"))
> +               return;
> +
> +       memcpy(user_data_mmap, &user_data_mmap_v, sizeof(*user_data_mmap));
> +       value.udata_mmap = user_data_mmap;
> +       value.udata = &user_data;

There shouldn't be a need to do mmap(). It's too much memory overhead.
The user should be able to write:
static __thread struct user_data udata;
value.udata = &udata;
bpf_map_update_elem(map_fd, my_task_fd, &value)
and do it once.
Later multi thread user code will just access "udata".
No map lookups.

If sizeof(udata) is small enough the kernel will pin either
one or two pages (if udata crosses page boundary).

So no extra memory consumption by the user process while the kernel
pins a page or two.
In a good case it's one page and no extra vmap.
I wonder whether we should enforce that one page case.
It's not hard for users to write:
static __thread struct user_data udata __attribute__((aligned(sizeof(udata))));
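
For illustration, a minimal user-side sketch of this "register once, access
directly" pattern could look like the following; the map fd, value layout and
helper name here are simplified and hypothetical, not taken from the selftest:

#include <unistd.h>
#include <sys/syscall.h>
#include <bpf/bpf.h>

struct user_data { int a; int b; int result; };

/* Align to a power of two >= sizeof(struct user_data) so the object can
 * never straddle a page boundary (12 bytes here, so 16 is enough).
 */
static __thread struct user_data udata __attribute__((aligned(16)));

static int register_udata(int map_fd)
{
	struct { struct user_data *udata; } value = { .udata = &udata };
	int task_fd = syscall(SYS_pidfd_open, getpid(), 0);

	if (task_fd < 0)
		return -1;

	/* Done once; afterwards user code just reads and writes udata
	 * directly, with no bpf_map_lookup_elem() on the fast path.
	 */
	return bpf_map_update_elem(map_fd, &task_fd, &value, 0);
}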
Kui-Feng Lee Aug. 12, 2024, 5:15 p.m. UTC | #2
On 8/12/24 09:58, Alexei Starovoitov wrote:
> On Wed, Aug 7, 2024 at 4:58 PM Kui-Feng Lee <thinker.li@gmail.com> wrote:
>> +
>> +       user_data_mmap = mmap(NULL, sizeof(*user_data_mmap), PROT_READ | PROT_WRITE,
>> +                             MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
>> +       if (!ASSERT_NEQ(user_data_mmap, MAP_FAILED, "mmap"))
>> +               return;
>> +
>> +       memcpy(user_data_mmap, &user_data_mmap_v, sizeof(*user_data_mmap));
>> +       value.udata_mmap = user_data_mmap;
>> +       value.udata = &user_data;
> 
> There shouldn't be a need to do mmap(). It's too much memory overhead.
> The user should be able to write:
> static __thread struct user_data udata;
> value.udata = &udata;
> bpf_map_update_elem(map_fd, my_task_fd, &value)
> and do it once.
> Later multi thread user code will just access "udata".
> No map lookups.

mmap() is not necessary here. There are two pointers here.
The udata_mmap one is used to test the case of crossing a page boundary,
although the current RFC fails to actually do that; it will be fixed later.
The udata one works just like what you described, except that user_data is
a local variable.

> 
> If sizeof(udata) is small enough the kernel will pin either
> one or two pages (if udata crosses page boundary).
> 
> So no extra memory consumption by the user process while the kernel
> pins a page or two.
> In a good case it's one page and no extra vmap.
> I wonder whether we should enforce that one page case.
> It's not hard for users to write:
> static __thread struct user_data udata __attribute__((aligned(sizeof(udata))));

With a one-page restriction, the implementation would be much simpler. If 
you think it is a reasonable restriction, I will enforce this rule.
Alexei Starovoitov Aug. 12, 2024, 5:31 p.m. UTC | #3
On Mon, Aug 12, 2024 at 10:15 AM Kui-Feng Lee <sinquersw@gmail.com> wrote:
>
>
>
> On 8/12/24 09:58, Alexei Starovoitov wrote:
> > On Wed, Aug 7, 2024 at 4:58 PM Kui-Feng Lee <thinker.li@gmail.com> wrote:
> >> +
> >> +       user_data_mmap = mmap(NULL, sizeof(*user_data_mmap), PROT_READ | PROT_WRITE,
> >> +                             MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
> >> +       if (!ASSERT_NEQ(user_data_mmap, MAP_FAILED, "mmap"))
> >> +               return;
> >> +
> >> +       memcpy(user_data_mmap, &user_data_mmap_v, sizeof(*user_data_mmap));
> >> +       value.udata_mmap = user_data_mmap;
> >> +       value.udata = &user_data;
> >
> > There shouldn't be a need to do mmap(). It's too much memory overhead.
> > The user should be able to write:
> > static __thread struct user_data udata;
> > value.udata = &udata;
> > bpf_map_update_elem(map_fd, my_task_fd, &value)
> > and do it once.
> > Later multi thread user code will just access "udata".
> > No map lookups.
>
> mmap() is not necessary here. There are two pointers here.
> udata_mmap one is used to test the case crossing page boundary although
> in the current RFC it fails to do it. It will be fixed later.
> udata one works just like what you have described, except user_data is a
> local variable.

Hmm. I guess I misread the code.
But then:
+       struct user_data user_data = ...;
+       value.udata = &user_data;

how is that supposed to work when the address points to the stack?
I guess the kernel can still pin that page, but it will be junk
as soon as the function returns.

> >
> > If sizeof(udata) is small enough the kernel will pin either
> > one or two pages (if udata crosses page boundary).
> >
> > So no extra memory consumption by the user process while the kernel
> > pins a page or two.
> > In a good case it's one page and no extra vmap.
> > I wonder whether we should enforce that one page case.
> > It's not hard for users to write:
> > static __thread struct user_data udata __attribute__((aligned(sizeof(udata))));
>
> With one page restriction, the implementation would be much simpler. If
> you think it is a reasonable restriction, I would enforce this rule.

I'm worried about vmap(). Doing it for every map element (same as every
task) might add substantial kernel-side overhead.
On my devserver:
sudo cat /proc/vmallocinfo |grep vmap|wc -l
105
sudo cat /proc/vmallocinfo |wc -l
17608

I believe that the mechanism scales to millions, but adding one more
vmap per element feels like a footgun.
To avoid that, the user would need to make sure their user_data doesn't
cross a page boundary, so imo we can make this mandatory.
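
As a side note, an illustrative user-side guard for the "must not cross a
page" requirement (not part of this series) could be as small as:

#include <stdbool.h>
#include <stdint.h>
#include <unistd.h>

/* Illustrative only: true if [p, p + len) straddles a page boundary. */
static bool crosses_page(const void *p, size_t len)
{
	size_t page = (size_t)sysconf(_SC_PAGESIZE);

	return ((uintptr_t)p % page) + len > page;
}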
Kui-Feng Lee Aug. 12, 2024, 6:10 p.m. UTC | #4
On 8/12/24 10:31, Alexei Starovoitov wrote:
> On Mon, Aug 12, 2024 at 10:15 AM Kui-Feng Lee <sinquersw@gmail.com> wrote:
>>
>>
>>
>> On 8/12/24 09:58, Alexei Starovoitov wrote:
>>> On Wed, Aug 7, 2024 at 4:58 PM Kui-Feng Lee <thinker.li@gmail.com> wrote:
>>>> +
>>>> +       user_data_mmap = mmap(NULL, sizeof(*user_data_mmap), PROT_READ | PROT_WRITE,
>>>> +                             MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
>>>> +       if (!ASSERT_NEQ(user_data_mmap, MAP_FAILED, "mmap"))
>>>> +               return;
>>>> +
>>>> +       memcpy(user_data_mmap, &user_data_mmap_v, sizeof(*user_data_mmap));
>>>> +       value.udata_mmap = user_data_mmap;
>>>> +       value.udata = &user_data;
>>>
>>> There shouldn't be a need to do mmap(). It's too much memory overhead.
>>> The user should be able to write:
>>> static __thread struct user_data udata;
>>> value.udata = &udata;
>>> bpf_map_update_elem(map_fd, my_task_fd, &value)
>>> and do it once.
>>> Later multi thread user code will just access "udata".
>>> No map lookups.
>>
>> mmap() is not necessary here. There are two pointers here.
>> udata_mmap one is used to test the case crossing page boundary although
>> in the current RFC it fails to do it. It will be fixed later.
>> udata one works just like what you have described, except user_data is a
>> local variable.
> 
> Hmm. I guess I misread the code.
> But then:
> +       struct user_data user_data user_data = ...;
> +       value.udata = &user_data;
> 
> how is that supposed to work when the address points to the stack?
> I guess the kernel can still pin that page, but it will be junk
> as soon as the function returns.

You are right! It works only for this test case since the map will be
destroyed before leaving this function. I will move it to a static variable.
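For example, an illustrative static declaration (not the exact wording of a
future revision) would be:

	static struct user_data user_data = { .a = 1, .b = 2, .result = 0 };

so the pinned page stays valid for as long as the map element holds the pointer.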

> 
>>>
>>> If sizeof(udata) is small enough the kernel will pin either
>>> one or two pages (if udata crosses page boundary).
>>>
>>> So no extra memory consumption by the user process while the kernel
>>> pins a page or two.
>>> In a good case it's one page and no extra vmap.
>>> I wonder whether we should enforce that one page case.
>>> It's not hard for users to write:
>>> static __thread struct user_data udata __attribute__((aligned(sizeof(udata))));
>>
>> With one page restriction, the implementation would be much simpler. If
>> you think it is a reasonable restriction, I would enforce this rule.
> 
> I'm worried about vmap(). Doing it for every map elemen (same as every
> task) might add substantial kernel side overhead.
> On my devserver:
> sudo cat /proc/vmallocinfo |grep vmap|wc -l
> 105
> sudo cat /proc/vmallocinfo |wc -l
> 17608
> 
> I believe that the mechanism scales to millions, but adding one more
> vmap per element feels like a footgun.
> To avoid that the user would need to make sure their user_data doesn't
> cross the page, so imo we can make this mandatory.

If the memory block pointed to by a uptr fits within one page, vmap() is
not called. vmap() is called only for blocks that span two or more pages.
Without the one-page restriction, there is a chance of additional vmaps
even for small memory blocks, but not every uptr would have that extra
vmap.

Users can accidentally create a new vmap, but with the current
implementation they can also avoid it by aligning memory properly. The
trade-off is between supporting big chunks of memory and being
idiot-proof. However, in my opinion, big chunks are very unlikely for task
local storage.

So, I will make this restriction mandatory.
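
(As a purely hypothetical sketch of that enforcement, not code from this
series, the update path could reject user pointers whose object straddles a
page:

	/* hypothetical check; uaddr/size stand for the user pointer and
	 * the BTF-derived size of the pointed-to type
	 */
	if (offset_in_page(uaddr) + size > PAGE_SIZE)
		return -E2BIG;

so a single pinned page always suffices and vmap() is never needed.)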

Patch

diff --git a/tools/testing/selftests/bpf/prog_tests/task_local_storage.c b/tools/testing/selftests/bpf/prog_tests/task_local_storage.c
index c33c05161a9e..17221024fb28 100644
--- a/tools/testing/selftests/bpf/prog_tests/task_local_storage.c
+++ b/tools/testing/selftests/bpf/prog_tests/task_local_storage.c
@@ -5,8 +5,10 @@ 
 #include <unistd.h>
 #include <sched.h>
 #include <pthread.h>
+#include <sys/eventfd.h>
 #include <sys/syscall.h>   /* For SYS_xxx definitions */
 #include <sys/types.h>
+#include <sys/mman.h>
 #include <test_progs.h>
 #include "task_local_storage_helpers.h"
 #include "task_local_storage.skel.h"
@@ -14,6 +16,21 @@ 
 #include "task_ls_recursion.skel.h"
 #include "task_storage_nodeadlock.skel.h"
 
+struct user_data {
+	int a;
+	int b;
+	int result;
+};
+
+struct value_type {
+	struct user_data *udata_mmap;
+	struct user_data *udata;
+};
+
+#define MAGIC_VALUE 0xabcd1234
+
+#include "task_ls_kptr_user.skel.h"
+
 static void test_sys_enter_exit(void)
 {
 	struct task_local_storage *skel;
@@ -40,6 +57,109 @@  static void test_sys_enter_exit(void)
 	task_local_storage__destroy(skel);
 }
 
+static void test_kptr_user(void)
+{
+	struct user_data user_data = { .a = 1, .b = 2, .result = 0 };
+	struct user_data user_data_mmap_v = { .a = 7, .b = 7 };
+	struct task_ls_kptr_user *skel = NULL;
+	struct user_data *user_data_mmap;
+	int task_fd = -1, ev_fd = -1;
+	struct value_type value;
+	pid_t pid;
+	int err, wstatus;
+	__u64 dummy = 1;
+
+	user_data_mmap = mmap(NULL, sizeof(*user_data_mmap), PROT_READ | PROT_WRITE,
+			      MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+	if (!ASSERT_NEQ(user_data_mmap, MAP_FAILED, "mmap"))
+		return;
+
+	memcpy(user_data_mmap, &user_data_mmap_v, sizeof(*user_data_mmap));
+	value.udata_mmap = user_data_mmap;
+	value.udata = &user_data;
+
+	task_fd = sys_pidfd_open(getpid(), 0);
+	if (!ASSERT_NEQ(task_fd, -1, "sys_pidfd_open"))
+		goto out;
+
+	ev_fd = eventfd(0, 0);
+	if (!ASSERT_NEQ(ev_fd, -1, "eventfd"))
+		goto out;
+
+	skel = task_ls_kptr_user__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
+		goto out;
+
+	err = bpf_map_update_elem(bpf_map__fd(skel->maps.datamap), &task_fd, &value, 0);
+	if (!ASSERT_OK(err, "update_datamap"))
+		exit(1);
+
+	/* Make sure the BPF program can access the user_data_mmap even if
+	 * it's munmapped already.
+	 */
+	munmap(user_data_mmap, sizeof(*user_data_mmap));
+	user_data_mmap = NULL;
+
+	err = task_ls_kptr_user__attach(skel);
+	if (!ASSERT_OK(err, "skel_attach"))
+		goto out;
+
+	fflush(stdout);
+	fflush(stderr);
+
+	pid = fork();
+	if (pid < 0)
+		goto out;
+
+	/* Call syscall in the child process, but access the map value of
+	 * the parent process in the BPF program to check if the user kptr
+	 * is translated/mapped correctly.
+	 */
+	if (pid == 0) {
+		/* child */
+
+		/* Overwrite the user_data in the child process to check if
+		 * the BPF program accesses the user_data of the parent.
+		 */
+		user_data.a = 0;
+		user_data.b = 0;
+
+		/* Wait for the parent to set child_pid */
+		read(ev_fd, &dummy, sizeof(dummy));
+
+		exit(0);
+	}
+
+	skel->bss->parent_pid = syscall(SYS_gettid);
+	skel->bss->child_pid = pid;
+
+	write(ev_fd, &dummy, sizeof(dummy));
+
+	err = waitpid(pid, &wstatus, 0);
+	ASSERT_EQ(err, pid, "waitpid");
+	skel->bss->child_pid = 0;
+
+	ASSERT_EQ(MAGIC_VALUE + user_data.a + user_data.b +
+		  user_data_mmap_v.a + user_data_mmap_v.b,
+		  user_data.result, "result");
+
+	/* Check if user programs can access the value of user kptrs
+	 * through bpf_map_lookup_elem(). Make sure the kernel value is not
+	 * leaked.
+	 */
+	err = bpf_map_lookup_elem(bpf_map__fd(skel->maps.datamap), &task_fd, &value);
+	if (!ASSERT_OK(err, "bpf_map_lookup_elem"))
+		goto out;
+	ASSERT_EQ(value.udata_mmap, NULL, "lookup_udata_mmap");
+	ASSERT_EQ(value.udata, NULL, "lookup_udata");
+
+out:
+	task_ls_kptr_user__destroy(skel);
+	close(ev_fd);
+	close(task_fd);
+	munmap(user_data_mmap, sizeof(*user_data_mmap));
+}
+
 static void test_exit_creds(void)
 {
 	struct task_local_storage_exit_creds *skel;
@@ -237,4 +357,6 @@  void test_task_local_storage(void)
 		test_recursion();
 	if (test__start_subtest("nodeadlock"))
 		test_nodeadlock();
+	if (test__start_subtest("kptr_user"))
+		test_kptr_user();
 }
diff --git a/tools/testing/selftests/bpf/progs/task_ls_kptr_user.c b/tools/testing/selftests/bpf/progs/task_ls_kptr_user.c
new file mode 100644
index 000000000000..ff5ca3a5da1e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/task_ls_kptr_user.c
@@ -0,0 +1,72 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include "task_kfunc_common.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct user_data {
+	int a;
+	int b;
+	int result;
+};
+
+struct value_type {
+	struct user_data __kptr_user *udata_mmap;
+	struct user_data __kptr_user *udata;
+};
+
+struct {
+	__uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+	__uint(map_flags, BPF_F_NO_PREALLOC);
+	__type(key, int);
+	__type(value, struct value_type);
+} datamap SEC(".maps");
+
+#define MAGIC_VALUE 0xabcd1234
+
+/* This is a workaround to avoid clang generating a forward reference for
+ * struct user_data. This is a known issue and will be fixed in the future.
+ */
+struct user_data __dummy;
+
+pid_t child_pid = 0;
+pid_t parent_pid = 0;
+
+SEC("tp_btf/sys_enter")
+int BPF_PROG(on_enter, struct pt_regs *regs, long id)
+{
+	struct task_struct *task, *data_task;
+	struct value_type *ptr;
+	struct user_data *udata;
+	int acc;
+
+	task = bpf_get_current_task_btf();
+	if (task->pid != child_pid)
+		return 0;
+
+	data_task = bpf_task_from_pid(parent_pid);
+	if (!data_task)
+		return 0;
+
+	ptr = bpf_task_storage_get(&datamap, data_task, 0,
+				   BPF_LOCAL_STORAGE_GET_F_CREATE);
+	bpf_task_release(data_task);
+	if (!ptr)
+		return 0;
+
+	udata = ptr->udata_mmap;
+	if (!udata)
+		return 0;
+	acc = udata->a + udata->b;
+
+	udata = ptr->udata;
+	if (!udata)
+		return 0;
+	udata->result = MAGIC_VALUE + udata->a + udata->b + acc;
+
+	return 0;
+}