diff mbox series

[v3,bpf-next,5/5] selftests/bpf: Add bench for task storage creation

Message ID 20230322215246.1675516-6-martin.lau@linux.dev (mailing list archive)
State Accepted
Commit cbe9d93d58b16b5912498ea42b5173022fff7c04
Delegated to: BPF
Headers show
Series bpf: Use bpf_mem_cache_alloc/free in bpf_local_storage | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for bpf-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 20 this patch: 20
netdev/cc_maintainers warning 10 maintainers not CCed: mykolal@fb.com song@kernel.org shuah@kernel.org sdf@google.com haoluo@google.com yhs@fb.com john.fastabend@gmail.com kpsingh@kernel.org jolsa@kernel.org linux-kselftest@vger.kernel.org
netdev/build_clang success Errors and warnings before: 18 this patch: 18
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 20 this patch: 20
netdev/checkpatch warning WARNING: Statements terminations use 1 semicolon WARNING: externs should be avoided in .c files WARNING: line length of 82 exceeds 80 columns WARNING: line length of 83 exceeds 80 columns WARNING: line length of 85 exceeds 80 columns WARNING: line length of 86 exceeds 80 columns WARNING: line length of 93 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-next-VM_Test-40 success Logs for test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-41 success Logs for test_progs_parallel on x86_64 with llvm-15
bpf/vmtest-bpf-next-VM_Test-43 success Logs for test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-47 success Logs for test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-36 success Logs for test_progs_no_alu32_parallel on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-37 success Logs for test_progs_parallel on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-38 success Logs for test_progs_parallel on aarch64 with llvm-15
bpf/vmtest-bpf-next-VM_Test-39 success Logs for test_progs_parallel on aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-42 success Logs for test_progs_parallel on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-44 success Logs for test_verifier on aarch64 with llvm-15
bpf/vmtest-bpf-next-VM_Test-45 success Logs for test_verifier on aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-46 success Logs for test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-48 success Logs for test_verifier on x86_64 with llvm-15
bpf/vmtest-bpf-next-VM_Test-49 success Logs for test_verifier on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-6 success Logs for build for x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-7 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-2 success Logs for build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-3 success Logs for build for aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-5 success Logs for build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-4 success Logs for build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-8 success Logs for test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-9 success Logs for test_maps on aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-11 success Logs for test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-12 success Logs for test_maps on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-13 success Logs for test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-16 success Logs for test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-17 success Logs for test_progs on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-18 success Logs for test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-21 success Logs for test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-22 success Logs for test_progs_no_alu32 on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-23 success Logs for test_progs_no_alu32_parallel on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-24 success Logs for test_progs_no_alu32_parallel on aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-25 success Logs for test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-26 success Logs for test_progs_no_alu32_parallel on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-27 success Logs for test_progs_parallel on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-28 success Logs for test_progs_parallel on aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-29 success Logs for test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-30 success Logs for test_progs_parallel on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-31 success Logs for test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-32 success Logs for test_verifier on aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-34 success Logs for test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-35 success Logs for test_verifier on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-14 success Logs for test_progs on aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-19 success Logs for test_progs_no_alu32 on aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-20 success Logs for test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-33 success Logs for test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-10 success Logs for test_maps on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-15 fail Logs for test_progs on s390x with gcc
bpf/vmtest-bpf-next-PR success PR summary

Commit Message

Martin KaFai Lau March 22, 2023, 9:52 p.m. UTC
From: Martin KaFai Lau <martin.lau@kernel.org>

This patch adds a task storage benchmark to the existing
local-storage-create benchmark.

For task storage,
./bench --storage-type task --batch-size 32:
   bpf_ma: Summary: creates   30.456 ± 0.507k/s ( 30.456k/prod), 6.08 kmallocs/create
no bpf_ma: Summary: creates   31.962 ± 0.486k/s ( 31.962k/prod), 6.13 kmallocs/create

./bench --storage-type task --batch-size 64:
   bpf_ma: Summary: creates   30.197 ± 1.476k/s ( 30.197k/prod), 6.08 kmallocs/create
no bpf_ma: Summary: creates   31.103 ± 0.297k/s ( 31.103k/prod), 6.13 kmallocs/create

Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
---
 tools/testing/selftests/bpf/bench.c           |   2 +
 .../bpf/benchs/bench_local_storage_create.c   | 151 ++++++++++++++++--
 .../bpf/progs/bench_local_storage_create.c    |  25 +++
 3 files changed, 164 insertions(+), 14 deletions(-)

Comments

James Hilliard March 28, 2023, 3:51 a.m. UTC | #1
On Mon, Mar 27, 2023 at 9:42 PM Martin KaFai Lau <martin.lau@linux.dev> wrote:
>
> From: Martin KaFai Lau <martin.lau@kernel.org>
>
> This patch adds a task storage benchmark to the existing
> local-storage-create benchmark.
>
> For task storage,
> ./bench --storage-type task --batch-size 32:
>    bpf_ma: Summary: creates   30.456 ± 0.507k/s ( 30.456k/prod), 6.08 kmallocs/create
> no bpf_ma: Summary: creates   31.962 ± 0.486k/s ( 31.962k/prod), 6.13 kmallocs/create
>
> ./bench --storage-type task --batch-size 64:
>    bpf_ma: Summary: creates   30.197 ± 1.476k/s ( 30.197k/prod), 6.08 kmallocs/create
> no bpf_ma: Summary: creates   31.103 ± 0.297k/s ( 31.103k/prod), 6.13 kmallocs/create
>
> Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
> ---
>  tools/testing/selftests/bpf/bench.c           |   2 +
>  .../bpf/benchs/bench_local_storage_create.c   | 151 ++++++++++++++++--
>  .../bpf/progs/bench_local_storage_create.c    |  25 +++
>  3 files changed, 164 insertions(+), 14 deletions(-)
>
> diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c
> index dc3827c1f139..d9c080ac1796 100644
> --- a/tools/testing/selftests/bpf/bench.c
> +++ b/tools/testing/selftests/bpf/bench.c
> @@ -278,6 +278,7 @@ extern struct argp bench_local_storage_argp;
>  extern struct argp bench_local_storage_rcu_tasks_trace_argp;
>  extern struct argp bench_strncmp_argp;
>  extern struct argp bench_hashmap_lookup_argp;
> +extern struct argp bench_local_storage_create_argp;
>
>  static const struct argp_child bench_parsers[] = {
>         { &bench_ringbufs_argp, 0, "Ring buffers benchmark", 0 },
> @@ -288,6 +289,7 @@ static const struct argp_child bench_parsers[] = {
>         { &bench_local_storage_rcu_tasks_trace_argp, 0,
>                 "local_storage RCU Tasks Trace slowdown benchmark", 0 },
>         { &bench_hashmap_lookup_argp, 0, "Hashmap lookup benchmark", 0 },
> +       { &bench_local_storage_create_argp, 0, "local-storage-create benchmark", 0 },
>         {},
>  };
>
> diff --git a/tools/testing/selftests/bpf/benchs/bench_local_storage_create.c b/tools/testing/selftests/bpf/benchs/bench_local_storage_create.c
> index f8b2a640ccbe..abb0321d4f34 100644
> --- a/tools/testing/selftests/bpf/benchs/bench_local_storage_create.c
> +++ b/tools/testing/selftests/bpf/benchs/bench_local_storage_create.c
> @@ -3,19 +3,71 @@
>
>  #include <sys/types.h>
>  #include <sys/socket.h>
> +#include <pthread.h>
> +#include <argp.h>
>
>  #include "bench.h"
>  #include "bench_local_storage_create.skel.h"
>
> -#define BATCH_SZ 32
> -
>  struct thread {
> -       int fds[BATCH_SZ];
> +       int *fds;
> +       pthread_t *pthds;
> +       int *pthd_results;
>  };
>
>  static struct bench_local_storage_create *skel;
>  static struct thread *threads;
> -static long socket_errs;
> +static long create_owner_errs;
> +static int storage_type = BPF_MAP_TYPE_SK_STORAGE;
> +static int batch_sz = 32;
> +
> +enum {
> +       ARG_BATCH_SZ = 9000,
> +       ARG_STORAGE_TYPE = 9001,
> +};
> +
> +static const struct argp_option opts[] = {
> +       { "batch-size", ARG_BATCH_SZ, "BATCH_SIZE", 0,
> +         "The number of storage creations in each batch" },
> +       { "storage-type", ARG_STORAGE_TYPE, "STORAGE_TYPE", 0,
> +         "The type of local storage to test (socket or task)" },
> +       {},
> +};
> +
> +static error_t parse_arg(int key, char *arg, struct argp_state *state)
> +{
> +       int ret;
> +
> +       switch (key) {
> +       case ARG_BATCH_SZ:
> +               ret = atoi(arg);
> +               if (ret < 1) {
> +                       fprintf(stderr, "invalid batch-size\n");
> +                       argp_usage(state);
> +               }
> +               batch_sz = ret;
> +               break;
> +       case ARG_STORAGE_TYPE:
> +               if (!strcmp(arg, "task")) {
> +                       storage_type = BPF_MAP_TYPE_TASK_STORAGE;
> +               } else if (!strcmp(arg, "socket")) {
> +                       storage_type = BPF_MAP_TYPE_SK_STORAGE;
> +               } else {
> +                       fprintf(stderr, "invalid storage-type (socket or task)\n");
> +                       argp_usage(state);
> +               }
> +               break;
> +       default:
> +               return ARGP_ERR_UNKNOWN;
> +       }
> +
> +       return 0;
> +}
> +
> +const struct argp bench_local_storage_create_argp = {
> +       .options = opts,
> +       .parser = parse_arg,
> +};
>
>  static void validate(void)
>  {
> @@ -28,6 +80,8 @@ static void validate(void)
>
>  static void setup(void)
>  {
> +       int i;
> +
>         skel = bench_local_storage_create__open_and_load();
>         if (!skel) {
>                 fprintf(stderr, "error loading skel\n");
> @@ -35,10 +89,16 @@ static void setup(void)
>         }
>
>         skel->bss->bench_pid = getpid();
> -
> -       if (!bpf_program__attach(skel->progs.socket_post_create)) {
> -               fprintf(stderr, "Error attaching bpf program\n");
> -               exit(1);
> +       if (storage_type == BPF_MAP_TYPE_SK_STORAGE) {
> +               if (!bpf_program__attach(skel->progs.socket_post_create)) {
> +                       fprintf(stderr, "Error attaching bpf program\n");
> +                       exit(1);
> +               }
> +       } else {
> +               if (!bpf_program__attach(skel->progs.fork)) {
> +                       fprintf(stderr, "Error attaching bpf program\n");
> +                       exit(1);
> +               }
>         }
>
>         if (!bpf_program__attach(skel->progs.kmalloc)) {
> @@ -52,6 +112,29 @@ static void setup(void)
>                 fprintf(stderr, "cannot alloc thread_res\n");
>                 exit(1);
>         }
> +
> +       for (i = 0; i < env.producer_cnt; i++) {
> +               struct thread *t = &threads[i];
> +
> +               if (storage_type == BPF_MAP_TYPE_SK_STORAGE) {
> +                       t->fds = malloc(batch_sz * sizeof(*t->fds));
> +                       if (!t->fds) {
> +                               fprintf(stderr, "cannot alloc t->fds\n");
> +                               exit(1);
> +                       }
> +               } else {
> +                       t->pthds = malloc(batch_sz * sizeof(*t->pthds));
> +                       if (!t->pthds) {
> +                               fprintf(stderr, "cannot alloc t->pthds\n");
> +                               exit(1);
> +                       }
> +                       t->pthd_results = malloc(batch_sz * sizeof(*t->pthd_results));
> +                       if (!t->pthd_results) {
> +                               fprintf(stderr, "cannot alloc t->pthd_results\n");
> +                               exit(1);
> +                       }
> +               }
> +       }
>  }
>
>  static void measure(struct bench_res *res)
> @@ -65,20 +148,20 @@ static void *consumer(void *input)
>         return NULL;
>  }
>
> -static void *producer(void *input)
> +static void *sk_producer(void *input)
>  {
>         struct thread *t = &threads[(long)(input)];
>         int *fds = t->fds;
>         int i;
>
>         while (true) {
> -               for (i = 0; i < BATCH_SZ; i++) {
> +               for (i = 0; i < batch_sz; i++) {
>                         fds[i] = socket(AF_INET6, SOCK_DGRAM, 0);
>                         if (fds[i] == -1)
> -                               atomic_inc(&socket_errs);
> +                               atomic_inc(&create_owner_errs);
>                 }
>
> -               for (i = 0; i < BATCH_SZ; i++) {
> +               for (i = 0; i < batch_sz; i++) {
>                         if (fds[i] != -1)
>                                 close(fds[i]);
>                 }
> @@ -87,6 +170,42 @@ static void *producer(void *input)
>         return NULL;
>  }
>
> +static void *thread_func(void *arg)
> +{
> +       return NULL;
> +}
> +
> +static void *task_producer(void *input)
> +{
> +       struct thread *t = &threads[(long)(input)];
> +       pthread_t *pthds = t->pthds;
> +       int *pthd_results = t->pthd_results;
> +       int i;
> +
> +       while (true) {
> +               for (i = 0; i < batch_sz; i++) {
> +                       pthd_results[i] = pthread_create(&pthds[i], NULL, thread_func, NULL);
> +                       if (pthd_results[i])
> +                               atomic_inc(&create_owner_errs);
> +               }
> +
> +               for (i = 0; i < batch_sz; i++) {
> +                       if (!pthd_results[i])
> +                               pthread_join(pthds[i], NULL);;
> +               }
> +       }
> +
> +       return NULL;
> +}
> +
> +static void *producer(void *input)
> +{
> +       if (storage_type == BPF_MAP_TYPE_SK_STORAGE)
> +               return sk_producer(input);
> +       else
> +               return task_producer(input);
> +}
> +
>  static void report_progress(int iter, struct bench_res *res, long delta_ns)
>  {
>         double creates_per_sec, kmallocs_per_create;
> @@ -123,14 +242,18 @@ static void report_final(struct bench_res res[], int res_cnt)
>         printf("Summary: creates %8.3lf \u00B1 %5.3lfk/s (%7.3lfk/prod), ",
>                creates_mean, creates_stddev, creates_mean / env.producer_cnt);
>         printf("%4.2lf kmallocs/create\n", (double)total_kmallocs / total_creates);
> -       if (socket_errs || skel->bss->create_errs)
> -               printf("socket() errors %ld create_errs %ld\n", socket_errs,
> +       if (create_owner_errs || skel->bss->create_errs)
> +               printf("%s() errors %ld create_errs %ld\n",
> +                      storage_type == BPF_MAP_TYPE_SK_STORAGE ?
> +                      "socket" : "pthread_create",
> +                      create_owner_errs,
>                        skel->bss->create_errs);
>  }
>
>  /* Benchmark performance of creating bpf local storage  */
>  const struct bench bench_local_storage_create = {
>         .name = "local-storage-create",
> +       .argp = &bench_local_storage_create_argp,
>         .validate = validate,
>         .setup = setup,
>         .producer_thread = producer,
> diff --git a/tools/testing/selftests/bpf/progs/bench_local_storage_create.c b/tools/testing/selftests/bpf/progs/bench_local_storage_create.c
> index 2814bab54d28..7c851c9d5e47 100644
> --- a/tools/testing/selftests/bpf/progs/bench_local_storage_create.c
> +++ b/tools/testing/selftests/bpf/progs/bench_local_storage_create.c
> @@ -22,6 +22,13 @@ struct {
>         __type(value, struct storage);
>  } sk_storage_map SEC(".maps");
>
> +struct {
> +       __uint(type, BPF_MAP_TYPE_TASK_STORAGE);
> +       __uint(map_flags, BPF_F_NO_PREALLOC);
> +       __type(key, int);
> +       __type(value, struct storage);
> +} task_storage_map SEC(".maps");
> +
>  SEC("raw_tp/kmalloc")
>  int BPF_PROG(kmalloc, unsigned long call_site, const void *ptr,
>              size_t bytes_req, size_t bytes_alloc, gfp_t gfp_flags,
> @@ -32,6 +39,24 @@ int BPF_PROG(kmalloc, unsigned long call_site, const void *ptr,
>         return 0;
>  }
>
> +SEC("tp_btf/sched_process_fork")
> +int BPF_PROG(fork, struct task_struct *parent, struct task_struct *child)

Apparently fork is a built-in function in bpf-gcc:

In file included from progs/bench_local_storage_create.c:6:
progs/bench_local_storage_create.c:43:14: error: conflicting types for
built-in function 'fork'; expected 'int(void)'
[-Werror=builtin-declaration-mismatch]
   43 | int BPF_PROG(fork, struct task_struct *parent, struct
task_struct *child)
      |              ^~~~

However, I haven't been able to find this documented anywhere.

> +{
> +       struct storage *stg;
> +
> +       if (parent->tgid != bench_pid)
> +               return 0;
> +
> +       stg = bpf_task_storage_get(&task_storage_map, child, NULL,
> +                                  BPF_LOCAL_STORAGE_GET_F_CREATE);
> +       if (stg)
> +               __sync_fetch_and_add(&create_cnts, 1);
> +       else
> +               __sync_fetch_and_add(&create_errs, 1);
> +
> +       return 0;
> +}
> +
>  SEC("lsm.s/socket_post_create")
>  int BPF_PROG(socket_post_create, struct socket *sock, int family, int type,
>              int protocol, int kern)
> --
> 2.34.1
>
>
Martin KaFai Lau March 29, 2023, 5:02 p.m. UTC | #2
On 3/27/23 8:51 PM, James Hilliard wrote:
>> diff --git a/tools/testing/selftests/bpf/progs/bench_local_storage_create.c b/tools/testing/selftests/bpf/progs/bench_local_storage_create.c
>> index 2814bab54d28..7c851c9d5e47 100644
>> --- a/tools/testing/selftests/bpf/progs/bench_local_storage_create.c
>> +++ b/tools/testing/selftests/bpf/progs/bench_local_storage_create.c
>> @@ -22,6 +22,13 @@ struct {
>>          __type(value, struct storage);
>>   } sk_storage_map SEC(".maps");
>>
>> +struct {
>> +       __uint(type, BPF_MAP_TYPE_TASK_STORAGE);
>> +       __uint(map_flags, BPF_F_NO_PREALLOC);
>> +       __type(key, int);
>> +       __type(value, struct storage);
>> +} task_storage_map SEC(".maps");
>> +
>>   SEC("raw_tp/kmalloc")
>>   int BPF_PROG(kmalloc, unsigned long call_site, const void *ptr,
>>               size_t bytes_req, size_t bytes_alloc, gfp_t gfp_flags,
>> @@ -32,6 +39,24 @@ int BPF_PROG(kmalloc, unsigned long call_site, const void *ptr,
>>          return 0;
>>   }
>>
>> +SEC("tp_btf/sched_process_fork")
>> +int BPF_PROG(fork, struct task_struct *parent, struct task_struct *child)
> 
> Apparently fork is a built-in function in bpf-gcc:

It also fails in a plain C program:

#>  gcc -Werror=builtin-declaration-mismatch -o test test.c
test.c:14:35: error: conflicting types for built-in function ‘fork’; expected 
‘int(void)’ [-Werror=builtin-declaration-mismatch]
    14 | int __attribute__((__noinline__)) fork(long x, long y)
       |                                   ^~~~
cc1: some warnings being treated as errors

#> clang -o test test.c
succeed

I am not too attached to the name, but it seems this is something that should be
addressed in gcc instead.

> 
> In file included from progs/bench_local_storage_create.c:6:
> progs/bench_local_storage_create.c:43:14: error: conflicting types for
> built-in function 'fork'; expected 'int(void)'
> [-Werror=builtin-declaration-mismatch]
>     43 | int BPF_PROG(fork, struct task_struct *parent, struct
> task_struct *child)
>        |              ^~~~
> 
> I haven't been able to find this documented anywhere however.
James Hilliard March 29, 2023, 7:12 p.m. UTC | #3
On Wed, Mar 29, 2023 at 11:03 AM Martin KaFai Lau <martin.lau@linux.dev> wrote:
>
> On 3/27/23 8:51 PM, James Hilliard wrote:
> >> diff --git a/tools/testing/selftests/bpf/progs/bench_local_storage_create.c b/tools/testing/selftests/bpf/progs/bench_local_storage_create.c
> >> index 2814bab54d28..7c851c9d5e47 100644
> >> --- a/tools/testing/selftests/bpf/progs/bench_local_storage_create.c
> >> +++ b/tools/testing/selftests/bpf/progs/bench_local_storage_create.c
> >> @@ -22,6 +22,13 @@ struct {
> >>          __type(value, struct storage);
> >>   } sk_storage_map SEC(".maps");
> >>
> >> +struct {
> >> +       __uint(type, BPF_MAP_TYPE_TASK_STORAGE);
> >> +       __uint(map_flags, BPF_F_NO_PREALLOC);
> >> +       __type(key, int);
> >> +       __type(value, struct storage);
> >> +} task_storage_map SEC(".maps");
> >> +
> >>   SEC("raw_tp/kmalloc")
> >>   int BPF_PROG(kmalloc, unsigned long call_site, const void *ptr,
> >>               size_t bytes_req, size_t bytes_alloc, gfp_t gfp_flags,
> >> @@ -32,6 +39,24 @@ int BPF_PROG(kmalloc, unsigned long call_site, const void *ptr,
> >>          return 0;
> >>   }
> >>
> >> +SEC("tp_btf/sched_process_fork")
> >> +int BPF_PROG(fork, struct task_struct *parent, struct task_struct *child)
> >
> > Apparently fork is a built-in function in bpf-gcc:
>
> It is also failing in a plain C program
>
> #>  gcc -Werror=builtin-declaration-mismatch -o test test.c
> test.c:14:35: error: conflicting types for built-in function ‘fork’; expected
> ‘int(void)’ [-Werror=builtin-declaration-mismatch]
>     14 | int __attribute__((__noinline__)) fork(long x, long y)
>        |                                   ^~~~
> cc1: some warnings being treated as errors
>
> #> clang -o test test.c
> succeed
>
> I am not too attached to the name but it seems something should be addressed in
> the gcc instead.

Hmm, so it looks like it's marked as a builtin here:
https://github.com/gcc-mirror/gcc/blob/releases/gcc-12.1.0/gcc/builtins.def#L875

The macro for that is here:
https://github.com/gcc-mirror/gcc/blob/releases/gcc-12.1.0/gcc/builtins.def#L104-L111

Which has this comment:
/* Like DEF_LIB_BUILTIN, except that the function is not one that is
specified by ANSI/ISO C. So, when we're being fully conformant we
ignore the version of these builtins that does not begin with
__builtin. */

Looks like this builtin was originally added here:
https://github.com/gcc-mirror/gcc/commit/d1c38823924506d389ca58d02926ace21bdf82fa

Based on this issue it looks like fork is treated as a builtin for
libgcov support:
https://gcc.gnu.org/bugzilla//show_bug.cgi?id=82457

So, from my understanding, fork is a gcc builtin when building with -std=gnu11
but is not a builtin when building with -std=c11.

So it looks like fork is translated to __gcov_fork when -std=gnu* is set, which
is why we get this error.

As this appears to be intended behavior for gcc I think the best option is
to just rename the function so that we don't run into issues when building
with gnu extensions like -std=gnu11.

>
> >
> > In file included from progs/bench_local_storage_create.c:6:
> > progs/bench_local_storage_create.c:43:14: error: conflicting types for
> > built-in function 'fork'; expected 'int(void)'
> > [-Werror=builtin-declaration-mismatch]
> >     43 | int BPF_PROG(fork, struct task_struct *parent, struct
> > task_struct *child)
> >        |              ^~~~
> >
> > I haven't been able to find this documented anywhere however.
>
Martin KaFai Lau March 29, 2023, 7:59 p.m. UTC | #4
On 3/29/23 12:12 PM, James Hilliard wrote:
> On Wed, Mar 29, 2023 at 11:03 AM Martin KaFai Lau <martin.lau@linux.dev> wrote:
>>
>> On 3/27/23 8:51 PM, James Hilliard wrote:
>>>> diff --git a/tools/testing/selftests/bpf/progs/bench_local_storage_create.c b/tools/testing/selftests/bpf/progs/bench_local_storage_create.c
>>>> index 2814bab54d28..7c851c9d5e47 100644
>>>> --- a/tools/testing/selftests/bpf/progs/bench_local_storage_create.c
>>>> +++ b/tools/testing/selftests/bpf/progs/bench_local_storage_create.c
>>>> @@ -22,6 +22,13 @@ struct {
>>>>           __type(value, struct storage);
>>>>    } sk_storage_map SEC(".maps");
>>>>
>>>> +struct {
>>>> +       __uint(type, BPF_MAP_TYPE_TASK_STORAGE);
>>>> +       __uint(map_flags, BPF_F_NO_PREALLOC);
>>>> +       __type(key, int);
>>>> +       __type(value, struct storage);
>>>> +} task_storage_map SEC(".maps");
>>>> +
>>>>    SEC("raw_tp/kmalloc")
>>>>    int BPF_PROG(kmalloc, unsigned long call_site, const void *ptr,
>>>>                size_t bytes_req, size_t bytes_alloc, gfp_t gfp_flags,
>>>> @@ -32,6 +39,24 @@ int BPF_PROG(kmalloc, unsigned long call_site, const void *ptr,
>>>>           return 0;
>>>>    }
>>>>
>>>> +SEC("tp_btf/sched_process_fork")
>>>> +int BPF_PROG(fork, struct task_struct *parent, struct task_struct *child)
>>>
>>> Apparently fork is a built-in function in bpf-gcc:
>>
>> It is also failing in a plain C program
>>
>> #>  gcc -Werror=builtin-declaration-mismatch -o test test.c
>> test.c:14:35: error: conflicting types for built-in function ‘fork’; expected
>> ‘int(void)’ [-Werror=builtin-declaration-mismatch]
>>      14 | int __attribute__((__noinline__)) fork(long x, long y)
>>         |                                   ^~~~
>> cc1: some warnings being treated as errors
>>
>> #> clang -o test test.c
>> succeed
>>
>> I am not too attached to the name but it seems something should be addressed in
>> the gcc instead.
> 
> Hmm, so it looks like it's marked as a builtin here:
> https://github.com/gcc-mirror/gcc/blob/releases/gcc-12.1.0/gcc/builtins.def#L875
> 
> The macro for that is here:
> https://github.com/gcc-mirror/gcc/blob/releases/gcc-12.1.0/gcc/builtins.def#L104-L111
> 
> Which has this comment:
> /* Like DEF_LIB_BUILTIN, except that the function is not one that is
> specified by ANSI/ISO C. So, when we're being fully conformant we
> ignore the version of these builtins that does not begin with
> __builtin. */
> 
> Looks like this builtin was originally added here:
> https://github.com/gcc-mirror/gcc/commit/d1c38823924506d389ca58d02926ace21bdf82fa
> 
> Based on this issue it looks like fork is treated as a builtin for
> libgcov support:
> https://gcc.gnu.org/bugzilla//show_bug.cgi?id=82457
> 
> So from my understanding fork is a gcc builtin when building with -std=gnu11
> but is not a builtin when building with -std=c11.

That sounds like there is a knob to turn this behavior on and off. Could the same
be done for the bpf target?

> 
> So it looks like fork is translated to __gcov_fork when -std=gnu* is set which
> is why we get this error.
> 
> As this appears to be intended behavior for gcc I think the best option is
> to just rename the function so that we don't run into issues when building
> with gnu extensions like -std=gnu11.

Is it certain that 'fork' is the only culprit? If not, it is better to address it
properly, because this unnecessary name change is annoying when switching a bpf
prog from clang to gcc. For example, changing the name in this .c file also requires
another change to the .c file in the prog_tests/ directory.

> 
>>
>>>
>>> In file included from progs/bench_local_storage_create.c:6:
>>> progs/bench_local_storage_create.c:43:14: error: conflicting types for
>>> built-in function 'fork'; expected 'int(void)'
>>> [-Werror=builtin-declaration-mismatch]
>>>      43 | int BPF_PROG(fork, struct task_struct *parent, struct
>>> task_struct *child)
>>>         |              ^~~~
>>>
>>> I haven't been able to find this documented anywhere however.
>>
James Hilliard March 29, 2023, 8:03 p.m. UTC | #5
On Wed, Mar 29, 2023 at 1:59 PM Martin KaFai Lau <martin.lau@linux.dev> wrote:
>
> On 3/29/23 12:12 PM, James Hilliard wrote:
> > On Wed, Mar 29, 2023 at 11:03 AM Martin KaFai Lau <martin.lau@linux.dev> wrote:
> >>
> >> On 3/27/23 8:51 PM, James Hilliard wrote:
> >>>> diff --git a/tools/testing/selftests/bpf/progs/bench_local_storage_create.c b/tools/testing/selftests/bpf/progs/bench_local_storage_create.c
> >>>> index 2814bab54d28..7c851c9d5e47 100644
> >>>> --- a/tools/testing/selftests/bpf/progs/bench_local_storage_create.c
> >>>> +++ b/tools/testing/selftests/bpf/progs/bench_local_storage_create.c
> >>>> @@ -22,6 +22,13 @@ struct {
> >>>>           __type(value, struct storage);
> >>>>    } sk_storage_map SEC(".maps");
> >>>>
> >>>> +struct {
> >>>> +       __uint(type, BPF_MAP_TYPE_TASK_STORAGE);
> >>>> +       __uint(map_flags, BPF_F_NO_PREALLOC);
> >>>> +       __type(key, int);
> >>>> +       __type(value, struct storage);
> >>>> +} task_storage_map SEC(".maps");
> >>>> +
> >>>>    SEC("raw_tp/kmalloc")
> >>>>    int BPF_PROG(kmalloc, unsigned long call_site, const void *ptr,
> >>>>                size_t bytes_req, size_t bytes_alloc, gfp_t gfp_flags,
> >>>> @@ -32,6 +39,24 @@ int BPF_PROG(kmalloc, unsigned long call_site, const void *ptr,
> >>>>           return 0;
> >>>>    }
> >>>>
> >>>> +SEC("tp_btf/sched_process_fork")
> >>>> +int BPF_PROG(fork, struct task_struct *parent, struct task_struct *child)
> >>>
> >>> Apparently fork is a built-in function in bpf-gcc:
> >>
> >> It is also failing in a plain C program
> >>
> >> #>  gcc -Werror=builtin-declaration-mismatch -o test test.c
> >> test.c:14:35: error: conflicting types for built-in function ‘fork’; expected
> >> ‘int(void)’ [-Werror=builtin-declaration-mismatch]
> >>      14 | int __attribute__((__noinline__)) fork(long x, long y)
> >>         |                                   ^~~~
> >> cc1: some warnings being treated as errors
> >>
> >> #> clang -o test test.c
> >> succeed
> >>
> >> I am not too attached to the name but it seems something should be addressed in
> >> the gcc instead.
> >
> > Hmm, so it looks like it's marked as a builtin here:
> > https://github.com/gcc-mirror/gcc/blob/releases/gcc-12.1.0/gcc/builtins.def#L875
> >
> > The macro for that is here:
> > https://github.com/gcc-mirror/gcc/blob/releases/gcc-12.1.0/gcc/builtins.def#L104-L111
> >
> > Which has this comment:
> > /* Like DEF_LIB_BUILTIN, except that the function is not one that is
> > specified by ANSI/ISO C. So, when we're being fully conformant we
> > ignore the version of these builtins that does not begin with
> > __builtin. */
> >
> > Looks like this builtin was originally added here:
> > https://github.com/gcc-mirror/gcc/commit/d1c38823924506d389ca58d02926ace21bdf82fa
> >
> > Based on this issue it looks like fork is treated as a builtin for
> > libgcov support:
> > https://gcc.gnu.org/bugzilla//show_bug.cgi?id=82457
> >
> > So from my understanding fork is a gcc builtin when building with -std=gnu11
> > but is not a builtin when building with -std=c11.
>
> That sounds like there is a knob to turn this behavior on and off. Do the same
> for the bpf target?

I don't think we want to have to do that.

>
> >
> > So it looks like fork is translated to __gcov_fork when -std=gnu* is set which
> > is why we get this error.
> >
> > As this appears to be intended behavior for gcc I think the best option is
> > to just rename the function so that we don't run into issues when building
> > with gnu extensions like -std=gnu11.
>
> Is it sure 'fork' is the only culprit? If not, it is better to address it
> properly because this unnecessary name change is annoying when switching bpf
> prog from clang to gcc. Like changing the name in this .c here has to make
> another change to the .c in the prog_tests/ directory.

We've fixed a similar issue in the past by renaming to avoid a
conflict with the builtin:
https://github.com/torvalds/linux/commit/ab0350c743d5c93fd88742f02b3dff12168ab435

>
> >
> >>
> >>>
> >>> In file included from progs/bench_local_storage_create.c:6:
> >>> progs/bench_local_storage_create.c:43:14: error: conflicting types for
> >>> built-in function 'fork'; expected 'int(void)'
> >>> [-Werror=builtin-declaration-mismatch]
> >>>      43 | int BPF_PROG(fork, struct task_struct *parent, struct
> >>> task_struct *child)
> >>>         |              ^~~~
> >>>
> >>> I haven't been able to find this documented anywhere however.
> >>
>
Martin KaFai Lau March 29, 2023, 8:07 p.m. UTC | #6
On 3/29/23 1:03 PM, James Hilliard wrote:
>>> So it looks like fork is translated to __gcov_fork when -std=gnu* is set which
>>> is why we get this error.
>>>
>>> As this appears to be intended behavior for gcc I think the best option is
>>> to just rename the function so that we don't run into issues when building
>>> with gnu extensions like -std=gnu11.
>> Is it sure 'fork' is the only culprit? If not, it is better to address it
>> properly because this unnecessary name change is annoying when switching bpf
>> prog from clang to gcc. Like changing the name in this .c here has to make
>> another change to the .c in the prog_tests/ directory.
> We've fixed a similar issue in the past by renaming to avoid a
> conflict with the builtin:
> https://github.com/torvalds/linux/commit/ab0350c743d5c93fd88742f02b3dff12168ab435
> 

Fair enough. Please post a patch for the name change.
James Hilliard March 30, 2023, 7:51 a.m. UTC | #7
On Wed, Mar 29, 2023 at 2:07 PM Martin KaFai Lau <martin.lau@linux.dev> wrote:
>
> On 3/29/23 1:03 PM, James Hilliard wrote:
> >>> So it looks like fork is translated to __gcov_fork when -std=gnu* is set which
> >>> is why we get this error.
> >>>
> >>> As this appears to be intended behavior for gcc I think the best option is
> >>> to just rename the function so that we don't run into issues when building
> >>> with gnu extensions like -std=gnu11.
> >> Is it sure 'fork' is the only culprit? If not, it is better to address it
> >> properly because this unnecessary name change is annoying when switching bpf
> >> prog from clang to gcc. Like changing the name in this .c here has to make
> >> another change to the .c in the prog_tests/ directory.
> > We've fixed a similar issue in the past by renaming to avoid a
> > conflict with the builtin:
> > https://github.com/torvalds/linux/commit/ab0350c743d5c93fd88742f02b3dff12168ab435
> >
>
> Fair enough. Please post a patch for the name change.

Any suggestions/preferences on what name I should use instead?
Martin KaFai Lau March 30, 2023, 6:12 p.m. UTC | #8
On 3/30/23 12:51 AM, James Hilliard wrote:
> On Wed, Mar 29, 2023 at 2:07 PM Martin KaFai Lau <martin.lau@linux.dev> wrote:
>>
>> On 3/29/23 1:03 PM, James Hilliard wrote:
>>>>> So it looks like fork is translated to __gcov_fork when -std=gnu* is set which
>>>>> is why we get this error.
>>>>>
>>>>> As this appears to be intended behavior for gcc I think the best option is
>>>>> to just rename the function so that we don't run into issues when building
>>>>> with gnu extensions like -std=gnu11.
>>>> Is it sure 'fork' is the only culprit? If not, it is better to address it
>>>> properly because this unnecessary name change is annoying when switching bpf
>>>> prog from clang to gcc. Like changing the name in this .c here has to make
>>>> another change to the .c in the prog_tests/ directory.
>>> We've fixed a similar issue in the past by renaming to avoid a
>>> conflict with the builtin:
>>> https://github.com/torvalds/linux/commit/ab0350c743d5c93fd88742f02b3dff12168ab435
>>>
>>
>> Fair enough. Please post a patch for the name change.
> 
> Any suggestions/preferences on what name I should use instead?

Maybe 'sched_process_fork'?
That will make it the same as the tracepoint's name.
diff mbox series

Patch

diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c
index dc3827c1f139..d9c080ac1796 100644
--- a/tools/testing/selftests/bpf/bench.c
+++ b/tools/testing/selftests/bpf/bench.c
@@ -278,6 +278,7 @@  extern struct argp bench_local_storage_argp;
 extern struct argp bench_local_storage_rcu_tasks_trace_argp;
 extern struct argp bench_strncmp_argp;
 extern struct argp bench_hashmap_lookup_argp;
+extern struct argp bench_local_storage_create_argp;
 
 static const struct argp_child bench_parsers[] = {
 	{ &bench_ringbufs_argp, 0, "Ring buffers benchmark", 0 },
@@ -288,6 +289,7 @@  static const struct argp_child bench_parsers[] = {
 	{ &bench_local_storage_rcu_tasks_trace_argp, 0,
 		"local_storage RCU Tasks Trace slowdown benchmark", 0 },
 	{ &bench_hashmap_lookup_argp, 0, "Hashmap lookup benchmark", 0 },
+	{ &bench_local_storage_create_argp, 0, "local-storage-create benchmark", 0 },
 	{},
 };
 
diff --git a/tools/testing/selftests/bpf/benchs/bench_local_storage_create.c b/tools/testing/selftests/bpf/benchs/bench_local_storage_create.c
index f8b2a640ccbe..abb0321d4f34 100644
--- a/tools/testing/selftests/bpf/benchs/bench_local_storage_create.c
+++ b/tools/testing/selftests/bpf/benchs/bench_local_storage_create.c
@@ -3,19 +3,71 @@ 
 
 #include <sys/types.h>
 #include <sys/socket.h>
+#include <pthread.h>
+#include <argp.h>
 
 #include "bench.h"
 #include "bench_local_storage_create.skel.h"
 
-#define BATCH_SZ 32
-
 struct thread {
-	int fds[BATCH_SZ];
+	int *fds;
+	pthread_t *pthds;
+	int *pthd_results;
 };
 
 static struct bench_local_storage_create *skel;
 static struct thread *threads;
-static long socket_errs;
+static long create_owner_errs;
+static int storage_type = BPF_MAP_TYPE_SK_STORAGE;
+static int batch_sz = 32;
+
+enum {
+	ARG_BATCH_SZ = 9000,
+	ARG_STORAGE_TYPE = 9001,
+};
+
+static const struct argp_option opts[] = {
+	{ "batch-size", ARG_BATCH_SZ, "BATCH_SIZE", 0,
+	  "The number of storage creations in each batch" },
+	{ "storage-type", ARG_STORAGE_TYPE, "STORAGE_TYPE", 0,
+	  "The type of local storage to test (socket or task)" },
+	{},
+};
+
+static error_t parse_arg(int key, char *arg, struct argp_state *state)
+{
+	int ret;
+
+	switch (key) {
+	case ARG_BATCH_SZ:
+		ret = atoi(arg);
+		if (ret < 1) {
+			fprintf(stderr, "invalid batch-size\n");
+			argp_usage(state);
+		}
+		batch_sz = ret;
+		break;
+	case ARG_STORAGE_TYPE:
+		if (!strcmp(arg, "task")) {
+			storage_type = BPF_MAP_TYPE_TASK_STORAGE;
+		} else if (!strcmp(arg, "socket")) {
+			storage_type = BPF_MAP_TYPE_SK_STORAGE;
+		} else {
+			fprintf(stderr, "invalid storage-type (socket or task)\n");
+			argp_usage(state);
+		}
+		break;
+	default:
+		return ARGP_ERR_UNKNOWN;
+	}
+
+	return 0;
+}
+
+const struct argp bench_local_storage_create_argp = {
+	.options = opts,
+	.parser = parse_arg,
+};
 
 static void validate(void)
 {
@@ -28,6 +80,8 @@  static void validate(void)
 
 static void setup(void)
 {
+	int i;
+
 	skel = bench_local_storage_create__open_and_load();
 	if (!skel) {
 		fprintf(stderr, "error loading skel\n");
@@ -35,10 +89,16 @@  static void setup(void)
 	}
 
 	skel->bss->bench_pid = getpid();
-
-	if (!bpf_program__attach(skel->progs.socket_post_create)) {
-		fprintf(stderr, "Error attaching bpf program\n");
-		exit(1);
+	if (storage_type == BPF_MAP_TYPE_SK_STORAGE) {
+		if (!bpf_program__attach(skel->progs.socket_post_create)) {
+			fprintf(stderr, "Error attaching bpf program\n");
+			exit(1);
+		}
+	} else {
+		if (!bpf_program__attach(skel->progs.fork)) {
+			fprintf(stderr, "Error attaching bpf program\n");
+			exit(1);
+		}
 	}
 
 	if (!bpf_program__attach(skel->progs.kmalloc)) {
@@ -52,6 +112,29 @@  static void setup(void)
 		fprintf(stderr, "cannot alloc thread_res\n");
 		exit(1);
 	}
+
+	for (i = 0; i < env.producer_cnt; i++) {
+		struct thread *t = &threads[i];
+
+		if (storage_type == BPF_MAP_TYPE_SK_STORAGE) {
+			t->fds = malloc(batch_sz * sizeof(*t->fds));
+			if (!t->fds) {
+				fprintf(stderr, "cannot alloc t->fds\n");
+				exit(1);
+			}
+		} else {
+			t->pthds = malloc(batch_sz * sizeof(*t->pthds));
+			if (!t->pthds) {
+				fprintf(stderr, "cannot alloc t->pthds\n");
+				exit(1);
+			}
+			t->pthd_results = malloc(batch_sz * sizeof(*t->pthd_results));
+			if (!t->pthd_results) {
+				fprintf(stderr, "cannot alloc t->pthd_results\n");
+				exit(1);
+			}
+		}
+	}
 }
 
 static void measure(struct bench_res *res)
@@ -65,20 +148,20 @@  static void *consumer(void *input)
 	return NULL;
 }
 
-static void *producer(void *input)
+static void *sk_producer(void *input)
 {
 	struct thread *t = &threads[(long)(input)];
 	int *fds = t->fds;
 	int i;
 
 	while (true) {
-		for (i = 0; i < BATCH_SZ; i++) {
+		for (i = 0; i < batch_sz; i++) {
 			fds[i] = socket(AF_INET6, SOCK_DGRAM, 0);
 			if (fds[i] == -1)
-				atomic_inc(&socket_errs);
+				atomic_inc(&create_owner_errs);
 		}
 
-		for (i = 0; i < BATCH_SZ; i++) {
+		for (i = 0; i < batch_sz; i++) {
 			if (fds[i] != -1)
 				close(fds[i]);
 		}
@@ -87,6 +170,42 @@  static void *producer(void *input)
 	return NULL;
 }
 
+static void *thread_func(void *arg)
+{
+	return NULL;
+}
+
+static void *task_producer(void *input)
+{
+	struct thread *t = &threads[(long)(input)];
+	pthread_t *pthds = t->pthds;
+	int *pthd_results = t->pthd_results;
+	int i;
+
+	while (true) {
+		for (i = 0; i < batch_sz; i++) {
+			pthd_results[i] = pthread_create(&pthds[i], NULL, thread_func, NULL);
+			if (pthd_results[i])
+				atomic_inc(&create_owner_errs);
+		}
+
+		for (i = 0; i < batch_sz; i++) {
+			if (!pthd_results[i])
+				pthread_join(pthds[i], NULL);;
+		}
+	}
+
+	return NULL;
+}
+
+static void *producer(void *input)
+{
+	if (storage_type == BPF_MAP_TYPE_SK_STORAGE)
+		return sk_producer(input);
+	else
+		return task_producer(input);
+}
+
 static void report_progress(int iter, struct bench_res *res, long delta_ns)
 {
 	double creates_per_sec, kmallocs_per_create;
@@ -123,14 +242,18 @@  static void report_final(struct bench_res res[], int res_cnt)
 	printf("Summary: creates %8.3lf \u00B1 %5.3lfk/s (%7.3lfk/prod), ",
 	       creates_mean, creates_stddev, creates_mean / env.producer_cnt);
 	printf("%4.2lf kmallocs/create\n", (double)total_kmallocs / total_creates);
-	if (socket_errs || skel->bss->create_errs)
-		printf("socket() errors %ld create_errs %ld\n", socket_errs,
+	if (create_owner_errs || skel->bss->create_errs)
+		printf("%s() errors %ld create_errs %ld\n",
+		       storage_type == BPF_MAP_TYPE_SK_STORAGE ?
+		       "socket" : "pthread_create",
+		       create_owner_errs,
 		       skel->bss->create_errs);
 }
 
 /* Benchmark performance of creating bpf local storage  */
 const struct bench bench_local_storage_create = {
 	.name = "local-storage-create",
+	.argp = &bench_local_storage_create_argp,
 	.validate = validate,
 	.setup = setup,
 	.producer_thread = producer,
diff --git a/tools/testing/selftests/bpf/progs/bench_local_storage_create.c b/tools/testing/selftests/bpf/progs/bench_local_storage_create.c
index 2814bab54d28..7c851c9d5e47 100644
--- a/tools/testing/selftests/bpf/progs/bench_local_storage_create.c
+++ b/tools/testing/selftests/bpf/progs/bench_local_storage_create.c
@@ -22,6 +22,13 @@  struct {
 	__type(value, struct storage);
 } sk_storage_map SEC(".maps");
 
+struct {
+	__uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+	__uint(map_flags, BPF_F_NO_PREALLOC);
+	__type(key, int);
+	__type(value, struct storage);
+} task_storage_map SEC(".maps");
+
 SEC("raw_tp/kmalloc")
 int BPF_PROG(kmalloc, unsigned long call_site, const void *ptr,
 	     size_t bytes_req, size_t bytes_alloc, gfp_t gfp_flags,
@@ -32,6 +39,24 @@  int BPF_PROG(kmalloc, unsigned long call_site, const void *ptr,
 	return 0;
 }
 
+SEC("tp_btf/sched_process_fork")
+int BPF_PROG(fork, struct task_struct *parent, struct task_struct *child)
+{
+	struct storage *stg;
+
+	if (parent->tgid != bench_pid)
+		return 0;
+
+	stg = bpf_task_storage_get(&task_storage_map, child, NULL,
+				   BPF_LOCAL_STORAGE_GET_F_CREATE);
+	if (stg)
+		__sync_fetch_and_add(&create_cnts, 1);
+	else
+		__sync_fetch_and_add(&create_errs, 1);
+
+	return 0;
+}
+
 SEC("lsm.s/socket_post_create")
 int BPF_PROG(socket_post_create, struct socket *sock, int family, int type,
 	     int protocol, int kern)