diff mbox series

[bpf-next,4/4] selftests/bpf: Add a case to test global percpu data

Message ID 20250127162158.84906-5-leon.hwang@linux.dev (mailing list archive)
State Changes Requested
Delegated to: BPF
Headers show
Series bpf: Introduce global percpu data | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for bpf-next, async
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 0 this patch: 0
netdev/build_tools success Errors and warnings before: 26 (+1) this patch: 26 (+1)
netdev/cc_maintainers warning 9 maintainers not CCed: kpsingh@kernel.org sdf@fomichev.me jolsa@kernel.org shuah@kernel.org john.fastabend@gmail.com linux-kselftest@vger.kernel.org haoluo@google.com mykolal@fb.com martin.lau@linux.dev
netdev/build_clang success Errors and warnings before: 47 this patch: 47
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 10 this patch: 10
netdev/checkpatch warning CHECK: Comparison to NULL could be written "!percpu_data" CHECK: No space is necessary after a cast CHECK: spaces preferred around that '%' (ctx:VxV) CHECK: spaces preferred around that '+' (ctx:VxV) WARNING: added, moved or deleted file(s), does MAINTAINERS need updating? WARNING: line length of 82 exceeds 80 columns WARNING: line length of 86 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-next-PR success PR summary
bpf/vmtest-bpf-next-VM_Test-3 success Logs for Validate matrix.py
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-0 success Logs for Lint
bpf/vmtest-bpf-next-VM_Test-2 success Logs for Unittests
bpf/vmtest-bpf-next-VM_Test-6 success Logs for aarch64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-5 success Logs for aarch64-gcc / build / build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-11 success Logs for aarch64-gcc / veristat-kernel
bpf/vmtest-bpf-next-VM_Test-12 success Logs for aarch64-gcc / veristat-meta
bpf/vmtest-bpf-next-VM_Test-4 success Logs for aarch64-gcc / GCC BPF
bpf/vmtest-bpf-next-VM_Test-14 success Logs for s390x-gcc / build / build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-24 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-19 success Logs for s390x-gcc / veristat-kernel
bpf/vmtest-bpf-next-VM_Test-20 success Logs for s390x-gcc / veristat-meta
bpf/vmtest-bpf-next-VM_Test-21 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-23 success Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-18 success Logs for s390x-gcc / test (test_verifier, false, 360) / test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-13 success Logs for s390x-gcc / GCC BPF
bpf/vmtest-bpf-next-VM_Test-15 success Logs for s390x-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-34 success Logs for x86_64-llvm-17 / build / build for x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-35 success Logs for x86_64-llvm-17 / build-release / build for x86_64 with llvm-17-O2
bpf/vmtest-bpf-next-VM_Test-40 success Logs for x86_64-llvm-17 / veristat-kernel
bpf/vmtest-bpf-next-VM_Test-41 success Logs for x86_64-llvm-17 / veristat-meta
bpf/vmtest-bpf-next-VM_Test-43 success Logs for x86_64-llvm-18 / build / build for x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-44 success Logs for x86_64-llvm-18 / build-release / build for x86_64 with llvm-18-O2
bpf/vmtest-bpf-next-VM_Test-50 success Logs for x86_64-llvm-18 / veristat-kernel
bpf/vmtest-bpf-next-VM_Test-51 success Logs for x86_64-llvm-18 / veristat-meta
bpf/vmtest-bpf-next-VM_Test-10 success Logs for aarch64-gcc / test (test_verifier, false, 360) / test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-9 success Logs for aarch64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-17 success Logs for s390x-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-7 success Logs for aarch64-gcc / test (test_maps, false, 360) / test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-8 success Logs for aarch64-gcc / test (test_progs, false, 360) / test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-16 success Logs for s390x-gcc / test (test_progs, false, 360) / test_progs on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-22 success Logs for x86_64-gcc / GCC BPF / GCC BPF
bpf/vmtest-bpf-next-VM_Test-25 success Logs for x86_64-gcc / test (test_maps, false, 360) / test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-26 success Logs for x86_64-gcc / test (test_progs, false, 360) / test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-27 success Logs for x86_64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-28 success Logs for x86_64-gcc / test (test_progs_no_alu32_parallel, true, 30) / test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-29 success Logs for x86_64-gcc / test (test_progs_parallel, true, 30) / test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-30 success Logs for x86_64-gcc / test (test_verifier, false, 360) / test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-31 success Logs for x86_64-gcc / veristat-kernel / x86_64-gcc veristat_kernel
bpf/vmtest-bpf-next-VM_Test-32 success Logs for x86_64-gcc / veristat-meta / x86_64-gcc veristat_meta
bpf/vmtest-bpf-next-VM_Test-33 success Logs for x86_64-llvm-17 / GCC BPF / GCC BPF
bpf/vmtest-bpf-next-VM_Test-36 success Logs for x86_64-llvm-17 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-37 success Logs for x86_64-llvm-17 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-38 success Logs for x86_64-llvm-17 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-39 success Logs for x86_64-llvm-17 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-42 success Logs for x86_64-llvm-18 / GCC BPF / GCC BPF
bpf/vmtest-bpf-next-VM_Test-45 success Logs for x86_64-llvm-18 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-46 success Logs for x86_64-llvm-18 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-47 success Logs for x86_64-llvm-18 / test (test_progs_cpuv4, false, 360) / test_progs_cpuv4 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-48 success Logs for x86_64-llvm-18 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-49 success Logs for x86_64-llvm-18 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-18

Commit Message

Leon Hwang Jan. 27, 2025, 4:21 p.m. UTC
If the arch, like s390x, does not support percpu insns, this case won't
test global percpu data; instead, it checks for -EOPNOTSUPP when loading the prog.

The following APIs have been tested for global percpu data:
1. bpf_map__set_initial_value()
2. bpf_map__initial_value()
3. generated percpu struct pointer that points to internal map's data
4. bpf_map__lookup_elem() for global percpu data map

cd tools/testing/selftests/bpf; ./test_progs -t global_percpu_data
124     global_percpu_data_init:OK
Summary: 1/0 PASSED, 0 SKIPPED, 0 FAILED

Signed-off-by: Leon Hwang <leon.hwang@linux.dev>
---
 .../bpf/prog_tests/global_data_init.c         | 89 ++++++++++++++++++-
 .../bpf/progs/test_global_percpu_data.c       | 21 +++++
 2 files changed, 109 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/bpf/progs/test_global_percpu_data.c

Comments

Andrii Nakryiko Feb. 6, 2025, 12:09 a.m. UTC | #1
On Mon, Jan 27, 2025 at 8:22 AM Leon Hwang <leon.hwang@linux.dev> wrote:
>
> If the arch, like s390x, does not support percpu insn, this case won't
> test global percpu data by checking -EOPNOTSUPP when load prog.
>
> The following APIs have been tested for global percpu data:
> 1. bpf_map__set_initial_value()
> 2. bpf_map__initial_value()
> 3. generated percpu struct pointer that points to internal map's data
> 4. bpf_map__lookup_elem() for global percpu data map
>
> cd tools/testing/selftests/bpf; ./test_progs -t global_percpu_data
> 124     global_percpu_data_init:OK
> Summary: 1/0 PASSED, 0 SKIPPED, 0 FAILED
>
> Signed-off-by: Leon Hwang <leon.hwang@linux.dev>
> ---
>  .../bpf/prog_tests/global_data_init.c         | 89 ++++++++++++++++++-
>  .../bpf/progs/test_global_percpu_data.c       | 21 +++++
>  2 files changed, 109 insertions(+), 1 deletion(-)
>  create mode 100644 tools/testing/selftests/bpf/progs/test_global_percpu_data.c
>
> diff --git a/tools/testing/selftests/bpf/prog_tests/global_data_init.c b/tools/testing/selftests/bpf/prog_tests/global_data_init.c
> index 8466332d7406f..a5d0890444f67 100644
> --- a/tools/testing/selftests/bpf/prog_tests/global_data_init.c
> +++ b/tools/testing/selftests/bpf/prog_tests/global_data_init.c
> @@ -1,5 +1,6 @@
>  // SPDX-License-Identifier: GPL-2.0
>  #include <test_progs.h>
> +#include "test_global_percpu_data.skel.h"
>
>  void test_global_data_init(void)
>  {
> @@ -8,7 +9,7 @@ void test_global_data_init(void)
>         __u8 *buff = NULL, *newval = NULL;
>         struct bpf_object *obj;
>         struct bpf_map *map;
> -        __u32 duration = 0;
> +       __u32 duration = 0;
>         size_t sz;
>
>         obj = bpf_object__open_file(file, NULL);
> @@ -60,3 +61,89 @@ void test_global_data_init(void)
>         free(newval);
>         bpf_object__close(obj);
>  }
> +
> +void test_global_percpu_data_init(void)
> +{
> +       struct test_global_percpu_data *skel = NULL;
> +       u64 *percpu_data = NULL;

there is that test_global_percpu_data__percpu type you are declaring
in the BPF skeleton, right? We should try using it here.

And for that array access, we should make sure that it's __aligned(8),
so indexing by CPU index works correctly.

Also, you define per-CPU variable as int, but here it is u64, what's
up with that?

> +       struct bpf_map *map;
> +       size_t init_data_sz;
> +       char buff[128] = {};
> +       int init_value = 2;
> +       int key, value_sz;
> +       int prog_fd, err;
> +       int *init_data;
> +       int num_cpus;
> +

nit: LIBBPF_OPTS below is variable declaration, so there shouldn't be
an empty line here (and maybe group those int variables a bit more
tightly?)

> +       LIBBPF_OPTS(bpf_test_run_opts, topts,
> +                   .data_in = buff,
> +                   .data_size_in = sizeof(buff),
> +                   .repeat = 1,
> +       );
> +
> +       num_cpus = libbpf_num_possible_cpus();
> +       if (!ASSERT_GT(num_cpus, 0, "libbpf_num_possible_cpus"))
> +               return;
> +
> +       percpu_data = calloc(num_cpus, sizeof(*percpu_data));
> +       if (!ASSERT_FALSE(percpu_data == NULL, "calloc percpu_data"))

ASSERT_OK_PTR()

> +               return;
> +
> +       value_sz = sizeof(*percpu_data) * num_cpus;
> +       memset(percpu_data, 0, value_sz);

you calloc()'ed it, it's already zero-initialized


> +
> +       skel = test_global_percpu_data__open();
> +       if (!ASSERT_OK_PTR(skel, "test_global_percpu_data__open"))
> +               goto out;
> +
> +       ASSERT_EQ(skel->percpu->percpu_data, -1, "skel->percpu->percpu_data");
> +
> +       map = skel->maps.percpu;
> +       err = bpf_map__set_initial_value(map, &init_value,
> +                                        sizeof(init_value));
> +       if (!ASSERT_OK(err, "bpf_map__set_initial_value"))
> +               goto out;
> +
> +       init_data = bpf_map__initial_value(map, &init_data_sz);
> +       if (!ASSERT_OK_PTR(init_data, "bpf_map__initial_value"))
> +               goto out;
> +
> +       ASSERT_EQ(*init_data, init_value, "initial_value");
> +       ASSERT_EQ(init_data_sz, sizeof(init_value), "initial_value size");
> +
> +       if (!ASSERT_EQ((void *) init_data, (void *) skel->percpu, "skel->percpu"))
> +               goto out;
> +       ASSERT_EQ(skel->percpu->percpu_data, init_value, "skel->percpu->percpu_data");
> +
> +       err = test_global_percpu_data__load(skel);
> +       if (err == -EOPNOTSUPP)
> +               goto out;
> +       if (!ASSERT_OK(err, "test_global_percpu_data__load"))
> +               goto out;
> +
> +       ASSERT_EQ(bpf_map__type(map), BPF_MAP_TYPE_PERCPU_ARRAY,
> +                 "bpf_map__type");
> +
> +       prog_fd = bpf_program__fd(skel->progs.update_percpu_data);
> +       err = bpf_prog_test_run_opts(prog_fd, &topts);

at least one of BPF programs (don't remember which one, could be
raw_tp) supports specifying CPU index to run on, it would be nice to
loop over CPUs, triggering BPF program on each one and filling per-CPU
variable with current CPU index. Then we can check that all per-CPU
values have expected values.


> +       ASSERT_OK(err, "update_percpu_data");
> +       ASSERT_EQ(topts.retval, 0, "update_percpu_data retval");
> +
> +       key = 0;
> +       err = bpf_map__lookup_elem(map, &key, sizeof(key), percpu_data,
> +                                  value_sz, 0);
> +       if (!ASSERT_OK(err, "bpf_map__lookup_elem"))
> +               goto out;
> +
> +       if (!ASSERT_LT(skel->bss->curr_cpu, num_cpus, "curr_cpu"))
> +               goto out;
> +       ASSERT_EQ((int) percpu_data[skel->bss->curr_cpu], 1, "percpu_data");
> +       if (num_cpus > 1)
> +               ASSERT_EQ((int) percpu_data[(skel->bss->curr_cpu+1)%num_cpus],
> +                         init_value, "init_value");
> +
> +out:
> +       test_global_percpu_data__destroy(skel);
> +       if (percpu_data)
> +               free(percpu_data);
> +}
> diff --git a/tools/testing/selftests/bpf/progs/test_global_percpu_data.c b/tools/testing/selftests/bpf/progs/test_global_percpu_data.c
> new file mode 100644
> index 0000000000000..731c3214b0bb4
> --- /dev/null
> +++ b/tools/testing/selftests/bpf/progs/test_global_percpu_data.c
> @@ -0,0 +1,21 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/* Copyright Leon Hwang */
> +
> +#include <linux/bpf.h>
> +#include <linux/pkt_cls.h>
> +
> +#include <bpf/bpf_helpers.h>
> +
> +int percpu_data SEC(".percpu") = -1;
> +int curr_cpu;
> +
> +SEC("tc")
> +int update_percpu_data(struct __sk_buff *skb)
> +{
> +       curr_cpu = bpf_get_smp_processor_id();
> +       percpu_data = 1;
> +
> +       return TC_ACT_OK;
> +}
> +
> +char _license[] SEC("license") = "GPL";
> --
> 2.47.1
>
Leon Hwang Feb. 7, 2025, 10 a.m. UTC | #2
On 6/2/25 08:09, Andrii Nakryiko wrote:
> On Mon, Jan 27, 2025 at 8:22 AM Leon Hwang <leon.hwang@linux.dev> wrote:
>>
>> If the arch, like s390x, does not support percpu insn, this case won't
>> test global percpu data by checking -EOPNOTSUPP when load prog.
>>
>> The following APIs have been tested for global percpu data:
>> 1. bpf_map__set_initial_value()
>> 2. bpf_map__initial_value()
>> 3. generated percpu struct pointer that points to internal map's data
>> 4. bpf_map__lookup_elem() for global percpu data map
>>
>> cd tools/testing/selftests/bpf; ./test_progs -t global_percpu_data
>> 124     global_percpu_data_init:OK
>> Summary: 1/0 PASSED, 0 SKIPPED, 0 FAILED
>>
>> Signed-off-by: Leon Hwang <leon.hwang@linux.dev>
>> ---
>>  .../bpf/prog_tests/global_data_init.c         | 89 ++++++++++++++++++-
>>  .../bpf/progs/test_global_percpu_data.c       | 21 +++++
>>  2 files changed, 109 insertions(+), 1 deletion(-)
>>  create mode 100644 tools/testing/selftests/bpf/progs/test_global_percpu_data.c
>>
>> diff --git a/tools/testing/selftests/bpf/prog_tests/global_data_init.c b/tools/testing/selftests/bpf/prog_tests/global_data_init.c
>> index 8466332d7406f..a5d0890444f67 100644
>> --- a/tools/testing/selftests/bpf/prog_tests/global_data_init.c
>> +++ b/tools/testing/selftests/bpf/prog_tests/global_data_init.c
>> @@ -1,5 +1,6 @@
>>  // SPDX-License-Identifier: GPL-2.0
>>  #include <test_progs.h>
>> +#include "test_global_percpu_data.skel.h"
>>
>>  void test_global_data_init(void)
>>  {
>> @@ -8,7 +9,7 @@ void test_global_data_init(void)
>>         __u8 *buff = NULL, *newval = NULL;
>>         struct bpf_object *obj;
>>         struct bpf_map *map;
>> -        __u32 duration = 0;
>> +       __u32 duration = 0;
>>         size_t sz;
>>
>>         obj = bpf_object__open_file(file, NULL);
>> @@ -60,3 +61,89 @@ void test_global_data_init(void)
>>         free(newval);
>>         bpf_object__close(obj);
>>  }
>> +
>> +void test_global_percpu_data_init(void)
>> +{
>> +       struct test_global_percpu_data *skel = NULL;
>> +       u64 *percpu_data = NULL;
> 
> there is that test_global_percpu_data__percpu type you are declaring
> in the BPF skeleton, right? We should try using it here.
> 

No. bpftool does not generate test_global_percpu_data__percpu. The
struct for global variables is embedded into skeleton struct.

Should we generate type for global variables?

> And for that array access, we should make sure that it's __aligned(8),
> so indexing by CPU index works correctly.
> 

Ack.

> Also, you define per-CPU variable as int, but here it is u64, what's
> up with that?
> 

Like __aligned(8), it's to make sure 8-bytes aligned. It's better to use
__aligned(8).

>> +       struct bpf_map *map;
>> +       size_t init_data_sz;
>> +       char buff[128] = {};
>> +       int init_value = 2;
>> +       int key, value_sz;
>> +       int prog_fd, err;
>> +       int *init_data;
>> +       int num_cpus;
>> +
> 
> nit: LIBBPF_OPTS below is variable declaration, so there shouldn't be
> an empty line here (and maybe group those int variables a bit more
> tightly?)
> 

Ack.

>> +       LIBBPF_OPTS(bpf_test_run_opts, topts,
>> +                   .data_in = buff,
>> +                   .data_size_in = sizeof(buff),
>> +                   .repeat = 1,
>> +       );
>> +
>> +       num_cpus = libbpf_num_possible_cpus();
>> +       if (!ASSERT_GT(num_cpus, 0, "libbpf_num_possible_cpus"))
>> +               return;
>> +
>> +       percpu_data = calloc(num_cpus, sizeof(*percpu_data));
>> +       if (!ASSERT_FALSE(percpu_data == NULL, "calloc percpu_data"))
> 
> ASSERT_OK_PTR()
> 

Ack.

>> +               return;
>> +
>> +       value_sz = sizeof(*percpu_data) * num_cpus;
>> +       memset(percpu_data, 0, value_sz);
> 
> you calloc()'ed it, it's already zero-initialized
> 

Ack. Thanks. I should check "man calloc" to use it.

> 
>> +
>> +       skel = test_global_percpu_data__open();
>> +       if (!ASSERT_OK_PTR(skel, "test_global_percpu_data__open"))
>> +               goto out;
>> +
>> +       ASSERT_EQ(skel->percpu->percpu_data, -1, "skel->percpu->percpu_data");
>> +
>> +       map = skel->maps.percpu;
>> +       err = bpf_map__set_initial_value(map, &init_value,
>> +                                        sizeof(init_value));
>> +       if (!ASSERT_OK(err, "bpf_map__set_initial_value"))
>> +               goto out;
>> +
>> +       init_data = bpf_map__initial_value(map, &init_data_sz);
>> +       if (!ASSERT_OK_PTR(init_data, "bpf_map__initial_value"))
>> +               goto out;
>> +
>> +       ASSERT_EQ(*init_data, init_value, "initial_value");
>> +       ASSERT_EQ(init_data_sz, sizeof(init_value), "initial_value size");
>> +
>> +       if (!ASSERT_EQ((void *) init_data, (void *) skel->percpu, "skel->percpu"))
>> +               goto out;
>> +       ASSERT_EQ(skel->percpu->percpu_data, init_value, "skel->percpu->percpu_data");
>> +
>> +       err = test_global_percpu_data__load(skel);
>> +       if (err == -EOPNOTSUPP)
>> +               goto out;
>> +       if (!ASSERT_OK(err, "test_global_percpu_data__load"))
>> +               goto out;
>> +
>> +       ASSERT_EQ(bpf_map__type(map), BPF_MAP_TYPE_PERCPU_ARRAY,
>> +                 "bpf_map__type");
>> +
>> +       prog_fd = bpf_program__fd(skel->progs.update_percpu_data);
>> +       err = bpf_prog_test_run_opts(prog_fd, &topts);
> 
> at least one of BPF programs (don't remember which one, could be
> raw_tp) supports specifying CPU index to run on, it would be nice to
> loop over CPUs, triggering BPF program on each one and filling per-CPU
> variable with current CPU index. Then we can check that all per-CPU
> values have expected values.
> 

Do you mean prog_tests/perf_buffer.c::trigger_on_cpu()?

Your suggestion looks good to me. I'll do it.

> 
>> +       ASSERT_OK(err, "update_percpu_data");
>> +       ASSERT_EQ(topts.retval, 0, "update_percpu_data retval");
>> +
>> +       key = 0;
>> +       err = bpf_map__lookup_elem(map, &key, sizeof(key), percpu_data,
>> +                                  value_sz, 0);
>> +       if (!ASSERT_OK(err, "bpf_map__lookup_elem"))
>> +               goto out;
>> +

[...]

Thanks,
Leon
Andrii Nakryiko Feb. 7, 2025, 7:48 p.m. UTC | #3
On Fri, Feb 7, 2025 at 2:00 AM Leon Hwang <leon.hwang@linux.dev> wrote:
>
>
>
> On 6/2/25 08:09, Andrii Nakryiko wrote:
> > On Mon, Jan 27, 2025 at 8:22 AM Leon Hwang <leon.hwang@linux.dev> wrote:
> >>
> >> If the arch, like s390x, does not support percpu insn, this case won't
> >> test global percpu data by checking -EOPNOTSUPP when load prog.
> >>
> >> The following APIs have been tested for global percpu data:
> >> 1. bpf_map__set_initial_value()
> >> 2. bpf_map__initial_value()
> >> 3. generated percpu struct pointer that points to internal map's data
> >> 4. bpf_map__lookup_elem() for global percpu data map
> >>
> >> cd tools/testing/selftests/bpf; ./test_progs -t global_percpu_data
> >> 124     global_percpu_data_init:OK
> >> Summary: 1/0 PASSED, 0 SKIPPED, 0 FAILED
> >>
> >> Signed-off-by: Leon Hwang <leon.hwang@linux.dev>
> >> ---
> >>  .../bpf/prog_tests/global_data_init.c         | 89 ++++++++++++++++++-
> >>  .../bpf/progs/test_global_percpu_data.c       | 21 +++++
> >>  2 files changed, 109 insertions(+), 1 deletion(-)
> >>  create mode 100644 tools/testing/selftests/bpf/progs/test_global_percpu_data.c
> >>

[...]

> >> +void test_global_percpu_data_init(void)
> >> +{
> >> +       struct test_global_percpu_data *skel = NULL;
> >> +       u64 *percpu_data = NULL;
> >
> > there is that test_global_percpu_data__percpu type you are declaring
> > in the BPF skeleton, right? We should try using it here.
> >
>
> No. bpftool does not generate test_global_percpu_data__percpu. The
> struct for global variables is embedded into skeleton struct.
>
> Should we generate type for global variables?

we already have custom skeleton-specific type for .data, .rodata,
.bss, so we should provide one for .percpu as well, yes

>
> > And for that array access, we should make sure that it's __aligned(8),
> > so indexing by CPU index works correctly.
> >
>
> Ack.
>
> > Also, you define per-CPU variable as int, but here it is u64, what's
> > up with that?
> >
>
> Like __aligned(8), it's to make sure 8-bytes aligned. It's better to use
> __aligned(8).

It's hacky, and it won't work correctly on big-endian architectures.
But you shouldn't need that if we have a struct representing this
.percpu memory image. Just make sure that struct has 8 byte alignment
(from bpftool side during skeleton generation, probably).

[...]

> > at least one of BPF programs (don't remember which one, could be
> > raw_tp) supports specifying CPU index to run on, it would be nice to
> > loop over CPUs, triggering BPF program on each one and filling per-CPU
> > variable with current CPU index. Then we can check that all per-CPU
> > values have expected values.
> >
>
> Do you mean prog_tests/perf_buffer.c::trigger_on_cpu()?
>

No, look at `cpu` field of `struct bpf_test_run_opts`. We should have
a selftest using it, so you can work backwards from that.

> Your suggestion looks good to me. I'll do it.
>
> >
> >> +       ASSERT_OK(err, "update_percpu_data");
> >> +       ASSERT_EQ(topts.retval, 0, "update_percpu_data retval");
> >> +
> >> +       key = 0;
> >> +       err = bpf_map__lookup_elem(map, &key, sizeof(key), percpu_data,
> >> +                                  value_sz, 0);
> >> +       if (!ASSERT_OK(err, "bpf_map__lookup_elem"))
> >> +               goto out;
> >> +
>
> [...]
>
> Thanks,
> Leon
>
>
Leon Hwang Feb. 10, 2025, 9:52 a.m. UTC | #4
On 8/2/25 03:48, Andrii Nakryiko wrote:
> On Fri, Feb 7, 2025 at 2:00 AM Leon Hwang <leon.hwang@linux.dev> wrote:
>>
>>
>>
>> On 6/2/25 08:09, Andrii Nakryiko wrote:
>>> On Mon, Jan 27, 2025 at 8:22 AM Leon Hwang <leon.hwang@linux.dev> wrote:
>>>>

[...]

>>>> +void test_global_percpu_data_init(void)
>>>> +{
>>>> +       struct test_global_percpu_data *skel = NULL;
>>>> +       u64 *percpu_data = NULL;
>>>
>>> there is that test_global_percpu_data__percpu type you are declaring
>>> in the BPF skeleton, right? We should try using it here.
>>>
>>
>> No. bpftool does not generate test_global_percpu_data__percpu. The
>> struct for global variables is embedded into skeleton struct.
>>
>> Should we generate type for global variables?
> 
> we already have custom skeleton-specific type for .data, .rodata,
> .bss, so we should provide one for .percpu as well, yes
> 

Yes, I've generated it. But it should not add '__aligned(8)' to it. Otherwise
bpf_map__set_initial_value() will fail because the aligned size is
different from the actual spec's value size.

If the actual value size is not __aligned(8), how should we lookup
element from percpu_array map?

The doc[0] does not provide a good practice for this case.

[0] https://docs.kernel.org/bpf/map_array.html#bpf-map-type-percpu-array

>>
>>> And for that array access, we should make sure that it's __aligned(8),
>>> so indexing by CPU index works correctly.
>>>
>>
>> Ack.
>>
>>> Also, you define per-CPU variable as int, but here it is u64, what's
>>> up with that?
>>>
>>
>> Like __aligned(8), it's to make sure 8-bytes aligned. It's better to use
>> __aligned(8).
> 
> It's hacky, and it won't work correctly on big-endian architectures.
> But you shouldn't need that if we have a struct representing this
> .percpu memory image. Just make sure that struct has 8 byte alignment
> (from bpftool side during skeleton generation, probably).
> 
> [...]
> 
>>> at least one of BPF programs (don't remember which one, could be
>>> raw_tp) supports specifying CPU index to run on, it would be nice to
>>> loop over CPUs, triggering BPF program on each one and filling per-CPU
>>> variable with current CPU index. Then we can check that all per-CPU
>>> values have expected values.
>>>
>>
>> Do you mean prog_tests/perf_buffer.c::trigger_on_cpu()?
>>
> 
> No, look at `cpu` field of `struct bpf_test_run_opts`. We should have
> a selftest using it, so you can work backwards from that.
> 

By referencing raw_tp, which uses `opts.cpu`: if I use it to test global
percpu data, it will fail to test on non-zero CPUs, because
bpf_prog_test_run_skb() disallows setting `opts.cpu`.

Then, when using `setaffinity` like perf_buffer.c::trigger_on_cpu(), it's OK
to run the added selftests on all CPUs.

So, should I use `setaffinity` or change the bpf prog type from tc to
raw_tp to use `opts.cpu`?

Thanks,
Leon
Andrii Nakryiko Feb. 11, 2025, 12:15 a.m. UTC | #5
On Mon, Feb 10, 2025 at 1:52 AM Leon Hwang <leon.hwang@linux.dev> wrote:
>
>
>
> On 8/2/25 03:48, Andrii Nakryiko wrote:
> > On Fri, Feb 7, 2025 at 2:00 AM Leon Hwang <leon.hwang@linux.dev> wrote:
> >>
> >>
> >>
> >> On 6/2/25 08:09, Andrii Nakryiko wrote:
> >>> On Mon, Jan 27, 2025 at 8:22 AM Leon Hwang <leon.hwang@linux.dev> wrote:
> >>>>
>
> [...]
>
> >>>> +void test_global_percpu_data_init(void)
> >>>> +{
> >>>> +       struct test_global_percpu_data *skel = NULL;
> >>>> +       u64 *percpu_data = NULL;
> >>>
> >>> there is that test_global_percpu_data__percpu type you are declaring
> >>> in the BPF skeleton, right? We should try using it here.
> >>>
> >>
> >> No. bpftool does not generate test_global_percpu_data__percpu. The
> >> struct for global variables is embedded into skeleton struct.
> >>
> >> Should we generate type for global variables?
> >
> > we already have custom skeleton-specific type for .data, .rodata,
> > .bss, so we should provide one for .percpu as well, yes
> >
>
> Yes, I've generated it. But it should not add '__aligned(8)' to it. Or
> bpf_map__set_initial_value() will fails because the aligned size is
> different from the actual spec's value size.
>
> If the actual value size is not __aligned(8), how should we lookup
> element from percpu_array map?

for .percpu libbpf can ensure that map is created with correct value
size that matches __aligned(8) size? It's an error-prone corner case
to non-multiple-of-8 size anyways (for per-CPU data), so just prevent
the issue altogether?...

>
> The doc[0] does not provide a good practice for this case.
>
> [0] https://docs.kernel.org/bpf/map_array.html#bpf-map-type-percpu-array
>
> >>
> >>> And for that array access, we should make sure that it's __aligned(8),
> >>> so indexing by CPU index works correctly.
> >>>
> >>
> >> Ack.
> >>
> >>> Also, you define per-CPU variable as int, but here it is u64, what's
> >>> up with that?
> >>>
> >>
> >> Like __aligned(8), it's to make sure 8-bytes aligned. It's better to use
> >> __aligned(8).
> >
> > It's hacky, and it won't work correctly on big-endian architectures.
> > But you shouldn't need that if we have a struct representing this
> > .percpu memory image. Just make sure that struct has 8 byte alignment
> > (from bpftool side during skeleton generation, probably).
> >
> > [...]
> >
> >>> at least one of BPF programs (don't remember which one, could be
> >>> raw_tp) supports specifying CPU index to run on, it would be nice to
> >>> loop over CPUs, triggering BPF program on each one and filling per-CPU
> >>> variable with current CPU index. Then we can check that all per-CPU
> >>> values have expected values.
> >>>
> >>
> >> Do you mean prog_tests/perf_buffer.c::trigger_on_cpu()?
> >>
> >
> > No, look at `cpu` field of `struct bpf_test_run_opts`. We should have
> > a selftest using it, so you can work backwards from that.
> >
>
> By referencing raw_tp, which uses `opts.cpu`, if use it to test global
> percpu data, it will fail to test on non-zero CPU, because
> bpf_prog_test_run_skb() disallows setting `opts.cpu`.
>
> Then, when `setaffinity` like perf_buffer.c::trigger_on_cpu(), it's OK
> to run the adding selftests on all CPUs.
>
> So, should I use `setaffinity` or change the bpf prog type from tc to
> raw_tp to use `opts.cpu`?

Is it a problem to use raw_tp (we do it a lot)? If not, I'd go with
raw_tp and opts.cpu.

>
> Thanks,
> Leon
>
Leon Hwang Feb. 12, 2025, 1:50 a.m. UTC | #6
On 11/2/25 08:15, Andrii Nakryiko wrote:
> On Mon, Feb 10, 2025 at 1:52 AM Leon Hwang <leon.hwang@linux.dev> wrote:
>>
>>
>>
>> On 8/2/25 03:48, Andrii Nakryiko wrote:
>>> On Fri, Feb 7, 2025 at 2:00 AM Leon Hwang <leon.hwang@linux.dev> wrote:

[...]

>>
>> Yes, I've generated it. But it should not add '__aligned(8)' to it. Or
>> bpf_map__set_initial_value() will fails because the aligned size is
>> different from the actual spec's value size.
>>
>> If the actual value size is not __aligned(8), how should we lookup
>> element from percpu_array map?
> 
> for .percpu libbpf can ensure that map is created with correct value
> size that matches __aligned(8) size? It's an error-prone corner case
> to non-multiple-of-8 size anyways (for per-CPU data), so just prevent
> the issue altogether?...
> 

Ack.

I'll update the value size of .percpu maps to match the __aligned(8) size, and
add '__aligned(8)' to the generated .percpu types.

>>
>> The doc[0] does not provide a good practice for this case.
>>
>> [0] https://docs.kernel.org/bpf/map_array.html#bpf-map-type-percpu-array
>>
>>>>
>>>>> And for that array access, we should make sure that it's __aligned(8),
>>>>> so indexing by CPU index works correctly.
>>>>>
>>>>
>>>> Ack.
>>>>
>>>>> Also, you define per-CPU variable as int, but here it is u64, what's
>>>>> up with that?
>>>>>
>>>>
>>>> Like __aligned(8), it's to make sure 8-bytes aligned. It's better to use
>>>> __aligned(8).
>>>
>>> It's hacky, and it won't work correctly on big-endian architectures.
>>> But you shouldn't need that if we have a struct representing this
>>> .percpu memory image. Just make sure that struct has 8 byte alignment
>>> (from bpftool side during skeleton generation, probably).
>>>
>>> [...]
>>>
>>>>> at least one of BPF programs (don't remember which one, could be
>>>>> raw_tp) supports specifying CPU index to run on, it would be nice to
>>>>> loop over CPUs, triggering BPF program on each one and filling per-CPU
>>>>> variable with current CPU index. Then we can check that all per-CPU
>>>>> values have expected values.
>>>>>
>>>>
>>>> Do you mean prog_tests/perf_buffer.c::trigger_on_cpu()?
>>>>
>>>
>>> No, look at `cpu` field of `struct bpf_test_run_opts`. We should have
>>> a selftest using it, so you can work backwards from that.
>>>
>>
>> By referencing raw_tp, which uses `opts.cpu`, if use it to test global
>> percpu data, it will fail to test on non-zero CPU, because
>> bpf_prog_test_run_skb() disallows setting `opts.cpu`.
>>
>> Then, when `setaffinity` like perf_buffer.c::trigger_on_cpu(), it's OK
>> to run the adding selftests on all CPUs.
>>
>> So, should I use `setaffinity` or change the bpf prog type from tc to
>> raw_tp to use `opts.cpu`?
> 
> Is it a problem to use raw_tp (we do it a lot)? If not, I'd go with
> raw_tp and opts.cpu.
> 

There's no problem to use raw_tp. Let us use raw_tp and opts.cpu.

Thanks,
Leon
diff mbox series

Patch

diff --git a/tools/testing/selftests/bpf/prog_tests/global_data_init.c b/tools/testing/selftests/bpf/prog_tests/global_data_init.c
index 8466332d7406f..a5d0890444f67 100644
--- a/tools/testing/selftests/bpf/prog_tests/global_data_init.c
+++ b/tools/testing/selftests/bpf/prog_tests/global_data_init.c
@@ -1,5 +1,6 @@ 
 // SPDX-License-Identifier: GPL-2.0
 #include <test_progs.h>
+#include "test_global_percpu_data.skel.h"
 
 void test_global_data_init(void)
 {
@@ -8,7 +9,7 @@  void test_global_data_init(void)
 	__u8 *buff = NULL, *newval = NULL;
 	struct bpf_object *obj;
 	struct bpf_map *map;
-        __u32 duration = 0;
+	__u32 duration = 0;
 	size_t sz;
 
 	obj = bpf_object__open_file(file, NULL);
@@ -60,3 +61,89 @@  void test_global_data_init(void)
 	free(newval);
 	bpf_object__close(obj);
 }
+
+/* Validate global .percpu data: the compile-time initial image, updating it
+ * via bpf_map__set_initial_value(), the skeleton's view of that image, and
+ * per-CPU isolation of updates made by the BPF program.
+ */
+void test_global_percpu_data_init(void)
+{
+	struct test_global_percpu_data *skel = NULL;
+	u64 *percpu_data = NULL;
+	struct bpf_map *map;
+	size_t init_data_sz;
+	char buff[128] = {};
+	int init_value = 2;
+	int key, value_sz;
+	int prog_fd, err;
+	int *init_data;
+	int num_cpus;
+
+	LIBBPF_OPTS(bpf_test_run_opts, topts,
+		    .data_in = buff,
+		    .data_size_in = sizeof(buff),
+		    .repeat = 1,
+	);
+
+	num_cpus = libbpf_num_possible_cpus();
+	if (!ASSERT_GT(num_cpus, 0, "libbpf_num_possible_cpus"))
+		return;
+
+	/* Per-CPU array values are exchanged with user space as num_cpus
+	 * 8-byte slots, hence u64 elements even though the BPF-side
+	 * variable is an int.
+	 *
+	 * calloc() zero-initializes, so no extra memset() is needed.
+	 */
+	value_sz = sizeof(*percpu_data) * num_cpus;
+	percpu_data = calloc(num_cpus, sizeof(*percpu_data));
+	if (!ASSERT_OK_PTR(percpu_data, "calloc percpu_data"))
+		return;
+
+	skel = test_global_percpu_data__open();
+	if (!ASSERT_OK_PTR(skel, "test_global_percpu_data__open"))
+		goto out;
+
+	/* The compile-time initializer must be visible before load. */
+	ASSERT_EQ(skel->percpu->percpu_data, -1, "skel->percpu->percpu_data");
+
+	map = skel->maps.percpu;
+	err = bpf_map__set_initial_value(map, &init_value,
+					 sizeof(init_value));
+	if (!ASSERT_OK(err, "bpf_map__set_initial_value"))
+		goto out;
+
+	init_data = bpf_map__initial_value(map, &init_data_sz);
+	if (!ASSERT_OK_PTR(init_data, "bpf_map__initial_value"))
+		goto out;
+
+	ASSERT_EQ(*init_data, init_value, "initial_value");
+	ASSERT_EQ(init_data_sz, sizeof(init_value), "initial_value size");
+
+	/* skel->percpu must alias the map's initial data image. */
+	if (!ASSERT_EQ((void *)init_data, (void *)skel->percpu, "skel->percpu"))
+		goto out;
+	ASSERT_EQ(skel->percpu->percpu_data, init_value, "skel->percpu->percpu_data");
+
+	err = test_global_percpu_data__load(skel);
+	if (err == -EOPNOTSUPP)
+		goto out;
+	if (!ASSERT_OK(err, "test_global_percpu_data__load"))
+		goto out;
+
+	ASSERT_EQ(bpf_map__type(map), BPF_MAP_TYPE_PERCPU_ARRAY,
+		  "bpf_map__type");
+
+	/* NOTE(review): the prog runs on whichever CPU test_run picks;
+	 * switching it to raw_tp would allow pinning via topts.cpu, as
+	 * discussed in the thread above.
+	 */
+	prog_fd = bpf_program__fd(skel->progs.update_percpu_data);
+	err = bpf_prog_test_run_opts(prog_fd, &topts);
+	ASSERT_OK(err, "update_percpu_data");
+	ASSERT_EQ(topts.retval, 0, "update_percpu_data retval");
+
+	key = 0;
+	err = bpf_map__lookup_elem(map, &key, sizeof(key), percpu_data,
+				   value_sz, 0);
+	if (!ASSERT_OK(err, "bpf_map__lookup_elem"))
+		goto out;
+
+	if (!ASSERT_LT(skel->bss->curr_cpu, num_cpus, "curr_cpu"))
+		goto out;
+	/* Only the CPU the prog ran on was updated ... */
+	ASSERT_EQ((int)percpu_data[skel->bss->curr_cpu], 1, "percpu_data");
+	/* ... every other CPU must still hold the initial value. */
+	if (num_cpus > 1)
+		ASSERT_EQ((int)percpu_data[(skel->bss->curr_cpu + 1) % num_cpus],
+			  init_value, "init_value");
+
+out:
+	test_global_percpu_data__destroy(skel);
+	free(percpu_data);	/* free(NULL) is a no-op; no guard needed */
+}
diff --git a/tools/testing/selftests/bpf/progs/test_global_percpu_data.c b/tools/testing/selftests/bpf/progs/test_global_percpu_data.c
new file mode 100644
index 0000000000000..731c3214b0bb4
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_global_percpu_data.c
@@ -0,0 +1,21 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright Leon Hwang */
+
+#include <linux/bpf.h>
+#include <linux/pkt_cls.h>
+
+#include <bpf/bpf_helpers.h>
+
+/* Lives in the .percpu section; initialized to -1 so user space can verify
+ * the initial image before the program runs.
+ */
+int percpu_data SEC(".percpu") = -1;
+/* Plain .bss global: records the CPU the prog ran on, read by user space. */
+int curr_cpu;
+
+SEC("tc")
+int update_percpu_data(struct __sk_buff *skb)
+{
+	/* Flip this CPU's .percpu slot from its initial value to 1 and
+	 * record which CPU this invocation ran on; slots belonging to
+	 * other CPUs must stay untouched.
+	 */
+	percpu_data = 1;
+	curr_cpu = bpf_get_smp_processor_id();
+
+	return TC_ACT_OK;
+}
+
+char _license[] SEC("license") = "GPL";