diff mbox series

[bpf-next] selftests/bpf: use getpagesize() to initialize ring buffer size

Message ID 20220127024939.364016-1-houtao1@huawei.com (mailing list archive)
State Changes Requested
Delegated to: BPF
Headers show
Series [bpf-next] selftests/bpf: use getpagesize() to initialize ring buffer size | expand

Checks

Context Check Description
netdev/tree_selection success Clearly marked for bpf-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success Link
netdev/cover_letter success Single patches do not need cover letters
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 0 this patch: 0
netdev/cc_maintainers warning 7 maintainers not CCed: linux-kselftest@vger.kernel.org kpsingh@kernel.org john.fastabend@gmail.com songliubraving@fb.com shuah@kernel.org haoluo@google.com memxor@gmail.com
netdev/build_clang success Errors and warnings before: 0 this patch: 0
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 0 this patch: 0
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 73 lines checked
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-next-PR success PR summary
bpf/vmtest-bpf-next success VM_Test

Commit Message

Hou Tao Jan. 27, 2022, 2:49 a.m. UTC
4096 is OK for x86-64, but for other archs with greater than 4KB
page size (e.g. 64KB under arm64), test_verifier for test case
"check valid spill/fill, ptr to mem" will fail, so just use
getpagesize() to initialize the ring buffer size. Do this for
test_progs as well.

Signed-off-by: Hou Tao <houtao1@huawei.com>
---
 tools/testing/selftests/bpf/prog_tests/d_path.c | 14 ++++++++++++--
 .../testing/selftests/bpf/prog_tests/test_ima.c | 17 +++++++++++++----
 tools/testing/selftests/bpf/progs/ima.c         |  1 -
 .../bpf/progs/test_d_path_check_types.c         |  1 -
 tools/testing/selftests/bpf/test_verifier.c     |  2 +-
 5 files changed, 26 insertions(+), 9 deletions(-)

Comments

Andrii Nakryiko Feb. 1, 2022, 12:02 a.m. UTC | #1
On Wed, Jan 26, 2022 at 6:34 PM Hou Tao <houtao1@huawei.com> wrote:
>
> 4096 is OK for x86-64, but for other archs with greater than 4KB
> page size (e.g. 64KB under arm64), test_verifier for test case
> "check valid spill/fill, ptr to mem" will fail, so just use
> getpagesize() to initialize the ring buffer size. Do this for
> test_progs as well.
>
> Signed-off-by: Hou Tao <houtao1@huawei.com>
> ---
>  tools/testing/selftests/bpf/prog_tests/d_path.c | 14 ++++++++++++--
>  .../testing/selftests/bpf/prog_tests/test_ima.c | 17 +++++++++++++----
>  tools/testing/selftests/bpf/progs/ima.c         |  1 -
>  .../bpf/progs/test_d_path_check_types.c         |  1 -
>  tools/testing/selftests/bpf/test_verifier.c     |  2 +-
>  5 files changed, 26 insertions(+), 9 deletions(-)
>

[...]

> @@ -86,5 +94,6 @@ void test_test_ima(void)
>         CHECK(err, "failed to run command", "%s, errno = %d\n", cmd, errno);
>  close_prog:
>         ring_buffer__free(ringbuf);
> +destroy_skel:
>         ima__destroy(skel);
>  }
> diff --git a/tools/testing/selftests/bpf/progs/ima.c b/tools/testing/selftests/bpf/progs/ima.c
> index 96060ff4ffc6..e192a9f16aea 100644
> --- a/tools/testing/selftests/bpf/progs/ima.c
> +++ b/tools/testing/selftests/bpf/progs/ima.c
> @@ -13,7 +13,6 @@ u32 monitored_pid = 0;
>
>  struct {
>         __uint(type, BPF_MAP_TYPE_RINGBUF);
> -       __uint(max_entries, 1 << 12);

Should we just bump it to 64/128/256KB instead? It's quite annoying to
do a split open and then load just due to this...

I'm also wondering if we should either teach the kernel to round up to
the closest power-of-2 multiple of page_size internally, or teach libbpf
to do this for RINGBUF maps. Thoughts?


>  } ringbuf SEC(".maps");
>
>  char _license[] SEC("license") = "GPL";
> diff --git a/tools/testing/selftests/bpf/progs/test_d_path_check_types.c b/tools/testing/selftests/bpf/progs/test_d_path_check_types.c
> index 7e02b7361307..1b68d4a65abb 100644
> --- a/tools/testing/selftests/bpf/progs/test_d_path_check_types.c
> +++ b/tools/testing/selftests/bpf/progs/test_d_path_check_types.c
> @@ -8,7 +8,6 @@ extern const int bpf_prog_active __ksym;
>
>  struct {
>         __uint(type, BPF_MAP_TYPE_RINGBUF);
> -       __uint(max_entries, 1 << 12);
>  } ringbuf SEC(".maps");
>
>  SEC("fentry/security_inode_getattr")
> diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
> index 29bbaa58233c..6acb5e747715 100644
> --- a/tools/testing/selftests/bpf/test_verifier.c
> +++ b/tools/testing/selftests/bpf/test_verifier.c
> @@ -931,7 +931,7 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type,
>         }
>         if (*fixup_map_ringbuf) {
>                 map_fds[20] = create_map(BPF_MAP_TYPE_RINGBUF, 0,
> -                                          0, 4096);
> +                                          0, getpagesize());
>                 do {
>                         prog[*fixup_map_ringbuf].imm = map_fds[20];
>                         fixup_map_ringbuf++;
> --
> 2.29.2
>
Hou Tao Feb. 1, 2022, 8:43 a.m. UTC | #2
Hi Andrii,

> >
> > 4096 is OK for x86-64, but for other archs with greater than 4KB
> > page size (e.g. 64KB under arm64), test_verifier for test case
> > "check valid spill/fill, ptr to mem" will fail, so just use
> > getpagesize() to initialize the ring buffer size. Do this for
> > test_progs as well.
> >
[...]

> > diff --git a/tools/testing/selftests/bpf/progs/ima.c b/tools/testing/selftests/bpf/progs/ima.c
> > index 96060ff4ffc6..e192a9f16aea 100644
> > --- a/tools/testing/selftests/bpf/progs/ima.c
> > +++ b/tools/testing/selftests/bpf/progs/ima.c
> > @@ -13,7 +13,6 @@ u32 monitored_pid = 0;
> >
> >  struct {
> >         __uint(type, BPF_MAP_TYPE_RINGBUF);
> > -       __uint(max_entries, 1 << 12);
> 
> Should we just bump it to 64/128/256KB instead? It's quite annoying to
> do a split open and then load just due to this...
>
Agreed.

> I'm also wondering if we should either teach kernel to round up to
> closes power-of-2 of page_size internally, or teach libbpf to do this
> for RINGBUF maps. Thoughts?
>
It seems that max_entries doesn't need to be page-aligned. For example,
if max_entries is 4096 and the page size is 65536, we can allocate a
65536-byte page and set rb->mask to 4095, and it will work. The only
downside is that 60KB of memory is wasted, but that is an implementation
detail and can be improved if subpage mapping can be supported.

So how about removing the page-alignment constraint in the kernel?

Regards,
Tao
Andrii Nakryiko Feb. 2, 2022, 1:29 a.m. UTC | #3
On Tue, Feb 1, 2022 at 12:43 AM Hou Tao <hotforest@gmail.com> wrote:
>
> Hi Andrii,
>
> > >
> > > 4096 is OK for x86-64, but for other archs with greater than 4KB
> > > page size (e.g. 64KB under arm64), test_verifier for test case
> > > "check valid spill/fill, ptr to mem" will fail, so just use
> > > getpagesize() to initialize the ring buffer size. Do this for
> > > test_progs as well.
> > >
> [...]
>
> > > diff --git a/tools/testing/selftests/bpf/progs/ima.c b/tools/testing/selftests/bpf/progs/ima.c
> > > index 96060ff4ffc6..e192a9f16aea 100644
> > > --- a/tools/testing/selftests/bpf/progs/ima.c
> > > +++ b/tools/testing/selftests/bpf/progs/ima.c
> > > @@ -13,7 +13,6 @@ u32 monitored_pid = 0;
> > >
> > >  struct {
> > >         __uint(type, BPF_MAP_TYPE_RINGBUF);
> > > -       __uint(max_entries, 1 << 12);
> >
> > Should we just bump it to 64/128/256KB instead? It's quite annoying to
> > do a split open and then load just due to this...
> >
> Agreed.
>
> > I'm also wondering if we should either teach kernel to round up to
> > closes power-of-2 of page_size internally, or teach libbpf to do this
> > for RINGBUF maps. Thoughts?
> >
> It seems that max_entries doesn't need to be page-aligned. For example
> if max_entries is 4096 and page size is 65536, we can allocate a
> 65536-sized page and set rb->mask 4095 and it will work. The only
> downside is 60KB memory is waster, but it is the implementation
> details and can be improved if subpage mapping can be supported.
>
> So how about removing the page-aligned restraint in kernel ?
>

No, if you read the BPF ringbuf code carefully you'll see that we map the
entire ringbuf data area twice in memory (see [0] for a lame ASCII
diagram), so that records that wrap at the end of the ringbuf
and continue at the start are still accessible as a linear array. It's
a very important guarantee, so the size has to be a multiple of the page
size. But auto-increasing it to the closest power-of-2 multiple of the
page size seems like a pretty low-impact change. Hard to imagine breaking
anything except some carefully crafted tests for ENOSPC behavior.

  [0] https://github.com/torvalds/linux/blob/master/kernel/bpf/ringbuf.c#L73-L89

> Regards,
> Tao
Hou Tao Feb. 2, 2022, 2:36 a.m. UTC | #4
Hi,

> >
> > Hi Andrii,
> >
> > > >
> > > > 4096 is OK for x86-64, but for other archs with greater than 4KB
> > > > page size (e.g. 64KB under arm64), test_verifier for test case
> > > > "check valid spill/fill, ptr to mem" will fail, so just use
> > > > getpagesize() to initialize the ring buffer size. Do this for
> > > > test_progs as well.
> > > >
> > [...]
> >
> > > > diff --git a/tools/testing/selftests/bpf/progs/ima.c b/tools/testing/selftests/bpf/progs/ima.c
> > > > index 96060ff4ffc6..e192a9f16aea 100644
> > > > --- a/tools/testing/selftests/bpf/progs/ima.c
> > > > +++ b/tools/testing/selftests/bpf/progs/ima.c
> > > > @@ -13,7 +13,6 @@ u32 monitored_pid = 0;
> > > >
> > > >  struct {
> > > >         __uint(type, BPF_MAP_TYPE_RINGBUF);
> > > > -       __uint(max_entries, 1 << 12);
> > >
> > > Should we just bump it to 64/128/256KB instead? It's quite annoying to
> > > do a split open and then load just due to this...
> > >
> > Agreed.
> >
> > > I'm also wondering if we should either teach kernel to round up to
> > > closes power-of-2 of page_size internally, or teach libbpf to do this
> > > for RINGBUF maps. Thoughts?
> > >
> > It seems that max_entries doesn't need to be page-aligned. For example
> > if max_entries is 4096 and page size is 65536, we can allocate a
> > 65536-sized page and set rb->mask 4095 and it will work. The only
> > downside is 60KB memory is waster, but it is the implementation
> > details and can be improved if subpage mapping can be supported.
> >
> > So how about removing the page-aligned restraint in kernel ?
> >
> 
> No, if you read BPF ringbuf code carefully you'll see that we map the
> entire ringbuf data twice in the memory (see [0] for lame ASCII
> diagram), so that records that are wrapped at the end of the ringbuf
> and go back to the start are still accessible as a linear array. It's
> a very important guarantee, so it has to be page size multiple. But
> auto-increasing it to the closest power-of-2 of page size seems like a
> pretty low-impact change. Hard to imagine breaking anything except
> some carefully crafted tests for ENOSPC behavior.
>

Yes, I know the double-map trick. What I tried to say is that we could:
(1) remove the page-alignment constraint on max_entries
(2) still allocate page-aligned memory for the ringbuf

instead of rounding max_entries up to the closest power-of-2 multiple of
the page size directly, so max_entries from userspace is unchanged and the
double-map trick still works.

> [0] https://github.com/torvalds/linux/blob/master/kernel/bpf/ringbuf.c#L73-L89

> > Regards,
> > Tao
Andrii Nakryiko Feb. 2, 2022, 6:45 a.m. UTC | #5
On Tue, Feb 1, 2022 at 6:36 PM Hou Tao <hotforest@gmail.com> wrote:
>
> Hi,
>
> > >
> > > Hi Andrii,
> > >
> > > > >
> > > > > 4096 is OK for x86-64, but for other archs with greater than 4KB
> > > > > page size (e.g. 64KB under arm64), test_verifier for test case
> > > > > "check valid spill/fill, ptr to mem" will fail, so just use
> > > > > getpagesize() to initialize the ring buffer size. Do this for
> > > > > test_progs as well.
> > > > >
> > > [...]
> > >
> > > > > diff --git a/tools/testing/selftests/bpf/progs/ima.c b/tools/testing/selftests/bpf/progs/ima.c
> > > > > index 96060ff4ffc6..e192a9f16aea 100644
> > > > > --- a/tools/testing/selftests/bpf/progs/ima.c
> > > > > +++ b/tools/testing/selftests/bpf/progs/ima.c
> > > > > @@ -13,7 +13,6 @@ u32 monitored_pid = 0;
> > > > >
> > > > >  struct {
> > > > >         __uint(type, BPF_MAP_TYPE_RINGBUF);
> > > > > -       __uint(max_entries, 1 << 12);
> > > >
> > > > Should we just bump it to 64/128/256KB instead? It's quite annoying to
> > > > do a split open and then load just due to this...
> > > >
> > > Agreed.
> > >
> > > > I'm also wondering if we should either teach kernel to round up to
> > > > closes power-of-2 of page_size internally, or teach libbpf to do this
> > > > for RINGBUF maps. Thoughts?
> > > >
> > > It seems that max_entries doesn't need to be page-aligned. For example
> > > if max_entries is 4096 and page size is 65536, we can allocate a
> > > 65536-sized page and set rb->mask 4095 and it will work. The only
> > > downside is 60KB memory is waster, but it is the implementation
> > > details and can be improved if subpage mapping can be supported.
> > >
> > > So how about removing the page-aligned restraint in kernel ?
> > >
> >
> > No, if you read BPF ringbuf code carefully you'll see that we map the
> > entire ringbuf data twice in the memory (see [0] for lame ASCII
> > diagram), so that records that are wrapped at the end of the ringbuf
> > and go back to the start are still accessible as a linear array. It's
> > a very important guarantee, so it has to be page size multiple. But
> > auto-increasing it to the closest power-of-2 of page size seems like a
> > pretty low-impact change. Hard to imagine breaking anything except
> > some carefully crafted tests for ENOSPC behavior.
> >
>
> Yes, i know the double map trick. What i tried to say is that:
> (1) remove the page-aligned restrain for max_entries
> (2) still allocate page-aligned memory for ringbuf
>
> instead of rounding max_entries up to closest power-of-2 page size
> directly, so max_entries from userspace is unchanged and double map trick
> still works.

I don't see how. Knowing the correct and exact size of the ringbuf
data area is mandatory for correctly consuming ringbuf data from
user-space. But if I'm missing something, feel free to give it a try
and see if it actually works.

>
> > [0] https://github.com/torvalds/linux/blob/master/kernel/bpf/ringbuf.c#L73-L89
>
> > > Regards,
> > > Tao
>
Hou Tao Feb. 3, 2022, 11:12 a.m. UTC | #6
Hi,

> On Tue, Feb 1, 2022 at 6:36 PM Hou Tao <hotforest@gmail.com> wrote:
> >
> > Hi,
> >
> > > >
> > > > Hi Andrii,
> > > >
> > > > > >
> > > > > > 4096 is OK for x86-64, but for other archs with greater than 4KB
> > > > > > page size (e.g. 64KB under arm64), test_verifier for test case
> > > > > > "check valid spill/fill, ptr to mem" will fail, so just use
> > > > > > getpagesize() to initialize the ring buffer size. Do this for
> > > > > > test_progs as well.
> > > > > >
> > > > [...]
> > > >
> > > > > > diff --git a/tools/testing/selftests/bpf/progs/ima.c b/tools/testing/selftests/bpf/progs/ima.c
> > > > > > index 96060ff4ffc6..e192a9f16aea 100644
> > > > > > --- a/tools/testing/selftests/bpf/progs/ima.c
> > > > > > +++ b/tools/testing/selftests/bpf/progs/ima.c
> > > > > > @@ -13,7 +13,6 @@ u32 monitored_pid = 0;
> > > > > >
> > > > > >  struct {
> > > > > >         __uint(type, BPF_MAP_TYPE_RINGBUF);
> > > > > > -       __uint(max_entries, 1 << 12);
> > > > >
> > > > > Should we just bump it to 64/128/256KB instead? It's quite annoying to
> > > > > do a split open and then load just due to this...
> > > > >
> > > > Agreed.
> > > >
> > > > > I'm also wondering if we should either teach kernel to round up to
> > > > > closes power-of-2 of page_size internally, or teach libbpf to do this
> > > > > for RINGBUF maps. Thoughts?
> > > > >
[...]
> > >
> > > No, if you read BPF ringbuf code carefully you'll see that we map the
> > > entire ringbuf data twice in the memory (see [0] for lame ASCII
> > > diagram), so that records that are wrapped at the end of the ringbuf
> > > and go back to the start are still accessible as a linear array. It's
> > > a very important guarantee, so it has to be page size multiple. But
> > > auto-increasing it to the closest power-of-2 of page size seems like a
> > > pretty low-impact change. Hard to imagine breaking anything except
> > > some carefully crafted tests for ENOSPC behavior.
> > >
> >
> > Yes, i know the double map trick. What i tried to say is that:
> > (1) remove the page-aligned restrain for max_entries
> > (2) still allocate page-aligned memory for ringbuf
> >
> > instead of rounding max_entries up to closest power-of-2 page size
> > directly, so max_entries from userspace is unchanged and double map trick
> > still works.
> 
> I don't see how. Knowing the correct and exact size of the ringbuf
> data area is mandatory for correctly consuming ringbuf data from
> user-space. But if I'm missing something, feel free to give it a try
> and see if it actually works.
> 
You are right. Userspace needs max_entries to mmap() the data
area, so max_entries must be page-size aligned.

If we want to do the automatic round-up, I think libbpf would be a better
place. If the round-up is done in the kernel, a userspace program
may still use the old max_entries to call mmap(); the consumer side will
then not work, which leads to confusion. If we do the auto round-up in
libbpf, the setup procedure is hidden from the libbpf user. I will add the
auto round-up and its tests to libbpf.

Regards
Tao
> 
> >
> > > [0] https://github.com/torvalds/linux/blob/master/kernel/bpf/ringbuf.c#L73-L89
> >
diff mbox series

Patch

diff --git a/tools/testing/selftests/bpf/prog_tests/d_path.c b/tools/testing/selftests/bpf/prog_tests/d_path.c
index 911345c526e6..abfa3697e34d 100644
--- a/tools/testing/selftests/bpf/prog_tests/d_path.c
+++ b/tools/testing/selftests/bpf/prog_tests/d_path.c
@@ -171,10 +171,20 @@  static void test_d_path_check_rdonly_mem(void)
 static void test_d_path_check_types(void)
 {
 	struct test_d_path_check_types *skel;
+	int err;
+
+	skel = test_d_path_check_types__open();
+	if (!ASSERT_OK_PTR(skel, "d_path_check_types open failed"))
+		return;
 
-	skel = test_d_path_check_types__open_and_load();
-	ASSERT_ERR_PTR(skel, "unexpected_load_passing_wrong_type");
+	err = bpf_map__set_max_entries(skel->maps.ringbuf, getpagesize());
+	if (!ASSERT_OK(err, "set max entries"))
+		goto cleanup;
 
+	err = test_d_path_check_types__load(skel);
+	ASSERT_EQ(err, -EACCES, "unexpected_load_passing_wrong_type");
+
+cleanup:
 	test_d_path_check_types__destroy(skel);
 }
 
diff --git a/tools/testing/selftests/bpf/prog_tests/test_ima.c b/tools/testing/selftests/bpf/prog_tests/test_ima.c
index 97d8a6f84f4a..ffc4d8b6e753 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_ima.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_ima.c
@@ -48,11 +48,19 @@  void test_test_ima(void)
 	char cmd[256];
 
 	int err, duration = 0;
-	struct ima *skel = NULL;
+	struct ima *skel;
 
-	skel = ima__open_and_load();
-	if (CHECK(!skel, "skel_load", "skeleton failed\n"))
-		goto close_prog;
+	skel = ima__open();
+	if (!ASSERT_OK_PTR(skel, "skel open"))
+		return;
+
+	err = bpf_map__set_max_entries(skel->maps.ringbuf, getpagesize());
+	if (!ASSERT_OK(err, "set max entries"))
+		goto destroy_skel;
+
+	err = ima__load(skel);
+	if (!ASSERT_OK(err, "skel load"))
+		goto destroy_skel;
 
 	ringbuf = ring_buffer__new(bpf_map__fd(skel->maps.ringbuf),
 				   process_sample, NULL, NULL);
@@ -86,5 +94,6 @@  void test_test_ima(void)
 	CHECK(err, "failed to run command", "%s, errno = %d\n", cmd, errno);
 close_prog:
 	ring_buffer__free(ringbuf);
+destroy_skel:
 	ima__destroy(skel);
 }
diff --git a/tools/testing/selftests/bpf/progs/ima.c b/tools/testing/selftests/bpf/progs/ima.c
index 96060ff4ffc6..e192a9f16aea 100644
--- a/tools/testing/selftests/bpf/progs/ima.c
+++ b/tools/testing/selftests/bpf/progs/ima.c
@@ -13,7 +13,6 @@  u32 monitored_pid = 0;
 
 struct {
 	__uint(type, BPF_MAP_TYPE_RINGBUF);
-	__uint(max_entries, 1 << 12);
 } ringbuf SEC(".maps");
 
 char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_d_path_check_types.c b/tools/testing/selftests/bpf/progs/test_d_path_check_types.c
index 7e02b7361307..1b68d4a65abb 100644
--- a/tools/testing/selftests/bpf/progs/test_d_path_check_types.c
+++ b/tools/testing/selftests/bpf/progs/test_d_path_check_types.c
@@ -8,7 +8,6 @@  extern const int bpf_prog_active __ksym;
 
 struct {
 	__uint(type, BPF_MAP_TYPE_RINGBUF);
-	__uint(max_entries, 1 << 12);
 } ringbuf SEC(".maps");
 
 SEC("fentry/security_inode_getattr")
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 29bbaa58233c..6acb5e747715 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -931,7 +931,7 @@  static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type,
 	}
 	if (*fixup_map_ringbuf) {
 		map_fds[20] = create_map(BPF_MAP_TYPE_RINGBUF, 0,
-					   0, 4096);
+					   0, getpagesize());
 		do {
 			prog[*fixup_map_ringbuf].imm = map_fds[20];
 			fixup_map_ringbuf++;