
bpf: Fix percpu address space issues

Message ID 20240804185604.54770-1-ubizjak@gmail.com (mailing list archive)
State Superseded
Delegated to: BPF
Series bpf: Fix percpu address space issues

Checks

Context Check Description
bpf/vmtest-bpf-next-PR success PR summary
netdev/series_format warning Single patches do not need cover letters; Target tree name not specified in the subject
netdev/tree_selection success Guessed tree name to be net-next
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 29 this patch: 29
netdev/build_tools success No tools touched, skip
netdev/cc_maintainers success CCed 13 of 13 maintainers
netdev/build_clang success Errors and warnings before: 29 this patch: 29
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 98 this patch: 88
netdev/checkpatch warning WARNING: Missing a blank line after declarations WARNING: Statements should start on a tabstop WARNING: line length of 81 exceeds 80 columns WARNING: line length of 92 exceeds 80 columns WARNING: line length of 95 exceeds 80 columns WARNING: suspect code indent for conditional statements (8, 15)
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 9 this patch: 9
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-next-VM_Test-0 success Logs for Lint
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-5 success Logs for aarch64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-3 success Logs for Validate matrix.py
bpf/vmtest-bpf-next-VM_Test-2 success Logs for Unittests
bpf/vmtest-bpf-next-VM_Test-4 success Logs for aarch64-gcc / build / build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-10 success Logs for aarch64-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-11 success Logs for s390x-gcc / build / build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-12 success Logs for s390x-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-17 success Logs for s390x-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-18 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-20 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-19 success Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-28 success Logs for x86_64-llvm-17 / build / build for x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-29 success Logs for x86_64-llvm-17 / build-release / build for x86_64 with llvm-17-O2
bpf/vmtest-bpf-next-VM_Test-34 success Logs for x86_64-llvm-17 / veristat
bpf/vmtest-bpf-next-VM_Test-35 success Logs for x86_64-llvm-18 / build / build for x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-36 success Logs for x86_64-llvm-18 / build-release / build for x86_64 with llvm-18-O2
bpf/vmtest-bpf-next-VM_Test-42 success Logs for x86_64-llvm-18 / veristat
bpf/vmtest-bpf-next-VM_Test-9 success Logs for aarch64-gcc / test (test_verifier, false, 360) / test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-7 success Logs for aarch64-gcc / test (test_progs, false, 360) / test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-8 success Logs for aarch64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-13 success Logs for s390x-gcc / test (test_maps, false, 360) / test_maps on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-16 success Logs for s390x-gcc / test (test_verifier, false, 360) / test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-15 success Logs for s390x-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-41 success Logs for x86_64-llvm-18 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-26 success Logs for x86_64-gcc / test (test_verifier, false, 360) / test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-6 success Logs for aarch64-gcc / test (test_maps, false, 360) / test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-21 success Logs for x86_64-gcc / test (test_maps, false, 360) / test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-14 success Logs for s390x-gcc / test (test_progs, false, 360) / test_progs on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-22 success Logs for x86_64-gcc / test (test_progs, false, 360) / test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-23 success Logs for x86_64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-24 success Logs for x86_64-gcc / test (test_progs_no_alu32_parallel, true, 30) / test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-25 success Logs for x86_64-gcc / test (test_progs_parallel, true, 30) / test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-31 success Logs for x86_64-llvm-17 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-27 success Logs for x86_64-gcc / veristat / veristat on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-33 success Logs for x86_64-llvm-17 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-30 success Logs for x86_64-llvm-17 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-32 success Logs for x86_64-llvm-17 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-38 success Logs for x86_64-llvm-18 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-37 success Logs for x86_64-llvm-18 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-40 success Logs for x86_64-llvm-18 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-39 success Logs for x86_64-llvm-18 / test (test_progs_cpuv4, false, 360) / test_progs_cpuv4 on x86_64 with llvm-18

Commit Message

Uros Bizjak Aug. 4, 2024, 6:55 p.m. UTC
In arraymap.c:

Assign the return values of bpf_array_map_seq_start() and
bpf_array_map_seq_next() from the non-percpu array.

Correct the declaration of the pptr pointer in __bpf_array_map_seq_show()
to void * __percpu * (a __percpu pointer to a void pointer) and
cast the value from the generic address space to the __percpu
address space via uintptr_t [1].

In hashtab.c:

Assign the return value from bpf_mem_cache_alloc() to a void pointer
and cast the value to void __percpu ** (a pointer to a percpu void
pointer) before dereferencing.

In memalloc.c:

Explicitly declare __percpu variables.

Cast obj to void __percpu **.

In helpers.c:

Cast ptr in BPF_CALL_1 and BPF_CALL_2 from generic address space
to __percpu address space via const uintptr_t [1].

Found by GCC's named address space checks.

There were no changes in the resulting object files.

[1] https://sparse.docs.kernel.org/en/latest/annotations.html#address-space-name

Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Andrii Nakryiko <andrii@kernel.org>
Cc: Martin KaFai Lau <martin.lau@linux.dev>
Cc: Eduard Zingerman <eddyz87@gmail.com>
Cc: Song Liu <song@kernel.org>
Cc: Yonghong Song <yonghong.song@linux.dev>
Cc: John Fastabend <john.fastabend@gmail.com>
Cc: KP Singh <kpsingh@kernel.org>
Cc: Stanislav Fomichev <sdf@fomichev.me>
Cc: Hao Luo <haoluo@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
---
 kernel/bpf/arraymap.c |  8 ++++----
 kernel/bpf/hashtab.c  |  8 ++++----
 kernel/bpf/helpers.c  |  4 ++--
 kernel/bpf/memalloc.c | 12 ++++++------
 4 files changed, 16 insertions(+), 16 deletions(-)

Comments

Eduard Zingerman Aug. 9, 2024, 8:28 a.m. UTC | #1
On Sun, 2024-08-04 at 20:55 +0200, Uros Bizjak wrote:

[...]

> Found by GCC's named address space checks.

Please provide some additional details.
I assume that the definition of __percpu was changed from
__attribute__((btf_type_tag(percpu))) to
__attribute__((address_space(??))), is that correct?

What is the motivation for this patch?
Currently __percpu is defined as a type tag and is used only by the BPF verifier,
where it seems to be relevant only for structure fields and function parameters.
This patch only changes local variables.

> There were no changes in the resulting object files.
> 
> [1] https://sparse.docs.kernel.org/en/latest/annotations.html#address-space-name
> 
> Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
> Cc: Alexei Starovoitov <ast@kernel.org>
> Cc: Daniel Borkmann <daniel@iogearbox.net>
> Cc: Andrii Nakryiko <andrii@kernel.org>
> Cc: Martin KaFai Lau <martin.lau@linux.dev>
> Cc: Eduard Zingerman <eddyz87@gmail.com>
> Cc: Song Liu <song@kernel.org>
> Cc: Yonghong Song <yonghong.song@linux.dev>
> Cc: John Fastabend <john.fastabend@gmail.com>
> Cc: KP Singh <kpsingh@kernel.org>
> Cc: Stanislav Fomichev <sdf@fomichev.me>
> Cc: Hao Luo <haoluo@google.com>
> Cc: Jiri Olsa <jolsa@kernel.org>
> ---
>  kernel/bpf/arraymap.c |  8 ++++----
>  kernel/bpf/hashtab.c  |  8 ++++----
>  kernel/bpf/helpers.c  |  4 ++--
>  kernel/bpf/memalloc.c | 12 ++++++------
>  4 files changed, 16 insertions(+), 16 deletions(-)
> 
> diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
> index 188e3c2effb2..544ca433275e 100644
> --- a/kernel/bpf/arraymap.c
> +++ b/kernel/bpf/arraymap.c
> @@ -600,7 +600,7 @@ static void *bpf_array_map_seq_start(struct seq_file *seq, loff_t *pos)
>  	array = container_of(map, struct bpf_array, map);
>  	index = info->index & array->index_mask;
>  	if (info->percpu_value_buf)
> -	       return array->pptrs[index];
> +	       return array->ptrs[index];

I disagree with this change.
One might say that indeed the address space is cast away here;
however, the value returned by this function is only used in the functions
bpf_array_map_seq_{next,show,stop}(), where it is guarded by the same
'if (info->percpu_value_buf)' condition to identify whether per_cpu_ptr()
is necessary.

>  	return array_map_elem_ptr(array, index);
>  }
>  
> @@ -619,7 +619,7 @@ static void *bpf_array_map_seq_next(struct seq_file *seq, void *v, loff_t *pos)
>  	array = container_of(map, struct bpf_array, map);
>  	index = info->index & array->index_mask;
>  	if (info->percpu_value_buf)
> -	       return array->pptrs[index];
> +	       return array->ptrs[index];

Same as above.

>  	return array_map_elem_ptr(array, index);
>  }
>  
> @@ -632,7 +632,7 @@ static int __bpf_array_map_seq_show(struct seq_file *seq, void *v)
>  	struct bpf_iter_meta meta;
>  	struct bpf_prog *prog;
>  	int off = 0, cpu = 0;
> -	void __percpu **pptr;
> +	void * __percpu *pptr;

Should this be 'void __percpu *pptr;'?
The value comes from the array->pptrs[*] field,
whose elements have the above type.

>  	u32 size;
>  
>  	meta.seq = seq;
> @@ -648,7 +648,7 @@ static int __bpf_array_map_seq_show(struct seq_file *seq, void *v)
>  		if (!info->percpu_value_buf) {
>  			ctx.value = v;
>  		} else {
> -			pptr = v;
> +			pptr = (void __percpu *)(uintptr_t)v;
>  			size = array->elem_size;
>  			for_each_possible_cpu(cpu) {
>  				copy_map_value_long(map, info->percpu_value_buf + off,
> diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
> index be1f64c20125..a49212bbda09 100644
> --- a/kernel/bpf/hashtab.c
> +++ b/kernel/bpf/hashtab.c
> @@ -1049,14 +1049,14 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
>  			pptr = htab_elem_get_ptr(l_new, key_size);
>  		} else {
>  			/* alloc_percpu zero-fills */
> -			pptr = bpf_mem_cache_alloc(&htab->pcpu_ma);
> -			if (!pptr) {
> +			void *ptr = bpf_mem_cache_alloc(&htab->pcpu_ma);
> +			if (!ptr) {

Why add an intermediate variable here?
Is casting the bpf_mem_cache_alloc() result to percpu not sufficient?
It looks like bpf_mem_cache_alloc() returns a percpu pointer;
should it be declared as such?

>  				bpf_mem_cache_free(&htab->ma, l_new);
>  				l_new = ERR_PTR(-ENOMEM);
>  				goto dec_count;
>  			}
> -			l_new->ptr_to_pptr = pptr;
> -			pptr = *(void **)pptr;
> +			l_new->ptr_to_pptr = ptr;
> +			pptr = *(void __percpu **)ptr;
>  		}
>  
>  		pcpu_init_value(htab, pptr, value, onallcpus);

[...]

> diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c
> index dec892ded031..b3858a76e0b3 100644
> --- a/kernel/bpf/memalloc.c
> +++ b/kernel/bpf/memalloc.c
> @@ -138,8 +138,8 @@ static struct llist_node notrace *__llist_del_first(struct llist_head *head)
>  static void *__alloc(struct bpf_mem_cache *c, int node, gfp_t flags)
>  {
>  	if (c->percpu_size) {
> -		void **obj = kmalloc_node(c->percpu_size, flags, node);
> -		void *pptr = __alloc_percpu_gfp(c->unit_size, 8, flags);
> +		void __percpu **obj = kmalloc_node(c->percpu_size, flags, node);

Why is __percpu needed for obj?
kmalloc_node is defined as 'alloc_hooks(kmalloc_node_noprof(__VA_ARGS__))';
alloc_hooks(X) is a macro that produces a result of type typeof(X),
and kmalloc_node_noprof() returns void *, not __percpu void *.
Am I missing something?

> +		void __percpu *pptr = __alloc_percpu_gfp(c->unit_size, 8, flags);
>  
>  		if (!obj || !pptr) {
>  			free_percpu(pptr);

[...]
Uros Bizjak Aug. 9, 2024, 10:15 a.m. UTC | #2
On Fri, Aug 9, 2024 at 10:28 AM Eduard Zingerman <eddyz87@gmail.com> wrote:
>
> On Sun, 2024-08-04 at 20:55 +0200, Uros Bizjak wrote:
>
> [...]
>
> > Found by GCC's named address space checks.
>
> Please provide some additional details.
> I assume that the definition of __percpu was changed from
> __attribute__((btf_type_tag(percpu))) to
> __attribute__((address_space(??)), is that correct?

This is correct. The fixes in the patch are based on the patch series
[1] that enables strict percpu checks via GCC's x86 named address space
qualifiers and, in its RFC state, hacks __seg_gs into the __percpu
qualifier (as can be seen in the 3/3 patch). The compiler will detect
pointer address space mismatches, e.g.:

--cut here--
int __seg_gs m;

int *foo (void) { return &m; }
--cut here--

v.c: In function ‘foo’:
v.c:5:26: error: return from pointer to non-enclosed address space
   5 | int *foo (void) { return &m; }
     |                          ^~
v.c:5:26: note: expected ‘int *’ but pointer is of type ‘__seg_gs int *’

and expects explicit casts via uintptr_t when these casts are really
intended ([2]; please also see [3] for a similar sparse requirement):

int *foo (void) { return (int *)(uintptr_t)&m; }

[1] https://lore.kernel.org/lkml/20240805184012.358023-1-ubizjak@gmail.com/
[2] https://gcc.gnu.org/onlinedocs/gcc/Named-Address-Spaces.html#x86-Named-Address-Spaces
[3] https://sparse.docs.kernel.org/en/latest/annotations.html#address-space-name

For reference, I have attached a test source file with more complex
checks that also includes the Linux percpu verification macro.
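
For illustration here (just a minimal sketch, not the attached file),
the kind of pattern the check is meant to accept looks like:

--cut here--
static DEFINE_PER_CPU(int, counter);

int read_counter(int cpu)
{
	/* per_cpu_ptr() converts the __percpu pointer to a plain pointer
	 * for the given cpu; returning &counter directly would now be
	 * rejected as an address space mismatch.
	 */
	return *per_cpu_ptr(&counter, cpu);
}
--cut here--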

> What is the motivation for this patch?

With the percpu checker patch applied, the build will break with the
above error due to mismatched address space pointers.

> Currently __percpu is defined as a type tag and is used only by BPF verifier,
> where it seems to be relevant only for structure fields and function parameters.
> This patch only changes local variables.

__percpu is also used for sparse checks (use C=1 CHECK="sparse
-Wcast-from-as"). Sparse also reports address space violations, but
its warnings are not fatal and are not as precise as the compiler's. So,
if you try to compile the bpf source when the kernel source is patched
with the percpu checker, the compiler will report several errors that
my bpf patch tries to fix.

> > There were no changes in the resulting object files.
> >
> > [1] https://sparse.docs.kernel.org/en/latest/annotations.html#address-space-name
> >
> > Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
> > Cc: Alexei Starovoitov <ast@kernel.org>
> > Cc: Daniel Borkmann <daniel@iogearbox.net>
> > Cc: Andrii Nakryiko <andrii@kernel.org>
> > Cc: Martin KaFai Lau <martin.lau@linux.dev>
> > Cc: Eduard Zingerman <eddyz87@gmail.com>
> > Cc: Song Liu <song@kernel.org>
> > Cc: Yonghong Song <yonghong.song@linux.dev>
> > Cc: John Fastabend <john.fastabend@gmail.com>
> > Cc: KP Singh <kpsingh@kernel.org>
> > Cc: Stanislav Fomichev <sdf@fomichev.me>
> > Cc: Hao Luo <haoluo@google.com>
> > Cc: Jiri Olsa <jolsa@kernel.org>
> > ---
> >  kernel/bpf/arraymap.c |  8 ++++----
> >  kernel/bpf/hashtab.c  |  8 ++++----
> >  kernel/bpf/helpers.c  |  4 ++--
> >  kernel/bpf/memalloc.c | 12 ++++++------
> >  4 files changed, 16 insertions(+), 16 deletions(-)
> >
> > diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
> > index 188e3c2effb2..544ca433275e 100644
> > --- a/kernel/bpf/arraymap.c
> > +++ b/kernel/bpf/arraymap.c
> > @@ -600,7 +600,7 @@ static void *bpf_array_map_seq_start(struct seq_file *seq, loff_t *pos)
> >       array = container_of(map, struct bpf_array, map);
> >       index = info->index & array->index_mask;
> >       if (info->percpu_value_buf)
> > -            return array->pptrs[index];
> > +            return array->ptrs[index];
>
> I disagree with this change.
> One might say that indeed the address space is cast away here,
> however, value returned by this function is only used in functions
> bpf_array_map_seq_{next,show,stop}(), where it is guarded by the same
> 'if (info->percpu_value_buf)' condition to identify if per_cpu_ptr()
> is necessary.

If this is the case, you have to inform the compiler that the address
space is cast away with an explicit (void *)(uintptr_t) cast placed
before the return. But looking at the union with the ptrs and pptrs members,
it looked to me like this was simply a case of the wrong union member
being accessed.
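
For example, keeping the pptrs member with the address space cast away
explicitly would look roughly like this (a sketch only, not part of
this patch):

	if (info->percpu_value_buf)
		return (void *)(uintptr_t)array->pptrs[index];
	return array_map_elem_ptr(array, index);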

> >       return array_map_elem_ptr(array, index);
> >  }
> >
> > @@ -619,7 +619,7 @@ static void *bpf_array_map_seq_next(struct seq_file *seq, void *v, loff_t *pos)
> >       array = container_of(map, struct bpf_array, map);
> >       index = info->index & array->index_mask;
> >       if (info->percpu_value_buf)
> > -            return array->pptrs[index];
> > +            return array->ptrs[index];
>
> Same as above.
>
> >       return array_map_elem_ptr(array, index);
> >  }
> >
> > @@ -632,7 +632,7 @@ static int __bpf_array_map_seq_show(struct seq_file *seq, void *v)
> >       struct bpf_iter_meta meta;
> >       struct bpf_prog *prog;
> >       int off = 0, cpu = 0;
> > -     void __percpu **pptr;
> > +     void * __percpu *pptr;
>
> Should this be 'void __percpu *pptr;?
> The value comes from array->pptrs[*] field,
> which has the above type for elements.

I didn't want to introduce semantic changes, so I have just changed
the base type to a __percpu one, due to:

per_cpu_ptr(pptr, cpu)

later in the code.

>
> >       u32 size;
> >
> >       meta.seq = seq;
> > @@ -648,7 +648,7 @@ static int __bpf_array_map_seq_show(struct seq_file *seq, void *v)
> >               if (!info->percpu_value_buf) {
> >                       ctx.value = v;
> >               } else {
> > -                     pptr = v;
> > +                     pptr = (void __percpu *)(uintptr_t)v;
> >                       size = array->elem_size;
> >                       for_each_possible_cpu(cpu) {
> >                               copy_map_value_long(map, info->percpu_value_buf + off,
> > diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
> > index be1f64c20125..a49212bbda09 100644
> > --- a/kernel/bpf/hashtab.c
> > +++ b/kernel/bpf/hashtab.c
> > @@ -1049,14 +1049,14 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
> >                       pptr = htab_elem_get_ptr(l_new, key_size);
> >               } else {
> >                       /* alloc_percpu zero-fills */
> > -                     pptr = bpf_mem_cache_alloc(&htab->pcpu_ma);
> > -                     if (!pptr) {
> > +                     void *ptr = bpf_mem_cache_alloc(&htab->pcpu_ma);
> > +                     if (!ptr) {
>
> Why adding an intermediate variable here?

Mainly to avoid several inter-address-space casts, because l_new->ptr_to_pptr
also expects an assignment from the generic address space.
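
Without the intermediate variable, the hunk would need something like
(a sketch, with pptr keeping its void __percpu * type and the NULL
check omitted):

	pptr = (void __percpu *)(uintptr_t)bpf_mem_cache_alloc(&htab->pcpu_ma);
	l_new->ptr_to_pptr = (void *)(uintptr_t)pptr;
	pptr = *(void __percpu **)(uintptr_t)pptr;

i.e. several inter-address-space casts that the intermediate variable avoids.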

> Is casting bpf_mem_cache_alloc() result to percpu not sufficient?
> It looks like bpf_mem_cache_alloc() returns a percpu pointer,
> should it be declared as such?

This function is also used a couple of lines above the changed hunk:

        l_new = bpf_mem_cache_alloc(&htab->ma);

where l_new is declared in the generic address space.

>
> >                               bpf_mem_cache_free(&htab->ma, l_new);
> >                               l_new = ERR_PTR(-ENOMEM);
> >                               goto dec_count;
> >                       }
> > -                     l_new->ptr_to_pptr = pptr;
> > -                     pptr = *(void **)pptr;
> > +                     l_new->ptr_to_pptr = ptr;
> > +                     pptr = *(void __percpu **)ptr;
> >               }
> >
> >               pcpu_init_value(htab, pptr, value, onallcpus);
>
> [...]
>
> > diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c
> > index dec892ded031..b3858a76e0b3 100644
> > --- a/kernel/bpf/memalloc.c
> > +++ b/kernel/bpf/memalloc.c
> > @@ -138,8 +138,8 @@ static struct llist_node notrace *__llist_del_first(struct llist_head *head)
> >  static void *__alloc(struct bpf_mem_cache *c, int node, gfp_t flags)
> >  {
> >       if (c->percpu_size) {
> > -             void **obj = kmalloc_node(c->percpu_size, flags, node);
> > -             void *pptr = __alloc_percpu_gfp(c->unit_size, 8, flags);
> > +             void __percpu **obj = kmalloc_node(c->percpu_size, flags, node);
>
> Why __percpu is needed for obj?

The new declaration declares a (generic) pointer to a percpu void pointer;
it is needed because some lines below we have:

        obj[1] = pptr;

and pptr needs to be declared in the __percpu named address space due to:

    free_percpu(pptr);

> kmalloc_node is defined as 'alloc_hooks(kmalloc_node_noprof(__VA_ARGS__))',
> alloc_hooks(X) is a macro and it produces result of type typeof(X),
> kmalloc_node_noprof() returns void*, not __percpu void*.
> Do I miss something?

Yes, as explained above, the declaration:

void __percpu **obj

somewhat unintuitively declares a (generic) pointer to a percpu void pointer,
so the types match (otherwise, the compiler would complain ;) ).
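
To spell out how these declarations read (just an illustration):

	void __percpu **obj;	/* plain pointer to a percpu void pointer */
	void * __percpu *pptr;	/* percpu pointer to a plain void pointer */
	void __percpu *p;	/* percpu pointer to void */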

Thanks,
Uros.
Eduard Zingerman Aug. 9, 2024, 8:29 p.m. UTC | #3
On Fri, 2024-08-09 at 12:15 +0200, Uros Bizjak wrote:
> On Fri, Aug 9, 2024 at 10:28 AM Eduard Zingerman <eddyz87@gmail.com> wrote:
> > 
> > On Sun, 2024-08-04 at 20:55 +0200, Uros Bizjak wrote:
> > 
> > [...]
> > 
> > > Found by GCC's named address space checks.
> > 
> > Please provide some additional details.
> > I assume that the definition of __percpu was changed from
> > __attribute__((btf_type_tag(percpu))) to
> > __attribute__((address_space(??)), is that correct?
> 
> This is correct. The fixes in the patch are based on the patch series
> [1] that enable strict percpu check via GCC's x86 named address space
> qualifiers, and in its RFC state hacks __seg_gs into the __percpu
> qualifier (as can be seen in the 3/3 patch). The compiler will detect
> pointer address space mismatches for e.g.:
> 
> --cut here--
> int __seg_gs m;
> 
> int *foo (void) { return &m; }
> --cut here--
> 
> v.c: In function ‘foo’:
> v.c:5:26: error: return from pointer to non-enclosed address space
>    5 | int *foo (void) { return &m; }
>      |                          ^~
> v.c:5:26: note: expected ‘int *’ but pointer is of type ‘__seg_gs int *’
> 
> and expects explicit casts via uintptr_t when these casts are really
> intended ([2], please also see [3] for similar sparse requirement):
> 
> int *foo (void) { return (int *)(uintptr_t)&m; }
> 
> [1] https://lore.kernel.org/lkml/20240805184012.358023-1-ubizjak@gmail.com/
> [2] https://gcc.gnu.org/onlinedocs/gcc/Named-Address-Spaces.html#x86-Named-Address-Spaces
> [3] https://sparse.docs.kernel.org/en/latest/annotations.html#address-space-name

Understood, thank you for the details.
Interestingly, clang does not require the intermediate (uintptr_t) cast, e.g.:

    $ cat test.c
    #define __as(N) __attribute__((address_space(N)))

    void *foo(void __as(1)* x) { return x; }         // error
    void *bar(void __as(1)* x) { return (void *)x; } // fine

    $ clang -o /dev/null -c test.c
    test.c:3:37: error: returning '__as(1) void *' from a function with result type 'void *' changes address space of pointer
        3 | void *foo(void __as(1)* x) { return x; }         // error
          |                                     ^
    1 error generated.
    

[...]

> > > diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
> > > index 188e3c2effb2..544ca433275e 100644
> > > --- a/kernel/bpf/arraymap.c
> > > +++ b/kernel/bpf/arraymap.c
> > > @@ -600,7 +600,7 @@ static void *bpf_array_map_seq_start(struct seq_file *seq, loff_t *pos)
> > >       array = container_of(map, struct bpf_array, map);
> > >       index = info->index & array->index_mask;
> > >       if (info->percpu_value_buf)
> > > -            return array->pptrs[index];
> > > +            return array->ptrs[index];
> > 
> > I disagree with this change.
> > One might say that indeed the address space is cast away here,
> > however, value returned by this function is only used in functions
> > bpf_array_map_seq_{next,show,stop}(), where it is guarded by the same
> > 'if (info->percpu_value_buf)' condition to identify if per_cpu_ptr()
> > is necessary.
> 
> If this is the case, you have to inform the compiler that address
> space is cast away with explicit (void *)(uintptr_t) cast, placed
> before return. But looking at the union with ptrs and pptrs members,
> it looked to me that it is just the case of wrong union member
> accessed.

I'd say it's better to use pptr and add a cast in this case.

[...]

> > > @@ -632,7 +632,7 @@ static int __bpf_array_map_seq_show(struct seq_file *seq, void *v)
> > >       struct bpf_iter_meta meta;
> > >       struct bpf_prog *prog;
> > >       int off = 0, cpu = 0;
> > > -     void __percpu **pptr;
> > > +     void * __percpu *pptr;
> > 
> > Should this be 'void __percpu *pptr;?
> > The value comes from array->pptrs[*] field,
> > which has the above type for elements.
> 
> I didn't want to introduce semantic changes, so I have just changed
> the base type fo __percpu one, due to:
> 
> per_cpu_ptr(pptr, cpu));
> 
> later in the code.

There would be no semantic changes if the type of pptr is changed to 'void __percpu *'.
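
(A sketch of what that would look like, reusing the cast from the patch:

	void __percpu *pptr;

	pptr = (void __percpu *)(uintptr_t)v;
	copy_map_value_long(map, info->percpu_value_buf + off,
			    per_cpu_ptr(pptr, cpu));

per_cpu_ptr() accepts the pointer either way, so the generated code
stays the same.)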

[...]

> > > diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
> > > index be1f64c20125..a49212bbda09 100644
> > > --- a/kernel/bpf/hashtab.c
> > > +++ b/kernel/bpf/hashtab.c
> > > @@ -1049,14 +1049,14 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
> > >                       pptr = htab_elem_get_ptr(l_new, key_size);
> > >               } else {
> > >                       /* alloc_percpu zero-fills */
> > > -                     pptr = bpf_mem_cache_alloc(&htab->pcpu_ma);
> > > -                     if (!pptr) {
> > > +                     void *ptr = bpf_mem_cache_alloc(&htab->pcpu_ma);
> > > +                     if (!ptr) {
> > 
> > Why adding an intermediate variable here?
> 
> Mainly to avoid several inter-as casts, because l_new->ptr_to_pptr
> also expects assignment from generic address space.

Ok, makes sense.

[...]

> > > diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c
> > > index dec892ded031..b3858a76e0b3 100644
> > > --- a/kernel/bpf/memalloc.c
> > > +++ b/kernel/bpf/memalloc.c
> > > @@ -138,8 +138,8 @@ static struct llist_node notrace *__llist_del_first(struct llist_head *head)
> > >  static void *__alloc(struct bpf_mem_cache *c, int node, gfp_t flags)
> > >  {
> > >       if (c->percpu_size) {
> > > -             void **obj = kmalloc_node(c->percpu_size, flags, node);
> > > -             void *pptr = __alloc_percpu_gfp(c->unit_size, 8, flags);
> > > +             void __percpu **obj = kmalloc_node(c->percpu_size, flags, node);
> > 
> > Why __percpu is needed for obj?
> 
> The new declaration declares "void pointer to percpu pointer", it is
> needed because some lines below we have:
> 
>         obj[1] = pptr;

Oh, right.

[...]
Uros Bizjak Aug. 10, 2024, 8:35 a.m. UTC | #4
On Fri, Aug 9, 2024 at 10:29 PM Eduard Zingerman <eddyz87@gmail.com> wrote:
>
> On Fri, 2024-08-09 at 12:15 +0200, Uros Bizjak wrote:
> > On Fri, Aug 9, 2024 at 10:28 AM Eduard Zingerman <eddyz87@gmail.com> wrote:
> > >
> > > On Sun, 2024-08-04 at 20:55 +0200, Uros Bizjak wrote:
> > >
> > > [...]
> > >
> > > > Found by GCC's named address space checks.
> > >
> > > Please provide some additional details.
> > > I assume that the definition of __percpu was changed from
> > > __attribute__((btf_type_tag(percpu))) to
> > > __attribute__((address_space(??)), is that correct?
> >
> > This is correct. The fixes in the patch are based on the patch series
> > [1] that enable strict percpu check via GCC's x86 named address space
> > qualifiers, and in its RFC state hacks __seg_gs into the __percpu
> > qualifier (as can be seen in the 3/3 patch). The compiler will detect
> > pointer address space mismatches for e.g.:
> >
> > --cut here--
> > int __seg_gs m;
> >
> > int *foo (void) { return &m; }
> > --cut here--
> >
> > v.c: In function ‘foo’:
> > v.c:5:26: error: return from pointer to non-enclosed address space
> >    5 | int *foo (void) { return &m; }
> >      |                          ^~
> > v.c:5:26: note: expected ‘int *’ but pointer is of type ‘__seg_gs int *’
> >
> > and expects explicit casts via uintptr_t when these casts are really
> > intended ([2], please also see [3] for similar sparse requirement):
> >
> > int *foo (void) { return (int *)(uintptr_t)&m; }
> >
> > [1] https://lore.kernel.org/lkml/20240805184012.358023-1-ubizjak@gmail.com/
> > [2] https://gcc.gnu.org/onlinedocs/gcc/Named-Address-Spaces.html#x86-Named-Address-Spaces
> > [3] https://sparse.docs.kernel.org/en/latest/annotations.html#address-space-name
>
> Understood, thank you for the details.
> Interestingly, clang does not require (uintptr_t) intermediate cast, e.g.:
>
>     $ cat test.c
>     #define __as(N) __attribute__((address_space(N)))
>
>     void *foo(void __as(1)* x) { return x; }         // error
>     void *bar(void __as(1)* x) { return (void *)x; } // fine
>
>     $ clang -o /dev/null -c test.c
>     test.c:3:37: error: returning '__as(1) void *' from a function with result type 'void *' changes address space of pointer
>         3 | void *foo(void __as(1)* x) { return x; }         // error
>           |                                     ^
>     1 error generated.

This is probably a deficiency in clang; sparse nicely explains the
reason for the intermediate cast:

-q-
Sparse treats pointers with different address spaces as distinct types
and will warn on casts (implicit or explicit) mixing the address
spaces. An exception to this is when the destination type is uintptr_t
or unsigned long since the resulting integer value is independent of
the address space and can’t be dereferenced without first casting it
back to a pointer type.
-/q-

>
>
> [...]
>
> > > > diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
> > > > index 188e3c2effb2..544ca433275e 100644
> > > > --- a/kernel/bpf/arraymap.c
> > > > +++ b/kernel/bpf/arraymap.c
> > > > @@ -600,7 +600,7 @@ static void *bpf_array_map_seq_start(struct seq_file *seq, loff_t *pos)
> > > >       array = container_of(map, struct bpf_array, map);
> > > >       index = info->index & array->index_mask;
> > > >       if (info->percpu_value_buf)
> > > > -            return array->pptrs[index];
> > > > +            return array->ptrs[index];
> > >
> > > I disagree with this change.
> > > One might say that indeed the address space is cast away here,
> > > however, value returned by this function is only used in functions
> > > bpf_array_map_seq_{next,show,stop}(), where it is guarded by the same
> > > 'if (info->percpu_value_buf)' condition to identify if per_cpu_ptr()
> > > is necessary.
> >
> > If this is the case, you have to inform the compiler that address
> > space is cast away with explicit (void *)(uintptr_t) cast, placed
> > before return. But looking at the union with ptrs and pptrs members,
> > it looked to me that it is just the case of wrong union member
> > accessed.
>
> I'd say it's better to use pptr and add a cast in this case.

OK, will do in a v2 patch, together with your other suggestion.

Thanks,
Uros.

Patch

diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 188e3c2effb2..544ca433275e 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -600,7 +600,7 @@  static void *bpf_array_map_seq_start(struct seq_file *seq, loff_t *pos)
 	array = container_of(map, struct bpf_array, map);
 	index = info->index & array->index_mask;
 	if (info->percpu_value_buf)
-	       return array->pptrs[index];
+	       return array->ptrs[index];
 	return array_map_elem_ptr(array, index);
 }
 
@@ -619,7 +619,7 @@  static void *bpf_array_map_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 	array = container_of(map, struct bpf_array, map);
 	index = info->index & array->index_mask;
 	if (info->percpu_value_buf)
-	       return array->pptrs[index];
+	       return array->ptrs[index];
 	return array_map_elem_ptr(array, index);
 }
 
@@ -632,7 +632,7 @@  static int __bpf_array_map_seq_show(struct seq_file *seq, void *v)
 	struct bpf_iter_meta meta;
 	struct bpf_prog *prog;
 	int off = 0, cpu = 0;
-	void __percpu **pptr;
+	void * __percpu *pptr;
 	u32 size;
 
 	meta.seq = seq;
@@ -648,7 +648,7 @@  static int __bpf_array_map_seq_show(struct seq_file *seq, void *v)
 		if (!info->percpu_value_buf) {
 			ctx.value = v;
 		} else {
-			pptr = v;
+			pptr = (void __percpu *)(uintptr_t)v;
 			size = array->elem_size;
 			for_each_possible_cpu(cpu) {
 				copy_map_value_long(map, info->percpu_value_buf + off,
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index be1f64c20125..a49212bbda09 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -1049,14 +1049,14 @@  static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
 			pptr = htab_elem_get_ptr(l_new, key_size);
 		} else {
 			/* alloc_percpu zero-fills */
-			pptr = bpf_mem_cache_alloc(&htab->pcpu_ma);
-			if (!pptr) {
+			void *ptr = bpf_mem_cache_alloc(&htab->pcpu_ma);
+			if (!ptr) {
 				bpf_mem_cache_free(&htab->ma, l_new);
 				l_new = ERR_PTR(-ENOMEM);
 				goto dec_count;
 			}
-			l_new->ptr_to_pptr = pptr;
-			pptr = *(void **)pptr;
+			l_new->ptr_to_pptr = ptr;
+			pptr = *(void __percpu **)ptr;
 		}
 
 		pcpu_init_value(htab, pptr, value, onallcpus);
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index d02ae323996b..dd7529153146 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -715,7 +715,7 @@  BPF_CALL_2(bpf_per_cpu_ptr, const void *, ptr, u32, cpu)
 	if (cpu >= nr_cpu_ids)
 		return (unsigned long)NULL;
 
-	return (unsigned long)per_cpu_ptr((const void __percpu *)ptr, cpu);
+	return (unsigned long)per_cpu_ptr((const void __percpu *)(const uintptr_t)ptr, cpu);
 }
 
 const struct bpf_func_proto bpf_per_cpu_ptr_proto = {
@@ -728,7 +728,7 @@  const struct bpf_func_proto bpf_per_cpu_ptr_proto = {
 
 BPF_CALL_1(bpf_this_cpu_ptr, const void *, percpu_ptr)
 {
-	return (unsigned long)this_cpu_ptr((const void __percpu *)percpu_ptr);
+	return (unsigned long)this_cpu_ptr((const void __percpu *)(const uintptr_t)percpu_ptr);
 }
 
 const struct bpf_func_proto bpf_this_cpu_ptr_proto = {
diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c
index dec892ded031..b3858a76e0b3 100644
--- a/kernel/bpf/memalloc.c
+++ b/kernel/bpf/memalloc.c
@@ -138,8 +138,8 @@  static struct llist_node notrace *__llist_del_first(struct llist_head *head)
 static void *__alloc(struct bpf_mem_cache *c, int node, gfp_t flags)
 {
 	if (c->percpu_size) {
-		void **obj = kmalloc_node(c->percpu_size, flags, node);
-		void *pptr = __alloc_percpu_gfp(c->unit_size, 8, flags);
+		void __percpu **obj = kmalloc_node(c->percpu_size, flags, node);
+		void __percpu *pptr = __alloc_percpu_gfp(c->unit_size, 8, flags);
 
 		if (!obj || !pptr) {
 			free_percpu(pptr);
@@ -253,7 +253,7 @@  static void alloc_bulk(struct bpf_mem_cache *c, int cnt, int node, bool atomic)
 static void free_one(void *obj, bool percpu)
 {
 	if (percpu) {
-		free_percpu(((void **)obj)[1]);
+		free_percpu(((void __percpu **)obj)[1]);
 		kfree(obj);
 		return;
 	}
@@ -509,8 +509,8 @@  static void prefill_mem_cache(struct bpf_mem_cache *c, int cpu)
  */
 int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu)
 {
-	struct bpf_mem_caches *cc, __percpu *pcc;
-	struct bpf_mem_cache *c, __percpu *pc;
+	struct bpf_mem_caches *cc; struct bpf_mem_caches __percpu *pcc;
+	struct bpf_mem_cache *c; struct bpf_mem_cache __percpu *pc;
 	struct obj_cgroup *objcg = NULL;
 	int cpu, i, unit_size, percpu_size = 0;
 
@@ -591,7 +591,7 @@  int bpf_mem_alloc_percpu_init(struct bpf_mem_alloc *ma, struct obj_cgroup *objcg
 
 int bpf_mem_alloc_percpu_unit_init(struct bpf_mem_alloc *ma, int size)
 {
-	struct bpf_mem_caches *cc, __percpu *pcc;
+	struct bpf_mem_caches *cc; struct bpf_mem_caches __percpu *pcc;
 	int cpu, i, unit_size, percpu_size;
 	struct obj_cgroup *objcg;
 	struct bpf_mem_cache *c;