Message ID | 20240209040608.98927-13-alexei.starovoitov@gmail.com |
---|---|
State | Changes Requested |
Delegated to: | BPF |
Series | bpf: Introduce BPF arena. |
On Fri, 9 Feb 2024 at 05:07, Alexei Starovoitov
<alexei.starovoitov@gmail.com> wrote:
>
> From: Alexei Starovoitov <ast@kernel.org>
>
> mmap() bpf_arena right after creation, since the kernel needs to
> remember the address returned from mmap. This is user_vm_start.
> LLVM will generate bpf_arena_cast_user() instructions where
> necessary and JIT will add upper 32-bit of user_vm_start
> to such pointers.
>
> Fix up bpf_map_mmap_sz() to compute mmap size as
> map->value_size * map->max_entries for arrays and
> PAGE_SIZE * map->max_entries for arena.
>
> Don't set BTF at arena creation time, since it doesn't support it.
>
> Signed-off-by: Alexei Starovoitov <ast@kernel.org>
> ---
>  tools/lib/bpf/libbpf.c        | 43 ++++++++++++++++++++++++++++++-----
>  tools/lib/bpf/libbpf_probes.c |  7 ++++++
>  2 files changed, 44 insertions(+), 6 deletions(-)
>
[...]
> @@ -4908,6 +4924,21 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b
>  	if (map->fd == map_fd)
>  		return 0;
>
> +	if (def->type == BPF_MAP_TYPE_ARENA) {
> +		map->mmaped = mmap((void *)map->map_extra, bpf_map_mmap_sz(map),
> +				   PROT_READ | PROT_WRITE,
> +				   map->map_extra ? MAP_SHARED | MAP_FIXED : MAP_SHARED,
> +				   map_fd, 0);
> +		if (map->mmaped == MAP_FAILED) {
> +			err = -errno;
> +			map->mmaped = NULL;
> +			close(map_fd);
> +			pr_warn("map '%s': failed to mmap bpf_arena: %d\n",
> +				bpf_map__name(map), err);
> +			return err;
> +		}
> +	}
> +

Would it be possible to introduce a public API accessor for getting
the value of map->mmaped?
Otherwise one would have to parse through /proc/self/maps in case
map_extra is 0.

The use case is to be able to use the arena as a backing store for
userspace malloc arenas, so that we can pass through malloc/mallocx
calls (or class specific operator new) directly to the malloc arena
using the BPF arena.
In such a case a lot of the burden of converting existing data
structures or code can be avoided by making much of the process
transparent.
Userspace malloced objects can also be easily shared to BPF progs as
a pool through a bpf_ma style per-CPU allocator.

> [...]
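To make the backing-store idea concrete, here is a minimal sketch of a bump
allocator carved out of the arena mapping; the names arena_init/arena_alloc
are illustrative, and base/size are assumed to be the pointer and length of
the arena map's mmap()ed region:

static char *arena_cur, *arena_end;

static void arena_init(void *base, size_t size)
{
	/* base/size: the arena map's user VM region from mmap() */
	arena_cur = base;
	arena_end = arena_cur + size;
}

static void *arena_alloc(size_t sz)
{
	void *p;

	sz = (sz + 7) & ~(size_t)7;		/* keep 8-byte alignment */
	if (sz > (size_t)(arena_end - arena_cur))
		return NULL;			/* arena exhausted */
	p = arena_cur;
	arena_cur += sz;
	return p;
}

Since the region is shared with the kernel-side arena mapping, objects handed
out this way are directly visible to BPF programs as well.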
On Thu, 2024-02-08 at 20:06 -0800, Alexei Starovoitov wrote:
[...]

> @@ -9830,8 +9861,8 @@ int bpf_map__set_value_size(struct bpf_map *map, __u32 size)
>  	int err;
>  	size_t mmap_old_sz, mmap_new_sz;
>
> -	mmap_old_sz = bpf_map_mmap_sz(map->def.value_size, map->def.max_entries);
> -	mmap_new_sz = bpf_map_mmap_sz(size, map->def.max_entries);
> +	mmap_old_sz = bpf_map_mmap_sz(map);
> +	mmap_new_sz = __bpf_map_mmap_sz(size, map->def.max_entries);
> 	err = bpf_map_mmap_resize(map, mmap_old_sz, mmap_new_sz);
> 	if (err) {
> 		pr_warn("map '%s': failed to resize memory-mapped region: %d\n",

I think that as is bpf_map__set_value_size() won't work for arenas.
The bpf_map_mmap_resize() does the following:

static int bpf_map_mmap_resize(struct bpf_map *map, size_t old_sz, size_t new_sz)
{
	...
	mmaped = mmap(NULL, new_sz, PROT_READ | PROT_WRITE,
		      MAP_SHARED | MAP_ANONYMOUS, -1, 0);
	...
	memcpy(mmaped, map->mmaped, min(old_sz, new_sz));
	munmap(map->mmaped, old_sz);
	map->mmaped = mmaped;
	...
}

Which does not seem to tie the new mapping to arena, or am I missing something?
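To make the mismatch concrete, compare the two mappings involved (a sketch
assembled from the snippets above, not new behavior): the arena mapping is
shared and backed by the map fd, while the resize path creates anonymous
memory with no connection to that fd, so the kernel's recorded user_vm_start
would no longer match the pages userspace writes to.

/* arena mapping at creation time: backed by the arena's map_fd */
map->mmaped = mmap((void *)map->map_extra, bpf_map_mmap_sz(map),
		   PROT_READ | PROT_WRITE, MAP_SHARED, map_fd, 0);

/* what bpf_map_mmap_resize() would replace it with: anonymous
 * memory, unrelated to map_fd */
mmaped = mmap(NULL, new_sz, PROT_READ | PROT_WRITE,
	      MAP_SHARED | MAP_ANONYMOUS, -1, 0);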
On Fri, Feb 9, 2024 at 11:17 PM Kumar Kartikeya Dwivedi <memxor@gmail.com> wrote:
>
> On Fri, 9 Feb 2024 at 05:07, Alexei Starovoitov
> <alexei.starovoitov@gmail.com> wrote:
> >
> > From: Alexei Starovoitov <ast@kernel.org>
> >
[...]
> > +	if (def->type == BPF_MAP_TYPE_ARENA) {
> > +		map->mmaped = mmap((void *)map->map_extra, bpf_map_mmap_sz(map),
> > +				   PROT_READ | PROT_WRITE,
> > +				   map->map_extra ? MAP_SHARED | MAP_FIXED : MAP_SHARED,
> > +				   map_fd, 0);
> > +		if (map->mmaped == MAP_FAILED) {
> > +			err = -errno;
> > +			map->mmaped = NULL;
> > +			close(map_fd);
> > +			pr_warn("map '%s': failed to mmap bpf_arena: %d\n",
> > +				bpf_map__name(map), err);
> > +			return err;
> > +		}
> > +	}
> > +
>
> Would it be possible to introduce a public API accessor for getting
> the value of map->mmaped?

That would be bpf_map__initial_value(), no?

> Otherwise one would have to parse through /proc/self/maps in case
> map_extra is 0.
>
> The use case is to be able to use the arena as a backing store for
> userspace malloc arenas, so that we can pass through malloc/mallocx
> calls (or class specific operator new) directly to the malloc arena
> using the BPF arena.
> In such a case a lot of the burden of converting existing data
> structures or code can be avoided by making much of the process
> transparent.
> Userspace malloced objects can also be easily shared to BPF progs as
> a pool through a bpf_ma style per-CPU allocator.
>
> > [...]
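A minimal usage sketch of the accessor suggested above, assuming an
already-loaded bpf_object *obj with an arena map named "arena" (both names
are illustrative); for mmap-able maps bpf_map__initial_value() hands back
map->mmaped, i.e. the arena's user VM base:

size_t sz;
struct bpf_map *map = bpf_object__find_map_by_name(obj, "arena");
void *arena_base = bpf_map__initial_value(map, &sz);

This avoids having to parse /proc/self/maps when map_extra is 0 and the
kernel picked the address.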
On Mon, Feb 12, 2024 at 10:12 AM Eduard Zingerman <eddyz87@gmail.com> wrote:
>
> On Thu, 2024-02-08 at 20:06 -0800, Alexei Starovoitov wrote:
> [...]
>
> > @@ -9830,8 +9861,8 @@ int bpf_map__set_value_size(struct bpf_map *map, __u32 size)
> >  	int err;
> >  	size_t mmap_old_sz, mmap_new_sz;
> >
> > -	mmap_old_sz = bpf_map_mmap_sz(map->def.value_size, map->def.max_entries);
> > -	mmap_new_sz = bpf_map_mmap_sz(size, map->def.max_entries);
> > +	mmap_old_sz = bpf_map_mmap_sz(map);
> > +	mmap_new_sz = __bpf_map_mmap_sz(size, map->def.max_entries);
> > 	err = bpf_map_mmap_resize(map, mmap_old_sz, mmap_new_sz);
> > 	if (err) {
> > 		pr_warn("map '%s': failed to resize memory-mapped region: %d\n",
>
> I think that as is bpf_map__set_value_size() won't work for arenas.

It doesn't and doesn't work for ringbuf either.
I guess we can add a filter by map type, but I'm not sure
how big this can of worms (extra checks) will be.
There are probably many libbpf apis that can be misused.
Like bpf_map__set_type()
On Mon, 2024-02-12 at 12:14 -0800, Alexei Starovoitov wrote:
[...]
> It doesn't and doesn't work for ringbuf either.
> I guess we can add a filter by map type, but I'm not sure
> how big this can of worms (extra checks) will be.
> There are probably many libbpf apis that can be misused.
> Like bpf_map__set_type()

Right, probably such extra checks should be a subject of
a different patch-set (if any).
On Mon, 12 Feb 2024 at 20:11, Andrii Nakryiko <andrii.nakryiko@gmail.com> wrote:
>
> On Fri, Feb 9, 2024 at 11:17 PM Kumar Kartikeya Dwivedi
> <memxor@gmail.com> wrote:
> >
[...]
> > Would it be possible to introduce a public API accessor for getting
> > the value of map->mmaped?
>
> That would be bpf_map__initial_value(), no?
>

Ah, indeed, that would do the trick. Thanks Andrii!

> [...]
On Thu, Feb 8, 2024 at 8:07 PM Alexei Starovoitov
<alexei.starovoitov@gmail.com> wrote:
>
> From: Alexei Starovoitov <ast@kernel.org>
>
> mmap() bpf_arena right after creation, since the kernel needs to
> remember the address returned from mmap. This is user_vm_start.
> LLVM will generate bpf_arena_cast_user() instructions where
> necessary and JIT will add upper 32-bit of user_vm_start
> to such pointers.
>
> Fix up bpf_map_mmap_sz() to compute mmap size as
> map->value_size * map->max_entries for arrays and
> PAGE_SIZE * map->max_entries for arena.
>
> Don't set BTF at arena creation time, since it doesn't support it.
>
> Signed-off-by: Alexei Starovoitov <ast@kernel.org>
> ---
[...]
> -static size_t bpf_map_mmap_sz(unsigned int value_sz, unsigned int max_entries)
> +static size_t __bpf_map_mmap_sz(unsigned int value_sz, unsigned int max_entries)

please rename this to array_map_mmap_sz, underscores are not very meaningful

[...]
> +	if (def->type == BPF_MAP_TYPE_ARENA) {
> +		map->mmaped = mmap((void *)map->map_extra, bpf_map_mmap_sz(map),
> +				   PROT_READ | PROT_WRITE,
> +				   map->map_extra ? MAP_SHARED | MAP_FIXED : MAP_SHARED,
> +				   map_fd, 0);
> +		if (map->mmaped == MAP_FAILED) {
> +			err = -errno;
> +			map->mmaped = NULL;
> +			close(map_fd);
> +			pr_warn("map '%s': failed to mmap bpf_arena: %d\n",
> +				bpf_map__name(map), err);

seems like we just use `map->name` directly elsewhere in this
function, let's keep it consistent

> +			return err;
> +		}
> +	}
> +
[...]
> @@ -9830,8 +9861,8 @@ int bpf_map__set_value_size(struct bpf_map *map, __u32 size)
>  	int err;
>  	size_t mmap_old_sz, mmap_new_sz;
>

this logic assumes ARRAY (which are the only ones so far that could
have `map->mmaped != NULL`), so I think we should error out for ARENA
maps here, instead of silently doing the wrong thing?

if (map->type != BPF_MAP_TYPE_ARRAY)
	return -EOPNOTSUPP;

should do

> -	mmap_old_sz = bpf_map_mmap_sz(map->def.value_size, map->def.max_entries);
> -	mmap_new_sz = bpf_map_mmap_sz(size, map->def.max_entries);
> +	mmap_old_sz = bpf_map_mmap_sz(map);
> +	mmap_new_sz = __bpf_map_mmap_sz(size, map->def.max_entries);
> 	err = bpf_map_mmap_resize(map, mmap_old_sz, mmap_new_sz);
> 	if (err) {
> 		pr_warn("map '%s': failed to resize memory-mapped region: %d\n",
[...]
> +	case BPF_MAP_TYPE_ARENA:
> +		key_size = 0;
> +		value_size = 0;
> +		max_entries = 1;	/* one page */
> +		opts.map_extra = 0;	/* can mmap() at any address */
> +		opts.map_flags = BPF_F_MMAPABLE;
> +		break;
[...]
> --
> 2.34.1
>
On Tue, Feb 13, 2024 at 3:15 PM Andrii Nakryiko
<andrii.nakryiko@gmail.com> wrote:
>
> On Thu, Feb 8, 2024 at 8:07 PM Alexei Starovoitov
> <alexei.starovoitov@gmail.com> wrote:
> >
[...]
> > -static size_t bpf_map_mmap_sz(unsigned int value_sz, unsigned int max_entries)
> > +static size_t __bpf_map_mmap_sz(unsigned int value_sz, unsigned int max_entries)
>
> please rename this to array_map_mmap_sz, underscores are not very meaningful

makes sense.

[...]
> > +			pr_warn("map '%s': failed to mmap bpf_arena: %d\n",
> > +				bpf_map__name(map), err);
>
> seems like we just use `map->name` directly elsewhere in this
> function, let's keep it consistent

that was to match the next patch, since arena is using real_name.
map->name is also correct and will have the same name here.
The next patch will have two arena maps, but one will never be passed
into this function to create a real kernel map.
So I can use map->name here, but bpf_map__name() is a bit more correct.

[...]
> this logic assumes ARRAY (which are the only ones so far that could
> have `map->mmaped != NULL`), so I think we should error out for ARENA
> maps here, instead of silently doing the wrong thing?
>
> if (map->type != BPF_MAP_TYPE_ARRAY)
> 	return -EOPNOTSUPP;
>
> should do

Good point. Will do.
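A sketch of where the agreed-upon guard could land in
bpf_map__set_value_size(); the exact placement and the use of map->def.type
(spelled map->type in the review shorthand) are assumptions, with elided
lines as in the quoted function:

int bpf_map__set_value_size(struct bpf_map *map, __u32 size)
{
	int err;
	size_t mmap_old_sz, mmap_new_sz;

	/* resize-by-remap only makes sense for mmap-able ARRAY maps;
	 * reject arenas (and other map types) up front */
	if (map->def.type != BPF_MAP_TYPE_ARRAY)
		return -EOPNOTSUPP;

	mmap_old_sz = bpf_map_mmap_sz(map);
	mmap_new_sz = __bpf_map_mmap_sz(size, map->def.max_entries);
	err = bpf_map_mmap_resize(map, mmap_old_sz, mmap_new_sz);
	...
}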
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 01f407591a92..4880d623098d 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -185,6 +185,7 @@ static const char * const map_type_name[] = {
 	[BPF_MAP_TYPE_BLOOM_FILTER]	= "bloom_filter",
 	[BPF_MAP_TYPE_USER_RINGBUF]	= "user_ringbuf",
 	[BPF_MAP_TYPE_CGRP_STORAGE]	= "cgrp_storage",
+	[BPF_MAP_TYPE_ARENA]		= "arena",
 };
 
 static const char * const prog_type_name[] = {
@@ -1577,7 +1578,7 @@ static struct bpf_map *bpf_object__add_map(struct bpf_object *obj)
 	return map;
 }
 
-static size_t bpf_map_mmap_sz(unsigned int value_sz, unsigned int max_entries)
+static size_t __bpf_map_mmap_sz(unsigned int value_sz, unsigned int max_entries)
 {
 	const long page_sz = sysconf(_SC_PAGE_SIZE);
 	size_t map_sz;
@@ -1587,6 +1588,20 @@ static size_t bpf_map_mmap_sz(unsigned int value_sz, unsigned int max_entries)
 	return map_sz;
 }
 
+static size_t bpf_map_mmap_sz(const struct bpf_map *map)
+{
+	const long page_sz = sysconf(_SC_PAGE_SIZE);
+
+	switch (map->def.type) {
+	case BPF_MAP_TYPE_ARRAY:
+		return __bpf_map_mmap_sz(map->def.value_size, map->def.max_entries);
+	case BPF_MAP_TYPE_ARENA:
+		return page_sz * map->def.max_entries;
+	default:
+		return 0; /* not supported */
+	}
+}
+
 static int bpf_map_mmap_resize(struct bpf_map *map, size_t old_sz, size_t new_sz)
 {
 	void *mmaped;
@@ -1740,7 +1755,7 @@ bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type,
 	pr_debug("map '%s' (global data): at sec_idx %d, offset %zu, flags %x.\n",
 		 map->name, map->sec_idx, map->sec_offset, def->map_flags);
 
-	mmap_sz = bpf_map_mmap_sz(map->def.value_size, map->def.max_entries);
+	mmap_sz = bpf_map_mmap_sz(map);
 	map->mmaped = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE,
 			   MAP_SHARED | MAP_ANONYMOUS, -1, 0);
 	if (map->mmaped == MAP_FAILED) {
@@ -4852,6 +4867,7 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b
 	case BPF_MAP_TYPE_SOCKHASH:
 	case BPF_MAP_TYPE_QUEUE:
 	case BPF_MAP_TYPE_STACK:
+	case BPF_MAP_TYPE_ARENA:
 		create_attr.btf_fd = 0;
 		create_attr.btf_key_type_id = 0;
 		create_attr.btf_value_type_id = 0;
@@ -4908,6 +4924,21 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b
 	if (map->fd == map_fd)
 		return 0;
 
+	if (def->type == BPF_MAP_TYPE_ARENA) {
+		map->mmaped = mmap((void *)map->map_extra, bpf_map_mmap_sz(map),
+				   PROT_READ | PROT_WRITE,
+				   map->map_extra ? MAP_SHARED | MAP_FIXED : MAP_SHARED,
+				   map_fd, 0);
+		if (map->mmaped == MAP_FAILED) {
+			err = -errno;
+			map->mmaped = NULL;
+			close(map_fd);
+			pr_warn("map '%s': failed to mmap bpf_arena: %d\n",
+				bpf_map__name(map), err);
+			return err;
+		}
+	}
+
 	/* Keep placeholder FD value but now point it to the BPF map object.
 	 * This way everything that relied on this map's FD (e.g., relocated
 	 * ldimm64 instructions) will stay valid and won't need adjustments.
@@ -8582,7 +8613,7 @@ static void bpf_map__destroy(struct bpf_map *map)
 	if (map->mmaped) {
 		size_t mmap_sz;
 
-		mmap_sz = bpf_map_mmap_sz(map->def.value_size, map->def.max_entries);
+		mmap_sz = bpf_map_mmap_sz(map);
 		munmap(map->mmaped, mmap_sz);
 		map->mmaped = NULL;
 	}
@@ -9830,8 +9861,8 @@ int bpf_map__set_value_size(struct bpf_map *map, __u32 size)
 	int err;
 	size_t mmap_old_sz, mmap_new_sz;
 
-	mmap_old_sz = bpf_map_mmap_sz(map->def.value_size, map->def.max_entries);
-	mmap_new_sz = bpf_map_mmap_sz(size, map->def.max_entries);
+	mmap_old_sz = bpf_map_mmap_sz(map);
+	mmap_new_sz = __bpf_map_mmap_sz(size, map->def.max_entries);
 	err = bpf_map_mmap_resize(map, mmap_old_sz, mmap_new_sz);
 	if (err) {
 		pr_warn("map '%s': failed to resize memory-mapped region: %d\n",
@@ -13356,7 +13387,7 @@ int bpf_object__load_skeleton(struct bpf_object_skeleton *s)
 
 	for (i = 0; i < s->map_cnt; i++) {
 		struct bpf_map *map = *s->maps[i].map;
-		size_t mmap_sz = bpf_map_mmap_sz(map->def.value_size, map->def.max_entries);
+		size_t mmap_sz = bpf_map_mmap_sz(map);
 		int prot, map_fd = map->fd;
 		void **mmaped = s->maps[i].mmaped;
 
diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c
index ee9b1dbea9eb..302188122439 100644
--- a/tools/lib/bpf/libbpf_probes.c
+++ b/tools/lib/bpf/libbpf_probes.c
@@ -338,6 +338,13 @@ static int probe_map_create(enum bpf_map_type map_type)
 		key_size = 0;
 		max_entries = 1;
 		break;
+	case BPF_MAP_TYPE_ARENA:
+		key_size = 0;
+		value_size = 0;
+		max_entries = 1;	/* one page */
+		opts.map_extra = 0;	/* can mmap() at any address */
+		opts.map_flags = BPF_F_MMAPABLE;
+		break;
 	case BPF_MAP_TYPE_HASH:
 	case BPF_MAP_TYPE_ARRAY:
 	case BPF_MAP_TYPE_PROG_ARRAY:
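For reference, the probe above corresponds roughly to this standalone
sequence (a sketch, not the probe's literal code; the function name is
illustrative and error handling is minimal):

#include <errno.h>
#include <unistd.h>
#include <sys/mman.h>
#include <bpf/bpf.h>

static int arena_create_and_mmap(void **basep)
{
	LIBBPF_OPTS(bpf_map_create_opts, opts,
		.map_flags = BPF_F_MMAPABLE,
		.map_extra = 0);	/* 0: any mmap() address is fine */
	long page_sz = sysconf(_SC_PAGE_SIZE);
	int fd;

	/* arenas take no key/value; max_entries counts pages */
	fd = bpf_map_create(BPF_MAP_TYPE_ARENA, "arena", 0, 0, 1, &opts);
	if (fd < 0)
		return fd;

	/* the same mmap() libbpf now performs in bpf_object__create_map() */
	*basep = mmap(NULL, page_sz, PROT_READ | PROT_WRITE,
		      MAP_SHARED, fd, 0);
	if (*basep == MAP_FAILED) {
		int err = -errno;

		close(fd);
		return err;
	}
	return fd;
}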