[bpf-next,12/16] bpf: Use bpf_mem_cache_alloc/free in bpf_selem_alloc/free

Message ID 20230306084216.3186830-13-martin.lau@linux.dev (mailing list archive)
State Superseded
Delegated to: BPF
Series bpf: Use bpf_mem_cache_alloc/free in bpf_local_storage

Checks

Context Check Description
bpf/vmtest-bpf-next-PR pending PR summary
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-2 success Logs for build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-3 success Logs for build for aarch64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-5 success Logs for build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-6 success Logs for build for x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-7 success Logs for llvm-toolchain
bpf/vmtest-bpf-next-VM_Test-8 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-4 success Logs for build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-9 success Logs for test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-10 success Logs for test_maps on aarch64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-11 pending Logs for test_maps on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-12 success Logs for test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-13 success Logs for test_maps on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-14 success Logs for test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-15 success Logs for test_progs on aarch64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-17 success Logs for test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-18 success Logs for test_progs on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-19 fail Logs for test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-20 success Logs for test_progs_no_alu32 on aarch64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-22 success Logs for test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-23 success Logs for test_progs_no_alu32 on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-24 success Logs for test_progs_no_alu32_parallel on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-25 success Logs for test_progs_no_alu32_parallel on aarch64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-27 success Logs for test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-28 success Logs for test_progs_no_alu32_parallel on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-29 success Logs for test_progs_parallel on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-30 success Logs for test_progs_parallel on aarch64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-32 success Logs for test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-33 success Logs for test_progs_parallel on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-34 success Logs for test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-35 success Logs for test_verifier on aarch64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-36 success Logs for test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-37 success Logs for test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-38 success Logs for test_verifier on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-16 success Logs for test_progs on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-21 success Logs for test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-26 success Logs for test_progs_no_alu32_parallel on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-31 success Logs for test_progs_parallel on s390x with gcc
netdev/series_format fail Series longer than 15 patches (and no cover letter)
netdev/tree_selection success Clearly marked for bpf-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 170 this patch: 170
netdev/cc_maintainers warning 7 maintainers not CCed: jolsa@kernel.org john.fastabend@gmail.com yhs@fb.com kpsingh@kernel.org song@kernel.org haoluo@google.com sdf@google.com
netdev/build_clang success Errors and warnings before: 12 this patch: 12
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 170 this patch: 170
netdev/checkpatch warning WARNING: line length of 82 exceeds 80 columns WARNING: line length of 85 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Martin KaFai Lau March 6, 2023, 8:42 a.m. UTC
From: Martin KaFai Lau <martin.lau@kernel.org>

This patch uses bpf_mem_cache_alloc/free in bpf_selem_alloc/free.

The ____cacheline_aligned attribute is no longer needed
in 'struct bpf_local_storage_elem'. bpf_mem_cache_alloc will
have 'struct llist_node' in front of the 'struct bpf_local_storage_elem'.
It will use the 8-byte hole in the bpf_local_storage_elem.

After bpf_mem_cache_alloc(), SDATA(selem)->data is zeroed because
bpf_mem_cache_alloc() could return a reused selem. This keeps
the existing bpf_map_kzalloc() behavior. Only SDATA(selem)->data
is zeroed; it is the part visible to the bpf prog.
There is no need to use zero_map_value() to do the zeroing because
bpf_selem_free() ensures no bpf prog is using the selem before
returning the selem through bpf_mem_cache_free(). The internal
fields of the selem will be initialized when linking it to the
new smap and the new local_storage.

When bpf_mem_cache_alloc() fails, bpf_selem_alloc() will try to
fall back to kzalloc only if the caller has the GFP_KERNEL flag set
(i.e. from a sleepable bpf prog, so it should not cause a deadlock).
The BPF_MA_SIZE and BPF_MA_PTR macros are added for that.

For the common selem free path where the selem is freed when its owner
is also being freed, reuse_now == true and selem can be reused
immediately. bpf_selem_free() uses bpf_mem_cache_free() where
selem will be considered for immediate reuse.

For the uncommon path where the bpf prog explicitly deletes the selem (by
using the helper bpf_*_storage_delete), the selem cannot be reused
immediately. reuse_now == false and bpf_selem_free() stays with
the current call_rcu_tasks_trace. The BPF_MA_NODE macro is added to get
the correct address for the kfree.
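
As a rough illustration (not part of the patch) of the layout these
macros assume, with the hidden 'struct llist_node' kept in front of the
object handed back to the caller:

  ma_node = kzalloc(BPF_MA_SIZE(smap->elem_size), ...);

  +--------------------+-------------------------------+
  | struct llist_node  | struct bpf_local_storage_elem |
  +--------------------+-------------------------------+
  ^ ma_node             ^ selem == BPF_MA_PTR(ma_node)

  BPF_MA_NODE(selem) recovers ma_node again, which is the address
  bpf_selem_free_rcu() must pass to kfree().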

mem_charge and mem_uncharge are changed to use the BPF_MA_SIZE
macro. There will be a temporary over-charge for
bpf_local_storage_alloc() because bpf_local_storage is not
moved to bpf_mem_cache_alloc in this patch; that will be done
in the next patch.

Cc: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
---
 include/linux/bpf_local_storage.h |  8 ++---
 include/linux/bpf_mem_alloc.h     |  5 +++
 kernel/bpf/bpf_local_storage.c    | 56 +++++++++++++++++++++++++------
 3 files changed, 53 insertions(+), 16 deletions(-)

Comments

Alexei Starovoitov March 7, 2023, 3:47 a.m. UTC | #1
On Mon, Mar 6, 2023 at 12:43 AM Martin KaFai Lau <martin.lau@linux.dev> wrote:
>
> From: Martin KaFai Lau <martin.lau@kernel.org>
>
> This patch uses bpf_mem_cache_alloc/free in bpf_selem_alloc/free.
>
> The ____cacheline_aligned attribute is no longer needed
> in 'struct bpf_local_storage_elem'. bpf_mem_cache_alloc will
> have 'struct llist_node' in front of the 'struct bpf_local_storage_elem'.
> It will use the 8-byte hole in the bpf_local_storage_elem.
>
> After bpf_mem_cache_alloc(), SDATA(selem)->data is zeroed because
> bpf_mem_cache_alloc() could return a reused selem. This keeps
> the existing bpf_map_kzalloc() behavior. Only SDATA(selem)->data
> is zeroed; it is the part visible to the bpf prog.
> There is no need to use zero_map_value() to do the zeroing because
> bpf_selem_free() ensures no bpf prog is using the selem before
> returning the selem through bpf_mem_cache_free(). The internal
> fields of the selem will be initialized when linking it to the
> new smap and the new local_storage.
>
> When bpf_mem_cache_alloc() fails, bpf_selem_alloc() will try to
> fall back to kzalloc only if the caller has the GFP_KERNEL flag set
> (i.e. from a sleepable bpf prog, so it should not cause a deadlock).
> The BPF_MA_SIZE and BPF_MA_PTR macros are added for that.
>
> For the common selem free path where the selem is freed when its owner
> is also being freed, reuse_now == true and selem can be reused
> immediately. bpf_selem_free() uses bpf_mem_cache_free() where
> selem will be considered for immediate reuse.
>
> For the uncommon path where the bpf prog explicitly deletes the selem (by
> using the helper bpf_*_storage_delete), the selem cannot be reused
> immediately. reuse_now == false and bpf_selem_free() stays with
> the current call_rcu_tasks_trace. The BPF_MA_NODE macro is added to get
> the correct address for the kfree.
>
> mem_charge and mem_uncharge are changed to use the BPF_MA_SIZE
> macro. There will be a temporary over-charge for
> bpf_local_storage_alloc() because bpf_local_storage is not
> moved to bpf_mem_cache_alloc in this patch; that will be done
> in the next patch.
>
> Cc: Namhyung Kim <namhyung@kernel.org>
> Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
> ---
>  include/linux/bpf_local_storage.h |  8 ++---
>  include/linux/bpf_mem_alloc.h     |  5 +++
>  kernel/bpf/bpf_local_storage.c    | 56 +++++++++++++++++++++++++------
>  3 files changed, 53 insertions(+), 16 deletions(-)
>
> diff --git a/include/linux/bpf_local_storage.h b/include/linux/bpf_local_storage.h
> index adb5023a1af5..a236c9b964cf 100644
> --- a/include/linux/bpf_local_storage.h
> +++ b/include/linux/bpf_local_storage.h
> @@ -13,6 +13,7 @@
>  #include <linux/list.h>
>  #include <linux/hash.h>
>  #include <linux/types.h>
> +#include <linux/bpf_mem_alloc.h>
>  #include <uapi/linux/btf.h>
>
>  #define BPF_LOCAL_STORAGE_CACHE_SIZE   16
> @@ -55,6 +56,7 @@ struct bpf_local_storage_map {
>         u32 bucket_log;
>         u16 elem_size;
>         u16 cache_idx;
> +       struct bpf_mem_alloc selem_ma;
>  };
>
>  struct bpf_local_storage_data {
> @@ -74,11 +76,7 @@ struct bpf_local_storage_elem {
>         struct hlist_node snode;        /* Linked to bpf_local_storage */
>         struct bpf_local_storage __rcu *local_storage;
>         struct rcu_head rcu;
> -       /* 8 bytes hole */
> -       /* The data is stored in another cacheline to minimize
> -        * the number of cachelines access during a cache hit.
> -        */
> -       struct bpf_local_storage_data sdata ____cacheline_aligned;
> +       struct bpf_local_storage_data sdata;
>  };
>
>  struct bpf_local_storage {
> diff --git a/include/linux/bpf_mem_alloc.h b/include/linux/bpf_mem_alloc.h
> index a7104af61ab4..0ab16fb0ab50 100644
> --- a/include/linux/bpf_mem_alloc.h
> +++ b/include/linux/bpf_mem_alloc.h
> @@ -5,6 +5,11 @@
>  #include <linux/compiler_types.h>
>  #include <linux/workqueue.h>
>
> +#define BPF_MA_NODE_SZ sizeof(struct llist_node)
> +#define BPF_MA_SIZE(_size) ((_size) + BPF_MA_NODE_SZ)
> +#define BPF_MA_PTR(_node) ((void *)(_node) + BPF_MA_NODE_SZ)
> +#define BPF_MA_NODE(_ptr) ((void *)(_ptr) - BPF_MA_NODE_SZ)
> +
>  struct bpf_mem_cache;
>  struct bpf_mem_caches;
>
> diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c
> index 532b82084ba7..d3c0dd5737d6 100644
> --- a/kernel/bpf/bpf_local_storage.c
> +++ b/kernel/bpf/bpf_local_storage.c
> @@ -31,7 +31,7 @@ static int mem_charge(struct bpf_local_storage_map *smap, void *owner, u32 size)
>         if (!map->ops->map_local_storage_charge)
>                 return 0;
>
> -       return map->ops->map_local_storage_charge(smap, owner, size);
> +       return map->ops->map_local_storage_charge(smap, owner, BPF_MA_SIZE(size));
>  }
>
>  static void mem_uncharge(struct bpf_local_storage_map *smap, void *owner,
> @@ -40,7 +40,7 @@ static void mem_uncharge(struct bpf_local_storage_map *smap, void *owner,
>         struct bpf_map *map = &smap->map;
>
>         if (map->ops->map_local_storage_uncharge)
> -               map->ops->map_local_storage_uncharge(smap, owner, size);
> +               map->ops->map_local_storage_uncharge(smap, owner, BPF_MA_SIZE(size));
>  }
>
>  static struct bpf_local_storage __rcu **
> @@ -80,12 +80,32 @@ bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner,
>         if (charge_mem && mem_charge(smap, owner, smap->elem_size))
>                 return NULL;
>
> -       selem = bpf_map_kzalloc(&smap->map, smap->elem_size,
> -                               gfp_flags | __GFP_NOWARN);
> +       migrate_disable();
> +       selem = bpf_mem_cache_alloc(&smap->selem_ma);
> +       migrate_enable();
> +       if (!selem && (gfp_flags & GFP_KERNEL)) {
> +               void *ma_node;
> +
> +               ma_node = bpf_map_kzalloc(&smap->map,
> +                                         BPF_MA_SIZE(smap->elem_size),
> +                                         gfp_flags | __GFP_NOWARN);
> +               if (ma_node)
> +                       selem = BPF_MA_PTR(ma_node);
> +       }

If I understand it correctly the code is not trying
to free selem the same way it allocated it.
So we can have kzalloc-ed selems freed into bpf_mem_cache_alloc free-list.
That feels dangerous.
I don't think we can do such things in local storage,
but if we add this api to bpf_mem_alloc it might be acceptable.
I mean mem alloc will try to take from the free list and if empty
and GFP_KERNEL it will kzalloc it.
The knowledge of hidden llist_node shouldn't leave the bpf/memalloc.c file.
reuse_now should probably be a memalloc api flag too.
The implementation detail that it's scary but ok-ish to kfree or
bpf_mem_cache_free depending on circumstances should stay in memalloc.c
Martin KaFai Lau March 8, 2023, 12:38 a.m. UTC | #2
On 3/6/23 7:47 PM, Alexei Starovoitov wrote:
>> @@ -80,12 +80,32 @@ bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner,
>>          if (charge_mem && mem_charge(smap, owner, smap->elem_size))
>>                  return NULL;
>>
>> -       selem = bpf_map_kzalloc(&smap->map, smap->elem_size,
>> -                               gfp_flags | __GFP_NOWARN);
>> +       migrate_disable();
>> +       selem = bpf_mem_cache_alloc(&smap->selem_ma);
>> +       migrate_enable();
>> +       if (!selem && (gfp_flags & GFP_KERNEL)) {
>> +               void *ma_node;
>> +
>> +               ma_node = bpf_map_kzalloc(&smap->map,
>> +                                         BPF_MA_SIZE(smap->elem_size),
>> +                                         gfp_flags | __GFP_NOWARN);
>> +               if (ma_node)
>> +                       selem = BPF_MA_PTR(ma_node);
>> +       }
> 
> If I understand it correctly the code is not trying
> to free selem the same way it allocated it.
> So we can have kzalloc-ed selems freed into bpf_mem_cache_alloc free-list.
> That feels dangerous.
> I don't think we can do such things in local storage,
> but if we add this api to bpf_mem_alloc it might be acceptable.
> I mean mem alloc will try to take from the free list and if empty
> and GFP_KERNEL it will kzalloc it.
> The knowledge of hidden llist_node shouldn't leave the bpf/memalloc.c file.
> reuse_now should probably be a memalloc api flag too.
> The implementation detail that it's scary but ok-ish to kfree or
> bpf_mem_cache_free depending on circumstances should stay in memalloc.c

All make sense. I will create a bpf_mem_cache_alloc_flags(..., gfp_t flags) to 
hide the llist_node and kzalloc details. For free, local storage still needs to 
use the selem->rcu head in its call_rcu_tasks_trace(), so I will create a 
bpf_mem_cache_raw_free(void *ptr) to hide the llist_node details, like:

/* 'struct bpf_mem_alloc *ma' is not available at this
 * point but the caller knows whether it is percpu or not and
 * calls a different raw_free function.
 */
void bpf_mem_cache_raw_free(void *ptr)
{
         kfree(ptr - LLIST_NODE_SZ);
}
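
A rough sketch of the alloc side, just to illustrate the direction
(unit_alloc() stands for the existing per-cpu free-list fast path in
kernel/bpf/memalloc.c; the exact fallback, zeroing and memcg details
here are assumptions, not a final implementation):

void notrace *bpf_mem_cache_alloc_flags(struct bpf_mem_alloc *ma, gfp_t flags)
{
        struct bpf_mem_cache *c = this_cpu_ptr(ma->cache);
        void *ret;

        /* Fast path: same per-cpu free list as bpf_mem_cache_alloc() */
        ret = unit_alloc(c);
        if (!ret && (flags & GFP_KERNEL))
                /* Sleepable caller: fall back to a direct allocation.
                 * c->unit_size already accounts for the hidden llist_node.
                 */
                ret = kmalloc_node(c->unit_size, flags | __GFP_NOWARN,
                                   NUMA_NO_NODE);

        /* Hide the llist_node from the caller */
        return !ret ? NULL : ret + LLIST_NODE_SZ;
}

With something like the above, bpf_selem_alloc() would go back to a
single allocation call and the BPF_MA_* macros in bpf_mem_alloc.h would
not be needed.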

Patch

diff --git a/include/linux/bpf_local_storage.h b/include/linux/bpf_local_storage.h
index adb5023a1af5..a236c9b964cf 100644
--- a/include/linux/bpf_local_storage.h
+++ b/include/linux/bpf_local_storage.h
@@ -13,6 +13,7 @@ 
 #include <linux/list.h>
 #include <linux/hash.h>
 #include <linux/types.h>
+#include <linux/bpf_mem_alloc.h>
 #include <uapi/linux/btf.h>
 
 #define BPF_LOCAL_STORAGE_CACHE_SIZE	16
@@ -55,6 +56,7 @@  struct bpf_local_storage_map {
 	u32 bucket_log;
 	u16 elem_size;
 	u16 cache_idx;
+	struct bpf_mem_alloc selem_ma;
 };
 
 struct bpf_local_storage_data {
@@ -74,11 +76,7 @@  struct bpf_local_storage_elem {
 	struct hlist_node snode;	/* Linked to bpf_local_storage */
 	struct bpf_local_storage __rcu *local_storage;
 	struct rcu_head rcu;
-	/* 8 bytes hole */
-	/* The data is stored in another cacheline to minimize
-	 * the number of cachelines access during a cache hit.
-	 */
-	struct bpf_local_storage_data sdata ____cacheline_aligned;
+	struct bpf_local_storage_data sdata;
 };
 
 struct bpf_local_storage {
diff --git a/include/linux/bpf_mem_alloc.h b/include/linux/bpf_mem_alloc.h
index a7104af61ab4..0ab16fb0ab50 100644
--- a/include/linux/bpf_mem_alloc.h
+++ b/include/linux/bpf_mem_alloc.h
@@ -5,6 +5,11 @@ 
 #include <linux/compiler_types.h>
 #include <linux/workqueue.h>
 
+#define BPF_MA_NODE_SZ sizeof(struct llist_node)
+#define BPF_MA_SIZE(_size) ((_size) + BPF_MA_NODE_SZ)
+#define BPF_MA_PTR(_node) ((void *)(_node) + BPF_MA_NODE_SZ)
+#define BPF_MA_NODE(_ptr) ((void *)(_ptr) - BPF_MA_NODE_SZ)
+
 struct bpf_mem_cache;
 struct bpf_mem_caches;
 
diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c
index 532b82084ba7..d3c0dd5737d6 100644
--- a/kernel/bpf/bpf_local_storage.c
+++ b/kernel/bpf/bpf_local_storage.c
@@ -31,7 +31,7 @@  static int mem_charge(struct bpf_local_storage_map *smap, void *owner, u32 size)
 	if (!map->ops->map_local_storage_charge)
 		return 0;
 
-	return map->ops->map_local_storage_charge(smap, owner, size);
+	return map->ops->map_local_storage_charge(smap, owner, BPF_MA_SIZE(size));
 }
 
 static void mem_uncharge(struct bpf_local_storage_map *smap, void *owner,
@@ -40,7 +40,7 @@  static void mem_uncharge(struct bpf_local_storage_map *smap, void *owner,
 	struct bpf_map *map = &smap->map;
 
 	if (map->ops->map_local_storage_uncharge)
-		map->ops->map_local_storage_uncharge(smap, owner, size);
+		map->ops->map_local_storage_uncharge(smap, owner, BPF_MA_SIZE(size));
 }
 
 static struct bpf_local_storage __rcu **
@@ -80,12 +80,32 @@  bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner,
 	if (charge_mem && mem_charge(smap, owner, smap->elem_size))
 		return NULL;
 
-	selem = bpf_map_kzalloc(&smap->map, smap->elem_size,
-				gfp_flags | __GFP_NOWARN);
+	migrate_disable();
+	selem = bpf_mem_cache_alloc(&smap->selem_ma);
+	migrate_enable();
+	if (!selem && (gfp_flags & GFP_KERNEL)) {
+		void *ma_node;
+
+		ma_node = bpf_map_kzalloc(&smap->map,
+					  BPF_MA_SIZE(smap->elem_size),
+					  gfp_flags | __GFP_NOWARN);
+		if (ma_node)
+			selem = BPF_MA_PTR(ma_node);
+	}
+
 	if (selem) {
 		if (value)
 			copy_map_value(&smap->map, SDATA(selem)->data, value);
-		/* No need to call check_and_init_map_value as memory is zero init */
+		else
+			/* Keep the original bpf_map_kzalloc behavior
+			 * before started using the bpf_mem_cache_alloc.
+			 *
+			 * No need to use zero_map_value. The bpf_selem_free()
+			 * only does bpf_mem_cache_free when there is
+			 * no other bpf prog is using the selem.
+			 */
+			memset(SDATA(selem)->data, 0, smap->map.value_size);
+
 		return selem;
 	}
 
@@ -129,7 +149,7 @@  static void bpf_selem_free_rcu(struct rcu_head *rcu)
 	struct bpf_local_storage_elem *selem;
 
 	selem = container_of(rcu, struct bpf_local_storage_elem, rcu);
-	kfree(selem);
+	kfree(BPF_MA_NODE(selem));
 }
 
 static void bpf_selem_free_trace_rcu(struct rcu_head *rcu)
@@ -145,10 +165,13 @@  void bpf_selem_free(struct bpf_local_storage_elem *selem,
 		    bool reuse_now)
 {
 	bpf_obj_free_fields(smap->map.record, SDATA(selem)->data);
-	if (!reuse_now)
+	if (!reuse_now) {
 		call_rcu_tasks_trace(&selem->rcu, bpf_selem_free_trace_rcu);
-	else
-		call_rcu(&selem->rcu, bpf_selem_free_rcu);
+	} else {
+		migrate_disable();
+		bpf_mem_cache_free(&smap->selem_ma, selem);
+		migrate_enable();
+	}
 }
 
 /* local_storage->lock must be held and selem->local_storage == local_storage.
@@ -651,6 +674,7 @@  bpf_local_storage_map_alloc(union bpf_attr *attr,
 	struct bpf_local_storage_map *smap;
 	unsigned int i;
 	u32 nbuckets;
+	int err;
 
 	smap = bpf_map_area_alloc(sizeof(*smap), NUMA_NO_NODE);
 	if (!smap)
@@ -665,8 +689,8 @@  bpf_local_storage_map_alloc(union bpf_attr *attr,
 	smap->buckets = bpf_map_kvcalloc(&smap->map, sizeof(*smap->buckets),
 					 nbuckets, GFP_USER | __GFP_NOWARN);
 	if (!smap->buckets) {
-		bpf_map_area_free(smap);
-		return ERR_PTR(-ENOMEM);
+		err = -ENOMEM;
+		goto free_smap;
 	}
 
 	for (i = 0; i < nbuckets; i++) {
@@ -677,8 +701,17 @@  bpf_local_storage_map_alloc(union bpf_attr *attr,
 	smap->elem_size = offsetof(struct bpf_local_storage_elem,
 				   sdata.data[attr->value_size]);
 
+	err = bpf_mem_alloc_init(&smap->selem_ma, smap->elem_size, false);
+	if (err)
+		goto free_smap;
+
 	smap->cache_idx = bpf_local_storage_cache_idx_get(cache);
 	return &smap->map;
+
+free_smap:
+	kvfree(smap->buckets);
+	bpf_map_area_free(smap);
+	return ERR_PTR(err);
 }
 
 void bpf_local_storage_map_free(struct bpf_map *map,
@@ -744,6 +777,7 @@  void bpf_local_storage_map_free(struct bpf_map *map,
 	 */
 	synchronize_rcu();
 
+	bpf_mem_alloc_destroy(&smap->selem_ma);
 	kvfree(smap->buckets);
 	bpf_map_area_free(smap);
 }