
[bpf-next,1/2] bpf: Add open coded version of kmem_cache iterator

Message ID 20241017080604.541872-1-namhyung@kernel.org (mailing list archive)
State Superseded
Delegated to: BPF
Series [bpf-next,1/2] bpf: Add open coded version of kmem_cache iterator

Checks

Context Check Description
bpf/vmtest-bpf-next-PR success PR summary
netdev/series_format success Single patches do not need cover letters
netdev/tree_selection success Clearly marked for bpf-next
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 6 this patch: 6
netdev/build_tools success No tools touched, skip
netdev/cc_maintainers success CCed 13 of 13 maintainers
netdev/build_clang success Errors and warnings before: 6 this patch: 6
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn fail Errors and warnings before: 60 this patch: 63
netdev/checkpatch warning CHECK: Comparison to NULL could be written "!prev"; CHECK: Comparison to NULL could be written "!s"; WARNING: Prefer __aligned(8) over __attribute__((aligned(8))); WARNING: line length of 87 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 11 this patch: 11
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-next-VM_Test-0 success Logs for Lint
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-2 success Logs for Unittests
bpf/vmtest-bpf-next-VM_Test-3 success Logs for Validate matrix.py
bpf/vmtest-bpf-next-VM_Test-4 success Logs for aarch64-gcc / build / build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-5 success Logs for aarch64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-6 success Logs for aarch64-gcc / test (test_maps, false, 360) / test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-7 success Logs for aarch64-gcc / test (test_progs, false, 360) / test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-8 success Logs for aarch64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-9 success Logs for aarch64-gcc / test (test_verifier, false, 360) / test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-10 success Logs for aarch64-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-11 success Logs for s390x-gcc / build / build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-12 success Logs for s390x-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-13 success Logs for s390x-gcc / test (test_progs, false, 360) / test_progs on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-14 success Logs for s390x-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-15 success Logs for s390x-gcc / test (test_verifier, false, 360) / test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-16 success Logs for s390x-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-17 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-18 success Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-19 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-20 success Logs for x86_64-gcc / test (test_maps, false, 360) / test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-21 success Logs for x86_64-gcc / test (test_progs, false, 360) / test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-22 success Logs for x86_64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-23 success Logs for x86_64-gcc / test (test_progs_no_alu32_parallel, true, 30) / test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-24 success Logs for x86_64-gcc / test (test_progs_parallel, true, 30) / test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-25 success Logs for x86_64-gcc / test (test_verifier, false, 360) / test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-26 success Logs for x86_64-gcc / veristat / veristat on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-27 success Logs for x86_64-llvm-17 / build / build for x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-28 success Logs for x86_64-llvm-17 / build-release / build for x86_64 with llvm-17-O2
bpf/vmtest-bpf-next-VM_Test-29 success Logs for x86_64-llvm-17 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-30 success Logs for x86_64-llvm-17 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-31 success Logs for x86_64-llvm-17 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-32 success Logs for x86_64-llvm-17 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-33 success Logs for x86_64-llvm-17 / veristat
bpf/vmtest-bpf-next-VM_Test-34 success Logs for x86_64-llvm-18 / build / build for x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-35 success Logs for x86_64-llvm-18 / build-release / build for x86_64 with llvm-18-O2
bpf/vmtest-bpf-next-VM_Test-36 success Logs for x86_64-llvm-18 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-37 success Logs for x86_64-llvm-18 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-38 success Logs for x86_64-llvm-18 / test (test_progs_cpuv4, false, 360) / test_progs_cpuv4 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-39 success Logs for x86_64-llvm-18 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-40 success Logs for x86_64-llvm-18 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-41 success Logs for x86_64-llvm-18 / veristat

Commit Message

Namhyung Kim Oct. 17, 2024, 8:06 a.m. UTC
Add a new open coded iterator for kmem_cache which can be called from a
BPF program like below.  It takes no arguments and traverses all
kmem_cache entries.

  struct kmem_cache *pos;

  bpf_for_each(kmem_cache, pos) {
      ...
  }

As it needs to grab slab_mutex, it should be called from sleepable BPF
programs only.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
 kernel/bpf/helpers.c         |  3 ++
 kernel/bpf/kmem_cache_iter.c | 87 ++++++++++++++++++++++++++++++++++++
 2 files changed, 90 insertions(+)
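
For illustration only (not part of this series), here is a minimal sketch
of a sleepable BPF-side user.  The section name, program name, and the
printed kmem_cache fields are assumptions made for the example:

// SPDX-License-Identifier: GPL-2.0
/* Hypothetical usage sketch: walk all kmem_caches from a sleepable
 * "syscall" program.  bpf_for_each() expands to the new/next/destroy
 * kfuncs added by this patch.
 */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>

char _license[] SEC("license") = "GPL";

SEC("syscall")
int dump_kmem_caches(void *ctx)
{
	struct kmem_cache *pos;

	bpf_for_each(kmem_cache, pos) {
		/* pos is trusted and kept alive by the iterator's refcount */
		bpf_printk("cache %s: object size %u", pos->name, pos->size);
	}
	return 0;
}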

Comments

Martin KaFai Lau Oct. 18, 2024, 6:22 p.m. UTC | #1
On 10/17/24 1:06 AM, Namhyung Kim wrote:
> [...]
> +__bpf_kfunc struct kmem_cache *bpf_iter_kmem_cache_next(struct bpf_iter_kmem_cache *it)
> +{
> +	struct bpf_iter_kmem_cache_kern *kit = (void *)it;
> +	struct kmem_cache *prev = kit->pos;
> +	struct kmem_cache *next;
> +	bool destroy = false;
> +
> +	mutex_lock(&slab_mutex);

I think taking mutex_lock here should be fine since sleepable tracing prog 
should be limited to the error injection whitelist. Those functions should not 
have held the mutex afaict.

> +
> +	if (list_empty(&slab_caches)) {
> +		mutex_unlock(&slab_mutex);
> +		return NULL;
> +	}
> +
> +	if (prev == NULL)
> +		next = list_first_entry(&slab_caches, struct kmem_cache, list);
> +	else if (list_last_entry(&slab_caches, struct kmem_cache, list) == prev)
> +		next = NULL;

At the last entry, next is NULL.

> +	else
> +		next = list_next_entry(prev, list);
> +
> +	/* boot_caches have negative refcount, don't touch them */
> +	if (next && next->refcount > 0)
> +		next->refcount++;
> +
> +	/* Skip kmem_cache_destroy() for active entries */
> +	if (prev && prev->refcount > 1)
> +		prev->refcount--;
> +	else if (prev && prev->refcount == 1)
> +		destroy = true;
> +
> +	mutex_unlock(&slab_mutex);
> +
> +	if (destroy)
> +		kmem_cache_destroy(prev);
> +
> +	kit->pos = next;

so kit->pos will be NULL also. Does it mean the bpf prog will be able to call 
bpf_iter_kmem_cache_next() again and re-loop from the beginning of the 
slab_caches list?

> +	return next;
> +}
> [...]
Andrii Nakryiko Oct. 21, 2024, 11:32 p.m. UTC | #2
On Thu, Oct 17, 2024 at 1:06 AM Namhyung Kim <namhyung@kernel.org> wrote:
>
> [...]
>
> diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
> index 073e6f04f4d765ff..d1dfa4f335577914 100644
> --- a/kernel/bpf/helpers.c
> +++ b/kernel/bpf/helpers.c
> @@ -3111,6 +3111,9 @@ BTF_ID_FLAGS(func, bpf_iter_bits_next, KF_ITER_NEXT | KF_RET_NULL)
>  BTF_ID_FLAGS(func, bpf_iter_bits_destroy, KF_ITER_DESTROY)
>  BTF_ID_FLAGS(func, bpf_copy_from_user_str, KF_SLEEPABLE)
>  BTF_ID_FLAGS(func, bpf_get_kmem_cache)
> +BTF_ID_FLAGS(func, bpf_iter_kmem_cache_new, KF_ITER_NEW | KF_SLEEPABLE)
> +BTF_ID_FLAGS(func, bpf_iter_kmem_cache_next, KF_ITER_NEXT | KF_RET_NULL | KF_SLEEPABLE)
> +BTF_ID_FLAGS(func, bpf_iter_kmem_cache_destroy, KF_ITER_DESTROY | KF_SLEEPABLE)

I'm curious. Having bpf_iter_kmem_cache_{new,next,destroy} functions,
can we rewrite kmem_cache_iter_seq_next in terms of these ones, so
that we have less duplication of iteration logic? Or there will be
some locking concerns preventing this? (I haven't looked into the
actual logic much, sorry, lazy question)

>  BTF_KFUNCS_END(common_btf_ids)
>
>  static const struct btf_kfunc_id_set common_kfunc_set = {
> diff --git a/kernel/bpf/kmem_cache_iter.c b/kernel/bpf/kmem_cache_iter.c
> index ebc101d7da51b57c..31ddaf452b20a458 100644
> --- a/kernel/bpf/kmem_cache_iter.c
> +++ b/kernel/bpf/kmem_cache_iter.c
> @@ -145,6 +145,93 @@ static const struct bpf_iter_seq_info kmem_cache_iter_seq_info = {
>         .seq_ops                = &kmem_cache_iter_seq_ops,
>  };
>

[...]
Namhyung Kim Oct. 22, 2024, 5:47 p.m. UTC | #3
Hello,

On Fri, Oct 18, 2024 at 11:22:00AM -0700, Martin KaFai Lau wrote:
> On 10/17/24 1:06 AM, Namhyung Kim wrote:
> > [...]
> > +__bpf_kfunc struct kmem_cache *bpf_iter_kmem_cache_next(struct bpf_iter_kmem_cache *it)
> > +{
> > +	struct bpf_iter_kmem_cache_kern *kit = (void *)it;
> > +	struct kmem_cache *prev = kit->pos;
> > +	struct kmem_cache *next;
> > +	bool destroy = false;
> > +
> > +	mutex_lock(&slab_mutex);
> 
> I think taking mutex_lock here should be fine since sleepable tracing prog
> should be limited to the error injection whitelist. Those functions should
> not have held the mutex afaict.
> 
> > +
> > +	if (list_empty(&slab_caches)) {
> > +		mutex_unlock(&slab_mutex);
> > +		return NULL;
> > +	}
> > +
> > +	if (prev == NULL)
> > +		next = list_first_entry(&slab_caches, struct kmem_cache, list);
> > +	else if (list_last_entry(&slab_caches, struct kmem_cache, list) == prev)
> > +		next = NULL;
> 
> At the last entry, next is NULL.
> 
> > +	else
> > +		next = list_next_entry(prev, list);
> > +
> > +	/* boot_caches have negative refcount, don't touch them */
> > +	if (next && next->refcount > 0)
> > +		next->refcount++;
> > +
> > +	/* Skip kmem_cache_destroy() for active entries */
> > +	if (prev && prev->refcount > 1)
> > +		prev->refcount--;
> > +	else if (prev && prev->refcount == 1)
> > +		destroy = true;
> > +
> > +	mutex_unlock(&slab_mutex);
> > +
> > +	if (destroy)
> > +		kmem_cache_destroy(prev);
> > +
> > +	kit->pos = next;
> 
> so kit->pos will be NULL also. Does it mean the bpf prog will be able to
> call bpf_iter_kmem_cache_next() again and re-loop from the beginning of the
> slab_caches list?

Right, I'll mark the start pos differently to prevent that.

Thanks,
Namhyung

> > [...]
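
One way the start pos could be marked differently, sketched here as an
assumption (a reserved sentinel value) rather than a quote of the actual
follow-up version: with a non-NULL sentinel for the "not started" state,
a NULL pos always means the walk has finished, so next() cannot re-loop.

#define KMEM_CACHE_POS_START	((void *)1L)

__bpf_kfunc int bpf_iter_kmem_cache_new(struct bpf_iter_kmem_cache *it)
{
	struct bpf_iter_kmem_cache_kern *kit = (void *)it;

	kit->pos = KMEM_CACHE_POS_START;
	return 0;
}

__bpf_kfunc struct kmem_cache *bpf_iter_kmem_cache_next(struct bpf_iter_kmem_cache *it)
{
	struct bpf_iter_kmem_cache_kern *kit = (void *)it;
	struct kmem_cache *prev = kit->pos;
	struct kmem_cache *next;
	bool destroy = false;

	/* NULL now unambiguously means "iteration finished" */
	if (prev == NULL)
		return NULL;

	mutex_lock(&slab_mutex);

	if (list_empty(&slab_caches))
		next = NULL;
	else if (prev == KMEM_CACHE_POS_START)
		next = list_first_entry(&slab_caches, struct kmem_cache, list);
	else if (list_last_entry(&slab_caches, struct kmem_cache, list) == prev)
		next = NULL;
	else
		next = list_next_entry(prev, list);

	/* boot_caches have negative refcount, don't touch them */
	if (next && next->refcount > 0)
		next->refcount++;

	/* Skip kmem_cache_destroy() for active entries; the sentinel
	 * carries no reference, so it must be skipped here too */
	if (prev != KMEM_CACHE_POS_START) {
		if (prev->refcount > 1)
			prev->refcount--;
		else if (prev->refcount == 1)
			destroy = true;
	}

	mutex_unlock(&slab_mutex);

	if (destroy)
		kmem_cache_destroy(prev);

	kit->pos = next;
	return next;
}

bpf_iter_kmem_cache_destroy() would need the same sentinel check before
touching s->refcount.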
Namhyung Kim Oct. 22, 2024, 5:50 p.m. UTC | #4
Hello,

On Mon, Oct 21, 2024 at 04:32:10PM -0700, Andrii Nakryiko wrote:
> On Thu, Oct 17, 2024 at 1:06 AM Namhyung Kim <namhyung@kernel.org> wrote:
> > [...]
> > @@ -3111,6 +3111,9 @@ BTF_ID_FLAGS(func, bpf_iter_bits_next, KF_ITER_NEXT | KF_RET_NULL)
> >  BTF_ID_FLAGS(func, bpf_iter_bits_destroy, KF_ITER_DESTROY)
> >  BTF_ID_FLAGS(func, bpf_copy_from_user_str, KF_SLEEPABLE)
> >  BTF_ID_FLAGS(func, bpf_get_kmem_cache)
> > +BTF_ID_FLAGS(func, bpf_iter_kmem_cache_new, KF_ITER_NEW | KF_SLEEPABLE)
> > +BTF_ID_FLAGS(func, bpf_iter_kmem_cache_next, KF_ITER_NEXT | KF_RET_NULL | KF_SLEEPABLE)
> > +BTF_ID_FLAGS(func, bpf_iter_kmem_cache_destroy, KF_ITER_DESTROY | KF_SLEEPABLE)
> 
> I'm curious. Having bpf_iter_kmem_cache_{new,next,destroy} functions,
> can we rewrite kmem_cache_iter_seq_next in terms of these ones, so
> that we have less duplication of iteration logic? Or there will be
> some locking concerns preventing this? (I haven't looked into the
> actual logic much, sorry, lazy question)

It should be fine with locking; I think there's a subtle difference
between the seq interface and the open coded iterator.  But I'll think
about how to reduce the duplication.

Thanks for your review!
Namhyung
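
A hypothetical shape of that deduplication (assumed code, not from the
series, and it glosses over the subtle seq/open-coded difference
mentioned above, such as how the seq iterator pins entries across
read() calls):

/* Hypothetical sketch: let the fd-based seq iterator delegate
 * advancement to the open-coded next() kfunc kept in seq->private.
 */
static void *kmem_cache_iter_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct bpf_iter_kmem_cache *it = seq->private;

	++*pos;
	return bpf_iter_kmem_cache_next(it);
}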

Patch

diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 073e6f04f4d765ff..d1dfa4f335577914 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -3111,6 +3111,9 @@  BTF_ID_FLAGS(func, bpf_iter_bits_next, KF_ITER_NEXT | KF_RET_NULL)
 BTF_ID_FLAGS(func, bpf_iter_bits_destroy, KF_ITER_DESTROY)
 BTF_ID_FLAGS(func, bpf_copy_from_user_str, KF_SLEEPABLE)
 BTF_ID_FLAGS(func, bpf_get_kmem_cache)
+BTF_ID_FLAGS(func, bpf_iter_kmem_cache_new, KF_ITER_NEW | KF_SLEEPABLE)
+BTF_ID_FLAGS(func, bpf_iter_kmem_cache_next, KF_ITER_NEXT | KF_RET_NULL | KF_SLEEPABLE)
+BTF_ID_FLAGS(func, bpf_iter_kmem_cache_destroy, KF_ITER_DESTROY | KF_SLEEPABLE)
 BTF_KFUNCS_END(common_btf_ids)
 
 static const struct btf_kfunc_id_set common_kfunc_set = {
diff --git a/kernel/bpf/kmem_cache_iter.c b/kernel/bpf/kmem_cache_iter.c
index ebc101d7da51b57c..31ddaf452b20a458 100644
--- a/kernel/bpf/kmem_cache_iter.c
+++ b/kernel/bpf/kmem_cache_iter.c
@@ -145,6 +145,93 @@  static const struct bpf_iter_seq_info kmem_cache_iter_seq_info = {
 	.seq_ops		= &kmem_cache_iter_seq_ops,
 };
 
+/* open-coded version */
+struct bpf_iter_kmem_cache {
+	__u64 __opaque[1];
+} __attribute__((aligned(8)));
+
+struct bpf_iter_kmem_cache_kern {
+	struct kmem_cache *pos;
+} __attribute__((aligned(8)));
+
+__bpf_kfunc_start_defs();
+
+__bpf_kfunc int bpf_iter_kmem_cache_new(struct bpf_iter_kmem_cache *it)
+{
+	struct bpf_iter_kmem_cache_kern *kit = (void *)it;
+
+	BUILD_BUG_ON(sizeof(*kit) > sizeof(*it));
+	BUILD_BUG_ON(__alignof__(*kit) != __alignof__(*it));
+
+	kit->pos = NULL;
+	return 0;
+}
+
+__bpf_kfunc struct kmem_cache *bpf_iter_kmem_cache_next(struct bpf_iter_kmem_cache *it)
+{
+	struct bpf_iter_kmem_cache_kern *kit = (void *)it;
+	struct kmem_cache *prev = kit->pos;
+	struct kmem_cache *next;
+	bool destroy = false;
+
+	mutex_lock(&slab_mutex);
+
+	if (list_empty(&slab_caches)) {
+		mutex_unlock(&slab_mutex);
+		return NULL;
+	}
+
+	if (prev == NULL)
+		next = list_first_entry(&slab_caches, struct kmem_cache, list);
+	else if (list_last_entry(&slab_caches, struct kmem_cache, list) == prev)
+		next = NULL;
+	else
+		next = list_next_entry(prev, list);
+
+	/* boot_caches have negative refcount, don't touch them */
+	if (next && next->refcount > 0)
+		next->refcount++;
+
+	/* Skip kmem_cache_destroy() for active entries */
+	if (prev && prev->refcount > 1)
+		prev->refcount--;
+	else if (prev && prev->refcount == 1)
+		destroy = true;
+
+	mutex_unlock(&slab_mutex);
+
+	if (destroy)
+		kmem_cache_destroy(prev);
+
+	kit->pos = next;
+	return next;
+}
+
+__bpf_kfunc void bpf_iter_kmem_cache_destroy(struct bpf_iter_kmem_cache *it)
+{
+	struct bpf_iter_kmem_cache_kern *kit = (void *)it;
+	struct kmem_cache *s = kit->pos;
+	bool destroy = false;
+
+	if (s == NULL)
+		return;
+
+	mutex_lock(&slab_mutex);
+
+	/* Skip kmem_cache_destroy() for active entries */
+	if (s->refcount > 1)
+		s->refcount--;
+	else if (s->refcount == 1)
+		destroy = true;
+
+	mutex_unlock(&slab_mutex);
+
+	if (destroy)
+		kmem_cache_destroy(s);
+}
+
+__bpf_kfunc_end_defs();
+
 static void bpf_iter_kmem_cache_show_fdinfo(const struct bpf_iter_aux_info *aux,
 					    struct seq_file *seq)
 {
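
Because all three kfuncs are KF_SLEEPABLE, one natural way to exercise the
iterator end to end is a SEC("syscall") program invoked via
BPF_PROG_TEST_RUN.  A hedged userspace sketch follows, assuming a skeleton
named kmem_cache_iter.skel.h built from the example program shown under
the commit message (both names are assumptions, not files in this series):

/* Hypothetical driver for the sleepable syscall program above. */
#include <bpf/libbpf.h>
#include <bpf/bpf.h>
#include "kmem_cache_iter.skel.h"

int main(void)
{
	struct kmem_cache_iter *skel = kmem_cache_iter__open_and_load();
	LIBBPF_OPTS(bpf_test_run_opts, opts);
	int err;

	if (!skel)
		return 1;

	/* Run the program once; the kmem_cache walk happens in the kernel
	 * and its output lands in the trace pipe via bpf_printk(). */
	err = bpf_prog_test_run_opts(
		bpf_program__fd(skel->progs.dump_kmem_caches), &opts);

	kmem_cache_iter__destroy(skel);
	return err;
}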