
[RFC,bpf-next,6/8] sched_ext: Add filter for scx_kfunc_ids_unlocked

Message ID AM6PR03MB5080EDA5E2E2FDB96C98F72F99F72@AM6PR03MB5080.eurprd03.prod.outlook.com (mailing list archive)
State Superseded
Delegated to: BPF
Series bpf, sched_ext: Make kfunc filters support struct_ops context to reduce runtime overhead

Checks

Context Check Description
bpf/vmtest-bpf-next-PR success PR summary
bpf/vmtest-bpf-next-VM_Test-0 success Logs for Lint
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-9 pending Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-7 success Logs for s390x-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-8 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-5 success Logs for aarch64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-6 pending Logs for s390x-gcc / build / build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-2 success Logs for Unittests
bpf/vmtest-bpf-next-VM_Test-3 success Logs for Validate matrix.py
bpf/vmtest-bpf-next-VM_Test-4 pending Logs for aarch64-gcc / build / build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-10 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-11 pending Logs for x86_64-llvm-17 / build / build for x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-12 pending Logs for x86_64-llvm-17 / build-release / build for x86_64 with llvm-17-O2
bpf/vmtest-bpf-next-VM_Test-13 pending Logs for x86_64-llvm-18 / build / build for x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-14 pending Logs for x86_64-llvm-18 / build-release / build for x86_64 with llvm-18-O2
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for bpf-next, async
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 0 this patch: 0
netdev/build_tools success No tools touched, skip
netdev/cc_maintainers warning 9 maintainers not CCed: dietmar.eggemann@arm.com peterz@infradead.org juri.lelli@redhat.com bsegall@google.com vincent.guittot@linaro.org mingo@redhat.com rostedt@goodmis.org mgorman@suse.de vschneid@redhat.com
netdev/build_clang success Errors and warnings before: 0 this patch: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 0 this patch: 0
netdev/checkpatch warning WARNING: line length of 83 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 33 this patch: 33
netdev/source_inline success Was 0 now: 0

Commit Message

Juntong Deng Feb. 5, 2025, 7:30 p.m. UTC
This patch adds a filter for scx_kfunc_ids_unlocked.

The kfuncs in the scx_kfunc_ids_unlocked set can be used in init, exit,
cpu_online, cpu_offline, init_task, dump, cgroup_init, cgroup_exit,
cgroup_prep_move, cgroup_cancel_move, cgroup_move, cgroup_set_weight
operations.

Signed-off-by: Juntong Deng <juntong.deng@outlook.com>
---
 kernel/sched/ext.c | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

Comments

Alexei Starovoitov Feb. 8, 2025, 3:37 a.m. UTC | #1
On Wed, Feb 5, 2025 at 11:35 AM Juntong Deng <juntong.deng@outlook.com> wrote:
>
> This patch adds a filter for scx_kfunc_ids_unlocked.
>
> The kfuncs in the scx_kfunc_ids_unlocked set can be used in init, exit,
> cpu_online, cpu_offline, init_task, dump, cgroup_init, cgroup_exit,
> cgroup_prep_move, cgroup_cancel_move, cgroup_move, cgroup_set_weight
> operations.
>
> Signed-off-by: Juntong Deng <juntong.deng@outlook.com>
> ---
>  kernel/sched/ext.c | 30 ++++++++++++++++++++++++++++++
>  1 file changed, 30 insertions(+)
>
> diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
> index 7f039a32f137..955fb0f5fc5e 100644
> --- a/kernel/sched/ext.c
> +++ b/kernel/sched/ext.c
> @@ -7079,9 +7079,39 @@ BTF_ID_FLAGS(func, scx_bpf_dispatch_from_dsq, KF_RCU)
>  BTF_ID_FLAGS(func, scx_bpf_dispatch_vtime_from_dsq, KF_RCU)
>  BTF_KFUNCS_END(scx_kfunc_ids_unlocked)
>
> +static int scx_kfunc_ids_unlocked_filter(const struct bpf_prog *prog, u32 kfunc_id)
> +{
> +       u32 moff;
> +
> +       if (!btf_id_set8_contains(&scx_kfunc_ids_unlocked, kfunc_id) ||
> +           prog->aux->st_ops != &bpf_sched_ext_ops)
> +               return 0;
> +
> +       moff = prog->aux->attach_st_ops_member_off;
> +       if (moff == offsetof(struct sched_ext_ops, init) ||
> +           moff == offsetof(struct sched_ext_ops, exit) ||
> +           moff == offsetof(struct sched_ext_ops, cpu_online) ||
> +           moff == offsetof(struct sched_ext_ops, cpu_offline) ||
> +           moff == offsetof(struct sched_ext_ops, init_task) ||
> +           moff == offsetof(struct sched_ext_ops, dump))
> +               return 0;
> +
> +#ifdef CONFIG_EXT_GROUP_SCHED
> +       if (moff == offsetof(struct sched_ext_ops, cgroup_init) ||
> +           moff == offsetof(struct sched_ext_ops, cgroup_exit) ||
> +           moff == offsetof(struct sched_ext_ops, cgroup_prep_move) ||
> +           moff == offsetof(struct sched_ext_ops, cgroup_cancel_move) ||
> +           moff == offsetof(struct sched_ext_ops, cgroup_move) ||
> +           moff == offsetof(struct sched_ext_ops, cgroup_set_weight))
> +               return 0;
> +#endif
> +       return -EACCES;
> +}
> +
>  static const struct btf_kfunc_id_set scx_kfunc_set_unlocked = {
>         .owner                  = THIS_MODULE,
>         .set                    = &scx_kfunc_ids_unlocked,
> +       .filter                 = scx_kfunc_ids_unlocked_filter,
>  };

why does sched-ext use so many id_set-s?

        if ((ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS,
                                             &scx_kfunc_set_select_cpu)) ||
            (ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS,
                                             &scx_kfunc_set_enqueue_dispatch)) ||
            (ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS,
                                             &scx_kfunc_set_dispatch)) ||
            (ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS,
                                             &scx_kfunc_set_cpu_release)) ||
            (ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS,
                                             &scx_kfunc_set_unlocked)) ||

Can they all be rolled into one id_set, so that patches 2-6 collapse
into one patch with one filter callback that describes the allowed
hook/kfunc combinations?
Andrea Righi Feb. 9, 2025, 3:22 p.m. UTC | #2
On Fri, Feb 07, 2025 at 07:37:51PM -0800, Alexei Starovoitov wrote:
> [...]
> 
> why does sched-ext use so many id_set-s?
> 
>         if ((ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS,
>                                              &scx_kfunc_set_select_cpu)) ||
>             (ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS,
>                                              &scx_kfunc_set_enqueue_dispatch)) ||
>             (ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS,
>                                              &scx_kfunc_set_dispatch)) ||
>             (ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS,
>                                              &scx_kfunc_set_cpu_release)) ||
>             (ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS,
>                                              &scx_kfunc_set_unlocked)) ||
> 
> Can they all be rolled into one id_set, so that patches 2-6 collapse
> into one patch with one filter callback that describes the allowed
> hook/kfunc combinations?

I think the idea was to group them in different sets based on their context
usage, like scx_kfunc_set_select_cpu kfuncs can be used only from
ops.select_cpu(), scx_kfunc_set_dispatch kfuncs can be used only from
ops.dispatch(), etc.

However, since the actual context enforcement is done by scx_kf_allowed(),
it seems that we could have just 3 sets to classify the kfuncs based on
their prog type:
 1) BPF_PROG_TYPE_STRUCT_OPS
 2) BPF_PROG_TYPE_STRUCT_OPS + BPF_PROG_TYPE_SYSCALL
 3) BPF_PROG_TYPE_STRUCT_OPS + BPF_PROG_TYPE_SYSCALL + BPF_PROG_TYPE_TRACING

-Andrea
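
For illustration, a three-way registration along those lines might look
as follows; this is a minimal sketch, and the merged set names
(scx_kfunc_set_ops_only, scx_kfunc_set_ops_syscall, scx_kfunc_set_any)
are hypothetical, with each set registered once per prog type allowed to
call into it:

	if ((ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS,
					     &scx_kfunc_set_ops_only)) ||
	    (ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS,
					     &scx_kfunc_set_ops_syscall)) ||
	    (ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_SYSCALL,
					     &scx_kfunc_set_ops_syscall)) ||
	    (ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS,
					     &scx_kfunc_set_any)) ||
	    (ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_SYSCALL,
					     &scx_kfunc_set_any)) ||
	    (ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING,
					     &scx_kfunc_set_any)))
		return ret;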
Tejun Heo Feb. 10, 2025, 6:05 p.m. UTC | #3
Hello,

On Fri, Feb 07, 2025 at 07:37:51PM -0800, Alexei Starovoitov wrote:
> Can they all be rolled into one id_set then
> the patches 2-6 will be collapsed into one patch and
> one filter callback that will describe allowed hook/kfunc combinations?

I thought the BPF-side filtering might be declarative on the kfunc sets and
tried to group the sets by where they can be called from. As we're going
procedural, there's no point in separating out the sets; we can use merged
kfunc sets and make all the distinctions in the filter function.

Thanks.
Juntong Deng Feb. 10, 2025, 11:40 p.m. UTC | #4
On 2025/2/8 03:37, Alexei Starovoitov wrote:
> [...]
> 
> why does sched-ext use so many id_set-s?
> 
> [...]
> 
> Can they all be rolled into one id_set, so that patches 2-6 collapse
> into one patch with one filter callback that describes the allowed
> hook/kfunc combinations?

Yes, I agree that it would be ideal to put all kfuncs in one id_set,
but I am not sure the implementation would be better.

For filters, the only kfunc-related information available is the
kfunc_id.

kfunc_id is not a stable value: for example, adding a new kfunc to the
kernel may change the kfunc_id of other kfuncs.

A simple experiment is to add a bpf_task_from_aaa kfunc; we will then
find that the kfunc_id of bpf_task_from_pid has changed.

This means that implementing kfunc grouping via id_sets is simple,
because we only need to check whether a kfunc_id exists in a specific
id_set; we do not need to care what the kfunc_id actually is.

But if we implement the grouping only in the filter, we may need to
first get the BTF type of the corresponding kfunc from its kfunc_id via
btf_type_by_id(), then get the kfunc name from that, and then group by
kfunc name inside the filter, which seems more complicated.
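
For comparison, a rough sketch of the name-based grouping described
above (the approach being argued against): the filter would have to
resolve the name from vmlinux BTF before it could classify anything.
bpf_get_btf_vmlinux(), btf_type_by_id() and btf_name_by_offset() are
existing helpers; check_unlocked_context() is a hypothetical stand-in:

	/* Illustrative only: classify a kfunc by name inside a filter. */
	static int scx_filter_by_name(const struct bpf_prog *prog, u32 kfunc_id)
	{
		const struct btf *btf = bpf_get_btf_vmlinux();
		const struct btf_type *t = btf_type_by_id(btf, kfunc_id);
		const char *name;

		if (!t)
			return 0;

		name = btf_name_by_offset(btf, t->name_off);
		/* One strcmp() per kfunc name, for every program verified. */
		if (name && !strcmp(name, "scx_bpf_create_dsq"))
			return check_unlocked_context(prog); /* hypothetical helper */

		return 0;
	}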
Alexei Starovoitov Feb. 11, 2025, 3:48 a.m. UTC | #5
On Mon, Feb 10, 2025 at 3:40 PM Juntong Deng <juntong.deng@outlook.com> wrote:
>
> [...]
>
> Yes, I agree that it would be ideal to put all kfuncs in one id_set,
> but I am not sure the implementation would be better.
>
> For filters, the only kfunc-related information available is the
> kfunc_id.
>
> kfunc_id is not a stable value: for example, adding a new kfunc to the
> kernel may change the kfunc_id of other kfuncs.
>
> A simple experiment is to add a bpf_task_from_aaa kfunc; we will then
> find that the kfunc_id of bpf_task_from_pid has changed.
>
> This means that implementing kfunc grouping via id_sets is simple,
> because we only need to check whether a kfunc_id exists in a specific
> id_set; we do not need to care what the kfunc_id actually is.
>
> But if we implement the grouping only in the filter, we may need to
> first get the BTF type of the corresponding kfunc from its kfunc_id via
> btf_type_by_id(), then get the kfunc name from that, and then group by
> kfunc name inside the filter, which seems more complicated.

I didn't mean to extract the kfunc name as a string and do strcmp() on it.
That's a non-starter.
I imagined a verifier-like approach of enum+set+list,
where the enum has all kfunc names,
the set gives efficient btf_id_set8_contains() access,
and list[KF_bpf_foo] gives the func_id to compare with.
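
A minimal sketch of that enum+set+list pattern, modeled on the
verifier's special_kfunc_list and using two existing scx kfuncs as
stand-ins; the scx_kfunc_set/scx_kfunc_list names and the helpers are
hypothetical, and the list entries must stay in enum order:

	enum scx_kfunc_type {
		KF_scx_bpf_create_dsq,
		KF_scx_bpf_dispatch_from_dsq,
	};

	BTF_SET_START(scx_kfunc_set)
	BTF_ID(func, scx_bpf_create_dsq)
	BTF_ID(func, scx_bpf_dispatch_from_dsq)
	BTF_SET_END(scx_kfunc_set)

	BTF_ID_LIST(scx_kfunc_list)
	BTF_ID(func, scx_bpf_create_dsq)
	BTF_ID(func, scx_bpf_dispatch_from_dsq)

	/* Fast membership test first; exact id compare only when one
	 * specific kfunc needs special-casing. */
	static bool is_scx_kfunc(u32 kfunc_id)
	{
		return btf_id_set_contains(&scx_kfunc_set, kfunc_id);
	}

	static bool is_create_dsq(u32 kfunc_id)
	{
		return kfunc_id == scx_kfunc_list[KF_scx_bpf_create_dsq];
	}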

But if the current breakdown of scx_kfunc_set_* fits well
with per-struct_ops-hook filtering then keep it.
But please think of a set approach for moff as well, to avoid
+           moff == offsetof(struct sched_ext_ops, exit) ||
+           moff == offsetof(struct sched_ext_ops, cpu_online) ||
+           moff == offsetof(struct sched_ext_ops, cpu_offline) ||

Then it will be:
if (btf_id_set8_contains(&scx_kfunc_ids_unlocked, kfunc_id) ...
&& moff_set_contains(.._unlocked, moff)) // allow

There is SCX_OP_IDX(). Maybe it can be used to populate a set.

Something like this:
static const u32 ops_flags[] = {
  [SCX_OP_IDX(cpu_online)] = KF_UNLOCKED,
  ..
};

if (btf_id_set8_contains(&scx_kfunc_ids_unlocked, kfunc_id) &&
    (ops_flags[moff / sizeof(void (*)(void))] & KF_UNLOCKED)) // allow
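
Fleshed out, a single merged filter along those lines might look as
follows; this is only a sketch. SCX_OP_IDX() exists in ext.c, while the
scx_ops_kf_flags table and the SCX_OPS_KF_* capability bits are
assumptions, not the series' actual implementation:

	/* Hypothetical per-op capability bits (not enum scx_kf_mask). */
	#define SCX_OPS_KF_UNLOCKED	(1 << 0)
	#define SCX_OPS_KF_DISPATCH	(1 << 1)

	static const u32 scx_ops_kf_flags[] = {
		[SCX_OP_IDX(init)]		= SCX_OPS_KF_UNLOCKED,
		[SCX_OP_IDX(exit)]		= SCX_OPS_KF_UNLOCKED,
		[SCX_OP_IDX(cpu_online)]	= SCX_OPS_KF_UNLOCKED,
		[SCX_OP_IDX(dispatch)]		= SCX_OPS_KF_DISPATCH,
		/* ... one entry per struct sched_ext_ops member ... */
	};

	static int scx_kfunc_filter(const struct bpf_prog *prog, u32 kfunc_id)
	{
		u32 moff = prog->aux->attach_st_ops_member_off;

		if (prog->aux->st_ops != &bpf_sched_ext_ops)
			return 0;

		if (btf_id_set8_contains(&scx_kfunc_ids_unlocked, kfunc_id) &&
		    !(scx_ops_kf_flags[moff / sizeof(void (*)(void))] &
		      SCX_OPS_KF_UNLOCKED))
			return -EACCES;

		return 0;
	}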
Juntong Deng Feb. 14, 2025, 8:30 p.m. UTC | #6
On 2025/2/11 03:48, Alexei Starovoitov wrote:
> [...]
> 
> I didn't mean to extract the kfunc name as a string and do strcmp() on it.
> That's a non-starter.
> I imagined a verifier-like approach of enum+set+list,
> where the enum has all kfunc names,
> the set gives efficient btf_id_set8_contains() access,
> and list[KF_bpf_foo] gives the func_id to compare with.
> 
> But if the current breakdown of scx_kfunc_set_* fits well
> with per-struct_ops-hook filtering then keep it.
> But please think of a set approach for moff as well, to avoid
> +           moff == offsetof(struct sched_ext_ops, exit) ||
> +           moff == offsetof(struct sched_ext_ops, cpu_online) ||
> +           moff == offsetof(struct sched_ext_ops, cpu_offline) ||
> 
> Then it will be:
> if (btf_id_set8_contains(&scx_kfunc_ids_unlocked, kfunc_id) ...
> && moff_set_contains(.._unlocked, moff)) // allow
> 
> There is SCX_OP_IDX(). Maybe it can be used to populate a set.
> 
> Something like this:
> static const u32 ops_flags[] = {
>    [SCX_OP_IDX(cpu_online)] = KF_UNLOCKED,
>    ..
> };
> 
> if (btf_id_set8_contains(&scx_kfunc_ids_unlocked, kfunc_id) &&
>      (ops_flags[moff / sizeof(void (*)(void))] & KF_UNLOCKED)) // allow

Thanks for letting me know about this method; it is a good one, and I
have used it in version 2 [0].

I also figured out a way to require only one filter.

[0]: https://lore.kernel.org/bpf/AM6PR03MB5080855B90C3FE9B6C4243B099FE2@AM6PR03MB5080.eurprd03.prod.outlook.com/T/#u

Patch

diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index 7f039a32f137..955fb0f5fc5e 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -7079,9 +7079,39 @@  BTF_ID_FLAGS(func, scx_bpf_dispatch_from_dsq, KF_RCU)
 BTF_ID_FLAGS(func, scx_bpf_dispatch_vtime_from_dsq, KF_RCU)
 BTF_KFUNCS_END(scx_kfunc_ids_unlocked)
 
+static int scx_kfunc_ids_unlocked_filter(const struct bpf_prog *prog, u32 kfunc_id)
+{
+	u32 moff;
+
+	if (!btf_id_set8_contains(&scx_kfunc_ids_unlocked, kfunc_id) ||
+	    prog->aux->st_ops != &bpf_sched_ext_ops)
+		return 0;
+
+	moff = prog->aux->attach_st_ops_member_off;
+	if (moff == offsetof(struct sched_ext_ops, init) ||
+	    moff == offsetof(struct sched_ext_ops, exit) ||
+	    moff == offsetof(struct sched_ext_ops, cpu_online) ||
+	    moff == offsetof(struct sched_ext_ops, cpu_offline) ||
+	    moff == offsetof(struct sched_ext_ops, init_task) ||
+	    moff == offsetof(struct sched_ext_ops, dump))
+		return 0;
+
+#ifdef CONFIG_EXT_GROUP_SCHED
+	if (moff == offsetof(struct sched_ext_ops, cgroup_init) ||
+	    moff == offsetof(struct sched_ext_ops, cgroup_exit) ||
+	    moff == offsetof(struct sched_ext_ops, cgroup_prep_move) ||
+	    moff == offsetof(struct sched_ext_ops, cgroup_cancel_move) ||
+	    moff == offsetof(struct sched_ext_ops, cgroup_move) ||
+	    moff == offsetof(struct sched_ext_ops, cgroup_set_weight))
+		return 0;
+#endif
+	return -EACCES;
+}
+
 static const struct btf_kfunc_id_set scx_kfunc_set_unlocked = {
 	.owner			= THIS_MODULE,
 	.set			= &scx_kfunc_ids_unlocked,
+	.filter			= scx_kfunc_ids_unlocked_filter,
 };
 
 __bpf_kfunc_start_defs();
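
As a usage illustration (not part of the patch), a toy sched_ext
scheduler shows what this filter enforces from the BPF side; the sketch
assumes the scx helper macros from tools/sched_ext/include/scx/common.bpf.h:

	#include <scx/common.bpf.h>

	char _license[] SEC("license") = "GPL";

	s32 BPF_STRUCT_OPS_SLEEPABLE(toy_init)
	{
		/* ops.init is in the unlocked list, so calling an unlocked
		 * kfunc here passes verification. */
		return scx_bpf_create_dsq(0, -1);
	}

	void BPF_STRUCT_OPS(toy_enqueue, struct task_struct *p, u64 enq_flags)
	{
		/* ops.enqueue is not in the unlocked list; with this filter,
		 * an unlocked-only kfunc call here would be rejected at load
		 * time rather than failing at runtime:
		 *
		 * scx_bpf_create_dsq(1, -1);
		 */
	}

	SEC(".struct_ops.link")
	struct sched_ext_ops toy_ops = {
		.init		= (void *)toy_init,
		.enqueue	= (void *)toy_enqueue,
		.name		= "toy",
	};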