Message ID | AM6PR03MB5080EDA5E2E2FDB96C98F72F99F72@AM6PR03MB5080.eurprd03.prod.outlook.com
---|---
State | Superseded
Delegated to: | BPF
Series | bpf, sched_ext: Make kfunc filters support struct_ops context to reduce runtime overhead
On Wed, Feb 5, 2025 at 11:35 AM Juntong Deng <juntong.deng@outlook.com> wrote:
>
> This patch adds filter for scx_kfunc_ids_unlocked.
>
> The kfuncs in the scx_kfunc_ids_unlocked set can be used in init, exit,
> cpu_online, cpu_offline, init_task, dump, cgroup_init, cgroup_exit,
> cgroup_prep_move, cgroup_cancel_move, cgroup_move, cgroup_set_weight
> operations.
>
> Signed-off-by: Juntong Deng <juntong.deng@outlook.com>
> ---
> [... full patch quoted; see the diff at the end of this page ...]

why does sched-ext use so many id_set-s ?

    if ((ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS,
                                         &scx_kfunc_set_select_cpu)) ||
        (ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS,
                                         &scx_kfunc_set_enqueue_dispatch)) ||
        (ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS,
                                         &scx_kfunc_set_dispatch)) ||
        (ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS,
                                         &scx_kfunc_set_cpu_release)) ||
        (ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS,
                                         &scx_kfunc_set_unlocked)) ||

Can they all be rolled into one id_set then the patches 2-6 will be
collapsed into one patch and one filter callback that will describe
allowed hook/kfunc combinations?
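[A minimal sketch of the "one id_set, one filter" shape being asked for; scx_kfunc_ids_all, scx_kfunc_set_all and scx_kfunc_filter are hypothetical names, and the set body is elided:]

    /* Hypothetical merged set covering every scx kfunc. */
    BTF_KFUNCS_START(scx_kfunc_ids_all)
    /* ...BTF_ID_FLAGS() entries for all kfuncs from the five sets above... */
    BTF_KFUNCS_END(scx_kfunc_ids_all)

    /* One filter describes all allowed hook/kfunc combinations. */
    static int scx_kfunc_filter(const struct bpf_prog *prog, u32 kfunc_id);

    static const struct btf_kfunc_id_set scx_kfunc_set_all = {
    	.owner	= THIS_MODULE,
    	.set	= &scx_kfunc_ids_all,
    	.filter	= scx_kfunc_filter,
    };

    /* A single registration then replaces the if-chain above. */
    static int scx_register_kfuncs(void)
    {
    	return register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS,
    					 &scx_kfunc_set_all);
    }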
On Fri, Feb 07, 2025 at 07:37:51PM -0800, Alexei Starovoitov wrote:
> On Wed, Feb 5, 2025 at 11:35 AM Juntong Deng <juntong.deng@outlook.com> wrote:
> > This patch adds filter for scx_kfunc_ids_unlocked.
> > [...]
>
> why does sched-ext use so many id_set-s ?
> [...]
> Can they all be rolled into one id_set then
> the patches 2-6 will be collapsed into one patch and
> one filter callback that will describe allowed hook/kfunc combinations?

I think the idea was to group them in different sets based on their
context usage, like scx_kfunc_set_select_cpu kfuncs can be used only
from ops.select_cpu(), scx_kfunc_set_dispatch kfuncs can be used only
from ops.dispatch(), etc.
However, since the actual context enforcement is done by
scx_kf_allowed(), it seems that we could have just 3 sets to classify
the kfuncs based on their prog type:

 1) BPF_PROG_TYPE_STRUCT_OPS
 2) BPF_PROG_TYPE_STRUCT_OPS + BPF_PROG_TYPE_SYSCALL
 3) BPF_PROG_TYPE_STRUCT_OPS + BPF_PROG_TYPE_SYSCALL + BPF_PROG_TYPE_TRACING

-Andrea
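[For illustration, a minimal sketch of that classification using the existing register_btf_kfunc_id_set() API; the three set names are hypothetical and the kfunc membership of each set is elided:]

    static int scx_register_kfunc_sets(void)
    {
    	int ret;

    	/* 1) callable from struct_ops programs only */
    	ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS,
    					&scx_kfunc_set_ops_only);
    	if (ret)
    		return ret;

    	/* 2) also callable from BPF_PROG_TYPE_SYSCALL programs */
    	ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS,
    					&scx_kfunc_set_ops_syscall);
    	ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SYSCALL,
    					       &scx_kfunc_set_ops_syscall);
    	if (ret)
    		return ret;

    	/* 3) also callable from tracing programs */
    	ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS,
    					&scx_kfunc_set_any);
    	ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SYSCALL,
    					       &scx_kfunc_set_any);
    	return ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING,
    						&scx_kfunc_set_any);
    }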
Hello,

On Fri, Feb 07, 2025 at 07:37:51PM -0800, Alexei Starovoitov wrote:
> Can they all be rolled into one id_set then
> the patches 2-6 will be collapsed into one patch and
> one filter callback that will describe allowed hook/kfunc combinations?

I thought the BPF-side filtering might be declarative on the kfunc sets
and tried to group the sets by where they can be called from. As we're
going procedural, there's no point in separating out the sets and we can
use merged kfunc sets and do all the distinctions in the filter function.

Thanks.
On 2025/2/8 03:37, Alexei Starovoitov wrote:
> On Wed, Feb 5, 2025 at 11:35 AM Juntong Deng <juntong.deng@outlook.com> wrote:
>> [...]
>
> why does sched-ext use so many id_set-s ?
> [...]
> Can they all be rolled into one id_set then
> the patches 2-6 will be collapsed into one patch and
> one filter callback that will describe allowed hook/kfunc combinations?

Yes, I agree that it would be ideal to put all kfuncs in one id_set,
but I am not sure that this is better in implementation.

For filters, the only kfunc-related information that can be known is
the kfunc_id.

kfunc_id is not a stable value: for example, when we add a new kfunc to
the kernel, it may cause the kfunc_id of other kfuncs to change.

A simple experiment is to add a bpf_task_from_aaa kfunc, and then we
will find that the kfunc_id of bpf_task_from_pid has changed.
This means that it is simple for us to implement kfunc grouping via
id_set, because we only need to check whether the kfunc_id exists in a
specific id_set; we do not need to care about what the kfunc_id is.

But if we implement grouping only in the filter, we may need to first
get the btf type of the corresponding kfunc based on the kfunc_id via
btf_type_by_id(), then further get the kfunc name, and then group based
on the kfunc name in the filter, which seems more complicated.
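[For illustration only, a sketch of the name-based grouping described here, resolving the unstable kfunc_id back to a name via the vmlinux BTF; Alexei calls the strcmp() variant a non-starter in his reply below. The two names checked are taken from the scx_kfunc_ids_unlocked set visible in the diff at the end of this page:]

    static bool scx_kfunc_is_unlocked_by_name(u32 kfunc_id)
    {
    	const struct btf *btf = bpf_get_btf_vmlinux();
    	const struct btf_type *t;
    	const char *name;

    	/* Map the (unstable) kfunc_id back to its BTF type... */
    	t = btf_type_by_id(btf, kfunc_id);
    	if (!t)
    		return false;

    	/* ...then to its name... */
    	name = btf_name_by_offset(btf, t->name_off);
    	if (!name)
    		return false;

    	/* ...and group with one strcmp() per kfunc in the group. */
    	return !strcmp(name, "scx_bpf_dispatch_from_dsq") ||
    	       !strcmp(name, "scx_bpf_dispatch_vtime_from_dsq");
    }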
On Mon, Feb 10, 2025 at 3:40 PM Juntong Deng <juntong.deng@outlook.com> wrote:
>
> On 2025/2/8 03:37, Alexei Starovoitov wrote:
> > [...]
> > Can they all be rolled into one id_set then
> > the patches 2-6 will be collapsed into one patch and
> > one filter callback that will describe allowed hook/kfunc combinations?
>
> Yes, I agree that it would be ideal to put all kfuncs in one id_set,
> but I am not sure that this is better in implementation.
>
> For filters, the only kfunc-related information that can be known is
> the kfunc_id.
>
> kfunc_id is not a stable value, for example, when we add a new kfunc to
> the kernel, it may cause the kfunc_id of other kfuncs to change.
>
> A simple experiment is to add a bpf_task_from_aaa kfunc, and then we
> will find that the kfunc_id of bpf_task_from_pid has changed.
>
> This means that it is simple for us to implement kfuncs grouping via
> id_set because we only need to check if kfunc_id exists in a specific
> id_set, we do not need to care about what kfunc_id is.
>
> But if we implement grouping only in the filter, we may need to first
> get the btf type of the corresponding kfunc based on the kfunc_id via
> btf_type_by_id, and then further get the kfunc name, and then group
> based on the kfunc name in the filter, which seems more complicated.

I didn't mean to extract the kfunc name as a string and do strcmp() on
it. That's a non-starter.

I imagined a verifier-like approach of enum+set+list, where the enum has
all kfunc names, the set gives efficient btf_id_set8_contains() access,
and list[KF_bpf_foo] gives the func_id to compare with.

But if the current breakdown of scx_kfunc_set_* fits well with per
struct_ops hook filtering then keep it. But please think of a set
approach for moff as well, to avoid:

+       moff == offsetof(struct sched_ext_ops, exit) ||
+       moff == offsetof(struct sched_ext_ops, cpu_online) ||
+       moff == offsetof(struct sched_ext_ops, cpu_offline) ||

Then it will be:

    if (btf_id_set8_contains(&scx_kfunc_ids_unlocked, kfunc_id) ...
        && moff_set_contains(.._unlocked, moff)) // allow

There is SCX_OP_IDX(). Maybe it can be used to populate a set.
Something like this:

    static const u32 ops_flags[] = {
        [SCX_OP_IDX(cpu_online)] = KF_UNLOCKED,
        ..
    };

    if (btf_id_set8_contains(&scx_kfunc_ids_unlocked, kfunc_id) &&
        (ops_flags[moff / sizeof(void (*)(void))] & KF_UNLOCKED)) // allow
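[A fleshed-out sketch of this table approach as a single filter. KF_UNLOCKED and scx_ops_context_flags follow Alexei's pseudocode names, SCX_OP_IDX() mirrors the existing helper in kernel/sched/ext.c, and the v2 series linked in the next message is the authoritative version:]

    #define SCX_OP_IDX(op)	(offsetof(struct sched_ext_ops, op) / sizeof(void (*)(void)))
    #define KF_UNLOCKED	(1U << 0)

    /* Per-hook flags: which kfunc groups each struct_ops member may call. */
    static const u32 scx_ops_context_flags[] = {
    	[SCX_OP_IDX(init)]		= KF_UNLOCKED,
    	[SCX_OP_IDX(exit)]		= KF_UNLOCKED,
    	[SCX_OP_IDX(cpu_online)]	= KF_UNLOCKED,
    	[SCX_OP_IDX(cpu_offline)]	= KF_UNLOCKED,
    	[SCX_OP_IDX(init_task)]		= KF_UNLOCKED,
    	[SCX_OP_IDX(dump)]		= KF_UNLOCKED,
    };

    static int scx_kfunc_filter(const struct bpf_prog *prog, u32 kfunc_id)
    {
    	u32 moff, idx;

    	if (prog->aux->st_ops != &bpf_sched_ext_ops)
    		return 0;

    	moff = prog->aux->attach_st_ops_member_off;
    	idx = moff / sizeof(void (*)(void));

    	/* Reject unlocked kfuncs from hooks not flagged KF_UNLOCKED. */
    	if (btf_id_set8_contains(&scx_kfunc_ids_unlocked, kfunc_id) &&
    	    (idx >= ARRAY_SIZE(scx_ops_context_flags) ||
    	     !(scx_ops_context_flags[idx] & KF_UNLOCKED)))
    		return -EACCES;

    	/* ...one such check per kfunc group, all in this one filter... */
    	return 0;
    }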
On 2025/2/11 03:48, Alexei Starovoitov wrote:
> On Mon, Feb 10, 2025 at 3:40 PM Juntong Deng <juntong.deng@outlook.com> wrote:
>>
>> On 2025/2/8 03:37, Alexei Starovoitov wrote:
>>> [...]
>>
>> Yes, I agree that it would be ideal to put all kfuncs in one id_set,
>> but I am not sure that this is better in implementation.
>>
>> For filters, the only kfunc-related information that can be known is
>> the kfunc_id.
>>
>> kfunc_id is not a stable value, [...]
>
> I didn't mean to extract kfunc name as a string and do strcmp() on it.
> That's a non-starter.
> [...]
> There is SCX_OP_IDX(). Maybe it can be used to populate a set.
>
> Something like this:
>
>     static const u32 ops_flags[] = {
>         [SCX_OP_IDX(cpu_online)] = KF_UNLOCKED,
>         ..
>     };
>
>     if (btf_id_set8_contains(&scx_kfunc_ids_unlocked, kfunc_id) &&
>         (ops_flags[moff / sizeof(void (*)(void))] & KF_UNLOCKED)) // allow

Thanks for letting me know about this method. It is a good method, and
I have used it in version 2 [0].

Also, I figured out a way to require only one filter.

[0]: https://lore.kernel.org/bpf/AM6PR03MB5080855B90C3FE9B6C4243B099FE2@AM6PR03MB5080.eurprd03.prod.outlook.com/T/#u
This patch adds filter for scx_kfunc_ids_unlocked.

The kfuncs in the scx_kfunc_ids_unlocked set can be used in init, exit,
cpu_online, cpu_offline, init_task, dump, cgroup_init, cgroup_exit,
cgroup_prep_move, cgroup_cancel_move, cgroup_move, cgroup_set_weight
operations.

Signed-off-by: Juntong Deng <juntong.deng@outlook.com>
---
 kernel/sched/ext.c | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index 7f039a32f137..955fb0f5fc5e 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -7079,9 +7079,39 @@ BTF_ID_FLAGS(func, scx_bpf_dispatch_from_dsq, KF_RCU)
 BTF_ID_FLAGS(func, scx_bpf_dispatch_vtime_from_dsq, KF_RCU)
 BTF_KFUNCS_END(scx_kfunc_ids_unlocked)
 
+static int scx_kfunc_ids_unlocked_filter(const struct bpf_prog *prog, u32 kfunc_id)
+{
+	u32 moff;
+
+	if (!btf_id_set8_contains(&scx_kfunc_ids_unlocked, kfunc_id) ||
+	    prog->aux->st_ops != &bpf_sched_ext_ops)
+		return 0;
+
+	moff = prog->aux->attach_st_ops_member_off;
+	if (moff == offsetof(struct sched_ext_ops, init) ||
+	    moff == offsetof(struct sched_ext_ops, exit) ||
+	    moff == offsetof(struct sched_ext_ops, cpu_online) ||
+	    moff == offsetof(struct sched_ext_ops, cpu_offline) ||
+	    moff == offsetof(struct sched_ext_ops, init_task) ||
+	    moff == offsetof(struct sched_ext_ops, dump))
+		return 0;
+
+#ifdef CONFIG_EXT_GROUP_SCHED
+	if (moff == offsetof(struct sched_ext_ops, cgroup_init) ||
+	    moff == offsetof(struct sched_ext_ops, cgroup_exit) ||
+	    moff == offsetof(struct sched_ext_ops, cgroup_prep_move) ||
+	    moff == offsetof(struct sched_ext_ops, cgroup_cancel_move) ||
+	    moff == offsetof(struct sched_ext_ops, cgroup_move) ||
+	    moff == offsetof(struct sched_ext_ops, cgroup_set_weight))
+		return 0;
+#endif
+	return -EACCES;
+}
+
 static const struct btf_kfunc_id_set scx_kfunc_set_unlocked = {
 	.owner			= THIS_MODULE,
 	.set			= &scx_kfunc_ids_unlocked,
+	.filter			= scx_kfunc_ids_unlocked_filter,
 };
 
 __bpf_kfunc_start_defs();
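[For context, a hypothetical minimal BPF scheduler fragment showing the effect from the program side: calling an unlocked kfunc from ops.init passes this filter, while the same call from ops.enqueue would now be rejected at load time instead of failing in scx_kf_allowed() at runtime. scx_bpf_create_dsq's membership in the unlocked set is assumed here, since only the tail of the set is visible in the diff above:]

    #include <scx/common.bpf.h>

    char _license[] SEC("license") = "GPL";

    #define SHARED_DSQ 0	/* arbitrary example DSQ id */

    s32 BPF_STRUCT_OPS_SLEEPABLE(filter_demo_init)
    {
    	/* ops.init is in the filter's allow list: this call verifies. */
    	return scx_bpf_create_dsq(SHARED_DSQ, -1);
    }

    void BPF_STRUCT_OPS(filter_demo_enqueue, struct task_struct *p, u64 enq_flags)
    {
    	/*
    	 * ops.enqueue is not in the allow list: uncommenting the call
    	 * below would make the verifier reject the program with -EACCES.
    	 */
    	/* scx_bpf_create_dsq(SHARED_DSQ, -1); */
    }

    SEC(".struct_ops.link")
    struct sched_ext_ops filter_demo_ops = {
    	.init		= (void *)filter_demo_init,
    	.enqueue	= (void *)filter_demo_enqueue,
    	.name		= "filter_demo",
    };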