Message ID | 20210916162451.709260-3-guro@fb.com (mailing list archive) |
---|---|
State | RFC |
Delegated to: | BPF |
Headers | show |
Series | Scheduler BPF | expand |
Context | Check | Description |
---|---|---|
netdev/tree_selection | success | Not a local patch |
bpf/vmtest-bpf-PR | fail | merge-conflict |
bpf/vmtest-bpf-next | success | VM_Test |
bpf/vmtest-bpf-next-PR | success | PR summary |
> On Sep 17, 2021, at 12:24 AM, Roman Gushchin <guro@fb.com> wrote: > > This patch adds 3 helpers useful for dealing with sched entities: > u64 bpf_sched_entity_to_tgidpid(struct sched_entity *se); > u64 bpf_sched_entity_to_cgrpid(struct sched_entity *se); > long bpf_sched_entity_belongs_to_cgrp(struct sched_entity *se, u64 cgrpid); > > Sched entity is a basic structure used by the scheduler to represent > schedulable objects: tasks and cgroups (if CONFIG_FAIR_GROUP_SCHED > is enabled). It will be passed as an argument to many bpf hooks, so > scheduler bpf programs need a convenient way to deal with it. > > bpf_sched_entity_to_tgidpid() and bpf_sched_entity_to_cgrpid() are > useful to identify a sched entity in userspace terms (pid, tgid and > cgroup id). bpf_sched_entity_belongs_to_cgrp() allows to check whether > a sched entity belongs to sub-tree of a cgroup. It allows to write > cgroup-specific scheduler policies even without enabling the cgroup > cpu controller. > > Signed-off-by: Roman Gushchin <guro@fb.com> > --- > include/uapi/linux/bpf.h | 23 +++++++++++ > kernel/sched/bpf_sched.c | 74 ++++++++++++++++++++++++++++++++++ > scripts/bpf_doc.py | 2 + > tools/include/uapi/linux/bpf.h | 23 +++++++++++ > 4 files changed, 122 insertions(+) > > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h > index 6dfbebb8fc8f..199e4a92820d 100644 > --- a/include/uapi/linux/bpf.h > +++ b/include/uapi/linux/bpf.h > @@ -4900,6 +4900,26 @@ union bpf_attr { > * **-EINVAL** if *flags* is not zero. > * > * **-ENOENT** if architecture does not support branch records. > + * > + * u64 bpf_sched_entity_to_tgidpid(struct sched_entity *se) > + * Description > + * Return task's encoded tgid and pid if the sched entity is a task. > + * Return > + * Tgid and pid encoded as tgid << 32 \| pid, if *se* is a task. (u64)-1 otherwise. > + * > + * u64 bpf_sched_entity_to_cgrpid(struct sched_entity *se) > + * Description > + * Return cgroup id if the given sched entity is a cgroup. 
> + * Return > + * Cgroup id, if *se* is a cgroup. (u64)-1 otherwise. > + * > + * long bpf_sched_entity_belongs_to_cgrp(struct sched_entity *se, u64 cgrpid) > + * Description > + * Checks whether the sched entity belongs to a cgroup or > + * it's sub-tree. It doesn't require a cgroup CPU controller > + * to be enabled. > + * Return > + * 1 if the sched entity belongs to a cgroup, 0 otherwise. > */ > #define __BPF_FUNC_MAPPER(FN) \ > FN(unspec), \ > @@ -5079,6 +5099,9 @@ union bpf_attr { > FN(get_attach_cookie), \ > FN(task_pt_regs), \ > FN(get_branch_snapshot), \ > + FN(sched_entity_to_tgidpid), \ > + FN(sched_entity_to_cgrpid), \ > + FN(sched_entity_belongs_to_cgrp), \ > /* */ > > /* integer value in 'imm' field of BPF_CALL instruction selects which helper > diff --git a/kernel/sched/bpf_sched.c b/kernel/sched/bpf_sched.c > index 2f05c186cfd0..ead691dc6e85 100644 > --- a/kernel/sched/bpf_sched.c > +++ b/kernel/sched/bpf_sched.c > @@ -42,12 +42,86 @@ int bpf_sched_verify_prog(struct bpf_verifier_log *vlog, > return 0; > } > > +BPF_CALL_1(bpf_sched_entity_to_tgidpid, struct sched_entity *, se) > +{ > + if (entity_is_task(se)) { > + struct task_struct *task = task_of(se); > + > + return (u64) task->tgid << 32 | task->pid; > + } else { > + return (u64) -1; > + } > +} > + > +BPF_CALL_1(bpf_sched_entity_to_cgrpid, struct sched_entity *, se) > +{ > +#ifdef CONFIG_FAIR_GROUP_SCHED > + if (!entity_is_task(se)) > + return cgroup_id(se->cfs_rq->tg->css.cgroup); > +#endif > + return (u64) -1; > +} > + > +BPF_CALL_2(bpf_sched_entity_belongs_to_cgrp, struct sched_entity *, se, > + u64, cgrpid) > +{ > +#ifdef CONFIG_CGROUPS > + struct cgroup *cgrp; > + int level; > + > + if (entity_is_task(se)) > + cgrp = task_dfl_cgroup(task_of(se)); > +#ifdef CONFIG_FAIR_GROUP_SCHED > + else > + cgrp = se->cfs_rq->tg->css.cgroup; It is incorrect. It should use se->my_q->tg->css.cgroup and some possible NULL check. (for autogroup) se->cfs_rq and se->my_q are different. 
se->my_q is the cfs_rq of this se itself, while the se->cfs_rq may be the parent. > +#endif > + > + for (level = cgrp->level; level; level--) > + if (cgrp->ancestor_ids[level] == cgrpid) > + return 1; > +#endif > + return 0; > +} > + > +BTF_ID_LIST_SINGLE(btf_sched_entity_ids, struct, sched_entity) > + > +static const struct bpf_func_proto bpf_sched_entity_to_tgidpid_proto = { > + .func = bpf_sched_entity_to_tgidpid, > + .gpl_only = false, > + .ret_type = RET_INTEGER, > + .arg1_type = ARG_PTR_TO_BTF_ID, > + .arg1_btf_id = &btf_sched_entity_ids[0], > +}; > + > +static const struct bpf_func_proto bpf_sched_entity_to_cgrpid_proto = { > + .func = bpf_sched_entity_to_cgrpid, > + .gpl_only = false, > + .ret_type = RET_INTEGER, > + .arg1_type = ARG_PTR_TO_BTF_ID, > + .arg1_btf_id = &btf_sched_entity_ids[0], > +}; > + > +static const struct bpf_func_proto bpf_sched_entity_belongs_to_cgrp_proto = { > + .func = bpf_sched_entity_belongs_to_cgrp, > + .gpl_only = false, > + .ret_type = RET_INTEGER, > + .arg1_type = ARG_PTR_TO_BTF_ID, > + .arg1_btf_id = &btf_sched_entity_ids[0], > + .arg2_type = ARG_ANYTHING, > +}; > + > static const struct bpf_func_proto * > bpf_sched_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) > { > switch (func_id) { > case BPF_FUNC_trace_printk: > return bpf_get_trace_printk_proto(); > + case BPF_FUNC_sched_entity_to_tgidpid: > + return &bpf_sched_entity_to_tgidpid_proto; > + case BPF_FUNC_sched_entity_to_cgrpid: > + return &bpf_sched_entity_to_cgrpid_proto; > + case BPF_FUNC_sched_entity_belongs_to_cgrp: > + return &bpf_sched_entity_belongs_to_cgrp_proto; > default: > return NULL; > } > diff --git a/scripts/bpf_doc.py b/scripts/bpf_doc.py > index 00ac7b79cddb..84019ba5b67b 100755 > --- a/scripts/bpf_doc.py > +++ b/scripts/bpf_doc.py > @@ -548,6 +548,7 @@ class PrinterHelpers(Printer): > 'struct socket', > 'struct file', > 'struct bpf_timer', > + 'struct sched_entity', > ] > known_types = { > '...', > @@ -596,6 +597,7 @@ class 
PrinterHelpers(Printer): > 'struct socket', > 'struct file', > 'struct bpf_timer', > + 'struct sched_entity', > } > mapped_types = { > 'u8': '__u8', > diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h > index 6dfbebb8fc8f..199e4a92820d 100644 > --- a/tools/include/uapi/linux/bpf.h > +++ b/tools/include/uapi/linux/bpf.h > @@ -4900,6 +4900,26 @@ union bpf_attr { > * **-EINVAL** if *flags* is not zero. > * > * **-ENOENT** if architecture does not support branch records. > + * > + * u64 bpf_sched_entity_to_tgidpid(struct sched_entity *se) > + * Description > + * Return task's encoded tgid and pid if the sched entity is a task. > + * Return > + * Tgid and pid encoded as tgid << 32 \| pid, if *se* is a task. (u64)-1 otherwise. > + * > + * u64 bpf_sched_entity_to_cgrpid(struct sched_entity *se) > + * Description > + * Return cgroup id if the given sched entity is a cgroup. > + * Return > + * Cgroup id, if *se* is a cgroup. (u64)-1 otherwise. > + * > + * long bpf_sched_entity_belongs_to_cgrp(struct sched_entity *se, u64 cgrpid) > + * Description > + * Checks whether the sched entity belongs to a cgroup or > + * it's sub-tree. It doesn't require a cgroup CPU controller > + * to be enabled. > + * Return > + * 1 if the sched entity belongs to a cgroup, 0 otherwise. > */ > #define __BPF_FUNC_MAPPER(FN) \ > FN(unspec), \ > @@ -5079,6 +5099,9 @@ union bpf_attr { > FN(get_attach_cookie), \ > FN(task_pt_regs), \ > FN(get_branch_snapshot), \ > + FN(sched_entity_to_tgidpid), \ > + FN(sched_entity_to_cgrpid), \ > + FN(sched_entity_belongs_to_cgrp), \ > /* */ > > /* integer value in 'imm' field of BPF_CALL instruction selects which helper > -- > 2.31.1 > >
On Thu, Nov 25, 2021 at 02:09:00PM +0800, Yafang Shao wrote: > > > > On Sep 17, 2021, at 12:24 AM, Roman Gushchin <guro@fb.com> wrote: > > > > This patch adds 3 helpers useful for dealing with sched entities: > > u64 bpf_sched_entity_to_tgidpid(struct sched_entity *se); > > u64 bpf_sched_entity_to_cgrpid(struct sched_entity *se); > > long bpf_sched_entity_belongs_to_cgrp(struct sched_entity *se, u64 cgrpid); > > > > Sched entity is a basic structure used by the scheduler to represent > > schedulable objects: tasks and cgroups (if CONFIG_FAIR_GROUP_SCHED > > is enabled). It will be passed as an argument to many bpf hooks, so > > scheduler bpf programs need a convenient way to deal with it. > > > > bpf_sched_entity_to_tgidpid() and bpf_sched_entity_to_cgrpid() are > > useful to identify a sched entity in userspace terms (pid, tgid and > > cgroup id). bpf_sched_entity_belongs_to_cgrp() allows to check whether > > a sched entity belongs to sub-tree of a cgroup. It allows to write > > cgroup-specific scheduler policies even without enabling the cgroup > > cpu controller. > > > > Signed-off-by: Roman Gushchin <guro@fb.com> > > --- > > include/uapi/linux/bpf.h | 23 +++++++++++ > > kernel/sched/bpf_sched.c | 74 ++++++++++++++++++++++++++++++++++ > > scripts/bpf_doc.py | 2 + > > tools/include/uapi/linux/bpf.h | 23 +++++++++++ > > 4 files changed, 122 insertions(+) > > > > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h > > index 6dfbebb8fc8f..199e4a92820d 100644 > > --- a/include/uapi/linux/bpf.h > > +++ b/include/uapi/linux/bpf.h > > @@ -4900,6 +4900,26 @@ union bpf_attr { > > * **-EINVAL** if *flags* is not zero. > > * > > * **-ENOENT** if architecture does not support branch records. > > + * > > + * u64 bpf_sched_entity_to_tgidpid(struct sched_entity *se) > > + * Description > > + * Return task's encoded tgid and pid if the sched entity is a task. > > + * Return > > + * Tgid and pid encoded as tgid << 32 \| pid, if *se* is a task. (u64)-1 otherwise. 
> > + * > > + * u64 bpf_sched_entity_to_cgrpid(struct sched_entity *se) > > + * Description > > + * Return cgroup id if the given sched entity is a cgroup. > > + * Return > > + * Cgroup id, if *se* is a cgroup. (u64)-1 otherwise. > > + * > > + * long bpf_sched_entity_belongs_to_cgrp(struct sched_entity *se, u64 cgrpid) > > + * Description > > + * Checks whether the sched entity belongs to a cgroup or > > + * it's sub-tree. It doesn't require a cgroup CPU controller > > + * to be enabled. > > + * Return > > + * 1 if the sched entity belongs to a cgroup, 0 otherwise. > > */ > > #define __BPF_FUNC_MAPPER(FN) \ > > FN(unspec), \ > > @@ -5079,6 +5099,9 @@ union bpf_attr { > > FN(get_attach_cookie), \ > > FN(task_pt_regs), \ > > FN(get_branch_snapshot), \ > > + FN(sched_entity_to_tgidpid), \ > > + FN(sched_entity_to_cgrpid), \ > > + FN(sched_entity_belongs_to_cgrp), \ > > /* */ > > > > /* integer value in 'imm' field of BPF_CALL instruction selects which helper > > diff --git a/kernel/sched/bpf_sched.c b/kernel/sched/bpf_sched.c > > index 2f05c186cfd0..ead691dc6e85 100644 > > --- a/kernel/sched/bpf_sched.c > > +++ b/kernel/sched/bpf_sched.c > > @@ -42,12 +42,86 @@ int bpf_sched_verify_prog(struct bpf_verifier_log *vlog, > > return 0; > > } > > > > +BPF_CALL_1(bpf_sched_entity_to_tgidpid, struct sched_entity *, se) > > +{ > > + if (entity_is_task(se)) { > > + struct task_struct *task = task_of(se); > > + > > + return (u64) task->tgid << 32 | task->pid; > > + } else { > > + return (u64) -1; > > + } > > +} > > + > > +BPF_CALL_1(bpf_sched_entity_to_cgrpid, struct sched_entity *, se) > > +{ > > +#ifdef CONFIG_FAIR_GROUP_SCHED > > + if (!entity_is_task(se)) > > + return cgroup_id(se->cfs_rq->tg->css.cgroup); > > +#endif > > + return (u64) -1; > > +} > > + > > +BPF_CALL_2(bpf_sched_entity_belongs_to_cgrp, struct sched_entity *, se, > > + u64, cgrpid) > > +{ > > +#ifdef CONFIG_CGROUPS > > + struct cgroup *cgrp; > > + int level; > > + > > + if (entity_is_task(se)) > > + cgrp = 
task_dfl_cgroup(task_of(se)); > > +#ifdef CONFIG_FAIR_GROUP_SCHED > > + else > > + cgrp = se->cfs_rq->tg->css.cgroup; > > It is incorrect. > It should use se->my_q->tg->css.cgroup and some possible NULL check. (for autogroup) > se->cfs_rq and se->my_q are different. se->my_q is the cfs_rq of this se itself, while the se->cfs_rq may be the parent. Indeed. Thanks, will fix in the next version.
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 6dfbebb8fc8f..199e4a92820d 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -4900,6 +4900,26 @@ union bpf_attr { * **-EINVAL** if *flags* is not zero. * * **-ENOENT** if architecture does not support branch records. + * + * u64 bpf_sched_entity_to_tgidpid(struct sched_entity *se) + * Description + * Return task's encoded tgid and pid if the sched entity is a task. + * Return + * Tgid and pid encoded as tgid << 32 \| pid, if *se* is a task. (u64)-1 otherwise. + * + * u64 bpf_sched_entity_to_cgrpid(struct sched_entity *se) + * Description + * Return cgroup id if the given sched entity is a cgroup. + * Return + * Cgroup id, if *se* is a cgroup. (u64)-1 otherwise. + * + * long bpf_sched_entity_belongs_to_cgrp(struct sched_entity *se, u64 cgrpid) + * Description + * Checks whether the sched entity belongs to a cgroup or + * it's sub-tree. It doesn't require a cgroup CPU controller + * to be enabled. + * Return + * 1 if the sched entity belongs to a cgroup, 0 otherwise. 
*/ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5079,6 +5099,9 @@ union bpf_attr { FN(get_attach_cookie), \ FN(task_pt_regs), \ FN(get_branch_snapshot), \ + FN(sched_entity_to_tgidpid), \ + FN(sched_entity_to_cgrpid), \ + FN(sched_entity_belongs_to_cgrp), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper diff --git a/kernel/sched/bpf_sched.c b/kernel/sched/bpf_sched.c index 2f05c186cfd0..ead691dc6e85 100644 --- a/kernel/sched/bpf_sched.c +++ b/kernel/sched/bpf_sched.c @@ -42,12 +42,86 @@ int bpf_sched_verify_prog(struct bpf_verifier_log *vlog, return 0; } +BPF_CALL_1(bpf_sched_entity_to_tgidpid, struct sched_entity *, se) +{ + if (entity_is_task(se)) { + struct task_struct *task = task_of(se); + + return (u64) task->tgid << 32 | task->pid; + } else { + return (u64) -1; + } +} + +BPF_CALL_1(bpf_sched_entity_to_cgrpid, struct sched_entity *, se) +{ +#ifdef CONFIG_FAIR_GROUP_SCHED + if (!entity_is_task(se)) + return cgroup_id(se->cfs_rq->tg->css.cgroup); +#endif + return (u64) -1; +} + +BPF_CALL_2(bpf_sched_entity_belongs_to_cgrp, struct sched_entity *, se, + u64, cgrpid) +{ +#ifdef CONFIG_CGROUPS + struct cgroup *cgrp; + int level; + + if (entity_is_task(se)) + cgrp = task_dfl_cgroup(task_of(se)); +#ifdef CONFIG_FAIR_GROUP_SCHED + else + cgrp = se->cfs_rq->tg->css.cgroup; +#endif + + for (level = cgrp->level; level; level--) + if (cgrp->ancestor_ids[level] == cgrpid) + return 1; +#endif + return 0; +} + +BTF_ID_LIST_SINGLE(btf_sched_entity_ids, struct, sched_entity) + +static const struct bpf_func_proto bpf_sched_entity_to_tgidpid_proto = { + .func = bpf_sched_entity_to_tgidpid, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_BTF_ID, + .arg1_btf_id = &btf_sched_entity_ids[0], +}; + +static const struct bpf_func_proto bpf_sched_entity_to_cgrpid_proto = { + .func = bpf_sched_entity_to_cgrpid, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_BTF_ID, + .arg1_btf_id = 
&btf_sched_entity_ids[0], +}; + +static const struct bpf_func_proto bpf_sched_entity_belongs_to_cgrp_proto = { + .func = bpf_sched_entity_belongs_to_cgrp, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_BTF_ID, + .arg1_btf_id = &btf_sched_entity_ids[0], + .arg2_type = ARG_ANYTHING, +}; + static const struct bpf_func_proto * bpf_sched_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { switch (func_id) { case BPF_FUNC_trace_printk: return bpf_get_trace_printk_proto(); + case BPF_FUNC_sched_entity_to_tgidpid: + return &bpf_sched_entity_to_tgidpid_proto; + case BPF_FUNC_sched_entity_to_cgrpid: + return &bpf_sched_entity_to_cgrpid_proto; + case BPF_FUNC_sched_entity_belongs_to_cgrp: + return &bpf_sched_entity_belongs_to_cgrp_proto; default: return NULL; } diff --git a/scripts/bpf_doc.py b/scripts/bpf_doc.py index 00ac7b79cddb..84019ba5b67b 100755 --- a/scripts/bpf_doc.py +++ b/scripts/bpf_doc.py @@ -548,6 +548,7 @@ class PrinterHelpers(Printer): 'struct socket', 'struct file', 'struct bpf_timer', + 'struct sched_entity', ] known_types = { '...', @@ -596,6 +597,7 @@ class PrinterHelpers(Printer): 'struct socket', 'struct file', 'struct bpf_timer', + 'struct sched_entity', } mapped_types = { 'u8': '__u8', diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 6dfbebb8fc8f..199e4a92820d 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -4900,6 +4900,26 @@ union bpf_attr { * **-EINVAL** if *flags* is not zero. * * **-ENOENT** if architecture does not support branch records. + * + * u64 bpf_sched_entity_to_tgidpid(struct sched_entity *se) + * Description + * Return task's encoded tgid and pid if the sched entity is a task. + * Return + * Tgid and pid encoded as tgid << 32 \| pid, if *se* is a task. (u64)-1 otherwise. + * + * u64 bpf_sched_entity_to_cgrpid(struct sched_entity *se) + * Description + * Return cgroup id if the given sched entity is a cgroup. 
+ * Return + * Cgroup id, if *se* is a cgroup. (u64)-1 otherwise. + * + * long bpf_sched_entity_belongs_to_cgrp(struct sched_entity *se, u64 cgrpid) + * Description + * Checks whether the sched entity belongs to a cgroup or + * it's sub-tree. It doesn't require a cgroup CPU controller + * to be enabled. + * Return + * 1 if the sched entity belongs to a cgroup, 0 otherwise. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5079,6 +5099,9 @@ union bpf_attr { FN(get_attach_cookie), \ FN(task_pt_regs), \ FN(get_branch_snapshot), \ + FN(sched_entity_to_tgidpid), \ + FN(sched_entity_to_cgrpid), \ + FN(sched_entity_belongs_to_cgrp), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper
This patch adds 3 helpers useful for dealing with sched entities: u64 bpf_sched_entity_to_tgidpid(struct sched_entity *se); u64 bpf_sched_entity_to_cgrpid(struct sched_entity *se); long bpf_sched_entity_belongs_to_cgrp(struct sched_entity *se, u64 cgrpid); A sched entity is a basic structure used by the scheduler to represent schedulable objects: tasks and cgroups (if CONFIG_FAIR_GROUP_SCHED is enabled). It will be passed as an argument to many bpf hooks, so scheduler bpf programs need a convenient way to deal with it. bpf_sched_entity_to_tgidpid() and bpf_sched_entity_to_cgrpid() are useful for identifying a sched entity in userspace terms (pid, tgid and cgroup id). bpf_sched_entity_belongs_to_cgrp() allows checking whether a sched entity belongs to the sub-tree of a cgroup. This makes it possible to write cgroup-specific scheduler policies even without enabling the cgroup cpu controller. Signed-off-by: Roman Gushchin <guro@fb.com> --- include/uapi/linux/bpf.h | 23 +++++++++++ kernel/sched/bpf_sched.c | 74 ++++++++++++++++++++++++++++++++++ scripts/bpf_doc.py | 2 + tools/include/uapi/linux/bpf.h | 23 +++++++++++ 4 files changed, 122 insertions(+)