Message ID | 20220610194435.2268290-5-yosryahmed@google.com (mailing list archive) |
---|---|
State | Changes Requested |
Delegated to: | BPF |
Headers | show |
Series | bpf: rstat: cgroup hierarchical stats | expand |
Hi Yosry, Thank you for the patch! Perhaps something to improve: [auto build test WARNING on bpf-next/master] url: https://github.com/intel-lab-lkp/linux/commits/Yosry-Ahmed/bpf-rstat-cgroup-hierarchical-stats/20220611-034720 base: https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git master config: m68k-defconfig (https://download.01.org/0day-ci/archive/20220611/202206111453.xWWh2wMK-lkp@intel.com/config) compiler: m68k-linux-gcc (GCC) 11.3.0 reproduce (this is a W=1 build): wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross chmod +x ~/bin/make.cross # https://github.com/intel-lab-lkp/linux/commit/619857fd1ec4f351376ffcaaec20acc9aae9486f git remote add linux-review https://github.com/intel-lab-lkp/linux git fetch --no-tags linux-review Yosry-Ahmed/bpf-rstat-cgroup-hierarchical-stats/20220611-034720 git checkout 619857fd1ec4f351376ffcaaec20acc9aae9486f # save the config file mkdir build_dir && cp config build_dir/.config COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-11.3.0 make.cross W=1 O=build_dir ARCH=m68k SHELL=/bin/bash kernel/bpf/ kernel/cgroup/ If you fix the issue, kindly add following tag where applicable Reported-by: kernel test robot <lkp@intel.com> All warnings (new ones prefixed by >>): In file included from kernel/bpf/cgroup_iter.c:9: kernel/bpf/../cgroup/cgroup-internal.h:78:41: error: field 'iter' has incomplete type 78 | struct css_task_iter iter; | ^~~~ kernel/bpf/../cgroup/cgroup-internal.h: In function 'cgroup_is_dead': kernel/bpf/../cgroup/cgroup-internal.h:188:22: error: invalid use of undefined type 'const struct cgroup' 188 | return !(cgrp->self.flags & CSS_ONLINE); | ^~ kernel/bpf/../cgroup/cgroup-internal.h:188:37: error: 'CSS_ONLINE' undeclared (first use in this function); did you mean 'N_ONLINE'? 188 | return !(cgrp->self.flags & CSS_ONLINE); | ^~~~~~~~~~ | N_ONLINE kernel/bpf/../cgroup/cgroup-internal.h:188:37: note: each undeclared identifier is reported only once for each function it appears in kernel/bpf/../cgroup/cgroup-internal.h: In function 'notify_on_release': kernel/bpf/../cgroup/cgroup-internal.h:193:25: error: 'CGRP_NOTIFY_ON_RELEASE' undeclared (first use in this function) 193 | return test_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags); | ^~~~~~~~~~~~~~~~~~~~~~ kernel/bpf/../cgroup/cgroup-internal.h:193:54: error: invalid use of undefined type 'const struct cgroup' 193 | return test_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags); | ^~ kernel/bpf/../cgroup/cgroup-internal.h: In function 'put_css_set': kernel/bpf/../cgroup/cgroup-internal.h:207:39: error: invalid use of undefined type 'struct css_set' 207 | if (refcount_dec_not_one(&cset->refcount)) | ^~ kernel/bpf/../cgroup/cgroup-internal.h: In function 'get_css_set': kernel/bpf/../cgroup/cgroup-internal.h:220:27: error: invalid use of undefined type 'struct css_set' 220 | refcount_inc(&cset->refcount); | ^~ kernel/bpf/../cgroup/cgroup-internal.h: At top level: kernel/bpf/../cgroup/cgroup-internal.h:284:22: error: array type has incomplete element type 'struct cftype' 284 | extern struct cftype cgroup1_base_files[]; | ^~~~~~~~~~~~~~~~~~ kernel/bpf/cgroup_iter.c: In function 'cgroup_iter_seq_start': kernel/bpf/cgroup_iter.c:55:24: error: implicit declaration of function 'css_next_descendant_pre' [-Werror=implicit-function-declaration] 55 | return css_next_descendant_pre(NULL, p->start_css); | ^~~~~~~~~~~~~~~~~~~~~~~ >> kernel/bpf/cgroup_iter.c:55:24: warning: returning 'int' from a function with return type 'void *' makes pointer from integer without a cast [-Wint-conversion] 55 | return css_next_descendant_pre(NULL, p->start_css); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ kernel/bpf/cgroup_iter.c:57:24: error: implicit declaration of function 'css_next_descendant_post' [-Werror=implicit-function-declaration] 57 | return css_next_descendant_post(NULL, p->start_css); | ^~~~~~~~~~~~~~~~~~~~~~~~ kernel/bpf/cgroup_iter.c:57:24: warning: returning 'int' from a function with return type 'void *' makes pointer from integer without a cast [-Wint-conversion] 57 | return css_next_descendant_post(NULL, p->start_css); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ kernel/bpf/cgroup_iter.c: In function 'cgroup_iter_seq_next': kernel/bpf/cgroup_iter.c:83:24: warning: returning 'int' from a function with return type 'void *' makes pointer from integer without a cast [-Wint-conversion] 83 | return css_next_descendant_pre(curr, p->start_css); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ kernel/bpf/cgroup_iter.c:85:24: warning: returning 'int' from a function with return type 'void *' makes pointer from integer without a cast [-Wint-conversion] 85 | return css_next_descendant_post(curr, p->start_css); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ kernel/bpf/cgroup_iter.c:87:28: error: invalid use of undefined type 'struct cgroup_subsys_state' 87 | return curr->parent; | ^~ kernel/bpf/cgroup_iter.c: In function '__cgroup_iter_seq_show': kernel/bpf/cgroup_iter.c:100:38: error: invalid use of undefined type 'struct cgroup_subsys_state' 100 | if (css && cgroup_is_dead(css->cgroup)) | ^~ kernel/bpf/cgroup_iter.c:104:31: error: invalid use of undefined type 'struct cgroup_subsys_state' 104 | ctx.cgroup = css ? css->cgroup : NULL; | ^~ kernel/bpf/cgroup_iter.c: In function 'cgroup_iter_seq_init': kernel/bpf/cgroup_iter.c:137:29: error: invalid use of undefined type 'struct cgroup' 137 | p->start_css = &cgrp->self; | ^~ kernel/bpf/cgroup_iter.c: In function 'bpf_iter_attach_cgroup': kernel/bpf/cgroup_iter.c:157:24: error: implicit declaration of function 'cgroup_get_from_fd'; did you mean 'cgroup_get_from_id'? [-Werror=implicit-function-declaration] 157 | cgrp = cgroup_get_from_fd(fd); | ^~~~~~~~~~~~~~~~~~ | cgroup_get_from_id >> kernel/bpf/cgroup_iter.c:157:22: warning: assignment to 'struct cgroup *' from 'int' makes pointer from integer without a cast [-Wint-conversion] 157 | cgrp = cgroup_get_from_fd(fd); | ^ kernel/bpf/cgroup_iter.c:159:24: error: implicit declaration of function 'cgroup_get_from_path'; did you mean 'cgroup_get_from_id'? [-Werror=implicit-function-declaration] 159 | cgrp = cgroup_get_from_path("/"); | ^~~~~~~~~~~~~~~~~~~~ | cgroup_get_from_id kernel/bpf/cgroup_iter.c:159:22: warning: assignment to 'struct cgroup *' from 'int' makes pointer from integer without a cast [-Wint-conversion] 159 | cgrp = cgroup_get_from_path("/"); | ^ kernel/bpf/cgroup_iter.c: In function 'bpf_iter_cgroup_show_fdinfo': kernel/bpf/cgroup_iter.c:190:9: error: implicit declaration of function 'cgroup_path_ns'; did you mean 'cgroup_parent'? [-Werror=implicit-function-declaration] 190 | cgroup_path_ns(aux->cgroup.start, buf, PATH_MAX, | ^~~~~~~~~~~~~~ | cgroup_parent kernel/bpf/cgroup_iter.c: In function 'cgroup_iter_seq_next': kernel/bpf/cgroup_iter.c:88:1: error: control reaches end of non-void function [-Werror=return-type] 88 | } | ^ cc1: some warnings being treated as errors vim +55 kernel/bpf/cgroup_iter.c 41 42 static void *cgroup_iter_seq_start(struct seq_file *seq, loff_t *pos) 43 { 44 struct cgroup_iter_priv *p = seq->private; 45 46 mutex_lock(&cgroup_mutex); 47 48 /* support only one session */ 49 if (*pos > 0) 50 return NULL; 51 52 ++*pos; 53 p->terminate = false; 54 if (p->order == BPF_ITER_CGROUP_PRE) > 55 return css_next_descendant_pre(NULL, p->start_css); 56 else if (p->order == BPF_ITER_CGROUP_POST) 57 return css_next_descendant_post(NULL, p->start_css); 58 else /* BPF_ITER_CGROUP_PARENT_UP */ 59 return p->start_css; 60 } 61 62 static int __cgroup_iter_seq_show(struct seq_file *seq, 63 struct cgroup_subsys_state *css, int in_stop); 64 65 static void cgroup_iter_seq_stop(struct seq_file *seq, void *v) 66 { 67 /* pass NULL to the prog for post-processing */ 68 if (!v) 69 __cgroup_iter_seq_show(seq, NULL, true); 70 mutex_unlock(&cgroup_mutex); 71 } 72 73 static void *cgroup_iter_seq_next(struct seq_file *seq, void *v, loff_t *pos) 74 { 75 struct cgroup_subsys_state *curr = (struct cgroup_subsys_state *)v; 76 struct cgroup_iter_priv *p = seq->private; 77 78 ++*pos; 79 if (p->terminate) 80 return NULL; 81 82 if (p->order == BPF_ITER_CGROUP_PRE) 83 return css_next_descendant_pre(curr, p->start_css); 84 else if (p->order == BPF_ITER_CGROUP_POST) 85 return css_next_descendant_post(curr, p->start_css); 86 else 87 return curr->parent; 88 } 89 90 static int __cgroup_iter_seq_show(struct seq_file *seq, 91 struct cgroup_subsys_state *css, int in_stop) 92 { 93 struct cgroup_iter_priv *p = seq->private; 94 struct bpf_iter__cgroup ctx; 95 struct bpf_iter_meta meta; 96 struct bpf_prog *prog; 97 int ret = 0; 98 99 /* cgroup is dead, skip this element */ 100 if (css && cgroup_is_dead(css->cgroup)) 101 return 0; 102 103 ctx.meta = &meta; 104 ctx.cgroup = css ? css->cgroup : NULL; 105 meta.seq = seq; 106 prog = bpf_iter_get_info(&meta, in_stop); 107 if (prog) 108 ret = bpf_iter_run_prog(prog, &ctx); 109 110 /* if prog returns > 0, terminate after this element. */ 111 if (ret != 0) 112 p->terminate = true; 113 114 return 0; 115 } 116 117 static int cgroup_iter_seq_show(struct seq_file *seq, void *v) 118 { 119 return __cgroup_iter_seq_show(seq, (struct cgroup_subsys_state *)v, 120 false); 121 } 122 123 static const struct seq_operations cgroup_iter_seq_ops = { 124 .start = cgroup_iter_seq_start, 125 .next = cgroup_iter_seq_next, 126 .stop = cgroup_iter_seq_stop, 127 .show = cgroup_iter_seq_show, 128 }; 129 130 BTF_ID_LIST_SINGLE(bpf_cgroup_btf_id, struct, cgroup) 131 132 static int cgroup_iter_seq_init(void *priv, struct bpf_iter_aux_info *aux) 133 { 134 struct cgroup_iter_priv *p = (struct cgroup_iter_priv *)priv; 135 struct cgroup *cgrp = aux->cgroup.start; 136 137 p->start_css = &cgrp->self; 138 p->terminate = false; 139 p->order = aux->cgroup.order; 140 return 0; 141 } 142 143 static const struct bpf_iter_seq_info cgroup_iter_seq_info = { 144 .seq_ops = &cgroup_iter_seq_ops, 145 .init_seq_private = cgroup_iter_seq_init, 146 .seq_priv_size = sizeof(struct cgroup_iter_priv), 147 }; 148 149 static int bpf_iter_attach_cgroup(struct bpf_prog *prog, 150 union bpf_iter_link_info *linfo, 151 struct bpf_iter_aux_info *aux) 152 { 153 int fd = linfo->cgroup.cgroup_fd; 154 struct cgroup *cgrp; 155 156 if (fd) > 157 cgrp = cgroup_get_from_fd(fd); 158 else /* walk the entire hierarchy by default. */ 159 cgrp = cgroup_get_from_path("/"); 160 161 if (IS_ERR(cgrp)) 162 return PTR_ERR(cgrp); 163 164 aux->cgroup.start = cgrp; 165 aux->cgroup.order = linfo->cgroup.traversal_order; 166 return 0; 167 } 168
Hi Yosry, Thank you for the patch! Yet something to improve: [auto build test ERROR on bpf-next/master] url: https://github.com/intel-lab-lkp/linux/commits/Yosry-Ahmed/bpf-rstat-cgroup-hierarchical-stats/20220611-034720 base: https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git master config: m68k-defconfig (https://download.01.org/0day-ci/archive/20220611/202206111529.2okIVRo9-lkp@intel.com/config) compiler: m68k-linux-gcc (GCC) 11.3.0 reproduce (this is a W=1 build): wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross chmod +x ~/bin/make.cross # https://github.com/intel-lab-lkp/linux/commit/619857fd1ec4f351376ffcaaec20acc9aae9486f git remote add linux-review https://github.com/intel-lab-lkp/linux git fetch --no-tags linux-review Yosry-Ahmed/bpf-rstat-cgroup-hierarchical-stats/20220611-034720 git checkout 619857fd1ec4f351376ffcaaec20acc9aae9486f # save the config file mkdir build_dir && cp config build_dir/.config COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-11.3.0 make.cross W=1 O=build_dir ARCH=m68k SHELL=/bin/bash If you fix the issue, kindly add following tag where applicable Reported-by: kernel test robot <lkp@intel.com> All errors (new ones prefixed by >>): In file included from kernel/bpf/cgroup_iter.c:9: >> kernel/bpf/../cgroup/cgroup-internal.h:78:41: error: field 'iter' has incomplete type 78 | struct css_task_iter iter; | ^~~~ kernel/bpf/../cgroup/cgroup-internal.h: In function 'cgroup_is_dead': kernel/bpf/../cgroup/cgroup-internal.h:188:22: error: invalid use of undefined type 'const struct cgroup' 188 | return !(cgrp->self.flags & CSS_ONLINE); | ^~ kernel/bpf/../cgroup/cgroup-internal.h:188:37: error: 'CSS_ONLINE' undeclared (first use in this function); did you mean 'N_ONLINE'? 188 | return !(cgrp->self.flags & CSS_ONLINE); | ^~~~~~~~~~ | N_ONLINE kernel/bpf/../cgroup/cgroup-internal.h:188:37: note: each undeclared identifier is reported only once for each function it appears in kernel/bpf/../cgroup/cgroup-internal.h: In function 'notify_on_release': kernel/bpf/../cgroup/cgroup-internal.h:193:25: error: 'CGRP_NOTIFY_ON_RELEASE' undeclared (first use in this function) 193 | return test_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags); | ^~~~~~~~~~~~~~~~~~~~~~ kernel/bpf/../cgroup/cgroup-internal.h:193:54: error: invalid use of undefined type 'const struct cgroup' 193 | return test_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags); | ^~ kernel/bpf/../cgroup/cgroup-internal.h: In function 'put_css_set': kernel/bpf/../cgroup/cgroup-internal.h:207:39: error: invalid use of undefined type 'struct css_set' 207 | if (refcount_dec_not_one(&cset->refcount)) | ^~ kernel/bpf/../cgroup/cgroup-internal.h: In function 'get_css_set': kernel/bpf/../cgroup/cgroup-internal.h:220:27: error: invalid use of undefined type 'struct css_set' 220 | refcount_inc(&cset->refcount); | ^~ kernel/bpf/../cgroup/cgroup-internal.h: At top level: kernel/bpf/../cgroup/cgroup-internal.h:284:22: error: array type has incomplete element type 'struct cftype' 284 | extern struct cftype cgroup1_base_files[]; | ^~~~~~~~~~~~~~~~~~ kernel/bpf/cgroup_iter.c: In function 'cgroup_iter_seq_start': >> kernel/bpf/cgroup_iter.c:55:24: error: implicit declaration of function 'css_next_descendant_pre' [-Werror=implicit-function-declaration] 55 | return css_next_descendant_pre(NULL, p->start_css); | ^~~~~~~~~~~~~~~~~~~~~~~ kernel/bpf/cgroup_iter.c:55:24: warning: returning 'int' from a function with return type 'void *' makes pointer from integer without a cast [-Wint-conversion] 55 | return css_next_descendant_pre(NULL, p->start_css); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ >> kernel/bpf/cgroup_iter.c:57:24: error: implicit declaration of function 'css_next_descendant_post' [-Werror=implicit-function-declaration] 57 | return css_next_descendant_post(NULL, p->start_css); | ^~~~~~~~~~~~~~~~~~~~~~~~ kernel/bpf/cgroup_iter.c:57:24: warning: returning 'int' from a function with return type 'void *' makes pointer from integer without a cast [-Wint-conversion] 57 | return css_next_descendant_post(NULL, p->start_css); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ kernel/bpf/cgroup_iter.c: In function 'cgroup_iter_seq_next': kernel/bpf/cgroup_iter.c:83:24: warning: returning 'int' from a function with return type 'void *' makes pointer from integer without a cast [-Wint-conversion] 83 | return css_next_descendant_pre(curr, p->start_css); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ kernel/bpf/cgroup_iter.c:85:24: warning: returning 'int' from a function with return type 'void *' makes pointer from integer without a cast [-Wint-conversion] 85 | return css_next_descendant_post(curr, p->start_css); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ >> kernel/bpf/cgroup_iter.c:87:28: error: invalid use of undefined type 'struct cgroup_subsys_state' 87 | return curr->parent; | ^~ kernel/bpf/cgroup_iter.c: In function '__cgroup_iter_seq_show': kernel/bpf/cgroup_iter.c:100:38: error: invalid use of undefined type 'struct cgroup_subsys_state' 100 | if (css && cgroup_is_dead(css->cgroup)) | ^~ kernel/bpf/cgroup_iter.c:104:31: error: invalid use of undefined type 'struct cgroup_subsys_state' 104 | ctx.cgroup = css ? css->cgroup : NULL; | ^~ kernel/bpf/cgroup_iter.c: In function 'cgroup_iter_seq_init': >> kernel/bpf/cgroup_iter.c:137:29: error: invalid use of undefined type 'struct cgroup' 137 | p->start_css = &cgrp->self; | ^~ kernel/bpf/cgroup_iter.c: In function 'bpf_iter_attach_cgroup': >> kernel/bpf/cgroup_iter.c:157:24: error: implicit declaration of function 'cgroup_get_from_fd'; did you mean 'cgroup_get_from_id'? [-Werror=implicit-function-declaration] 157 | cgrp = cgroup_get_from_fd(fd); | ^~~~~~~~~~~~~~~~~~ | cgroup_get_from_id kernel/bpf/cgroup_iter.c:157:22: warning: assignment to 'struct cgroup *' from 'int' makes pointer from integer without a cast [-Wint-conversion] 157 | cgrp = cgroup_get_from_fd(fd); | ^ >> kernel/bpf/cgroup_iter.c:159:24: error: implicit declaration of function 'cgroup_get_from_path'; did you mean 'cgroup_get_from_id'? [-Werror=implicit-function-declaration] 159 | cgrp = cgroup_get_from_path("/"); | ^~~~~~~~~~~~~~~~~~~~ | cgroup_get_from_id kernel/bpf/cgroup_iter.c:159:22: warning: assignment to 'struct cgroup *' from 'int' makes pointer from integer without a cast [-Wint-conversion] 159 | cgrp = cgroup_get_from_path("/"); | ^ kernel/bpf/cgroup_iter.c: In function 'bpf_iter_cgroup_show_fdinfo': >> kernel/bpf/cgroup_iter.c:190:9: error: implicit declaration of function 'cgroup_path_ns'; did you mean 'cgroup_parent'? [-Werror=implicit-function-declaration] 190 | cgroup_path_ns(aux->cgroup.start, buf, PATH_MAX, | ^~~~~~~~~~~~~~ | cgroup_parent kernel/bpf/cgroup_iter.c: In function 'cgroup_iter_seq_next': kernel/bpf/cgroup_iter.c:88:1: error: control reaches end of non-void function [-Werror=return-type] 88 | } | ^ cc1: some warnings being treated as errors vim +/iter +78 kernel/bpf/../cgroup/cgroup-internal.h 0d2b5955b36250 Tejun Heo 2022-01-06 68 0d2b5955b36250 Tejun Heo 2022-01-06 69 struct cgroup_file_ctx { e57457641613fe Tejun Heo 2022-01-06 70 struct cgroup_namespace *ns; e57457641613fe Tejun Heo 2022-01-06 71 0d2b5955b36250 Tejun Heo 2022-01-06 72 struct { 0d2b5955b36250 Tejun Heo 2022-01-06 73 void *trigger; 0d2b5955b36250 Tejun Heo 2022-01-06 74 } psi; 0d2b5955b36250 Tejun Heo 2022-01-06 75 0d2b5955b36250 Tejun Heo 2022-01-06 76 struct { 0d2b5955b36250 Tejun Heo 2022-01-06 77 bool started; 0d2b5955b36250 Tejun Heo 2022-01-06 @78 struct css_task_iter iter; 0d2b5955b36250 Tejun Heo 2022-01-06 79 } procs; 0d2b5955b36250 Tejun Heo 2022-01-06 80 0d2b5955b36250 Tejun Heo 2022-01-06 81 struct { 0d2b5955b36250 Tejun Heo 2022-01-06 82 struct cgroup_pidlist *pidlist; 0d2b5955b36250 Tejun Heo 2022-01-06 83 } procs1; 0d2b5955b36250 Tejun Heo 2022-01-06 84 }; 0d2b5955b36250 Tejun Heo 2022-01-06 85
Hi Yosry, Thank you for the patch! Perhaps something to improve: [auto build test WARNING on bpf-next/master] url: https://github.com/intel-lab-lkp/linux/commits/Yosry-Ahmed/bpf-rstat-cgroup-hierarchical-stats/20220611-034720 base: https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git master config: s390-randconfig-r035-20220611 (https://download.01.org/0day-ci/archive/20220611/202206112009.sycCJKhv-lkp@intel.com/config) compiler: clang version 15.0.0 (https://github.com/llvm/llvm-project ff4abe755279a3a47cc416ef80dbc900d9a98a19) reproduce (this is a W=1 build): wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross chmod +x ~/bin/make.cross # install s390 cross compiling tool for clang build # apt-get install binutils-s390x-linux-gnu # https://github.com/intel-lab-lkp/linux/commit/619857fd1ec4f351376ffcaaec20acc9aae9486f git remote add linux-review https://github.com/intel-lab-lkp/linux git fetch --no-tags linux-review Yosry-Ahmed/bpf-rstat-cgroup-hierarchical-stats/20220611-034720 git checkout 619857fd1ec4f351376ffcaaec20acc9aae9486f # save the config file mkdir build_dir && cp config build_dir/.config COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=s390 SHELL=/bin/bash kernel/bpf/ kernel/cgroup/ If you fix the issue, kindly add following tag where applicable Reported-by: kernel test robot <lkp@intel.com> All warnings (new ones prefixed by >>): In file included from kernel/bpf/cgroup_iter.c:9: kernel/bpf/../cgroup/cgroup-internal.h:78:24: error: field has incomplete type 'struct css_task_iter' struct css_task_iter iter; ^ kernel/bpf/../cgroup/cgroup-internal.h:78:10: note: forward declaration of 'struct css_task_iter' struct css_task_iter iter; ^ kernel/bpf/../cgroup/cgroup-internal.h:188:15: error: incomplete definition of type 'struct cgroup' return !(cgrp->self.flags & CSS_ONLINE); ~~~~^ include/linux/sched/task.h:35:9: note: forward declaration of 'struct cgroup' struct cgroup *cgrp; ^ In file included from kernel/bpf/cgroup_iter.c:9: kernel/bpf/../cgroup/cgroup-internal.h:188:30: error: use of undeclared identifier 'CSS_ONLINE'; did you mean 'N_ONLINE'? return !(cgrp->self.flags & CSS_ONLINE); ^~~~~~~~~~ N_ONLINE include/linux/nodemask.h:392:2: note: 'N_ONLINE' declared here N_ONLINE, /* The node is online */ ^ In file included from kernel/bpf/cgroup_iter.c:9: kernel/bpf/../cgroup/cgroup-internal.h:193:47: error: incomplete definition of type 'struct cgroup' return test_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags); ~~~~^ include/linux/sched/task.h:35:9: note: forward declaration of 'struct cgroup' struct cgroup *cgrp; ^ In file included from kernel/bpf/cgroup_iter.c:9: kernel/bpf/../cgroup/cgroup-internal.h:193:18: error: use of undeclared identifier 'CGRP_NOTIFY_ON_RELEASE' return test_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags); ^ kernel/bpf/../cgroup/cgroup-internal.h:207:32: error: incomplete definition of type 'struct css_set' if (refcount_dec_not_one(&cset->refcount)) ~~~~^ include/linux/sched/task.h:16:8: note: forward declaration of 'struct css_set' struct css_set; ^ In file included from kernel/bpf/cgroup_iter.c:9: kernel/bpf/../cgroup/cgroup-internal.h:220:20: error: incomplete definition of type 'struct css_set' refcount_inc(&cset->refcount); ~~~~^ include/linux/sched/task.h:16:8: note: forward declaration of 'struct css_set' struct css_set; ^ In file included from kernel/bpf/cgroup_iter.c:9: kernel/bpf/../cgroup/cgroup-internal.h:284:40: error: array has incomplete element type 'struct cftype' extern struct cftype cgroup1_base_files[]; ^ kernel/bpf/../cgroup/cgroup-internal.h:284:15: note: forward declaration of 'struct cftype' extern struct cftype cgroup1_base_files[]; ^ kernel/bpf/cgroup_iter.c:55:10: error: call to undeclared function 'css_next_descendant_pre'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration] return css_next_descendant_pre(NULL, p->start_css); ^ >> kernel/bpf/cgroup_iter.c:55:10: warning: incompatible integer to pointer conversion returning 'int' from a function with result type 'void *' [-Wint-conversion] return css_next_descendant_pre(NULL, p->start_css); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ kernel/bpf/cgroup_iter.c:57:10: error: call to undeclared function 'css_next_descendant_post'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration] return css_next_descendant_post(NULL, p->start_css); ^ kernel/bpf/cgroup_iter.c:57:10: warning: incompatible integer to pointer conversion returning 'int' from a function with result type 'void *' [-Wint-conversion] return css_next_descendant_post(NULL, p->start_css); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ kernel/bpf/cgroup_iter.c:83:10: error: call to undeclared function 'css_next_descendant_pre'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration] return css_next_descendant_pre(curr, p->start_css); ^ kernel/bpf/cgroup_iter.c:83:10: warning: incompatible integer to pointer conversion returning 'int' from a function with result type 'void *' [-Wint-conversion] return css_next_descendant_pre(curr, p->start_css); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ kernel/bpf/cgroup_iter.c:85:10: error: call to undeclared function 'css_next_descendant_post'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration] return css_next_descendant_post(curr, p->start_css); ^ kernel/bpf/cgroup_iter.c:85:10: warning: incompatible integer to pointer conversion returning 'int' from a function with result type 'void *' [-Wint-conversion] return css_next_descendant_post(curr, p->start_css); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ kernel/bpf/cgroup_iter.c:87:14: error: incomplete definition of type 'struct cgroup_subsys_state' return curr->parent; ~~~~^ include/linux/kthread.h:218:8: note: forward declaration of 'struct cgroup_subsys_state' struct cgroup_subsys_state; ^ kernel/bpf/cgroup_iter.c:100:31: error: incomplete definition of type 'struct cgroup_subsys_state' if (css && cgroup_is_dead(css->cgroup)) ~~~^ include/linux/kthread.h:218:8: note: forward declaration of 'struct cgroup_subsys_state' struct cgroup_subsys_state; ^ kernel/bpf/cgroup_iter.c:104:24: error: incomplete definition of type 'struct cgroup_subsys_state' ctx.cgroup = css ? css->cgroup : NULL; ~~~^ include/linux/kthread.h:218:8: note: forward declaration of 'struct cgroup_subsys_state' struct cgroup_subsys_state; ^ kernel/bpf/cgroup_iter.c:137:22: error: incomplete definition of type 'struct cgroup' p->start_css = &cgrp->self; ~~~~^ include/linux/sched/task.h:35:9: note: forward declaration of 'struct cgroup' struct cgroup *cgrp; ^ kernel/bpf/cgroup_iter.c:157:10: error: call to undeclared function 'cgroup_get_from_fd'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration] cgrp = cgroup_get_from_fd(fd); ^ kernel/bpf/cgroup_iter.c:157:10: note: did you mean 'cgroup_get_from_id'? include/linux/cgroup.h:756:30: note: 'cgroup_get_from_id' declared here static inline struct cgroup *cgroup_get_from_id(u64 id) ^ >> kernel/bpf/cgroup_iter.c:157:8: warning: incompatible integer to pointer conversion assigning to 'struct cgroup *' from 'int' [-Wint-conversion] cgrp = cgroup_get_from_fd(fd); ^ ~~~~~~~~~~~~~~~~~~~~~~ kernel/bpf/cgroup_iter.c:159:10: error: call to undeclared function 'cgroup_get_from_path'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration] cgrp = cgroup_get_from_path("/"); ^ kernel/bpf/cgroup_iter.c:159:8: warning: incompatible integer to pointer conversion assigning to 'struct cgroup *' from 'int' [-Wint-conversion] cgrp = cgroup_get_from_path("/"); ^ ~~~~~~~~~~~~~~~~~~~~~~~~~ kernel/bpf/cgroup_iter.c:190:2: error: call to undeclared function 'cgroup_path_ns'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration] cgroup_path_ns(aux->cgroup.start, buf, PATH_MAX, ^ kernel/bpf/cgroup_iter.c:190:2: note: did you mean 'cgroup_parent'? include/linux/cgroup.h:732:30: note: 'cgroup_parent' declared here static inline struct cgroup *cgroup_parent(struct cgroup *cgrp) ^ 6 warnings and 19 errors generated. vim +55 kernel/bpf/cgroup_iter.c 41 42 static void *cgroup_iter_seq_start(struct seq_file *seq, loff_t *pos) 43 { 44 struct cgroup_iter_priv *p = seq->private; 45 46 mutex_lock(&cgroup_mutex); 47 48 /* support only one session */ 49 if (*pos > 0) 50 return NULL; 51 52 ++*pos; 53 p->terminate = false; 54 if (p->order == BPF_ITER_CGROUP_PRE) > 55 return css_next_descendant_pre(NULL, p->start_css); 56 else if (p->order == BPF_ITER_CGROUP_POST) 57 return css_next_descendant_post(NULL, p->start_css); 58 else /* BPF_ITER_CGROUP_PARENT_UP */ 59 return p->start_css; 60 } 61 62 static int __cgroup_iter_seq_show(struct seq_file *seq, 63 struct cgroup_subsys_state *css, int in_stop); 64 65 static void cgroup_iter_seq_stop(struct seq_file *seq, void *v) 66 { 67 /* pass NULL to the prog for post-processing */ 68 if (!v) 69 __cgroup_iter_seq_show(seq, NULL, true); 70 mutex_unlock(&cgroup_mutex); 71 } 72 73 static void *cgroup_iter_seq_next(struct seq_file *seq, void *v, loff_t *pos) 74 { 75 struct cgroup_subsys_state *curr = (struct cgroup_subsys_state *)v; 76 struct cgroup_iter_priv *p = seq->private; 77 78 ++*pos; 79 if (p->terminate) 80 return NULL; 81 82 if (p->order == BPF_ITER_CGROUP_PRE) 83 return css_next_descendant_pre(curr, p->start_css); 84 else if (p->order == BPF_ITER_CGROUP_POST) 85 return css_next_descendant_post(curr, p->start_css); 86 else 87 return curr->parent; 88 } 89 90 static int __cgroup_iter_seq_show(struct seq_file *seq, 91 struct cgroup_subsys_state *css, int in_stop) 92 { 93 struct cgroup_iter_priv *p = seq->private; 94 struct bpf_iter__cgroup ctx; 95 struct bpf_iter_meta meta; 96 struct bpf_prog *prog; 97 int ret = 0; 98 99 /* cgroup is dead, skip this element */ 100 if (css && cgroup_is_dead(css->cgroup)) 101 return 0; 102 103 ctx.meta = &meta; 104 ctx.cgroup = css ? css->cgroup : NULL; 105 meta.seq = seq; 106 prog = bpf_iter_get_info(&meta, in_stop); 107 if (prog) 108 ret = bpf_iter_run_prog(prog, &ctx); 109 110 /* if prog returns > 0, terminate after this element. */ 111 if (ret != 0) 112 p->terminate = true; 113 114 return 0; 115 } 116 117 static int cgroup_iter_seq_show(struct seq_file *seq, void *v) 118 { 119 return __cgroup_iter_seq_show(seq, (struct cgroup_subsys_state *)v, 120 false); 121 } 122 123 static const struct seq_operations cgroup_iter_seq_ops = { 124 .start = cgroup_iter_seq_start, 125 .next = cgroup_iter_seq_next, 126 .stop = cgroup_iter_seq_stop, 127 .show = cgroup_iter_seq_show, 128 }; 129 130 BTF_ID_LIST_SINGLE(bpf_cgroup_btf_id, struct, cgroup) 131 132 static int cgroup_iter_seq_init(void *priv, struct bpf_iter_aux_info *aux) 133 { 134 struct cgroup_iter_priv *p = (struct cgroup_iter_priv *)priv; 135 struct cgroup *cgrp = aux->cgroup.start; 136 137 p->start_css = &cgrp->self; 138 p->terminate = false; 139 p->order = aux->cgroup.order; 140 return 0; 141 } 142 143 static const struct bpf_iter_seq_info cgroup_iter_seq_info = { 144 .seq_ops = &cgroup_iter_seq_ops, 145 .init_seq_private = cgroup_iter_seq_init, 146 .seq_priv_size = sizeof(struct cgroup_iter_priv), 147 }; 148 149 static int bpf_iter_attach_cgroup(struct bpf_prog *prog, 150 union bpf_iter_link_info *linfo, 151 struct bpf_iter_aux_info *aux) 152 { 153 int fd = linfo->cgroup.cgroup_fd; 154 struct cgroup *cgrp; 155 156 if (fd) > 157 cgrp = cgroup_get_from_fd(fd); 158 else /* walk the entire hierarchy by default. */ 159 cgrp = cgroup_get_from_path("/"); 160 161 if (IS_ERR(cgrp)) 162 return PTR_ERR(cgrp); 163 164 aux->cgroup.start = cgrp; 165 aux->cgroup.order = linfo->cgroup.traversal_order; 166 return 0; 167 } 168
Hi Yosry, Thank you for the patch! Yet something to improve: [auto build test ERROR on bpf-next/master] url: https://github.com/intel-lab-lkp/linux/commits/Yosry-Ahmed/bpf-rstat-cgroup-hierarchical-stats/20220611-034720 base: https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git master config: m68k-randconfig-r004-20220611 (https://download.01.org/0day-ci/archive/20220611/202206112000.LRgcxlpN-lkp@intel.com/config) compiler: m68k-linux-gcc (GCC) 11.3.0 reproduce (this is a W=1 build): wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross chmod +x ~/bin/make.cross # https://github.com/intel-lab-lkp/linux/commit/619857fd1ec4f351376ffcaaec20acc9aae9486f git remote add linux-review https://github.com/intel-lab-lkp/linux git fetch --no-tags linux-review Yosry-Ahmed/bpf-rstat-cgroup-hierarchical-stats/20220611-034720 git checkout 619857fd1ec4f351376ffcaaec20acc9aae9486f # save the config file mkdir build_dir && cp config build_dir/.config COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-11.3.0 make.cross W=1 O=build_dir ARCH=m68k SHELL=/bin/bash kernel/bpf/ kernel/cgroup/ If you fix the issue, kindly add following tag where applicable Reported-by: kernel test robot <lkp@intel.com> All error/warnings (new ones prefixed by >>): In file included from kernel/bpf/cgroup_iter.c:9: >> kernel/bpf/../cgroup/cgroup-internal.h:78:41: error: field 'iter' has incomplete type 78 | struct css_task_iter iter; | ^~~~ kernel/bpf/../cgroup/cgroup-internal.h: In function 'cgroup_is_dead': >> kernel/bpf/../cgroup/cgroup-internal.h:188:22: error: invalid use of undefined type 'const struct cgroup' 188 | return !(cgrp->self.flags & CSS_ONLINE); | ^~ >> kernel/bpf/../cgroup/cgroup-internal.h:188:37: error: 'CSS_ONLINE' undeclared (first use in this function); did you mean 'N_ONLINE'? 188 | return !(cgrp->self.flags & CSS_ONLINE); | ^~~~~~~~~~ | N_ONLINE kernel/bpf/../cgroup/cgroup-internal.h:188:37: note: each undeclared identifier is reported only once for each function it appears in kernel/bpf/../cgroup/cgroup-internal.h: In function 'notify_on_release': >> kernel/bpf/../cgroup/cgroup-internal.h:193:25: error: 'CGRP_NOTIFY_ON_RELEASE' undeclared (first use in this function) 193 | return test_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags); | ^~~~~~~~~~~~~~~~~~~~~~ kernel/bpf/../cgroup/cgroup-internal.h:193:54: error: invalid use of undefined type 'const struct cgroup' 193 | return test_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags); | ^~ kernel/bpf/../cgroup/cgroup-internal.h: In function 'put_css_set': >> kernel/bpf/../cgroup/cgroup-internal.h:207:39: error: invalid use of undefined type 'struct css_set' 207 | if (refcount_dec_not_one(&cset->refcount)) | ^~ kernel/bpf/../cgroup/cgroup-internal.h: In function 'get_css_set': kernel/bpf/../cgroup/cgroup-internal.h:220:27: error: invalid use of undefined type 'struct css_set' 220 | refcount_inc(&cset->refcount); | ^~ kernel/bpf/../cgroup/cgroup-internal.h: At top level: >> kernel/bpf/../cgroup/cgroup-internal.h:284:22: error: array type has incomplete element type 'struct cftype' 284 | extern struct cftype cgroup1_base_files[]; | ^~~~~~~~~~~~~~~~~~ kernel/bpf/cgroup_iter.c: In function 'cgroup_iter_seq_start': >> kernel/bpf/cgroup_iter.c:55:24: error: implicit declaration of function 'css_next_descendant_pre' [-Werror=implicit-function-declaration] 55 | return css_next_descendant_pre(NULL, p->start_css); | ^~~~~~~~~~~~~~~~~~~~~~~ >> kernel/bpf/cgroup_iter.c:55:24: warning: returning 'int' from a function with return type 'void *' makes pointer from integer without a cast [-Wint-conversion] 55 | return css_next_descendant_pre(NULL, p->start_css); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ >> kernel/bpf/cgroup_iter.c:57:24: error: implicit declaration of function 'css_next_descendant_post' [-Werror=implicit-function-declaration] 57 | return css_next_descendant_post(NULL, p->start_css); | ^~~~~~~~~~~~~~~~~~~~~~~~ kernel/bpf/cgroup_iter.c:57:24: warning: returning 'int' from a function with return type 'void *' makes pointer from integer without a cast [-Wint-conversion] 57 | return css_next_descendant_post(NULL, p->start_css); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ kernel/bpf/cgroup_iter.c: In function 'cgroup_iter_seq_next': kernel/bpf/cgroup_iter.c:83:24: warning: returning 'int' from a function with return type 'void *' makes pointer from integer without a cast [-Wint-conversion] 83 | return css_next_descendant_pre(curr, p->start_css); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ kernel/bpf/cgroup_iter.c:85:24: warning: returning 'int' from a function with return type 'void *' makes pointer from integer without a cast [-Wint-conversion] 85 | return css_next_descendant_post(curr, p->start_css); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ >> kernel/bpf/cgroup_iter.c:87:28: error: invalid use of undefined type 'struct cgroup_subsys_state' 87 | return curr->parent; | ^~ kernel/bpf/cgroup_iter.c: In function '__cgroup_iter_seq_show': kernel/bpf/cgroup_iter.c:100:38: error: invalid use of undefined type 'struct cgroup_subsys_state' 100 | if (css && cgroup_is_dead(css->cgroup)) | ^~ kernel/bpf/cgroup_iter.c:104:31: error: invalid use of undefined type 'struct cgroup_subsys_state' 104 | ctx.cgroup = css ? css->cgroup : NULL; | ^~ kernel/bpf/cgroup_iter.c: In function 'cgroup_iter_seq_init': >> kernel/bpf/cgroup_iter.c:137:29: error: invalid use of undefined type 'struct cgroup' 137 | p->start_css = &cgrp->self; | ^~ kernel/bpf/cgroup_iter.c: In function 'bpf_iter_attach_cgroup': >> kernel/bpf/cgroup_iter.c:157:24: error: implicit declaration of function 'cgroup_get_from_fd'; did you mean 'cgroup_get_from_id'? [-Werror=implicit-function-declaration] 157 | cgrp = cgroup_get_from_fd(fd); | ^~~~~~~~~~~~~~~~~~ | cgroup_get_from_id >> kernel/bpf/cgroup_iter.c:157:22: warning: assignment to 'struct cgroup *' from 'int' makes pointer from integer without a cast [-Wint-conversion] 157 | cgrp = cgroup_get_from_fd(fd); | ^ >> kernel/bpf/cgroup_iter.c:159:24: error: implicit declaration of function 'cgroup_get_from_path'; did you mean 'cgroup_get_from_id'? [-Werror=implicit-function-declaration] 159 | cgrp = cgroup_get_from_path("/"); | ^~~~~~~~~~~~~~~~~~~~ | cgroup_get_from_id kernel/bpf/cgroup_iter.c:159:22: warning: assignment to 'struct cgroup *' from 'int' makes pointer from integer without a cast [-Wint-conversion] 159 | cgrp = cgroup_get_from_path("/"); | ^ kernel/bpf/cgroup_iter.c: In function 'bpf_iter_cgroup_show_fdinfo': >> kernel/bpf/cgroup_iter.c:190:9: error: implicit declaration of function 'cgroup_path_ns'; did you mean 'cgroup_parent'? [-Werror=implicit-function-declaration] 190 | cgroup_path_ns(aux->cgroup.start, buf, PATH_MAX, | ^~~~~~~~~~~~~~ | cgroup_parent kernel/bpf/cgroup_iter.c: In function 'cgroup_iter_seq_next': kernel/bpf/cgroup_iter.c:88:1: error: control reaches end of non-void function [-Werror=return-type] 88 | } | ^ cc1: some warnings being treated as errors vim +/iter +78 kernel/bpf/../cgroup/cgroup-internal.h 0d2b5955b36250 Tejun Heo 2022-01-06 68 0d2b5955b36250 Tejun Heo 2022-01-06 69 struct cgroup_file_ctx { e57457641613fe Tejun Heo 2022-01-06 70 struct cgroup_namespace *ns; e57457641613fe Tejun Heo 2022-01-06 71 0d2b5955b36250 Tejun Heo 2022-01-06 72 struct { 0d2b5955b36250 Tejun Heo 2022-01-06 73 void *trigger; 0d2b5955b36250 Tejun Heo 2022-01-06 74 } psi; 0d2b5955b36250 Tejun Heo 2022-01-06 75 0d2b5955b36250 Tejun Heo 2022-01-06 76 struct { 0d2b5955b36250 Tejun Heo 2022-01-06 77 bool started; 0d2b5955b36250 Tejun Heo 2022-01-06 @78 struct css_task_iter iter; 0d2b5955b36250 Tejun Heo 2022-01-06 79 } procs; 0d2b5955b36250 Tejun Heo 2022-01-06 80 0d2b5955b36250 Tejun Heo 2022-01-06 81 struct { 0d2b5955b36250 Tejun Heo 2022-01-06 82 struct cgroup_pidlist *pidlist; 0d2b5955b36250 Tejun Heo 2022-01-06 83 } procs1; 0d2b5955b36250 Tejun Heo 2022-01-06 84 }; 0d2b5955b36250 Tejun Heo 2022-01-06 85 0a268dbd7932c7 Tejun Heo 2016-12-27 86 /* 0a268dbd7932c7 Tejun Heo 2016-12-27 87 * A cgroup can be associated with multiple css_sets as different tasks may 0a268dbd7932c7 Tejun Heo 2016-12-27 88 * belong to different cgroups on different hierarchies. In the other 0a268dbd7932c7 Tejun Heo 2016-12-27 89 * direction, a css_set is naturally associated with multiple cgroups. 0a268dbd7932c7 Tejun Heo 2016-12-27 90 * This M:N relationship is represented by the following link structure 0a268dbd7932c7 Tejun Heo 2016-12-27 91 * which exists for each association and allows traversing the associations 0a268dbd7932c7 Tejun Heo 2016-12-27 92 * from both sides. 0a268dbd7932c7 Tejun Heo 2016-12-27 93 */ 0a268dbd7932c7 Tejun Heo 2016-12-27 94 struct cgrp_cset_link { 0a268dbd7932c7 Tejun Heo 2016-12-27 95 /* the cgroup and css_set this link associates */ 0a268dbd7932c7 Tejun Heo 2016-12-27 96 struct cgroup *cgrp; 0a268dbd7932c7 Tejun Heo 2016-12-27 97 struct css_set *cset; 0a268dbd7932c7 Tejun Heo 2016-12-27 98 0a268dbd7932c7 Tejun Heo 2016-12-27 99 /* list of cgrp_cset_links anchored at cgrp->cset_links */ 0a268dbd7932c7 Tejun Heo 2016-12-27 100 struct list_head cset_link; 0a268dbd7932c7 Tejun Heo 2016-12-27 101 0a268dbd7932c7 Tejun Heo 2016-12-27 102 /* list of cgrp_cset_links anchored at css_set->cgrp_links */ 0a268dbd7932c7 Tejun Heo 2016-12-27 103 struct list_head cgrp_link; 0a268dbd7932c7 Tejun Heo 2016-12-27 104 }; 0a268dbd7932c7 Tejun Heo 2016-12-27 105 e595cd706982bf Tejun Heo 2017-01-15 106 /* used to track tasks and csets during migration */ e595cd706982bf Tejun Heo 2017-01-15 107 struct cgroup_taskset { e595cd706982bf Tejun Heo 2017-01-15 108 /* the src and dst cset list running through cset->mg_node */ e595cd706982bf Tejun Heo 2017-01-15 109 struct list_head src_csets; e595cd706982bf Tejun Heo 2017-01-15 110 struct list_head dst_csets; e595cd706982bf Tejun Heo 2017-01-15 111 610467270fb368 Tejun Heo 2017-07-08 112 /* the number of tasks in the set */ 610467270fb368 Tejun Heo 2017-07-08 113 int nr_tasks; 610467270fb368 Tejun Heo 2017-07-08 114 e595cd706982bf Tejun Heo 2017-01-15 115 /* the subsys currently being processed */ e595cd706982bf Tejun Heo 2017-01-15 116 int ssid; e595cd706982bf Tejun Heo 2017-01-15 117 e595cd706982bf Tejun Heo 2017-01-15 118 /* e595cd706982bf Tejun Heo 2017-01-15 119 * Fields for cgroup_taskset_*() iteration. e595cd706982bf Tejun Heo 2017-01-15 120 * e595cd706982bf Tejun Heo 2017-01-15 121 * Before migration is committed, the target migration tasks are on e595cd706982bf Tejun Heo 2017-01-15 122 * ->mg_tasks of the csets on ->src_csets. After, on ->mg_tasks of e595cd706982bf Tejun Heo 2017-01-15 123 * the csets on ->dst_csets. ->csets point to either ->src_csets e595cd706982bf Tejun Heo 2017-01-15 124 * or ->dst_csets depending on whether migration is committed. e595cd706982bf Tejun Heo 2017-01-15 125 * e595cd706982bf Tejun Heo 2017-01-15 126 * ->cur_csets and ->cur_task point to the current task position e595cd706982bf Tejun Heo 2017-01-15 127 * during iteration. e595cd706982bf Tejun Heo 2017-01-15 128 */ e595cd706982bf Tejun Heo 2017-01-15 129 struct list_head *csets; e595cd706982bf Tejun Heo 2017-01-15 130 struct css_set *cur_cset; e595cd706982bf Tejun Heo 2017-01-15 131 struct task_struct *cur_task; e595cd706982bf Tejun Heo 2017-01-15 132 }; e595cd706982bf Tejun Heo 2017-01-15 133 e595cd706982bf Tejun Heo 2017-01-15 134 /* migration context also tracks preloading */ e595cd706982bf Tejun Heo 2017-01-15 135 struct cgroup_mgctx { e595cd706982bf Tejun Heo 2017-01-15 136 /* e595cd706982bf Tejun Heo 2017-01-15 137 * Preloaded source and destination csets. Used to guarantee e595cd706982bf Tejun Heo 2017-01-15 138 * atomic success or failure on actual migration. e595cd706982bf Tejun Heo 2017-01-15 139 */ e595cd706982bf Tejun Heo 2017-01-15 140 struct list_head preloaded_src_csets; e595cd706982bf Tejun Heo 2017-01-15 141 struct list_head preloaded_dst_csets; e595cd706982bf Tejun Heo 2017-01-15 142 e595cd706982bf Tejun Heo 2017-01-15 143 /* tasks and csets to migrate */ e595cd706982bf Tejun Heo 2017-01-15 144 struct cgroup_taskset tset; bfc2cf6f61fcea Tejun Heo 2017-01-15 145 bfc2cf6f61fcea Tejun Heo 2017-01-15 146 /* subsystems affected by migration */ bfc2cf6f61fcea Tejun Heo 2017-01-15 147 u16 ss_mask; e595cd706982bf Tejun Heo 2017-01-15 148 }; e595cd706982bf Tejun Heo 2017-01-15 149 e595cd706982bf Tejun Heo 2017-01-15 150 #define CGROUP_TASKSET_INIT(tset) \ e595cd706982bf Tejun Heo 2017-01-15 151 { \ e595cd706982bf Tejun Heo 2017-01-15 152 .src_csets = LIST_HEAD_INIT(tset.src_csets), \ e595cd706982bf Tejun Heo 2017-01-15 153 .dst_csets = LIST_HEAD_INIT(tset.dst_csets), \ e595cd706982bf Tejun Heo 2017-01-15 154 .csets = &tset.src_csets, \ e595cd706982bf Tejun Heo 2017-01-15 155 } e595cd706982bf Tejun Heo 2017-01-15 156 e595cd706982bf Tejun Heo 2017-01-15 157 #define CGROUP_MGCTX_INIT(name) \ e595cd706982bf Tejun Heo 2017-01-15 158 { \ e595cd706982bf Tejun Heo 2017-01-15 159 LIST_HEAD_INIT(name.preloaded_src_csets), \ e595cd706982bf Tejun Heo 2017-01-15 160 LIST_HEAD_INIT(name.preloaded_dst_csets), \ e595cd706982bf Tejun Heo 2017-01-15 161 CGROUP_TASKSET_INIT(name.tset), \ e595cd706982bf Tejun Heo 2017-01-15 162 } e595cd706982bf Tejun Heo 2017-01-15 163 e595cd706982bf Tejun Heo 2017-01-15 164 #define DEFINE_CGROUP_MGCTX(name) \ e595cd706982bf Tejun Heo 2017-01-15 165 struct cgroup_mgctx name = CGROUP_MGCTX_INIT(name) e595cd706982bf Tejun Heo 2017-01-15 166 0a268dbd7932c7 Tejun Heo 2016-12-27 167 extern struct mutex cgroup_mutex; 0a268dbd7932c7 Tejun Heo 2016-12-27 168 extern spinlock_t css_set_lock; 0a268dbd7932c7 Tejun Heo 2016-12-27 169 extern struct cgroup_subsys *cgroup_subsys[]; 0a268dbd7932c7 Tejun Heo 2016-12-27 170 extern struct list_head cgroup_roots; 0a268dbd7932c7 Tejun Heo 2016-12-27 171 extern struct file_system_type cgroup_fs_type; 0a268dbd7932c7 Tejun Heo 2016-12-27 172 0a268dbd7932c7 Tejun Heo 2016-12-27 173 /* iterate across the hierarchies */ 0a268dbd7932c7 Tejun Heo 2016-12-27 174 #define for_each_root(root) \ 0a268dbd7932c7 Tejun Heo 2016-12-27 175 list_for_each_entry((root), &cgroup_roots, root_list) 0a268dbd7932c7 Tejun Heo 2016-12-27 176 0a268dbd7932c7 Tejun Heo 2016-12-27 177 /** 0a268dbd7932c7 Tejun Heo 2016-12-27 178 * for_each_subsys - iterate all enabled cgroup subsystems 0a268dbd7932c7 Tejun Heo 2016-12-27 179 * @ss: the iteration cursor 0a268dbd7932c7 Tejun Heo 2016-12-27 180 * @ssid: the index of @ss, CGROUP_SUBSYS_COUNT after reaching the end 0a268dbd7932c7 Tejun Heo 2016-12-27 181 */ 0a268dbd7932c7 Tejun Heo 2016-12-27 182 #define for_each_subsys(ss, ssid) \ 0a268dbd7932c7 Tejun Heo 2016-12-27 183 for ((ssid) = 0; (ssid) < CGROUP_SUBSYS_COUNT && \ 0a268dbd7932c7 Tejun Heo 2016-12-27 184 (((ss) = cgroup_subsys[ssid]) || true); (ssid)++) 0a268dbd7932c7 Tejun Heo 2016-12-27 185 0a268dbd7932c7 Tejun Heo 2016-12-27 186 static inline bool cgroup_is_dead(const struct cgroup *cgrp) 0a268dbd7932c7 Tejun Heo 2016-12-27 187 { 0a268dbd7932c7 Tejun Heo 2016-12-27 @188 return !(cgrp->self.flags & CSS_ONLINE); 0a268dbd7932c7 Tejun Heo 2016-12-27 189 } 0a268dbd7932c7 Tejun Heo 2016-12-27 190 0a268dbd7932c7 Tejun Heo 2016-12-27 191 static inline bool notify_on_release(const struct cgroup *cgrp) 0a268dbd7932c7 Tejun Heo 2016-12-27 192 { 0a268dbd7932c7 Tejun Heo 2016-12-27 @193 return test_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags); 0a268dbd7932c7 Tejun Heo 2016-12-27 194 } 0a268dbd7932c7 Tejun Heo 2016-12-27 195 dcfe149b9f45aa Tejun Heo 2016-12-27 196 void put_css_set_locked(struct css_set *cset); dcfe149b9f45aa Tejun Heo 2016-12-27 197 dcfe149b9f45aa Tejun Heo 2016-12-27 198 static inline void put_css_set(struct css_set *cset) dcfe149b9f45aa Tejun Heo 2016-12-27 199 { dcfe149b9f45aa Tejun Heo 2016-12-27 200 unsigned long flags; dcfe149b9f45aa Tejun Heo 2016-12-27 201 dcfe149b9f45aa Tejun Heo 2016-12-27 202 /* dcfe149b9f45aa Tejun Heo 2016-12-27 203 * Ensure that the refcount doesn't hit zero while any readers dcfe149b9f45aa Tejun Heo 2016-12-27 204 * can see it. Similar to atomic_dec_and_lock(), but for an dcfe149b9f45aa Tejun Heo 2016-12-27 205 * rwlock dcfe149b9f45aa Tejun Heo 2016-12-27 206 */ 4b9502e63b5e2b Elena Reshetova 2017-03-08 @207 if (refcount_dec_not_one(&cset->refcount)) dcfe149b9f45aa Tejun Heo 2016-12-27 208 return; dcfe149b9f45aa Tejun Heo 2016-12-27 209 dcfe149b9f45aa Tejun Heo 2016-12-27 210 spin_lock_irqsave(&css_set_lock, flags); dcfe149b9f45aa Tejun Heo 2016-12-27 211 put_css_set_locked(cset); dcfe149b9f45aa Tejun Heo 2016-12-27 212 spin_unlock_irqrestore(&css_set_lock, flags); dcfe149b9f45aa Tejun Heo 2016-12-27 213 } dcfe149b9f45aa Tejun Heo 2016-12-27 214
On 6/10/22 12:44 PM, Yosry Ahmed wrote: > From: Hao Luo <haoluo@google.com> > > Cgroup_iter is a type of bpf_iter. It walks over cgroups in two modes: > > - walking a cgroup's descendants. > - walking a cgroup's ancestors. The implementation has another choice, BPF_ITER_CGROUP_PARENT_UP. We should add it here as well. > > When attaching cgroup_iter, one can set a cgroup to the iter_link > created from attaching. This cgroup is passed as a file descriptor and > serves as the starting point of the walk. If no cgroup is specified, > the starting point will be the root cgroup. > > For walking descendants, one can specify the order: either pre-order or > post-order. For walking ancestors, the walk starts at the specified > cgroup and ends at the root. > > One can also terminate the walk early by returning 1 from the iter > program. > > Note that because walking cgroup hierarchy holds cgroup_mutex, the iter > program is called with cgroup_mutex held. Overall looks good to me with a few nits below. Acked-by: Yonghong Song <yhs@fb.com> > > Signed-off-by: Hao Luo <haoluo@google.com> > Signed-off-by: Yosry Ahmed <yosryahmed@google.com> > --- > include/linux/bpf.h | 8 ++ > include/uapi/linux/bpf.h | 21 +++ > kernel/bpf/Makefile | 2 +- > kernel/bpf/cgroup_iter.c | 235 +++++++++++++++++++++++++++++++++ > tools/include/uapi/linux/bpf.h | 21 +++ > 5 files changed, 286 insertions(+), 1 deletion(-) > create mode 100644 kernel/bpf/cgroup_iter.c > > diff --git a/include/linux/bpf.h b/include/linux/bpf.h > index 8e6092d0ea956..48d8e836b9748 100644 > --- a/include/linux/bpf.h > +++ b/include/linux/bpf.h > @@ -44,6 +44,7 @@ struct kobject; > struct mem_cgroup; > struct module; > struct bpf_func_state; > +struct cgroup; > > extern struct idr btf_idr; > extern spinlock_t btf_idr_lock; > @@ -1590,7 +1591,14 @@ int bpf_obj_get_user(const char __user *pathname, int flags); > int __init bpf_iter_ ## target(args) { return 0; } > > struct bpf_iter_aux_info { > + /* for map_elem iter */ > struct bpf_map *map; > + > + /* for cgroup iter */ > + struct { > + struct cgroup *start; /* starting cgroup */ > + int order; > + } cgroup; > }; > > typedef int (*bpf_iter_attach_target_t)(struct bpf_prog *prog, > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h > index f4009dbdf62da..4fd05cde19116 100644 > --- a/include/uapi/linux/bpf.h > +++ b/include/uapi/linux/bpf.h > @@ -87,10 +87,27 @@ struct bpf_cgroup_storage_key { > __u32 attach_type; /* program attach type (enum bpf_attach_type) */ > }; > > +enum bpf_iter_cgroup_traversal_order { > + BPF_ITER_CGROUP_PRE = 0, /* pre-order traversal */ > + BPF_ITER_CGROUP_POST, /* post-order traversal */ > + BPF_ITER_CGROUP_PARENT_UP, /* traversal of ancestors up to the root */ > +}; > + > union bpf_iter_link_info { > struct { > __u32 map_fd; > } map; > + > + /* cgroup_iter walks either the live descendants of a cgroup subtree, or the ancestors > + * of a given cgroup. > + */ > + struct { > + /* Cgroup file descriptor. This is root of the subtree if for walking the > + * descendants; this is the starting cgroup if for walking the ancestors. > + */ > + __u32 cgroup_fd; > + __u32 traversal_order; > + } cgroup; > }; > > /* BPF syscall commands, see bpf(2) man-page for more details. */ > @@ -6050,6 +6067,10 @@ struct bpf_link_info { > struct { > __u32 map_id; > } map; > + struct { > + __u32 traversal_order; > + __aligned_u64 cgroup_id; > + } cgroup; > }; > } iter; > struct { > diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile > index 057ba8e01e70f..9741b9314fb46 100644 > --- a/kernel/bpf/Makefile > +++ b/kernel/bpf/Makefile > @@ -8,7 +8,7 @@ CFLAGS_core.o += $(call cc-disable-warning, override-init) $(cflags-nogcse-yy) > > obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o bpf_iter.o map_iter.o task_iter.o prog_iter.o link_iter.o > obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o bloom_filter.o > -obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o ringbuf.o > +obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o ringbuf.o cgroup_iter.o > obj-$(CONFIG_BPF_SYSCALL) += bpf_local_storage.o bpf_task_storage.o > obj-${CONFIG_BPF_LSM} += bpf_inode_storage.o > obj-$(CONFIG_BPF_SYSCALL) += disasm.o > diff --git a/kernel/bpf/cgroup_iter.c b/kernel/bpf/cgroup_iter.c > new file mode 100644 > index 0000000000000..88deb655efa71 > --- /dev/null > +++ b/kernel/bpf/cgroup_iter.c > @@ -0,0 +1,235 @@ > +// SPDX-License-Identifier: GPL-2.0-only > +/* Copyright (c) 2022 Google */ > +#include <linux/bpf.h> > +#include <linux/btf_ids.h> > +#include <linux/cgroup.h> > +#include <linux/kernel.h> > +#include <linux/seq_file.h> > + > +#include "../cgroup/cgroup-internal.h" /* cgroup_mutex and cgroup_is_dead */ > + > +/* cgroup_iter provides two modes of traversal to the cgroup hierarchy. > + * > + * 1. Walk the descendants of a cgroup. > + * 2. Walk the ancestors of a cgroup. three modes here? > + * > + * For walking descendants, cgroup_iter can walk in either pre-order or > + * post-order. For walking ancestors, the iter walks up from a cgroup to > + * the root. > + * > + * The iter program can terminate the walk early by returning 1. Walk > + * continues if prog returns 0. > + * > + * The prog can check (seq->num == 0) to determine whether this is > + * the first element. The prog may also be passed a NULL cgroup, > + * which means the walk has completed and the prog has a chance to > + * do post-processing, such as outputing an epilogue. > + * > + * Note: the iter_prog is called with cgroup_mutex held. > + */ > + > +struct bpf_iter__cgroup { > + __bpf_md_ptr(struct bpf_iter_meta *, meta); > + __bpf_md_ptr(struct cgroup *, cgroup); > +}; > + > +struct cgroup_iter_priv { > + struct cgroup_subsys_state *start_css; > + bool terminate; > + int order; > +}; > + > +static void *cgroup_iter_seq_start(struct seq_file *seq, loff_t *pos) > +{ > + struct cgroup_iter_priv *p = seq->private; > + > + mutex_lock(&cgroup_mutex); > + > + /* support only one session */ > + if (*pos > 0) > + return NULL; > + > + ++*pos; > + p->terminate = false; > + if (p->order == BPF_ITER_CGROUP_PRE) > + return css_next_descendant_pre(NULL, p->start_css); > + else if (p->order == BPF_ITER_CGROUP_POST) > + return css_next_descendant_post(NULL, p->start_css); > + else /* BPF_ITER_CGROUP_PARENT_UP */ > + return p->start_css; > +} > + > +static int __cgroup_iter_seq_show(struct seq_file *seq, > + struct cgroup_subsys_state *css, int in_stop); > + > +static void cgroup_iter_seq_stop(struct seq_file *seq, void *v) > +{ > + /* pass NULL to the prog for post-processing */ > + if (!v) > + __cgroup_iter_seq_show(seq, NULL, true); > + mutex_unlock(&cgroup_mutex); > +} > + > +static void *cgroup_iter_seq_next(struct seq_file *seq, void *v, loff_t *pos) > +{ > + struct cgroup_subsys_state *curr = (struct cgroup_subsys_state *)v; > + struct cgroup_iter_priv *p = seq->private; > + > + ++*pos; > + if (p->terminate) > + return NULL; > + > + if (p->order == BPF_ITER_CGROUP_PRE) > + return css_next_descendant_pre(curr, p->start_css); > + else if (p->order == BPF_ITER_CGROUP_POST) > + return css_next_descendant_post(curr, p->start_css); > + else > + return curr->parent; > +} > + > +static int __cgroup_iter_seq_show(struct seq_file *seq, > + struct cgroup_subsys_state *css, int in_stop) > +{ > + struct cgroup_iter_priv *p = seq->private; > + struct bpf_iter__cgroup ctx; > + struct bpf_iter_meta meta; > + struct bpf_prog *prog; > + int ret = 0; > + > + /* cgroup is dead, skip this element */ > + if (css && cgroup_is_dead(css->cgroup)) > + return 0; > + > + ctx.meta = &meta; > + ctx.cgroup = css ? css->cgroup : NULL; > + meta.seq = seq; > + prog = bpf_iter_get_info(&meta, in_stop); > + if (prog) > + ret = bpf_iter_run_prog(prog, &ctx); > + > + /* if prog returns > 0, terminate after this element. */ > + if (ret != 0) > + p->terminate = true; > + > + return 0; > +} > + > +static int cgroup_iter_seq_show(struct seq_file *seq, void *v) > +{ > + return __cgroup_iter_seq_show(seq, (struct cgroup_subsys_state *)v, > + false); > +} > + > +static const struct seq_operations cgroup_iter_seq_ops = { > + .start = cgroup_iter_seq_start, > + .next = cgroup_iter_seq_next, > + .stop = cgroup_iter_seq_stop, > + .show = cgroup_iter_seq_show, > +}; > + > +BTF_ID_LIST_SINGLE(bpf_cgroup_btf_id, struct, cgroup) > + > +static int cgroup_iter_seq_init(void *priv, struct bpf_iter_aux_info *aux) > +{ > + struct cgroup_iter_priv *p = (struct cgroup_iter_priv *)priv; > + struct cgroup *cgrp = aux->cgroup.start; > + > + p->start_css = &cgrp->self; > + p->terminate = false; > + p->order = aux->cgroup.order; > + return 0; > +} > + > +static const struct bpf_iter_seq_info cgroup_iter_seq_info = { > + .seq_ops = &cgroup_iter_seq_ops, > + .init_seq_private = cgroup_iter_seq_init, > + .seq_priv_size = sizeof(struct cgroup_iter_priv), > +}; > + > +static int bpf_iter_attach_cgroup(struct bpf_prog *prog, > + union bpf_iter_link_info *linfo, > + struct bpf_iter_aux_info *aux) > +{ > + int fd = linfo->cgroup.cgroup_fd; > + struct cgroup *cgrp; > + > + if (fd) > + cgrp = cgroup_get_from_fd(fd); > + else /* walk the entire hierarchy by default. */ > + cgrp = cgroup_get_from_path("/"); > + > + if (IS_ERR(cgrp)) > + return PTR_ERR(cgrp); > + > + aux->cgroup.start = cgrp; > + aux->cgroup.order = linfo->cgroup.traversal_order; The legality of traversal_order should be checked. > + return 0; > +} > + > +static void bpf_iter_detach_cgroup(struct bpf_iter_aux_info *aux) > +{ > + cgroup_put(aux->cgroup.start); > +} > + > +static void bpf_iter_cgroup_show_fdinfo(const struct bpf_iter_aux_info *aux, > + struct seq_file *seq) > +{ > + char *buf; > + > + buf = kzalloc(PATH_MAX, GFP_KERNEL); > + if (!buf) { > + seq_puts(seq, "cgroup_path:\n"); This is a really unlikely case. maybe "cgroup_path:<unknown>"? > + goto show_order; > + } > + > + /* If cgroup_path_ns() fails, buf will be an empty string, cgroup_path > + * will print nothing. > + * > + * Path is in the calling process's cgroup namespace. > + */ > + cgroup_path_ns(aux->cgroup.start, buf, PATH_MAX, > + current->nsproxy->cgroup_ns); > + seq_printf(seq, "cgroup_path:\t%s\n", buf); > + kfree(buf); > + > +show_order: > + if (aux->cgroup.order == BPF_ITER_CGROUP_PRE) > + seq_puts(seq, "traversal_order: pre\n"); > + else if (aux->cgroup.order == BPF_ITER_CGROUP_POST) > + seq_puts(seq, "traversal_order: post\n"); > + else /* BPF_ITER_CGROUP_PARENT_UP */ > + seq_puts(seq, "traversal_order: parent_up\n"); > +} > + [...]
On 6/10/22 12:44 PM, Yosry Ahmed wrote: > From: Hao Luo <haoluo@google.com> > > Cgroup_iter is a type of bpf_iter. It walks over cgroups in two modes: > > - walking a cgroup's descendants. > - walking a cgroup's ancestors. > > When attaching cgroup_iter, one can set a cgroup to the iter_link > created from attaching. This cgroup is passed as a file descriptor and > serves as the starting point of the walk. If no cgroup is specified, > the starting point will be the root cgroup. > > For walking descendants, one can specify the order: either pre-order or > post-order. For walking ancestors, the walk starts at the specified > cgroup and ends at the root. > > One can also terminate the walk early by returning 1 from the iter > program. > > Note that because walking cgroup hierarchy holds cgroup_mutex, the iter > program is called with cgroup_mutex held. > > Signed-off-by: Hao Luo <haoluo@google.com> > Signed-off-by: Yosry Ahmed <yosryahmed@google.com> > --- > include/linux/bpf.h | 8 ++ > include/uapi/linux/bpf.h | 21 +++ > kernel/bpf/Makefile | 2 +- > kernel/bpf/cgroup_iter.c | 235 +++++++++++++++++++++++++++++++++ > tools/include/uapi/linux/bpf.h | 21 +++ > 5 files changed, 286 insertions(+), 1 deletion(-) > create mode 100644 kernel/bpf/cgroup_iter.c > > diff --git a/include/linux/bpf.h b/include/linux/bpf.h > index 8e6092d0ea956..48d8e836b9748 100644 > --- a/include/linux/bpf.h > +++ b/include/linux/bpf.h > @@ -44,6 +44,7 @@ struct kobject; > struct mem_cgroup; > struct module; > struct bpf_func_state; > +struct cgroup; > > extern struct idr btf_idr; > extern spinlock_t btf_idr_lock; > @@ -1590,7 +1591,14 @@ int bpf_obj_get_user(const char __user *pathname, int flags); > int __init bpf_iter_ ## target(args) { return 0; } > > struct bpf_iter_aux_info { > + /* for map_elem iter */ > struct bpf_map *map; > + > + /* for cgroup iter */ > + struct { > + struct cgroup *start; /* starting cgroup */ > + int order; > + } cgroup; > }; > [...] > + > +static void *cgroup_iter_seq_start(struct seq_file *seq, loff_t *pos) > +{ > + struct cgroup_iter_priv *p = seq->private; > + > + mutex_lock(&cgroup_mutex); > + > + /* support only one session */ > + if (*pos > 0) > + return NULL; > + > + ++*pos; > + p->terminate = false; > + if (p->order == BPF_ITER_CGROUP_PRE) > + return css_next_descendant_pre(NULL, p->start_css); > + else if (p->order == BPF_ITER_CGROUP_POST) > + return css_next_descendant_post(NULL, p->start_css); > + else /* BPF_ITER_CGROUP_PARENT_UP */ > + return p->start_css; > +} > + > +static int __cgroup_iter_seq_show(struct seq_file *seq, > + struct cgroup_subsys_state *css, int in_stop); > + > +static void cgroup_iter_seq_stop(struct seq_file *seq, void *v) > +{ > + /* pass NULL to the prog for post-processing */ > + if (!v) > + __cgroup_iter_seq_show(seq, NULL, true); > + mutex_unlock(&cgroup_mutex); > +} > + > +static void *cgroup_iter_seq_next(struct seq_file *seq, void *v, loff_t *pos) > +{ > + struct cgroup_subsys_state *curr = (struct cgroup_subsys_state *)v; > + struct cgroup_iter_priv *p = seq->private; > + > + ++*pos; > + if (p->terminate) > + return NULL; > + > + if (p->order == BPF_ITER_CGROUP_PRE) > + return css_next_descendant_pre(curr, p->start_css); > + else if (p->order == BPF_ITER_CGROUP_POST) > + return css_next_descendant_post(curr, p->start_css); > + else > + return curr->parent; > +} > + > +static int __cgroup_iter_seq_show(struct seq_file *seq, > + struct cgroup_subsys_state *css, int in_stop) > +{ > + struct cgroup_iter_priv *p = seq->private; > + struct bpf_iter__cgroup ctx; > + struct bpf_iter_meta meta; > + struct bpf_prog *prog; > + int ret = 0; > + > + /* cgroup is dead, skip this element */ > + if (css && cgroup_is_dead(css->cgroup)) > + return 0; > + > + ctx.meta = &meta; > + ctx.cgroup = css ? css->cgroup : NULL; > + meta.seq = seq; > + prog = bpf_iter_get_info(&meta, in_stop); > + if (prog) > + ret = bpf_iter_run_prog(prog, &ctx); Do we need to do anything special to ensure bpf program gets up-to-date stat from ctx.cgroup? > + > + /* if prog returns > 0, terminate after this element. */ > + if (ret != 0) > + p->terminate = true; > + > + return 0; > +} > + [...]
On Mon, Jun 27, 2022 at 9:14 PM Yonghong Song <yhs@fb.com> wrote: > > > > On 6/10/22 12:44 PM, Yosry Ahmed wrote: > > From: Hao Luo <haoluo@google.com> > > > > Cgroup_iter is a type of bpf_iter. It walks over cgroups in two modes: > > > > - walking a cgroup's descendants. > > - walking a cgroup's ancestors. > > > > When attaching cgroup_iter, one can set a cgroup to the iter_link > > created from attaching. This cgroup is passed as a file descriptor and > > serves as the starting point of the walk. If no cgroup is specified, > > the starting point will be the root cgroup. > > > > For walking descendants, one can specify the order: either pre-order or > > post-order. For walking ancestors, the walk starts at the specified > > cgroup and ends at the root. > > > > One can also terminate the walk early by returning 1 from the iter > > program. > > > > Note that because walking cgroup hierarchy holds cgroup_mutex, the iter > > program is called with cgroup_mutex held. > > > > Signed-off-by: Hao Luo <haoluo@google.com> > > Signed-off-by: Yosry Ahmed <yosryahmed@google.com> > > --- > > include/linux/bpf.h | 8 ++ > > include/uapi/linux/bpf.h | 21 +++ > > kernel/bpf/Makefile | 2 +- > > kernel/bpf/cgroup_iter.c | 235 +++++++++++++++++++++++++++++++++ > > tools/include/uapi/linux/bpf.h | 21 +++ > > 5 files changed, 286 insertions(+), 1 deletion(-) > > create mode 100644 kernel/bpf/cgroup_iter.c > > > > diff --git a/include/linux/bpf.h b/include/linux/bpf.h > > index 8e6092d0ea956..48d8e836b9748 100644 > > --- a/include/linux/bpf.h > > +++ b/include/linux/bpf.h > > @@ -44,6 +44,7 @@ struct kobject; > > struct mem_cgroup; > > struct module; > > struct bpf_func_state; > > +struct cgroup; > > > > extern struct idr btf_idr; > > extern spinlock_t btf_idr_lock; > > @@ -1590,7 +1591,14 @@ int bpf_obj_get_user(const char __user *pathname, int flags); > > int __init bpf_iter_ ## target(args) { return 0; } > > > > struct bpf_iter_aux_info { > > + /* for map_elem iter */ > > struct bpf_map *map; > > + > > + /* for cgroup iter */ > > + struct { > > + struct cgroup *start; /* starting cgroup */ > > + int order; > > + } cgroup; > > }; > > > [...] > > + > > +static void *cgroup_iter_seq_start(struct seq_file *seq, loff_t *pos) > > +{ > > + struct cgroup_iter_priv *p = seq->private; > > + > > + mutex_lock(&cgroup_mutex); > > + > > + /* support only one session */ > > + if (*pos > 0) > > + return NULL; > > + > > + ++*pos; > > + p->terminate = false; > > + if (p->order == BPF_ITER_CGROUP_PRE) > > + return css_next_descendant_pre(NULL, p->start_css); > > + else if (p->order == BPF_ITER_CGROUP_POST) > > + return css_next_descendant_post(NULL, p->start_css); > > + else /* BPF_ITER_CGROUP_PARENT_UP */ > > + return p->start_css; > > +} > > + > > +static int __cgroup_iter_seq_show(struct seq_file *seq, > > + struct cgroup_subsys_state *css, int in_stop); > > + > > +static void cgroup_iter_seq_stop(struct seq_file *seq, void *v) > > +{ > > + /* pass NULL to the prog for post-processing */ > > + if (!v) > > + __cgroup_iter_seq_show(seq, NULL, true); > > + mutex_unlock(&cgroup_mutex); > > +} > > + > > +static void *cgroup_iter_seq_next(struct seq_file *seq, void *v, loff_t *pos) > > +{ > > + struct cgroup_subsys_state *curr = (struct cgroup_subsys_state *)v; > > + struct cgroup_iter_priv *p = seq->private; > > + > > + ++*pos; > > + if (p->terminate) > > + return NULL; > > + > > + if (p->order == BPF_ITER_CGROUP_PRE) > > + return css_next_descendant_pre(curr, p->start_css); > > + else if (p->order == BPF_ITER_CGROUP_POST) > > + return css_next_descendant_post(curr, p->start_css); > > + else > > + return curr->parent; > > +} > > + > > +static int __cgroup_iter_seq_show(struct seq_file *seq, > > + struct cgroup_subsys_state *css, int in_stop) > > +{ > > + struct cgroup_iter_priv *p = seq->private; > > + struct bpf_iter__cgroup ctx; > > + struct bpf_iter_meta meta; > > + struct bpf_prog *prog; > > + int ret = 0; > > + > > + /* cgroup is dead, skip this element */ > > + if (css && cgroup_is_dead(css->cgroup)) > > + return 0; > > + > > + ctx.meta = &meta; > > + ctx.cgroup = css ? css->cgroup : NULL; > > + meta.seq = seq; > > + prog = bpf_iter_get_info(&meta, in_stop); > > + if (prog) > > + ret = bpf_iter_run_prog(prog, &ctx); > > Do we need to do anything special to ensure bpf program gets > up-to-date stat from ctx.cgroup? Later patches in the series add cgroup_flush_rstat() kfunc which flushes cgroup stats that use rstat (e.g. memcg stats). It can be called directly from the bpf program if needed. It would be better to leave this to the bpf program, it's an unnecessary toll to flush the stats for any cgroup_iter program, that could be not accessing stats, or stats that are not maintained using rstat. > > > + > > + /* if prog returns > 0, terminate after this element. */ > > + if (ret != 0) > > + p->terminate = true; > > + > > + return 0; > > +} > > + > [...]
On Mon, Jun 27, 2022 at 9:09 PM Yonghong Song <yhs@fb.com> wrote: > > > > On 6/10/22 12:44 PM, Yosry Ahmed wrote: > > From: Hao Luo <haoluo@google.com> > > > > Cgroup_iter is a type of bpf_iter. It walks over cgroups in two modes: > > > > - walking a cgroup's descendants. > > - walking a cgroup's ancestors. > > The implementation has another choice, BPF_ITER_CGROUP_PARENT_UP. > We should add it here as well. > BPF_ITER_CGROUP_PARENT_UP is expressed here, I think what's actually missing here (and down below where only 2 modes are specified again) is that walking descendants is broken down into two separate modes, pre and post order traversals. > > > > When attaching cgroup_iter, one can set a cgroup to the iter_link > > created from attaching. This cgroup is passed as a file descriptor and > > serves as the starting point of the walk. If no cgroup is specified, > > the starting point will be the root cgroup. > > > > For walking descendants, one can specify the order: either pre-order or > > post-order. For walking ancestors, the walk starts at the specified > > cgroup and ends at the root. > > > > One can also terminate the walk early by returning 1 from the iter > > program. > > > > Note that because walking cgroup hierarchy holds cgroup_mutex, the iter > > program is called with cgroup_mutex held. > > Overall looks good to me with a few nits below. > > Acked-by: Yonghong Song <yhs@fb.com> > > > > > Signed-off-by: Hao Luo <haoluo@google.com> > > Signed-off-by: Yosry Ahmed <yosryahmed@google.com> > > --- > > include/linux/bpf.h | 8 ++ > > include/uapi/linux/bpf.h | 21 +++ > > kernel/bpf/Makefile | 2 +- > > kernel/bpf/cgroup_iter.c | 235 +++++++++++++++++++++++++++++++++ > > tools/include/uapi/linux/bpf.h | 21 +++ > > 5 files changed, 286 insertions(+), 1 deletion(-) > > create mode 100644 kernel/bpf/cgroup_iter.c > > > > diff --git a/include/linux/bpf.h b/include/linux/bpf.h > > index 8e6092d0ea956..48d8e836b9748 100644 > > --- a/include/linux/bpf.h > > +++ b/include/linux/bpf.h > > @@ -44,6 +44,7 @@ struct kobject; > > struct mem_cgroup; > > struct module; > > struct bpf_func_state; > > +struct cgroup; > > > > extern struct idr btf_idr; > > extern spinlock_t btf_idr_lock; > > @@ -1590,7 +1591,14 @@ int bpf_obj_get_user(const char __user *pathname, int flags); > > int __init bpf_iter_ ## target(args) { return 0; } > > > > struct bpf_iter_aux_info { > > + /* for map_elem iter */ > > struct bpf_map *map; > > + > > + /* for cgroup iter */ > > + struct { > > + struct cgroup *start; /* starting cgroup */ > > + int order; > > + } cgroup; > > }; > > > > typedef int (*bpf_iter_attach_target_t)(struct bpf_prog *prog, > > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h > > index f4009dbdf62da..4fd05cde19116 100644 > > --- a/include/uapi/linux/bpf.h > > +++ b/include/uapi/linux/bpf.h > > @@ -87,10 +87,27 @@ struct bpf_cgroup_storage_key { > > __u32 attach_type; /* program attach type (enum bpf_attach_type) */ > > }; > > > > +enum bpf_iter_cgroup_traversal_order { > > + BPF_ITER_CGROUP_PRE = 0, /* pre-order traversal */ > > + BPF_ITER_CGROUP_POST, /* post-order traversal */ > > + BPF_ITER_CGROUP_PARENT_UP, /* traversal of ancestors up to the root */ > > +}; > > + > > union bpf_iter_link_info { > > struct { > > __u32 map_fd; > > } map; > > + > > + /* cgroup_iter walks either the live descendants of a cgroup subtree, or the ancestors > > + * of a given cgroup. > > + */ > > + struct { > > + /* Cgroup file descriptor. This is root of the subtree if for walking the > > + * descendants; this is the starting cgroup if for walking the ancestors. > > + */ > > + __u32 cgroup_fd; > > + __u32 traversal_order; > > + } cgroup; > > }; > > > > /* BPF syscall commands, see bpf(2) man-page for more details. */ > > @@ -6050,6 +6067,10 @@ struct bpf_link_info { > > struct { > > __u32 map_id; > > } map; > > + struct { > > + __u32 traversal_order; > > + __aligned_u64 cgroup_id; > > + } cgroup; > > }; > > } iter; > > struct { > > diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile > > index 057ba8e01e70f..9741b9314fb46 100644 > > --- a/kernel/bpf/Makefile > > +++ b/kernel/bpf/Makefile > > @@ -8,7 +8,7 @@ CFLAGS_core.o += $(call cc-disable-warning, override-init) $(cflags-nogcse-yy) > > > > obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o bpf_iter.o map_iter.o task_iter.o prog_iter.o link_iter.o > > obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o bloom_filter.o > > -obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o ringbuf.o > > +obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o ringbuf.o cgroup_iter.o > > obj-$(CONFIG_BPF_SYSCALL) += bpf_local_storage.o bpf_task_storage.o > > obj-${CONFIG_BPF_LSM} += bpf_inode_storage.o > > obj-$(CONFIG_BPF_SYSCALL) += disasm.o > > diff --git a/kernel/bpf/cgroup_iter.c b/kernel/bpf/cgroup_iter.c > > new file mode 100644 > > index 0000000000000..88deb655efa71 > > --- /dev/null > > +++ b/kernel/bpf/cgroup_iter.c > > @@ -0,0 +1,235 @@ > > +// SPDX-License-Identifier: GPL-2.0-only > > +/* Copyright (c) 2022 Google */ > > +#include <linux/bpf.h> > > +#include <linux/btf_ids.h> > > +#include <linux/cgroup.h> > > +#include <linux/kernel.h> > > +#include <linux/seq_file.h> > > + > > +#include "../cgroup/cgroup-internal.h" /* cgroup_mutex and cgroup_is_dead */ > > + > > +/* cgroup_iter provides two modes of traversal to the cgroup hierarchy. > > + * > > + * 1. Walk the descendants of a cgroup. > > + * 2. Walk the ancestors of a cgroup. > > three modes here? > > > + * > > + * For walking descendants, cgroup_iter can walk in either pre-order or > > + * post-order. For walking ancestors, the iter walks up from a cgroup to > > + * the root. > > + * > > + * The iter program can terminate the walk early by returning 1. Walk > > + * continues if prog returns 0. > > + * > > + * The prog can check (seq->num == 0) to determine whether this is > > + * the first element. The prog may also be passed a NULL cgroup, > > + * which means the walk has completed and the prog has a chance to > > + * do post-processing, such as outputing an epilogue. > > + * > > + * Note: the iter_prog is called with cgroup_mutex held. > > + */ > > + > > +struct bpf_iter__cgroup { > > + __bpf_md_ptr(struct bpf_iter_meta *, meta); > > + __bpf_md_ptr(struct cgroup *, cgroup); > > +}; > > + > > +struct cgroup_iter_priv { > > + struct cgroup_subsys_state *start_css; > > + bool terminate; > > + int order; > > +}; > > + > > +static void *cgroup_iter_seq_start(struct seq_file *seq, loff_t *pos) > > +{ > > + struct cgroup_iter_priv *p = seq->private; > > + > > + mutex_lock(&cgroup_mutex); > > + > > + /* support only one session */ > > + if (*pos > 0) > > + return NULL; > > + > > + ++*pos; > > + p->terminate = false; > > + if (p->order == BPF_ITER_CGROUP_PRE) > > + return css_next_descendant_pre(NULL, p->start_css); > > + else if (p->order == BPF_ITER_CGROUP_POST) > > + return css_next_descendant_post(NULL, p->start_css); > > + else /* BPF_ITER_CGROUP_PARENT_UP */ > > + return p->start_css; > > +} > > + > > +static int __cgroup_iter_seq_show(struct seq_file *seq, > > + struct cgroup_subsys_state *css, int in_stop); > > + > > +static void cgroup_iter_seq_stop(struct seq_file *seq, void *v) > > +{ > > + /* pass NULL to the prog for post-processing */ > > + if (!v) > > + __cgroup_iter_seq_show(seq, NULL, true); > > + mutex_unlock(&cgroup_mutex); > > +} > > + > > +static void *cgroup_iter_seq_next(struct seq_file *seq, void *v, loff_t *pos) > > +{ > > + struct cgroup_subsys_state *curr = (struct cgroup_subsys_state *)v; > > + struct cgroup_iter_priv *p = seq->private; > > + > > + ++*pos; > > + if (p->terminate) > > + return NULL; > > + > > + if (p->order == BPF_ITER_CGROUP_PRE) > > + return css_next_descendant_pre(curr, p->start_css); > > + else if (p->order == BPF_ITER_CGROUP_POST) > > + return css_next_descendant_post(curr, p->start_css); > > + else > > + return curr->parent; > > +} > > + > > +static int __cgroup_iter_seq_show(struct seq_file *seq, > > + struct cgroup_subsys_state *css, int in_stop) > > +{ > > + struct cgroup_iter_priv *p = seq->private; > > + struct bpf_iter__cgroup ctx; > > + struct bpf_iter_meta meta; > > + struct bpf_prog *prog; > > + int ret = 0; > > + > > + /* cgroup is dead, skip this element */ > > + if (css && cgroup_is_dead(css->cgroup)) > > + return 0; > > + > > + ctx.meta = &meta; > > + ctx.cgroup = css ? css->cgroup : NULL; > > + meta.seq = seq; > > + prog = bpf_iter_get_info(&meta, in_stop); > > + if (prog) > > + ret = bpf_iter_run_prog(prog, &ctx); > > + > > + /* if prog returns > 0, terminate after this element. */ > > + if (ret != 0) > > + p->terminate = true; > > + > > + return 0; > > +} > > + > > +static int cgroup_iter_seq_show(struct seq_file *seq, void *v) > > +{ > > + return __cgroup_iter_seq_show(seq, (struct cgroup_subsys_state *)v, > > + false); > > +} > > + > > +static const struct seq_operations cgroup_iter_seq_ops = { > > + .start = cgroup_iter_seq_start, > > + .next = cgroup_iter_seq_next, > > + .stop = cgroup_iter_seq_stop, > > + .show = cgroup_iter_seq_show, > > +}; > > + > > +BTF_ID_LIST_SINGLE(bpf_cgroup_btf_id, struct, cgroup) > > + > > +static int cgroup_iter_seq_init(void *priv, struct bpf_iter_aux_info *aux) > > +{ > > + struct cgroup_iter_priv *p = (struct cgroup_iter_priv *)priv; > > + struct cgroup *cgrp = aux->cgroup.start; > > + > > + p->start_css = &cgrp->self; > > + p->terminate = false; > > + p->order = aux->cgroup.order; > > + return 0; > > +} > > + > > +static const struct bpf_iter_seq_info cgroup_iter_seq_info = { > > + .seq_ops = &cgroup_iter_seq_ops, > > + .init_seq_private = cgroup_iter_seq_init, > > + .seq_priv_size = sizeof(struct cgroup_iter_priv), > > +}; > > + > > +static int bpf_iter_attach_cgroup(struct bpf_prog *prog, > > + union bpf_iter_link_info *linfo, > > + struct bpf_iter_aux_info *aux) > > +{ > > + int fd = linfo->cgroup.cgroup_fd; > > + struct cgroup *cgrp; > > + > > + if (fd) > > + cgrp = cgroup_get_from_fd(fd); > > + else /* walk the entire hierarchy by default. */ > > + cgrp = cgroup_get_from_path("/"); > > + > > + if (IS_ERR(cgrp)) > > + return PTR_ERR(cgrp); > > + > > + aux->cgroup.start = cgrp; > > + aux->cgroup.order = linfo->cgroup.traversal_order; > > The legality of traversal_order should be checked. > > > + return 0; > > +} > > + > > +static void bpf_iter_detach_cgroup(struct bpf_iter_aux_info *aux) > > +{ > > + cgroup_put(aux->cgroup.start); > > +} > > + > > +static void bpf_iter_cgroup_show_fdinfo(const struct bpf_iter_aux_info *aux, > > + struct seq_file *seq) > > +{ > > + char *buf; > > + > > + buf = kzalloc(PATH_MAX, GFP_KERNEL); > > + if (!buf) { > > + seq_puts(seq, "cgroup_path:\n"); > > This is a really unlikely case. maybe "cgroup_path:<unknown>"? > > > + goto show_order; > > + } > > + > > + /* If cgroup_path_ns() fails, buf will be an empty string, cgroup_path > > + * will print nothing. > > + * > > + * Path is in the calling process's cgroup namespace. > > + */ > > + cgroup_path_ns(aux->cgroup.start, buf, PATH_MAX, > > + current->nsproxy->cgroup_ns); > > + seq_printf(seq, "cgroup_path:\t%s\n", buf); > > + kfree(buf); > > + > > +show_order: > > + if (aux->cgroup.order == BPF_ITER_CGROUP_PRE) > > + seq_puts(seq, "traversal_order: pre\n"); > > + else if (aux->cgroup.order == BPF_ITER_CGROUP_POST) > > + seq_puts(seq, "traversal_order: post\n"); > > + else /* BPF_ITER_CGROUP_PARENT_UP */ > > + seq_puts(seq, "traversal_order: parent_up\n"); > > +} > > + > [...]
On 6/27/22 11:03 PM, Yosry Ahmed wrote: > On Mon, Jun 27, 2022 at 9:14 PM Yonghong Song <yhs@fb.com> wrote: >> >> >> >> On 6/10/22 12:44 PM, Yosry Ahmed wrote: >>> From: Hao Luo <haoluo@google.com> >>> >>> Cgroup_iter is a type of bpf_iter. It walks over cgroups in two modes: >>> >>> - walking a cgroup's descendants. >>> - walking a cgroup's ancestors. >>> >>> When attaching cgroup_iter, one can set a cgroup to the iter_link >>> created from attaching. This cgroup is passed as a file descriptor and >>> serves as the starting point of the walk. If no cgroup is specified, >>> the starting point will be the root cgroup. >>> >>> For walking descendants, one can specify the order: either pre-order or >>> post-order. For walking ancestors, the walk starts at the specified >>> cgroup and ends at the root. >>> >>> One can also terminate the walk early by returning 1 from the iter >>> program. >>> >>> Note that because walking cgroup hierarchy holds cgroup_mutex, the iter >>> program is called with cgroup_mutex held. >>> >>> Signed-off-by: Hao Luo <haoluo@google.com> >>> Signed-off-by: Yosry Ahmed <yosryahmed@google.com> >>> --- >>> include/linux/bpf.h | 8 ++ >>> include/uapi/linux/bpf.h | 21 +++ >>> kernel/bpf/Makefile | 2 +- >>> kernel/bpf/cgroup_iter.c | 235 +++++++++++++++++++++++++++++++++ >>> tools/include/uapi/linux/bpf.h | 21 +++ >>> 5 files changed, 286 insertions(+), 1 deletion(-) >>> create mode 100644 kernel/bpf/cgroup_iter.c >>> >>> diff --git a/include/linux/bpf.h b/include/linux/bpf.h >>> index 8e6092d0ea956..48d8e836b9748 100644 >>> --- a/include/linux/bpf.h >>> +++ b/include/linux/bpf.h >>> @@ -44,6 +44,7 @@ struct kobject; >>> struct mem_cgroup; >>> struct module; >>> struct bpf_func_state; >>> +struct cgroup; >>> >>> extern struct idr btf_idr; >>> extern spinlock_t btf_idr_lock; >>> @@ -1590,7 +1591,14 @@ int bpf_obj_get_user(const char __user *pathname, int flags); >>> int __init bpf_iter_ ## target(args) { return 0; } >>> >>> struct bpf_iter_aux_info { >>> + /* for map_elem iter */ >>> struct bpf_map *map; >>> + >>> + /* for cgroup iter */ >>> + struct { >>> + struct cgroup *start; /* starting cgroup */ >>> + int order; >>> + } cgroup; >>> }; >>> >> [...] >>> + >>> +static void *cgroup_iter_seq_start(struct seq_file *seq, loff_t *pos) >>> +{ >>> + struct cgroup_iter_priv *p = seq->private; >>> + >>> + mutex_lock(&cgroup_mutex); >>> + >>> + /* support only one session */ >>> + if (*pos > 0) >>> + return NULL; >>> + >>> + ++*pos; >>> + p->terminate = false; >>> + if (p->order == BPF_ITER_CGROUP_PRE) >>> + return css_next_descendant_pre(NULL, p->start_css); >>> + else if (p->order == BPF_ITER_CGROUP_POST) >>> + return css_next_descendant_post(NULL, p->start_css); >>> + else /* BPF_ITER_CGROUP_PARENT_UP */ >>> + return p->start_css; >>> +} >>> + >>> +static int __cgroup_iter_seq_show(struct seq_file *seq, >>> + struct cgroup_subsys_state *css, int in_stop); >>> + >>> +static void cgroup_iter_seq_stop(struct seq_file *seq, void *v) >>> +{ >>> + /* pass NULL to the prog for post-processing */ >>> + if (!v) >>> + __cgroup_iter_seq_show(seq, NULL, true); >>> + mutex_unlock(&cgroup_mutex); >>> +} >>> + >>> +static void *cgroup_iter_seq_next(struct seq_file *seq, void *v, loff_t *pos) >>> +{ >>> + struct cgroup_subsys_state *curr = (struct cgroup_subsys_state *)v; >>> + struct cgroup_iter_priv *p = seq->private; >>> + >>> + ++*pos; >>> + if (p->terminate) >>> + return NULL; >>> + >>> + if (p->order == BPF_ITER_CGROUP_PRE) >>> + return css_next_descendant_pre(curr, p->start_css); >>> + else if (p->order == BPF_ITER_CGROUP_POST) >>> + return css_next_descendant_post(curr, p->start_css); >>> + else >>> + return curr->parent; >>> +} >>> + >>> +static int __cgroup_iter_seq_show(struct seq_file *seq, >>> + struct cgroup_subsys_state *css, int in_stop) >>> +{ >>> + struct cgroup_iter_priv *p = seq->private; >>> + struct bpf_iter__cgroup ctx; >>> + struct bpf_iter_meta meta; >>> + struct bpf_prog *prog; >>> + int ret = 0; >>> + >>> + /* cgroup is dead, skip this element */ >>> + if (css && cgroup_is_dead(css->cgroup)) >>> + return 0; >>> + >>> + ctx.meta = &meta; >>> + ctx.cgroup = css ? css->cgroup : NULL; >>> + meta.seq = seq; >>> + prog = bpf_iter_get_info(&meta, in_stop); >>> + if (prog) >>> + ret = bpf_iter_run_prog(prog, &ctx); >> >> Do we need to do anything special to ensure bpf program gets >> up-to-date stat from ctx.cgroup? > > Later patches in the series add cgroup_flush_rstat() kfunc which > flushes cgroup stats that use rstat (e.g. memcg stats). It can be > called directly from the bpf program if needed. > > It would be better to leave this to the bpf program, it's an > unnecessary toll to flush the stats for any cgroup_iter program, that > could be not accessing stats, or stats that are not maintained using > rstat. Okay, this should work. > >> >>> + >>> + /* if prog returns > 0, terminate after this element. */ >>> + if (ret != 0) >>> + p->terminate = true; >>> + >>> + return 0; >>> +} >>> + >> [...]
On Mon, Jun 27, 2022 at 9:09 PM Yonghong Song <yhs@fb.com> wrote: > > > > On 6/10/22 12:44 PM, Yosry Ahmed wrote: > > From: Hao Luo <haoluo@google.com> > > > > Cgroup_iter is a type of bpf_iter. It walks over cgroups in two modes: > > > > - walking a cgroup's descendants. > > - walking a cgroup's ancestors. > > The implementation has another choice, BPF_ITER_CGROUP_PARENT_UP. > We should add it here as well. > Sorry about the late reply. I meant to write two modes: walking up and walking down. And two sub-modes when walking down: pre-order and post-order. But it seems this is confusing. I'm going to use three modes in the next version: UP, PRE and POST. Besides, since this patch modifies the bpf_iter_link_info, and that breaks the btf_dump selftest, I need to include the fix of the selftest in this patch. > > > > When attaching cgroup_iter, one can set a cgroup to the iter_link > > created from attaching. This cgroup is passed as a file descriptor and > > serves as the starting point of the walk. If no cgroup is specified, > > the starting point will be the root cgroup. > > > > For walking descendants, one can specify the order: either pre-order or > > post-order. For walking ancestors, the walk starts at the specified > > cgroup and ends at the root. > > > > One can also terminate the walk early by returning 1 from the iter > > program. > > > > Note that because walking cgroup hierarchy holds cgroup_mutex, the iter > > program is called with cgroup_mutex held. > > Overall looks good to me with a few nits below. > > Acked-by: Yonghong Song <yhs@fb.com> > > > > > Signed-off-by: Hao Luo <haoluo@google.com> > > Signed-off-by: Yosry Ahmed <yosryahmed@google.com> > > --- [...] > > + > > +/* cgroup_iter provides two modes of traversal to the cgroup hierarchy. > > + * > > + * 1. Walk the descendants of a cgroup. > > + * 2. Walk the ancestors of a cgroup. > > three modes here? > Same here. Will use 'three modes' in the next version. > > + * [...] > > + > > +static int bpf_iter_attach_cgroup(struct bpf_prog *prog, > > + union bpf_iter_link_info *linfo, > > + struct bpf_iter_aux_info *aux) > > +{ > > + int fd = linfo->cgroup.cgroup_fd; > > + struct cgroup *cgrp; > > + > > + if (fd) > > + cgrp = cgroup_get_from_fd(fd); > > + else /* walk the entire hierarchy by default. */ > > + cgrp = cgroup_get_from_path("/"); > > + > > + if (IS_ERR(cgrp)) > > + return PTR_ERR(cgrp); > > + > > + aux->cgroup.start = cgrp; > > + aux->cgroup.order = linfo->cgroup.traversal_order; > > The legality of traversal_order should be checked. > Sounds good. Will do. > > + return 0; > > +} > > + [...] > > +static void bpf_iter_cgroup_show_fdinfo(const struct bpf_iter_aux_info *aux, > > + struct seq_file *seq) > > +{ > > + char *buf; > > + > > + buf = kzalloc(PATH_MAX, GFP_KERNEL); > > + if (!buf) { > > + seq_puts(seq, "cgroup_path:\n"); > > This is a really unlikely case. maybe "cgroup_path:<unknown>"? > Sure, no problem. This is a path that is unlikely to hit. > > + goto show_order; [...]
On Mon, Jun 27, 2022 at 9:14 PM Yonghong Song <yhs@fb.com> wrote: > > > +static int __cgroup_iter_seq_show(struct seq_file *seq, > > + struct cgroup_subsys_state *css, int in_stop) > > +{ > > + struct cgroup_iter_priv *p = seq->private; > > + struct bpf_iter__cgroup ctx; > > + struct bpf_iter_meta meta; > > + struct bpf_prog *prog; > > + int ret = 0; > > + > > + /* cgroup is dead, skip this element */ > > + if (css && cgroup_is_dead(css->cgroup)) > > + return 0; > > + > > + ctx.meta = &meta; > > + ctx.cgroup = css ? css->cgroup : NULL; > > + meta.seq = seq; > > + prog = bpf_iter_get_info(&meta, in_stop); > > + if (prog) > > + ret = bpf_iter_run_prog(prog, &ctx); > > Do we need to do anything special to ensure bpf program gets > up-to-date stat from ctx.cgroup? > Let's leave that to be handled by bpf programs. The kfunc rstat_flush can be called to sync stats, if using rstat. > > + > > + /* if prog returns > 0, terminate after this element. */ > > + if (ret != 0) > > + p->terminate = true; > > + > > + return 0; > > +} > > + > [...]
diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 8e6092d0ea956..48d8e836b9748 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -44,6 +44,7 @@ struct kobject; struct mem_cgroup; struct module; struct bpf_func_state; +struct cgroup; extern struct idr btf_idr; extern spinlock_t btf_idr_lock; @@ -1590,7 +1591,14 @@ int bpf_obj_get_user(const char __user *pathname, int flags); int __init bpf_iter_ ## target(args) { return 0; } struct bpf_iter_aux_info { + /* for map_elem iter */ struct bpf_map *map; + + /* for cgroup iter */ + struct { + struct cgroup *start; /* starting cgroup */ + int order; + } cgroup; }; typedef int (*bpf_iter_attach_target_t)(struct bpf_prog *prog, diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index f4009dbdf62da..4fd05cde19116 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -87,10 +87,27 @@ struct bpf_cgroup_storage_key { __u32 attach_type; /* program attach type (enum bpf_attach_type) */ }; +enum bpf_iter_cgroup_traversal_order { + BPF_ITER_CGROUP_PRE = 0, /* pre-order traversal */ + BPF_ITER_CGROUP_POST, /* post-order traversal */ + BPF_ITER_CGROUP_PARENT_UP, /* traversal of ancestors up to the root */ +}; + union bpf_iter_link_info { struct { __u32 map_fd; } map; + + /* cgroup_iter walks either the live descendants of a cgroup subtree, or the ancestors + * of a given cgroup. + */ + struct { + /* Cgroup file descriptor. This is root of the subtree if for walking the + * descendants; this is the starting cgroup if for walking the ancestors. + */ + __u32 cgroup_fd; + __u32 traversal_order; + } cgroup; }; /* BPF syscall commands, see bpf(2) man-page for more details. */ @@ -6050,6 +6067,10 @@ struct bpf_link_info { struct { __u32 map_id; } map; + struct { + __u32 traversal_order; + __aligned_u64 cgroup_id; + } cgroup; }; } iter; struct { diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile index 057ba8e01e70f..9741b9314fb46 100644 --- a/kernel/bpf/Makefile +++ b/kernel/bpf/Makefile @@ -8,7 +8,7 @@ CFLAGS_core.o += $(call cc-disable-warning, override-init) $(cflags-nogcse-yy) obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o bpf_iter.o map_iter.o task_iter.o prog_iter.o link_iter.o obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o bloom_filter.o -obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o ringbuf.o +obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o ringbuf.o cgroup_iter.o obj-$(CONFIG_BPF_SYSCALL) += bpf_local_storage.o bpf_task_storage.o obj-${CONFIG_BPF_LSM} += bpf_inode_storage.o obj-$(CONFIG_BPF_SYSCALL) += disasm.o diff --git a/kernel/bpf/cgroup_iter.c b/kernel/bpf/cgroup_iter.c new file mode 100644 index 0000000000000..88deb655efa71 --- /dev/null +++ b/kernel/bpf/cgroup_iter.c @@ -0,0 +1,235 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2022 Google */ +#include <linux/bpf.h> +#include <linux/btf_ids.h> +#include <linux/cgroup.h> +#include <linux/kernel.h> +#include <linux/seq_file.h> + +#include "../cgroup/cgroup-internal.h" /* cgroup_mutex and cgroup_is_dead */ + +/* cgroup_iter provides two modes of traversal to the cgroup hierarchy. + * + * 1. Walk the descendants of a cgroup. + * 2. Walk the ancestors of a cgroup. + * + * For walking descendants, cgroup_iter can walk in either pre-order or + * post-order. For walking ancestors, the iter walks up from a cgroup to + * the root. + * + * The iter program can terminate the walk early by returning 1. Walk + * continues if prog returns 0. + * + * The prog can check (seq->num == 0) to determine whether this is + * the first element. The prog may also be passed a NULL cgroup, + * which means the walk has completed and the prog has a chance to + * do post-processing, such as outputing an epilogue. + * + * Note: the iter_prog is called with cgroup_mutex held. + */ + +struct bpf_iter__cgroup { + __bpf_md_ptr(struct bpf_iter_meta *, meta); + __bpf_md_ptr(struct cgroup *, cgroup); +}; + +struct cgroup_iter_priv { + struct cgroup_subsys_state *start_css; + bool terminate; + int order; +}; + +static void *cgroup_iter_seq_start(struct seq_file *seq, loff_t *pos) +{ + struct cgroup_iter_priv *p = seq->private; + + mutex_lock(&cgroup_mutex); + + /* support only one session */ + if (*pos > 0) + return NULL; + + ++*pos; + p->terminate = false; + if (p->order == BPF_ITER_CGROUP_PRE) + return css_next_descendant_pre(NULL, p->start_css); + else if (p->order == BPF_ITER_CGROUP_POST) + return css_next_descendant_post(NULL, p->start_css); + else /* BPF_ITER_CGROUP_PARENT_UP */ + return p->start_css; +} + +static int __cgroup_iter_seq_show(struct seq_file *seq, + struct cgroup_subsys_state *css, int in_stop); + +static void cgroup_iter_seq_stop(struct seq_file *seq, void *v) +{ + /* pass NULL to the prog for post-processing */ + if (!v) + __cgroup_iter_seq_show(seq, NULL, true); + mutex_unlock(&cgroup_mutex); +} + +static void *cgroup_iter_seq_next(struct seq_file *seq, void *v, loff_t *pos) +{ + struct cgroup_subsys_state *curr = (struct cgroup_subsys_state *)v; + struct cgroup_iter_priv *p = seq->private; + + ++*pos; + if (p->terminate) + return NULL; + + if (p->order == BPF_ITER_CGROUP_PRE) + return css_next_descendant_pre(curr, p->start_css); + else if (p->order == BPF_ITER_CGROUP_POST) + return css_next_descendant_post(curr, p->start_css); + else + return curr->parent; +} + +static int __cgroup_iter_seq_show(struct seq_file *seq, + struct cgroup_subsys_state *css, int in_stop) +{ + struct cgroup_iter_priv *p = seq->private; + struct bpf_iter__cgroup ctx; + struct bpf_iter_meta meta; + struct bpf_prog *prog; + int ret = 0; + + /* cgroup is dead, skip this element */ + if (css && cgroup_is_dead(css->cgroup)) + return 0; + + ctx.meta = &meta; + ctx.cgroup = css ? css->cgroup : NULL; + meta.seq = seq; + prog = bpf_iter_get_info(&meta, in_stop); + if (prog) + ret = bpf_iter_run_prog(prog, &ctx); + + /* if prog returns > 0, terminate after this element. */ + if (ret != 0) + p->terminate = true; + + return 0; +} + +static int cgroup_iter_seq_show(struct seq_file *seq, void *v) +{ + return __cgroup_iter_seq_show(seq, (struct cgroup_subsys_state *)v, + false); +} + +static const struct seq_operations cgroup_iter_seq_ops = { + .start = cgroup_iter_seq_start, + .next = cgroup_iter_seq_next, + .stop = cgroup_iter_seq_stop, + .show = cgroup_iter_seq_show, +}; + +BTF_ID_LIST_SINGLE(bpf_cgroup_btf_id, struct, cgroup) + +static int cgroup_iter_seq_init(void *priv, struct bpf_iter_aux_info *aux) +{ + struct cgroup_iter_priv *p = (struct cgroup_iter_priv *)priv; + struct cgroup *cgrp = aux->cgroup.start; + + p->start_css = &cgrp->self; + p->terminate = false; + p->order = aux->cgroup.order; + return 0; +} + +static const struct bpf_iter_seq_info cgroup_iter_seq_info = { + .seq_ops = &cgroup_iter_seq_ops, + .init_seq_private = cgroup_iter_seq_init, + .seq_priv_size = sizeof(struct cgroup_iter_priv), +}; + +static int bpf_iter_attach_cgroup(struct bpf_prog *prog, + union bpf_iter_link_info *linfo, + struct bpf_iter_aux_info *aux) +{ + int fd = linfo->cgroup.cgroup_fd; + struct cgroup *cgrp; + + if (fd) + cgrp = cgroup_get_from_fd(fd); + else /* walk the entire hierarchy by default. */ + cgrp = cgroup_get_from_path("/"); + + if (IS_ERR(cgrp)) + return PTR_ERR(cgrp); + + aux->cgroup.start = cgrp; + aux->cgroup.order = linfo->cgroup.traversal_order; + return 0; +} + +static void bpf_iter_detach_cgroup(struct bpf_iter_aux_info *aux) +{ + cgroup_put(aux->cgroup.start); +} + +static void bpf_iter_cgroup_show_fdinfo(const struct bpf_iter_aux_info *aux, + struct seq_file *seq) +{ + char *buf; + + buf = kzalloc(PATH_MAX, GFP_KERNEL); + if (!buf) { + seq_puts(seq, "cgroup_path:\n"); + goto show_order; + } + + /* If cgroup_path_ns() fails, buf will be an empty string, cgroup_path + * will print nothing. + * + * Path is in the calling process's cgroup namespace. + */ + cgroup_path_ns(aux->cgroup.start, buf, PATH_MAX, + current->nsproxy->cgroup_ns); + seq_printf(seq, "cgroup_path:\t%s\n", buf); + kfree(buf); + +show_order: + if (aux->cgroup.order == BPF_ITER_CGROUP_PRE) + seq_puts(seq, "traversal_order: pre\n"); + else if (aux->cgroup.order == BPF_ITER_CGROUP_POST) + seq_puts(seq, "traversal_order: post\n"); + else /* BPF_ITER_CGROUP_PARENT_UP */ + seq_puts(seq, "traversal_order: parent_up\n"); +} + +static int bpf_iter_cgroup_fill_link_info(const struct bpf_iter_aux_info *aux, + struct bpf_link_info *info) +{ + info->iter.cgroup.traversal_order = aux->cgroup.order; + info->iter.cgroup.cgroup_id = cgroup_id(aux->cgroup.start); + return 0; +} + +DEFINE_BPF_ITER_FUNC(cgroup, struct bpf_iter_meta *meta, + struct cgroup *cgroup) + +static struct bpf_iter_reg bpf_cgroup_reg_info = { + .target = "cgroup", + .attach_target = bpf_iter_attach_cgroup, + .detach_target = bpf_iter_detach_cgroup, + .show_fdinfo = bpf_iter_cgroup_show_fdinfo, + .fill_link_info = bpf_iter_cgroup_fill_link_info, + .ctx_arg_info_size = 1, + .ctx_arg_info = { + { offsetof(struct bpf_iter__cgroup, cgroup), + PTR_TO_BTF_ID }, + }, + .seq_info = &cgroup_iter_seq_info, +}; + +static int __init bpf_cgroup_iter_init(void) +{ + bpf_cgroup_reg_info.ctx_arg_info[0].btf_id = bpf_cgroup_btf_id[0]; + return bpf_iter_reg_target(&bpf_cgroup_reg_info); +} + +late_initcall(bpf_cgroup_iter_init); diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index f4009dbdf62da..4fd05cde19116 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -87,10 +87,27 @@ struct bpf_cgroup_storage_key { __u32 attach_type; /* program attach type (enum bpf_attach_type) */ }; +enum bpf_iter_cgroup_traversal_order { + BPF_ITER_CGROUP_PRE = 0, /* pre-order traversal */ + BPF_ITER_CGROUP_POST, /* post-order traversal */ + BPF_ITER_CGROUP_PARENT_UP, /* traversal of ancestors up to the root */ +}; + union bpf_iter_link_info { struct { __u32 map_fd; } map; + + /* cgroup_iter walks either the live descendants of a cgroup subtree, or the ancestors + * of a given cgroup. + */ + struct { + /* Cgroup file descriptor. This is root of the subtree if for walking the + * descendants; this is the starting cgroup if for walking the ancestors. + */ + __u32 cgroup_fd; + __u32 traversal_order; + } cgroup; }; /* BPF syscall commands, see bpf(2) man-page for more details. */ @@ -6050,6 +6067,10 @@ struct bpf_link_info { struct { __u32 map_id; } map; + struct { + __u32 traversal_order; + __aligned_u64 cgroup_id; + } cgroup; }; } iter; struct {