[bpf-next,4/5] bpf: Limit up to 512 bytes for bpf_global_percpu_ma allocation

Message ID 20231212223100.2138537-1-yonghong.song@linux.dev (mailing list archive)
State Superseded
Delegated to: BPF
Series bpf: Reduce memory usage for bpf_global_percpu_ma

Checks

Context Check Description
bpf/vmtest-bpf-next-PR success PR summary
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for bpf-next
netdev/ynl success SINGLE THREAD; Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit fail Errors and warnings before: 1127 this patch: 1128
netdev/cc_maintainers warning 7 maintainers not CCed: kpsingh@kernel.org haoluo@google.com martin.lau@linux.dev jolsa@kernel.org sdf@google.com song@kernel.org john.fastabend@gmail.com
netdev/build_clang success Errors and warnings before: 1143 this patch: 1143
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 1154 this patch: 1154
netdev/checkpatch warning WARNING: line length of 100 exceeds 80 columns WARNING: line length of 119 exceeds 80 columns WARNING: line length of 90 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-next-VM_Test-0 success Logs for Lint
bpf/vmtest-bpf-next-VM_Test-2 success Logs for Unittests
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-3 success Logs for Validate matrix.py
bpf/vmtest-bpf-next-VM_Test-5 success Logs for aarch64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-4 success Logs for aarch64-gcc / build / build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-12 success Logs for s390x-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-9 success Logs for aarch64-gcc / test (test_verifier, false, 360) / test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-6 success Logs for aarch64-gcc / test (test_maps, false, 360) / test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-13 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-10 success Logs for aarch64-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-15 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-8 success Logs for aarch64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-7 success Logs for aarch64-gcc / test (test_progs, false, 360) / test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-14 success Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-16 success Logs for x86_64-gcc / test (test_maps, false, 360) / test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-11 success Logs for s390x-gcc / build / build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-17 success Logs for s390x-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-18 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-19 success Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-20 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-21 success Logs for x86_64-gcc / test (test_maps, false, 360) / test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-22 success Logs for x86_64-gcc / test (test_progs, false, 360) / test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-23 success Logs for x86_64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-24 success Logs for x86_64-gcc / test (test_progs_no_alu32_parallel, true, 30) / test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-25 success Logs for x86_64-gcc / test (test_progs_parallel, true, 30) / test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-26 success Logs for x86_64-gcc / test (test_verifier, false, 360) / test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-27 success Logs for x86_64-gcc / veristat / veristat on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-28 success Logs for x86_64-llvm-17 / build / build for x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-29 success Logs for x86_64-llvm-17 / build-release / build for x86_64 with llvm-17 and -O2 optimization
bpf/vmtest-bpf-next-VM_Test-30 success Logs for x86_64-llvm-17 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-31 success Logs for x86_64-llvm-17 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-32 success Logs for x86_64-llvm-17 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-33 success Logs for x86_64-llvm-17 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-34 success Logs for x86_64-llvm-17 / veristat
bpf/vmtest-bpf-next-VM_Test-35 success Logs for x86_64-llvm-18 / build / build for x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-36 success Logs for x86_64-llvm-18 / build-release / build for x86_64 with llvm-18 and -O2 optimization
bpf/vmtest-bpf-next-VM_Test-37 success Logs for x86_64-llvm-18 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-38 success Logs for x86_64-llvm-18 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-39 success Logs for x86_64-llvm-18 / test (test_progs_cpuv4, false, 360) / test_progs_cpuv4 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-40 success Logs for x86_64-llvm-18 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-41 success Logs for x86_64-llvm-18 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-42 success Logs for x86_64-llvm-18 / veristat

Commit Message

Yonghong Song Dec. 12, 2023, 10:31 p.m. UTC
For percpu data structure allocation with bpf_global_percpu_ma,
the maximum data size is currently 4K. But for a system with a
large number of cpus, a bigger data size (e.g., 2K or 4K) might
consume a lot of memory. For example, with a unit size of 2K and
1024 cpus, each allocated object consumes 2K * 1K = 2MB across all
cpus, so 1K such objects will consume 2K * 1K * 1K = 2GB of memory.

We should discourage such usage. Let us limit the maximum data
size to 512 bytes for bpf_global_percpu_ma allocation.
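
For context, below is a minimal sketch of a program that this change
would newly reject at load time. The struct and program names are
hypothetical, and bpf_percpu_obj_new()/bpf_percpu_obj_drop() and the
__percpu_kptr annotation are assumed from the selftests'
bpf_experimental.h:

#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include "bpf_experimental.h"

/* 1024 bytes of scalars: above the new 512-byte cap, so the verifier
 * rejects the bpf_percpu_obj_new() call below at load time.
 */
struct big_val {
	__u64 payload[128];
};

SEC("fentry/bpf_fentry_test1")
int BPF_PROG(percpu_over_limit)
{
	struct big_val __percpu_kptr *p;

	p = bpf_percpu_obj_new(struct big_val);
	if (p)
		bpf_percpu_obj_drop(p);
	return 0;
}

char _license[] SEC("license") = "GPL";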

Signed-off-by: Yonghong Song <yonghong.song@linux.dev>
---
 kernel/bpf/verifier.c | 7 +++++++
 1 file changed, 7 insertions(+)

Comments

kernel test robot Dec. 13, 2023, 10:15 a.m. UTC | #1
Hi Yonghong,

kernel test robot noticed the following build warnings:

[auto build test WARNING on bpf-next/master]

url:    https://github.com/intel-lab-lkp/linux/commits/Yonghong-Song/bpf-Refactor-to-have-a-memalloc-cache-destroying-function/20231213-063401
base:   https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git master
patch link:    https://lore.kernel.org/r/20231212223100.2138537-1-yonghong.song%40linux.dev
patch subject: [PATCH bpf-next 4/5] bpf: Limit up to 512 bytes for bpf_global_percpu_ma allocation
config: m68k-defconfig (https://download.01.org/0day-ci/archive/20231213/202312131731.Yh7iYbJG-lkp@intel.com/config)
compiler: m68k-linux-gcc (GCC) 13.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20231213/202312131731.Yh7iYbJG-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202312131731.Yh7iYbJG-lkp@intel.com/

All warnings (new ones prefixed by >>):

   kernel/bpf/verifier.c: In function 'check_kfunc_call':
>> kernel/bpf/verifier.c:12082:115: warning: format '%lu' expects argument of type 'long unsigned int', but argument 4 has type 'unsigned int' [-Wformat=]
   12082 |                                                 verbose(env, "bpf_percpu_obj_new type size (%d) is greater than %lu\n",
         |                                                                                                                 ~~^
         |                                                                                                                   |
         |                                                                                                                   long unsigned int
         |                                                                                                                 %u


vim +12082 kernel/bpf/verifier.c

 11885	
 11886	static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 11887				    int *insn_idx_p)
 11888	{
 11889		const struct btf_type *t, *ptr_type;
 11890		u32 i, nargs, ptr_type_id, release_ref_obj_id;
 11891		struct bpf_reg_state *regs = cur_regs(env);
 11892		const char *func_name, *ptr_type_name;
 11893		bool sleepable, rcu_lock, rcu_unlock;
 11894		struct bpf_kfunc_call_arg_meta meta;
 11895		struct bpf_insn_aux_data *insn_aux;
 11896		int err, insn_idx = *insn_idx_p;
 11897		const struct btf_param *args;
 11898		const struct btf_type *ret_t;
 11899		struct btf *desc_btf;
 11900	
 11901		/* skip for now, but return error when we find this in fixup_kfunc_call */
 11902		if (!insn->imm)
 11903			return 0;
 11904	
 11905		err = fetch_kfunc_meta(env, insn, &meta, &func_name);
 11906		if (err == -EACCES && func_name)
 11907			verbose(env, "calling kernel function %s is not allowed\n", func_name);
 11908		if (err)
 11909			return err;
 11910		desc_btf = meta.btf;
 11911		insn_aux = &env->insn_aux_data[insn_idx];
 11912	
 11913		insn_aux->is_iter_next = is_iter_next_kfunc(&meta);
 11914	
 11915		if (is_kfunc_destructive(&meta) && !capable(CAP_SYS_BOOT)) {
 11916			verbose(env, "destructive kfunc calls require CAP_SYS_BOOT capability\n");
 11917			return -EACCES;
 11918		}
 11919	
 11920		sleepable = is_kfunc_sleepable(&meta);
 11921		if (sleepable && !env->prog->aux->sleepable) {
 11922			verbose(env, "program must be sleepable to call sleepable kfunc %s\n", func_name);
 11923			return -EACCES;
 11924		}
 11925	
 11926		/* Check the arguments */
 11927		err = check_kfunc_args(env, &meta, insn_idx);
 11928		if (err < 0)
 11929			return err;
 11930	
 11931		if (meta.func_id == special_kfunc_list[KF_bpf_rbtree_add_impl]) {
 11932			err = push_callback_call(env, insn, insn_idx, meta.subprogno,
 11933						 set_rbtree_add_callback_state);
 11934			if (err) {
 11935				verbose(env, "kfunc %s#%d failed callback verification\n",
 11936					func_name, meta.func_id);
 11937				return err;
 11938			}
 11939		}
 11940	
 11941		rcu_lock = is_kfunc_bpf_rcu_read_lock(&meta);
 11942		rcu_unlock = is_kfunc_bpf_rcu_read_unlock(&meta);
 11943	
 11944		if (env->cur_state->active_rcu_lock) {
 11945			struct bpf_func_state *state;
 11946			struct bpf_reg_state *reg;
 11947			u32 clear_mask = (1 << STACK_SPILL) | (1 << STACK_ITER);
 11948	
 11949			if (in_rbtree_lock_required_cb(env) && (rcu_lock || rcu_unlock)) {
 11950				verbose(env, "Calling bpf_rcu_read_{lock,unlock} in unnecessary rbtree callback\n");
 11951				return -EACCES;
 11952			}
 11953	
 11954			if (rcu_lock) {
 11955				verbose(env, "nested rcu read lock (kernel function %s)\n", func_name);
 11956				return -EINVAL;
 11957			} else if (rcu_unlock) {
 11958				bpf_for_each_reg_in_vstate_mask(env->cur_state, state, reg, clear_mask, ({
 11959					if (reg->type & MEM_RCU) {
 11960						reg->type &= ~(MEM_RCU | PTR_MAYBE_NULL);
 11961						reg->type |= PTR_UNTRUSTED;
 11962					}
 11963				}));
 11964				env->cur_state->active_rcu_lock = false;
 11965			} else if (sleepable) {
 11966				verbose(env, "kernel func %s is sleepable within rcu_read_lock region\n", func_name);
 11967				return -EACCES;
 11968			}
 11969		} else if (rcu_lock) {
 11970			env->cur_state->active_rcu_lock = true;
 11971		} else if (rcu_unlock) {
 11972			verbose(env, "unmatched rcu read unlock (kernel function %s)\n", func_name);
 11973			return -EINVAL;
 11974		}
 11975	
 11976		/* In case of release function, we get register number of refcounted
 11977		 * PTR_TO_BTF_ID in bpf_kfunc_arg_meta, do the release now.
 11978		 */
 11979		if (meta.release_regno) {
 11980			err = release_reference(env, regs[meta.release_regno].ref_obj_id);
 11981			if (err) {
 11982				verbose(env, "kfunc %s#%d reference has not been acquired before\n",
 11983					func_name, meta.func_id);
 11984				return err;
 11985			}
 11986		}
 11987	
 11988		if (meta.func_id == special_kfunc_list[KF_bpf_list_push_front_impl] ||
 11989		    meta.func_id == special_kfunc_list[KF_bpf_list_push_back_impl] ||
 11990		    meta.func_id == special_kfunc_list[KF_bpf_rbtree_add_impl]) {
 11991			release_ref_obj_id = regs[BPF_REG_2].ref_obj_id;
 11992			insn_aux->insert_off = regs[BPF_REG_2].off;
 11993			insn_aux->kptr_struct_meta = btf_find_struct_meta(meta.arg_btf, meta.arg_btf_id);
 11994			err = ref_convert_owning_non_owning(env, release_ref_obj_id);
 11995			if (err) {
 11996				verbose(env, "kfunc %s#%d conversion of owning ref to non-owning failed\n",
 11997					func_name, meta.func_id);
 11998				return err;
 11999			}
 12000	
 12001			err = release_reference(env, release_ref_obj_id);
 12002			if (err) {
 12003				verbose(env, "kfunc %s#%d reference has not been acquired before\n",
 12004					func_name, meta.func_id);
 12005				return err;
 12006			}
 12007		}
 12008	
 12009		if (meta.func_id == special_kfunc_list[KF_bpf_throw]) {
 12010			if (!bpf_jit_supports_exceptions()) {
 12011				verbose(env, "JIT does not support calling kfunc %s#%d\n",
 12012					func_name, meta.func_id);
 12013				return -ENOTSUPP;
 12014			}
 12015			env->seen_exception = true;
 12016	
 12017			/* In the case of the default callback, the cookie value passed
 12018			 * to bpf_throw becomes the return value of the program.
 12019			 */
 12020			if (!env->exception_callback_subprog) {
 12021				err = check_return_code(env, BPF_REG_1, "R1");
 12022				if (err < 0)
 12023					return err;
 12024			}
 12025		}
 12026	
 12027		for (i = 0; i < CALLER_SAVED_REGS; i++)
 12028			mark_reg_not_init(env, regs, caller_saved[i]);
 12029	
 12030		/* Check return type */
 12031		t = btf_type_skip_modifiers(desc_btf, meta.func_proto->type, NULL);
 12032	
 12033		if (is_kfunc_acquire(&meta) && !btf_type_is_struct_ptr(meta.btf, t)) {
 12034			/* Only exception is bpf_obj_new_impl */
 12035			if (meta.btf != btf_vmlinux ||
 12036			    (meta.func_id != special_kfunc_list[KF_bpf_obj_new_impl] &&
 12037			     meta.func_id != special_kfunc_list[KF_bpf_percpu_obj_new_impl] &&
 12038			     meta.func_id != special_kfunc_list[KF_bpf_refcount_acquire_impl])) {
 12039				verbose(env, "acquire kernel function does not return PTR_TO_BTF_ID\n");
 12040				return -EINVAL;
 12041			}
 12042		}
 12043	
 12044		if (btf_type_is_scalar(t)) {
 12045			mark_reg_unknown(env, regs, BPF_REG_0);
 12046			mark_btf_func_reg_size(env, BPF_REG_0, t->size);
 12047		} else if (btf_type_is_ptr(t)) {
 12048			ptr_type = btf_type_skip_modifiers(desc_btf, t->type, &ptr_type_id);
 12049	
 12050			if (meta.btf == btf_vmlinux && btf_id_set_contains(&special_kfunc_set, meta.func_id)) {
 12051				if (meta.func_id == special_kfunc_list[KF_bpf_obj_new_impl] ||
 12052				    meta.func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl]) {
 12053					struct btf_struct_meta *struct_meta;
 12054					struct btf *ret_btf;
 12055					u32 ret_btf_id;
 12056	
 12057					if (meta.func_id == special_kfunc_list[KF_bpf_obj_new_impl] && !bpf_global_ma_set)
 12058						return -ENOMEM;
 12059	
 12060					if (((u64)(u32)meta.arg_constant.value) != meta.arg_constant.value) {
 12061						verbose(env, "local type ID argument must be in range [0, U32_MAX]\n");
 12062						return -EINVAL;
 12063					}
 12064	
 12065					ret_btf = env->prog->aux->btf;
 12066					ret_btf_id = meta.arg_constant.value;
 12067	
 12068					/* This may be NULL due to user not supplying a BTF */
 12069					if (!ret_btf) {
 12070						verbose(env, "bpf_obj_new/bpf_percpu_obj_new requires prog BTF\n");
 12071						return -EINVAL;
 12072					}
 12073	
 12074					ret_t = btf_type_by_id(ret_btf, ret_btf_id);
 12075					if (!ret_t || !__btf_type_is_struct(ret_t)) {
 12076						verbose(env, "bpf_obj_new/bpf_percpu_obj_new type ID argument must be of a struct\n");
 12077						return -EINVAL;
 12078					}
 12079	
 12080					if (meta.func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl]) {
 12081						if (ret_t->size > BPF_GLOBAL_PERCPU_MA_MAX_SIZE) {
 12082							verbose(env, "bpf_percpu_obj_new type size (%d) is greater than %lu\n",
 12083								ret_t->size, BPF_GLOBAL_PERCPU_MA_MAX_SIZE);
 12084							return -EINVAL;
 12085						}
 12086						mutex_lock(&bpf_percpu_ma_lock);
 12087						err = bpf_mem_alloc_percpu_unit_init(&bpf_global_percpu_ma, ret_t->size);
 12088						mutex_unlock(&bpf_percpu_ma_lock);
 12089						if (err)
 12090							return err;
 12091					}
 12092	
 12093					struct_meta = btf_find_struct_meta(ret_btf, ret_btf_id);
 12094					if (meta.func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl]) {
 12095						if (!__btf_type_is_scalar_struct(env, ret_btf, ret_t, 0)) {
 12096							verbose(env, "bpf_percpu_obj_new type ID argument must be of a struct of scalars\n");
 12097							return -EINVAL;
 12098						}
 12099	
 12100						if (struct_meta) {
 12101							verbose(env, "bpf_percpu_obj_new type ID argument must not contain special fields\n");
 12102							return -EINVAL;
 12103						}
 12104					}
 12105	
 12106					mark_reg_known_zero(env, regs, BPF_REG_0);
 12107					regs[BPF_REG_0].type = PTR_TO_BTF_ID | MEM_ALLOC;
 12108					regs[BPF_REG_0].btf = ret_btf;
 12109					regs[BPF_REG_0].btf_id = ret_btf_id;
 12110					if (meta.func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl])
 12111						regs[BPF_REG_0].type |= MEM_PERCPU;
 12112	
 12113					insn_aux->obj_new_size = ret_t->size;
 12114					insn_aux->kptr_struct_meta = struct_meta;
 12115				} else if (meta.func_id == special_kfunc_list[KF_bpf_refcount_acquire_impl]) {
 12116					mark_reg_known_zero(env, regs, BPF_REG_0);
 12117					regs[BPF_REG_0].type = PTR_TO_BTF_ID | MEM_ALLOC;
 12118					regs[BPF_REG_0].btf = meta.arg_btf;
 12119					regs[BPF_REG_0].btf_id = meta.arg_btf_id;
 12120	
 12121					insn_aux->kptr_struct_meta =
 12122						btf_find_struct_meta(meta.arg_btf,
 12123								     meta.arg_btf_id);
 12124				} else if (meta.func_id == special_kfunc_list[KF_bpf_list_pop_front] ||
 12125					   meta.func_id == special_kfunc_list[KF_bpf_list_pop_back]) {
 12126					struct btf_field *field = meta.arg_list_head.field;
 12127	
 12128					mark_reg_graph_node(regs, BPF_REG_0, &field->graph_root);
 12129				} else if (meta.func_id == special_kfunc_list[KF_bpf_rbtree_remove] ||
 12130					   meta.func_id == special_kfunc_list[KF_bpf_rbtree_first]) {
 12131					struct btf_field *field = meta.arg_rbtree_root.field;
 12132	
 12133					mark_reg_graph_node(regs, BPF_REG_0, &field->graph_root);
 12134				} else if (meta.func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx]) {
 12135					mark_reg_known_zero(env, regs, BPF_REG_0);
 12136					regs[BPF_REG_0].type = PTR_TO_BTF_ID | PTR_TRUSTED;
 12137					regs[BPF_REG_0].btf = desc_btf;
 12138					regs[BPF_REG_0].btf_id = meta.ret_btf_id;
 12139				} else if (meta.func_id == special_kfunc_list[KF_bpf_rdonly_cast]) {
 12140					ret_t = btf_type_by_id(desc_btf, meta.arg_constant.value);
 12141					if (!ret_t || !btf_type_is_struct(ret_t)) {
 12142						verbose(env,
 12143							"kfunc bpf_rdonly_cast type ID argument must be of a struct\n");
 12144						return -EINVAL;
 12145					}
 12146	
 12147					mark_reg_known_zero(env, regs, BPF_REG_0);
 12148					regs[BPF_REG_0].type = PTR_TO_BTF_ID | PTR_UNTRUSTED;
 12149					regs[BPF_REG_0].btf = desc_btf;
 12150					regs[BPF_REG_0].btf_id = meta.arg_constant.value;
 12151				} else if (meta.func_id == special_kfunc_list[KF_bpf_dynptr_slice] ||
 12152					   meta.func_id == special_kfunc_list[KF_bpf_dynptr_slice_rdwr]) {
 12153					enum bpf_type_flag type_flag = get_dynptr_type_flag(meta.initialized_dynptr.type);
 12154	
 12155					mark_reg_known_zero(env, regs, BPF_REG_0);
 12156	
 12157					if (!meta.arg_constant.found) {
 12158						verbose(env, "verifier internal error: bpf_dynptr_slice(_rdwr) no constant size\n");
 12159						return -EFAULT;
 12160					}
 12161	
 12162					regs[BPF_REG_0].mem_size = meta.arg_constant.value;
 12163	
 12164					/* PTR_MAYBE_NULL will be added when is_kfunc_ret_null is checked */
 12165					regs[BPF_REG_0].type = PTR_TO_MEM | type_flag;
 12166	
 12167					if (meta.func_id == special_kfunc_list[KF_bpf_dynptr_slice]) {
 12168						regs[BPF_REG_0].type |= MEM_RDONLY;
 12169					} else {
 12170						/* this will set env->seen_direct_write to true */
 12171						if (!may_access_direct_pkt_data(env, NULL, BPF_WRITE)) {
 12172							verbose(env, "the prog does not allow writes to packet data\n");
 12173							return -EINVAL;
 12174						}
 12175					}
 12176	
 12177					if (!meta.initialized_dynptr.id) {
 12178						verbose(env, "verifier internal error: no dynptr id\n");
 12179						return -EFAULT;
 12180					}
 12181					regs[BPF_REG_0].dynptr_id = meta.initialized_dynptr.id;
 12182	
 12183					/* we don't need to set BPF_REG_0's ref obj id
 12184					 * because packet slices are not refcounted (see
 12185					 * dynptr_type_refcounted)
 12186					 */
 12187				} else {
 12188					verbose(env, "kernel function %s unhandled dynamic return type\n",
 12189						meta.func_name);
 12190					return -EFAULT;
 12191				}
 12192			} else if (!__btf_type_is_struct(ptr_type)) {
 12193				if (!meta.r0_size) {
 12194					__u32 sz;
 12195	
 12196					if (!IS_ERR(btf_resolve_size(desc_btf, ptr_type, &sz))) {
 12197						meta.r0_size = sz;
 12198						meta.r0_rdonly = true;
 12199					}
 12200				}
 12201				if (!meta.r0_size) {
 12202					ptr_type_name = btf_name_by_offset(desc_btf,
 12203									   ptr_type->name_off);
 12204					verbose(env,
 12205						"kernel function %s returns pointer type %s %s is not supported\n",
 12206						func_name,
 12207						btf_type_str(ptr_type),
 12208						ptr_type_name);
 12209					return -EINVAL;
 12210				}
 12211	
 12212				mark_reg_known_zero(env, regs, BPF_REG_0);
 12213				regs[BPF_REG_0].type = PTR_TO_MEM;
 12214				regs[BPF_REG_0].mem_size = meta.r0_size;
 12215	
 12216				if (meta.r0_rdonly)
 12217					regs[BPF_REG_0].type |= MEM_RDONLY;
 12218	
 12219				/* Ensures we don't access the memory after a release_reference() */
 12220				if (meta.ref_obj_id)
 12221					regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id;
 12222			} else {
 12223				mark_reg_known_zero(env, regs, BPF_REG_0);
 12224				regs[BPF_REG_0].btf = desc_btf;
 12225				regs[BPF_REG_0].type = PTR_TO_BTF_ID;
 12226				regs[BPF_REG_0].btf_id = ptr_type_id;
 12227			}
 12228	
 12229			if (is_kfunc_ret_null(&meta)) {
 12230				regs[BPF_REG_0].type |= PTR_MAYBE_NULL;
 12231				/* For mark_ptr_or_null_reg, see 93c230e3f5bd6 */
 12232				regs[BPF_REG_0].id = ++env->id_gen;
 12233			}
 12234			mark_btf_func_reg_size(env, BPF_REG_0, sizeof(void *));
 12235			if (is_kfunc_acquire(&meta)) {
 12236				int id = acquire_reference_state(env, insn_idx);
 12237	
 12238				if (id < 0)
 12239					return id;
 12240				if (is_kfunc_ret_null(&meta))
 12241					regs[BPF_REG_0].id = id;
 12242				regs[BPF_REG_0].ref_obj_id = id;
 12243			} else if (meta.func_id == special_kfunc_list[KF_bpf_rbtree_first]) {
 12244				ref_set_non_owning(env, &regs[BPF_REG_0]);
 12245			}
 12246	
 12247			if (reg_may_point_to_spin_lock(&regs[BPF_REG_0]) && !regs[BPF_REG_0].id)
 12248				regs[BPF_REG_0].id = ++env->id_gen;
 12249		} else if (btf_type_is_void(t)) {
 12250			if (meta.btf == btf_vmlinux && btf_id_set_contains(&special_kfunc_set, meta.func_id)) {
 12251				if (meta.func_id == special_kfunc_list[KF_bpf_obj_drop_impl] ||
 12252				    meta.func_id == special_kfunc_list[KF_bpf_percpu_obj_drop_impl]) {
 12253					insn_aux->kptr_struct_meta =
 12254						btf_find_struct_meta(meta.arg_btf,
 12255								     meta.arg_btf_id);
 12256				}
 12257			}
 12258		}
 12259	
 12260		nargs = btf_type_vlen(meta.func_proto);
 12261		args = (const struct btf_param *)(meta.func_proto + 1);
 12262		for (i = 0; i < nargs; i++) {
 12263			u32 regno = i + 1;
 12264	
 12265			t = btf_type_skip_modifiers(desc_btf, args[i].type, NULL);
 12266			if (btf_type_is_ptr(t))
 12267				mark_btf_func_reg_size(env, regno, sizeof(void *));
 12268			else
 12269				/* scalar. ensured by btf_check_kfunc_arg_match() */
 12270				mark_btf_func_reg_size(env, regno, t->size);
 12271		}
 12272	
 12273		if (is_iter_next_kfunc(&meta)) {
 12274			err = process_iter_next_call(env, insn_idx, &meta);
 12275			if (err)
 12276				return err;
 12277		}
 12278	
 12279		return 0;
 12280	}
 12281
Hou Tao Dec. 13, 2023, 11:09 a.m. UTC | #2
Hi,

On 12/13/2023 6:31 AM, Yonghong Song wrote:
> For percpu data structure allocation with bpf_global_percpu_ma,
> the maximum data size is currently 4K. But for a system with a
> large number of cpus, a bigger data size (e.g., 2K or 4K) might
> consume a lot of memory. For example, with a unit size of 2K and
> 1024 cpus, each allocated object consumes 2K * 1K = 2MB across all
> cpus, so 1K such objects will consume 2K * 1K * 1K = 2GB of memory.
>
> We should discourage such usage. Let us limit the maximum data
> size to 512 bytes for bpf_global_percpu_ma allocation.
>
> Signed-off-by: Yonghong Song <yonghong.song@linux.dev>
> ---
>  kernel/bpf/verifier.c | 7 +++++++
>  1 file changed, 7 insertions(+)
>
> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index 0c55fe4451e1..e5cb6b7526b6 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -43,6 +43,8 @@ static const struct bpf_verifier_ops * const bpf_verifier_ops[] = {
>  };
>  
>  struct bpf_mem_alloc bpf_global_percpu_ma;
> +#define LLIST_NODE_SZ sizeof(struct llist_node)
> +#define BPF_GLOBAL_PERCPU_MA_MAX_SIZE  (512 - LLIST_NODE_SZ)

It seems that for per-cpu allocation the extra subtraction is not
needed; we could use all of the allocated space through the per-cpu
pointer. Maybe we could first update bpf_mem_alloc() to use size
instead of size + sizeof(void *) to select the cache.
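
(For reference, a stand-alone sketch of the bucket arithmetic being
discussed. The power-of-two rounding below is only a stand-in for the
size_index based cache selection in kernel/bpf/memalloc.c, so treat
the numbers as illustrative:)

#include <stdio.h>

#define LLIST_NODE_SZ	sizeof(void *)	/* stand-in for sizeof(struct llist_node) */

/* stand-in for the cache selection done by bpf_mem_alloc() */
static size_t bucket(size_t request)
{
	size_t b = 16;

	while (b < request)
		b <<= 1;
	return b;
}

int main(void)
{
	/* current rule: each unit embeds an llist_node, so a full
	 * 512-byte object spills into the next bucket ...
	 */
	printf("512B object, size + node -> %zu-byte bucket\n",
	       bucket(512 + LLIST_NODE_SZ));
	/* ... while selecting by the raw size keeps it in the
	 * 512-byte bucket, which is the point being made above
	 */
	printf("512B object, raw size    -> %zu-byte bucket\n",
	       bucket(512));
	return 0;
}
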
>  
>  /* bpf_check() is a static code analyzer that walks eBPF program
>   * instruction by instruction and updates register/stack state.
> @@ -12091,6 +12093,11 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
>  				}
>  
>  				if (meta.func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl]) {
> +					if (ret_t->size > BPF_GLOBAL_PERCPU_MA_MAX_SIZE) {
> +						verbose(env, "bpf_percpu_obj_new type size (%d) is greater than %lu\n",
> +							ret_t->size, BPF_GLOBAL_PERCPU_MA_MAX_SIZE);
> +						return -EINVAL;
> +					}
>  					mutex_lock(&bpf_percpu_ma_lock);
>  					err = bpf_mem_alloc_percpu_unit_init(&bpf_global_percpu_ma, ret_t->size);
>  					mutex_unlock(&bpf_percpu_ma_lock);
kernel test robot Dec. 13, 2023, 2:13 p.m. UTC | #3
Hi Yonghong,

kernel test robot noticed the following build warnings:

[auto build test WARNING on bpf-next/master]

url:    https://github.com/intel-lab-lkp/linux/commits/Yonghong-Song/bpf-Refactor-to-have-a-memalloc-cache-destroying-function/20231213-063401
base:   https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git master
patch link:    https://lore.kernel.org/r/20231212223100.2138537-1-yonghong.song%40linux.dev
patch subject: [PATCH bpf-next 4/5] bpf: Limit up to 512 bytes for bpf_global_percpu_ma allocation
config: i386-buildonly-randconfig-001-20231213 (https://download.01.org/0day-ci/archive/20231213/202312132241.IJQpMDvO-lkp@intel.com/config)
compiler: clang version 16.0.4 (https://github.com/llvm/llvm-project.git ae42196bc493ffe877a7e3dff8be32035dea4d07)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20231213/202312132241.IJQpMDvO-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202312132241.IJQpMDvO-lkp@intel.com/

All warnings (new ones prefixed by >>):

>> kernel/bpf/verifier.c:12083:21: warning: format specifies type 'unsigned long' but the argument has type 'unsigned int' [-Wformat]
                                                           ret_t->size, BPF_GLOBAL_PERCPU_MA_MAX_SIZE);
                                                                        ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   kernel/bpf/verifier.c:47:40: note: expanded from macro 'BPF_GLOBAL_PERCPU_MA_MAX_SIZE'
   #define BPF_GLOBAL_PERCPU_MA_MAX_SIZE  (512 - LLIST_NODE_SZ)
                                          ^~~~~~~~~~~~~~~~~~~~~
   1 warning generated.


vim +12083 kernel/bpf/verifier.c

[...]
Yonghong Song Dec. 13, 2023, 5:20 p.m. UTC | #4
On 12/13/23 2:15 AM, kernel test robot wrote:
> Hi Yonghong,
>
> kernel test robot noticed the following build warnings:
>
> [auto build test WARNING on bpf-next/master]
>
> url:    https://github.com/intel-lab-lkp/linux/commits/Yonghong-Song/bpf-Refactor-to-have-a-memalloc-cache-destroying-function/20231213-063401
> base:   https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git master
> patch link:    https://lore.kernel.org/r/20231212223100.2138537-1-yonghong.song%40linux.dev
> patch subject: [PATCH bpf-next 4/5] bpf: Limit up to 512 bytes for bpf_global_percpu_ma allocation
> config: m68k-defconfig (https://download.01.org/0day-ci/archive/20231213/202312131731.Yh7iYbJG-lkp@intel.com/config)
> compiler: m68k-linux-gcc (GCC) 13.2.0
> reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20231213/202312131731.Yh7iYbJG-lkp@intel.com/reproduce)
>
> If you fix the issue in a separate patch/commit (i.e. not just a new version of
> the same patch/commit), kindly add following tags
> | Reported-by: kernel test robot <lkp@intel.com>
> | Closes: https://lore.kernel.org/oe-kbuild-all/202312131731.Yh7iYbJG-lkp@intel.com/
>
> All warnings (new ones prefixed by >>):
>
>     kernel/bpf/verifier.c: In function 'check_kfunc_call':
>>> kernel/bpf/verifier.c:12082:115: warning: format '%lu' expects argument of type 'long unsigned int', but argument 4 has type 'unsigned int' [-Wformat=]
>     12082 |                                                 verbose(env, "bpf_percpu_obj_new type size (%d) is greater than %lu\n",
>           |                                                                                                                 ~~^
>           |                                                                                                                   |
>           |                                                                                                                   long unsigned int
>           |                                                                                                                 %u
>
>
> vim +12082 kernel/bpf/verifier.c

Okay, it seems '%lu' is not portable here. Will fix it with '%zu' if the code stays roughly the same in the next revision.
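
(For illustration, the direction of that fix: '%zu' matches the size_t
result of the sizeof-based macro, and since ret_t->size is a u32, '%u'
arguably fits better than '%d'. A sketch, not the final patch:)

	verbose(env, "bpf_percpu_obj_new type size (%u) is greater than %zu\n",
		ret_t->size, BPF_GLOBAL_PERCPU_MA_MAX_SIZE);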

[...]
Yonghong Song Dec. 13, 2023, 5:28 p.m. UTC | #5
On 12/13/23 3:09 AM, Hou Tao wrote:
> Hi,
>
> On 12/13/2023 6:31 AM, Yonghong Song wrote:
>> For percpu data structure allocation with bpf_global_percpu_ma,
>> the maximum data size is currently 4K. But for a system with a
>> large number of cpus, a bigger data size (e.g., 2K or 4K) might
>> consume a lot of memory. For example, with a unit size of 2K and
>> 1024 cpus, each allocated object consumes 2K * 1K = 2MB across all
>> cpus, so 1K such objects will consume 2K * 1K * 1K = 2GB of memory.
>>
>> We should discourage such usage. Let us limit the maximum data
>> size to 512 bytes for bpf_global_percpu_ma allocation.
>>
>> Signed-off-by: Yonghong Song <yonghong.song@linux.dev>
>> ---
>>   kernel/bpf/verifier.c | 7 +++++++
>>   1 file changed, 7 insertions(+)
>>
>> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
>> index 0c55fe4451e1..e5cb6b7526b6 100644
>> --- a/kernel/bpf/verifier.c
>> +++ b/kernel/bpf/verifier.c
>> @@ -43,6 +43,8 @@ static const struct bpf_verifier_ops * const bpf_verifier_ops[] = {
>>   };
>>   
>>   struct bpf_mem_alloc bpf_global_percpu_ma;
>> +#define LLIST_NODE_SZ sizeof(struct llist_node)
>> +#define BPF_GLOBAL_PERCPU_MA_MAX_SIZE  (512 - LLIST_NODE_SZ)
> It seems for per-cpu allocation the extra subtraction is not needed, we
> could use all allocated space in per-cpu pointer. Maybe we could update
> bpf_mem_alloc() firstly to use size instead of size + sizeof(void *) to
> select cache.

Good point. If this works, it can also ensure that when users try to
allocate 512 bytes, the allocation goes to the 512-byte bucket instead
of the 1024-byte bucket. Will investigate.
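
(A rough sketch of that direction, purely hypothetical: select the
per-cpu cache by the raw size rather than size + LLIST_NODE_SZ.
bpf_mem_cache_idx() does exist in kernel/bpf/memalloc.c, but the
fragment below is an assumption, not the actual memalloc change:)

	/* hypothetical: per-cpu units keep the freelist node outside
	 * the per-cpu data, so pick the cache by `size` directly
	 */
	idx = bpf_mem_cache_idx(size);	/* was: size + LLIST_NODE_SZ */
	if (idx < 0)
		return -EINVAL;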

>>   
>>   /* bpf_check() is a static code analyzer that walks eBPF program
>>    * instruction by instruction and updates register/stack state.
>> @@ -12091,6 +12093,11 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
>>   				}
>>   
>>   				if (meta.func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl]) {
>> +					if (ret_t->size > BPF_GLOBAL_PERCPU_MA_MAX_SIZE) {
>> +						verbose(env, "bpf_percpu_obj_new type size (%d) is greater than %lu\n",
>> +							ret_t->size, BPF_GLOBAL_PERCPU_MA_MAX_SIZE);
>> +						return -EINVAL;
>> +					}
>>   					mutex_lock(&bpf_percpu_ma_lock);
>>   					err = bpf_mem_alloc_percpu_unit_init(&bpf_global_percpu_ma, ret_t->size);
>>   					mutex_unlock(&bpf_percpu_ma_lock);

Patch

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 0c55fe4451e1..e5cb6b7526b6 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -43,6 +43,8 @@  static const struct bpf_verifier_ops * const bpf_verifier_ops[] = {
 };
 
 struct bpf_mem_alloc bpf_global_percpu_ma;
+#define LLIST_NODE_SZ sizeof(struct llist_node)
+#define BPF_GLOBAL_PERCPU_MA_MAX_SIZE  (512 - LLIST_NODE_SZ)
 
 /* bpf_check() is a static code analyzer that walks eBPF program
  * instruction by instruction and updates register/stack state.
@@ -12091,6 +12093,11 @@  static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 				}
 
 				if (meta.func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl]) {
+					if (ret_t->size > BPF_GLOBAL_PERCPU_MA_MAX_SIZE) {
+						verbose(env, "bpf_percpu_obj_new type size (%d) is greater than %lu\n",
+							ret_t->size, BPF_GLOBAL_PERCPU_MA_MAX_SIZE);
+						return -EINVAL;
+					}
 					mutex_lock(&bpf_percpu_ma_lock);
 					err = bpf_mem_alloc_percpu_unit_init(&bpf_global_percpu_ma, ret_t->size);
 					mutex_unlock(&bpf_percpu_ma_lock);