From patchwork Mon Nov 4 19:35:00 2024 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Yonghong Song X-Patchwork-Id: 13861896 X-Patchwork-Delegate: bpf@iogearbox.net Received: from 69-171-232-181.mail-mxout.facebook.com (69-171-232-181.mail-mxout.facebook.com [69.171.232.181]) (using TLSv1.2 with cipher ECDHE-RSA-AES128-GCM-SHA256 (128/128 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id AC9FCAD2D for ; Mon, 4 Nov 2024 19:35:10 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=69.171.232.181 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1730748914; cv=none; b=SuEdoXPhXY5isvZjPV4gZy6/yGCSyhaWmp6pGPee9epJ87PDDG5ZV0qlAzTqExOe03yGOhPz1w7J3rHCcMnvs5fCvi05duRp/ANW3FasxemTPvkotLK1c29v4m7xFGjR1I+nCKnmG0uVK/fuLeRP+0BGZKGzzjVQENTNdS9P1GE= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1730748914; c=relaxed/simple; bh=+jwfgpLrWLiEISSeJi+XNEMzaCiQtmQop2xz153gq34=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=YGZyqhMmSiznlR24Nw4PS7ungy9fdqlE1kS5QPtECG4ylYG+A0INPyjrfVluwfBRxGk0G1loQjXAPEBTHcE+9tOuaLiQVVbPqqurpzFI3VqI+G+NEI/CuYSoVyHakp/tVnNqUcCwwQ70an+IHczeL58nwy2qvOiEVOjgK3nqL2k= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=fail (p=none dis=none) header.from=linux.dev; spf=fail smtp.mailfrom=linux.dev; arc=none smtp.client-ip=69.171.232.181 Authentication-Results: smtp.subspace.kernel.org; dmarc=fail (p=none dis=none) header.from=linux.dev Authentication-Results: smtp.subspace.kernel.org; spf=fail smtp.mailfrom=linux.dev Received: by devbig309.ftw3.facebook.com (Postfix, from userid 128203) id 7DBF6ABEC70B; Mon, 4 Nov 2024 11:35:00 -0800 (PST) From: Yonghong Song To: bpf@vger.kernel.org Cc: Alexei Starovoitov , Andrii Nakryiko , Daniel Borkmann , kernel-team@fb.com, Martin KaFai Lau , Tejun Heo Subject: [PATCH bpf-next v9 01/10] bpf: Check stack depth limit after visiting all subprogs Date: Mon, 4 Nov 2024 11:35:00 -0800 Message-ID: <20241104193500.3242095-1-yonghong.song@linux.dev> X-Mailer: git-send-email 2.43.5 In-Reply-To: <20241104193455.3241859-1-yonghong.song@linux.dev> References: <20241104193455.3241859-1-yonghong.song@linux.dev> Precedence: bulk X-Mailing-List: bpf@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 X-Patchwork-Delegate: bpf@iogearbox.net Check stack depth limit after all subprogs are visited. Note that if private stack is enabled, the only stack size restriction is for a single subprog with size less than or equal to MAX_BPF_STACK. In subsequent patches, in function check_max_stack_depth(), there could be a flip from enabling private stack to disabling private stack due to potential nested bpf subprog run. Moving stack depth limit checking after visiting all subprogs ensures the checking not missed in such flipping cases. The useless 'continue' statement in the loop in func check_max_stack_depth() is also removed. Signed-off-by: Yonghong Song --- kernel/bpf/verifier.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index ba800c7611e3..ed8f70e51141 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -6032,7 +6032,8 @@ static int round_up_stack_depth(struct bpf_verifier_env *env, int stack_depth) * Since recursion is prevented by check_cfg() this algorithm * only needs a local stack of MAX_CALL_FRAMES to remember callsites */ -static int check_max_stack_depth_subprog(struct bpf_verifier_env *env, int idx) +static int check_max_stack_depth_subprog(struct bpf_verifier_env *env, int idx, + int *subtree_depth, int *depth_frame) { struct bpf_subprog_info *subprog = env->subprog_info; struct bpf_insn *insn = env->prog->insnsi; @@ -6070,10 +6071,9 @@ static int check_max_stack_depth_subprog(struct bpf_verifier_env *env, int idx) return -EACCES; } depth += round_up_stack_depth(env, subprog[idx].stack_depth); - if (depth > MAX_BPF_STACK) { - verbose(env, "combined stack size of %d calls is %d. Too large\n", - frame + 1, depth); - return -EACCES; + if (depth > MAX_BPF_STACK && !*subtree_depth) { + *subtree_depth = depth; + *depth_frame = frame + 1; } continue_func: subprog_end = subprog[idx + 1].start; @@ -6173,15 +6173,19 @@ static int check_max_stack_depth_subprog(struct bpf_verifier_env *env, int idx) static int check_max_stack_depth(struct bpf_verifier_env *env) { struct bpf_subprog_info *si = env->subprog_info; - int ret; + int ret, subtree_depth = 0, depth_frame; for (int i = 0; i < env->subprog_cnt; i++) { if (!i || si[i].is_async_cb) { - ret = check_max_stack_depth_subprog(env, i); + ret = check_max_stack_depth_subprog(env, i, &subtree_depth, &depth_frame); if (ret < 0) return ret; } - continue; + } + if (subtree_depth > MAX_BPF_STACK) { + verbose(env, "combined stack size of %d calls is %d. Too large\n", + depth_frame, subtree_depth); + return -EACCES; } return 0; } From patchwork Mon Nov 4 19:35:05 2024 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Yonghong Song X-Patchwork-Id: 13861894 X-Patchwork-Delegate: bpf@iogearbox.net Received: from 69-171-232-180.mail-mxout.facebook.com (69-171-232-180.mail-mxout.facebook.com [69.171.232.180]) (using TLSv1.2 with cipher ECDHE-RSA-AES128-GCM-SHA256 (128/128 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id BB4111B81C1 for ; Mon, 4 Nov 2024 19:35:10 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=69.171.232.180 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1730748913; cv=none; b=mpTMry0TkADr/4qvBUzoqmwNdQJMwYh+da+BtrkUjIYw4CTX6pIHLSerkN0KU7DmwLgkrN7hP36B99TQ5q71PtQKe0GEGkEplMYrcR4pnYyf+XIvsFck9GCePzB4rI4U3Y0pmBboAgn4Wx+SsgceqP3k99rzkSnnIvY9tuo3tWk= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1730748913; c=relaxed/simple; bh=aJVUeJaR8wIV+UxniBNtljwZavdiTAw74q2xJ95lgr0=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=li+H2EDqgp758MBdse2jrhortp+GXAWehkkfElchOiJbsxDti9wuoLMcs9Oz9TcFQpDyKd4dB1gQTABJCDScmLzRJRmnK+f6llKtLjQzSbDJZb9sbJRDmJ5zzGzkyswgNHVLLrIeM3Uwzi2w/kdAo1RMgkpt0UFPOL/Ai2kNuvQ= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=fail (p=none dis=none) header.from=linux.dev; spf=fail smtp.mailfrom=linux.dev; arc=none smtp.client-ip=69.171.232.180 Authentication-Results: smtp.subspace.kernel.org; dmarc=fail (p=none dis=none) header.from=linux.dev Authentication-Results: smtp.subspace.kernel.org; spf=fail smtp.mailfrom=linux.dev Received: by devbig309.ftw3.facebook.com (Postfix, from userid 128203) id 97A4CABEC728; Mon, 4 Nov 2024 11:35:05 -0800 (PST) From: Yonghong Song To: bpf@vger.kernel.org Cc: Alexei Starovoitov , Andrii Nakryiko , Daniel Borkmann , kernel-team@fb.com, Martin KaFai Lau , Tejun Heo Subject: [PATCH bpf-next v9 02/10] bpf: Return false for bpf_prog_check_recur() default case Date: Mon, 4 Nov 2024 11:35:05 -0800 Message-ID: <20241104193505.3242662-1-yonghong.song@linux.dev> X-Mailer: git-send-email 2.43.5 In-Reply-To: <20241104193455.3241859-1-yonghong.song@linux.dev> References: <20241104193455.3241859-1-yonghong.song@linux.dev> Precedence: bulk X-Mailing-List: bpf@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 X-Patchwork-Delegate: bpf@iogearbox.net The bpf_prog_check_recur() funciton is currently used by trampoline and tracing programs (also using trampoline) to check whether a particular prog supports recursion checking or not. The default case (non-trampoline progs) return true in the current implementation. Let us make the non-trampoline prog recursion check return false instead. It does not impact any existing use cases and allows the function to be used outside the trampoline context in the next patch. Signed-off-by: Yonghong Song --- include/linux/bpf_verifier.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 4513372c5bc8..ad887c68d3e1 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -889,9 +889,8 @@ static inline bool bpf_prog_check_recur(const struct bpf_prog *prog) return prog->expected_attach_type != BPF_TRACE_ITER; case BPF_PROG_TYPE_STRUCT_OPS: case BPF_PROG_TYPE_LSM: - return false; default: - return true; + return false; } } From patchwork Mon Nov 4 19:35:10 2024 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Yonghong Song X-Patchwork-Id: 13861897 X-Patchwork-Delegate: bpf@iogearbox.net Received: from 66-220-155-178.mail-mxout.facebook.com (66-220-155-178.mail-mxout.facebook.com [66.220.155.178]) (using TLSv1.2 with cipher ECDHE-RSA-AES128-GCM-SHA256 (128/128 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 7C1211C4A08 for ; Mon, 4 Nov 2024 19:35:24 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=66.220.155.178 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1730748926; cv=none; b=JY3nvhoGhZanLECja80D9jVeKc8Ap6+w+ZXT7H4RuQk5DrsIhhSJeq9c1GgcG/OBSV2yQ/EWz44fOtwoz/hbp799EqgaCaE1IxkbnzcT+JQTY09SEIDR6kvXvqlc0nIwKT8sKs1e5lCeWlJ2UbbiB3nE7GDiT0PDC6Tym4H+QJU= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1730748926; c=relaxed/simple; bh=wtM19HkAUskk1ZzohlkMxd3TXJ8ETysgjUiSdaniaGk=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=drdd2SjX8W8W75OlUO7vhUEKRjlmeC/Y0Bq75JW7rjMfCuyCld+6m4t+R+IUUz7OA7oBzl4N5TvWwLITeX1AHidOBjGB17JWyDar4Tn+MfzR0m/nOZ+pI+l6PRs1hu8/dqYZzTgsD4Z6CTzWi2V2BVT2g3vB2DBPslrjaDiG2pE= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=fail (p=none dis=none) header.from=linux.dev; spf=fail smtp.mailfrom=linux.dev; arc=none smtp.client-ip=66.220.155.178 Authentication-Results: smtp.subspace.kernel.org; dmarc=fail (p=none dis=none) header.from=linux.dev Authentication-Results: smtp.subspace.kernel.org; spf=fail smtp.mailfrom=linux.dev Received: by devbig309.ftw3.facebook.com (Postfix, from userid 128203) id B217CABEC755; Mon, 4 Nov 2024 11:35:10 -0800 (PST) From: Yonghong Song To: bpf@vger.kernel.org Cc: Alexei Starovoitov , Andrii Nakryiko , Daniel Borkmann , kernel-team@fb.com, Martin KaFai Lau , Tejun Heo Subject: [PATCH bpf-next v9 03/10] bpf: Allow private stack to have each subprog having stack size of 512 bytes Date: Mon, 4 Nov 2024 11:35:10 -0800 Message-ID: <20241104193510.3243093-1-yonghong.song@linux.dev> X-Mailer: git-send-email 2.43.5 In-Reply-To: <20241104193455.3241859-1-yonghong.song@linux.dev> References: <20241104193455.3241859-1-yonghong.song@linux.dev> Precedence: bulk X-Mailing-List: bpf@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 X-Patchwork-Delegate: bpf@iogearbox.net With private stack support, each subprog can have stack with up to 512 bytes. The limit of 512 bytes per subprog is kept to avoid increasing verifier complexity since greater than 512 bytes will cause big verifier change and increase memory consumption and verification time. If private stack is supported and certain stack size threshold is reached, that subprog will have its own private stack allocated. In this patch, some tracing programs are allowed to use private stack since tracing prog may be triggered in the middle of some other prog runs. The supported tracing programs already have recursion check such that if the same prog is running on the same cpu again, the nested prog run will be skipped. This ensures bpf prog private stack is not over-written. Note that if any tail_call is called in the prog (including all subprogs), then private stack is not used. Function bpf_enable_priv_stack() return values include NO_PRIV_STACK, PRIV_STACK_ADAPTIVE and PRIV_STACK_ALWAYS. The NO_PRIV_STACK represents priv stack not enabled, PRIV_STACK_ADAPTIVE for priv stack enabled with some conditions (e.g. stack size threshold), and PRIV_STACK_ALWAYS for priv stack always enabled. The PRIV_STACK_ALWAYS will be used by later struct_ops progs. Signed-off-by: Yonghong Song --- include/linux/bpf.h | 1 + include/linux/bpf_verifier.h | 1 + include/linux/filter.h | 1 + kernel/bpf/core.c | 5 +++ kernel/bpf/verifier.c | 73 +++++++++++++++++++++++++++++++++--- 5 files changed, 76 insertions(+), 5 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index c3ba4d475174..8db3c5d7404b 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1523,6 +1523,7 @@ struct bpf_prog_aux { bool exception_cb; bool exception_boundary; bool is_extended; /* true if extended by freplace program */ + bool use_priv_stack; u64 prog_array_member_cnt; /* counts how many times as member of prog_array */ struct mutex ext_mutex; /* mutex for is_extended and prog_array_member_cnt */ struct bpf_arena *arena; diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index ad887c68d3e1..0622c11a7e19 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -668,6 +668,7 @@ struct bpf_subprog_info { bool args_cached: 1; /* true if bpf_fastcall stack region is used by functions that can't be inlined */ bool keep_fastcall_stack: 1; + bool use_priv_stack: 1; u8 arg_cnt; struct bpf_subprog_arg_info args[MAX_BPF_FUNC_REG_ARGS]; diff --git a/include/linux/filter.h b/include/linux/filter.h index 7d7578a8eac1..3a21947f2fd4 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -1119,6 +1119,7 @@ bool bpf_jit_supports_exceptions(void); bool bpf_jit_supports_ptr_xchg(void); bool bpf_jit_supports_arena(void); bool bpf_jit_supports_insn(struct bpf_insn *insn, bool in_arena); +bool bpf_jit_supports_private_stack(void); u64 bpf_arch_uaddress_limit(void); void arch_bpf_stack_walk(bool (*consume_fn)(void *cookie, u64 ip, u64 sp, u64 bp), void *cookie); bool bpf_helper_changes_pkt_data(void *func); diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 233ea78f8f1b..14d9288441f2 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -3045,6 +3045,11 @@ bool __weak bpf_jit_supports_exceptions(void) return false; } +bool __weak bpf_jit_supports_private_stack(void) +{ + return false; +} + void __weak arch_bpf_stack_walk(bool (*consume_fn)(void *cookie, u64 ip, u64 sp, u64 bp), void *cookie) { } diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index ed8f70e51141..406195c433ea 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -194,6 +194,8 @@ struct bpf_verifier_stack_elem { #define BPF_GLOBAL_PERCPU_MA_MAX_SIZE 512 +#define BPF_PRIV_STACK_MIN_SIZE 64 + static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx); static int release_reference(struct bpf_verifier_env *env, int ref_obj_id); static void invalidate_non_owning_refs(struct bpf_verifier_env *env); @@ -6015,6 +6017,38 @@ static int check_ptr_alignment(struct bpf_verifier_env *env, strict); } +enum priv_stack_mode { + NO_PRIV_STACK, + PRIV_STACK_ADAPTIVE, + PRIV_STACK_ALWAYS, +}; + +static enum priv_stack_mode bpf_enable_priv_stack(struct bpf_prog *prog) +{ + if (!bpf_jit_supports_private_stack()) + return NO_PRIV_STACK; + + /* bpf_prog_check_recur() checks all prog types that use bpf trampoline + * while kprobe/tp/perf_event/raw_tp don't use trampoline hence checked + * explicitly. + */ + switch (prog->type) { + case BPF_PROG_TYPE_KPROBE: + case BPF_PROG_TYPE_TRACEPOINT: + case BPF_PROG_TYPE_PERF_EVENT: + case BPF_PROG_TYPE_RAW_TRACEPOINT: + return PRIV_STACK_ADAPTIVE; + default: + break; + } + + if (!bpf_prog_check_recur(prog)) + return NO_PRIV_STACK; + + + return PRIV_STACK_ADAPTIVE; +} + static int round_up_stack_depth(struct bpf_verifier_env *env, int stack_depth) { if (env->prog->jit_requested) @@ -6033,11 +6067,12 @@ static int round_up_stack_depth(struct bpf_verifier_env *env, int stack_depth) * only needs a local stack of MAX_CALL_FRAMES to remember callsites */ static int check_max_stack_depth_subprog(struct bpf_verifier_env *env, int idx, - int *subtree_depth, int *depth_frame) + int *subtree_depth, int *depth_frame, + int priv_stack_supported) { struct bpf_subprog_info *subprog = env->subprog_info; struct bpf_insn *insn = env->prog->insnsi; - int depth = 0, frame = 0, i, subprog_end; + int depth = 0, frame = 0, i, subprog_end, subprog_depth; bool tail_call_reachable = false; int ret_insn[MAX_CALL_FRAMES]; int ret_prog[MAX_CALL_FRAMES]; @@ -6070,11 +6105,23 @@ static int check_max_stack_depth_subprog(struct bpf_verifier_env *env, int idx, depth); return -EACCES; } - depth += round_up_stack_depth(env, subprog[idx].stack_depth); + subprog_depth = round_up_stack_depth(env, subprog[idx].stack_depth); + depth += subprog_depth; if (depth > MAX_BPF_STACK && !*subtree_depth) { *subtree_depth = depth; *depth_frame = frame + 1; } + if (priv_stack_supported != NO_PRIV_STACK) { + if (!subprog[idx].use_priv_stack) { + if (subprog_depth > MAX_BPF_STACK) { + verbose(env, "stack size of subprog %d is %d. Too large\n", + idx, subprog_depth); + return -EACCES; + } + if (subprog_depth >= BPF_PRIV_STACK_MIN_SIZE) + subprog[idx].use_priv_stack = true; + } + } continue_func: subprog_end = subprog[idx + 1].start; for (; i < subprog_end; i++) { @@ -6173,20 +6220,36 @@ static int check_max_stack_depth_subprog(struct bpf_verifier_env *env, int idx, static int check_max_stack_depth(struct bpf_verifier_env *env) { struct bpf_subprog_info *si = env->subprog_info; + enum priv_stack_mode priv_stack_supported; int ret, subtree_depth = 0, depth_frame; + priv_stack_supported = bpf_enable_priv_stack(env->prog); + + if (priv_stack_supported != NO_PRIV_STACK) { + for (int i = 0; i < env->subprog_cnt; i++) { + if (!si[i].has_tail_call) + continue; + priv_stack_supported = NO_PRIV_STACK; + break; + } + } + for (int i = 0; i < env->subprog_cnt; i++) { if (!i || si[i].is_async_cb) { - ret = check_max_stack_depth_subprog(env, i, &subtree_depth, &depth_frame); + ret = check_max_stack_depth_subprog(env, i, &subtree_depth, &depth_frame, + priv_stack_supported); if (ret < 0) return ret; } } - if (subtree_depth > MAX_BPF_STACK) { + + if (priv_stack_supported == NO_PRIV_STACK && subtree_depth > MAX_BPF_STACK) { verbose(env, "combined stack size of %d calls is %d. Too large\n", depth_frame, subtree_depth); return -EACCES; } + if (si[0].use_priv_stack) + env->prog->aux->use_priv_stack = true; return 0; } From patchwork Mon Nov 4 19:35:15 2024 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Yonghong Song X-Patchwork-Id: 13861917 X-Patchwork-Delegate: bpf@iogearbox.net Received: from 69-171-232-180.mail-mxout.facebook.com (69-171-232-180.mail-mxout.facebook.com [69.171.232.180]) (using TLSv1.2 with cipher ECDHE-RSA-AES128-GCM-SHA256 (128/128 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 89E4B1CBE82 for ; Mon, 4 Nov 2024 19:38:08 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=69.171.232.180 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1730749090; cv=none; b=dzu+JvqeFQZZwwTKdMoGRLSFdMjIq2b8S7nBOqNDRfEUilj/xJQYc4I1zVKmDzaeZfYZ/fkZTkUgddrGi/Hv1SyPnjZMwXBJiqTixOzUfieA7KPeEO27jM+88r8wQUA7fH3iUgPsIJIhCpw0lyqdEZK8d+kex4/OZMfgWb2vRBw= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1730749090; c=relaxed/simple; bh=BDhOLK54EQYRoaH1jAO7MkHRnauswTRLZgazS2+wkLE=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=gqGib2sSo1iCLM8EiUwmew3PmHXlYsvUqz23mFXxrrdWRc4xpO29PMGYR26yBH1flpSdw+q6aTRP2Ti/6gDFnvBLUJbkzWljULcj1VXE9gD573bJOIg1bL3WR4AF61KdoY2ZPU7wL9PPTM4k3lRX109RNuclukPBJC1U/H3DB9E= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=fail (p=none dis=none) header.from=linux.dev; spf=fail smtp.mailfrom=linux.dev; arc=none smtp.client-ip=69.171.232.180 Authentication-Results: smtp.subspace.kernel.org; dmarc=fail (p=none dis=none) header.from=linux.dev Authentication-Results: smtp.subspace.kernel.org; spf=fail smtp.mailfrom=linux.dev Received: by devbig309.ftw3.facebook.com (Postfix, from userid 128203) id 37AB2ABEC766; Mon, 4 Nov 2024 11:35:15 -0800 (PST) From: Yonghong Song To: bpf@vger.kernel.org Cc: Alexei Starovoitov , Andrii Nakryiko , Daniel Borkmann , kernel-team@fb.com, Martin KaFai Lau , Tejun Heo Subject: [PATCH bpf-next v9 04/10] bpf: Check potential private stack recursion for progs with async callback Date: Mon, 4 Nov 2024 11:35:15 -0800 Message-ID: <20241104193515.3243315-1-yonghong.song@linux.dev> X-Mailer: git-send-email 2.43.5 In-Reply-To: <20241104193455.3241859-1-yonghong.song@linux.dev> References: <20241104193455.3241859-1-yonghong.song@linux.dev> Precedence: bulk X-Mailing-List: bpf@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 X-Patchwork-Delegate: bpf@iogearbox.net In previous patch, tracing progs are enabled for private stack since recursion checking ensures there exists no nested same bpf prog run on the same cpu. But it is still possible for nested bpf subprog run on the same cpu if the same subprog is called in both main prog and async callback, or in different async callbacks. For example, main_prog bpf_timer_set_callback(timer, timer_cb); call sub1 sub1 ... time_cb call sub1 In the above case, nested subprog run for sub1 is possible with one in process context and the other in softirq context. If this is the case, the verifier will disable private stack for this bpf prog. Signed-off-by: Yonghong Song --- include/linux/bpf_verifier.h | 2 ++ kernel/bpf/verifier.c | 42 +++++++++++++++++++++++++++++++----- 2 files changed, 39 insertions(+), 5 deletions(-) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 0622c11a7e19..e921589abc72 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -669,6 +669,8 @@ struct bpf_subprog_info { /* true if bpf_fastcall stack region is used by functions that can't be inlined */ bool keep_fastcall_stack: 1; bool use_priv_stack: 1; + bool visited_with_priv_stack_accum: 1; + bool visited_with_priv_stack: 1; u8 arg_cnt; struct bpf_subprog_arg_info args[MAX_BPF_FUNC_REG_ARGS]; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 406195c433ea..e01b3f0fd314 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -6118,8 +6118,12 @@ static int check_max_stack_depth_subprog(struct bpf_verifier_env *env, int idx, idx, subprog_depth); return -EACCES; } - if (subprog_depth >= BPF_PRIV_STACK_MIN_SIZE) + if (subprog_depth >= BPF_PRIV_STACK_MIN_SIZE) { subprog[idx].use_priv_stack = true; + subprog[idx].visited_with_priv_stack = true; + } + } else { + subprog[idx].visited_with_priv_stack = true; } } continue_func: @@ -6220,10 +6224,12 @@ static int check_max_stack_depth_subprog(struct bpf_verifier_env *env, int idx, static int check_max_stack_depth(struct bpf_verifier_env *env) { struct bpf_subprog_info *si = env->subprog_info; + enum priv_stack_mode orig_priv_stack_supported; enum priv_stack_mode priv_stack_supported; int ret, subtree_depth = 0, depth_frame; priv_stack_supported = bpf_enable_priv_stack(env->prog); + orig_priv_stack_supported = priv_stack_supported; if (priv_stack_supported != NO_PRIV_STACK) { for (int i = 0; i < env->subprog_cnt; i++) { @@ -6240,13 +6246,39 @@ static int check_max_stack_depth(struct bpf_verifier_env *env) priv_stack_supported); if (ret < 0) return ret; + + if (priv_stack_supported != NO_PRIV_STACK) { + for (int j = 0; j < env->subprog_cnt; j++) { + if (si[j].visited_with_priv_stack_accum && + si[j].visited_with_priv_stack) { + /* si[j] is visited by both main/async subprog + * and another async subprog. + */ + priv_stack_supported = NO_PRIV_STACK; + break; + } + if (!si[j].visited_with_priv_stack_accum) + si[j].visited_with_priv_stack_accum = + si[j].visited_with_priv_stack; + } + } + if (priv_stack_supported != NO_PRIV_STACK) { + for (int j = 0; j < env->subprog_cnt; j++) + si[j].visited_with_priv_stack = false; + } } } - if (priv_stack_supported == NO_PRIV_STACK && subtree_depth > MAX_BPF_STACK) { - verbose(env, "combined stack size of %d calls is %d. Too large\n", - depth_frame, subtree_depth); - return -EACCES; + if (priv_stack_supported == NO_PRIV_STACK) { + if (subtree_depth > MAX_BPF_STACK) { + verbose(env, "combined stack size of %d calls is %d. Too large\n", + depth_frame, subtree_depth); + return -EACCES; + } + if (orig_priv_stack_supported == PRIV_STACK_ADAPTIVE) { + for (int i = 0; i < env->subprog_cnt; i++) + si[i].use_priv_stack = false; + } } if (si[0].use_priv_stack) env->prog->aux->use_priv_stack = true; From patchwork Mon Nov 4 19:35:21 2024 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Yonghong Song X-Patchwork-Id: 13861921 X-Patchwork-Delegate: bpf@iogearbox.net Received: from 66-220-155-179.mail-mxout.facebook.com (66-220-155-179.mail-mxout.facebook.com [66.220.155.179]) (using TLSv1.2 with cipher ECDHE-RSA-AES128-GCM-SHA256 (128/128 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 52EC71C876D for ; Mon, 4 Nov 2024 19:38:10 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=66.220.155.179 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1730749092; cv=none; b=XbkUj1wxkfIm5kMnaE3iWaYR8Dgl13a0mhBKnmwHsVqV3XoAAblFFqFqb79APi69SI2Bc6jcHlZdHdDELsNRT79v11R4O9VX+Kzur8uOQ677nqdHDuuz19lw83DPE8VI/2TBwTcEfvJkba7zkcAQlIDHupF/b5AxaMv7sMEMKZA= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1730749092; c=relaxed/simple; bh=XU5vyXffjfmnt/7cQBsfkswVnNdBvK+qlUgDDQ/NKso=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=qFlqOJRdtFVMNc+kee7NojWH7qVhDYvel/vwofGMHStieDDUGVpKJzGWSYqo0mthA+jPfzryobvRKcNTW1sLzoQtaJ7jZVo1R4gWehGMJvDTcRbfYpbgxddmBFsktsH6nDnt3dXJNf6r9ECMmOeLp1cMOWlJ1tiNQPUhJqdJw6M= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=fail (p=none dis=none) header.from=linux.dev; spf=fail smtp.mailfrom=linux.dev; arc=none smtp.client-ip=66.220.155.179 Authentication-Results: smtp.subspace.kernel.org; dmarc=fail (p=none dis=none) header.from=linux.dev Authentication-Results: smtp.subspace.kernel.org; spf=fail smtp.mailfrom=linux.dev Received: by devbig309.ftw3.facebook.com (Postfix, from userid 128203) id 9C631ABEC78C; Mon, 4 Nov 2024 11:35:21 -0800 (PST) From: Yonghong Song To: bpf@vger.kernel.org Cc: Alexei Starovoitov , Andrii Nakryiko , Daniel Borkmann , kernel-team@fb.com, Martin KaFai Lau , Tejun Heo Subject: [PATCH bpf-next v9 05/10] bpf: Allocate private stack for eligible main prog or subprogs Date: Mon, 4 Nov 2024 11:35:21 -0800 Message-ID: <20241104193521.3243984-1-yonghong.song@linux.dev> X-Mailer: git-send-email 2.43.5 In-Reply-To: <20241104193455.3241859-1-yonghong.song@linux.dev> References: <20241104193455.3241859-1-yonghong.song@linux.dev> Precedence: bulk X-Mailing-List: bpf@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 X-Patchwork-Delegate: bpf@iogearbox.net For any main prog or subprogs, allocate private stack space if requested by subprog info or main prog. The alignment for private stack is 16 since maximum stack alignment is 16 for bpf-enabled archs. If jit failed, the allocated private stack will be freed in the same function where the allocation happens. If jit succeeded, e.g., for x86_64 arch, the allocated private stack is freed in arch specific implementation of bpf_jit_free(). Signed-off-by: Yonghong Song --- arch/x86/net/bpf_jit_comp.c | 1 + include/linux/bpf.h | 1 + kernel/bpf/core.c | 19 ++++++++++++++++--- kernel/bpf/verifier.c | 13 +++++++++++++ 4 files changed, 31 insertions(+), 3 deletions(-) diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 06b080b61aa5..59d294b8dd67 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -3544,6 +3544,7 @@ void bpf_jit_free(struct bpf_prog *prog) prog->bpf_func = (void *)prog->bpf_func - cfi_get_offset(); hdr = bpf_jit_binary_pack_hdr(prog); bpf_jit_binary_pack_free(hdr, NULL); + free_percpu(prog->aux->priv_stack_ptr); WARN_ON_ONCE(!bpf_prog_kallsyms_verify_off(prog)); } diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 8db3c5d7404b..8a3ea7440a4a 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1507,6 +1507,7 @@ struct bpf_prog_aux { u32 max_rdwr_access; struct btf *attach_btf; const struct bpf_ctx_arg_aux *ctx_arg_info; + void __percpu *priv_stack_ptr; struct mutex dst_mutex; /* protects dst_* pointers below, *after* prog becomes visible */ struct bpf_prog *dst_prog; struct bpf_trampoline *dst_trampoline; diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 14d9288441f2..f7a3e93c41e1 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -2396,6 +2396,7 @@ static void bpf_prog_select_func(struct bpf_prog *fp) */ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err) { + void __percpu *priv_stack_ptr = NULL; /* In case of BPF to BPF calls, verifier did all the prep * work with regards to JITing, etc. */ @@ -2421,11 +2422,23 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err) if (*err) return fp; + if (fp->aux->use_priv_stack && fp->aux->stack_depth) { + priv_stack_ptr = __alloc_percpu_gfp(fp->aux->stack_depth, 16, GFP_KERNEL); + if (!priv_stack_ptr) { + *err = -ENOMEM; + return fp; + } + fp->aux->priv_stack_ptr = priv_stack_ptr; + } + fp = bpf_int_jit_compile(fp); bpf_prog_jit_attempt_done(fp); - if (!fp->jited && jit_needed) { - *err = -ENOTSUPP; - return fp; + if (!fp->jited) { + free_percpu(priv_stack_ptr); + if (jit_needed) { + *err = -ENOTSUPP; + return fp; + } } } else { *err = bpf_prog_offload_compile(fp); diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index e01b3f0fd314..03ae76d57076 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -20073,6 +20073,7 @@ static int jit_subprogs(struct bpf_verifier_env *env) { struct bpf_prog *prog = env->prog, **func, *tmp; int i, j, subprog_start, subprog_end = 0, len, subprog; + void __percpu *priv_stack_ptr; struct bpf_map *map_ptr; struct bpf_insn *insn; void *old_bpf_func; @@ -20169,6 +20170,17 @@ static int jit_subprogs(struct bpf_verifier_env *env) func[i]->aux->name[0] = 'F'; func[i]->aux->stack_depth = env->subprog_info[i].stack_depth; + + if (env->subprog_info[i].use_priv_stack && func[i]->aux->stack_depth) { + priv_stack_ptr = __alloc_percpu_gfp(func[i]->aux->stack_depth, 16, + GFP_KERNEL); + if (!priv_stack_ptr) { + err = -ENOMEM; + goto out_free; + } + func[i]->aux->priv_stack_ptr = priv_stack_ptr; + } + func[i]->jit_requested = 1; func[i]->blinding_requested = prog->blinding_requested; func[i]->aux->kfunc_tab = prog->aux->kfunc_tab; @@ -20201,6 +20213,7 @@ static int jit_subprogs(struct bpf_verifier_env *env) func[i]->aux->exception_boundary = env->seen_exception; func[i] = bpf_int_jit_compile(func[i]); if (!func[i]->jited) { + free_percpu(func[i]->aux->priv_stack_ptr); err = -ENOTSUPP; goto out_free; } From patchwork Mon Nov 4 19:35:26 2024 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Yonghong Song X-Patchwork-Id: 13861918 X-Patchwork-Delegate: bpf@iogearbox.net Received: from 69-171-232-180.mail-mxout.facebook.com (69-171-232-180.mail-mxout.facebook.com [69.171.232.180]) (using TLSv1.2 with cipher ECDHE-RSA-AES128-GCM-SHA256 (128/128 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id B09001CBE9E for ; Mon, 4 Nov 2024 19:38:08 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=69.171.232.180 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1730749090; cv=none; b=a3egt9joOzfLzuPicU7SoWRIoLW3j7t8IdNlaaCZ7QSooFgbI0P0XlEtEzWK2yo7oW4Z01QC4Sd7JTfZS9jYcsf+lPgp++35jt4+0NLLHT4gyvLA/E4Ox2fGGHPVzvGWZGaSlnAN+k/eA32TVU1EX1qVwtxmkTjXaWIa67rvMRE= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1730749090; c=relaxed/simple; bh=2psKV1t0NOR93PC4NYTwpoNi8Ljhop/tVA0vJK6sACs=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=ks1BOPLyFrLGMO+kdP8Y8RyHm16+6u8O/2DmqG2vvG+zr9vFP8w8fNSSDmzxHbKCijn4HjM/0xoB0t4Otc7dguehiHdPZsgZyjfucArNU2BrTJdtqfEj0olEPiWzOH5bnzgOmeJqTvSuC+0N4ctUV+0IV1BgkrPfSSeLpQeTL8I= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=fail (p=none dis=none) header.from=linux.dev; spf=fail smtp.mailfrom=linux.dev; arc=none smtp.client-ip=69.171.232.180 Authentication-Results: smtp.subspace.kernel.org; dmarc=fail (p=none dis=none) header.from=linux.dev Authentication-Results: smtp.subspace.kernel.org; spf=fail smtp.mailfrom=linux.dev Received: by devbig309.ftw3.facebook.com (Postfix, from userid 128203) id 77753ABEC7AA; Mon, 4 Nov 2024 11:35:26 -0800 (PST) From: Yonghong Song To: bpf@vger.kernel.org Cc: Alexei Starovoitov , Andrii Nakryiko , Daniel Borkmann , kernel-team@fb.com, Martin KaFai Lau , Tejun Heo Subject: [PATCH bpf-next v9 06/10] bpf, x86: Avoid repeated usage of bpf_prog->aux->stack_depth Date: Mon, 4 Nov 2024 11:35:26 -0800 Message-ID: <20241104193526.3244179-1-yonghong.song@linux.dev> X-Mailer: git-send-email 2.43.5 In-Reply-To: <20241104193455.3241859-1-yonghong.song@linux.dev> References: <20241104193455.3241859-1-yonghong.song@linux.dev> Precedence: bulk X-Mailing-List: bpf@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 X-Patchwork-Delegate: bpf@iogearbox.net Refactor the code to avoid repeated usage of bpf_prog->aux->stack_depth in do_jit() func. If the private stack is used, the stack_depth will be 0 for that prog. Refactoring make it easy to adjust stack_depth. Signed-off-by: Yonghong Song --- arch/x86/net/bpf_jit_comp.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 59d294b8dd67..181d9f04418f 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -1425,14 +1425,17 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image int i, excnt = 0; int ilen, proglen = 0; u8 *prog = temp; + u32 stack_depth; int err; + stack_depth = bpf_prog->aux->stack_depth; + arena_vm_start = bpf_arena_get_kern_vm_start(bpf_prog->aux->arena); user_vm_start = bpf_arena_get_user_vm_start(bpf_prog->aux->arena); detect_reg_usage(insn, insn_cnt, callee_regs_used); - emit_prologue(&prog, bpf_prog->aux->stack_depth, + emit_prologue(&prog, stack_depth, bpf_prog_was_classic(bpf_prog), tail_call_reachable, bpf_is_subprog(bpf_prog), bpf_prog->aux->exception_cb); /* Exception callback will clobber callee regs for its own use, and @@ -2128,7 +2131,7 @@ st: if (is_imm8(insn->off)) func = (u8 *) __bpf_call_base + imm32; if (tail_call_reachable) { - LOAD_TAIL_CALL_CNT_PTR(bpf_prog->aux->stack_depth); + LOAD_TAIL_CALL_CNT_PTR(stack_depth); ip += 7; } if (!imm32) @@ -2145,13 +2148,13 @@ st: if (is_imm8(insn->off)) &bpf_prog->aux->poke_tab[imm32 - 1], &prog, image + addrs[i - 1], callee_regs_used, - bpf_prog->aux->stack_depth, + stack_depth, ctx); else emit_bpf_tail_call_indirect(bpf_prog, &prog, callee_regs_used, - bpf_prog->aux->stack_depth, + stack_depth, image + addrs[i - 1], ctx); break; From patchwork Mon Nov 4 19:35:32 2024 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Yonghong Song X-Patchwork-Id: 13861919 X-Patchwork-Delegate: bpf@iogearbox.net Received: from 69-171-232-181.mail-mxout.facebook.com (69-171-232-181.mail-mxout.facebook.com [69.171.232.181]) (using TLSv1.2 with cipher ECDHE-RSA-AES128-GCM-SHA256 (128/128 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id A6D351CBE8B for ; Mon, 4 Nov 2024 19:38:08 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=69.171.232.181 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1730749090; cv=none; b=TT6PtSWiUE0puey1Zbwv8raEryQ4PotNIrdXkqMlEsWQUNARS56HbIVyqeN7uXt/F72pmloGSYRMfO94vZA7O2UL/ttRsHopoK/5DKUprWoWxt/YE6umVOeIZbmMMUGoj6477IaZIJgvaArXUKlqZHVl13jZluAwk/Hp3Wz15W0= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1730749090; c=relaxed/simple; bh=yGemHCqZpB+JfjrCz1yYHHmGhW+9Mr3xLUKBtAzyyR0=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=S9aIB4aLKrgpmRqBy58zlrKmhtRb4UUW1jnFs3X9IpN+80dvXSy6YgrXj+s3DEnIcHt8z+vj5DgnKuQ1rv3YJaoe98rYzY4pT9S+o5YH460oHXvkabYON8J8b9lwVxerqG+aYS+8bETzACY6fl02eUfjf5U39GlwsFvtXEEFPZs= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=fail (p=none dis=none) header.from=linux.dev; spf=fail smtp.mailfrom=linux.dev; arc=none smtp.client-ip=69.171.232.181 Authentication-Results: smtp.subspace.kernel.org; dmarc=fail (p=none dis=none) header.from=linux.dev Authentication-Results: smtp.subspace.kernel.org; spf=fail smtp.mailfrom=linux.dev Received: by devbig309.ftw3.facebook.com (Postfix, from userid 128203) id 9171AABEC7C5; Mon, 4 Nov 2024 11:35:32 -0800 (PST) From: Yonghong Song To: bpf@vger.kernel.org Cc: Alexei Starovoitov , Andrii Nakryiko , Daniel Borkmann , kernel-team@fb.com, Martin KaFai Lau , Tejun Heo Subject: [PATCH bpf-next v9 07/10] bpf, x86: Support private stack in jit Date: Mon, 4 Nov 2024 11:35:32 -0800 Message-ID: <20241104193532.3244711-1-yonghong.song@linux.dev> X-Mailer: git-send-email 2.43.5 In-Reply-To: <20241104193455.3241859-1-yonghong.song@linux.dev> References: <20241104193455.3241859-1-yonghong.song@linux.dev> Precedence: bulk X-Mailing-List: bpf@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 X-Patchwork-Delegate: bpf@iogearbox.net Support private stack in jit. The x86 register 9 (X86_REG_R9) is used to replace bpf frame register (BPF_REG_10). The private stack is used per subprog if it is enabled by verifier. The X86_REG_R9 is saved and restored around every func call (not including tailcall) to maintain correctness of X86_REG_R9. Signed-off-by: Yonghong Song --- arch/x86/net/bpf_jit_comp.c | 61 +++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 181d9f04418f..4ee69071c26d 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -325,6 +325,22 @@ struct jit_context { /* Number of bytes that will be skipped on tailcall */ #define X86_TAIL_CALL_OFFSET (12 + ENDBR_INSN_SIZE) +static void push_r9(u8 **pprog) +{ + u8 *prog = *pprog; + + EMIT2(0x41, 0x51); /* push r9 */ + *pprog = prog; +} + +static void pop_r9(u8 **pprog) +{ + u8 *prog = *pprog; + + EMIT2(0x41, 0x59); /* pop r9 */ + *pprog = prog; +} + static void push_r12(u8 **pprog) { u8 *prog = *pprog; @@ -1404,6 +1420,24 @@ static void emit_shiftx(u8 **pprog, u32 dst_reg, u8 src_reg, bool is64, u8 op) *pprog = prog; } +static void emit_priv_frame_ptr(u8 **pprog, void __percpu *priv_frame_ptr) +{ + u8 *prog = *pprog; + + /* movabs r9, priv_frame_ptr */ + emit_mov_imm64(&prog, X86_REG_R9, (__force long) priv_frame_ptr >> 32, + (u32) (__force long) priv_frame_ptr); + +#ifdef CONFIG_SMP + /* add , gs:[] */ + EMIT2(0x65, 0x4c); + EMIT3(0x03, 0x0c, 0x25); + EMIT((u32)(unsigned long)&this_cpu_off, 4); +#endif + + *pprog = prog; +} + #define INSN_SZ_DIFF (((addrs[i] - addrs[i - 1]) - (prog - temp))) #define __LOAD_TCC_PTR(off) \ @@ -1421,6 +1455,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image int insn_cnt = bpf_prog->len; bool seen_exit = false; u8 temp[BPF_MAX_INSN_SIZE + BPF_INSN_SAFETY]; + void __percpu *priv_frame_ptr = NULL; u64 arena_vm_start, user_vm_start; int i, excnt = 0; int ilen, proglen = 0; @@ -1429,6 +1464,10 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image int err; stack_depth = bpf_prog->aux->stack_depth; + if (bpf_prog->aux->priv_stack_ptr) { + priv_frame_ptr = bpf_prog->aux->priv_stack_ptr + round_up(stack_depth, 8); + stack_depth = 0; + } arena_vm_start = bpf_arena_get_kern_vm_start(bpf_prog->aux->arena); user_vm_start = bpf_arena_get_user_vm_start(bpf_prog->aux->arena); @@ -1457,6 +1496,9 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image emit_mov_imm64(&prog, X86_REG_R12, arena_vm_start >> 32, (u32) arena_vm_start); + if (priv_frame_ptr) + emit_priv_frame_ptr(&prog, priv_frame_ptr); + ilen = prog - temp; if (rw_image) memcpy(rw_image + proglen, temp, ilen); @@ -1476,6 +1518,14 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image u8 *func; int nops; + if (priv_frame_ptr) { + if (src_reg == BPF_REG_FP) + src_reg = X86_REG_R9; + + if (dst_reg == BPF_REG_FP) + dst_reg = X86_REG_R9; + } + switch (insn->code) { /* ALU */ case BPF_ALU | BPF_ADD | BPF_X: @@ -2136,9 +2186,15 @@ st: if (is_imm8(insn->off)) } if (!imm32) return -EINVAL; + if (priv_frame_ptr) { + push_r9(&prog); + ip += 2; + } ip += x86_call_depth_emit_accounting(&prog, func, ip); if (emit_call(&prog, func, ip)) return -EINVAL; + if (priv_frame_ptr) + pop_r9(&prog); break; } @@ -3563,6 +3619,11 @@ bool bpf_jit_supports_exceptions(void) return IS_ENABLED(CONFIG_UNWINDER_ORC); } +bool bpf_jit_supports_private_stack(void) +{ + return true; +} + void arch_bpf_stack_walk(bool (*consume_fn)(void *cookie, u64 ip, u64 sp, u64 bp), void *cookie) { #if defined(CONFIG_UNWINDER_ORC) From patchwork Mon Nov 4 19:35:37 2024 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Yonghong Song X-Patchwork-Id: 13861916 X-Patchwork-Delegate: bpf@iogearbox.net Received: from 66-220-155-179.mail-mxout.facebook.com (66-220-155-179.mail-mxout.facebook.com [66.220.155.179]) (using TLSv1.2 with cipher ECDHE-RSA-AES128-GCM-SHA256 (128/128 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 288FA1CBA1B for ; Mon, 4 Nov 2024 19:38:07 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=66.220.155.179 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1730749090; cv=none; b=CUuxJAkmtsa8C0O23O+LsRDgOKpDfqu0pPobHzjbMHpvbgmlAzpziIbO/1QTzhDg8HPaCp7KTLkyKkVBRO8E0x4nBS+//h6N8PByeRgh/smEh60dGXYkz37zrfebhlRkVVo5u4oRjC9vMkXOa+kNloJ2Mon57bBguLOxQsMpdCg= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1730749090; c=relaxed/simple; bh=0i0qoMsPI/gwZFvf/9Gf4WnAAY/a8mpe4tYYz/MW2xY=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=P5qzEWXSL+eoAIfFBSukzeUkF5B6MZS1aXJNpq/Jvm6M7lb0r2mXKPoD5Qa/o/0kgn9IWBmefnilm2PvmCspQiGc+CAQrv7Er24PFr1aQgbo5UD+tEUfPXeHOXvE+a0sanuV4nbu0hcpXcjIwhD6FiHRwyTRuoO+Ef9odfnKwEs= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=fail (p=none dis=none) header.from=linux.dev; spf=fail smtp.mailfrom=linux.dev; arc=none smtp.client-ip=66.220.155.179 Authentication-Results: smtp.subspace.kernel.org; dmarc=fail (p=none dis=none) header.from=linux.dev Authentication-Results: smtp.subspace.kernel.org; spf=fail smtp.mailfrom=linux.dev Received: by devbig309.ftw3.facebook.com (Postfix, from userid 128203) id AC6A9ABEC7E2; Mon, 4 Nov 2024 11:35:37 -0800 (PST) From: Yonghong Song To: bpf@vger.kernel.org Cc: Alexei Starovoitov , Andrii Nakryiko , Daniel Borkmann , kernel-team@fb.com, Martin KaFai Lau , Tejun Heo Subject: [PATCH bpf-next v9 08/10] selftests/bpf: Add tracing prog private stack tests Date: Mon, 4 Nov 2024 11:35:37 -0800 Message-ID: <20241104193537.3245050-1-yonghong.song@linux.dev> X-Mailer: git-send-email 2.43.5 In-Reply-To: <20241104193455.3241859-1-yonghong.song@linux.dev> References: <20241104193455.3241859-1-yonghong.song@linux.dev> Precedence: bulk X-Mailing-List: bpf@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 X-Patchwork-Delegate: bpf@iogearbox.net Some private stack tests are added including: - main prog only with stack size greater than BPF_PSTACK_MIN_SIZE. - main prog only with stack size smaller than BPF_PSTACK_MIN_SIZE. - prog with one subprog having MAX_BPF_STACK stack size and another subprog having non-zero small stack size. - prog with callback function. - prog with exception in main prog or subprog. - prog with async callback without nesting - prog with async callback with possible nesting Signed-off-by: Yonghong Song --- .../selftests/bpf/prog_tests/verifier.c | 2 + .../bpf/progs/verifier_private_stack.c | 272 ++++++++++++++++++ 2 files changed, 274 insertions(+) create mode 100644 tools/testing/selftests/bpf/progs/verifier_private_stack.c diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c index 75f7a2ce334b..d9f65adb456b 100644 --- a/tools/testing/selftests/bpf/prog_tests/verifier.c +++ b/tools/testing/selftests/bpf/prog_tests/verifier.c @@ -61,6 +61,7 @@ #include "verifier_or_jmp32_k.skel.h" #include "verifier_precision.skel.h" #include "verifier_prevent_map_lookup.skel.h" +#include "verifier_private_stack.skel.h" #include "verifier_raw_stack.skel.h" #include "verifier_raw_tp_writable.skel.h" #include "verifier_reg_equal.skel.h" @@ -188,6 +189,7 @@ void test_verifier_bpf_fastcall(void) { RUN(verifier_bpf_fastcall); } void test_verifier_or_jmp32_k(void) { RUN(verifier_or_jmp32_k); } void test_verifier_precision(void) { RUN(verifier_precision); } void test_verifier_prevent_map_lookup(void) { RUN(verifier_prevent_map_lookup); } +void test_verifier_private_stack(void) { RUN(verifier_private_stack); } void test_verifier_raw_stack(void) { RUN(verifier_raw_stack); } void test_verifier_raw_tp_writable(void) { RUN(verifier_raw_tp_writable); } void test_verifier_reg_equal(void) { RUN(verifier_reg_equal); } diff --git a/tools/testing/selftests/bpf/progs/verifier_private_stack.c b/tools/testing/selftests/bpf/progs/verifier_private_stack.c new file mode 100644 index 000000000000..b1fbdf119553 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/verifier_private_stack.c @@ -0,0 +1,272 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include "bpf_misc.h" +#include "bpf_experimental.h" + +/* From include/linux/filter.h */ +#define MAX_BPF_STACK 512 + +#if defined(__TARGET_ARCH_x86) + +struct elem { + struct bpf_timer t; + char pad[256]; +}; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, int); + __type(value, struct elem); +} array SEC(".maps"); + +SEC("kprobe") +__description("Private stack, single prog") +__success +__arch_x86_64 +__jited(" movabsq $0x{{.*}}, %r9") +__jited(" addq %gs:0x{{.*}}, %r9") +__jited(" movl $0x2a, %edi") +__jited(" movq %rdi, -0x100(%r9)") +__naked void private_stack_single_prog(void) +{ + asm volatile (" \ + r1 = 42; \ + *(u64 *)(r10 - 256) = r1; \ + r0 = 0; \ + exit; \ +" ::: __clobber_all); +} + +SEC("raw_tp") +__description("No private stack") +__success +__arch_x86_64 +__jited(" subq $0x8, %rsp") +__naked void no_private_stack_nested(void) +{ + asm volatile (" \ + r1 = 42; \ + *(u64 *)(r10 - 8) = r1; \ + r0 = 0; \ + exit; \ +" ::: __clobber_all); +} + +__used +__naked static void cumulative_stack_depth_subprog(void) +{ + asm volatile (" \ + r1 = 41; \ + *(u64 *)(r10 - 32) = r1; \ + call %[bpf_get_smp_processor_id]; \ + exit; \ +" : + : __imm(bpf_get_smp_processor_id) + : __clobber_all); +} + +SEC("kprobe") +__description("Private stack, subtree > MAX_BPF_STACK") +__success +__arch_x86_64 +/* private stack fp for the main prog */ +__jited(" movabsq $0x{{.*}}, %r9") +__jited(" addq %gs:0x{{.*}}, %r9") +__jited(" movl $0x2a, %edi") +__jited(" movq %rdi, -0x200(%r9)") +__jited(" pushq %r9") +__jited(" callq 0x{{.*}}") +__jited(" popq %r9") +__jited(" xorl %eax, %eax") +__naked void private_stack_nested_1(void) +{ + asm volatile (" \ + r1 = 42; \ + *(u64 *)(r10 - %[max_bpf_stack]) = r1; \ + call cumulative_stack_depth_subprog; \ + r0 = 0; \ + exit; \ +" : + : __imm_const(max_bpf_stack, MAX_BPF_STACK) + : __clobber_all); +} + +__naked __noinline __used +static unsigned long loop_callback(void) +{ + asm volatile (" \ + call %[bpf_get_prandom_u32]; \ + r1 = 42; \ + *(u64 *)(r10 - 512) = r1; \ + call cumulative_stack_depth_subprog; \ + r0 = 0; \ + exit; \ +" : + : __imm(bpf_get_prandom_u32) + : __clobber_common); +} + +SEC("raw_tp") +__description("Private stack, callback") +__success +__arch_x86_64 +/* for func loop_callback */ +__jited("func #1") +__jited(" endbr64") +__jited(" nopl (%rax,%rax)") +__jited(" nopl (%rax)") +__jited(" pushq %rbp") +__jited(" movq %rsp, %rbp") +__jited(" endbr64") +__jited(" movabsq $0x{{.*}}, %r9") +__jited(" addq %gs:0x{{.*}}, %r9") +__jited(" pushq %r9") +__jited(" callq") +__jited(" popq %r9") +__jited(" movl $0x2a, %edi") +__jited(" movq %rdi, -0x200(%r9)") +__jited(" pushq %r9") +__jited(" callq") +__jited(" popq %r9") +__naked void private_stack_callback(void) +{ + asm volatile (" \ + r1 = 1; \ + r2 = %[loop_callback]; \ + r3 = 0; \ + r4 = 0; \ + call %[bpf_loop]; \ + r0 = 0; \ + exit; \ +" : + : __imm_ptr(loop_callback), + __imm(bpf_loop) + : __clobber_common); +} + +SEC("fentry/bpf_fentry_test9") +__description("Private stack, exception in main prog") +__success __retval(0) +__arch_x86_64 +__jited(" pushq %r9") +__jited(" callq") +__jited(" popq %r9") +int private_stack_exception_main_prog(void) +{ + asm volatile (" \ + r1 = 42; \ + *(u64 *)(r10 - 512) = r1; \ +" ::: __clobber_common); + + bpf_throw(0); + return 0; +} + +__used static int subprog_exception(void) +{ + bpf_throw(0); + return 0; +} + +SEC("fentry/bpf_fentry_test9") +__description("Private stack, exception in subprog") +__success __retval(0) +__arch_x86_64 +__jited(" movq %rdi, -0x200(%r9)") +__jited(" pushq %r9") +__jited(" callq") +__jited(" popq %r9") +int private_stack_exception_sub_prog(void) +{ + asm volatile (" \ + r1 = 42; \ + *(u64 *)(r10 - 512) = r1; \ + call subprog_exception; \ +" ::: __clobber_common); + + return 0; +} + +int glob; +__noinline static void subprog2(int *val) +{ + glob += val[0] * 2; +} + +__noinline static void subprog1(int *val) +{ + int tmp[64] = {}; + + tmp[0] = *val; + subprog2(tmp); +} + +__noinline static int timer_cb1(void *map, int *key, struct bpf_timer *timer) +{ + subprog1(key); + return 0; +} + +__noinline static int timer_cb2(void *map, int *key, struct bpf_timer *timer) +{ + return 0; +} + +SEC("fentry/bpf_fentry_test9") +__description("Private stack, async callback, not nested") +__success __retval(0) +__arch_x86_64 +__jited(" movabsq $0x{{.*}}, %r9") +int private_stack_async_callback_1(void) +{ + struct bpf_timer *arr_timer; + int array_key = 0; + + arr_timer = bpf_map_lookup_elem(&array, &array_key); + if (!arr_timer) + return 0; + + bpf_timer_init(arr_timer, &array, 1); + bpf_timer_set_callback(arr_timer, timer_cb2); + bpf_timer_start(arr_timer, 0, 0); + subprog1(&array_key); + return 0; +} + +SEC("fentry/bpf_fentry_test9") +__description("Private stack, async callback, potential nesting") +__success __retval(0) +__arch_x86_64 +__jited(" subq $0x100, %rsp") +int private_stack_async_callback_2(void) +{ + struct bpf_timer *arr_timer; + int array_key = 0; + + arr_timer = bpf_map_lookup_elem(&array, &array_key); + if (!arr_timer) + return 0; + + bpf_timer_init(arr_timer, &array, 1); + bpf_timer_set_callback(arr_timer, timer_cb1); + bpf_timer_start(arr_timer, 0, 0); + subprog1(&array_key); + return 0; +} + +#else + +SEC("kprobe") +__description("private stack is not supported, use a dummy test") +__success +int dummy_test(void) +{ + return 0; +} + +#endif + +char _license[] SEC("license") = "GPL"; From patchwork Mon Nov 4 19:35:42 2024 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Yonghong Song X-Patchwork-Id: 13861922 X-Patchwork-Delegate: bpf@iogearbox.net Received: from 66-220-155-178.mail-mxout.facebook.com (66-220-155-178.mail-mxout.facebook.com [66.220.155.178]) (using TLSv1.2 with cipher ECDHE-RSA-AES128-GCM-SHA256 (128/128 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 753051CC893 for ; Mon, 4 Nov 2024 19:38:10 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=66.220.155.178 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1730749092; cv=none; b=dgkFNqc5uf3JxcytqVHubKunkpPR3GyPCSNEWTx1MVQGRkys4/g8hteaEPtDv6cRQpf1EFJvHSVnN7i2RFwjPcGc6xq6b9NBkNETHyufrcWOW5mFOVNB4YDWLe52/t0R1V1JlbsITL0H14uSu1vIMHegSVpns/hoysZKXXMtj0s= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1730749092; c=relaxed/simple; bh=BM+axZwe3R8DERr13mo1YjpJwZZAPwn39kxN/nD3wyg=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=oJkhMCorB2rVvc/GsytC5fkK0Wj6KgchOcZ/0tl5mPCqrjJfqcMfmRijZk/hfXZFEs1qrZZs13LT8KeaCwIkxGawiMXOqSHiKOZP7qPD2GSJb12LJlYfQctxf2GqEV/bYDX2HqRchu1cvVnEzA2JdGIx2d+aPPySsp6ZqdjDExY= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=fail (p=none dis=none) header.from=linux.dev; spf=fail smtp.mailfrom=linux.dev; arc=none smtp.client-ip=66.220.155.178 Authentication-Results: smtp.subspace.kernel.org; dmarc=fail (p=none dis=none) header.from=linux.dev Authentication-Results: smtp.subspace.kernel.org; spf=fail smtp.mailfrom=linux.dev Received: by devbig309.ftw3.facebook.com (Postfix, from userid 128203) id C641FABEC7F7; Mon, 4 Nov 2024 11:35:42 -0800 (PST) From: Yonghong Song To: bpf@vger.kernel.org Cc: Alexei Starovoitov , Andrii Nakryiko , Daniel Borkmann , kernel-team@fb.com, Martin KaFai Lau , Tejun Heo Subject: [PATCH bpf-next v9 09/10] bpf: Support private stack for struct_ops progs Date: Mon, 4 Nov 2024 11:35:42 -0800 Message-ID: <20241104193542.3245367-1-yonghong.song@linux.dev> X-Mailer: git-send-email 2.43.5 In-Reply-To: <20241104193455.3241859-1-yonghong.song@linux.dev> References: <20241104193455.3241859-1-yonghong.song@linux.dev> Precedence: bulk X-Mailing-List: bpf@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 X-Patchwork-Delegate: bpf@iogearbox.net For struct_ops progs, whether a particular prog will use private stack or not (prog->aux->use_priv_stack) will be set before actual insn-level verification for that prog. One particular implementation is to piggyback on struct_ops->check_member(). The next patch will have an example for this. The struct_ops->check_member() will set prog->aux->use_priv_stack to be true which enables private stack usage with ignoring BPF_PRIV_STACK_MIN_SIZE limit. If use_priv_stack is true for a particular struct_ops prog, bpf trampoline will need to do recursion checks (one level at this point) to avoid stack overwrite. A field (recursion_detected()) is added to bpf_prog_aux structure such that if bpf_prog->aux->recursion_detected is set by the struct_ops subsystem, the function will be called to report an error, collect related info, etc. Acked-by: Tejun Heo Signed-off-by: Yonghong Song --- include/linux/bpf.h | 1 + include/linux/bpf_verifier.h | 1 + kernel/bpf/trampoline.c | 4 ++++ kernel/bpf/verifier.c | 20 +++++++++++++++++++- 4 files changed, 25 insertions(+), 1 deletion(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 8a3ea7440a4a..d04f990dd6d9 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1528,6 +1528,7 @@ struct bpf_prog_aux { u64 prog_array_member_cnt; /* counts how many times as member of prog_array */ struct mutex ext_mutex; /* mutex for is_extended and prog_array_member_cnt */ struct bpf_arena *arena; + void (*recursion_detected)(struct bpf_prog *prog); /* callback if recursion is detected */ /* BTF_KIND_FUNC_PROTO for valid attach_btf_id */ const struct btf_type *attach_func_proto; /* function name for valid attach_btf_id */ diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index e921589abc72..f65431c42f9e 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -891,6 +891,7 @@ static inline bool bpf_prog_check_recur(const struct bpf_prog *prog) case BPF_PROG_TYPE_TRACING: return prog->expected_attach_type != BPF_TRACE_ITER; case BPF_PROG_TYPE_STRUCT_OPS: + return prog->aux->use_priv_stack; case BPF_PROG_TYPE_LSM: default: return false; diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c index 9f36c049f4c2..a8d188b31da5 100644 --- a/kernel/bpf/trampoline.c +++ b/kernel/bpf/trampoline.c @@ -899,6 +899,8 @@ static u64 notrace __bpf_prog_enter_recur(struct bpf_prog *prog, struct bpf_tram if (unlikely(this_cpu_inc_return(*(prog->active)) != 1)) { bpf_prog_inc_misses_counter(prog); + if (prog->aux->recursion_detected) + prog->aux->recursion_detected(prog); return 0; } return bpf_prog_start_time(); @@ -975,6 +977,8 @@ u64 notrace __bpf_prog_enter_sleepable_recur(struct bpf_prog *prog, if (unlikely(this_cpu_inc_return(*(prog->active)) != 1)) { bpf_prog_inc_misses_counter(prog); + if (prog->aux->recursion_detected) + prog->aux->recursion_detected(prog); return 0; } return bpf_prog_start_time(); diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 03ae76d57076..ee16c328dffc 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -6045,6 +6045,8 @@ static enum priv_stack_mode bpf_enable_priv_stack(struct bpf_prog *prog) if (!bpf_prog_check_recur(prog)) return NO_PRIV_STACK; + if (prog->type == BPF_PROG_TYPE_STRUCT_OPS) + return PRIV_STACK_ALWAYS; return PRIV_STACK_ADAPTIVE; } @@ -6118,7 +6120,8 @@ static int check_max_stack_depth_subprog(struct bpf_verifier_env *env, int idx, idx, subprog_depth); return -EACCES; } - if (subprog_depth >= BPF_PRIV_STACK_MIN_SIZE) { + if (priv_stack_supported == PRIV_STACK_ALWAYS || + subprog_depth >= BPF_PRIV_STACK_MIN_SIZE) { subprog[idx].use_priv_stack = true; subprog[idx].visited_with_priv_stack = true; } @@ -6235,6 +6238,11 @@ static int check_max_stack_depth(struct bpf_verifier_env *env) for (int i = 0; i < env->subprog_cnt; i++) { if (!si[i].has_tail_call) continue; + if (priv_stack_supported == PRIV_STACK_ALWAYS) { + verbose(env, + "Private stack not supported due to tail call\n"); + return -EACCES; + } priv_stack_supported = NO_PRIV_STACK; break; } @@ -6275,6 +6283,11 @@ static int check_max_stack_depth(struct bpf_verifier_env *env) depth_frame, subtree_depth); return -EACCES; } + if (orig_priv_stack_supported == PRIV_STACK_ALWAYS) { + verbose(env, + "Private stack not supported due to possible nested subprog run\n"); + return -EACCES; + } if (orig_priv_stack_supported == PRIV_STACK_ADAPTIVE) { for (int i = 0; i < env->subprog_cnt; i++) si[i].use_priv_stack = false; @@ -21950,6 +21963,11 @@ static int check_struct_ops_btf_id(struct bpf_verifier_env *env) } } + if (prog->aux->use_priv_stack && !bpf_jit_supports_private_stack()) { + verbose(env, "Private stack not supported by jit\n"); + return -EACCES; + } + /* btf_ctx_access() used this to provide argument type info */ prog->aux->ctx_arg_info = st_ops_desc->arg_info[member_idx].info; From patchwork Mon Nov 4 19:35:47 2024 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Yonghong Song X-Patchwork-Id: 13861920 X-Patchwork-Delegate: bpf@iogearbox.net Received: from 69-171-232-181.mail-mxout.facebook.com (69-171-232-181.mail-mxout.facebook.com [69.171.232.181]) (using TLSv1.2 with cipher ECDHE-RSA-AES128-GCM-SHA256 (128/128 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id A41FD1CBE85 for ; Mon, 4 Nov 2024 19:38:08 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=69.171.232.181 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1730749090; cv=none; b=bSL60elEoRsNTwwOV1JsskvhOSeoLWj1ExZWmJyqb8Yu1bW1eCMXSFjOHXxd28HA/422mDfrqSqBzn36ulhVRKD/3eyN7Iwhou+aHkLo4EVKk2LDQ9rF2NcPScJuSYEiQKkZE+nI4akGkEbe9S+PQ2/0gVum2LZVSemum6wkElA= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1730749090; c=relaxed/simple; bh=HZZ80ElHuHLjcyvINlLoHB5YX4h7eqqI84dvnKIJriI=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=e65XpVMTIo0pZG/n+HRaTOCZ7W62uZnTKllAMUMjOhljso1V/KjZwBFQkPIN9bbXRSmbQq35LXpBO6Ddv9GtaiTlOf+u9RaQszHDk4vY0JryqvCKPkYZP5pz2tXptoEhDHXAqnGumnbuAuxNaC2ZcAXr7llMYG4Uf91IM8KhB5Q= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=fail (p=none dis=none) header.from=linux.dev; spf=fail smtp.mailfrom=linux.dev; arc=none smtp.client-ip=69.171.232.181 Authentication-Results: smtp.subspace.kernel.org; dmarc=fail (p=none dis=none) header.from=linux.dev Authentication-Results: smtp.subspace.kernel.org; spf=fail smtp.mailfrom=linux.dev Received: by devbig309.ftw3.facebook.com (Postfix, from userid 128203) id E1133ABEC815; Mon, 4 Nov 2024 11:35:47 -0800 (PST) From: Yonghong Song To: bpf@vger.kernel.org Cc: Alexei Starovoitov , Andrii Nakryiko , Daniel Borkmann , kernel-team@fb.com, Martin KaFai Lau , Tejun Heo Subject: [PATCH bpf-next v9 10/10] selftests/bpf: Add struct_ops prog private stack tests Date: Mon, 4 Nov 2024 11:35:47 -0800 Message-ID: <20241104193547.3245591-1-yonghong.song@linux.dev> X-Mailer: git-send-email 2.43.5 In-Reply-To: <20241104193455.3241859-1-yonghong.song@linux.dev> References: <20241104193455.3241859-1-yonghong.song@linux.dev> Precedence: bulk X-Mailing-List: bpf@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 X-Patchwork-Delegate: bpf@iogearbox.net Add three tests for struct_ops using private stack. ./test_progs -t struct_ops_private_stack #333/1 struct_ops_private_stack/private_stack:OK #333/2 struct_ops_private_stack/private_stack_fail:OK #333/3 struct_ops_private_stack/private_stack_recur:OK #333 struct_ops_private_stack:OK The following is a snippet of a struct_ops check_member() implementation: u32 moff = __btf_member_bit_offset(t, member) / 8; switch (moff) { case offsetof(struct bpf_testmod_ops3, test_1): prog->aux->use_priv_stack = true; prog->aux->recursion_detected = test_1_recursion_detected; fallthrough; default: break; } return 0; The first test is with nested two different callback functions where the first prog has more than 512 byte stack size (including subprogs) with private stack enabled. The second test is a negative test where the second prog has more than 512 byte stack size without private stack enabled. The third test is the same callback function recursing itself. At run time, the jit trampoline recursion check kicks in to prevent the recursion. The recursion_detected() callback function is implemented by the bpf_testmod, the following message in dmesg bpf_testmod: oh no, recursing into test_1, recursion_misses 1 demonstrates the callback function is indeed triggered when recursion miss happens. Signed-off-by: Yonghong Song --- .../selftests/bpf/bpf_testmod/bpf_testmod.c | 104 +++++++++++++++++ .../selftests/bpf/bpf_testmod/bpf_testmod.h | 5 + .../bpf/prog_tests/struct_ops_private_stack.c | 106 ++++++++++++++++++ .../bpf/progs/struct_ops_private_stack.c | 62 ++++++++++ .../bpf/progs/struct_ops_private_stack_fail.c | 62 ++++++++++ .../progs/struct_ops_private_stack_recur.c | 50 +++++++++ 6 files changed, 389 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/struct_ops_private_stack.c create mode 100644 tools/testing/selftests/bpf/progs/struct_ops_private_stack.c create mode 100644 tools/testing/selftests/bpf/progs/struct_ops_private_stack_fail.c create mode 100644 tools/testing/selftests/bpf/progs/struct_ops_private_stack_recur.c diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c index 8835761d9a12..46fe282b2e93 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c @@ -245,6 +245,39 @@ __bpf_kfunc void bpf_testmod_ctx_release(struct bpf_testmod_ctx *ctx) call_rcu(&ctx->rcu, testmod_free_cb); } +static struct bpf_testmod_ops3 *st_ops3; + +static int bpf_testmod_test_3(void) +{ + return 0; +} + +static int bpf_testmod_test_4(void) +{ + return 0; +} + +static struct bpf_testmod_ops3 __bpf_testmod_ops3 = { + .test_1 = bpf_testmod_test_3, + .test_2 = bpf_testmod_test_4, +}; + +static void bpf_testmod_test_struct_ops3(void) +{ + if (st_ops3) + st_ops3->test_1(); +} + +__bpf_kfunc void bpf_testmod_ops3_call_test_1(void) +{ + st_ops3->test_1(); +} + +__bpf_kfunc void bpf_testmod_ops3_call_test_2(void) +{ + st_ops3->test_2(); +} + struct bpf_testmod_btf_type_tag_1 { int a; }; @@ -380,6 +413,8 @@ bpf_testmod_test_read(struct file *file, struct kobject *kobj, (void)bpf_testmod_test_arg_ptr_to_struct(&struct_arg1_2); + bpf_testmod_test_struct_ops3(); + struct_arg3 = kmalloc((sizeof(struct bpf_testmod_struct_arg_3) + sizeof(int)), GFP_KERNEL); if (struct_arg3 != NULL) { @@ -584,6 +619,8 @@ BTF_ID_FLAGS(func, bpf_kfunc_trusted_num_test, KF_TRUSTED_ARGS) BTF_ID_FLAGS(func, bpf_kfunc_rcu_task_test, KF_RCU) BTF_ID_FLAGS(func, bpf_testmod_ctx_create, KF_ACQUIRE | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_testmod_ctx_release, KF_RELEASE) +BTF_ID_FLAGS(func, bpf_testmod_ops3_call_test_1) +BTF_ID_FLAGS(func, bpf_testmod_ops3_call_test_2) BTF_KFUNCS_END(bpf_testmod_common_kfunc_ids) BTF_ID_LIST(bpf_testmod_dtor_ids) @@ -1094,6 +1131,10 @@ static const struct bpf_verifier_ops bpf_testmod_verifier_ops = { .is_valid_access = bpf_testmod_ops_is_valid_access, }; +static const struct bpf_verifier_ops bpf_testmod_verifier_ops3 = { + .is_valid_access = bpf_testmod_ops_is_valid_access, +}; + static int bpf_dummy_reg(void *kdata, struct bpf_link *link) { struct bpf_testmod_ops *ops = kdata; @@ -1173,6 +1214,68 @@ struct bpf_struct_ops bpf_testmod_ops2 = { .owner = THIS_MODULE, }; +static int st_ops3_reg(void *kdata, struct bpf_link *link) +{ + int err = 0; + + mutex_lock(&st_ops_mutex); + if (st_ops3) { + pr_err("st_ops has already been registered\n"); + err = -EEXIST; + goto unlock; + } + st_ops3 = kdata; + +unlock: + mutex_unlock(&st_ops_mutex); + return err; +} + +static void st_ops3_unreg(void *kdata, struct bpf_link *link) +{ + mutex_lock(&st_ops_mutex); + st_ops3 = NULL; + mutex_unlock(&st_ops_mutex); +} + +static void test_1_recursion_detected(struct bpf_prog *prog) +{ + struct bpf_prog_stats *stats; + + stats = this_cpu_ptr(prog->stats); + printk("bpf_testmod: oh no, recursing into test_1, recursion_misses %llu", + u64_stats_read(&stats->misses)); +} + +static int st_ops3_check_member(const struct btf_type *t, + const struct btf_member *member, + const struct bpf_prog *prog) +{ + u32 moff = __btf_member_bit_offset(t, member) / 8; + + switch (moff) { + case offsetof(struct bpf_testmod_ops3, test_1): + prog->aux->use_priv_stack = true; + prog->aux->recursion_detected = test_1_recursion_detected; + fallthrough; + default: + break; + } + return 0; +} + +struct bpf_struct_ops bpf_testmod_ops3 = { + .verifier_ops = &bpf_testmod_verifier_ops3, + .init = bpf_testmod_ops_init, + .init_member = bpf_testmod_ops_init_member, + .reg = st_ops3_reg, + .unreg = st_ops3_unreg, + .check_member = st_ops3_check_member, + .cfi_stubs = &__bpf_testmod_ops3, + .name = "bpf_testmod_ops3", + .owner = THIS_MODULE, +}; + static int bpf_test_mod_st_ops__test_prologue(struct st_ops_args *args) { return 0; @@ -1331,6 +1434,7 @@ static int bpf_testmod_init(void) ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &bpf_testmod_kfunc_set); ret = ret ?: register_bpf_struct_ops(&bpf_bpf_testmod_ops, bpf_testmod_ops); ret = ret ?: register_bpf_struct_ops(&bpf_testmod_ops2, bpf_testmod_ops2); + ret = ret ?: register_bpf_struct_ops(&bpf_testmod_ops3, bpf_testmod_ops3); ret = ret ?: register_bpf_struct_ops(&testmod_st_ops, bpf_testmod_st_ops); ret = ret ?: register_btf_id_dtor_kfuncs(bpf_testmod_dtors, ARRAY_SIZE(bpf_testmod_dtors), diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h index fb7dff47597a..356803d1c10e 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h @@ -94,6 +94,11 @@ struct bpf_testmod_ops2 { int (*test_1)(void); }; +struct bpf_testmod_ops3 { + int (*test_1)(void); + int (*test_2)(void); +}; + struct st_ops_args { u64 a; }; diff --git a/tools/testing/selftests/bpf/prog_tests/struct_ops_private_stack.c b/tools/testing/selftests/bpf/prog_tests/struct_ops_private_stack.c new file mode 100644 index 000000000000..4006879ca3fe --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/struct_ops_private_stack.c @@ -0,0 +1,106 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include "struct_ops_private_stack.skel.h" +#include "struct_ops_private_stack_fail.skel.h" +#include "struct_ops_private_stack_recur.skel.h" + +static void test_private_stack(void) +{ + struct struct_ops_private_stack *skel; + struct bpf_link *link; + int err; + + skel = struct_ops_private_stack__open(); + if (!ASSERT_OK_PTR(skel, "struct_ops_private_stack__open")) + return; + + if (skel->data->skip) { + test__skip(); + goto cleanup; + } + + err = struct_ops_private_stack__load(skel); + if (!ASSERT_OK(err, "struct_ops_private_stack__load")) + goto cleanup; + + link = bpf_map__attach_struct_ops(skel->maps.testmod_1); + if (!ASSERT_OK_PTR(link, "attach_struct_ops")) + goto cleanup; + + ASSERT_OK(trigger_module_test_read(256), "trigger_read"); + + ASSERT_EQ(skel->bss->val_i, 3, "val_i"); + ASSERT_EQ(skel->bss->val_j, 8, "val_j"); + + bpf_link__destroy(link); + +cleanup: + struct_ops_private_stack__destroy(skel); +} + +static void test_private_stack_fail(void) +{ + struct struct_ops_private_stack_fail *skel; + int err; + + skel = struct_ops_private_stack_fail__open(); + if (!ASSERT_OK_PTR(skel, "struct_ops_private_stack_fail__open")) + return; + + if (skel->data->skip) { + test__skip(); + goto cleanup; + } + + err = struct_ops_private_stack_fail__load(skel); + if (!ASSERT_ERR(err, "struct_ops_private_stack_fail__load")) + goto cleanup; + return; + +cleanup: + struct_ops_private_stack_fail__destroy(skel); +} + +static void test_private_stack_recur(void) +{ + struct struct_ops_private_stack_recur *skel; + struct bpf_link *link; + int err; + + skel = struct_ops_private_stack_recur__open(); + if (!ASSERT_OK_PTR(skel, "struct_ops_private_stack_recur__open")) + return; + + if (skel->data->skip) { + test__skip(); + goto cleanup; + } + + err = struct_ops_private_stack_recur__load(skel); + if (!ASSERT_OK(err, "struct_ops_private_stack_recur__load")) + goto cleanup; + + link = bpf_map__attach_struct_ops(skel->maps.testmod_1); + if (!ASSERT_OK_PTR(link, "attach_struct_ops")) + goto cleanup; + + ASSERT_OK(trigger_module_test_read(256), "trigger_read"); + + ASSERT_EQ(skel->bss->val_j, 3, "val_j"); + + bpf_link__destroy(link); + +cleanup: + struct_ops_private_stack_recur__destroy(skel); +} + +void test_struct_ops_private_stack(void) +{ + if (test__start_subtest("private_stack")) + test_private_stack(); + if (test__start_subtest("private_stack_fail")) + test_private_stack_fail(); + if (test__start_subtest("private_stack_recur")) + test_private_stack_recur(); +} diff --git a/tools/testing/selftests/bpf/progs/struct_ops_private_stack.c b/tools/testing/selftests/bpf/progs/struct_ops_private_stack.c new file mode 100644 index 000000000000..8ea57e5348ab --- /dev/null +++ b/tools/testing/selftests/bpf/progs/struct_ops_private_stack.c @@ -0,0 +1,62 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include +#include "../bpf_testmod/bpf_testmod.h" + +char _license[] SEC("license") = "GPL"; + +#if defined(__TARGET_ARCH_x86) +bool skip __attribute((__section__(".data"))) = false; +#else +bool skip = true; +#endif + +void bpf_testmod_ops3_call_test_2(void) __ksym; + +int val_i, val_j; + +__noinline static int subprog2(int *a, int *b) +{ + return val_i + a[10] + b[20]; +} + +__noinline static int subprog1(int *a) +{ + /* stack size 200 bytes */ + int b[50] = {}; + + b[20] = 2; + return subprog2(a, b); +} + + +SEC("struct_ops") +int BPF_PROG(test_1) +{ + /* stack size 400 bytes */ + int a[100] = {}; + + a[10] = 1; + val_i = subprog1(a); + bpf_testmod_ops3_call_test_2(); + return 0; +} + +SEC("struct_ops") +int BPF_PROG(test_2) +{ + /* stack size 200 bytes */ + int a[50] = {}; + + a[10] = 3; + val_j = subprog1(a); + return 0; +} + +SEC(".struct_ops") +struct bpf_testmod_ops3 testmod_1 = { + .test_1 = (void *)test_1, + .test_2 = (void *)test_2, +}; diff --git a/tools/testing/selftests/bpf/progs/struct_ops_private_stack_fail.c b/tools/testing/selftests/bpf/progs/struct_ops_private_stack_fail.c new file mode 100644 index 000000000000..1f55ec4cee37 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/struct_ops_private_stack_fail.c @@ -0,0 +1,62 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include +#include "../bpf_testmod/bpf_testmod.h" + +char _license[] SEC("license") = "GPL"; + +#if defined(__TARGET_ARCH_x86) +bool skip __attribute((__section__(".data"))) = false; +#else +bool skip = true; +#endif + +void bpf_testmod_ops3_call_test_2(void) __ksym; + +int val_i, val_j; + +__noinline static int subprog2(int *a, int *b) +{ + return val_i + a[10] + b[20]; +} + +__noinline static int subprog1(int *a) +{ + /* stack size 200 bytes */ + int b[50] = {}; + + b[20] = 2; + return subprog2(a, b); +} + + +SEC("struct_ops") +int BPF_PROG(test_1) +{ + /* stack size 100 bytes */ + int a[25] = {}; + + a[10] = 1; + val_i = subprog1(a); + bpf_testmod_ops3_call_test_2(); + return 0; +} + +SEC("struct_ops") +int BPF_PROG(test_2) +{ + /* stack size 400 bytes */ + int a[100] = {}; + + a[10] = 3; + val_j = subprog1(a); + return 0; +} + +SEC(".struct_ops") +struct bpf_testmod_ops3 testmod_1 = { + .test_1 = (void *)test_1, + .test_2 = (void *)test_2, +}; diff --git a/tools/testing/selftests/bpf/progs/struct_ops_private_stack_recur.c b/tools/testing/selftests/bpf/progs/struct_ops_private_stack_recur.c new file mode 100644 index 000000000000..15d4e914dc92 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/struct_ops_private_stack_recur.c @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include +#include "../bpf_testmod/bpf_testmod.h" + +char _license[] SEC("license") = "GPL"; + +#if defined(__TARGET_ARCH_x86) +bool skip __attribute((__section__(".data"))) = false; +#else +bool skip = true; +#endif + +void bpf_testmod_ops3_call_test_1(void) __ksym; + +int val_i, val_j; + +__noinline static int subprog2(int *a, int *b) +{ + return val_i + a[10] + b[20]; +} + +__noinline static int subprog1(int *a) +{ + /* stack size 400 bytes */ + int b[100] = {}; + + b[20] = 2; + return subprog2(a, b); +} + + +SEC("struct_ops") +int BPF_PROG(test_1) +{ + /* stack size 400 bytes */ + int a[100] = {}; + + a[10] = 1; + val_j += subprog1(a); + bpf_testmod_ops3_call_test_1(); + return 0; +} + +SEC(".struct_ops") +struct bpf_testmod_ops3 testmod_1 = { + .test_1 = (void *)test_1, +};