diff mbox series

[bpf-next,v6,1/3] bpf: Prevent tailcall infinite loop caused by freplace

Message ID 20241008161333.33469-2-leon.hwang@linux.dev (mailing list archive)
State Superseded
Delegated to: BPF
Headers show
Series bpf: Fix tailcall infinite loop caused by freplace | expand

Checks

Context Check Description
bpf/vmtest-bpf-next-PR success PR summary
bpf/vmtest-bpf-next-VM_Test-0 success Logs for Lint
bpf/vmtest-bpf-next-VM_Test-2 success Logs for Unittests
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-5 success Logs for aarch64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-3 success Logs for Validate matrix.py
bpf/vmtest-bpf-next-VM_Test-10 success Logs for aarch64-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-4 success Logs for aarch64-gcc / build / build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-12 success Logs for s390x-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-9 success Logs for aarch64-gcc / test (test_verifier, false, 360) / test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-6 success Logs for aarch64-gcc / test (test_maps, false, 360) / test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-11 success Logs for s390x-gcc / build / build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-7 success Logs for aarch64-gcc / test (test_progs, false, 360) / test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-8 success Logs for aarch64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-16 success Logs for s390x-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-15 success Logs for s390x-gcc / test (test_verifier, false, 360) / test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-17 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-18 success Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-19 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-27 success Logs for x86_64-llvm-17 / build / build for x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-34 success Logs for x86_64-llvm-18 / build / build for x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-33 success Logs for x86_64-llvm-17 / veristat
bpf/vmtest-bpf-next-VM_Test-41 success Logs for x86_64-llvm-18 / veristat
bpf/vmtest-bpf-next-VM_Test-35 success Logs for x86_64-llvm-18 / build-release / build for x86_64 with llvm-18-O2
bpf/vmtest-bpf-next-VM_Test-28 success Logs for x86_64-llvm-17 / build-release / build for x86_64 with llvm-17-O2
bpf/vmtest-bpf-next-VM_Test-13 success Logs for s390x-gcc / test (test_progs, false, 360) / test_progs on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-14 success Logs for s390x-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-36 success Logs for x86_64-llvm-18 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-20 success Logs for x86_64-gcc / test (test_maps, false, 360) / test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-21 success Logs for x86_64-gcc / test (test_progs, false, 360) / test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-26 success Logs for x86_64-gcc / veristat / veristat on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-31 success Logs for x86_64-llvm-17 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-29 success Logs for x86_64-llvm-17 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-25 success Logs for x86_64-gcc / test (test_verifier, false, 360) / test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-32 success Logs for x86_64-llvm-17 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-30 success Logs for x86_64-llvm-17 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-24 success Logs for x86_64-gcc / test (test_progs_parallel, true, 30) / test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-40 success Logs for x86_64-llvm-18 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-23 success Logs for x86_64-gcc / test (test_progs_no_alu32_parallel, true, 30) / test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-22 success Logs for x86_64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-37 success Logs for x86_64-llvm-18 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-39 success Logs for x86_64-llvm-18 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-38 success Logs for x86_64-llvm-18 / test (test_progs_cpuv4, false, 360) / test_progs_cpuv4 on x86_64 with llvm-18
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for bpf-next, async
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 206 this patch: 206
netdev/build_tools success Errors and warnings before: 0 (+1) this patch: 0 (+1)
netdev/cc_maintainers warning 7 maintainers not CCed: john.fastabend@gmail.com sdf@fomichev.me martin.lau@linux.dev song@kernel.org haoluo@google.com kpsingh@kernel.org jolsa@kernel.org
netdev/build_clang success Errors and warnings before: 257 this patch: 257
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 6961 this patch: 6961
netdev/checkpatch warning CHECK: Alignment should match open parenthesis CHECK: Please use a blank line after function/struct/union/enum declarations WARNING: ENOTSUPP is not a SUSV4 error code, prefer EOPNOTSUPP WARNING: The commit message has 'Call Trace:', perhaps it also needs a 'Fixes:' tag? WARNING: line length of 85 exceeds 80 columns WARNING: line length of 86 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 6 this patch: 6
netdev/source_inline success Was 0 now: 0

Commit Message

Leon Hwang Oct. 8, 2024, 4:13 p.m. UTC
This patch prevents an infinite loop issue caused by combination of tailcall
and freplace.

For example:

tc_bpf2bpf.c:

// SPDX-License-Identifier: GPL-2.0

\#include <linux/bpf.h>
\#include <bpf/bpf_helpers.h>

__noinline
int subprog_tc(struct __sk_buff *skb)
{
	return skb->len * 2;
}

SEC("tc")
int entry_tc(struct __sk_buff *skb)
{
	return subprog_tc(skb);
}

char __license[] SEC("license") = "GPL";

tailcall_freplace.c:

// SPDX-License-Identifier: GPL-2.0

\#include <linux/bpf.h>
\#include <bpf/bpf_helpers.h>

struct {
	__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
	__uint(max_entries, 1);
	__uint(key_size, sizeof(__u32));
	__uint(value_size, sizeof(__u32));
} jmp_table SEC(".maps");

int count = 0;

SEC("freplace")
int entry_freplace(struct __sk_buff *skb)
{
	count++;
	bpf_tail_call_static(skb, &jmp_table, 0);
	return count;
}

char __license[] SEC("license") = "GPL";

The attach target of entry_freplace is subprog_tc, and the tail callee
in entry_freplace is entry_tc.

Then, the infinite loop will be entry_tc -> subprog_tc -> entry_freplace
--tailcall-> entry_tc, because tail_call_cnt in entry_freplace will count
from zero for every time of entry_freplace execution. Kernel will panic,
like:

[   15.310490] BUG: TASK stack guard page was hit at (____ptrval____)
(stack is (____ptrval____)..(____ptrval____))
[   15.310490] Oops: stack guard page: 0000 [#1] PREEMPT SMP NOPTI
[   15.310490] CPU: 1 PID: 89 Comm: test_progs Tainted: G           OE
   6.10.0-rc6-g026dcdae8d3e-dirty #72
[   15.310490] Hardware name: QEMU Ubuntu 24.04 PC (i440FX + PIIX,
1996), BIOS 1.16.3-debian-1.16.3-2 04/01/2014
[   15.310490] RIP: 0010:bpf_prog_3a140cef239a4b4f_subprog_tail+0x14/0x53
[   15.310490] Code: cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc
cc cc cc cc cc f3 0f 1e fa 0f 1f 44 00 00 0f 1f 00 55 48 89 e5 f3 0f 1e
fa <50> 50 53 41 55 48 89 fb 49 bd 00 2a 46 82 98 9c ff ff 48 89 df 4c
[   15.310490] RSP: 0018:ffffb500c0aa0000 EFLAGS: 00000202
[   15.310490] RAX: ffffb500c0aa0028 RBX: ffff9c98808b7e00 RCX:
0000000000008cb5
[   15.310490] RDX: 0000000000000000 RSI: ffff9c9882462a00 RDI:
ffff9c98808b7e00
[   15.310490] RBP: ffffb500c0aa0000 R08: 0000000000000000 R09:
0000000000000000
[   15.310490] R10: 0000000000000001 R11: 0000000000000000 R12:
ffffb500c01af000
[   15.310490] R13: ffffb500c01cd000 R14: 0000000000000000 R15:
0000000000000000
[   15.310490] FS:  00007f133b665140(0000) GS:ffff9c98bbd00000(0000)
knlGS:0000000000000000
[   15.310490] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   15.310490] CR2: ffffb500c0a9fff8 CR3: 0000000102478000 CR4:
00000000000006f0
[   15.310490] Call Trace:
[   15.310490]  <#DF>
[   15.310490]  ? die+0x36/0x90
[   15.310490]  ? handle_stack_overflow+0x4d/0x60
[   15.310490]  ? exc_double_fault+0x117/0x1a0
[   15.310490]  ? asm_exc_double_fault+0x23/0x30
[   15.310490]  ? bpf_prog_3a140cef239a4b4f_subprog_tail+0x14/0x53
[   15.310490]  </#DF>
[   15.310490]  <TASK>
[   15.310490]  bpf_prog_85781a698094722f_entry+0x4c/0x64
[   15.310490]  bpf_prog_1c515f389a9059b4_entry2+0x19/0x1b
[   15.310490]  ...
[   15.310490]  bpf_prog_85781a698094722f_entry+0x4c/0x64
[   15.310490]  bpf_prog_1c515f389a9059b4_entry2+0x19/0x1b
[   15.310490]  bpf_test_run+0x210/0x370
[   15.310490]  ? bpf_test_run+0x128/0x370
[   15.310490]  bpf_prog_test_run_skb+0x388/0x7a0
[   15.310490]  __sys_bpf+0xdbf/0x2c40
[   15.310490]  ? clockevents_program_event+0x52/0xf0
[   15.310490]  ? lock_release+0xbf/0x290
[   15.310490]  __x64_sys_bpf+0x1e/0x30
[   15.310490]  do_syscall_64+0x68/0x140
[   15.310490]  entry_SYSCALL_64_after_hwframe+0x76/0x7e
[   15.310490] RIP: 0033:0x7f133b52725d
[   15.310490] Code: ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa
48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f
05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 8b bb 0d 00 f7 d8 64 89 01 48
[   15.310490] RSP: 002b:00007ffddbc10258 EFLAGS: 00000206 ORIG_RAX:
0000000000000141
[   15.310490] RAX: ffffffffffffffda RBX: 00007ffddbc10828 RCX:
00007f133b52725d
[   15.310490] RDX: 0000000000000050 RSI: 00007ffddbc102a0 RDI:
000000000000000a
[   15.310490] RBP: 00007ffddbc10270 R08: 0000000000000000 R09:
00007ffddbc102a0
[   15.310490] R10: 0000000000000064 R11: 0000000000000206 R12:
0000000000000004
[   15.310490] R13: 0000000000000000 R14: 0000558ec4c24890 R15:
00007f133b6ed000
[   15.310490]  </TASK>
[   15.310490] Modules linked in: bpf_testmod(OE)
[   15.310490] ---[ end trace 0000000000000000 ]---
[   15.310490] RIP: 0010:bpf_prog_3a140cef239a4b4f_subprog_tail+0x14/0x53
[   15.310490] Code: cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc
cc cc cc cc cc f3 0f 1e fa 0f 1f 44 00 00 0f 1f 00 55 48 89 e5 f3 0f 1e
fa <50> 50 53 41 55 48 89 fb 49 bd 00 2a 46 82 98 9c ff ff 48 89 df 4c
[   15.310490] RSP: 0018:ffffb500c0aa0000 EFLAGS: 00000202
[   15.310490] RAX: ffffb500c0aa0028 RBX: ffff9c98808b7e00 RCX:
0000000000008cb5
[   15.310490] RDX: 0000000000000000 RSI: ffff9c9882462a00 RDI:
ffff9c98808b7e00
[   15.310490] RBP: ffffb500c0aa0000 R08: 0000000000000000 R09:
0000000000000000
[   15.310490] R10: 0000000000000001 R11: 0000000000000000 R12:
ffffb500c01af000
[   15.310490] R13: ffffb500c01cd000 R14: 0000000000000000 R15:
0000000000000000
[   15.310490] FS:  00007f133b665140(0000) GS:ffff9c98bbd00000(0000)
knlGS:0000000000000000
[   15.310490] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   15.310490] CR2: ffffb500c0a9fff8 CR3: 0000000102478000 CR4:
00000000000006f0
[   15.310490] Kernel panic - not syncing: Fatal exception in interrupt
[   15.310490] Kernel Offset: 0x30000000 from 0xffffffff81000000
(relocation range: 0xffffffff80000000-0xffffffffbfffffff)

This patch prevents this panic by preventing updating extended prog to
prog_array map and preventing extending a prog, which has been updated
to prog_array map, with freplace prog.

If a prog or its subprog has been extended by freplace prog, the prog
can not be updated to prog_array map.

If a prog has been updated to prog_array map, it or its subprog can not
be extended by freplace prog.

BTW, fix a minor code style issue by replacing 8 spaces with a tab.

Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202410080455.vy5GT8Vz-lkp@intel.com/
Signed-off-by: Leon Hwang <leon.hwang@linux.dev>
---
 include/linux/bpf.h     | 21 ++++++++++++++++++++
 kernel/bpf/arraymap.c   | 23 +++++++++++++++++++++-
 kernel/bpf/core.c       |  1 +
 kernel/bpf/syscall.c    | 21 ++++++++++++++------
 kernel/bpf/trampoline.c | 43 +++++++++++++++++++++++++++++++++++++++++
 5 files changed, 102 insertions(+), 7 deletions(-)

Comments

Eduard Zingerman Oct. 9, 2024, 4:30 a.m. UTC | #1
On Wed, 2024-10-09 at 00:13 +0800, Leon Hwang wrote:
> This patch prevents an infinite loop issue caused by combination of tailcall
> and freplace.

[...]

> Reported-by: kernel test robot <lkp@intel.com>
> Closes: https://lore.kernel.org/oe-kbuild-all/202410080455.vy5GT8Vz-lkp@intel.com/
> Signed-off-by: Leon Hwang <leon.hwang@linux.dev>
> ---

This seems correct as far as I can tell.

Reviewed-by: Eduard Zingerman <eddyz87@gmail.com>

[...]
Eduard Zingerman Oct. 9, 2024, 4:42 a.m. UTC | #2
On Wed, 2024-10-09 at 00:13 +0800, Leon Hwang wrote:

[...]

> Reported-by: kernel test robot <lkp@intel.com>
> Closes: https://lore.kernel.org/oe-kbuild-all/202410080455.vy5GT8Vz-lkp@intel.com/

These tags are misplaced: "Closes" link is for v5 version of this series.
"Closes" should refer to error reports for something that is already
a part of the kernel.


[...]
Leon Hwang Oct. 9, 2024, 5:43 a.m. UTC | #3
On 9/10/24 12:42, Eduard Zingerman wrote:
> On Wed, 2024-10-09 at 00:13 +0800, Leon Hwang wrote:
> 
> [...]
> 
>> Reported-by: kernel test robot <lkp@intel.com>
>> Closes: https://lore.kernel.org/oe-kbuild-all/202410080455.vy5GT8Vz-lkp@intel.com/
> 
> These tags are misplaced: "Closes" link is for v5 version of this series.
> "Closes" should refer to error reports for something that is already
> a part of the kernel.
> 

My bad. Thank you for your clarification.

Thanks,
Leon
Alexei Starovoitov Oct. 10, 2024, 12:58 a.m. UTC | #4
On Tue, Oct 8, 2024 at 9:16 AM Leon Hwang <leon.hwang@linux.dev> wrote:
>
> This patch prevents an infinite loop issue caused by combination of tailcall
> and freplace.
>
> For example:
>
> tc_bpf2bpf.c:
>
> // SPDX-License-Identifier: GPL-2.0
>
> \#include <linux/bpf.h>
> \#include <bpf/bpf_helpers.h>
>
> __noinline
> int subprog_tc(struct __sk_buff *skb)
> {
>         return skb->len * 2;
> }
>
> SEC("tc")
> int entry_tc(struct __sk_buff *skb)
> {
>         return subprog_tc(skb);
> }
>
> char __license[] SEC("license") = "GPL";
>
> tailcall_freplace.c:
>
> // SPDX-License-Identifier: GPL-2.0
>
> \#include <linux/bpf.h>
> \#include <bpf/bpf_helpers.h>
>
> struct {
>         __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
>         __uint(max_entries, 1);
>         __uint(key_size, sizeof(__u32));
>         __uint(value_size, sizeof(__u32));
> } jmp_table SEC(".maps");
>
> int count = 0;
>
> SEC("freplace")
> int entry_freplace(struct __sk_buff *skb)
> {
>         count++;
>         bpf_tail_call_static(skb, &jmp_table, 0);
>         return count;
> }
>
> char __license[] SEC("license") = "GPL";

No need to put the whole selftest into commit log.

The below few paragraphs are enough:

> The attach target of entry_freplace is subprog_tc, and the tail callee
> in entry_freplace is entry_tc.
>
> Then, the infinite loop will be entry_tc -> subprog_tc -> entry_freplace
> --tailcall-> entry_tc, because tail_call_cnt in entry_freplace will count
> from zero for every time of entry_freplace execution. Kernel will panic,
> like:

99% of this full dump below is nothing but noise.
Drop it. Above "kernel will panic" is enough.

> [   15.310490] BUG: TASK stack guard page was hit at (____ptrval____)
> (stack is (____ptrval____)..(____ptrval____))
> [   15.310490] Oops: stack guard page: 0000 [#1] PREEMPT SMP NOPTI
> [   15.310490] CPU: 1 PID: 89 Comm: test_progs Tainted: G           OE
>    6.10.0-rc6-g026dcdae8d3e-dirty #72
> [   15.310490] Hardware name: QEMU Ubuntu 24.04 PC (i440FX + PIIX,
> 1996), BIOS 1.16.3-debian-1.16.3-2 04/01/2014
> [   15.310490] RIP: 0010:bpf_prog_3a140cef239a4b4f_subprog_tail+0x14/0x53
> [   15.310490] Code: cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc
> cc cc cc cc cc f3 0f 1e fa 0f 1f 44 00 00 0f 1f 00 55 48 89 e5 f3 0f 1e
> fa <50> 50 53 41 55 48 89 fb 49 bd 00 2a 46 82 98 9c ff ff 48 89 df 4c
> [   15.310490] RSP: 0018:ffffb500c0aa0000 EFLAGS: 00000202
> [   15.310490] RAX: ffffb500c0aa0028 RBX: ffff9c98808b7e00 RCX:
> 0000000000008cb5
> [   15.310490] RDX: 0000000000000000 RSI: ffff9c9882462a00 RDI:
> ffff9c98808b7e00
> [   15.310490] RBP: ffffb500c0aa0000 R08: 0000000000000000 R09:
> 0000000000000000
> [   15.310490] R10: 0000000000000001 R11: 0000000000000000 R12:
> ffffb500c01af000
> [   15.310490] R13: ffffb500c01cd000 R14: 0000000000000000 R15:
> 0000000000000000
> [   15.310490] FS:  00007f133b665140(0000) GS:ffff9c98bbd00000(0000)
> knlGS:0000000000000000
> [   15.310490] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [   15.310490] CR2: ffffb500c0a9fff8 CR3: 0000000102478000 CR4:
> 00000000000006f0
> [   15.310490] Call Trace:
> [   15.310490]  <#DF>
> [   15.310490]  ? die+0x36/0x90
> [   15.310490]  ? handle_stack_overflow+0x4d/0x60
> [   15.310490]  ? exc_double_fault+0x117/0x1a0
> [   15.310490]  ? asm_exc_double_fault+0x23/0x30
> [   15.310490]  ? bpf_prog_3a140cef239a4b4f_subprog_tail+0x14/0x53
> [   15.310490]  </#DF>
> [   15.310490]  <TASK>
> [   15.310490]  bpf_prog_85781a698094722f_entry+0x4c/0x64
> [   15.310490]  bpf_prog_1c515f389a9059b4_entry2+0x19/0x1b
> [   15.310490]  ...
> [   15.310490]  bpf_prog_85781a698094722f_entry+0x4c/0x64
> [   15.310490]  bpf_prog_1c515f389a9059b4_entry2+0x19/0x1b
> [   15.310490]  bpf_test_run+0x210/0x370
> [   15.310490]  ? bpf_test_run+0x128/0x370
> [   15.310490]  bpf_prog_test_run_skb+0x388/0x7a0
> [   15.310490]  __sys_bpf+0xdbf/0x2c40
> [   15.310490]  ? clockevents_program_event+0x52/0xf0
> [   15.310490]  ? lock_release+0xbf/0x290
> [   15.310490]  __x64_sys_bpf+0x1e/0x30
> [   15.310490]  do_syscall_64+0x68/0x140
> [   15.310490]  entry_SYSCALL_64_after_hwframe+0x76/0x7e
> [   15.310490] RIP: 0033:0x7f133b52725d
> [   15.310490] Code: ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa
> 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f
> 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 8b bb 0d 00 f7 d8 64 89 01 48
> [   15.310490] RSP: 002b:00007ffddbc10258 EFLAGS: 00000206 ORIG_RAX:
> 0000000000000141
> [   15.310490] RAX: ffffffffffffffda RBX: 00007ffddbc10828 RCX:
> 00007f133b52725d
> [   15.310490] RDX: 0000000000000050 RSI: 00007ffddbc102a0 RDI:
> 000000000000000a
> [   15.310490] RBP: 00007ffddbc10270 R08: 0000000000000000 R09:
> 00007ffddbc102a0
> [   15.310490] R10: 0000000000000064 R11: 0000000000000206 R12:
> 0000000000000004
> [   15.310490] R13: 0000000000000000 R14: 0000558ec4c24890 R15:
> 00007f133b6ed000
> [   15.310490]  </TASK>
> [   15.310490] Modules linked in: bpf_testmod(OE)
> [   15.310490] ---[ end trace 0000000000000000 ]---
> [   15.310490] RIP: 0010:bpf_prog_3a140cef239a4b4f_subprog_tail+0x14/0x53
> [   15.310490] Code: cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc
> cc cc cc cc cc f3 0f 1e fa 0f 1f 44 00 00 0f 1f 00 55 48 89 e5 f3 0f 1e
> fa <50> 50 53 41 55 48 89 fb 49 bd 00 2a 46 82 98 9c ff ff 48 89 df 4c
> [   15.310490] RSP: 0018:ffffb500c0aa0000 EFLAGS: 00000202
> [   15.310490] RAX: ffffb500c0aa0028 RBX: ffff9c98808b7e00 RCX:
> 0000000000008cb5
> [   15.310490] RDX: 0000000000000000 RSI: ffff9c9882462a00 RDI:
> ffff9c98808b7e00
> [   15.310490] RBP: ffffb500c0aa0000 R08: 0000000000000000 R09:
> 0000000000000000
> [   15.310490] R10: 0000000000000001 R11: 0000000000000000 R12:
> ffffb500c01af000
> [   15.310490] R13: ffffb500c01cd000 R14: 0000000000000000 R15:
> 0000000000000000
> [   15.310490] FS:  00007f133b665140(0000) GS:ffff9c98bbd00000(0000)
> knlGS:0000000000000000
> [   15.310490] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [   15.310490] CR2: ffffb500c0a9fff8 CR3: 0000000102478000 CR4:
> 00000000000006f0
> [   15.310490] Kernel panic - not syncing: Fatal exception in interrupt
> [   15.310490] Kernel Offset: 0x30000000 from 0xffffffff81000000
> (relocation range: 0xffffffff80000000-0xffffffffbfffffff)


Below paragraphs are almost ok, but pls use imperative language.
Instead of "this patch prevents ..."
say "Fix the issue by preventing update of ext prog .."

Consider using grammarly or chatgpt to make it less nerdy.

> This patch prevents this panic by preventing updating extended prog to
> prog_array map and preventing extending a prog, which has been updated
> to prog_array map, with freplace prog.
>
> If a prog or its subprog has been extended by freplace prog, the prog
> can not be updated to prog_array map.
>
> If a prog has been updated to prog_array map, it or its subprog can not
> be extended by freplace prog.
>
> BTW, fix a minor code style issue by replacing 8 spaces with a tab.
>
> Reported-by: kernel test robot <lkp@intel.com>
> Closes: https://lore.kernel.org/oe-kbuild-all/202410080455.vy5GT8Vz-lkp@intel.com/
> Signed-off-by: Leon Hwang <leon.hwang@linux.dev>
> ---
>  include/linux/bpf.h     | 21 ++++++++++++++++++++
>  kernel/bpf/arraymap.c   | 23 +++++++++++++++++++++-
>  kernel/bpf/core.c       |  1 +
>  kernel/bpf/syscall.c    | 21 ++++++++++++++------
>  kernel/bpf/trampoline.c | 43 +++++++++++++++++++++++++++++++++++++++++
>  5 files changed, 102 insertions(+), 7 deletions(-)
>
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index 19d8ca8ac960f..213a68c59bdf7 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -1294,6 +1294,12 @@ bool __bpf_dynptr_is_rdonly(const struct bpf_dynptr_kern *ptr);
>  #ifdef CONFIG_BPF_JIT
>  int bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr);
>  int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr);
> +int bpf_extension_link_prog(struct bpf_tramp_link *link,
> +                           struct bpf_trampoline *tr,
> +                           struct bpf_prog *tgt_prog);
> +int bpf_extension_unlink_prog(struct bpf_tramp_link *link,
> +                             struct bpf_trampoline *tr,
> +                             struct bpf_prog *tgt_prog);
>  struct bpf_trampoline *bpf_trampoline_get(u64 key,
>                                           struct bpf_attach_target_info *tgt_info);
>  void bpf_trampoline_put(struct bpf_trampoline *tr);
> @@ -1383,6 +1389,18 @@ static inline int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link,
>  {
>         return -ENOTSUPP;
>  }
> +static inline int bpf_extension_link_prog(struct bpf_tramp_link *link,
> +                           struct bpf_trampoline *tr,
> +                           struct bpf_prog *tgt_prog)
> +{
> +       return -ENOTSUPP;
> +}
> +static inline int bpf_extension_unlink_prog(struct bpf_tramp_link *link,
> +                             struct bpf_trampoline *tr,
> +                             struct bpf_prog *tgt_prog)
> +{
> +       return -ENOTSUPP;
> +}
>  static inline struct bpf_trampoline *bpf_trampoline_get(u64 key,
>                                                         struct bpf_attach_target_info *tgt_info)
>  {
> @@ -1483,6 +1501,9 @@ struct bpf_prog_aux {
>         bool xdp_has_frags;
>         bool exception_cb;
>         bool exception_boundary;
> +       bool is_extended; /* true if extended by freplace program */
> +       u64 prog_array_member_cnt; /* counts how many times as member of prog_array */
> +       struct mutex ext_mutex; /* mutex for is_extended and prog_array_member_cnt */
>         struct bpf_arena *arena;
>         /* BTF_KIND_FUNC_PROTO for valid attach_btf_id */
>         const struct btf_type *attach_func_proto;
> diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
> index 79660e3fca4c1..f9bd63a74eee7 100644
> --- a/kernel/bpf/arraymap.c
> +++ b/kernel/bpf/arraymap.c
> @@ -947,6 +947,7 @@ static void *prog_fd_array_get_ptr(struct bpf_map *map,
>                                    struct file *map_file, int fd)
>  {
>         struct bpf_prog *prog = bpf_prog_get(fd);
> +       bool is_extended;
>
>         if (IS_ERR(prog))
>                 return prog;
> @@ -956,13 +957,33 @@ static void *prog_fd_array_get_ptr(struct bpf_map *map,
>                 return ERR_PTR(-EINVAL);
>         }
>
> +       mutex_lock(&prog->aux->ext_mutex);
> +       is_extended = prog->aux->is_extended;
> +       if (!is_extended)
> +               prog->aux->prog_array_member_cnt++;
> +       mutex_unlock(&prog->aux->ext_mutex);
> +       if (is_extended) {
> +               /* Extended prog can not be tail callee. It's to prevent a
> +                * potential infinite loop like:
> +                * tail callee prog entry -> tail callee prog subprog ->
> +                * freplace prog entry --tailcall-> tail callee prog entry.
> +                */
> +               bpf_prog_put(prog);
> +               return ERR_PTR(-EBUSY);
> +       }
> +
>         return prog;
>  }
>
>  static void prog_fd_array_put_ptr(struct bpf_map *map, void *ptr, bool need_defer)
>  {
> +       struct bpf_prog *prog = ptr;
> +
> +       mutex_lock(&prog->aux->ext_mutex);
> +       prog->aux->prog_array_member_cnt--;
> +       mutex_unlock(&prog->aux->ext_mutex);
>         /* bpf_prog is freed after one RCU or tasks trace grace period */
> -       bpf_prog_put(ptr);
> +       bpf_prog_put(prog);
>  }
>
>  static u32 prog_fd_array_sys_lookup_elem(void *ptr)
> diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
> index 5e77c58e06010..233ea78f8f1bd 100644
> --- a/kernel/bpf/core.c
> +++ b/kernel/bpf/core.c
> @@ -131,6 +131,7 @@ struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flag
>         INIT_LIST_HEAD_RCU(&fp->aux->ksym_prefix.lnode);
>  #endif
>         mutex_init(&fp->aux->used_maps_mutex);
> +       mutex_init(&fp->aux->ext_mutex);
>         mutex_init(&fp->aux->dst_mutex);
>
>         return fp;
> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
> index a8f1808a1ca54..4a5a44bbb5f50 100644
> --- a/kernel/bpf/syscall.c
> +++ b/kernel/bpf/syscall.c
> @@ -3212,15 +3212,21 @@ static void bpf_tracing_link_release(struct bpf_link *link)
>  {
>         struct bpf_tracing_link *tr_link =
>                 container_of(link, struct bpf_tracing_link, link.link);
> +       struct bpf_prog *tgt_prog = tr_link->tgt_prog;
>
> -       WARN_ON_ONCE(bpf_trampoline_unlink_prog(&tr_link->link,
> -                                               tr_link->trampoline));
> +       if (link->prog->type == BPF_PROG_TYPE_EXT)
> +               WARN_ON_ONCE(bpf_extension_unlink_prog(&tr_link->link,
> +                                                      tr_link->trampoline,
> +                                                      tgt_prog));
> +       else
> +               WARN_ON_ONCE(bpf_trampoline_unlink_prog(&tr_link->link,
> +                                                       tr_link->trampoline));

This early demux into two helpers looks odd.
I think it would be cleaner to add this logic to bpf_trampoline_unlink_prog()
Just extend it with an extra prog argument.

>         bpf_trampoline_put(tr_link->trampoline);
>
>         /* tgt_prog is NULL if target is a kernel function */
> -       if (tr_link->tgt_prog)
> -               bpf_prog_put(tr_link->tgt_prog);
> +       if (tgt_prog)
> +               bpf_prog_put(tgt_prog);
>  }
>
>  static void bpf_tracing_link_dealloc(struct bpf_link *link)
> @@ -3354,7 +3360,7 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog,
>          *   in prog->aux
>          *
>          * - if prog->aux->dst_trampoline is NULL, the program has already been
> -         *   attached to a target and its initial target was cleared (below)
> +        *   attached to a target and its initial target was cleared (below)
>          *
>          * - if tgt_prog != NULL, the caller specified tgt_prog_fd +
>          *   target_btf_id using the link_create API.
> @@ -3429,7 +3435,10 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog,
>         if (err)
>                 goto out_unlock;
>
> -       err = bpf_trampoline_link_prog(&link->link, tr);
> +       if (prog->type == BPF_PROG_TYPE_EXT)
> +               err = bpf_extension_link_prog(&link->link, tr, tgt_prog);
> +       else
> +               err = bpf_trampoline_link_prog(&link->link, tr);

same here.

>         if (err) {
>                 bpf_link_cleanup(&link_primer);
>                 link = NULL;
> diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
> index f8302a5ca400d..b14f56046ad4e 100644
> --- a/kernel/bpf/trampoline.c
> +++ b/kernel/bpf/trampoline.c
> @@ -580,6 +580,35 @@ int bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_trampoline
>         return err;
>  }
>
> +int bpf_extension_link_prog(struct bpf_tramp_link *link,
> +                           struct bpf_trampoline *tr,
> +                           struct bpf_prog *tgt_prog)
> +{
> +       struct bpf_prog_aux *aux = tgt_prog->aux;
> +       int err;
> +
> +       mutex_lock(&aux->ext_mutex);

pls use guard(mutex) as I suggested earlier.

> +       if (aux->prog_array_member_cnt) {
> +               /* Program extensions can not extend target prog when the target
> +                * prog has been updated to any prog_array map as tail callee.
> +                * It's to prevent a potential infinite loop like:
> +                * tgt prog entry -> tgt prog subprog -> freplace prog entry
> +                * --tailcall-> tgt prog entry.
> +                */
> +               err = -EBUSY;
> +               goto out_unlock;
> +       }
> +
> +       err = bpf_trampoline_link_prog(link, tr);
> +       if (err)
> +               goto out_unlock;
> +
> +       aux->is_extended = true;
> +out_unlock:
> +       mutex_unlock(&aux->ext_mutex);
> +       return err;
> +}
> +
>  static int __bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr)
>  {
>         enum bpf_tramp_prog_type kind;
> @@ -609,6 +638,20 @@ int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, struct bpf_trampolin
>         return err;
>  }
>
> +int bpf_extension_unlink_prog(struct bpf_tramp_link *link,
> +                             struct bpf_trampoline *tr,
> +                             struct bpf_prog *tgt_prog)
> +{
> +       struct bpf_prog_aux *aux = tgt_prog->aux;
> +       int err;
> +
> +       mutex_lock(&aux->ext_mutex);
> +       err = bpf_trampoline_unlink_prog(link, tr);
> +       aux->is_extended = false;
> +       mutex_unlock(&aux->ext_mutex);

same suggestions.

pw-bot: cr
Leon Hwang Oct. 10, 2024, 2:21 a.m. UTC | #5
On 10/10/24 08:58, Alexei Starovoitov wrote:
> On Tue, Oct 8, 2024 at 9:16 AM Leon Hwang <leon.hwang@linux.dev> wrote:
>>
>> This patch prevents an infinite loop issue caused by combination of tailcall
>> and freplace.
>>
>> For example:
>>
>> tc_bpf2bpf.c:
>>
>> // SPDX-License-Identifier: GPL-2.0
>>
>> \#include <linux/bpf.h>
>> \#include <bpf/bpf_helpers.h>
>>
>> __noinline
>> int subprog_tc(struct __sk_buff *skb)
>> {
>>         return skb->len * 2;
>> }
>>
>> SEC("tc")
>> int entry_tc(struct __sk_buff *skb)
>> {
>>         return subprog_tc(skb);
>> }
>>
>> char __license[] SEC("license") = "GPL";
>>
>> tailcall_freplace.c:
>>
>> // SPDX-License-Identifier: GPL-2.0
>>
>> \#include <linux/bpf.h>
>> \#include <bpf/bpf_helpers.h>
>>
>> struct {
>>         __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
>>         __uint(max_entries, 1);
>>         __uint(key_size, sizeof(__u32));
>>         __uint(value_size, sizeof(__u32));
>> } jmp_table SEC(".maps");
>>
>> int count = 0;
>>
>> SEC("freplace")
>> int entry_freplace(struct __sk_buff *skb)
>> {
>>         count++;
>>         bpf_tail_call_static(skb, &jmp_table, 0);
>>         return count;
>> }
>>
>> char __license[] SEC("license") = "GPL";
> 
> No need to put the whole selftest into commit log.
> 
> The below few paragraphs are enough:
> 
>> The attach target of entry_freplace is subprog_tc, and the tail callee
>> in entry_freplace is entry_tc.
>>
>> Then, the infinite loop will be entry_tc -> subprog_tc -> entry_freplace
>> --tailcall-> entry_tc, because tail_call_cnt in entry_freplace will count
>> from zero for every time of entry_freplace execution. Kernel will panic,
>> like:
> 
> 99% of this full dump below is nothing but noise.
> Drop it. Above "kernel will panic" is enough.
> 
>> [   15.310490] BUG: TASK stack guard page was hit at (____ptrval____)
>> (stack is (____ptrval____)..(____ptrval____))
>> [   15.310490] Oops: stack guard page: 0000 [#1] PREEMPT SMP NOPTI
>> [   15.310490] CPU: 1 PID: 89 Comm: test_progs Tainted: G           OE
>>    6.10.0-rc6-g026dcdae8d3e-dirty #72
>> [   15.310490] Hardware name: QEMU Ubuntu 24.04 PC (i440FX + PIIX,
>> 1996), BIOS 1.16.3-debian-1.16.3-2 04/01/2014
>> [   15.310490] RIP: 0010:bpf_prog_3a140cef239a4b4f_subprog_tail+0x14/0x53
>> [   15.310490] Code: cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc
>> cc cc cc cc cc f3 0f 1e fa 0f 1f 44 00 00 0f 1f 00 55 48 89 e5 f3 0f 1e
>> fa <50> 50 53 41 55 48 89 fb 49 bd 00 2a 46 82 98 9c ff ff 48 89 df 4c
>> [   15.310490] RSP: 0018:ffffb500c0aa0000 EFLAGS: 00000202
>> [   15.310490] RAX: ffffb500c0aa0028 RBX: ffff9c98808b7e00 RCX:
>> 0000000000008cb5
>> [   15.310490] RDX: 0000000000000000 RSI: ffff9c9882462a00 RDI:
>> ffff9c98808b7e00
>> [   15.310490] RBP: ffffb500c0aa0000 R08: 0000000000000000 R09:
>> 0000000000000000
>> [   15.310490] R10: 0000000000000001 R11: 0000000000000000 R12:
>> ffffb500c01af000
>> [   15.310490] R13: ffffb500c01cd000 R14: 0000000000000000 R15:
>> 0000000000000000
>> [   15.310490] FS:  00007f133b665140(0000) GS:ffff9c98bbd00000(0000)
>> knlGS:0000000000000000
>> [   15.310490] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
>> [   15.310490] CR2: ffffb500c0a9fff8 CR3: 0000000102478000 CR4:
>> 00000000000006f0
>> [   15.310490] Call Trace:
>> [   15.310490]  <#DF>
>> [   15.310490]  ? die+0x36/0x90
>> [   15.310490]  ? handle_stack_overflow+0x4d/0x60
>> [   15.310490]  ? exc_double_fault+0x117/0x1a0
>> [   15.310490]  ? asm_exc_double_fault+0x23/0x30
>> [   15.310490]  ? bpf_prog_3a140cef239a4b4f_subprog_tail+0x14/0x53
>> [   15.310490]  </#DF>
>> [   15.310490]  <TASK>
>> [   15.310490]  bpf_prog_85781a698094722f_entry+0x4c/0x64
>> [   15.310490]  bpf_prog_1c515f389a9059b4_entry2+0x19/0x1b
>> [   15.310490]  ...
>> [   15.310490]  bpf_prog_85781a698094722f_entry+0x4c/0x64
>> [   15.310490]  bpf_prog_1c515f389a9059b4_entry2+0x19/0x1b
>> [   15.310490]  bpf_test_run+0x210/0x370
>> [   15.310490]  ? bpf_test_run+0x128/0x370
>> [   15.310490]  bpf_prog_test_run_skb+0x388/0x7a0
>> [   15.310490]  __sys_bpf+0xdbf/0x2c40
>> [   15.310490]  ? clockevents_program_event+0x52/0xf0
>> [   15.310490]  ? lock_release+0xbf/0x290
>> [   15.310490]  __x64_sys_bpf+0x1e/0x30
>> [   15.310490]  do_syscall_64+0x68/0x140
>> [   15.310490]  entry_SYSCALL_64_after_hwframe+0x76/0x7e
>> [   15.310490] RIP: 0033:0x7f133b52725d
>> [   15.310490] Code: ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa
>> 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f
>> 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 8b bb 0d 00 f7 d8 64 89 01 48
>> [   15.310490] RSP: 002b:00007ffddbc10258 EFLAGS: 00000206 ORIG_RAX:
>> 0000000000000141
>> [   15.310490] RAX: ffffffffffffffda RBX: 00007ffddbc10828 RCX:
>> 00007f133b52725d
>> [   15.310490] RDX: 0000000000000050 RSI: 00007ffddbc102a0 RDI:
>> 000000000000000a
>> [   15.310490] RBP: 00007ffddbc10270 R08: 0000000000000000 R09:
>> 00007ffddbc102a0
>> [   15.310490] R10: 0000000000000064 R11: 0000000000000206 R12:
>> 0000000000000004
>> [   15.310490] R13: 0000000000000000 R14: 0000558ec4c24890 R15:
>> 00007f133b6ed000
>> [   15.310490]  </TASK>
>> [   15.310490] Modules linked in: bpf_testmod(OE)
>> [   15.310490] ---[ end trace 0000000000000000 ]---
>> [   15.310490] RIP: 0010:bpf_prog_3a140cef239a4b4f_subprog_tail+0x14/0x53
>> [   15.310490] Code: cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc
>> cc cc cc cc cc f3 0f 1e fa 0f 1f 44 00 00 0f 1f 00 55 48 89 e5 f3 0f 1e
>> fa <50> 50 53 41 55 48 89 fb 49 bd 00 2a 46 82 98 9c ff ff 48 89 df 4c
>> [   15.310490] RSP: 0018:ffffb500c0aa0000 EFLAGS: 00000202
>> [   15.310490] RAX: ffffb500c0aa0028 RBX: ffff9c98808b7e00 RCX:
>> 0000000000008cb5
>> [   15.310490] RDX: 0000000000000000 RSI: ffff9c9882462a00 RDI:
>> ffff9c98808b7e00
>> [   15.310490] RBP: ffffb500c0aa0000 R08: 0000000000000000 R09:
>> 0000000000000000
>> [   15.310490] R10: 0000000000000001 R11: 0000000000000000 R12:
>> ffffb500c01af000
>> [   15.310490] R13: ffffb500c01cd000 R14: 0000000000000000 R15:
>> 0000000000000000
>> [   15.310490] FS:  00007f133b665140(0000) GS:ffff9c98bbd00000(0000)
>> knlGS:0000000000000000
>> [   15.310490] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
>> [   15.310490] CR2: ffffb500c0a9fff8 CR3: 0000000102478000 CR4:
>> 00000000000006f0
>> [   15.310490] Kernel panic - not syncing: Fatal exception in interrupt
>> [   15.310490] Kernel Offset: 0x30000000 from 0xffffffff81000000
>> (relocation range: 0xffffffff80000000-0xffffffffbfffffff)
> 
> 
> Below paragraphs are almost ok, but pls use imperative language.
> Instead of "this patch prevents ..."
> say "Fix the issue by preventing update of ext prog .."
> 
> Consider using grammarly or chatgpt to make it less nerdy.
> 

Got it. Thanks for your suggestion.

>> This patch prevents this panic by preventing updating extended prog to
>> prog_array map and preventing extending a prog, which has been updated
>> to prog_array map, with freplace prog.
>>
>> If a prog or its subprog has been extended by freplace prog, the prog
>> can not be updated to prog_array map.
>>
>> If a prog has been updated to prog_array map, it or its subprog can not
>> be extended by freplace prog.
>>
>> BTW, fix a minor code style issue by replacing 8 spaces with a tab.
>>
>> Reported-by: kernel test robot <lkp@intel.com>
>> Closes: https://lore.kernel.org/oe-kbuild-all/202410080455.vy5GT8Vz-lkp@intel.com/
>> Signed-off-by: Leon Hwang <leon.hwang@linux.dev>
>> ---
>>  include/linux/bpf.h     | 21 ++++++++++++++++++++
>>  kernel/bpf/arraymap.c   | 23 +++++++++++++++++++++-
>>  kernel/bpf/core.c       |  1 +
>>  kernel/bpf/syscall.c    | 21 ++++++++++++++------
>>  kernel/bpf/trampoline.c | 43 +++++++++++++++++++++++++++++++++++++++++
>>  5 files changed, 102 insertions(+), 7 deletions(-)
>>
>> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
>> index 19d8ca8ac960f..213a68c59bdf7 100644
>> --- a/include/linux/bpf.h
>> +++ b/include/linux/bpf.h
>> @@ -1294,6 +1294,12 @@ bool __bpf_dynptr_is_rdonly(const struct bpf_dynptr_kern *ptr);
>>  #ifdef CONFIG_BPF_JIT
>>  int bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr);
>>  int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr);
>> +int bpf_extension_link_prog(struct bpf_tramp_link *link,
>> +                           struct bpf_trampoline *tr,
>> +                           struct bpf_prog *tgt_prog);
>> +int bpf_extension_unlink_prog(struct bpf_tramp_link *link,
>> +                             struct bpf_trampoline *tr,
>> +                             struct bpf_prog *tgt_prog);
>>  struct bpf_trampoline *bpf_trampoline_get(u64 key,
>>                                           struct bpf_attach_target_info *tgt_info);
>>  void bpf_trampoline_put(struct bpf_trampoline *tr);
>> @@ -1383,6 +1389,18 @@ static inline int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link,
>>  {
>>         return -ENOTSUPP;
>>  }
>> +static inline int bpf_extension_link_prog(struct bpf_tramp_link *link,
>> +                           struct bpf_trampoline *tr,
>> +                           struct bpf_prog *tgt_prog)
>> +{
>> +       return -ENOTSUPP;
>> +}
>> +static inline int bpf_extension_unlink_prog(struct bpf_tramp_link *link,
>> +                             struct bpf_trampoline *tr,
>> +                             struct bpf_prog *tgt_prog)
>> +{
>> +       return -ENOTSUPP;
>> +}
>>  static inline struct bpf_trampoline *bpf_trampoline_get(u64 key,
>>                                                         struct bpf_attach_target_info *tgt_info)
>>  {
>> @@ -1483,6 +1501,9 @@ struct bpf_prog_aux {
>>         bool xdp_has_frags;
>>         bool exception_cb;
>>         bool exception_boundary;
>> +       bool is_extended; /* true if extended by freplace program */
>> +       u64 prog_array_member_cnt; /* counts how many times as member of prog_array */
>> +       struct mutex ext_mutex; /* mutex for is_extended and prog_array_member_cnt */
>>         struct bpf_arena *arena;
>>         /* BTF_KIND_FUNC_PROTO for valid attach_btf_id */
>>         const struct btf_type *attach_func_proto;
>> diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
>> index 79660e3fca4c1..f9bd63a74eee7 100644
>> --- a/kernel/bpf/arraymap.c
>> +++ b/kernel/bpf/arraymap.c
>> @@ -947,6 +947,7 @@ static void *prog_fd_array_get_ptr(struct bpf_map *map,
>>                                    struct file *map_file, int fd)
>>  {
>>         struct bpf_prog *prog = bpf_prog_get(fd);
>> +       bool is_extended;
>>
>>         if (IS_ERR(prog))
>>                 return prog;
>> @@ -956,13 +957,33 @@ static void *prog_fd_array_get_ptr(struct bpf_map *map,
>>                 return ERR_PTR(-EINVAL);
>>         }
>>
>> +       mutex_lock(&prog->aux->ext_mutex);
>> +       is_extended = prog->aux->is_extended;
>> +       if (!is_extended)
>> +               prog->aux->prog_array_member_cnt++;
>> +       mutex_unlock(&prog->aux->ext_mutex);
>> +       if (is_extended) {
>> +               /* Extended prog can not be tail callee. It's to prevent a
>> +                * potential infinite loop like:
>> +                * tail callee prog entry -> tail callee prog subprog ->
>> +                * freplace prog entry --tailcall-> tail callee prog entry.
>> +                */
>> +               bpf_prog_put(prog);
>> +               return ERR_PTR(-EBUSY);
>> +       }
>> +
>>         return prog;
>>  }
>>
>>  static void prog_fd_array_put_ptr(struct bpf_map *map, void *ptr, bool need_defer)
>>  {
>> +       struct bpf_prog *prog = ptr;
>> +
>> +       mutex_lock(&prog->aux->ext_mutex);
>> +       prog->aux->prog_array_member_cnt--;
>> +       mutex_unlock(&prog->aux->ext_mutex);
>>         /* bpf_prog is freed after one RCU or tasks trace grace period */
>> -       bpf_prog_put(ptr);
>> +       bpf_prog_put(prog);
>>  }
>>
>>  static u32 prog_fd_array_sys_lookup_elem(void *ptr)
>> diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
>> index 5e77c58e06010..233ea78f8f1bd 100644
>> --- a/kernel/bpf/core.c
>> +++ b/kernel/bpf/core.c
>> @@ -131,6 +131,7 @@ struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flag
>>         INIT_LIST_HEAD_RCU(&fp->aux->ksym_prefix.lnode);
>>  #endif
>>         mutex_init(&fp->aux->used_maps_mutex);
>> +       mutex_init(&fp->aux->ext_mutex);
>>         mutex_init(&fp->aux->dst_mutex);
>>
>>         return fp;
>> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
>> index a8f1808a1ca54..4a5a44bbb5f50 100644
>> --- a/kernel/bpf/syscall.c
>> +++ b/kernel/bpf/syscall.c
>> @@ -3212,15 +3212,21 @@ static void bpf_tracing_link_release(struct bpf_link *link)
>>  {
>>         struct bpf_tracing_link *tr_link =
>>                 container_of(link, struct bpf_tracing_link, link.link);
>> +       struct bpf_prog *tgt_prog = tr_link->tgt_prog;
>>
>> -       WARN_ON_ONCE(bpf_trampoline_unlink_prog(&tr_link->link,
>> -                                               tr_link->trampoline));
>> +       if (link->prog->type == BPF_PROG_TYPE_EXT)
>> +               WARN_ON_ONCE(bpf_extension_unlink_prog(&tr_link->link,
>> +                                                      tr_link->trampoline,
>> +                                                      tgt_prog));
>> +       else
>> +               WARN_ON_ONCE(bpf_trampoline_unlink_prog(&tr_link->link,
>> +                                                       tr_link->trampoline));
> 
> This early demux into two helpers looks odd.
> I think it would be cleaner to add this logic to bpf_trampoline_unlink_prog()
> Just extend it with an extra prog argument.
> 

No problem. I can extend bpf_trampoline_unlink_prog().

>>         bpf_trampoline_put(tr_link->trampoline);
>>
>>         /* tgt_prog is NULL if target is a kernel function */
>> -       if (tr_link->tgt_prog)
>> -               bpf_prog_put(tr_link->tgt_prog);
>> +       if (tgt_prog)
>> +               bpf_prog_put(tgt_prog);
>>  }
>>
>>  static void bpf_tracing_link_dealloc(struct bpf_link *link)
>> @@ -3354,7 +3360,7 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog,
>>          *   in prog->aux
>>          *
>>          * - if prog->aux->dst_trampoline is NULL, the program has already been
>> -         *   attached to a target and its initial target was cleared (below)
>> +        *   attached to a target and its initial target was cleared (below)
>>          *
>>          * - if tgt_prog != NULL, the caller specified tgt_prog_fd +
>>          *   target_btf_id using the link_create API.
>> @@ -3429,7 +3435,10 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog,
>>         if (err)
>>                 goto out_unlock;
>>
>> -       err = bpf_trampoline_link_prog(&link->link, tr);
>> +       if (prog->type == BPF_PROG_TYPE_EXT)
>> +               err = bpf_extension_link_prog(&link->link, tr, tgt_prog);
>> +       else
>> +               err = bpf_trampoline_link_prog(&link->link, tr);
> 
> same here.
> 

Ack.

>>         if (err) {
>>                 bpf_link_cleanup(&link_primer);
>>                 link = NULL;
>> diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
>> index f8302a5ca400d..b14f56046ad4e 100644
>> --- a/kernel/bpf/trampoline.c
>> +++ b/kernel/bpf/trampoline.c
>> @@ -580,6 +580,35 @@ int bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_trampoline
>>         return err;
>>  }
>>
>> +int bpf_extension_link_prog(struct bpf_tramp_link *link,
>> +                           struct bpf_trampoline *tr,
>> +                           struct bpf_prog *tgt_prog)
>> +{
>> +       struct bpf_prog_aux *aux = tgt_prog->aux;
>> +       int err;
>> +
>> +       mutex_lock(&aux->ext_mutex);
> 
> pls use guard(mutex) as I suggested earlier.
> 

Thanks very much to suggest it again.

guard(mutex) is new for me. And I can learn how to use it from
guard(mutex)(&arena->lock).

>> +       if (aux->prog_array_member_cnt) {
>> +               /* Program extensions can not extend target prog when the target
>> +                * prog has been updated to any prog_array map as tail callee.
>> +                * It's to prevent a potential infinite loop like:
>> +                * tgt prog entry -> tgt prog subprog -> freplace prog entry
>> +                * --tailcall-> tgt prog entry.
>> +                */
>> +               err = -EBUSY;
>> +               goto out_unlock;
>> +       }
>> +
>> +       err = bpf_trampoline_link_prog(link, tr);
>> +       if (err)
>> +               goto out_unlock;
>> +
>> +       aux->is_extended = true;
>> +out_unlock:
>> +       mutex_unlock(&aux->ext_mutex);
>> +       return err;
>> +}
>> +
>>  static int __bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr)
>>  {
>>         enum bpf_tramp_prog_type kind;
>> @@ -609,6 +638,20 @@ int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, struct bpf_trampolin
>>         return err;
>>  }
>>
>> +int bpf_extension_unlink_prog(struct bpf_tramp_link *link,
>> +                             struct bpf_trampoline *tr,
>> +                             struct bpf_prog *tgt_prog)
>> +{
>> +       struct bpf_prog_aux *aux = tgt_prog->aux;
>> +       int err;
>> +
>> +       mutex_lock(&aux->ext_mutex);
>> +       err = bpf_trampoline_unlink_prog(link, tr);
>> +       aux->is_extended = false;
>> +       mutex_unlock(&aux->ext_mutex);
> 
> same suggestions.
> 

Ack.

> pw-bot: cr

Thanks,
Leon
Xu Kuohai Oct. 10, 2024, 2:06 p.m. UTC | #6
On 10/9/2024 12:13 AM, Leon Hwang wrote:
> This patch prevents an infinite loop issue caused by combination of tailcall
> and freplace.
> 
> For example:
> 
> tc_bpf2bpf.c:
> 
> // SPDX-License-Identifier: GPL-2.0
> 
> \#include <linux/bpf.h>
> \#include <bpf/bpf_helpers.h>
> 
> __noinline
> int subprog_tc(struct __sk_buff *skb)
> {
> 	return skb->len * 2;
> }
> 
> SEC("tc")
> int entry_tc(struct __sk_buff *skb)
> {
> 	return subprog_tc(skb);
> }
> 
> char __license[] SEC("license") = "GPL";
> 
> tailcall_freplace.c:
> 
> // SPDX-License-Identifier: GPL-2.0
> 
> \#include <linux/bpf.h>
> \#include <bpf/bpf_helpers.h>
> 
> struct {
> 	__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
> 	__uint(max_entries, 1);
> 	__uint(key_size, sizeof(__u32));
> 	__uint(value_size, sizeof(__u32));
> } jmp_table SEC(".maps");
> 
> int count = 0;
> 
> SEC("freplace")
> int entry_freplace(struct __sk_buff *skb)
> {
> 	count++;
> 	bpf_tail_call_static(skb, &jmp_table, 0);
> 	return count;
> }
> 
> char __license[] SEC("license") = "GPL";
> 
> The attach target of entry_freplace is subprog_tc, and the tail callee
> in entry_freplace is entry_tc.
> 
> Then, the infinite loop will be entry_tc -> subprog_tc -> entry_freplace
> --tailcall-> entry_tc, because tail_call_cnt in entry_freplace will count
> from zero for every time of entry_freplace execution. Kernel will panic,
> like:
> 
> [   15.310490] BUG: TASK stack guard page was hit at (____ptrval____)
> (stack is (____ptrval____)..(____ptrval____))
> [   15.310490] Oops: stack guard page: 0000 [#1] PREEMPT SMP NOPTI
> [   15.310490] CPU: 1 PID: 89 Comm: test_progs Tainted: G           OE
>     6.10.0-rc6-g026dcdae8d3e-dirty #72
> [   15.310490] Hardware name: QEMU Ubuntu 24.04 PC (i440FX + PIIX,
> 1996), BIOS 1.16.3-debian-1.16.3-2 04/01/2014
> [   15.310490] RIP: 0010:bpf_prog_3a140cef239a4b4f_subprog_tail+0x14/0x53
> [   15.310490] Code: cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc
> cc cc cc cc cc f3 0f 1e fa 0f 1f 44 00 00 0f 1f 00 55 48 89 e5 f3 0f 1e
> fa <50> 50 53 41 55 48 89 fb 49 bd 00 2a 46 82 98 9c ff ff 48 89 df 4c
> [   15.310490] RSP: 0018:ffffb500c0aa0000 EFLAGS: 00000202
> [   15.310490] RAX: ffffb500c0aa0028 RBX: ffff9c98808b7e00 RCX:
> 0000000000008cb5
> [   15.310490] RDX: 0000000000000000 RSI: ffff9c9882462a00 RDI:
> ffff9c98808b7e00
> [   15.310490] RBP: ffffb500c0aa0000 R08: 0000000000000000 R09:
> 0000000000000000
> [   15.310490] R10: 0000000000000001 R11: 0000000000000000 R12:
> ffffb500c01af000
> [   15.310490] R13: ffffb500c01cd000 R14: 0000000000000000 R15:
> 0000000000000000
> [   15.310490] FS:  00007f133b665140(0000) GS:ffff9c98bbd00000(0000)
> knlGS:0000000000000000
> [   15.310490] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [   15.310490] CR2: ffffb500c0a9fff8 CR3: 0000000102478000 CR4:
> 00000000000006f0
> [   15.310490] Call Trace:
> [   15.310490]  <#DF>
> [   15.310490]  ? die+0x36/0x90
> [   15.310490]  ? handle_stack_overflow+0x4d/0x60
> [   15.310490]  ? exc_double_fault+0x117/0x1a0
> [   15.310490]  ? asm_exc_double_fault+0x23/0x30
> [   15.310490]  ? bpf_prog_3a140cef239a4b4f_subprog_tail+0x14/0x53
> [   15.310490]  </#DF>
> [   15.310490]  <TASK>
> [   15.310490]  bpf_prog_85781a698094722f_entry+0x4c/0x64
> [   15.310490]  bpf_prog_1c515f389a9059b4_entry2+0x19/0x1b
> [   15.310490]  ...
> [   15.310490]  bpf_prog_85781a698094722f_entry+0x4c/0x64
> [   15.310490]  bpf_prog_1c515f389a9059b4_entry2+0x19/0x1b
> [   15.310490]  bpf_test_run+0x210/0x370
> [   15.310490]  ? bpf_test_run+0x128/0x370
> [   15.310490]  bpf_prog_test_run_skb+0x388/0x7a0
> [   15.310490]  __sys_bpf+0xdbf/0x2c40
> [   15.310490]  ? clockevents_program_event+0x52/0xf0
> [   15.310490]  ? lock_release+0xbf/0x290
> [   15.310490]  __x64_sys_bpf+0x1e/0x30
> [   15.310490]  do_syscall_64+0x68/0x140
> [   15.310490]  entry_SYSCALL_64_after_hwframe+0x76/0x7e
> [   15.310490] RIP: 0033:0x7f133b52725d
> [   15.310490] Code: ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa
> 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f
> 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 8b bb 0d 00 f7 d8 64 89 01 48
> [   15.310490] RSP: 002b:00007ffddbc10258 EFLAGS: 00000206 ORIG_RAX:
> 0000000000000141
> [   15.310490] RAX: ffffffffffffffda RBX: 00007ffddbc10828 RCX:
> 00007f133b52725d
> [   15.310490] RDX: 0000000000000050 RSI: 00007ffddbc102a0 RDI:
> 000000000000000a
> [   15.310490] RBP: 00007ffddbc10270 R08: 0000000000000000 R09:
> 00007ffddbc102a0
> [   15.310490] R10: 0000000000000064 R11: 0000000000000206 R12:
> 0000000000000004
> [   15.310490] R13: 0000000000000000 R14: 0000558ec4c24890 R15:
> 00007f133b6ed000
> [   15.310490]  </TASK>
> [   15.310490] Modules linked in: bpf_testmod(OE)
> [   15.310490] ---[ end trace 0000000000000000 ]---
> [   15.310490] RIP: 0010:bpf_prog_3a140cef239a4b4f_subprog_tail+0x14/0x53
> [   15.310490] Code: cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc
> cc cc cc cc cc f3 0f 1e fa 0f 1f 44 00 00 0f 1f 00 55 48 89 e5 f3 0f 1e
> fa <50> 50 53 41 55 48 89 fb 49 bd 00 2a 46 82 98 9c ff ff 48 89 df 4c
> [   15.310490] RSP: 0018:ffffb500c0aa0000 EFLAGS: 00000202
> [   15.310490] RAX: ffffb500c0aa0028 RBX: ffff9c98808b7e00 RCX:
> 0000000000008cb5
> [   15.310490] RDX: 0000000000000000 RSI: ffff9c9882462a00 RDI:
> ffff9c98808b7e00
> [   15.310490] RBP: ffffb500c0aa0000 R08: 0000000000000000 R09:
> 0000000000000000
> [   15.310490] R10: 0000000000000001 R11: 0000000000000000 R12:
> ffffb500c01af000
> [   15.310490] R13: ffffb500c01cd000 R14: 0000000000000000 R15:
> 0000000000000000
> [   15.310490] FS:  00007f133b665140(0000) GS:ffff9c98bbd00000(0000)
> knlGS:0000000000000000
> [   15.310490] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [   15.310490] CR2: ffffb500c0a9fff8 CR3: 0000000102478000 CR4:
> 00000000000006f0
> [   15.310490] Kernel panic - not syncing: Fatal exception in interrupt
> [   15.310490] Kernel Offset: 0x30000000 from 0xffffffff81000000
> (relocation range: 0xffffffff80000000-0xffffffffbfffffff)
> 
> This patch prevents this panic by preventing updating extended prog to
> prog_array map and preventing extending a prog, which has been updated
> to prog_array map, with freplace prog.
> 
> If a prog or its subprog has been extended by freplace prog, the prog
> can not be updated to prog_array map.
> 
> If a prog has been updated to prog_array map, it or its subprog can not
> be extended by freplace prog.
> 
> BTW, fix a minor code style issue by replacing 8 spaces with a tab.
> 
> Reported-by: kernel test robot <lkp@intel.com>
> Closes: https://lore.kernel.org/oe-kbuild-all/202410080455.vy5GT8Vz-lkp@intel.com/
> Signed-off-by: Leon Hwang <leon.hwang@linux.dev>
> ---
>   include/linux/bpf.h     | 21 ++++++++++++++++++++
>   kernel/bpf/arraymap.c   | 23 +++++++++++++++++++++-
>   kernel/bpf/core.c       |  1 +
>   kernel/bpf/syscall.c    | 21 ++++++++++++++------
>   kernel/bpf/trampoline.c | 43 +++++++++++++++++++++++++++++++++++++++++
>   5 files changed, 102 insertions(+), 7 deletions(-)
> 
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index 19d8ca8ac960f..213a68c59bdf7 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -1294,6 +1294,12 @@ bool __bpf_dynptr_is_rdonly(const struct bpf_dynptr_kern *ptr);
>   #ifdef CONFIG_BPF_JIT
>   int bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr);
>   int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr);
> +int bpf_extension_link_prog(struct bpf_tramp_link *link,
> +			    struct bpf_trampoline *tr,
> +			    struct bpf_prog *tgt_prog);
> +int bpf_extension_unlink_prog(struct bpf_tramp_link *link,
> +			      struct bpf_trampoline *tr,
> +			      struct bpf_prog *tgt_prog);
>   struct bpf_trampoline *bpf_trampoline_get(u64 key,
>   					  struct bpf_attach_target_info *tgt_info);
>   void bpf_trampoline_put(struct bpf_trampoline *tr);
> @@ -1383,6 +1389,18 @@ static inline int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link,
>   {
>   	return -ENOTSUPP;
>   }
> +static inline int bpf_extension_link_prog(struct bpf_tramp_link *link,
> +			    struct bpf_trampoline *tr,
> +			    struct bpf_prog *tgt_prog)
> +{
> +	return -ENOTSUPP;
> +}
> +static inline int bpf_extension_unlink_prog(struct bpf_tramp_link *link,
> +			      struct bpf_trampoline *tr,
> +			      struct bpf_prog *tgt_prog)
> +{
> +	return -ENOTSUPP;
> +}
>   static inline struct bpf_trampoline *bpf_trampoline_get(u64 key,
>   							struct bpf_attach_target_info *tgt_info)
>   {
> @@ -1483,6 +1501,9 @@ struct bpf_prog_aux {
>   	bool xdp_has_frags;
>   	bool exception_cb;
>   	bool exception_boundary;
> +	bool is_extended; /* true if extended by freplace program */
> +	u64 prog_array_member_cnt; /* counts how many times as member of prog_array */
> +	struct mutex ext_mutex; /* mutex for is_extended and prog_array_member_cnt */
>   	struct bpf_arena *arena;
>   	/* BTF_KIND_FUNC_PROTO for valid attach_btf_id */
>   	const struct btf_type *attach_func_proto;
> diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
> index 79660e3fca4c1..f9bd63a74eee7 100644
> --- a/kernel/bpf/arraymap.c
> +++ b/kernel/bpf/arraymap.c
> @@ -947,6 +947,7 @@ static void *prog_fd_array_get_ptr(struct bpf_map *map,
>   				   struct file *map_file, int fd)
>   {
>   	struct bpf_prog *prog = bpf_prog_get(fd);
> +	bool is_extended;
>   
>   	if (IS_ERR(prog))
>   		return prog;
> @@ -956,13 +957,33 @@ static void *prog_fd_array_get_ptr(struct bpf_map *map,
>   		return ERR_PTR(-EINVAL);
>   	}
>   
> +	mutex_lock(&prog->aux->ext_mutex);
> +	is_extended = prog->aux->is_extended;
> +	if (!is_extended)
> +		prog->aux->prog_array_member_cnt++;
> +	mutex_unlock(&prog->aux->ext_mutex);
> +	if (is_extended) {
> +		/* Extended prog can not be tail callee. It's to prevent a
> +		 * potential infinite loop like:
> +		 * tail callee prog entry -> tail callee prog subprog ->
> +		 * freplace prog entry --tailcall-> tail callee prog entry.
> +		 */
> +		bpf_prog_put(prog);
> +		return ERR_PTR(-EBUSY);
> +	}

Sorry for the delay.

IIUC, extension prog should not be tailcalled independently, so an error
should also be returned if prog->type == BPF_PROG_TYPE_EXT

> +
>   	return prog;
>   }
>   
>   static void prog_fd_array_put_ptr(struct bpf_map *map, void *ptr, bool need_defer)
>   {
> +	struct bpf_prog *prog = ptr;
> +
> +	mutex_lock(&prog->aux->ext_mutex);
> +	prog->aux->prog_array_member_cnt--;
> +	mutex_unlock(&prog->aux->ext_mutex);
>   	/* bpf_prog is freed after one RCU or tasks trace grace period */
> -	bpf_prog_put(ptr);
> +	bpf_prog_put(prog);
>   }
>   
>   static u32 prog_fd_array_sys_lookup_elem(void *ptr)
> diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
> index 5e77c58e06010..233ea78f8f1bd 100644
> --- a/kernel/bpf/core.c
> +++ b/kernel/bpf/core.c
> @@ -131,6 +131,7 @@ struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flag
>   	INIT_LIST_HEAD_RCU(&fp->aux->ksym_prefix.lnode);
>   #endif
>   	mutex_init(&fp->aux->used_maps_mutex);
> +	mutex_init(&fp->aux->ext_mutex);
>   	mutex_init(&fp->aux->dst_mutex);
>   
>   	return fp;
> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
> index a8f1808a1ca54..4a5a44bbb5f50 100644
> --- a/kernel/bpf/syscall.c
> +++ b/kernel/bpf/syscall.c
> @@ -3212,15 +3212,21 @@ static void bpf_tracing_link_release(struct bpf_link *link)
>   {
>   	struct bpf_tracing_link *tr_link =
>   		container_of(link, struct bpf_tracing_link, link.link);
> +	struct bpf_prog *tgt_prog = tr_link->tgt_prog;
>   
> -	WARN_ON_ONCE(bpf_trampoline_unlink_prog(&tr_link->link,
> -						tr_link->trampoline));
> +	if (link->prog->type == BPF_PROG_TYPE_EXT)
> +		WARN_ON_ONCE(bpf_extension_unlink_prog(&tr_link->link,
> +						       tr_link->trampoline,
> +						       tgt_prog));
> +	else
> +		WARN_ON_ONCE(bpf_trampoline_unlink_prog(&tr_link->link,
> +							tr_link->trampoline));
>   
>   	bpf_trampoline_put(tr_link->trampoline);
>   
>   	/* tgt_prog is NULL if target is a kernel function */
> -	if (tr_link->tgt_prog)
> -		bpf_prog_put(tr_link->tgt_prog);
> +	if (tgt_prog)
> +		bpf_prog_put(tgt_prog);
>   }
>   
>   static void bpf_tracing_link_dealloc(struct bpf_link *link)
> @@ -3354,7 +3360,7 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog,
>   	 *   in prog->aux
>   	 *
>   	 * - if prog->aux->dst_trampoline is NULL, the program has already been
> -         *   attached to a target and its initial target was cleared (below)
> +	 *   attached to a target and its initial target was cleared (below)
>   	 *
>   	 * - if tgt_prog != NULL, the caller specified tgt_prog_fd +
>   	 *   target_btf_id using the link_create API.
> @@ -3429,7 +3435,10 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog,
>   	if (err)
>   		goto out_unlock;
>   
> -	err = bpf_trampoline_link_prog(&link->link, tr);
> +	if (prog->type == BPF_PROG_TYPE_EXT)
> +		err = bpf_extension_link_prog(&link->link, tr, tgt_prog);
> +	else
> +		err = bpf_trampoline_link_prog(&link->link, tr);
>   	if (err) {
>   		bpf_link_cleanup(&link_primer);
>   		link = NULL;
> diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
> index f8302a5ca400d..b14f56046ad4e 100644
> --- a/kernel/bpf/trampoline.c
> +++ b/kernel/bpf/trampoline.c
> @@ -580,6 +580,35 @@ int bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_trampoline
>   	return err;
>   }
>   
> +int bpf_extension_link_prog(struct bpf_tramp_link *link,
> +			    struct bpf_trampoline *tr,
> +			    struct bpf_prog *tgt_prog)
> +{
> +	struct bpf_prog_aux *aux = tgt_prog->aux;
> +	int err;
> +
> +	mutex_lock(&aux->ext_mutex);
> +	if (aux->prog_array_member_cnt) {
> +		/* Program extensions can not extend target prog when the target
> +		 * prog has been updated to any prog_array map as tail callee.
> +		 * It's to prevent a potential infinite loop like:
> +		 * tgt prog entry -> tgt prog subprog -> freplace prog entry
> +		 * --tailcall-> tgt prog entry.
> +		 */
> +		err = -EBUSY;
> +		goto out_unlock;
> +	}
> +
> +	err = bpf_trampoline_link_prog(link, tr);
> +	if (err)
> +		goto out_unlock;
> +
> +	aux->is_extended = true;
> +out_unlock:
> +	mutex_unlock(&aux->ext_mutex);
> +	return err;
> +}
> +
>   static int __bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr)
>   {
>   	enum bpf_tramp_prog_type kind;
> @@ -609,6 +638,20 @@ int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, struct bpf_trampolin
>   	return err;
>   }
>   
> +int bpf_extension_unlink_prog(struct bpf_tramp_link *link,
> +			      struct bpf_trampoline *tr,
> +			      struct bpf_prog *tgt_prog)
> +{
> +	struct bpf_prog_aux *aux = tgt_prog->aux;
> +	int err;
> +
> +	mutex_lock(&aux->ext_mutex);
> +	err = bpf_trampoline_unlink_prog(link, tr);
> +	aux->is_extended = false;
> +	mutex_unlock(&aux->ext_mutex);
> +	return err;
> +}
> +
>   #if defined(CONFIG_CGROUP_BPF) && defined(CONFIG_BPF_LSM)
>   static void bpf_shim_tramp_link_release(struct bpf_link *link)
>   {
Leon Hwang Oct. 10, 2024, 2:27 p.m. UTC | #7
On 2024/10/10 22:06, Xu Kuohai wrote:
> On 10/9/2024 12:13 AM, Leon Hwang wrote:
>> This patch prevents an infinite loop issue caused by combination of
>> tailcall
>> and freplace.
>>
>> For example:
>>
>> tc_bpf2bpf.c:
>>
>> // SPDX-License-Identifier: GPL-2.0
>>
>> \#include <linux/bpf.h>
>> \#include <bpf/bpf_helpers.h>
>>
>> __noinline
>> int subprog_tc(struct __sk_buff *skb)
>> {
>>     return skb->len * 2;
>> }
>>
>> SEC("tc")
>> int entry_tc(struct __sk_buff *skb)
>> {
>>     return subprog_tc(skb);
>> }
>>
>> char __license[] SEC("license") = "GPL";
>>
>> tailcall_freplace.c:
>>
>> // SPDX-License-Identifier: GPL-2.0
>>
>> \#include <linux/bpf.h>
>> \#include <bpf/bpf_helpers.h>
>>
>> struct {
>>     __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
>>     __uint(max_entries, 1);
>>     __uint(key_size, sizeof(__u32));
>>     __uint(value_size, sizeof(__u32));
>> } jmp_table SEC(".maps");
>>
>> int count = 0;
>>
>> SEC("freplace")
>> int entry_freplace(struct __sk_buff *skb)
>> {
>>     count++;
>>     bpf_tail_call_static(skb, &jmp_table, 0);
>>     return count;
>> }
>>
>> char __license[] SEC("license") = "GPL";
>>
>> The attach target of entry_freplace is subprog_tc, and the tail callee
>> in entry_freplace is entry_tc.
>>
>> Then, the infinite loop will be entry_tc -> subprog_tc -> entry_freplace
>> --tailcall-> entry_tc, because tail_call_cnt in entry_freplace will count
>> from zero for every time of entry_freplace execution. Kernel will panic,
>> like:
>>
>> [   15.310490] BUG: TASK stack guard page was hit at (____ptrval____)
>> (stack is (____ptrval____)..(____ptrval____))
>> [   15.310490] Oops: stack guard page: 0000 [#1] PREEMPT SMP NOPTI
>> [   15.310490] CPU: 1 PID: 89 Comm: test_progs Tainted: G           OE
>>     6.10.0-rc6-g026dcdae8d3e-dirty #72
>> [   15.310490] Hardware name: QEMU Ubuntu 24.04 PC (i440FX + PIIX,
>> 1996), BIOS 1.16.3-debian-1.16.3-2 04/01/2014
>> [   15.310490] RIP: 0010:bpf_prog_3a140cef239a4b4f_subprog_tail+0x14/0x53
>> [   15.310490] Code: cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc
>> cc cc cc cc cc f3 0f 1e fa 0f 1f 44 00 00 0f 1f 00 55 48 89 e5 f3 0f 1e
>> fa <50> 50 53 41 55 48 89 fb 49 bd 00 2a 46 82 98 9c ff ff 48 89 df 4c
>> [   15.310490] RSP: 0018:ffffb500c0aa0000 EFLAGS: 00000202
>> [   15.310490] RAX: ffffb500c0aa0028 RBX: ffff9c98808b7e00 RCX:
>> 0000000000008cb5
>> [   15.310490] RDX: 0000000000000000 RSI: ffff9c9882462a00 RDI:
>> ffff9c98808b7e00
>> [   15.310490] RBP: ffffb500c0aa0000 R08: 0000000000000000 R09:
>> 0000000000000000
>> [   15.310490] R10: 0000000000000001 R11: 0000000000000000 R12:
>> ffffb500c01af000
>> [   15.310490] R13: ffffb500c01cd000 R14: 0000000000000000 R15:
>> 0000000000000000
>> [   15.310490] FS:  00007f133b665140(0000) GS:ffff9c98bbd00000(0000)
>> knlGS:0000000000000000
>> [   15.310490] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
>> [   15.310490] CR2: ffffb500c0a9fff8 CR3: 0000000102478000 CR4:
>> 00000000000006f0
>> [   15.310490] Call Trace:
>> [   15.310490]  <#DF>
>> [   15.310490]  ? die+0x36/0x90
>> [   15.310490]  ? handle_stack_overflow+0x4d/0x60
>> [   15.310490]  ? exc_double_fault+0x117/0x1a0
>> [   15.310490]  ? asm_exc_double_fault+0x23/0x30
>> [   15.310490]  ? bpf_prog_3a140cef239a4b4f_subprog_tail+0x14/0x53
>> [   15.310490]  </#DF>
>> [   15.310490]  <TASK>
>> [   15.310490]  bpf_prog_85781a698094722f_entry+0x4c/0x64
>> [   15.310490]  bpf_prog_1c515f389a9059b4_entry2+0x19/0x1b
>> [   15.310490]  ...
>> [   15.310490]  bpf_prog_85781a698094722f_entry+0x4c/0x64
>> [   15.310490]  bpf_prog_1c515f389a9059b4_entry2+0x19/0x1b
>> [   15.310490]  bpf_test_run+0x210/0x370
>> [   15.310490]  ? bpf_test_run+0x128/0x370
>> [   15.310490]  bpf_prog_test_run_skb+0x388/0x7a0
>> [   15.310490]  __sys_bpf+0xdbf/0x2c40
>> [   15.310490]  ? clockevents_program_event+0x52/0xf0
>> [   15.310490]  ? lock_release+0xbf/0x290
>> [   15.310490]  __x64_sys_bpf+0x1e/0x30
>> [   15.310490]  do_syscall_64+0x68/0x140
>> [   15.310490]  entry_SYSCALL_64_after_hwframe+0x76/0x7e
>> [   15.310490] RIP: 0033:0x7f133b52725d
>> [   15.310490] Code: ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa
>> 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f
>> 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 8b bb 0d 00 f7 d8 64 89 01 48
>> [   15.310490] RSP: 002b:00007ffddbc10258 EFLAGS: 00000206 ORIG_RAX:
>> 0000000000000141
>> [   15.310490] RAX: ffffffffffffffda RBX: 00007ffddbc10828 RCX:
>> 00007f133b52725d
>> [   15.310490] RDX: 0000000000000050 RSI: 00007ffddbc102a0 RDI:
>> 000000000000000a
>> [   15.310490] RBP: 00007ffddbc10270 R08: 0000000000000000 R09:
>> 00007ffddbc102a0
>> [   15.310490] R10: 0000000000000064 R11: 0000000000000206 R12:
>> 0000000000000004
>> [   15.310490] R13: 0000000000000000 R14: 0000558ec4c24890 R15:
>> 00007f133b6ed000
>> [   15.310490]  </TASK>
>> [   15.310490] Modules linked in: bpf_testmod(OE)
>> [   15.310490] ---[ end trace 0000000000000000 ]---
>> [   15.310490] RIP: 0010:bpf_prog_3a140cef239a4b4f_subprog_tail+0x14/0x53
>> [   15.310490] Code: cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc
>> cc cc cc cc cc f3 0f 1e fa 0f 1f 44 00 00 0f 1f 00 55 48 89 e5 f3 0f 1e
>> fa <50> 50 53 41 55 48 89 fb 49 bd 00 2a 46 82 98 9c ff ff 48 89 df 4c
>> [   15.310490] RSP: 0018:ffffb500c0aa0000 EFLAGS: 00000202
>> [   15.310490] RAX: ffffb500c0aa0028 RBX: ffff9c98808b7e00 RCX:
>> 0000000000008cb5
>> [   15.310490] RDX: 0000000000000000 RSI: ffff9c9882462a00 RDI:
>> ffff9c98808b7e00
>> [   15.310490] RBP: ffffb500c0aa0000 R08: 0000000000000000 R09:
>> 0000000000000000
>> [   15.310490] R10: 0000000000000001 R11: 0000000000000000 R12:
>> ffffb500c01af000
>> [   15.310490] R13: ffffb500c01cd000 R14: 0000000000000000 R15:
>> 0000000000000000
>> [   15.310490] FS:  00007f133b665140(0000) GS:ffff9c98bbd00000(0000)
>> knlGS:0000000000000000
>> [   15.310490] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
>> [   15.310490] CR2: ffffb500c0a9fff8 CR3: 0000000102478000 CR4:
>> 00000000000006f0
>> [   15.310490] Kernel panic - not syncing: Fatal exception in interrupt
>> [   15.310490] Kernel Offset: 0x30000000 from 0xffffffff81000000
>> (relocation range: 0xffffffff80000000-0xffffffffbfffffff)
>>
>> This patch prevents this panic by preventing updating extended prog to
>> prog_array map and preventing extending a prog, which has been updated
>> to prog_array map, with freplace prog.
>>
>> If a prog or its subprog has been extended by freplace prog, the prog
>> can not be updated to prog_array map.
>>
>> If a prog has been updated to prog_array map, it or its subprog can not
>> be extended by freplace prog.
>>
>> BTW, fix a minor code style issue by replacing 8 spaces with a tab.
>>
>> Reported-by: kernel test robot <lkp@intel.com>
>> Closes: https://lore.kernel.org/oe-kbuild-all/202410080455.vy5GT8Vz-
>> lkp@intel.com/
>> Signed-off-by: Leon Hwang <leon.hwang@linux.dev>
>> ---
>>   include/linux/bpf.h     | 21 ++++++++++++++++++++
>>   kernel/bpf/arraymap.c   | 23 +++++++++++++++++++++-
>>   kernel/bpf/core.c       |  1 +
>>   kernel/bpf/syscall.c    | 21 ++++++++++++++------
>>   kernel/bpf/trampoline.c | 43 +++++++++++++++++++++++++++++++++++++++++
>>   5 files changed, 102 insertions(+), 7 deletions(-)
>>
>> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
>> index 19d8ca8ac960f..213a68c59bdf7 100644
>> --- a/include/linux/bpf.h
>> +++ b/include/linux/bpf.h
>> @@ -1294,6 +1294,12 @@ bool __bpf_dynptr_is_rdonly(const struct
>> bpf_dynptr_kern *ptr);
>>   #ifdef CONFIG_BPF_JIT
>>   int bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct
>> bpf_trampoline *tr);
>>   int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, struct
>> bpf_trampoline *tr);
>> +int bpf_extension_link_prog(struct bpf_tramp_link *link,
>> +                struct bpf_trampoline *tr,
>> +                struct bpf_prog *tgt_prog);
>> +int bpf_extension_unlink_prog(struct bpf_tramp_link *link,
>> +                  struct bpf_trampoline *tr,
>> +                  struct bpf_prog *tgt_prog);
>>   struct bpf_trampoline *bpf_trampoline_get(u64 key,
>>                         struct bpf_attach_target_info *tgt_info);
>>   void bpf_trampoline_put(struct bpf_trampoline *tr);
>> @@ -1383,6 +1389,18 @@ static inline int
>> bpf_trampoline_unlink_prog(struct bpf_tramp_link *link,
>>   {
>>       return -ENOTSUPP;
>>   }
>> +static inline int bpf_extension_link_prog(struct bpf_tramp_link *link,
>> +                struct bpf_trampoline *tr,
>> +                struct bpf_prog *tgt_prog)
>> +{
>> +    return -ENOTSUPP;
>> +}
>> +static inline int bpf_extension_unlink_prog(struct bpf_tramp_link *link,
>> +                  struct bpf_trampoline *tr,
>> +                  struct bpf_prog *tgt_prog)
>> +{
>> +    return -ENOTSUPP;
>> +}
>>   static inline struct bpf_trampoline *bpf_trampoline_get(u64 key,
>>                               struct bpf_attach_target_info *tgt_info)
>>   {
>> @@ -1483,6 +1501,9 @@ struct bpf_prog_aux {
>>       bool xdp_has_frags;
>>       bool exception_cb;
>>       bool exception_boundary;
>> +    bool is_extended; /* true if extended by freplace program */
>> +    u64 prog_array_member_cnt; /* counts how many times as member of
>> prog_array */
>> +    struct mutex ext_mutex; /* mutex for is_extended and
>> prog_array_member_cnt */
>>       struct bpf_arena *arena;
>>       /* BTF_KIND_FUNC_PROTO for valid attach_btf_id */
>>       const struct btf_type *attach_func_proto;
>> diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
>> index 79660e3fca4c1..f9bd63a74eee7 100644
>> --- a/kernel/bpf/arraymap.c
>> +++ b/kernel/bpf/arraymap.c
>> @@ -947,6 +947,7 @@ static void *prog_fd_array_get_ptr(struct bpf_map
>> *map,
>>                      struct file *map_file, int fd)
>>   {
>>       struct bpf_prog *prog = bpf_prog_get(fd);
>> +    bool is_extended;
>>         if (IS_ERR(prog))
>>           return prog;
>> @@ -956,13 +957,33 @@ static void *prog_fd_array_get_ptr(struct
>> bpf_map *map,
>>           return ERR_PTR(-EINVAL);
>>       }
>>   +    mutex_lock(&prog->aux->ext_mutex);
>> +    is_extended = prog->aux->is_extended;
>> +    if (!is_extended)
>> +        prog->aux->prog_array_member_cnt++;
>> +    mutex_unlock(&prog->aux->ext_mutex);
>> +    if (is_extended) {
>> +        /* Extended prog can not be tail callee. It's to prevent a
>> +         * potential infinite loop like:
>> +         * tail callee prog entry -> tail callee prog subprog ->
>> +         * freplace prog entry --tailcall-> tail callee prog entry.
>> +         */
>> +        bpf_prog_put(prog);
>> +        return ERR_PTR(-EBUSY);
>> +    }
> 
> Sorry for the delay.
> 
> IIUC, extension prog should not be tailcalled independently, so an error
> should also be returned if prog->type == BPF_PROG_TYPE_EXT
> 

I think it's OK that freplace prog tail calls itself, see patch #3.

Thanks,
Leon
diff mbox series

Patch

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 19d8ca8ac960f..213a68c59bdf7 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1294,6 +1294,12 @@  bool __bpf_dynptr_is_rdonly(const struct bpf_dynptr_kern *ptr);
 #ifdef CONFIG_BPF_JIT
 int bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr);
 int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr);
+int bpf_extension_link_prog(struct bpf_tramp_link *link,
+			    struct bpf_trampoline *tr,
+			    struct bpf_prog *tgt_prog);
+int bpf_extension_unlink_prog(struct bpf_tramp_link *link,
+			      struct bpf_trampoline *tr,
+			      struct bpf_prog *tgt_prog);
 struct bpf_trampoline *bpf_trampoline_get(u64 key,
 					  struct bpf_attach_target_info *tgt_info);
 void bpf_trampoline_put(struct bpf_trampoline *tr);
@@ -1383,6 +1389,18 @@  static inline int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link,
 {
 	return -ENOTSUPP;
 }
+static inline int bpf_extension_link_prog(struct bpf_tramp_link *link,
+			    struct bpf_trampoline *tr,
+			    struct bpf_prog *tgt_prog)
+{
+	return -ENOTSUPP;
+}
+static inline int bpf_extension_unlink_prog(struct bpf_tramp_link *link,
+			      struct bpf_trampoline *tr,
+			      struct bpf_prog *tgt_prog)
+{
+	return -ENOTSUPP;
+}
 static inline struct bpf_trampoline *bpf_trampoline_get(u64 key,
 							struct bpf_attach_target_info *tgt_info)
 {
@@ -1483,6 +1501,9 @@  struct bpf_prog_aux {
 	bool xdp_has_frags;
 	bool exception_cb;
 	bool exception_boundary;
+	bool is_extended; /* true if extended by freplace program */
+	u64 prog_array_member_cnt; /* counts how many times as member of prog_array */
+	struct mutex ext_mutex; /* mutex for is_extended and prog_array_member_cnt */
 	struct bpf_arena *arena;
 	/* BTF_KIND_FUNC_PROTO for valid attach_btf_id */
 	const struct btf_type *attach_func_proto;
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 79660e3fca4c1..f9bd63a74eee7 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -947,6 +947,7 @@  static void *prog_fd_array_get_ptr(struct bpf_map *map,
 				   struct file *map_file, int fd)
 {
 	struct bpf_prog *prog = bpf_prog_get(fd);
+	bool is_extended;
 
 	if (IS_ERR(prog))
 		return prog;
@@ -956,13 +957,33 @@  static void *prog_fd_array_get_ptr(struct bpf_map *map,
 		return ERR_PTR(-EINVAL);
 	}
 
+	mutex_lock(&prog->aux->ext_mutex);
+	is_extended = prog->aux->is_extended;
+	if (!is_extended)
+		prog->aux->prog_array_member_cnt++;
+	mutex_unlock(&prog->aux->ext_mutex);
+	if (is_extended) {
+		/* Extended prog can not be tail callee. It's to prevent a
+		 * potential infinite loop like:
+		 * tail callee prog entry -> tail callee prog subprog ->
+		 * freplace prog entry --tailcall-> tail callee prog entry.
+		 */
+		bpf_prog_put(prog);
+		return ERR_PTR(-EBUSY);
+	}
+
 	return prog;
 }
 
 static void prog_fd_array_put_ptr(struct bpf_map *map, void *ptr, bool need_defer)
 {
+	struct bpf_prog *prog = ptr;
+
+	mutex_lock(&prog->aux->ext_mutex);
+	prog->aux->prog_array_member_cnt--;
+	mutex_unlock(&prog->aux->ext_mutex);
 	/* bpf_prog is freed after one RCU or tasks trace grace period */
-	bpf_prog_put(ptr);
+	bpf_prog_put(prog);
 }
 
 static u32 prog_fd_array_sys_lookup_elem(void *ptr)
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 5e77c58e06010..233ea78f8f1bd 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -131,6 +131,7 @@  struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flag
 	INIT_LIST_HEAD_RCU(&fp->aux->ksym_prefix.lnode);
 #endif
 	mutex_init(&fp->aux->used_maps_mutex);
+	mutex_init(&fp->aux->ext_mutex);
 	mutex_init(&fp->aux->dst_mutex);
 
 	return fp;
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index a8f1808a1ca54..4a5a44bbb5f50 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -3212,15 +3212,21 @@  static void bpf_tracing_link_release(struct bpf_link *link)
 {
 	struct bpf_tracing_link *tr_link =
 		container_of(link, struct bpf_tracing_link, link.link);
+	struct bpf_prog *tgt_prog = tr_link->tgt_prog;
 
-	WARN_ON_ONCE(bpf_trampoline_unlink_prog(&tr_link->link,
-						tr_link->trampoline));
+	if (link->prog->type == BPF_PROG_TYPE_EXT)
+		WARN_ON_ONCE(bpf_extension_unlink_prog(&tr_link->link,
+						       tr_link->trampoline,
+						       tgt_prog));
+	else
+		WARN_ON_ONCE(bpf_trampoline_unlink_prog(&tr_link->link,
+							tr_link->trampoline));
 
 	bpf_trampoline_put(tr_link->trampoline);
 
 	/* tgt_prog is NULL if target is a kernel function */
-	if (tr_link->tgt_prog)
-		bpf_prog_put(tr_link->tgt_prog);
+	if (tgt_prog)
+		bpf_prog_put(tgt_prog);
 }
 
 static void bpf_tracing_link_dealloc(struct bpf_link *link)
@@ -3354,7 +3360,7 @@  static int bpf_tracing_prog_attach(struct bpf_prog *prog,
 	 *   in prog->aux
 	 *
 	 * - if prog->aux->dst_trampoline is NULL, the program has already been
-         *   attached to a target and its initial target was cleared (below)
+	 *   attached to a target and its initial target was cleared (below)
 	 *
 	 * - if tgt_prog != NULL, the caller specified tgt_prog_fd +
 	 *   target_btf_id using the link_create API.
@@ -3429,7 +3435,10 @@  static int bpf_tracing_prog_attach(struct bpf_prog *prog,
 	if (err)
 		goto out_unlock;
 
-	err = bpf_trampoline_link_prog(&link->link, tr);
+	if (prog->type == BPF_PROG_TYPE_EXT)
+		err = bpf_extension_link_prog(&link->link, tr, tgt_prog);
+	else
+		err = bpf_trampoline_link_prog(&link->link, tr);
 	if (err) {
 		bpf_link_cleanup(&link_primer);
 		link = NULL;
diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
index f8302a5ca400d..b14f56046ad4e 100644
--- a/kernel/bpf/trampoline.c
+++ b/kernel/bpf/trampoline.c
@@ -580,6 +580,35 @@  int bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_trampoline
 	return err;
 }
 
+int bpf_extension_link_prog(struct bpf_tramp_link *link,
+			    struct bpf_trampoline *tr,
+			    struct bpf_prog *tgt_prog)
+{
+	struct bpf_prog_aux *aux = tgt_prog->aux;
+	int err;
+
+	mutex_lock(&aux->ext_mutex);
+	if (aux->prog_array_member_cnt) {
+		/* Program extensions can not extend target prog when the target
+		 * prog has been updated to any prog_array map as tail callee.
+		 * It's to prevent a potential infinite loop like:
+		 * tgt prog entry -> tgt prog subprog -> freplace prog entry
+		 * --tailcall-> tgt prog entry.
+		 */
+		err = -EBUSY;
+		goto out_unlock;
+	}
+
+	err = bpf_trampoline_link_prog(link, tr);
+	if (err)
+		goto out_unlock;
+
+	aux->is_extended = true;
+out_unlock:
+	mutex_unlock(&aux->ext_mutex);
+	return err;
+}
+
 static int __bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr)
 {
 	enum bpf_tramp_prog_type kind;
@@ -609,6 +638,20 @@  int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, struct bpf_trampolin
 	return err;
 }
 
+int bpf_extension_unlink_prog(struct bpf_tramp_link *link,
+			      struct bpf_trampoline *tr,
+			      struct bpf_prog *tgt_prog)
+{
+	struct bpf_prog_aux *aux = tgt_prog->aux;
+	int err;
+
+	mutex_lock(&aux->ext_mutex);
+	err = bpf_trampoline_unlink_prog(link, tr);
+	aux->is_extended = false;
+	mutex_unlock(&aux->ext_mutex);
+	return err;
+}
+
 #if defined(CONFIG_CGROUP_BPF) && defined(CONFIG_BPF_LSM)
 static void bpf_shim_tramp_link_release(struct bpf_link *link)
 {