diff mbox series

[bpf-next,4/4] selftests/bpf: validate jit behaviour for tail calls

Message ID 20240809010518.1137758-5-eddyz87@gmail.com (mailing list archive)
State Superseded
Delegated to: BPF
Headers show
Series __jited_x86 test tag to check x86 assembly after jit | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for bpf-next, async
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 42 this patch: 42
netdev/build_tools success Errors and warnings before: 10 this patch: 10
netdev/cc_maintainers warning 9 maintainers not CCed: kpsingh@kernel.org shuah@kernel.org haoluo@google.com john.fastabend@gmail.com jolsa@kernel.org linux-kselftest@vger.kernel.org mykolal@fb.com song@kernel.org sdf@fomichev.me
netdev/build_clang success Errors and warnings before: 44 this patch: 44
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 49 this patch: 49
netdev/checkpatch warning CHECK: Lines should not end with a '(' CHECK: No space is necessary after a cast WARNING: added, moved or deleted file(s), does MAINTAINERS need updating?
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-next-PR success PR summary
bpf/vmtest-bpf-next-VM_Test-42 success Logs for x86_64-llvm-18 / veristat
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-0 success Logs for Lint
bpf/vmtest-bpf-next-VM_Test-2 success Logs for Unittests
bpf/vmtest-bpf-next-VM_Test-3 success Logs for Validate matrix.py
bpf/vmtest-bpf-next-VM_Test-5 success Logs for aarch64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-4 success Logs for aarch64-gcc / build / build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-12 success Logs for s390x-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-10 success Logs for aarch64-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-15 success Logs for s390x-gcc / test (test_verifier, false, 360) / test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-11 success Logs for s390x-gcc / build / build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-9 success Logs for aarch64-gcc / test (test_verifier, false, 360) / test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-16 success Logs for s390x-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-17 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-18 success Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-19 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-27 success Logs for x86_64-llvm-17 / build / build for x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-28 success Logs for x86_64-llvm-17 / build-release / build for x86_64 with llvm-17-O2
bpf/vmtest-bpf-next-VM_Test-33 success Logs for x86_64-llvm-17 / veristat
bpf/vmtest-bpf-next-VM_Test-34 success Logs for x86_64-llvm-18 / build / build for x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-35 success Logs for x86_64-llvm-18 / build-release / build for x86_64 with llvm-18-O2
bpf/vmtest-bpf-next-VM_Test-41 success Logs for x86_64-llvm-18 / veristat
bpf/vmtest-bpf-next-VM_Test-6 success Logs for aarch64-gcc / test (test_maps, false, 360) / test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-7 success Logs for aarch64-gcc / test (test_progs, false, 360) / test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-8 success Logs for aarch64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-13 success Logs for s390x-gcc / test (test_progs, false, 360) / test_progs on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-14 success Logs for s390x-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-24 success Logs for x86_64-gcc / test (test_progs_parallel, true, 30) / test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-25 success Logs for x86_64-gcc / test (test_verifier, false, 360) / test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-26 success Logs for x86_64-gcc / veristat / veristat on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-29 success Logs for x86_64-llvm-17 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-32 success Logs for x86_64-llvm-17 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-30 success Logs for x86_64-llvm-17 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-36 success Logs for x86_64-llvm-18 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-38 success Logs for x86_64-llvm-18 / test (test_progs_cpuv4, false, 360) / test_progs_cpuv4 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-39 success Logs for x86_64-llvm-18 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-40 success Logs for x86_64-llvm-18 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-23 success Logs for x86_64-gcc / test (test_progs_no_alu32_parallel, true, 30) / test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-22 success Logs for x86_64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-21 success Logs for x86_64-gcc / test (test_progs, false, 360) / test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-20 success Logs for x86_64-gcc / test (test_maps, false, 360) / test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-31 success Logs for x86_64-llvm-17 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-37 success Logs for x86_64-llvm-18 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-18

Commit Message

Eduard Zingerman Aug. 9, 2024, 1:05 a.m. UTC
A program calling sub-program which does a tail call.
The idea is to verify instructions generated by jit for tail calls:
- in program and sub-program prologues;
- for subprogram call instruction;
- for tail call itself.

Signed-off-by: Eduard Zingerman <eddyz87@gmail.com>
---
 .../selftests/bpf/prog_tests/verifier.c       |   2 +
 .../bpf/progs/verifier_tailcall_jit.c         | 103 ++++++++++++++++++
 2 files changed, 105 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/progs/verifier_tailcall_jit.c

Comments

Andrii Nakryiko Aug. 15, 2024, 9:15 p.m. UTC | #1
On Thu, Aug 8, 2024 at 6:05 PM Eduard Zingerman <eddyz87@gmail.com> wrote:
>
> A program calling sub-program which does a tail call.
> The idea is to verify instructions generated by jit for tail calls:
> - in program and sub-program prologues;
> - for subprogram call instruction;
> - for tail call itself.
>
> Signed-off-by: Eduard Zingerman <eddyz87@gmail.com>
> ---
>  .../selftests/bpf/prog_tests/verifier.c       |   2 +
>  .../bpf/progs/verifier_tailcall_jit.c         | 103 ++++++++++++++++++
>  2 files changed, 105 insertions(+)
>  create mode 100644 tools/testing/selftests/bpf/progs/verifier_tailcall_jit.c
>
> diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c
> index f8f546eba488..cf3662dbd24f 100644
> --- a/tools/testing/selftests/bpf/prog_tests/verifier.c
> +++ b/tools/testing/selftests/bpf/prog_tests/verifier.c
> @@ -75,6 +75,7 @@
>  #include "verifier_stack_ptr.skel.h"
>  #include "verifier_subprog_precision.skel.h"
>  #include "verifier_subreg.skel.h"
> +#include "verifier_tailcall_jit.skel.h"
>  #include "verifier_typedef.skel.h"
>  #include "verifier_uninit.skel.h"
>  #include "verifier_unpriv.skel.h"
> @@ -198,6 +199,7 @@ void test_verifier_spin_lock(void)            { RUN(verifier_spin_lock); }
>  void test_verifier_stack_ptr(void)            { RUN(verifier_stack_ptr); }
>  void test_verifier_subprog_precision(void)    { RUN(verifier_subprog_precision); }
>  void test_verifier_subreg(void)               { RUN(verifier_subreg); }
> +void test_verifier_tailcall_jit(void)         { RUN(verifier_tailcall_jit); }
>  void test_verifier_typedef(void)              { RUN(verifier_typedef); }
>  void test_verifier_uninit(void)               { RUN(verifier_uninit); }
>  void test_verifier_unpriv(void)               { RUN(verifier_unpriv); }
> diff --git a/tools/testing/selftests/bpf/progs/verifier_tailcall_jit.c b/tools/testing/selftests/bpf/progs/verifier_tailcall_jit.c
> new file mode 100644
> index 000000000000..1a09c76d7be0
> --- /dev/null
> +++ b/tools/testing/selftests/bpf/progs/verifier_tailcall_jit.c
> @@ -0,0 +1,103 @@
> +// SPDX-License-Identifier: GPL-2.0
> +#include <linux/bpf.h>
> +#include <bpf/bpf_helpers.h>
> +#include "bpf_misc.h"
> +
> +int main(void);
> +
> +struct {
> +       __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
> +       __uint(max_entries, 1);
> +       __uint(key_size, sizeof(__u32));
> +       __array(values, void (void));
> +} jmp_table SEC(".maps") = {
> +       .values = {
> +               [0] = (void *) &main,
> +       },
> +};
> +
> +__noinline __auxiliary
> +static __naked int sub(void)
> +{
> +       asm volatile (
> +       "r2 = %[jmp_table] ll;"
> +       "r3 = 0;"
> +       "call 12;"
> +       "exit;"
> +       :
> +       : __imm_addr(jmp_table)
> +       : __clobber_all);
> +}
> +
> +__success
> +/* program entry for main(), regular function prologue */
> +__jit_x86("    endbr64")
> +__jit_x86("    nopl    (%rax,%rax)")
> +__jit_x86("    xorq    %rax, %rax")
> +__jit_x86("    pushq   %rbp")
> +__jit_x86("    movq    %rsp, %rbp")

I'm a bit too lazy to fish it out of the code, so I'll just ask.
Does matching of __jit_x86() string behave in the same way as __msg().
I.e., there could be unexpected lines that would be skipped, as long
as we find a match for each __jit_x86() one?


Isn't that a bit counter-intuitive and potentially dangerous behavior
for checking disassembly? If my assumption is correct, maybe we should
add some sort of `__jit_x86("...")` placeholder to explicitly mark
that we allow some amount of lines to be skipped, but otherwise be
strict and require matching line-by-line?

> +/* tail call prologue for program:
> + * - establish memory location for tail call counter at &rbp[-8];
> + * - spill tail_call_cnt_ptr at &rbp[-16];
> + * - expect tail call counter to be passed in rax;
> + * - for entry program rax is a raw counter, value < 33;
> + * - for tail called program rax is tail_call_cnt_ptr (value > 33).
> + */
> +__jit_x86("    endbr64")
> +__jit_x86("    cmpq    $0x21, %rax")
> +__jit_x86("    ja      L0")
> +__jit_x86("    pushq   %rax")
> +__jit_x86("    movq    %rsp, %rax")
> +__jit_x86("    jmp     L1")
> +__jit_x86("L0: pushq   %rax")                  /* rbp[-8]  = rax         */
> +__jit_x86("L1: pushq   %rax")                  /* rbp[-16] = rax         */
> +/* on subprogram call restore rax to be tail_call_cnt_ptr from rbp[-16]
> + * (cause original rax might be clobbered by this point)
> + */
> +__jit_x86("    movq    -0x10(%rbp), %rax")
> +__jit_x86("    callq   0x[0-9a-f]\\+")         /* call to sub()          */
> +__jit_x86("    xorl    %eax, %eax")
> +__jit_x86("    leave")
> +__jit_x86("    retq")
> +/* subprogram entry for sub(), regular function prologue */
> +__jit_x86("    endbr64")
> +__jit_x86("    nopl    (%rax,%rax)")
> +__jit_x86("    nopl    (%rax)")
> +__jit_x86("    pushq   %rbp")
> +__jit_x86("    movq    %rsp, %rbp")
> +/* tail call prologue for subprogram address of tail call counter
> + * stored at rbp[-16].
> + */
> +__jit_x86("    endbr64")
> +__jit_x86("    pushq   %rax")                  /* rbp[-8]  = rax          */
> +__jit_x86("    pushq   %rax")                  /* rbp[-16] = rax          */
> +__jit_x86("    movabsq $-0x[0-9a-f]\\+, %rsi") /* r2 = &jmp_table         */
> +__jit_x86("    xorl    %edx, %edx")            /* r3 = 0                  */
> +/* bpf_tail_call implementation:
> + * - load tail_call_cnt_ptr from rbp[-16];
> + * - if *tail_call_cnt_ptr < 33, increment it and jump to target;
> + * - otherwise do nothing.
> + */
> +__jit_x86("    movq    -0x10(%rbp), %rax")
> +__jit_x86("    cmpq    $0x21, (%rax)")
> +__jit_x86("    jae     L0")
> +__jit_x86("    nopl    (%rax,%rax)")
> +__jit_x86("    addq    $0x1, (%rax)")          /* *tail_call_cnt_ptr += 1 */
> +__jit_x86("    popq    %rax")
> +__jit_x86("    popq    %rax")
> +__jit_x86("    jmp     0x[0-9a-f]\\+")         /* jump to tail call tgt   */
> +__jit_x86("L0: leave")
> +__jit_x86("    retq")
> +SEC("tc")
> +__naked int main(void)
> +{
> +       asm volatile (
> +       "call %[sub];"
> +       "r0 = 0;"
> +       "exit;"
> +       :
> +       : __imm(sub)
> +       : __clobber_all);
> +}
> +
> +char __license[] SEC("license") = "GPL";
> --
> 2.45.2
>
Yonghong Song Aug. 15, 2024, 9:32 p.m. UTC | #2
On 8/8/24 6:05 PM, Eduard Zingerman wrote:
> A program calling sub-program which does a tail call.
> The idea is to verify instructions generated by jit for tail calls:
> - in program and sub-program prologues;
> - for subprogram call instruction;
> - for tail call itself.
>
> Signed-off-by: Eduard Zingerman <eddyz87@gmail.com>
> ---
>   .../selftests/bpf/prog_tests/verifier.c       |   2 +
>   .../bpf/progs/verifier_tailcall_jit.c         | 103 ++++++++++++++++++
>   2 files changed, 105 insertions(+)
>   create mode 100644 tools/testing/selftests/bpf/progs/verifier_tailcall_jit.c
>
> diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c
> index f8f546eba488..cf3662dbd24f 100644
> --- a/tools/testing/selftests/bpf/prog_tests/verifier.c
> +++ b/tools/testing/selftests/bpf/prog_tests/verifier.c
> @@ -75,6 +75,7 @@
>   #include "verifier_stack_ptr.skel.h"
>   #include "verifier_subprog_precision.skel.h"
>   #include "verifier_subreg.skel.h"
> +#include "verifier_tailcall_jit.skel.h"
>   #include "verifier_typedef.skel.h"
>   #include "verifier_uninit.skel.h"
>   #include "verifier_unpriv.skel.h"
> @@ -198,6 +199,7 @@ void test_verifier_spin_lock(void)            { RUN(verifier_spin_lock); }
>   void test_verifier_stack_ptr(void)            { RUN(verifier_stack_ptr); }
>   void test_verifier_subprog_precision(void)    { RUN(verifier_subprog_precision); }
>   void test_verifier_subreg(void)               { RUN(verifier_subreg); }
> +void test_verifier_tailcall_jit(void)         { RUN(verifier_tailcall_jit); }
>   void test_verifier_typedef(void)              { RUN(verifier_typedef); }
>   void test_verifier_uninit(void)               { RUN(verifier_uninit); }
>   void test_verifier_unpriv(void)               { RUN(verifier_unpriv); }
> diff --git a/tools/testing/selftests/bpf/progs/verifier_tailcall_jit.c b/tools/testing/selftests/bpf/progs/verifier_tailcall_jit.c
> new file mode 100644
> index 000000000000..1a09c76d7be0
> --- /dev/null
> +++ b/tools/testing/selftests/bpf/progs/verifier_tailcall_jit.c
> @@ -0,0 +1,103 @@
> +// SPDX-License-Identifier: GPL-2.0
> +#include <linux/bpf.h>
> +#include <bpf/bpf_helpers.h>
> +#include "bpf_misc.h"
> +
> +int main(void);
> +
> +struct {
> +	__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
> +	__uint(max_entries, 1);
> +	__uint(key_size, sizeof(__u32));
> +	__array(values, void (void));
> +} jmp_table SEC(".maps") = {
> +	.values = {
> +		[0] = (void *) &main,
> +	},
> +};
> +
> +__noinline __auxiliary
> +static __naked int sub(void)
> +{
> +	asm volatile (
> +	"r2 = %[jmp_table] ll;"
> +	"r3 = 0;"
> +	"call 12;"
> +	"exit;"
> +	:
> +	: __imm_addr(jmp_table)
> +	: __clobber_all);
> +}
> +
> +__success
> +/* program entry for main(), regular function prologue */
> +__jit_x86("	endbr64")
> +__jit_x86("	nopl	(%rax,%rax)")
> +__jit_x86("	xorq	%rax, %rax")
> +__jit_x86("	pushq	%rbp")
> +__jit_x86("	movq	%rsp, %rbp")

How do we handle multi architectures (x86, arm64, riscv64)?

Do we support the following?

__jit_x86(...)
__jit_x86(...)
...

__jit_arm64(...)
__jit_arm64(...)
...

__jit_riscv64(...)
__jit_riscv64(...)
...

Or we can use macro like

#ifdef __TARGET_ARCH_x86
__jit(...)
...
#elif defined(__TARGET_ARCH_arm64)
__jit(...)
...
#elif defined(...)

Or we can have

__arch_x86_64
__jit(...) // code for x86
...

__arch_arm64
__jit(...) // code for arm64
...

__arch_riscv
__jit(...) // code for riscv
...

For xlated, different archs could share the same code.
Bot for jited code, different arch has different encoding,
so we need to figure out a format suitable for multiple
archs.

> +/* tail call prologue for program:
> + * - establish memory location for tail call counter at &rbp[-8];
> + * - spill tail_call_cnt_ptr at &rbp[-16];
> + * - expect tail call counter to be passed in rax;
> + * - for entry program rax is a raw counter, value < 33;
> + * - for tail called program rax is tail_call_cnt_ptr (value > 33).
> + */
> +__jit_x86("	endbr64")
> +__jit_x86("	cmpq	$0x21, %rax")
> +__jit_x86("	ja	L0")
> +__jit_x86("	pushq	%rax")
> +__jit_x86("	movq	%rsp, %rax")
> +__jit_x86("	jmp	L1")
> +__jit_x86("L0:	pushq	%rax")			/* rbp[-8]  = rax         */
> +__jit_x86("L1:	pushq	%rax")			/* rbp[-16] = rax         */
> +/* on subprogram call restore rax to be tail_call_cnt_ptr from rbp[-16]
> + * (cause original rax might be clobbered by this point)
> + */
[...]
Eduard Zingerman Aug. 15, 2024, 9:42 p.m. UTC | #3
On Thu, 2024-08-15 at 14:15 -0700, Andrii Nakryiko wrote:

[...]

> > +/* program entry for main(), regular function prologue */
> > +__jit_x86("    endbr64")
> > +__jit_x86("    nopl    (%rax,%rax)")
> > +__jit_x86("    xorq    %rax, %rax")
> > +__jit_x86("    pushq   %rbp")
> > +__jit_x86("    movq    %rsp, %rbp")
> 
> I'm a bit too lazy to fish it out of the code, so I'll just ask.
> Does matching of __jit_x86() string behave in the same way as __msg().
> I.e., there could be unexpected lines that would be skipped, as long
> as we find a match for each __jit_x86() one?

Yes, behaves same way as __msg().
 
> Isn't that a bit counter-intuitive and potentially dangerous behavior
> for checking disassembly? If my assumption is correct, maybe we should
> add some sort of `__jit_x86("...")` placeholder to explicitly mark
> that we allow some amount of lines to be skipped, but otherwise be
> strict and require matching line-by-line?

This is a valid concern.
What you suggest with "..." looks good.
Another option is to follow llvm-lit conventions and add
__jit_x86_next("<whatever>"), which would only match if pattern
is on line below any previous match.
(And later add __jit_x86_next_not, and make these *_not, *_next
 variants accessible for every __msg-like macro).
 
Link: https://llvm.org/docs/CommandGuide/FileCheck.html#the-check-next-directive

[...]
Eduard Zingerman Aug. 15, 2024, 9:47 p.m. UTC | #4
On Thu, 2024-08-15 at 14:32 -0700, Yonghong Song wrote:

[...]

> > +__success
> > +/* program entry for main(), regular function prologue */
> > +__jit_x86("	endbr64")
> > +__jit_x86("	nopl	(%rax,%rax)")
> > +__jit_x86("	xorq	%rax, %rax")
> > +__jit_x86("	pushq	%rbp")
> > +__jit_x86("	movq	%rsp, %rbp")
> 
> How do we handle multi architectures (x86, arm64, riscv64)?
> 
> Do we support the following?
> 
> __jit_x86(...)
> __jit_x86(...)
> ...
> 
> __jit_arm64(...)
> __jit_arm64(...)
> ...
> 
> __jit_riscv64(...)
> __jit_riscv64(...)
> ...

^^^^
I was thinking about this variant (and this is how things are now implemented).
Whenever there would be a need for that, just add one more arch
specific macro.

> 
> Or we can use macro like
> 
> #ifdef __TARGET_ARCH_x86
> __jit(...)
> ...
> #elif defined(__TARGET_ARCH_arm64)
> __jit(...)
> ...
> #elif defined(...)
> 
> Or we can have
> 
> __arch_x86_64
> __jit(...) // code for x86
> ...
> 
> __arch_arm64
> __jit(...) // code for arm64
> ...
> 
> __arch_riscv
> __jit(...) // code for riscv
> ...

This also looks good, and will work better with "*_next" and "*_not"
variants if we are going to borrow from llvm-lit/FileCheck.

> For xlated, different archs could share the same code.
> But for jited code, different arch has different encoding,
> so we need to figure out a format suitable for multiple
> archs.

I'll go with whatever way mailing list likes better.

[...]
Andrii Nakryiko Aug. 15, 2024, 10:07 p.m. UTC | #5
On Thu, Aug 15, 2024 at 2:42 PM Eduard Zingerman <eddyz87@gmail.com> wrote:
>
> On Thu, 2024-08-15 at 14:15 -0700, Andrii Nakryiko wrote:
>
> [...]
>
> > > +/* program entry for main(), regular function prologue */
> > > +__jit_x86("    endbr64")
> > > +__jit_x86("    nopl    (%rax,%rax)")
> > > +__jit_x86("    xorq    %rax, %rax")
> > > +__jit_x86("    pushq   %rbp")
> > > +__jit_x86("    movq    %rsp, %rbp")
> >
> > I'm a bit too lazy to fish it out of the code, so I'll just ask.
> > Does matching of __jit_x86() string behave in the same way as __msg().
> > I.e., there could be unexpected lines that would be skipped, as long
> > as we find a match for each __jit_x86() one?
>
> Yes, behaves same way as __msg().
>
> > Isn't that a bit counter-intuitive and potentially dangerous behavior
> > for checking disassembly? If my assumption is correct, maybe we should
> > add some sort of `__jit_x86("...")` placeholder to explicitly mark
> > that we allow some amount of lines to be skipped, but otherwise be
> > strict and require matching line-by-line?
>
> This is a valid concern.
> What you suggest with "..." looks good.

I'd add just that for now. _not and _next might be useful in the
future, but meh.

> Another option is to follow llvm-lit conventions and add
> __jit_x86_next("<whatever>"), which would only match if pattern
> is on line below any previous match.
> (And later add __jit_x86_next_not, and make these *_not, *_next
>  variants accessible for every __msg-like macro).
>
> Link: https://llvm.org/docs/CommandGuide/FileCheck.html#the-check-next-directive
>
> [...]
>
Andrii Nakryiko Aug. 15, 2024, 10:09 p.m. UTC | #6
On Thu, Aug 15, 2024 at 2:47 PM Eduard Zingerman <eddyz87@gmail.com> wrote:
>
> On Thu, 2024-08-15 at 14:32 -0700, Yonghong Song wrote:
>
> [...]
>
> > > +__success
> > > +/* program entry for main(), regular function prologue */
> > > +__jit_x86("        endbr64")
> > > +__jit_x86("        nopl    (%rax,%rax)")
> > > +__jit_x86("        xorq    %rax, %rax")
> > > +__jit_x86("        pushq   %rbp")
> > > +__jit_x86("        movq    %rsp, %rbp")
> >
> > How do we hanble multi architectures (x86, arm64, riscv64)?
> >
> > Do we support the following?
> >
> > __jit_x86(...)
> > __jit_x86(...)
> > ...
> >
> > __jit_arm64(...)
> > __jit_arm64(...)
> > ...
> >
> > __jit_riscv64(...)
> > __jit_riscv64(...)
> > ...
>
> ^^^^
> I was thinking about this variant (and this is how things are now implemented).
> Whenever there would be a need for that, just add one more arch
> specific macro.
>
> >
> > Or we can use macro like
> >
> > #ifdef __TARGET_ARCH_x86
> > __jit(...)
> > ...
> > #elif defined(__TARGET_ARCH_arm64)
> > __jit(...)
> > ...
> > #elif defined(...)
> >
> > Or we can have
> >
> > __arch_x86_64
> > __jit(...) // code for x86
> > ...
> >
> > __arch_arm64
> > __jit(...) // code for arm64
> > ...
> >
> > __arch_riscv
> > __jit(...) // code for riscv
> > ...
>
> This also looks good, and will work better with "*_next" and "*_not"
> variants if we are going to borrow from llvm-lit/FileCheck.
>

shorter __jit() and then arch-specific __arch_blah seems pretty clean,
so if it's not too hard, let's do this.

BTW, in your implementation you are collecting expected messages for
all specified architectures, but really there will always be just one
valid subset. So maybe just discard all non-host architectures upfront
during "parsing" of decl tags?

> > For xlated, different archs could share the same code.
> > Bot for jited code, different arch has different encoding,
> > so we need to figure out a format suitable for multiple
> > archs.
>
> I'll go with whatever way mailing list likes better.
>
> [...]
>
Eduard Zingerman Aug. 15, 2024, 10:10 p.m. UTC | #7
On Thu, 2024-08-15 at 15:07 -0700, Andrii Nakryiko wrote:

[...]

> > > Isn't that a bit counter-intuitive and potentially dangerous behavior
> > > for checking disassembly? If my assumption is correct, maybe we should
> > > add some sort of `__jit_x86("...")` placeholder to explicitly mark
> > > that we allow some amount of lines to be skipped, but otherwise be
> > > strict and require matching line-by-line?
> > 
> > This is a valid concern.
> > What you suggest with "..." looks good.
> 
> I'd add just that for now. _not and _next might be useful in the
> future, but meh.

If we commit to "..." now and decide to add _not and _next in the
future this would make __jit macro special. Which is not ideal, imo.
(on the other hand, tests can always be adjusted).

[...]
Andrii Nakryiko Aug. 15, 2024, 10:14 p.m. UTC | #8
On Thu, Aug 15, 2024 at 3:11 PM Eduard Zingerman <eddyz87@gmail.com> wrote:
>
> On Thu, 2024-08-15 at 15:07 -0700, Andrii Nakryiko wrote:
>
> [...]
>
> > > > Isn't that a bit counter-intuitive and potentially dangerous behavior
> > > > for checking disassembly? If my assumption is correct, maybe we should
> > > > add some sort of `__jit_x86("...")` placeholder to explicitly mark
> > > > that we allow some amount of lines to be skipped, but otherwise be
> > > > strict and require matching line-by-line?
> > >
> > > This is a valid concern.
> > > What you suggest with "..." looks good.
> >
> > I'd add just that for now. _not and _next might be useful in the
> > future, but meh.
>
> If we commit to "..." now and decide to add _not and _next in the
> future this would make __jit macro special. Which is not ideal, imo.
> (on the other hand, tests can always be adjusted).
>

It is already special with a different flavor of regex. And I assume
we won't have that many jit-testing tests, so yeah, could be adjusted,
if necessary. But just in general, while __msg() works with large
verifier logs, __jit() is much more narrow-focused, so even if it
behaves differently from __msg() I don't really see much difference.

But we also have __xlated() with similar semantics, so I'd say we
should keep __jit() and __xlated() behaving similarly.

> [...]
>
Eduard Zingerman Aug. 15, 2024, 10:16 p.m. UTC | #9
On Thu, 2024-08-15 at 15:09 -0700, Andrii Nakryiko wrote:

[...]

> > > Or we can use macro like
> > > 
> > > #ifdef __TARGET_ARCH_x86
> > > __jit(...)
> > > ...
> > > #elif defined(__TARGET_ARCH_arm64)
> > > __jit(...)
> > > ...
> > > #elif defined(...)
> > > 
> > > Or we can have
> > > 
> > > __arch_x86_64
> > > __jit(...) // code for x86
> > > ...
> > > 
> > > __arch_arm64
> > > __jit(...) // code for arm64
> > > ...
> > > 
> > > __arch_riscv
> > > __jit(...) // code for riscv
> > > ...
> > 
> > This also looks good, and will work better with "*_next" and "*_not"
> > variants if we are going to borrow from llvm-lit/FileCheck.
> > 
> 
> shorter __jit() and then arch-specific __arch_blah seems pretty clean,
> so if it's not too hard, let's do this.

Ok, let's go this way.

> BTW, in your implementation you are collecting expected messages for
> all specified architectures, but really there will always be just one
> valid subset. So maybe just discard all non-host architectures upfront
> during "parsing" of decl tags?

I kinda wanted to keep parsing logic separate from processing logic,
but yeah, makes sense.

[...]
Eduard Zingerman Aug. 15, 2024, 10:19 p.m. UTC | #10
On Thu, 2024-08-15 at 15:14 -0700, Andrii Nakryiko wrote:

[...]

> It is already special with a different flavor of regex. And I assume
> we won't have that many jit-testing tests, so yeah, could be adjusted,
> if necessary. But just in general, while __msg() works with large
> verifier logs, __jit() is much more narrow-focused, so even if it
> behaves differently from __msg() I don't really see much difference.
> 
> But we also have __xlated() with similar semantics, so I'd say we
> should keep __jit() and __xlated() behaving similarly.

Ok, makes sense.
diff mbox series

Patch

diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c
index f8f546eba488..cf3662dbd24f 100644
--- a/tools/testing/selftests/bpf/prog_tests/verifier.c
+++ b/tools/testing/selftests/bpf/prog_tests/verifier.c
@@ -75,6 +75,7 @@ 
 #include "verifier_stack_ptr.skel.h"
 #include "verifier_subprog_precision.skel.h"
 #include "verifier_subreg.skel.h"
+#include "verifier_tailcall_jit.skel.h"
 #include "verifier_typedef.skel.h"
 #include "verifier_uninit.skel.h"
 #include "verifier_unpriv.skel.h"
@@ -198,6 +199,7 @@  void test_verifier_spin_lock(void)            { RUN(verifier_spin_lock); }
 void test_verifier_stack_ptr(void)            { RUN(verifier_stack_ptr); }
 void test_verifier_subprog_precision(void)    { RUN(verifier_subprog_precision); }
 void test_verifier_subreg(void)               { RUN(verifier_subreg); }
+void test_verifier_tailcall_jit(void)         { RUN(verifier_tailcall_jit); }
 void test_verifier_typedef(void)              { RUN(verifier_typedef); }
 void test_verifier_uninit(void)               { RUN(verifier_uninit); }
 void test_verifier_unpriv(void)               { RUN(verifier_unpriv); }
diff --git a/tools/testing/selftests/bpf/progs/verifier_tailcall_jit.c b/tools/testing/selftests/bpf/progs/verifier_tailcall_jit.c
new file mode 100644
index 000000000000..1a09c76d7be0
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_tailcall_jit.c
@@ -0,0 +1,103 @@ 
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+int main(void);
+
+struct {
+	__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+	__uint(max_entries, 1);
+	__uint(key_size, sizeof(__u32));
+	__array(values, void (void));
+} jmp_table SEC(".maps") = {
+	.values = {
+		[0] = (void *) &main,
+	},
+};
+
+__noinline __auxiliary
+static __naked int sub(void)
+{
+	asm volatile (
+	"r2 = %[jmp_table] ll;"
+	"r3 = 0;"
+	"call 12;"
+	"exit;"
+	:
+	: __imm_addr(jmp_table)
+	: __clobber_all);
+}
+
+__success
+/* program entry for main(), regular function prologue */
+__jit_x86("	endbr64")
+__jit_x86("	nopl	(%rax,%rax)")
+__jit_x86("	xorq	%rax, %rax")
+__jit_x86("	pushq	%rbp")
+__jit_x86("	movq	%rsp, %rbp")
+/* tail call prologue for program:
+ * - establish memory location for tail call counter at &rbp[-8];
+ * - spill tail_call_cnt_ptr at &rbp[-16];
+ * - expect tail call counter to be passed in rax;
+ * - for entry program rax is a raw counter, value < 33;
+ * - for tail called program rax is tail_call_cnt_ptr (value > 33).
+ */
+__jit_x86("	endbr64")
+__jit_x86("	cmpq	$0x21, %rax")
+__jit_x86("	ja	L0")
+__jit_x86("	pushq	%rax")
+__jit_x86("	movq	%rsp, %rax")
+__jit_x86("	jmp	L1")
+__jit_x86("L0:	pushq	%rax")			/* rbp[-8]  = rax         */
+__jit_x86("L1:	pushq	%rax")			/* rbp[-16] = rax         */
+/* on subprogram call restore rax to be tail_call_cnt_ptr from rbp[-16]
+ * (cause original rax might be clobbered by this point)
+ */
+__jit_x86("	movq	-0x10(%rbp), %rax")
+__jit_x86("	callq	0x[0-9a-f]\\+")		/* call to sub()          */
+__jit_x86("	xorl	%eax, %eax")
+__jit_x86("	leave")
+__jit_x86("	retq")
+/* subprogram entry for sub(), regular function prologue */
+__jit_x86("	endbr64")
+__jit_x86("	nopl	(%rax,%rax)")
+__jit_x86("	nopl	(%rax)")
+__jit_x86("	pushq	%rbp")
+__jit_x86("	movq	%rsp, %rbp")
+/* tail call prologue for subprogram address of tail call counter
+ * stored at rbp[-16].
+ */
+__jit_x86("	endbr64")
+__jit_x86("	pushq	%rax")			/* rbp[-8]  = rax          */
+__jit_x86("	pushq	%rax")			/* rbp[-16] = rax          */
+__jit_x86("	movabsq	$-0x[0-9a-f]\\+, %rsi")	/* r2 = &jmp_table         */
+__jit_x86("	xorl	%edx, %edx")		/* r3 = 0                  */
+/* bpf_tail_call implementation:
+ * - load tail_call_cnt_ptr from rbp[-16];
+ * - if *tail_call_cnt_ptr < 33, increment it and jump to target;
+ * - otherwise do nothing.
+ */
+__jit_x86("	movq	-0x10(%rbp), %rax")
+__jit_x86("	cmpq	$0x21, (%rax)")
+__jit_x86("	jae	L0")
+__jit_x86("	nopl	(%rax,%rax)")
+__jit_x86("	addq	$0x1, (%rax)")		/* *tail_call_cnt_ptr += 1 */
+__jit_x86("	popq	%rax")
+__jit_x86("	popq	%rax")
+__jit_x86("	jmp	0x[0-9a-f]\\+")		/* jump to tail call tgt   */
+__jit_x86("L0:	leave")
+__jit_x86("	retq")
+SEC("tc")
+__naked int main(void)
+{
+	asm volatile (
+	"call %[sub];"
+	"r0 = 0;"
+	"exit;"
+	:
+	: __imm(sub)
+	: __clobber_all);
+}
+
+char __license[] SEC("license") = "GPL";