diff mbox series

[v7,24/36] x86/ftrace: Enable HAVE_FUNCTION_GRAPH_FREGS

Message ID 170723231592.502590.12367006830540525214.stgit@devnote2 (mailing list archive)
State Not Applicable
Headers show
Series tracing: fprobe: function_graph: Multi-function graph and fprobe on fgraph | expand

Checks

Context Check Description
bpf/vmtest-bpf-next-PR success PR summary
bpf/vmtest-bpf-next-VM_Test-0 success Logs for Lint
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-2 success Logs for Unittests
bpf/vmtest-bpf-next-VM_Test-3 success Logs for Validate matrix.py
bpf/vmtest-bpf-next-VM_Test-5 success Logs for aarch64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-4 success Logs for aarch64-gcc / build / build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-10 success Logs for aarch64-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-12 success Logs for s390x-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-13 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-15 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-9 success Logs for aarch64-gcc / test (test_verifier, false, 360) / test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-6 success Logs for aarch64-gcc / test (test_maps, false, 360) / test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-7 fail Logs for aarch64-gcc / test (test_progs, false, 360) / test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-8 fail Logs for aarch64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-14 success Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-16 success Logs for x86_64-gcc / test (test_maps, false, 360) / test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-11 success Logs for s390x-gcc / build / build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-17 success Logs for s390x-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-18 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-19 success Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-20 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-21 success Logs for x86_64-gcc / test (test_maps, false, 360) / test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-22 fail Logs for x86_64-gcc / test (test_progs, false, 360) / test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-23 fail Logs for x86_64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-24 success Logs for x86_64-gcc / test (test_progs_no_alu32_parallel, true, 30) / test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-25 success Logs for x86_64-gcc / test (test_progs_parallel, true, 30) / test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-26 success Logs for x86_64-gcc / test (test_verifier, false, 360) / test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-27 success Logs for x86_64-gcc / veristat / veristat on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-28 success Logs for x86_64-llvm-17 / build / build for x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-29 success Logs for x86_64-llvm-17 / build-release / build for x86_64 with llvm-17 and -O2 optimization
bpf/vmtest-bpf-next-VM_Test-30 success Logs for x86_64-llvm-17 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-31 fail Logs for x86_64-llvm-17 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-32 fail Logs for x86_64-llvm-17 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-33 success Logs for x86_64-llvm-17 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-34 success Logs for x86_64-llvm-17 / veristat
bpf/vmtest-bpf-next-VM_Test-35 success Logs for x86_64-llvm-18 / build / build for x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-36 success Logs for x86_64-llvm-18 / build-release / build for x86_64 with llvm-18 and -O2 optimization
bpf/vmtest-bpf-next-VM_Test-37 success Logs for x86_64-llvm-18 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-38 fail Logs for x86_64-llvm-18 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-39 fail Logs for x86_64-llvm-18 / test (test_progs_cpuv4, false, 360) / test_progs_cpuv4 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-40 fail Logs for x86_64-llvm-18 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-41 success Logs for x86_64-llvm-18 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-42 success Logs for x86_64-llvm-18 / veristat
netdev/series_format fail Series longer than 15 patches (and no cover letter)
netdev/tree_selection success Guessed tree name to be net-next
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 8 this patch: 8
netdev/build_tools success No tools touched, skip
netdev/cc_maintainers warning 5 maintainers not CCed: mingo@redhat.com dave.hansen@linux.intel.com x86@kernel.org hpa@zytor.com bp@alien8.de
netdev/build_clang fail Errors and warnings before: 1066 this patch: 1259
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 6358 this patch: 6358
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 59 lines checked
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Masami Hiramatsu (Google) Feb. 6, 2024, 3:11 p.m. UTC
From: Masami Hiramatsu (Google) <mhiramat@kernel.org>

Support HAVE_FUNCTION_GRAPH_FREGS on x86-64, which saves ftrace_regs
on the stack in ftrace_graph return trampoline so that the callbacks
can access registers via ftrace_regs APIs.

Note that this only restores the 'rax' and 'rdx' registers because the other
registers are no longer used and are recovered by the caller. 'rax' and
'rdx' are used for passing the return value.

Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
---
 Changes in v3:
  - Add a comment about rip.
 Changes in v2:
  - Save rsp register and drop clearing orig_ax.
---
 arch/x86/Kconfig            |    3 ++-
 arch/x86/kernel/ftrace_64.S |   37 +++++++++++++++++++++++++++++--------
 2 files changed, 31 insertions(+), 9 deletions(-)

Comments

Steven Rostedt Feb. 15, 2024, 4:08 p.m. UTC | #1
On Wed,  7 Feb 2024 00:11:56 +0900
"Masami Hiramatsu (Google)" <mhiramat@kernel.org> wrote:

> From: Masami Hiramatsu (Google) <mhiramat@kernel.org>
> 
> Support HAVE_FUNCTION_GRAPH_FREGS on x86-64, which saves ftrace_regs
> on the stack in ftrace_graph return trampoline so that the callbacks
> can access registers via ftrace_regs APIs.
> 
> Note that this only recovers 'rax' and 'rdx' registers because other
> registers are not used anymore and recovered by caller. 'rax' and
> 'rdx' will be used for passing the return value.
> 
> Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
> ---
>  Changes in v3:
>   - Add a comment about rip.
>  Changes in v2:
>   - Save rsp register and drop clearing orig_ax.
> ---
>  arch/x86/Kconfig            |    3 ++-
>  arch/x86/kernel/ftrace_64.S |   37 +++++++++++++++++++++++++++++--------
>  2 files changed, 31 insertions(+), 9 deletions(-)
> 
> diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
> index 5edec175b9bf..ccf17d8b6f5f 100644
> --- a/arch/x86/Kconfig
> +++ b/arch/x86/Kconfig
> @@ -223,7 +223,8 @@ config X86
>  	select HAVE_FAST_GUP
>  	select HAVE_FENTRY			if X86_64 || DYNAMIC_FTRACE
>  	select HAVE_FTRACE_MCOUNT_RECORD
> -	select HAVE_FUNCTION_GRAPH_RETVAL	if HAVE_FUNCTION_GRAPH_TRACER
> +	select HAVE_FUNCTION_GRAPH_FREGS	if HAVE_DYNAMIC_FTRACE_WITH_ARGS
> +	select HAVE_FUNCTION_GRAPH_RETVAL	if !HAVE_DYNAMIC_FTRACE_WITH_ARGS
>  	select HAVE_FUNCTION_GRAPH_TRACER	if X86_32 || (X86_64 && DYNAMIC_FTRACE)
>  	select HAVE_FUNCTION_TRACER
>  	select HAVE_GCC_PLUGINS
> diff --git a/arch/x86/kernel/ftrace_64.S b/arch/x86/kernel/ftrace_64.S
> index 214f30e9f0c0..8a16f774604e 100644
> --- a/arch/x86/kernel/ftrace_64.S
> +++ b/arch/x86/kernel/ftrace_64.S
> @@ -348,21 +348,42 @@ STACK_FRAME_NON_STANDARD_FP(__fentry__)
>  SYM_CODE_START(return_to_handler)
>  	UNWIND_HINT_UNDEFINED
>  	ANNOTATE_NOENDBR
> -	subq  $24, %rsp
> +	/*
> +	 * Save the registers requires for ftrace_regs;
> +	 * rax, rcx, rdx, rdi, rsi, r8, r9 and rbp
> +	 */
> +	subq $(FRAME_SIZE), %rsp
> +	movq %rax, RAX(%rsp)
> +	movq %rcx, RCX(%rsp)
> +	movq %rdx, RDX(%rsp)
> +	movq %rsi, RSI(%rsp)
> +	movq %rdi, RDI(%rsp)
> +	movq %r8, R8(%rsp)
> +	movq %r9, R9(%rsp)
> +	movq %rbp, RBP(%rsp)

This unconditionally slows down function graph tracer for no good reason.

Most of the above is going to be garbage anyway, except the rax and rdx.

I would recommend that we set something else in the ftrace regs that states
this only holds return values. Anything else will just be invalid.

I'm really against saving garbage. The purpose of ftrace_regs is that it
can hold incomplete data.

-- Steve


> +	/*
> +	 * orig_ax is not cleared because it is used for indicating the direct
> +	 * trampoline in the fentry. And rip is not set because we don't know
> +	 * the correct return address here.
> +	 */
> +
> +	leaq FRAME_SIZE(%rsp), %rcx
> +	movq %rcx, RSP(%rsp)
>  
> -	/* Save the return values */
> -	movq %rax, (%rsp)
> -	movq %rdx, 8(%rsp)
> -	movq %rbp, 16(%rsp)
>  	movq %rsp, %rdi
>  
>  	call ftrace_return_to_handler
>  
>  	movq %rax, %rdi
> -	movq 8(%rsp), %rdx
> -	movq (%rsp), %rax
>  
> -	addq $24, %rsp
> +	/*
> +	 * Restore only rax and rdx because other registers are not used
> +	 * for return value nor callee saved. Caller will reuse/recover it.
> +	 */
> +	movq RDX(%rsp), %rdx
> +	movq RAX(%rsp), %rax
> +
> +	addq $(FRAME_SIZE), %rsp
>  	/*
>  	 * Jump back to the old return address. This cannot be JMP_NOSPEC rdi
>  	 * since IBT would demand that contain ENDBR, which simply isn't so for
Masami Hiramatsu (Google) Feb. 16, 2024, 8:54 a.m. UTC | #2
On Thu, 15 Feb 2024 11:08:08 -0500
Steven Rostedt <rostedt@goodmis.org> wrote:

> On Wed,  7 Feb 2024 00:11:56 +0900
> "Masami Hiramatsu (Google)" <mhiramat@kernel.org> wrote:
> 
> > From: Masami Hiramatsu (Google) <mhiramat@kernel.org>
> > 
> > Support HAVE_FUNCTION_GRAPH_FREGS on x86-64, which saves ftrace_regs
> > on the stack in ftrace_graph return trampoline so that the callbacks
> > can access registers via ftrace_regs APIs.
> > 
> > Note that this only recovers 'rax' and 'rdx' registers because other
> > registers are not used anymore and recovered by caller. 'rax' and
> > 'rdx' will be used for passing the return value.
> > 
> > Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
> > ---
> >  Changes in v3:
> >   - Add a comment about rip.
> >  Changes in v2:
> >   - Save rsp register and drop clearing orig_ax.
> > ---
> >  arch/x86/Kconfig            |    3 ++-
> >  arch/x86/kernel/ftrace_64.S |   37 +++++++++++++++++++++++++++++--------
> >  2 files changed, 31 insertions(+), 9 deletions(-)
> > 
> > diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
> > index 5edec175b9bf..ccf17d8b6f5f 100644
> > --- a/arch/x86/Kconfig
> > +++ b/arch/x86/Kconfig
> > @@ -223,7 +223,8 @@ config X86
> >  	select HAVE_FAST_GUP
> >  	select HAVE_FENTRY			if X86_64 || DYNAMIC_FTRACE
> >  	select HAVE_FTRACE_MCOUNT_RECORD
> > -	select HAVE_FUNCTION_GRAPH_RETVAL	if HAVE_FUNCTION_GRAPH_TRACER
> > +	select HAVE_FUNCTION_GRAPH_FREGS	if HAVE_DYNAMIC_FTRACE_WITH_ARGS
> > +	select HAVE_FUNCTION_GRAPH_RETVAL	if !HAVE_DYNAMIC_FTRACE_WITH_ARGS
> >  	select HAVE_FUNCTION_GRAPH_TRACER	if X86_32 || (X86_64 && DYNAMIC_FTRACE)
> >  	select HAVE_FUNCTION_TRACER
> >  	select HAVE_GCC_PLUGINS
> > diff --git a/arch/x86/kernel/ftrace_64.S b/arch/x86/kernel/ftrace_64.S
> > index 214f30e9f0c0..8a16f774604e 100644
> > --- a/arch/x86/kernel/ftrace_64.S
> > +++ b/arch/x86/kernel/ftrace_64.S
> > @@ -348,21 +348,42 @@ STACK_FRAME_NON_STANDARD_FP(__fentry__)
> >  SYM_CODE_START(return_to_handler)
> >  	UNWIND_HINT_UNDEFINED
> >  	ANNOTATE_NOENDBR
> > -	subq  $24, %rsp
> > +	/*
> > +	 * Save the registers requires for ftrace_regs;
> > +	 * rax, rcx, rdx, rdi, rsi, r8, r9 and rbp
> > +	 */
> > +	subq $(FRAME_SIZE), %rsp
> > +	movq %rax, RAX(%rsp)
> > +	movq %rcx, RCX(%rsp)
> > +	movq %rdx, RDX(%rsp)
> > +	movq %rsi, RSI(%rsp)
> > +	movq %rdi, RDI(%rsp)
> > +	movq %r8, R8(%rsp)
> > +	movq %r9, R9(%rsp)
> > +	movq %rbp, RBP(%rsp)
> 
> This unconditionally slows down function graph tracer for no good reason.
> 
> Most of the above is going to be garbage anyway, except the rax and rdx.
> 
> I would recommend than we set something else in the ftrace regs that states
> this only holds return values. Anything else will just get invalid.
> 
> I'm really against saving garbage. The purpose of ftrace_regs is that it
> can hold incomplete data.

Ah, OK. I misunderstood. I thought ftrace_regs was expected to be filled
with a reduced (arch-defined) register set. But it just ensures that it holds
some registers, depending on the context.

Thank you,

> 
> -- Steve
> 
> 
> > +	/*
> > +	 * orig_ax is not cleared because it is used for indicating the direct
> > +	 * trampoline in the fentry. And rip is not set because we don't know
> > +	 * the correct return address here.
> > +	 */
> > +
> > +	leaq FRAME_SIZE(%rsp), %rcx
> > +	movq %rcx, RSP(%rsp)
> >  
> > -	/* Save the return values */
> > -	movq %rax, (%rsp)
> > -	movq %rdx, 8(%rsp)
> > -	movq %rbp, 16(%rsp)
> >  	movq %rsp, %rdi
> >  
> >  	call ftrace_return_to_handler
> >  
> >  	movq %rax, %rdi
> > -	movq 8(%rsp), %rdx
> > -	movq (%rsp), %rax
> >  
> > -	addq $24, %rsp
> > +	/*
> > +	 * Restore only rax and rdx because other registers are not used
> > +	 * for return value nor callee saved. Caller will reuse/recover it.
> > +	 */
> > +	movq RDX(%rsp), %rdx
> > +	movq RAX(%rsp), %rax
> > +
> > +	addq $(FRAME_SIZE), %rsp
> >  	/*
> >  	 * Jump back to the old return address. This cannot be JMP_NOSPEC rdi
> >  	 * since IBT would demand that contain ENDBR, which simply isn't so for
>
diff mbox series

Patch

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 5edec175b9bf..ccf17d8b6f5f 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -223,7 +223,8 @@  config X86
 	select HAVE_FAST_GUP
 	select HAVE_FENTRY			if X86_64 || DYNAMIC_FTRACE
 	select HAVE_FTRACE_MCOUNT_RECORD
-	select HAVE_FUNCTION_GRAPH_RETVAL	if HAVE_FUNCTION_GRAPH_TRACER
+	select HAVE_FUNCTION_GRAPH_FREGS	if HAVE_DYNAMIC_FTRACE_WITH_ARGS
+	select HAVE_FUNCTION_GRAPH_RETVAL	if !HAVE_DYNAMIC_FTRACE_WITH_ARGS
 	select HAVE_FUNCTION_GRAPH_TRACER	if X86_32 || (X86_64 && DYNAMIC_FTRACE)
 	select HAVE_FUNCTION_TRACER
 	select HAVE_GCC_PLUGINS
diff --git a/arch/x86/kernel/ftrace_64.S b/arch/x86/kernel/ftrace_64.S
index 214f30e9f0c0..8a16f774604e 100644
--- a/arch/x86/kernel/ftrace_64.S
+++ b/arch/x86/kernel/ftrace_64.S
@@ -348,21 +348,42 @@  STACK_FRAME_NON_STANDARD_FP(__fentry__)
 SYM_CODE_START(return_to_handler)
 	UNWIND_HINT_UNDEFINED
 	ANNOTATE_NOENDBR
-	subq  $24, %rsp
+	/*
+	 * Save the registers required for ftrace_regs;
+	 * rax, rcx, rdx, rdi, rsi, r8, r9 and rbp
+	 */
+	subq $(FRAME_SIZE), %rsp
+	movq %rax, RAX(%rsp)
+	movq %rcx, RCX(%rsp)
+	movq %rdx, RDX(%rsp)
+	movq %rsi, RSI(%rsp)
+	movq %rdi, RDI(%rsp)
+	movq %r8, R8(%rsp)
+	movq %r9, R9(%rsp)
+	movq %rbp, RBP(%rsp)
+	/*
+	 * orig_ax is not cleared because it is used for indicating the direct
+	 * trampoline in the fentry. And rip is not set because we don't know
+	 * the correct return address here.
+	 */
+
+	leaq FRAME_SIZE(%rsp), %rcx
+	movq %rcx, RSP(%rsp)
 
-	/* Save the return values */
-	movq %rax, (%rsp)
-	movq %rdx, 8(%rsp)
-	movq %rbp, 16(%rsp)
 	movq %rsp, %rdi
 
 	call ftrace_return_to_handler
 
 	movq %rax, %rdi
-	movq 8(%rsp), %rdx
-	movq (%rsp), %rax
 
-	addq $24, %rsp
+	/*
+	 * Restore only rax and rdx because other registers are not used
+	 * for return value nor callee saved. Caller will reuse/recover it.
+	 */
+	movq RDX(%rsp), %rdx
+	movq RAX(%rsp), %rax
+
+	addq $(FRAME_SIZE), %rsp
 	/*
 	 * Jump back to the old return address. This cannot be JMP_NOSPEC rdi
 	 * since IBT would demand that contain ENDBR, which simply isn't so for