Message ID | 20230107133549.4192639-6-guoren@kernel.org (mailing list archive) |
---|---|
State | Superseded |
Delegated to: | Palmer Dabbelt |
Headers | show |
Series | riscv: Optimize function trace | expand |
Context | Check | Description |
---|---|---|
conchuod/patch_count | success | Link |
conchuod/cover_letter | success | Series has a cover letter |
conchuod/tree_selection | success | Guessed tree name to be fixes |
conchuod/fixes_present | success | Fixes tag present in non-next series |
conchuod/maintainers_pattern | success | MAINTAINERS pattern errors before the patch: 13 and now 13 |
conchuod/verify_signedoff | success | Signed-off-by tag matches author and committer |
conchuod/kdoc | success | Errors and warnings before: 0 this patch: 0 |
conchuod/module_param | success | Was 0 now: 0 |
conchuod/alphanumeric_selects | success | Out of order selects before the patch: 57 and now 57 |
conchuod/build_rv32_defconfig | success | Build OK |
conchuod/build_warn_rv64 | success | Errors and warnings before: 2054 this patch: 2054 |
conchuod/dtb_warn_rv64 | success | Errors and warnings before: 4 this patch: 4 |
conchuod/header_inline | success | No static functions without inline keyword in header files |
conchuod/checkpatch | success | total: 0 errors, 0 warnings, 0 checks, 197 lines checked |
conchuod/source_inline | success | Was 0 now: 0 |
conchuod/build_rv64_nommu_k210_defconfig | success | Build OK |
conchuod/verify_fixes | success | No Fixes tag |
conchuod/build_rv64_nommu_virt_defconfig | success | Build OK |
On 07.01.2023 16:35, guoren@kernel.org wrote: > From: Song Shuai <suagrfillet@gmail.com> > > In order to make the function graph use ftrace directly, ftrace_caller > should be adjusted to save the necessary regs against the pt_regs layout > so it can call ftrace_graph_func reasonably. > > SAVE_ALL now saves all the regs according to the pt_regs struct. Here > supersedes SAVE_ALL by SAVE_ABI_REGS which has an extra option to allow > saving only the necessary ABI-related regs for ftrace_caller. > > ftrace_caller and ftrace_regs_caller save their regs with the respective > option of SAVE_ABI_REGS, then call the tracing function, especially > graph_ops's ftrace_graph_func. So the ftrace_graph_[regs]_call labels > aren't needed anymore if FTRACE_WITH_REGS is defined. > > As the previous patch described, the ftrace_caller remains with its > ftrace_graph_call if FTRACE_WITH_REGS isn't defined, > > For convenience, the original argument setup for the tracing function in > ftrace_[regs]_caller is separated as PREPARE_ARGS. > > Signed-off-by: Song Shuai <suagrfillet@gmail.com> > Tested-by: Guo Ren <guoren@kernel.org> > Signed-off-by: Guo Ren <guoren@kernel.org> > --- > arch/riscv/kernel/mcount-dyn.S | 142 ++++++++++++++++++++++++--------- > 1 file changed, 104 insertions(+), 38 deletions(-) > > diff --git a/arch/riscv/kernel/mcount-dyn.S b/arch/riscv/kernel/mcount-dyn.S > index b75332ced757..d7d4d51b4bd7 100644 > --- a/arch/riscv/kernel/mcount-dyn.S > +++ b/arch/riscv/kernel/mcount-dyn.S > @@ -57,19 +57,52 @@ > .endm > > #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS > - .macro SAVE_ALL > + > +/** > +* SAVE_ABI_REGS - save regs against the pt_regs struct > +* > +* @all: tell if saving all the regs > +* > +* If all is set, all the regs will be saved, otherwise only ABI > +* related regs (a0-a7,epc,ra and optional s0) will be saved. 
> +* > +* After the stack is established, > +* > +* 0(sp) stores the PC of the traced function which can be accessed > +* by &(fregs)->regs->epc in tracing function. Note that the real > +* function entry address should be computed with -FENTRY_RA_OFFSET. > +* > +* 8(sp) stores the function return address (i.e. parent IP) that > +* can be accessed by &(fregs)->regs->ra in tracing function. > +* > +* The other regs are saved at the respective location and accessed > +* by the respective pt_regs member. > +* > +* Here is the layout of stack for your reference. > +* > +* PT_SIZE_ON_STACK -> +++++++++ > +* + ..... + > +* + t3-t6 + > +* + s2-s11+ > +* + a0-a7 + --++++-> ftrace_caller saved > +* + s1 + + > +* + s0 + --+ > +* + t0-t2 + + > +* + tp + + > +* + gp + + > +* + sp + + > +* + ra + --+ // parent IP > +* sp -> + epc + --+ // PC > +* +++++++++ > +**/ > + .macro SAVE_ABI_REGS, all=0 > addi sp, sp, -PT_SIZE_ON_STACK > > REG_S t0, PT_EPC(sp) > REG_S x1, PT_RA(sp) > - REG_S x2, PT_SP(sp) > - REG_S x3, PT_GP(sp) > - REG_S x4, PT_TP(sp) > - REG_S x5, PT_T0(sp) > - REG_S x6, PT_T1(sp) > - REG_S x7, PT_T2(sp) > - REG_S x8, PT_S0(sp) > - REG_S x9, PT_S1(sp) > + > + // always save the ABI regs > + > REG_S x10, PT_A0(sp) > REG_S x11, PT_A1(sp) > REG_S x12, PT_A2(sp) > @@ -78,6 +111,18 @@ > REG_S x15, PT_A5(sp) > REG_S x16, PT_A6(sp) > REG_S x17, PT_A7(sp) > + > + // save the leftover regs > + > + .if \all == 1 > + REG_S x2, PT_SP(sp) > + REG_S x3, PT_GP(sp) > + REG_S x4, PT_TP(sp) > + REG_S x5, PT_T0(sp) > + REG_S x6, PT_T1(sp) > + REG_S x7, PT_T2(sp) > + REG_S x8, PT_S0(sp) > + REG_S x9, PT_S1(sp) > REG_S x18, PT_S2(sp) > REG_S x19, PT_S3(sp) > REG_S x20, PT_S4(sp) > @@ -92,19 +137,19 @@ > REG_S x29, PT_T4(sp) > REG_S x30, PT_T5(sp) > REG_S x31, PT_T6(sp) > + > + // save s0 if FP_TEST defined > + > + .else > +#ifdef HAVE_FUNCTION_GRAPH_FP_TEST > + REG_S x8, PT_S0(sp) > +#endif > + .endif > .endm > > - .macro RESTORE_ALL > + .macro RESTORE_ABI_REGS, all=0 > REG_L t0, 
PT_EPC(sp) > REG_L x1, PT_RA(sp) > - REG_L x2, PT_SP(sp) > - REG_L x3, PT_GP(sp) > - REG_L x4, PT_TP(sp) > - REG_L x5, PT_T0(sp) > - REG_L x6, PT_T1(sp) > - REG_L x7, PT_T2(sp) > - REG_L x8, PT_S0(sp) > - REG_L x9, PT_S1(sp) > REG_L x10, PT_A0(sp) > REG_L x11, PT_A1(sp) > REG_L x12, PT_A2(sp) > @@ -113,6 +158,16 @@ > REG_L x15, PT_A5(sp) > REG_L x16, PT_A6(sp) > REG_L x17, PT_A7(sp) > + > + .if \all == 1 > + REG_L x2, PT_SP(sp) > + REG_L x3, PT_GP(sp) > + REG_L x4, PT_TP(sp) > + REG_L x5, PT_T0(sp) Same as for the patch #3, please skip "REG_L x5, PT_T0(sp)" here. The correct value of t0/x5 has already been read from PT_EPC(sp) at this point. > + REG_L x6, PT_T1(sp) > + REG_L x7, PT_T2(sp) > + REG_L x8, PT_S0(sp) > + REG_L x9, PT_S1(sp) > REG_L x18, PT_S2(sp) > REG_L x19, PT_S3(sp) > REG_L x20, PT_S4(sp) > @@ -128,10 +183,25 @@ > REG_L x30, PT_T5(sp) > REG_L x31, PT_T6(sp) > > + .else > +#ifdef HAVE_FUNCTION_GRAPH_FP_TEST > + REG_L x8, PT_S0(sp) > +#endif > + .endif > addi sp, sp, PT_SIZE_ON_STACK > .endm > + > + .macro PREPARE_ARGS > + addi a0, t0, -FENTRY_RA_OFFSET // ip > + la a1, function_trace_op > + REG_L a2, 0(a1) // op > + mv a1, ra // parent_ip > + mv a3, sp // fregs > + .endm > + > #endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */ > > +#ifndef CONFIG_DYNAMIC_FTRACE_WITH_REGS > ENTRY(ftrace_caller) > SAVE_ABI > > @@ -160,33 +230,29 @@ ftrace_graph_call: > jr t0 > ENDPROC(ftrace_caller) > > -#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS > +#else /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */ > ENTRY(ftrace_regs_caller) > - SAVE_ALL > - > - addi a0, t0, -FENTRY_RA_OFFSET > - la a1, function_trace_op > - REG_L a2, 0(a1) > - mv a1, ra > - mv a3, sp > + SAVE_ABI_REGS 1 > + PREPARE_ARGS > > ftrace_regs_call: > .global ftrace_regs_call > call ftrace_stub > > -#ifdef CONFIG_FUNCTION_GRAPH_TRACER > - addi a0, sp, PT_RA > - REG_L a1, PT_T0(sp) > - addi a1, a1, -FENTRY_RA_OFFSET > -#ifdef HAVE_FUNCTION_GRAPH_FP_TEST > - mv a2, s0 > -#endif > -ftrace_graph_regs_call: > - .global 
ftrace_graph_regs_call > - call ftrace_stub > -#endif > > - RESTORE_ALL > + RESTORE_ABI_REGS 1 > jr t0 > ENDPROC(ftrace_regs_caller) > + > +ENTRY(ftrace_caller) > + SAVE_ABI_REGS 0 > + PREPARE_ARGS > + > +ftrace_call: > + .global ftrace_call > + call ftrace_stub > + > + RESTORE_ABI_REGS 0 > + jr t0 > +ENDPROC(ftrace_caller) > #endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */ > -- > 2.36.1 > > Regards, Evgenii
On Wed, Jan 11, 2023 at 1:16 AM Evgenii Shatokhin <e.shatokhin@yadro.com> wrote: > > On 07.01.2023 16:35, guoren@kernel.org wrote: > > From: Song Shuai <suagrfillet@gmail.com> > > > > In order to make the function graph use ftrace directly, ftrace_caller > > should be adjusted to save the necessary regs against the pt_regs layout > > so it can call ftrace_graph_func reasonably. > > > > SAVE_ALL now saves all the regs according to the pt_regs struct. Here > > supersedes SAVE_ALL by SAVE_ABI_REGS which has an extra option to allow > > saving only the necessary ABI-related regs for ftrace_caller. > > > > ftrace_caller and ftrace_regs_caller save their regs with the respective > > option of SAVE_ABI_REGS, then call the tracing function, especially > > graph_ops's ftrace_graph_func. So the ftrace_graph_[regs]_call labels > > aren't needed anymore if FTRACE_WITH_REGS is defined. > > > > As the previous patch described, the ftrace_caller remains with its > > ftrace_graph_call if FTRACE_WITH_REGS isn't defined, > > > > For convenience, the original argument setup for the tracing function in > > ftrace_[regs]_caller is separated as PREPARE_ARGS. 
> > > > Signed-off-by: Song Shuai <suagrfillet@gmail.com> > > Tested-by: Guo Ren <guoren@kernel.org> > > Signed-off-by: Guo Ren <guoren@kernel.org> > > --- > > arch/riscv/kernel/mcount-dyn.S | 142 ++++++++++++++++++++++++--------- > > 1 file changed, 104 insertions(+), 38 deletions(-) > > > > diff --git a/arch/riscv/kernel/mcount-dyn.S b/arch/riscv/kernel/mcount-dyn.S > > index b75332ced757..d7d4d51b4bd7 100644 > > --- a/arch/riscv/kernel/mcount-dyn.S > > +++ b/arch/riscv/kernel/mcount-dyn.S > > @@ -57,19 +57,52 @@ > > .endm > > > > #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS > > - .macro SAVE_ALL > > + > > +/** > > +* SAVE_ABI_REGS - save regs against the pt_regs struct > > +* > > +* @all: tell if saving all the regs > > +* > > +* If all is set, all the regs will be saved, otherwise only ABI > > +* related regs (a0-a7,epc,ra and optional s0) will be saved. > > +* > > +* After the stack is established, > > +* > > +* 0(sp) stores the PC of the traced function which can be accessed > > +* by &(fregs)->regs->epc in tracing function. Note that the real > > +* function entry address should be computed with -FENTRY_RA_OFFSET. > > +* > > +* 8(sp) stores the function return address (i.e. parent IP) that > > +* can be accessed by &(fregs)->regs->ra in tracing function. > > +* > > +* The other regs are saved at the respective localtion and accessed > > +* by the respective pt_regs member. > > +* > > +* Here is the layout of stack for your reference. > > +* > > +* PT_SIZE_ON_STACK -> +++++++++ > > +* + ..... 
+ > > +* + t3-t6 + > > +* + s2-s11+ > > +* + a0-a7 + --++++-> ftrace_caller saved > > +* + s1 + + > > +* + s0 + --+ > > +* + t0-t2 + + > > +* + tp + + > > +* + gp + + > > +* + sp + + > > +* + ra + --+ // parent IP > > +* sp -> + epc + --+ // PC > > +* +++++++++ > > +**/ > > + .macro SAVE_ABI_REGS, all=0 > > addi sp, sp, -PT_SIZE_ON_STACK > > > > REG_S t0, PT_EPC(sp) > > REG_S x1, PT_RA(sp) > > - REG_S x2, PT_SP(sp) > > - REG_S x3, PT_GP(sp) > > - REG_S x4, PT_TP(sp) > > - REG_S x5, PT_T0(sp) > > - REG_S x6, PT_T1(sp) > > - REG_S x7, PT_T2(sp) > > - REG_S x8, PT_S0(sp) > > - REG_S x9, PT_S1(sp) > > + > > + // always save the ABI regs > > + > > REG_S x10, PT_A0(sp) > > REG_S x11, PT_A1(sp) > > REG_S x12, PT_A2(sp) > > @@ -78,6 +111,18 @@ > > REG_S x15, PT_A5(sp) > > REG_S x16, PT_A6(sp) > > REG_S x17, PT_A7(sp) > > + > > + // save the leftover regs > > + > > + .if \all == 1 > > + REG_S x2, PT_SP(sp) > > + REG_S x3, PT_GP(sp) > > + REG_S x4, PT_TP(sp) > > + REG_S x5, PT_T0(sp) > > + REG_S x6, PT_T1(sp) > > + REG_S x7, PT_T2(sp) > > + REG_S x8, PT_S0(sp) > > + REG_S x9, PT_S1(sp) > > REG_S x18, PT_S2(sp) > > REG_S x19, PT_S3(sp) > > REG_S x20, PT_S4(sp) > > @@ -92,19 +137,19 @@ > > REG_S x29, PT_T4(sp) > > REG_S x30, PT_T5(sp) > > REG_S x31, PT_T6(sp) > > + > > + // save s0 if FP_TEST defined > > + > > + .else > > +#ifdef HAVE_FUNCTION_GRAPH_FP_TEST > > + REG_S x8, PT_S0(sp) > > +#endif > > + .endif > > .endm > > > > - .macro RESTORE_ALL > > + .macro RESTORE_ABI_REGS, all=0 > > REG_L t0, PT_EPC(sp) > > REG_L x1, PT_RA(sp) > > - REG_L x2, PT_SP(sp) > > - REG_L x3, PT_GP(sp) > > - REG_L x4, PT_TP(sp) > > - REG_L x5, PT_T0(sp) > > - REG_L x6, PT_T1(sp) > > - REG_L x7, PT_T2(sp) > > - REG_L x8, PT_S0(sp) > > - REG_L x9, PT_S1(sp) > > REG_L x10, PT_A0(sp) > > REG_L x11, PT_A1(sp) > > REG_L x12, PT_A2(sp) > > @@ -113,6 +158,16 @@ > > REG_L x15, PT_A5(sp) > > REG_L x16, PT_A6(sp) > > REG_L x17, PT_A7(sp) > > + > > + .if \all == 1 > > + REG_L x2, PT_SP(sp) > > + REG_L x3, 
PT_GP(sp) > > + REG_L x4, PT_TP(sp) > > + REG_L x5, PT_T0(sp) > > Same as for the patch #3, please skip "REG_L x5, PT_T0(sp)" here. The > correct value of t0/x5 has already been read from PT_EPC(sp) at this point. Oh, I don't want to do that here. It's a common macro. Because it's a continuous load within the cacheline, I don't think it would cause a performance gap. > > > + REG_L x6, PT_T1(sp) > > + REG_L x7, PT_T2(sp) > > + REG_L x8, PT_S0(sp) > > + REG_L x9, PT_S1(sp) > > REG_L x18, PT_S2(sp) > > REG_L x19, PT_S3(sp) > > REG_L x20, PT_S4(sp) > > @@ -128,10 +183,25 @@ > > REG_L x30, PT_T5(sp) > > REG_L x31, PT_T6(sp) > > > > + .else > > +#ifdef HAVE_FUNCTION_GRAPH_FP_TEST > > + REG_L x8, PT_S0(sp) > > +#endif > > + .endif > > addi sp, sp, PT_SIZE_ON_STACK > > .endm > > + > > + .macro PREPARE_ARGS > > + addi a0, t0, -FENTRY_RA_OFFSET // ip > > + la a1, function_trace_op > > + REG_L a2, 0(a1) // op > > + mv a1, ra // parent_ip > > + mv a3, sp // fregs > > + .endm > > + > > #endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */ > > > > +#ifndef CONFIG_DYNAMIC_FTRACE_WITH_REGS > > ENTRY(ftrace_caller) > > SAVE_ABI > > > > @@ -160,33 +230,29 @@ ftrace_graph_call: > > jr t0 > > ENDPROC(ftrace_caller) > > > > -#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS > > +#else /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */ > > ENTRY(ftrace_regs_caller) > > - SAVE_ALL > > - > > - addi a0, t0, -FENTRY_RA_OFFSET > > - la a1, function_trace_op > > - REG_L a2, 0(a1) > > - mv a1, ra > > - mv a3, sp > > + SAVE_ABI_REGS 1 > > + PREPARE_ARGS > > > > ftrace_regs_call: > > .global ftrace_regs_call > > call ftrace_stub > > > > -#ifdef CONFIG_FUNCTION_GRAPH_TRACER > > - addi a0, sp, PT_RA > > - REG_L a1, PT_T0(sp) > > - addi a1, a1, -FENTRY_RA_OFFSET > > -#ifdef HAVE_FUNCTION_GRAPH_FP_TEST > > - mv a2, s0 > > -#endif > > -ftrace_graph_regs_call: > > - .global ftrace_graph_regs_call > > - call ftrace_stub > > -#endif > > > > - RESTORE_ALL > > + RESTORE_ABI_REGS 1 > > jr t0 > > ENDPROC(ftrace_regs_caller) > > + > > 
+ENTRY(ftrace_caller) > > + SAVE_ABI_REGS 0 > > + PREPARE_ARGS > > + > > +ftrace_call: > > + .global ftrace_call > > + call ftrace_stub > > + > > + RESTORE_ABI_REGS 0 > > + jr t0 > > +ENDPROC(ftrace_caller) > > #endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */ > > -- > > 2.36.1 > > > > > Regards, > Evgenii > > -- Best Regards Guo Ren
On Wed, Jan 11, 2023 at 4:23 PM Guo Ren <guoren@kernel.org> wrote: > > On Wed, Jan 11, 2023 at 1:16 AM Evgenii Shatokhin <e.shatokhin@yadro.com> wrote: > > > > On 07.01.2023 16:35, guoren@kernel.org wrote: > > > From: Song Shuai <suagrfillet@gmail.com> > > > > > > In order to make the function graph use ftrace directly, ftrace_caller > > > should be adjusted to save the necessary regs against the pt_regs layout > > > so it can call ftrace_graph_func reasonably. > > > > > > SAVE_ALL now saves all the regs according to the pt_regs struct. Here > > > supersedes SAVE_ALL by SAVE_ABI_REGS which has an extra option to allow > > > saving only the necessary ABI-related regs for ftrace_caller. > > > > > > ftrace_caller and ftrace_regs_caller save their regs with the respective > > > option of SAVE_ABI_REGS, then call the tracing function, especially > > > graph_ops's ftrace_graph_func. So the ftrace_graph_[regs]_call labels > > > aren't needed anymore if FTRACE_WITH_REGS is defined. > > > > > > As the previous patch described, the ftrace_caller remains with its > > > ftrace_graph_call if FTRACE_WITH_REGS isn't defined, > > > > > > For convenience, the original argument setup for the tracing function in > > > ftrace_[regs]_caller is separated as PREPARE_ARGS. 
> > > > > > Signed-off-by: Song Shuai <suagrfillet@gmail.com> > > > Tested-by: Guo Ren <guoren@kernel.org> > > > Signed-off-by: Guo Ren <guoren@kernel.org> > > > --- > > > arch/riscv/kernel/mcount-dyn.S | 142 ++++++++++++++++++++++++--------- > > > 1 file changed, 104 insertions(+), 38 deletions(-) > > > > > > diff --git a/arch/riscv/kernel/mcount-dyn.S b/arch/riscv/kernel/mcount-dyn.S > > > index b75332ced757..d7d4d51b4bd7 100644 > > > --- a/arch/riscv/kernel/mcount-dyn.S > > > +++ b/arch/riscv/kernel/mcount-dyn.S > > > @@ -57,19 +57,52 @@ > > > .endm > > > > > > #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS > > > - .macro SAVE_ALL > > > + > > > +/** > > > +* SAVE_ABI_REGS - save regs against the pt_regs struct > > > +* > > > +* @all: tell if saving all the regs > > > +* > > > +* If all is set, all the regs will be saved, otherwise only ABI > > > +* related regs (a0-a7,epc,ra and optional s0) will be saved. > > > +* > > > +* After the stack is established, > > > +* > > > +* 0(sp) stores the PC of the traced function which can be accessed > > > +* by &(fregs)->regs->epc in tracing function. Note that the real > > > +* function entry address should be computed with -FENTRY_RA_OFFSET. > > > +* > > > +* 8(sp) stores the function return address (i.e. parent IP) that > > > +* can be accessed by &(fregs)->regs->ra in tracing function. > > > +* > > > +* The other regs are saved at the respective localtion and accessed > > > +* by the respective pt_regs member. > > > +* > > > +* Here is the layout of stack for your reference. > > > +* > > > +* PT_SIZE_ON_STACK -> +++++++++ > > > +* + ..... 
+ > > > +* + t3-t6 + > > > +* + s2-s11+ > > > +* + a0-a7 + --++++-> ftrace_caller saved > > > +* + s1 + + > > > +* + s0 + --+ > > > +* + t0-t2 + + > > > +* + tp + + > > > +* + gp + + > > > +* + sp + + > > > +* + ra + --+ // parent IP > > > +* sp -> + epc + --+ // PC > > > +* +++++++++ > > > +**/ > > > + .macro SAVE_ABI_REGS, all=0 > > > addi sp, sp, -PT_SIZE_ON_STACK > > > > > > REG_S t0, PT_EPC(sp) > > > REG_S x1, PT_RA(sp) > > > - REG_S x2, PT_SP(sp) > > > - REG_S x3, PT_GP(sp) > > > - REG_S x4, PT_TP(sp) > > > - REG_S x5, PT_T0(sp) > > > - REG_S x6, PT_T1(sp) > > > - REG_S x7, PT_T2(sp) > > > - REG_S x8, PT_S0(sp) > > > - REG_S x9, PT_S1(sp) > > > + > > > + // always save the ABI regs > > > + > > > REG_S x10, PT_A0(sp) > > > REG_S x11, PT_A1(sp) > > > REG_S x12, PT_A2(sp) > > > @@ -78,6 +111,18 @@ > > > REG_S x15, PT_A5(sp) > > > REG_S x16, PT_A6(sp) > > > REG_S x17, PT_A7(sp) > > > + > > > + // save the leftover regs > > > + > > > + .if \all == 1 > > > + REG_S x2, PT_SP(sp) > > > + REG_S x3, PT_GP(sp) > > > + REG_S x4, PT_TP(sp) > > > + REG_S x5, PT_T0(sp) > > > + REG_S x6, PT_T1(sp) > > > + REG_S x7, PT_T2(sp) > > > + REG_S x8, PT_S0(sp) > > > + REG_S x9, PT_S1(sp) > > > REG_S x18, PT_S2(sp) > > > REG_S x19, PT_S3(sp) > > > REG_S x20, PT_S4(sp) > > > @@ -92,19 +137,19 @@ > > > REG_S x29, PT_T4(sp) > > > REG_S x30, PT_T5(sp) > > > REG_S x31, PT_T6(sp) > > > + > > > + // save s0 if FP_TEST defined > > > + > > > + .else > > > +#ifdef HAVE_FUNCTION_GRAPH_FP_TEST > > > + REG_S x8, PT_S0(sp) > > > +#endif > > > + .endif > > > .endm > > > > > > - .macro RESTORE_ALL > > > + .macro RESTORE_ABI_REGS, all=0 > > > REG_L t0, PT_EPC(sp) > > > REG_L x1, PT_RA(sp) > > > - REG_L x2, PT_SP(sp) > > > - REG_L x3, PT_GP(sp) > > > - REG_L x4, PT_TP(sp) > > > - REG_L x5, PT_T0(sp) > > > - REG_L x6, PT_T1(sp) > > > - REG_L x7, PT_T2(sp) > > > - REG_L x8, PT_S0(sp) > > > - REG_L x9, PT_S1(sp) > > > REG_L x10, PT_A0(sp) > > > REG_L x11, PT_A1(sp) > > > REG_L x12, PT_A2(sp) > > > @@ 
-113,6 +158,16 @@ > > > REG_L x15, PT_A5(sp) > > > REG_L x16, PT_A6(sp) > > > REG_L x17, PT_A7(sp) > > > + > > > + .if \all == 1 > > > + REG_L x2, PT_SP(sp) > > > + REG_L x3, PT_GP(sp) > > > + REG_L x4, PT_TP(sp) > > > + REG_L x5, PT_T0(sp) > > > > Same as for the patch #3, please skip "REG_L x5, PT_T0(sp)" here. The > > correct value of t0/x5 has already been read from PT_EPC(sp) at this point. > Oh, I don't want to do that here. It's a common macro. Because it's a > continuous load within the cacheline, I don't think it would cause a > performance gap. I misunderstood here; you're correct. The "REG_L x5, PT_T0(sp)" should be skipped. > > > > > > > + REG_L x6, PT_T1(sp) > > > + REG_L x7, PT_T2(sp) > > > + REG_L x8, PT_S0(sp) > > > + REG_L x9, PT_S1(sp) > > > REG_L x18, PT_S2(sp) > > > REG_L x19, PT_S3(sp) > > > REG_L x20, PT_S4(sp) > > > @@ -128,10 +183,25 @@ > > > REG_L x30, PT_T5(sp) > > > REG_L x31, PT_T6(sp) > > > > > > + .else > > > +#ifdef HAVE_FUNCTION_GRAPH_FP_TEST > > > + REG_L x8, PT_S0(sp) > > > +#endif > > > + .endif > > > addi sp, sp, PT_SIZE_ON_STACK > > > .endm > > > + > > > + .macro PREPARE_ARGS > > > + addi a0, t0, -FENTRY_RA_OFFSET // ip > > > + la a1, function_trace_op > > > + REG_L a2, 0(a1) // op > > > + mv a1, ra // parent_ip > > > + mv a3, sp // fregs > > > + .endm > > > + > > > #endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */ > > > > > > +#ifndef CONFIG_DYNAMIC_FTRACE_WITH_REGS > > > ENTRY(ftrace_caller) > > > SAVE_ABI > > > > > > @@ -160,33 +230,29 @@ ftrace_graph_call: > > > jr t0 > > > ENDPROC(ftrace_caller) > > > > > > -#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS > > > +#else /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */ > > > ENTRY(ftrace_regs_caller) > > > - SAVE_ALL > > > - > > > - addi a0, t0, -FENTRY_RA_OFFSET > > > - la a1, function_trace_op > > > - REG_L a2, 0(a1) > > > - mv a1, ra > > > - mv a3, sp > > > + SAVE_ABI_REGS 1 > > > + PREPARE_ARGS > > > > > > ftrace_regs_call: > > > .global ftrace_regs_call > > > call ftrace_stub > > > > > > -#ifdef 
CONFIG_FUNCTION_GRAPH_TRACER > > > - addi a0, sp, PT_RA > > > - REG_L a1, PT_T0(sp) > > > - addi a1, a1, -FENTRY_RA_OFFSET > > > -#ifdef HAVE_FUNCTION_GRAPH_FP_TEST > > > - mv a2, s0 > > > -#endif > > > -ftrace_graph_regs_call: > > > - .global ftrace_graph_regs_call > > > - call ftrace_stub > > > -#endif > > > > > > - RESTORE_ALL > > > + RESTORE_ABI_REGS 1 > > > jr t0 > > > ENDPROC(ftrace_regs_caller) > > > + > > > +ENTRY(ftrace_caller) > > > + SAVE_ABI_REGS 0 > > > + PREPARE_ARGS > > > + > > > +ftrace_call: > > > + .global ftrace_call > > > + call ftrace_stub > > > + > > > + RESTORE_ABI_REGS 0 > > > + jr t0 > > > +ENDPROC(ftrace_caller) > > > #endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */ > > > -- > > > 2.36.1 > > > > > > > > Regards, > > Evgenii > > > > > > > -- > Best Regards > Guo Ren
diff --git a/arch/riscv/kernel/mcount-dyn.S b/arch/riscv/kernel/mcount-dyn.S index b75332ced757..d7d4d51b4bd7 100644 --- a/arch/riscv/kernel/mcount-dyn.S +++ b/arch/riscv/kernel/mcount-dyn.S @@ -57,19 +57,52 @@ .endm #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS - .macro SAVE_ALL + +/** +* SAVE_ABI_REGS - save regs against the pt_regs struct +* +* @all: tell if saving all the regs +* +* If all is set, all the regs will be saved, otherwise only ABI +* related regs (a0-a7,epc,ra and optional s0) will be saved. +* +* After the stack is established, +* +* 0(sp) stores the PC of the traced function which can be accessed +* by &(fregs)->regs->epc in tracing function. Note that the real +* function entry address should be computed with -FENTRY_RA_OFFSET. +* +* 8(sp) stores the function return address (i.e. parent IP) that +* can be accessed by &(fregs)->regs->ra in tracing function. +* +* The other regs are saved at the respective location and accessed +* by the respective pt_regs member. +* +* Here is the layout of stack for your reference. +* +* PT_SIZE_ON_STACK -> +++++++++ +* + ..... 
+ +* + t3-t6 + +* + s2-s11+ +* + a0-a7 + --++++-> ftrace_caller saved +* + s1 + + +* + s0 + --+ +* + t0-t2 + + +* + tp + + +* + gp + + +* + sp + + +* + ra + --+ // parent IP +* sp -> + epc + --+ // PC +* +++++++++ +**/ + .macro SAVE_ABI_REGS, all=0 addi sp, sp, -PT_SIZE_ON_STACK REG_S t0, PT_EPC(sp) REG_S x1, PT_RA(sp) - REG_S x2, PT_SP(sp) - REG_S x3, PT_GP(sp) - REG_S x4, PT_TP(sp) - REG_S x5, PT_T0(sp) - REG_S x6, PT_T1(sp) - REG_S x7, PT_T2(sp) - REG_S x8, PT_S0(sp) - REG_S x9, PT_S1(sp) + + // always save the ABI regs + REG_S x10, PT_A0(sp) REG_S x11, PT_A1(sp) REG_S x12, PT_A2(sp) @@ -78,6 +111,18 @@ REG_S x15, PT_A5(sp) REG_S x16, PT_A6(sp) REG_S x17, PT_A7(sp) + + // save the leftover regs + + .if \all == 1 + REG_S x2, PT_SP(sp) + REG_S x3, PT_GP(sp) + REG_S x4, PT_TP(sp) + REG_S x5, PT_T0(sp) + REG_S x6, PT_T1(sp) + REG_S x7, PT_T2(sp) + REG_S x8, PT_S0(sp) + REG_S x9, PT_S1(sp) REG_S x18, PT_S2(sp) REG_S x19, PT_S3(sp) REG_S x20, PT_S4(sp) @@ -92,19 +137,19 @@ REG_S x29, PT_T4(sp) REG_S x30, PT_T5(sp) REG_S x31, PT_T6(sp) + + // save s0 if FP_TEST defined + + .else +#ifdef HAVE_FUNCTION_GRAPH_FP_TEST + REG_S x8, PT_S0(sp) +#endif + .endif .endm - .macro RESTORE_ALL + .macro RESTORE_ABI_REGS, all=0 REG_L t0, PT_EPC(sp) REG_L x1, PT_RA(sp) - REG_L x2, PT_SP(sp) - REG_L x3, PT_GP(sp) - REG_L x4, PT_TP(sp) - REG_L x5, PT_T0(sp) - REG_L x6, PT_T1(sp) - REG_L x7, PT_T2(sp) - REG_L x8, PT_S0(sp) - REG_L x9, PT_S1(sp) REG_L x10, PT_A0(sp) REG_L x11, PT_A1(sp) REG_L x12, PT_A2(sp) @@ -113,6 +158,16 @@ REG_L x15, PT_A5(sp) REG_L x16, PT_A6(sp) REG_L x17, PT_A7(sp) + + .if \all == 1 + REG_L x2, PT_SP(sp) + REG_L x3, PT_GP(sp) + REG_L x4, PT_TP(sp) + REG_L x5, PT_T0(sp) + REG_L x6, PT_T1(sp) + REG_L x7, PT_T2(sp) + REG_L x8, PT_S0(sp) + REG_L x9, PT_S1(sp) REG_L x18, PT_S2(sp) REG_L x19, PT_S3(sp) REG_L x20, PT_S4(sp) @@ -128,10 +183,25 @@ REG_L x30, PT_T5(sp) REG_L x31, PT_T6(sp) + .else +#ifdef HAVE_FUNCTION_GRAPH_FP_TEST + REG_L x8, PT_S0(sp) +#endif + .endif addi 
sp, sp, PT_SIZE_ON_STACK .endm + + .macro PREPARE_ARGS + addi a0, t0, -FENTRY_RA_OFFSET // ip + la a1, function_trace_op + REG_L a2, 0(a1) // op + mv a1, ra // parent_ip + mv a3, sp // fregs + .endm + #endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */ +#ifndef CONFIG_DYNAMIC_FTRACE_WITH_REGS ENTRY(ftrace_caller) SAVE_ABI @@ -160,33 +230,29 @@ ftrace_graph_call: jr t0 ENDPROC(ftrace_caller) -#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS +#else /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */ ENTRY(ftrace_regs_caller) - SAVE_ALL - - addi a0, t0, -FENTRY_RA_OFFSET - la a1, function_trace_op - REG_L a2, 0(a1) - mv a1, ra - mv a3, sp + SAVE_ABI_REGS 1 + PREPARE_ARGS ftrace_regs_call: .global ftrace_regs_call call ftrace_stub -#ifdef CONFIG_FUNCTION_GRAPH_TRACER - addi a0, sp, PT_RA - REG_L a1, PT_T0(sp) - addi a1, a1, -FENTRY_RA_OFFSET -#ifdef HAVE_FUNCTION_GRAPH_FP_TEST - mv a2, s0 -#endif -ftrace_graph_regs_call: - .global ftrace_graph_regs_call - call ftrace_stub -#endif - RESTORE_ALL + RESTORE_ABI_REGS 1 jr t0 ENDPROC(ftrace_regs_caller) + +ENTRY(ftrace_caller) + SAVE_ABI_REGS 0 + PREPARE_ARGS + +ftrace_call: + .global ftrace_call + call ftrace_stub + + RESTORE_ABI_REGS 0 + jr t0 +ENDPROC(ftrace_caller) #endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */