Message ID | 20221224114315.850130-8-chenguokai17@mails.ucas.ac.cn
---|---
State | Superseded
Delegated to: | Palmer Dabbelt
Series | Add OPTPROBES feature on RISCV
Context | Check | Description |
---|---|---|
conchuod/tree_selection | fail | Failed to apply to next/pending-fixes or riscv/for-next |
Chen Guokai <chenguokai17@mails.ucas.ac.cn> writes:

> From: Liao Chang <liaochang1@huawei.com>

> diff --git a/arch/riscv/kernel/probes/opt.c b/arch/riscv/kernel/probes/opt.c
> index 258a283c906d..bc232fce5b39 100644
> --- a/arch/riscv/kernel/probes/opt.c
> +++ b/arch/riscv/kernel/probes/opt.c
> @@ -11,9 +11,37 @@
> #include <linux/kprobes.h>
> #include <asm/kprobes.h>
> #include <asm/patch.h>
> +#include <asm/asm-offsets.h>
>
> #include "simulate-insn.h"
> #include "decode-insn.h"
> +#include "../../net/bpf_jit.h"
> +
> +static void

Super-nit, but I really prefer *not* breaking function name and return
value, for grepability.

> diff --git a/arch/riscv/kernel/probes/opt_trampoline.S b/arch/riscv/kernel/probes/opt_trampoline.S
> index 16160c4367ff..75e34e373cf2 100644
> --- a/arch/riscv/kernel/probes/opt_trampoline.S
> +++ b/arch/riscv/kernel/probes/opt_trampoline.S
> @@ -1,12 +1,137 @@
> /* SPDX-License-Identifier: GPL-2.0-only */
> /*
> * Copyright (C) 2022 Guokai Chen
> + * Copyright (C) 2022 Liao, Chang <liaochang1@huawei.com>
> */
>
> #include <linux/linkage.h>
>
> +#include <asm/asm.h>
> #incldue <asm/csr.h>
> #include <asm/asm-offsets.h>
>
> SYM_ENTRY(optprobe_template_entry, SYM_L_GLOBAL, SYM_A_NONE)
> + addi sp, sp, -(PT_SIZE_ON_STACK)
> + REG_S x1, PT_RA(sp)
> + REG_S x2, PT_SP(sp)
> + REG_S x3, PT_GP(sp)
> + REG_S x4, PT_TP(sp)
> + REG_S x5, PT_T0(sp)
> + REG_S x6, PT_T1(sp)
> + REG_S x7, PT_T2(sp)
> + REG_S x8, PT_S0(sp)
> + REG_S x9, PT_S1(sp)
> + REG_S x10, PT_A0(sp)
> + REG_S x11, PT_A1(sp)
> + REG_S x12, PT_A2(sp)
> + REG_S x13, PT_A3(sp)
> + REG_S x14, PT_A4(sp)
> + REG_S x15, PT_A5(sp)
> + REG_S x16, PT_A6(sp)
> + REG_S x17, PT_A7(sp)
> + REG_S x18, PT_S2(sp)
> + REG_S x19, PT_S3(sp)
> + REG_S x20, PT_S4(sp)
> + REG_S x21, PT_S5(sp)
> + REG_S x22, PT_S6(sp)
> + REG_S x23, PT_S7(sp)
> + REG_S x24, PT_S8(sp)
> + REG_S x25, PT_S9(sp)
> + REG_S x26, PT_S10(sp)
> + REG_S x27, PT_S11(sp)
> + REG_S x28, PT_T3(sp)
> + REG_S x29, PT_T4(sp)
> + REG_S x30, PT_T5(sp)
> + REG_S x31, PT_T6(sp)
> + /* Update fp is friendly for stacktrace */
> + addi s0, sp, (PT_SIZE_ON_STACK)
> + j 1f
> +
> +SYM_ENTRY(optprobe_template_save, SYM_L_GLOBAL, SYM_A_NONE)
> + /*
> + * Step1:
> + * Filled with the pointer to optimized_kprobe data
> + */
> + .dword 0
> +1:
> + /* Load optimize_kprobe pointer from .dword below */
> + auipc a0, 0
> + REG_L a0, -8(a0)
> + add a1, sp, x0
> +
> +SYM_ENTRY(optprobe_template_call, SYM_L_GLOBAL, SYM_A_NONE)
> + /*
> + * Step2:
> + * <IMME> of AUIPC/JALR are modified to the offset to optimized_callback
> + * jump target is loaded from above .dword.
> + */
> + auipc ra, 0
> + jalr ra, 0(ra)
> +
> + REG_L x1, PT_RA(sp)
> + REG_L x3, PT_GP(sp)
> + REG_L x4, PT_TP(sp)
> + REG_L x5, PT_T0(sp)
> + REG_L x6, PT_T1(sp)
> + REG_L x7, PT_T2(sp)
> + REG_L x8, PT_S0(sp)
> + REG_L x9, PT_S1(sp)
> + REG_L x10, PT_A0(sp)
> + REG_L x11, PT_A1(sp)
> + REG_L x12, PT_A2(sp)
> + REG_L x13, PT_A3(sp)
> + REG_L x14, PT_A4(sp)
> + REG_L x15, PT_A5(sp)
> + REG_L x16, PT_A6(sp)
> + REG_L x17, PT_A7(sp)
> + REG_L x18, PT_S2(sp)
> + REG_L x19, PT_S3(sp)
> + REG_L x20, PT_S4(sp)
> + REG_L x21, PT_S5(sp)
> + REG_L x22, PT_S6(sp)
> + REG_L x23, PT_S7(sp)
> + REG_L x24, PT_S8(sp)
> + REG_L x25, PT_S9(sp)
> + REG_L x26, PT_S10(sp)
> + REG_L x27, PT_S11(sp)
> + REG_L x28, PT_T3(sp)
> + REG_L x29, PT_T4(sp)
> + REG_L x30, PT_T5(sp)
> + REG_L x31, PT_T6(sp)
> + REG_L x2, PT_SP(sp)
> + addi sp, sp, (PT_SIZE_ON_STACK)
> +
> +SYM_ENTRY(optprobe_template_insn, SYM_L_GLOBAL, SYM_A_NONE)
> + /*
> + * Step3:
> + * NOPS will be replaced by the probed instruction, at worst case 3 RVC
> + * and 1 RVI instructions is about to execute out of line.
> + */
> + nop

A nop here will be either a compressed nop or a non-compressed,
depending on the build (C-enabled or not), right? Maybe be explicit to
the assembler what you want?


Björn
On 2023/1/3 2:04, Björn Töpel wrote:
> Chen Guokai <chenguokai17@mails.ucas.ac.cn> writes:
>
>> From: Liao Chang <liaochang1@huawei.com>
>
>> diff --git a/arch/riscv/kernel/probes/opt.c b/arch/riscv/kernel/probes/opt.c
>> index 258a283c906d..bc232fce5b39 100644
>> --- a/arch/riscv/kernel/probes/opt.c
>> +++ b/arch/riscv/kernel/probes/opt.c
>> @@ -11,9 +11,37 @@
>> #include <linux/kprobes.h>
>> #include <asm/kprobes.h>
>> #include <asm/patch.h>
>> +#include <asm/asm-offsets.h>
>>
>> #include "simulate-insn.h"
>> #include "decode-insn.h"
>> +#include "../../net/bpf_jit.h"
>> +
>> +static void
>
> Super-nit, but I really prefer *not* breaking function name and return
> value, for grepability.

OK, I will keep the function name and return type on the same line.

>
>> diff --git a/arch/riscv/kernel/probes/opt_trampoline.S b/arch/riscv/kernel/probes/opt_trampoline.S
>> index 16160c4367ff..75e34e373cf2 100644
>> --- a/arch/riscv/kernel/probes/opt_trampoline.S
>> +++ b/arch/riscv/kernel/probes/opt_trampoline.S
>> @@ -1,12 +1,137 @@
>> /* SPDX-License-Identifier: GPL-2.0-only */
>> /*
>> * Copyright (C) 2022 Guokai Chen
>> + * Copyright (C) 2022 Liao, Chang <liaochang1@huawei.com>
>> */
>>
>> #include <linux/linkage.h>
>>
>> +#include <asm/asm.h>
>> #incldue <asm/csr.h>
>> #include <asm/asm-offsets.h>
>>
>> SYM_ENTRY(optprobe_template_entry, SYM_L_GLOBAL, SYM_A_NONE)
>> + addi sp, sp, -(PT_SIZE_ON_STACK)
>> + REG_S x1, PT_RA(sp)
>> + REG_S x2, PT_SP(sp)
>> + REG_S x3, PT_GP(sp)
>> + REG_S x4, PT_TP(sp)
>> + REG_S x5, PT_T0(sp)
>> + REG_S x6, PT_T1(sp)
>> + REG_S x7, PT_T2(sp)
>> + REG_S x8, PT_S0(sp)
>> + REG_S x9, PT_S1(sp)
>> + REG_S x10, PT_A0(sp)
>> + REG_S x11, PT_A1(sp)
>> + REG_S x12, PT_A2(sp)
>> + REG_S x13, PT_A3(sp)
>> + REG_S x14, PT_A4(sp)
>> + REG_S x15, PT_A5(sp)
>> + REG_S x16, PT_A6(sp)
>> + REG_S x17, PT_A7(sp)
>> + REG_S x18, PT_S2(sp)
>> + REG_S x19, PT_S3(sp)
>> + REG_S x20, PT_S4(sp)
>> + REG_S x21, PT_S5(sp)
>> + REG_S x22, PT_S6(sp)
>> + REG_S x23, PT_S7(sp)
>> + REG_S x24, PT_S8(sp)
>> + REG_S x25, PT_S9(sp)
>> + REG_S x26, PT_S10(sp)
>> + REG_S x27, PT_S11(sp)
>> + REG_S x28, PT_T3(sp)
>> + REG_S x29, PT_T4(sp)
>> + REG_S x30, PT_T5(sp)
>> + REG_S x31, PT_T6(sp)
>> + /* Update fp is friendly for stacktrace */
>> + addi s0, sp, (PT_SIZE_ON_STACK)
>> + j 1f
>> +
>> +SYM_ENTRY(optprobe_template_save, SYM_L_GLOBAL, SYM_A_NONE)
>> + /*
>> + * Step1:
>> + * Filled with the pointer to optimized_kprobe data
>> + */
>> + .dword 0
>> +1:
>> + /* Load optimize_kprobe pointer from .dword below */
>> + auipc a0, 0
>> + REG_L a0, -8(a0)
>> + add a1, sp, x0
>> +
>> +SYM_ENTRY(optprobe_template_call, SYM_L_GLOBAL, SYM_A_NONE)
>> + /*
>> + * Step2:
>> + * <IMME> of AUIPC/JALR are modified to the offset to optimized_callback
>> + * jump target is loaded from above .dword.
>> + */
>> + auipc ra, 0
>> + jalr ra, 0(ra)
>> +
>> + REG_L x1, PT_RA(sp)
>> + REG_L x3, PT_GP(sp)
>> + REG_L x4, PT_TP(sp)
>> + REG_L x5, PT_T0(sp)
>> + REG_L x6, PT_T1(sp)
>> + REG_L x7, PT_T2(sp)
>> + REG_L x8, PT_S0(sp)
>> + REG_L x9, PT_S1(sp)
>> + REG_L x10, PT_A0(sp)
>> + REG_L x11, PT_A1(sp)
>> + REG_L x12, PT_A2(sp)
>> + REG_L x13, PT_A3(sp)
>> + REG_L x14, PT_A4(sp)
>> + REG_L x15, PT_A5(sp)
>> + REG_L x16, PT_A6(sp)
>> + REG_L x17, PT_A7(sp)
>> + REG_L x18, PT_S2(sp)
>> + REG_L x19, PT_S3(sp)
>> + REG_L x20, PT_S4(sp)
>> + REG_L x21, PT_S5(sp)
>> + REG_L x22, PT_S6(sp)
>> + REG_L x23, PT_S7(sp)
>> + REG_L x24, PT_S8(sp)
>> + REG_L x25, PT_S9(sp)
>> + REG_L x26, PT_S10(sp)
>> + REG_L x27, PT_S11(sp)
>> + REG_L x28, PT_T3(sp)
>> + REG_L x29, PT_T4(sp)
>> + REG_L x30, PT_T5(sp)
>> + REG_L x31, PT_T6(sp)
>> + REG_L x2, PT_SP(sp)
>> + addi sp, sp, (PT_SIZE_ON_STACK)
>> +
>> +SYM_ENTRY(optprobe_template_insn, SYM_L_GLOBAL, SYM_A_NONE)
>> + /*
>> + * Step3:
>> + * NOPS will be replaced by the probed instruction, at worst case 3 RVC
>> + * and 1 RVI instructions is about to execute out of line.
>> + */
>> + nop
>
> A nop here will be either a compressed nop or a non-compressed,
> depending on the build (C-enabled or not), right? Maybe be explicit to
> the assembler what you want?
>

You are right. If CONFIG_RISCV_ISA_C is disabled, two NOPs are enough
for 2 RVI instructions to execute out of line; if CONFIG_RISCV_ISA_C is
enabled, eight C.NOPs are needed here for the worst case (3 RVC + 1 RVI).

I will use {C}.NOP explicitly for the different configurations in the
next revision, thanks.

>
> Björn

"liaochang (A)" <liaochang1@huawei.com> writes:

>>> +SYM_ENTRY(optprobe_template_insn, SYM_L_GLOBAL, SYM_A_NONE)
>>> + /*
>>> + * Step3:
>>> + * NOPS will be replaced by the probed instruction, at worst case 3 RVC
>>> + * and 1 RVI instructions is about to execute out of line.
>>> + */
>>> + nop
>>
>> A nop here will be either a compressed nop or a non-compressed,
>> depending on the build (C-enabled or not), right? Maybe be explicit to
>> the assembler what you want?
>>
>
> You are right. If CONFIG_RISCV_ISA_C is disabled, two NOPs are enough
> for 2 RVI instructions to execute out of line; if CONFIG_RISCV_ISA_C is
> enabled, eight C.NOPs are needed here for the worst case (3 RVC + 1 RVI).
>
> I will use {C}.NOP explicitly for the different configurations in the
> next revision, thanks.

What I meant was that "nop" can expand to compressed instructions, and
you should be explicit. So you know how it's expanded by the
compiler/assembler.

An example:

$ cat bar.S
.text
bar:
	nop
	nop
$ riscv64-linux-gnu-gcc -O2 -o bar.o -c bar.S && riscv64-linux-gnu-objdump -M no-aliases -d bar.o

bar.o:     file format elf64-littleriscv


Disassembly of section .text:

0000000000000000 <bar>:
   0:	0001	c.addi	zero,0
   2:	0001	c.addi	zero,0

vs

$ cat foo.S
.text
foo:
	.option norvc
	nop
	nop

$ riscv64-linux-gnu-gcc -O2 -o foo.o -c foo.S && riscv64-linux-gnu-objdump -M no-aliases -d foo.o

foo.o:     file format elf64-littleriscv


Disassembly of section .text:

0000000000000000 <foo>:
   0:	00000013	addi	zero,zero,0
   4:	00000013	addi	zero,zero,0


Björn
On 2023/1/4 17:12, Björn Töpel wrote:
> "liaochang (A)" <liaochang1@huawei.com> writes:
>
>>>> +SYM_ENTRY(optprobe_template_insn, SYM_L_GLOBAL, SYM_A_NONE)
>>>> + /*
>>>> + * Step3:
>>>> + * NOPS will be replaced by the probed instruction, at worst case 3 RVC
>>>> + * and 1 RVI instructions is about to execute out of line.
>>>> + */
>>>> + nop
>>>
>>> A nop here will be either a compressed nop or a non-compressed,
>>> depending on the build (C-enabled or not), right? Maybe be explicit to
>>> the assembler what you want?
>>>
>>
>> You are right. If CONFIG_RISCV_ISA_C is disabled, two NOPs are enough
>> for 2 RVI instructions to execute out of line; if CONFIG_RISCV_ISA_C is
>> enabled, eight C.NOPs are needed here for the worst case (3 RVC + 1 RVI).
>>
>> I will use {C}.NOP explicitly for the different configurations in the
>> next revision, thanks.
>
> What I meant was that "nop" can expand to compressed instructions, and
> you should be explicit. So you know how it's expanded by the
> compiler/assembler.
>
> An example:
>
> $ cat bar.S
> .text
> bar:
> 	nop
> 	nop
> $ riscv64-linux-gnu-gcc -O2 -o bar.o -c bar.S && riscv64-linux-gnu-objdump -M no-aliases -d bar.o
>
> bar.o:     file format elf64-littleriscv
>
>
> Disassembly of section .text:
>
> 0000000000000000 <bar>:
>    0:	0001	c.addi	zero,0
>    2:	0001	c.addi	zero,0
>
> vs
>
> $ cat foo.S
> .text
> foo:
> 	.option norvc
> 	nop
> 	nop
>
> $ riscv64-linux-gnu-gcc -O2 -o foo.o -c foo.S && riscv64-linux-gnu-objdump -M no-aliases -d foo.o
>
> foo.o:     file format elf64-littleriscv
>
>
> Disassembly of section .text:
>
> 0000000000000000 <foo>:
>    0:	00000013	addi	zero,zero,0
>    4:	00000013	addi	zero,zero,0

The above examples are very clear; I will use these expanded
instructions in the next revision, thanks.

>
>
> Björn
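For reference, the byte counts behind the two cases discussed above
(assuming the usual 2-byte RVC and 4-byte RVI encodings): the worst case
of 3 RVC + 1 RVI is 3*2 + 4 = 10 bytes, which eight c.nop slots (16 bytes)
comfortably cover, while with the C extension disabled two 4-byte nops
give the 8 bytes needed for 2 RVI instructions. A minimal sketch of making
the optprobe_template_insn padding explicit, in the spirit of the example
above; the counts and layout are illustrative only, not taken from a
posted revision:

	/*
	 * Hypothetical explicit padding for the out-of-line slot;
	 * counts follow the worst cases discussed in this thread.
	 */
#ifdef CONFIG_RISCV_ISA_C
	/* worst case out of line: 3 RVC + 1 RVI = 10 bytes */
	c.nop
	c.nop
	c.nop
	c.nop
	c.nop
	c.nop
	c.nop
	c.nop
#else
	.option push
	.option norvc
	/* worst case out of line: 2 RVI = 8 bytes */
	nop
	nop
	.option pop
#endif

Spelling the padding out this way keeps the slot size independent of how
the assembler happens to expand the "nop" alias for a given -march.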
diff --git a/arch/riscv/include/asm/kprobes.h b/arch/riscv/include/asm/kprobes.h
index e85130c9112f..e40c837d0a1d 100644
--- a/arch/riscv/include/asm/kprobes.h
+++ b/arch/riscv/include/asm/kprobes.h
@@ -46,10 +46,26 @@ bool kprobe_single_step_handler(struct pt_regs *regs);
 /* optinsn template addresses */
 extern __visible kprobe_opcode_t optprobe_template_entry[];
 extern __visible kprobe_opcode_t optprobe_template_end[];
+extern __visible kprobe_opcode_t optprobe_template_save[];
+extern __visible kprobe_opcode_t optprobe_template_call[];
+extern __visible kprobe_opcode_t optprobe_template_insn[];
+extern __visible kprobe_opcode_t optprobe_template_return[];
 
 #define MAX_OPTINSN_SIZE \
 	((unsigned long)optprobe_template_end - \
 	 (unsigned long)optprobe_template_entry)
+#define DETOUR_SAVE_OFFSET \
+	((unsigned long)optprobe_template_save - \
+	 (unsigned long)optprobe_template_entry)
+#define DETOUR_CALL_OFFSET \
+	((unsigned long)optprobe_template_call - \
+	 (unsigned long)optprobe_template_entry)
+#define DETOUR_INSN_OFFSET \
+	((unsigned long)optprobe_template_insn - \
+	 (unsigned long)optprobe_template_entry)
+#define DETOUR_RETURN_OFFSET \
+	((unsigned long)optprobe_template_return - \
+	 (unsigned long)optprobe_template_entry)
 
 /*
  * For RVI and RVC hybird encoding kernel, althought long jump just needs
diff --git a/arch/riscv/kernel/probes/opt.c b/arch/riscv/kernel/probes/opt.c
index 258a283c906d..bc232fce5b39 100644
--- a/arch/riscv/kernel/probes/opt.c
+++ b/arch/riscv/kernel/probes/opt.c
@@ -11,9 +11,37 @@
 #include <linux/kprobes.h>
 #include <asm/kprobes.h>
 #include <asm/patch.h>
+#include <asm/asm-offsets.h>
 
 #include "simulate-insn.h"
 #include "decode-insn.h"
+#include "../../net/bpf_jit.h"
+
+static void
+optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs)
+{
+	unsigned long flags;
+	struct kprobe_ctlblk *kcb;
+
+	/* Save skipped registers */
+	regs->epc = (unsigned long)op->kp.addr;
+	regs->orig_a0 = ~0UL;
+
+	local_irq_save(flags);
+	kcb = get_kprobe_ctlblk();
+
+	if (kprobe_running()) {
+		kprobes_inc_nmissed_count(&op->kp);
+	} else {
+		__this_cpu_write(current_kprobe, &op->kp);
+		kcb->kprobe_status = KPROBE_HIT_ACTIVE;
+		opt_pre_handler(&op->kp, regs);
+		__this_cpu_write(current_kprobe, NULL);
+	}
+	local_irq_restore(flags);
+}
+
+NOKPROBE_SYMBOL(optimized_callback)
 
 static inline int in_auipc_jalr_range(long val)
 {
@@ -30,6 +58,11 @@ static inline int in_auipc_jalr_range(long val)
 #endif
 }
 
+#define DETOUR_ADDR(code, offs) \
+	((void *)((unsigned long)(code) + (offs)))
+#define DETOUR_INSN(code, offs) \
+	(*(kprobe_opcode_t *)((unsigned long)(code) + (offs)))
+
 /*
  * Copy optprobe assembly code template into detour buffer and modify some
  * instructions for each kprobe.
@@ -38,6 +71,49 @@ static void
 prepare_detour_buffer(kprobe_opcode_t *code, kprobe_opcode_t *slot,
 		      int rd, struct optimized_kprobe *op, kprobe_opcode_t opcode)
 {
+	long offs;
+	unsigned long data;
+
+	memcpy(code, optprobe_template_entry, MAX_OPTINSN_SIZE);
+
+	/* Step1: record optimized_kprobe pointer into detour buffer */
+	memcpy(DETOUR_ADDR(code, DETOUR_SAVE_OFFSET), &op, sizeof(op));
+
+	/*
+	 * Step2
+	 * auipc ra, 0 --> aupic ra, HI20.{optimized_callback - pc}
+	 * jalr ra, 0(ra) --> jalr ra, LO12.{optimized_callback - pc}(ra)
+	 */
+	offs = (unsigned long)&optimized_callback -
+	       (unsigned long)DETOUR_ADDR(slot, DETOUR_CALL_OFFSET);
+	DETOUR_INSN(code, DETOUR_CALL_OFFSET) =
+		rv_auipc(1, (offs + (1 << 11)) >> 12);
+	DETOUR_INSN(code, DETOUR_CALL_OFFSET + 0x4) =
+		rv_jalr(1, 1, offs & 0xFFF);
+
+	/* Step3: copy replaced instructions into detour buffer */
+	memcpy(DETOUR_ADDR(code, DETOUR_INSN_OFFSET), op->kp.addr,
+	       op->optinsn.length);
+	memcpy(DETOUR_ADDR(code, DETOUR_INSN_OFFSET), &opcode,
+	       GET_INSN_LENGTH(opcode));
+
+	/* Step4: record return address of long jump into detour buffer */
+	data = (unsigned long)op->kp.addr + op->optinsn.length;
+	memcpy(DETOUR_ADDR(code, DETOUR_RETURN_OFFSET), &data, sizeof(data));
+
+	/*
+	 * Step5
+	 * auipc ra, 0 --> auipc rd, 0
+	 * ld/w ra, -4(ra) --> ld/w rd, -8(rd)
+	 * jalr x0, 0(ra) --> jalr x0, 0(rd)
+	 */
+	DETOUR_INSN(code, DETOUR_RETURN_OFFSET + 0x8) = rv_auipc(rd, 0);
+#if __riscv_xlen == 32
+	DETOUR_INSN(code, DETOUR_RETURN_OFFSET + 0xC) = rv_lw(rd, -8, rd);
+#else
+	DETOUR_INSN(code, DETOUR_RETURN_OFFSET + 0xC) = rv_ld(rd, -8, rd);
+#endif
+	DETOUR_INSN(code, DETOUR_RETURN_OFFSET + 0x10) = rv_jalr(0, rd, 0);
 }
 
 /* Registers the first usage of which is the destination of instruction */
diff --git a/arch/riscv/kernel/probes/opt_trampoline.S b/arch/riscv/kernel/probes/opt_trampoline.S
index 16160c4367ff..75e34e373cf2 100644
--- a/arch/riscv/kernel/probes/opt_trampoline.S
+++ b/arch/riscv/kernel/probes/opt_trampoline.S
@@ -1,12 +1,137 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Copyright (C) 2022 Guokai Chen
+ * Copyright (C) 2022 Liao, Chang <liaochang1@huawei.com>
  */
 
 #include <linux/linkage.h>
 
+#include <asm/asm.h>
 #incldue <asm/csr.h>
 #include <asm/asm-offsets.h>
 
 SYM_ENTRY(optprobe_template_entry, SYM_L_GLOBAL, SYM_A_NONE)
+	addi sp, sp, -(PT_SIZE_ON_STACK)
+	REG_S x1, PT_RA(sp)
+	REG_S x2, PT_SP(sp)
+	REG_S x3, PT_GP(sp)
+	REG_S x4, PT_TP(sp)
+	REG_S x5, PT_T0(sp)
+	REG_S x6, PT_T1(sp)
+	REG_S x7, PT_T2(sp)
+	REG_S x8, PT_S0(sp)
+	REG_S x9, PT_S1(sp)
+	REG_S x10, PT_A0(sp)
+	REG_S x11, PT_A1(sp)
+	REG_S x12, PT_A2(sp)
+	REG_S x13, PT_A3(sp)
+	REG_S x14, PT_A4(sp)
+	REG_S x15, PT_A5(sp)
+	REG_S x16, PT_A6(sp)
+	REG_S x17, PT_A7(sp)
+	REG_S x18, PT_S2(sp)
+	REG_S x19, PT_S3(sp)
+	REG_S x20, PT_S4(sp)
+	REG_S x21, PT_S5(sp)
+	REG_S x22, PT_S6(sp)
+	REG_S x23, PT_S7(sp)
+	REG_S x24, PT_S8(sp)
+	REG_S x25, PT_S9(sp)
+	REG_S x26, PT_S10(sp)
+	REG_S x27, PT_S11(sp)
+	REG_S x28, PT_T3(sp)
+	REG_S x29, PT_T4(sp)
+	REG_S x30, PT_T5(sp)
+	REG_S x31, PT_T6(sp)
+	/* Update fp is friendly for stacktrace */
+	addi s0, sp, (PT_SIZE_ON_STACK)
+	j 1f
+
+SYM_ENTRY(optprobe_template_save, SYM_L_GLOBAL, SYM_A_NONE)
+	/*
+	 * Step1:
+	 * Filled with the pointer to optimized_kprobe data
+	 */
+	.dword 0
+1:
+	/* Load optimize_kprobe pointer from .dword below */
+	auipc a0, 0
+	REG_L a0, -8(a0)
+	add a1, sp, x0
+
+SYM_ENTRY(optprobe_template_call, SYM_L_GLOBAL, SYM_A_NONE)
+	/*
+	 * Step2:
+	 * <IMME> of AUIPC/JALR are modified to the offset to optimized_callback
+	 * jump target is loaded from above .dword.
+	 */
+	auipc ra, 0
+	jalr ra, 0(ra)
+
+	REG_L x1, PT_RA(sp)
+	REG_L x3, PT_GP(sp)
+	REG_L x4, PT_TP(sp)
+	REG_L x5, PT_T0(sp)
+	REG_L x6, PT_T1(sp)
+	REG_L x7, PT_T2(sp)
+	REG_L x8, PT_S0(sp)
+	REG_L x9, PT_S1(sp)
+	REG_L x10, PT_A0(sp)
+	REG_L x11, PT_A1(sp)
+	REG_L x12, PT_A2(sp)
+	REG_L x13, PT_A3(sp)
+	REG_L x14, PT_A4(sp)
+	REG_L x15, PT_A5(sp)
+	REG_L x16, PT_A6(sp)
+	REG_L x17, PT_A7(sp)
+	REG_L x18, PT_S2(sp)
+	REG_L x19, PT_S3(sp)
+	REG_L x20, PT_S4(sp)
+	REG_L x21, PT_S5(sp)
+	REG_L x22, PT_S6(sp)
+	REG_L x23, PT_S7(sp)
+	REG_L x24, PT_S8(sp)
+	REG_L x25, PT_S9(sp)
+	REG_L x26, PT_S10(sp)
+	REG_L x27, PT_S11(sp)
+	REG_L x28, PT_T3(sp)
+	REG_L x29, PT_T4(sp)
+	REG_L x30, PT_T5(sp)
+	REG_L x31, PT_T6(sp)
+	REG_L x2, PT_SP(sp)
+	addi sp, sp, (PT_SIZE_ON_STACK)
+
+SYM_ENTRY(optprobe_template_insn, SYM_L_GLOBAL, SYM_A_NONE)
+	/*
+	 * Step3:
+	 * NOPS will be replaced by the probed instruction, at worst case 3 RVC
+	 * and 1 RVI instructions is about to execute out of line.
+	 */
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	j 2f
+
+SYM_ENTRY(optprobe_template_return, SYM_L_GLOBAL, SYM_A_NONE)
+	/*
+	 * Step4:
+	 * Filled with the return address of long jump(AUIPC/JALR)
+	 */
+	.dword 0
+2:
+	/*
+	 * Step5:
+	 * The <RA> of AUIPC/LD/JALR will be replaced for each kprobe,
+	 * used to read return address saved in .dword above.
+	 */
+	auipc ra, 0
+	REG_L ra, -8(ra)
+	jalr x0, 0(ra)
 SYM_ENTRY(optprobe_template_end, SYM_L_GLOBAL, SYM_A_NONE)
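As a worked illustration of the Step2 rewrite performed by
prepare_detour_buffer() above: the PC-relative offset from the detour
buffer's AUIPC to optimized_callback is split into a HI20 immediate for
AUIPC and a LO12 immediate for JALR. Because JALR sign-extends its 12-bit
immediate, the high part is rounded first, which is what the
(offs + (1 << 11)) >> 12 expression does. With a purely hypothetical
offset of 0x12F80 (not taken from a real build), the patched pair would
read:

	/*
	 * Worked example, offset to optimized_callback = 0x12F80 (hypothetical):
	 *   HI20 = (0x12F80 + 0x800) >> 12 = 0x13
	 *   LO12 = 0x12F80 & 0xFFF = 0xF80, sign-extended by jalr to -0x80
	 *   (0x13 << 12) - 0x80 = 0x12F80
	 */
	auipc	ra, 0x13
	jalr	ra, -0x80(ra)

The Step5 return jump is patched differently: instead of encoding a
PC-relative offset, it loads the absolute return address that Step4
stored in the .dword slot and jumps through it.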