Message ID | 20240618035242.3491691-1-ruanjinjie@huawei.com (mailing list archive)
---|---
State | Changes Requested
Delegated to | Masami Hiramatsu
Series | [v2] arm: Add KPROBES_ON_FTRACE support
Gentle ping.

On 2024/6/18 11:52, Jinjie Ruan wrote:
> Add support for kprobes on ftrace call sites to avoid much of the overhead
> with regular kprobes.
[...]
Hi Jinjie,

Sorry for reviewing so late. I have some comments below.

On Tue, 18 Jun 2024 11:52:42 +0800
Jinjie Ruan <ruanjinjie@huawei.com> wrote:

> Add support for kprobes on ftrace call sites to avoid much of the overhead
> with regular kprobes.

[...]

> --- a/arch/arm/Kconfig
> +++ b/arch/arm/Kconfig
> @@ -114,6 +114,7 @@ config ARM
>  	select HAVE_KPROBES if !XIP_KERNEL && !CPU_ENDIAN_BE32 && !CPU_V7M
> +	select HAVE_KPROBES_ON_FTRACE if !XIP_KERNEL && !CPU_ENDIAN_BE32 && !CPU_V7M

You should write this as

	select HAVE_KPROBES_ON_FTRACE if HAVE_KPROBES

to make it clear whether this needs more restrictions than Kprobes.

[...]

> +/* Ftrace callback handler for kprobes -- called with preemption disabled */
> +void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
> +			   struct ftrace_ops *ops, struct ftrace_regs *regs)

nit: the ftrace_regs argument should be named 'fregs' to distinguish it from
pt_regs.

> +{

To avoid accessing regs inside fregs directly, use ftrace_get_regs() here:

	struct pt_regs *regs = ftrace_get_regs(fregs);

and check that regs is not NULL.

> +	struct kprobe *p;
> +	struct kprobe_ctlblk *kcb;

----
	int bit;

	if (unlikely(kprobe_ftrace_disabled))
		return;

	bit = ftrace_test_recursion_trylock(ip, parent_ip);
	if (bit < 0)
		return;
----

See arch/x86/kernel/kprobes/ftrace.c.

[...]

> +		if (!p->pre_handler || !p->pre_handler(p, &(regs->regs))) {
> +			/*
> +			 * Emulate singlestep (and also recover regs->pc)
> +			 * as if there is a nop
> +			 */
> +			instruction_pointer_set(&(regs->regs),
> +				(unsigned long)p->addr + MCOUNT_INSN_SIZE);

Hm, the x86 implementation is somewhat wrong (not good). This instruction
pointer should be updated only if p->post_handler is there...

> +			if (unlikely(p->post_handler)) {
> +				kcb->kprobe_status = KPROBE_HIT_SSDONE;
> +				p->post_handler(p, &(regs->regs), 0);
> +			}
> +			instruction_pointer_set(&(regs->regs), orig_ip);
> +		}
[...]
> +		__this_cpu_write(current_kprobe, NULL);
> +	}

And

out:
	ftrace_test_recursion_unlock(bit);

[...]

> +kprobe_opcode_t *arch_adjust_kprobe_addr(unsigned long addr, unsigned long offset,
> +					 bool *on_func_entry)
> +{

Here @addr is a symbol address and @offset is the probe offset from the symbol.

> +#ifdef CONFIG_KPROBES_ON_FTRACE
> +	unsigned long nop_offset = 0;

Per the comment in ftrace.h above, the mcount callsite code should be

	<FUNCTION>:
		PUSH {LR}
		FTRACE_NOP

Thus if @offset is 0 or 4, we should check that the function's entry sequence
(2 instructions) matches the above pattern; if so, the offset should be skipped
and the address should be @addr + 4. If there is no such pattern, do not change
it and return @addr + @offset. So, something like:

	#define ARM_MCOUNT_OFFSET 4

	if (offset <= ARM_MCOUNT_OFFSET) {
		if (copy_from_kernel_nofault())
			goto out;
		if (insn != FTRACE_PUSH_LR)
			goto out;
		if (copy_from_kernel_nofault())
			goto out;
		if (insn != FTRACE_NOP)
			goto out;
		if (!offset)
			offset = ARM_MCOUNT_OFFSET;
		*on_func_entry = true;
	}
out:
	return addr + offset;

Thank you,

[...]
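Pulling these review points together, a minimal sketch of a revised handler,
modeled on arch/x86/kernel/kprobes/ftrace.c, could look as follows. It is
untested on ARM and assumes the target tree provides kprobe_ftrace_disabled
and the ftrace_test_recursion_trylock()/unlock() API:

#include <linux/kprobes.h>
#include <asm/ftrace.h>

/* Ftrace callback handler for kprobes -- called with preemption disabled */
void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
			   struct ftrace_ops *ops, struct ftrace_regs *fregs)
{
	struct pt_regs *regs = ftrace_get_regs(fregs);
	struct kprobe_ctlblk *kcb;
	struct kprobe *p;
	int bit;

	if (unlikely(kprobe_ftrace_disabled) || !regs)
		return;

	bit = ftrace_test_recursion_trylock(ip, parent_ip);
	if (bit < 0)
		return;

	p = get_kprobe((kprobe_opcode_t *)ip);
	if (unlikely(!p) || kprobe_disabled(p))
		goto out;

	kcb = get_kprobe_ctlblk();
	if (kprobe_running()) {
		kprobes_inc_nmissed_count(p);
	} else {
		unsigned long orig_ip = instruction_pointer(regs);

		instruction_pointer_set(regs, ip);

		__this_cpu_write(current_kprobe, p);
		kcb->kprobe_status = KPROBE_HIT_ACTIVE;
		if (!p->pre_handler || !p->pre_handler(p, regs)) {
			/*
			 * Per the review: only emulate the single-step
			 * (advance pc past the mcount site) when a
			 * post_handler actually needs to observe it.
			 */
			if (unlikely(p->post_handler)) {
				instruction_pointer_set(regs,
					(unsigned long)p->addr + MCOUNT_INSN_SIZE);
				kcb->kprobe_status = KPROBE_HIT_SSDONE;
				p->post_handler(p, regs, 0);
			}
			instruction_pointer_set(regs, orig_ip);
		}
		/*
		 * If pre_handler returns !0, it changed regs->pc, so
		 * post_handler emulation is skipped entirely.
		 */
		__this_cpu_write(current_kprobe, NULL);
	}
out:
	ftrace_test_recursion_unlock(bit);
}
NOKPROBE_SYMBOL(kprobe_ftrace_handler);

Taking the recursion lock before get_kprobe() mirrors the x86 version and
keeps the handler from re-entering itself through any traced function it
calls.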
On 2024/10/21 16:40, Masami Hiramatsu wrote:
> Hi Jinjie,
>
> Sorry for reviewing so late. I have some comments below.

[...]

>> +kprobe_opcode_t *arch_adjust_kprobe_addr(unsigned long addr, unsigned long offset,
>> +					 bool *on_func_entry)
>> +{
>
> Here @addr is a symbol address and @offset is the probe offset from the symbol.
>
>> +#ifdef CONFIG_KPROBES_ON_FTRACE
>> +	unsigned long nop_offset = 0;
>
> Per the comment in ftrace.h above, the mcount callsite code should be
>
> 	<FUNCTION>:
> 		PUSH {LR}
> 		FTRACE_NOP
>
> Thus if @offset is 0 or 4, we should check that the function's entry sequence
> (2 instructions) matches the above pattern; if so, the offset should be skipped
> and the address should be @addr + 4. If there is no such pattern, do not change
> it and return @addr + @offset.

Hi, Masami,

Actually, the pattern does not have to sit at a fixed offset. The following
are some arm32 examples: before the "push {lr}; bl __gnu_mcount_nc" pair
there may be a varying number of stack-push instructions:

c04fa4dc <do_sys_open>:
c04fa4dc:	e92d4030	push	{r4, r5, lr}
c04fa4e0:	e24dd03c	sub	sp, sp, #60	; 0x3c
c04fa4e4:	e52de004	push	{lr}		; (str lr, [sp, #-4]!)
c04fa4e8:	ebf85796	bl	c0310348 <__gnu_mcount_nc>

c03453dc <__se_sys_clone>:
c03453dc:	e92d40f0	push	{r4, r5, r6, r7, lr}
c03453e0:	e24dd05c	sub	sp, sp, #92	; 0x5c
c03453e4:	e52de004	push	{lr}		; (str lr, [sp, #-4]!)
c03453e8:	ebff2bd6	bl	c0310348 <__gnu_mcount_nc>
c03453ec:	e1a06003	mov	r6, r3

c0378820 <sched_show_task>:
c0378820:	e92d4810	push	{r4, fp, lr}
c0378824:	e28db008	add	fp, sp, #8
c0378828:	e24dd00c	sub	sp, sp, #12
c037882c:	e52de004	push	{lr}		; (str lr, [sp, #-4]!)
c0378830:	ebfe5ec4	bl	c0310348 <__gnu_mcount_nc>

8010111c <handle_fiq_as_nmi>:
8010111c:	e92d4070	push	{r4, r5, r6, lr}
80101120:	e52de004	push	{lr}		; (str lr, [sp, #-4]!)
80101124:	eb007f4a	bl	80120e54 <__gnu_mcount_nc>

80101334 <__do_softirq>:
80101334:	e52de004	push	{lr}		; (str lr, [sp, #-4]!)
80101338:	eb007ec5	bl	80120e54 <__gnu_mcount_nc>

> So, something like:
>
> 	#define ARM_MCOUNT_OFFSET 4
[...]
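Given the varying prologue lengths above, one possible middle ground between
the v2 unbounded scan and the fixed-offset check is to keep scanning, but
bound the scan and anchor it on the "push {lr}" that immediately precedes the
mcount site. A rough sketch, assuming ARM (non-Thumb2) encodings and that
dynamic ftrace has already NOP-patched the call site; FTRACE_PUSH_LR and
ENTRY_SCAN_WORDS are illustrative names, not existing kernel symbols:

#include <linux/kprobes.h>
#include <linux/uaccess.h>
#include <asm/ftrace.h>

#define FTRACE_PUSH_LR	 0xe52de004	/* push {lr} (str lr, [sp, #-4]!) */
#define ENTRY_SCAN_WORDS 8		/* arbitrary bound on prologue length */

kprobe_opcode_t *arch_adjust_kprobe_addr(unsigned long addr,
					 unsigned long offset,
					 bool *on_func_entry)
{
	u32 insn, prev = 0;
	unsigned long i;

	*on_func_entry = !offset;
	for (i = 0; i < ENTRY_SCAN_WORDS; i++) {
		/* 'addr' may not be safe to dereference directly */
		if (copy_from_kernel_nofault(&insn, (void *)(addr + i * 4),
					     sizeof(insn)))
			return NULL;
		/* Look for the "push {lr}" / FTRACE_NOP pair */
		if (prev == FTRACE_PUSH_LR && insn == FTRACE_NOP) {
			if (offset <= i * 4) {
				*on_func_entry = true;
				offset = i * 4;
			}
			break;
		}
		prev = insn;
	}
	return (kprobe_opcode_t *)(addr + offset);
}

A Thumb-2 kernel would need the Thumb encodings and 2-byte scan granularity,
and an ftrace-enabled site holds a branch rather than FTRACE_NOP, so this is
only a starting point for the next revision.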
diff --git a/Documentation/features/debug/kprobes-on-ftrace/arch-support.txt b/Documentation/features/debug/kprobes-on-ftrace/arch-support.txt
index 02febc883588..4ecd7d53e859 100644
--- a/Documentation/features/debug/kprobes-on-ftrace/arch-support.txt
+++ b/Documentation/features/debug/kprobes-on-ftrace/arch-support.txt
@@ -8,7 +8,7 @@
     -----------------------
     |       alpha: | TODO |
     |         arc: | TODO |
-    |         arm: | TODO |
+    |         arm: |  ok  |
     |       arm64: | TODO |
     |        csky: |  ok  |
     |     hexagon: | TODO |
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 9f09a16338e3..036381c5d42f 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -114,6 +114,7 @@ config ARM
 	select HAVE_KERNEL_LZO
 	select HAVE_KERNEL_XZ
 	select HAVE_KPROBES if !XIP_KERNEL && !CPU_ENDIAN_BE32 && !CPU_V7M
+	select HAVE_KPROBES_ON_FTRACE if !XIP_KERNEL && !CPU_ENDIAN_BE32 && !CPU_V7M
 	select HAVE_KRETPROBES if HAVE_KPROBES
 	select HAVE_MOD_ARCH_SPECIFIC
 	select HAVE_NMI
diff --git a/arch/arm/include/asm/ftrace.h b/arch/arm/include/asm/ftrace.h
index 5be3ddc96a50..ecf5590f3657 100644
--- a/arch/arm/include/asm/ftrace.h
+++ b/arch/arm/include/asm/ftrace.h
@@ -22,6 +22,23 @@ struct dyn_arch_ftrace {
 #endif
 };
 
+/*
+ * The compiler emitted profiling hook consists of
+ *
+ *	PUSH	{LR}
+ *	BL	__gnu_mcount_nc
+ *
+ * To turn this combined sequence into a NOP, we need to restore the value of
+ * SP before the PUSH. Let's use an ADD rather than a POP into LR, as LR is not
+ * modified anyway, and reloading LR from memory is highly likely to be less
+ * efficient.
+ */
+#ifdef CONFIG_THUMB2_KERNEL
+#define FTRACE_NOP	0xf10d0d04	/* add.w sp, sp, #4 */
+#else
+#define FTRACE_NOP	0xe28dd004	/* add   sp, sp, #4 */
+#endif
+
 static inline unsigned long ftrace_call_adjust(unsigned long addr)
 {
 	/* With Thumb-2, the recorded addresses have the lsb set */
diff --git a/arch/arm/kernel/ftrace.c b/arch/arm/kernel/ftrace.c
index e61591f33a6c..0bb372f5aa1d 100644
--- a/arch/arm/kernel/ftrace.c
+++ b/arch/arm/kernel/ftrace.c
@@ -25,23 +25,6 @@
 #include <asm/stacktrace.h>
 #include <asm/patch.h>
 
-/*
- * The compiler emitted profiling hook consists of
- *
- *	PUSH	{LR}
- *	BL	__gnu_mcount_nc
- *
- * To turn this combined sequence into a NOP, we need to restore the value of
- * SP before the PUSH. Let's use an ADD rather than a POP into LR, as LR is not
- * modified anyway, and reloading LR from memory is highly likely to be less
- * efficient.
- */
-#ifdef CONFIG_THUMB2_KERNEL
-#define NOP		0xf10d0d04	/* add.w sp, sp, #4 */
-#else
-#define NOP		0xe28dd004	/* add   sp, sp, #4 */
-#endif
-
 #ifdef CONFIG_DYNAMIC_FTRACE
 
 static int __ftrace_modify_code(void *data)
@@ -60,7 +43,7 @@ void arch_ftrace_update_code(int command)
 
 static unsigned long ftrace_nop_replace(struct dyn_ftrace *rec)
 {
-	return NOP;
+	return FTRACE_NOP;
 }
 
 void ftrace_caller_from_init(void);
diff --git a/arch/arm/probes/Makefile b/arch/arm/probes/Makefile
index 8b0ea5ace100..b3c355942a21 100644
--- a/arch/arm/probes/Makefile
+++ b/arch/arm/probes/Makefile
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0
 obj-$(CONFIG_UPROBES)		+= decode.o decode-arm.o uprobes/
 obj-$(CONFIG_KPROBES)		+= decode.o kprobes/
+obj-$(CONFIG_KPROBES_ON_FTRACE)	+= ftrace.o
 ifdef CONFIG_THUMB2_KERNEL
 obj-$(CONFIG_KPROBES)		+= decode-thumb.o
 else
diff --git a/arch/arm/probes/ftrace.c b/arch/arm/probes/ftrace.c
new file mode 100644
index 000000000000..0f54b8e5d2a6
--- /dev/null
+++ b/arch/arm/probes/ftrace.c
@@ -0,0 +1,53 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/kprobes.h>
+
+/* Ftrace callback handler for kprobes -- called with preemption disabled */
+void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
+			   struct ftrace_ops *ops, struct ftrace_regs *regs)
+{
+	struct kprobe *p;
+	struct kprobe_ctlblk *kcb;
+
+	p = get_kprobe((kprobe_opcode_t *)ip);
+	if (unlikely(!p) || kprobe_disabled(p))
+		return;
+
+	kcb = get_kprobe_ctlblk();
+	if (kprobe_running()) {
+		kprobes_inc_nmissed_count(p);
+	} else {
+		unsigned long orig_ip = instruction_pointer(&(regs->regs));
+
+		instruction_pointer_set(&(regs->regs), ip);
+
+		__this_cpu_write(current_kprobe, p);
+		kcb->kprobe_status = KPROBE_HIT_ACTIVE;
+		if (!p->pre_handler || !p->pre_handler(p, &(regs->regs))) {
+			/*
+			 * Emulate singlestep (and also recover regs->pc)
+			 * as if there is a nop
+			 */
+			instruction_pointer_set(&(regs->regs),
+				(unsigned long)p->addr + MCOUNT_INSN_SIZE);
+			if (unlikely(p->post_handler)) {
+				kcb->kprobe_status = KPROBE_HIT_SSDONE;
+				p->post_handler(p, &(regs->regs), 0);
+			}
+			instruction_pointer_set(&(regs->regs), orig_ip);
+		}
+
+		/*
+		 * If pre_handler returns !0, it changes regs->pc. We have to
+		 * skip emulating post_handler.
+		 */
+		__this_cpu_write(current_kprobe, NULL);
+	}
+}
+NOKPROBE_SYMBOL(kprobe_ftrace_handler);
+
+int arch_prepare_kprobe_ftrace(struct kprobe *p)
+{
+	p->ainsn.insn = NULL;
+	return 0;
+}
diff --git a/arch/arm/probes/kprobes/core.c b/arch/arm/probes/kprobes/core.c
index d8238da095df..45ccf8bea5e4 100644
--- a/arch/arm/probes/kprobes/core.c
+++ b/arch/arm/probes/kprobes/core.c
@@ -45,6 +45,38 @@ DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
 DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
 
 
+kprobe_opcode_t *arch_adjust_kprobe_addr(unsigned long addr, unsigned long offset,
+					 bool *on_func_entry)
+{
+#ifdef CONFIG_KPROBES_ON_FTRACE
+	unsigned long nop_offset = 0;
+	u32 insn = 0;
+
+	/*
+	 * Since 'addr' is not guaranteed to be safe to access, use
+	 * copy_from_kernel_nofault() to read the instruction:
+	 */
+	if (copy_from_kernel_nofault(&insn, (void *)(addr + nop_offset),
+				     sizeof(u32)))
+		return NULL;
+
+	while (insn != FTRACE_NOP) {
+		nop_offset += 4;
+		if (copy_from_kernel_nofault(&insn, (void *)(addr + nop_offset),
+					     sizeof(u32)))
+			return NULL;
+	}
+
+	*on_func_entry = offset <= nop_offset;
+	if (*on_func_entry)
+		offset = nop_offset;
+#else
+	*on_func_entry = !offset;
+#endif
+
+	return (kprobe_opcode_t *)(addr + offset);
+}
+
 int __kprobes arch_prepare_kprobe(struct kprobe *p)
 {
 	kprobe_opcode_t insn;
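As an aside on the FTRACE_NOP constant itself: the ARM-mode word can be
reassembled by hand from the data-processing (immediate) encoding. A small
user-space sketch in plain C, purely illustrative:

#include <assert.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/*
	 * ARM data-processing (immediate) layout:
	 * [31:28] cond  [27:25] 001  [24:21] opcode  [20] S
	 * [19:16] Rn    [15:12] Rd   [11:0]  imm12
	 */
	uint32_t insn = (0xeu << 28)	/* cond = AL (always) */
		      | (0x1u << 25)	/* immediate operand form */
		      | (0x4u << 21)	/* opcode = ADD, S = 0 */
		      | (13u  << 16)	/* Rn = r13 (sp) */
		      | (13u  << 12)	/* Rd = r13 (sp) */
		      | 4u;		/* imm12 = 4 */

	assert(insn == 0xe28dd004);	/* "add sp, sp, #4", i.e. FTRACE_NOP */
	printf("FTRACE_NOP = 0x%08" PRIx32 "\n", insn);
	return 0;
}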
Add support for kprobes on ftrace call sites to avoid much of the overhead
with regular kprobes. Try it with simple steps:

  cd /sys/kernel/debug/tracing/
  echo 'p:myprobe sys_clone r0=%r0 r1=%r1 r2=%r2' > kprobe_events
  echo 1 > events/kprobes/enable
  echo 1 > events/kprobes/myprobe/enable
  cat trace
  # tracer: nop
  #
  # entries-in-buffer/entries-written: 2/2   #P:4
  #
  #                          _-----=> irqs-off/BH-disabled
  #                         / _----=> need-resched
  #                        | / _---=> hardirq/softirq
  #                        || / _--=> preempt-depth
  #                        ||| / _-=> migrate-disable
  #                        |||| /     delay
  #   TASK-PID     CPU#  |||||  TIMESTAMP  FUNCTION
  #      | |         |   |||||     |         |
        sh-75      [000] .....    33.793362: myprobe: (sys_clone+0xc/0xa0) r0=0x1200011 r1=0x0 r2=0x0
        sh-75      [000] .....    34.817804: myprobe: (sys_clone+0xc/0xa0) r0=0x1200011 r1=0x0 r2=0x0

  cat /sys/kernel/debug/kprobes/list
  c03453e8  k  sys_clone+0xc    [FTRACE]
                                ^^^^^^

Signed-off-by: Jinjie Ruan <ruanjinjie@huawei.com>
Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202406160646.J89U1UKK-lkp@intel.com/
---
v2:
- Fix the allmodconfig compile issue by renaming "NOP" to "FTRACE_NOP".
---
 .../debug/kprobes-on-ftrace/arch-support.txt |  2 +-
 arch/arm/Kconfig                             |  1 +
 arch/arm/include/asm/ftrace.h                | 17 ++++++
 arch/arm/kernel/ftrace.c                     | 19 +------
 arch/arm/probes/Makefile                     |  1 +
 arch/arm/probes/ftrace.c                     | 53 +++++++++++++++++++
 arch/arm/probes/kprobes/core.c               | 32 +++++++++++
 7 files changed, 106 insertions(+), 19 deletions(-)
 create mode 100644 arch/arm/probes/ftrace.c