| Message ID | 20190501113238.0ab3f9dd@gandalf.local.home (mailing list archive) |
|---|---|
| State | New |
| Series | [RFC,v3] ftrace/x86_64: Emulate call function while updating in breakpoint handler |
This looks sane to me, although I'm surprised that we didn't already
have an annotation for the nonstandard stack frame for asm files. That
probably would be cleaner in a separate commit, but I guess it doesn't
matter.

Anyway, I'm willing to consider the entry code version if it looks a
_lot_ simpler than this (so I'd like to see them side-by-side), but
it's not like this looks all that complicated to me either.

              Linus
On Wed, 1 May 2019 11:01:07 -0700
Linus Torvalds <torvalds@linux-foundation.org> wrote:

> This looks sane to me, although I'm surprised that we didn't already
> have an annotation for the nonstandard stack frame for asm files. That
> probably would be cleaner in a separate commit, but I guess it doesn't
> matter.

It's still an RFC patch, and I was planning on breaking out the
non-standard stack change before the real patches.

> Anyway, I'm willing to consider the entry code version if it looks a
> _lot_ simpler than this (so I'd like to see them side-by-side), but
> it's not like this looks all that complicated to me either.

I got Peter's patch working. Here it is. What do you think?

What's nice about Peter's is that this will easily work for the static
call case too, without needing any more special trampolines.

The diffstat looks nice too:

Peter's patch:

 entry/entry_32.S            |  7 +++++++
 entry/entry_64.S            | 14 ++++++++++++--
 include/asm/text-patching.h | 20 ++++++++++++++++++++
 kernel/ftrace.c             | 25 ++++++++++++++++++++-----
 4 files changed, 59 insertions(+), 7 deletions(-)

My patch:

 arch/x86/include/asm/frame.h |  15 ++++++
 arch/x86/kernel/ftrace.c     | 102 ++++++++++++++++++++++++++++++++++-
 arch/x86/kernel/ftrace_64.S  |  56 +++++++++++++++++++
 3 files changed, 172 insertions(+), 1 deletion(-)

-- Steve

Index: linux-trace.git/arch/x86/entry/entry_32.S
===================================================================
--- linux-trace.git.orig/arch/x86/entry/entry_32.S
+++ linux-trace.git/arch/x86/entry/entry_32.S
@@ -1478,6 +1478,13 @@ ENTRY(int3)
 	ASM_CLAC
 	pushl	$-1				# mark this as an int
 
+	testl	$SEGMENT_RPL_MASK, PT_CS(%esp)
+	jnz	.Lfrom_usermode_no_gap
+	.rept	6
+	pushl	5*4(%esp)
+	.endr
+.Lfrom_usermode_no_gap:
+
 	SAVE_ALL switch_stacks=1
 	ENCODE_FRAME_POINTER
 	TRACE_IRQS_OFF
Index: linux-trace.git/arch/x86/entry/entry_64.S
===================================================================
--- linux-trace.git.orig/arch/x86/entry/entry_64.S
+++ linux-trace.git/arch/x86/entry/entry_64.S
@@ -879,7 +879,7 @@ apicinterrupt IRQ_WORK_VECTOR			irq_work
  * @paranoid == 2 is special: the stub will never switch stacks.  This is for
  * #DF: if the thread stack is somehow unusable, we'll still get a useful OOPS.
  */
-.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1
+.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1 create_gap=0
 ENTRY(\sym)
 	UNWIND_HINT_IRET_REGS offset=\has_error_code*8
@@ -899,6 +899,16 @@ ENTRY(\sym)
 	jnz	.Lfrom_usermode_switch_stack_\@
 	.endif
 
+	.if \create_gap == 1
+	testb	$3, CS-ORIG_RAX(%rsp)
+	jnz	.Lfrom_usermode_no_gap_\@
+	.rept	6
+	pushq	5*8(%rsp)
+	.endr
+	UNWIND_HINT_IRET_REGS offset=8
+.Lfrom_usermode_no_gap_\@:
+	.endif
+
 	.if \paranoid
 	call	paranoid_entry
 	.else
@@ -1130,7 +1140,7 @@ apicinterrupt3 HYPERV_STIMER0_VECTOR \
 #endif /* CONFIG_HYPERV */
 
 idtentry debug			do_debug		has_error_code=0	paranoid=1 shift_ist=DEBUG_STACK
-idtentry int3			do_int3			has_error_code=0
+idtentry int3			do_int3			has_error_code=0	create_gap=1
 idtentry stack_segment		do_stack_segment	has_error_code=1
 
 #ifdef CONFIG_XEN_PV
Index: linux-trace.git/arch/x86/include/asm/text-patching.h
===================================================================
--- linux-trace.git.orig/arch/x86/include/asm/text-patching.h
+++ linux-trace.git/arch/x86/include/asm/text-patching.h
@@ -39,4 +39,24 @@ extern int poke_int3_handler(struct pt_r
 extern void *text_poke_bp(void *addr, const void *opcode, size_t len, void *handler);
 extern int after_bootmem;
 
+static inline void int3_emulate_push(struct pt_regs *regs, unsigned long val)
+{
+	regs->sp -= sizeof(unsigned long);
+	*(unsigned long *)regs->sp = val;
+}
+
+static inline void int3_emulate_jmp(struct pt_regs *regs, unsigned long ip)
+{
+	regs->ip = ip;
+}
+
+#define INT3_INSN_SIZE 1
+#define CALL_INSN_SIZE 5
+
+static inline void int3_emulate_call(struct pt_regs *regs, unsigned long func)
+{
+	int3_emulate_push(regs, regs->ip - INT3_INSN_SIZE + CALL_INSN_SIZE);
+	int3_emulate_jmp(regs, func);
+}
+
 #endif /* _ASM_X86_TEXT_PATCHING_H */
Index: linux-trace.git/arch/x86/kernel/ftrace.c
===================================================================
--- linux-trace.git.orig/arch/x86/kernel/ftrace.c
+++ linux-trace.git/arch/x86/kernel/ftrace.c
@@ -29,6 +29,7 @@
 #include <asm/kprobes.h>
 #include <asm/ftrace.h>
 #include <asm/nops.h>
+#include <asm/text-patching.h>
 
 #ifdef CONFIG_DYNAMIC_FTRACE
 
@@ -231,6 +232,7 @@ int ftrace_modify_call(struct dyn_ftrace
 }
 
 static unsigned long ftrace_update_func;
+static unsigned long ftrace_update_func_call;
 
 static int update_ftrace_func(unsigned long ip, void *new)
 {
@@ -259,6 +261,8 @@ int ftrace_update_ftrace_func(ftrace_fun
 	unsigned char *new;
 	int ret;
 
+	ftrace_update_func_call = (unsigned long)func;
+
 	new = ftrace_call_replace(ip, (unsigned long)func);
 	ret = update_ftrace_func(ip, new);
 
@@ -294,13 +298,21 @@ int ftrace_int3_handler(struct pt_regs *
 	if (WARN_ON_ONCE(!regs))
 		return 0;
 
-	ip = regs->ip - 1;
-	if (!ftrace_location(ip) && !is_ftrace_caller(ip))
-		return 0;
+	ip = regs->ip - INT3_INSN_SIZE;
 
-	regs->ip += MCOUNT_INSN_SIZE - 1;
+	if (ftrace_location(ip)) {
+		int3_emulate_call(regs, (unsigned long)ftrace_regs_caller);
+		return 1;
+	} else if (is_ftrace_caller(ip)) {
+		if (!ftrace_update_func_call) {
+			int3_emulate_jmp(regs, ip + CALL_INSN_SIZE);
+			return 1;
+		}
+		int3_emulate_call(regs, ftrace_update_func_call);
+		return 1;
+	}
 
-	return 1;
+	return 0;
 }
 NOKPROBE_SYMBOL(ftrace_int3_handler);
 
@@ -859,6 +871,8 @@ void arch_ftrace_update_trampoline(struc
 
 	func = ftrace_ops_get_func(ops);
 
+	ftrace_update_func_call = (unsigned long)func;
+
 	/* Do a safe modify in case the trampoline is executing */
 	new = ftrace_call_replace(ip, (unsigned long)func);
 	ret = update_ftrace_func(ip, new);
@@ -960,6 +974,7 @@ static int ftrace_mod_jmp(unsigned long
 {
 	unsigned char *new;
 
+	ftrace_update_func_call = 0UL;
 	new = ftrace_jmp_replace(ip, (unsigned long)func);
 
 	return update_ftrace_func(ip, new);
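To make the text-patching.h helpers above concrete, here is a minimal userspace sketch (plain C, not kernel code; the mock_regs layout and the addresses are made up for illustration) of what int3_emulate_call() does once the breakpoint handler decides to emulate the patched call: push the return address onto the interrupted context's stack and redirect ip to the target.

```c
/*
 * Userspace model of the int3 call emulation (illustrative only).
 * A fake register file and a fake stack stand in for pt_regs and the
 * kernel stack; addresses 0x1000/0x2000 are arbitrary.
 */
#include <stdio.h>

#define INT3_INSN_SIZE 1
#define CALL_INSN_SIZE 5

struct mock_regs {
	unsigned long ip;
	unsigned long sp;
};

static void int3_emulate_push(struct mock_regs *regs, unsigned long val)
{
	/* in the kernel this write lands in the gap the entry code reserved */
	regs->sp -= sizeof(unsigned long);
	*(unsigned long *)regs->sp = val;
}

static void int3_emulate_call(struct mock_regs *regs, unsigned long func)
{
	/* return address = start of the 5-byte call site + its length */
	int3_emulate_push(regs, regs->ip - INT3_INSN_SIZE + CALL_INSN_SIZE);
	regs->ip = func;
}

int main(void)
{
	unsigned long stack[16];
	struct mock_regs regs = {
		.ip = 0x1001,			/* int3 already executed: site + 1 */
		.sp = (unsigned long)&stack[16],
	};

	int3_emulate_call(&regs, 0x2000);	/* pretend 0x2000 is ftrace_regs_caller */

	printf("ip  -> %#lx\n", regs.ip);			/* 0x2000 */
	printf("ret -> %#lx\n", *(unsigned long *)regs.sp);	/* 0x1005 */
	return 0;
}
```

The six extra pushq's in the entry code above are what make the emulated push safe for traps coming from the kernel: they shift the exception frame down so regs->sp has a word of scratch space to receive the return address.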
On Wed, May 01, 2019 at 11:01:07AM -0700, Linus Torvalds wrote:
> This looks sane to me, although I'm surprised that we didn't already
> have an annotation for the nonstandard stack frame for asm files. That
> probably would be cleaner in a separate commit, but I guess it doesn't
> matter.
>
> Anyway, I'm willing to consider the entry code version if it looks a
> _lot_ simpler than this (so I'd like to see them side-by-side), but
> it's not like this looks all that complicated to me either.

So I posted one earlier today:

  https://lkml.kernel.org/r/20190501131117.GW2623@hirez.programming.kicks-ass.net

it's about a 1/3rd the number of lines and has 32bit support. It also
provides all the bits required to implement static_call().

That is; I think I'm firmly in favour of the entry variant -- provided
it actually works of course.
On Wed, May 1, 2019 at 11:52 AM Steven Rostedt <rostedt@goodmis.org> wrote:
>
> I got Peter's patch working. Here it is. What do you think?

I can tell from just looking at it for five seconds that at least the
32-bit case is buggy. You can't look at CS(%rsp) without first also
checking that you're not coming from vm86 mode.

But other than that I guess it does end up being pretty simple.

              Linus
On Wed, 1 May 2019 20:57:26 +0200
Peter Zijlstra <peterz@infradead.org> wrote:

> On Wed, May 01, 2019 at 11:01:07AM -0700, Linus Torvalds wrote:
> > This looks sane to me, although I'm surprised that we didn't already
> > have an annotation for the nonstandard stack frame for asm files. That
> > probably would be cleaner in a separate commit, but I guess it doesn't
> > matter.
> >
> > Anyway, I'm willing to consider the entry code version if it looks a
> > _lot_ simpler than this (so I'd like to see them side-by-side), but
> > it's not like this looks all that complicated to me either.
>
> So I posted one earlier today:
>
>   https://lkml.kernel.org/r/20190501131117.GW2623@hirez.programming.kicks-ass.net
>
> it's about a 1/3rd the number of lines and has 32bit support. It also
> provides all the bits required to implement static_call().

That's the patch I started with.

> That is; I think I'm firmly in favour of the entry variant -- provided
> it actually works of course.

And it works. I ran it through tools/testing/selftests/ftrace/ftracetest
and it passed just as well as without that patch. I haven't run it through
my full test suite yet; I can do that and see how it makes out.

-- Steve
On Wed, May 01, 2019 at 11:59:05AM -0700, Linus Torvalds wrote:
> On Wed, May 1, 2019 at 11:52 AM Steven Rostedt <rostedt@goodmis.org> wrote:
> >
> > I got Peter's patch working. Here it is. What do you think?
>
> I can tell from just looking at it for five seconds that at least the
> 32-bit case is buggy.
>
> You can't look at CS(%rsp) without first also checking that you're not
> coming from vm86 mode.

Something like so then?

Index: linux-2.6/arch/x86/entry/entry_32.S
===================================================================
--- linux-2.6.orig/arch/x86/entry/entry_32.S
+++ linux-2.6/arch/x86/entry/entry_32.S
@@ -1479,6 +1479,10 @@ ENTRY(int3)
 	ASM_CLAC
 	pushl	$-1				# mark this as an int
 
+#ifdef CONFIG_VM86
+	testl	$X86_EFLAGS_VM, PT_EFLAGS(%esp)
+	jnz	.Lfrom_usermode_no_gap
+#endif
 	testl	$SEGMENT_RPL_MASK, PT_CS(%esp)
 	jnz	.Lfrom_usermode_no_gap
 	.rept	6
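The extra test matters because in vm86 mode the saved CS is a real-mode segment value whose low two bits are not a privilege level, so testing SEGMENT_RPL_MASK alone can misclassify a vm86 frame as a kernel frame. Below is a rough userspace sketch of the combined test; it mirrors the shape of the kernel's 32-bit user_mode() helper, but the struct, constant values, and example frames are illustrative, not the kernel's definitions.

```c
/*
 * Userspace illustration (not kernel code) of why CS's RPL bits alone
 * are not enough on 32-bit: a vm86 frame can carry RPL bits of zero,
 * so the saved EFLAGS VM bit has to be folded into the decision.
 */
#include <stdio.h>

#define SEGMENT_RPL_MASK 0x3UL
#define USER_RPL         0x3UL
#define X86_EFLAGS_VM    0x00020000UL

struct fake_regs {
	unsigned long cs;
	unsigned long flags;
};

static int came_from_user(const struct fake_regs *regs)
{
	/* user if CPL 3 in the saved CS, or if the VM bit is set in EFLAGS */
	return ((regs->cs & SEGMENT_RPL_MASK) | (regs->flags & X86_EFLAGS_VM)) >= USER_RPL;
}

int main(void)
{
	struct fake_regs kernel = { .cs = 0x60,   .flags = 0x00000046 };
	struct fake_regs vm86   = { .cs = 0x1000, .flags = 0x00020246 };	/* RPL bits are 0 */

	printf("kernel frame -> user? %d\n", came_from_user(&kernel));	/* 0 */
	printf("vm86 frame   -> user? %d\n", came_from_user(&vm86));	/* 1: must skip the gap */
	return 0;
}
```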
On Wed, May 1, 2019 at 12:17 PM Peter Zijlstra <peterz@infradead.org> wrote:
>
> Something like so then?

Yes, that looks correct.

We have those X86_EFLAGS_VM tests pretty randomly scattered around, and
I wish there was some cleaner model for this, but I also guess that
there's no point in worrying about the 32-bit code much.

              Linus
diff --git a/arch/x86/include/asm/frame.h b/arch/x86/include/asm/frame.h
index 5cbce6fbb534..04892b374b93 100644
--- a/arch/x86/include/asm/frame.h
+++ b/arch/x86/include/asm/frame.h
@@ -42,4 +42,19 @@
 
 #endif /* CONFIG_FRAME_POINTER */
 
+#ifdef __ASSEMBLY__
+#ifdef CONFIG_STACK_VALIDATION
+#define STACK_FRAME_NON_STANDARD(func) \
+.section .discard.func_stack_frame_non_standard,"aw"; \
+.align 8; \
+.type __func_stack_frame_non_standard_##func, @object; \
+.size __func_stack_frame_non_standard_##func, 8; \
+__func_stack_frame_non_standard_##func: \
+.quad func; \
+.previous
+#else /* !CONFIG_STACK_VALIDATION */
+#define STACK_FRAME_NON_STANDARD(func)
+#endif /* CONFIG_STACK_VALIDATION */
+#endif /* __ASSEMBLY__ */
+
 #endif /* _ASM_X86_FRAME_H */
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index ef49517f6bb2..634fc0d4fe97 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -232,6 +232,9 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
 
 static unsigned long ftrace_update_func;
 
+/* Used within inline asm below */
+unsigned long ftrace_update_func_call;
+
 static int update_ftrace_func(unsigned long ip, void *new)
 {
 	unsigned char old[MCOUNT_INSN_SIZE];
@@ -259,6 +262,8 @@ int ftrace_update_ftrace_func(ftrace_func_t func)
 	unsigned char *new;
 	int ret;
 
+	ftrace_update_func_call = (unsigned long)func;
+
 	new = ftrace_call_replace(ip, (unsigned long)func);
 	ret = update_ftrace_func(ip, new);
 
@@ -280,6 +285,46 @@ static nokprobe_inline int is_ftrace_caller(unsigned long ip)
 	return 0;
 }
 
+#ifdef CONFIG_X86_64
+/*
+ * We need to handle the "call func1" -> "call func2" case.
+ * Just skipping the call is not sufficient as it will be like
+ * changing to "nop" first and then updating the call. But some
+ * users of ftrace require calls never to be missed.
+ *
+ * To emulate the call while converting the call site with a breakpoint,
+ * some trampolines are used along with per CPU buffers.
+ * There are three trampolines for the call sites and three trampolines
+ * for the updating of the call in the ftrace trampoline. The three
+ * trampolines are:
+ *
+ * 1) Interrupts are enabled when the breakpoint is hit
+ * 2) Interrupts are disabled when the breakpoint is hit
+ * 3) The breakpoint was hit in an NMI
+ *
+ * As per CPU data is used, interrupts must be disabled to prevent them
+ * from corrupting the data. A separate NMI trampoline is used for the
+ * NMI case. If interrupts are already disabled, then the return path
+ * of where the breakpoint was hit (saved in the per CPU data) is pushed
+ * on the stack, and then the trampoline jumps to either ftrace_caller
+ * (which will loop through all registered ftrace_ops handlers depending
+ * on the ip address) or, for an ftrace trampoline call update, to the
+ * function held in ftrace_update_func_call.
+ */
+extern asmlinkage void ftrace_emulate_call_sti(void);
+extern asmlinkage void ftrace_emulate_call(void);
+extern asmlinkage void ftrace_emulate_call_nmi(void);
+extern asmlinkage void ftrace_emulate_call_update_sti(void);
+extern asmlinkage void ftrace_emulate_call_update(void);
+extern asmlinkage void ftrace_emulate_call_update_nmi(void);
+
+DEFINE_PER_CPU(void *, ftrace_bp_call_return);
+DEFINE_PER_CPU(void *, ftrace_bp_call_nmi_return);
+
+/* To hold the ftrace_regs_caller address to push on the stack */
+void *ftrace_caller_func = (void *)ftrace_regs_caller;
+
 /*
  * A breakpoint was added to the code address we are about to
  * modify, and this is the handle that will just skip over it.
@@ -291,6 +336,58 @@ int ftrace_int3_handler(struct pt_regs *regs)
 {
 	unsigned long ip;
 
+	if (WARN_ON_ONCE(!regs))
+		return 0;
+
+	ip = regs->ip - 1;
+	if (ftrace_location(ip)) {
+		/* A breakpoint at the beginning of the function was hit */
+		if (in_nmi()) {
+			/* NMIs have their own trampoline */
+			this_cpu_write(ftrace_bp_call_nmi_return, (void *)ip + MCOUNT_INSN_SIZE);
+			regs->ip = (unsigned long) ftrace_emulate_call_nmi;
+			return 1;
+		}
+		this_cpu_write(ftrace_bp_call_return, (void *)ip + MCOUNT_INSN_SIZE);
+		if (regs->flags & X86_EFLAGS_IF) {
+			regs->flags &= ~X86_EFLAGS_IF;
+			regs->ip = (unsigned long) ftrace_emulate_call_sti;
+			/* Tell lockdep here we are enabling interrupts */
+			lockdep_hardirqs_on(_THIS_IP_);
+		} else {
+			regs->ip = (unsigned long) ftrace_emulate_call;
+		}
+		return 1;
+	} else if (is_ftrace_caller(ip)) {
+		/* An ftrace trampoline is being updated */
+		if (!ftrace_update_func_call) {
+			/* If it's a jump, just need to skip it */
+			regs->ip += MCOUNT_INSN_SIZE - 1;
+			return 1;
+		}
+		if (in_nmi()) {
+			/* NMIs have their own trampoline */
+			this_cpu_write(ftrace_bp_call_nmi_return, (void *)ip + MCOUNT_INSN_SIZE);
+			regs->ip = (unsigned long) ftrace_emulate_call_update_nmi;
+			return 1;
+		}
+		this_cpu_write(ftrace_bp_call_return, (void *)ip + MCOUNT_INSN_SIZE);
+		if (regs->flags & X86_EFLAGS_IF) {
+			regs->flags &= ~X86_EFLAGS_IF;
+			regs->ip = (unsigned long) ftrace_emulate_call_update_sti;
+		} else {
+			regs->ip = (unsigned long) ftrace_emulate_call_update;
+		}
+		return 1;
+	}
+
+	return 0;
+}
+#else /* !X86_64 */
+int ftrace_int3_handler(struct pt_regs *regs)
+{
+	unsigned long ip;
+
 	if (WARN_ON_ONCE(!regs))
 		return 0;
 
@@ -299,9 +396,9 @@ int ftrace_int3_handler(struct pt_regs *regs)
 		return 0;
 
 	regs->ip += MCOUNT_INSN_SIZE - 1;
-
 	return 1;
 }
+#endif
 NOKPROBE_SYMBOL(ftrace_int3_handler);
 
 static int ftrace_write(unsigned long ip, const char *val, int size)
@@ -859,6 +956,8 @@ void arch_ftrace_update_trampoline(struct ftrace_ops *ops)
 
 	func = ftrace_ops_get_func(ops);
 
+	ftrace_update_func_call = (unsigned long)func;
+
 	/* Do a safe modify in case the trampoline is executing */
 	new = ftrace_call_replace(ip, (unsigned long)func);
 	ret = update_ftrace_func(ip, new);
@@ -960,6 +1059,7 @@ static int ftrace_mod_jmp(unsigned long ip, void *func)
 {
 	unsigned char *new;
 
+	ftrace_update_func_call = 0;
 	new = ftrace_jmp_replace(ip, (unsigned long)func);
 
 	return update_ftrace_func(ip, new);
diff --git a/arch/x86/kernel/ftrace_64.S b/arch/x86/kernel/ftrace_64.S
index 75f2b36b41a6..8642e1719370 100644
--- a/arch/x86/kernel/ftrace_64.S
+++ b/arch/x86/kernel/ftrace_64.S
@@ -9,6 +9,9 @@
 #include <asm/export.h>
 #include <asm/nospec-branch.h>
 #include <asm/unwind_hints.h>
+#include <asm/irqflags.h>
+#include <asm/percpu.h>
+#include <asm/frame.h>
 
 .code64
 .section .entry.text, "ax"
@@ -262,6 +265,59 @@ GLOBAL(ftrace_regs_caller_end)
 
 ENDPROC(ftrace_regs_caller)
 
+/* Trampoline for function update with interrupts enabled */
+GLOBAL(ftrace_emulate_call_sti)
+	push	PER_CPU_VAR(ftrace_bp_call_return)
+	push	ftrace_caller_func
+	TRACE_IRQS_ON
+	sti
+	ret
+ENDPROC(ftrace_emulate_call_sti)
+
+/* Trampoline for function update with interrupts disabled */
+GLOBAL(ftrace_emulate_call)
+	push	PER_CPU_VAR(ftrace_bp_call_return)
+	push	ftrace_caller_func
+	ret
+ENDPROC(ftrace_emulate_call)
+
+/* Trampoline for function update in an NMI */
+GLOBAL(ftrace_emulate_call_nmi)
+	push	PER_CPU_VAR(ftrace_bp_call_nmi_return)
+	push	ftrace_caller_func
+	ret
+ENDPROC(ftrace_emulate_call_nmi)
+
+/* Trampoline for ftrace trampoline call update with interrupts enabled */
+GLOBAL(ftrace_emulate_call_update_sti)
+	push	PER_CPU_VAR(ftrace_bp_call_return)
+	push	ftrace_update_func_call
+	TRACE_IRQS_ON
+	sti
+	ret
+ENDPROC(ftrace_emulate_call_update_sti)
+
+/* Trampoline for ftrace trampoline call update with interrupts disabled */
+GLOBAL(ftrace_emulate_call_update)
+	push	PER_CPU_VAR(ftrace_bp_call_return)
+	push	ftrace_update_func_call
+	ret
+ENDPROC(ftrace_emulate_call_update)
+
+/* Trampoline for ftrace trampoline call update in an NMI */
+GLOBAL(ftrace_emulate_call_update_nmi)
+	push	PER_CPU_VAR(ftrace_bp_call_nmi_return)
+	push	ftrace_update_func_call
+	ret
+ENDPROC(ftrace_emulate_call_update_nmi)
+
+STACK_FRAME_NON_STANDARD(ftrace_emulate_call_sti)
+STACK_FRAME_NON_STANDARD(ftrace_emulate_call)
+STACK_FRAME_NON_STANDARD(ftrace_emulate_call_nmi)
+STACK_FRAME_NON_STANDARD(ftrace_emulate_call_update_sti)
+STACK_FRAME_NON_STANDARD(ftrace_emulate_call_update)
+STACK_FRAME_NON_STANDARD(ftrace_emulate_call_update_nmi)
+
 #else /* ! CONFIG_DYNAMIC_FTRACE */
 
 ENTRY(function_hook)
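Both approaches in this thread exist to cover the window while a live call site is being rewritten: the first byte is replaced with int3, the 4-byte displacement is patched behind it, and finally the first byte is restored, so any CPU executing the site in that window traps into ftrace_int3_handler(). The following is a rough, self-contained model of that sequence in plain C over a byte buffer; the real kernel path goes through ftrace_write()/text_poke_bp() with cross-CPU synchronization, which is only stubbed out here.

```c
/*
 * Illustrative userspace model of the breakpoint-based call-site update
 * (not the kernel implementation). A 5-byte "call rel32" is retargeted
 * without ever exposing a half-written instruction.
 */
#include <stdio.h>
#include <string.h>
#include <stdint.h>

#define INT3 0xcc
#define CALL 0xe8

static void sync_cores(void)
{
	/* stand-in for the IPI/serialization the kernel performs between steps */
}

static void update_call_site(uint8_t site[5], int32_t new_rel)
{
	/* step 1: breakpoint the first byte so no CPU executes a torn call */
	site[0] = INT3;
	sync_cores();

	/* step 2: rewrite the displacement behind the breakpoint */
	memcpy(site + 1, &new_rel, sizeof(new_rel));
	sync_cores();

	/* step 3: restore the call opcode, completing the new instruction */
	site[0] = CALL;
	sync_cores();
}

int main(void)
{
	uint8_t site[5] = { CALL, 0x10, 0x00, 0x00, 0x00 };	/* call +0x10 */

	update_call_site(site, 0x20);				/* now call +0x20 */

	printf("%02x %02x %02x %02x %02x\n",
	       site[0], site[1], site[2], site[3], site[4]);
	return 0;
}
```

Between steps 1 and 3 a CPU hitting the site traps with ip pointing one byte past the site, which is why the handlers above compute ip as regs->ip - 1 (or regs->ip - INT3_INSN_SIZE) before deciding how to emulate the call.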