Message ID | 1486508275-18449-1-git-send-email-abelvesa@linux.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
2017-02-07 23:57 GMT+01:00 Abel Vesa <abelvesa@linux.com>: > The DYNAMIC_FTRACE_WITH_REGS configuration makes it possible for a ftrace > operation to specify if registers need to saved/restored by the ftrace handler. > This is needed by kgraft and possibly other ftrace-based tools, and the ARM > architecture is currently lacking this feature. It would also be the first step > to support the "Kprobes-on-ftrace" optimization on ARM. > > This patch introduces a new ftrace handler that stores the registers on the > stack before calling the next stage. The registers are restored from the stack > before going back to the instrumented function. > > A side-effect of this patch is to activate the support for ftrace_modify_call() > as it defines ARCH_SUPPORTS_FTRACE_OPS for the ARM architecture. > > Signed-off-by: Abel Vesa <abelvesa@linux.com> > --- > arch/arm/Kconfig | 1 + > arch/arm/include/asm/ftrace.h | 4 +++ > arch/arm/kernel/entry-ftrace.S | 79 ++++++++++++++++++++++++++++++++++++++++++ > arch/arm/kernel/ftrace.c | 37 ++++++++++++++++++++ > 4 files changed, 121 insertions(+) > > diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig > index 186c4c2..df401f4 100644 > --- a/arch/arm/Kconfig > +++ b/arch/arm/Kconfig > @@ -50,6 +50,7 @@ config ARM > select HAVE_DMA_API_DEBUG > select HAVE_DMA_CONTIGUOUS if MMU > select HAVE_DYNAMIC_FTRACE if (!XIP_KERNEL) && !CPU_ENDIAN_BE32 && MMU > + select HAVE_DYNAMIC_FTRACE_WITH_REGS if HAVE_DYNAMIC_FTRACE && OLD_MCOUNT > select HAVE_EFFICIENT_UNALIGNED_ACCESS if (CPU_V6 || CPU_V6K || CPU_V7) && MMU > select HAVE_EXIT_THREAD > select HAVE_FTRACE_MCOUNT_RECORD if (!XIP_KERNEL) > diff --git a/arch/arm/include/asm/ftrace.h b/arch/arm/include/asm/ftrace.h > index 22b7311..f379881 100644 > --- a/arch/arm/include/asm/ftrace.h > +++ b/arch/arm/include/asm/ftrace.h > @@ -1,6 +1,10 @@ > #ifndef _ASM_ARM_FTRACE > #define _ASM_ARM_FTRACE > > +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS > +#define ARCH_SUPPORTS_FTRACE_OPS 1 > +#endif > + > #ifdef CONFIG_FUNCTION_TRACER > #define MCOUNT_ADDR ((unsigned long)(__gnu_mcount_nc)) > #define MCOUNT_INSN_SIZE 4 /* sizeof mcount call */ > diff --git a/arch/arm/kernel/entry-ftrace.S b/arch/arm/kernel/entry-ftrace.S > index c73c403..b1fee6c 100644 > --- a/arch/arm/kernel/entry-ftrace.S > +++ b/arch/arm/kernel/entry-ftrace.S > @@ -92,12 +92,74 @@ > 2: mcount_exit > .endm > > +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS > + > +.macro __ftrace_regs_caller > + > + add ip, sp, #4 @ move in IP the value of SP as it was > + @ before the push {lr} of the mcount mechanism > + stmdb sp!, {ip,lr,pc} > + stmdb sp!, {r0-r11,lr} > + > + @ stack content at this point: > + @ 0 4 44 48 52 56 60 64 > + @ R0 | R1 | ... | R11 | LR | SP + 4 | LR | PC | previous LR | > + > + mov r3, sp @ struct pt_regs* > + ldr r2, =function_trace_op > + ldr r2, [r2] @ pointer to the current > + @ function tracing op > + ldr r1, [sp, #64] @ lr of instrumented func > + mcount_adjust_addr r0, lr @ instrumented function > + > + .globl ftrace_regs_call > +ftrace_regs_call: > + bl ftrace_stub > + > +#ifdef CONFIG_FUNCTION_GRAPH_TRACER > + .globl ftrace_graph_regs_call > +ftrace_graph_regs_call: > + mov r0, r0 > +#endif > + @ pop saved regs > + ldr lr, [sp, #64] @ get the previous LR value from stack > + ldmia sp, {r0-r11, ip, sp, pc} @ pop the saved registers INCLUDING > + @ the stack pointer > +.endm > + > +#ifdef CONFIG_FUNCTION_GRAPH_TRACER > +.macro __ftrace_graph_regs_caller > + > + sub r0, fp, #4 @ lr of instrumented routine (parent) > + > + @ called from __ftrace_regs_caller > + ldr r1, [sp, #56] @ instrumented routine (func) > + mcount_adjust_addr r1, r1 > + > + sub r2, r0, #4 @ frame pointer why not mov r2, fp ? > + bl prepare_ftrace_return > + > + @ pop regs saved in ftrace_regs_caller > + ldr lr, [sp, #64] @ restore lr from the stack > + ldmia sp, {r0-r11, ip, sp, pc} @ restore r0 through pc > + > +.endm > +#endif > +#endif > + > .macro __ftrace_caller suffix > mcount_enter > > mcount_get_lr r1 @ lr of instrumented func > mcount_adjust_addr r0, lr @ instrumented function > > +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS > + ldr r2, =function_trace_op > + ldr r2, [r2] @ pointer to the current > + @ function tracing op > + mov r3, #0 @ regs is NULL > +#endif > + > .globl ftrace_call\suffix > ftrace_call\suffix: > bl ftrace_stub > @@ -212,6 +274,15 @@ UNWIND(.fnstart) > __ftrace_caller > UNWIND(.fnend) > ENDPROC(ftrace_caller) > + > +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS > +ENTRY(ftrace_regs_caller) > +UNWIND(.fnstart) > + __ftrace_regs_caller > +UNWIND(.fnend) > +ENDPROC(ftrace_regs_caller) > +#endif > + > #endif > > #ifdef CONFIG_FUNCTION_GRAPH_TRACER > @@ -220,6 +291,14 @@ UNWIND(.fnstart) > __ftrace_graph_caller > UNWIND(.fnend) > ENDPROC(ftrace_graph_caller) > + > +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS > +ENTRY(ftrace_graph_regs_caller) > +UNWIND(.fnstart) > + __ftrace_graph_regs_caller > +UNWIND(.fnend) > +ENDPROC(ftrace_graph_regs_caller) > +#endif > #endif > > .purgem mcount_enter > diff --git a/arch/arm/kernel/ftrace.c b/arch/arm/kernel/ftrace.c > index 3f17594..cb0358c 100644 > --- a/arch/arm/kernel/ftrace.c > +++ b/arch/arm/kernel/ftrace.c > @@ -139,6 +139,15 @@ int ftrace_update_ftrace_func(ftrace_func_t func) > > ret = ftrace_modify_code(pc, 0, new, false); > > +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS > + if (!ret) { > + pc = (unsigned long)&ftrace_regs_call; > + new = ftrace_call_replace(pc, (unsigned long)func); > + > + ret = ftrace_modify_code(pc, 0, new, false); > + } > +#endif > + > #ifdef CONFIG_OLD_MCOUNT > if (!ret) { > pc = (unsigned long)&ftrace_call_old; > @@ -157,11 +166,29 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) > unsigned long ip = rec->ip; > > old = ftrace_nop_replace(rec); > + > + new = ftrace_call_replace(ip, adjust_address(rec, addr)); > + > + return ftrace_modify_code(rec->ip, old, new, true); > +} > + > +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS > + > +int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, > + unsigned long addr) > +{ > + unsigned long new, old; > + unsigned long ip = rec->ip; > + > + old = ftrace_call_replace(ip, adjust_address(rec, old_addr)); > + > new = ftrace_call_replace(ip, adjust_address(rec, addr)); > > return ftrace_modify_code(rec->ip, old, new, true); > } > > +#endif > + > int ftrace_make_nop(struct module *mod, > struct dyn_ftrace *rec, unsigned long addr) > { > @@ -229,6 +256,8 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, > extern unsigned long ftrace_graph_call; > extern unsigned long ftrace_graph_call_old; > extern void ftrace_graph_caller_old(void); > +extern unsigned long ftrace_graph_regs_call; > +extern void ftrace_graph_regs_caller(void); > > static int __ftrace_modify_caller(unsigned long *callsite, > void (*func) (void), bool enable) > @@ -251,6 +280,14 @@ static int ftrace_modify_graph_caller(bool enable) > ftrace_graph_caller, > enable); > > +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS > + if (!ret) > + ret = __ftrace_modify_caller(&ftrace_graph_regs_call, > + ftrace_graph_regs_caller, > + enable); > +#endif > + > + > #ifdef CONFIG_OLD_MCOUNT > if (!ret) > ret = __ftrace_modify_caller(&ftrace_graph_call_old, > -- > 2.7.4 >
On Thu, Feb 09, 2017 at 04:38:32PM +0100, Jean-Jacques Hiblot wrote: > 2017-02-07 23:57 GMT+01:00 Abel Vesa <abelvesa@linux.com>: > > +#ifdef CONFIG_FUNCTION_GRAPH_TRACER > > +.macro __ftrace_graph_regs_caller > > + > > + sub r0, fp, #4 @ lr of instrumented routine (parent) > > + > > + @ called from __ftrace_regs_caller > > + ldr r1, [sp, #56] @ instrumented routine (func) > > + mcount_adjust_addr r1, r1 > > + > > + sub r2, r0, #4 @ frame pointer > why not mov r2, fp ? r0 = fp - 4 r2 = r0 - 4 therefore r2 = (fp - 4) - 4 r2 = fp - 8, not r2 = fp
On Tue, Feb 07, 2017 at 10:57:55PM +0000, Abel Vesa wrote: > +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS > + > +.macro __ftrace_regs_caller > + > + add ip, sp, #4 @ move in IP the value of SP as it was > + @ before the push {lr} of the mcount mechanism > + stmdb sp!, {ip,lr,pc} > + stmdb sp!, {r0-r11,lr} > + > + @ stack content at this point: > + @ 0 4 44 48 52 56 60 64 > + @ R0 | R1 | ... | R11 | LR | SP + 4 | LR | PC | previous LR | How important is this to be close to "struct pt_regs" ? Do we care about r12 being "wrong" ? The other issue is that pt_regs is actually 72 bytes in size, not 68 bytes. So, does that mean we end up inappropriately leaking some of the kernel stack to userspace through ftrace? It's possible to save all the registers like this if we need to provide a complete picture of the register set at function entry: str ip, [sp, #-16]! add ip, sp, #20 stmia sp, {ip, lr, pc} stmdb sp!, {r0 - r11} However, is that even correct - don't we want pt_regs' LR and PC to be related to the function call itself? The "previous LR" as you describe it is where the called function (the one that is being traced) will return to. The current LR at this point is the address within the traced function. So actually I think this is more strictly correct, if I'm understanding the intention here correctly: str ip, [sp, #S_IP - PT_REGS_SIZE]! @ save current IP ldr ip, [sp, #PT_REGS_SIZE - S_IP] @ get LR at traced function entry str lr, [sp, #S_PC - S_IP] @ save current LR as PC str ip, [sp, #S_LR - S_IP] @ save traced function return add ip, sp, #PT_REGS_SIZE - S_IP + 4 str ip, [sp, #S_SP - SP_IP] @ save stack pointer at function entry stmdb sp!, {r0 - r11} @ clear CPSR and old_r0 words mov r3, #0 str r3, [sp, #S_PSR] str r3, [sp, #S_OLD_R0] However, that has the side effect of misaligning the stack (the stack needs to be aligned to 8 bytes). So, if we decide we don't care about the saved LR value (except as a mechanism to preserve it across the call into the ftrace code): str ip, [sp, #S_IP - PT_REGS_SIZE + 4]! str lr, [sp, #S_PC - S_IP] ldr lr, [sp, #PT_REGS_SIZE - 4 - S_IP] add ip, sp, #PT_REGS_SIZE - S_IP stmib sp, {ip, lr} stmdb sp!, {r0 - r11} @ clear CPSR and old_r0 words mov r3, #0 str r3, [sp, #S_PSR] str r3, [sp, #S_OLD_R0] and the return would be: ldmia sp, {r0 - pc} That all said - maybe someone from the ftrace community can comment on how much of pt_regs is actually necessary here?
On Thu, 9 Feb 2017 16:29:56 +0000 Russell King - ARM Linux <linux@armlinux.org.uk> wrote: > On Tue, Feb 07, 2017 at 10:57:55PM +0000, Abel Vesa wrote: > > +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS > > + > > +.macro __ftrace_regs_caller > > + > > + add ip, sp, #4 @ move in IP the value of SP as it was > > + @ before the push {lr} of the mcount mechanism > > + stmdb sp!, {ip,lr,pc} > > + stmdb sp!, {r0-r11,lr} > > + > > + @ stack content at this point: > > + @ 0 4 44 48 52 56 60 64 > > + @ R0 | R1 | ... | R11 | LR | SP + 4 | LR | PC | previous LR | > > How important is this to be close to "struct pt_regs" ? Do we care about > r12 being "wrong" ? The other issue is that pt_regs is actually 72 > bytes in size, not 68 bytes. So, does that mean we end up inappropriately > leaking some of the kernel stack to userspace through ftrace? The regs passed to the ftrace code isn't passed to userspace. It's used by kprobes as a "fake breakpoint" (like fake news?), and by kernel live patching to modify what function actually gets called after ftrace returns. > > It's possible to save all the registers like this if we need to provide > a complete picture of the register set at function entry: > > str ip, [sp, #-16]! > add ip, sp, #20 > stmia sp, {ip, lr, pc} > stmdb sp!, {r0 - r11} > > However, is that even correct - don't we want pt_regs' LR and PC to be > related to the function call itself? The "previous LR" as you describe > it is where the called function (the one that is being traced) will > return to. The current LR at this point is the address within the > traced function. So actually I think this is more strictly correct, if > I'm understanding the intention here correctly: > > str ip, [sp, #S_IP - PT_REGS_SIZE]! @ save current IP > ldr ip, [sp, #PT_REGS_SIZE - S_IP] @ get LR at traced function entry > str lr, [sp, #S_PC - S_IP] @ save current LR as PC > str ip, [sp, #S_LR - S_IP] @ save traced function return > add ip, sp, #PT_REGS_SIZE - S_IP + 4 > str ip, [sp, #S_SP - SP_IP] @ save stack pointer at function entry > stmdb sp!, {r0 - r11} > @ clear CPSR and old_r0 words > mov r3, #0 > str r3, [sp, #S_PSR] > str r3, [sp, #S_OLD_R0] > > However, that has the side effect of misaligning the stack (the stack > needs to be aligned to 8 bytes). So, if we decide we don't care about > the saved LR value (except as a mechanism to preserve it across the > call into the ftrace code): > > str ip, [sp, #S_IP - PT_REGS_SIZE + 4]! > str lr, [sp, #S_PC - S_IP] > ldr lr, [sp, #PT_REGS_SIZE - 4 - S_IP] > add ip, sp, #PT_REGS_SIZE - S_IP > stmib sp, {ip, lr} > stmdb sp!, {r0 - r11} > @ clear CPSR and old_r0 words > mov r3, #0 > str r3, [sp, #S_PSR] > str r3, [sp, #S_OLD_R0] > > and the return would be: > > ldmia sp, {r0 - pc} > > That all said - maybe someone from the ftrace community can comment on > how much of pt_regs is actually necessary here? > Matters about the users. The REGS was originally created for kprobes, to simulate a kprobe breakpoint. As calling kprobes directly is much faster than going through the breakpoint mechanism. As adding a kprobe to the start of a function is a very common practice, it made sense to have ftrace give it a boost. Then came along live kernel patching, which I believe this series is trying to support. What is needed by pt_regs is a way to "hijack" the function being called to instead call the patched function. That is, ftrace is not being used for tracing, but in reality, being used to modify the running kernel. It is being used to change what function gets called. ftrace is just a hook for that mechanism. -- Steve
On Thu, Feb 09, 2017 at 12:13:22PM -0500, Steven Rostedt wrote: > Then came along live kernel patching, which I believe this series is > trying to support. What is needed by pt_regs is a way to "hijack" the > function being called to instead call the patched function. That is, > ftrace is not being used for tracing, but in reality, being used to > modify the running kernel. It is being used to change what function > gets called. ftrace is just a hook for that mechanism. So, would I be correct to assume that the only parts of pt_regs that would be touched are those which contain arguments to the function, and the register which would contain the return value?
On Thu, 9 Feb 2017 18:06:44 +0000 Russell King - ARM Linux <linux@armlinux.org.uk> wrote: > On Thu, Feb 09, 2017 at 12:13:22PM -0500, Steven Rostedt wrote: > > Then came along live kernel patching, which I believe this series is > > trying to support. What is needed by pt_regs is a way to "hijack" the > > function being called to instead call the patched function. That is, > > ftrace is not being used for tracing, but in reality, being used to > > modify the running kernel. It is being used to change what function > > gets called. ftrace is just a hook for that mechanism. > > So, would I be correct to assume that the only parts of pt_regs that > would be touched are those which contain arguments to the function, > and the register which would contain the return value? > For live kernel patching, perhaps. But for kprobes, I think they can touch anything. Matters what the creater of the kprobe wanted to do. -- Steve
[ sending again with Masami Cc'd ] On Thu, 9 Feb 2017 13:14:14 -0500 Steven Rostedt <rostedt@goodmis.org> wrote: > On Thu, 9 Feb 2017 18:06:44 +0000 > Russell King - ARM Linux <linux@armlinux.org.uk> wrote: > > > On Thu, Feb 09, 2017 at 12:13:22PM -0500, Steven Rostedt wrote: > > > Then came along live kernel patching, which I believe this series is > > > trying to support. What is needed by pt_regs is a way to "hijack" the > > > function being called to instead call the patched function. That is, > > > ftrace is not being used for tracing, but in reality, being used to > > > modify the running kernel. It is being used to change what function > > > gets called. ftrace is just a hook for that mechanism. > > > > So, would I be correct to assume that the only parts of pt_regs that > > would be touched are those which contain arguments to the function, > > and the register which would contain the return value? > > > > For live kernel patching, perhaps. > > But for kprobes, I think they can touch anything. Matters what the > creater of the kprobe wanted to do. > > -- Steve
On Thu, Feb 09, 2017 at 06:06:44PM +0000, Russell King - ARM Linux wrote: > On Thu, Feb 09, 2017 at 12:13:22PM -0500, Steven Rostedt wrote: > > Then came along live kernel patching, which I believe this series is > > trying to support. What is needed by pt_regs is a way to "hijack" the > > function being called to instead call the patched function. That is, > > ftrace is not being used for tracing, but in reality, being used to > > modify the running kernel. It is being used to change what function > > gets called. ftrace is just a hook for that mechanism. > > So, would I be correct to assume that the only parts of pt_regs that > would be touched are those which contain arguments to the function, > and the register which would contain the return value? At this point, yeah, but I intend to come up with another patch series for livepatching which needs the exact same context as the function livepatched. This means all the regs need to be saved and restored before calling the replacing function. > > -- > RMK's Patch system: http://www.armlinux.org.uk/developer/patches/ > FTTC broadband for 0.8mile line: currently at 9.6Mbps down 400kbps up > according to speedtest.net.
On Thu, Feb 09, 2017 at 04:29:56PM +0000, Russell King - ARM Linux wrote: > On Tue, Feb 07, 2017 at 10:57:55PM +0000, Abel Vesa wrote: > > +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS > > + > > +.macro __ftrace_regs_caller > > + > > + add ip, sp, #4 @ move in IP the value of SP as it was > > + @ before the push {lr} of the mcount mechanism > > + stmdb sp!, {ip,lr,pc} > > + stmdb sp!, {r0-r11,lr} > > + > > + @ stack content at this point: > > + @ 0 4 44 48 52 56 60 64 > > + @ R0 | R1 | ... | R11 | LR | SP + 4 | LR | PC | previous LR | > > How important is this to be close to "struct pt_regs" ? Do we care about > r12 being "wrong" ? The other issue is that pt_regs is actually 72 > bytes in size, not 68 bytes. So, does that mean we end up inappropriately > leaking some of the kernel stack to userspace through ftrace? You are right. pt_regs is 72 (due to old_r0, AFAIU). The risk is actually to corrupt the stack if any ftrace_call implementation is writing to pt_regs->uregs[i], where i >= 16 (at this point). A solution would be to decrement the SP with 4 at the beginning of ftrace_regs_caller, this way ensuring that every ftrace_call implementation gets to play with the whole size of pt_regs. Will take this into consideration in the next iteration. > > It's possible to save all the registers like this if we need to provide > a complete picture of the register set at function entry: > > str ip, [sp, #-16]! > add ip, sp, #20 > stmia sp, {ip, lr, pc} > stmdb sp!, {r0 - r11} > > However, is that even correct - don't we want pt_regs' LR and PC to be > related to the function call itself? The "previous LR" as you describe > it is where the called function (the one that is being traced) will > return to. The current LR at this point is the address within the > traced function. So actually I think this is more strictly correct, if > I'm understanding the intention here correctly: > > str ip, [sp, #S_IP - PT_REGS_SIZE]! @ save current IP > ldr ip, [sp, #PT_REGS_SIZE - S_IP] @ get LR at traced function entry > str lr, [sp, #S_PC - S_IP] @ save current LR as PC > str ip, [sp, #S_LR - S_IP] @ save traced function return > add ip, sp, #PT_REGS_SIZE - S_IP + 4 > str ip, [sp, #S_SP - SP_IP] @ save stack pointer at function entry > stmdb sp!, {r0 - r11} > @ clear CPSR and old_r0 words > mov r3, #0 > str r3, [sp, #S_PSR] > str r3, [sp, #S_OLD_R0] > > However, that has the side effect of misaligning the stack (the stack > needs to be aligned to 8 bytes). So, if we decide we don't care about > the saved LR value (except as a mechanism to preserve it across the > call into the ftrace code): > The solution proposed upwards will take care of the stack alignment as well. Again, LR needed by ftrace_graph_caller/ftrace_regs_graph_caller. > str ip, [sp, #S_IP - PT_REGS_SIZE + 4]! > str lr, [sp, #S_PC - S_IP] > ldr lr, [sp, #PT_REGS_SIZE - 4 - S_IP] > add ip, sp, #PT_REGS_SIZE - S_IP > stmib sp, {ip, lr} > stmdb sp!, {r0 - r11} > @ clear CPSR and old_r0 words > mov r3, #0 > str r3, [sp, #S_PSR] > str r3, [sp, #S_OLD_R0] > > and the return would be: > > ldmia sp, {r0 - pc} > > That all said - maybe someone from the ftrace community can comment on > how much of pt_regs is actually necessary here? > > -- > RMK's Patch system: http://www.armlinux.org.uk/developer/patches/ > FTTC broadband for 0.8mile line: currently at 9.6Mbps down 400kbps up > according to speedtest.net.
On Thu, Feb 09, 2017 at 01:14:52PM -0500, Steven Rostedt wrote: > > [ sending again with Masami Cc'd ] > > On Thu, 9 Feb 2017 13:14:14 -0500 > Steven Rostedt <rostedt@goodmis.org> wrote: > > > On Thu, 9 Feb 2017 18:06:44 +0000 > > Russell King - ARM Linux <linux@armlinux.org.uk> wrote: > > > > > On Thu, Feb 09, 2017 at 12:13:22PM -0500, Steven Rostedt wrote: > > > > Then came along live kernel patching, which I believe this series is > > > > trying to support. What is needed by pt_regs is a way to "hijack" the > > > > function being called to instead call the patched function. That is, > > > > ftrace is not being used for tracing, but in reality, being used to > > > > modify the running kernel. It is being used to change what function > > > > gets called. ftrace is just a hook for that mechanism. > > > > > > So, would I be correct to assume that the only parts of pt_regs that > > > would be touched are those which contain arguments to the function, > > > and the register which would contain the return value? > > > > > > > For live kernel patching, perhaps. > > > > But for kprobes, I think they can touch anything. Matters what the > > creater of the kprobe wanted to do. > > Thing is, by saving all of them is the easiest way to ensure that the whole context is the same when the replacing function gets called, as I said before. We can't be sure that while __ftrace_ops_list_func is executing, any of the regs will have the value they had when the function-to-be-replaced was called. That's the reason I say we need to save them all. > > -- Steve >
On Thu, Feb 09, 2017 at 07:01:10PM +0000, Abel Vesa wrote: > On Thu, Feb 09, 2017 at 01:14:52PM -0500, Steven Rostedt wrote: > > > > [ sending again with Masami Cc'd ] > > > > On Thu, 9 Feb 2017 13:14:14 -0500 > > Steven Rostedt <rostedt@goodmis.org> wrote: > > > > > On Thu, 9 Feb 2017 18:06:44 +0000 > > > Russell King - ARM Linux <linux@armlinux.org.uk> wrote: > > > > > > > On Thu, Feb 09, 2017 at 12:13:22PM -0500, Steven Rostedt wrote: > > > > > Then came along live kernel patching, which I believe this series is > > > > > trying to support. What is needed by pt_regs is a way to "hijack" the > > > > > function being called to instead call the patched function. That is, > > > > > ftrace is not being used for tracing, but in reality, being used to > > > > > modify the running kernel. It is being used to change what function > > > > > gets called. ftrace is just a hook for that mechanism. > > > > > > > > So, would I be correct to assume that the only parts of pt_regs that > > > > would be touched are those which contain arguments to the function, > > > > and the register which would contain the return value? > > > > > > > > > > For live kernel patching, perhaps. > > > > > > But for kprobes, I think they can touch anything. Matters what the > > > creater of the kprobe wanted to do. > > > > Thing is, by saving all of them is the easiest way to ensure that the > whole context is the same when the replacing function gets called, as > I said before. > > We can't be sure that while __ftrace_ops_list_func is executing, any of > the regs will have the value they had when the function-to-be-replaced > was called. That's the reason I say we need to save them all. Scratch that, I'm wrong, the reason is stupid. The context gets restored anyway after __ftrace_ops_list_func is done. > > > -- Steve > >
2017-02-09 17:29 GMT+01:00 Russell King - ARM Linux <linux@armlinux.org.uk>: > On Tue, Feb 07, 2017 at 10:57:55PM +0000, Abel Vesa wrote: >> +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS >> + >> +.macro __ftrace_regs_caller >> + >> + add ip, sp, #4 @ move in IP the value of SP as it was >> + @ before the push {lr} of the mcount mechanism >> + stmdb sp!, {ip,lr,pc} >> + stmdb sp!, {r0-r11,lr} >> + >> + @ stack content at this point: >> + @ 0 4 44 48 52 56 60 64 >> + @ R0 | R1 | ... | R11 | LR | SP + 4 | LR | PC | previous LR | > > How important is this to be close to "struct pt_regs" ? Do we care about > r12 being "wrong" ? The other issue is that pt_regs is actually 72 > bytes in size, not 68 bytes. So, does that mean we end up inappropriately > leaking some of the kernel stack to userspace through ftrace? > > It's possible to save all the registers like this if we need to provide > a complete picture of the register set at function entry: > > str ip, [sp, #-16]! > add ip, sp, #20 > stmia sp, {ip, lr, pc} > stmdb sp!, {r0 - r11} > > However, is that even correct - don't we want pt_regs' LR and PC to be > related to the function call itself? The "previous LR" as you describe > it is where the called function (the one that is being traced) will > return to. The current LR at this point is the address within the > traced function. So actually I think this is more strictly correct, if > I'm understanding the intention here correctly: > > str ip, [sp, #S_IP - PT_REGS_SIZE]! @ save current IP > ldr ip, [sp, #PT_REGS_SIZE - S_IP] @ get LR at traced function entry > str lr, [sp, #S_PC - S_IP] @ save current LR as PC > str ip, [sp, #S_LR - S_IP] @ save traced function return > add ip, sp, #PT_REGS_SIZE - S_IP + 4 > str ip, [sp, #S_SP - SP_IP] @ save stack pointer at function entry > stmdb sp!, {r0 - r11} > @ clear CPSR and old_r0 words > mov r3, #0 > str r3, [sp, #S_PSR] > str r3, [sp, #S_OLD_R0] > > However, that has the side effect of misaligning the stack (the stack > needs to be aligned to 8 bytes). So, if we decide we don't care about > the saved LR value (except as a mechanism to preserve it across the > call into the ftrace code): > > str ip, [sp, #S_IP - PT_REGS_SIZE + 4]! > str lr, [sp, #S_PC - S_IP] > ldr lr, [sp, #PT_REGS_SIZE - 4 - S_IP] > add ip, sp, #PT_REGS_SIZE - S_IP > stmib sp, {ip, lr} > stmdb sp!, {r0 - r11} > @ clear CPSR and old_r0 words > mov r3, #0 > str r3, [sp, #S_PSR] > str r3, [sp, #S_OLD_R0] > > and the return would be: > > ldmia sp, {r0 - pc} > > That all said - maybe someone from the ftrace community can comment on > how much of pt_regs is actually necessary here? I would suggest the following: r0-r11: filled with current values. r12 : the value of r12 doesn't matter (Intra-procedure call scratch reg), we can either save it or not. r13 - sp: the value as it was when the instrumented function was entered. in the mcount case, it's the current sp value - 4, otherwise it'f sp -4 r14 - lr: the value as it was when the instrumented function was entered. first element in stack or available in frame depending on GCC's version (mcount vs __gnu_mcount_nc) r15 - pc : the address after the modified instruction (value of lr when the ftrace caller is entered) I don't think we need CSPR and ORIG_r0. > > -- > RMK's Patch system: http://www.armlinux.org.uk/developer/patches/ > FTTC broadband for 0.8mile line: currently at 9.6Mbps down 400kbps up > according to speedtest.net.
On Fri, Feb 10, 2017 at 11:36:12AM +0100, Jean-Jacques Hiblot wrote: > 2017-02-09 17:29 GMT+01:00 Russell King - ARM Linux <linux@armlinux.org.uk>: > > On Tue, Feb 07, 2017 at 10:57:55PM +0000, Abel Vesa wrote: > >> +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS > >> + > >> +.macro __ftrace_regs_caller > >> + > >> + add ip, sp, #4 @ move in IP the value of SP as it was > >> + @ before the push {lr} of the mcount mechanism > >> + stmdb sp!, {ip,lr,pc} > >> + stmdb sp!, {r0-r11,lr} > >> + > >> + @ stack content at this point: > >> + @ 0 4 44 48 52 56 60 64 > >> + @ R0 | R1 | ... | R11 | LR | SP + 4 | LR | PC | previous LR | > > > > How important is this to be close to "struct pt_regs" ? Do we care about > > r12 being "wrong" ? The other issue is that pt_regs is actually 72 > > bytes in size, not 68 bytes. So, does that mean we end up inappropriately > > leaking some of the kernel stack to userspace through ftrace? > > > > It's possible to save all the registers like this if we need to provide > > a complete picture of the register set at function entry: > > > > str ip, [sp, #-16]! > > add ip, sp, #20 > > stmia sp, {ip, lr, pc} > > stmdb sp!, {r0 - r11} > > > > However, is that even correct - don't we want pt_regs' LR and PC to be > > related to the function call itself? The "previous LR" as you describe > > it is where the called function (the one that is being traced) will > > return to. The current LR at this point is the address within the > > traced function. So actually I think this is more strictly correct, if > > I'm understanding the intention here correctly: > > > > str ip, [sp, #S_IP - PT_REGS_SIZE]! @ save current IP > > ldr ip, [sp, #PT_REGS_SIZE - S_IP] @ get LR at traced function entry > > str lr, [sp, #S_PC - S_IP] @ save current LR as PC > > str ip, [sp, #S_LR - S_IP] @ save traced function return > > add ip, sp, #PT_REGS_SIZE - S_IP + 4 > > str ip, [sp, #S_SP - SP_IP] @ save stack pointer at function entry > > stmdb sp!, {r0 - r11} > > @ clear CPSR and old_r0 words > > mov r3, #0 > > str r3, [sp, #S_PSR] > > str r3, [sp, #S_OLD_R0] > > > > However, that has the side effect of misaligning the stack (the stack > > needs to be aligned to 8 bytes). So, if we decide we don't care about > > the saved LR value (except as a mechanism to preserve it across the > > call into the ftrace code): > > > > str ip, [sp, #S_IP - PT_REGS_SIZE + 4]! > > str lr, [sp, #S_PC - S_IP] > > ldr lr, [sp, #PT_REGS_SIZE - 4 - S_IP] > > add ip, sp, #PT_REGS_SIZE - S_IP > > stmib sp, {ip, lr} > > stmdb sp!, {r0 - r11} > > @ clear CPSR and old_r0 words > > mov r3, #0 > > str r3, [sp, #S_PSR] > > str r3, [sp, #S_OLD_R0] > > > > and the return would be: > > > > ldmia sp, {r0 - pc} > > > > That all said - maybe someone from the ftrace community can comment on > > how much of pt_regs is actually necessary here? > > I would suggest the following: > r0-r11: filled with current values. > r12 : the value of r12 doesn't matter (Intra-procedure call scratch > reg), we can either save it or not. > r13 - sp: the value as it was when the instrumented function was > entered. in the mcount case, it's the current sp value - 4, otherwise > it'f sp -4 > r14 - lr: the value as it was when the instrumented function was > entered. first element in stack or available in frame depending on > GCC's version (mcount vs __gnu_mcount_nc) > r15 - pc : the address after the modified instruction (value of lr > when the ftrace caller is entered) > So basically you're saying: save all regs, r0 through r15, except r12. Based on that, I think it's easier to save all of them and then restore all of them except r12. Plus, you have to take into consideration all the possible users of ftrace with regs. At some point some implementation of ftrace_regs_call will probably need the value from r12. > I don't think we need CSPR and ORIG_r0. I think we do. As I said before, because PT_REGS is 72 and some function might (in the future) make use of CSPR or ORIG_r0, to ensure there is no stack corruption taking place, we have to provide whole pt_regs, that is 72 (including CSPR and ORIG_r0). Plus, the stack alignment thing Russell mentioned would be fixed. The only problem I don't have a solution for at this point is OLD_LR (or previous LR as it is called in this patch). If we take the approach described earlier, we need to add to pt_regs the OLD_LR which I really don't like because it is breaking the whole purpose of pt_regs (it should only contain one copy of each reg, though it already has the ORIG_r0 in it) and will also break the stack alignment. > > > > > -- > > RMK's Patch system: http://www.armlinux.org.uk/developer/patches/ > > FTTC broadband for 0.8mile line: currently at 9.6Mbps down 400kbps up > > according to speedtest.net.
2017-02-10 13:03 GMT+01:00 Abel Vesa <abelvesa@gmail.com>: > On Fri, Feb 10, 2017 at 11:36:12AM +0100, Jean-Jacques Hiblot wrote: >> 2017-02-09 17:29 GMT+01:00 Russell King - ARM Linux <linux@armlinux.org.uk>: >> > On Tue, Feb 07, 2017 at 10:57:55PM +0000, Abel Vesa wrote: >> >> +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS >> >> + >> >> +.macro __ftrace_regs_caller >> >> + >> >> + add ip, sp, #4 @ move in IP the value of SP as it was >> >> + @ before the push {lr} of the mcount mechanism >> >> + stmdb sp!, {ip,lr,pc} >> >> + stmdb sp!, {r0-r11,lr} >> >> + >> >> + @ stack content at this point: >> >> + @ 0 4 44 48 52 56 60 64 >> >> + @ R0 | R1 | ... | R11 | LR | SP + 4 | LR | PC | previous LR | >> > >> > How important is this to be close to "struct pt_regs" ? Do we care about >> > r12 being "wrong" ? The other issue is that pt_regs is actually 72 >> > bytes in size, not 68 bytes. So, does that mean we end up inappropriately >> > leaking some of the kernel stack to userspace through ftrace? >> > >> > It's possible to save all the registers like this if we need to provide >> > a complete picture of the register set at function entry: >> > >> > str ip, [sp, #-16]! >> > add ip, sp, #20 >> > stmia sp, {ip, lr, pc} >> > stmdb sp!, {r0 - r11} >> > >> > However, is that even correct - don't we want pt_regs' LR and PC to be >> > related to the function call itself? The "previous LR" as you describe >> > it is where the called function (the one that is being traced) will >> > return to. The current LR at this point is the address within the >> > traced function. So actually I think this is more strictly correct, if >> > I'm understanding the intention here correctly: >> > >> > str ip, [sp, #S_IP - PT_REGS_SIZE]! @ save current IP >> > ldr ip, [sp, #PT_REGS_SIZE - S_IP] @ get LR at traced function entry >> > str lr, [sp, #S_PC - S_IP] @ save current LR as PC >> > str ip, [sp, #S_LR - S_IP] @ save traced function return >> > add ip, sp, #PT_REGS_SIZE - S_IP + 4 >> > str ip, [sp, #S_SP - SP_IP] @ save stack pointer at function entry >> > stmdb sp!, {r0 - r11} >> > @ clear CPSR and old_r0 words >> > mov r3, #0 >> > str r3, [sp, #S_PSR] >> > str r3, [sp, #S_OLD_R0] >> > >> > However, that has the side effect of misaligning the stack (the stack >> > needs to be aligned to 8 bytes). So, if we decide we don't care about >> > the saved LR value (except as a mechanism to preserve it across the >> > call into the ftrace code): >> > >> > str ip, [sp, #S_IP - PT_REGS_SIZE + 4]! >> > str lr, [sp, #S_PC - S_IP] >> > ldr lr, [sp, #PT_REGS_SIZE - 4 - S_IP] >> > add ip, sp, #PT_REGS_SIZE - S_IP >> > stmib sp, {ip, lr} >> > stmdb sp!, {r0 - r11} >> > @ clear CPSR and old_r0 words >> > mov r3, #0 >> > str r3, [sp, #S_PSR] >> > str r3, [sp, #S_OLD_R0] >> > >> > and the return would be: >> > >> > ldmia sp, {r0 - pc} >> > >> > That all said - maybe someone from the ftrace community can comment on >> > how much of pt_regs is actually necessary here? >> >> I would suggest the following: >> r0-r11: filled with current values. >> r12 : the value of r12 doesn't matter (Intra-procedure call scratch >> reg), we can either save it or not. >> r13 - sp: the value as it was when the instrumented function was >> entered. in the mcount case, it's the current sp value - 4, otherwise >> it'f sp -4 >> r14 - lr: the value as it was when the instrumented function was >> entered. first element in stack or available in frame depending on >> GCC's version (mcount vs __gnu_mcount_nc) >> r15 - pc : the address after the modified instruction (value of lr >> when the ftrace caller is entered) >> > So basically you're saying: save all regs, r0 through r15, except r12. > Based on that, I think it's easier to save all of them and then restore > all of them except r12. Plus, you have to take into consideration all > the possible users of ftrace with regs. At some point some implementation > of ftrace_regs_call will probably need the value from r12. >> I don't think we need CSPR and ORIG_r0. > I think we do. As I said before, because PT_REGS is 72 and some function > might (in the future) make use of CSPR or ORIG_r0, to ensure there is no > stack corruption taking place, we have to provide whole pt_regs, that is > 72 (including CSPR and ORIG_r0). Plus, the stack alignment thing Russell > mentioned would be fixed. You're right for the size of the structure. For the content, I don't think we need all of them but it won't hurt to save more than necessary. > > The only problem I don't have a solution for at this point is OLD_LR (or previous LR > as it is called in this patch). If we take the approach described earlier, previous LR is the lr when the instrumented function is entered, it should be stored in pt_regs as r14 > we need to add to pt_regs the OLD_LR which I really don't like because it is > breaking the whole purpose of pt_regs (it should only contain one copy of each reg, > though it already has the ORIG_r0 in it) and will also break the stack alignment. >> >> > >> > -- >> > RMK's Patch system: http://www.armlinux.org.uk/developer/patches/ >> > FTTC broadband for 0.8mile line: currently at 9.6Mbps down 400kbps up >> > according to speedtest.net.
On Fri, Feb 10, 2017 at 12:03:06PM +0000, Abel Vesa wrote: > The only problem I don't have a solution for at this point is OLD_LR (or > previous LR as it is called in this patch). If you want the context at function entry, then you need to save the registers as they were at that point. The stacking of LR in the gnu_mcount thing is there to avoid this problem: a: push {lr} bl __gnu_mcount_mc That "bl" instruction can be thought of as being effectively this: adr lr, 1f b __gnu_mcount_mc 1: and from that, you can plainly see that "lr" gets corrupted by the call. So, to save the register state as it was at point "a", you need to save (in order): r0 through to sp the saved lr on the stack (which was the value of lr at point a) the current lr (which is the value of the PC _after_ __gnu_mcount_mc returns) cpsr write zero to old_r0 Stacking actual value of the PC at the point that you're stacking these registers is really senseless - it doesn't convey any useful information about the context being saved. Does it make sense to leave the compiler's saving of lr on the stack? Probably not - which I think my last iteration overwrote with the old_r0 value. The only thing my last iteration did not do was save a real value for CPSR. I didn't test it either...
On Fri, Feb 10, 2017 at 02:28:47PM +0000, Russell King - ARM Linux wrote: > On Fri, Feb 10, 2017 at 12:03:06PM +0000, Abel Vesa wrote: > > The only problem I don't have a solution for at this point is OLD_LR (or > > previous LR as it is called in this patch). > > If you want the context at function entry, then you need to save the > registers as they were at that point. > > The stacking of LR in the gnu_mcount thing is there to avoid this problem: > > a: > push {lr} > bl __gnu_mcount_mc > > That "bl" instruction can be thought of as being effectively this: > > adr lr, 1f > b __gnu_mcount_mc > 1: > > and from that, you can plainly see that "lr" gets corrupted by the call. > So, to save the register state as it was at point "a", you need to > save (in order): > > r0 through to sp > the saved lr on the stack (which was the value of lr at point a) > the current lr (which is the value of the PC _after_ __gnu_mcount_mc > returns) > cpsr > write zero to old_r0 > > Stacking actual value of the PC at the point that you're stacking these > registers is really senseless - it doesn't convey any useful information > about the context being saved. > > Does it make sense to leave the compiler's saving of lr on the stack? > Probably not - which I think my last iteration overwrote with the old_r0 Actually, the "compiler's saving of lr" is needed by prepare_ftrace_return (which is called from __ftrace_graph_regs_caller/__ftrace_graph_caller) to be replaced by return_to_handler. > value. The only thing my last iteration did not do was save a real value > for CPSR. > The stack needs to look like this: Right before __gnu_mcount_mc is called: 0 4 | compiler's saving of lr | ... (we were wrong, stack was actually aligned to 8) After regs saving in ftrace_regs_caller (the replacer of __gnu_mcount_mc): 0 4 8 52 56 60 64 68 72 76 | R0 | R1 | ... | SP + 4 | new LR | PC | CPSR | OLD_R0 | compiler's saving of lr | ... this means the saving needs to be something like this: sub sp, sp, #8 @ space for CPSR and OLD_R0 (not used at this point) add ip, sp, #12 @ move in IP the value of SP as it was ( compute "SP + 4" ) stmdb sp!, {ip,lr,pc} @ push PC, new LR, "SP + 4" (in this order) stmdb sp!, {r0-r11,lr} @ push new LR, R11 through to R0 (in this order) And then the restoring needs to be like this: ldr lr, [sp, #PT_REGS_SIZE] @ load "compiler's saved of lr" ldmia sp, {r0-r11, ip, sp, pc} @ pop r0-r11, "new LR" in ip, "SP + 4" in SP @ and "new LR" in PC After this, SP would be at '76', PC will contain the address of the next instruction after "b __gnu_mcount_mc", and LR will be "compiler's saved of lr". The only register that would have a different value than before would be IP. I know we can skip saving and restoring IP, but it doesn't seem to be worth it. I hope this time I'm not mistaken. > I didn't test it either... > > -- > RMK's Patch system: http://www.armlinux.org.uk/developer/patches/ > FTTC broadband for 0.8mile line: currently at 9.6Mbps down 400kbps up > according to speedtest.net.
On Fri, Feb 10, 2017 at 02:57:38PM +0100, Jean-Jacques Hiblot wrote: > 2017-02-10 13:03 GMT+01:00 Abel Vesa <abelvesa@gmail.com>: > > On Fri, Feb 10, 2017 at 11:36:12AM +0100, Jean-Jacques Hiblot wrote: > >> 2017-02-09 17:29 GMT+01:00 Russell King - ARM Linux <linux@armlinux.org.uk>: > >> > On Tue, Feb 07, 2017 at 10:57:55PM +0000, Abel Vesa wrote: > >> >> +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS > >> >> + > >> >> +.macro __ftrace_regs_caller > >> >> + > >> >> + add ip, sp, #4 @ move in IP the value of SP as it was > >> >> + @ before the push {lr} of the mcount mechanism > >> >> + stmdb sp!, {ip,lr,pc} > >> >> + stmdb sp!, {r0-r11,lr} > >> >> + > >> >> + @ stack content at this point: > >> >> + @ 0 4 44 48 52 56 60 64 > >> >> + @ R0 | R1 | ... | R11 | LR | SP + 4 | LR | PC | previous LR | > >> > > >> > How important is this to be close to "struct pt_regs" ? Do we care about > >> > r12 being "wrong" ? The other issue is that pt_regs is actually 72 > >> > bytes in size, not 68 bytes. So, does that mean we end up inappropriately > >> > leaking some of the kernel stack to userspace through ftrace? > >> > > >> > It's possible to save all the registers like this if we need to provide > >> > a complete picture of the register set at function entry: > >> > > >> > str ip, [sp, #-16]! > >> > add ip, sp, #20 > >> > stmia sp, {ip, lr, pc} > >> > stmdb sp!, {r0 - r11} > >> > > >> > However, is that even correct - don't we want pt_regs' LR and PC to be > >> > related to the function call itself? The "previous LR" as you describe > >> > it is where the called function (the one that is being traced) will > >> > return to. The current LR at this point is the address within the > >> > traced function. So actually I think this is more strictly correct, if > >> > I'm understanding the intention here correctly: > >> > > >> > str ip, [sp, #S_IP - PT_REGS_SIZE]! @ save current IP > >> > ldr ip, [sp, #PT_REGS_SIZE - S_IP] @ get LR at traced function entry > >> > str lr, [sp, #S_PC - S_IP] @ save current LR as PC > >> > str ip, [sp, #S_LR - S_IP] @ save traced function return > >> > add ip, sp, #PT_REGS_SIZE - S_IP + 4 > >> > str ip, [sp, #S_SP - SP_IP] @ save stack pointer at function entry > >> > stmdb sp!, {r0 - r11} > >> > @ clear CPSR and old_r0 words > >> > mov r3, #0 > >> > str r3, [sp, #S_PSR] > >> > str r3, [sp, #S_OLD_R0] > >> > > >> > However, that has the side effect of misaligning the stack (the stack > >> > needs to be aligned to 8 bytes). So, if we decide we don't care about > >> > the saved LR value (except as a mechanism to preserve it across the > >> > call into the ftrace code): > >> > > >> > str ip, [sp, #S_IP - PT_REGS_SIZE + 4]! > >> > str lr, [sp, #S_PC - S_IP] > >> > ldr lr, [sp, #PT_REGS_SIZE - 4 - S_IP] > >> > add ip, sp, #PT_REGS_SIZE - S_IP > >> > stmib sp, {ip, lr} > >> > stmdb sp!, {r0 - r11} > >> > @ clear CPSR and old_r0 words > >> > mov r3, #0 > >> > str r3, [sp, #S_PSR] > >> > str r3, [sp, #S_OLD_R0] > >> > > >> > and the return would be: > >> > > >> > ldmia sp, {r0 - pc} > >> > > >> > That all said - maybe someone from the ftrace community can comment on > >> > how much of pt_regs is actually necessary here? > >> > >> I would suggest the following: > >> r0-r11: filled with current values. > >> r12 : the value of r12 doesn't matter (Intra-procedure call scratch > >> reg), we can either save it or not. > >> r13 - sp: the value as it was when the instrumented function was > >> entered. in the mcount case, it's the current sp value - 4, otherwise > >> it'f sp -4 > >> r14 - lr: the value as it was when the instrumented function was > >> entered. first element in stack or available in frame depending on > >> GCC's version (mcount vs __gnu_mcount_nc) > >> r15 - pc : the address after the modified instruction (value of lr > >> when the ftrace caller is entered) > >> > > So basically you're saying: save all regs, r0 through r15, except r12. > > Based on that, I think it's easier to save all of them and then restore > > all of them except r12. Plus, you have to take into consideration all > > the possible users of ftrace with regs. At some point some implementation > > of ftrace_regs_call will probably need the value from r12. > >> I don't think we need CSPR and ORIG_r0. > > I think we do. As I said before, because PT_REGS is 72 and some function > > might (in the future) make use of CSPR or ORIG_r0, to ensure there is no > > stack corruption taking place, we have to provide whole pt_regs, that is > > 72 (including CSPR and ORIG_r0). Plus, the stack alignment thing Russell > > mentioned would be fixed. > You're right for the size of the structure. For the content, I don't > think we need all of them but it won't hurt to save more than > necessary. Exactly. > > > > The only problem I don't have a solution for at this point is OLD_LR (or previous LR > > as it is called in this patch). If we take the approach described earlier, > > previous LR is the lr when the instrumented function is entered, it > should be stored in pt_regs as r14 > No, I disagree. I just realized I was also wrong. It doesn't have to be a part of pt_regs at all. prepare_ftrace_return will receive it as first parameter due to this: sub r0, fp, #4 @ lr of instrumented routine (parent) first instruction in __ftrace_graph_regs_caller. > > we need to add to pt_regs the OLD_LR which I really don't like because it is > > breaking the whole purpose of pt_regs (it should only contain one copy of each reg, > > though it already has the ORIG_r0 in it) and will also break the stack alignment. > >> > >> > > >> > -- > >> > RMK's Patch system: http://www.armlinux.org.uk/developer/patches/ > >> > FTTC broadband for 0.8mile line: currently at 9.6Mbps down 400kbps up > >> > according to speedtest.net.
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 186c4c2..df401f4 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -50,6 +50,7 @@ config ARM select HAVE_DMA_API_DEBUG select HAVE_DMA_CONTIGUOUS if MMU select HAVE_DYNAMIC_FTRACE if (!XIP_KERNEL) && !CPU_ENDIAN_BE32 && MMU + select HAVE_DYNAMIC_FTRACE_WITH_REGS if HAVE_DYNAMIC_FTRACE && OLD_MCOUNT select HAVE_EFFICIENT_UNALIGNED_ACCESS if (CPU_V6 || CPU_V6K || CPU_V7) && MMU select HAVE_EXIT_THREAD select HAVE_FTRACE_MCOUNT_RECORD if (!XIP_KERNEL) diff --git a/arch/arm/include/asm/ftrace.h b/arch/arm/include/asm/ftrace.h index 22b7311..f379881 100644 --- a/arch/arm/include/asm/ftrace.h +++ b/arch/arm/include/asm/ftrace.h @@ -1,6 +1,10 @@ #ifndef _ASM_ARM_FTRACE #define _ASM_ARM_FTRACE +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS +#define ARCH_SUPPORTS_FTRACE_OPS 1 +#endif + #ifdef CONFIG_FUNCTION_TRACER #define MCOUNT_ADDR ((unsigned long)(__gnu_mcount_nc)) #define MCOUNT_INSN_SIZE 4 /* sizeof mcount call */ diff --git a/arch/arm/kernel/entry-ftrace.S b/arch/arm/kernel/entry-ftrace.S index c73c403..b1fee6c 100644 --- a/arch/arm/kernel/entry-ftrace.S +++ b/arch/arm/kernel/entry-ftrace.S @@ -92,12 +92,74 @@ 2: mcount_exit .endm +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS + +.macro __ftrace_regs_caller + + add ip, sp, #4 @ move in IP the value of SP as it was + @ before the push {lr} of the mcount mechanism + stmdb sp!, {ip,lr,pc} + stmdb sp!, {r0-r11,lr} + + @ stack content at this point: + @ 0 4 44 48 52 56 60 64 + @ R0 | R1 | ... | R11 | LR | SP + 4 | LR | PC | previous LR | + + mov r3, sp @ struct pt_regs* + ldr r2, =function_trace_op + ldr r2, [r2] @ pointer to the current + @ function tracing op + ldr r1, [sp, #64] @ lr of instrumented func + mcount_adjust_addr r0, lr @ instrumented function + + .globl ftrace_regs_call +ftrace_regs_call: + bl ftrace_stub + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + .globl ftrace_graph_regs_call +ftrace_graph_regs_call: + mov r0, r0 +#endif + @ pop saved regs + ldr lr, [sp, #64] @ get the previous LR value from stack + ldmia sp, {r0-r11, ip, sp, pc} @ pop the saved registers INCLUDING + @ the stack pointer +.endm + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +.macro __ftrace_graph_regs_caller + + sub r0, fp, #4 @ lr of instrumented routine (parent) + + @ called from __ftrace_regs_caller + ldr r1, [sp, #56] @ instrumented routine (func) + mcount_adjust_addr r1, r1 + + sub r2, r0, #4 @ frame pointer + bl prepare_ftrace_return + + @ pop regs saved in ftrace_regs_caller + ldr lr, [sp, #64] @ restore lr from the stack + ldmia sp, {r0-r11, ip, sp, pc} @ restore r0 through pc + +.endm +#endif +#endif + .macro __ftrace_caller suffix mcount_enter mcount_get_lr r1 @ lr of instrumented func mcount_adjust_addr r0, lr @ instrumented function +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS + ldr r2, =function_trace_op + ldr r2, [r2] @ pointer to the current + @ function tracing op + mov r3, #0 @ regs is NULL +#endif + .globl ftrace_call\suffix ftrace_call\suffix: bl ftrace_stub @@ -212,6 +274,15 @@ UNWIND(.fnstart) __ftrace_caller UNWIND(.fnend) ENDPROC(ftrace_caller) + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS +ENTRY(ftrace_regs_caller) +UNWIND(.fnstart) + __ftrace_regs_caller +UNWIND(.fnend) +ENDPROC(ftrace_regs_caller) +#endif + #endif #ifdef CONFIG_FUNCTION_GRAPH_TRACER @@ -220,6 +291,14 @@ UNWIND(.fnstart) __ftrace_graph_caller UNWIND(.fnend) ENDPROC(ftrace_graph_caller) + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS +ENTRY(ftrace_graph_regs_caller) +UNWIND(.fnstart) + __ftrace_graph_regs_caller +UNWIND(.fnend) +ENDPROC(ftrace_graph_regs_caller) +#endif #endif .purgem mcount_enter diff --git a/arch/arm/kernel/ftrace.c b/arch/arm/kernel/ftrace.c index 3f17594..cb0358c 100644 --- a/arch/arm/kernel/ftrace.c +++ b/arch/arm/kernel/ftrace.c @@ -139,6 +139,15 @@ int ftrace_update_ftrace_func(ftrace_func_t func) ret = ftrace_modify_code(pc, 0, new, false); +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS + if (!ret) { + pc = (unsigned long)&ftrace_regs_call; + new = ftrace_call_replace(pc, (unsigned long)func); + + ret = ftrace_modify_code(pc, 0, new, false); + } +#endif + #ifdef CONFIG_OLD_MCOUNT if (!ret) { pc = (unsigned long)&ftrace_call_old; @@ -157,11 +166,29 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) unsigned long ip = rec->ip; old = ftrace_nop_replace(rec); + + new = ftrace_call_replace(ip, adjust_address(rec, addr)); + + return ftrace_modify_code(rec->ip, old, new, true); +} + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS + +int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, + unsigned long addr) +{ + unsigned long new, old; + unsigned long ip = rec->ip; + + old = ftrace_call_replace(ip, adjust_address(rec, old_addr)); + new = ftrace_call_replace(ip, adjust_address(rec, addr)); return ftrace_modify_code(rec->ip, old, new, true); } +#endif + int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr) { @@ -229,6 +256,8 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, extern unsigned long ftrace_graph_call; extern unsigned long ftrace_graph_call_old; extern void ftrace_graph_caller_old(void); +extern unsigned long ftrace_graph_regs_call; +extern void ftrace_graph_regs_caller(void); static int __ftrace_modify_caller(unsigned long *callsite, void (*func) (void), bool enable) @@ -251,6 +280,14 @@ static int ftrace_modify_graph_caller(bool enable) ftrace_graph_caller, enable); +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS + if (!ret) + ret = __ftrace_modify_caller(&ftrace_graph_regs_call, + ftrace_graph_regs_caller, + enable); +#endif + + #ifdef CONFIG_OLD_MCOUNT if (!ret) ret = __ftrace_modify_caller(&ftrace_graph_call_old,
The DYNAMIC_FTRACE_WITH_REGS configuration makes it possible for a ftrace operation to specify if registers need to saved/restored by the ftrace handler. This is needed by kgraft and possibly other ftrace-based tools, and the ARM architecture is currently lacking this feature. It would also be the first step to support the "Kprobes-on-ftrace" optimization on ARM. This patch introduces a new ftrace handler that stores the registers on the stack before calling the next stage. The registers are restored from the stack before going back to the instrumented function. A side-effect of this patch is to activate the support for ftrace_modify_call() as it defines ARCH_SUPPORTS_FTRACE_OPS for the ARM architecture. Signed-off-by: Abel Vesa <abelvesa@linux.com> --- arch/arm/Kconfig | 1 + arch/arm/include/asm/ftrace.h | 4 +++ arch/arm/kernel/entry-ftrace.S | 79 ++++++++++++++++++++++++++++++++++++++++++ arch/arm/kernel/ftrace.c | 37 ++++++++++++++++++++ 4 files changed, 121 insertions(+)