diff mbox series

[v3] ftrace: Get the true parent ip for function tracer

Message ID 20240910133620.19711-1-jeff.xie@linux.dev (mailing list archive)
State Superseded
Headers show
Series [v3] ftrace: Get the true parent ip for function tracer | expand

Commit Message

Jeff Xie Sept. 10, 2024, 1:36 p.m. UTC
When using both function tracer and function graph simultaneously,
it is found that function tracer sometimes captures a fake parent ip
(return_to_handler) instead of the true parent ip.

This issue is easy to reproduce. Below are my reproduction steps:

jeff-labs:~/bin # ./trace-net.sh

jeff-labs:~/bin # cat /sys/kernel/debug/tracing/instances/foo/trace | grep return_to_handler
    trace-net.sh-405     [001] ...2.    31.859501: avc_has_perm+0x4/0x190 <-return_to_handler+0x0/0x40
    trace-net.sh-405     [001] ...2.    31.859503: simple_setattr+0x4/0x70 <-return_to_handler+0x0/0x40
    trace-net.sh-405     [001] ...2.    31.859503: truncate_pagecache+0x4/0x60 <-return_to_handler+0x0/0x40
    trace-net.sh-405     [001] ...2.    31.859505: unmap_mapping_range+0x4/0x140 <-return_to_handler+0x0/0x40
    trace-net.sh-405     [001] ...3.    31.859508: _raw_spin_unlock+0x4/0x30 <-return_to_handler+0x0/0x40
    [...]

The following is my simple trace script:

<snip>
jeff-labs:~/bin # cat ./trace-net.sh
TRACE_PATH="/sys/kernel/debug/tracing"

set_events() {
        echo 1 > $1/events/net/enable
        echo 1 > $1/events/tcp/enable
        echo 1 > $1/events/sock/enable
        echo 1 > $1/events/napi/enable
        echo 1 > $1/events/fib/enable
        echo 1 > $1/events/neigh/enable
}

set_events ${TRACE_PATH}
echo 1 > ${TRACE_PATH}/options/sym-offset
echo 1 > ${TRACE_PATH}/options/funcgraph-tail
echo 1 > ${TRACE_PATH}/options/funcgraph-proc
echo 1 > ${TRACE_PATH}/options/funcgraph-abstime

echo 'tcp_orphan*' > ${TRACE_PATH}/set_ftrace_notrace
echo function_graph > ${TRACE_PATH}/current_tracer

INSTANCE_FOO=${TRACE_PATH}/instances/foo
if [ ! -e $INSTANCE_FOO ]; then
        mkdir ${INSTANCE_FOO}
fi
set_events ${INSTANCE_FOO}
echo 1 > ${INSTANCE_FOO}/options/sym-offset
echo 'tcp_orphan*' > ${INSTANCE_FOO}/set_ftrace_notrace
echo function > ${INSTANCE_FOO}/current_tracer

echo 1 > ${TRACE_PATH}/tracing_on
echo 1 > ${INSTANCE_FOO}/tracing_on

echo > ${TRACE_PATH}/trace
echo > ${INSTANCE_FOO}/trace
</snip>

Signed-off-by: Jeff Xie <jeff.xie@linux.dev>
---
v3:
- fixed build error when CONFIG_FUNCTION_GRAPH_TRACER=n suggested by Masami

v2:
- Adding __always_inline to function_get_true_parent_ip suggested by Steve

 kernel/trace/trace_functions.c | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

Comments

Jeff Xie Oct. 2, 2024, 2:55 p.m. UTC | #1
September 10, 2024 at 9:36 PM, "Jeff Xie" <jeff.xie@linux.dev> wrote:

Kindly ping...

> 
> When using both function tracer and function graph simultaneously,
> 
> it is found that function tracer sometimes captures a fake parent ip
> 
> (return_to_handler) instead of the true parent ip.
> 
> This issue is easy to reproduce. Below are my reproduction steps:
> 
> jeff-labs:~/bin # ./trace-net.sh
> 
> jeff-labs:~/bin # cat /sys/kernel/debug/tracing/instances/foo/trace | grep return_to_handler
> 
>  trace-net.sh-405 [001] ...2. 31.859501: avc_has_perm+0x4/0x190 <-return_to_handler+0x0/0x40
> 
>  trace-net.sh-405 [001] ...2. 31.859503: simple_setattr+0x4/0x70 <-return_to_handler+0x0/0x40
> 
>  trace-net.sh-405 [001] ...2. 31.859503: truncate_pagecache+0x4/0x60 <-return_to_handler+0x0/0x40
> 
>  trace-net.sh-405 [001] ...2. 31.859505: unmap_mapping_range+0x4/0x140 <-return_to_handler+0x0/0x40
> 
>  trace-net.sh-405 [001] ...3. 31.859508: _raw_spin_unlock+0x4/0x30 <-return_to_handler+0x0/0x40
> 
>  [...]
> 
> The following is my simple trace script:
> 
> <snip>
> 
> jeff-labs:~/bin # cat ./trace-net.sh
> 
> TRACE_PATH="/sys/kernel/debug/tracing"
> 
> set_events() {
> 
>  echo 1 > $1/events/net/enable
> 
>  echo 1 > $1/events/tcp/enable
> 
>  echo 1 > $1/events/sock/enable
> 
>  echo 1 > $1/events/napi/enable
> 
>  echo 1 > $1/events/fib/enable
> 
>  echo 1 > $1/events/neigh/enable
> 
> }
> 
> set_events ${TRACE_PATH}
> 
> echo 1 > ${TRACE_PATH}/options/sym-offset
> 
> echo 1 > ${TRACE_PATH}/options/funcgraph-tail
> 
> echo 1 > ${TRACE_PATH}/options/funcgraph-proc
> 
> echo 1 > ${TRACE_PATH}/options/funcgraph-abstime
> 
> echo 'tcp_orphan*' > ${TRACE_PATH}/set_ftrace_notrace
> 
> echo function_graph > ${TRACE_PATH}/current_tracer
> 
> INSTANCE_FOO=${TRACE_PATH}/instances/foo
> 
> if [ ! -e $INSTANCE_FOO ]; then
> 
>  mkdir ${INSTANCE_FOO}
> 
> fi
> 
> set_events ${INSTANCE_FOO}
> 
> echo 1 > ${INSTANCE_FOO}/options/sym-offset
> 
> echo 'tcp_orphan*' > ${INSTANCE_FOO}/set_ftrace_notrace
> 
> echo function > ${INSTANCE_FOO}/current_tracer
> 
> echo 1 > ${TRACE_PATH}/tracing_on
> 
> echo 1 > ${INSTANCE_FOO}/tracing_on
> 
> echo > ${TRACE_PATH}/trace
> 
> echo > ${INSTANCE_FOO}/trace
> 
> </snip>
> 
> Signed-off-by: Jeff Xie <jeff.xie@linux.dev>
> 
> ---
> 
> v3:
> 
> - fixed build error when CONFIG_FUNCTION_GRAPH_TRACER=n suggested by Masami
> 
> v2:
> 
> - Adding __always_inline to function_get_true_parent_ip suggested by Steve
> 
>  kernel/trace/trace_functions.c | 26 ++++++++++++++++++++++++++
> 
>  1 file changed, 26 insertions(+)
> 
> diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
> 
> index 3b0cea37e029..5dc5fcdf4e6f 100644
> 
> --- a/kernel/trace/trace_functions.c
> 
> +++ b/kernel/trace/trace_functions.c
> 
> @@ -176,6 +176,27 @@ static void function_trace_start(struct trace_array *tr)
> 
>  tracing_reset_online_cpus(&tr->array_buffer);
> 
>  }
> 
>  
> 
> +#ifdef CONFIG_FUNCTION_GRAPH_TRACER
> 
> +static __always_inline unsigned long
> 
> +function_get_true_parent_ip(unsigned long parent_ip, struct ftrace_regs *fregs)
> 
> +{
> 
> + unsigned long true_parent_ip;
> 
> + int idx = 0;
> 
> +
> 
> + true_parent_ip = parent_ip;
> 
> + if (unlikely(parent_ip == (unsigned long)&return_to_handler))
> 
> + true_parent_ip = ftrace_graph_ret_addr(current, &idx, parent_ip,
> 
> + (unsigned long *)fregs->regs.sp);
> 
> + return true_parent_ip;
> 
> +}
> 
> +#else
> 
> +static __always_inline unsigned long
> 
> +function_get_true_parent_ip(unsigned long parent_ip, struct ftrace_regs *fregs)
> 
> +{
> 
> + return parent_ip;
> 
> +}
> 
> +#endif
> 
> +
> 
>  static void
> 
>  function_trace_call(unsigned long ip, unsigned long parent_ip,
> 
>  struct ftrace_ops *op, struct ftrace_regs *fregs)
> 
> @@ -193,6 +214,8 @@ function_trace_call(unsigned long ip, unsigned long parent_ip,
> 
>  if (bit < 0)
> 
>  return;
> 
>  
> 
> + parent_ip = function_get_true_parent_ip(parent_ip, fregs);
> 
> +
> 
>  trace_ctx = tracing_gen_ctx();
> 
>  
> 
>  cpu = smp_processor_id();
> 
> @@ -241,6 +264,7 @@ function_stack_trace_call(unsigned long ip, unsigned long parent_ip,
> 
>  * recursive protection is performed.
> 
>  */
> 
>  local_irq_save(flags);
> 
> + parent_ip = function_get_true_parent_ip(parent_ip, fregs);
> 
>  cpu = raw_smp_processor_id();
> 
>  data = per_cpu_ptr(tr->array_buffer.data, cpu);
> 
>  disabled = atomic_inc_return(&data->disabled);
> 
> @@ -309,6 +333,7 @@ function_no_repeats_trace_call(unsigned long ip, unsigned long parent_ip,
> 
>  if (bit < 0)
> 
>  return;
> 
>  
> 
> + parent_ip = function_get_true_parent_ip(parent_ip, fregs);
> 
>  cpu = smp_processor_id();
> 
>  data = per_cpu_ptr(tr->array_buffer.data, cpu);
> 
>  if (atomic_read(&data->disabled))
> 
> @@ -356,6 +381,7 @@ function_stack_no_repeats_trace_call(unsigned long ip, unsigned long parent_ip,
> 
>  * recursive protection is performed.
> 
>  */
> 
>  local_irq_save(flags);
> 
> + parent_ip = function_get_true_parent_ip(parent_ip, fregs);
> 
>  cpu = raw_smp_processor_id();
> 
>  data = per_cpu_ptr(tr->array_buffer.data, cpu);
> 
>  disabled = atomic_inc_return(&data->disabled);
> 
> -- 
> 
> 2.43.0
>
Steven Rostedt Oct. 2, 2024, 3:14 p.m. UTC | #2
On Wed, 02 Oct 2024 14:55:54 +0000
jeff.xie@linux.dev wrote:

> September 10, 2024 at 9:36 PM, "Jeff Xie" <jeff.xie@linux.dev> wrote:
> 
> Kindly ping...

Thanks for the reminder. It's in my queue so it should be picked up for 6.13.

  https://patchwork.kernel.org/project/linux-trace-kernel/patch/20240910133620.19711-1-jeff.xie@linux.dev/

-- Steve
Steven Rostedt Oct. 5, 2024, 2:13 p.m. UTC | #3
On Tue, 10 Sep 2024 21:36:20 +0800
Jeff Xie <jeff.xie@linux.dev> wrote:

> When using both function tracer and function graph simultaneously,
> it is found that function tracer sometimes captures a fake parent ip
> (return_to_handler) instead of the true parent ip.
> 
> This issue is easy to reproduce. Below are my reproduction steps:
> 
> jeff-labs:~/bin # ./trace-net.sh

I applied this and it failed one of my testes:

On x86 32 bit:

[  668.043429] BUG: kernel NULL pointer dereference, address: 0000003c
[  668.045840] #PF: supervisor read access in kernel mode
[  668.047869] #PF: error_code(0x0000) - not-present page
[  668.049867] *pdpt = 000000000afad001 *pde = 0000000000000000 
[  668.052481] Oops: Oops: 0000 [#1] PREEMPT SMP PTI
[  668.054408] CPU: 3 UID: 0 PID: 4347 Comm: cat Not tainted 6.12.0-rc1-test-00008-gc274c40c37e8-dirty #444
[  668.057679] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2 04/01/2014
[  668.060944] EIP: function_trace_call+0x14b/0x1a0
[  668.062824] Code: ff b8 10 00 00 00 e9 56 ff ff ff 8d 74 26 00 e8 1b c7 c0 00 eb a1 2e 8d b4 26 00 00 00 00 90 8b 45 e0 8d 55 ec b9 98 9e 10 c7 <ff> 70 3c 89 f8 e8 eb 0a 01 00 89 45 e8 5a e9 47 ff ff ff 8b 87 ac
[  668.068726] EAX: 00000000 EBX: c1b02000 ECX: c7109e98 EDX: cab21e50
[  668.071076] ESI: fffffff7 EDI: ca828000 EBP: cab21e64 ESP: cab21e44
[  668.073425] DS: 007b ES: 007b FS: 00d8 GS: 0033 SS: 0068 EFLAGS: 00010246
[  668.075938] CR0: 80050033 CR2: 0000003c CR3: 091a0000 CR4: 000006f0
[  668.078331] Call Trace:
[  668.079687]  ? ftrace_graph_caller+0x1c/0x1c
[  668.081541]  ? __die_body.cold+0x14/0x1b
[  668.083440]  ? ftrace_graph_caller+0x1c/0x1c
[  668.085347]  ? __die+0x21/0x28
[  668.087062]  ? ftrace_graph_caller+0x1c/0x1c
[  668.088944]  ? page_fault_oops+0x68/0x120
[  668.091245]  ? ftrace_graph_caller+0x1c/0x1c
[  668.093208]  ? kernelmode_fixup_or_oops.constprop.0+0x58/0x70
[  668.095669]  ? ftrace_graph_caller+0x1c/0x1c
[  668.097574]  ? __bad_area_nosemaphore.constprop.0+0x107/0x130
[  668.101278]  ? ftrace_graph_caller+0x18/0x1c
[  668.103358]  ? ftrace_graph_caller+0x1c/0x1c
[  668.105272]  ? bad_area_nosemaphore+0xf/0x20
[  668.107276]  ? ftrace_graph_caller+0x1c/0x1c
[  668.109279]  ? do_user_addr_fault+0x20b/0x3d0
[  668.111518]  ? exc_page_fault+0x5b/0x240
[  668.113474]  ? pvclock_clocksource_read_nowd+0x110/0x110
[  668.115738]  ? handle_exception+0x133/0x133
[  668.117689]  ? ftrace_graph_caller+0x1c/0x1c
[  668.119827]  ? ftrace_graph_set_hash+0xbb/0x1c0
[  668.121955]  ? pvclock_clocksource_read_nowd+0x110/0x110
[  668.124258]  ? function_trace_call+0x14b/0x1a0
[  668.126456]  ? pvclock_clocksource_read_nowd+0x110/0x110
[  668.128773]  ? function_trace_call+0x14b/0x1a0
[  668.130909]  ? sched_tick+0x370/0x370
[  668.134063]  ? ftrace_graph_caller+0x1c/0x1c
[  668.136213]  ? ftrace_stacktrace_count+0xa0/0xa0

The crash happened here:

static __always_inline unsigned long
function_get_true_parent_ip(unsigned long parent_ip, struct ftrace_regs *fregs)
{
        unsigned long true_parent_ip;
        int idx = 0;

        true_parent_ip = parent_ip;
        if (unlikely(parent_ip == (unsigned long)&return_to_handler))
                true_parent_ip = ftrace_graph_ret_addr(current, &idx, parent_ip,  <<<----- CRASH
                                (unsigned long *)fregs->regs.sp);
        return true_parent_ip;
}

It appears that on some archs (x86 32 bit) the function tracer can be
called when "current" is not set up yet, and can crash when accessing it.

So perhaps we need to add:

#ifdef CONFIG_ARCH_WANTS_NO_INSTR
static __always_inline unsigned long
function_get_true_parent_ip(unsigned long parent_ip, struct ftrace_regs *fregs)
{
        unsigned long true_parent_ip;
        int idx = 0;

        true_parent_ip = parent_ip;
        if (unlikely(parent_ip == (unsigned long)&return_to_handler))
                true_parent_ip = ftrace_graph_ret_addr(current, &idx, parent_ip,  <<<----- CRASH
                                (unsigned long *)fregs->regs.sp);
        return true_parent_ip;
}
#else
# define function_get_true_parent_ip(parent_ip, fregs)  parent_ip
#endif

That is, if the arch has noinstr implemented, it should always be safe
to access current, but if not, then there's no guarantee.

?

-- Steve
Jeff Xie Oct. 6, 2024, 3:28 a.m. UTC | #4
October 5, 2024 at 10:13 PM, "Steven Rostedt" <rostedt@goodmis.org> wrote:



> 
> On Tue, 10 Sep 2024 21:36:20 +0800
> 
> Jeff Xie <jeff.xie@linux.dev> wrote:
> 
> > 
> > When using both function tracer and function graph simultaneously,
> > 
> >  it is found that function tracer sometimes captures a fake parent ip
> > 
> >  (return_to_handler) instead of the true parent ip.
> > 
> >  
> > 
> >  This issue is easy to reproduce. Below are my reproduction steps:
> > 
> >  
> > 
> >  jeff-labs:~/bin # ./trace-net.sh
> > 
> 
> I applied this and it failed one of my testes:
> 
> On x86 32 bit:
> 
> [ 668.043429] BUG: kernel NULL pointer dereference, address: 0000003c
> 
> [ 668.045840] #PF: supervisor read access in kernel mode
> 
> [ 668.047869] #PF: error_code(0x0000) - not-present page
> 
> [ 668.049867] *pdpt = 000000000afad001 *pde = 0000000000000000 
> 
> [ 668.052481] Oops: Oops: 0000 [#1] PREEMPT SMP PTI
> 
> [ 668.054408] CPU: 3 UID: 0 PID: 4347 Comm: cat Not tainted 6.12.0-rc1-test-00008-gc274c40c37e8-dirty #444
> 
> [ 668.057679] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2 04/01/2014
> 
> [ 668.060944] EIP: function_trace_call+0x14b/0x1a0
> 
> [ 668.062824] Code: ff b8 10 00 00 00 e9 56 ff ff ff 8d 74 26 00 e8 1b c7 c0 00 eb a1 2e 8d b4 26 00 00 00 00 90 8b 45 e0 8d 55 ec b9 98 9e 10 c7 <ff> 70 3c 89 f8 e8 eb 0a 01 00 89 45 e8 5a e9 47 ff ff ff 8b 87 ac
> 
> [ 668.068726] EAX: 00000000 EBX: c1b02000 ECX: c7109e98 EDX: cab21e50
> 
> [ 668.071076] ESI: fffffff7 EDI: ca828000 EBP: cab21e64 ESP: cab21e44
> 
> [ 668.073425] DS: 007b ES: 007b FS: 00d8 GS: 0033 SS: 0068 EFLAGS: 00010246
> 
> [ 668.075938] CR0: 80050033 CR2: 0000003c CR3: 091a0000 CR4: 000006f0
> 
> [ 668.078331] Call Trace:
> 
> [ 668.079687] ? ftrace_graph_caller+0x1c/0x1c
> 
> [ 668.081541] ? __die_body.cold+0x14/0x1b
> 
> [ 668.083440] ? ftrace_graph_caller+0x1c/0x1c
> 
> [ 668.085347] ? __die+0x21/0x28
> 
> [ 668.087062] ? ftrace_graph_caller+0x1c/0x1c
> 
> [ 668.088944] ? page_fault_oops+0x68/0x120
> 
> [ 668.091245] ? ftrace_graph_caller+0x1c/0x1c
> 
> [ 668.093208] ? kernelmode_fixup_or_oops.constprop.0+0x58/0x70
> 
> [ 668.095669] ? ftrace_graph_caller+0x1c/0x1c
> 
> [ 668.097574] ? __bad_area_nosemaphore.constprop.0+0x107/0x130
> 
> [ 668.101278] ? ftrace_graph_caller+0x18/0x1c
> 
> [ 668.103358] ? ftrace_graph_caller+0x1c/0x1c
> 
> [ 668.105272] ? bad_area_nosemaphore+0xf/0x20
> 
> [ 668.107276] ? ftrace_graph_caller+0x1c/0x1c
> 
> [ 668.109279] ? do_user_addr_fault+0x20b/0x3d0
> 
> [ 668.111518] ? exc_page_fault+0x5b/0x240
> 
> [ 668.113474] ? pvclock_clocksource_read_nowd+0x110/0x110
> 
> [ 668.115738] ? handle_exception+0x133/0x133
> 
> [ 668.117689] ? ftrace_graph_caller+0x1c/0x1c
> 
> [ 668.119827] ? ftrace_graph_set_hash+0xbb/0x1c0
> 
> [ 668.121955] ? pvclock_clocksource_read_nowd+0x110/0x110
> 
> [ 668.124258] ? function_trace_call+0x14b/0x1a0
> 
> [ 668.126456] ? pvclock_clocksource_read_nowd+0x110/0x110
> 
> [ 668.128773] ? function_trace_call+0x14b/0x1a0
> 
> [ 668.130909] ? sched_tick+0x370/0x370
> 
> [ 668.134063] ? ftrace_graph_caller+0x1c/0x1c
> 
> [ 668.136213] ? ftrace_stacktrace_count+0xa0/0xa0
> 
> The crash happened here:
> 
> static __always_inline unsigned long
> 
> function_get_true_parent_ip(unsigned long parent_ip, struct ftrace_regs *fregs)
> 
> {
> 
>  unsigned long true_parent_ip;
> 
>  int idx = 0;
> 
>  true_parent_ip = parent_ip;
> 
>  if (unlikely(parent_ip == (unsigned long)&return_to_handler))
> 
>  true_parent_ip = ftrace_graph_ret_addr(current, &idx, parent_ip, <<<----- CRASH
> 
>  (unsigned long *)fregs->regs.sp);
> 
>  return true_parent_ip;
> 
> }
> 
> It appears that on some archs (x86 32 bit) the function tracer can be
> 
> called when "current" is not set up yet, and can crash when accessing it.
> 
> So perhaps we need to add:
> 
> #ifdef CONFIG_ARCH_WANTS_NO_INSTR
> 
> static __always_inline unsigned long
> 
> function_get_true_parent_ip(unsigned long parent_ip, struct ftrace_regs *fregs)
> 
> {
> 
>  unsigned long true_parent_ip;
> 
>  int idx = 0;
> 
>  true_parent_ip = parent_ip;
> 
>  if (unlikely(parent_ip == (unsigned long)&return_to_handler))
> 
>  true_parent_ip = ftrace_graph_ret_addr(current, &idx, parent_ip, <<<----- CRASH
> 
>  (unsigned long *)fregs->regs.sp);
> 
>  return true_parent_ip;
> 
> }
> 
> #else
> 
> # define function_get_true_parent_ip(parent_ip, fregs) parent_ip
> 
> #endif
> 
> That is, if the arch has noinstr implemented, it should always be safe
> 
> to access current, but if not, then there's no guarantee.
> 
> ?

Makes sense. Thanks for the test and detailed explanation! I’ll send a new version.

> 
> -- Steve
>
Steven Rostedt Oct. 7, 2024, 9:10 p.m. UTC | #5
On Sat, 5 Oct 2024 10:13:20 -0400
Steven Rostedt <rostedt@goodmis.org> wrote:

> The crash happened here:

This has been bothering me all weekend so I dug more into it.

The reason it was bothering me is because we use current later on, and it
has no issue. But then I noticed the real bug!

I was confused because the crashed instruction pointer was in the
get_current() code. But that's not where the crash actually happened.

> 
> static __always_inline unsigned long
> function_get_true_parent_ip(unsigned long parent_ip, struct ftrace_regs *fregs)
> {
>         unsigned long true_parent_ip;
>         int idx = 0;
> 
>         true_parent_ip = parent_ip;
>         if (unlikely(parent_ip == (unsigned long)&return_to_handler))
>                 true_parent_ip = ftrace_graph_ret_addr(current, &idx, parent_ip,  <<<----- CRASH

That's not the crash

>                                 (unsigned long *)fregs->regs.sp);

The above is!

fregs should *NEVER* be used directly. OK, I need to make:

struct ftrace_regs {
	void *nothing_here[];
};

Now, to stop bugs like this.

fregs is unique by architecture, and may not even be defined! That is, it
can pass NULL for fregs. And only x86 has fregs->regs available. Other
archs do not.

You must use the fregs helper functions, thus the above can be:


static __always_inline unsigned long
function_get_true_parent_ip(unsigned long parent_ip, struct ftrace_regs *fregs)
{
	unsigned long true_parent_ip;
	int idx = 0;

	true_parent_ip = parent_ip;
	if (unlikely(parent_ip == (unsigned long)&return_to_handler) && fregs)
		true_parent_ip = ftrace_graph_ret_addr(current, &idx, parent_ip,
				(unsigned long *)ftrace_regs_get_stack_pointer(fregs));

	return true_parent_ip;
}

So you can make a v5 with this update. And I'll go and make the empty
ftrace_regs structure.

Thanks!

-- Steve


>         return true_parent_ip;
> }
> 
> It appears that on some archs (x86 32 bit) the function tracer can be
> called when "current" is not set up yet, and can crash when accessing it.
> 
> So perhaps we need to add:
> 
> #ifdef CONFIG_ARCH_WANTS_NO_INSTR
> static __always_inline unsigned long
> function_get_true_parent_ip(unsigned long parent_ip, struct ftrace_regs *fregs)
> {
>         unsigned long true_parent_ip;
>         int idx = 0;
> 
>         true_parent_ip = parent_ip;
>         if (unlikely(parent_ip == (unsigned long)&return_to_handler))
>                 true_parent_ip = ftrace_graph_ret_addr(current, &idx, parent_ip,  <<<----- CRASH
>                                 (unsigned long *)fregs->regs.sp);
>         return true_parent_ip;
> }
> #else
> # define function_get_true_parent_ip(parent_ip, fregs)  parent_ip
> #endif
> 
> That is, if the arch has noinstr implemented, it should always be safe
> to access current, but if not, then there's no guarantee.
> 
> ?
Jeff Xie Oct. 8, 2024, 2:17 a.m. UTC | #6
October 8, 2024 at 5:10 AM, "Steven Rostedt" <rostedt@goodmis.org> wrote:



> 
> On Sat, 5 Oct 2024 10:13:20 -0400
> 
> Steven Rostedt <rostedt@goodmis.org> wrote:
> 
> > 
> > The crash happened here:
> > 
> 
> This has been bothering me all weekend so I dug more into it.
> 
> The reason it was bothering me is because we use current later on, and it
> 
> has no issue. But then I noticed the real bug!
> 
> I was confused because the crashed instruction pointer was in the
> 
> get_current() code. But that's not where the crash actually happened.
> 
> > 
> > static __always_inline unsigned long
> > 
> >  function_get_true_parent_ip(unsigned long parent_ip, struct ftrace_regs *fregs)
> > 
> >  {
> > 
> >  unsigned long true_parent_ip;
> > 
> >  int idx = 0;
> > 
> >  
> > 
> >  true_parent_ip = parent_ip;
> > 
> >  if (unlikely(parent_ip == (unsigned long)&return_to_handler))
> > 
> >  true_parent_ip = ftrace_graph_ret_addr(current, &idx, parent_ip, <<<----- CRASH
> > 
> 
> That's not the crash
> 
> > 
> > (unsigned long *)fregs->regs.sp);
> > 
> 
> The above is!
> 
> fregs should *NEVER* be used directly. OK, I need to make:
> 
> struct ftrace_regs {
> 
>  void *nothing_here[];
> 
> };
> 
> Now, to stop bugs like this.
> 
> fregs is unique by architecture, and may not even be defined! That is, it
> 
> can pass NULL for fregs. And only x86 has fregs->regs available. Other
> 
> archs do not.

Thanks, I just saw this comment from include/linux/ftrace.h

 * NOTE: user *must not* access regs directly, only do it via APIs, because
 * the member can be changed according to the architecture.
 */
struct ftrace_regs {
        struct pt_regs          regs;
};


> 
> You must use the fregs helper functions, thus the above can be:
> 
> static __always_inline unsigned long
> 
> function_get_true_parent_ip(unsigned long parent_ip, struct ftrace_regs *fregs)
> 
> {
> 
>  unsigned long true_parent_ip;
> 
>  int idx = 0;
> 
>  true_parent_ip = parent_ip;
> 
>  if (unlikely(parent_ip == (unsigned long)&return_to_handler) && fregs)
> 
>  true_parent_ip = ftrace_graph_ret_addr(current, &idx, parent_ip,
> 
>  (unsigned long *)ftrace_regs_get_stack_pointer(fregs));
> 
>  return true_parent_ip;
> 
> }
> 
> So you can make a v5 with this update. And I'll go and make the empty
> 
> ftrace_regs structure.

Thanks, I will update the patch.

> 
> Thanks!
> 
> -- Steve
> 
> > 
> > return true_parent_ip;
> > 
> >  }
> > 
> >  
> > 
> >  It appears that on some archs (x86 32 bit) the function tracer can be
> > 
> >  called when "current" is not set up yet, and can crash when accessing it.
> > 
> >  
> > 
> >  So perhaps we need to add:
> > 
> >  
> > 
> >  #ifdef CONFIG_ARCH_WANTS_NO_INSTR
> > 
> >  static __always_inline unsigned long
> > 
> >  function_get_true_parent_ip(unsigned long parent_ip, struct ftrace_regs *fregs)
> > 
> >  {
> > 
> >  unsigned long true_parent_ip;
> > 
> >  int idx = 0;
> > 
> >  
> > 
> >  true_parent_ip = parent_ip;
> > 
> >  if (unlikely(parent_ip == (unsigned long)&return_to_handler))
> > 
> >  true_parent_ip = ftrace_graph_ret_addr(current, &idx, parent_ip, <<<----- CRASH
> > 
> >  (unsigned long *)fregs->regs.sp);
> > 
> >  return true_parent_ip;
> > 
> >  }
> > 
> >  #else
> > 
> >  # define function_get_true_parent_ip(parent_ip, fregs) parent_ip
> > 
> >  #endif
> > 
> >  
> > 
> >  That is, if the arch has noinstr implemented, it should always be safe
> > 
> >  to access current, but if not, then there's no guarantee.
> > 
> >  
> > 
> >  ?
> >
>
diff mbox series

Patch

diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index 3b0cea37e029..5dc5fcdf4e6f 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -176,6 +176,27 @@  static void function_trace_start(struct trace_array *tr)
 	tracing_reset_online_cpus(&tr->array_buffer);
 }
 
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+static __always_inline unsigned long
+function_get_true_parent_ip(unsigned long parent_ip, struct ftrace_regs *fregs)
+{
+	unsigned long true_parent_ip;
+	int idx = 0;
+
+	true_parent_ip = parent_ip;
+	if (unlikely(parent_ip == (unsigned long)&return_to_handler))
+		true_parent_ip = ftrace_graph_ret_addr(current, &idx, parent_ip,
+				(unsigned long *)fregs->regs.sp);
+	return true_parent_ip;
+}
+#else
+static __always_inline unsigned long
+function_get_true_parent_ip(unsigned long parent_ip, struct ftrace_regs *fregs)
+{
+	return parent_ip;
+}
+#endif
+
 static void
 function_trace_call(unsigned long ip, unsigned long parent_ip,
 		    struct ftrace_ops *op, struct ftrace_regs *fregs)
@@ -193,6 +214,8 @@  function_trace_call(unsigned long ip, unsigned long parent_ip,
 	if (bit < 0)
 		return;
 
+	parent_ip = function_get_true_parent_ip(parent_ip, fregs);
+
 	trace_ctx = tracing_gen_ctx();
 
 	cpu = smp_processor_id();
@@ -241,6 +264,7 @@  function_stack_trace_call(unsigned long ip, unsigned long parent_ip,
 	 * recursive protection is performed.
 	 */
 	local_irq_save(flags);
+	parent_ip = function_get_true_parent_ip(parent_ip, fregs);
 	cpu = raw_smp_processor_id();
 	data = per_cpu_ptr(tr->array_buffer.data, cpu);
 	disabled = atomic_inc_return(&data->disabled);
@@ -309,6 +333,7 @@  function_no_repeats_trace_call(unsigned long ip, unsigned long parent_ip,
 	if (bit < 0)
 		return;
 
+	parent_ip = function_get_true_parent_ip(parent_ip, fregs);
 	cpu = smp_processor_id();
 	data = per_cpu_ptr(tr->array_buffer.data, cpu);
 	if (atomic_read(&data->disabled))
@@ -356,6 +381,7 @@  function_stack_no_repeats_trace_call(unsigned long ip, unsigned long parent_ip,
 	 * recursive protection is performed.
 	 */
 	local_irq_save(flags);
+	parent_ip = function_get_true_parent_ip(parent_ip, fregs);
 	cpu = raw_smp_processor_id();
 	data = per_cpu_ptr(tr->array_buffer.data, cpu);
 	disabled = atomic_inc_return(&data->disabled);