diff mbox series

[v2] riscv: fix oops caused by irq on/off tracer

Message ID 20220129004226.32868-1-changbin.du@gmail.com (mailing list archive)
State New, archived
Headers show
Series [v2] riscv: fix oops caused by irq on/off tracer | expand

Commit Message

Changbin Du Jan. 29, 2022, 12:42 a.m. UTC
The trace_hardirqs_on/off requires at least two parent call frames.
If not, the code generated by CALLER_ADDR1 (aka. ftrace_return_address(1))
could trigger memory access fault.

[    0.039615][    T0] Unable to handle kernel NULL pointer dereference at virtual address 00000000000000f8
[    0.041925][    T0] Oops [#1]
[    0.042063][    T0] Modules linked in:
[    0.042864][    T0] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.17.0-rc1-00233-g9a20c48d1ed2 #29
[    0.043568][    T0] Hardware name: riscv-virtio,qemu (DT)
[    0.044343][    T0] epc : trace_hardirqs_on+0x56/0xe2
[    0.044601][    T0]  ra : restore_all+0x12/0x6e
[    0.044721][    T0] epc : ffffffff80126a5c ra : ffffffff80003b94 sp : ffffffff81403db0
[    0.044801][    T0]  gp : ffffffff8163acd8 tp : ffffffff81414880 t0 : 0000000000000020
[    0.044882][    T0]  t1 : 0098968000000000 t2 : 0000000000000000 s0 : ffffffff81403de0
[    0.044967][    T0]  s1 : 0000000000000000 a0 : 0000000000000001 a1 : 0000000000000100
[    0.045046][    T0]  a2 : 0000000000000000 a3 : 0000000000000000 a4 : 0000000000000000
[    0.045124][    T0]  a5 : 0000000000000000 a6 : 0000000000000000 a7 : 0000000054494d45
[    0.045210][    T0]  s2 : ffffffff80003b94 s3 : ffffffff81a8f1b0 s4 : ffffffff80e27b50
[    0.045289][    T0]  s5 : ffffffff81414880 s6 : ffffffff8160fa00 s7 : 00000000800120e8
[    0.045389][    T0]  s8 : 0000000080013100 s9 : 000000000000007f s10: 0000000000000000
[    0.045474][    T0]  s11: 0000000000000000 t3 : 7fffffffffffffff t4 : 0000000000000000
[    0.045548][    T0]  t5 : 0000000000000000 t6 : ffffffff814aa368
[    0.045620][    T0] status: 0000000200000100 badaddr: 00000000000000f8 cause: 000000000000000d
[    0.046402][    T0] [<ffffffff80003b94>] restore_all+0x12/0x6e

To fix above issue, here we add one extra level wrapper so they can be
safely called by low level entry code.

Signed-off-by: Changbin Du <changbin.du@gmail.com>

---
v2: fix compile warning.
---
 arch/riscv/kernel/Makefile    |  2 ++
 arch/riscv/kernel/entry.S     | 10 +++++-----
 arch/riscv/kernel/trace_irq.c | 26 ++++++++++++++++++++++++++
 arch/riscv/kernel/trace_irq.h | 11 +++++++++++
 4 files changed, 44 insertions(+), 5 deletions(-)
 create mode 100644 arch/riscv/kernel/trace_irq.c
 create mode 100644 arch/riscv/kernel/trace_irq.h

Comments

kernel test robot Jan. 29, 2022, 2:49 p.m. UTC | #1
Hi Changbin,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on linus/master]
[also build test WARNING on v5.17-rc1 next-20220128]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Changbin-Du/riscv-fix-oops-caused-by-irq-on-off-tracer/20220129-084342
base:   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 169387e2aa291a4e3cb856053730fe99d6cec06f
config: riscv-randconfig-c006-20220125 (https://download.01.org/0day-ci/archive/20220129/202201292238.F1q5U57j-lkp@intel.com/config)
compiler: clang version 14.0.0 (https://github.com/llvm/llvm-project 33b45ee44b1f32ffdbc995e6fec806271b4b3ba4)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # install riscv cross compiling tool for clang build
        # apt-get install binutils-riscv64-linux-gnu
        # https://github.com/0day-ci/linux/commit/389bbb8268ab6ab20c8a76808dfe52997577a06b
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Changbin-Du/riscv-fix-oops-caused-by-irq-on-off-tracer/20220129-084342
        git checkout 389bbb8268ab6ab20c8a76808dfe52997577a06b
        # save the config file to linux build tree
        mkdir build_dir
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=riscv SHELL=/bin/bash arch/riscv/kernel/

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

>> arch/riscv/kernel/trace_irq.c:11: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst
    * trace_hardirqs_on/off requires at least two parent call frames.


vim +11 arch/riscv/kernel/trace_irq.c

     9	
    10	/**
  > 11	 * trace_hardirqs_on/off requires at least two parent call frames.
    12	 * Here we add one extra level so they can be safely called by low
    13	 * level entry code.
    14	 */
    15	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
Jisheng Zhang Feb. 6, 2022, 5:25 p.m. UTC | #2
On Sat, Jan 29, 2022 at 08:42:26AM +0800, Changbin Du wrote:
> The trace_hardirqs_on/off requires at least two parent call frames.
> If not, the code generated by CALLER_ADDR1 (aka. ftrace_return_address(1))
> could trigger memory access fault.
> 
> [    0.039615][    T0] Unable to handle kernel NULL pointer dereference at virtual address 00000000000000f8
> [    0.041925][    T0] Oops [#1]
> [    0.042063][    T0] Modules linked in:
> [    0.042864][    T0] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.17.0-rc1-00233-g9a20c48d1ed2 #29
> [    0.043568][    T0] Hardware name: riscv-virtio,qemu (DT)
> [    0.044343][    T0] epc : trace_hardirqs_on+0x56/0xe2
> [    0.044601][    T0]  ra : restore_all+0x12/0x6e
> [    0.044721][    T0] epc : ffffffff80126a5c ra : ffffffff80003b94 sp : ffffffff81403db0
> [    0.044801][    T0]  gp : ffffffff8163acd8 tp : ffffffff81414880 t0 : 0000000000000020
> [    0.044882][    T0]  t1 : 0098968000000000 t2 : 0000000000000000 s0 : ffffffff81403de0
> [    0.044967][    T0]  s1 : 0000000000000000 a0 : 0000000000000001 a1 : 0000000000000100
> [    0.045046][    T0]  a2 : 0000000000000000 a3 : 0000000000000000 a4 : 0000000000000000
> [    0.045124][    T0]  a5 : 0000000000000000 a6 : 0000000000000000 a7 : 0000000054494d45
> [    0.045210][    T0]  s2 : ffffffff80003b94 s3 : ffffffff81a8f1b0 s4 : ffffffff80e27b50
> [    0.045289][    T0]  s5 : ffffffff81414880 s6 : ffffffff8160fa00 s7 : 00000000800120e8
> [    0.045389][    T0]  s8 : 0000000080013100 s9 : 000000000000007f s10: 0000000000000000
> [    0.045474][    T0]  s11: 0000000000000000 t3 : 7fffffffffffffff t4 : 0000000000000000
> [    0.045548][    T0]  t5 : 0000000000000000 t6 : ffffffff814aa368
> [    0.045620][    T0] status: 0000000200000100 badaddr: 00000000000000f8 cause: 000000000000000d
> [    0.046402][    T0] [<ffffffff80003b94>] restore_all+0x12/0x6e
> 

Hi Changbin,

Could you please provide the reproduce steps? It looks a bit
interesting.

> To fix above issue, here we add one extra level wrapper so they can be
> safely called by low level entry code.
> 
> Signed-off-by: Changbin Du <changbin.du@gmail.com>
> 
> ---
> v2: fix compile warning.
> ---
>  arch/riscv/kernel/Makefile    |  2 ++
>  arch/riscv/kernel/entry.S     | 10 +++++-----
>  arch/riscv/kernel/trace_irq.c | 26 ++++++++++++++++++++++++++
>  arch/riscv/kernel/trace_irq.h | 11 +++++++++++
>  4 files changed, 44 insertions(+), 5 deletions(-)
>  create mode 100644 arch/riscv/kernel/trace_irq.c
>  create mode 100644 arch/riscv/kernel/trace_irq.h
> 
> diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile
> index 612556faa527..ffc87e76b1dd 100644
> --- a/arch/riscv/kernel/Makefile
> +++ b/arch/riscv/kernel/Makefile
> @@ -51,6 +51,8 @@ obj-$(CONFIG_MODULE_SECTIONS)	+= module-sections.o
>  obj-$(CONFIG_FUNCTION_TRACER)	+= mcount.o ftrace.o
>  obj-$(CONFIG_DYNAMIC_FTRACE)	+= mcount-dyn.o
>  
> +obj-$(CONFIG_TRACE_IRQFLAGS)	+= trace_irq.o
> +
>  obj-$(CONFIG_RISCV_BASE_PMU)	+= perf_event.o
>  obj-$(CONFIG_PERF_EVENTS)	+= perf_callchain.o
>  obj-$(CONFIG_HAVE_PERF_REGS)	+= perf_regs.o
> diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
> index ed29e9c8f660..d6a46ed0bf05 100644
> --- a/arch/riscv/kernel/entry.S
> +++ b/arch/riscv/kernel/entry.S
> @@ -108,7 +108,7 @@ _save_context:
>  .option pop
>  
>  #ifdef CONFIG_TRACE_IRQFLAGS
> -	call trace_hardirqs_off
> +	call __trace_hardirqs_off
>  #endif
>  
>  #ifdef CONFIG_CONTEXT_TRACKING
> @@ -143,7 +143,7 @@ skip_context_tracking:
>  	li t0, EXC_BREAKPOINT
>  	beq s4, t0, 1f
>  #ifdef CONFIG_TRACE_IRQFLAGS
> -	call trace_hardirqs_on
> +	call __trace_hardirqs_on
>  #endif
>  	csrs CSR_STATUS, SR_IE
>  
> @@ -234,7 +234,7 @@ ret_from_exception:
>  	REG_L s0, PT_STATUS(sp)
>  	csrc CSR_STATUS, SR_IE
>  #ifdef CONFIG_TRACE_IRQFLAGS
> -	call trace_hardirqs_off
> +	call __trace_hardirqs_off
>  #endif
>  #ifdef CONFIG_RISCV_M_MODE
>  	/* the MPP value is too large to be used as an immediate arg for addi */
> @@ -270,10 +270,10 @@ restore_all:
>  	REG_L s1, PT_STATUS(sp)
>  	andi t0, s1, SR_PIE
>  	beqz t0, 1f
> -	call trace_hardirqs_on
> +	call __trace_hardirqs_on
>  	j 2f
>  1:
> -	call trace_hardirqs_off
> +	call __trace_hardirqs_off
>  2:
>  #endif
>  	REG_L a0, PT_STATUS(sp)
> diff --git a/arch/riscv/kernel/trace_irq.c b/arch/riscv/kernel/trace_irq.c
> new file mode 100644
> index 000000000000..fc194c56a35d
> --- /dev/null
> +++ b/arch/riscv/kernel/trace_irq.c
> @@ -0,0 +1,26 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (C) 2022 Changbin Du <changbin.du@gmail.com>
> + */
> +
> +#include <linux/irqflags.h>
> +#include <linux/kprobes.h>
> +#include "trace_irq.h"
> +
> +/**
> + * trace_hardirqs_on/off requires at least two parent call frames.
> + * Here we add one extra level so they can be safely called by low
> + * level entry code.
> + */

Hmm, I believe there's elegant solution without this grue, maybe
fix in stacktrace implementation or something else?


> +
> +void __trace_hardirqs_on(void)
> +{
> +	trace_hardirqs_on();
> +}
> +NOKPROBE_SYMBOL(__trace_hardirqs_on);
> +
> +void __trace_hardirqs_off(void)
> +{
> +	trace_hardirqs_off();
> +}
> +NOKPROBE_SYMBOL(__trace_hardirqs_off);
> diff --git a/arch/riscv/kernel/trace_irq.h b/arch/riscv/kernel/trace_irq.h
> new file mode 100644
> index 000000000000..99fe67377e5e
> --- /dev/null
> +++ b/arch/riscv/kernel/trace_irq.h
> @@ -0,0 +1,11 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * Copyright (C) 2022 Changbin Du <changbin.du@gmail.com>
> + */
> +#ifndef __TRACE_IRQ_H
> +#define __TRACE_IRQ_H
> +
> +void __trace_hardirqs_on(void);
> +void __trace_hardirqs_off(void);
> +
> +#endif /* __TRACE_IRQ_H */
> -- 
> 2.32.0
> 
> 
> _______________________________________________
> linux-riscv mailing list
> linux-riscv@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-riscv
Changbin Du Feb. 7, 2022, 12:38 p.m. UTC | #3
On Mon, Feb 07, 2022 at 01:25:03AM +0800, Jisheng Zhang wrote:
> On Sat, Jan 29, 2022 at 08:42:26AM +0800, Changbin Du wrote:
> > The trace_hardirqs_on/off requires at least two parent call frames.
> > If not, the code generated by CALLER_ADDR1 (aka. ftrace_return_address(1))
> > could trigger memory access fault.
> > 
> > [    0.039615][    T0] Unable to handle kernel NULL pointer dereference at virtual address 00000000000000f8
> > [    0.041925][    T0] Oops [#1]
> > [    0.042063][    T0] Modules linked in:
> > [    0.042864][    T0] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.17.0-rc1-00233-g9a20c48d1ed2 #29
> > [    0.043568][    T0] Hardware name: riscv-virtio,qemu (DT)
> > [    0.044343][    T0] epc : trace_hardirqs_on+0x56/0xe2
> > [    0.044601][    T0]  ra : restore_all+0x12/0x6e
> > [    0.044721][    T0] epc : ffffffff80126a5c ra : ffffffff80003b94 sp : ffffffff81403db0
> > [    0.044801][    T0]  gp : ffffffff8163acd8 tp : ffffffff81414880 t0 : 0000000000000020
> > [    0.044882][    T0]  t1 : 0098968000000000 t2 : 0000000000000000 s0 : ffffffff81403de0
> > [    0.044967][    T0]  s1 : 0000000000000000 a0 : 0000000000000001 a1 : 0000000000000100
> > [    0.045046][    T0]  a2 : 0000000000000000 a3 : 0000000000000000 a4 : 0000000000000000
> > [    0.045124][    T0]  a5 : 0000000000000000 a6 : 0000000000000000 a7 : 0000000054494d45
> > [    0.045210][    T0]  s2 : ffffffff80003b94 s3 : ffffffff81a8f1b0 s4 : ffffffff80e27b50
> > [    0.045289][    T0]  s5 : ffffffff81414880 s6 : ffffffff8160fa00 s7 : 00000000800120e8
> > [    0.045389][    T0]  s8 : 0000000080013100 s9 : 000000000000007f s10: 0000000000000000
> > [    0.045474][    T0]  s11: 0000000000000000 t3 : 7fffffffffffffff t4 : 0000000000000000
> > [    0.045548][    T0]  t5 : 0000000000000000 t6 : ffffffff814aa368
> > [    0.045620][    T0] status: 0000000200000100 badaddr: 00000000000000f8 cause: 000000000000000d
> > [    0.046402][    T0] [<ffffffff80003b94>] restore_all+0x12/0x6e
> > 
> 
> Hi Changbin,
> 
> Could you please provide the reproduce steps? It looks a bit
> interesting.
>
Just enable CONFIG_IRQSOFF_TRACER and rebuild kernel with llvm. Then boot the
new kernel.

> > To fix above issue, here we add one extra level wrapper so they can be
> > safely called by low level entry code.
> > 
> > Signed-off-by: Changbin Du <changbin.du@gmail.com>
> > 
> > ---
> > v2: fix compile warning.
> > ---
> >  arch/riscv/kernel/Makefile    |  2 ++
> >  arch/riscv/kernel/entry.S     | 10 +++++-----
> >  arch/riscv/kernel/trace_irq.c | 26 ++++++++++++++++++++++++++
> >  arch/riscv/kernel/trace_irq.h | 11 +++++++++++
> >  4 files changed, 44 insertions(+), 5 deletions(-)
> >  create mode 100644 arch/riscv/kernel/trace_irq.c
> >  create mode 100644 arch/riscv/kernel/trace_irq.h
> > 
> > diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile
> > index 612556faa527..ffc87e76b1dd 100644
> > --- a/arch/riscv/kernel/Makefile
> > +++ b/arch/riscv/kernel/Makefile
> > @@ -51,6 +51,8 @@ obj-$(CONFIG_MODULE_SECTIONS)	+= module-sections.o
> >  obj-$(CONFIG_FUNCTION_TRACER)	+= mcount.o ftrace.o
> >  obj-$(CONFIG_DYNAMIC_FTRACE)	+= mcount-dyn.o
> >  
> > +obj-$(CONFIG_TRACE_IRQFLAGS)	+= trace_irq.o
> > +
> >  obj-$(CONFIG_RISCV_BASE_PMU)	+= perf_event.o
> >  obj-$(CONFIG_PERF_EVENTS)	+= perf_callchain.o
> >  obj-$(CONFIG_HAVE_PERF_REGS)	+= perf_regs.o
> > diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
> > index ed29e9c8f660..d6a46ed0bf05 100644
> > --- a/arch/riscv/kernel/entry.S
> > +++ b/arch/riscv/kernel/entry.S
> > @@ -108,7 +108,7 @@ _save_context:
> >  .option pop
> >  
> >  #ifdef CONFIG_TRACE_IRQFLAGS
> > -	call trace_hardirqs_off
> > +	call __trace_hardirqs_off
> >  #endif
> >  
> >  #ifdef CONFIG_CONTEXT_TRACKING
> > @@ -143,7 +143,7 @@ skip_context_tracking:
> >  	li t0, EXC_BREAKPOINT
> >  	beq s4, t0, 1f
> >  #ifdef CONFIG_TRACE_IRQFLAGS
> > -	call trace_hardirqs_on
> > +	call __trace_hardirqs_on
> >  #endif
> >  	csrs CSR_STATUS, SR_IE
> >  
> > @@ -234,7 +234,7 @@ ret_from_exception:
> >  	REG_L s0, PT_STATUS(sp)
> >  	csrc CSR_STATUS, SR_IE
> >  #ifdef CONFIG_TRACE_IRQFLAGS
> > -	call trace_hardirqs_off
> > +	call __trace_hardirqs_off
> >  #endif
> >  #ifdef CONFIG_RISCV_M_MODE
> >  	/* the MPP value is too large to be used as an immediate arg for addi */
> > @@ -270,10 +270,10 @@ restore_all:
> >  	REG_L s1, PT_STATUS(sp)
> >  	andi t0, s1, SR_PIE
> >  	beqz t0, 1f
> > -	call trace_hardirqs_on
> > +	call __trace_hardirqs_on
> >  	j 2f
> >  1:
> > -	call trace_hardirqs_off
> > +	call __trace_hardirqs_off
> >  2:
> >  #endif
> >  	REG_L a0, PT_STATUS(sp)
> > diff --git a/arch/riscv/kernel/trace_irq.c b/arch/riscv/kernel/trace_irq.c
> > new file mode 100644
> > index 000000000000..fc194c56a35d
> > --- /dev/null
> > +++ b/arch/riscv/kernel/trace_irq.c
> > @@ -0,0 +1,26 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +/*
> > + * Copyright (C) 2022 Changbin Du <changbin.du@gmail.com>
> > + */
> > +
> > +#include <linux/irqflags.h>
> > +#include <linux/kprobes.h>
> > +#include "trace_irq.h"
> > +
> > +/**
> > + * trace_hardirqs_on/off requires at least two parent call frames.
> > + * Here we add one extra level so they can be safely called by low
> > + * level entry code.
> > + */
> 
> Hmm, I believe there's elegant solution without this grue, maybe
> fix in stacktrace implementation or something else?
> 
This is not about stacktrace. As describe in commit msg, the problem is
introduced by ftrace_return_address(1). The complier doesn't have knowledge
about how many frames available at rumtime and wether each frame is valid.

> 
> > +
> > +void __trace_hardirqs_on(void)
> > +{
> > +	trace_hardirqs_on();
> > +}
> > +NOKPROBE_SYMBOL(__trace_hardirqs_on);
> > +
> > +void __trace_hardirqs_off(void)
> > +{
> > +	trace_hardirqs_off();
> > +}
> > +NOKPROBE_SYMBOL(__trace_hardirqs_off);
> > diff --git a/arch/riscv/kernel/trace_irq.h b/arch/riscv/kernel/trace_irq.h
> > new file mode 100644
> > index 000000000000..99fe67377e5e
> > --- /dev/null
> > +++ b/arch/riscv/kernel/trace_irq.h
> > @@ -0,0 +1,11 @@
> > +/* SPDX-License-Identifier: GPL-2.0 */
> > +/*
> > + * Copyright (C) 2022 Changbin Du <changbin.du@gmail.com>
> > + */
> > +#ifndef __TRACE_IRQ_H
> > +#define __TRACE_IRQ_H
> > +
> > +void __trace_hardirqs_on(void);
> > +void __trace_hardirqs_off(void);
> > +
> > +#endif /* __TRACE_IRQ_H */
> > -- 
> > 2.32.0
> > 
> > 
> > _______________________________________________
> > linux-riscv mailing list
> > linux-riscv@lists.infradead.org
> > http://lists.infradead.org/mailman/listinfo/linux-riscv
Jisheng Zhang Feb. 7, 2022, 3:31 p.m. UTC | #4
On Mon, Feb 07, 2022 at 08:38:50PM +0800, ChangbinCONFIG_IRQSOFF_TRACER Du wrote:
> On Mon, Feb 07, 2022 at 01:25:03AM +0800, Jisheng Zhang wrote:
> > On Sat, Jan 29, 2022 at 08:42:26AM +0800, Changbin Du wrote:
> > > The trace_hardirqs_on/off requires at least two parent call frames.
> > > If not, the code generated by CALLER_ADDR1 (aka. ftrace_return_address(1))
> > > could trigger memory access fault.
> > > 
> > > [    0.039615][    T0] Unable to handle kernel NULL pointer dereference at virtual address 00000000000000f8
> > > [    0.041925][    T0] Oops [#1]
> > > [    0.042063][    T0] Modules linked in:
> > > [    0.042864][    T0] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.17.0-rc1-00233-g9a20c48d1ed2 #29
> > > [    0.043568][    T0] Hardware name: riscv-virtio,qemu (DT)
> > > [    0.044343][    T0] epc : trace_hardirqs_on+0x56/0xe2
> > > [    0.044601][    T0]  ra : restore_all+0x12/0x6e
> > > [    0.044721][    T0] epc : ffffffff80126a5c ra : ffffffff80003b94 sp : ffffffff81403db0
> > > [    0.044801][    T0]  gp : ffffffff8163acd8 tp : ffffffff81414880 t0 : 0000000000000020
> > > [    0.044882][    T0]  t1 : 0098968000000000 t2 : 0000000000000000 s0 : ffffffff81403de0
> > > [    0.044967][    T0]  s1 : 0000000000000000 a0 : 0000000000000001 a1 : 0000000000000100
> > > [    0.045046][    T0]  a2 : 0000000000000000 a3 : 0000000000000000 a4 : 0000000000000000
> > > [    0.045124][    T0]  a5 : 0000000000000000 a6 : 0000000000000000 a7 : 0000000054494d45
> > > [    0.045210][    T0]  s2 : ffffffff80003b94 s3 : ffffffff81a8f1b0 s4 : ffffffff80e27b50
> > > [    0.045289][    T0]  s5 : ffffffff81414880 s6 : ffffffff8160fa00 s7 : 00000000800120e8
> > > [    0.045389][    T0]  s8 : 0000000080013100 s9 : 000000000000007f s10: 0000000000000000
> > > [    0.045474][    T0]  s11: 0000000000000000 t3 : 7fffffffffffffff t4 : 0000000000000000
> > > [    0.045548][    T0]  t5 : 0000000000000000 t6 : ffffffff814aa368
> > > [    0.045620][    T0] status: 0000000200000100 badaddr: 00000000000000f8 cause: 000000000000000d
> > > [    0.046402][    T0] [<ffffffff80003b94>] restore_all+0x12/0x6e
> > > 
> > 
> > Hi Changbin,
> > 
> > Could you please provide the reproduce steps? It looks a bit
> > interesting.
> >
> Just enable CONFIG_IRQSOFF_TRACER and rebuild kernel with llvm. Then boot the
> new kernel.

Thanks for the information. I tried IRQSOFF_TRACER with gcc+binutils,
can't reproduce the issue. I forget to try clang+llvm. From another side
The fact that gcc+bintuils can't reproduce it means this is a clang+llvm
speicial case, no?
Changbin Du Feb. 8, 2022, 12:35 a.m. UTC | #5
On Mon, Feb 07, 2022 at 11:31:41PM +0800, Jisheng Zhang wrote:
> On Mon, Feb 07, 2022 at 08:38:50PM +0800, ChangbinCONFIG_IRQSOFF_TRACER Du wrote:
> > On Mon, Feb 07, 2022 at 01:25:03AM +0800, Jisheng Zhang wrote:
> > > On Sat, Jan 29, 2022 at 08:42:26AM +0800, Changbin Du wrote:
> > > > The trace_hardirqs_on/off requires at least two parent call frames.
> > > > If not, the code generated by CALLER_ADDR1 (aka. ftrace_return_address(1))
> > > > could trigger memory access fault.
> > > > 
> > > > [    0.039615][    T0] Unable to handle kernel NULL pointer dereference at virtual address 00000000000000f8
> > > > [    0.041925][    T0] Oops [#1]
> > > > [    0.042063][    T0] Modules linked in:
> > > > [    0.042864][    T0] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.17.0-rc1-00233-g9a20c48d1ed2 #29
> > > > [    0.043568][    T0] Hardware name: riscv-virtio,qemu (DT)
> > > > [    0.044343][    T0] epc : trace_hardirqs_on+0x56/0xe2
> > > > [    0.044601][    T0]  ra : restore_all+0x12/0x6e
> > > > [    0.044721][    T0] epc : ffffffff80126a5c ra : ffffffff80003b94 sp : ffffffff81403db0
> > > > [    0.044801][    T0]  gp : ffffffff8163acd8 tp : ffffffff81414880 t0 : 0000000000000020
> > > > [    0.044882][    T0]  t1 : 0098968000000000 t2 : 0000000000000000 s0 : ffffffff81403de0
> > > > [    0.044967][    T0]  s1 : 0000000000000000 a0 : 0000000000000001 a1 : 0000000000000100
> > > > [    0.045046][    T0]  a2 : 0000000000000000 a3 : 0000000000000000 a4 : 0000000000000000
> > > > [    0.045124][    T0]  a5 : 0000000000000000 a6 : 0000000000000000 a7 : 0000000054494d45
> > > > [    0.045210][    T0]  s2 : ffffffff80003b94 s3 : ffffffff81a8f1b0 s4 : ffffffff80e27b50
> > > > [    0.045289][    T0]  s5 : ffffffff81414880 s6 : ffffffff8160fa00 s7 : 00000000800120e8
> > > > [    0.045389][    T0]  s8 : 0000000080013100 s9 : 000000000000007f s10: 0000000000000000
> > > > [    0.045474][    T0]  s11: 0000000000000000 t3 : 7fffffffffffffff t4 : 0000000000000000
> > > > [    0.045548][    T0]  t5 : 0000000000000000 t6 : ffffffff814aa368
> > > > [    0.045620][    T0] status: 0000000200000100 badaddr: 00000000000000f8 cause: 000000000000000d
> > > > [    0.046402][    T0] [<ffffffff80003b94>] restore_all+0x12/0x6e
> > > > 
> > > 
> > > Hi Changbin,
> > > 
> > > Could you please provide the reproduce steps? It looks a bit
> > > interesting.
> > >
> > Just enable CONFIG_IRQSOFF_TRACER and rebuild kernel with llvm. Then boot the
> > new kernel.
> 
> Thanks for the information. I tried IRQSOFF_TRACER with gcc+binutils,
> can't reproduce the issue. I forget to try clang+llvm. From another side
> The fact that gcc+bintuils can't reproduce it means this is a clang+llvm
> speicial case, no?
The behaviour of GCC is a bit different, please refer to another disccusion:
https://lore.kernel.org/lkml/C2470F2D-9E45-49D7-A03B-E6A7BB4B9738@jrtc27.com/T/

But I suppose it still has similar issue. Make sure FRAME_POINTER is enabled
also.
Jisheng Zhang Feb. 9, 2022, 5:32 p.m. UTC | #6
On Tue, Feb 08, 2022 at 08:35:02AM +0800, Changbin Du wrote:
> On Mon, Feb 07, 2022 at 11:31:41PM +0800, Jisheng Zhang wrote:
> > On Mon, Feb 07, 2022 at 08:38:50PM +0800, ChangbinCONFIG_IRQSOFF_TRACER Du wrote:
> > > On Mon, Feb 07, 2022 at 01:25:03AM +0800, Jisheng Zhang wrote:
> > > > On Sat, Jan 29, 2022 at 08:42:26AM +0800, Changbin Du wrote:
> > > > > The trace_hardirqs_on/off requires at least two parent call frames.
> > > > > If not, the code generated by CALLER_ADDR1 (aka. ftrace_return_address(1))
> > > > > could trigger memory access fault.
> > > > > 
> > > > > [    0.039615][    T0] Unable to handle kernel NULL pointer dereference at virtual address 00000000000000f8
> > > > > [    0.041925][    T0] Oops [#1]
> > > > > [    0.042063][    T0] Modules linked in:
> > > > > [    0.042864][    T0] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.17.0-rc1-00233-g9a20c48d1ed2 #29
> > > > > [    0.043568][    T0] Hardware name: riscv-virtio,qemu (DT)
> > > > > [    0.044343][    T0] epc : trace_hardirqs_on+0x56/0xe2
> > > > > [    0.044601][    T0]  ra : restore_all+0x12/0x6e
> > > > > [    0.044721][    T0] epc : ffffffff80126a5c ra : ffffffff80003b94 sp : ffffffff81403db0
> > > > > [    0.044801][    T0]  gp : ffffffff8163acd8 tp : ffffffff81414880 t0 : 0000000000000020
> > > > > [    0.044882][    T0]  t1 : 0098968000000000 t2 : 0000000000000000 s0 : ffffffff81403de0
> > > > > [    0.044967][    T0]  s1 : 0000000000000000 a0 : 0000000000000001 a1 : 0000000000000100
> > > > > [    0.045046][    T0]  a2 : 0000000000000000 a3 : 0000000000000000 a4 : 0000000000000000
> > > > > [    0.045124][    T0]  a5 : 0000000000000000 a6 : 0000000000000000 a7 : 0000000054494d45
> > > > > [    0.045210][    T0]  s2 : ffffffff80003b94 s3 : ffffffff81a8f1b0 s4 : ffffffff80e27b50
> > > > > [    0.045289][    T0]  s5 : ffffffff81414880 s6 : ffffffff8160fa00 s7 : 00000000800120e8
> > > > > [    0.045389][    T0]  s8 : 0000000080013100 s9 : 000000000000007f s10: 0000000000000000
> > > > > [    0.045474][    T0]  s11: 0000000000000000 t3 : 7fffffffffffffff t4 : 0000000000000000
> > > > > [    0.045548][    T0]  t5 : 0000000000000000 t6 : ffffffff814aa368
> > > > > [    0.045620][    T0] status: 0000000200000100 badaddr: 00000000000000f8 cause: 000000000000000d
> > > > > [    0.046402][    T0] [<ffffffff80003b94>] restore_all+0x12/0x6e
> > > > > 
> > > > 
> > > > Hi Changbin,
> > > > 
> > > > Could you please provide the reproduce steps? It looks a bit
> > > > interesting.
> > > >
> > > Just enable CONFIG_IRQSOFF_TRACER and rebuild kernel with llvm. Then boot the
> > > new kernel.
> > 
> > Thanks for the information. I tried IRQSOFF_TRACER with gcc+binutils,
> > can't reproduce the issue. I forget to try clang+llvm. From another side
> > The fact that gcc+bintuils can't reproduce it means this is a clang+llvm
> > speicial case, no?
> The behaviour of GCC is a bit different, please refer to another disccusion:
> https://lore.kernel.org/lkml/C2470F2D-9E45-49D7-A03B-E6A7BB4B9738@jrtc27.com/T/
> 
> But I suppose it still has similar issue. Make sure FRAME_POINTER is enabled
> also.
> 

Hi Changbin,

I read the code and find that current riscv frame records during
exception isn't as completed as other architectures. riscv only
records frames from the ret_from_exception(). If we add completed
frame records as other arch do, then the issue you saw can also
be fixed at the same time.

However, I'm not sure what's the best choice now.

A simple demo to this incomplete frames:
add dump_stack() in any ISR, then

in riscv:
[    2.961294] Call Trace:
[    2.961460] [<ffffffff8000485e>] dump_backtrace+0x1c/0x24
[    2.961823] [<ffffffff805ed980>] show_stack+0x2c/0x38
[    2.962153] [<ffffffff805f292e>] dump_stack_lvl+0x40/0x58
[    2.962483] [<ffffffff805f295a>] dump_stack+0x14/0x1c
[    2.962792] [<ffffffff805f31a0>] serial8250_interrupt+0x20/0x82
[    2.963139] [<ffffffff80053032>] __handle_irq_event_percpu+0x4c/0x106
[    2.963526] [<ffffffff80053170>] handle_irq_event+0x38/0x80
[    2.963856] [<ffffffff80056a32>] handle_fasteoi_irq+0x96/0x188
[    2.964198] [<ffffffff800526ce>] generic_handle_domain_irq+0x28/0x3a
[    2.964567] [<ffffffff802f0ae4>] plic_handle_irq+0x88/0xec
[    2.964896] [<ffffffff800526ce>] generic_handle_domain_irq+0x28/0x3a
[    2.965264] [<ffffffff802f08e4>] riscv_intc_irq+0x34/0x5c
[    2.965584] [<ffffffff805f6dc8>] generic_handle_arch_irq+0x4a/0x74
[    2.966068] [<ffffffff80002fe8>] ret_from_exception+0x0/0xc

in x86:
[    1.191274] Call Trace:
[    1.192223]  <IRQ>
[    1.192758]  dump_stack_lvl+0x45/0x59
[    1.192982]  serial8250_interrupt+0x24/0x88
[    1.193105]  __handle_irq_event_percpu+0x66/0x1b0
[    1.193239]  handle_irq_event+0x34/0x70
[    1.193345]  handle_edge_irq+0x85/0x1e0
[    1.193455]  __common_interrupt+0x38/0x90
[    1.193573]  common_interrupt+0x73/0x90
[    1.193809]  </IRQ>
[    1.193889]  <TASK>
[    1.193956]  asm_common_interrupt+0x1b/0x40
[    1.194318] RIP: 0010:_raw_spin_unlock_irqrestore+0x1b/0x40
[    1.194566] Code: 24 be 01 02 00 00 e9 54 20 bf ff 0f 1f 40 00 0f 1f
44 00 00 f7 c6 00f
[    1.195137] RSP: 0000:ffff888000243b68 EFLAGS: 00000246
[    1.195314] RAX: 0000000000000000 RBX: ffffffff82025840 RCX:
0000000000000000
[    1.195482] RDX: 0000000000000001 RSI: 0000000000000000 RDI:
0000000000000001
[    1.195645] RBP: 0000000000000202 R08: ffffffffffffffff R09:
0000000000000000
[    1.195808] R10: 00000000000000eb R11: 0000000000000000 R12:
0000000000000000
[    1.195972] R13: 0000000000000040 R14: 0000000000000000 R15:
ffff888000c39000
[    1.196245]  ? _raw_spin_unlock_irqrestore+0x15/0x40
[    1.196373]  serial8250_do_startup+0x42d/0x600
[    1.196502]  uart_port_startup+0x11b/0x270
[    1.196619]  uart_port_activate+0x3f/0x60
[    1.196729]  tty_port_open+0x7e/0xd0
[    1.196835]  ? _raw_spin_unlock+0x12/0x30
[    1.196942]  uart_open+0x1a/0x30
[    1.197036]  tty_open+0x153/0x7c0
[    1.197144]  chrdev_open+0xbf/0x230
[    1.197253]  ? cdev_device_add+0x90/0x90
[    1.197359]  do_dentry_open+0x13c/0x360
[    1.197470]  path_openat+0xb0c/0xe00
[    1.197577]  ? update_load_avg+0x5f/0x640
[    1.197691]  ? finish_task_switch.isra.0+0xac/0x240
[    1.197821]  do_filp_open+0xb2/0x150
[    1.197935]  ? preempt_schedule_thunk+0x16/0x18
[    1.198049]  ? preempt_schedule_common+0x90/0xd0
[    1.198167]  ? preempt_schedule_thunk+0x16/0x18
[    1.198291]  file_open_name+0xf1/0x1b0
[    1.198397]  filp_open+0x2c/0x50
[    1.198495]  console_on_rootfs+0x19/0x52
[    1.198648]  kernel_init_freeable+0x19a/0x1c7
[    1.198765]  ? rest_init+0xc0/0xc0
[    1.198867]  kernel_init+0x16/0x110
[    1.198965]  ret_from_fork+0x1f/0x30
[    1.199131]  </TASK>
Changbin Du Feb. 10, 2022, 1:37 p.m. UTC | #7
On Thu, Feb 10, 2022 at 01:32:59AM +0800, Jisheng Zhang wrote:
[snip]
> Hi Changbin,
> 
> I read the code and find that current riscv frame records during
> exception isn't as completed as other architectures. riscv only
> records frames from the ret_from_exception(). If we add completed
What do you mean for 'record'?

> frame records as other arch do, then the issue you saw can also
> be fixed at the same time.
> 
I don't think so. The problem is __builtin_return_address(1) trigger page fault
here.

> However, I'm not sure what's the best choice now.
> 
> A simple demo to this incomplete frames:
> add dump_stack() in any ISR, then
> 
> in riscv:
> [    2.961294] Call Trace:
> [    2.961460] [<ffffffff8000485e>] dump_backtrace+0x1c/0x24
> [    2.961823] [<ffffffff805ed980>] show_stack+0x2c/0x38
> [    2.962153] [<ffffffff805f292e>] dump_stack_lvl+0x40/0x58
> [    2.962483] [<ffffffff805f295a>] dump_stack+0x14/0x1c
> [    2.962792] [<ffffffff805f31a0>] serial8250_interrupt+0x20/0x82
> [    2.963139] [<ffffffff80053032>] __handle_irq_event_percpu+0x4c/0x106
> [    2.963526] [<ffffffff80053170>] handle_irq_event+0x38/0x80
> [    2.963856] [<ffffffff80056a32>] handle_fasteoi_irq+0x96/0x188
> [    2.964198] [<ffffffff800526ce>] generic_handle_domain_irq+0x28/0x3a
> [    2.964567] [<ffffffff802f0ae4>] plic_handle_irq+0x88/0xec
> [    2.964896] [<ffffffff800526ce>] generic_handle_domain_irq+0x28/0x3a
> [    2.965264] [<ffffffff802f08e4>] riscv_intc_irq+0x34/0x5c
> [    2.965584] [<ffffffff805f6dc8>] generic_handle_arch_irq+0x4a/0x74
> [    2.966068] [<ffffffff80002fe8>] ret_from_exception+0x0/0xc
> 
> in x86:
> [    1.191274] Call Trace:
> [    1.192223]  <IRQ>
> [    1.192758]  dump_stack_lvl+0x45/0x59
> [    1.192982]  serial8250_interrupt+0x24/0x88
> [    1.193105]  __handle_irq_event_percpu+0x66/0x1b0
> [    1.193239]  handle_irq_event+0x34/0x70
> [    1.193345]  handle_edge_irq+0x85/0x1e0
> [    1.193455]  __common_interrupt+0x38/0x90
> [    1.193573]  common_interrupt+0x73/0x90
> [    1.193809]  </IRQ>
> [    1.193889]  <TASK>
> [    1.193956]  asm_common_interrupt+0x1b/0x40
> [    1.194318] RIP: 0010:_raw_spin_unlock_irqrestore+0x1b/0x40
> [    1.194566] Code: 24 be 01 02 00 00 e9 54 20 bf ff 0f 1f 40 00 0f 1f
> 44 00 00 f7 c6 00f
> [    1.195137] RSP: 0000:ffff888000243b68 EFLAGS: 00000246
> [    1.195314] RAX: 0000000000000000 RBX: ffffffff82025840 RCX:
> 0000000000000000
> [    1.195482] RDX: 0000000000000001 RSI: 0000000000000000 RDI:
> 0000000000000001
> [    1.195645] RBP: 0000000000000202 R08: ffffffffffffffff R09:
> 0000000000000000
> [    1.195808] R10: 00000000000000eb R11: 0000000000000000 R12:
> 0000000000000000
> [    1.195972] R13: 0000000000000040 R14: 0000000000000000 R15:
> ffff888000c39000
> [    1.196245]  ? _raw_spin_unlock_irqrestore+0x15/0x40
> [    1.196373]  serial8250_do_startup+0x42d/0x600
> [    1.196502]  uart_port_startup+0x11b/0x270
> [    1.196619]  uart_port_activate+0x3f/0x60
> [    1.196729]  tty_port_open+0x7e/0xd0
> [    1.196835]  ? _raw_spin_unlock+0x12/0x30
> [    1.196942]  uart_open+0x1a/0x30
> [    1.197036]  tty_open+0x153/0x7c0
> [    1.197144]  chrdev_open+0xbf/0x230
> [    1.197253]  ? cdev_device_add+0x90/0x90
> [    1.197359]  do_dentry_open+0x13c/0x360
> [    1.197470]  path_openat+0xb0c/0xe00
> [    1.197577]  ? update_load_avg+0x5f/0x640
> [    1.197691]  ? finish_task_switch.isra.0+0xac/0x240
> [    1.197821]  do_filp_open+0xb2/0x150
> [    1.197935]  ? preempt_schedule_thunk+0x16/0x18
> [    1.198049]  ? preempt_schedule_common+0x90/0xd0
> [    1.198167]  ? preempt_schedule_thunk+0x16/0x18
> [    1.198291]  file_open_name+0xf1/0x1b0
> [    1.198397]  filp_open+0x2c/0x50
> [    1.198495]  console_on_rootfs+0x19/0x52
> [    1.198648]  kernel_init_freeable+0x19a/0x1c7
> [    1.198765]  ? rest_init+0xc0/0xc0
> [    1.198867]  kernel_init+0x16/0x110
> [    1.198965]  ret_from_fork+0x1f/0x30
> [    1.199131]  </TASK>
> 
As I said before, this issue is not related to stackdump.

Besides, you can see more calltrace on x86 that because x86 iterate all stacks
(kernel, irq or exception) when dumping stacktrace. While RISCV only show
calltrace of current stack.
Jisheng Zhang Feb. 10, 2022, 3:27 p.m. UTC | #8
On Thu, Feb 10, 2022 at 09:37:58PM +0800, Changbin Du wrote:
> On Thu, Feb 10, 2022 at 01:32:59AM +0800, Jisheng Zhang wrote:
> [snip]
> > Hi Changbin,
> > 
> > I read the code and find that current riscv frame records during
> > exception isn't as completed as other architectures. riscv only
> > records frames from the ret_from_exception(). If we add completed
> What do you mean for 'record'?
> 

stack frame record.

> > frame records as other arch do, then the issue you saw can also
> > be fixed at the same time.
> > 
> I don't think so. The problem is __builtin_return_address(1) trigger page fault
> here.

There's misunderstanding here. I interpret this bug as incomplete
stackframes.

This is current riscv stackframe during exception:

high
 	----------------
top	|		|  <- ret_from_exception
	----------------
	|		|  <- trace_hardirqs_on
	-----------------
low

As you said, the CALLER_ADDR1 a.k.a __builtin_return_address(1) needs
at least two parent call frames. 

If we complete the stackframes during exception as other arch does:

high
 	----------------
top	|		|  <- the synthetic stackframe from the interrupted point
 	----------------
	      .....	      
        ----------------
	|		|  <- ret_from_exception
	----------------
	|		|  <- trace_hardirqs_on
	-----------------
low


Then we meet the "at least two parent call frames" requirement. IOW, my
solution solve the problem from the entry.S side. One of the advantages
would be we let interrupted point show up in dump_stack() as other arch
do. What I'm not sure is whether it's safe to do so now since rc3 is
released.

> 
> > However, I'm not sure what's the best choice now.
> > 
> > A simple demo to this incomplete frames:
> > add dump_stack() in any ISR, then
> > 
> > in riscv:
> > [    2.961294] Call Trace:
> > [    2.961460] [<ffffffff8000485e>] dump_backtrace+0x1c/0x24
> > [    2.961823] [<ffffffff805ed980>] show_stack+0x2c/0x38
> > [    2.962153] [<ffffffff805f292e>] dump_stack_lvl+0x40/0x58
> > [    2.962483] [<ffffffff805f295a>] dump_stack+0x14/0x1c
> > [    2.962792] [<ffffffff805f31a0>] serial8250_interrupt+0x20/0x82
> > [    2.963139] [<ffffffff80053032>] __handle_irq_event_percpu+0x4c/0x106
> > [    2.963526] [<ffffffff80053170>] handle_irq_event+0x38/0x80
> > [    2.963856] [<ffffffff80056a32>] handle_fasteoi_irq+0x96/0x188
> > [    2.964198] [<ffffffff800526ce>] generic_handle_domain_irq+0x28/0x3a
> > [    2.964567] [<ffffffff802f0ae4>] plic_handle_irq+0x88/0xec
> > [    2.964896] [<ffffffff800526ce>] generic_handle_domain_irq+0x28/0x3a
> > [    2.965264] [<ffffffff802f08e4>] riscv_intc_irq+0x34/0x5c
> > [    2.965584] [<ffffffff805f6dc8>] generic_handle_arch_irq+0x4a/0x74
> > [    2.966068] [<ffffffff80002fe8>] ret_from_exception+0x0/0xc
> > 
> > in x86:
> > [    1.191274] Call Trace:
> > [    1.192223]  <IRQ>
> > [    1.192758]  dump_stack_lvl+0x45/0x59
> > [    1.192982]  serial8250_interrupt+0x24/0x88
> > [    1.193105]  __handle_irq_event_percpu+0x66/0x1b0
> > [    1.193239]  handle_irq_event+0x34/0x70
> > [    1.193345]  handle_edge_irq+0x85/0x1e0
> > [    1.193455]  __common_interrupt+0x38/0x90
> > [    1.193573]  common_interrupt+0x73/0x90
> > [    1.193809]  </IRQ>
> > [    1.193889]  <TASK>
> > [    1.193956]  asm_common_interrupt+0x1b/0x40
> > [    1.194318] RIP: 0010:_raw_spin_unlock_irqrestore+0x1b/0x40
> > [    1.194566] Code: 24 be 01 02 00 00 e9 54 20 bf ff 0f 1f 40 00 0f 1f
> > 44 00 00 f7 c6 00f
> > [    1.195137] RSP: 0000:ffff888000243b68 EFLAGS: 00000246
> > [    1.195314] RAX: 0000000000000000 RBX: ffffffff82025840 RCX:
> > 0000000000000000
> > [    1.195482] RDX: 0000000000000001 RSI: 0000000000000000 RDI:
> > 0000000000000001
> > [    1.195645] RBP: 0000000000000202 R08: ffffffffffffffff R09:
> > 0000000000000000
> > [    1.195808] R10: 00000000000000eb R11: 0000000000000000 R12:
> > 0000000000000000
> > [    1.195972] R13: 0000000000000040 R14: 0000000000000000 R15:
> > ffff888000c39000
> > [    1.196245]  ? _raw_spin_unlock_irqrestore+0x15/0x40
> > [    1.196373]  serial8250_do_startup+0x42d/0x600
> > [    1.196502]  uart_port_startup+0x11b/0x270
> > [    1.196619]  uart_port_activate+0x3f/0x60
> > [    1.196729]  tty_port_open+0x7e/0xd0
> > [    1.196835]  ? _raw_spin_unlock+0x12/0x30
> > [    1.196942]  uart_open+0x1a/0x30
> > [    1.197036]  tty_open+0x153/0x7c0
> > [    1.197144]  chrdev_open+0xbf/0x230
> > [    1.197253]  ? cdev_device_add+0x90/0x90
> > [    1.197359]  do_dentry_open+0x13c/0x360
> > [    1.197470]  path_openat+0xb0c/0xe00
> > [    1.197577]  ? update_load_avg+0x5f/0x640
> > [    1.197691]  ? finish_task_switch.isra.0+0xac/0x240
> > [    1.197821]  do_filp_open+0xb2/0x150
> > [    1.197935]  ? preempt_schedule_thunk+0x16/0x18
> > [    1.198049]  ? preempt_schedule_common+0x90/0xd0
> > [    1.198167]  ? preempt_schedule_thunk+0x16/0x18
> > [    1.198291]  file_open_name+0xf1/0x1b0
> > [    1.198397]  filp_open+0x2c/0x50
> > [    1.198495]  console_on_rootfs+0x19/0x52
> > [    1.198648]  kernel_init_freeable+0x19a/0x1c7
> > [    1.198765]  ? rest_init+0xc0/0xc0
> > [    1.198867]  kernel_init+0x16/0x110
> > [    1.198965]  ret_from_fork+0x1f/0x30
> > [    1.199131]  </TASK>
> > 
> As I said before, this issue is not related to stackdump.
> 
> Besides, you can see more calltrace on x86 that because x86 iterate all stacks
> (kernel, irq or exception) when dumping stacktrace. While RISCV only show
> calltrace of current stack.
> 

I'm not sure whether there's misunderstanding. See above.

Thanks
Jisheng Zhang Feb. 10, 2022, 3:37 p.m. UTC | #9
On Thu, Feb 10, 2022 at 11:27:21PM +0800, Jisheng Zhang wrote:
> On Thu, Feb 10, 2022 at 09:37:58PM +0800, Changbin Du wrote:
> > On Thu, Feb 10, 2022 at 01:32:59AM +0800, Jisheng Zhang wrote:
> > [snip]
> > > Hi Changbin,
> > > 
> > > I read the code and find that current riscv frame records during
> > > exception isn't as completed as other architectures. riscv only
> > > records frames from the ret_from_exception(). If we add completed
> > What do you mean for 'record'?
> > 
> 
> stack frame record.
> 
> > > frame records as other arch do, then the issue you saw can also
> > > be fixed at the same time.
> > > 
> > I don't think so. The problem is __builtin_return_address(1) trigger page fault
> > here.
> 
> There's misunderstanding here. I interpret this bug as incomplete
> stackframes.
> 
> This is current riscv stackframe during exception:
> 
> high
>  	----------------
> top	|		|  <- ret_from_exception
> 	----------------
> 	|		|  <- trace_hardirqs_on
> 	-----------------
> low

sorry, the "top" is wrongly placed.
 high
  	----------------
 	|		|  <- ret_from_exception
 	----------------
 	|		|  <- trace_hardirqs_on
 	-----------------
top

 low



> 
> As you said, the CALLER_ADDR1 a.k.a __builtin_return_address(1) needs
> at least two parent call frames. 
> 
> If we complete the stackframes during exception as other arch does:
> 
> high
>  	----------------
> top	|		|  <- the synthetic stackframe from the interrupted point
>  	----------------
> 	      .....	      
>         ----------------
> 	|		|  <- ret_from_exception
> 	----------------
> 	|		|  <- trace_hardirqs_on
> 	-----------------
> low

ditto

> 
> 
> Then we meet the "at least two parent call frames" requirement. IOW, my
> solution solve the problem from the entry.S side. One of the advantages
> would be we let interrupted point show up in dump_stack() as other arch
> do. What I'm not sure is whether it's safe to do so now since rc3 is
> released.
>
Jessica Clarke Feb. 10, 2022, 3:59 p.m. UTC | #10
On 10 Feb 2022, at 15:27, Jisheng Zhang <jszhang@kernel.org> wrote:
> On Thu, Feb 10, 2022 at 09:37:58PM +0800, Changbin Du wrote:
>> On Thu, Feb 10, 2022 at 01:32:59AM +0800, Jisheng Zhang wrote:
>> [snip]
>>> Hi Changbin,
>>> 
>>> I read the code and find that current riscv frame records during
>>> exception isn't as completed as other architectures. riscv only
>>> records frames from the ret_from_exception(). If we add completed
>> What do you mean for 'record'?
>> 
> 
> stack frame record.
> 
>>> frame records as other arch do, then the issue you saw can also
>>> be fixed at the same time.
>>> 
>> I don't think so. The problem is __builtin_return_address(1) trigger page fault
>> here.
> 
> There's misunderstanding here. I interpret this bug as incomplete
> stackframes.
> 
> This is current riscv stackframe during exception:
> 
> high
> 	----------------
> top	|		|  <- ret_from_exception
> 	----------------
> 	|		|  <- trace_hardirqs_on
> 	-----------------
> low
> 
> As you said, the CALLER_ADDR1 a.k.a __builtin_return_address(1) needs
> at least two parent call frames. 

No it doesn’t, you’re off by one, it only needs a valid current frame.

Jess

> If we complete the stackframes during exception as other arch does:
> 
> high
> 	----------------
> top	|		|  <- the synthetic stackframe from the interrupted point
> 	----------------
> 	      .....	      
>        ----------------
> 	|		|  <- ret_from_exception
> 	----------------
> 	|		|  <- trace_hardirqs_on
> 	-----------------
> low
> 
> 
> Then we meet the "at least two parent call frames" requirement. IOW, my
> solution solve the problem from the entry.S side. One of the advantages
> would be we let interrupted point show up in dump_stack() as other arch
> do. What I'm not sure is whether it's safe to do so now since rc3 is
> released.
> 
>> 
>>> However, I'm not sure what's the best choice now.
>>> 
>>> A simple demo to this incomplete frames:
>>> add dump_stack() in any ISR, then
>>> 
>>> in riscv:
>>> [    2.961294] Call Trace:
>>> [    2.961460] [<ffffffff8000485e>] dump_backtrace+0x1c/0x24
>>> [    2.961823] [<ffffffff805ed980>] show_stack+0x2c/0x38
>>> [    2.962153] [<ffffffff805f292e>] dump_stack_lvl+0x40/0x58
>>> [    2.962483] [<ffffffff805f295a>] dump_stack+0x14/0x1c
>>> [    2.962792] [<ffffffff805f31a0>] serial8250_interrupt+0x20/0x82
>>> [    2.963139] [<ffffffff80053032>] __handle_irq_event_percpu+0x4c/0x106
>>> [    2.963526] [<ffffffff80053170>] handle_irq_event+0x38/0x80
>>> [    2.963856] [<ffffffff80056a32>] handle_fasteoi_irq+0x96/0x188
>>> [    2.964198] [<ffffffff800526ce>] generic_handle_domain_irq+0x28/0x3a
>>> [    2.964567] [<ffffffff802f0ae4>] plic_handle_irq+0x88/0xec
>>> [    2.964896] [<ffffffff800526ce>] generic_handle_domain_irq+0x28/0x3a
>>> [    2.965264] [<ffffffff802f08e4>] riscv_intc_irq+0x34/0x5c
>>> [    2.965584] [<ffffffff805f6dc8>] generic_handle_arch_irq+0x4a/0x74
>>> [    2.966068] [<ffffffff80002fe8>] ret_from_exception+0x0/0xc
>>> 
>>> in x86:
>>> [    1.191274] Call Trace:
>>> [    1.192223]  <IRQ>
>>> [    1.192758]  dump_stack_lvl+0x45/0x59
>>> [    1.192982]  serial8250_interrupt+0x24/0x88
>>> [    1.193105]  __handle_irq_event_percpu+0x66/0x1b0
>>> [    1.193239]  handle_irq_event+0x34/0x70
>>> [    1.193345]  handle_edge_irq+0x85/0x1e0
>>> [    1.193455]  __common_interrupt+0x38/0x90
>>> [    1.193573]  common_interrupt+0x73/0x90
>>> [    1.193809]  </IRQ>
>>> [    1.193889]  <TASK>
>>> [    1.193956]  asm_common_interrupt+0x1b/0x40
>>> [    1.194318] RIP: 0010:_raw_spin_unlock_irqrestore+0x1b/0x40
>>> [    1.194566] Code: 24 be 01 02 00 00 e9 54 20 bf ff 0f 1f 40 00 0f 1f
>>> 44 00 00 f7 c6 00f
>>> [    1.195137] RSP: 0000:ffff888000243b68 EFLAGS: 00000246
>>> [    1.195314] RAX: 0000000000000000 RBX: ffffffff82025840 RCX:
>>> 0000000000000000
>>> [    1.195482] RDX: 0000000000000001 RSI: 0000000000000000 RDI:
>>> 0000000000000001
>>> [    1.195645] RBP: 0000000000000202 R08: ffffffffffffffff R09:
>>> 0000000000000000
>>> [    1.195808] R10: 00000000000000eb R11: 0000000000000000 R12:
>>> 0000000000000000
>>> [    1.195972] R13: 0000000000000040 R14: 0000000000000000 R15:
>>> ffff888000c39000
>>> [    1.196245]  ? _raw_spin_unlock_irqrestore+0x15/0x40
>>> [    1.196373]  serial8250_do_startup+0x42d/0x600
>>> [    1.196502]  uart_port_startup+0x11b/0x270
>>> [    1.196619]  uart_port_activate+0x3f/0x60
>>> [    1.196729]  tty_port_open+0x7e/0xd0
>>> [    1.196835]  ? _raw_spin_unlock+0x12/0x30
>>> [    1.196942]  uart_open+0x1a/0x30
>>> [    1.197036]  tty_open+0x153/0x7c0
>>> [    1.197144]  chrdev_open+0xbf/0x230
>>> [    1.197253]  ? cdev_device_add+0x90/0x90
>>> [    1.197359]  do_dentry_open+0x13c/0x360
>>> [    1.197470]  path_openat+0xb0c/0xe00
>>> [    1.197577]  ? update_load_avg+0x5f/0x640
>>> [    1.197691]  ? finish_task_switch.isra.0+0xac/0x240
>>> [    1.197821]  do_filp_open+0xb2/0x150
>>> [    1.197935]  ? preempt_schedule_thunk+0x16/0x18
>>> [    1.198049]  ? preempt_schedule_common+0x90/0xd0
>>> [    1.198167]  ? preempt_schedule_thunk+0x16/0x18
>>> [    1.198291]  file_open_name+0xf1/0x1b0
>>> [    1.198397]  filp_open+0x2c/0x50
>>> [    1.198495]  console_on_rootfs+0x19/0x52
>>> [    1.198648]  kernel_init_freeable+0x19a/0x1c7
>>> [    1.198765]  ? rest_init+0xc0/0xc0
>>> [    1.198867]  kernel_init+0x16/0x110
>>> [    1.198965]  ret_from_fork+0x1f/0x30
>>> [    1.199131]  </TASK>
>>> 
>> As I said before, this issue is not related to stackdump.
>> 
>> Besides, you can see more calltrace on x86 that because x86 iterate all stacks
>> (kernel, irq or exception) when dumping stacktrace. While RISCV only show
>> calltrace of current stack.
>> 
> 
> I'm not sure whether there's misunderstanding. See above.
> 
> Thanks
> 
> _______________________________________________
> linux-riscv mailing list
> linux-riscv@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-riscv
Changbin Du Feb. 11, 2022, 3:06 a.m. UTC | #11
I reconsidered the problem and found my previous analysis is flawed. So let's re-explain.

The fault happens on code generated by CALLER_ADDR1 (aka.__builtin_return_address(1)):
   0xffffffff8011510e <+80>:    ld      a1,-16(s0)
   0xffffffff80115112 <+84>:    ld      s2,-8(a1)  # <-- paging fault here,a1=0x0000000000000100

This because the assembly entry code doesn't setup a valid frame pointer, and the fp(aka. s0) register is used for other purpose.
resume_kernel:
	REG_L s0, TASK_TI_PREEMPT_COUNT(tp)
	bnez s0, restore_all
	REG_L s0, TASK_TI_FLAGS(tp)
	andi s0, s0, _TIF_NEED_RESCHED
	beqz s0, restore_all
	call preempt_schedule_irq
	j restore_all

So, there is two solutions:
 1) Invoke trace_hardirqs_on/off in C function, so the compiler will take care of frame pointer. This what I did.
 2) Always setup vaild frame pointer in assembly entry code. I think this is what JiSheng suggested?

I prefer #1 since we don't need to setup frame pointer if irqoff tracer is not enabled.

On Thu, Feb 10, 2022 at 11:37:06PM +0800, Jisheng Zhang wrote:
> On Thu, Feb 10, 2022 at 11:27:21PM +0800, Jisheng Zhang wrote:
> > On Thu, Feb 10, 2022 at 09:37:58PM +0800, Changbin Du wrote:
> > > On Thu, Feb 10, 2022 at 01:32:59AM +0800, Jisheng Zhang wrote:
> > > [snip]
> > > > Hi Changbin,
> > > > 
> > > > I read the code and find that current riscv frame records during
> > > > exception isn't as completed as other architectures. riscv only
> > > > records frames from the ret_from_exception(). If we add completed
> > > What do you mean for 'record'?
> > > 
> > 
> > stack frame record.
> > 
> > > > frame records as other arch do, then the issue you saw can also
> > > > be fixed at the same time.
> > > > 
> > > I don't think so. The problem is __builtin_return_address(1) trigger page fault
> > > here.
> > 
> > There's misunderstanding here. I interpret this bug as incomplete
> > stackframes.
> > 
> > This is current riscv stackframe during exception:
> > 
> > high
> >  	----------------
> > top	|		|  <- ret_from_exception
> > 	----------------
> > 	|		|  <- trace_hardirqs_on
> > 	-----------------
> > low
> 
> sorry, the "top" is wrongly placed.
>  high
>   	----------------
>  	|		|  <- ret_from_exception
>  	----------------
>  	|		|  <- trace_hardirqs_on
>  	-----------------
> top
> 
>  low
> 
> 
> 
> > 
> > As you said, the CALLER_ADDR1 a.k.a __builtin_return_address(1) needs
> > at least two parent call frames. 
> > 
> > If we complete the stackframes during exception as other arch does:
> > 
> > high
> >  	----------------
> > top	|		|  <- the synthetic stackframe from the interrupted point
> >  	----------------
> > 	      .....	      
> >         ----------------
> > 	|		|  <- ret_from_exception
> > 	----------------
> > 	|		|  <- trace_hardirqs_on
> > 	-----------------
> > low
> 
> ditto
> 
> > 
> > 
> > Then we meet the "at least two parent call frames" requirement. IOW, my
> > solution solve the problem from the entry.S side. One of the advantages
> > would be we let interrupted point show up in dump_stack() as other arch
> > do. What I'm not sure is whether it's safe to do so now since rc3 is
> > released.
> >
Changbin Du Feb. 11, 2022, 3:21 a.m. UTC | #12
On Thu, Feb 10, 2022 at 03:59:35PM +0000, Jessica Clarke wrote:
> On 10 Feb 2022, at 15:27, Jisheng Zhang <jszhang@kernel.org> wrote:
> > On Thu, Feb 10, 2022 at 09:37:58PM +0800, Changbin Du wrote:
> >> On Thu, Feb 10, 2022 at 01:32:59AM +0800, Jisheng Zhang wrote:
> >> [snip]
> >>> Hi Changbin,
> >>> 
> >>> I read the code and find that current riscv frame records during
> >>> exception isn't as completed as other architectures. riscv only
> >>> records frames from the ret_from_exception(). If we add completed
> >> What do you mean for 'record'?
> >> 
> > 
> > stack frame record.
> > 
> >>> frame records as other arch do, then the issue you saw can also
> >>> be fixed at the same time.
> >>> 
> >> I don't think so. The problem is __builtin_return_address(1) trigger page fault
> >> here.
> > 
> > There's misunderstanding here. I interpret this bug as incomplete
> > stackframes.
> > 
> > This is current riscv stackframe during exception:
> > 
> > high
> > 	----------------
> > top	|		|  <- ret_from_exception
> > 	----------------
> > 	|		|  <- trace_hardirqs_on
> > 	-----------------
> > low
> > 
> > As you said, the CALLER_ADDR1 a.k.a __builtin_return_address(1) needs
> > at least two parent call frames. 
> 
> No it doesn’t, you’re off by one, it only needs a valid current frame.
> 
> Jess
>
yes, it is two frames not two 'parent' frames. My fault.
 
> > If we complete the stackframes during exception as other arch does:
> > 
> > high
> > 	----------------
> > top	|		|  <- the synthetic stackframe from the interrupted point
> > 	----------------
> > 	      .....	      
> >        ----------------
> > 	|		|  <- ret_from_exception
> > 	----------------
> > 	|		|  <- trace_hardirqs_on
> > 	-----------------
> > low
> > 
> > 
> > Then we meet the "at least two parent call frames" requirement. IOW, my
> > solution solve the problem from the entry.S side. One of the advantages
> > would be we let interrupted point show up in dump_stack() as other arch
> > do. What I'm not sure is whether it's safe to do so now since rc3 is
> > released.
> > 
> >> 
> >>> However, I'm not sure what's the best choice now.
> >>> 
> >>> A simple demo to this incomplete frames:
> >>> add dump_stack() in any ISR, then
> >>> 
> >>> in riscv:
> >>> [    2.961294] Call Trace:
> >>> [    2.961460] [<ffffffff8000485e>] dump_backtrace+0x1c/0x24
> >>> [    2.961823] [<ffffffff805ed980>] show_stack+0x2c/0x38
> >>> [    2.962153] [<ffffffff805f292e>] dump_stack_lvl+0x40/0x58
> >>> [    2.962483] [<ffffffff805f295a>] dump_stack+0x14/0x1c
> >>> [    2.962792] [<ffffffff805f31a0>] serial8250_interrupt+0x20/0x82
> >>> [    2.963139] [<ffffffff80053032>] __handle_irq_event_percpu+0x4c/0x106
> >>> [    2.963526] [<ffffffff80053170>] handle_irq_event+0x38/0x80
> >>> [    2.963856] [<ffffffff80056a32>] handle_fasteoi_irq+0x96/0x188
> >>> [    2.964198] [<ffffffff800526ce>] generic_handle_domain_irq+0x28/0x3a
> >>> [    2.964567] [<ffffffff802f0ae4>] plic_handle_irq+0x88/0xec
> >>> [    2.964896] [<ffffffff800526ce>] generic_handle_domain_irq+0x28/0x3a
> >>> [    2.965264] [<ffffffff802f08e4>] riscv_intc_irq+0x34/0x5c
> >>> [    2.965584] [<ffffffff805f6dc8>] generic_handle_arch_irq+0x4a/0x74
> >>> [    2.966068] [<ffffffff80002fe8>] ret_from_exception+0x0/0xc
> >>> 
> >>> in x86:
> >>> [    1.191274] Call Trace:
> >>> [    1.192223]  <IRQ>
> >>> [    1.192758]  dump_stack_lvl+0x45/0x59
> >>> [    1.192982]  serial8250_interrupt+0x24/0x88
> >>> [    1.193105]  __handle_irq_event_percpu+0x66/0x1b0
> >>> [    1.193239]  handle_irq_event+0x34/0x70
> >>> [    1.193345]  handle_edge_irq+0x85/0x1e0
> >>> [    1.193455]  __common_interrupt+0x38/0x90
> >>> [    1.193573]  common_interrupt+0x73/0x90
> >>> [    1.193809]  </IRQ>
> >>> [    1.193889]  <TASK>
> >>> [    1.193956]  asm_common_interrupt+0x1b/0x40
> >>> [    1.194318] RIP: 0010:_raw_spin_unlock_irqrestore+0x1b/0x40
> >>> [    1.194566] Code: 24 be 01 02 00 00 e9 54 20 bf ff 0f 1f 40 00 0f 1f
> >>> 44 00 00 f7 c6 00f
> >>> [    1.195137] RSP: 0000:ffff888000243b68 EFLAGS: 00000246
> >>> [    1.195314] RAX: 0000000000000000 RBX: ffffffff82025840 RCX:
> >>> 0000000000000000
> >>> [    1.195482] RDX: 0000000000000001 RSI: 0000000000000000 RDI:
> >>> 0000000000000001
> >>> [    1.195645] RBP: 0000000000000202 R08: ffffffffffffffff R09:
> >>> 0000000000000000
> >>> [    1.195808] R10: 00000000000000eb R11: 0000000000000000 R12:
> >>> 0000000000000000
> >>> [    1.195972] R13: 0000000000000040 R14: 0000000000000000 R15:
> >>> ffff888000c39000
> >>> [    1.196245]  ? _raw_spin_unlock_irqrestore+0x15/0x40
> >>> [    1.196373]  serial8250_do_startup+0x42d/0x600
> >>> [    1.196502]  uart_port_startup+0x11b/0x270
> >>> [    1.196619]  uart_port_activate+0x3f/0x60
> >>> [    1.196729]  tty_port_open+0x7e/0xd0
> >>> [    1.196835]  ? _raw_spin_unlock+0x12/0x30
> >>> [    1.196942]  uart_open+0x1a/0x30
> >>> [    1.197036]  tty_open+0x153/0x7c0
> >>> [    1.197144]  chrdev_open+0xbf/0x230
> >>> [    1.197253]  ? cdev_device_add+0x90/0x90
> >>> [    1.197359]  do_dentry_open+0x13c/0x360
> >>> [    1.197470]  path_openat+0xb0c/0xe00
> >>> [    1.197577]  ? update_load_avg+0x5f/0x640
> >>> [    1.197691]  ? finish_task_switch.isra.0+0xac/0x240
> >>> [    1.197821]  do_filp_open+0xb2/0x150
> >>> [    1.197935]  ? preempt_schedule_thunk+0x16/0x18
> >>> [    1.198049]  ? preempt_schedule_common+0x90/0xd0
> >>> [    1.198167]  ? preempt_schedule_thunk+0x16/0x18
> >>> [    1.198291]  file_open_name+0xf1/0x1b0
> >>> [    1.198397]  filp_open+0x2c/0x50
> >>> [    1.198495]  console_on_rootfs+0x19/0x52
> >>> [    1.198648]  kernel_init_freeable+0x19a/0x1c7
> >>> [    1.198765]  ? rest_init+0xc0/0xc0
> >>> [    1.198867]  kernel_init+0x16/0x110
> >>> [    1.198965]  ret_from_fork+0x1f/0x30
> >>> [    1.199131]  </TASK>
> >>> 
> >> As I said before, this issue is not related to stackdump.
> >> 
> >> Besides, you can see more calltrace on x86 that because x86 iterate all stacks
> >> (kernel, irq or exception) when dumping stacktrace. While RISCV only show
> >> calltrace of current stack.
> >> 
> > 
> > I'm not sure whether there's misunderstanding. See above.
> > 
> > Thanks
> > 
> > _______________________________________________
> > linux-riscv mailing list
> > linux-riscv@lists.infradead.org
> > http://lists.infradead.org/mailman/listinfo/linux-riscv
>
diff mbox series

Patch

diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile
index 612556faa527..ffc87e76b1dd 100644
--- a/arch/riscv/kernel/Makefile
+++ b/arch/riscv/kernel/Makefile
@@ -51,6 +51,8 @@  obj-$(CONFIG_MODULE_SECTIONS)	+= module-sections.o
 obj-$(CONFIG_FUNCTION_TRACER)	+= mcount.o ftrace.o
 obj-$(CONFIG_DYNAMIC_FTRACE)	+= mcount-dyn.o
 
+obj-$(CONFIG_TRACE_IRQFLAGS)	+= trace_irq.o
+
 obj-$(CONFIG_RISCV_BASE_PMU)	+= perf_event.o
 obj-$(CONFIG_PERF_EVENTS)	+= perf_callchain.o
 obj-$(CONFIG_HAVE_PERF_REGS)	+= perf_regs.o
diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
index ed29e9c8f660..d6a46ed0bf05 100644
--- a/arch/riscv/kernel/entry.S
+++ b/arch/riscv/kernel/entry.S
@@ -108,7 +108,7 @@  _save_context:
 .option pop
 
 #ifdef CONFIG_TRACE_IRQFLAGS
-	call trace_hardirqs_off
+	call __trace_hardirqs_off
 #endif
 
 #ifdef CONFIG_CONTEXT_TRACKING
@@ -143,7 +143,7 @@  skip_context_tracking:
 	li t0, EXC_BREAKPOINT
 	beq s4, t0, 1f
 #ifdef CONFIG_TRACE_IRQFLAGS
-	call trace_hardirqs_on
+	call __trace_hardirqs_on
 #endif
 	csrs CSR_STATUS, SR_IE
 
@@ -234,7 +234,7 @@  ret_from_exception:
 	REG_L s0, PT_STATUS(sp)
 	csrc CSR_STATUS, SR_IE
 #ifdef CONFIG_TRACE_IRQFLAGS
-	call trace_hardirqs_off
+	call __trace_hardirqs_off
 #endif
 #ifdef CONFIG_RISCV_M_MODE
 	/* the MPP value is too large to be used as an immediate arg for addi */
@@ -270,10 +270,10 @@  restore_all:
 	REG_L s1, PT_STATUS(sp)
 	andi t0, s1, SR_PIE
 	beqz t0, 1f
-	call trace_hardirqs_on
+	call __trace_hardirqs_on
 	j 2f
 1:
-	call trace_hardirqs_off
+	call __trace_hardirqs_off
 2:
 #endif
 	REG_L a0, PT_STATUS(sp)
diff --git a/arch/riscv/kernel/trace_irq.c b/arch/riscv/kernel/trace_irq.c
new file mode 100644
index 000000000000..fc194c56a35d
--- /dev/null
+++ b/arch/riscv/kernel/trace_irq.c
@@ -0,0 +1,26 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2022 Changbin Du <changbin.du@gmail.com>
+ */
+
+#include <linux/irqflags.h>
+#include <linux/kprobes.h>
+#include "trace_irq.h"
+
+/**
+ * trace_hardirqs_on/off requires at least two parent call frames.
+ * Here we add one extra level so they can be safely called by low
+ * level entry code.
+ */
+
+void __trace_hardirqs_on(void)
+{
+	trace_hardirqs_on();
+}
+NOKPROBE_SYMBOL(__trace_hardirqs_on);
+
+void __trace_hardirqs_off(void)
+{
+	trace_hardirqs_off();
+}
+NOKPROBE_SYMBOL(__trace_hardirqs_off);
diff --git a/arch/riscv/kernel/trace_irq.h b/arch/riscv/kernel/trace_irq.h
new file mode 100644
index 000000000000..99fe67377e5e
--- /dev/null
+++ b/arch/riscv/kernel/trace_irq.h
@@ -0,0 +1,11 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2022 Changbin Du <changbin.du@gmail.com>
+ */
+#ifndef __TRACE_IRQ_H
+#define __TRACE_IRQ_H
+
+void __trace_hardirqs_on(void);
+void __trace_hardirqs_off(void);
+
+#endif /* __TRACE_IRQ_H */