diff mbox series

[V5,08/11] riscv: Support HAVE_IRQ_EXIT_ON_IRQ_STACK

Message ID 20220918155246.1203293-9-guoren@kernel.org (mailing list archive)
State Superseded
Headers show
Series riscv: Add GENERIC_ENTRY support and related features | expand

Commit Message

Guo Ren Sept. 18, 2022, 3:52 p.m. UTC
From: Guo Ren <guoren@linux.alibaba.com>

Add independent per-CPU irq stacks to prevent kernel stack overflows.
They are also compatible with VMAP_STACK, which is supported by
implementing arch_alloc_vmap_stack.  Many architectures already support
HAVE_IRQ_EXIT_ON_IRQ_STACK; riscv should follow suit.

Signed-off-by: Guo Ren <guoren@linux.alibaba.com>
Signed-off-by: Guo Ren <guoren@kernel.org>
---
 arch/riscv/Kconfig                   |  8 +++++
 arch/riscv/include/asm/irq.h         |  3 ++
 arch/riscv/include/asm/thread_info.h |  2 ++
 arch/riscv/include/asm/vmap_stack.h  | 28 ++++++++++++++++
 arch/riscv/kernel/entry.S            | 27 ++++++++++++++++
 arch/riscv/kernel/irq.c              | 48 ++++++++++++++++++++++++++--
 6 files changed, 114 insertions(+), 2 deletions(-)
 create mode 100644 arch/riscv/include/asm/vmap_stack.h

Comments

Peter Zijlstra Sept. 19, 2022, 1:45 p.m. UTC | #1
On Sun, Sep 18, 2022 at 11:52:43AM -0400, guoren@kernel.org wrote:

> +ENTRY(call_on_stack)
> +	/* Create a frame record to save our ra and fp */
> +	addi	sp, sp, -RISCV_SZPTR
> +	REG_S	ra, (sp)
> +	addi	sp, sp, -RISCV_SZPTR
> +	REG_S	fp, (sp)
> +
> +	/* Save sp in fp */
> +	move	fp, sp
> +
> +	/* Move to the new stack and call the function there */
> +	li	a3, IRQ_STACK_SIZE
> +	add	sp, a1, a3
> +	jalr	a2
> +
> +	/*
> +	 * Restore sp from prev fp, and fp, ra from the frame
> +	 */
> +	move	sp, fp
> +	REG_L	fp, (sp)
> +	addi	sp, sp, RISCV_SZPTR
> +	REG_L	ra, (sp)
> +	addi	sp, sp, RISCV_SZPTR
> +	ret
> +ENDPROC(call_on_stack)

IIRC x86_64 moved away from a stack-switch function like this because it
presents a convenient exploit gadget.

I'm not much of an exploit writer and I've no idea how effective our
inline strategy is; perhaps others can comment.
Guo Ren Sept. 20, 2022, 6:08 a.m. UTC | #2
On Mon, Sep 19, 2022 at 9:45 PM Peter Zijlstra <peterz@infradead.org> wrote:
>
> On Sun, Sep 18, 2022 at 11:52:43AM -0400, guoren@kernel.org wrote:
>
> > +ENTRY(call_on_stack)
> > +     /* Create a frame record to save our ra and fp */
> > +     addi    sp, sp, -RISCV_SZPTR
> > +     REG_S   ra, (sp)
> > +     addi    sp, sp, -RISCV_SZPTR
> > +     REG_S   fp, (sp)
> > +
> > +     /* Save sp in fp */
> > +     move    fp, sp
> > +
> > +     /* Move to the new stack and call the function there */
> > +     li      a3, IRQ_STACK_SIZE
> > +     add     sp, a1, a3
> > +     jalr    a2
> > +
> > +     /*
> > +      * Restore sp from prev fp, and fp, ra from the frame
> > +      */
> > +     move    sp, fp
> > +     REG_L   fp, (sp)
> > +     addi    sp, sp, RISCV_SZPTR
> > +     REG_L   ra, (sp)
> > +     addi    sp, sp, RISCV_SZPTR
> > +     ret
> > +ENDPROC(call_on_stack)
>
> IIRC x86_64 moved away from a stack-switch function like this because it
> presents a convenient exploit gadget.
I found:
https://lore.kernel.org/all/20210204204903.350275743@linutronix.de/

  - The fact that the stack switching code ended up being an easy to find
    exploit gadget.

What's the exploit gadget? Do you have a ref link? Thx.

>
> I'm not much of an exploit writer and I've no idea how effective our
> inline stategy is, perhaps other can comment.
It seems that I should move to an inline flavor. a0cfc74d0b00
("x86/irq: Provide macro for inlining irq stack switching")
Peter Zijlstra Sept. 20, 2022, 7:27 a.m. UTC | #3
On Tue, Sep 20, 2022 at 02:08:55PM +0800, Guo Ren wrote:
> On Mon, Sep 19, 2022 at 9:45 PM Peter Zijlstra <peterz@infradead.org> wrote:
> >
> > On Sun, Sep 18, 2022 at 11:52:43AM -0400, guoren@kernel.org wrote:
> >
> > > +ENTRY(call_on_stack)
> > > +     /* Create a frame record to save our ra and fp */
> > > +     addi    sp, sp, -RISCV_SZPTR
> > > +     REG_S   ra, (sp)
> > > +     addi    sp, sp, -RISCV_SZPTR
> > > +     REG_S   fp, (sp)
> > > +
> > > +     /* Save sp in fp */
> > > +     move    fp, sp
> > > +
> > > +     /* Move to the new stack and call the function there */
> > > +     li      a3, IRQ_STACK_SIZE
> > > +     add     sp, a1, a3
> > > +     jalr    a2
> > > +
> > > +     /*
> > > +      * Restore sp from prev fp, and fp, ra from the frame
> > > +      */
> > > +     move    sp, fp
> > > +     REG_L   fp, (sp)
> > > +     addi    sp, sp, RISCV_SZPTR
> > > +     REG_L   ra, (sp)
> > > +     addi    sp, sp, RISCV_SZPTR
> > > +     ret
> > > +ENDPROC(call_on_stack)
> >
> > IIRC x86_64 moved away from a stack-switch function like this because it
> > presents a convenient exploit gadget.
> I found:
> https://lore.kernel.org/all/20210204204903.350275743@linutronix.de/
> 
>   - The fact that the stack switching code ended up being an easy to find
>     exploit gadget.
> 
> What's the exploit gadget? Do you have a ref link? Thx.

Sadly no, I do not. Kees might. But basically it boils down to this
function taking both a stack pointer and a function pointer as
arguments (@a1 and @a2 resp. if I'm not reading this wrong).

If an attacker can call this with arguments of its choice then it gains
full control of subsequent execution.
Peter Zijlstra Sept. 20, 2022, 7:34 a.m. UTC | #4
On Tue, Sep 20, 2022 at 09:27:51AM +0200, Peter Zijlstra wrote:
> On Tue, Sep 20, 2022 at 02:08:55PM +0800, Guo Ren wrote:
> > On Mon, Sep 19, 2022 at 9:45 PM Peter Zijlstra <peterz@infradead.org> wrote:
> > >
> > > On Sun, Sep 18, 2022 at 11:52:43AM -0400, guoren@kernel.org wrote:
> > >
> > > > +ENTRY(call_on_stack)
> > > > +     /* Create a frame record to save our ra and fp */
> > > > +     addi    sp, sp, -RISCV_SZPTR
> > > > +     REG_S   ra, (sp)
> > > > +     addi    sp, sp, -RISCV_SZPTR
> > > > +     REG_S   fp, (sp)
> > > > +
> > > > +     /* Save sp in fp */
> > > > +     move    fp, sp
> > > > +
> > > > +     /* Move to the new stack and call the function there */
> > > > +     li      a3, IRQ_STACK_SIZE
> > > > +     add     sp, a1, a3
> > > > +     jalr    a2
> > > > +
> > > > +     /*
> > > > +      * Restore sp from prev fp, and fp, ra from the frame
> > > > +      */
> > > > +     move    sp, fp
> > > > +     REG_L   fp, (sp)
> > > > +     addi    sp, sp, RISCV_SZPTR
> > > > +     REG_L   ra, (sp)
> > > > +     addi    sp, sp, RISCV_SZPTR
> > > > +     ret
> > > > +ENDPROC(call_on_stack)
> > >
> > > IIRC x86_64 moved away from a stack-switch function like this because it
> > > presents a convenient exploit gadget.
> > I found:
> > https://lore.kernel.org/all/20210204204903.350275743@linutronix.de/
> > 
> >   - The fact that the stack switching code ended up being an easy to find
> >     exploit gadget.
> > 
> > What's the exploit gadget? Do you have a ref link? Thx.
> 
> Sadly no, I do not. Kees might. But basically it boils down to this
> function taking both a stack pointer and a function pointer as
> arguments (@a1 and @a2 resp. if I'm not reading this wrong).
> 
> If an attacker can call this with arguments of its choice then it gains
> full control of subsequent execution.

If you inline it the hope is that the function pointers go away or at
least the encompassing function doesn't have quite such a 'convenient'
signature to hijack control flow.
Guo Ren Sept. 21, 2022, 6:16 a.m. UTC | #5
On Tue, Sep 20, 2022 at 3:34 PM Peter Zijlstra <peterz@infradead.org> wrote:
>
> On Tue, Sep 20, 2022 at 09:27:51AM +0200, Peter Zijlstra wrote:
> > On Tue, Sep 20, 2022 at 02:08:55PM +0800, Guo Ren wrote:
> > > On Mon, Sep 19, 2022 at 9:45 PM Peter Zijlstra <peterz@infradead.org> wrote:
> > > >
> > > > On Sun, Sep 18, 2022 at 11:52:43AM -0400, guoren@kernel.org wrote:
> > > >
> > > > > +ENTRY(call_on_stack)
> > > > > +     /* Create a frame record to save our ra and fp */
> > > > > +     addi    sp, sp, -RISCV_SZPTR
> > > > > +     REG_S   ra, (sp)
> > > > > +     addi    sp, sp, -RISCV_SZPTR
> > > > > +     REG_S   fp, (sp)
> > > > > +
> > > > > +     /* Save sp in fp */
> > > > > +     move    fp, sp
> > > > > +
> > > > > +     /* Move to the new stack and call the function there */
> > > > > +     li      a3, IRQ_STACK_SIZE
> > > > > +     add     sp, a1, a3
> > > > > +     jalr    a2
> > > > > +
> > > > > +     /*
> > > > > +      * Restore sp from prev fp, and fp, ra from the frame
> > > > > +      */
> > > > > +     move    sp, fp
> > > > > +     REG_L   fp, (sp)
> > > > > +     addi    sp, sp, RISCV_SZPTR
> > > > > +     REG_L   ra, (sp)
> > > > > +     addi    sp, sp, RISCV_SZPTR
> > > > > +     ret
> > > > > +ENDPROC(call_on_stack)
> > > >
> > > > IIRC x86_64 moved away from a stack-switch function like this because it
> > > > presents a convenient exploit gadget.
> > > I found:
> > > https://lore.kernel.org/all/20210204204903.350275743@linutronix.de/
> > >
> > >   - The fact that the stack switching code ended up being an easy to find
> > >     exploit gadget.
> > >
> > > What's the exploit gadget? Do you have a ref link? Thx.
> >
> > Sadly no, I do not. Kees might. But basically it boils down to this
> > function taking both a stack pointer and a function pointer as
> > arguments (@a1 and @a2 resp. if I'm not reading this wrong).
> >
> > If an attacker can call this with arguments of its choice then it gains
> > full control of subsequent execution.
>
> If you inline it the hope is that the function pointers go away or at
> least the encompassing function doesn't have quite such a 'convenient'
> signature to hijack control flow.
Thanks for mentioning it. I would change to an inline style.
Chen Zhongjin Sept. 21, 2022, 8:34 a.m. UTC | #6
Hi,

On 2022/9/18 23:52, guoren@kernel.org wrote:
> diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
> index 5f49517cd3a2..426529b84db0 100644
> --- a/arch/riscv/kernel/entry.S
> +++ b/arch/riscv/kernel/entry.S
> @@ -332,6 +332,33 @@ ENTRY(ret_from_kernel_thread)
>   	tail syscall_exit_to_user_mode
>   ENDPROC(ret_from_kernel_thread)
>   
> +#ifdef CONFIG_IRQ_STACKS
> +ENTRY(call_on_stack)
> +	/* Create a frame record to save our ra and fp */
> +	addi	sp, sp, -RISCV_SZPTR
> +	REG_S	ra, (sp)
> +	addi	sp, sp, -RISCV_SZPTR
> +	REG_S	fp, (sp)
> +
> +	/* Save sp in fp */
> +	move	fp, sp
> +
> +	/* Move to the new stack and call the function there */
> +	li	a3, IRQ_STACK_SIZE
> +	add	sp, a1, a3
> +	jalr	a2
> +
> +	/*
> +	 * Restore sp from prev fp, and fp, ra from the frame
> +	 */
> +	move	sp, fp
> +	REG_L	fp, (sp)
> +	addi	sp, sp, RISCV_SZPTR
> +	REG_L	ra, (sp)
> +	addi	sp, sp, RISCV_SZPTR
> +	ret
> +ENDPROC(call_on_stack)
> +#endif

It seems my compiler (riscv64-linux-gnu-gcc 8.4.0, cross compiling from
x86) cannot recognize the register `fp`.

After I changed it to `s0`, it compiles.


It seems `fp` is not used anywhere else; can this just use `s0` instead?

Best,

Chen
Guo Ren Sept. 21, 2022, 9:53 a.m. UTC | #7
On Wed, Sep 21, 2022 at 4:34 PM Chen Zhongjin <chenzhongjin@huawei.com> wrote:
>
> Hi,
>
> On 2022/9/18 23:52, guoren@kernel.org wrote:
> > diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
> > index 5f49517cd3a2..426529b84db0 100644
> > --- a/arch/riscv/kernel/entry.S
> > +++ b/arch/riscv/kernel/entry.S
> > @@ -332,6 +332,33 @@ ENTRY(ret_from_kernel_thread)
> >       tail syscall_exit_to_user_mode
> >   ENDPROC(ret_from_kernel_thread)
> >
> > +#ifdef CONFIG_IRQ_STACKS
> > +ENTRY(call_on_stack)
> > +     /* Create a frame record to save our ra and fp */
> > +     addi    sp, sp, -RISCV_SZPTR
> > +     REG_S   ra, (sp)
> > +     addi    sp, sp, -RISCV_SZPTR
> > +     REG_S   fp, (sp)
> > +
> > +     /* Save sp in fp */
> > +     move    fp, sp
> > +
> > +     /* Move to the new stack and call the function there */
> > +     li      a3, IRQ_STACK_SIZE
> > +     add     sp, a1, a3
> > +     jalr    a2
> > +
> > +     /*
> > +      * Restore sp from prev fp, and fp, ra from the frame
> > +      */
> > +     move    sp, fp
> > +     REG_L   fp, (sp)
> > +     addi    sp, sp, RISCV_SZPTR
> > +     REG_L   ra, (sp)
> > +     addi    sp, sp, RISCV_SZPTR
> > +     ret
> > +ENDPROC(call_on_stack)
> > +#endif
>
> Seems my compiler (riscv64-linux-gnu-gcc 8.4.0, cross compiling from
> x86) cannot recognize the register `fp`.
The whole entry.S uses s0 instead of fp, so I approve of your advice. Thx.

>
> After I changed it to `s0` this can pass compiling.
>
>
> Seems there is nowhere else using `fp`, can this just using `s0` instead?
>
> Best,
>
> Chen
>
Chen Zhongjin Sept. 21, 2022, 11:56 a.m. UTC | #8
Hi,

Sorry to bother you again; I just finished testing your patches on top of
my patch set.

On 2022/9/21 17:53, Guo Ren wrote:
> On Wed, Sep 21, 2022 at 4:34 PM Chen Zhongjin <chenzhongjin@huawei.com> wrote:
>> Hi,
>>
>> On 2022/9/18 23:52, guoren@kernel.org wrote:
>>> diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
>>> index 5f49517cd3a2..426529b84db0 100644
>>> --- a/arch/riscv/kernel/entry.S
>>> +++ b/arch/riscv/kernel/entry.S
>>> @@ -332,6 +332,33 @@ ENTRY(ret_from_kernel_thread)
>>>        tail syscall_exit_to_user_mode
>>>    ENDPROC(ret_from_kernel_thread)
>>>
>>> +#ifdef CONFIG_IRQ_STACKS
>>> +ENTRY(call_on_stack)
>>> +     /* Create a frame record to save our ra and fp */
>>> +     addi    sp, sp, -RISCV_SZPTR
>>> +     REG_S   ra, (sp)
>>> +     addi    sp, sp, -RISCV_SZPTR
>>> +     REG_S   fp, (sp)
>>> +
>>> +     /* Save sp in fp */
>>> +     move    fp, sp
>>> +

Considering that s0 points to previous sp normally, I think here we 
should have 'addi fp, sp, 2*RISCV_SZPTR'.

An example below:

     addi    sp, sp, -16
     sd  ra, 8(sp)
     sd  s0, 0(sp)
     addi    s0, sp, 16    <- s0 is set to previous sp
     ...

     ld  ra,8(sp)
     ld  s0,0(sp)
     addi    sp,sp,16

So maybe it's better to save the stack frame as below:

     addi    sp, sp, -2*RISCV_SZPTR
     REG_S   ra, RISCV_SZPTR(sp)
     REG_S   s0, (sp)

     /* Save sp in fp */
     addi    s0, sp, 2*RISCV_SZPTR

     ...

     /*
      * Restore sp from prev fp, and fp, ra from the frame
      */
     addi    sp, s0, -2*RISCV_SZPTR
     REG_L   ra, RISCV_SZPTR(sp)
     REG_L   s0, (sp)
     addi    sp, sp, 2*RISCV_SZPTR


Anyway, let's set fp to sp + 2 * RISCV_SZPTR, so that the unwinder can
connect the two stacks the same way as for a normal function.

I tested this with my patch and the unwinder works properly.


Thanks for your time!

Best,

Chen

>>> +     /* Move to the new stack and call the function there */
>>> +     li      a3, IRQ_STACK_SIZE
>>> +     add     sp, a1, a3
>>> +     jalr    a2
>>> +
>>> +     /*
>>> +      * Restore sp from prev fp, and fp, ra from the frame
>>> +      */
>>> +     move    sp, fp
>>> +     REG_L   fp, (sp)
>>> +     addi    sp, sp, RISCV_SZPTR
>>> +     REG_L   ra, (sp)
>>> +     addi    sp, sp, RISCV_SZPTR
>>> +     ret
>>> +ENDPROC(call_on_stack)
>>> +#endif
>> Seems my compiler (riscv64-linux-gnu-gcc 8.4.0, cross compiling from
>> x86) cannot recognize the register `fp`.
> The whole entry.S uses s0 instead of fp, so I approve of your advice. Thx.
>
>> After I changed it to `s0` this can pass compiling.
>>
>>
>> Seems there is nowhere else using `fp`, can this just using `s0` instead?
>>
>> Best,
>>
>> Chen
>>
>>
Guo Ren Sept. 22, 2022, 1:26 a.m. UTC | #9
On Wed, Sep 21, 2022 at 7:56 PM Chen Zhongjin <chenzhongjin@huawei.com> wrote:
>
> Hi,
>
> Sorry to bother again, I just finished the test with your patches on
> mine patch set.
>
> On 2022/9/21 17:53, Guo Ren wrote:
> > On Wed, Sep 21, 2022 at 4:34 PM Chen Zhongjin <chenzhongjin@huawei.com> wrote:
> >> Hi,
> >>
> >> On 2022/9/18 23:52, guoren@kernel.org wrote:
> >>> diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
> >>> index 5f49517cd3a2..426529b84db0 100644
> >>> --- a/arch/riscv/kernel/entry.S
> >>> +++ b/arch/riscv/kernel/entry.S
> >>> @@ -332,6 +332,33 @@ ENTRY(ret_from_kernel_thread)
> >>>        tail syscall_exit_to_user_mode
> >>>    ENDPROC(ret_from_kernel_thread)
> >>>
> >>> +#ifdef CONFIG_IRQ_STACKS
> >>> +ENTRY(call_on_stack)
> >>> +     /* Create a frame record to save our ra and fp */
> >>> +     addi    sp, sp, -RISCV_SZPTR
> >>> +     REG_S   ra, (sp)
> >>> +     addi    sp, sp, -RISCV_SZPTR
> >>> +     REG_S   fp, (sp)
> >>> +
> >>> +     /* Save sp in fp */
> >>> +     move    fp, sp
> >>> +
>
> Considering that s0 points to previous sp normally, I think here we
> should have 'addi fp, sp, 2*RISCV_SZPTR'.
>
> An example below:
>
>      addi    sp, sp, -16
>      sd  ra, 8(sp)
>      sd  s0, 0(sp)
>      addi    s0, sp, 16    <- s0 is set to previous sp
>      ...
>
>      ld  ra,8(sp)
>      ld  s0,0(sp)
>      addi    sp,sp,16
>
> So maybe it's better to save the stack frame as below:
>
>      addi    sp, sp, -2*RISCV_SZPTR
>      REG_S   ra, RISCV_SZPTR(sp)
>      REG_S   s0, (sp)
>
>      /* Save sp in fp */
>      addi    s0, sp, 2*RISCV_SZPTR
>
>      ...
>
>      /*
>       * Restore sp from prev fp, and fp, ra from the frame
>       */
>      addi    sp, s0, -2*RISCV_SZPTR
>      REG_L   ra, RISCV_SZPTR(sp)
>      REG_L   s0, (sp)
>      addi    sp, sp, 2*RISCV_SZPTR
>
>
> Anyway, lets set fp as sp + 2 * RISCV_SZPTR, so that unwinder can
> connect two stacks same as normal function.
>
> I tested this with my patch and the unwinder works properly.
Thx, you got it. My patch broke the fp chain. I would fix it in the
next version.

>
>
> Thanks for your time!
>
> Best,
>
> Chen
>
> >>> +     /* Move to the new stack and call the function there */
> >>> +     li      a3, IRQ_STACK_SIZE
> >>> +     add     sp, a1, a3
> >>> +     jalr    a2
> >>> +
> >>> +     /*
> >>> +      * Restore sp from prev fp, and fp, ra from the frame
> >>> +      */
> >>> +     move    sp, fp
> >>> +     REG_L   fp, (sp)
> >>> +     addi    sp, sp, RISCV_SZPTR
> >>> +     REG_L   ra, (sp)
> >>> +     addi    sp, sp, RISCV_SZPTR
> >>> +     ret
> >>> +ENDPROC(call_on_stack)
> >>> +#endif
> >> Seems my compiler (riscv64-linux-gnu-gcc 8.4.0, cross compiling from
> >> x86) cannot recognize the register `fp`.
> > The whole entry.S uses s0 instead of fp, so I approve of your advice. Thx.
> >
> >> After I changed it to `s0` this can pass compiling.
> >>
> >>
> >> Seems there is nowhere else using `fp`, can this just using `s0` instead?
> >>
> >> Best,
> >>
> >> Chen
> >>
> >>
diff mbox series

Patch

diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index a07bb3b73b5b..75db47a983f2 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -433,6 +433,14 @@  config FPU
 
 	  If you don't know what to do here, say Y.
 
+config IRQ_STACKS
+	bool "Independent irq stacks" if EXPERT
+	default y
+	select HAVE_IRQ_EXIT_ON_IRQ_STACK
+	help
+	  Add independent irq stacks for percpu to prevent kernel stack overflows.
+	  We may save some memory footprint by disabling IRQ_STACKS.
+
 endmenu # "Platform type"
 
 menu "Kernel features"
diff --git a/arch/riscv/include/asm/irq.h b/arch/riscv/include/asm/irq.h
index e4c435509983..205e1c693dfd 100644
--- a/arch/riscv/include/asm/irq.h
+++ b/arch/riscv/include/asm/irq.h
@@ -13,5 +13,8 @@ 
 #include <asm-generic/irq.h>
 
 extern void __init init_IRQ(void);
+asmlinkage void call_on_stack(struct pt_regs *regs, ulong *sp,
+				     void (*fn)(struct pt_regs *), ulong tmp);
+asmlinkage void noinstr do_riscv_irq(struct pt_regs *regs);
 
 #endif /* _ASM_RISCV_IRQ_H */
diff --git a/arch/riscv/include/asm/thread_info.h b/arch/riscv/include/asm/thread_info.h
index 7de4fb96f0b5..043da8ccc7e6 100644
--- a/arch/riscv/include/asm/thread_info.h
+++ b/arch/riscv/include/asm/thread_info.h
@@ -40,6 +40,8 @@ 
 #define OVERFLOW_STACK_SIZE     SZ_4K
 #define SHADOW_OVERFLOW_STACK_SIZE (1024)
 
+#define IRQ_STACK_SIZE		THREAD_SIZE
+
 #ifndef __ASSEMBLY__
 
 extern long shadow_stack[SHADOW_OVERFLOW_STACK_SIZE / sizeof(long)];
diff --git a/arch/riscv/include/asm/vmap_stack.h b/arch/riscv/include/asm/vmap_stack.h
new file mode 100644
index 000000000000..3fbf481abf4f
--- /dev/null
+++ b/arch/riscv/include/asm/vmap_stack.h
@@ -0,0 +1,28 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+// Copied from arch/arm64/include/asm/vmap_stack.h.
+#ifndef _ASM_RISCV_VMAP_STACK_H
+#define _ASM_RISCV_VMAP_STACK_H
+
+#include <linux/bug.h>
+#include <linux/gfp.h>
+#include <linux/kconfig.h>
+#include <linux/vmalloc.h>
+#include <linux/pgtable.h>
+#include <asm/thread_info.h>
+
+/*
+ * To ensure that VMAP'd stack overflow detection works correctly, all VMAP'd
+ * stacks need to have the same alignment.
+ */
+static inline unsigned long *arch_alloc_vmap_stack(size_t stack_size, int node)
+{
+	void *p;
+
+	BUILD_BUG_ON(!IS_ENABLED(CONFIG_VMAP_STACK));
+
+	p = __vmalloc_node(stack_size, THREAD_ALIGN, THREADINFO_GFP, node,
+			__builtin_return_address(0));
+	return kasan_reset_tag(p);
+}
+
+#endif /* _ASM_RISCV_VMAP_STACK_H */
diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
index 5f49517cd3a2..426529b84db0 100644
--- a/arch/riscv/kernel/entry.S
+++ b/arch/riscv/kernel/entry.S
@@ -332,6 +332,33 @@  ENTRY(ret_from_kernel_thread)
 	tail syscall_exit_to_user_mode
 ENDPROC(ret_from_kernel_thread)
 
+#ifdef CONFIG_IRQ_STACKS
+ENTRY(call_on_stack)
+	/*
+	 * a0 = regs, a1 = base of the new stack, a2 = fn, a3 = scratch.
+	 * Create a frame record to save our ra and s0 (the frame pointer).
+	 */
+	addi	sp, sp, -2*RISCV_SZPTR
+	REG_S	ra, RISCV_SZPTR(sp)
+	REG_S	s0, (sp)
+
+	/* Set s0 to the previous sp so the unwinder can link both stacks */
+	addi	s0, sp, 2*RISCV_SZPTR
+
+	/* Move to the top of the new stack (base + IRQ_STACK_SIZE) */
+	li	a3, IRQ_STACK_SIZE
+	add	sp, a1, a3
+	/* Call fn there; a0 is passed through unchanged */
+	jalr	a2
+
+	/* Restore sp from s0, then reload ra and s0 from the frame record */
+	addi	sp, s0, -2*RISCV_SZPTR
+	REG_L	ra, RISCV_SZPTR(sp)
+	REG_L	s0, (sp)
+	addi	sp, sp, 2*RISCV_SZPTR
+	ret
+ENDPROC(call_on_stack)
+#endif
 
 /*
  * Integer register context switch
diff --git a/arch/riscv/kernel/irq.c b/arch/riscv/kernel/irq.c
index 24c2e1bd756a..5ad4952203c5 100644
--- a/arch/riscv/kernel/irq.c
+++ b/arch/riscv/kernel/irq.c
@@ -10,6 +10,37 @@ 
 #include <linux/irqchip.h>
 #include <linux/seq_file.h>
 #include <asm/smp.h>
+#include <asm/vmap_stack.h>
+
+#ifdef CONFIG_IRQ_STACKS
+static DEFINE_PER_CPU(ulong *, irq_stack_ptr);
+
+#ifdef CONFIG_VMAP_STACK
+static void init_irq_stacks(void)
+{
+	int cpu;
+	ulong *p;
+
+	for_each_possible_cpu(cpu) {
+		p = arch_alloc_vmap_stack(IRQ_STACK_SIZE, cpu_to_node(cpu));
+		per_cpu(irq_stack_ptr, cpu) = p;
+	}
+}
+#else
+/* irq stack only needs to be 16 byte aligned - not IRQ_STACK_SIZE aligned. */
+DEFINE_PER_CPU_ALIGNED(ulong [IRQ_STACK_SIZE/sizeof(ulong)], irq_stack);
+
+static void init_irq_stacks(void)
+{
+	int cpu;
+
+	for_each_possible_cpu(cpu)
+		per_cpu(irq_stack_ptr, cpu) = per_cpu(irq_stack, cpu);
+}
+#endif /* CONFIG_VMAP_STACK */
+#else
+static void init_irq_stacks(void) {}
+#endif /* CONFIG_IRQ_STACKS */
 
 int arch_show_interrupts(struct seq_file *p, int prec)
 {
@@ -19,21 +50,34 @@  int arch_show_interrupts(struct seq_file *p, int prec)
 
 void __init init_IRQ(void)
 {
+	init_irq_stacks();
 	irqchip_init();
 	if (!handle_arch_irq)
 		panic("No interrupt controller found.");
 }
 
-asmlinkage void noinstr do_riscv_irq(struct pt_regs *regs)
+static void noinstr handle_riscv_irq(struct pt_regs *regs)
 {
 	struct pt_regs *old_regs;
-	irqentry_state_t state = irqentry_enter(regs);
 
 	irq_enter_rcu();
 	old_regs = set_irq_regs(regs);
 	handle_arch_irq(regs);
 	set_irq_regs(old_regs);
 	irq_exit_rcu();
+}
+
+asmlinkage void noinstr do_riscv_irq(struct pt_regs *regs)
+{
+	irqentry_state_t state = irqentry_enter(regs);
+#ifdef CONFIG_IRQ_STACKS
+	ulong *sp = per_cpu(irq_stack_ptr, smp_processor_id());
+
+	if (on_thread_stack())
+		call_on_stack(regs, sp, handle_riscv_irq, 0);
+	else
+#endif
+		handle_riscv_irq(regs);
 
 	irqentry_exit(regs, state);
 }