diff mbox series

[PATCHv8,bpf-next,3/9] uprobe: Add uretprobe syscall to speed up return probe

Message ID 20240611112158.40795-4-jolsa@kernel.org (mailing list archive)
State Accepted
Commit fdec048251ad16976e73ddfb16900a537408ac62
Headers show
Series uprobe: uretprobe speed up | expand

Commit Message

Jiri Olsa June 11, 2024, 11:21 a.m. UTC
Add a uretprobe syscall to use instead of the trap, to speed up return probes.

At the moment the uretprobe setup/path is:

  - install entry uprobe

  - when the uprobe is hit, it overwrites probed function's return address
    on stack with address of the trampoline that contains breakpoint
    instruction

  - the breakpoint trap code handles the uretprobe consumers execution and
    jumps back to original return address

This patch replaces the above trampoline's breakpoint instruction with a new
uretprobe syscall call. This syscall does exactly the same job as the trap,
with some extra work:

  - the syscall trampoline must save the original values of the rax/r11/rcx
    registers on the stack - rax is set to the syscall number and r11/rcx are
    changed and used by the syscall instruction

  - the syscall code reads the original values of those registers and
    restores those values in the task's pt_regs area

  - only a call from the trampoline exposed in '[uprobes]' is allowed;
    otherwise the process will receive a SIGILL signal

Even with some extra work, using the uretprobes syscall shows speed
improvement (compared to using standard breakpoint):

  On Intel (11th Gen Intel(R) Core(TM) i7-1165G7 @ 2.80GHz)

  current:
    uretprobe-nop  :    1.498 ± 0.000M/s
    uretprobe-push :    1.448 ± 0.001M/s
    uretprobe-ret  :    0.816 ± 0.001M/s

  with the fix:
    uretprobe-nop  :    1.969 ± 0.002M/s  < 31% speed up
    uretprobe-push :    1.910 ± 0.000M/s  < 31% speed up
    uretprobe-ret  :    0.934 ± 0.000M/s  < 14% speed up

  On Amd (AMD Ryzen 7 5700U)

  current:
    uretprobe-nop  :    0.778 ± 0.001M/s
    uretprobe-push :    0.744 ± 0.001M/s
    uretprobe-ret  :    0.540 ± 0.001M/s

  with the fix:
    uretprobe-nop  :    0.860 ± 0.001M/s  < 10% speed up
    uretprobe-push :    0.818 ± 0.001M/s  < 10% speed up
    uretprobe-ret  :    0.578 ± 0.000M/s  <  7% speed up

The performance test spawns a thread that runs a loop which triggers
a uprobe with an attached bpf program; the program increments the counter
that gets printed in the results above.

The uprobe (and uretprobe) kind is determined by which instruction
is being patched with the breakpoint instruction. That's also important
for uretprobes, because a uprobe is installed for each uretprobe.

The performance test is part of bpf selftests:
  tools/testing/selftests/bpf/run_bench_uprobes.sh

Note that at the moment the uretprobe syscall is supported only for native
64-bit processes; compat processes still use the standard breakpoint.

Note that when shadow stack is enabled the uretprobe syscall returns
via iret, which is slower than returning via sysret, but won't cause a
shadow stack violation.

Suggested-by: Andrii Nakryiko <andrii@kernel.org>
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Reviewed-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
---
 arch/x86/include/asm/shstk.h |   2 +
 arch/x86/kernel/shstk.c      |   5 ++
 arch/x86/kernel/uprobes.c    | 117 +++++++++++++++++++++++++++++++++++
 include/linux/uprobes.h      |   3 +
 kernel/events/uprobes.c      |  24 ++++---
 5 files changed, 144 insertions(+), 7 deletions(-)

Comments

Nathan Chancellor June 14, 2024, 5:48 p.m. UTC | #1
Hi Jiri,

On Tue, Jun 11, 2024 at 01:21:52PM +0200, Jiri Olsa wrote:
> Adding uretprobe syscall instead of trap to speed up return probe.
...
> diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
> index 2c83ba776fc7..2816e65729ac 100644
> --- a/kernel/events/uprobes.c
> +++ b/kernel/events/uprobes.c
> @@ -1474,11 +1474,20 @@ static int xol_add_vma(struct mm_struct *mm, struct xol_area *area)
>  	return ret;
>  }
>  
> +void * __weak arch_uprobe_trampoline(unsigned long *psize)
> +{
> +	static uprobe_opcode_t insn = UPROBE_SWBP_INSN;

This change as commit ff474a78cef5 ("uprobe: Add uretprobe syscall to
speed up return probe") in -next causes the following build error for
ARCH=loongarch allmodconfig:

  In file included from include/linux/uprobes.h:49,
                   from include/linux/mm_types.h:16,
                   from include/linux/mmzone.h:22,
                   from include/linux/gfp.h:7,
                   from include/linux/xarray.h:16,
                   from include/linux/list_lru.h:14,
                   from include/linux/fs.h:13,
                   from include/linux/highmem.h:5,
                   from kernel/events/uprobes.c:13:
  kernel/events/uprobes.c: In function 'arch_uprobe_trampoline':
  arch/loongarch/include/asm/uprobes.h:12:33: error: initializer element is not constant
     12 | #define UPROBE_SWBP_INSN        larch_insn_gen_break(BRK_UPROBE_BP)
        |                                 ^~~~~~~~~~~~~~~~~~~~
  kernel/events/uprobes.c:1479:39: note: in expansion of macro 'UPROBE_SWBP_INSN'
   1479 |         static uprobe_opcode_t insn = UPROBE_SWBP_INSN;
        |                                       ^~~~~~~~~~~~~~~~

> +	*psize = UPROBE_SWBP_INSN_SIZE;
> +	return &insn;
> +}
> +
>  static struct xol_area *__create_xol_area(unsigned long vaddr)
>  {
>  	struct mm_struct *mm = current->mm;
> -	uprobe_opcode_t insn = UPROBE_SWBP_INSN;
> +	unsigned long insns_size;
>  	struct xol_area *area;
> +	void *insns;
>  
>  	area = kmalloc(sizeof(*area), GFP_KERNEL);
>  	if (unlikely(!area))
> @@ -1502,7 +1511,8 @@ static struct xol_area *__create_xol_area(unsigned long vaddr)
>  	/* Reserve the 1st slot for get_trampoline_vaddr() */
>  	set_bit(0, area->bitmap);
>  	atomic_set(&area->slot_count, 1);
> -	arch_uprobe_copy_ixol(area->pages[0], 0, &insn, UPROBE_SWBP_INSN_SIZE);
> +	insns = arch_uprobe_trampoline(&insns_size);
> +	arch_uprobe_copy_ixol(area->pages[0], 0, insns, insns_size);
>  
>  	if (!xol_add_vma(mm, area))
>  		return area;
> @@ -1827,7 +1837,7 @@ void uprobe_copy_process(struct task_struct *t, unsigned long flags)
>   *
>   * Returns -1 in case the xol_area is not allocated.
>   */
> -static unsigned long get_trampoline_vaddr(void)
> +unsigned long uprobe_get_trampoline_vaddr(void)
>  {
>  	struct xol_area *area;
>  	unsigned long trampoline_vaddr = -1;
> @@ -1878,7 +1888,7 @@ static void prepare_uretprobe(struct uprobe *uprobe, struct pt_regs *regs)
>  	if (!ri)
>  		return;
>  
> -	trampoline_vaddr = get_trampoline_vaddr();
> +	trampoline_vaddr = uprobe_get_trampoline_vaddr();
>  	orig_ret_vaddr = arch_uretprobe_hijack_return_addr(trampoline_vaddr, regs);
>  	if (orig_ret_vaddr == -1)
>  		goto fail;
> @@ -2123,7 +2133,7 @@ static struct return_instance *find_next_ret_chain(struct return_instance *ri)
>  	return ri;
>  }
>  
> -static void handle_trampoline(struct pt_regs *regs)
> +void uprobe_handle_trampoline(struct pt_regs *regs)
>  {
>  	struct uprobe_task *utask;
>  	struct return_instance *ri, *next;
> @@ -2187,8 +2197,8 @@ static void handle_swbp(struct pt_regs *regs)
>  	int is_swbp;
>  
>  	bp_vaddr = uprobe_get_swbp_addr(regs);
> -	if (bp_vaddr == get_trampoline_vaddr())
> -		return handle_trampoline(regs);
> +	if (bp_vaddr == uprobe_get_trampoline_vaddr())
> +		return uprobe_handle_trampoline(regs);
>  
>  	uprobe = find_active_uprobe(bp_vaddr, &is_swbp);
>  	if (!uprobe) {
> -- 
> 2.45.1
> 

Cheers,
Nathan
Jiri Olsa June 14, 2024, 7:26 p.m. UTC | #2
On Fri, Jun 14, 2024 at 10:48:22AM -0700, Nathan Chancellor wrote:
> Hi Jiri,
> 
> On Tue, Jun 11, 2024 at 01:21:52PM +0200, Jiri Olsa wrote:
> > Adding uretprobe syscall instead of trap to speed up return probe.
> ...
> > diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
> > index 2c83ba776fc7..2816e65729ac 100644
> > --- a/kernel/events/uprobes.c
> > +++ b/kernel/events/uprobes.c
> > @@ -1474,11 +1474,20 @@ static int xol_add_vma(struct mm_struct *mm, struct xol_area *area)
> >  	return ret;
> >  }
> >  
> > +void * __weak arch_uprobe_trampoline(unsigned long *psize)
> > +{
> > +	static uprobe_opcode_t insn = UPROBE_SWBP_INSN;
> 
> This change as commit ff474a78cef5 ("uprobe: Add uretprobe syscall to
> speed up return probe") in -next causes the following build error for
> ARCH=loongarch allmodconfig:
> 
>   In file included from include/linux/uprobes.h:49,
>                    from include/linux/mm_types.h:16,
>                    from include/linux/mmzone.h:22,
>                    from include/linux/gfp.h:7,
>                    from include/linux/xarray.h:16,
>                    from include/linux/list_lru.h:14,
>                    from include/linux/fs.h:13,
>                    from include/linux/highmem.h:5,
>                    from kernel/events/uprobes.c:13:
>   kernel/events/uprobes.c: In function 'arch_uprobe_trampoline':
>   arch/loongarch/include/asm/uprobes.h:12:33: error: initializer element is not constant
>      12 | #define UPROBE_SWBP_INSN        larch_insn_gen_break(BRK_UPROBE_BP)
>         |                                 ^~~~~~~~~~~~~~~~~~~~
>   kernel/events/uprobes.c:1479:39: note: in expansion of macro 'UPROBE_SWBP_INSN'
>    1479 |         static uprobe_opcode_t insn = UPROBE_SWBP_INSN;

reproduced, could you please try the change below

thanks,
jirka


---
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 2816e65729ac..6986bd993702 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -1476,8 +1476,9 @@ static int xol_add_vma(struct mm_struct *mm, struct xol_area *area)
 
 void * __weak arch_uprobe_trampoline(unsigned long *psize)
 {
-	static uprobe_opcode_t insn = UPROBE_SWBP_INSN;
+	static uprobe_opcode_t insn;
 
+	insn = insn ?: UPROBE_SWBP_INSN;
 	*psize = UPROBE_SWBP_INSN_SIZE;
 	return &insn;
 }
Nathan Chancellor June 14, 2024, 8:07 p.m. UTC | #3
On Fri, Jun 14, 2024 at 09:26:59PM +0200, Jiri Olsa wrote:
> On Fri, Jun 14, 2024 at 10:48:22AM -0700, Nathan Chancellor wrote:
> > Hi Jiri,
> > 
> > On Tue, Jun 11, 2024 at 01:21:52PM +0200, Jiri Olsa wrote:
> > > Adding uretprobe syscall instead of trap to speed up return probe.
> > ...
> > > diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
> > > index 2c83ba776fc7..2816e65729ac 100644
> > > --- a/kernel/events/uprobes.c
> > > +++ b/kernel/events/uprobes.c
> > > @@ -1474,11 +1474,20 @@ static int xol_add_vma(struct mm_struct *mm, struct xol_area *area)
> > >  	return ret;
> > >  }
> > >  
> > > +void * __weak arch_uprobe_trampoline(unsigned long *psize)
> > > +{
> > > +	static uprobe_opcode_t insn = UPROBE_SWBP_INSN;
> > 
> > This change as commit ff474a78cef5 ("uprobe: Add uretprobe syscall to
> > speed up return probe") in -next causes the following build error for
> > ARCH=loongarch allmodconfig:
> > 
> >   In file included from include/linux/uprobes.h:49,
> >                    from include/linux/mm_types.h:16,
> >                    from include/linux/mmzone.h:22,
> >                    from include/linux/gfp.h:7,
> >                    from include/linux/xarray.h:16,
> >                    from include/linux/list_lru.h:14,
> >                    from include/linux/fs.h:13,
> >                    from include/linux/highmem.h:5,
> >                    from kernel/events/uprobes.c:13:
> >   kernel/events/uprobes.c: In function 'arch_uprobe_trampoline':
> >   arch/loongarch/include/asm/uprobes.h:12:33: error: initializer element is not constant
> >      12 | #define UPROBE_SWBP_INSN        larch_insn_gen_break(BRK_UPROBE_BP)
> >         |                                 ^~~~~~~~~~~~~~~~~~~~
> >   kernel/events/uprobes.c:1479:39: note: in expansion of macro 'UPROBE_SWBP_INSN'
> >    1479 |         static uprobe_opcode_t insn = UPROBE_SWBP_INSN;
> 
> reproduced, could you please try the change below

Yeah, that fixes the issue for me.

Tested-by: Nathan Chancellor <nathan@kernel.org> # build

> ---
> diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
> index 2816e65729ac..6986bd993702 100644
> --- a/kernel/events/uprobes.c
> +++ b/kernel/events/uprobes.c
> @@ -1476,8 +1476,9 @@ static int xol_add_vma(struct mm_struct *mm, struct xol_area *area)
>  
>  void * __weak arch_uprobe_trampoline(unsigned long *psize)
>  {
> -	static uprobe_opcode_t insn = UPROBE_SWBP_INSN;
> +	static uprobe_opcode_t insn;
>  
> +	insn = insn ?: UPROBE_SWBP_INSN;
>  	*psize = UPROBE_SWBP_INSN_SIZE;
>  	return &insn;
>  }
Guenter Roeck June 20, 2024, 6:19 p.m. UTC | #4
On Tue, Jun 11, 2024 at 01:21:52PM +0200, Jiri Olsa wrote:
> Adding uretprobe syscall instead of trap to speed up return probe.
> 

This patch results in:

Building loongarch:allmodconfig ... failed
--------------
Error log:
In file included from include/linux/uprobes.h:49,
                 from include/linux/mm_types.h:16,
                 from include/linux/mmzone.h:22,
                 from include/linux/gfp.h:7,
                 from include/linux/xarray.h:16,
                 from include/linux/list_lru.h:14,
                 from include/linux/fs.h:13,
                 from include/linux/highmem.h:5,
                 from kernel/events/uprobes.c:13:
kernel/events/uprobes.c: In function 'arch_uprobe_trampoline':
arch/loongarch/include/asm/uprobes.h:12:33: error: initializer element is not constant
   12 | #define UPROBE_SWBP_INSN        larch_insn_gen_break(BRK_UPROBE_BP)
      |                                 ^~~~~~~~~~~~~~~~~~~~
kernel/events/uprobes.c:1479:39: note: in expansion of macro 'UPROBE_SWBP_INSN'
 1479 |         static uprobe_opcode_t insn = UPROBE_SWBP_INSN;
      |                                       ^~~~~~~~~~~~~~~~

Bisect log attached.

Guenter

---
# bad: [2102cb0d050d34d50b9642a3a50861787527e922] Add linux-next specific files for 20240619
# good: [6ba59ff4227927d3a8530fc2973b80e94b54d58f] Linux 6.10-rc4
git bisect start 'HEAD' 'v6.10-rc4'
# good: [a8fa5261ec87d5aafd3211548d93008d5739457d] Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/cryptodev-2.6.git
git bisect good a8fa5261ec87d5aafd3211548d93008d5739457d
# good: [ee551f4db89753511a399b808db75654facec7c8] Merge branch 'for-linux-next' of https://gitlab.freedesktop.org/drm/i915/kernel
git bisect good ee551f4db89753511a399b808db75654facec7c8
# bad: [ec3557f4b791d72d93bfb69702d441d2c9f8cd0d] Merge branch 'next' of git://git.kernel.org/pub/scm/virt/kvm/kvm.git
git bisect bad ec3557f4b791d72d93bfb69702d441d2c9f8cd0d
# good: [29e7873afb5768f7af65802d021ee0c9bf2167be] Merge branch 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/pcmoore/lsm.git
git bisect good 29e7873afb5768f7af65802d021ee0c9bf2167be
# good: [ffe376e4a4ec29bb29d97664b72ff607e86f5b02] Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git
git bisect good ffe376e4a4ec29bb29d97664b72ff607e86f5b02
# bad: [39264a48da368f5394289133802f7d105dd3a33c] Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace.git
git bisect bad 39264a48da368f5394289133802f7d105dd3a33c
# good: [8af40c77dfe215cb8ad60c221d8eb740b056460b] Merge ftrace/for-next
git bisect good 8af40c77dfe215cb8ad60c221d8eb740b056460b
# bad: [5dfebf3c26dc5fe0fe08a5b4f334922b650e43b9] Merge ring-buffer/for-next
git bisect bad 5dfebf3c26dc5fe0fe08a5b4f334922b650e43b9
# bad: [9172a2da3b4162b5af0d2b57a30e844c451e74b7] Merge probes/for-next
git bisect bad 9172a2da3b4162b5af0d2b57a30e844c451e74b7
# bad: [29edd8b003db897d81d82d950785327f164650d3] selftests/x86: Add return uprobe shadow stack test
git bisect bad 29edd8b003db897d81d82d950785327f164650d3
# good: [1b3c86eeea7594eeeb49b8d1c1db0a40f0ce7920] samples: kprobes: add missing MODULE_DESCRIPTION() macros
git bisect good 1b3c86eeea7594eeeb49b8d1c1db0a40f0ce7920
# good: [190fec72df4a5d4d98b1e783c333f471e5e5f344] uprobe: Wire up uretprobe system call
git bisect good 190fec72df4a5d4d98b1e783c333f471e5e5f344
# bad: [ff474a78cef5cb5f32be52fe25b78441327a2e7c] uprobe: Add uretprobe syscall to speed up return probe
git bisect bad ff474a78cef5cb5f32be52fe25b78441327a2e7c
# first bad commit: [ff474a78cef5cb5f32be52fe25b78441327a2e7c] uprobe: Add uretprobe syscall to speed up return probe
Oleg Nesterov June 20, 2024, 6:52 p.m. UTC | #5
On 06/20, Guenter Roeck wrote:
>
> On Tue, Jun 11, 2024 at 01:21:52PM +0200, Jiri Olsa wrote:
> > Adding uretprobe syscall instead of trap to speed up return probe.
> >
>
> This patch results in:
>
> Building loongarch:allmodconfig ... failed
> --------------
> Error log:
> In file included from include/linux/uprobes.h:49,
>                  from include/linux/mm_types.h:16,
>                  from include/linux/mmzone.h:22,
>                  from include/linux/gfp.h:7,
>                  from include/linux/xarray.h:16,
>                  from include/linux/list_lru.h:14,
>                  from include/linux/fs.h:13,
>                  from include/linux/highmem.h:5,
>                  from kernel/events/uprobes.c:13:
> kernel/events/uprobes.c: In function 'arch_uprobe_trampoline':
> arch/loongarch/include/asm/uprobes.h:12:33: error: initializer element is not constant

should be fixed by https://lore.kernel.org/all/ZmyZgzqsowkGyqmH@krava/
in this thread.

but maybe arch/loongarch should override __weak arch_uprobe_trampoline() ?

Oleg.
diff mbox series

Patch

diff --git a/arch/x86/include/asm/shstk.h b/arch/x86/include/asm/shstk.h
index 896909f306e3..4cb77e004615 100644
--- a/arch/x86/include/asm/shstk.h
+++ b/arch/x86/include/asm/shstk.h
@@ -22,6 +22,7 @@  void shstk_free(struct task_struct *p);
 int setup_signal_shadow_stack(struct ksignal *ksig);
 int restore_signal_shadow_stack(void);
 int shstk_update_last_frame(unsigned long val);
+bool shstk_is_enabled(void);
 #else
 static inline long shstk_prctl(struct task_struct *task, int option,
 			       unsigned long arg2) { return -EINVAL; }
@@ -33,6 +34,7 @@  static inline void shstk_free(struct task_struct *p) {}
 static inline int setup_signal_shadow_stack(struct ksignal *ksig) { return 0; }
 static inline int restore_signal_shadow_stack(void) { return 0; }
 static inline int shstk_update_last_frame(unsigned long val) { return 0; }
+static inline bool shstk_is_enabled(void) { return false; }
 #endif /* CONFIG_X86_USER_SHADOW_STACK */
 
 #endif /* __ASSEMBLY__ */
diff --git a/arch/x86/kernel/shstk.c b/arch/x86/kernel/shstk.c
index 9797d4cdb78a..059685612362 100644
--- a/arch/x86/kernel/shstk.c
+++ b/arch/x86/kernel/shstk.c
@@ -588,3 +588,8 @@  int shstk_update_last_frame(unsigned long val)
 	ssp = get_user_shstk_addr();
 	return write_user_shstk_64((u64 __user *)ssp, (u64)val);
 }
+
+bool shstk_is_enabled(void)
+{
+	return features_enabled(ARCH_SHSTK_SHSTK);
+}
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index 6402fb3089d2..5a952c5ea66b 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -12,6 +12,7 @@ 
 #include <linux/ptrace.h>
 #include <linux/uprobes.h>
 #include <linux/uaccess.h>
+#include <linux/syscalls.h>
 
 #include <linux/kdebug.h>
 #include <asm/processor.h>
@@ -308,6 +309,122 @@  static int uprobe_init_insn(struct arch_uprobe *auprobe, struct insn *insn, bool
 }
 
 #ifdef CONFIG_X86_64
+
+asm (
+	".pushsection .rodata\n"
+	".global uretprobe_trampoline_entry\n"
+	"uretprobe_trampoline_entry:\n"
+	"pushq %rax\n"
+	"pushq %rcx\n"
+	"pushq %r11\n"
+	"movq $" __stringify(__NR_uretprobe) ", %rax\n"
+	"syscall\n"
+	".global uretprobe_syscall_check\n"
+	"uretprobe_syscall_check:\n"
+	"popq %r11\n"
+	"popq %rcx\n"
+
+	/* The uretprobe syscall replaces stored %rax value with final
+	 * return address, so we don't restore %rax in here and just
+	 * call ret.
+	 */
+	"retq\n"
+	".global uretprobe_trampoline_end\n"
+	"uretprobe_trampoline_end:\n"
+	".popsection\n"
+);
+
+extern u8 uretprobe_trampoline_entry[];
+extern u8 uretprobe_trampoline_end[];
+extern u8 uretprobe_syscall_check[];
+
+void *arch_uprobe_trampoline(unsigned long *psize)
+{
+	static uprobe_opcode_t insn = UPROBE_SWBP_INSN;
+	struct pt_regs *regs = task_pt_regs(current);
+
+	/*
+	 * At the moment the uretprobe syscall trampoline is supported
+	 * only for native 64-bit process, the compat process still uses
+	 * standard breakpoint.
+	 */
+	if (user_64bit_mode(regs)) {
+		*psize = uretprobe_trampoline_end - uretprobe_trampoline_entry;
+		return uretprobe_trampoline_entry;
+	}
+
+	*psize = UPROBE_SWBP_INSN_SIZE;
+	return &insn;
+}
+
+static unsigned long trampoline_check_ip(void)
+{
+	unsigned long tramp = uprobe_get_trampoline_vaddr();
+
+	return tramp + (uretprobe_syscall_check - uretprobe_trampoline_entry);
+}
+
+SYSCALL_DEFINE0(uretprobe)
+{
+	struct pt_regs *regs = task_pt_regs(current);
+	unsigned long err, ip, sp, r11_cx_ax[3];
+
+	if (regs->ip != trampoline_check_ip())
+		goto sigill;
+
+	err = copy_from_user(r11_cx_ax, (void __user *)regs->sp, sizeof(r11_cx_ax));
+	if (err)
+		goto sigill;
+
+	/* expose the "right" values of r11/cx/ax/sp to uprobe_consumer/s */
+	regs->r11 = r11_cx_ax[0];
+	regs->cx  = r11_cx_ax[1];
+	regs->ax  = r11_cx_ax[2];
+	regs->sp += sizeof(r11_cx_ax);
+	regs->orig_ax = -1;
+
+	ip = regs->ip;
+	sp = regs->sp;
+
+	uprobe_handle_trampoline(regs);
+
+	/*
+	 * Some of the uprobe consumers have changed sp; we can do nothing,
+	 * just return via iret.
+	 * .. or shadow stack is enabled, in which case we need to skip
+	 * return through the user space stack address.
+	 */
+	if (regs->sp != sp || shstk_is_enabled())
+		return regs->ax;
+	regs->sp -= sizeof(r11_cx_ax);
+
+	/* for the case uprobe_consumer has changed r11/cx */
+	r11_cx_ax[0] = regs->r11;
+	r11_cx_ax[1] = regs->cx;
+
+	/*
+	 * ax register is passed through as return value, so we can use
+	 * its space on stack for ip value and jump to it through the
+	 * trampoline's ret instruction
+	 */
+	r11_cx_ax[2] = regs->ip;
+	regs->ip = ip;
+
+	err = copy_to_user((void __user *)regs->sp, r11_cx_ax, sizeof(r11_cx_ax));
+	if (err)
+		goto sigill;
+
+	/* ensure sysret, see do_syscall_64() */
+	regs->r11 = regs->flags;
+	regs->cx  = regs->ip;
+
+	return regs->ax;
+
+sigill:
+	force_sig(SIGILL);
+	return -1;
+}
+
 /*
  * If arch_uprobe->insn doesn't use rip-relative addressing, return
  * immediately.  Otherwise, rewrite the instruction so that it accesses
diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
index f46e0ca0169c..b503fafb7fb3 100644
--- a/include/linux/uprobes.h
+++ b/include/linux/uprobes.h
@@ -138,6 +138,9 @@  extern bool arch_uretprobe_is_alive(struct return_instance *ret, enum rp_check c
 extern bool arch_uprobe_ignore(struct arch_uprobe *aup, struct pt_regs *regs);
 extern void arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr,
 					 void *src, unsigned long len);
+extern void uprobe_handle_trampoline(struct pt_regs *regs);
+extern void *arch_uprobe_trampoline(unsigned long *psize);
+extern unsigned long uprobe_get_trampoline_vaddr(void);
 #else /* !CONFIG_UPROBES */
 struct uprobes_state {
 };
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 2c83ba776fc7..2816e65729ac 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -1474,11 +1474,20 @@  static int xol_add_vma(struct mm_struct *mm, struct xol_area *area)
 	return ret;
 }
 
+void * __weak arch_uprobe_trampoline(unsigned long *psize)
+{
+	static uprobe_opcode_t insn = UPROBE_SWBP_INSN;
+
+	*psize = UPROBE_SWBP_INSN_SIZE;
+	return &insn;
+}
+
 static struct xol_area *__create_xol_area(unsigned long vaddr)
 {
 	struct mm_struct *mm = current->mm;
-	uprobe_opcode_t insn = UPROBE_SWBP_INSN;
+	unsigned long insns_size;
 	struct xol_area *area;
+	void *insns;
 
 	area = kmalloc(sizeof(*area), GFP_KERNEL);
 	if (unlikely(!area))
@@ -1502,7 +1511,8 @@  static struct xol_area *__create_xol_area(unsigned long vaddr)
 	/* Reserve the 1st slot for get_trampoline_vaddr() */
 	set_bit(0, area->bitmap);
 	atomic_set(&area->slot_count, 1);
-	arch_uprobe_copy_ixol(area->pages[0], 0, &insn, UPROBE_SWBP_INSN_SIZE);
+	insns = arch_uprobe_trampoline(&insns_size);
+	arch_uprobe_copy_ixol(area->pages[0], 0, insns, insns_size);
 
 	if (!xol_add_vma(mm, area))
 		return area;
@@ -1827,7 +1837,7 @@  void uprobe_copy_process(struct task_struct *t, unsigned long flags)
  *
  * Returns -1 in case the xol_area is not allocated.
  */
-static unsigned long get_trampoline_vaddr(void)
+unsigned long uprobe_get_trampoline_vaddr(void)
 {
 	struct xol_area *area;
 	unsigned long trampoline_vaddr = -1;
@@ -1878,7 +1888,7 @@  static void prepare_uretprobe(struct uprobe *uprobe, struct pt_regs *regs)
 	if (!ri)
 		return;
 
-	trampoline_vaddr = get_trampoline_vaddr();
+	trampoline_vaddr = uprobe_get_trampoline_vaddr();
 	orig_ret_vaddr = arch_uretprobe_hijack_return_addr(trampoline_vaddr, regs);
 	if (orig_ret_vaddr == -1)
 		goto fail;
@@ -2123,7 +2133,7 @@  static struct return_instance *find_next_ret_chain(struct return_instance *ri)
 	return ri;
 }
 
-static void handle_trampoline(struct pt_regs *regs)
+void uprobe_handle_trampoline(struct pt_regs *regs)
 {
 	struct uprobe_task *utask;
 	struct return_instance *ri, *next;
@@ -2187,8 +2197,8 @@  static void handle_swbp(struct pt_regs *regs)
 	int is_swbp;
 
 	bp_vaddr = uprobe_get_swbp_addr(regs);
-	if (bp_vaddr == get_trampoline_vaddr())
-		return handle_trampoline(regs);
+	if (bp_vaddr == uprobe_get_trampoline_vaddr())
+		return uprobe_handle_trampoline(regs);
 
 	uprobe = find_active_uprobe(bp_vaddr, &is_swbp);
 	if (!uprobe) {