@@ -13,6 +13,7 @@
#include <linux/syscalls.h>
#include <linux/rseq.h>
#include <linux/types.h>
+#include <linux/task_work.h>
#include <asm/ptrace.h>
#define CREATE_TRACE_POINTS
@@ -320,6 +321,8 @@ void __rseq_handle_notify_resume(struct ksignal *ksig, struct pt_regs *regs)
if (unlikely(t->flags & PF_EXITING))
return;
+ t->kill_on_efault = true;
+
/*
* regs is NULL if and only if the caller is in a syscall path. Skip
* fixup and leave rseq_cs as is so that rseq_sycall() will detect and
@@ -330,13 +333,18 @@ void __rseq_handle_notify_resume(struct ksignal *ksig, struct pt_regs *regs)
if (unlikely(ret < 0))
goto error;
}
- if (unlikely(rseq_update_cpu_node_id(t)))
- goto error;
- return;
+ if (likely(!rseq_update_cpu_node_id(t)))
+ goto out;
error:
+ /* Allow task work to override signr */
+ task_work_run();
+
sig = ksig ? ksig->sig : 0;
force_sigsegv(sig);
+
+out:
+ t->kill_on_efault = false;
}
#ifdef CONFIG_DEBUG_RSEQ
@@ -353,8 +361,17 @@ void rseq_syscall(struct pt_regs *regs)
if (!t->rseq)
return;
- if (rseq_get_rseq_cs(t, &rseq_cs) || in_rseq_cs(ip, &rseq_cs))
+
+ t->kill_on_efault = true;
+
+ if (rseq_get_rseq_cs(t, &rseq_cs) || in_rseq_cs(ip, &rseq_cs)) {
+ /* Allow task work to override signr */
+ task_work_run();
+
force_sig(SIGSEGV);
+ }
+
+ t->kill_on_efault = false;
}
#endif
Uncorrected memory errors for user pages are signaled to processes using SIGBUS or, if the error happens in a syscall, an error retval from the syscall. The SIGBUS is documented in Documentation/mm/hwpoison.rst#failure-recovery-modes Once a user task sets t->rseq in the rseq() syscall, if the kernel cannot access the memory pointed to by t->rseq->rseq_cs, that initial rseq() and all future syscalls should return an error so understandably the code just kills the task. To ensure that SIGBUS is used set the new t->kill_on_efault flag and run queued task work on rseq_get_rseq_cs() errors to give memory_failure the chance to run. Note: the rseq checks run inside resume_user_mode_work() so whenever _TIF_NOTIFY_RESUME is set. They do not run on every syscall exit so I'm not concerned that these extra flag operations are in a hot path, except with CONFIG_DEBUG_RSEQ. Signed-off-by: Andrew Zaborowski <andrew.zaborowski@intel.com> --- kernel/rseq.c | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-)