@@ -1153,3 +1153,14 @@ ENTRY(async_page_fault)
jmp error_code
END(async_page_fault)
#endif
+
+ENTRY(rewind_stack_do_exit)
+ /* Prevent any naive code from trying to unwind to our caller. */
+ xorl %ebp, %ebp
+
+ movl PER_CPU_VAR(cpu_current_top_of_stack), %esi
+ leal -TOP_OF_KERNEL_STACK_PADDING-PTREGS_SIZE(%esi), %esp
+
+ call do_exit
+1: jmp 1b
+END(rewind_stack_do_exit)
@@ -1423,3 +1423,14 @@ ENTRY(ignore_sysret)
mov $-ENOSYS, %eax
sysret
END(ignore_sysret)
+
+ENTRY(rewind_stack_do_exit)
+ /* Prevent any naive code from trying to unwind to our caller. */
+ xorl %ebp, %ebp
+
+ movq PER_CPU_VAR(cpu_current_top_of_stack), %rax
+ leaq -TOP_OF_KERNEL_STACK_PADDING-PTREGS_SIZE(%rax), %rsp
+
+ call do_exit
+1: jmp 1b
+END(rewind_stack_do_exit)
@@ -228,6 +228,8 @@ unsigned long oops_begin(void)
EXPORT_SYMBOL_GPL(oops_begin);
NOKPROBE_SYMBOL(oops_begin);
+extern void __noreturn rewind_stack_do_exit(int signr);
+
void oops_end(unsigned long flags, struct pt_regs *regs, int signr)
{
if (regs && kexec_should_crash(current))
@@ -247,12 +249,15 @@ void oops_end(unsigned long flags, struct pt_regs *regs, int signr)
return;
if (in_interrupt())
panic("Fatal exception in interrupt");
- if (((current_stack_pointer() ^ (current_top_of_stack() - 1))
- & ~(THREAD_SIZE - 1)) != 0)
- panic("Fatal exception on special stack");
if (panic_on_oops)
panic("Fatal exception");
- do_exit(signr);
+
+ /*
+ * We're not going to return, but we might be on an IST stack or
+ * have very little stack space left. Rewind the stack and kill
+ * the task.
+ */
+ rewind_stack_do_exit(signr);
}
NOKPROBE_SYMBOL(oops_end);
If we call do_exit with a clean stack, we greatly reduce the risk of recursive oopses due to stack overflow in do_exit, and we allow do_exit to work even if we OOPS from an IST stack. The latter gives us a much better chance of surviving long enough after we detect a stack overflow to write out our logs. I intentionally separated this from the preceding patch that disables do_exit-on-OOPS on IST stacks. This way, if we need to revert this patch, we still end up in an acceptable state wrt stack overflow handling. Signed-off-by: Andy Lutomirski <luto@kernel.org> --- arch/x86/entry/entry_32.S | 11 +++++++++++ arch/x86/entry/entry_64.S | 11 +++++++++++ arch/x86/kernel/dumpstack.c | 13 +++++++++---- 3 files changed, 31 insertions(+), 4 deletions(-)