Message ID | 20250217063335.22257-4-xueshuai@linux.alibaba.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | mm/hwpoison: Fix regressions in memory failure handling | expand |
On Mon, Feb 17, 2025 at 02:33:33PM +0800, Shuai Xue wrote: > diff --git a/arch/x86/kernel/cpu/mce/severity.c b/arch/x86/kernel/cpu/mce/severity.c > index dac4d64dfb2a..14c2d71c3ce1 100644 > --- a/arch/x86/kernel/cpu/mce/severity.c > +++ b/arch/x86/kernel/cpu/mce/severity.c > @@ -16,6 +16,7 @@ > #include <asm/traps.h> > #include <asm/insn.h> > #include <asm/insn-eval.h> > +#include <linux/extable.h> > > #include "internal.h" > > @@ -285,7 +286,8 @@ static bool is_copy_from_user(struct pt_regs *regs) > */ > static noinstr int error_context(struct mce *m, struct pt_regs *regs) > { > - int fixup_type; > + const struct exception_table_entry *e; > + int fixup_type, imm; > bool copy_user; > > if ((m->cs & 3) == 3) > @@ -294,9 +296,14 @@ static noinstr int error_context(struct mce *m, struct pt_regs *regs) > if (!mc_recoverable(m->mcgstatus)) > return IN_KERNEL; > > + e = search_exception_tables(m->ip); > + if (!e) > + return IN_KERNEL; You didn't actually build this, did you? Or did you ignore the extra noinstr warnings? > /* Allow instrumentation around external facilities usage. */ > instrumentation_begin(); > - fixup_type = ex_get_fixup_type(m->ip); > + fixup_type = FIELD_GET(EX_DATA_TYPE_MASK, e->data); > + imm = FIELD_GET(EX_DATA_IMM_MASK, e->data); > copy_user = is_copy_from_user(regs); > instrumentation_end(); > > @@ -304,9 +311,13 @@ static noinstr int error_context(struct mce *m, struct pt_regs *regs) > case EX_TYPE_UACCESS: > if (!copy_user) > return IN_KERNEL; > - m->kflags |= MCE_IN_KERNEL_COPYIN; > - fallthrough; > - > + m->kflags |= MCE_IN_KERNEL_COPYIN | MCE_IN_KERNEL_RECOV; > + return IN_KERNEL_RECOV; > + case EX_TYPE_IMM_REG: > + if (!copy_user || imm != -EFAULT) > + return IN_KERNEL; > + m->kflags |= MCE_IN_KERNEL_COPYIN | MCE_IN_KERNEL_RECOV; > + return IN_KERNEL_RECOV; Maybe I'm justnot understanding things, but what's wrong with something like the below; why do we care about the ex-type if we know its a MOV reading from userspace? The less we muck about with the extable here, the better. --- diff --git a/arch/x86/kernel/cpu/mce/severity.c b/arch/x86/kernel/cpu/mce/severity.c index dac4d64dfb2a..cb021058165f 100644 --- a/arch/x86/kernel/cpu/mce/severity.c +++ b/arch/x86/kernel/cpu/mce/severity.c @@ -300,13 +300,12 @@ static noinstr int error_context(struct mce *m, struct pt_regs *regs) copy_user = is_copy_from_user(regs); instrumentation_end(); - switch (fixup_type) { - case EX_TYPE_UACCESS: - if (!copy_user) - return IN_KERNEL; - m->kflags |= MCE_IN_KERNEL_COPYIN; - fallthrough; + if (copy_user) { + m->kflags |= MCE_IN_KERNEL_COPYIN | MCE_IN_KERNEL_COPYIN; + return IN_KERNEL_RECOV + } + switch (fixup_type) { case EX_TYPE_FAULT_MCE_SAFE: case EX_TYPE_DEFAULT_MCE_SAFE: m->kflags |= MCE_IN_KERNEL_RECOV;
On Tue, Feb 18, 2025 at 01:54:08PM +0100, Peter Zijlstra wrote: > --- > diff --git a/arch/x86/kernel/cpu/mce/severity.c b/arch/x86/kernel/cpu/mce/severity.c > index dac4d64dfb2a..cb021058165f 100644 > --- a/arch/x86/kernel/cpu/mce/severity.c > +++ b/arch/x86/kernel/cpu/mce/severity.c > @@ -300,13 +300,12 @@ static noinstr int error_context(struct mce *m, struct pt_regs *regs) > copy_user = is_copy_from_user(regs); > instrumentation_end(); > > - switch (fixup_type) { > - case EX_TYPE_UACCESS: > - if (!copy_user) > - return IN_KERNEL; > - m->kflags |= MCE_IN_KERNEL_COPYIN; > - fallthrough; > + if (copy_user) { > + m->kflags |= MCE_IN_KERNEL_COPYIN | MCE_IN_KERNEL_COPYIN; Typing is hard, obviously that second should'be been _RECOV. > + return IN_KERNEL_RECOV But why are we having that bit *and* a return value saying the same thing? > + } > > + switch (fixup_type) { > case EX_TYPE_FAULT_MCE_SAFE: > case EX_TYPE_DEFAULT_MCE_SAFE: > m->kflags |= MCE_IN_KERNEL_RECOV;
Hi, Peter 在 2025/2/18 20:54, Peter Zijlstra 写道: > On Mon, Feb 17, 2025 at 02:33:33PM +0800, Shuai Xue wrote: > >> diff --git a/arch/x86/kernel/cpu/mce/severity.c b/arch/x86/kernel/cpu/mce/severity.c >> index dac4d64dfb2a..14c2d71c3ce1 100644 >> --- a/arch/x86/kernel/cpu/mce/severity.c >> +++ b/arch/x86/kernel/cpu/mce/severity.c >> @@ -16,6 +16,7 @@ >> #include <asm/traps.h> >> #include <asm/insn.h> >> #include <asm/insn-eval.h> >> +#include <linux/extable.h> >> >> #include "internal.h" >> >> @@ -285,7 +286,8 @@ static bool is_copy_from_user(struct pt_regs *regs) >> */ >> static noinstr int error_context(struct mce *m, struct pt_regs *regs) >> { >> - int fixup_type; >> + const struct exception_table_entry *e; >> + int fixup_type, imm; >> bool copy_user; >> >> if ((m->cs & 3) == 3) >> @@ -294,9 +296,14 @@ static noinstr int error_context(struct mce *m, struct pt_regs *regs) >> if (!mc_recoverable(m->mcgstatus)) >> return IN_KERNEL; >> >> + e = search_exception_tables(m->ip); >> + if (!e) >> + return IN_KERNEL; > > You didn't actually build this, did you? Or did you ignore the extra > noinstr warnings? I did build and test this patch set on it. But I did not find any warnings. Could you provide more details? > >> /* Allow instrumentation around external facilities usage. */ >> instrumentation_begin(); >> - fixup_type = ex_get_fixup_type(m->ip); >> + fixup_type = FIELD_GET(EX_DATA_TYPE_MASK, e->data); >> + imm = FIELD_GET(EX_DATA_IMM_MASK, e->data); >> copy_user = is_copy_from_user(regs); >> instrumentation_end(); >> >> @@ -304,9 +311,13 @@ static noinstr int error_context(struct mce *m, struct pt_regs *regs) >> case EX_TYPE_UACCESS: >> if (!copy_user) >> return IN_KERNEL; >> - m->kflags |= MCE_IN_KERNEL_COPYIN; >> - fallthrough; >> - >> + m->kflags |= MCE_IN_KERNEL_COPYIN | MCE_IN_KERNEL_RECOV; >> + return IN_KERNEL_RECOV; >> + case EX_TYPE_IMM_REG: >> + if (!copy_user || imm != -EFAULT) >> + return IN_KERNEL; >> + m->kflags |= MCE_IN_KERNEL_COPYIN | MCE_IN_KERNEL_RECOV; >> + return IN_KERNEL_RECOV; > > Maybe I'm justnot understanding things, but what's wrong with something > like the below; why do we care about the ex-type if we know its a MOV > reading from userspace? > > The less we muck about with the extable here, the better. We need to make sure that we have register a fixup handler for the copy_user case. If no fixup handler found, the PC accessing posion will trigger #MCE again and again resulting a hardlock up. > > --- > diff --git a/arch/x86/kernel/cpu/mce/severity.c b/arch/x86/kernel/cpu/mce/severity.c > index dac4d64dfb2a..cb021058165f 100644 > --- a/arch/x86/kernel/cpu/mce/severity.c > +++ b/arch/x86/kernel/cpu/mce/severity.c > @@ -300,13 +300,12 @@ static noinstr int error_context(struct mce *m, struct pt_regs *regs) > copy_user = is_copy_from_user(regs); > instrumentation_end(); > > - switch (fixup_type) { > - case EX_TYPE_UACCESS: > - if (!copy_user) > - return IN_KERNEL; > - m->kflags |= MCE_IN_KERNEL_COPYIN; > - fallthrough; > + if (copy_user) { > + m->kflags |= MCE_IN_KERNEL_COPYIN | MCE_IN_KERNEL_COPYIN; > + return IN_KERNEL_RECOV > + } > > + switch (fixup_type) { > case EX_TYPE_FAULT_MCE_SAFE: > case EX_TYPE_DEFAULT_MCE_SAFE: > m->kflags |= MCE_IN_KERNEL_RECOV; Thanks. Shuai
在 2025/2/18 21:02, Peter Zijlstra 写道: > On Tue, Feb 18, 2025 at 01:54:08PM +0100, Peter Zijlstra wrote: > >> --- >> diff --git a/arch/x86/kernel/cpu/mce/severity.c b/arch/x86/kernel/cpu/mce/severity.c >> index dac4d64dfb2a..cb021058165f 100644 >> --- a/arch/x86/kernel/cpu/mce/severity.c >> +++ b/arch/x86/kernel/cpu/mce/severity.c >> @@ -300,13 +300,12 @@ static noinstr int error_context(struct mce *m, struct pt_regs *regs) >> copy_user = is_copy_from_user(regs); >> instrumentation_end(); >> >> - switch (fixup_type) { >> - case EX_TYPE_UACCESS: >> - if (!copy_user) >> - return IN_KERNEL; >> - m->kflags |= MCE_IN_KERNEL_COPYIN; >> - fallthrough; >> + if (copy_user) { >> + m->kflags |= MCE_IN_KERNEL_COPYIN | MCE_IN_KERNEL_COPYIN; > > Typing is hard, obviously that second should'be been _RECOV. Hah, it doesn't matter, I got your point. > >> + return IN_KERNEL_RECOV > > But why are we having that bit *and* a return value saying the same > thing? Yes, it is rather redundant. I can refactor this out if @Borislav is happy with that. > >> + } >> >> + switch (fixup_type) { >> case EX_TYPE_FAULT_MCE_SAFE: >> case EX_TYPE_DEFAULT_MCE_SAFE: >> m->kflags |= MCE_IN_KERNEL_RECOV; Thanks. Shuai
On Tue, Feb 18, 2025 at 09:28:33PM +0800, Shuai Xue wrote: > I did build and test this patch set on it. But I did not find any warnings. > Could you provide more details? NOINSTR_VALIDATION=y helps > > > /* Allow instrumentation around external facilities usage. */ > > > instrumentation_begin(); > > > - fixup_type = ex_get_fixup_type(m->ip); > > > + fixup_type = FIELD_GET(EX_DATA_TYPE_MASK, e->data); > > > + imm = FIELD_GET(EX_DATA_IMM_MASK, e->data); > > > copy_user = is_copy_from_user(regs); > > > instrumentation_end(); > > > @@ -304,9 +311,13 @@ static noinstr int error_context(struct mce *m, struct pt_regs *regs) > > > case EX_TYPE_UACCESS: > > > if (!copy_user) > > > return IN_KERNEL; > > > - m->kflags |= MCE_IN_KERNEL_COPYIN; > > > - fallthrough; > > > - > > > + m->kflags |= MCE_IN_KERNEL_COPYIN | MCE_IN_KERNEL_RECOV; > > > + return IN_KERNEL_RECOV; > > > + case EX_TYPE_IMM_REG: > > > + if (!copy_user || imm != -EFAULT) > > > + return IN_KERNEL; > > > + m->kflags |= MCE_IN_KERNEL_COPYIN | MCE_IN_KERNEL_RECOV; > > > + return IN_KERNEL_RECOV; > > > > Maybe I'm justnot understanding things, but what's wrong with something > > like the below; why do we care about the ex-type if we know its a MOV > > reading from userspace? > > > > The less we muck about with the extable here, the better. > > We need to make sure that we have register a fixup handler for the copy_user > case. If no fixup handler found, the PC accessing posion will trigger #MCE > again and again resulting a hardlock up. Well, then write it like so. Afaict, you don't care what the actual exception type is, just that there is one, for the copy_user case. diff --git a/arch/x86/kernel/cpu/mce/severity.c b/arch/x86/kernel/cpu/mce/severity.c index dac4d64dfb2a..cfdae25eacd7 100644 --- a/arch/x86/kernel/cpu/mce/severity.c +++ b/arch/x86/kernel/cpu/mce/severity.c @@ -301,18 +301,19 @@ static noinstr int error_context(struct mce *m, struct pt_regs *regs) instrumentation_end(); switch (fixup_type) { - case EX_TYPE_UACCESS: - if (!copy_user) - return IN_KERNEL; - m->kflags |= MCE_IN_KERNEL_COPYIN; - fallthrough; - case EX_TYPE_FAULT_MCE_SAFE: case EX_TYPE_DEFAULT_MCE_SAFE: m->kflags |= MCE_IN_KERNEL_RECOV; return IN_KERNEL_RECOV; default: + if (copy_user) { + m->kflags |= MCE_IN_KERNEL_COPYIN | MCE_IN_KERNEL_RECOV; + return IN_KERNEL_RECOV; + } + fallthrough; + + case EX_TYPE_NONE: return IN_KERNEL; } }
On Tue, Feb 18, 2025 at 03:15:35PM +0100, Peter Zijlstra wrote: > diff --git a/arch/x86/kernel/cpu/mce/severity.c b/arch/x86/kernel/cpu/mce/severity.c > index dac4d64dfb2a..cfdae25eacd7 100644 > --- a/arch/x86/kernel/cpu/mce/severity.c > +++ b/arch/x86/kernel/cpu/mce/severity.c > @@ -301,18 +301,19 @@ static noinstr int error_context(struct mce *m, struct pt_regs *regs) > instrumentation_end(); > > switch (fixup_type) { > - case EX_TYPE_UACCESS: > - if (!copy_user) > - return IN_KERNEL; > - m->kflags |= MCE_IN_KERNEL_COPYIN; > - fallthrough; > - > case EX_TYPE_FAULT_MCE_SAFE: > case EX_TYPE_DEFAULT_MCE_SAFE: > m->kflags |= MCE_IN_KERNEL_RECOV; > return IN_KERNEL_RECOV; > > default: > + if (copy_user) { As said on chat, if we can make is_copy_from_user() *always* correctly detect user access, then sure but I'm afraid EX_TYPE_UACCESS being generated at the handful places where we do user memory access is there for a reason as it makes it pretty explicit.
On Tue, Feb 18, 2025 at 05:48:00PM +0100, Borislav Petkov wrote: > On Tue, Feb 18, 2025 at 03:15:35PM +0100, Peter Zijlstra wrote: > > diff --git a/arch/x86/kernel/cpu/mce/severity.c b/arch/x86/kernel/cpu/mce/severity.c > > index dac4d64dfb2a..cfdae25eacd7 100644 > > --- a/arch/x86/kernel/cpu/mce/severity.c > > +++ b/arch/x86/kernel/cpu/mce/severity.c > > @@ -301,18 +301,19 @@ static noinstr int error_context(struct mce *m, struct pt_regs *regs) > > instrumentation_end(); > > > > switch (fixup_type) { > > - case EX_TYPE_UACCESS: > > - if (!copy_user) > > - return IN_KERNEL; > > - m->kflags |= MCE_IN_KERNEL_COPYIN; > > - fallthrough; > > - > > case EX_TYPE_FAULT_MCE_SAFE: > > case EX_TYPE_DEFAULT_MCE_SAFE: > > m->kflags |= MCE_IN_KERNEL_RECOV; > > return IN_KERNEL_RECOV; > > > > default: > > + if (copy_user) { > > As said on chat, if we can make is_copy_from_user() *always* correctly detect > user access, then sure but I'm afraid EX_TYPE_UACCESS being generated at the > handful places where we do user memory access is there for a reason as it > makes it pretty explicit. Thing is, we have copy routines that do not know if its user or not. is_copy_from_user() must be reliable. Anyway, if you all really want to go all funny, try the below. Someone has to go and stick some EX_FLAG_USER on things, but I just really don't believe that's doing to be useful. Because while you're doing that, you should also audit if is_copy_from_user() will catch it and if it does, you don't need the tag. See how much tags you end up with.. --- diff --git a/arch/x86/include/asm/extable_fixup_types.h b/arch/x86/include/asm/extable_fixup_types.h index 906b0d5541e8..1d6c6ff51d28 100644 --- a/arch/x86/include/asm/extable_fixup_types.h +++ b/arch/x86/include/asm/extable_fixup_types.h @@ -31,6 +31,9 @@ #define EX_FLAG_CLEAR_DX EX_DATA_FLAG(2) #define EX_FLAG_CLEAR_AX_DX EX_DATA_FLAG(3) +#define EX_FLAG_USER EX_DATA_FLAG(4) +#define EX_FLAG_MCE EX_DATA_FLAG(8) + /* types */ #define EX_TYPE_NONE 0 #define EX_TYPE_DEFAULT 1 @@ -46,8 +49,6 @@ #define EX_TYPE_RDMSR_SAFE 11 /* reg := -EIO */ #define EX_TYPE_WRMSR_IN_MCE 12 #define EX_TYPE_RDMSR_IN_MCE 13 -#define EX_TYPE_DEFAULT_MCE_SAFE 14 -#define EX_TYPE_FAULT_MCE_SAFE 15 #define EX_TYPE_POP_REG 16 /* sp += sizeof(long) */ #define EX_TYPE_POP_ZERO (EX_TYPE_POP_REG | EX_DATA_IMM(0)) diff --git a/arch/x86/kernel/cpu/mce/severity.c b/arch/x86/kernel/cpu/mce/severity.c index dac4d64dfb2a..86a32fa020d2 100644 --- a/arch/x86/kernel/cpu/mce/severity.c +++ b/arch/x86/kernel/cpu/mce/severity.c @@ -300,21 +300,20 @@ static noinstr int error_context(struct mce *m, struct pt_regs *regs) copy_user = is_copy_from_user(regs); instrumentation_end(); - switch (fixup_type) { - case EX_TYPE_UACCESS: - if (!copy_user) - return IN_KERNEL; - m->kflags |= MCE_IN_KERNEL_COPYIN; - fallthrough; - - case EX_TYPE_FAULT_MCE_SAFE: - case EX_TYPE_DEFAULT_MCE_SAFE: + if (fixup_type == EX_TYPE_NONE) + return IN_KERNEL; + + if (fixup_type & EX_FLAG_MCE) { m->kflags |= MCE_IN_KERNEL_RECOV; return IN_KERNEL_RECOV; + } - default: - return IN_KERNEL; + if ((fixup_type & EX_FLAG_USER) || copy_user) { + m->kflags |= MCE_IN_KERNEL_COPYIN | MCE_IN_KERNEL_RECOV; + return IN_KERNEL_RECOV; } + + return IN_KERNEL; } /* See AMD PPR(s) section Machine Check Error Handling. */ diff --git a/arch/x86/kernel/fpu/legacy.h b/arch/x86/kernel/fpu/legacy.h index 098f367bb8a7..3f6036840d65 100644 --- a/arch/x86/kernel/fpu/legacy.h +++ b/arch/x86/kernel/fpu/legacy.h @@ -24,7 +24,7 @@ static inline void ldmxcsr(u32 mxcsr) asm volatile(ASM_STAC "\n" \ "1: " #insn "\n" \ "2: " ASM_CLAC "\n" \ - _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_FAULT_MCE_SAFE) \ + _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_FAULT | EX_FLAG_MCE) \ : [err] "=a" (err), output \ : "0"(0), input); \ err; \ diff --git a/arch/x86/kernel/fpu/xstate.h b/arch/x86/kernel/fpu/xstate.h index aa16f1a1bbcf..eef534091105 100644 --- a/arch/x86/kernel/fpu/xstate.h +++ b/arch/x86/kernel/fpu/xstate.h @@ -115,7 +115,7 @@ static inline int update_pkru_in_sigframe(struct xregs_state __user *buf, u64 ma asm volatile("1:" op "\n\t" \ "xor %[err], %[err]\n" \ "2:\n\t" \ - _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_FAULT_MCE_SAFE) \ + _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_FAULT | EX_FLAG_MCE) \ : [err] "=a" (err) \ : "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \ : "memory") diff --git a/arch/x86/lib/copy_mc_64.S b/arch/x86/lib/copy_mc_64.S index c859a8a09860..7977689ad46e 100644 --- a/arch/x86/lib/copy_mc_64.S +++ b/arch/x86/lib/copy_mc_64.S @@ -103,9 +103,9 @@ SYM_FUNC_START(copy_mc_fragile) movl %ecx, %edx jmp copy_mc_fragile_handle_tail - _ASM_EXTABLE_TYPE(.L_read_leading_bytes, .E_leading_bytes, EX_TYPE_DEFAULT_MCE_SAFE) - _ASM_EXTABLE_TYPE(.L_read_words, .E_read_words, EX_TYPE_DEFAULT_MCE_SAFE) - _ASM_EXTABLE_TYPE(.L_read_trailing_bytes, .E_trailing_bytes, EX_TYPE_DEFAULT_MCE_SAFE) + _ASM_EXTABLE_TYPE(.L_read_leading_bytes, .E_leading_bytes, EX_TYPE_DEFAULT | EX_FLAG_MCE) + _ASM_EXTABLE_TYPE(.L_read_words, .E_read_words, EX_TYPE_DEFAULT | EX_FLAG_MCE) + _ASM_EXTABLE_TYPE(.L_read_trailing_bytes, .E_trailing_bytes, EX_TYPE_DEFAULT | EX_FLAG_MCE) _ASM_EXTABLE(.L_write_leading_bytes, .E_leading_bytes) _ASM_EXTABLE(.L_write_words, .E_write_words) _ASM_EXTABLE(.L_write_trailing_bytes, .E_trailing_bytes) @@ -143,7 +143,7 @@ SYM_FUNC_START(copy_mc_enhanced_fast_string) movq %rcx, %rax RET - _ASM_EXTABLE_TYPE(.L_copy, .E_copy, EX_TYPE_DEFAULT_MCE_SAFE) + _ASM_EXTABLE_TYPE(.L_copy, .E_copy, EX_TYPE_DEFAULT | EX_FLAG_MCE) SYM_FUNC_END(copy_mc_enhanced_fast_string) #endif /* !CONFIG_UML */ diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c index 51986e8a9d35..7358bf10baba 100644 --- a/arch/x86/mm/extable.c +++ b/arch/x86/mm/extable.c @@ -293,8 +293,10 @@ static bool ex_handler_eretu(const struct exception_table_entry *fixup, int ex_get_fixup_type(unsigned long ip) { const struct exception_table_entry *e = search_exception_tables(ip); + if (!e) + return EX_TYPE_NONE; - return e ? FIELD_GET(EX_DATA_TYPE_MASK, e->data) : EX_TYPE_NONE; + return FIELD_GET(EX_DATA_TYPE_MASK, e->data) | (e->data & (EX_FLAG_USER | EX_FLAG_MCE)); } int fixup_exception(struct pt_regs *regs, int trapnr, unsigned long error_code, @@ -327,10 +329,8 @@ int fixup_exception(struct pt_regs *regs, int trapnr, unsigned long error_code, switch (type) { case EX_TYPE_DEFAULT: - case EX_TYPE_DEFAULT_MCE_SAFE: return ex_handler_default(e, regs); case EX_TYPE_FAULT: - case EX_TYPE_FAULT_MCE_SAFE: return ex_handler_fault(e, regs, trapnr); case EX_TYPE_UACCESS: return ex_handler_uaccess(e, regs, trapnr, fault_addr);
diff --git a/arch/x86/kernel/cpu/mce/severity.c b/arch/x86/kernel/cpu/mce/severity.c index dac4d64dfb2a..14c2d71c3ce1 100644 --- a/arch/x86/kernel/cpu/mce/severity.c +++ b/arch/x86/kernel/cpu/mce/severity.c @@ -16,6 +16,7 @@ #include <asm/traps.h> #include <asm/insn.h> #include <asm/insn-eval.h> +#include <linux/extable.h> #include "internal.h" @@ -285,7 +286,8 @@ static bool is_copy_from_user(struct pt_regs *regs) */ static noinstr int error_context(struct mce *m, struct pt_regs *regs) { - int fixup_type; + const struct exception_table_entry *e; + int fixup_type, imm; bool copy_user; if ((m->cs & 3) == 3) @@ -294,9 +296,14 @@ static noinstr int error_context(struct mce *m, struct pt_regs *regs) if (!mc_recoverable(m->mcgstatus)) return IN_KERNEL; + e = search_exception_tables(m->ip); + if (!e) + return IN_KERNEL; + /* Allow instrumentation around external facilities usage. */ instrumentation_begin(); - fixup_type = ex_get_fixup_type(m->ip); + fixup_type = FIELD_GET(EX_DATA_TYPE_MASK, e->data); + imm = FIELD_GET(EX_DATA_IMM_MASK, e->data); copy_user = is_copy_from_user(regs); instrumentation_end(); @@ -304,9 +311,13 @@ static noinstr int error_context(struct mce *m, struct pt_regs *regs) case EX_TYPE_UACCESS: if (!copy_user) return IN_KERNEL; - m->kflags |= MCE_IN_KERNEL_COPYIN; - fallthrough; - + m->kflags |= MCE_IN_KERNEL_COPYIN | MCE_IN_KERNEL_RECOV; + return IN_KERNEL_RECOV; + case EX_TYPE_IMM_REG: + if (!copy_user || imm != -EFAULT) + return IN_KERNEL; + m->kflags |= MCE_IN_KERNEL_COPYIN | MCE_IN_KERNEL_RECOV; + return IN_KERNEL_RECOV; case EX_TYPE_FAULT_MCE_SAFE: case EX_TYPE_DEFAULT_MCE_SAFE: m->kflags |= MCE_IN_KERNEL_RECOV;
Commit 4c132d1d844a ("x86/futex: Remove .fixup usage") introduced a new extable fixup type, EX_TYPE_EFAULT_REG, and later patches updated the extable fixup type for copy-from-user operations, changing it from EX_TYPE_UACCESS to EX_TYPE_EFAULT_REG. Specifically, commit 99641e094d6c ("x86/uaccess: Remove .fixup usage") altered the extable fixup type for the get_user family, while commit 4c132d1d844a ("x86/futex: Remove .fixup usage") addressed the futex operations. This change inadvertently caused a regression where the error context for some copy-from-user operations no longer functions as an in-kernel recovery context, leading to kernel panics with the message: "Machine check: Data load in unrecoverable area of kernel." To fix the regression, add EX_TYPE_EFAULT_REG as a in-kernel recovery context for copy-from-user operations. Signed-off-by: Shuai Xue <xueshuai@linux.alibaba.com> Fixes: 4c132d1d844a ("x86/futex: Remove .fixup usage") Cc: stable@vger.kernel.org --- arch/x86/kernel/cpu/mce/severity.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-)