Message ID | 1490869877-118713-8-git-send-email-xiexiuqi@huawei.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Hi Xiuqi, On 2017/3/30 18:31, Xie XiuQi wrote: > Error Synchronization Barrier (ESB; part of the ARMv8.2 Extensions) > is used to synchronize Unrecoverable errors. That is, containable errors > architecturally consumed by the PE and not silently propagated. > > With ESB it is generally possible to isolate an unrecoverable error > between two ESB instructions. So, it's possible to recovery from > /* ISS field definitions for exceptions taken in to Hyp */ > #define ESR_ELx_CV (UL(1) << 24) > #define ESR_ELx_COND_SHIFT (20) > diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S > index 43512d4..d8a7306 100644 > --- a/arch/arm64/kernel/entry.S > +++ b/arch/arm64/kernel/entry.S > @@ -69,7 +69,14 @@ > #define BAD_FIQ 2 > #define BAD_ERROR 3 > > + .arch_extension ras > + > .macro kernel_entry, el, regsize = 64 > +#ifdef CONFIG_ARM64_ESB > + .if \el == 0 > + esb > + .endif > +#endif > sub sp, sp, #S_FRAME_SIZE > .if \regsize == 32 > mov w0, w0 // zero upper 32 bits of x0 > @@ -208,6 +215,7 @@ alternative_else_nop_endif > #endif > > .if \el == 0 > + msr daifset, #0xF // Set flags > ldr x23, [sp, #S_SP] // load return stack pointer > msr sp_el0, x23 > #ifdef CONFIG_ARM64_ERRATUM_845719 > @@ -226,6 +234,15 @@ alternative_else_nop_endif > > msr elr_el1, x21 // set up the return data > msr spsr_el1, x22 > + > +#ifdef CONFIG_ARM64_ESB > + .if \el == 0 > + esb // Error Synchronization Barrier > + mrs x21, disr_el1 // Check for deferred error > + tbnz x21, #31, el1_sei We may need to clear disr_el1.A after reading it because the hardware won't clear it. > + .endif > +#endif > + > ldp x0, x1, [sp, #16 * 0] > ldp x2, x3, [sp, #16 * 1] > ldp x4, x5, [sp, #16 * 2] > @@ -318,7 +335,7 @@ ENTRY(vectors) > ventry el1_sync_invalid // Synchronous EL1t > ventry el1_irq_invalid // IRQ EL1t > ventry el1_fiq_invalid // FIQ EL1t > - ventry el1_error_invalid // Error EL1t > + ventry el1_error // Error EL1t > > ventry el1_sync // Synchronous EL1h > ventry el1_irq // IRQ EL1h > @@ -328,7 +345,7 @@ ENTRY(vectors) > ventry el0_sync // Synchronous 64-bit EL0 > ventry el0_irq // IRQ 64-bit EL0 > ventry el0_fiq_invalid // FIQ 64-bit EL0 > - ventry el0_error_invalid // Error 64-bit EL0 > + ventry el0_error // Error 64-bit EL0 > > #ifdef CONFIG_COMPAT > ventry el0_sync_compat // Synchronous 32-bit EL0 > @@ -508,12 +525,31 @@ el1_preempt: > ret x24 > #endif > > + .align 6 > +el1_error: > + kernel_entry 1 > +el1_sei: > + /* > + * asynchronous SError interrupt from kernel > + */ > + mov x0, sp > + mrs x1, esr_el1 > + mov x2, #1 // exception level of SEI generated > + b do_sei > +ENDPROC(el1_error) > + > + > /* > * EL0 mode handlers. > */ > .align 6 > el0_sync: > kernel_entry 0 > +#ifdef CONFIG_ARM64_ESB > + mrs x26, disr_el1 > + tbnz x26, #31, el0_sei // check DISR.A > + msr daifclr, #0x4 // unmask SEI > +#endif > mrs x25, esr_el1 // read the syndrome register > lsr x24, x25, #ESR_ELx_EC_SHIFT // exception class > cmp x24, #ESR_ELx_EC_SVC64 // SVC in 64-bit state > @@ -688,8 +724,38 @@ el0_inv: > ENDPROC(el0_sync) > > .align 6 > +el0_error: > + kernel_entry 0 > +el0_sei: > + /* > + * asynchronous SError interrupt from userspace > + */ > + ct_user_exit > + mov x0, sp > + mrs x1, esr_el1 > + mov x2, #0 > + bl do_sei > + b ret_to_user > +ENDPROC(el0_error) > + > + .align 6 > el0_irq: > kernel_entry 0 > +#ifdef CONFIG_ARM64_ESB > + mrs x26, disr_el1 > + tbz x26, #31, el0_irq_naked // check DISR.A > + > + mov x0, sp > + mrs x1, esr_el1 > + mov x2, 0 > + > + /* > + * The SEI generated at EL0 is not affect this irq context, > + * so after sei handler, we continue process this irq. > + */ > + bl do_sei > + msr daifclr, #0x4 // unmask SEI > +#endif > el0_irq_naked: > enable_dbg > #ifdef CONFIG_TRACE_IRQFLAGS Thanks, Wang Xiongfeng
Hi, On Thu, Mar 30, 2017 at 06:31:07PM +0800, Xie XiuQi wrote: > diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h > index f20c64a..22f9c90 100644 > --- a/arch/arm64/include/asm/esr.h > +++ b/arch/arm64/include/asm/esr.h > @@ -106,6 +106,20 @@ > #define ESR_ELx_AR (UL(1) << 14) > #define ESR_ELx_CM (UL(1) << 8) > > +#define ESR_Elx_DFSC_SEI (0x11) We should probably have a definition for the uncategorized DFSC value, too. [...] > index 43512d4..d8a7306 100644 > --- a/arch/arm64/kernel/entry.S > +++ b/arch/arm64/kernel/entry.S > @@ -69,7 +69,14 @@ > #define BAD_FIQ 2 > #define BAD_ERROR 3 > > + .arch_extension ras Generally, arch_extension is a warning sign that code isn't going to work with contemporary assemblers, which we likely need to support. > + > .macro kernel_entry, el, regsize = 64 > +#ifdef CONFIG_ARM64_ESB > + .if \el == 0 > + esb Here, I think that we'll need to macro this such that we can build with existing toolchains. e.g. in <asm/assembler.h> we need something like: #define HINT_IMM_ESB 16 .macro ESB hint #HINT_IMM_ESB .endm > + .endif > +#endif > sub sp, sp, #S_FRAME_SIZE > .if \regsize == 32 > mov w0, w0 // zero upper 32 bits of x0 > @@ -208,6 +215,7 @@ alternative_else_nop_endif > #endif > > .if \el == 0 > + msr daifset, #0xF // Set flags Elsewhere in head.S we use helpers to fiddle with DAIF bits. Please be consistent with that. Add an enable_all macro if we need one. > ldr x23, [sp, #S_SP] // load return stack pointer > msr sp_el0, x23 > #ifdef CONFIG_ARM64_ERRATUM_845719 > @@ -226,6 +234,15 @@ alternative_else_nop_endif > > msr elr_el1, x21 // set up the return data > msr spsr_el1, x22 > + > +#ifdef CONFIG_ARM64_ESB > + .if \el == 0 > + esb // Error Synchronization Barrier > + mrs x21, disr_el1 // Check for deferred error We'll need an <asm/sysreg.h> definition for this register. With that, we can use mrs_s here. > + tbnz x21, #31, el1_sei > + .endif > +#endif > + > ldp x0, x1, [sp, #16 * 0] > ldp x2, x3, [sp, #16 * 1] > ldp x4, x5, [sp, #16 * 2] > @@ -318,7 +335,7 @@ ENTRY(vectors) > ventry el1_sync_invalid // Synchronous EL1t > ventry el1_irq_invalid // IRQ EL1t > ventry el1_fiq_invalid // FIQ EL1t > - ventry el1_error_invalid // Error EL1t > + ventry el1_error // Error EL1t > > ventry el1_sync // Synchronous EL1h > ventry el1_irq // IRQ EL1h > @@ -328,7 +345,7 @@ ENTRY(vectors) > ventry el0_sync // Synchronous 64-bit EL0 > ventry el0_irq // IRQ 64-bit EL0 > ventry el0_fiq_invalid // FIQ 64-bit EL0 > - ventry el0_error_invalid // Error 64-bit EL0 > + ventry el0_error // Error 64-bit EL0 > > #ifdef CONFIG_COMPAT > ventry el0_sync_compat // Synchronous 32-bit EL0 > @@ -508,12 +525,31 @@ el1_preempt: > ret x24 > #endif > > + .align 6 > +el1_error: > + kernel_entry 1 > +el1_sei: > + /* > + * asynchronous SError interrupt from kernel > + */ > + mov x0, sp > + mrs x1, esr_el1 I don't think this is correct if we branched here from kernel_exit. Surely we want the DISR_EL1 value, and ESR_EL1 is unrelated? > + mov x2, #1 // exception level of SEI generated > + b do_sei You don't need to figure out the EL here. In do_sei() we can determine the exception level from the regs (e.g. using user_mode(regs)). > +ENDPROC(el1_error) > + > + > /* > * EL0 mode handlers. > */ > .align 6 > el0_sync: > kernel_entry 0 > +#ifdef CONFIG_ARM64_ESB > + mrs x26, disr_el1 > + tbnz x26, #31, el0_sei // check DISR.A > + msr daifclr, #0x4 // unmask SEI > +#endif Why do we duplicate this across the EL0 handlers, rather than making it common in the el0 kernel_entry code? > mrs x25, esr_el1 // read the syndrome register > lsr x24, x25, #ESR_ELx_EC_SHIFT // exception class > cmp x24, #ESR_ELx_EC_SVC64 // SVC in 64-bit state > @@ -688,8 +724,38 @@ el0_inv: > ENDPROC(el0_sync) > > .align 6 > +el0_error: > + kernel_entry 0 > +el0_sei: > + /* > + * asynchronous SError interrupt from userspace > + */ > + ct_user_exit > + mov x0, sp > + mrs x1, esr_el1 As with el1_sei, I don't think this is correct if we branched to el0_sei. As far as I am aware, ESR_EL1 will contain whatever exception we took, and the value we want is in DISR_EL1. > + mov x2, #0 This EL parameter can go. > + bl do_sei > + b ret_to_user > +ENDPROC(el0_error) > + > + .align 6 > el0_irq: > kernel_entry 0 > +#ifdef CONFIG_ARM64_ESB > + mrs x26, disr_el1 > + tbz x26, #31, el0_irq_naked // check DISR.A > + > + mov x0, sp > + mrs x1, esr_el1 > + mov x2, 0 > + > + /* > + * The SEI generated at EL0 is not affect this irq context, > + * so after sei handler, we continue process this irq. > + */ > + bl do_sei > + msr daifclr, #0x4 // unmask SEI This rough pattern is duplicated several times across the EL0 entry paths. I think it should be made common. > +#endif > el0_irq_naked: > enable_dbg > #ifdef CONFIG_TRACE_IRQFLAGS > diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c > index b6d6727..99be6d8 100644 > --- a/arch/arm64/kernel/traps.c > +++ b/arch/arm64/kernel/traps.c > @@ -643,6 +643,34 @@ asmlinkage void bad_mode(struct pt_regs *regs, int reason, unsigned int esr) > handler[reason], smp_processor_id(), esr, > esr_get_class_string(esr)); > > + die("Oops - bad mode", regs, 0); > + local_irq_disable(); > + panic("bad mode"); > +} > + > +static const char *sei_context[] = { > + "userspace", /* EL0 */ > + "kernel", /* EL1 */ > +}; This should go. It's only used in one place, and would be clearer with the strings inline. More on that below. > + > +static const char *sei_severity[] = { Please name this for what it actually represents: static const char *esr_aet_str[] = { > + [0 ... ESR_ELx_AET_MAX] = "Unknown", For consistency with esr_class_str, please make this: [0 ... ESR_ELx_AET_MAX] = "UNRECOGNIZED AET", ... which makes it clear that this isn't some AET value which reports an "Unknown" status. > + [ESR_ELx_AET_UC] = "Uncontainable", > + [ESR_ELx_AET_UEU] = "Unrecoverable", > + [ESR_ELx_AET_UEO] = "Restartable", > + [ESR_ELx_AET_UER] = "Recoverable", > + [ESR_ELx_AET_CE] = "Corrected", > +}; > + > +DEFINE_PER_CPU(int, sei_in_process); A previous patch added definition of this. > +asmlinkage void do_sei(struct pt_regs *regs, unsigned int esr, int el) > +{ > + int aet = ESR_ELx_AET(esr); The AET field is only valid when the DFSC is 0b010001, so we need to check that before we interpret AET. > + console_verbose(); > + > + pr_crit("Asynchronous SError interrupt detected on CPU%d, %s, %s\n", > + smp_processor_id(), sei_context[el], sei_severity[aet]); We should dump the full ESR_ELx value, regardless of what automated decoding we do, so that we have a chance of debugging issues in the field. It would also be nice to align with how bad_mode reports this today. Please make this: pr_crit("SError detected on CPU%d while in %s mode: code: 0x%08x -- %s\n", smp_processor_id(), user_mode(regs) ? "user" : "kernel", esr, esr_aet_str[aet]); ... though it might be best to dump the raw SPSR rather than trying to say user/kernel, so that we can distinguish EL1/EL2 with VHE, etc. > + > /* > * In firmware first mode, we could assume firmware will only generate one > * of cper records at a time. There is no risk for one cpu to parse ghes table. > @@ -653,9 +681,31 @@ asmlinkage void bad_mode(struct pt_regs *regs, int reason, unsigned int esr) > this_cpu_dec(sei_in_process); > } > > - die("Oops - bad mode", regs, 0); > + if (el == 0 && IS_ENABLED(CONFIG_ARM64_ESB) && Please use user_mode(regs), and get rid of the el parameter to this function entirely. > + cpus_have_cap(ARM64_HAS_RAS_EXTN)) { > + siginfo_t info; > + void __user *pc = (void __user *)instruction_pointer(regs); > + > + if (aet >= ESR_ELx_AET_UEO) > + return; We need to check the DFSC first, and 0b111 is a reserved value (which the ARM ARM doesn't define the recoverability of), so I don't think this is correct. We should probably test the DSFC, then switch on the AET value, so as to handle only the cases we are aware of. > + > + if (aet == ESR_ELx_AET_UEU) { > + info.si_signo = SIGILL; > + info.si_errno = 0; > + info.si_code = ILL_ILLOPC; > + info.si_addr = pc; An unrecoverable error is not necessarily a particular bad instruction, so I'm not sure this makes sense. Thanks, Mark.
Hi Mark, Thanks for your comments. On 2017/4/13 18:51, Mark Rutland wrote: > Hi, > > On Thu, Mar 30, 2017 at 06:31:07PM +0800, Xie XiuQi wrote: >> diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h >> index f20c64a..22f9c90 100644 >> --- a/arch/arm64/include/asm/esr.h >> +++ b/arch/arm64/include/asm/esr.h >> @@ -106,6 +106,20 @@ >> #define ESR_ELx_AR (UL(1) << 14) >> #define ESR_ELx_CM (UL(1) << 8) >> >> +#define ESR_Elx_DFSC_SEI (0x11) > > We should probably have a definition for the uncategorized DFSC value, > too. > Will do, thanks. How about "#define ESR_Elx_DFSC_UNCATEGORIZED (0)" ? > [...] > >> index 43512d4..d8a7306 100644 >> --- a/arch/arm64/kernel/entry.S >> +++ b/arch/arm64/kernel/entry.S >> @@ -69,7 +69,14 @@ >> #define BAD_FIQ 2 >> #define BAD_ERROR 3 >> >> + .arch_extension ras > > Generally, arch_extension is a warning sign that code isn't going to > work with contemporary assemblers, which we likely need to support. > >> + >> .macro kernel_entry, el, regsize = 64 >> +#ifdef CONFIG_ARM64_ESB >> + .if \el == 0 >> + esb > > Here, I think that we'll need to macro this such that we can build with > existing toolchains. > > e.g. in <asm/assembler.h> we need something like: > > #define HINT_IMM_ESB 16 > > .macro ESB > hint #HINT_IMM_ESB > .endm > Good, thanks for your suggestion. I'll use this macro in next versin. >> + .endif >> +#endif >> sub sp, sp, #S_FRAME_SIZE >> .if \regsize == 32 >> mov w0, w0 // zero upper 32 bits of x0 >> @@ -208,6 +215,7 @@ alternative_else_nop_endif >> #endif >> >> .if \el == 0 >> + msr daifset, #0xF // Set flags > > Elsewhere in head.S we use helpers to fiddle with DAIF bits. > > Please be consistent with that. Add an enable_all macro if we need one. OK, I'll do it refer to head.S. > >> ldr x23, [sp, #S_SP] // load return stack pointer >> msr sp_el0, x23 >> #ifdef CONFIG_ARM64_ERRATUM_845719 >> @@ -226,6 +234,15 @@ alternative_else_nop_endif >> >> msr elr_el1, x21 // set up the return data >> msr spsr_el1, x22 >> + >> +#ifdef CONFIG_ARM64_ESB >> + .if \el == 0 >> + esb // Error Synchronization Barrier >> + mrs x21, disr_el1 // Check for deferred error > > We'll need an <asm/sysreg.h> definition for this register. With that, we > can use mrs_s here. OK, thanks. > >> + tbnz x21, #31, el1_sei >> + .endif >> +#endif >> + >> ldp x0, x1, [sp, #16 * 0] >> ldp x2, x3, [sp, #16 * 1] >> ldp x4, x5, [sp, #16 * 2] >> @@ -318,7 +335,7 @@ ENTRY(vectors) >> ventry el1_sync_invalid // Synchronous EL1t >> ventry el1_irq_invalid // IRQ EL1t >> ventry el1_fiq_invalid // FIQ EL1t >> - ventry el1_error_invalid // Error EL1t >> + ventry el1_error // Error EL1t >> >> ventry el1_sync // Synchronous EL1h >> ventry el1_irq // IRQ EL1h >> @@ -328,7 +345,7 @@ ENTRY(vectors) >> ventry el0_sync // Synchronous 64-bit EL0 >> ventry el0_irq // IRQ 64-bit EL0 >> ventry el0_fiq_invalid // FIQ 64-bit EL0 >> - ventry el0_error_invalid // Error 64-bit EL0 >> + ventry el0_error // Error 64-bit EL0 >> >> #ifdef CONFIG_COMPAT >> ventry el0_sync_compat // Synchronous 32-bit EL0 >> @@ -508,12 +525,31 @@ el1_preempt: >> ret x24 >> #endif >> >> + .align 6 >> +el1_error: >> + kernel_entry 1 >> +el1_sei: >> + /* >> + * asynchronous SError interrupt from kernel >> + */ >> + mov x0, sp >> + mrs x1, esr_el1 > > I don't think this is correct if we branched here from kernel_exit. > Surely we want the DISR_EL1 value, and ESR_EL1 is unrelated? Yes, indeed. I'll change it in next version. > >> + mov x2, #1 // exception level of SEI generated >> + b do_sei > > You don't need to figure out the EL here. In do_sei() we can determine > the exception level from the regs (e.g. using user_mode(regs)). Yes, you're right. I'll fix it. > >> +ENDPROC(el1_error) >> + >> + >> /* >> * EL0 mode handlers. >> */ >> .align 6 >> el0_sync: >> kernel_entry 0 >> +#ifdef CONFIG_ARM64_ESB >> + mrs x26, disr_el1 >> + tbnz x26, #31, el0_sei // check DISR.A >> + msr daifclr, #0x4 // unmask SEI >> +#endif > > Why do we duplicate this across the EL0 handlers, rather than making it > common in the el0 kernel_entry code? It's different between el0_sync and el0_irq. If sei come from el0_sync, the context is the same process, so we could just return to user space after do_sei, instead of continue the syscall. The process may be killed very likely, it's not necessary to continue the system call. However, if sei come from el0_irq, in the irq context which is not related the current process, we should continue the irq handler after do_sei. So I think we should handle these two situation differently. If I'm wrong, please correct me, Thanks. > >> mrs x25, esr_el1 // read the syndrome register >> lsr x24, x25, #ESR_ELx_EC_SHIFT // exception class >> cmp x24, #ESR_ELx_EC_SVC64 // SVC in 64-bit state >> @@ -688,8 +724,38 @@ el0_inv: >> ENDPROC(el0_sync) >> >> .align 6 >> +el0_error: >> + kernel_entry 0 >> +el0_sei: >> + /* >> + * asynchronous SError interrupt from userspace >> + */ >> + ct_user_exit >> + mov x0, sp >> + mrs x1, esr_el1 > > As with el1_sei, I don't think this is correct if we branched to > el0_sei. As far as I am aware, ESR_EL1 will contain whatever exception > we took, and the value we want is in DISR_EL1. Will fix, thanks. > >> + mov x2, #0 > > This EL parameter can go. > >> + bl do_sei >> + b ret_to_user >> +ENDPROC(el0_error) >> + >> + .align 6 >> el0_irq: >> kernel_entry 0 >> +#ifdef CONFIG_ARM64_ESB >> + mrs x26, disr_el1 >> + tbz x26, #31, el0_irq_naked // check DISR.A >> + >> + mov x0, sp >> + mrs x1, esr_el1 >> + mov x2, 0 >> + >> + /* >> + * The SEI generated at EL0 is not affect this irq context, >> + * so after sei handler, we continue process this irq. >> + */ >> + bl do_sei >> + msr daifclr, #0x4 // unmask SEI > > This rough pattern is duplicated several times across the EL0 entry > paths. I think it should be made common. I agree, I'll do it in next version, thanks. > >> +#endif >> el0_irq_naked: >> enable_dbg >> #ifdef CONFIG_TRACE_IRQFLAGS >> diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c >> index b6d6727..99be6d8 100644 >> --- a/arch/arm64/kernel/traps.c >> +++ b/arch/arm64/kernel/traps.c >> @@ -643,6 +643,34 @@ asmlinkage void bad_mode(struct pt_regs *regs, int reason, unsigned int esr) >> handler[reason], smp_processor_id(), esr, >> esr_get_class_string(esr)); >> >> + die("Oops - bad mode", regs, 0); >> + local_irq_disable(); >> + panic("bad mode"); >> +} >> + >> +static const char *sei_context[] = { >> + "userspace", /* EL0 */ >> + "kernel", /* EL1 */ >> +}; > > This should go. It's only used in one place, and would be clearer with > the strings inline. More on that below. > OK, thanks. >> + >> +static const char *sei_severity[] = { > > Please name this for what it actually represents: > > static const char *esr_aet_str[] = { > >> + [0 ... ESR_ELx_AET_MAX] = "Unknown", > > For consistency with esr_class_str, please make this: > > [0 ... ESR_ELx_AET_MAX] = "UNRECOGNIZED AET", > > ... which makes it clear that this isn't some AET value which reports an > "Unknown" status. > OK, thanks. >> + [ESR_ELx_AET_UC] = "Uncontainable", >> + [ESR_ELx_AET_UEU] = "Unrecoverable", >> + [ESR_ELx_AET_UEO] = "Restartable", >> + [ESR_ELx_AET_UER] = "Recoverable", >> + [ESR_ELx_AET_CE] = "Corrected", >> +}; >> + >> +DEFINE_PER_CPU(int, sei_in_process); > > A previous patch added definition of this. > I'll remote it, thanks. >> +asmlinkage void do_sei(struct pt_regs *regs, unsigned int esr, int el) >> +{ >> + int aet = ESR_ELx_AET(esr); > > The AET field is only valid when the DFSC is 0b010001, so we need to > check that before we interpret AET. > Will fix. >> + console_verbose(); >> + >> + pr_crit("Asynchronous SError interrupt detected on CPU%d, %s, %s\n", >> + smp_processor_id(), sei_context[el], sei_severity[aet]); > > We should dump the full ESR_ELx value, regardless of what automated > decoding we do, so that we have a chance of debugging issues in the > field. > > It would also be nice to align with how bad_mode reports this today. > Please make this: > > pr_crit("SError detected on CPU%d while in %s mode: code: 0x%08x -- %s\n", > smp_processor_id(), user_mode(regs) ? "user" : "kernel", > esr, esr_aet_str[aet]); > > ... though it might be best to dump the raw SPSR rather than trying to > say user/kernel, so that we can distinguish EL1/EL2 with VHE, etc. > OK, I'll modify in next version. >> + >> /* >> * In firmware first mode, we could assume firmware will only generate one >> * of cper records at a time. There is no risk for one cpu to parse ghes table. >> @@ -653,9 +681,31 @@ asmlinkage void bad_mode(struct pt_regs *regs, int reason, unsigned int esr) >> this_cpu_dec(sei_in_process); >> } >> >> - die("Oops - bad mode", regs, 0); >> + if (el == 0 && IS_ENABLED(CONFIG_ARM64_ESB) && > > Please use user_mode(regs), and get rid of the el parameter to this > function entirely. > Will fix. >> + cpus_have_cap(ARM64_HAS_RAS_EXTN)) { >> + siginfo_t info; >> + void __user *pc = (void __user *)instruction_pointer(regs); >> + >> + if (aet >= ESR_ELx_AET_UEO) >> + return; > > We need to check the DFSC first, and 0b111 is a reserved value (which > the ARM ARM doesn't define the recoverability of), so I don't think this > is correct. > > We should probably test the DSFC, then switch on the AET value, so as to > handle only the cases we are aware of. Will fix. > >> + >> + if (aet == ESR_ELx_AET_UEU) { >> + info.si_signo = SIGILL; >> + info.si_errno = 0; >> + info.si_code = ILL_ILLOPC; >> + info.si_addr = pc; > > An unrecoverable error is not necessarily a particular bad instruction, > so I'm not sure this makes sense. > Generally, a SEI is generated when PE consumes an uncorrectable error. So, may be we could send a SIGBUS instead. I'm not sure too. Any other suggestion? > Thanks, > Mark. > > . >
Hi Mark, I have some confusion about the RAS feature when VHE is enabled. Does RAS spec support the situation when VHE is enabled. When VHE is disabled, the hyperviosr delegates the error exception to EL1 by setting HCR_EL2.VSE to 1, and this will inject a virtual SEI into OS. My understanding is that HCR_EL2.VSE is only used to inject a virtual SEI into EL1. But when VHE is enabled, the host OS will run at EL2. We can't inject a virtual SEI into host OS. I don't know if RAS spec can handle this situation. Thanks, Wang Xiongfeng On 2017/4/13 18:51, Mark Rutland wrote: > Hi, > > On Thu, Mar 30, 2017 at 06:31:07PM +0800, Xie XiuQi wrote: >> diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h >> index f20c64a..22f9c90 100644 >> --- a/arch/arm64/include/asm/esr.h >> +++ b/arch/arm64/include/asm/esr.h >> @@ -106,6 +106,20 @@ >> #define ESR_ELx_AR (UL(1) << 14) >> #define ESR_ELx_CM (UL(1) << 8) >> >> +#define ESR_Elx_DFSC_SEI (0x11) > > We should probably have a definition for the uncategorized DFSC value, > too. > > [...] > >> index 43512d4..d8a7306 100644 >> --- a/arch/arm64/kernel/entry.S >> +++ b/arch/arm64/kernel/entry.S >> @@ -69,7 +69,14 @@ >> #define BAD_FIQ 2 >> #define BAD_ERROR 3 >> >> + .arch_extension ras > > Generally, arch_extension is a warning sign that code isn't going to > work with contemporary assemblers, which we likely need to support. > >> + >> .macro kernel_entry, el, regsize = 64 >> +#ifdef CONFIG_ARM64_ESB >> + .if \el == 0 >> + esb > > Here, I think that we'll need to macro this such that we can build with > existing toolchains. > > e.g. in <asm/assembler.h> we need something like: > > #define HINT_IMM_ESB 16 > > .macro ESB > hint #HINT_IMM_ESB > .endm > >> + .endif >> +#endif >> sub sp, sp, #S_FRAME_SIZE >> .if \regsize == 32 >> mov w0, w0 // zero upper 32 bits of x0 >> @@ -208,6 +215,7 @@ alternative_else_nop_endif >> #endif >> >> .if \el == 0 >> + msr daifset, #0xF // Set flags > > Elsewhere in head.S we use helpers to fiddle with DAIF bits. > > Please be consistent with that. Add an enable_all macro if we need one. > >> ldr x23, [sp, #S_SP] // load return stack pointer >> msr sp_el0, x23 >> #ifdef CONFIG_ARM64_ERRATUM_845719 >> @@ -226,6 +234,15 @@ alternative_else_nop_endif >> >> msr elr_el1, x21 // set up the return data >> msr spsr_el1, x22 >> + >> +#ifdef CONFIG_ARM64_ESB >> + .if \el == 0 >> + esb // Error Synchronization Barrier >> + mrs x21, disr_el1 // Check for deferred error > > We'll need an <asm/sysreg.h> definition for this register. With that, we > can use mrs_s here. > >> + tbnz x21, #31, el1_sei >> + .endif >> +#endif >> + >> ldp x0, x1, [sp, #16 * 0] >> ldp x2, x3, [sp, #16 * 1] >> ldp x4, x5, [sp, #16 * 2] >> @@ -318,7 +335,7 @@ ENTRY(vectors) >> ventry el1_sync_invalid // Synchronous EL1t >> ventry el1_irq_invalid // IRQ EL1t >> ventry el1_fiq_invalid // FIQ EL1t >> - ventry el1_error_invalid // Error EL1t >> + ventry el1_error // Error EL1t >> >> ventry el1_sync // Synchronous EL1h >> ventry el1_irq // IRQ EL1h >> @@ -328,7 +345,7 @@ ENTRY(vectors) >> ventry el0_sync // Synchronous 64-bit EL0 >> ventry el0_irq // IRQ 64-bit EL0 >> ventry el0_fiq_invalid // FIQ 64-bit EL0 >> - ventry el0_error_invalid // Error 64-bit EL0 >> + ventry el0_error // Error 64-bit EL0 >> >> #ifdef CONFIG_COMPAT >> ventry el0_sync_compat // Synchronous 32-bit EL0 >> @@ -508,12 +525,31 @@ el1_preempt: >> ret x24 >> #endif >> >> + .align 6 >> +el1_error: >> + kernel_entry 1 >> +el1_sei: >> + /* >> + * asynchronous SError interrupt from kernel >> + */ >> + mov x0, sp >> + mrs x1, esr_el1 > > I don't think this is correct if we branched here from kernel_exit. > Surely we want the DISR_EL1 value, and ESR_EL1 is unrelated? > >> + mov x2, #1 // exception level of SEI generated >> + b do_sei > > You don't need to figure out the EL here. In do_sei() we can determine > the exception level from the regs (e.g. using user_mode(regs)). > >> +ENDPROC(el1_error) >> + >> + >> /* >> * EL0 mode handlers. >> */ >> .align 6 >> el0_sync: >> kernel_entry 0 >> +#ifdef CONFIG_ARM64_ESB >> + mrs x26, disr_el1 >> + tbnz x26, #31, el0_sei // check DISR.A >> + msr daifclr, #0x4 // unmask SEI >> +#endif > > Why do we duplicate this across the EL0 handlers, rather than making it > common in the el0 kernel_entry code? > >> mrs x25, esr_el1 // read the syndrome register >> lsr x24, x25, #ESR_ELx_EC_SHIFT // exception class >> cmp x24, #ESR_ELx_EC_SVC64 // SVC in 64-bit state >> @@ -688,8 +724,38 @@ el0_inv: >> ENDPROC(el0_sync) >> >> .align 6 >> +el0_error: >> + kernel_entry 0 >> +el0_sei: >> + /* >> + * asynchronous SError interrupt from userspace >> + */ >> + ct_user_exit >> + mov x0, sp >> + mrs x1, esr_el1 > > As with el1_sei, I don't think this is correct if we branched to > el0_sei. As far as I am aware, ESR_EL1 will contain whatever exception > we took, and the value we want is in DISR_EL1. > >> + mov x2, #0 > > This EL parameter can go. > >> + bl do_sei >> + b ret_to_user >> +ENDPROC(el0_error) >> + >> + .align 6 >> el0_irq: >> kernel_entry 0 >> +#ifdef CONFIG_ARM64_ESB >> + mrs x26, disr_el1 >> + tbz x26, #31, el0_irq_naked // check DISR.A >> + >> + mov x0, sp >> + mrs x1, esr_el1 >> + mov x2, 0 >> + >> + /* >> + * The SEI generated at EL0 is not affect this irq context, >> + * so after sei handler, we continue process this irq. >> + */ >> + bl do_sei >> + msr daifclr, #0x4 // unmask SEI > > This rough pattern is duplicated several times across the EL0 entry > paths. I think it should be made common. > >> +#endif >> el0_irq_naked: >> enable_dbg >> #ifdef CONFIG_TRACE_IRQFLAGS >> diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c >> index b6d6727..99be6d8 100644 >> --- a/arch/arm64/kernel/traps.c >> +++ b/arch/arm64/kernel/traps.c >> @@ -643,6 +643,34 @@ asmlinkage void bad_mode(struct pt_regs *regs, int reason, unsigned int esr) >> handler[reason], smp_processor_id(), esr, >> esr_get_class_string(esr)); >> >> + die("Oops - bad mode", regs, 0); >> + local_irq_disable(); >> + panic("bad mode"); >> +} >> + >> +static const char *sei_context[] = { >> + "userspace", /* EL0 */ >> + "kernel", /* EL1 */ >> +}; > > This should go. It's only used in one place, and would be clearer with > the strings inline. More on that below. > >> + >> +static const char *sei_severity[] = { > > Please name this for what it actually represents: > > static const char *esr_aet_str[] = { > >> + [0 ... ESR_ELx_AET_MAX] = "Unknown", > > For consistency with esr_class_str, please make this: > > [0 ... ESR_ELx_AET_MAX] = "UNRECOGNIZED AET", > > ... which makes it clear that this isn't some AET value which reports an > "Unknown" status. > >> + [ESR_ELx_AET_UC] = "Uncontainable", >> + [ESR_ELx_AET_UEU] = "Unrecoverable", >> + [ESR_ELx_AET_UEO] = "Restartable", >> + [ESR_ELx_AET_UER] = "Recoverable", >> + [ESR_ELx_AET_CE] = "Corrected", >> +}; >> + >> +DEFINE_PER_CPU(int, sei_in_process); > > A previous patch added definition of this. > >> +asmlinkage void do_sei(struct pt_regs *regs, unsigned int esr, int el) >> +{ >> + int aet = ESR_ELx_AET(esr); > > The AET field is only valid when the DFSC is 0b010001, so we need to > check that before we interpret AET. > >> + console_verbose(); >> + >> + pr_crit("Asynchronous SError interrupt detected on CPU%d, %s, %s\n", >> + smp_processor_id(), sei_context[el], sei_severity[aet]); > > We should dump the full ESR_ELx value, regardless of what automated > decoding we do, so that we have a chance of debugging issues in the > field. > > It would also be nice to align with how bad_mode reports this today. > Please make this: > > pr_crit("SError detected on CPU%d while in %s mode: code: 0x%08x -- %s\n", > smp_processor_id(), user_mode(regs) ? "user" : "kernel", > esr, esr_aet_str[aet]); > > ... though it might be best to dump the raw SPSR rather than trying to > say user/kernel, so that we can distinguish EL1/EL2 with VHE, etc. > >> + >> /* >> * In firmware first mode, we could assume firmware will only generate one >> * of cper records at a time. There is no risk for one cpu to parse ghes table. >> @@ -653,9 +681,31 @@ asmlinkage void bad_mode(struct pt_regs *regs, int reason, unsigned int esr) >> this_cpu_dec(sei_in_process); >> } >> >> - die("Oops - bad mode", regs, 0); >> + if (el == 0 && IS_ENABLED(CONFIG_ARM64_ESB) && > > Please use user_mode(regs), and get rid of the el parameter to this > function entirely. > >> + cpus_have_cap(ARM64_HAS_RAS_EXTN)) { >> + siginfo_t info; >> + void __user *pc = (void __user *)instruction_pointer(regs); >> + >> + if (aet >= ESR_ELx_AET_UEO) >> + return; > > We need to check the DFSC first, and 0b111 is a reserved value (which > the ARM ARM doesn't define the recoverability of), so I don't think this > is correct. > > We should probably test the DSFC, then switch on the AET value, so as to > handle only the cases we are aware of. > >> + >> + if (aet == ESR_ELx_AET_UEU) { >> + info.si_signo = SIGILL; >> + info.si_errno = 0; >> + info.si_code = ILL_ILLOPC; >> + info.si_addr = pc; > > An unrecoverable error is not necessarily a particular bad instruction, > so I'm not sure this makes sense. > > Thanks, > Mark. > > . >
Hi Wang Xiongfeng, On 18/04/17 02:09, Xiongfeng Wang wrote: > I have some confusion about the RAS feature when VHE is enabled. Does RAS spec support > the situation when VHE is enabled. When VHE is disabled, the hyperviosr delegates the error > exception to EL1 by setting HCR_EL2.VSE to 1, and this will inject a virtual SEI into OS. (The ARM-ARM also requires the HCR_EL2.AMO to be set so that physical SError Interrupts are taken to EL2, meaning EL1 can never receive a physical SError) > My understanding is that HCR_EL2.VSE is only used to inject a virtual SEI into EL1. ... mine too ... > But when VHE is enabled, the host OS will run at EL2. We can't inject a virtual SEI into > host OS. I don't know if RAS spec can handle this situation. The host expects to receive physical SError Interrupts. The ARM-ARM doesn't describe a way to inject these as they are generated by the CPU. Am I right in thinking you want this to use SError Interrupts as an APEI notification? (This isn't a CPU thing so the RAS spec doesn't cover this use) This is straightforward for the hyper-visor to implement using Virtual SError. I don't think its not always feasible for the host as Physical SError is routed to EL3 by SCR_EL3.EA, meaning there is no hardware generated SError that can reach EL2. Another APEI notification mechanism may be more appropriate. EL3 may be able to 'fake' an SError by returning into the appropriate EL2 vector if the exception came from EL{0,1}, or from EL2 and PSTATE.A is clear. If the SError came from EL2 and the ESR_EL3.IESB bit is set, we can write an appropriate ESR into DISR. You cant use SError to cover all the possible RAS exceptions. We already have this problem using SEI if PSTATE.A was set and the exception was an imprecise abort from EL2. We can't return to the interrupted context and we can't deliver an SError to EL2 either. Setting SCR_EL3.EA allows firmware to handle these ugly corner cases. Notifying the OS is a separate problem where APEI's SEI may not always be the best choice. Thanks, James
Hi James, Thanks for your reply. On 2017/4/18 18:51, James Morse wrote: > Hi Wang Xiongfeng, > > On 18/04/17 02:09, Xiongfeng Wang wrote: >> I have some confusion about the RAS feature when VHE is enabled. Does RAS spec support >> the situation when VHE is enabled. When VHE is disabled, the hyperviosr delegates the error >> exception to EL1 by setting HCR_EL2.VSE to 1, and this will inject a virtual SEI into OS. > > (The ARM-ARM also requires the HCR_EL2.AMO to be set so that physical SError > Interrupts are taken to EL2, meaning EL1 can never receive a physical SError) > > >> My understanding is that HCR_EL2.VSE is only used to inject a virtual SEI into EL1. > > ... mine too ... > >> But when VHE is enabled, the host OS will run at EL2. We can't inject a virtual SEI into >> host OS. I don't know if RAS spec can handle this situation. > > The host expects to receive physical SError Interrupts. The ARM-ARM doesn't > describe a way to inject these as they are generated by the CPU. > > Am I right in thinking you want this to use SError Interrupts as an APEI > notification? (This isn't a CPU thing so the RAS spec doesn't cover this use) Yes, using sei as an APEI notification is one part of my consideration. Another use is for ESB. RAS spec 6.5.3 'Example software sequences: Variant: asynchronous External Abort with ESB' describes the SEI recovery process when ESB is implemented. In this situation, SEI is routed to EL3 (SCR_EL3.EA = 1). When an SEI occurs in EL0 and not been taken immediately, and then an ESB instruction at SVC entry is executed, SEI is taken to EL3. The ESB at SVC entry is used for preventing the error propagating from user space to kernel space. The EL3 SEI handler collects the errors and fills in the APEI table, and then jump to EL2 SEI handler. EL2 SEI handler inject an vSEI into EL1 by setting HCR_EL2.VSE = 1, so that when returned to OS, an SEI is pending. Then ESB is executed again, and DISR_EL1.A is set by hardware (2.4.4 ESB and virtual errors), so that the following process can be executed. So we want to inject a vSEI into OS, when control is returned from EL3/2 to OS, no matter whether it is on host OS or guest OS. I don't know if my understanding is right here. > > This is straightforward for the hyper-visor to implement using Virtual SError. > I don't think its not always feasible for the host as Physical SError is routed > to EL3 by SCR_EL3.EA, meaning there is no hardware generated SError that can > reach EL2. Another APEI notification mechanism may be more appropriate. > > EL3 may be able to 'fake' an SError by returning into the appropriate EL2 vector > if the exception came from EL{0,1}, or from EL2 and PSTATE.A is clear. > If the SError came from EL2 and the ESR_EL3.IESB bit is set, we can write an > appropriate ESR into DISR. Yes, this can work. When VHE is enabled, we can set DISR.A by software, and 'fake' an SError by returning into the EL2 SEI vector. > You cant use SError to cover all the possible RAS exceptions. We already have > this problem using SEI if PSTATE.A was set and the exception was an imprecise > abort from EL2. We can't return to the interrupted context and we can't deliver > an SError to EL2 either. SEI came from EL2 and PSTATE.A is set. Is it the situation where VHE is enabled and CPU is running in kernel space. If SEI occurs in kernel space, can we just panic or shutdown. > > Setting SCR_EL3.EA allows firmware to handle these ugly corner cases. Notifying > the OS is a separate problem where APEI's SEI may not always be the best choice. > > > Thanks, > > James > > . > Thanks, Wang Xiongfeng
Hi Wang Xiongfeng, On 19/04/17 03:37, Xiongfeng Wang wrote: > On 2017/4/18 18:51, James Morse wrote: >> The host expects to receive physical SError Interrupts. The ARM-ARM doesn't >> describe a way to inject these as they are generated by the CPU. >> >> Am I right in thinking you want this to use SError Interrupts as an APEI >> notification? (This isn't a CPU thing so the RAS spec doesn't cover this use) > > Yes, using sei as an APEI notification is one part of my consideration. Another use is for ESB. > RAS spec 6.5.3 'Example software sequences: Variant: asynchronous External Abort with ESB' > describes the SEI recovery process when ESB is implemented. > > In this situation, SEI is routed to EL3 (SCR_EL3.EA = 1). When an SEI occurs in EL0 and not been taken immediately, > and then an ESB instruction at SVC entry is executed, SEI is taken to EL3. The ESB at SVC entry is > used for preventing the error propagating from user space to kernel space. The EL3 SEI handler collects > the errors and fills in the APEI table, and then jump to EL2 SEI handler. EL2 SEI handler inject > an vSEI into EL1 by setting HCR_EL2.VSE = 1, so that when returned to OS, an SEI is pending. This step has confused me. How would this work with VHE where the host runs at EL2 and there is nothing at Host:EL1? From your description I assume you have some firmware resident at EL2. > Then ESB is executed again, and DISR_EL1.A is set by hardware (2.4.4 ESB and virtual errors), so that > the following process can be executed. > So we want to inject a vSEI into OS, when control is returned from EL3/2 to OS, no matter whether > it is on host OS or guest OS. I disagree. With Linux/KVM you can't use Virtual SError to notify the host OS. The host OS expects to receive Physical SError, these shouldn't be taken to EL2 unless a guest is running. Notifications from firmware that use SEA or SEI should follow the routing rules in the ARM-ARM, which means they should never reach a guest OS. For VHE the host runs at EL2 and sets HCR_EL2.AMO. Any firmware notification should come from EL3 to the host at EL2. The host may choose to notify the guest, but this should always go via Qemu. For non-VHE systems the host runs at EL1 and KVM lives at EL2 to do the world switch. When KVM is running a guest it sets HCR_EL2.AMO, when it has switched back to the host it clears it. Notifications from EL3 that pretend to be SError should route SError to EL2 or EL1 depending on HCR_EL2.AMO. When KVM gets a Synchronous External Abort or an SError while a guest is running it will switch back to the host and tell the handle_exit() code. Again the host may choose to notify the guest, but this should always go via Qemu. The ARM-ARM pseudo code for the routing rules is in AArch64.TakePhysicalSErrorException(). Firmware injecting fake SError should behave exactly like this. Newly appeared in the ARM-ARM is HCR_EL2.TEA, which takes Synchronous External Aborts to EL2 (if SCR_EL3 hasn't claimed them). We should set/clear this bit like we do HCR_EL2.AMO if the CPU has the RAS extensions. >> You cant use SError to cover all the possible RAS exceptions. We already have >> this problem using SEI if PSTATE.A was set and the exception was an imprecise >> abort from EL2. We can't return to the interrupted context and we can't deliver >> an SError to EL2 either. > > SEI came from EL2 and PSTATE.A is set. Is it the situation where VHE is enabled and CPU is running > in kernel space. If SEI occurs in kernel space, can we just panic or shutdown. firmware-panic? We can do a little better than that: If the IESB bit is set in the ESR we can behave as if this were an ESB and have firmware write an appropriate ESR to DISR_EL1 if PSTATE.A is set and the exception came from EL2. Linux should have an ESB in its __switch_to(), I've re-worked the daif masking in entry.S so that PSTATE.A will always be unmasked here. Other than these two cases, yes, this CPU really is wrecked. Firmware can power it off via PSCI, and could notify another CPU about what happened. UEFI's table 250 'Processor Generic Error Section' has a 'Processor ID' field, with a note that on ARM this is the MPIDR_EL1. Table 260 'ARM Processor Error Section' has something similar. Thanks, James
Hi XiuQi, On 2017/3/30 18:31, Xie XiuQi wrote: > Error Synchronization Barrier (ESB; part of the ARMv8.2 Extensions) > is used to synchronize Unrecoverable errors. That is, containable errors > architecturally consumed by the PE and not silently propagated. > > With ESB it is generally possible to isolate an unrecoverable error > between two ESB instructions. So, it's possible to recovery from > unrecoverable errors reported by asynchronous SError interrupt. > > If ARMv8.2 RAS Extension is not support, ESB is treated as a NOP. > > Signed-off-by: Xie XiuQi <xiexiuqi@huawei.com> > Signed-off-by: Wang Xiongfeng <wangxiongfengi2@huawei.com> > --- > arch/arm64/Kconfig | 16 ++++++++++ > arch/arm64/include/asm/esr.h | 14 +++++++++ > arch/arm64/kernel/entry.S | 70 ++++++++++++++++++++++++++++++++++++++++++-- > arch/arm64/kernel/traps.c | 54 ++++++++++++++++++++++++++++++++-- > 4 files changed, 150 insertions(+), 4 deletions(-) > > diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig > index 859a90e..7402175 100644 > --- a/arch/arm64/Kconfig > +++ b/arch/arm64/Kconfig > @@ -911,6 +911,22 @@ endmenu > > menu "ARMv8.2 architectural features" > > +config ARM64_ESB > + bool "Enable support for Error Synchronization Barrier (ESB)" > + default n > + help > + Error Synchronization Barrier (ESB; part of the ARMv8.2 Extensions) > + is used to synchronize Unrecoverable errors. That is, containable errors > + architecturally consumed by the PE and not silently propagated. > + > + Without ESB it is not generally possible to isolate an Unrecoverable > + error because it is not known which instruction generated the error. > + > + Selecting this option allows inject esb instruction before the exception > + change. If ARMv8.2 RAS Extension is not support, ESB is treated as a NOP. > + > + Note that ESB instruction can introduce slight overhead, so say N if unsure. > + > config ARM64_UAO > bool "Enable support for User Access Override (UAO)" > default y > diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h > index f20c64a..22f9c90 100644 > --- a/arch/arm64/include/asm/esr.h > +++ b/arch/arm64/include/asm/esr.h > @@ -106,6 +106,20 @@ > #define ESR_ELx_AR (UL(1) << 14) > #define ESR_ELx_CM (UL(1) << 8) > > +#define ESR_Elx_DFSC_SEI (0x11) > + > +#define ESR_ELx_AET_SHIFT (10) > +#define ESR_ELx_AET_MAX (7) > +#define ESR_ELx_AET_MASK (UL(7) << ESR_ELx_AET_SHIFT) > +#define ESR_ELx_AET(esr) (((esr) & ESR_ELx_AET_MASK) >> ESR_ELx_AET_SHIFT) > + > +#define ESR_ELx_AET_UC (0) > +#define ESR_ELx_AET_UEU (1) > +#define ESR_ELx_AET_UEO (2) > +#define ESR_ELx_AET_UER (3) > +#define ESR_ELx_AET_CE (6) > + > + > /* ISS field definitions for exceptions taken in to Hyp */ > #define ESR_ELx_CV (UL(1) << 24) > #define ESR_ELx_COND_SHIFT (20) > diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S > index 43512d4..d8a7306 100644 > --- a/arch/arm64/kernel/entry.S > +++ b/arch/arm64/kernel/entry.S > @@ -69,7 +69,14 @@ > #define BAD_FIQ 2 > #define BAD_ERROR 3 > > + .arch_extension ras > + > .macro kernel_entry, el, regsize = 64 because we also want to take SEI exception in kernel space, we may need to unmask SEI in kernel_entry. > +#ifdef CONFIG_ARM64_ESB > + .if \el == 0 > + esb > + .endif > +#endif > sub sp, sp, #S_FRAME_SIZE > .if \regsize == 32 > mov w0, w0 // zero upper 32 bits of x0 > @@ -208,6 +215,7 @@ alternative_else_nop_endif > #endif > > .if \el == 0 > + msr daifset, #0xF // Set flags > ldr x23, [sp, #S_SP] // load return stack pointer > msr sp_el0, x23 > #ifdef CONFIG_ARM64_ERRATUM_845719 > @@ -226,6 +234,15 @@ alternative_else_nop_endif > > msr elr_el1, x21 // set up the return data > msr spsr_el1, x22 > + > +#ifdef CONFIG_ARM64_ESB > + .if \el == 0 > + esb // Error Synchronization Barrier > + mrs x21, disr_el1 // Check for deferred error > + tbnz x21, #31, el1_sei > + .endif > +#endif > + > ldp x0, x1, [sp, #16 * 0] > ldp x2, x3, [sp, #16 * 1] > ldp x4, x5, [sp, #16 * 2] > @@ -318,7 +335,7 @@ ENTRY(vectors) > ventry el1_sync_invalid // Synchronous EL1t > ventry el1_irq_invalid // IRQ EL1t > ventry el1_fiq_invalid // FIQ EL1t > - ventry el1_error_invalid // Error EL1t > + ventry el1_error // Error EL1t > > ventry el1_sync // Synchronous EL1h > ventry el1_irq // IRQ EL1h > @@ -328,7 +345,7 @@ ENTRY(vectors) > ventry el0_sync // Synchronous 64-bit EL0 > ventry el0_irq // IRQ 64-bit EL0 > ventry el0_fiq_invalid // FIQ 64-bit EL0 > - ventry el0_error_invalid // Error 64-bit EL0 > + ventry el0_error // Error 64-bit EL0 for the situation 'Current EL with SPx', we also need to change the sError vector to el1_error. > > #ifdef CONFIG_COMPAT > ventry el0_sync_compat // Synchronous 32-bit EL0 > @@ -508,12 +525,31 @@ el1_preempt: > ret x24 > #endif > > + .align 6 > +el1_error: > + kernel_entry 1 > +el1_sei: > + /* > + * asynchronous SError interrupt from kernel > + */ > + mov x0, sp > + mrs x1, esr_el1 > + mov x2, #1 // exception level of SEI generated > + b do_sei > +ENDPROC(el1_error) > + > + Thanks, Wang Xiongfeng
Hi James, Thanks for your reply. On 2017/4/20 16:52, James Morse wrote: > Hi Wang Xiongfeng, > > On 19/04/17 03:37, Xiongfeng Wang wrote: >> On 2017/4/18 18:51, James Morse wrote: >>> The host expects to receive physical SError Interrupts. The ARM-ARM doesn't >>> describe a way to inject these as they are generated by the CPU. >>> >>> Am I right in thinking you want this to use SError Interrupts as an APEI >>> notification? (This isn't a CPU thing so the RAS spec doesn't cover this use) >> >> Yes, using sei as an APEI notification is one part of my consideration. Another use is for ESB. >> RAS spec 6.5.3 'Example software sequences: Variant: asynchronous External Abort with ESB' >> describes the SEI recovery process when ESB is implemented. >> >> In this situation, SEI is routed to EL3 (SCR_EL3.EA = 1). When an SEI occurs in EL0 and not been taken immediately, >> and then an ESB instruction at SVC entry is executed, SEI is taken to EL3. The ESB at SVC entry is >> used for preventing the error propagating from user space to kernel space. The EL3 SEI handler collects > >> the errors and fills in the APEI table, and then jump to EL2 SEI handler. EL2 SEI handler inject >> an vSEI into EL1 by setting HCR_EL2.VSE = 1, so that when returned to OS, an SEI is pending. > > This step has confused me. How would this work with VHE where the host runs at > EL2 and there is nothing at Host:EL1? RAS spec 6.5.3 'Example software sequences: Variant: asynchronous External Abort with ESB' I don't know whether the step described in the section is designed for guest os or host os or both. Yes, it doesn't work with VHE where the host runs at EL2. >>From your description I assume you have some firmware resident at EL2. Our actual SEI step is planned as follows: Host OS: EL0/EL1 -> EL3 -> EL0/EL1 Guest OS: EL0/EL1 -> EL3 -> EL2 -> EL0/EL1 Our problem is that, when returning to EL0/EL1, whether we should jump to EL1 SEI vector or just return to where the SEI is taken from? In guest os situation, we can inject an vSEI and return to where the SEI is taken from. But in host os situation, we can't inject an vSEI (if we don't set HCR_EL2.AMO), so we have to jump to EL1 SEI vector. Then the process following ESB won't be executed becuase SEI is taken to EL3 from the ESB instruction in EL1, and when control is returned to OS, we are in EL1 SEI vector rather than the ESB instruction. It is ok to just ignore the process following the ESB instruction in el0_sync, because the process will be sent SIGBUS signal. But for el0_irq, the irq process following the ESB may should be executed because the irq is not related to the user process. If we set HCR_EL2.AMO when SCR_EL3.EA is set. We can still inject an vSEI into host OS in EL3. Physical SError won't be taken to EL2 because SCR_EL3.EA is set. But this may be too racy is not consistent with ARM rules. > > >> Then ESB is executed again, and DISR_EL1.A is set by hardware (2.4.4 ESB and virtual errors), so that >> the following process can be executed. > > >> So we want to inject a vSEI into OS, when control is returned from EL3/2 to OS, no matter whether >> it is on host OS or guest OS. > > I disagree. With Linux/KVM you can't use Virtual SError to notify the host OS. > The host OS expects to receive Physical SError, these shouldn't be taken to EL2 > unless a guest is running. Notifications from firmware that use SEA or SEI > should follow the routing rules in the ARM-ARM, which means they should never > reach a guest OS. Yes, I agree. When running the host os, exception should not be taken to EL2, because EL2 SEI vector always suppose that exception is taken from guest os and save guest context in the first place. > > For VHE the host runs at EL2 and sets HCR_EL2.AMO. Any firmware notification > should come from EL3 to the host at EL2. The host may choose to notify the > guest, but this should always go via Qemu. > > For non-VHE systems the host runs at EL1 and KVM lives at EL2 to do the world > switch. When KVM is running a guest it sets HCR_EL2.AMO, when it has switched > back to the host it clears it. > > Notifications from EL3 that pretend to be SError should route SError to EL2 or > EL1 depending on HCR_EL2.AMO. > When KVM gets a Synchronous External Abort or an SError while a guest is running > it will switch back to the host and tell the handle_exit() code. Again the host > may choose to notify the guest, but this should always go via Qemu. > > The ARM-ARM pseudo code for the routing rules is in > AArch64.TakePhysicalSErrorException(). Firmware injecting fake SError should > behave exactly like this. > > > Newly appeared in the ARM-ARM is HCR_EL2.TEA, which takes Synchronous External > Aborts to EL2 (if SCR_EL3 hasn't claimed them). We should set/clear this bit > like we do HCR_EL2.AMO if the CPU has the RAS extensions. > > >>> You cant use SError to cover all the possible RAS exceptions. We already have >>> this problem using SEI if PSTATE.A was set and the exception was an imprecise >>> abort from EL2. We can't return to the interrupted context and we can't deliver >>> an SError to EL2 either. >> >> SEI came from EL2 and PSTATE.A is set. Is it the situation where VHE is enabled and CPU is running >> in kernel space. If SEI occurs in kernel space, can we just panic or shutdown. > > firmware-panic? We can do a little better than that: > If the IESB bit is set in the ESR we can behave as if this were an ESB and have > firmware write an appropriate ESR to DISR_EL1 if PSTATE.A is set and the > exception came from EL2. This method may solve the problem I said above. Is IESB a new bit added int ESR in the newest RAS spec? > > Linux should have an ESB in its __switch_to(), I've re-worked the daif masking > in entry.S so that PSTATE.A will always be unmasked here. > > Other than these two cases, yes, this CPU really is wrecked. Firmware can power > it off via PSCI, and could notify another CPU about what happened. UEFI's table > 250 'Processor Generic Error Section' has a 'Processor ID' field, with a note > that on ARM this is the MPIDR_EL1. Table 260 'ARM Processor Error Section' has > something similar. > > > Thanks, > > James > > . > Thanks, Wang Xiongfeng
Hi Wang Xiongfeng, On 21/04/17 12:33, Xiongfeng Wang wrote: > On 2017/4/20 16:52, James Morse wrote: >> On 19/04/17 03:37, Xiongfeng Wang wrote: >>> On 2017/4/18 18:51, James Morse wrote: >>>> The host expects to receive physical SError Interrupts. The ARM-ARM doesn't >>>> describe a way to inject these as they are generated by the CPU. >>>> >>>> Am I right in thinking you want this to use SError Interrupts as an APEI >>>> notification? (This isn't a CPU thing so the RAS spec doesn't cover this use) >>> >>> Yes, using sei as an APEI notification is one part of my consideration. Another use is for ESB. >>> RAS spec 6.5.3 'Example software sequences: Variant: asynchronous External Abort with ESB' >>> describes the SEI recovery process when ESB is implemented. >>> >>> In this situation, SEI is routed to EL3 (SCR_EL3.EA = 1). When an SEI occurs in EL0 and not been taken immediately, >>> and then an ESB instruction at SVC entry is executed, SEI is taken to EL3. The ESB at SVC entry is >>> used for preventing the error propagating from user space to kernel space. The EL3 SEI handler collects >> >>> the errors and fills in the APEI table, and then jump to EL2 SEI handler. EL2 SEI handler inject >>> an vSEI into EL1 by setting HCR_EL2.VSE = 1, so that when returned to OS, an SEI is pending. >> >> This step has confused me. How would this work with VHE where the host runs at >> EL2 and there is nothing at Host:EL1? > > RAS spec 6.5.3 'Example software sequences: Variant: asynchronous External Abort with ESB' > I don't know whether the step described in the section is designed for guest os or host os or both. > Yes, it doesn't work with VHE where the host runs at EL2. If it uses Virtual SError, it must be for an OS running at EL1, as the code running at EL2 (VHE:Linux, firmware or Xen) must set HCR_EL2.AMO to make this work. I can't find the section you describe in this RAS spec: https://developer.arm.com/docs/ddi0587/latest/armreliabilityavailabilityandserviceabilityrasspecificationarmv8forthearmv8aarchitectureprofile >> >From your description I assume you have some firmware resident at EL2. > Our actual SEI step is planned as follows: Ah, okay. You can't use Virtual SError at all then, as firmware doesn't own EL2: KVM does. KVM will save/restore the HCR_EL2 register as part of its world switch. Firmware must not modify the hyp registers behind the hyper-visors back. > Host OS: EL0/EL1 -> EL3 -> EL0/EL1 i.e. While the host was running, so EL3 sees HCR_EL2.AMO clear, and directs the SEI to EL1. > Guest OS: EL0/EL1 -> EL3 -> EL2 -> EL0/EL1 i.e. While the guest was running, so EL3 sees HCR_EL2.AMO set, directs the SEI to EL2 where KVM performs the world-switch and returns to host EL1. Looks sensible. > In guest os situation, we can inject an vSEI and return to where the SEI is taken from. An SEI from firmware should always end up in the host OS. If a guest was running KVM is responsible for doing the world switch to restore host EL1 and return there. This should all work today. > But in host os situation, we can't inject an vSEI (if we don't set HCR_EL2.AMO), so we have to jump to EL1 SEI vector. Because your firmware is at EL3 you have to check PSTATE.A and jump into the SError vector in both cases, you just choose if its VBAR_EL2 or VBAR_EL1 based on HCR_EL2.AMO. > Then the process following ESB won't be executed becuase SEI is taken to EL3 from the ESB instruction in EL1, and when control > is returned to OS, we are in EL1 SEI vector rather than the ESB instruction. Firmware should set ELR_EL1 so that an eret from the EL1 SError vector takes you back to the ESB instruction that took us to EL3 in the first place. There is a problem here mixing SError as the CPU->Software notification of RAS errors and as APEI's SEI Software->Software notification that a firmware-first error has been handled by EL3. To elaborate: The routine in this patch was something like: 1 ESB 2 Read DISR_EL1 3 If set, branch to the SError handler. If we have firmware-first ESB will generate an SError as PSTATE.A at EL{1,2} doesn't mask SError for EL3. Firmware can then handle the RAS error. With this the CPU->Software story is done. Firmware notifying the OS via APEI is a different problem. If the error doesn't need to be notified to the OS via SEI, firmware can return to step 1 above with DISR_EL1 clear. The OS may be told via another mechanism such as polling or an irq. If PSTATE.A was clear, firmware can return to the SError vector. If PSTATE.A was set and firmware knows this was due to an ESB, it can set DISR_EL1. The problem is firmware can't know if the SError was due to an ESB. The only time we are likely to have PSTATE.A masked is when changing exception level. For this we can rely on IESB, so step 1 isn't necessary in the routine above. Firmware knows if an SError taken to EL3 is due to an IESB, as the ESR_EL3.IESB bit will be set, in this case it can write to DISR_EL1 and return. In Linux we can use IESB for changes in exception level, and ESB with PSTATE.A clear for any other need. (e.g. __switch_to()). Firmware can notify us using SEI if we use either of these two patterns. This may not work for other OS (Xen?), but this is a problem with using SEI as an APEI notification method. (Some cleanup of our SError unmasking is needed, I have some patches I will post on a branch shortly) > It is ok to just ignore the process following the ESB instruction in el0_sync, because the process will be sent SIGBUS signal. I don't understand. How will Linux know the process caused an error if we neither take an SError nor read DISR_EL1 after an ESB? > But for el0_irq, the irq process following the ESB may should be executed because the irq is not related to the user process. If we exit EL0 for an IRQ, and take an SError due to IESB we know the fault was due to EL0, not the exception level we entered to process the IRQ. > If we set HCR_EL2.AMO when SCR_EL3.EA is set. We can still inject an vSEI into host OS in EL3. (from EL3?) Only if the host OS is running at EL1 and isn't using EL2. Firmware can't know if we're using EL2 so this can't work. > Physical SError won't be taken to EL2 because SCR_EL3.EA is set. But this may be too racy is > not consistent with ARM rules. EL3 flipping bits in EL2 system registers behind EL2 software's back is going to have strange side effects. e.g. some Hypervisor may not change HCR_EL2 when switching VM-A to VM-B as the configuration is the same; now the virtual SError will be delivered to the wrong VM. (KVM doesn't do this, but Xen might.) >>>> You cant use SError to cover all the possible RAS exceptions. We already have >>>> this problem using SEI if PSTATE.A was set and the exception was an imprecise >>>> abort from EL2. We can't return to the interrupted context and we can't deliver >>>> an SError to EL2 either. >>> >>> SEI came from EL2 and PSTATE.A is set. Is it the situation where VHE is enabled and CPU is running >>> in kernel space. If SEI occurs in kernel space, can we just panic or shutdown. >> >> firmware-panic? We can do a little better than that: >> If the IESB bit is set in the ESR we can behave as if this were an ESB and have >> firmware write an appropriate ESR to DISR_EL1 if PSTATE.A is set and the >> exception came from EL2. > > This method may solve the problem I said above. > Is IESB a new bit added int ESR in the newest RAS spec? The latest ARM-ARM (DDI0487B.a) describes it as part of the "RAS Extension in ARMv8.2" in section 'A1.7.5 The Reliability, Availability, and Serviceability (RAS) Extension'. Jumping to 'D7.2.28 ESR_ELx, Exception Syndrome Register (ELx)', then page D7-2284 for the 'ISS encoding for an SError interrupt', it indicates "the SError interrupt was synchronized by the implicit [ESB] and taken immediately" As above, with IESB for changes in exception level, and unmasking PSTATE.A for any other use of ESB, Linux can make sure that firmware can always notify us of an APEI event using SEI. Another OS (or even UEFI) may not do the same, so firmware should be prepared for ESB to be executed with PSTATE.A masked (i.e, not using IESB). If there is no wider firmware-policy for this (reboot, notify a remote CPU), then returning to the faulting location and trying to inject SError later is all we can do. Thanks, James
Hi James, Thanks for your explanation and suggests. On 2017/4/25 1:14, James Morse wrote: > Hi Wang Xiongfeng, > > On 21/04/17 12:33, Xiongfeng Wang wrote: >> On 2017/4/20 16:52, James Morse wrote: >>> On 19/04/17 03:37, Xiongfeng Wang wrote: >>>> On 2017/4/18 18:51, James Morse wrote: >>>>> The host expects to receive physical SError Interrupts. The ARM-ARM doesn't >>>>> describe a way to inject these as they are generated by the CPU. >>>>> >>>>> Am I right in thinking you want this to use SError Interrupts as an APEI >>>>> notification? (This isn't a CPU thing so the RAS spec doesn't cover this use) >>>> >>>> Yes, using sei as an APEI notification is one part of my consideration. Another use is for ESB. >>>> RAS spec 6.5.3 'Example software sequences: Variant: asynchronous External Abort with ESB' >>>> describes the SEI recovery process when ESB is implemented. >>>> >>>> In this situation, SEI is routed to EL3 (SCR_EL3.EA = 1). When an SEI occurs in EL0 and not been taken immediately, >>>> and then an ESB instruction at SVC entry is executed, SEI is taken to EL3. The ESB at SVC entry is >>>> used for preventing the error propagating from user space to kernel space. The EL3 SEI handler collects >>> >>>> the errors and fills in the APEI table, and then jump to EL2 SEI handler. EL2 SEI handler inject >>>> an vSEI into EL1 by setting HCR_EL2.VSE = 1, so that when returned to OS, an SEI is pending. >>> >>> This step has confused me. How would this work with VHE where the host runs at >>> EL2 and there is nothing at Host:EL1? >> >> RAS spec 6.5.3 'Example software sequences: Variant: asynchronous External Abort with ESB' >> I don't know whether the step described in the section is designed for guest os or host os or both. >> Yes, it doesn't work with VHE where the host runs at EL2. > > If it uses Virtual SError, it must be for an OS running at EL1, as the code > running at EL2 (VHE:Linux, firmware or Xen) must set HCR_EL2.AMO to make this work. > > I can't find the section you describe in this RAS spec: > https://developer.arm.com/docs/ddi0587/latest/armreliabilityavailabilityandserviceabilityrasspecificationarmv8forthearmv8aarchitectureprofile > I got the RAS spec from my company's ARM account. The document number is PRD03-PRDC-010953 . > >>> >From your description I assume you have some firmware resident at EL2. > >> Our actual SEI step is planned as follows: > > Ah, okay. You can't use Virtual SError at all then, as firmware doesn't own EL2: > KVM does. KVM will save/restore the HCR_EL2 register as part of its world > switch. Firmware must not modify the hyp registers behind the hyper-visors back. > > >> Host OS: EL0/EL1 -> EL3 -> EL0/EL1 > > i.e. While the host was running, so EL3 sees HCR_EL2.AMO clear, and directs the > SEI to EL1. > >> Guest OS: EL0/EL1 -> EL3 -> EL2 -> EL0/EL1 > > i.e. While the guest was running, so EL3 sees HCR_EL2.AMO set, directs the SEI > to EL2 where KVM performs the world-switch and returns to host EL1. > > Looks sensible. > > >> In guest os situation, we can inject an vSEI and return to where the SEI is taken from. > > An SEI from firmware should always end up in the host OS. If a guest was running > KVM is responsible for doing the world switch to restore host EL1 and return > there. This should all work today. > > >> But in host os situation, we can't inject an vSEI (if we don't set HCR_EL2.AMO), so we have to jump to EL1 SEI vector. > > Because your firmware is at EL3 you have to check PSTATE.A and jump into the > SError vector in both cases, you just choose if its VBAR_EL2 or VBAR_EL1 based > on HCR_EL2.AMO. > > >> Then the process following ESB won't be executed becuase SEI is taken to EL3 from the ESB instruction in EL1, and when control >> is returned to OS, we are in EL1 SEI vector rather than the ESB instruction. > > Firmware should set ELR_EL1 so that an eret from the EL1 SError vector takes you > back to the ESB instruction that took us to EL3 in the first place. > > > There is a problem here mixing SError as the CPU->Software notification of RAS > errors and as APEI's SEI Software->Software notification that a firmware-first > error has been handled by EL3. > > To elaborate: > The routine in this patch was something like: > 1 ESB > 2 Read DISR_EL1 > 3 If set, branch to the SError handler. > > If we have firmware-first ESB will generate an SError as PSTATE.A at EL{1,2} > doesn't mask SError for EL3. Firmware can then handle the RAS error. With this > the CPU->Software story is done. Firmware notifying the OS via APEI is a > different problem. > > If the error doesn't need to be notified to the OS via SEI, firmware can return > to step 1 above with DISR_EL1 clear. The OS may be told via another mechanism > such as polling or an irq. > > If PSTATE.A was clear, firmware can return to the SError vector. If PSTATE.A was > set and firmware knows this was due to an ESB, it can set DISR_EL1. The problem > is firmware can't know if the SError was due to an ESB. Yes, if implicit ESB is not implemented, we can't know if the sError was due to an ESB unless we only set PSTATE.A when ESB is executed. > > The only time we are likely to have PSTATE.A masked is when changing exception > level. For this we can rely on IESB, so step 1 isn't necessary in the routine > above. Firmware knows if an SError taken to EL3 is due to an IESB, as the > ESR_EL3.IESB bit will be set, in this case it can write to DISR_EL1 and return. > > In Linux we can use IESB for changes in exception level, and ESB with PSTATE.A > clear for any other need. (e.g. __switch_to()). Firmware can notify us using SEI > if we use either of these two patterns. This may not work for other OS (Xen?), > but this is a problem with using SEI as an APEI notification method. > > (Some cleanup of our SError unmasking is needed, I have some patches I will post > on a branch shortly) > > >> It is ok to just ignore the process following the ESB instruction in el0_sync, because the process will be sent SIGBUS signal. > > I don't understand. How will Linux know the process caused an error if we > neither take an SError nor read DISR_EL1 after an ESB? > I think there may be some misunderstanding here. The ESB instruction is placed in kernel_entry of el0_sync and el0_irq. For the el0_sync, such as an syscall from userspace, after ESB is executed, we check whether DISR.A is set. If it is not set, we go on to process the syscall. If it is set, we jump to sError vector and then just eret. But for el0_irq, after ESB is executed, we check whether DISR.A is set. If it is set, we jump to (using BL) sError vector, process SEI, return back to irq handler and process irq, and then eret. > >> But for el0_irq, the irq process following the ESB may should be executed because the irq is not related to the user process. > > If we exit EL0 for an IRQ, and take an SError due to IESB we know the fault was > due to EL0, not the exception level we entered to process the IRQ. > > >> If we set HCR_EL2.AMO when SCR_EL3.EA is set. We can still inject an vSEI into host OS in EL3. > > (from EL3?) Only if the host OS is running at EL1 and isn't using EL2. Firmware > can't know if we're using EL2 so this can't work. > > >> Physical SError won't be taken to EL2 because SCR_EL3.EA is set. But this may be too racy is >> not consistent with ARM rules. > > EL3 flipping bits in EL2 system registers behind EL2 software's back is going to > have strange side effects. e.g. some Hypervisor may not change HCR_EL2 when > switching VM-A to VM-B as the configuration is the same; now the virtual SError > will be delivered to the wrong VM. (KVM doesn't do this, but Xen might.) > > >>>>> You cant use SError to cover all the possible RAS exceptions. We already have >>>>> this problem using SEI if PSTATE.A was set and the exception was an imprecise >>>>> abort from EL2. We can't return to the interrupted context and we can't deliver >>>>> an SError to EL2 either. >>>> >>>> SEI came from EL2 and PSTATE.A is set. Is it the situation where VHE is enabled and CPU is running >>>> in kernel space. If SEI occurs in kernel space, can we just panic or shutdown. >>> >>> firmware-panic? We can do a little better than that: >>> If the IESB bit is set in the ESR we can behave as if this were an ESB and have >>> firmware write an appropriate ESR to DISR_EL1 if PSTATE.A is set and the >>> exception came from EL2. >> >> This method may solve the problem I said above. >> Is IESB a new bit added int ESR in the newest RAS spec? > > The latest ARM-ARM (DDI0487B.a) describes it as part of the "RAS Extension in > ARMv8.2" in section 'A1.7.5 The Reliability, Availability, and Serviceability > (RAS) Extension'. > > Jumping to 'D7.2.28 ESR_ELx, Exception Syndrome Register (ELx)', then page > D7-2284 for the 'ISS encoding for an SError interrupt', it indicates "the SError > interrupt was synchronized by the implicit [ESB] and taken immediately" > > > As above, with IESB for changes in exception level, and unmasking PSTATE.A for > any other use of ESB, Linux can make sure that firmware can always notify us of > an APEI event using SEI. > > Another OS (or even UEFI) may not do the same, so firmware should be prepared > for ESB to be executed with PSTATE.A masked (i.e, not using IESB). If there is > no wider firmware-policy for this (reboot, notify a remote CPU), then returning > to the faulting location and trying to inject SError later is all we can do. > > > Thanks, > > James > > . > Thanks, Wang Xiongfeng
Hi Xiongfeng Wang, On 28/04/17 03:55, Xiongfeng Wang wrote: >>> >> It is ok to just ignore the process following the ESB instruction in el0_sync, because the process will be sent SIGBUS signal. >> > >> > I don't understand. How will Linux know the process caused an error if we >> > neither take an SError nor read DISR_EL1 after an ESB? > I think there may be some misunderstanding here. The ESB instruction is placed in kernel_entry > of el0_sync and el0_irq. For the el0_sync, such as an syscall from userspace, after ESB is executed, > we check whether DISR.A is set. If it is not set, we go on to process the syscall. If it is set, we > jump to sError vector and then just eret. Ah, this looks like an early optimisation! We can't assume that the SError will result in the processing being killed, the AET bits of the SError ISS Encoding (page D7-2284 of ARM-ARM DDI0487B.a), has a 'corrected' error encoding. For these I would expect the SError-vector C code to do nothing and return to where it came from. In this case the syscall should still be run. Thanks, James
Hi James, On 2017/5/9 1:27, James Morse wrote: > Hi Xiongfeng Wang, > > On 28/04/17 03:55, Xiongfeng Wang wrote: >>>>>> It is ok to just ignore the process following the ESB instruction in el0_sync, because the process will be sent SIGBUS signal. >>>> >>>> I don't understand. How will Linux know the process caused an error if we >>>> neither take an SError nor read DISR_EL1 after an ESB? > >> I think there may be some misunderstanding here. The ESB instruction is placed in kernel_entry >> of el0_sync and el0_irq. For the el0_sync, such as an syscall from userspace, after ESB is executed, >> we check whether DISR.A is set. If it is not set, we go on to process the syscall. If it is set, we >> jump to sError vector and then just eret. > > Ah, this looks like an early optimisation! > > We can't assume that the SError will result in the processing being killed, the > AET bits of the SError ISS Encoding (page D7-2284 of ARM-ARM DDI0487B.a), has a > 'corrected' error encoding. > For these I would expect the SError-vector C code to do nothing and return to > where it came from. In this case the syscall should still be run. > Yes, it makes sense, so we should add a return value for the do_sei handler. Thanks, Wang Xiongfeng
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 859a90e..7402175 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -911,6 +911,22 @@ endmenu menu "ARMv8.2 architectural features" +config ARM64_ESB + bool "Enable support for Error Synchronization Barrier (ESB)" + default n + help + Error Synchronization Barrier (ESB; part of the ARMv8.2 Extensions) + is used to synchronize Unrecoverable errors. That is, containable errors + architecturally consumed by the PE and not silently propagated. + + Without ESB it is not generally possible to isolate an Unrecoverable + error because it is not known which instruction generated the error. + + Selecting this option allows inject esb instruction before the exception + change. If ARMv8.2 RAS Extension is not support, ESB is treated as a NOP. + + Note that ESB instruction can introduce slight overhead, so say N if unsure. + config ARM64_UAO bool "Enable support for User Access Override (UAO)" default y diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index f20c64a..22f9c90 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -106,6 +106,20 @@ #define ESR_ELx_AR (UL(1) << 14) #define ESR_ELx_CM (UL(1) << 8) +#define ESR_Elx_DFSC_SEI (0x11) + +#define ESR_ELx_AET_SHIFT (10) +#define ESR_ELx_AET_MAX (7) +#define ESR_ELx_AET_MASK (UL(7) << ESR_ELx_AET_SHIFT) +#define ESR_ELx_AET(esr) (((esr) & ESR_ELx_AET_MASK) >> ESR_ELx_AET_SHIFT) + +#define ESR_ELx_AET_UC (0) +#define ESR_ELx_AET_UEU (1) +#define ESR_ELx_AET_UEO (2) +#define ESR_ELx_AET_UER (3) +#define ESR_ELx_AET_CE (6) + + /* ISS field definitions for exceptions taken in to Hyp */ #define ESR_ELx_CV (UL(1) << 24) #define ESR_ELx_COND_SHIFT (20) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 43512d4..d8a7306 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -69,7 +69,14 @@ #define BAD_FIQ 2 #define BAD_ERROR 3 + .arch_extension ras + .macro kernel_entry, el, regsize = 64 +#ifdef CONFIG_ARM64_ESB + .if \el == 0 + esb + .endif +#endif sub sp, sp, #S_FRAME_SIZE .if \regsize == 32 mov w0, w0 // zero upper 32 bits of x0 @@ -208,6 +215,7 @@ alternative_else_nop_endif #endif .if \el == 0 + msr daifset, #0xF // Set flags ldr x23, [sp, #S_SP] // load return stack pointer msr sp_el0, x23 #ifdef CONFIG_ARM64_ERRATUM_845719 @@ -226,6 +234,15 @@ alternative_else_nop_endif msr elr_el1, x21 // set up the return data msr spsr_el1, x22 + +#ifdef CONFIG_ARM64_ESB + .if \el == 0 + esb // Error Synchronization Barrier + mrs x21, disr_el1 // Check for deferred error + tbnz x21, #31, el1_sei + .endif +#endif + ldp x0, x1, [sp, #16 * 0] ldp x2, x3, [sp, #16 * 1] ldp x4, x5, [sp, #16 * 2] @@ -318,7 +335,7 @@ ENTRY(vectors) ventry el1_sync_invalid // Synchronous EL1t ventry el1_irq_invalid // IRQ EL1t ventry el1_fiq_invalid // FIQ EL1t - ventry el1_error_invalid // Error EL1t + ventry el1_error // Error EL1t ventry el1_sync // Synchronous EL1h ventry el1_irq // IRQ EL1h @@ -328,7 +345,7 @@ ENTRY(vectors) ventry el0_sync // Synchronous 64-bit EL0 ventry el0_irq // IRQ 64-bit EL0 ventry el0_fiq_invalid // FIQ 64-bit EL0 - ventry el0_error_invalid // Error 64-bit EL0 + ventry el0_error // Error 64-bit EL0 #ifdef CONFIG_COMPAT ventry el0_sync_compat // Synchronous 32-bit EL0 @@ -508,12 +525,31 @@ el1_preempt: ret x24 #endif + .align 6 +el1_error: + kernel_entry 1 +el1_sei: + /* + * asynchronous SError interrupt from kernel + */ + mov x0, sp + mrs x1, esr_el1 + mov x2, #1 // exception level of SEI generated + b do_sei +ENDPROC(el1_error) + + /* * EL0 mode handlers. */ .align 6 el0_sync: kernel_entry 0 +#ifdef CONFIG_ARM64_ESB + mrs x26, disr_el1 + tbnz x26, #31, el0_sei // check DISR.A + msr daifclr, #0x4 // unmask SEI +#endif mrs x25, esr_el1 // read the syndrome register lsr x24, x25, #ESR_ELx_EC_SHIFT // exception class cmp x24, #ESR_ELx_EC_SVC64 // SVC in 64-bit state @@ -688,8 +724,38 @@ el0_inv: ENDPROC(el0_sync) .align 6 +el0_error: + kernel_entry 0 +el0_sei: + /* + * asynchronous SError interrupt from userspace + */ + ct_user_exit + mov x0, sp + mrs x1, esr_el1 + mov x2, #0 + bl do_sei + b ret_to_user +ENDPROC(el0_error) + + .align 6 el0_irq: kernel_entry 0 +#ifdef CONFIG_ARM64_ESB + mrs x26, disr_el1 + tbz x26, #31, el0_irq_naked // check DISR.A + + mov x0, sp + mrs x1, esr_el1 + mov x2, 0 + + /* + * The SEI generated at EL0 is not affect this irq context, + * so after sei handler, we continue process this irq. + */ + bl do_sei + msr daifclr, #0x4 // unmask SEI +#endif el0_irq_naked: enable_dbg #ifdef CONFIG_TRACE_IRQFLAGS diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index b6d6727..99be6d8 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -643,6 +643,34 @@ asmlinkage void bad_mode(struct pt_regs *regs, int reason, unsigned int esr) handler[reason], smp_processor_id(), esr, esr_get_class_string(esr)); + die("Oops - bad mode", regs, 0); + local_irq_disable(); + panic("bad mode"); +} + +static const char *sei_context[] = { + "userspace", /* EL0 */ + "kernel", /* EL1 */ +}; + +static const char *sei_severity[] = { + [0 ... ESR_ELx_AET_MAX] = "Unknown", + [ESR_ELx_AET_UC] = "Uncontainable", + [ESR_ELx_AET_UEU] = "Unrecoverable", + [ESR_ELx_AET_UEO] = "Restartable", + [ESR_ELx_AET_UER] = "Recoverable", + [ESR_ELx_AET_CE] = "Corrected", +}; + +DEFINE_PER_CPU(int, sei_in_process); +asmlinkage void do_sei(struct pt_regs *regs, unsigned int esr, int el) +{ + int aet = ESR_ELx_AET(esr); + console_verbose(); + + pr_crit("Asynchronous SError interrupt detected on CPU%d, %s, %s\n", + smp_processor_id(), sei_context[el], sei_severity[aet]); + /* * In firmware first mode, we could assume firmware will only generate one * of cper records at a time. There is no risk for one cpu to parse ghes table. @@ -653,9 +681,31 @@ asmlinkage void bad_mode(struct pt_regs *regs, int reason, unsigned int esr) this_cpu_dec(sei_in_process); } - die("Oops - bad mode", regs, 0); + if (el == 0 && IS_ENABLED(CONFIG_ARM64_ESB) && + cpus_have_cap(ARM64_HAS_RAS_EXTN)) { + siginfo_t info; + void __user *pc = (void __user *)instruction_pointer(regs); + + if (aet >= ESR_ELx_AET_UEO) + return; + + if (aet == ESR_ELx_AET_UEU) { + info.si_signo = SIGILL; + info.si_errno = 0; + info.si_code = ILL_ILLOPC; + info.si_addr = pc; + + current->thread.fault_address = 0; + current->thread.fault_code = 0; + + force_sig_info(info.si_signo, &info, current); + + return; + } + } + local_irq_disable(); - panic("bad mode"); + panic("Asynchronous SError interrupt"); } /*