Message ID | 20201214075615.25038-4-jgross@suse.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | xen: add support for automatic debug key actions in case of crash | expand |
On 14.12.2020 08:56, Juergen Gross wrote: > @@ -519,6 +521,59 @@ void __init initialize_keytable(void) > } > } > > +#define CRASHACTION_SIZE 32 > +static char crash_debug_panic[CRASHACTION_SIZE]; > +string_runtime_param("crash-debug-panic", crash_debug_panic); > +static char crash_debug_hwdom[CRASHACTION_SIZE]; > +string_runtime_param("crash-debug-hwdom", crash_debug_hwdom); > +static char crash_debug_watchdog[CRASHACTION_SIZE]; > +string_runtime_param("crash-debug-watchdog", crash_debug_watchdog); > +#ifdef CONFIG_KEXEC > +static char crash_debug_kexeccmd[CRASHACTION_SIZE]; > +string_runtime_param("crash-debug-kexeccmd", crash_debug_kexeccmd); > +#else > +#define crash_debug_kexeccmd NULL > +#endif > +static char crash_debug_debugkey[CRASHACTION_SIZE]; > +string_runtime_param("crash-debug-debugkey", crash_debug_debugkey); > + > +void keyhandler_crash_action(enum crash_reason reason) > +{ > + static const char *const crash_action[] = { > + [CRASHREASON_PANIC] = crash_debug_panic, > + [CRASHREASON_HWDOM] = crash_debug_hwdom, > + [CRASHREASON_WATCHDOG] = crash_debug_watchdog, > + [CRASHREASON_KEXECCMD] = crash_debug_kexeccmd, > + [CRASHREASON_DEBUGKEY] = crash_debug_debugkey, > + }; > + static bool ignore; > + const char *action; > + > + /* Some handlers are not functional too early. */ > + if ( system_state < SYS_STATE_smp_boot ) > + return; > + > + /* Avoid recursion. */ > + if ( ignore ) > + return; > + ignore = true; > + > + if ( (unsigned int)reason >= ARRAY_SIZE(crash_action) ) > + return; > + action = crash_action[reason]; > + if ( !action ) > + return; If we consider either of the last two "return"s to possibly be taken, I think the "ignore" logic wants to live below them, not above, avoiding no output at all when a recursive invocation turns out to be a "good" one. Then, as before, Reviewed-by: Jan Beulich <jbeulich@suse.com> Jan
On 14.12.20 10:16, Jan Beulich wrote: > On 14.12.2020 08:56, Juergen Gross wrote: >> @@ -519,6 +521,59 @@ void __init initialize_keytable(void) >> } >> } >> >> +#define CRASHACTION_SIZE 32 >> +static char crash_debug_panic[CRASHACTION_SIZE]; >> +string_runtime_param("crash-debug-panic", crash_debug_panic); >> +static char crash_debug_hwdom[CRASHACTION_SIZE]; >> +string_runtime_param("crash-debug-hwdom", crash_debug_hwdom); >> +static char crash_debug_watchdog[CRASHACTION_SIZE]; >> +string_runtime_param("crash-debug-watchdog", crash_debug_watchdog); >> +#ifdef CONFIG_KEXEC >> +static char crash_debug_kexeccmd[CRASHACTION_SIZE]; >> +string_runtime_param("crash-debug-kexeccmd", crash_debug_kexeccmd); >> +#else >> +#define crash_debug_kexeccmd NULL >> +#endif >> +static char crash_debug_debugkey[CRASHACTION_SIZE]; >> +string_runtime_param("crash-debug-debugkey", crash_debug_debugkey); >> + >> +void keyhandler_crash_action(enum crash_reason reason) >> +{ >> + static const char *const crash_action[] = { >> + [CRASHREASON_PANIC] = crash_debug_panic, >> + [CRASHREASON_HWDOM] = crash_debug_hwdom, >> + [CRASHREASON_WATCHDOG] = crash_debug_watchdog, >> + [CRASHREASON_KEXECCMD] = crash_debug_kexeccmd, >> + [CRASHREASON_DEBUGKEY] = crash_debug_debugkey, >> + }; >> + static bool ignore; >> + const char *action; >> + >> + /* Some handlers are not functional too early. */ >> + if ( system_state < SYS_STATE_smp_boot ) >> + return; >> + >> + /* Avoid recursion. */ >> + if ( ignore ) >> + return; >> + ignore = true; >> + >> + if ( (unsigned int)reason >= ARRAY_SIZE(crash_action) ) >> + return; >> + action = crash_action[reason]; >> + if ( !action ) >> + return; > > If we consider either of the last two "return"s to possibly be > taken, I think the "ignore" logic wants to live below them, not > above, avoiding no output at all when a recursive invocation > turns out to be a "good" one. Then, as before, > Reviewed-by: Jan Beulich <jbeulich@suse.com> Fine with me. Juergen
Hi Juergen, On 14/12/2020 07:56, Juergen Gross wrote: > diff --git a/xen/common/keyhandler.c b/xen/common/keyhandler.c > index de120fa092..806355ed8b 100644 > --- a/xen/common/keyhandler.c > +++ b/xen/common/keyhandler.c > @@ -3,7 +3,9 @@ > */ > > #include <asm/regs.h> > +#include <xen/delay.h> > #include <xen/keyhandler.h> > +#include <xen/param.h> > #include <xen/shutdown.h> > #include <xen/event.h> > #include <xen/console.h> > @@ -519,6 +521,59 @@ void __init initialize_keytable(void) > } > } > > +#define CRASHACTION_SIZE 32 > +static char crash_debug_panic[CRASHACTION_SIZE]; > +string_runtime_param("crash-debug-panic", crash_debug_panic); > +static char crash_debug_hwdom[CRASHACTION_SIZE]; > +string_runtime_param("crash-debug-hwdom", crash_debug_hwdom); > +static char crash_debug_watchdog[CRASHACTION_SIZE]; > +string_runtime_param("crash-debug-watchdog", crash_debug_watchdog); > +#ifdef CONFIG_KEXEC > +static char crash_debug_kexeccmd[CRASHACTION_SIZE]; > +string_runtime_param("crash-debug-kexeccmd", crash_debug_kexeccmd); > +#else > +#define crash_debug_kexeccmd NULL > +#endif > +static char crash_debug_debugkey[CRASHACTION_SIZE]; > +string_runtime_param("crash-debug-debugkey", crash_debug_debugkey); > + > +void keyhandler_crash_action(enum crash_reason reason) > +{ > + static const char *const crash_action[] = { > + [CRASHREASON_PANIC] = crash_debug_panic, > + [CRASHREASON_HWDOM] = crash_debug_hwdom, > + [CRASHREASON_WATCHDOG] = crash_debug_watchdog, > + [CRASHREASON_KEXECCMD] = crash_debug_kexeccmd, > + [CRASHREASON_DEBUGKEY] = crash_debug_debugkey, > + }; > + static bool ignore; > + const char *action; > + > + /* Some handlers are not functional too early. */ Can you explain in the commit message why this is necessary (An example would be useful)? Cheers,
On 14.12.20 11:24, Julien Grall wrote: > Hi Juergen, > > On 14/12/2020 07:56, Juergen Gross wrote: >> diff --git a/xen/common/keyhandler.c b/xen/common/keyhandler.c >> index de120fa092..806355ed8b 100644 >> --- a/xen/common/keyhandler.c >> +++ b/xen/common/keyhandler.c >> @@ -3,7 +3,9 @@ >> */ >> #include <asm/regs.h> >> +#include <xen/delay.h> >> #include <xen/keyhandler.h> >> +#include <xen/param.h> >> #include <xen/shutdown.h> >> #include <xen/event.h> >> #include <xen/console.h> >> @@ -519,6 +521,59 @@ void __init initialize_keytable(void) >> } >> } >> +#define CRASHACTION_SIZE 32 >> +static char crash_debug_panic[CRASHACTION_SIZE]; >> +string_runtime_param("crash-debug-panic", crash_debug_panic); >> +static char crash_debug_hwdom[CRASHACTION_SIZE]; >> +string_runtime_param("crash-debug-hwdom", crash_debug_hwdom); >> +static char crash_debug_watchdog[CRASHACTION_SIZE]; >> +string_runtime_param("crash-debug-watchdog", crash_debug_watchdog); >> +#ifdef CONFIG_KEXEC >> +static char crash_debug_kexeccmd[CRASHACTION_SIZE]; >> +string_runtime_param("crash-debug-kexeccmd", crash_debug_kexeccmd); >> +#else >> +#define crash_debug_kexeccmd NULL >> +#endif >> +static char crash_debug_debugkey[CRASHACTION_SIZE]; >> +string_runtime_param("crash-debug-debugkey", crash_debug_debugkey); >> + >> +void keyhandler_crash_action(enum crash_reason reason) >> +{ >> + static const char *const crash_action[] = { >> + [CRASHREASON_PANIC] = crash_debug_panic, >> + [CRASHREASON_HWDOM] = crash_debug_hwdom, >> + [CRASHREASON_WATCHDOG] = crash_debug_watchdog, >> + [CRASHREASON_KEXECCMD] = crash_debug_kexeccmd, >> + [CRASHREASON_DEBUGKEY] = crash_debug_debugkey, >> + }; >> + static bool ignore; >> + const char *action; >> + >> + /* Some handlers are not functional too early. */ > > Can you explain in the commit message why this is necessary (An example > would be useful)? Okay. Juergen
diff --git a/docs/misc/xen-command-line.pandoc b/docs/misc/xen-command-line.pandoc index b4a0d60c11..e4c0a144fc 100644 --- a/docs/misc/xen-command-line.pandoc +++ b/docs/misc/xen-command-line.pandoc @@ -574,6 +574,47 @@ reduction of features at Xen's disposal to manage guests. ### cpuinfo (x86) > `= <boolean>` +### crash-debug-debugkey +### crash-debug-hwdom +### crash-debug-kexeccmd +### crash-debug-panic +### crash-debug-watchdog +> `= <string>` + +> Can be modified at runtime + +Specify debug-key actions in cases of crashes. Each of the parameters applies +to a different crash reason. The `<string>` is a sequence of debug key +characters, with `+` having the special meaning of a 10 millisecond pause. + +`crash-debug-debugkey` will be used for crashes induced by the `C` debug +key (i.e. manually induced crash). + +`crash-debug-hwdom` denotes a crash of dom0. + +`crash-debug-kexeccmd` is an explicit request of dom0 to continue with the +kdump kernel via kexec. Only available on hypervisors built with CONFIG_KEXEC. + +`crash-debug-panic` is a crash of the hypervisor. + +`crash-debug-watchdog` is a crash due to the watchdog timer expiring. + +It should be noted that dumping diagnosis data to the console can fail in +multiple ways (missing data, hanging system, ...) depending on the reason +of the crash, which might have left the hypervisor in a bad state. In case +a debug-key action leads to another crash recursion will be avoided, so no +additional debug-key actions will be performed in this case. A crash in the +early boot phase will not result in any debug-key action, as the system +might not yet be in a state where the handlers can work. + +So e.g. `crash-debug-watchdog=0+0r` would dump dom0 state twice with 10 +milliseconds between the two state dumps, followed by the run queues of the +hypervisor, if the system crashes due to a watchdog timeout. + +Depending on the reason of the system crash it might happen that triggering +some debug key action will result in a hang instead of dumping data and then +doing a reboot or crash dump. + ### crashinfo_maxaddr > `= <size>` diff --git a/xen/common/kexec.c b/xen/common/kexec.c index 52cdc4ebc3..ebeee6405a 100644 --- a/xen/common/kexec.c +++ b/xen/common/kexec.c @@ -373,10 +373,12 @@ static int kexec_common_shutdown(void) return 0; } -void kexec_crash(void) +void kexec_crash(enum crash_reason reason) { int pos; + keyhandler_crash_action(reason); + pos = (test_bit(KEXEC_FLAG_CRASH_POS, &kexec_flags) != 0); if ( !test_bit(KEXEC_IMAGE_CRASH_BASE + pos, &kexec_flags) ) return; @@ -409,7 +411,7 @@ static long kexec_reboot(void *_image) static void do_crashdump_trigger(unsigned char key) { printk("'%c' pressed -> triggering crashdump\n", key); - kexec_crash(); + kexec_crash(CRASHREASON_DEBUGKEY); printk(" * no crash kernel loaded!\n"); } @@ -840,7 +842,7 @@ static int kexec_exec(XEN_GUEST_HANDLE_PARAM(void) uarg) ret = continue_hypercall_on_cpu(0, kexec_reboot, image); break; case KEXEC_TYPE_CRASH: - kexec_crash(); /* Does not return */ + kexec_crash(CRASHREASON_KEXECCMD); /* Does not return */ break; } diff --git a/xen/common/keyhandler.c b/xen/common/keyhandler.c index de120fa092..806355ed8b 100644 --- a/xen/common/keyhandler.c +++ b/xen/common/keyhandler.c @@ -3,7 +3,9 @@ */ #include <asm/regs.h> +#include <xen/delay.h> #include <xen/keyhandler.h> +#include <xen/param.h> #include <xen/shutdown.h> #include <xen/event.h> #include <xen/console.h> @@ -519,6 +521,59 @@ void __init initialize_keytable(void) } } +#define CRASHACTION_SIZE 32 +static char crash_debug_panic[CRASHACTION_SIZE]; +string_runtime_param("crash-debug-panic", crash_debug_panic); +static char crash_debug_hwdom[CRASHACTION_SIZE]; +string_runtime_param("crash-debug-hwdom", crash_debug_hwdom); +static char crash_debug_watchdog[CRASHACTION_SIZE]; +string_runtime_param("crash-debug-watchdog", crash_debug_watchdog); +#ifdef CONFIG_KEXEC +static char crash_debug_kexeccmd[CRASHACTION_SIZE]; +string_runtime_param("crash-debug-kexeccmd", crash_debug_kexeccmd); +#else +#define crash_debug_kexeccmd NULL +#endif +static char crash_debug_debugkey[CRASHACTION_SIZE]; +string_runtime_param("crash-debug-debugkey", crash_debug_debugkey); + +void keyhandler_crash_action(enum crash_reason reason) +{ + static const char *const crash_action[] = { + [CRASHREASON_PANIC] = crash_debug_panic, + [CRASHREASON_HWDOM] = crash_debug_hwdom, + [CRASHREASON_WATCHDOG] = crash_debug_watchdog, + [CRASHREASON_KEXECCMD] = crash_debug_kexeccmd, + [CRASHREASON_DEBUGKEY] = crash_debug_debugkey, + }; + static bool ignore; + const char *action; + + /* Some handlers are not functional too early. */ + if ( system_state < SYS_STATE_smp_boot ) + return; + + /* Avoid recursion. */ + if ( ignore ) + return; + ignore = true; + + if ( (unsigned int)reason >= ARRAY_SIZE(crash_action) ) + return; + action = crash_action[reason]; + if ( !action ) + return; + + while ( *action ) + { + if ( *action == '+' ) + mdelay(10); + else + handle_keypress(*action, NULL); + action++; + } +} + /* * Local variables: * mode: C diff --git a/xen/common/shutdown.c b/xen/common/shutdown.c index 912593915b..abde48aa4c 100644 --- a/xen/common/shutdown.c +++ b/xen/common/shutdown.c @@ -43,7 +43,7 @@ void hwdom_shutdown(u8 reason) case SHUTDOWN_crash: debugger_trap_immediate(); printk("Hardware Dom%u crashed: ", hardware_domain->domain_id); - kexec_crash(); + kexec_crash(CRASHREASON_HWDOM); maybe_reboot(); break; /* not reached */ @@ -56,7 +56,7 @@ void hwdom_shutdown(u8 reason) case SHUTDOWN_watchdog: printk("Hardware Dom%u shutdown: watchdog rebooting machine\n", hardware_domain->domain_id); - kexec_crash(); + kexec_crash(CRASHREASON_WATCHDOG); machine_restart(0); break; /* not reached */ diff --git a/xen/drivers/char/console.c b/xen/drivers/char/console.c index 861ad53a8f..acec277f5e 100644 --- a/xen/drivers/char/console.c +++ b/xen/drivers/char/console.c @@ -1271,7 +1271,7 @@ void panic(const char *fmt, ...) debugger_trap_immediate(); - kexec_crash(); + kexec_crash(CRASHREASON_PANIC); if ( opt_noreboot ) machine_halt(); diff --git a/xen/include/xen/kexec.h b/xen/include/xen/kexec.h index e85ba16405..9f7a912e97 100644 --- a/xen/include/xen/kexec.h +++ b/xen/include/xen/kexec.h @@ -1,6 +1,8 @@ #ifndef __XEN_KEXEC_H__ #define __XEN_KEXEC_H__ +#include <xen/keyhandler.h> + #ifdef CONFIG_KEXEC #include <public/kexec.h> @@ -48,7 +50,7 @@ void machine_kexec_unload(struct kexec_image *image); void machine_kexec_reserved(xen_kexec_reserve_t *reservation); void machine_reboot_kexec(struct kexec_image *image); void machine_kexec(struct kexec_image *image); -void kexec_crash(void); +void kexec_crash(enum crash_reason reason); void kexec_crash_save_cpu(void); struct crash_xen_info *kexec_crash_save_info(void); void machine_crash_shutdown(void); @@ -82,7 +84,11 @@ void vmcoreinfo_append_str(const char *fmt, ...) #define kexecing 0 static inline void kexec_early_calculations(void) {} -static inline void kexec_crash(void) {} +static inline void kexec_crash(enum crash_reason reason) +{ + keyhandler_crash_action(reason); +} + static inline void kexec_crash_save_cpu(void) {} static inline void set_kexec_crash_area_size(u64 system_ram) {} diff --git a/xen/include/xen/keyhandler.h b/xen/include/xen/keyhandler.h index 5131e86cbc..9c5830a037 100644 --- a/xen/include/xen/keyhandler.h +++ b/xen/include/xen/keyhandler.h @@ -48,4 +48,14 @@ void register_irq_keyhandler(unsigned char key, /* Inject a keypress into the key-handling subsystem. */ extern void handle_keypress(unsigned char key, struct cpu_user_regs *regs); +enum crash_reason { + CRASHREASON_PANIC, + CRASHREASON_HWDOM, + CRASHREASON_WATCHDOG, + CRASHREASON_KEXECCMD, + CRASHREASON_DEBUGKEY, +}; + +void keyhandler_crash_action(enum crash_reason reason); + #endif /* __XEN_KEYHANDLER_H__ */