Message ID | 20230812164314.352131-1-deller@gmx.de (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Fix signal handler to detect crashes in qemu-linux-user | expand |
On 8/12/23 09:43, Helge Deller wrote: > +/* _init and _fini are provided by the linker */ > +extern char _init; > +extern char _fini; > + > static void host_signal_handler(int host_sig, siginfo_t *info, void *puc) > { > CPUArchState *env = thread_cpu->env_ptr; > @@ -819,6 +848,13 @@ static void host_signal_handler(int host_sig, siginfo_t *info, void *puc) > if (host_sig == SIGSEGV) { > bool maperr = true; > > + /* Did segfault happened in qemu source code? */ > + if ((pc >= (uintptr_t) &_init && pc < (uintptr_t) &_fini) || > + (TARGET_ABI_BITS == 32 && HOST_LONG_BITS == 64 > + && !h2g_valid(host_addr))) { > + qemu_show_backtrace(info); > + } This is incorrect. I did mention that you should look at adjust_signal_pc, which has a lot of commentary on the subject. (0) Place this after the write-protect and restart check just below, since that is not an error of any sort. (1) If helper_retaddr == 1, then this is a fault reading for translation and is a guest SIGSEGV. (2) If helper_retaddr != 0, then this is a fault in some cpu_ldst.h operation and is a guest SIGSEGV. (3) If in_code_gen_buffer(host_signal_pc()), then this is a fault within generated code and is a guest SIGSEGV. Otherwise it's a host SIGSEGV. r~
* Richard Henderson <richard.henderson@linaro.org>: > On 8/12/23 09:43, Helge Deller wrote: > > +/* _init and _fini are provided by the linker */ > > +extern char _init; > > +extern char _fini; > > + > > static void host_signal_handler(int host_sig, siginfo_t *info, void *puc) > > { > > CPUArchState *env = thread_cpu->env_ptr; > > @@ -819,6 +848,13 @@ static void host_signal_handler(int host_sig, siginfo_t *info, void *puc) > > if (host_sig == SIGSEGV) { > > bool maperr = true; > > > > + /* Did segfault happened in qemu source code? */ > > + if ((pc >= (uintptr_t) &_init && pc < (uintptr_t) &_fini) || > > + (TARGET_ABI_BITS == 32 && HOST_LONG_BITS == 64 > > + && !h2g_valid(host_addr))) { > > + qemu_show_backtrace(info); > > + } > > This is incorrect. > > I did mention that you should look at adjust_signal_pc, Yes, you did. Maybe I was blind or didn't fully understood yet. > which has a lot of commentary on the subject. > > (0) Place this after the write-protect and restart check > just below, since that is not an error of any sort. > > (1) If helper_retaddr == 1, then this is a fault reading > for translation and is a guest SIGSEGV. > > (2) If helper_retaddr != 0, then this is a fault in some > cpu_ldst.h operation and is a guest SIGSEGV. (1) and (2) means host SIGSEGV can only happen if helper_retaddr == 0. > (3) If in_code_gen_buffer(host_signal_pc()), then this > is a fault within generated code and is a guest SIGSEGV. > > Otherwise it's a host SIGSEGV. So, basically like this? Helge diff --git a/linux-user/signal.c b/linux-user/signal.c index 748a98f3e5..9762246eec 100644 --- a/linux-user/signal.c +++ b/linux-user/signal.c @@ -23,6 +23,7 @@ #include <sys/ucontext.h> #include <sys/resource.h> +#include <execinfo.h> #include "qemu.h" #include "user-internals.h" @@ -32,6 +33,7 @@ #include "signal-common.h" #include "host-signal.h" #include "user/safe-syscall.h" +#include "tcg/tcg.h" static struct target_sigaction sigact_table[TARGET_NSIG]; @@ -781,6 +783,31 @@ static inline void rewind_if_in_safe_syscall(void *puc) } } +static void qemu_show_backtrace(siginfo_t *info) +{ + void *array[20]; + char **strings; + int size, i; + + fprintf(stderr, "---------------\n"); + fprintf(stderr, "QEMU linux-user v" QEMU_VERSION " for target " + UNAME_MACHINE "\n"); + fprintf(stderr, "QEMU internal error: signal=%d, errno=%d, " + "code=%d, addr=%p\n", + info->si_signo, info->si_errno, info->si_code, + info->si_addr); + fprintf(stderr, "while running: %s\n", exec_path); + size = backtrace(array, ARRAY_SIZE(array)); + strings = backtrace_symbols(array, size); + if (strings) { + fprintf(stderr, "QEMU backtrace:\n"); + for (i = 0; i < size; i++) + fprintf(stderr, "%s\n", strings[i]); + } + free (strings); + exit(info->si_code); +} + static void host_signal_handler(int host_sig, siginfo_t *info, void *puc) { CPUArchState *env = thread_cpu->env_ptr; @@ -800,7 +827,7 @@ static void host_signal_handler(int host_sig, siginfo_t *info, void *puc) */ if ((host_sig == SIGSEGV || host_sig == SIGBUS) && info->si_code > 0) { MMUAccessType access_type; - uintptr_t host_addr; + uintptr_t host_addr, host_pc; abi_ptr guest_addr; bool is_write; @@ -812,7 +839,8 @@ static void host_signal_handler(int host_sig, siginfo_t *info, void *puc) */ guest_addr = h2g_nocheck(host_addr); - pc = host_signal_pc(uc); + host_pc = host_signal_pc(uc); + pc = host_pc; is_write = host_signal_write(info, uc); access_type = adjust_signal_pc(&pc, is_write); @@ -837,6 +865,12 @@ static void host_signal_handler(int host_sig, siginfo_t *info, void *puc) } } + /* Did segfault happened in qemu source code? */ + if (helper_retaddr == 0 && + !in_code_gen_buffer((void*)host_pc)) { + qemu_show_backtrace(info); + } + sigprocmask(SIG_SETMASK, sigmask, NULL); cpu_loop_exit_sigsegv(cpu, guest_addr, access_type, maperr, pc); } else { diff --git a/linux-user/syscall.c b/linux-user/syscall.c index 9353268cc1..da29d97816 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -8132,6 +8132,7 @@ static int open_self_maps_1(CPUArchState *cpu_env, int fd, bool smaps) IntervalTreeNode *s; int count; +*(int*)NULL = 1; for (s = interval_tree_iter_first(map_info, 0, -1); s; s = interval_tree_iter_next(s, 0, -1)) { MapInfo *e = container_of(s, MapInfo, itree);
On 8/12/23 11:12, Helge Deller wrote: >> I did mention that you should look at adjust_signal_pc, > > Yes, you did. > Maybe I was blind or didn't fully understood yet. > >> which has a lot of commentary on the subject. >> >> (0) Place this after the write-protect and restart check >> just below, since that is not an error of any sort. >> >> (1) If helper_retaddr == 1, then this is a fault reading >> for translation and is a guest SIGSEGV. >> >> (2) If helper_retaddr != 0, then this is a fault in some >> cpu_ldst.h operation and is a guest SIGSEGV. > > (1) and (2) means host SIGSEGV can only happen if > helper_retaddr == 0. Yes. > >> (3) If in_code_gen_buffer(host_signal_pc()), then this >> is a fault within generated code and is a guest SIGSEGV. >> >> Otherwise it's a host SIGSEGV. > > So, basically like this? Looks about right. r~
diff --git a/linux-user/signal.c b/linux-user/signal.c index 748a98f3e5..d445376f06 100644 --- a/linux-user/signal.c +++ b/linux-user/signal.c @@ -23,6 +23,7 @@ #include <sys/ucontext.h> #include <sys/resource.h> +#include <execinfo.h> #include "qemu.h" #include "user-internals.h" @@ -781,6 +782,34 @@ static inline void rewind_if_in_safe_syscall(void *puc) } } +static void qemu_show_backtrace(siginfo_t *info) +{ + void *array[20]; + char **strings; + int size, i; + + fprintf(stderr, "QEMU linux-user v" QEMU_VERSION " for target " + UNAME_MACHINE "\n"); + fprintf(stderr, "QEMU internal error: signal=%d, errno=%d, " + "code=%d, addr=%p\n", + info->si_signo, info->si_errno, info->si_code, + info->si_addr); + fprintf(stderr, "while running: %s\n", exec_path); + size = backtrace(array, ARRAY_SIZE(array)); + strings = backtrace_symbols(array, size); + if (strings) { + fprintf(stderr, "QEMU backtrace:\n"); + for (i = 0; i < size; i++) + fprintf(stderr, "%s\n", strings[i]); + } + free (strings); + exit(info->si_code); +} + +/* _init and _fini are provided by the linker */ +extern char _init; +extern char _fini; + static void host_signal_handler(int host_sig, siginfo_t *info, void *puc) { CPUArchState *env = thread_cpu->env_ptr; @@ -819,6 +848,13 @@ static void host_signal_handler(int host_sig, siginfo_t *info, void *puc) if (host_sig == SIGSEGV) { bool maperr = true; + /* Did segfault happened in qemu source code? */ + if ((pc >= (uintptr_t) &_init && pc < (uintptr_t) &_fini) || + (TARGET_ABI_BITS == 32 && HOST_LONG_BITS == 64 + && !h2g_valid(host_addr))) { + qemu_show_backtrace(info); + } + if (info->si_code == SEGV_ACCERR && h2g_valid(host_addr)) { /* If this was a write to a TB protected page, restart. */ if (is_write && diff --git a/linux-user/syscall.c b/linux-user/syscall.c index 9353268cc1..da29d97816 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -8132,6 +8132,7 @@ static int open_self_maps_1(CPUArchState *cpu_env, int fd, bool smaps) IntervalTreeNode *s; int count; +*(int*)NULL = 1; for (s = interval_tree_iter_first(map_info, 0, -1); s; s = interval_tree_iter_next(s, 0, -1)) { MapInfo *e = container_of(s, MapInfo, itree);
If there is an internal program error in the qemu source code which triggers a SIGSEGV, qemu will currently assume this is a SIGSEGV of the target and print: (hppa-chroot)root@p100:/# cat /proc/self/maps ** ERROR:../../home/cvs/qemu/qemu/accel/tcg/cpu-exec.c:532:cpu_exec_longjmp_cleanup: assertion failed: (cpu == current_cpu) Bail out! ERROR:../../home/cvs/qemu/qemu/accel/tcg/cpu-exec.c:532:cpu_exec_longjmp_cleanup: assertion failed: (cpu == current_cpu) ** This error message is very misleading for developers and end-users. The attached patch will print instead: (hppa-chroot)root@p100:/# cat /proc/self/maps QEMU linux-user v8.0.93 for target parisc QEMU internal error: signal=11, errno=0, code=1, addr=(nil) while running: /usr/bin/cat QEMU backtrace: [0x7f70cd045115] [0x7f70cd21b140] [0x7f70cd04ec49] [0x7f70cd04ec6b] [0x7f70cd0597e2] [0x7f70cd05d9ed] [0x7f70cd064008] [0x7f70ccffbd2d] [0x7f70ccff57f8] [0x7f70cd205868] [0x7f70cd206f4f] [0x7f70ccff60a5] Note that glibc's backtrace() can not resolve the addresses to function names for static binaries, which is why only addresses are show above. Signed-off-by: Helge Deller <deller@gmx.de> v2: - Refined crash detection based on IP address, suggested by Richard - More info in crash dump, e.g. qemu version and target --- linux-user/signal.c | 36 ++++++++++++++++++++++++++++++++++++ linux-user/syscall.c | 1 + 2 files changed, 37 insertions(+) -- 2.41.0