diff mbox series

[kvm-unit-tests,v1] s390x: Improve stack traces that contain an interrupt frame

Message ID 20230405123508.854034-1-nsg@linux.ibm.com (mailing list archive)
State New, archived
Headers show
Series [kvm-unit-tests,v1] s390x: Improve stack traces that contain an interrupt frame | expand

Commit Message

Nina Schoetterl-Glausch April 5, 2023, 12:35 p.m. UTC
When we encounter an unexpected interrupt we print a stack trace.
While we can identify the interrupting instruction via the old psw,
we don't really have a way to identify callers further up the stack,
since we rely on the s390x elf abi calling convention to perform the
backtrace. An interrupt is not a call, so there are no guarantees about
the contents of the stack and return address registers.
If we get lucky their content is as we need it or valid for a previous
callee in which case we print one wrong caller and then proceed with the
correct ones.

Warn about the stack trace above the interrupting instruction possibly
not being correct by inserting a new stack frame with a warning symbol.
Also identify the interrupted instruction.

For example:

0x00000000000150f1: print_pgm_info at lib/s390x/interrupt.c:255
 (inlined by) handle_pgm_int at lib/s390x/interrupt.c:274
0x0000000000011099: pgm_int at s390x/cstart64.S:97
0x0000000000014523: sclp_service_call at lib/s390x/sclp.c:185
0x0000000000000000: lowcore at lib/s390x/asm/arch_def.h:172
0x0000000000014b8b: console_refill_read_buffer at lib/s390x/sclp-console.c:259
 (inlined by) __getchar at lib/s390x/sclp-console.c:290
0x00000000000188ef: getchar at lib/getchar.c:8

becomes:

0x00000000000151f9: print_pgm_info at lib/s390x/interrupt.c:255
 (inlined by) handle_pgm_int at lib/s390x/interrupt.c:274
0x00000000000110c1: pgm_int at s390x/cstart64.S:98
0x000000000001462f: servc at lib/s390x/asm/arch_def.h:459
 (inlined by) sclp_service_call at lib/s390x/sclp.c:186
0x0000000000019150: WARNING_THE_FOLLOWING_CALLERS_MIGHT_BE_CORRECT_BY_ACCIDENT_ONLY at s390x/cstart64.S:?
0x0000000000000000: lowcore at lib/s390x/asm/arch_def.h:172
0x0000000000014c93: console_refill_read_buffer at lib/s390x/sclp-console.c:259
 (inlined by) __getchar at lib/s390x/sclp-console.c:290

Signed-off-by: Nina Schoetterl-Glausch <nsg@linux.ibm.com>
---
 s390x/cstart64.S |  1 +
 s390x/macros.S   | 21 +++++++++++++++++++++
 2 files changed, 22 insertions(+)


base-commit: 5b5d27da2973b20ec29b18df4d749fb2190458af
prerequisite-patch-id: 619d9dfe41a0509d1f123849d696af3109e534ce
prerequisite-patch-id: b5b4345ef04be0c4c4c70903e343783a9ebec0ce
prerequisite-patch-id: 8b1ee5a4dd43bd7f70a69e0ffe1dfea0cfe2be91
prerequisite-patch-id: dc72bb12a0ee455bc607b69f9b644075338a15d0
prerequisite-patch-id: e394d9d3d4c0df3c9788c06e7e940a5abf645318
prerequisite-patch-id: efb98123d132fa4b0bf198dd2718966beea4fbd8

Comments

Nico Boehr April 17, 2023, 7:57 a.m. UTC | #1
Quoting Nina Schoetterl-Glausch (2023-04-05 14:35:08)
> When we encounter an unexpected interrupt we print a stack trace.
> While we can identify the interrupting instruction via the old psw,
> we don't really have a way to identify callers further up the stack,
> since we rely on the s390x elf abi calling convention to perform the
> backtrace. An interrupt is not a call, so there are no guarantees about
> the contents of the stack and return address registers.
> If we get lucky their content is as we need it or valid for a previous
> callee in which case we print one wrong caller and then proceed with the
> correct ones.

I did not think too much about it, so it might not work, but how about a
seperate interrupt stack?

Then, we could print the interrupt stack trace (which should be correct) and -
with a warning as you suggest - the maybe incorrect regular stack trace.
Nina Schoetterl-Glausch April 17, 2023, 9:06 a.m. UTC | #2
On Mon, 2023-04-17 at 09:57 +0200, Nico Boehr wrote:
> Quoting Nina Schoetterl-Glausch (2023-04-05 14:35:08)
> > When we encounter an unexpected interrupt we print a stack trace.
> > While we can identify the interrupting instruction via the old psw,
> > we don't really have a way to identify callers further up the stack,
> > since we rely on the s390x elf abi calling convention to perform the
> > backtrace. An interrupt is not a call, so there are no guarantees about
> > the contents of the stack and return address registers.
> > If we get lucky their content is as we need it or valid for a previous
> > callee in which case we print one wrong caller and then proceed with the
> > correct ones.
> 
> I did not think too much about it, so it might not work, but how about a
> seperate interrupt stack?
> 
> Then, we could print the interrupt stack trace (which should be correct) and -
> with a warning as you suggest - the maybe incorrect regular stack trace.

Not sure I'm getting the point. Do you want an implementation that doesn't have
the weirdness of using a frame with a special symbol to warn?
We only output a bunch of caller addresses and pretty_print_stacks.py formats that
into a readable stack trace. So by having the special symbol frame there are no
changes needed to that script, but it certainly would be possible do it differently,
e.g. output "STACK: dead beef WARN 0 ffff" and have the script
print the warning if it sees a WARN.
diff mbox series

Patch

diff --git a/s390x/cstart64.S b/s390x/cstart64.S
index 468ace3e..3cd0e3f3 100644
--- a/s390x/cstart64.S
+++ b/s390x/cstart64.S
@@ -13,6 +13,7 @@ 
 #include <asm/asm-offsets.h>
 #include <asm/sigp.h>
 
+.file __FILE__
 #include "macros.S"
 .section .init
 
diff --git a/s390x/macros.S b/s390x/macros.S
index e2a56a36..ebbb5fac 100644
--- a/s390x/macros.S
+++ b/s390x/macros.S
@@ -17,6 +17,20 @@ 
  * we re-load the registers and load the old PSW.
  */
 	.macro CALL_INT_HANDLER c_func, old_psw
+	/* Allocate new stack frame for warning symbol that shows up in the stack trace */
+	stg	%r15, -STACK_FRAME_SIZE + STACK_FRAME_INT_BACKCHAIN(%r15)
+	lay	%r15, -STACK_FRAME_SIZE(%r15)
+	/*
+	 * The handler must return with the original registers -> save r14
+	 * so it can be used to point to the interrupting instruction
+	 */
+	stg	%r14, STACK_FRAME_INT_GRS0(%r15)
+	larl	%r14, WARNING_THE_FOLLOWING_CALLERS_MIGHT_BE_CORRECT_BY_ACCIDENT_ONLY
+	/* Pretend we are returning to an instruction after the warning symbol */
+	la	%r14,1(%r14)
+	stg	%r14, 12 * 8 + STACK_FRAME_INT_GRS0(%r15)
+	/* Pretend we made a call with the old psw address as return address */
+	lg	%r14, 8 + \old_psw
 	SAVE_REGS_STACK
 	/* Save the stack address in GR2 which is the first function argument */
 	lgr     %r2, %r15
@@ -30,7 +44,14 @@ 
 	brasl	%r14, \c_func
 	algfi   %r15, STACK_FRAME_SIZE
 	RESTORE_REGS_STACK
+	lg	%r14, STACK_FRAME_INT_GRS0(%r15)
+	lg	%r15, STACK_FRAME_INT_BACKCHAIN(%r15)
 	lpswe	\old_psw
+	.ifndef WARNING_THE_FOLLOWING_CALLERS_MIGHT_BE_CORRECT_BY_ACCIDENT_ONLY
+	.pushsection .rodata
+	.set	WARNING_THE_FOLLOWING_CALLERS_MIGHT_BE_CORRECT_BY_ACCIDENT_ONLY, .
+	.popsection
+	.endif
 	.endm
 
 /* Save registers on the stack (r15), so we can have stacked interrupts. */