diff mbox series

[10/10] arm64: stacktrace: unwind exception boundaries

Message ID 20241010101510.1487477-11-mark.rutland@arm.com (mailing list archive)
State New, archived
Headers show
Series arm64: stacktrace: improve unwind reporting | expand

Commit Message

Mark Rutland Oct. 10, 2024, 10:15 a.m. UTC
When arm64's stack unwinder encounters an exception boundary, it uses
the pt_regs::stackframe created by the entry code, which has a copy of
the PC and FP at the time the exception was taken. The unwinder doesn't
know anything about pt_regs, and reports the PC from the stackframe, but
does not report the LR.

The LR is only guaranteed to contain the return address at function call
boundaries, and can be used as a scratch register at other times, so the
LR at an exception boundary may or may not be a legitimate return
address. It would be useful to report the LR value regardless, as it can
be helpful when debugging, and in future it will be helpful for reliable
stacktrace support.

This patch changes the way we unwind across exception boundaries,
allowing both the PC and LR to be reported. The entry code creates a
frame_record_meta structure embedded within pt_regs, which the unwinder
uses to find the pt_regs. The unwinder can then extract pt_regs::pc and
pt_regs::lr as two separate unwind steps before continuing with a
regular walk of frame records.

When a PC is unwound from pt_regs::lr, dump_backtrace() will log this
with an "L" marker so that it can be identified easily. For example,
an unwind across an exception boundary will appear as follows:

|  el1h_64_irq+0x6c/0x70
|  _raw_spin_unlock_irqrestore+0x10/0x60 (P)
|  __aarch64_insn_write+0x6c/0x90 (L)
|  aarch64_insn_patch_text_nosync+0x28/0x80

... with a (P) entry for pt_regs::pc, and an (L) entry for pt_regs::lr.

Note that the LR may be stale at the point of the exception, for example,
shortly after a return:

|  el1h_64_irq+0x6c/0x70
|  default_idle_call+0x34/0x180 (P)
|  default_idle_call+0x28/0x180 (L)
|  do_idle+0x204/0x268

... where the LR points a few instructions before the current PC.

This plays nicely with all the other unwind metadata tracking. With the
ftrace_graph profiler enabled globally, and kretprobes installed on
generic_handle_domain_irq() and do_interrupt_handler(), a backtrace triggered
by magic-sysrq + L reports:

| Call trace:
|  show_stack+0x20/0x40 (CF)
|  dump_stack_lvl+0x60/0x80 (F)
|  dump_stack+0x18/0x28
|  nmi_cpu_backtrace+0xfc/0x140
|  nmi_trigger_cpumask_backtrace+0x1c8/0x200
|  arch_trigger_cpumask_backtrace+0x20/0x40
|  sysrq_handle_showallcpus+0x24/0x38 (F)
|  __handle_sysrq+0xa8/0x1b0 (F)
|  handle_sysrq+0x38/0x50 (F)
|  pl011_int+0x460/0x5a8 (F)
|  __handle_irq_event_percpu+0x60/0x220 (F)
|  handle_irq_event+0x54/0xc0 (F)
|  handle_fasteoi_irq+0xa8/0x1d0 (F)
|  generic_handle_domain_irq+0x34/0x58 (F)
|  gic_handle_irq+0x54/0x140 (FK)
|  call_on_irq_stack+0x24/0x58 (F)
|  do_interrupt_handler+0x88/0xa0
|  el1_interrupt+0x34/0x68 (FK)
|  el1h_64_irq_handler+0x18/0x28
|  el1h_64_irq+0x6c/0x70
|  default_idle_call+0x34/0x180 (P)
|  default_idle_call+0x28/0x180 (L)
|  do_idle+0x204/0x268
|  cpu_startup_entry+0x3c/0x50 (F)
|  rest_init+0xe4/0xf0
|  start_kernel+0x744/0x750
|  __primary_switched+0x88/0x98

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Ard Biesheuvel <ardb@kernel.org>
Cc: Josh Poimboeuf <jpoimboe@kernel.org>
Cc: Kalesh Singh <kaleshsingh@google.com>
Cc: Madhavan T. Venkataraman <madvenka@linux.microsoft.com>
Cc: Marc Zyngier <maz@kernel.org>
Cc: Mark Brown <broonie@kernel.org>
Cc: Miroslav Benes <mbenes@suse.cz>
Cc: Puranjay Mohan <puranjay12@gmail.com>
Cc: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/ptrace.h           |   4 +-
 arch/arm64/include/asm/stacktrace/frame.h |  35 +++++++
 arch/arm64/kernel/asm-offsets.c           |   1 +
 arch/arm64/kernel/entry.S                 |  12 ++-
 arch/arm64/kernel/head.S                  |   3 +
 arch/arm64/kernel/probes/stBV5U5j         |   0
 arch/arm64/kernel/process.c               |   1 +
 arch/arm64/kernel/stacktrace.c            | 121 ++++++++++++++++++++--
 8 files changed, 158 insertions(+), 19 deletions(-)
 create mode 100644 arch/arm64/kernel/probes/stBV5U5j

Comments

Miroslav Benes Oct. 11, 2024, 3:16 p.m. UTC | #1
Hi,

> +static __always_inline int
> +kunwind_next_frame_record(struct kunwind_state *state)
> +{
> +	unsigned long fp = state->common.fp;
> +	struct frame_record *record;
> +	struct stack_info *info;
> +	unsigned long new_fp, new_pc;
> +
> +	if (fp & 0x7)
> +		return -EINVAL;
> +
> +	info = unwind_find_stack(&state->common, fp, sizeof(*record));
> +	if (!info)
> +		return -EINVAL;
> +
> +	record = (struct frame_record *)fp;
> +	new_fp = READ_ONCE(record->fp);
> +	new_pc = READ_ONCE(record->lr);
> +
> +	if (!new_fp && !new_pc)
> +		return kunwind_next_frame_record_meta(state);
> +
> +	unwind_consume_stack(&state->common, info, fp, sizeof(*record));
> +
> +	state->common.fp = new_fp;
> +	state->common.pc = new_pc;
> +	state->source = KUNWIND_SOURCE_FRAME;
> +
> +	return 0;
> +}
> +
>  /*
>   * Unwind from one frame record (A) to the next frame record (B).
>   *
> @@ -165,30 +266,27 @@ kunwind_recover_return_address(struct kunwind_state *state)
>  static __always_inline int
>  kunwind_next(struct kunwind_state *state)
>  {
> -	struct task_struct *tsk = state->task;
> -	unsigned long fp = state->common.fp;
>  	int err;
>  
>  	state->flags.all = 0;
>  
> -	/* Final frame; nothing to unwind */
> -	if (fp == (unsigned long)&task_pt_regs(tsk)->stackframe)
> -		return -ENOENT;
> -
>  	switch (state->source) {
>  	case KUNWIND_SOURCE_FRAME:
>  	case KUNWIND_SOURCE_CALLER:
>  	case KUNWIND_SOURCE_TASK:
> +	case KUNWIND_SOURCE_REGS_LR:
> +		err = kunwind_next_frame_record(state);
> +		break;
>  	case KUNWIND_SOURCE_REGS_PC:
> -		err = unwind_next_frame_record(&state->common);
> -		if (err)
> -			return err;
> -		state->source = KUNWIND_SOURCE_FRAME;
> +		err = kunwind_next_regs_lr(state);

the remaining users of unwind_next_frame_record() after this change are in 
KVM. How does it work there? What is the difference?

Miroslav
Mark Rutland Oct. 11, 2024, 4:13 p.m. UTC | #2
On Fri, Oct 11, 2024 at 05:16:23PM +0200, Miroslav Benes wrote:
> Hi,
> 
> > +static __always_inline int
> > +kunwind_next_frame_record(struct kunwind_state *state)
> > +{
> > +	unsigned long fp = state->common.fp;
> > +	struct frame_record *record;
> > +	struct stack_info *info;
> > +	unsigned long new_fp, new_pc;
> > +
> > +	if (fp & 0x7)
> > +		return -EINVAL;
> > +
> > +	info = unwind_find_stack(&state->common, fp, sizeof(*record));
> > +	if (!info)
> > +		return -EINVAL;
> > +
> > +	record = (struct frame_record *)fp;
> > +	new_fp = READ_ONCE(record->fp);
> > +	new_pc = READ_ONCE(record->lr);
> > +
> > +	if (!new_fp && !new_pc)
> > +		return kunwind_next_frame_record_meta(state);
> > +
> > +	unwind_consume_stack(&state->common, info, fp, sizeof(*record));
> > +
> > +	state->common.fp = new_fp;
> > +	state->common.pc = new_pc;
> > +	state->source = KUNWIND_SOURCE_FRAME;
> > +
> > +	return 0;
> > +}
> > +
> >  /*
> >   * Unwind from one frame record (A) to the next frame record (B).
> >   *
> > @@ -165,30 +266,27 @@ kunwind_recover_return_address(struct kunwind_state *state)
> >  static __always_inline int
> >  kunwind_next(struct kunwind_state *state)
> >  {
> > -	struct task_struct *tsk = state->task;
> > -	unsigned long fp = state->common.fp;
> >  	int err;
> >  
> >  	state->flags.all = 0;
> >  
> > -	/* Final frame; nothing to unwind */
> > -	if (fp == (unsigned long)&task_pt_regs(tsk)->stackframe)
> > -		return -ENOENT;
> > -
> >  	switch (state->source) {
> >  	case KUNWIND_SOURCE_FRAME:
> >  	case KUNWIND_SOURCE_CALLER:
> >  	case KUNWIND_SOURCE_TASK:
> > +	case KUNWIND_SOURCE_REGS_LR:
> > +		err = kunwind_next_frame_record(state);
> > +		break;
> >  	case KUNWIND_SOURCE_REGS_PC:
> > -		err = unwind_next_frame_record(&state->common);
> > -		if (err)
> > -			return err;
> > -		state->source = KUNWIND_SOURCE_FRAME;
> > +		err = kunwind_next_regs_lr(state);
> 
> the remaining users of unwind_next_frame_record() after this change are in 
> KVM. How does it work there?

The short of it is those are unchanged by this series, and the behaviour
of the KVM stacktrace code is unchanged -- it will continue to not
report exception boundaries.

The KVM hyp code doesn't (currently) use the frame_record_meta records
at exception boundaries, so the KVM stacktrace code won't see those and
only needs to unwind regular frame records.

It would be nice to improve that, but IIUC it shouldn't matter for
RELIABLE_STACKTRACE; all calls to hyp code from the main kernel are
synchronous, and aside from a (fatal) hyp_panic(), executing in a
regular kernel context ensures there's no hyp context to unwind.

> What is the difference?

The kunwind_next_frame_record() function will identify and unwind any
frame_record_meta frames, while unwind_next_frame_record() only handles
regular frame records. Since the KVM hyp code doesn't use
frame_record_meta frames, using unwind_next_frame_record() is
sufficient.

I originally wanted the kunwind code to call unwind_next_frame_record() and
then handle any frame_record_meta, but that was painful due to the state
that alters (e.g. the way we track which stack ranges have been
consumed), and duplicating the early parts of
unwind_next_frame_record() was simpler overall.

As a general naming convention, the unwind_*() functions are things that
can be shared between the main kernel image and KVM, and the kunwind_*()
functions are specific to the main kernel image.

I'm happy to change the name to more clearly distinguish
kunwind_next_frame_record() from unwind_next_frame_record(), if that
would help?

Mark.
Miroslav Benes Oct. 11, 2024, 4:34 p.m. UTC | #3
On Fri, 11 Oct 2024, Mark Rutland wrote:

> On Fri, Oct 11, 2024 at 05:16:23PM +0200, Miroslav Benes wrote:
>
> > >  	switch (state->source) {
> > >  	case KUNWIND_SOURCE_FRAME:
> > >  	case KUNWIND_SOURCE_CALLER:
> > >  	case KUNWIND_SOURCE_TASK:
> > > +	case KUNWIND_SOURCE_REGS_LR:
> > > +		err = kunwind_next_frame_record(state);
> > > +		break;
> > >  	case KUNWIND_SOURCE_REGS_PC:
> > > -		err = unwind_next_frame_record(&state->common);
> > > -		if (err)
> > > -			return err;
> > > -		state->source = KUNWIND_SOURCE_FRAME;
> > > +		err = kunwind_next_regs_lr(state);
> > 
> > the remaining users of unwind_next_frame_record() after this change are in 
> > KVM. How does it work there?
> 
> The short of it is those are unchanged by this series, and the behaviour
> of the KVM stacktrace code is unchanged -- it will continue to not
> report exception boundaries.
> 
> The KVM hyp code doesn't (currently) use the frame_record_meta records
> at exception boundaries, so the KVM stacktrace code won't see those and
> only need to unwind regular frame records.

Yes.
 
> It would be nice to improve that, but IIUC it shouldn't matter for
> RELAIBLE_STACKTRACE; all calls to hyp code from the main kernel are
> synchronous, and aside from a (fatal) hyp_panic(), executing in a
> regular kernel context ensures there's no hyp context to unwind.

I agree. I was mainly asking if there was a reason (how exceptions are 
processed for example) why it could not be unified because the first 
thought was that the differences are not so big on the code level.

But yeah, out of scope for this patch set.

> > What is the difference?
> 
> The kunwind_next_frame_record() function will identify and unwind any
> frame_record_meta frames, while unwind_next_frame_record() only handles
> regular frame records. Since the KVM hyp code doesn't use
> frame_record_meta frames, using unwind_next_frame_record() is
> sufficient.
> 
> I originally wanted the kunwind code call unwind_next_frame_record() and
> then handle any frame_record_meta, but that was painful due to the state
> that alters (e.g. the way we track which stack ranges have been
> consumed), and duplicating the easrly parts of
> unwind_next_frame_record() was simpler overall.

Ok.

> As a general naming convention, the unwind_*() functions are things that
> can be shared between the main kernel image and KVM, and the kunwind_*()
> functions are specific to the main kernel image.
> 
> I'm happy to change the name to more clearly distinguish
> kunwind_next_frame_record() from unwind_next_frame_record(), if that
> would help?

Nope, as far as I am concerned the convention makes sense and I have no 
problem with that.

Thank you,
Miroslav
Mark Brown Oct. 12, 2024, 9:22 a.m. UTC | #4
On Thu, Oct 10, 2024 at 11:15:10AM +0100, Mark Rutland wrote:

> When a PC is unwound from pt_regs::lr, dump_backtrace() will log this
> with an "L" marker so that it can be identified easily. For example,
> an unwind across an exception boundary will appear as follows:

> |  el1h_64_irq+0x6c/0x70
> |  _raw_spin_unlock_irqrestore+0x10/0x60 (P)
> |  __aarch64_insn_write+0x6c/0x90 (L)
> |  aarch64_insn_patch_text_nosync+0x28/0x80

> ... with a (P) entry for pt_regs::pc, and an (L) entry for pt_regs:lr.

This will be super useful.

Reviewed-by: Mark Brown <broonie@kernel.org>

modulo the stray file Miroslav mentioned.
diff mbox series

Patch

diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h
index 89c02f85f4b11..47ff8654c5ec1 100644
--- a/arch/arm64/include/asm/ptrace.h
+++ b/arch/arm64/include/asm/ptrace.h
@@ -168,9 +168,7 @@  struct pt_regs {
 	u32 pmr;
 
 	u64 sdei_ttbr1;
-	u64 unused;
-
-	struct frame_record stackframe;
+	struct frame_record_meta stackframe;
 
 	/* Only valid for some EL1 exceptions. */
 	u64 lockdep_hardirqs;
diff --git a/arch/arm64/include/asm/stacktrace/frame.h b/arch/arm64/include/asm/stacktrace/frame.h
index 6397bc847f147..0ee0f6ba0fd86 100644
--- a/arch/arm64/include/asm/stacktrace/frame.h
+++ b/arch/arm64/include/asm/stacktrace/frame.h
@@ -3,6 +3,30 @@ 
 #define __ASM_STACKTRACE_FRAME_H
 
 /*
+ * - FRAME_META_TYPE_NONE
+ *
+ *   This value is reserved.
+ *
+ * - FRAME_META_TYPE_FINAL
+ *
+ *   The record is the last entry on the stack.
+ *   Unwinding should terminate successfully.
+ *
+ * - FRAME_META_TYPE_PT_REGS
+ *
+ *   The record is embedded within a struct pt_regs, recording the registers at
+ *   an arbitrary point in time.
+ *   Unwinding should consume pt_regs::pc, followed by pt_regs::lr.
+ *
+ * Note: all other values are reserved and should result in unwinding
+ * terminating with an error.
+ */
+#define FRAME_META_TYPE_NONE		0
+#define FRAME_META_TYPE_FINAL		1
+#define FRAME_META_TYPE_PT_REGS		2
+
+#ifndef __ASSEMBLY__
+/* 
  * A standard AAPCS64 frame record.
  */
 struct frame_record {
@@ -10,4 +34,15 @@  struct frame_record {
 	u64 lr;
 };
 
+/*
+ * A metadata frame record indicating a special unwind.
+ * The record::{fp,lr} fields must be zero to indicate the presence of
+ * metadata.
+ */
+struct frame_record_meta {
+	struct frame_record record;
+	u64 type;
+};
+#endif /* __ASSEMBLY__ */
+
 #endif /* __ASM_STACKTRACE_FRAME_H */
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index eb7fb2f9b9274..021f04f97fde5 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -81,6 +81,7 @@  int main(void)
   DEFINE(S_SDEI_TTBR1,		offsetof(struct pt_regs, sdei_ttbr1));
   DEFINE(S_PMR,			offsetof(struct pt_regs, pmr));
   DEFINE(S_STACKFRAME,		offsetof(struct pt_regs, stackframe));
+  DEFINE(S_STACKFRAME_TYPE,	offsetof(struct pt_regs, stackframe.type));
   DEFINE(PT_REGS_SIZE,		sizeof(struct pt_regs));
   BLANK();
 #ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index fa6d6d5ca5e02..5ae2a34b50bda 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -25,6 +25,7 @@ 
 #include <asm/processor.h>
 #include <asm/ptrace.h>
 #include <asm/scs.h>
+#include <asm/stacktrace/frame.h>
 #include <asm/thread_info.h>
 #include <asm/asm-uaccess.h>
 #include <asm/unistd.h>
@@ -284,15 +285,16 @@  alternative_else_nop_endif
 	stp	lr, x21, [sp, #S_LR]
 
 	/*
-	 * For exceptions from EL0, create a final frame record.
-	 * For exceptions from EL1, create a synthetic frame record so the
-	 * interrupted code shows up in the backtrace.
+	 * Create a metadata frame record. The unwinder will use this to
+	 * identify and unwind exception boundaries.
 	 */
-	.if \el == 0
 	stp	xzr, xzr, [sp, #S_STACKFRAME]
+	.if \el == 0
+	mov	x0, #FRAME_META_TYPE_FINAL
 	.else
-	stp	x29, x22, [sp, #S_STACKFRAME]
+	mov	x0, #FRAME_META_TYPE_PT_REGS
 	.endif
+	str	x0, [sp, #S_STACKFRAME_TYPE]
 	add	x29, sp, #S_STACKFRAME
 
 #ifdef CONFIG_ARM64_SW_TTBR0_PAN
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index cb68adcabe078..5ab1970ee5436 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -32,6 +32,7 @@ 
 #include <asm/scs.h>
 #include <asm/smp.h>
 #include <asm/sysreg.h>
+#include <asm/stacktrace/frame.h>
 #include <asm/thread_info.h>
 #include <asm/virt.h>
 
@@ -199,6 +200,8 @@  SYM_CODE_END(preserve_boot_args)
 	sub	sp, sp, #PT_REGS_SIZE
 
 	stp	xzr, xzr, [sp, #S_STACKFRAME]
+	mov	\tmp1, #FRAME_META_TYPE_FINAL
+	str	\tmp1, [sp, #S_STACKFRAME_TYPE]
 	add	x29, sp, #S_STACKFRAME
 
 	scs_load_current
diff --git a/arch/arm64/kernel/probes/stBV5U5j b/arch/arm64/kernel/probes/stBV5U5j
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index d45fd114eac3f..29904c829de25 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -409,6 +409,7 @@  int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
 		 */
 		memset(childregs, 0, sizeof(struct pt_regs));
 		childregs->pstate = PSR_MODE_EL1h | PSR_IL_BIT;
+		childregs->stackframe.type = FRAME_META_TYPE_FINAL;
 
 		p->thread.cpu_context.x19 = (unsigned long)args->fn;
 		p->thread.cpu_context.x20 = (unsigned long)args->fn_arg;
diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
index f8e231683dad9..caef85462acb6 100644
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c
@@ -26,6 +26,7 @@  enum kunwind_source {
 	KUNWIND_SOURCE_CALLER,
 	KUNWIND_SOURCE_TASK,
 	KUNWIND_SOURCE_REGS_PC,
+	KUNWIND_SOURCE_REGS_LR,
 };
 
 union unwind_flags {
@@ -55,6 +56,7 @@  struct kunwind_state {
 #endif
 	enum kunwind_source source;
 	union unwind_flags flags;
+	struct pt_regs *regs;
 };
 
 static __always_inline void
@@ -65,6 +67,7 @@  kunwind_init(struct kunwind_state *state,
 	state->task = task;
 	state->source = KUNWIND_SOURCE_UNKNOWN;
 	state->flags.all = 0;
+	state->regs = NULL;
 }
 
 /*
@@ -80,6 +83,7 @@  kunwind_init_from_regs(struct kunwind_state *state,
 {
 	kunwind_init(state, current);
 
+	state->regs = regs;
 	state->common.fp = regs->regs[29];
 	state->common.pc = regs->pc;
 	state->source = KUNWIND_SOURCE_REGS_PC;
@@ -155,6 +159,103 @@  kunwind_recover_return_address(struct kunwind_state *state)
 	return 0;
 }
 
+static __always_inline
+int kunwind_next_regs_pc(struct kunwind_state *state)
+{
+	struct stack_info *info;
+	unsigned long fp = state->common.fp;
+	struct pt_regs *regs;
+
+	regs = container_of((u64 *)fp, struct pt_regs, stackframe.record.fp);
+
+	info = unwind_find_stack(&state->common, (unsigned long)regs, sizeof(*regs));
+	if (!info)
+		return -EINVAL;
+
+	unwind_consume_stack(&state->common, info, (unsigned long)regs,
+			     sizeof(*regs));
+
+	state->regs = regs;
+	state->common.pc = regs->pc;
+	state->common.fp = regs->regs[29];
+	state->source = KUNWIND_SOURCE_REGS_PC;
+	return 0;
+}
+
+static __always_inline int
+kunwind_next_regs_lr(struct kunwind_state *state)
+{
+	/*
+	 * The stack for the regs was consumed by kunwind_next_regs_pc(), so we
+	 * cannot consume that again here, but we know the regs are safe to
+	 * access.
+	 */
+	state->common.pc = state->regs->regs[30];
+	state->common.fp = state->regs->regs[29];
+	state->regs = NULL;
+	state->source = KUNWIND_SOURCE_REGS_LR;
+
+	return 0;
+}
+
+static __always_inline int
+kunwind_next_frame_record_meta(struct kunwind_state *state)
+{
+	struct task_struct *tsk = state->task;
+	unsigned long fp = state->common.fp;
+	struct frame_record_meta *meta;
+	struct stack_info *info;
+
+	info = unwind_find_stack(&state->common, fp, sizeof(*meta));
+	if (!info)
+		return -EINVAL;
+
+	meta = (struct frame_record_meta *)fp;
+	switch (READ_ONCE(meta->type)) {
+	case FRAME_META_TYPE_FINAL:
+		if (meta == &task_pt_regs(tsk)->stackframe)
+			return -ENOENT;
+		WARN_ON_ONCE(1);
+		return -EINVAL;
+	case FRAME_META_TYPE_PT_REGS:
+		return kunwind_next_regs_pc(state);
+	default:
+		WARN_ON_ONCE(1);
+		return -EINVAL;
+	}
+}
+
+static __always_inline int
+kunwind_next_frame_record(struct kunwind_state *state)
+{
+	unsigned long fp = state->common.fp;
+	struct frame_record *record;
+	struct stack_info *info;
+	unsigned long new_fp, new_pc;
+
+	if (fp & 0x7)
+		return -EINVAL;
+
+	info = unwind_find_stack(&state->common, fp, sizeof(*record));
+	if (!info)
+		return -EINVAL;
+
+	record = (struct frame_record *)fp;
+	new_fp = READ_ONCE(record->fp);
+	new_pc = READ_ONCE(record->lr);
+
+	if (!new_fp && !new_pc)
+		return kunwind_next_frame_record_meta(state);
+
+	unwind_consume_stack(&state->common, info, fp, sizeof(*record));
+
+	state->common.fp = new_fp;
+	state->common.pc = new_pc;
+	state->source = KUNWIND_SOURCE_FRAME;
+
+	return 0;
+}
+
 /*
  * Unwind from one frame record (A) to the next frame record (B).
  *
@@ -165,30 +266,27 @@  kunwind_recover_return_address(struct kunwind_state *state)
 static __always_inline int
 kunwind_next(struct kunwind_state *state)
 {
-	struct task_struct *tsk = state->task;
-	unsigned long fp = state->common.fp;
 	int err;
 
 	state->flags.all = 0;
 
-	/* Final frame; nothing to unwind */
-	if (fp == (unsigned long)&task_pt_regs(tsk)->stackframe)
-		return -ENOENT;
-
 	switch (state->source) {
 	case KUNWIND_SOURCE_FRAME:
 	case KUNWIND_SOURCE_CALLER:
 	case KUNWIND_SOURCE_TASK:
+	case KUNWIND_SOURCE_REGS_LR:
+		err = kunwind_next_frame_record(state);
+		break;
 	case KUNWIND_SOURCE_REGS_PC:
-		err = unwind_next_frame_record(&state->common);
-		if (err)
-			return err;
-		state->source = KUNWIND_SOURCE_FRAME;
+		err = kunwind_next_regs_lr(state);
 		break;
 	default:
-		return -EINVAL;
+		err = -EINVAL;
 	}
 
+	if (err)
+		return err;
+
 	state->common.pc = ptrauth_strip_kernel_insn_pac(state->common.pc);
 
 	return kunwind_recover_return_address(state);
@@ -338,6 +436,7 @@  static const char *state_source_string(const struct kunwind_state *state)
 	case KUNWIND_SOURCE_CALLER:	return "C";
 	case KUNWIND_SOURCE_TASK:	return "T";
 	case KUNWIND_SOURCE_REGS_PC:	return "P";
+	case KUNWIND_SOURCE_REGS_LR:	return "L";
 	default:			return "U";
 	}
 }