diff mbox series

[PATCHv3,3/3] arm64: stacktrace: better handle corrupted stacks

Message ID 20190702130729.19615-4-mark.rutland@arm.com (mailing list archive)
State Mainlined
Commit 592700f094be229b5c9cc1192d5cea46eb4c7afc
Headers show
Series arm64: stacktrace: improve robustness | expand

Commit Message

Mark Rutland July 2, 2019, 1:07 p.m. UTC
The arm64 stacktrace code is careful to only dereference frame records
in valid stack ranges, ensuring that a corrupted frame record won't
result in a faulting access.

However, it's still possible for corrupt frame records to result in
infinite loops in the stacktrace code, which is also undesirable.

This patch ensures that we complete a stacktrace in finite time, by
keeping track of which stacks we have already completed unwinding, and
verifying that if the next frame record is on the same stack, it is at a
higher address.

As this has turned out to be particularly subtle, comments are added to
explain the procedure.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: James Morse <james.morse@arm.com>
Tested-by: James Morse <james.morse@arm.com>
Acked-by: Dave Martin <Dave.Martin@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Tengfei Fan <tengfeif@codeaurora.org>
Cc: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/include/asm/stacktrace.h | 57 +++++++++++++++++++++++++++++++------
 arch/arm64/kernel/stacktrace.c      | 40 +++++++++++++++++++++++++-
 2 files changed, 88 insertions(+), 9 deletions(-)

Comments

Dave Martin July 3, 2019, 9:13 a.m. UTC | #1
On Tue, Jul 02, 2019 at 02:07:29PM +0100, Mark Rutland wrote:
> The arm64 stacktrace code is careful to only dereference frame records
> in valid stack ranges, ensuring that a corrupted frame record won't
> result in a faulting access.
> 
> However, it's still possible for corrupt frame records to result in
> infinite loops in the stacktrace code, which is also undesirable.
> 
> This patch ensures that we complete a stacktrace in finite time, by
> keeping track of which stacks we have already completed unwinding, and
> verifying that if the next frame record is on the same stack, it is at a
> higher address.
> 
> As this has turned out to be particularly subtle, comments are added to
> explain the procedure.
> 
> Signed-off-by: Mark Rutland <mark.rutland@arm.com>
> Reviewed-by: James Morse <james.morse@arm.com>
> Tested-by: James Morse <james.morse@arm.com>
> Acked-by: Dave Martin <Dave.Martin@arm.com>
> Cc: Catalin Marinas <catalin.marinas@arm.com>
> Cc: Tengfei Fan <tengfeif@codeaurora.org>
> Cc: Will Deacon <will.deacon@arm.com>
> ---
>  arch/arm64/include/asm/stacktrace.h | 57 +++++++++++++++++++++++++++++++------
>  arch/arm64/kernel/stacktrace.c      | 40 +++++++++++++++++++++++++-
>  2 files changed, 88 insertions(+), 9 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h
> index 18f90bf1385c..b3e03bbb69ea 100644
> --- a/arch/arm64/include/asm/stacktrace.h
> +++ b/arch/arm64/include/asm/stacktrace.h
> @@ -19,19 +19,12 @@
>  #include <linux/percpu.h>
>  #include <linux/sched.h>
>  #include <linux/sched/task_stack.h>
> +#include <linux/types.h>
>  
>  #include <asm/memory.h>
>  #include <asm/ptrace.h>
>  #include <asm/sdei.h>
>  
> -struct stackframe {
> -	unsigned long fp;
> -	unsigned long pc;
> -#ifdef CONFIG_FUNCTION_GRAPH_TRACER
> -	int graph;
> -#endif
> -};
> -
>  enum stack_type {
>  	STACK_TYPE_UNKNOWN,
>  	STACK_TYPE_TASK,
> @@ -39,6 +32,7 @@ enum stack_type {
>  	STACK_TYPE_OVERFLOW,
>  	STACK_TYPE_SDEI_NORMAL,
>  	STACK_TYPE_SDEI_CRITICAL,
> +	__NR_STACK_TYPES
>  };
>  
>  struct stack_info {
> @@ -47,6 +41,37 @@ struct stack_info {
>  	enum stack_type type;
>  };
>  
> +/*
> + * A snapshot of a frame record or fp/lr register values, along with some
> + * accounting information necessary for robust unwinding.
> + *
> + * @fp:          The fp value in the frame record (or the real fp)
> + * @pc:          The fp value in the frame record (or the real lr)
> + *
> + * @stacks_done: Stacks which have been entirely unwound, for which it is no
> + *               longer valid to unwind to.
> + *
> + * @prev_fp:     The fp that pointed to this frame record, or a synthetic value
> + *               of 0. This is used to ensure that within a stack, each
> + *               subsequent frame record is at an increasing address.
> + * @prev_type:   The type of stack this frame record was on, or a synthetic
> + *               value of STACK_TYPE_UNKNOWN. This is used to detect a
> + *               transition from one stack to another.
> + *
> + * @graph:       When FUNCTION_GRAPH_TRACER is selected, holds the index of a
> + *               replacement lr value in the ftrace graph stack.
> + */
> +struct stackframe {
> +	unsigned long fp;
> +	unsigned long pc;
> +	DECLARE_BITMAP(stacks_done, __NR_STACK_TYPES);
> +	unsigned long prev_fp;
> +	enum stack_type prev_type;
> +#ifdef CONFIG_FUNCTION_GRAPH_TRACER
> +	int graph;
> +#endif
> +};
> +
>  extern int unwind_frame(struct task_struct *tsk, struct stackframe *frame);
>  extern void walk_stackframe(struct task_struct *tsk, struct stackframe *frame,
>  			    int (*fn)(struct stackframe *, void *), void *data);
> @@ -128,6 +153,9 @@ static inline bool on_accessible_stack(const struct task_struct *tsk,
>  				       unsigned long sp,
>  				       struct stack_info *info)
>  {
> +	if (info)
> +		info->type = STACK_TYPE_UNKNOWN;
> +
>  	if (on_task_stack(tsk, sp, info))
>  		return true;
>  	if (tsk != current || preemptible())
> @@ -150,6 +178,19 @@ static inline void start_backtrace(struct stackframe *frame,
>  #ifdef CONFIG_FUNCTION_GRAPH_TRACER
>  	frame->graph = 0;
>  #endif
> +
> +	/*
> +	 * Prime the first unwind.
> +	 *
> +	 * In unwind_frame() we'll check that the FP points to a valid stack,
> +	 * which can't be STACK_TYPE_UNKNOWN, and the first unwind will be
> +	 * treated as a transition to whichever stack that happens to be. The
> +	 * prev_fp value won't be used, but we set it to 0 such that it is
> +	 * definitely not an accessible stack address.
> +	 */
> +	bitmap_zero(frame->stacks_done, __NR_STACK_TYPES);
> +	frame->prev_fp = 0;
> +	frame->prev_type = STACK_TYPE_UNKNOWN;
>  }
>  
>  #endif	/* __ASM_STACKTRACE_H */
> diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
> index e5338216deaa..8094bfe654f5 100644
> --- a/arch/arm64/kernel/stacktrace.c
> +++ b/arch/arm64/kernel/stacktrace.c
> @@ -40,9 +40,18 @@
>   *	ldp	x29, x30, [sp]
>   *	add	sp, sp, #0x10
>   */
> +
> +/*
> + * Unwind from one frame record (A) to the next frame record (B).
> + *
> + * We terminate early if the location of B indicates a malformed chain of frame
> + * records (e.g. a cycle), determined based on the location and fp value of A
> + * and the location (but not the fp value) of B.
> + */
>  int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame)
>  {
>  	unsigned long fp = frame->fp;
> +	struct stack_info info;
>  
>  	if (fp & 0xf)
>  		return -EINVAL;
> @@ -50,11 +59,40 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame)
>  	if (!tsk)
>  		tsk = current;
>  
> -	if (!on_accessible_stack(tsk, fp, NULL))
> +	if (!on_accessible_stack(tsk, fp, &info))
>  		return -EINVAL;
>  
> +	if (test_bit(info.type, frame->stacks_done))
> +		return -EINVAL;
> +
> +	/*
> +	 * As stacks grow downward, any valid record on the same stack must be
> +	 * at a strictly higher address than the prior record.
> +	 *
> +	 * Stacks can nest in several valid orders, e.g.
> +	 *
> +	 * TASK -> IRQ -> OVERFLOW -> SDEI_NORMAL
> +	 * TASK -> SDEI_NORMAL -> SDEI_CRITICAL -> OVERFLOW
> +	 *
> +	 * ... but the nesting itself is strict. Once we transition from one
> +	 * stack to another, it's never valid to unwind back to that first
> +	 * stack.
> +	 */
> +	if (info.type == frame->prev_type) {
> +		if (fp <= frame->prev_fp)
> +			return -EINVAL;
> +	} else {
> +		set_bit(frame->prev_type, frame->stacks_done);
> +	}
> +
> +	/*
> +	 * Record this frame record's values and location. The prev_fp and
> +	 * prev_type are only meaningful to the next unwind_frame() invocation.
> +	 */
>  	frame->fp = READ_ONCE_NOCHECK(*(unsigned long *)(fp));
>  	frame->pc = READ_ONCE_NOCHECK(*(unsigned long *)(fp + 8));
> +	frame->prev_fp = fp;
> +	frame->prev_type = info.type;

Thanks for the explanation.

This looks less complicated that I had feared.

Cheers
---Dave
diff mbox series

Patch

diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h
index 18f90bf1385c..b3e03bbb69ea 100644
--- a/arch/arm64/include/asm/stacktrace.h
+++ b/arch/arm64/include/asm/stacktrace.h
@@ -19,19 +19,12 @@ 
 #include <linux/percpu.h>
 #include <linux/sched.h>
 #include <linux/sched/task_stack.h>
+#include <linux/types.h>
 
 #include <asm/memory.h>
 #include <asm/ptrace.h>
 #include <asm/sdei.h>
 
-struct stackframe {
-	unsigned long fp;
-	unsigned long pc;
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-	int graph;
-#endif
-};
-
 enum stack_type {
 	STACK_TYPE_UNKNOWN,
 	STACK_TYPE_TASK,
@@ -39,6 +32,7 @@  enum stack_type {
 	STACK_TYPE_OVERFLOW,
 	STACK_TYPE_SDEI_NORMAL,
 	STACK_TYPE_SDEI_CRITICAL,
+	__NR_STACK_TYPES
 };
 
 struct stack_info {
@@ -47,6 +41,37 @@  struct stack_info {
 	enum stack_type type;
 };
 
+/*
+ * A snapshot of a frame record or fp/lr register values, along with some
+ * accounting information necessary for robust unwinding.
+ *
+ * @fp:          The fp value in the frame record (or the real fp)
+ * @pc:          The fp value in the frame record (or the real lr)
+ *
+ * @stacks_done: Stacks which have been entirely unwound, for which it is no
+ *               longer valid to unwind to.
+ *
+ * @prev_fp:     The fp that pointed to this frame record, or a synthetic value
+ *               of 0. This is used to ensure that within a stack, each
+ *               subsequent frame record is at an increasing address.
+ * @prev_type:   The type of stack this frame record was on, or a synthetic
+ *               value of STACK_TYPE_UNKNOWN. This is used to detect a
+ *               transition from one stack to another.
+ *
+ * @graph:       When FUNCTION_GRAPH_TRACER is selected, holds the index of a
+ *               replacement lr value in the ftrace graph stack.
+ */
+struct stackframe {
+	unsigned long fp;
+	unsigned long pc;
+	DECLARE_BITMAP(stacks_done, __NR_STACK_TYPES);
+	unsigned long prev_fp;
+	enum stack_type prev_type;
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	int graph;
+#endif
+};
+
 extern int unwind_frame(struct task_struct *tsk, struct stackframe *frame);
 extern void walk_stackframe(struct task_struct *tsk, struct stackframe *frame,
 			    int (*fn)(struct stackframe *, void *), void *data);
@@ -128,6 +153,9 @@  static inline bool on_accessible_stack(const struct task_struct *tsk,
 				       unsigned long sp,
 				       struct stack_info *info)
 {
+	if (info)
+		info->type = STACK_TYPE_UNKNOWN;
+
 	if (on_task_stack(tsk, sp, info))
 		return true;
 	if (tsk != current || preemptible())
@@ -150,6 +178,19 @@  static inline void start_backtrace(struct stackframe *frame,
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 	frame->graph = 0;
 #endif
+
+	/*
+	 * Prime the first unwind.
+	 *
+	 * In unwind_frame() we'll check that the FP points to a valid stack,
+	 * which can't be STACK_TYPE_UNKNOWN, and the first unwind will be
+	 * treated as a transition to whichever stack that happens to be. The
+	 * prev_fp value won't be used, but we set it to 0 such that it is
+	 * definitely not an accessible stack address.
+	 */
+	bitmap_zero(frame->stacks_done, __NR_STACK_TYPES);
+	frame->prev_fp = 0;
+	frame->prev_type = STACK_TYPE_UNKNOWN;
 }
 
 #endif	/* __ASM_STACKTRACE_H */
diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
index e5338216deaa..8094bfe654f5 100644
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c
@@ -40,9 +40,18 @@ 
  *	ldp	x29, x30, [sp]
  *	add	sp, sp, #0x10
  */
+
+/*
+ * Unwind from one frame record (A) to the next frame record (B).
+ *
+ * We terminate early if the location of B indicates a malformed chain of frame
+ * records (e.g. a cycle), determined based on the location and fp value of A
+ * and the location (but not the fp value) of B.
+ */
 int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame)
 {
 	unsigned long fp = frame->fp;
+	struct stack_info info;
 
 	if (fp & 0xf)
 		return -EINVAL;
@@ -50,11 +59,40 @@  int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame)
 	if (!tsk)
 		tsk = current;
 
-	if (!on_accessible_stack(tsk, fp, NULL))
+	if (!on_accessible_stack(tsk, fp, &info))
 		return -EINVAL;
 
+	if (test_bit(info.type, frame->stacks_done))
+		return -EINVAL;
+
+	/*
+	 * As stacks grow downward, any valid record on the same stack must be
+	 * at a strictly higher address than the prior record.
+	 *
+	 * Stacks can nest in several valid orders, e.g.
+	 *
+	 * TASK -> IRQ -> OVERFLOW -> SDEI_NORMAL
+	 * TASK -> SDEI_NORMAL -> SDEI_CRITICAL -> OVERFLOW
+	 *
+	 * ... but the nesting itself is strict. Once we transition from one
+	 * stack to another, it's never valid to unwind back to that first
+	 * stack.
+	 */
+	if (info.type == frame->prev_type) {
+		if (fp <= frame->prev_fp)
+			return -EINVAL;
+	} else {
+		set_bit(frame->prev_type, frame->stacks_done);
+	}
+
+	/*
+	 * Record this frame record's values and location. The prev_fp and
+	 * prev_type are only meaningful to the next unwind_frame() invocation.
+	 */
 	frame->fp = READ_ONCE_NOCHECK(*(unsigned long *)(fp));
 	frame->pc = READ_ONCE_NOCHECK(*(unsigned long *)(fp + 8));
+	frame->prev_fp = fp;
+	frame->prev_type = info.type;
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 	if (tsk->ret_stack &&