Message ID | 20190702130729.19615-4-mark.rutland@arm.com (mailing list archive) |
---|---|
State | Mainlined |
Commit | 592700f094be229b5c9cc1192d5cea46eb4c7afc |
Headers | show |
Series | arm64: stacktrace: improve robustness | expand |
On Tue, Jul 02, 2019 at 02:07:29PM +0100, Mark Rutland wrote: > The arm64 stacktrace code is careful to only dereference frame records > in valid stack ranges, ensuring that a corrupted frame record won't > result in a faulting access. > > However, it's still possible for corrupt frame records to result in > infinite loops in the stacktrace code, which is also undesirable. > > This patch ensures that we complete a stacktrace in finite time, by > keeping track of which stacks we have already completed unwinding, and > verifying that if the next frame record is on the same stack, it is at a > higher address. > > As this has turned out to be particularly subtle, comments are added to > explain the procedure. > > Signed-off-by: Mark Rutland <mark.rutland@arm.com> > Reviewed-by: James Morse <james.morse@arm.com> > Tested-by: James Morse <james.morse@arm.com> > Acked-by: Dave Martin <Dave.Martin@arm.com> > Cc: Catalin Marinas <catalin.marinas@arm.com> > Cc: Tengfei Fan <tengfeif@codeaurora.org> > Cc: Will Deacon <will.deacon@arm.com> > --- > arch/arm64/include/asm/stacktrace.h | 57 +++++++++++++++++++++++++++++++------ > arch/arm64/kernel/stacktrace.c | 40 +++++++++++++++++++++++++- > 2 files changed, 88 insertions(+), 9 deletions(-) > > diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h > index 18f90bf1385c..b3e03bbb69ea 100644 > --- a/arch/arm64/include/asm/stacktrace.h > +++ b/arch/arm64/include/asm/stacktrace.h > @@ -19,19 +19,12 @@ > #include <linux/percpu.h> > #include <linux/sched.h> > #include <linux/sched/task_stack.h> > +#include <linux/types.h> > > #include <asm/memory.h> > #include <asm/ptrace.h> > #include <asm/sdei.h> > > -struct stackframe { > - unsigned long fp; > - unsigned long pc; > -#ifdef CONFIG_FUNCTION_GRAPH_TRACER > - int graph; > -#endif > -}; > - > enum stack_type { > STACK_TYPE_UNKNOWN, > STACK_TYPE_TASK, > @@ -39,6 +32,7 @@ enum stack_type { > STACK_TYPE_OVERFLOW, > STACK_TYPE_SDEI_NORMAL, > STACK_TYPE_SDEI_CRITICAL, > + __NR_STACK_TYPES > }; > > struct stack_info { > @@ -47,6 +41,37 @@ struct stack_info { > enum stack_type type; > }; > > +/* > + * A snapshot of a frame record or fp/lr register values, along with some > + * accounting information necessary for robust unwinding. > + * > + * @fp: The fp value in the frame record (or the real fp) > + * @pc: The fp value in the frame record (or the real lr) > + * > + * @stacks_done: Stacks which have been entirely unwound, for which it is no > + * longer valid to unwind to. > + * > + * @prev_fp: The fp that pointed to this frame record, or a synthetic value > + * of 0. This is used to ensure that within a stack, each > + * subsequent frame record is at an increasing address. > + * @prev_type: The type of stack this frame record was on, or a synthetic > + * value of STACK_TYPE_UNKNOWN. This is used to detect a > + * transition from one stack to another. > + * > + * @graph: When FUNCTION_GRAPH_TRACER is selected, holds the index of a > + * replacement lr value in the ftrace graph stack. > + */ > +struct stackframe { > + unsigned long fp; > + unsigned long pc; > + DECLARE_BITMAP(stacks_done, __NR_STACK_TYPES); > + unsigned long prev_fp; > + enum stack_type prev_type; > +#ifdef CONFIG_FUNCTION_GRAPH_TRACER > + int graph; > +#endif > +}; > + > extern int unwind_frame(struct task_struct *tsk, struct stackframe *frame); > extern void walk_stackframe(struct task_struct *tsk, struct stackframe *frame, > int (*fn)(struct stackframe *, void *), void *data); > @@ -128,6 +153,9 @@ static inline bool on_accessible_stack(const struct task_struct *tsk, > unsigned long sp, > struct stack_info *info) > { > + if (info) > + info->type = STACK_TYPE_UNKNOWN; > + > if (on_task_stack(tsk, sp, info)) > return true; > if (tsk != current || preemptible()) > @@ -150,6 +178,19 @@ static inline void start_backtrace(struct stackframe *frame, > #ifdef CONFIG_FUNCTION_GRAPH_TRACER > frame->graph = 0; > #endif > + > + /* > + * Prime the first unwind. > + * > + * In unwind_frame() we'll check that the FP points to a valid stack, > + * which can't be STACK_TYPE_UNKNOWN, and the first unwind will be > + * treated as a transition to whichever stack that happens to be. The > + * prev_fp value won't be used, but we set it to 0 such that it is > + * definitely not an accessible stack address. > + */ > + bitmap_zero(frame->stacks_done, __NR_STACK_TYPES); > + frame->prev_fp = 0; > + frame->prev_type = STACK_TYPE_UNKNOWN; > } > > #endif /* __ASM_STACKTRACE_H */ > diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c > index e5338216deaa..8094bfe654f5 100644 > --- a/arch/arm64/kernel/stacktrace.c > +++ b/arch/arm64/kernel/stacktrace.c > @@ -40,9 +40,18 @@ > * ldp x29, x30, [sp] > * add sp, sp, #0x10 > */ > + > +/* > + * Unwind from one frame record (A) to the next frame record (B). > + * > + * We terminate early if the location of B indicates a malformed chain of frame > + * records (e.g. a cycle), determined based on the location and fp value of A > + * and the location (but not the fp value) of B. > + */ > int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame) > { > unsigned long fp = frame->fp; > + struct stack_info info; > > if (fp & 0xf) > return -EINVAL; > @@ -50,11 +59,40 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame) > if (!tsk) > tsk = current; > > - if (!on_accessible_stack(tsk, fp, NULL)) > + if (!on_accessible_stack(tsk, fp, &info)) > return -EINVAL; > > + if (test_bit(info.type, frame->stacks_done)) > + return -EINVAL; > + > + /* > + * As stacks grow downward, any valid record on the same stack must be > + * at a strictly higher address than the prior record. > + * > + * Stacks can nest in several valid orders, e.g. > + * > + * TASK -> IRQ -> OVERFLOW -> SDEI_NORMAL > + * TASK -> SDEI_NORMAL -> SDEI_CRITICAL -> OVERFLOW > + * > + * ... but the nesting itself is strict. Once we transition from one > + * stack to another, it's never valid to unwind back to that first > + * stack. > + */ > + if (info.type == frame->prev_type) { > + if (fp <= frame->prev_fp) > + return -EINVAL; > + } else { > + set_bit(frame->prev_type, frame->stacks_done); > + } > + > + /* > + * Record this frame record's values and location. The prev_fp and > + * prev_type are only meaningful to the next unwind_frame() invocation. > + */ > frame->fp = READ_ONCE_NOCHECK(*(unsigned long *)(fp)); > frame->pc = READ_ONCE_NOCHECK(*(unsigned long *)(fp + 8)); > + frame->prev_fp = fp; > + frame->prev_type = info.type; Thanks for the explanation. This looks less complicated that I had feared. Cheers ---Dave
diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h index 18f90bf1385c..b3e03bbb69ea 100644 --- a/arch/arm64/include/asm/stacktrace.h +++ b/arch/arm64/include/asm/stacktrace.h @@ -19,19 +19,12 @@ #include <linux/percpu.h> #include <linux/sched.h> #include <linux/sched/task_stack.h> +#include <linux/types.h> #include <asm/memory.h> #include <asm/ptrace.h> #include <asm/sdei.h> -struct stackframe { - unsigned long fp; - unsigned long pc; -#ifdef CONFIG_FUNCTION_GRAPH_TRACER - int graph; -#endif -}; - enum stack_type { STACK_TYPE_UNKNOWN, STACK_TYPE_TASK, @@ -39,6 +32,7 @@ enum stack_type { STACK_TYPE_OVERFLOW, STACK_TYPE_SDEI_NORMAL, STACK_TYPE_SDEI_CRITICAL, + __NR_STACK_TYPES }; struct stack_info { @@ -47,6 +41,37 @@ struct stack_info { enum stack_type type; }; +/* + * A snapshot of a frame record or fp/lr register values, along with some + * accounting information necessary for robust unwinding. + * + * @fp: The fp value in the frame record (or the real fp) + * @pc: The fp value in the frame record (or the real lr) + * + * @stacks_done: Stacks which have been entirely unwound, for which it is no + * longer valid to unwind to. + * + * @prev_fp: The fp that pointed to this frame record, or a synthetic value + * of 0. This is used to ensure that within a stack, each + * subsequent frame record is at an increasing address. + * @prev_type: The type of stack this frame record was on, or a synthetic + * value of STACK_TYPE_UNKNOWN. This is used to detect a + * transition from one stack to another. + * + * @graph: When FUNCTION_GRAPH_TRACER is selected, holds the index of a + * replacement lr value in the ftrace graph stack. + */ +struct stackframe { + unsigned long fp; + unsigned long pc; + DECLARE_BITMAP(stacks_done, __NR_STACK_TYPES); + unsigned long prev_fp; + enum stack_type prev_type; +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + int graph; +#endif +}; + extern int unwind_frame(struct task_struct *tsk, struct stackframe *frame); extern void walk_stackframe(struct task_struct *tsk, struct stackframe *frame, int (*fn)(struct stackframe *, void *), void *data); @@ -128,6 +153,9 @@ static inline bool on_accessible_stack(const struct task_struct *tsk, unsigned long sp, struct stack_info *info) { + if (info) + info->type = STACK_TYPE_UNKNOWN; + if (on_task_stack(tsk, sp, info)) return true; if (tsk != current || preemptible()) @@ -150,6 +178,19 @@ static inline void start_backtrace(struct stackframe *frame, #ifdef CONFIG_FUNCTION_GRAPH_TRACER frame->graph = 0; #endif + + /* + * Prime the first unwind. + * + * In unwind_frame() we'll check that the FP points to a valid stack, + * which can't be STACK_TYPE_UNKNOWN, and the first unwind will be + * treated as a transition to whichever stack that happens to be. The + * prev_fp value won't be used, but we set it to 0 such that it is + * definitely not an accessible stack address. + */ + bitmap_zero(frame->stacks_done, __NR_STACK_TYPES); + frame->prev_fp = 0; + frame->prev_type = STACK_TYPE_UNKNOWN; } #endif /* __ASM_STACKTRACE_H */ diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index e5338216deaa..8094bfe654f5 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -40,9 +40,18 @@ * ldp x29, x30, [sp] * add sp, sp, #0x10 */ + +/* + * Unwind from one frame record (A) to the next frame record (B). + * + * We terminate early if the location of B indicates a malformed chain of frame + * records (e.g. a cycle), determined based on the location and fp value of A + * and the location (but not the fp value) of B. + */ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame) { unsigned long fp = frame->fp; + struct stack_info info; if (fp & 0xf) return -EINVAL; @@ -50,11 +59,40 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame) if (!tsk) tsk = current; - if (!on_accessible_stack(tsk, fp, NULL)) + if (!on_accessible_stack(tsk, fp, &info)) return -EINVAL; + if (test_bit(info.type, frame->stacks_done)) + return -EINVAL; + + /* + * As stacks grow downward, any valid record on the same stack must be + * at a strictly higher address than the prior record. + * + * Stacks can nest in several valid orders, e.g. + * + * TASK -> IRQ -> OVERFLOW -> SDEI_NORMAL + * TASK -> SDEI_NORMAL -> SDEI_CRITICAL -> OVERFLOW + * + * ... but the nesting itself is strict. Once we transition from one + * stack to another, it's never valid to unwind back to that first + * stack. + */ + if (info.type == frame->prev_type) { + if (fp <= frame->prev_fp) + return -EINVAL; + } else { + set_bit(frame->prev_type, frame->stacks_done); + } + + /* + * Record this frame record's values and location. The prev_fp and + * prev_type are only meaningful to the next unwind_frame() invocation. + */ frame->fp = READ_ONCE_NOCHECK(*(unsigned long *)(fp)); frame->pc = READ_ONCE_NOCHECK(*(unsigned long *)(fp + 8)); + frame->prev_fp = fp; + frame->prev_type = info.type; #ifdef CONFIG_FUNCTION_GRAPH_TRACER if (tsk->ret_stack &&