diff mbox series

[RFC,v1,1/4] arm64: Implement infrastructure for stack trace reliability checks

Message ID 20210330190955.13707-2-madvenka@linux.microsoft.com (mailing list archive)
State New, archived
Headers show
Series arm64: Implement stack trace reliability checks | expand

Commit Message

Madhavan T. Venkataraman March 30, 2021, 7:09 p.m. UTC
From: "Madhavan T. Venkataraman" <madvenka@linux.microsoft.com>

Implement a check_reliability() function that will contain checks for the
presence of various features and conditions that can render the stack trace
unreliable.

Introduce the first reliability check - If a return PC encountered in a
stack trace is not a valid kernel text address, the stack trace is
considered unreliable. It could be some generated code.

Other reliability checks will be added in the future.

These checks will involve checking the return PC to see if it falls inside
any special functions where the stack trace is considered unreliable.
Implement the infrastructure needed for this.

Signed-off-by: Madhavan T. Venkataraman <madvenka@linux.microsoft.com>
---
 arch/arm64/include/asm/stacktrace.h |  2 +
 arch/arm64/kernel/stacktrace.c      | 80 +++++++++++++++++++++++++++++
 2 files changed, 82 insertions(+)

Comments

Mark Brown April 1, 2021, 3:27 p.m. UTC | #1
On Tue, Mar 30, 2021 at 02:09:52PM -0500, madvenka@linux.microsoft.com wrote:
> From: "Madhavan T. Venkataraman" <madvenka@linux.microsoft.com>
> 
> Implement a check_reliability() function that will contain checks for the
> presence of various features and conditions that can render the stack trace
> unreliable.

This looks good to me with one minor stylistic thing:

> +/*
> + * Special functions where the stack trace is unreliable.
> + */
> +static struct function_range	special_functions[] = {
> +	{ 0, 0 }
> +};

Might be good to put a comment here saying that this is terminating the
list rather than detecting a NULL function pointer:

	{ /* sentinel */ }

is a common idiom for that.

Given that it's a fixed array we could also...

> +	for (func = special_functions; func->start; func++) {
> +		if (pc >= func->start && pc < func->end)

...do these as

	for (i = 0; i < ARRAY_SIZE(special_functions); i++) 

so you don't need something like that, though that gets awkward when you
have to write out special_functions[i].field a lot.

So many different potential colours for the bikeshed!
Madhavan T. Venkataraman April 1, 2021, 5:44 p.m. UTC | #2
On 4/1/21 10:27 AM, Mark Brown wrote:
> On Tue, Mar 30, 2021 at 02:09:52PM -0500, madvenka@linux.microsoft.com wrote:
>> From: "Madhavan T. Venkataraman" <madvenka@linux.microsoft.com>
>>
>> Implement a check_reliability() function that will contain checks for the
>> presence of various features and conditions that can render the stack trace
>> unreliable.
> 
> This looks good to me with one minor stylistic thing:
> 
>> +/*
>> + * Special functions where the stack trace is unreliable.
>> + */
>> +static struct function_range	special_functions[] = {
>> +	{ 0, 0 }
>> +};
> 
> Might be good to put a comment here saying that this is terminating the
> list rather than detecting a NULL function pointer:
> 
> 	{ /* sentinel */ }
> 
> is a common idiom for that.
> 
> Given that it's a fixed array we could also...
> 
>> +	for (func = special_functions; func->start; func++) {
>> +		if (pc >= func->start && pc < func->end)
> 
> ...do these as
> 
> 	for (i = 0; i < ARRAY_SIZE(special_functions); i++) 
> 
> so you don't need something like that, though that gets awkward when you
> have to write out special_functions[i].field a lot.
> 
> So many different potential colours for the bikeshed!
I will make the above changes.

Thanks!

Madhavan
diff mbox series

Patch

diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h
index eb29b1fe8255..684f65808394 100644
--- a/arch/arm64/include/asm/stacktrace.h
+++ b/arch/arm64/include/asm/stacktrace.h
@@ -59,6 +59,7 @@  struct stackframe {
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 	int graph;
 #endif
+	bool reliable;
 };
 
 extern int unwind_frame(struct task_struct *tsk, struct stackframe *frame);
@@ -169,6 +170,7 @@  static inline void start_backtrace(struct stackframe *frame,
 	bitmap_zero(frame->stacks_done, __NR_STACK_TYPES);
 	frame->prev_fp = 0;
 	frame->prev_type = STACK_TYPE_UNKNOWN;
+	frame->reliable = true;
 }
 
 #endif	/* __ASM_STACKTRACE_H */
diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
index ad20981dfda4..ff35b3953c39 100644
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c
@@ -18,6 +18,84 @@ 
 #include <asm/stack_pointer.h>
 #include <asm/stacktrace.h>
 
+struct function_range {
+	unsigned long	start;
+	unsigned long	end;
+};
+
+/*
+ * Special functions where the stack trace is unreliable.
+ */
+static struct function_range	special_functions[] = {
+	{ 0, 0 }
+};
+
+static bool is_reliable_function(unsigned long pc)
+{
+	static bool inited = false;
+	struct function_range *func;
+
+	if (!inited) {
+		static char sym[KSYM_NAME_LEN];
+		unsigned long size, offset;
+
+		for (func = special_functions; func->start; func++) {
+			if (kallsyms_lookup(func->start, &size, &offset,
+					    NULL, sym)) {
+				func->start -= offset;
+				func->end = func->start + size;
+			} else {
+				/*
+				 * This is just a label. So, we only need to
+				 * consider that particular location. So, size
+				 * is the size of one Aarch64 instruction.
+				 */
+				func->end = func->start + 4;
+			}
+		}
+		inited = true;
+	}
+
+	for (func = special_functions; func->start; func++) {
+		if (pc >= func->start && pc < func->end)
+			return false;
+	}
+	return true;
+}
+
+/*
+ * Check for the presence of features and conditions that render the stack
+ * trace unreliable.
+ *
+ * Once all such cases have been addressed, this function can aid live
+ * patching (and this comment can be removed).
+ */
+static void check_reliability(struct stackframe *frame)
+{
+	/*
+	 * If the stack trace has already been marked unreliable, just return.
+	 */
+	if (!frame->reliable)
+		return;
+
+	/*
+	 * First, make sure that the return address is a proper kernel text
+	 * address. A NULL or invalid return address probably means there's
+	 * some generated code which __kernel_text_address() doesn't know
+	 * about. Mark the stack trace as not reliable.
+	 */
+	if (!__kernel_text_address(frame->pc)) {
+		frame->reliable = false;
+		return;
+	}
+
+	/*
+	 * Check the reliability of the return PC's function.
+	 */
+	if (!is_reliable_function(frame->pc))
+		frame->reliable = false;
+}
+
 /*
  * AArch64 PCS assigns the frame pointer to x29.
  *
@@ -108,6 +186,8 @@  int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame)
 
 	frame->pc = ptrauth_strip_insn_pac(frame->pc);
 
+	check_reliability(frame);
+
 	return 0;
 }
 NOKPROBE_SYMBOL(unwind_frame);