
[RFC,2/3] tracing: Introduce "rel_stack" option

Message ID 173807863557.1525539.14465198884840039000.stgit@mhiramat.roam.corp.google.com (mailing list archive)
State New
Series tracing: Introduce relative stacktrace

Commit Message

Masami Hiramatsu (Google) Jan. 28, 2025, 3:37 p.m. UTC
From: Masami Hiramatsu (Google) <mhiramat@kernel.org>

Since the relative offset stacktrace requires special module loading
events to decode the binary data, it should be optional for the normal
use case. Users can enable this feature via `options/relative-stacktrace`.

Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
---
 kernel/trace/trace.c         |   13 ++++++++-----
 kernel/trace/trace.h         |    2 ++
 kernel/trace/trace_entries.h |   22 ++++++++++++++++++++++
 kernel/trace/trace_output.c  |   37 ++++++++++++++++++++++++++++++++-----
 4 files changed, 64 insertions(+), 10 deletions(-)
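
For a quick usage sketch (assuming tracefs is mounted at /sys/kernel/tracing;
the sched_switch trigger below is only an example):

  echo 1 > /sys/kernel/tracing/options/relative-stacktrace
  echo 'stacktrace' > /sys/kernel/tracing/events/sched/sched_switch/trigger

With the option set, kernel stack traces are recorded as kernel_rel_stack
events holding offsets from _stext rather than raw addresses.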

Comments

Steven Rostedt Jan. 28, 2025, 4:07 p.m. UTC | #1
On Wed, 29 Jan 2025 00:37:15 +0900
"Masami Hiramatsu (Google)" <mhiramat@kernel.org> wrote:

> From: Masami Hiramatsu (Google) <mhiramat@kernel.org>
> 
> Since the relative offset stacktrace requires special module loading
> events to decode the binary data, it should be optional for the normal
> use case. Users can enable this feature via `options/relative-stacktrace`.
> 

Let's make this an entirely new event and not based on an option. The reason
being, there's no way for libtraceevent to know which kind of stack trace
this is. We could even have a mixture of these in the same trace.

Instead of an option, we can add a new trigger, stacktrace_rel, that will do
a relative stack trace. And the event can be kernel_stack_rel.

Then it can be enabled via:

  echo 'stacktrace_rel if prev_state & 3' > events/sched/sched_switch/trigger

-- Steve
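
As a minimal consumer-side sketch of why a distinct event name helps
(assuming libtraceevent's tep_register_event_handler(); rel_stack_handler()
and the header paths below are illustrative, not part of any patch here):

#include <traceevent/event-parse.h>	/* install location may vary */
#include <traceevent/trace-seq.h>

/* Hypothetical handler: entries of a kernel_stack_rel event are offsets
 * from _stext, so the consumer can resolve them itself. */
static int rel_stack_handler(struct trace_seq *s, struct tep_record *record,
			     struct tep_event *event, void *context)
{
	trace_seq_puts(s, "<relative kernel stack>");
	return 0;
}

static int attach_rel_stack_handler(struct tep_handle *tep)
{
	/* keyed purely off the event name; no option state is needed */
	return tep_register_event_handler(tep, -1, "ftrace", "kernel_stack_rel",
					  rel_stack_handler, NULL);
}

With an option instead, the same kernel_stack event could carry either raw
addresses or offsets, and the parser could not tell which.
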
Masami Hiramatsu (Google) Jan. 29, 2025, 12:14 a.m. UTC | #2
On Tue, 28 Jan 2025 11:07:00 -0500
Steven Rostedt <rostedt@goodmis.org> wrote:

> On Wed, 29 Jan 2025 00:37:15 +0900
> "Masami Hiramatsu (Google)" <mhiramat@kernel.org> wrote:
> 
> > From: Masami Hiramatsu (Google) <mhiramat@kernel.org>
> > 
> > Since the relative offset stacktrace requires special module loading
> > events to decode the binary data, it should be optional for the normal
> > use case. Users can enable this feature via `options/relative-stacktrace`.
> > 
> 
> Let's make this an entirely new event and not based on an option. The reason
> being, there's no way for libtraceevent to know which kind of stack trace
> this is. We could even have a mixture of these in the same trace.
> 
> Instead of an option, we can add a new trigger, stacktrace_rel, that will do
> a relative stack trace. And the event can be kernel_stack_rel.
> 
> Then it can be enabled via:
> 
>   echo 'stacktrace_rel if prev_state & 3' > events/sched/sched_switch/trigger

Oh, I thought that would be another feature, adding a new trigger action.
Can't we do this with the event defined by FTRACE_ENTRY_DUP()?

Thank you,

> 
> -- Steve

Patch

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 8e86a43b368c..b4da5e29957f 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -2926,6 +2926,7 @@  static void __ftrace_trace_stack(struct trace_array *tr,
 	struct ftrace_stack *fstack;
 	struct stack_entry *entry;
 	int stackidx;
+	int type;
 
 	/*
 	 * Add one, for this function and the call to save_stack_trace()
@@ -2937,6 +2938,7 @@  static void __ftrace_trace_stack(struct trace_array *tr,
 #endif
 
 	preempt_disable_notrace();
+	type = (tr->trace_flags & TRACE_ITER_REL_STACK) ? TRACE_REL_STACK : TRACE_STACK;
 
 	stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
 
@@ -2973,17 +2975,18 @@  static void __ftrace_trace_stack(struct trace_array *tr,
 		for (int i = 0; i < nr_entries; i++) {
 			if (calls[i] >= tramp_start && calls[i] < tramp_end)
 				calls[i] = FTRACE_TRAMPOLINE_MARKER;
-			else
+			else if (type == TRACE_REL_STACK)
 				calls[i] -= (unsigned long)_stext;
 		}
 	}
 #else
-	/* Adjsut entries as the offset from _stext, instead of raw address. */
-	for (int i = 0; i < nr_entries; i++)
-		fstack->calls[i] -= (unsigned long)_stext;
+	if (type == TRACE_REL_STACK)
+		/* Adjust entries to be offsets from _stext, instead of raw addresses. */
+		for (int i = 0; i < nr_entries; i++)
+			fstack->calls[i] -= (unsigned long)_stext;
 #endif
 
-	event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
+	event = __trace_buffer_lock_reserve(buffer, type,
 				    struct_size(entry, caller, nr_entries),
 				    trace_ctx);
 	if (!event)
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 9c21ba45b7af..602aea0ec69a 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -55,6 +55,7 @@  enum trace_type {
 	TRACE_TIMERLAT,
 	TRACE_RAW_DATA,
 	TRACE_FUNC_REPEATS,
+	TRACE_REL_STACK,
 
 	__TRACE_LAST_TYPE,
 };
@@ -1350,6 +1351,7 @@  extern int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
 		C(TRACE_PRINTK,		"trace_printk_dest"),	\
 		C(PAUSE_ON_TRACE,	"pause-on-trace"),	\
 		C(HASH_PTR,		"hash-ptr"),	/* Print hashed pointer */ \
+		C(REL_STACK,	"relative-stacktrace"),	\
 		FUNCTION_FLAGS					\
 		FGRAPH_FLAGS					\
 		STACK_FLAGS					\
diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h
index fbfb396905a6..7769f95b70fe 100644
--- a/kernel/trace/trace_entries.h
+++ b/kernel/trace/trace_entries.h
@@ -229,6 +229,28 @@  FTRACE_ENTRY(kernel_stack, stack_entry,
 		 (void *)__entry->caller[6], (void *)__entry->caller[7])
 );
 
+FTRACE_ENTRY_DUP(kernel_rel_stack, stack_entry,
+
+	TRACE_REL_STACK,
+
+	F_STRUCT(
+		__field(	int,		size	)
+		__stack_array(	unsigned long,	caller,	FTRACE_STACK_ENTRIES, size)
+	),
+
+	F_printk("\t=> %ps\n\t=> %ps\n\t=> %ps\n"
+		 "\t=> %ps\n\t=> %ps\n\t=> %ps\n"
+		 "\t=> %ps\n\t=> %ps\n",
+		 (void *)__entry->caller[0] + (unsigned long)_stext,
+		 (void *)__entry->caller[1] + (unsigned long)_stext,
+		 (void *)__entry->caller[2] + (unsigned long)_stext,
+		 (void *)__entry->caller[3] + (unsigned long)_stext,
+		 (void *)__entry->caller[4] + (unsigned long)_stext,
+		 (void *)__entry->caller[5] + (unsigned long)_stext,
+		 (void *)__entry->caller[6] + (unsigned long)_stext,
+		 (void *)__entry->caller[7] + (unsigned long)_stext)
+);
+
 FTRACE_ENTRY(user_stack, userstack_entry,
 
 	TRACE_USER_STACK,
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 497872df48f6..47e4ab549e81 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -1239,16 +1239,17 @@  static struct trace_event trace_wake_event = {
 	.funcs		= &trace_wake_funcs,
 };
 
-/* TRACE_STACK */
-
-static enum print_line_t trace_stack_print(struct trace_iterator *iter,
-					   int flags, struct trace_event *event)
+static enum print_line_t trace_kernel_stack_print(struct trace_iterator *iter,
+					   int flags, struct trace_event *event, bool relative)
 {
 	struct stack_entry *field;
 	struct trace_seq *s = &iter->seq;
 	unsigned long *p;
 	unsigned long *end;
-	long delta = (unsigned long)_stext + iter->tr->text_delta;
+	long delta = iter->tr->text_delta;
+
+	if (relative)
+		delta += (unsigned long)_stext;
 
 	trace_assign_type(field, iter->ent);
 	end = (unsigned long *)((long)iter->ent + iter->ent_size);
@@ -1272,6 +1273,14 @@  static enum print_line_t trace_stack_print(struct trace_iterator *iter,
 	return trace_handle_return(s);
 }
 
+/* TRACE_STACK */
+
+static enum print_line_t trace_stack_print(struct trace_iterator *iter,
+					   int flags, struct trace_event *event)
+{
+	return trace_kernel_stack_print(iter, flags, event, false);
+}
+
 static struct trace_event_functions trace_stack_funcs = {
 	.trace		= trace_stack_print,
 };
@@ -1281,6 +1290,23 @@  static struct trace_event trace_stack_event = {
 	.funcs		= &trace_stack_funcs,
 };
 
+/* TRACE_REL_STACK */
+
+static enum print_line_t trace_rel_stack_print(struct trace_iterator *iter,
+					   int flags, struct trace_event *event)
+{
+	return trace_kernel_stack_print(iter, flags, event, true);
+}
+
+static struct trace_event_functions trace_rel_stack_funcs = {
+	.trace		= trace_rel_stack_print,
+};
+
+static struct trace_event trace_rel_stack_event = {
+	.type		= TRACE_REL_STACK,
+	.funcs		= &trace_rel_stack_funcs,
+};
+
 /* TRACE_USER_STACK */
 static enum print_line_t trace_user_stack_print(struct trace_iterator *iter,
 						int flags, struct trace_event *event)
@@ -1724,6 +1750,7 @@  static struct trace_event *events[] __initdata = {
 	&trace_ctx_event,
 	&trace_wake_event,
 	&trace_stack_event,
+	&trace_rel_stack_event,
 	&trace_user_stack_event,
 	&trace_bputs_event,
 	&trace_bprint_event,
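
For reference, the address arithmetic this patch implements reduces to the
following sketch (decode_rel_entry() is an illustrative name, not part of
the patch):

/*
 * Record side (relative mode):  stored  = call_addr - _stext
 * Print side:                   printed = stored + _stext + text_delta
 * i.e. an absolute address is reconstructed from the stored offset, the
 * running kernel's _stext, and the per-buffer text_delta.
 */
static unsigned long decode_rel_entry(unsigned long stored,
				      unsigned long stext, long text_delta)
{
	return stored + stext + (unsigned long)text_delta;
}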