diff mbox series

[8/8] tracing: Update modules to persistent instances when loaded

Message ID 20250205225104.264333506@goodmis.org (mailing list archive)
State Superseded
Headers show
Series ring-buffer/tracing: Save module information in persistent memory | expand

Commit Message

Steven Rostedt Feb. 5, 2025, 10:50 p.m. UTC
From: Steven Rostedt <rostedt@goodmis.org>

When a module is loaded and a persistent buffer is actively tracing, add
it to the list of modules in the persistent memory.

Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 kernel/trace/trace.c        | 25 +++++++++++++++++++++++
 kernel/trace/trace.h        |  2 ++
 kernel/trace/trace_events.c | 40 ++++++++++++++++++++++++++-----------
 3 files changed, 55 insertions(+), 12 deletions(-)

Comments

Masami Hiramatsu (Google) Feb. 6, 2025, 10:01 a.m. UTC | #1
On Wed, 05 Feb 2025 17:50:39 -0500
Steven Rostedt <rostedt@goodmis.org> wrote:

> From: Steven Rostedt <rostedt@goodmis.org>
> 
> When a module is loaded and a persistent buffer is actively tracing, add
> it to the list of modules in the persistent memory.
> 
> Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
> ---
>  kernel/trace/trace.c        | 25 +++++++++++++++++++++++
>  kernel/trace/trace.h        |  2 ++
>  kernel/trace/trace_events.c | 40 ++++++++++++++++++++++++++-----------
>  3 files changed, 55 insertions(+), 12 deletions(-)
> 
> diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
> index 7b4027804cd4..443f2bc5b856 100644
> --- a/kernel/trace/trace.c
> +++ b/kernel/trace/trace.c
> @@ -10088,6 +10088,30 @@ static void trace_module_remove_evals(struct module *mod)
>  static inline void trace_module_remove_evals(struct module *mod) { }
>  #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
>  
> +static bool trace_array_active(struct trace_array *tr)
> +{
> +	if (tr->current_trace != &nop_trace)
> +		return true;
> +
> +	/* 0 is no events, 1 is all disabled */
> +	return trace_events_enabled(tr, NULL) > 1;
> +}
> +
> +static void trace_module_record(struct module *mod)
> +{
> +	struct trace_array *tr;
> +
> +	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
> +		/* Update any persistent trace array that has already been started */
> +		if ((tr->flags & (TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT)) ==
> +		    TRACE_ARRAY_FL_BOOT) {
> +			/* Only update if the trace array is active */
> +			if (trace_array_active(tr))

Do we really need this check? It seems that we can just save_mod() if the
above condition is true.

> +				save_mod(mod, tr);
> +		}
> +	}
> +}
> +
>  static int trace_module_notify(struct notifier_block *self,
>  			       unsigned long val, void *data)
>  {
> @@ -10096,6 +10120,7 @@ static int trace_module_notify(struct notifier_block *self,
>  	switch (val) {
>  	case MODULE_STATE_COMING:
>  		trace_module_add_evals(mod);
> +		trace_module_record(mod);
>  		break;
>  	case MODULE_STATE_GOING:
>  		trace_module_remove_evals(mod);

Don't we need to remove the module entry when a module is removed?
(everytime we remove a module, trace data is cleared?)

Thank you,

> diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
> index 3a020fb82a34..90493220c362 100644
> --- a/kernel/trace/trace.h
> +++ b/kernel/trace/trace.h
> @@ -786,6 +786,8 @@ extern void trace_find_cmdline(int pid, char comm[]);
>  extern int trace_find_tgid(int pid);
>  extern void trace_event_follow_fork(struct trace_array *tr, bool enable);
>  
> +extern int trace_events_enabled(struct trace_array *tr, const char *system);
> +
>  #ifdef CONFIG_DYNAMIC_FTRACE
>  extern unsigned long ftrace_update_tot_cnt;
>  extern unsigned long ftrace_number_of_pages;
> diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
> index 4cb275316e51..107767afe0ab 100644
> --- a/kernel/trace/trace_events.c
> +++ b/kernel/trace/trace_events.c
> @@ -1811,28 +1811,28 @@ event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
>  	return cnt;
>  }
>  
> -static ssize_t
> -system_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
> -		   loff_t *ppos)
> +/*
> + * Returns:
> + *   0 : no events exist?
> + *   1 : all events are disabled
> + *   2 : all events are enabled
> + *   3 : some events are enabled and some are enabled
> + */
> +int trace_events_enabled(struct trace_array *tr, const char *system)
>  {
> -	const char set_to_char[4] = { '?', '0', '1', 'X' };
> -	struct trace_subsystem_dir *dir = filp->private_data;
> -	struct event_subsystem *system = dir->subsystem;
>  	struct trace_event_call *call;
>  	struct trace_event_file *file;
> -	struct trace_array *tr = dir->tr;
> -	char buf[2];
>  	int set = 0;
> -	int ret;
>  
> -	mutex_lock(&event_mutex);
> +	guard(mutex)(&event_mutex);
> +
>  	list_for_each_entry(file, &tr->events, list) {
>  		call = file->event_call;
>  		if ((call->flags & TRACE_EVENT_FL_IGNORE_ENABLE) ||
>  		    !trace_event_name(call) || !call->class || !call->class->reg)
>  			continue;
>  
> -		if (system && strcmp(call->class->system, system->name) != 0)
> +		if (system && strcmp(call->class->system, system) != 0)
>  			continue;
>  
>  		/*
> @@ -1848,7 +1848,23 @@ system_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
>  		if (set == 3)
>  			break;
>  	}
> -	mutex_unlock(&event_mutex);
> +
> +	return set;
> +}
> +
> +static ssize_t
> +system_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
> +		   loff_t *ppos)
> +{
> +	const char set_to_char[4] = { '?', '0', '1', 'X' };
> +	struct trace_subsystem_dir *dir = filp->private_data;
> +	struct event_subsystem *system = dir->subsystem;
> +	struct trace_array *tr = dir->tr;
> +	char buf[2];
> +	int set;
> +	int ret;
> +
> +	set = trace_events_enabled(tr, system ? system->name : NULL);
>  
>  	buf[0] = set_to_char[set];
>  	buf[1] = '\n';
> -- 
> 2.45.2
> 
> 
>
Steven Rostedt Feb. 6, 2025, 3:36 p.m. UTC | #2
On Thu, 6 Feb 2025 19:01:24 +0900
Masami Hiramatsu (Google) <mhiramat@kernel.org> wrote:

> > +static void trace_module_record(struct module *mod)
> > +{
> > +	struct trace_array *tr;
> > +
> > +	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
> > +		/* Update any persistent trace array that has already been started */
> > +		if ((tr->flags & (TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT)) ==
> > +		    TRACE_ARRAY_FL_BOOT) {
> > +			/* Only update if the trace array is active */
> > +			if (trace_array_active(tr))  
> 
> Do we really need this check? It seems that we can just save_mod() if the
> above condition is true.

It gets a little more  complicated if we need to add and remove modules.

> 
> > +				save_mod(mod, tr);
> > +		}
> > +	}
> > +}
> > +
> >  static int trace_module_notify(struct notifier_block *self,
> >  			       unsigned long val, void *data)
> >  {
> > @@ -10096,6 +10120,7 @@ static int trace_module_notify(struct notifier_block *self,
> >  	switch (val) {
> >  	case MODULE_STATE_COMING:
> >  		trace_module_add_evals(mod);
> > +		trace_module_record(mod);
> >  		break;
> >  	case MODULE_STATE_GOING:
> >  		trace_module_remove_evals(mod);  
> 
> Don't we need to remove the module entry when a module is removed?
> (everytime we remove a module, trace data is cleared?)

I do have a patch that that removes entries, but I decided we don't really
want to do that.

If we want to have events for modules that were removed. Note, the ring
buffer is cleared if any module event was ever enabled and then the module
is removed, as how to print it is removed too. But we could disable that
for the persistent ring buffers as they should not be using the default
trace event print format anyway.

As for stack traces, we still want the module it was for when the stack
trace happens. A common bug we see is when a module is removed, it can
cause other bugs. We want to know about modules that were removed. Keeping
that information about removed modules will allow us to see what functions
were called by a stack trace for a module that was removed.

-- Steve
Masami Hiramatsu (Google) Feb. 6, 2025, 4:53 p.m. UTC | #3
On Thu, 6 Feb 2025 10:36:12 -0500
Steven Rostedt <rostedt@goodmis.org> wrote:

> On Thu, 6 Feb 2025 19:01:24 +0900
> Masami Hiramatsu (Google) <mhiramat@kernel.org> wrote:
> 
> > > +static void trace_module_record(struct module *mod)
> > > +{
> > > +	struct trace_array *tr;
> > > +
> > > +	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
> > > +		/* Update any persistent trace array that has already been started */
> > > +		if ((tr->flags & (TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT)) ==
> > > +		    TRACE_ARRAY_FL_BOOT) {
> > > +			/* Only update if the trace array is active */
> > > +			if (trace_array_active(tr))  
> > 
> > Do we really need this check? It seems that we can just save_mod() if the
> > above condition is true.
> 
> It gets a little more  complicated if we need to add and remove modules.

Yeah, but for converting the module address, we don't want to see other
module information.

> 
> > 
> > > +				save_mod(mod, tr);
> > > +		}
> > > +	}
> > > +}
> > > +
> > >  static int trace_module_notify(struct notifier_block *self,
> > >  			       unsigned long val, void *data)
> > >  {
> > > @@ -10096,6 +10120,7 @@ static int trace_module_notify(struct notifier_block *self,
> > >  	switch (val) {
> > >  	case MODULE_STATE_COMING:
> > >  		trace_module_add_evals(mod);
> > > +		trace_module_record(mod);
> > >  		break;
> > >  	case MODULE_STATE_GOING:
> > >  		trace_module_remove_evals(mod);  
> > 
> > Don't we need to remove the module entry when a module is removed?
> > (everytime we remove a module, trace data is cleared?)
> 
> I do have a patch that that removes entries, but I decided we don't really
> want to do that.
> 
> If we want to have events for modules that were removed. Note, the ring
> buffer is cleared if any module event was ever enabled and then the module
> is removed, as how to print it is removed too. But we could disable that
> for the persistent ring buffers as they should not be using the default
> trace event print format anyway.

Yeah, if the event is on the module the buffer is cleared.
But the module address can be in the stacktrace. In that case, the event
in the module is not enabled, but other events like sched_switch can
take stacktrace which can include the module address. In that case, the
buffer is also cleared when the module is removed?

> As for stack traces, we still want the module it was for when the stack
> trace happens. A common bug we see is when a module is removed, it can
> cause other bugs. We want to know about modules that were removed. Keeping
> that information about removed modules will allow us to see what functions
> were called by a stack trace for a module that was removed.

Hmm, but that should be covered by module load/unload events?

Anyway, this series does not cover the module text address in the stacktrace.
I just made a series of patches (which also not cover the module removal yet),
but it can show the basic idea.

My idea is to sort the previous module entries in the persistent buffer
when it is setup. We also make a "module_delta" array in the trace_array.
Then the print function can searche the appropriate module info from
the sorted table and find corresponding delta from "module_delta".

For example,

/sys/kernel/tracing/instances/boot_mapped # cat trace
           <...>-1629    [006] .....   202.860051: foo_bar_with_fn: foo Look at me 4
           <...>-1629    [006] .....   202.860059: <stack trace>
 => trace_event_raw_event_foo_bar_with_fn
 => simple_thread_fn
 => kthread
 => ret_from_fork
 => ret_from_fork_asm
/sys/kernel/tracing/instances/boot_mapped # cat last_boot_info 
Offset: 0
ffffffffa0016000 trace_events_sample
ffffffffa0025000 trace_printk
/sys/kernel/tracing/instances/boot_mapped # lsmod 

trace_events_sample 45056 0 - Live 0xffffffffa001c000
trace_printk 12288 0 - Live 0xffffffffa0016000

Let me share it.

Thank you,
Steven Rostedt Feb. 6, 2025, 5:18 p.m. UTC | #4
On Fri, 7 Feb 2025 01:53:30 +0900
Masami Hiramatsu (Google) <mhiramat@kernel.org> wrote:

> On Thu, 6 Feb 2025 10:36:12 -0500
> Steven Rostedt <rostedt@goodmis.org> wrote:
> 
> > On Thu, 6 Feb 2025 19:01:24 +0900
> > Masami Hiramatsu (Google) <mhiramat@kernel.org> wrote:
> >   
> > > > +static void trace_module_record(struct module *mod)
> > > > +{
> > > > +	struct trace_array *tr;
> > > > +
> > > > +	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
> > > > +		/* Update any persistent trace array that has already been started */
> > > > +		if ((tr->flags & (TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT)) ==
> > > > +		    TRACE_ARRAY_FL_BOOT) {
> > > > +			/* Only update if the trace array is active */
> > > > +			if (trace_array_active(tr))    
> > > 
> > > Do we really need this check? It seems that we can just save_mod() if the
> > > above condition is true.  
> > 
> > It gets a little more  complicated if we need to add and remove modules.  
> 
> Yeah, but for converting the module address, we don't want to see other
> module information.

But we want to see the module information for modules that were loaded
during the trace. If a module is removed and suddenly the system crashed,
we just lost that information. Hence why I reset the module information
when the tracing starts.


> > If we want to have events for modules that were removed. Note, the ring
> > buffer is cleared if any module event was ever enabled and then the module
> > is removed, as how to print it is removed too. But we could disable that
> > for the persistent ring buffers as they should not be using the default
> > trace event print format anyway.  
> 
> Yeah, if the event is on the module the buffer is cleared.
> But the module address can be in the stacktrace. In that case, the event
> in the module is not enabled, but other events like sched_switch can
> take stacktrace which can include the module address. In that case, the
> buffer is also cleared when the module is removed?

No. The buffer is only cleared if one of its events were ever enabled. If
no event within the module was enabled, then the buffer is not cleared.

> 
> > As for stack traces, we still want the module it was for when the stack
> > trace happens. A common bug we see is when a module is removed, it can
> > cause other bugs. We want to know about modules that were removed. Keeping
> > that information about removed modules will allow us to see what functions
> > were called by a stack trace for a module that was removed.  
> 
> Hmm, but that should be covered by module load/unload events?

If we have them enabled.

-- Steve
Masami Hiramatsu (Google) Feb. 7, 2025, 12:47 a.m. UTC | #5
On Thu, 6 Feb 2025 12:18:20 -0500
Steven Rostedt <rostedt@goodmis.org> wrote:

> On Fri, 7 Feb 2025 01:53:30 +0900
> Masami Hiramatsu (Google) <mhiramat@kernel.org> wrote:
> 
> > On Thu, 6 Feb 2025 10:36:12 -0500
> > Steven Rostedt <rostedt@goodmis.org> wrote:
> > 
> > > On Thu, 6 Feb 2025 19:01:24 +0900
> > > Masami Hiramatsu (Google) <mhiramat@kernel.org> wrote:
> > >   
> > > > > +static void trace_module_record(struct module *mod)
> > > > > +{
> > > > > +	struct trace_array *tr;
> > > > > +
> > > > > +	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
> > > > > +		/* Update any persistent trace array that has already been started */
> > > > > +		if ((tr->flags & (TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT)) ==
> > > > > +		    TRACE_ARRAY_FL_BOOT) {
> > > > > +			/* Only update if the trace array is active */
> > > > > +			if (trace_array_active(tr))    
> > > > 
> > > > Do we really need this check? It seems that we can just save_mod() if the
> > > > above condition is true.  
> > > 
> > > It gets a little more  complicated if we need to add and remove modules.  
> > 
> > Yeah, but for converting the module address, we don't want to see other
> > module information.
> 
> But we want to see the module information for modules that were loaded
> during the trace. If a module is removed and suddenly the system crashed,
> we just lost that information. Hence why I reset the module information
> when the tracing starts.

Then, what about removing module info if the address is recycled for
new module? We can keep it until the same address range is used by other
module(s).

> 
> 
> > > If we want to have events for modules that were removed. Note, the ring
> > > buffer is cleared if any module event was ever enabled and then the module
> > > is removed, as how to print it is removed too. But we could disable that
> > > for the persistent ring buffers as they should not be using the default
> > > trace event print format anyway.  
> > 
> > Yeah, if the event is on the module the buffer is cleared.
> > But the module address can be in the stacktrace. In that case, the event
> > in the module is not enabled, but other events like sched_switch can
> > take stacktrace which can include the module address. In that case, the
> > buffer is also cleared when the module is removed?
> 
> No. The buffer is only cleared if one of its events were ever enabled. If
> no event within the module was enabled, then the buffer is not cleared.
> 
> > 
> > > As for stack traces, we still want the module it was for when the stack
> > > trace happens. A common bug we see is when a module is removed, it can
> > > cause other bugs. We want to know about modules that were removed. Keeping
> > > that information about removed modules will allow us to see what functions
> > > were called by a stack trace for a module that was removed.  
> > 
> > Hmm, but that should be covered by module load/unload events?
> 
> If we have them enabled.

Yes, and current module load event does not cover the address. I think
we can add a new event to log it.

Thank you,

> 
> -- Steve
diff mbox series

Patch

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 7b4027804cd4..443f2bc5b856 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -10088,6 +10088,30 @@  static void trace_module_remove_evals(struct module *mod)
 static inline void trace_module_remove_evals(struct module *mod) { }
 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
 
+static bool trace_array_active(struct trace_array *tr)
+{
+	if (tr->current_trace != &nop_trace)
+		return true;
+
+	/* 0 is no events, 1 is all disabled */
+	return trace_events_enabled(tr, NULL) > 1;
+}
+
+static void trace_module_record(struct module *mod)
+{
+	struct trace_array *tr;
+
+	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
+		/* Update any persistent trace array that has already been started */
+		if ((tr->flags & (TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT)) ==
+		    TRACE_ARRAY_FL_BOOT) {
+			/* Only update if the trace array is active */
+			if (trace_array_active(tr))
+				save_mod(mod, tr);
+		}
+	}
+}
+
 static int trace_module_notify(struct notifier_block *self,
 			       unsigned long val, void *data)
 {
@@ -10096,6 +10120,7 @@  static int trace_module_notify(struct notifier_block *self,
 	switch (val) {
 	case MODULE_STATE_COMING:
 		trace_module_add_evals(mod);
+		trace_module_record(mod);
 		break;
 	case MODULE_STATE_GOING:
 		trace_module_remove_evals(mod);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 3a020fb82a34..90493220c362 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -786,6 +786,8 @@  extern void trace_find_cmdline(int pid, char comm[]);
 extern int trace_find_tgid(int pid);
 extern void trace_event_follow_fork(struct trace_array *tr, bool enable);
 
+extern int trace_events_enabled(struct trace_array *tr, const char *system);
+
 #ifdef CONFIG_DYNAMIC_FTRACE
 extern unsigned long ftrace_update_tot_cnt;
 extern unsigned long ftrace_number_of_pages;
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 4cb275316e51..107767afe0ab 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -1811,28 +1811,28 @@  event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
 	return cnt;
 }
 
-static ssize_t
-system_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
-		   loff_t *ppos)
+/*
+ * Returns:
+ *   0 : no events exist?
+ *   1 : all events are disabled
+ *   2 : all events are enabled
+ *   3 : some events are enabled and some are enabled
+ */
+int trace_events_enabled(struct trace_array *tr, const char *system)
 {
-	const char set_to_char[4] = { '?', '0', '1', 'X' };
-	struct trace_subsystem_dir *dir = filp->private_data;
-	struct event_subsystem *system = dir->subsystem;
 	struct trace_event_call *call;
 	struct trace_event_file *file;
-	struct trace_array *tr = dir->tr;
-	char buf[2];
 	int set = 0;
-	int ret;
 
-	mutex_lock(&event_mutex);
+	guard(mutex)(&event_mutex);
+
 	list_for_each_entry(file, &tr->events, list) {
 		call = file->event_call;
 		if ((call->flags & TRACE_EVENT_FL_IGNORE_ENABLE) ||
 		    !trace_event_name(call) || !call->class || !call->class->reg)
 			continue;
 
-		if (system && strcmp(call->class->system, system->name) != 0)
+		if (system && strcmp(call->class->system, system) != 0)
 			continue;
 
 		/*
@@ -1848,7 +1848,23 @@  system_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
 		if (set == 3)
 			break;
 	}
-	mutex_unlock(&event_mutex);
+
+	return set;
+}
+
+static ssize_t
+system_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
+		   loff_t *ppos)
+{
+	const char set_to_char[4] = { '?', '0', '1', 'X' };
+	struct trace_subsystem_dir *dir = filp->private_data;
+	struct event_subsystem *system = dir->subsystem;
+	struct trace_array *tr = dir->tr;
+	char buf[2];
+	int set;
+	int ret;
+
+	set = trace_events_enabled(tr, system ? system->name : NULL);
 
 	buf[0] = set_to_char[set];
 	buf[1] = '\n';