diff mbox series

[3/5] tracing: Have trace_printk not use binary prints if boot buffer

Message ID 20240823014019.386925800@goodmis.org (mailing list archive)
State Accepted
Commit 9b7bdf6f6ece6ea888cc7d2f02c00b403b66a119
Headers show
Series tracing: Allow trace_printk() to use the persistent ring buffer | expand

Commit Message

Steven Rostedt Aug. 23, 2024, 1:39 a.m. UTC
From: Steven Rostedt <rostedt@goodmis.org>

If the persistent boot mapped ring buffer is used for trace_printk(),
force it to not use the binary versions. trace_printk() by default uses
bin_printf() that only saves the pointer to the format and not the format
itself inside the ring buffer. But for a persistent buffer that is read
after reboot, the pointers to the format strings may not be the same, or
worse, not even exist! Instead, just force the more robust, but slower,
version that does the formatting before saving into the ring buffer.

The boot mapped buffer can now be used for trace_printk and friends!

trace_printk() together with the persistent buffer was used to debug the
following issue with the osnoise tracer:

Link: https://lore.kernel.org/all/20240822103443.6a6ae051@gandalf.local.home/

Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 .../admin-guide/kernel-parameters.txt         |  4 +-
 kernel/trace/trace.c                          | 44 ++++++++++++-------
 kernel/trace/trace.h                          |  3 +-
 kernel/trace/trace_output.c                   |  5 ++-
 4 files changed, 36 insertions(+), 20 deletions(-)
diff mbox series

Patch

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index a8803c0c0a89..9e507e6cb4c8 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -6751,8 +6751,6 @@ 
 			    traceoff	- Have the tracing instance tracing disabled after it is created.
 			    traceprintk	- Have trace_printk() write into this trace instance
 					  (note, "printk" and "trace_printk" can also be used)
-					  Currently, traceprintk flag cannot be used for memory
-					  mapped ring buffers as described below.
 
 				trace_instance=foo^traceoff^traceprintk,sched,irq
 
@@ -6785,7 +6783,7 @@ 
 			mix with events of the current boot (unless you are debugging a random crash
 			at boot up).
 
-				reserve_mem=12M:4096:trace trace_instance=boot_map^traceoff@trace,sched,irq
+				reserve_mem=12M:4096:trace trace_instance=boot_map^traceoff^traceprintk@trace,sched,irq
 
 
 	trace_options=[option-list]
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 8e28f19f5316..35b37c9aa26c 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -502,6 +502,17 @@  static struct trace_array global_trace = {
 
 static struct trace_array *printk_trace = &global_trace;
 
+static __always_inline bool printk_binsafe(struct trace_array *tr)
+{
+	/*
+	 * The binary format of traceprintk can cause a crash if used
+	 * by a buffer from another boot. Force the use of the
+	 * non binary version of trace_printk if the trace_printk
+	 * buffer is a boot mapped ring buffer.
+	 */
+	return !(tr->flags & TRACE_ARRAY_FL_BOOT);
+}
+
 void trace_set_ring_buffer_expanded(struct trace_array *tr)
 {
 	if (!tr)
@@ -1130,7 +1141,7 @@  EXPORT_SYMBOL_GPL(__trace_puts);
  */
 int __trace_bputs(unsigned long ip, const char *str)
 {
-	struct trace_array *tr = printk_trace;
+	struct trace_array *tr = READ_ONCE(printk_trace);
 	struct ring_buffer_event *event;
 	struct trace_buffer *buffer;
 	struct bputs_entry *entry;
@@ -1138,6 +1149,9 @@  int __trace_bputs(unsigned long ip, const char *str)
 	int size = sizeof(struct bputs_entry);
 	int ret = 0;
 
+	if (!printk_binsafe(tr))
+		return __trace_puts(ip, str, strlen(str));
+
 	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
 		return 0;
 
@@ -3247,12 +3261,15 @@  int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
 	struct trace_event_call *call = &event_bprint;
 	struct ring_buffer_event *event;
 	struct trace_buffer *buffer;
-	struct trace_array *tr = printk_trace;
+	struct trace_array *tr = READ_ONCE(printk_trace);
 	struct bprint_entry *entry;
 	unsigned int trace_ctx;
 	char *tbuffer;
 	int len = 0, size;
 
+	if (!printk_binsafe(tr))
+		return trace_vprintk(ip, fmt, args);
+
 	if (unlikely(tracing_selftest_running || tracing_disabled))
 		return 0;
 
@@ -10560,20 +10577,17 @@  __init static void enable_instances(void)
 		if (traceoff)
 			tracer_tracing_off(tr);
 
-		if (traceprintk) {
-			/*
-			 * The binary format of traceprintk can cause a crash if used
-			 * by a buffer from another boot. Do not allow it for the
-			 * memory mapped ring buffers.
-			 */
-			if (start)
-				pr_warn("Tracing: WARNING: memory mapped ring buffers cannot be used for trace_printk\n");
-			else
-				printk_trace = tr;
-		}
+		if (traceprintk)
+			printk_trace = tr;
 
-		/* Only allow non mapped buffers to be deleted */
-		if (!start)
+		/*
+		 * If start is set, then this is a mapped buffer, and
+		 * cannot be deleted by user space, so keep the reference
+		 * to it.
+		 */
+		if (start)
+			tr->flags |= TRACE_ARRAY_FL_BOOT;
+		else
 			trace_array_put(tr);
 
 		while ((tok = strsep(&curr_str, ","))) {
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 4f448ab2d1e7..07b2d2af9b33 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -429,7 +429,8 @@  struct trace_array {
 };
 
 enum {
-	TRACE_ARRAY_FL_GLOBAL	= (1 << 0)
+	TRACE_ARRAY_FL_GLOBAL	= BIT(0),
+	TRACE_ARRAY_FL_BOOT	= BIT(1),
 };
 
 extern struct list_head ftrace_trace_arrays;
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 48de93598897..868f2f912f28 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -1591,10 +1591,13 @@  static enum print_line_t trace_print_print(struct trace_iterator *iter,
 {
 	struct print_entry *field;
 	struct trace_seq *s = &iter->seq;
+	unsigned long ip;
 
 	trace_assign_type(field, iter->ent);
 
-	seq_print_ip_sym(s, field->ip, flags);
+	ip = field->ip + iter->tr->text_delta;
+
+	seq_print_ip_sym(s, ip, flags);
 	trace_seq_printf(s, ": %s", field->buf);
 
 	return trace_handle_return(s);