diff mbox series

[v2,14/17] tracing: Add kho serialization of trace events

Message ID 20231222195144.24532-9-graf@amazon.com (mailing list archive)
State New, archived
Headers show
Series None | expand

Commit Message

Alexander Graf Dec. 22, 2023, 7:51 p.m. UTC
Events, and thus their parsing handles, in ftrace have dynamic IDs that are
assigned whenever an event is added to the system. If we want to parse
trace events after kexec, we need to link event IDs back to the original
trace events that existed before we kexec'ed.

There are broadly 2 paths we could take for that:

  1) Save full event description across KHO, restore after kexec,
     merge identical trace events into a single identifier.
  2) Recover the ID of post-kexec added events so they get the same
     ID after kexec that they had before kexec

This patch implements the second option. It's simpler and thus less
intrusive. However, it means we cannot fully parse affected events
when the kernel removes or modifies trace events across a KHO kexec.

Signed-off-by: Alexander Graf <graf@amazon.com>

---

v1 -> v2:

  - Leave anything that requires a name in trace.c to keep buffers
    unnamed entities
  - Put events as array into a property, use fingerprint instead of
    names to identify them
  - Reduce footprint without CONFIG_FTRACE_KHO
---
 kernel/trace/trace.c        |  1 +
 kernel/trace/trace_output.c | 89 +++++++++++++++++++++++++++++++++++++
 kernel/trace/trace_output.h |  5 +++
 3 files changed, 95 insertions(+)
diff mbox series

Patch

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 94e30dfacfd1..b9ce8cf24d02 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -10634,6 +10634,7 @@  static int trace_kho_notifier(struct notifier_block *self,
 
 	err |= fdt_begin_node(fdt, "ftrace");
 	err |= fdt_property(fdt, "compatible", compatible, sizeof(compatible));
+	err |= trace_kho_write_events(fdt);
 	err |= trace_kho_write_trace_array(fdt, &global_trace);
 	err |= fdt_end_node(fdt);
 
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 3e7fa44dc2b2..7d8815352e20 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -12,6 +12,8 @@ 
 #include <linux/sched/clock.h>
 #include <linux/sched/mm.h>
 #include <linux/idr.h>
+#include <linux/kexec.h>
+#include <linux/crc32.h>
 
 #include "trace_output.h"
 
@@ -669,6 +671,93 @@  int trace_print_lat_context(struct trace_iterator *iter)
 	return !trace_seq_has_overflowed(s);
 }
 
+/**
+ * event2fp - Compute the KHO fingerprint of a trace event
+ * @event: The event to fingerprint
+ *
+ * KHO needs to match events before and after kexec to recover their type ids.
+ * A nonzero type below __TRACE_LAST_TYPE is returned as is; otherwise the
+ * result is a crc32 over the event's name followed by each field's size.
+ */
+static u32 event2fp(struct trace_event *event)
+{
+	struct trace_event_call *call;
+	struct ftrace_event_field *f;
+	struct list_head *fields;
+	const char *name;
+	u32 fp = ~0;
+
+	/* Low type numbers are static, the type itself is the fingerprint */
+	if (event->type != 0 && event->type < __TRACE_LAST_TYPE)
+		return event->type;
+
+	call = container_of(event, struct trace_event_call, event);
+	name = trace_event_name(call);
+	if (name)
+		fp = crc32_le(fp, name, strlen(name));
+
+	fields = trace_get_fields(call);
+	list_for_each_entry(f, fields, link)
+		fp = crc32_le(fp, (char *)&f->size, sizeof(f->size));
+
+	return fp;
+}
+
+struct trace_event_map {
+	u32 crc32;	/* event fingerprint, see event2fp() */
+	u32 type;	/* trace event type id the fingerprint maps to */
+};
+
+/* Serialize a fingerprint -> type id map of all events into the FDT */
+static int __maybe_unused _trace_kho_write_events(void *fdt)
+{
+	struct trace_event_call *call;
+	int count = __TRACE_LAST_TYPE - 1;
+	struct trace_event_map *map;
+	int err = 0;
+	int i;
+
+	down_read(&trace_event_sem);
+	/* Allocate an array that we can place all maps into */
+	list_for_each_entry(call, &ftrace_events, list)
+		count++;
+
+	map = vmalloc(count * sizeof(*map));
+	if (!map) {
+		up_read(&trace_event_sem);
+		return -ENOMEM;
+	}
+
+	/* Static types map to themselves, as event2fp() does for them */
+	count = 0;
+	for (i = 1; i < __TRACE_LAST_TYPE; i++)
+		map[count++] = (struct trace_event_map) { .crc32 = i, .type = i };
+
+	list_for_each_entry(call, &ftrace_events, list) {
+		struct trace_event *event = &call->event;
+
+		map[count++] = (struct trace_event_map) {
+			.crc32 = event2fp(event),
+			.type = event->type,
+		};
+	}
+	up_read(&trace_event_sem);
+
+	/* And finally write it into a DT variable */
+	err |= fdt_property(fdt, "events", map, count * sizeof(*map));
+
+	vfree(map);
+	return err;
+}
+
+#ifdef CONFIG_FTRACE_KHO
+int trace_kho_write_events(void *fdt)
+{
+	return _trace_kho_write_events(fdt);
+}
+#endif /* CONFIG_FTRACE_KHO */
+
+
 /**
  * ftrace_find_event - find a registered event
  * @type: the type of event to look for
diff --git a/kernel/trace/trace_output.h b/kernel/trace/trace_output.h
index dca40f1f1da4..07481f295436 100644
--- a/kernel/trace/trace_output.h
+++ b/kernel/trace/trace_output.h
@@ -25,6 +25,11 @@  extern enum print_line_t print_event_fields(struct trace_iterator *iter,
 extern void trace_event_read_lock(void);
 extern void trace_event_read_unlock(void);
 extern struct trace_event *ftrace_find_event(int type);
+#ifdef CONFIG_FTRACE_KHO
+extern int trace_kho_write_events(void *fdt);
+#else /* !CONFIG_FTRACE_KHO */
+static inline int trace_kho_write_events(void *fdt) { return -EINVAL; }
+#endif /* CONFIG_FTRACE_KHO */
 
 extern enum print_line_t trace_nop_print(struct trace_iterator *iter,
 					 int flags, struct trace_event *event);