diff mbox series

[v4,2/4] tracing: Have reserve_mem use phys_to_virt() and separate from memmap buffer

Message ID 20250401215333.427506494@goodmis.org (mailing list archive)
State Superseded
Headers show
Series tracing: Clean up persistent ring buffer code (was: ring-buffer: Allow persistent memory to be user space mmapped) | expand

Commit Message

Steven Rostedt April 1, 2025, 9:51 p.m. UTC
From: Steven Rostedt <rostedt@goodmis.org>

The reserve_mem kernel command line option may pass back a physical
address, but the memory is still part of the normal memory just like
using memblock_reserve() would be. This means that the physical memory
returned by the reserve_mem command line option can be converted directly
to virtual memory by simply using phys_to_virt().

When freeing the buffer allocated by reserve_mem, use free_reserved_area().

Because the persistent ring buffer can also be allocated via the memmap
option, which *is* different than normal memory as it cannot be added back
to the buddy system, it must be treated differently. It still needs to be
virtually mapped to have access to it. It also can not be freed nor can it
ever be memory mapped to user space.

Create a new trace_array flag called TRACE_ARRAY_FL_MEMMAP which gets set
if the buffer is created by the memmap option, and this will prevent the
buffer from being memory mapped by user space.

Also increment the ref count for memmap'ed buffers so that they can never
be freed.

Link: https://lore.kernel.org/all/Z-wFszhJ_9o4dc8O@kernel.org/

Suggested-by: Mike Rapoport <rppt@kernel.org>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 kernel/trace/trace.c | 28 ++++++++++++++++++++++------
 kernel/trace/trace.h |  1 +
 2 files changed, 23 insertions(+), 6 deletions(-)

Comments

Steven Rostedt April 1, 2025, 10:16 p.m. UTC | #1
On Tue, 01 Apr 2025 17:51:17 -0400
Steven Rostedt <rostedt@goodmis.org> wrote:

> @@ -9615,8 +9619,12 @@ static void free_trace_buffers(struct trace_array *tr)
>  	free_trace_buffer(&tr->max_buffer);
>  #endif
>  
> -	if (tr->range_addr_start)
> -		vunmap((void *)tr->range_addr_start);
> +	if (tr->range_addr_start) {
> +		void *start = (void *)tr->range_addr_start;
> +		void *end = start + tr->range_addr_size;
> +
> +		free_reserved_area(start, end, 0, tr->range_name);
> +	}
>  }
>  
>  static void init_trace_flags_index(struct trace_array *tr)

Masami,

Note, your patch to free the persistent ring buffer wasn't fully
functional, as it only did the "vunmap()". That doesn't return the buffer
back to the buddy allocator. What you saw was just the freeing of all the
other descriptors that make up a trace instance.

Before this patch:

  ~# free
                 total        used        free      shared  buff/cache   available
  Mem:         8185908      297404     7825896         916      162288     7888504
  Swap:        7812092           0     7812092
  ~# rmdir /sys/kernel/tracing/instances/boot_mapped
  ~# free
                 total        used        free      shared  buff/cache   available
  Mem:         8206384      297956     7845904         916      162260     7908428
  Swap:        7812092           0     7812092

Amount freed: 7845904 - 7825896 = 20008 (20M)

After this patch:

  ~# free
                 total        used        free      shared  buff/cache   available
  Mem:         8185912      301808     7820696         920      162860     7884104
  Swap:        7812092           0     7812092
  ~# rmdir /sys/kernel/tracing/instances/boot_mapped
  ~# free
                 total        used        free      shared  buff/cache   available
  Mem:         8226868      295968     7867644         920      162836     7930900
  Swap:        7812092           0     7812092

Amount freed: 7867644 - 7820696 = 46948 (46M)!

-- Steve
Steven Rostedt April 1, 2025, 10:40 p.m. UTC | #2
On Tue, 1 Apr 2025 18:16:40 -0400
Steven Rostedt <rostedt@goodmis.org> wrote:


> Note, your patch to free the persistent ring buffer wasn't fully
> functional, as it only did the "vunmap()". That doesn't return the buffer
> back to the buddy allocator. What you saw was just the freeing of all the
> other descriptors that make up a trace instance.
> 
> Before this patch:
> 
>   ~# free
>                  total        used        free      shared  buff/cache   available
>   Mem:         8185908      297404     7825896         916      162288     7888504
>   Swap:        7812092           0     7812092
>   ~# rmdir /sys/kernel/tracing/instances/boot_mapped
>   ~# free
>                  total        used        free      shared  buff/cache   available
>   Mem:         8206384      297956     7845904         916      162260     7908428
>   Swap:        7812092           0     7812092
> 
> Amount freed: 7845904 - 7825896 = 20008 (20M)
> 
> After this patch:
> 
>   ~# free
>                  total        used        free      shared  buff/cache   available
>   Mem:         8185912      301808     7820696         920      162860     7884104
>   Swap:        7812092           0     7812092
>   ~# rmdir /sys/kernel/tracing/instances/boot_mapped
>   ~# free
>                  total        used        free      shared  buff/cache   available
>   Mem:         8226868      295968     7867644         920      162836     7930900
>   Swap:        7812092           0     7812092
> 
> Amount freed: 7867644 - 7820696 = 46948 (46M)!

Bah! My patch is buggy. Yeah, your code released the memory properly, as you had:

	reserve_mem_release_by_name()

which does do free_reserve_area() on the memory mapping.

Which means this patch is buggy and freed the same memory twice.

OK, time for v5 to fix that :-p

-- Steve
diff mbox series

Patch

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index de9c237e5826..d960f80701bd 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -8505,6 +8505,10 @@  static int tracing_buffers_mmap(struct file *filp, struct vm_area_struct *vma)
 	struct trace_iterator *iter = &info->iter;
 	int ret = 0;
 
+	/* A memmap'ed buffer is not supported for user space mmap */
+	if (iter->tr->flags & TRACE_ARRAY_FL_MEMMAP)
+		return -ENODEV;
+
 	/* Currently the boot mapped buffer is not supported for mmap */
 	if (iter->tr->flags & TRACE_ARRAY_FL_BOOT)
 		return -ENODEV;
@@ -9615,8 +9619,12 @@  static void free_trace_buffers(struct trace_array *tr)
 	free_trace_buffer(&tr->max_buffer);
 #endif
 
-	if (tr->range_addr_start)
-		vunmap((void *)tr->range_addr_start);
+	if (tr->range_addr_start) {
+		void *start = (void *)tr->range_addr_start;
+		void *end = start + tr->range_addr_size;
+
+		free_reserved_area(start, end, 0, tr->range_name);
+	}
 }
 
 static void init_trace_flags_index(struct trace_array *tr)
@@ -10710,6 +10718,7 @@  static inline void do_allocate_snapshot(const char *name) { }
 __init static void enable_instances(void)
 {
 	struct trace_array *tr;
+	bool memmap_area = false;
 	char *curr_str;
 	char *name;
 	char *str;
@@ -10778,6 +10787,7 @@  __init static void enable_instances(void)
 					name);
 				continue;
 			}
+			memmap_area = true;
 		} else if (tok) {
 			if (!reserve_mem_find_by_name(tok, &start, &size)) {
 				start = 0;
@@ -10800,7 +10810,10 @@  __init static void enable_instances(void)
 				continue;
 			}
 
-			addr = map_pages(start, size);
+			if (memmap_area)
+				addr = map_pages(start, size);
+			else
+				addr = (unsigned long)phys_to_virt(start);
 			if (addr) {
 				pr_info("Tracing: mapped boot instance %s at physical memory %pa of size 0x%lx\n",
 					name, &start, (unsigned long)size);
@@ -10827,10 +10840,13 @@  __init static void enable_instances(void)
 			update_printk_trace(tr);
 
 		/*
-		 * If start is set, then this is a mapped buffer, and
-		 * cannot be deleted by user space, so keep the reference
-		 * to it.
+		 * memmap'd buffers can not be freed.
 		 */
+		if (memmap_area) {
+			tr->flags |= TRACE_ARRAY_FL_MEMMAP;
+			tr->ref++;
+		}
+
 		if (start) {
 			tr->flags |= TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT;
 			tr->range_name = no_free_ptr(rname);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index c20f6bcc200a..f9513dc14c37 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -447,6 +447,7 @@  enum {
 	TRACE_ARRAY_FL_BOOT		= BIT(1),
 	TRACE_ARRAY_FL_LAST_BOOT	= BIT(2),
 	TRACE_ARRAY_FL_MOD_INIT		= BIT(3),
+	TRACE_ARRAY_FL_MEMMAP		= BIT(4),
 };
 
 #ifdef CONFIG_MODULES