diff mbox series

[v2] ring-buffer: Align meta-page to sub-buffers for improved TLB usage

Message ID 20240628104611.1443542-1-vdonnefort@google.com (mailing list archive)
State Accepted
Headers show
Series [v2] ring-buffer: Align meta-page to sub-buffers for improved TLB usage | expand

Commit Message

Vincent Donnefort June 28, 2024, 10:46 a.m. UTC
Previously, the mapped ring-buffer layout caused misalignment between
the meta-page and sub-buffers when the sub-buffer size was not a
multiple of PAGE_SIZE. This prevented hardware with larger TLB entries
from utilizing them effectively.

Add zero-page padding between the meta-page and the sub-buffers. Also
update the ring-buffer map_test to verify that padding.

Signed-off-by: Vincent Donnefort <vdonnefort@google.com>

--

This is based on the mm-unstable branch [1] as it depends on David's work [2]
for allowing the zero-page in vm_insert_page().

[1] https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git
[2] https://lore.kernel.org/all/20240522125713.775114-1-david@redhat.com

v1 -> v2:
  * Fix unsequenced modification and access to 'p' (s390 build)



base-commit: c65920c76a977c2b73c3a8b03b4c0c00cc1285ed

Comments

Steven Rostedt July 15, 2024, 6:59 p.m. UTC | #1
On Fri, 28 Jun 2024 11:46:11 +0100
Vincent Donnefort <vdonnefort@google.com> wrote:

> This is based on the mm-unstable branch [1] as it depends on David's work [2]
> for allowing the zero-page in vm_insert_page().
> 
> [1] https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git
> [2] https://lore.kernel.org/all/20240522125713.775114-1-david@redhat.com

I'll hold off until the 6.12 merge window before pushing this, so that
the above is guaranteed to be there.

-- Steve
Steven Rostedt Aug. 21, 2024, 3:56 p.m. UTC | #2
On Fri, 28 Jun 2024 11:46:11 +0100
Vincent Donnefort <vdonnefort@google.com> wrote:

> diff --git a/tools/testing/selftests/ring-buffer/map_test.c b/tools/testing/selftests/ring-buffer/map_test.c
> index a9006fa7097e..4bb0192e43f3 100644
> --- a/tools/testing/selftests/ring-buffer/map_test.c
> +++ b/tools/testing/selftests/ring-buffer/map_test.c
> @@ -228,6 +228,20 @@ TEST_F(map, data_mmap)
>  	data = mmap(NULL, data_len, PROT_READ, MAP_SHARED,
>  		    desc->cpu_fd, meta_len);
>  	ASSERT_EQ(data, MAP_FAILED);
> +
> +	/* Verify meta-page padding */
> +	if (desc->meta->meta_page_size > getpagesize()) {
> +		void *addr;
> +
> +		data_len = desc->meta->meta_page_size;
> +		data = mmap(NULL, data_len,
> +			    PROT_READ, MAP_SHARED, desc->cpu_fd, 0);
> +		ASSERT_NE(data, MAP_FAILED);
> +
> +		addr = (void *)((unsigned long)data + getpagesize());
> +		ASSERT_EQ(*((int *)addr), 0);

Should we make this a test that the entire page is zero?

		for (int i = desc->meta->meta_struct_len; i < desc->meta->meta_page_size; i += sizeof(int))
			ASSERT_EQ(((int *)data)[i], 0);

?

> +		munmap(data, data_len);
> +	}
>  }

Also, looking at the init: if for some reason (which I highly doubt will
happen) meta_struct_len becomes bigger than page_size, we should update
the init section to:

	/* Handle the case where meta_struct_len is greater than page size */
	if (page_size < desc->meta->meta_struct_len) {
		/* meta_page_size is >= meta_struct_len */
		page_size = desc->meta->meta_page_size;
		munmap(desc->meta, page_size);
		map = mmap(NULL, page_size, PROT_READ, MAP_SHARED, desc->cpu_fd, 0);
		if (map == MAP_FAILED)
			return -errno;
		desc->meta = (struct trace_buffer_meta *)map;
	}

-- Steve
diff mbox series

Patch

diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 7345a8b625fb..c1116e76fe17 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -6148,10 +6148,10 @@  static void rb_setup_ids_meta_page(struct ring_buffer_per_cpu *cpu_buffer,
 	/* install subbuf ID to kern VA translation */
 	cpu_buffer->subbuf_ids = subbuf_ids;
 
-	meta->meta_page_size = PAGE_SIZE;
 	meta->meta_struct_len = sizeof(*meta);
 	meta->nr_subbufs = nr_subbufs;
 	meta->subbuf_size = cpu_buffer->buffer->subbuf_size + BUF_PAGE_HDR_SIZE;
+	meta->meta_page_size = meta->subbuf_size;
 
 	rb_update_meta_page(cpu_buffer);
 }
@@ -6238,6 +6238,12 @@  static int __rb_map_vma(struct ring_buffer_per_cpu *cpu_buffer,
 	    !(vma->vm_flags & VM_MAYSHARE))
 		return -EPERM;
 
+	subbuf_order = cpu_buffer->buffer->subbuf_order;
+	subbuf_pages = 1 << subbuf_order;
+
+	if (subbuf_order && pgoff % subbuf_pages)
+		return -EINVAL;
+
 	/*
 	 * Make sure the mapping cannot become writable later. Also tell the VM
 	 * to not touch these pages (VM_DONTCOPY | VM_DONTEXPAND).
@@ -6247,11 +6253,8 @@  static int __rb_map_vma(struct ring_buffer_per_cpu *cpu_buffer,
 
 	lockdep_assert_held(&cpu_buffer->mapping_lock);
 
-	subbuf_order = cpu_buffer->buffer->subbuf_order;
-	subbuf_pages = 1 << subbuf_order;
-
 	nr_subbufs = cpu_buffer->nr_pages + 1; /* + reader-subbuf */
-	nr_pages = ((nr_subbufs) << subbuf_order) - pgoff + 1; /* + meta-page */
+	nr_pages = ((nr_subbufs + 1) << subbuf_order) - pgoff; /* + meta-page */
 
 	vma_pages = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
 	if (!vma_pages || vma_pages > nr_pages)
@@ -6264,20 +6267,24 @@  static int __rb_map_vma(struct ring_buffer_per_cpu *cpu_buffer,
 		return -ENOMEM;
 
 	if (!pgoff) {
+		unsigned long meta_page_padding;
+
 		pages[p++] = virt_to_page(cpu_buffer->meta_page);
 
 		/*
-		 * TODO: Align sub-buffers on their size, once
-		 * vm_insert_pages() supports the zero-page.
+		 * Pad with the zero-page to align the meta-page with the
+		 * sub-buffers.
 		 */
-	} else {
-		/* Skip the meta-page */
-		pgoff--;
+		meta_page_padding = subbuf_pages - 1;
+		while (meta_page_padding-- && p < nr_pages) {
+			unsigned long __maybe_unused zero_addr =
+				vma->vm_start + (PAGE_SIZE * p);
 
-		if (pgoff % subbuf_pages) {
-			err = -EINVAL;
-			goto out;
+			pages[p++] = ZERO_PAGE(zero_addr);
 		}
+	} else {
+		/* Skip the meta-page */
+		pgoff -= subbuf_pages;
 
 		s += pgoff / subbuf_pages;
 	}
diff --git a/tools/testing/selftests/ring-buffer/map_test.c b/tools/testing/selftests/ring-buffer/map_test.c
index a9006fa7097e..4bb0192e43f3 100644
--- a/tools/testing/selftests/ring-buffer/map_test.c
+++ b/tools/testing/selftests/ring-buffer/map_test.c
@@ -228,6 +228,20 @@  TEST_F(map, data_mmap)
 	data = mmap(NULL, data_len, PROT_READ, MAP_SHARED,
 		    desc->cpu_fd, meta_len);
 	ASSERT_EQ(data, MAP_FAILED);
+
+	/* Verify meta-page padding */
+	if (desc->meta->meta_page_size > getpagesize()) {
+		void *addr;
+
+		data_len = desc->meta->meta_page_size;
+		data = mmap(NULL, data_len,
+			    PROT_READ, MAP_SHARED, desc->cpu_fd, 0);
+		ASSERT_NE(data, MAP_FAILED);
+
+		addr = (void *)((unsigned long)data + getpagesize());
+		ASSERT_EQ(*((int *)addr), 0);
+		munmap(data, data_len);
+	}
 }
 
 FIXTURE(snapshot) {