diff mbox series

[bpf,2/2] bpf: use vmalloc with VM_ALLOW_HUGE_VMAP for bpf_prog_pack

Message ID 20220408223443.3303509-3-song@kernel.org (mailing list archive)
State New
Headers show
Series vmalloc: bpf: introduce VM_ALLOW_HUGE_VMAP | expand

Commit Message

Song Liu April 8, 2022, 10:34 p.m. UTC
Use __vmalloc_node_range with VM_ALLOW_HUGE_VMAP for bpf_prog_pack so that
BPF programs sit on PMD_SIZE pages. This benefits system performance by
reducing iTLB miss rate.

Signed-off-by: Song Liu <song@kernel.org>
---
 kernel/bpf/core.c | 24 ++++++++++++++++++++----
 1 file changed, 20 insertions(+), 4 deletions(-)

Comments

Christoph Hellwig April 9, 2022, 5:29 a.m. UTC | #1
On Fri, Apr 08, 2022 at 03:34:43PM -0700, Song Liu wrote:
> +static void *bpf_prog_pack_vmalloc(unsigned long size)
> +{
> +#if defined(MODULES_VADDR)
> +	unsigned long start = MODULES_VADDR;
> +	unsigned long end = MODULES_END;
> +#else
> +	unsigned long start = VMALLOC_START;
> +	unsigned long end = VMALLOC_END;
> +#endif
> +
> +	return __vmalloc_node_range(size, PAGE_SIZE, start, end, GFP_KERNEL, PAGE_KERNEL,
> +				    VM_DEFER_KMEMLEAK | VM_ALLOW_HUGE_VMAP,
> +				    NUMA_NO_NODE, __builtin_return_address(0));
> +}

Instead of having this magic in bpf I think a module_alloc_large would
seem like the better interface here.
Song Liu April 10, 2022, 1:34 a.m. UTC | #2
> On Apr 8, 2022, at 10:29 PM, Christoph Hellwig <hch@infradead.org> wrote:
> 
> On Fri, Apr 08, 2022 at 03:34:43PM -0700, Song Liu wrote:
>> +static void *bpf_prog_pack_vmalloc(unsigned long size)
>> +{
>> +#if defined(MODULES_VADDR)
>> +	unsigned long start = MODULES_VADDR;
>> +	unsigned long end = MODULES_END;
>> +#else
>> +	unsigned long start = VMALLOC_START;
>> +	unsigned long end = VMALLOC_END;
>> +#endif
>> +
>> +	return __vmalloc_node_range(size, PAGE_SIZE, start, end, GFP_KERNEL, PAGE_KERNEL,
>> +				    VM_DEFER_KMEMLEAK | VM_ALLOW_HUGE_VMAP,
>> +				    NUMA_NO_NODE, __builtin_return_address(0));
>> +}
> 
> Instead of having this magic in bpf I think a module_alloc_large would
> seem like the better interface here.

AFAICT, modules allocate a large piece of memory and put both text and
data on it, so modules cannot really use huge pages yet. 

OTOH, it is probably beneficial for the modules to use something
similar to bpf_prog_pack, i.e., put text from multiple modules onto a
single huge page. Of course, this requires non-trivial work in both
mm code and module code.

Given that 1) modules cannot use huge pages yet, and 2) modules may
use them differently (with sharing), I think adding module_alloc_large()
doesn't add much value at the moment. So we can just keep this logic
in BPF for now.

Does this make sense?

Thanks,
Song
Christoph Hellwig April 11, 2022, 6:56 a.m. UTC | #3
On Sun, Apr 10, 2022 at 01:34:50AM +0000, Song Liu wrote:
> OTOH, it is probably beneficial for the modules to use something
> similar to bpf_prog_pack, i.e., put text from multiple modules onto a
> single huge page. Of course, this requires non-trivial work in both
> mm code and module code.
> 
> Given that 1) modules cannot use huge pages yet, and 2) modules may
> use them differently (with sharing), I think adding module_alloc_large()
> doesn't add much value at the moment. So we can just keep this logic
> in BPF for now.
> 
> Does this make sense?

I'm not intending to say modules should use the new helper.  But I'd much
prefer to keep all the MODULES_VADDR related bits self-contained in the
modules code and not splatter it over random other subsystems.
Song Liu April 11, 2022, 10:18 p.m. UTC | #4
> On Apr 10, 2022, at 11:56 PM, Christoph Hellwig <hch@infradead.org> wrote:
> 
> On Sun, Apr 10, 2022 at 01:34:50AM +0000, Song Liu wrote:
>> OTOH, it is probably beneficial for the modules to use something
>> similar to bpf_prog_pack, i.e., put text from multiple modules onto a
>> single huge page. Of course, this requires non-trivial work in both
>> mm code and module code.
>> 
>> Given that 1) modules cannot use huge pages yet, and 2) modules may
>> use them differently (with sharing), I think adding module_alloc_large()
>> doesn't add much value at the moment. So we can just keep this logic
>> in BPF for now.
>> 
>> Does this make sense?
> 
> I'm not intending to say modules should use the new helper.  But I'd much
> prefer to keep all the MODULES_VADDR related bits self-contained in the
> modules code and not splatter it over random other subsystems.

Got it. Will add that in v2. 

Thanks,
Song
diff mbox series

Patch

diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 13e9dbeeedf3..04214f4e64f1 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -851,13 +851,28 @@  static LIST_HEAD(pack_list);
 #define BPF_HPAGE_MASK PAGE_MASK
 #endif
 
+static void *bpf_prog_pack_vmalloc(unsigned long size)
+{
+#if defined(MODULES_VADDR)
+	unsigned long start = MODULES_VADDR;
+	unsigned long end = MODULES_END;
+#else
+	unsigned long start = VMALLOC_START;
+	unsigned long end = VMALLOC_END;
+#endif
+
+	return __vmalloc_node_range(size, PAGE_SIZE, start, end, GFP_KERNEL, PAGE_KERNEL,
+				    VM_DEFER_KMEMLEAK | VM_ALLOW_HUGE_VMAP,
+				    NUMA_NO_NODE, __builtin_return_address(0));
+}
+
 static size_t select_bpf_prog_pack_size(void)
 {
 	size_t size;
 	void *ptr;
 
 	size = BPF_HPAGE_SIZE * num_online_nodes();
-	ptr = module_alloc(size);
+	ptr = bpf_prog_pack_vmalloc(size);
 
 	/* Test whether we can get huge pages. If not just use PAGE_SIZE
 	 * packs.
@@ -881,7 +896,7 @@  static struct bpf_prog_pack *alloc_new_pack(void)
 		       GFP_KERNEL);
 	if (!pack)
 		return NULL;
-	pack->ptr = module_alloc(bpf_prog_pack_size);
+	pack->ptr = bpf_prog_pack_vmalloc(bpf_prog_pack_size);
 	if (!pack->ptr) {
 		kfree(pack);
 		return NULL;
@@ -889,7 +904,6 @@  static struct bpf_prog_pack *alloc_new_pack(void)
 	bitmap_zero(pack->bitmap, bpf_prog_pack_size / BPF_PROG_CHUNK_SIZE);
 	list_add_tail(&pack->list, &pack_list);
 
-	set_vm_flush_reset_perms(pack->ptr);
 	set_memory_ro((unsigned long)pack->ptr, bpf_prog_pack_size / PAGE_SIZE);
 	set_memory_x((unsigned long)pack->ptr, bpf_prog_pack_size / PAGE_SIZE);
 	return pack;
@@ -970,7 +984,9 @@  static void bpf_prog_pack_free(struct bpf_binary_header *hdr)
 	if (bitmap_find_next_zero_area(pack->bitmap, bpf_prog_chunk_count(), 0,
 				       bpf_prog_chunk_count(), 0) == 0) {
 		list_del(&pack->list);
-		module_memfree(pack->ptr);
+		set_memory_nx((unsigned long)pack->ptr, bpf_prog_pack_size / PAGE_SIZE);
+		set_memory_rw((unsigned long)pack->ptr, bpf_prog_pack_size / PAGE_SIZE);
+		vfree(pack->ptr);
 		kfree(pack);
 	}
 out: