Message ID | b59ed8781ef9af995c5bfa762de1f42fdfc57c74.1643475473.git.christophe.leroy@csgroup.eu (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | Allocate module text and data separately | expand |
On Sat, Jan 29, 2022 at 05:02:09PM +0000, Christophe Leroy wrote: > diff --git a/kernel/module.c b/kernel/module.c > index 11f51e17fb9f..f3758115ebaa 100644 > --- a/kernel/module.c > +++ b/kernel/module.c > @@ -81,7 +81,9 @@ > /* If this is set, the section belongs in the init part of the module */ > #define INIT_OFFSET_MASK (1UL << (BITS_PER_LONG-1)) > > +#ifndef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC > #define data_layout core_layout > +#endif > > /* > * Mutex protects: > @@ -111,6 +113,12 @@ static struct mod_tree_root { > #define module_addr_min mod_tree.addr_min > #define module_addr_max mod_tree.addr_max > > +#ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC > +static struct mod_tree_root mod_data_tree __cacheline_aligned = { > + .addr_min = -1UL, > +}; > +#endif > + > #ifdef CONFIG_MODULES_TREE_LOOKUP > > /* > @@ -186,6 +194,11 @@ static void mod_tree_insert(struct module *mod) > __mod_tree_insert(&mod->core_layout.mtn, &mod_tree); > if (mod->init_layout.size) > __mod_tree_insert(&mod->init_layout.mtn, &mod_tree); > + > +#ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC > + mod->data_layout.mtn.mod = mod; > + __mod_tree_insert(&mod->data_layout.mtn, &mod_data_tree); > +#endif The kernel/ directory has quite a few files; module.c is the second-largest file, and it has tons of stuff. Aaron is doing work to split things out to make the code easier to read and so that it's easier to review changes. See: https://lkml.kernel.org/r/20220130213214.1042497-1-atomlin@redhat.com I think this is a good example of a patch which could benefit from that work. So I'd much prefer to see that work go in first, before this series, to see if we can make the changes below more compartmentalized. Curious, how much testing has been put into this series? Luis
Le 03/02/2022 à 01:01, Luis Chamberlain a écrit : > On Sat, Jan 29, 2022 at 05:02:09PM +0000, Christophe Leroy wrote: >> diff --git a/kernel/module.c b/kernel/module.c >> index 11f51e17fb9f..f3758115ebaa 100644 >> --- a/kernel/module.c >> +++ b/kernel/module.c >> @@ -81,7 +81,9 @@ >> /* If this is set, the section belongs in the init part of the module */ >> #define INIT_OFFSET_MASK (1UL << (BITS_PER_LONG-1)) >> >> +#ifndef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC >> #define data_layout core_layout >> +#endif >> >> /* >> * Mutex protects: >> @@ -111,6 +113,12 @@ static struct mod_tree_root { >> #define module_addr_min mod_tree.addr_min >> #define module_addr_max mod_tree.addr_max >> >> +#ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC >> +static struct mod_tree_root mod_data_tree __cacheline_aligned = { >> + .addr_min = -1UL, >> +}; >> +#endif >> + >> #ifdef CONFIG_MODULES_TREE_LOOKUP >> >> /* >> @@ -186,6 +194,11 @@ static void mod_tree_insert(struct module *mod) >> __mod_tree_insert(&mod->core_layout.mtn, &mod_tree); >> if (mod->init_layout.size) >> __mod_tree_insert(&mod->init_layout.mtn, &mod_tree); >> + >> +#ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC >> + mod->data_layout.mtn.mod = mod; >> + __mod_tree_insert(&mod->data_layout.mtn, &mod_data_tree); >> +#endif > > > kernel/ directory has quite a few files, module.c is the second to > largest file, and it has tons of stuff. Aaron is doing work to > split things out to make code easier to read and so that its easier > to review changes. See: > > https://lkml.kernel.org/r/20220130213214.1042497-1-atomlin@redhat.com > > I think this is a good patch example which could benefit from that work. > So I'd much prefer to see that work go in first than this, so to see if > we can make the below changes more compartamentalized. > > Curious, how much testing has been put into this series? 
I tested the change up to (and including) patch 4 to verify it doesn't introduce a regression when not using CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC. Then I tested with patch 5. I first tried with the 'hello world' test module. After that I loaded several important modules and checked I didn't get any regression, both with and without STRICT_MODULES_RWX, and I checked the consistency in /proc/vmallocinfo, /proc/modules and /sys/class/modules/*. I also tested with a hacked module_alloc() to force branch trampolines. Christophe
On Thu, Feb 03, 2022 at 07:05:13AM +0000, Christophe Leroy wrote: > > > Le 03/02/2022 à 01:01, Luis Chamberlain a écrit : > > On Sat, Jan 29, 2022 at 05:02:09PM +0000, Christophe Leroy wrote: > >> diff --git a/kernel/module.c b/kernel/module.c > >> index 11f51e17fb9f..f3758115ebaa 100644 > >> --- a/kernel/module.c > >> +++ b/kernel/module.c > >> @@ -81,7 +81,9 @@ > >> /* If this is set, the section belongs in the init part of the module */ > >> #define INIT_OFFSET_MASK (1UL << (BITS_PER_LONG-1)) > >> > >> +#ifndef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC > >> #define data_layout core_layout > >> +#endif > >> > >> /* > >> * Mutex protects: > >> @@ -111,6 +113,12 @@ static struct mod_tree_root { > >> #define module_addr_min mod_tree.addr_min > >> #define module_addr_max mod_tree.addr_max > >> > >> +#ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC > >> +static struct mod_tree_root mod_data_tree __cacheline_aligned = { > >> + .addr_min = -1UL, > >> +}; > >> +#endif > >> + > >> #ifdef CONFIG_MODULES_TREE_LOOKUP > >> > >> /* > >> @@ -186,6 +194,11 @@ static void mod_tree_insert(struct module *mod) > >> __mod_tree_insert(&mod->core_layout.mtn, &mod_tree); > >> if (mod->init_layout.size) > >> __mod_tree_insert(&mod->init_layout.mtn, &mod_tree); > >> + > >> +#ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC > >> + mod->data_layout.mtn.mod = mod; > >> + __mod_tree_insert(&mod->data_layout.mtn, &mod_data_tree); > >> +#endif > > > > > > kernel/ directory has quite a few files, module.c is the second to > > largest file, and it has tons of stuff. Aaron is doing work to > > split things out to make code easier to read and so that its easier > > to review changes. See: > > > > https://lkml.kernel.org/r/20220130213214.1042497-1-atomlin@redhat.com > > > > I think this is a good patch example which could benefit from that work. > > So I'd much prefer to see that work go in first than this, so to see if > > we can make the below changes more compartamentalized. 
> > > > Curious, how much testing has been put into this series? > > > I tested the change up to (including) patch 4 to verify it doesn't > introduce regression when not using > CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC, > Then I tested with patch 5. I first tried with the 'hello world' test > module. After that I loaded several important modules and checked I > didn't get any regression, both with and without STRICT_MODULES_RWX and > I checked the consistency in /proc/vmallocinfo > /proc/modules /sys/class/modules/* I wonder if we have a test for STRICT_MODULES_RWX. > I also tested with a hacked module_alloc() to force branch trampolines. So, is this to verify that reducing these trampolines actually helps on an architecture? I wonder if we can generalize this somehow to let archs verify that such strategies can help. I was hoping for somewhat wider testing, e.g. by actual users. That does not seem to be the case. We can get there by merging this soon into modules-next and letting linux-next shake out any issues. We are in good time to do this now. The kmod tree has tons of tests: https://git.kernel.org/pub/scm/utils/kernel/kmod/kmod.git/ Can you use that to verify there are no regressions? Aaron, Michal, if you can do the same, that'd be appreciated. Luis
Hello, On Thu, Feb 03, 2022 at 11:51:05AM -0800, Luis Chamberlain wrote: > On Thu, Feb 03, 2022 at 07:05:13AM +0000, Christophe Leroy wrote: > > Le 03/02/2022 à 01:01, Luis Chamberlain a écrit : > > > On Sat, Jan 29, 2022 at 05:02:09PM +0000, Christophe Leroy wrote: > > >> diff --git a/kernel/module.c b/kernel/module.c > > >> index 11f51e17fb9f..f3758115ebaa 100644 > > >> --- a/kernel/module.c > > >> +++ b/kernel/module.c > > >> @@ -81,7 +81,9 @@ > > >> /* If this is set, the section belongs in the init part of the module */ > > >> #define INIT_OFFSET_MASK (1UL << (BITS_PER_LONG-1)) > > >> > > >> +#ifndef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC > > >> #define data_layout core_layout > > >> +#endif > > >> > > >> /* > > >> * Mutex protects: > > >> @@ -111,6 +113,12 @@ static struct mod_tree_root { > > >> #define module_addr_min mod_tree.addr_min > > >> #define module_addr_max mod_tree.addr_max > > >> > > >> +#ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC > > >> +static struct mod_tree_root mod_data_tree __cacheline_aligned = { > > >> + .addr_min = -1UL, > > >> +}; > > >> +#endif > > >> + > > >> #ifdef CONFIG_MODULES_TREE_LOOKUP > > >> > > >> /* > > >> @@ -186,6 +194,11 @@ static void mod_tree_insert(struct module *mod) > > >> __mod_tree_insert(&mod->core_layout.mtn, &mod_tree); > > >> if (mod->init_layout.size) > > >> __mod_tree_insert(&mod->init_layout.mtn, &mod_tree); > > >> + > > >> +#ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC > > >> + mod->data_layout.mtn.mod = mod; > > >> + __mod_tree_insert(&mod->data_layout.mtn, &mod_data_tree); > > >> +#endif > > > > > > > > > kernel/ directory has quite a few files, module.c is the second to > > > largest file, and it has tons of stuff. Aaron is doing work to > > > split things out to make code easier to read and so that its easier > > > to review changes. 
See: > > > > > > https://lkml.kernel.org/r/20220130213214.1042497-1-atomlin@redhat.com > > > > > > I think this is a good patch example which could benefit from that work. > > > So I'd much prefer to see that work go in first than this, so to see if > > > we can make the below changes more compartamentalized. > > > > > > Curious, how much testing has been put into this series? > > > > > > I tested the change up to (including) patch 4 to verify it doesn't > > introduce regression when not using > > CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC, > > > Then I tested with patch 5. I first tried with the 'hello world' test > > module. After that I loaded several important modules and checked I > > didn't get any regression, both with and without STRICT_MODULES_RWX and > > I checked the consistency in /proc/vmallocinfo > > /proc/modules /sys/class/modules/* > > I wonder if we have a test for STRICT_MODULES_RWX. > > > I also tested with a hacked module_alloc() to force branch trampolines. > > So to verify that reducing these trampolines actually helps on an > architecture? I wonder if we can generalize this somehow to let archs > verify such strategies can help. > > I was hoping for a bit more wider testing, like actually users, etc. > It does not seem like so. So we can get to that by merging this soon > into modules-next and having this bleed out issues with linux-next. > We are in good time to do this now. > > The kmod tree has tons of tests: > > https://git.kernel.org/pub/scm/utils/kernel/kmod/kmod.git/ > > Can you use that to verify there are no regressions? openSUSE has the testsuite packaged, so it's easy to run on an arbitrary kernel, but only on ppc64(le) because there is no ppc there anymore. So yes, it does not regress Book3S/64 as far as the kmod testsuite is concerned, and building an s390x non-modular kernel also still works, but that's not saying much. Thanks Michal
diff --git a/arch/Kconfig b/arch/Kconfig index 678a80713b21..b5d1f2c19c27 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -882,6 +882,12 @@ config MODULES_USE_ELF_REL Modules only use ELF REL relocations. Modules with ELF RELA relocations will give an error. +config ARCH_WANTS_MODULES_DATA_IN_VMALLOC + bool + help + For architectures like powerpc/32 which have constraints on module + allocation and need to allocate module data outside of module area. + config HAVE_IRQ_EXIT_ON_IRQ_STACK bool help diff --git a/include/linux/module.h b/include/linux/module.h index 1e135fd5c076..3a892bdcbb5f 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -422,6 +422,9 @@ struct module { /* Core layout: rbtree is accessed frequently, so keep together. */ struct module_layout core_layout __module_layout_align; struct module_layout init_layout; +#ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC + struct module_layout data_layout; +#endif /* Arch-specific module values */ struct mod_arch_specific arch; @@ -569,6 +572,11 @@ bool is_module_text_address(unsigned long addr); static inline bool within_module_core(unsigned long addr, const struct module *mod) { +#ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC + if ((unsigned long)mod->data_layout.base <= addr && + addr < (unsigned long)mod->data_layout.base + mod->data_layout.size) + return true; +#endif return (unsigned long)mod->core_layout.base <= addr && addr < (unsigned long)mod->core_layout.base + mod->core_layout.size; } diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c index 0852a537dad4..85d3fd40b7fe 100644 --- a/kernel/debug/kdb/kdb_main.c +++ b/kernel/debug/kdb/kdb_main.c @@ -2022,8 +2022,11 @@ static int kdb_lsmod(int argc, const char **argv) if (mod->state == MODULE_STATE_UNFORMED) continue; - kdb_printf("%-20s%8u 0x%px ", mod->name, - mod->core_layout.size, (void *)mod); + kdb_printf("%-20s%8u", mod->name, mod->core_layout.size); +#ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC + 
kdb_printf("/%8u", mod->data_layout.size); +#endif + kdb_printf(" 0x%px ", (void *)mod); #ifdef CONFIG_MODULE_UNLOAD kdb_printf("%4d ", module_refcount(mod)); #endif @@ -2034,6 +2037,9 @@ static int kdb_lsmod(int argc, const char **argv) else kdb_printf(" (Live)"); kdb_printf(" 0x%px", mod->core_layout.base); +#ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC + kdb_printf("/0x%px", mod->data_layout.base); +#endif #ifdef CONFIG_MODULE_UNLOAD { diff --git a/kernel/module.c b/kernel/module.c index 11f51e17fb9f..f3758115ebaa 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -81,7 +81,9 @@ /* If this is set, the section belongs in the init part of the module */ #define INIT_OFFSET_MASK (1UL << (BITS_PER_LONG-1)) +#ifndef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC #define data_layout core_layout +#endif /* * Mutex protects: @@ -111,6 +113,12 @@ static struct mod_tree_root { #define module_addr_min mod_tree.addr_min #define module_addr_max mod_tree.addr_max +#ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC +static struct mod_tree_root mod_data_tree __cacheline_aligned = { + .addr_min = -1UL, +}; +#endif + #ifdef CONFIG_MODULES_TREE_LOOKUP /* @@ -186,6 +194,11 @@ static void mod_tree_insert(struct module *mod) __mod_tree_insert(&mod->core_layout.mtn, &mod_tree); if (mod->init_layout.size) __mod_tree_insert(&mod->init_layout.mtn, &mod_tree); + +#ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC + mod->data_layout.mtn.mod = mod; + __mod_tree_insert(&mod->data_layout.mtn, &mod_data_tree); +#endif } static void mod_tree_remove_init(struct module *mod) @@ -198,6 +211,9 @@ static void mod_tree_remove(struct module *mod) { __mod_tree_remove(&mod->core_layout.mtn, &mod_tree); mod_tree_remove_init(mod); +#ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC + __mod_tree_remove(&mod->data_layout.mtn, &mod_data_tree); +#endif } static struct module *mod_find(unsigned long addr, struct mod_tree_root *tree) @@ -252,6 +268,9 @@ static void mod_update_bounds(struct module *mod) 
__mod_update_bounds(mod->core_layout.base, mod->core_layout.size, &mod_tree); if (mod->init_layout.size) __mod_update_bounds(mod->init_layout.base, mod->init_layout.size, &mod_tree); +#ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC + __mod_update_bounds(mod->data_layout.base, mod->data_layout.size, &mod_data_tree); +#endif } #ifdef CONFIG_KGDB_KDB @@ -1181,6 +1200,17 @@ static ssize_t show_coresize(struct module_attribute *mattr, static struct module_attribute modinfo_coresize = __ATTR(coresize, 0444, show_coresize, NULL); +#ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC +static ssize_t show_datasize(struct module_attribute *mattr, + struct module_kobject *mk, char *buffer) +{ + return sprintf(buffer, "%u\n", mk->mod->data_layout.size); +} + +static struct module_attribute modinfo_datasize = + __ATTR(datasize, 0444, show_datasize, NULL); +#endif + static ssize_t show_initsize(struct module_attribute *mattr, struct module_kobject *mk, char *buffer) { @@ -1209,6 +1239,9 @@ static struct module_attribute *modinfo_attrs[] = { &modinfo_srcversion, &modinfo_initstate, &modinfo_coresize, +#ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC + &modinfo_datasize, +#endif &modinfo_initsize, &modinfo_taint, #ifdef CONFIG_MODULE_UNLOAD @@ -2211,6 +2244,9 @@ static void free_module(struct module *mod) /* Finally, free the core (containing the module structure) */ module_memfree(mod->core_layout.base); +#ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC + vfree(mod->data_layout.base); +#endif } void *__symbol_get(const char *symbol) @@ -3462,6 +3498,24 @@ static int move_module(struct module *mod, struct load_info *info) } else mod->init_layout.base = NULL; +#ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC + /* Do the allocs. */ + ptr = vmalloc(mod->data_layout.size); + /* + * The pointer to this block is stored in the module structure + * which is inside the block. Just mark it as not being a + * leak. 
+ */ + kmemleak_not_leak(ptr); + if (!ptr) { + module_memfree(mod->core_layout.base); + module_memfree(mod->init_layout.base); + return -ENOMEM; + } + + memset(ptr, 0, mod->data_layout.size); + mod->data_layout.base = ptr; +#endif /* Transfer each section which specifies SHF_ALLOC */ pr_debug("final section addresses:\n"); for (i = 0; i < info->hdr->e_shnum; i++) { @@ -3637,6 +3691,9 @@ static void module_deallocate(struct module *mod, struct load_info *info) module_arch_freeing_init(mod); module_memfree(mod->init_layout.base); module_memfree(mod->core_layout.base); +#ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC + vfree(mod->data_layout.base); +#endif } int __weak module_finalize(const Elf_Ehdr *hdr, @@ -4615,13 +4672,17 @@ static int m_show(struct seq_file *m, void *p) struct module *mod = list_entry(p, struct module, list); char buf[MODULE_FLAGS_BUF_SIZE]; void *value; + unsigned int size; /* We always ignore unformed modules. */ if (mod->state == MODULE_STATE_UNFORMED) return 0; - seq_printf(m, "%s %u", - mod->name, mod->init_layout.size + mod->core_layout.size); + size = mod->init_layout.size + mod->core_layout.size; +#ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC + size += mod->data_layout.size; +#endif + seq_printf(m, "%s %u", mod->name, size); print_unload_info(m, mod); /* Informative for users. 
*/ @@ -4744,13 +4805,20 @@ bool is_module_address(unsigned long addr) struct module *__module_address(unsigned long addr) { struct module *mod; + struct mod_tree_root *tree; - if (addr < module_addr_min || addr > module_addr_max) + if (addr >= mod_tree.addr_min && addr <= mod_tree.addr_max) + tree = &mod_tree; +#ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC + else if (addr >= mod_data_tree.addr_min && addr <= mod_data_tree.addr_max) + tree = &mod_data_tree; +#endif + else return NULL; module_assert_mutex_or_preempt(); - mod = mod_find(addr, &mod_tree); + mod = mod_find(addr, tree); if (mod) { BUG_ON(!within_module(addr, mod)); if (mod->state == MODULE_STATE_UNFORMED)