Message ID | 20190605144814.29319-1-npiggin@gmail.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [1/2] mm/large system hash: use vmalloc for size > MAX_ORDER when !hashdist | expand |
On Thu, 6 Jun 2019 00:48:13 +1000 Nicholas Piggin <npiggin@gmail.com> wrote: > The kernel currently clamps large system hashes to MAX_ORDER when > hashdist is not set, which is rather arbitrary. > > vmalloc space is limited on 32-bit machines, but this shouldn't > result in much more used because of small physical memory limiting > system hash sizes. > > Include "vmalloc" or "linear" in the kernel log message. > > Signed-off-by: Nicholas Piggin <npiggin@gmail.com> > --- > > This is a better solution than the previous one for the case of !NUMA > systems running on CONFIG_NUMA kernels, we can clear the default > hashdist early and have everything allocated out of the linear map. > > The hugepage vmap series I will post later, but it's quite > independent from this improvement. > > ... > > --- a/mm/page_alloc.c > +++ b/mm/page_alloc.c > @@ -7966,6 +7966,7 @@ void *__init alloc_large_system_hash(const char *tablename, > unsigned long log2qty, size; > void *table = NULL; > gfp_t gfp_flags; > + bool virt; > > /* allow the kernel cmdline to have a say */ > if (!numentries) { > @@ -8022,6 +8023,7 @@ void *__init alloc_large_system_hash(const char *tablename, > > gfp_flags = (flags & HASH_ZERO) ? 
GFP_ATOMIC | __GFP_ZERO : GFP_ATOMIC; > do { > + virt = false; > size = bucketsize << log2qty; > if (flags & HASH_EARLY) { > if (flags & HASH_ZERO) > @@ -8029,26 +8031,26 @@ void *__init alloc_large_system_hash(const char *tablename, > else > table = memblock_alloc_raw(size, > SMP_CACHE_BYTES); > - } else if (hashdist) { > + } else if (get_order(size) >= MAX_ORDER || hashdist) { > table = __vmalloc(size, gfp_flags, PAGE_KERNEL); > + virt = true; > } else { > /* > * If bucketsize is not a power-of-two, we may free > * some pages at the end of hash table which > * alloc_pages_exact() automatically does > */ > - if (get_order(size) < MAX_ORDER) { > - table = alloc_pages_exact(size, gfp_flags); > - kmemleak_alloc(table, size, 1, gfp_flags); > - } > + table = alloc_pages_exact(size, gfp_flags); > + kmemleak_alloc(table, size, 1, gfp_flags); > } > } while (!table && size > PAGE_SIZE && --log2qty); > > if (!table) > panic("Failed to allocate %s hash table\n", tablename); > > - pr_info("%s hash table entries: %ld (order: %d, %lu bytes)\n", > - tablename, 1UL << log2qty, ilog2(size) - PAGE_SHIFT, size); > + pr_info("%s hash table entries: %ld (order: %d, %lu bytes, %s)\n", > + tablename, 1UL << log2qty, ilog2(size) - PAGE_SHIFT, size, > + virt ? "vmalloc" : "linear"); Could remove `bool virt' and use is_vmalloc_addr() in the printk?
Andrew Morton's on June 6, 2019 7:22 am: > On Thu, 6 Jun 2019 00:48:13 +1000 Nicholas Piggin <npiggin@gmail.com> wrote: > >> The kernel currently clamps large system hashes to MAX_ORDER when >> hashdist is not set, which is rather arbitrary. >> >> vmalloc space is limited on 32-bit machines, but this shouldn't >> result in much more used because of small physical memory limiting >> system hash sizes. >> >> Include "vmalloc" or "linear" in the kernel log message. >> >> Signed-off-by: Nicholas Piggin <npiggin@gmail.com> >> --- >> >> This is a better solution than the previous one for the case of !NUMA >> systems running on CONFIG_NUMA kernels, we can clear the default >> hashdist early and have everything allocated out of the linear map. >> >> The hugepage vmap series I will post later, but it's quite >> independent from this improvement. >> >> ... >> >> --- a/mm/page_alloc.c >> +++ b/mm/page_alloc.c >> @@ -7966,6 +7966,7 @@ void *__init alloc_large_system_hash(const char *tablename, >> unsigned long log2qty, size; >> void *table = NULL; >> gfp_t gfp_flags; >> + bool virt; >> >> /* allow the kernel cmdline to have a say */ >> if (!numentries) { >> @@ -8022,6 +8023,7 @@ void *__init alloc_large_system_hash(const char *tablename, >> >> gfp_flags = (flags & HASH_ZERO) ? 
GFP_ATOMIC | __GFP_ZERO : GFP_ATOMIC; >> do { >> + virt = false; >> size = bucketsize << log2qty; >> if (flags & HASH_EARLY) { >> if (flags & HASH_ZERO) >> @@ -8029,26 +8031,26 @@ void *__init alloc_large_system_hash(const char *tablename, >> else >> table = memblock_alloc_raw(size, >> SMP_CACHE_BYTES); >> - } else if (hashdist) { >> + } else if (get_order(size) >= MAX_ORDER || hashdist) { >> table = __vmalloc(size, gfp_flags, PAGE_KERNEL); >> + virt = true; >> } else { >> /* >> * If bucketsize is not a power-of-two, we may free >> * some pages at the end of hash table which >> * alloc_pages_exact() automatically does >> */ >> - if (get_order(size) < MAX_ORDER) { >> - table = alloc_pages_exact(size, gfp_flags); >> - kmemleak_alloc(table, size, 1, gfp_flags); >> - } >> + table = alloc_pages_exact(size, gfp_flags); >> + kmemleak_alloc(table, size, 1, gfp_flags); >> } >> } while (!table && size > PAGE_SIZE && --log2qty); >> >> if (!table) >> panic("Failed to allocate %s hash table\n", tablename); >> >> - pr_info("%s hash table entries: %ld (order: %d, %lu bytes)\n", >> - tablename, 1UL << log2qty, ilog2(size) - PAGE_SHIFT, size); >> + pr_info("%s hash table entries: %ld (order: %d, %lu bytes, %s)\n", >> + tablename, 1UL << log2qty, ilog2(size) - PAGE_SHIFT, size, >> + virt ? "vmalloc" : "linear"); > > Could remove `bool virt' and use is_vmalloc_addr() in the printk? > It can run before mem_init() and it looks like some archs set VMALLOC_START/END (high_memory) there (e.g., x86-32, ppc32). Thanks, Nick
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index d66bc8abe0af..15f46be7d210 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -7966,6 +7966,7 @@ void *__init alloc_large_system_hash(const char *tablename, unsigned long log2qty, size; void *table = NULL; gfp_t gfp_flags; + bool virt; /* allow the kernel cmdline to have a say */ if (!numentries) { @@ -8022,6 +8023,7 @@ void *__init alloc_large_system_hash(const char *tablename, gfp_flags = (flags & HASH_ZERO) ? GFP_ATOMIC | __GFP_ZERO : GFP_ATOMIC; do { + virt = false; size = bucketsize << log2qty; if (flags & HASH_EARLY) { if (flags & HASH_ZERO) @@ -8029,26 +8031,26 @@ void *__init alloc_large_system_hash(const char *tablename, else table = memblock_alloc_raw(size, SMP_CACHE_BYTES); - } else if (hashdist) { + } else if (get_order(size) >= MAX_ORDER || hashdist) { table = __vmalloc(size, gfp_flags, PAGE_KERNEL); + virt = true; } else { /* * If bucketsize is not a power-of-two, we may free * some pages at the end of hash table which * alloc_pages_exact() automatically does */ - if (get_order(size) < MAX_ORDER) { - table = alloc_pages_exact(size, gfp_flags); - kmemleak_alloc(table, size, 1, gfp_flags); - } + table = alloc_pages_exact(size, gfp_flags); + kmemleak_alloc(table, size, 1, gfp_flags); } } while (!table && size > PAGE_SIZE && --log2qty); if (!table) panic("Failed to allocate %s hash table\n", tablename); - pr_info("%s hash table entries: %ld (order: %d, %lu bytes)\n", - tablename, 1UL << log2qty, ilog2(size) - PAGE_SHIFT, size); + pr_info("%s hash table entries: %ld (order: %d, %lu bytes, %s)\n", + tablename, 1UL << log2qty, ilog2(size) - PAGE_SHIFT, size, + virt ? "vmalloc" : "linear"); if (_hash_shift) *_hash_shift = log2qty;
The kernel currently clamps large system hashes to MAX_ORDER when hashdist is not set, which is rather arbitrary. vmalloc space is limited on 32-bit machines, but this shouldn't result in much more vmalloc space being used, because their small physical memory already limits system hash sizes. Include "vmalloc" or "linear" in the kernel log message. Signed-off-by: Nicholas Piggin <npiggin@gmail.com> --- This is a better solution than the previous one for the case of !NUMA systems running on CONFIG_NUMA kernels: we can clear the default hashdist early and have everything allocated out of the linear map. I will post the hugepage vmap series later, but it is quite independent of this improvement. mm/page_alloc.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-)