[1/2] mm/large system hash: use vmalloc for size > MAX_ORDER when !hashdist

Message ID 20190605144814.29319-1-npiggin@gmail.com (mailing list archive)
State New, archived
Series [1/2] mm/large system hash: use vmalloc for size > MAX_ORDER when !hashdist

Commit Message

Nicholas Piggin June 5, 2019, 2:48 p.m. UTC
The kernel currently clamps large system hashes to MAX_ORDER when
hashdist is not set, which is rather arbitrary.
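
For example, with the common configuration of MAX_ORDER = 11 and 4kB
pages, the buddy allocator tops out at 2^(MAX_ORDER - 1) = 1024
contiguous pages, so these hash tables were silently being clamped to
4MB on such configs.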

vmalloc space is limited on 32-bit machines, but this change shouldn't
use much more of it, because small physical memory already limits
system hash sizes on those systems.

Include "vmalloc" or "linear" in the kernel log message.
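
With this change a boot log line then reads, e.g. (illustrative
values):

  TCP established hash table entries: 524288 (order: 10, 4194304 bytes, linear)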

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---

This is a better solution than the previous one for the case of !NUMA
systems running on CONFIG_NUMA kernels: we can clear the default
hashdist early and have everything allocated out of the linear map.
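
Clearing it early could look something like the following (hypothetical
sketch, not part of this patch; fixup_hashdist() is a made-up name):

	/*
	 * Sketch: with a single memory node, NUMA distribution of the
	 * hash buys nothing, so fall back to linear map allocations.
	 */
	static void __init fixup_hashdist(void)
	{
		if (num_node_state(N_MEMORY) == 1)
			hashdist = 0;
	}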

I will post the hugepage vmap series later; it's quite independent
of this improvement.

 mm/page_alloc.c | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

Comments

Andrew Morton June 5, 2019, 9:22 p.m. UTC | #1
On Thu,  6 Jun 2019 00:48:13 +1000 Nicholas Piggin <npiggin@gmail.com> wrote:

> The kernel currently clamps large system hashes to MAX_ORDER when
> hashdist is not set, which is rather arbitrary.
> 
> vmalloc space is limited on 32-bit machines, but this change shouldn't
> use much more of it, because small physical memory already limits
> system hash sizes on those systems.
> 
> Include "vmalloc" or "linear" in the kernel log message.
> 
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> ---
> 
> This is a better solution than the previous one for the case of !NUMA
> systems running on CONFIG_NUMA kernels: we can clear the default
> hashdist early and have everything allocated out of the linear map.
> 
> I will post the hugepage vmap series later; it's quite independent
> of this improvement.
> 
> ...
>
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -7966,6 +7966,7 @@ void *__init alloc_large_system_hash(const char *tablename,
>  	unsigned long log2qty, size;
>  	void *table = NULL;
>  	gfp_t gfp_flags;
> +	bool virt;
>  
>  	/* allow the kernel cmdline to have a say */
>  	if (!numentries) {
> @@ -8022,6 +8023,7 @@ void *__init alloc_large_system_hash(const char *tablename,
>  
>  	gfp_flags = (flags & HASH_ZERO) ? GFP_ATOMIC | __GFP_ZERO : GFP_ATOMIC;
>  	do {
> +		virt = false;
>  		size = bucketsize << log2qty;
>  		if (flags & HASH_EARLY) {
>  			if (flags & HASH_ZERO)
> @@ -8029,26 +8031,26 @@ void *__init alloc_large_system_hash(const char *tablename,
>  			else
>  				table = memblock_alloc_raw(size,
>  							   SMP_CACHE_BYTES);
> -		} else if (hashdist) {
> +		} else if (get_order(size) >= MAX_ORDER || hashdist) {
>  			table = __vmalloc(size, gfp_flags, PAGE_KERNEL);
> +			virt = true;
>  		} else {
>  			/*
>  			 * If bucketsize is not a power-of-two, we may free
>  			 * some pages at the end of hash table which
>  			 * alloc_pages_exact() automatically does
>  			 */
> -			if (get_order(size) < MAX_ORDER) {
> -				table = alloc_pages_exact(size, gfp_flags);
> -				kmemleak_alloc(table, size, 1, gfp_flags);
> -			}
> +			table = alloc_pages_exact(size, gfp_flags);
> +			kmemleak_alloc(table, size, 1, gfp_flags);
>  		}
>  	} while (!table && size > PAGE_SIZE && --log2qty);
>  
>  	if (!table)
>  		panic("Failed to allocate %s hash table\n", tablename);
>  
> -	pr_info("%s hash table entries: %ld (order: %d, %lu bytes)\n",
> -		tablename, 1UL << log2qty, ilog2(size) - PAGE_SHIFT, size);
> +	pr_info("%s hash table entries: %ld (order: %d, %lu bytes, %s)\n",
> +		tablename, 1UL << log2qty, ilog2(size) - PAGE_SHIFT, size,
> +		virt ? "vmalloc" : "linear");

Could remove `bool virt' and use is_vmalloc_addr() in the printk?
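
That is, roughly (untested sketch):

	pr_info("%s hash table entries: %ld (order: %d, %lu bytes, %s)\n",
		tablename, 1UL << log2qty, ilog2(size) - PAGE_SHIFT, size,
		is_vmalloc_addr(table) ? "vmalloc" : "linear");
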
Nicholas Piggin June 6, 2019, 2:27 a.m. UTC | #2
Andrew Morton wrote on June 6, 2019, 7:22 am:
> On Thu,  6 Jun 2019 00:48:13 +1000 Nicholas Piggin <npiggin@gmail.com> wrote:
> 
>> The kernel currently clamps large system hashes to MAX_ORDER when
>> hashdist is not set, which is rather arbitrary.
>> 
>> vmalloc space is limited on 32-bit machines, but this change shouldn't
>> use much more of it, because small physical memory already limits
>> system hash sizes on those systems.
>> 
>> Include "vmalloc" or "linear" in the kernel log message.
>> 
>> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
>> ---
>> 
>> This is a better solution than the previous one for the case of !NUMA
>> systems running on CONFIG_NUMA kernels: we can clear the default
>> hashdist early and have everything allocated out of the linear map.
>> 
>> I will post the hugepage vmap series later; it's quite independent
>> of this improvement.
>> 
>> ...
>>
>> --- a/mm/page_alloc.c
>> +++ b/mm/page_alloc.c
>> @@ -7966,6 +7966,7 @@ void *__init alloc_large_system_hash(const char *tablename,
>>  	unsigned long log2qty, size;
>>  	void *table = NULL;
>>  	gfp_t gfp_flags;
>> +	bool virt;
>>  
>>  	/* allow the kernel cmdline to have a say */
>>  	if (!numentries) {
>> @@ -8022,6 +8023,7 @@ void *__init alloc_large_system_hash(const char *tablename,
>>  
>>  	gfp_flags = (flags & HASH_ZERO) ? GFP_ATOMIC | __GFP_ZERO : GFP_ATOMIC;
>>  	do {
>> +		virt = false;
>>  		size = bucketsize << log2qty;
>>  		if (flags & HASH_EARLY) {
>>  			if (flags & HASH_ZERO)
>> @@ -8029,26 +8031,26 @@ void *__init alloc_large_system_hash(const char *tablename,
>>  			else
>>  				table = memblock_alloc_raw(size,
>>  							   SMP_CACHE_BYTES);
>> -		} else if (hashdist) {
>> +		} else if (get_order(size) >= MAX_ORDER || hashdist) {
>>  			table = __vmalloc(size, gfp_flags, PAGE_KERNEL);
>> +			virt = true;
>>  		} else {
>>  			/*
>>  			 * If bucketsize is not a power-of-two, we may free
>>  			 * some pages at the end of hash table which
>>  			 * alloc_pages_exact() automatically does
>>  			 */
>> -			if (get_order(size) < MAX_ORDER) {
>> -				table = alloc_pages_exact(size, gfp_flags);
>> -				kmemleak_alloc(table, size, 1, gfp_flags);
>> -			}
>> +			table = alloc_pages_exact(size, gfp_flags);
>> +			kmemleak_alloc(table, size, 1, gfp_flags);
>>  		}
>>  	} while (!table && size > PAGE_SIZE && --log2qty);
>>  
>>  	if (!table)
>>  		panic("Failed to allocate %s hash table\n", tablename);
>>  
>> -	pr_info("%s hash table entries: %ld (order: %d, %lu bytes)\n",
>> -		tablename, 1UL << log2qty, ilog2(size) - PAGE_SHIFT, size);
>> +	pr_info("%s hash table entries: %ld (order: %d, %lu bytes, %s)\n",
>> +		tablename, 1UL << log2qty, ilog2(size) - PAGE_SHIFT, size,
>> +		virt ? "vmalloc" : "linear");
> 
> Could remove `bool virt' and use is_vmalloc_addr() in the printk?
> 

It can run before mem_init(), and it looks like some archs set
VMALLOC_START/END (high_memory) there (e.g., x86-32, ppc32), so
is_vmalloc_addr() may not give a reliable answer that early.
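
For reference, is_vmalloc_addr() is essentially the following range
check (simplified sketch of the helper in include/linux/mm.h):

	static inline bool is_vmalloc_addr(const void *x)
	{
		unsigned long addr = (unsigned long)x;

		return addr >= VMALLOC_START && addr < VMALLOC_END;
	}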

Thanks,
Nick

Patch

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index d66bc8abe0af..15f46be7d210 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -7966,6 +7966,7 @@  void *__init alloc_large_system_hash(const char *tablename,
 	unsigned long log2qty, size;
 	void *table = NULL;
 	gfp_t gfp_flags;
+	bool virt;
 
 	/* allow the kernel cmdline to have a say */
 	if (!numentries) {
@@ -8022,6 +8023,7 @@  void *__init alloc_large_system_hash(const char *tablename,
 
 	gfp_flags = (flags & HASH_ZERO) ? GFP_ATOMIC | __GFP_ZERO : GFP_ATOMIC;
 	do {
+		virt = false;
 		size = bucketsize << log2qty;
 		if (flags & HASH_EARLY) {
 			if (flags & HASH_ZERO)
@@ -8029,26 +8031,26 @@  void *__init alloc_large_system_hash(const char *tablename,
 			else
 				table = memblock_alloc_raw(size,
 							   SMP_CACHE_BYTES);
-		} else if (hashdist) {
+		} else if (get_order(size) >= MAX_ORDER || hashdist) {
 			table = __vmalloc(size, gfp_flags, PAGE_KERNEL);
+			virt = true;
 		} else {
 			/*
 			 * If bucketsize is not a power-of-two, we may free
 			 * some pages at the end of hash table which
 			 * alloc_pages_exact() automatically does
 			 */
-			if (get_order(size) < MAX_ORDER) {
-				table = alloc_pages_exact(size, gfp_flags);
-				kmemleak_alloc(table, size, 1, gfp_flags);
-			}
+			table = alloc_pages_exact(size, gfp_flags);
+			kmemleak_alloc(table, size, 1, gfp_flags);
 		}
 	} while (!table && size > PAGE_SIZE && --log2qty);
 
 	if (!table)
 		panic("Failed to allocate %s hash table\n", tablename);
 
-	pr_info("%s hash table entries: %ld (order: %d, %lu bytes)\n",
-		tablename, 1UL << log2qty, ilog2(size) - PAGE_SHIFT, size);
+	pr_info("%s hash table entries: %ld (order: %d, %lu bytes, %s)\n",
+		tablename, 1UL << log2qty, ilog2(size) - PAGE_SHIFT, size,
+		virt ? "vmalloc" : "linear");
 
 	if (_hash_shift)
 		*_hash_shift = log2qty;