
[11/12] hugetlb: batch TLB flushes when freeing vmemmap

Message ID 20230825190436.55045-12-mike.kravetz@oracle.com (mailing list archive)
State New
Series Batch hugetlb vmemmap modification operations

Commit Message

Mike Kravetz Aug. 25, 2023, 7:04 p.m. UTC
From: Joao Martins <joao.m.martins@oracle.com>

Now that a list of pages is deduplicated at once, the TLB flush can be
batched for all vmemmap pages that got remapped.

Add a flags field to the walk and use it to indicate whether the remap is
part of a bulk operation or of a single page, so the per-range TLB flush
can be deferred in the bulk case.

The TLB flush is global because we have no guarantee from the caller that
the set of folios is contiguous, and we want to avoid the complexity of
composing a list of kernel virtual addresses (kVAs) to flush.

Modified by Mike Kravetz to perform a TLB flush on the single folio if an
error is encountered.

Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com>
---
 mm/hugetlb_vmemmap.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)
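
As an illustration of the intended flow (a sketch, not code lifted verbatim
from the tree), the optimize path after this patch looks roughly as below;
hugetlb_vmemmap_optimize_bulk() and free_vmemmap_page_list() come from
earlier patches in this series, and the per-range flush in
vmemmap_remap_range() is skipped via the new flag:

void hugetlb_vmemmap_optimize_folios(struct hstate *h, struct list_head *folio_list)
{
	struct folio *folio;
	LIST_HEAD(vmemmap_pages);

	/* Remap each folio's vmemmap; the per-range TLB flush is deferred. */
	list_for_each_entry(folio, folio_list, lru)
		hugetlb_vmemmap_optimize_bulk(h, &folio->page, &vmemmap_pages);

	/* One global flush covers all remapped vmemmap ranges. */
	flush_tlb_kernel_range(0, TLB_FLUSH_ALL);

	free_vmemmap_page_list(&vmemmap_pages);
}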

Comments

Muchun Song Aug. 30, 2023, 8:23 a.m. UTC | #1
On 2023/8/26 03:04, Mike Kravetz wrote:
> From: Joao Martins <joao.m.martins@oracle.com>
>
> Now that a list of pages is deduplicated at once, the TLB
> flush can be batched for all vmemmap pages that got remapped.
>
> Add a flags field and pass whether it's a bulk allocation or
> just a single page to decide to remap.
>
> The TLB flush is global as we don't have guarantees from caller
> that the set of folios is contiguous, or to add complexity in
> composing a list of kVAs to flush.
>
> Modified by Mike Kravetz to perform TLB flush on single folio if an
> error is encountered.
>
> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
> Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com>
> ---
>   mm/hugetlb_vmemmap.c | 9 +++++++--
>   1 file changed, 7 insertions(+), 2 deletions(-)
>
> diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c
> index 904a64fe5669..a2fc7b03ac6b 100644
> --- a/mm/hugetlb_vmemmap.c
> +++ b/mm/hugetlb_vmemmap.c
> @@ -36,6 +36,7 @@ struct vmemmap_remap_walk {
>   	unsigned long		reuse_addr;
>   	struct list_head	*vmemmap_pages;
>   #define VMEMMAP_REMAP_ONLY_SPLIT	BIT(0)
> +#define	VMEMMAP_REMAP_BULK_PAGES	BIT(1)

We could reuse the flag proposed in patch 10 (for which I suggested the
name VMEMMAP_SPLIT_WITHOUT_FLUSH). Now that I have seen this patch, I
think that name is not suitable; maybe VMEMMAP_WITHOUT_TLB_FLUSH is better.
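
A minimal sketch of that direction, with one flag shared by the PMD-split
walk from patch 10 and the bulk remap walk here (the flag name is the
proposal above, not code from the posted series):

/* Callers that batch the TLB flush set this for both split and remap walks. */
#define VMEMMAP_WITHOUT_TLB_FLUSH	BIT(0)

	/* at the end of vmemmap_remap_range(), after walking [start, end) */
	if (!(walk->flags & VMEMMAP_WITHOUT_TLB_FLUSH))
		flush_tlb_kernel_range(start, end);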

Thanks.

>   	unsigned long		flags;
>   };
>   
> @@ -211,7 +212,8 @@ static int vmemmap_remap_range(unsigned long start, unsigned long end,
>   			return ret;
>   	} while (pgd++, addr = next, addr != end);
>   
> -	if (!(walk->flags & VMEMMAP_REMAP_ONLY_SPLIT))
> +	if (!(walk->flags &
> +	      (VMEMMAP_REMAP_ONLY_SPLIT | VMEMMAP_REMAP_BULK_PAGES)))
>   		flush_tlb_kernel_range(start, end);
>   
>   	return 0;
> @@ -377,7 +379,7 @@ static int vmemmap_remap_free(unsigned long start, unsigned long end,
>   		.remap_pte	= vmemmap_remap_pte,
>   		.reuse_addr	= reuse,
>   		.vmemmap_pages	= &vmemmap_pages,
> -		.flags		= 0,
> +		.flags		= !bulk_pages ? 0 : VMEMMAP_REMAP_BULK_PAGES,
>   	};
>   	int nid = page_to_nid((struct page *)start);
>   	gfp_t gfp_mask = GFP_KERNEL | __GFP_THISNODE | __GFP_NORETRY |
> @@ -427,6 +429,7 @@ static int vmemmap_remap_free(unsigned long start, unsigned long end,
>   			.remap_pte	= vmemmap_restore_pte,
>   			.reuse_addr	= reuse,
>   			.vmemmap_pages	= &vmemmap_pages,
> +			.flags		= 0,
>   		};
>   
>   		vmemmap_remap_range(reuse, end, &walk);
> @@ -700,6 +703,8 @@ void hugetlb_vmemmap_optimize_folios(struct hstate *h, struct list_head *folio_l
>   	list_for_each_entry(folio, folio_list, lru)
>   		hugetlb_vmemmap_optimize_bulk(h, &folio->page, &vmemmap_pages);
>   
> +	flush_tlb_kernel_range(0, TLB_FLUSH_ALL);
> +
>   	free_vmemmap_page_list(&vmemmap_pages);
>   }
>
Joao Martins Aug. 30, 2023, 11:17 a.m. UTC | #2
On 30/08/2023 09:23, Muchun Song wrote:
> 
> 
> On 2023/8/26 03:04, Mike Kravetz wrote:
>> From: Joao Martins <joao.m.martins@oracle.com>
>>
>> Now that a list of pages is deduplicated at once, the TLB
>> flush can be batched for all vmemmap pages that got remapped.
>>
>> Add a flags field and pass whether it's a bulk allocation or
>> just a single page to decide to remap.
>>
>> The TLB flush is global as we don't have guarantees from caller
>> that the set of folios is contiguous, or to add complexity in
>> composing a list of kVAs to flush.
>>
>> Modified by Mike Kravetz to perform TLB flush on single folio if an
>> error is encountered.
>>
>> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
>> Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com>
>> ---
>>   mm/hugetlb_vmemmap.c | 9 +++++++--
>>   1 file changed, 7 insertions(+), 2 deletions(-)
>>
>> diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c
>> index 904a64fe5669..a2fc7b03ac6b 100644
>> --- a/mm/hugetlb_vmemmap.c
>> +++ b/mm/hugetlb_vmemmap.c
>> @@ -36,6 +36,7 @@ struct vmemmap_remap_walk {
>>       unsigned long        reuse_addr;
>>       struct list_head    *vmemmap_pages;
>>   #define VMEMMAP_REMAP_ONLY_SPLIT    BIT(0)
>> +#define    VMEMMAP_REMAP_BULK_PAGES    BIT(1)
> 
> We could reuse the flag (as I suggest VMEMMAP_SPLIT_WITHOUT_FLUSH)
> proposed in the patch 10. When I saw this patch, I think the name
> is not suitable, maybe VMEMMAP_WITHOUT_TLB_FLUSH is better.
> 

As mentioned in the previous patch, yeah, it makes sense to have a bit just
for skipping the TLB flush, and perhaps we don't even need BIT(1). We can
use remap_pte to tell PTE vs PMD flush "skipping" apart (rough sketch below).
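
Roughly, assuming the split-only walk leaves remap_pte unset and reusing the
flag name suggested above (illustrative, not the posted code), the check in
vmemmap_remap_range() could become:

	/*
	 * Split-only (PMD) walks pass a NULL remap_pte and never flush here;
	 * bulk remap (PTE) walks set the no-flush flag and flush once at the end.
	 */
	if (walk->remap_pte && !(walk->flags & VMEMMAP_WITHOUT_TLB_FLUSH))
		flush_tlb_kernel_range(start, end);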

> Thanks.
> 
>>       unsigned long        flags;
>>   };
>>   @@ -211,7 +212,8 @@ static int vmemmap_remap_range(unsigned long start,
>> unsigned long end,
>>               return ret;
>>       } while (pgd++, addr = next, addr != end);
>>   -    if (!(walk->flags & VMEMMAP_REMAP_ONLY_SPLIT))
>> +    if (!(walk->flags &
>> +          (VMEMMAP_REMAP_ONLY_SPLIT | VMEMMAP_REMAP_BULK_PAGES)))
>>           flush_tlb_kernel_range(start, end);
>>         return 0;
>> @@ -377,7 +379,7 @@ static int vmemmap_remap_free(unsigned long start,
>> unsigned long end,
>>           .remap_pte    = vmemmap_remap_pte,
>>           .reuse_addr    = reuse,
>>           .vmemmap_pages    = &vmemmap_pages,
>> -        .flags        = 0,
>> +        .flags        = !bulk_pages ? 0 : VMEMMAP_REMAP_BULK_PAGES,
>>       };
>>       int nid = page_to_nid((struct page *)start);
>>       gfp_t gfp_mask = GFP_KERNEL | __GFP_THISNODE | __GFP_NORETRY |
>> @@ -427,6 +429,7 @@ static int vmemmap_remap_free(unsigned long start,
>> unsigned long end,
>>               .remap_pte    = vmemmap_restore_pte,
>>               .reuse_addr    = reuse,
>>               .vmemmap_pages    = &vmemmap_pages,
>> +            .flags        = 0,
>>           };
>>             vmemmap_remap_range(reuse, end, &walk);
>> @@ -700,6 +703,8 @@ void hugetlb_vmemmap_optimize_folios(struct hstate *h,
>> struct list_head *folio_l
>>       list_for_each_entry(folio, folio_list, lru)
>>           hugetlb_vmemmap_optimize_bulk(h, &folio->page, &vmemmap_pages);
>>   +    flush_tlb_kernel_range(0, TLB_FLUSH_ALL);
>> +
>>       free_vmemmap_page_list(&vmemmap_pages);
>>   }
>>   
>

Patch

diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c
index 904a64fe5669..a2fc7b03ac6b 100644
--- a/mm/hugetlb_vmemmap.c
+++ b/mm/hugetlb_vmemmap.c
@@ -36,6 +36,7 @@  struct vmemmap_remap_walk {
 	unsigned long		reuse_addr;
 	struct list_head	*vmemmap_pages;
 #define VMEMMAP_REMAP_ONLY_SPLIT	BIT(0)
+#define	VMEMMAP_REMAP_BULK_PAGES	BIT(1)
 	unsigned long		flags;
 };
 
@@ -211,7 +212,8 @@  static int vmemmap_remap_range(unsigned long start, unsigned long end,
 			return ret;
 	} while (pgd++, addr = next, addr != end);
 
-	if (!(walk->flags & VMEMMAP_REMAP_ONLY_SPLIT))
+	if (!(walk->flags &
+	      (VMEMMAP_REMAP_ONLY_SPLIT | VMEMMAP_REMAP_BULK_PAGES)))
 		flush_tlb_kernel_range(start, end);
 
 	return 0;
@@ -377,7 +379,7 @@  static int vmemmap_remap_free(unsigned long start, unsigned long end,
 		.remap_pte	= vmemmap_remap_pte,
 		.reuse_addr	= reuse,
 		.vmemmap_pages	= &vmemmap_pages,
-		.flags		= 0,
+		.flags		= !bulk_pages ? 0 : VMEMMAP_REMAP_BULK_PAGES,
 	};
 	int nid = page_to_nid((struct page *)start);
 	gfp_t gfp_mask = GFP_KERNEL | __GFP_THISNODE | __GFP_NORETRY |
@@ -427,6 +429,7 @@  static int vmemmap_remap_free(unsigned long start, unsigned long end,
 			.remap_pte	= vmemmap_restore_pte,
 			.reuse_addr	= reuse,
 			.vmemmap_pages	= &vmemmap_pages,
+			.flags		= 0,
 		};
 
 		vmemmap_remap_range(reuse, end, &walk);
@@ -700,6 +703,8 @@  void hugetlb_vmemmap_optimize_folios(struct hstate *h, struct list_head *folio_l
 	list_for_each_entry(folio, folio_list, lru)
 		hugetlb_vmemmap_optimize_bulk(h, &folio->page, &vmemmap_pages);
 
+	flush_tlb_kernel_range(0, TLB_FLUSH_ALL);
+
 	free_vmemmap_page_list(&vmemmap_pages);
 }