Message ID | 20230825190436.55045-12-mike.kravetz@oracle.com (mailing list archive)
---|---
State | New
Series | Batch hugetlb vmemmap modification operations
On 2023/8/26 03:04, Mike Kravetz wrote:
> From: Joao Martins <joao.m.martins@oracle.com>
>
> Now that a list of pages is deduplicated at once, the TLB
> flush can be batched for all vmemmap pages that got remapped.
>
> Add a flags field and pass whether it's a bulk allocation or
> just a single page to decide to remap.
>
> The TLB flush is global as we don't have guarantees from caller
> that the set of folios is contiguous, or to add complexity in
> composing a list of kVAs to flush.
>
> Modified by Mike Kravetz to perform TLB flush on single folio if an
> error is encountered.
>
> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
> Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com>
> ---
>  mm/hugetlb_vmemmap.c | 9 +++++++--
>  1 file changed, 7 insertions(+), 2 deletions(-)
>
> diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c
> index 904a64fe5669..a2fc7b03ac6b 100644
> --- a/mm/hugetlb_vmemmap.c
> +++ b/mm/hugetlb_vmemmap.c
> @@ -36,6 +36,7 @@ struct vmemmap_remap_walk {
>  	unsigned long		reuse_addr;
>  	struct list_head	*vmemmap_pages;
>  #define VMEMMAP_REMAP_ONLY_SPLIT	BIT(0)
> +#define VMEMMAP_REMAP_BULK_PAGES	BIT(1)

We could reuse the flag proposed in patch 10 (where I suggested
VMEMMAP_SPLIT_WITHOUT_FLUSH). Seeing this patch, I think that name is
not suitable; maybe VMEMMAP_WITHOUT_TLB_FLUSH is better.

Thanks.

>  	unsigned long		flags;
>  };
>
> @@ -211,7 +212,8 @@ static int vmemmap_remap_range(unsigned long start, unsigned long end,
>  			return ret;
>  	} while (pgd++, addr = next, addr != end);
>
> -	if (!(walk->flags & VMEMMAP_REMAP_ONLY_SPLIT))
> +	if (!(walk->flags &
> +			(VMEMMAP_REMAP_ONLY_SPLIT | VMEMMAP_REMAP_BULK_PAGES)))
>  		flush_tlb_kernel_range(start, end);
>
>  	return 0;
> @@ -377,7 +379,7 @@ static int vmemmap_remap_free(unsigned long start, unsigned long end,
>  		.remap_pte	= vmemmap_remap_pte,
>  		.reuse_addr	= reuse,
>  		.vmemmap_pages	= &vmemmap_pages,
> -		.flags		= 0,
> +		.flags		= !bulk_pages ? 0 : VMEMMAP_REMAP_BULK_PAGES,
>  	};
>  	int nid = page_to_nid((struct page *)start);
>  	gfp_t gfp_mask = GFP_KERNEL | __GFP_THISNODE | __GFP_NORETRY |
> @@ -427,6 +429,7 @@ static int vmemmap_remap_free(unsigned long start, unsigned long end,
>  			.remap_pte	= vmemmap_restore_pte,
>  			.reuse_addr	= reuse,
>  			.vmemmap_pages	= &vmemmap_pages,
> +			.flags		= 0,
>  		};
>
>  		vmemmap_remap_range(reuse, end, &walk);
> @@ -700,6 +703,8 @@ void hugetlb_vmemmap_optimize_folios(struct hstate *h, struct list_head *folio_l
>  	list_for_each_entry(folio, folio_list, lru)
>  		hugetlb_vmemmap_optimize_bulk(h, &folio->page, &vmemmap_pages);
>
> +	flush_tlb_kernel_range(0, TLB_FLUSH_ALL);
> +
>  	free_vmemmap_page_list(&vmemmap_pages);
>  }
>
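To make the naming suggestion concrete, here is a minimal sketch of a
single no-flush flag shared by the split-only and batched-remap walks.
The name VMEMMAP_WITHOUT_TLB_FLUSH and the trimmed-down function body are
illustrative assumptions, not code from this series.

/*
 * Illustrative sketch only (assumed flag name, not from this series):
 * one bit covers every caller that wants to defer the TLB flush, whether
 * the walk only split PMDs or remapped a batch of vmemmap pages.
 */
#define VMEMMAP_WITHOUT_TLB_FLUSH	BIT(0)

static int vmemmap_remap_range(unsigned long start, unsigned long end,
			       struct vmemmap_remap_walk *walk)
{
	/* ... page table walk elided ... */

	/* A single bit decides whether this walk flushes immediately. */
	if (!(walk->flags & VMEMMAP_WITHOUT_TLB_FLUSH))
		flush_tlb_kernel_range(start, end);

	return 0;
}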
On 30/08/2023 09:23, Muchun Song wrote:
>
>
> On 2023/8/26 03:04, Mike Kravetz wrote:
>> From: Joao Martins <joao.m.martins@oracle.com>
>>
>> Now that a list of pages is deduplicated at once, the TLB
>> flush can be batched for all vmemmap pages that got remapped.
>>
>> Add a flags field and pass whether it's a bulk allocation or
>> just a single page to decide to remap.
>>
>> The TLB flush is global as we don't have guarantees from caller
>> that the set of folios is contiguous, or to add complexity in
>> composing a list of kVAs to flush.
>>
>> Modified by Mike Kravetz to perform TLB flush on single folio if an
>> error is encountered.
>>
>> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
>> Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com>
>> ---
>>  mm/hugetlb_vmemmap.c | 9 +++++++--
>>  1 file changed, 7 insertions(+), 2 deletions(-)
>>
>> diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c
>> index 904a64fe5669..a2fc7b03ac6b 100644
>> --- a/mm/hugetlb_vmemmap.c
>> +++ b/mm/hugetlb_vmemmap.c
>> @@ -36,6 +36,7 @@ struct vmemmap_remap_walk {
>>  	unsigned long		reuse_addr;
>>  	struct list_head	*vmemmap_pages;
>>  #define VMEMMAP_REMAP_ONLY_SPLIT	BIT(0)
>> +#define VMEMMAP_REMAP_BULK_PAGES	BIT(1)
>
> We could reuse the flag proposed in patch 10 (where I suggested
> VMEMMAP_SPLIT_WITHOUT_FLUSH). Seeing this patch, I think that name is
> not suitable; maybe VMEMMAP_WITHOUT_TLB_FLUSH is better.
>

As mentioned in the previous patch, yeah, it makes sense to have a bit
just for skipping the TLB flush, and perhaps we don't even need BIT(1).
We can use remap_pte to tell the PTE-remap case from the PMD-split case
when deciding whether to skip the flush.

> Thanks.
>
>>  	unsigned long		flags;
>>  };
>> @@ -211,7 +212,8 @@ static int vmemmap_remap_range(unsigned long start,
>> unsigned long end,
>>  			return ret;
>>  	} while (pgd++, addr = next, addr != end);
>> -	if (!(walk->flags & VMEMMAP_REMAP_ONLY_SPLIT))
>> +	if (!(walk->flags &
>> +			(VMEMMAP_REMAP_ONLY_SPLIT | VMEMMAP_REMAP_BULK_PAGES)))
>>  		flush_tlb_kernel_range(start, end);
>>  	return 0;
>> @@ -377,7 +379,7 @@ static int vmemmap_remap_free(unsigned long start,
>> unsigned long end,
>>  		.remap_pte	= vmemmap_remap_pte,
>>  		.reuse_addr	= reuse,
>>  		.vmemmap_pages	= &vmemmap_pages,
>> -		.flags		= 0,
>> +		.flags		= !bulk_pages ? 0 : VMEMMAP_REMAP_BULK_PAGES,
>>  	};
>>  	int nid = page_to_nid((struct page *)start);
>>  	gfp_t gfp_mask = GFP_KERNEL | __GFP_THISNODE | __GFP_NORETRY |
>> @@ -427,6 +429,7 @@ static int vmemmap_remap_free(unsigned long start,
>> unsigned long end,
>>  			.remap_pte	= vmemmap_restore_pte,
>>  			.reuse_addr	= reuse,
>>  			.vmemmap_pages	= &vmemmap_pages,
>> +			.flags		= 0,
>>  		};
>>  		vmemmap_remap_range(reuse, end, &walk);
>> @@ -700,6 +703,8 @@ void hugetlb_vmemmap_optimize_folios(struct hstate *h,
>> struct list_head *folio_l
>>  	list_for_each_entry(folio, folio_list, lru)
>>  		hugetlb_vmemmap_optimize_bulk(h, &folio->page, &vmemmap_pages);
>> +	flush_tlb_kernel_range(0, TLB_FLUSH_ALL);
>> +
>>  	free_vmemmap_page_list(&vmemmap_pages);
>>  }
>>
>
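A hedged sketch of the remap_pte idea discussed above: if the split-only
walk simply leaves remap_pte NULL, the flush decision needs only one extra
flag for the batched case. The flag name and the stripped-down function
are again assumptions for illustration, not the series' final code.

/*
 * Illustrative sketch only: remap_pte == NULL means the walk merely
 * split PMDs and touched no PTE entries, so no range flush is issued for
 * it; a single (assumed) VMEMMAP_WITHOUT_TLB_FLUSH flag then defers the
 * flush for the batched remap path.
 */
static int vmemmap_remap_range(unsigned long start, unsigned long end,
			       struct vmemmap_remap_walk *walk)
{
	/* ... page table walk elided ... */

	if (walk->remap_pte && !(walk->flags & VMEMMAP_WITHOUT_TLB_FLUSH))
		flush_tlb_kernel_range(start, end);

	return 0;
}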
diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c
index 904a64fe5669..a2fc7b03ac6b 100644
--- a/mm/hugetlb_vmemmap.c
+++ b/mm/hugetlb_vmemmap.c
@@ -36,6 +36,7 @@ struct vmemmap_remap_walk {
 	unsigned long		reuse_addr;
 	struct list_head	*vmemmap_pages;
 #define VMEMMAP_REMAP_ONLY_SPLIT	BIT(0)
+#define VMEMMAP_REMAP_BULK_PAGES	BIT(1)
 	unsigned long		flags;
 };

@@ -211,7 +212,8 @@ static int vmemmap_remap_range(unsigned long start, unsigned long end,
 			return ret;
 	} while (pgd++, addr = next, addr != end);

-	if (!(walk->flags & VMEMMAP_REMAP_ONLY_SPLIT))
+	if (!(walk->flags &
+			(VMEMMAP_REMAP_ONLY_SPLIT | VMEMMAP_REMAP_BULK_PAGES)))
 		flush_tlb_kernel_range(start, end);

 	return 0;
@@ -377,7 +379,7 @@ static int vmemmap_remap_free(unsigned long start, unsigned long end,
 		.remap_pte	= vmemmap_remap_pte,
 		.reuse_addr	= reuse,
 		.vmemmap_pages	= &vmemmap_pages,
-		.flags		= 0,
+		.flags		= !bulk_pages ? 0 : VMEMMAP_REMAP_BULK_PAGES,
 	};
 	int nid = page_to_nid((struct page *)start);
 	gfp_t gfp_mask = GFP_KERNEL | __GFP_THISNODE | __GFP_NORETRY |
@@ -427,6 +429,7 @@ static int vmemmap_remap_free(unsigned long start, unsigned long end,
 			.remap_pte	= vmemmap_restore_pte,
 			.reuse_addr	= reuse,
 			.vmemmap_pages	= &vmemmap_pages,
+			.flags		= 0,
 		};

 		vmemmap_remap_range(reuse, end, &walk);
@@ -700,6 +703,8 @@ void hugetlb_vmemmap_optimize_folios(struct hstate *h, struct list_head *folio_l
 	list_for_each_entry(folio, folio_list, lru)
 		hugetlb_vmemmap_optimize_bulk(h, &folio->page, &vmemmap_pages);

+	flush_tlb_kernel_range(0, TLB_FLUSH_ALL);
+
 	free_vmemmap_page_list(&vmemmap_pages);
 }
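For readability, here is the batched entry point from the last hunk above,
annotated with comments. The local variable declarations are assumptions
based on the earlier patches in this series, not verbatim code.

/*
 * Annotated sketch of hugetlb_vmemmap_optimize_folios() as changed by the
 * diff above; locals are assumed from earlier patches in the series.
 */
void hugetlb_vmemmap_optimize_folios(struct hstate *h, struct list_head *folio_list)
{
	struct folio *folio;
	LIST_HEAD(vmemmap_pages);

	/*
	 * Remap every folio's vmemmap without flushing per range; the bulk
	 * flag passed down suppresses flush_tlb_kernel_range() inside
	 * vmemmap_remap_range().
	 */
	list_for_each_entry(folio, folio_list, lru)
		hugetlb_vmemmap_optimize_bulk(h, &folio->page, &vmemmap_pages);

	/*
	 * One global flush covers all remapped ranges, since the folios in
	 * the list are not guaranteed to have contiguous vmemmap.
	 */
	flush_tlb_kernel_range(0, TLB_FLUSH_ALL);

	free_vmemmap_page_list(&vmemmap_pages);
}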