| Message ID | 20230918230202.254631-4-mike.kravetz@oracle.com (mailing list archive) |
|---|---|
| State | New |
| Series | Batch hugetlb vmemmap modification operations |
On 2023/9/19 07:01, Mike Kravetz wrote:
> When adding hugetlb pages to the pool, we first create a list of the
> allocated pages before adding to the pool. Pass this list of pages to a
> new routine hugetlb_vmemmap_optimize_folios() for vmemmap optimization.
>
> Due to significant differences in vmemmap initialization for bootmem
> allocated hugetlb pages, a new routine prep_and_add_bootmem_folios
> is created.
>
> We also modify the routine vmemmap_should_optimize() to check for pages
> that are already optimized. There are code paths that might request
> vmemmap optimization twice and we want to make sure this is not
> attempted.
>
> Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com>
> ---
>  mm/hugetlb.c         | 50 +++++++++++++++++++++++++++++++++++++-------
>  mm/hugetlb_vmemmap.c | 11 ++++++++++
>  mm/hugetlb_vmemmap.h |  5 +++++
>  3 files changed, 58 insertions(+), 8 deletions(-)
>
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index 8624286be273..d6f3db3c1313 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -2269,6 +2269,11 @@ static void prep_and_add_allocated_folios(struct hstate *h,
>  {
>          struct folio *folio, *tmp_f;
>
> +        /*
> +         * Send list for bulk vmemmap optimization processing
> +         */

From the kernel development document, one-line comment format is "/**/".

> +        hugetlb_vmemmap_optimize_folios(h, folio_list);
> +
>          /*
>           * Add all new pool pages to free lists in one lock cycle
>           */
> @@ -3309,6 +3314,40 @@ static void __init hugetlb_folio_init_vmemmap(struct folio *folio,
>          prep_compound_head((struct page *)folio, huge_page_order(h));
>  }
>
> +static void __init prep_and_add_bootmem_folios(struct hstate *h,
> +                        struct list_head *folio_list)
> +{
> +        struct folio *folio, *tmp_f;
> +
> +        /*
> +         * Send list for bulk vmemmap optimization processing
> +         */
> +        hugetlb_vmemmap_optimize_folios(h, folio_list);
> +
> +        /*
> +         * Add all new pool pages to free lists in one lock cycle
> +         */
> +        spin_lock_irq(&hugetlb_lock);
> +        list_for_each_entry_safe(folio, tmp_f, folio_list, lru) {
> +                if (!folio_test_hugetlb_vmemmap_optimized(folio)) {
> +                        /*
> +                         * If HVO fails, initialize all tail struct pages
> +                         * We do not worry about potential long lock hold
> +                         * time as this is early in boot and there should
> +                         * be no contention.
> +                         */
> +                        hugetlb_folio_init_tail_vmemmap(folio,
> +                                        HUGETLB_VMEMMAP_RESERVE_PAGES,
> +                                        pages_per_huge_page(h));
> +                }
> +                __prep_account_new_huge_page(h, folio_nid(folio));
> +                enqueue_hugetlb_folio(h, folio);
> +        }
> +        spin_unlock_irq(&hugetlb_lock);
> +
> +        INIT_LIST_HEAD(folio_list);

I'm not sure what is the purpose of the reinitialization to list head?

> +}
> [...]
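For readers who are not familiar with the coding-style rule Muchun is citing: a comment that fits on a single line is normally written in the one-line /* ... */ form rather than as a multi-line block. A hypothetical reworking of the added comment along those lines (illustrative only, not taken from any later revision of this patch) would be:

        /* Send list for bulk vmemmap optimization processing */
        hugetlb_vmemmap_optimize_folios(h, folio_list);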
On 09/19/23 11:10, Muchun Song wrote:
>
>
> On 2023/9/19 07:01, Mike Kravetz wrote:
> > When adding hugetlb pages to the pool, we first create a list of the
> > allocated pages before adding to the pool. Pass this list of pages to a
> > new routine hugetlb_vmemmap_optimize_folios() for vmemmap optimization.
> >
> > [...]
> >
> > +        /*
> > +         * Send list for bulk vmemmap optimization processing
> > +         */
>
> From the kernel development document, one-line comment format is "/**/".
>

Will change the comments introduced here.

> > +        hugetlb_vmemmap_optimize_folios(h, folio_list);
> > +
> > [...]
> >
> > +                __prep_account_new_huge_page(h, folio_nid(folio));
> > +                enqueue_hugetlb_folio(h, folio);
> > +        }
> > +        spin_unlock_irq(&hugetlb_lock);
> > +
> > +        INIT_LIST_HEAD(folio_list);
>
> I'm not sure what is the purpose of the reinitialization to list head?
>

There really is no purpose. This was copied from
prep_and_add_allocated_folios which also has this unnecessary call. It is
unnecessary as enqueue_hugetlb_folio() will do a list_move for each
folio on the list. Therefore, at the end of the loop we KNOW the list
is empty.

I will remove here and in prep_and_add_allocated_folios.

Thanks,
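To make the list_move() point concrete, below is a small standalone C sketch, not kernel code: the list helpers are minimal userspace stand-ins for the kernel's <linux/list.h> versions, and the names folio_list/free_list are only illustrative. It shows that once every entry has been moved off a list, the source list head is already empty, so a trailing INIT_LIST_HEAD() changes nothing.

#include <assert.h>
#include <stdio.h>

struct list_head {
        struct list_head *next, *prev;
};

static void INIT_LIST_HEAD(struct list_head *head)
{
        head->next = head;
        head->prev = head;
}

static void list_add(struct list_head *new, struct list_head *head)
{
        new->next = head->next;
        new->prev = head;
        head->next->prev = new;
        head->next = new;
}

static void list_del(struct list_head *entry)
{
        entry->prev->next = entry->next;
        entry->next->prev = entry->prev;
}

/* list_move(): unlink an entry from its current list and add it to another */
static void list_move(struct list_head *entry, struct list_head *head)
{
        list_del(entry);
        list_add(entry, head);
}

static int list_empty(const struct list_head *head)
{
        return head->next == head;
}

int main(void)
{
        struct list_head folio_list, free_list, folios[4];
        int i;

        INIT_LIST_HEAD(&folio_list);
        INIT_LIST_HEAD(&free_list);
        for (i = 0; i < 4; i++)
                list_add(&folios[i], &folio_list);

        /* "enqueue" every entry: each list_move() removes it from the source */
        while (!list_empty(&folio_list))
                list_move(folio_list.next, &free_list);

        /* the source list is empty; an INIT_LIST_HEAD() here would be a no-op */
        assert(list_empty(&folio_list));
        printf("folio_list empty: %d\n", list_empty(&folio_list));
        return 0;
}

In the kernel, enqueue_hugetlb_folio() performs this unlink-and-relink on each folio's lru entry, which is why, as Mike notes, the list is known to be empty when the loop ends.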
> On Sep 20, 2023, at 04:49, Mike Kravetz <mike.kravetz@oracle.com> wrote:
>
> On 09/19/23 11:10, Muchun Song wrote:
>>
>>
>> On 2023/9/19 07:01, Mike Kravetz wrote:
>>> When adding hugetlb pages to the pool, we first create a list of the
>>> allocated pages before adding to the pool. Pass this list of pages to a
>>> new routine hugetlb_vmemmap_optimize_folios() for vmemmap optimization.
>>>
>>> [...]
>>>
>>> +        /*
>>> +         * Send list for bulk vmemmap optimization processing
>>> +         */
>>
>> From the kernel development document, one-line comment format is "/**/".
>>
>
> Will change the comments introduced here.

BTW, there are some other places as well, please update them all, thanks.

>
>>> [...]
>>>
>>> +        INIT_LIST_HEAD(folio_list);
>>
>> I'm not sure what is the purpose of the reinitialization to list head?
>>
>
> There really is no purpose. This was copied from
> prep_and_add_allocated_folios which also has this unnecessary call. It is
> unnecessary as enqueue_hugetlb_folio() will do a list_move for each
> folio on the list. Therefore, at the end of the loop we KNOW the list
> is empty.

Right.

>
> I will remove here and in prep_and_add_allocated_folios.

Thanks.

>
> Thanks,
> --
> Mike Kravetz
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 8624286be273..d6f3db3c1313 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2269,6 +2269,11 @@ static void prep_and_add_allocated_folios(struct hstate *h,
 {
         struct folio *folio, *tmp_f;
 
+        /*
+         * Send list for bulk vmemmap optimization processing
+         */
+        hugetlb_vmemmap_optimize_folios(h, folio_list);
+
         /*
          * Add all new pool pages to free lists in one lock cycle
          */
@@ -3309,6 +3314,40 @@ static void __init hugetlb_folio_init_vmemmap(struct folio *folio,
         prep_compound_head((struct page *)folio, huge_page_order(h));
 }
 
+static void __init prep_and_add_bootmem_folios(struct hstate *h,
+                        struct list_head *folio_list)
+{
+        struct folio *folio, *tmp_f;
+
+        /*
+         * Send list for bulk vmemmap optimization processing
+         */
+        hugetlb_vmemmap_optimize_folios(h, folio_list);
+
+        /*
+         * Add all new pool pages to free lists in one lock cycle
+         */
+        spin_lock_irq(&hugetlb_lock);
+        list_for_each_entry_safe(folio, tmp_f, folio_list, lru) {
+                if (!folio_test_hugetlb_vmemmap_optimized(folio)) {
+                        /*
+                         * If HVO fails, initialize all tail struct pages
+                         * We do not worry about potential long lock hold
+                         * time as this is early in boot and there should
+                         * be no contention.
+                         */
+                        hugetlb_folio_init_tail_vmemmap(folio,
+                                        HUGETLB_VMEMMAP_RESERVE_PAGES,
+                                        pages_per_huge_page(h));
+                }
+                __prep_account_new_huge_page(h, folio_nid(folio));
+                enqueue_hugetlb_folio(h, folio);
+        }
+        spin_unlock_irq(&hugetlb_lock);
+
+        INIT_LIST_HEAD(folio_list);
+}
+
 /*
  * Put bootmem huge pages into the standard lists after mem_map is up.
  * Note: This only applies to gigantic (order > MAX_ORDER) pages.
@@ -3329,7 +3368,7 @@ static void __init gather_bootmem_prealloc(void)
                 * in this list.  If so, process each size separately.
                 */
                if (h != prev_h && prev_h != NULL)
-                        prep_and_add_allocated_folios(prev_h, &folio_list);
+                        prep_and_add_bootmem_folios(prev_h, &folio_list);
                prev_h = h;
 
                VM_BUG_ON(!hstate_is_gigantic(h));
@@ -3337,12 +3376,7 @@ static void __init gather_bootmem_prealloc(void)
 
                hugetlb_folio_init_vmemmap(folio, h,
                                           HUGETLB_VMEMMAP_RESERVE_PAGES);
-                __prep_new_hugetlb_folio(h, folio);
-                /* If HVO fails, initialize all tail struct pages */
-                if (!HPageVmemmapOptimized(&folio->page))
-                        hugetlb_folio_init_tail_vmemmap(folio,
-                                        HUGETLB_VMEMMAP_RESERVE_PAGES,
-                                        pages_per_huge_page(h));
+                init_new_hugetlb_folio(h, folio);
                list_add(&folio->lru, &folio_list);
 
                /*
@@ -3354,7 +3388,7 @@ static void __init gather_bootmem_prealloc(void)
                cond_resched();
        }
 
-        prep_and_add_allocated_folios(h, &folio_list);
+        prep_and_add_bootmem_folios(h, &folio_list);
 }
 
 static void __init hugetlb_hstate_alloc_pages_onenode(struct hstate *h, int nid)
diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c
index 76682d1d79a7..4558b814ffab 100644
--- a/mm/hugetlb_vmemmap.c
+++ b/mm/hugetlb_vmemmap.c
@@ -483,6 +483,9 @@ int hugetlb_vmemmap_restore(const struct hstate *h, struct page *head)
 /* Return true iff a HugeTLB whose vmemmap should and can be optimized. */
 static bool vmemmap_should_optimize(const struct hstate *h, const struct page *head)
 {
+        if (HPageVmemmapOptimized((struct page *)head))
+                return false;
+
         if (!READ_ONCE(vmemmap_optimize_enabled))
                 return false;
 
@@ -572,6 +575,14 @@ void hugetlb_vmemmap_optimize(const struct hstate *h, struct page *head)
         SetHPageVmemmapOptimized(head);
 }
 
+void hugetlb_vmemmap_optimize_folios(struct hstate *h, struct list_head *folio_list)
+{
+        struct folio *folio;
+
+        list_for_each_entry(folio, folio_list, lru)
+                hugetlb_vmemmap_optimize(h, &folio->page);
+}
+
 static struct ctl_table hugetlb_vmemmap_sysctls[] = {
         {
                 .procname = "hugetlb_optimize_vmemmap",
diff --git a/mm/hugetlb_vmemmap.h b/mm/hugetlb_vmemmap.h
index 4573899855d7..c512e388dbb4 100644
--- a/mm/hugetlb_vmemmap.h
+++ b/mm/hugetlb_vmemmap.h
@@ -20,6 +20,7 @@
 #ifdef CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP
 int hugetlb_vmemmap_restore(const struct hstate *h, struct page *head);
 void hugetlb_vmemmap_optimize(const struct hstate *h, struct page *head);
+void hugetlb_vmemmap_optimize_folios(struct hstate *h, struct list_head *folio_list);
 
 static inline unsigned int hugetlb_vmemmap_size(const struct hstate *h)
 {
@@ -48,6 +49,10 @@ static inline void hugetlb_vmemmap_optimize(const struct hstate *h, struct page
 {
 }
 
+static inline void hugetlb_vmemmap_optimize_folios(struct hstate *h, struct list_head *folio_list)
+{
+}
+
 static inline unsigned int hugetlb_vmemmap_optimizable_size(const struct hstate *h)
 {
         return 0;
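One behavioural consequence of the new HPageVmemmapOptimized() check in vmemmap_should_optimize() is that a second optimization request for the same head page becomes a no-op. A minimal userspace model of that idempotence, with stand-in types and names rather than the real kernel implementation, is sketched below:

#include <stdbool.h>
#include <stdio.h>

struct fake_head {
        bool vmemmap_optimized;         /* stands in for HPageVmemmapOptimized() */
};

static bool should_optimize(const struct fake_head *head)
{
        if (head->vmemmap_optimized)    /* the check this patch adds */
                return false;
        return true;                    /* other conditions elided */
}

static void optimize(struct fake_head *head)
{
        if (!should_optimize(head))
                return;
        /* ... remap the vmemmap pages ... */
        head->vmemmap_optimized = true;
}

int main(void)
{
        struct fake_head head = { .vmemmap_optimized = false };

        optimize(&head);        /* performs the optimization */
        optimize(&head);        /* a second request is now a no-op */
        printf("optimized: %d\n", head.vmemmap_optimized);
        return 0;
}

This matches the commit message's goal of tolerating code paths that might request vmemmap optimization twice.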
When adding hugetlb pages to the pool, we first create a list of the
allocated pages before adding to the pool. Pass this list of pages to a
new routine hugetlb_vmemmap_optimize_folios() for vmemmap optimization.

Due to significant differences in vmemmap initialization for bootmem
allocated hugetlb pages, a new routine prep_and_add_bootmem_folios
is created.

We also modify the routine vmemmap_should_optimize() to check for pages
that are already optimized. There are code paths that might request
vmemmap optimization twice and we want to make sure this is not
attempted.

Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com>
---
 mm/hugetlb.c         | 50 +++++++++++++++++++++++++++++++++++++-------
 mm/hugetlb_vmemmap.c | 11 ++++++++++
 mm/hugetlb_vmemmap.h |  5 +++++
 3 files changed, 58 insertions(+), 8 deletions(-)
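As a rough illustration of the batching idea described above, here is a self-contained userspace sketch, not kernel code: pthread_mutex_t stands in for hugetlb_lock, plain ints stand in for folios, and the function names are hypothetical. The pattern is the same: gather the newly allocated pages, run the vmemmap-optimization step once over the whole batch, then take the lock a single time to add them all to the pool.

#include <pthread.h>
#include <stdio.h>

#define NR_PAGES 8

static pthread_mutex_t pool_lock = PTHREAD_MUTEX_INITIALIZER;
static int pool[NR_PAGES];
static int pool_count;

/* stand-in for hugetlb_vmemmap_optimize_folios(): one pass over the batch */
static void optimize_batch(int *pages, int nr)
{
        for (int i = 0; i < nr; i++)
                pages[i] |= 0x100;      /* pretend to mark the page optimized */
}

/* stand-in for prep_and_add_allocated_folios(): one lock cycle for the batch */
static void prep_and_add_batch(int *pages, int nr)
{
        optimize_batch(pages, nr);

        pthread_mutex_lock(&pool_lock);
        for (int i = 0; i < nr; i++)
                pool[pool_count++] = pages[i];
        pthread_mutex_unlock(&pool_lock);
}

int main(void)
{
        int pages[NR_PAGES];

        for (int i = 0; i < NR_PAGES; i++)
                pages[i] = i;           /* "allocate" pages onto a local batch */

        prep_and_add_batch(pages, NR_PAGES);
        printf("pages in pool: %d\n", pool_count);
        return 0;
}

The real prep_and_add_allocated_folios()/prep_and_add_bootmem_folios() in the patch follow the same shape, with hugetlb_vmemmap_optimize_folios() as the batch step before the single lock cycle.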