Message ID | 20220715170312.13931-3-julien@xen.org (mailing list archive)
---|---
State | New, archived
Series | xen/mm: Optimize init_heap_pages()
Hi Julien,

On 2022/7/16 1:03, Julien Grall wrote:
> From: Julien Grall <jgrall@amazon.com>
>
> At the moment, init_heap_pages() will call free_heap_pages() page
> by page. To reduce the time to initialize the heap, we will want
> to provide multiple pages at the same time.
>
> init_heap_pages() is now split in two parts:
>     - init_heap_pages(): will break down the range in multiple set
>       of contiguous pages. For now, the criteria is the pages should
>       belong to the same NUMA node.
>     - _init_heap_pages(): will initialize a set of pages belonging to
>       the same NUMA node. In a follow-up patch, new requirements will
>       be added (e.g. pages should belong to the same zone). For now the
>       pages are still passed one by one to free_heap_pages().
>
> Note that the comment before init_heap_pages() is heavily outdated and
> does not reflect the current code. So update it.
>
> This patch is a merge/rework of patches from David Woodhouse and
> Hongyan Xia.
>
> Signed-off-by: Julien Grall <jgrall@amazon.com>
>
> ---
>
> Interestingly, I was expecting this patch to perform worse. However,
> from testing there is a small increase in perf.
>
> That said, I mainly plit the patch because it keeps refactoring and
> optimization separated.
>
> Changes in v2:
>     - Rename init_contig_pages() to _init_heap_pages()
>     - Fold is_contig_page()
> ---
>  xen/common/page_alloc.c | 77 ++++++++++++++++++++++++++---------------
>  1 file changed, 50 insertions(+), 27 deletions(-)
>
> diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c
> index 078c2990041d..eedb2fed77c3 100644
> --- a/xen/common/page_alloc.c
> +++ b/xen/common/page_alloc.c
> @@ -1778,16 +1778,44 @@ int query_page_offline(mfn_t mfn, uint32_t *status)
>  }
>
>  /*
> - * Hand the specified arbitrary page range to the specified heap zone
> - * checking the node_id of the previous page. If they differ and the
> - * latter is not on a MAX_ORDER boundary, then we reserve the page by
> - * not freeing it to the buddy allocator.
> + * This function should only be called with valid pages from the same NUMA
> + * node.
>   */
> +static void _init_heap_pages(const struct page_info *pg,
> +                             unsigned long nr_pages,
> +                             bool need_scrub)
> +{
> +    unsigned long s, e;
> +    unsigned int nid = phys_to_nid(page_to_maddr(pg));
> +
> +    s = mfn_x(page_to_mfn(pg));
> +    e = mfn_x(mfn_add(page_to_mfn(pg + nr_pages - 1), 1));
> +    if ( unlikely(!avail[nid]) )
> +    {
> +        bool use_tail = IS_ALIGNED(s, 1UL << MAX_ORDER) &&
> +                        (find_first_set_bit(e) <= find_first_set_bit(s));
> +        unsigned long n;
> +
> +        n = init_node_heap(nid, s, nr_pages, &use_tail);
> +        BUG_ON(n > nr_pages);
> +        if ( use_tail )
> +            e -= n;
> +        else
> +            s += n;
> +    }
> +
> +    while ( s < e )
> +    {
> +        free_heap_pages(mfn_to_page(_mfn(s)), 0, need_scrub);
> +        s += 1UL;
> +    }
> +}
> +
>  static void init_heap_pages(
>      struct page_info *pg, unsigned long nr_pages)
>  {
>      unsigned long i;
> -    bool idle_scrub = false;
> +    bool need_scrub = scrub_debug;
>

You have changed idle_scrub to need_scrub, but haven't mentioned this
in commit log, and I also haven't found related discussion in v1. I
am not very clear about this change.

Cheers,
Wei Chen

>      /*
>       * Keep MFN 0 away from the buddy allocator to avoid crossing zone
> @@ -1812,35 +1840,30 @@ static void init_heap_pages(
>      spin_unlock(&heap_lock);
>
>      if ( system_state < SYS_STATE_active && opt_bootscrub == BOOTSCRUB_IDLE )
> -        idle_scrub = true;
> +        need_scrub = true;
>
> -    for ( i = 0; i < nr_pages; i++ )
> +    for ( i = 0; i < nr_pages; )
>      {
> -        unsigned int nid = phys_to_nid(page_to_maddr(pg+i));
> +        unsigned int nid = phys_to_nid(page_to_maddr(pg));
> +        unsigned long left = nr_pages - i;
> +        unsigned long contig_pages;
>
> -        if ( unlikely(!avail[nid]) )
> +        /*
> +         * _init_heap_pages() is only able to accept range following
> +         * specific property (see comment on top of _init_heap_pages()).
> +         *
> +         * So break down the range in smaller set.
> +         */
> +        for ( contig_pages = 1; contig_pages < left; contig_pages++ )
>          {
> -            unsigned long s = mfn_x(page_to_mfn(pg + i));
> -            unsigned long e = mfn_x(mfn_add(page_to_mfn(pg + nr_pages - 1), 1));
> -            bool use_tail = (nid == phys_to_nid(pfn_to_paddr(e - 1))) &&
> -                            IS_ALIGNED(s, 1UL << MAX_ORDER) &&
> -                            (find_first_set_bit(e) <= find_first_set_bit(s));
> -            unsigned long n;
> -
> -            n = init_node_heap(nid, mfn_x(page_to_mfn(pg + i)), nr_pages - i,
> -                               &use_tail);
> -            BUG_ON(i + n > nr_pages);
> -            if ( n && !use_tail )
> -            {
> -                i += n - 1;
> -                continue;
> -            }
> -            if ( i + n == nr_pages )
> +            if ( nid != (phys_to_nid(page_to_maddr(pg))) )
>                  break;
> -            nr_pages -= n;
>          }
>
> -        free_heap_pages(pg + i, 0, scrub_debug || idle_scrub);
> +        _init_heap_pages(pg, contig_pages, need_scrub);
> +
> +        pg += contig_pages;
> +        i += contig_pages;
>      }
>  }
>
On 15.07.2022 19:03, Julien Grall wrote: > From: Julien Grall <jgrall@amazon.com> > > At the moment, init_heap_pages() will call free_heap_pages() page > by page. To reduce the time to initialize the heap, we will want > to provide multiple pages at the same time. > > init_heap_pages() is now split in two parts: > - init_heap_pages(): will break down the range in multiple set > of contiguous pages. For now, the criteria is the pages should > belong to the same NUMA node. > - _init_heap_pages(): will initialize a set of pages belonging to > the same NUMA node. In a follow-up patch, new requirements will > be added (e.g. pages should belong to the same zone). For now the > pages are still passed one by one to free_heap_pages(). > > Note that the comment before init_heap_pages() is heavily outdated and > does not reflect the current code. So update it. > > This patch is a merge/rework of patches from David Woodhouse and > Hongyan Xia. > > Signed-off-by: Julien Grall <jgrall@amazon.com> Reviewed-by: Jan Beulich <jbeulich@suse.com> Albeit maybe with ... > --- a/xen/common/page_alloc.c > +++ b/xen/common/page_alloc.c > @@ -1778,16 +1778,44 @@ int query_page_offline(mfn_t mfn, uint32_t *status) > } > > /* > - * Hand the specified arbitrary page range to the specified heap zone > - * checking the node_id of the previous page. If they differ and the > - * latter is not on a MAX_ORDER boundary, then we reserve the page by > - * not freeing it to the buddy allocator. > + * This function should only be called with valid pages from the same NUMA > + * node. > */ > +static void _init_heap_pages(const struct page_info *pg, > + unsigned long nr_pages, > + bool need_scrub) > +{ > + unsigned long s, e; > + unsigned int nid = phys_to_nid(page_to_maddr(pg)); > + > + s = mfn_x(page_to_mfn(pg)); > + e = mfn_x(mfn_add(page_to_mfn(pg + nr_pages - 1), 1)); > + if ( unlikely(!avail[nid]) ) > + { > + bool use_tail = IS_ALIGNED(s, 1UL << MAX_ORDER) && > + (find_first_set_bit(e) <= find_first_set_bit(s)); > + unsigned long n; > + > + n = init_node_heap(nid, s, nr_pages, &use_tail); > + BUG_ON(n > nr_pages); > + if ( use_tail ) > + e -= n; > + else > + s += n; > + } > + > + while ( s < e ) > + { > + free_heap_pages(mfn_to_page(_mfn(s)), 0, need_scrub); > + s += 1UL; ... the more conventional s++ or ++s used here? Jan
On 18/07/2022 09:18, Wei Chen wrote:
>>  static void init_heap_pages(
>>      struct page_info *pg, unsigned long nr_pages)
>>  {
>>      unsigned long i;
>> -    bool idle_scrub = false;
>> +    bool need_scrub = scrub_debug;
>>
>
> You have changed idle_scrub to need_scrub, but haven't mentioned this
> in commit log, and I also haven't found related discussion in v1. I
> am not very clear about this change.

The meaning/use of the variable is now different.

Before this patch, the variable was only indicating whether idle scrub
was enabled (this is configurable by the admin). This was then or-ed
with 'scrub_debug' when calling free_heap_pages().

With this patch, we now store the or-ed result in the local variable.

This is not something I felt was necessary to mention in the commit
message.

Cheers,
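As an editorial aside for readers of this archive, here is a minimal standalone model of the refactoring described above. The globals and the stub for free_heap_pages() are invented for the example; only the flag handling mirrors the patch.

```c
#include <stdbool.h>
#include <stdio.h>

/* Stand-in for free_heap_pages(); only prints the scrub flag it receives. */
static void stub_free_heap_pages(bool need_scrub)
{
    printf("free_heap_pages(..., need_scrub=%d)\n", need_scrub);
}

/* Old scheme: the local flag only tracks the idle-scrub option and the
 * OR with scrub_debug happens at the call site. */
static void old_scheme(bool scrub_debug, bool bootscrub_idle)
{
    bool idle_scrub = false;

    if ( bootscrub_idle )
        idle_scrub = true;

    stub_free_heap_pages(scrub_debug || idle_scrub);
}

/* New scheme: the local flag is seeded with scrub_debug, so it already
 * carries the or-ed result when the call is made. */
static void new_scheme(bool scrub_debug, bool bootscrub_idle)
{
    bool need_scrub = scrub_debug;

    if ( bootscrub_idle )
        need_scrub = true;

    stub_free_heap_pages(need_scrub);
}

int main(void)
{
    /* Both schemes pass the same flag for every combination of inputs. */
    for ( int dbg = 0; dbg <= 1; dbg++ )
        for ( int idle = 0; idle <= 1; idle++ )
        {
            old_scheme(dbg, idle);
            new_scheme(dbg, idle);
        }

    return 0;
}
```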
Hi Jan, On 18/07/2022 10:31, Jan Beulich wrote: > On 15.07.2022 19:03, Julien Grall wrote: >> From: Julien Grall <jgrall@amazon.com> >> >> At the moment, init_heap_pages() will call free_heap_pages() page >> by page. To reduce the time to initialize the heap, we will want >> to provide multiple pages at the same time. >> >> init_heap_pages() is now split in two parts: >> - init_heap_pages(): will break down the range in multiple set >> of contiguous pages. For now, the criteria is the pages should >> belong to the same NUMA node. >> - _init_heap_pages(): will initialize a set of pages belonging to >> the same NUMA node. In a follow-up patch, new requirements will >> be added (e.g. pages should belong to the same zone). For now the >> pages are still passed one by one to free_heap_pages(). >> >> Note that the comment before init_heap_pages() is heavily outdated and >> does not reflect the current code. So update it. >> >> This patch is a merge/rework of patches from David Woodhouse and >> Hongyan Xia. >> >> Signed-off-by: Julien Grall <jgrall@amazon.com> > > Reviewed-by: Jan Beulich <jbeulich@suse.com> Thanks. >> --- a/xen/common/page_alloc.c >> +++ b/xen/common/page_alloc.c >> @@ -1778,16 +1778,44 @@ int query_page_offline(mfn_t mfn, uint32_t *status) >> } >> >> /* >> - * Hand the specified arbitrary page range to the specified heap zone >> - * checking the node_id of the previous page. If they differ and the >> - * latter is not on a MAX_ORDER boundary, then we reserve the page by >> - * not freeing it to the buddy allocator. >> + * This function should only be called with valid pages from the same NUMA >> + * node. >> */ >> +static void _init_heap_pages(const struct page_info *pg, >> + unsigned long nr_pages, >> + bool need_scrub) >> +{ >> + unsigned long s, e; >> + unsigned int nid = phys_to_nid(page_to_maddr(pg)); >> + >> + s = mfn_x(page_to_mfn(pg)); >> + e = mfn_x(mfn_add(page_to_mfn(pg + nr_pages - 1), 1)); >> + if ( unlikely(!avail[nid]) ) >> + { >> + bool use_tail = IS_ALIGNED(s, 1UL << MAX_ORDER) && >> + (find_first_set_bit(e) <= find_first_set_bit(s)); >> + unsigned long n; >> + >> + n = init_node_heap(nid, s, nr_pages, &use_tail); >> + BUG_ON(n > nr_pages); >> + if ( use_tail ) >> + e -= n; >> + else >> + s += n; >> + } >> + >> + while ( s < e ) >> + { >> + free_heap_pages(mfn_to_page(_mfn(s)), 0, need_scrub); >> + s += 1UL; > > ... the more conventional s++ or ++s used here? I would prefer to keep using "s += 1UL" here because: * it will be replace with a proper order in the follow-up patch. So this is temporary. * one could argue that if I use "s++" then I should also switch to a for loop which would make sense here but not in the next patch. Cheers,
On 18.07.2022 12:08, Julien Grall wrote: > On 18/07/2022 10:31, Jan Beulich wrote: >> On 15.07.2022 19:03, Julien Grall wrote: >>> --- a/xen/common/page_alloc.c >>> +++ b/xen/common/page_alloc.c >>> @@ -1778,16 +1778,44 @@ int query_page_offline(mfn_t mfn, uint32_t *status) >>> } >>> >>> /* >>> - * Hand the specified arbitrary page range to the specified heap zone >>> - * checking the node_id of the previous page. If they differ and the >>> - * latter is not on a MAX_ORDER boundary, then we reserve the page by >>> - * not freeing it to the buddy allocator. >>> + * This function should only be called with valid pages from the same NUMA >>> + * node. >>> */ >>> +static void _init_heap_pages(const struct page_info *pg, >>> + unsigned long nr_pages, >>> + bool need_scrub) >>> +{ >>> + unsigned long s, e; >>> + unsigned int nid = phys_to_nid(page_to_maddr(pg)); >>> + >>> + s = mfn_x(page_to_mfn(pg)); >>> + e = mfn_x(mfn_add(page_to_mfn(pg + nr_pages - 1), 1)); >>> + if ( unlikely(!avail[nid]) ) >>> + { >>> + bool use_tail = IS_ALIGNED(s, 1UL << MAX_ORDER) && >>> + (find_first_set_bit(e) <= find_first_set_bit(s)); >>> + unsigned long n; >>> + >>> + n = init_node_heap(nid, s, nr_pages, &use_tail); >>> + BUG_ON(n > nr_pages); >>> + if ( use_tail ) >>> + e -= n; >>> + else >>> + s += n; >>> + } >>> + >>> + while ( s < e ) >>> + { >>> + free_heap_pages(mfn_to_page(_mfn(s)), 0, need_scrub); >>> + s += 1UL; >> >> ... the more conventional s++ or ++s used here? > > I would prefer to keep using "s += 1UL" here because: > * it will be replace with a proper order in the follow-up patch. So > this is temporary. Fair enough. Jan > * one could argue that if I use "s++" then I should also switch to a > for loop which would make sense here but not in the next patch. > > Cheers, >
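As an editorial aside: Julien says the "s += 1UL" loop is temporary and will be replaced by a proper order in a follow-up patch. A common way to do that is to free, at each step, the largest power-of-two chunk that both starts on a boundary of its own size and still fits in the remaining range, capped at MAX_ORDER. The sketch below is illustrative only and may differ from the actual follow-up patch; chunk_order(), the MAX_ORDER value, and the example range are invented, while s and e mirror the names used in _init_heap_pages().

```c
#include <stdio.h>

#define MAX_ORDER 18 /* illustrative value, not necessarily Xen's */

/* Largest order such that s is aligned to (1UL << order) and the chunk
 * still fits in [s, e), capped at MAX_ORDER. */
static unsigned int chunk_order(unsigned long s, unsigned long e)
{
    unsigned int order = 0;

    while ( order < MAX_ORDER &&
            !(s & (1UL << order)) &&          /* s aligned to the next order */
            (1UL << (order + 1)) <= (e - s) ) /* larger chunk still fits */
        order++;

    return order;
}

int main(void)
{
    unsigned long s = 0x12340, e = 0x15000; /* made-up MFN range */

    while ( s < e )
    {
        unsigned int order = chunk_order(s, e);

        /* The real code would call
         * free_heap_pages(mfn_to_page(_mfn(s)), order, need_scrub) here. */
        printf("free [%#lx, %#lx) order %u\n", s, s + (1UL << order), order);
        s += 1UL << order;
    }

    return 0;
}
```

Keeping "s += 1UL" in this patch makes the later switch to "s += 1UL << inc_order" a one-line change, which is the separation of refactoring and optimization the commit message argues for.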
```diff
diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c
index 078c2990041d..eedb2fed77c3 100644
--- a/xen/common/page_alloc.c
+++ b/xen/common/page_alloc.c
@@ -1778,16 +1778,44 @@ int query_page_offline(mfn_t mfn, uint32_t *status)
 }
 
 /*
- * Hand the specified arbitrary page range to the specified heap zone
- * checking the node_id of the previous page. If they differ and the
- * latter is not on a MAX_ORDER boundary, then we reserve the page by
- * not freeing it to the buddy allocator.
+ * This function should only be called with valid pages from the same NUMA
+ * node.
  */
+static void _init_heap_pages(const struct page_info *pg,
+                             unsigned long nr_pages,
+                             bool need_scrub)
+{
+    unsigned long s, e;
+    unsigned int nid = phys_to_nid(page_to_maddr(pg));
+
+    s = mfn_x(page_to_mfn(pg));
+    e = mfn_x(mfn_add(page_to_mfn(pg + nr_pages - 1), 1));
+    if ( unlikely(!avail[nid]) )
+    {
+        bool use_tail = IS_ALIGNED(s, 1UL << MAX_ORDER) &&
+                        (find_first_set_bit(e) <= find_first_set_bit(s));
+        unsigned long n;
+
+        n = init_node_heap(nid, s, nr_pages, &use_tail);
+        BUG_ON(n > nr_pages);
+        if ( use_tail )
+            e -= n;
+        else
+            s += n;
+    }
+
+    while ( s < e )
+    {
+        free_heap_pages(mfn_to_page(_mfn(s)), 0, need_scrub);
+        s += 1UL;
+    }
+}
+
 static void init_heap_pages(
     struct page_info *pg, unsigned long nr_pages)
 {
     unsigned long i;
-    bool idle_scrub = false;
+    bool need_scrub = scrub_debug;
 
     /*
      * Keep MFN 0 away from the buddy allocator to avoid crossing zone
@@ -1812,35 +1840,30 @@ static void init_heap_pages(
     spin_unlock(&heap_lock);
 
     if ( system_state < SYS_STATE_active && opt_bootscrub == BOOTSCRUB_IDLE )
-        idle_scrub = true;
+        need_scrub = true;
 
-    for ( i = 0; i < nr_pages; i++ )
+    for ( i = 0; i < nr_pages; )
     {
-        unsigned int nid = phys_to_nid(page_to_maddr(pg+i));
+        unsigned int nid = phys_to_nid(page_to_maddr(pg));
+        unsigned long left = nr_pages - i;
+        unsigned long contig_pages;
 
-        if ( unlikely(!avail[nid]) )
+        /*
+         * _init_heap_pages() is only able to accept range following
+         * specific property (see comment on top of _init_heap_pages()).
+         *
+         * So break down the range in smaller set.
+         */
+        for ( contig_pages = 1; contig_pages < left; contig_pages++ )
         {
-            unsigned long s = mfn_x(page_to_mfn(pg + i));
-            unsigned long e = mfn_x(mfn_add(page_to_mfn(pg + nr_pages - 1), 1));
-            bool use_tail = (nid == phys_to_nid(pfn_to_paddr(e - 1))) &&
-                            IS_ALIGNED(s, 1UL << MAX_ORDER) &&
-                            (find_first_set_bit(e) <= find_first_set_bit(s));
-            unsigned long n;
-
-            n = init_node_heap(nid, mfn_x(page_to_mfn(pg + i)), nr_pages - i,
-                               &use_tail);
-            BUG_ON(i + n > nr_pages);
-            if ( n && !use_tail )
-            {
-                i += n - 1;
-                continue;
-            }
-            if ( i + n == nr_pages )
+            if ( nid != (phys_to_nid(page_to_maddr(pg))) )
                 break;
-            nr_pages -= n;
         }
 
-        free_heap_pages(pg + i, 0, scrub_debug || idle_scrub);
+        _init_heap_pages(pg, contig_pages, need_scrub);
+
+        pg += contig_pages;
+        i += contig_pages;
     }
 }
 
```
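Finally, as an editorial illustration of the range splitting the commit message describes (hand each maximal run of pages on the same NUMA node to a helper), here is a small standalone sketch. fake_nid(), handle_set(), and the node layout are invented for the example; only the idea of the outer loop loosely mirrors the patch.

```c
#include <stdio.h>

/* Stand-in for phys_to_nid(page_to_maddr(pg)): two invented nodes split
 * at pfn 1000. */
static unsigned int fake_nid(unsigned long pfn)
{
    return pfn < 1000 ? 0 : 1;
}

/* Stand-in for _init_heap_pages(): just reports the set it was given. */
static void handle_set(unsigned long start, unsigned long count,
                       unsigned int nid)
{
    printf("node %u: pfn %lu..%lu (%lu pages)\n",
           nid, start, start + count - 1, count);
}

int main(void)
{
    unsigned long pfn = 800, nr_pages = 500, i;

    for ( i = 0; i < nr_pages; )
    {
        unsigned int nid = fake_nid(pfn + i);
        unsigned long left = nr_pages - i;
        unsigned long contig_pages;

        /* Stop the current run as soon as the node changes. */
        for ( contig_pages = 1; contig_pages < left; contig_pages++ )
            if ( nid != fake_nid(pfn + i + contig_pages) )
                break;

        handle_set(pfn + i, contig_pages, nid);
        i += contig_pages;
    }

    return 0;
}
```

With the made-up layout above, the range [800, 1300) is handed over as one 200-page set on node 0 followed by one 300-page set on node 1, which is the behaviour the follow-up patch builds on when it starts passing whole sets (and later orders) to free_heap_pages().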