Message ID | 20250402201841.3245371-1-dwmw2@infradead.org
---|---
State | New
Series | [RFC,1/3] mm: Introduce for_each_valid_pfn() and use it from reserve_bootmem_region()
On Wed, Apr 02, 2025 at 09:18:39PM +0100, David Woodhouse wrote:
> From: David Woodhouse <dwmw@amazon.co.uk>
>
> Especially since commit 9092d4f7a1f8 ("memblock: update initialization
> of reserved pages"), the reserve_bootmem_region() function can spend a
> significant amount of time iterating over every 4KiB PFN in a range,
> calling pfn_valid() on each one, and ultimately doing absolutely nothing.
>
> On a platform used for virtualization, with large NOMAP regions that
> eventually get used for guest RAM, this leads to a significant increase
> in steal time experienced during kexec for a live update.
>
> Introduce for_each_valid_pfn() and use it from reserve_bootmem_region().
> This implementation is precisely the same naïve loop that the function
> used to have, but subsequent commits will provide optimised versions
> for FLATMEM and SPARSEMEM, and this version will remain for those
> architectures which provide their own pfn_valid() implementation,
> until/unless they also provide a matching for_each_valid_pfn().
>
> Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>

Reviewed-by: Mike Rapoport (Microsoft) <rppt@kernel.org>

> ---
>  include/linux/mmzone.h | 10 ++++++++++
>  mm/mm_init.c           | 23 ++++++++++-------------
>  2 files changed, 20 insertions(+), 13 deletions(-)
>
> diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
> index 25e80b2ca7f4..32ecb5cadbaf 100644
> --- a/include/linux/mmzone.h
> +++ b/include/linux/mmzone.h
> @@ -2176,6 +2176,16 @@ void sparse_init(void);
>  #define subsection_map_init(_pfn, _nr_pages) do {} while (0)
>  #endif /* CONFIG_SPARSEMEM */
>
> +/*
> + * Fallback case for when the architecture provides its own pfn_valid() but
> + * not a corresponding for_each_valid_pfn().
> + */
> +#ifndef for_each_valid_pfn
> +#define for_each_valid_pfn(_pfn, _start_pfn, _end_pfn)	\
> +	for ((_pfn) = (_start_pfn); (_pfn) < (_end_pfn); (_pfn)++) \
> +		if (pfn_valid(_pfn))
> +#endif
> +
>  #endif /* !__GENERATING_BOUNDS.H */
>  #endif /* !__ASSEMBLY__ */
>  #endif /* _LINUX_MMZONE_H */
> diff --git a/mm/mm_init.c b/mm/mm_init.c
> index a38a1909b407..7c699bad42ad 100644
> --- a/mm/mm_init.c
> +++ b/mm/mm_init.c
> @@ -777,22 +777,19 @@ static inline void init_deferred_page(unsigned long pfn, int nid)
>  void __meminit reserve_bootmem_region(phys_addr_t start,
>  				      phys_addr_t end, int nid)
>  {
> -	unsigned long start_pfn = PFN_DOWN(start);
> -	unsigned long end_pfn = PFN_UP(end);
> +	unsigned long pfn;
>
> -	for (; start_pfn < end_pfn; start_pfn++) {
> -		if (pfn_valid(start_pfn)) {
> -			struct page *page = pfn_to_page(start_pfn);
> +	for_each_valid_pfn (pfn, PFN_DOWN(start), PFN_UP(end)) {
> +		struct page *page = pfn_to_page(pfn);
>
> -			init_deferred_page(start_pfn, nid);
> +		init_deferred_page(pfn, nid);
>
> -			/*
> -			 * no need for atomic set_bit because the struct
> -			 * page is not visible yet so nobody should
> -			 * access it yet.
> -			 */
> -			__SetPageReserved(page);
> -		}
> +		/*
> +		 * no need for atomic set_bit because the struct
> +		 * page is not visible yet so nobody should
> +		 * access it yet.
> +		 */
> +		__SetPageReserved(page);
>  	}
>  }
>
> --
> 2.49.0
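To make the semantics of the fallback concrete, here is a minimal userspace sketch. Only the for_each_valid_pfn() definition is taken from the patch; the pfn_valid() stub and the PFN range are invented purely for illustration and are not the kernel's implementations:

```c
#include <stdbool.h>
#include <stdio.h>

/* Illustrative stub: pretend only PFNs 2..5 are backed by memory.
 * The real pfn_valid() is architecture/memory-model specific. */
static bool pfn_valid(unsigned long pfn)
{
	return pfn >= 2 && pfn <= 5;
}

/* The fallback definition from the patch: the same naive linear scan
 * that reserve_bootmem_region() used to open-code, filtering every
 * PFN through pfn_valid(). */
#define for_each_valid_pfn(_pfn, _start_pfn, _end_pfn)		\
	for ((_pfn) = (_start_pfn); (_pfn) < (_end_pfn); (_pfn)++)	\
		if (pfn_valid(_pfn))

int main(void)
{
	unsigned long pfn;

	/* Visits PFNs 2, 3, 4 and 5. Note the loop still *tests* 0, 1,
	 * 6 and 7 one by one; that per-PFN cost is what the optimised
	 * FLATMEM/SPARSEMEM variants in the later patches are meant to
	 * avoid. */
	for_each_valid_pfn(pfn, 0, 8)
		printf("visiting valid pfn %lu\n", pfn);

	return 0;
}
```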
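One general caveat with for/if loop macros like this fallback, a property of C rather than anything raised in this thread: an unbraced `else` after a call site binds to the macro's hidden `if (pfn_valid(...))`, not to an enclosing `if`. A sketch of the pitfall, where `want_scan` and the `pfn_valid()` stub are hypothetical names invented for illustration:

```c
#include <stdbool.h>
#include <stdio.h>

static bool pfn_valid(unsigned long pfn)
{
	return pfn & 1;		/* illustrative stub: odd PFNs are "valid" */
}

/* Same fallback definition as in the patch. */
#define for_each_valid_pfn(_pfn, _start_pfn, _end_pfn)		\
	for ((_pfn) = (_start_pfn); (_pfn) < (_end_pfn); (_pfn)++)	\
		if (pfn_valid(_pfn))

int main(void)
{
	unsigned long pfn;
	bool want_scan = true;	/* hypothetical condition */

	if (want_scan)
		for_each_valid_pfn(pfn, 0, 4)
			printf("pfn %lu\n", pfn);
	else
		/* Intended as the !want_scan branch, but this else binds
		 * to the macro's hidden if, so it runs once per *invalid*
		 * PFN (here 0 and 2) whenever want_scan is true. */
		puts("no scan");

	return 0;
}
```

Bracing the enclosing `if` at the call site avoids the misbinding; this is a long-standing trade-off of open-coded for/if iterator macros.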