Message ID | 20210309214050.4674-4-osalvador@suse.de (mailing list archive) |
---|---|
State | New, archived |
Series | Cleanup and fixups for vmemmap handling |
Hi Oscar,

On Wed, 10 Mar 2021 at 03:11, Oscar Salvador <osalvador@suse.de> wrote:
>
> When sizeof(struct page) is not a power of 2, sections do not span
> a PMD anymore and so when populating them some parts of the PMD will
> remain unused.
> Because of this, PMDs will be left behind when depopulating sections
> since remove_pmd_table() thinks that those unused parts are still in
> use.
>
> Fix this by marking the unused parts with PAGE_UNUSED, so memchr_inv()
> will do the right thing and will let us free the PMD when the last user
> of it is gone.
>
> This patch is based on a similar patch by David Hildenbrand:
>
> https://lore.kernel.org/linux-mm/20200722094558.9828-9-david@redhat.com/
>
> Signed-off-by: Oscar Salvador <osalvador@suse.de>
> Reviewed-by: David Hildenbrand <david@redhat.com>
> Acked-by: Dave Hansen <dave.hansen@linux.intel.com>
> ---
>  arch/x86/mm/init_64.c | 65 +++++++++++++++++++++++++++++++++++++++++----------
>  1 file changed, 53 insertions(+), 12 deletions(-)
>
> diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
> index 9ecb3c488ac8..d93b36856ed3 100644
> --- a/arch/x86/mm/init_64.c
> +++ b/arch/x86/mm/init_64.c
> @@ -826,6 +826,51 @@ void __init paging_init(void)
>  	zone_sizes_init();
>  }
>
> +#ifdef CONFIG_SPARSEMEM_VMEMMAP
> +#define PAGE_UNUSED 0xFD
> +
> +/* Returns true if the PMD is completely unused and thus it can be freed */
> +static bool __meminit vmemmap_pmd_is_unused(unsigned long addr, unsigned long end)
> +{
> +	unsigned long start = ALIGN_DOWN(addr, PMD_SIZE);
> +
> +	memset((void *)addr, PAGE_UNUSED, end - addr);
> +
> +	return !memchr_inv((void *)start, PAGE_UNUSED, PMD_SIZE);
> +}
> +
> +static void __meminit vmemmap_use_sub_pmd(unsigned long start)
> +{
> +	/*
> +	 * As we expect to add in the same granularity as we remove, it's
> +	 * sufficient to mark only some piece used to block the memmap page from
> +	 * getting removed when removing some other adjacent memmap (just in
> +	 * case the first memmap never gets initialized e.g., because the memory
> +	 * block never gets onlined).
> +	 */
> +	memset((void *)start, 0, sizeof(struct page));
> +}
> +
> +static void __meminit vmemmap_use_new_sub_pmd(unsigned long start, unsigned long end)
> +{
> +	/*
> +	 * Could be our memmap page is filled with PAGE_UNUSED already from a
> +	 * previous remove. Make sure to reset it.
> +	 */
> +	vmemmap_use_sub_pmd(start);
> +
> +	/*
> +	 * Mark with PAGE_UNUSED the unused parts of the new memmap range
> +	 */
> +	if (!IS_ALIGNED(start, PMD_SIZE))
> +		memset((void *)start, PAGE_UNUSED,
> +		       start - ALIGN_DOWN(start, PMD_SIZE));
> +	if (!IS_ALIGNED(end, PMD_SIZE))
> +		memset((void *)end, PAGE_UNUSED,
> +		       ALIGN(end, PMD_SIZE) - end);
> +}
> +#endif
> +
>  /*
>   * Memory hotplug specific functions
>   */
> @@ -871,8 +916,6 @@ int arch_add_memory(int nid, u64 start, u64 size,
>  	return add_pages(nid, start_pfn, nr_pages, params);
>  }
>
> -#define PAGE_INUSE 0xFD
> -
>  static void __meminit free_pagetable(struct page *page, int order)
>  {
>  	unsigned long magic;
> @@ -1006,7 +1049,6 @@ remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end,
>  	unsigned long next, pages = 0;
>  	pte_t *pte_base;
>  	pmd_t *pmd;
> -	void *page_addr;
>
>  	pmd = pmd_start + pmd_index(addr);
>  	for (; addr < end; addr = next, pmd++) {
> @@ -1026,20 +1068,13 @@ remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end,
>  				pmd_clear(pmd);
>  				spin_unlock(&init_mm.page_table_lock);
>  				pages++;
> -			} else {
> -				/* If here, we are freeing vmemmap pages. */
> -				memset((void *)addr, PAGE_INUSE, next - addr);
> -
> -				page_addr = page_address(pmd_page(*pmd));
> -				if (!memchr_inv(page_addr, PAGE_INUSE,
> -						PMD_SIZE)) {
> +			} else if (IS_ENABLED(CONFIG_SPARSEMEM_VMEMMAP) &&
> +				   vmemmap_pmd_is_unused(addr, next)) {
>  				free_hugepage_table(pmd_page(*pmd),
>  						    altmap);
> -
>  				spin_lock(&init_mm.page_table_lock);
>  				pmd_clear(pmd);
>  				spin_unlock(&init_mm.page_table_lock);
> -				}
>  			}
>
>  			continue;
> @@ -1492,11 +1527,17 @@ static int __meminit vmemmap_populate_hugepages(unsigned long start,
>
>  			addr_end = addr + PMD_SIZE;
>  			p_end = p + PMD_SIZE;
> +
> +			if (!IS_ALIGNED(addr, PMD_SIZE) ||
> +			    !IS_ALIGNED(next, PMD_SIZE))
> +				vmemmap_use_new_sub_pmd(addr, next);
> +
>  			continue;
>  		} else if (altmap)
>  			return -ENOMEM; /* no fallback */
>  	} else if (pmd_large(*pmd)) {
>  		vmemmap_verify((pte_t *)pmd, node, addr, next);
> +		vmemmap_use_sub_pmd(addr);

While building the linux-next 20210310 tag for the x86_64 architecture
with clang-12 and gcc-9, the following warnings / errors were noticed:

arch/x86/mm/init_64.c:1585:6: error: implicit declaration of function
'vmemmap_use_new_sub_pmd' [-Werror,-Wimplicit-function-declaration]
                        vmemmap_use_new_sub_pmd(addr, next);
                        ^
arch/x86/mm/init_64.c:1591:4: error: implicit declaration of function
'vmemmap_use_sub_pmd' [-Werror,-Wimplicit-function-declaration]
                vmemmap_use_sub_pmd(addr, next);
                ^
2 errors generated.
make[3]: *** [scripts/Makefile.build:271: arch/x86/mm/init_64.o] Error 1

Reported-by: Naresh Kamboju <naresh.kamboju@linaro.org>

Steps to reproduce:
-------------------
# TuxMake is a command line tool and Python library that provides
# portable and repeatable Linux kernel builds across a variety of
# architectures, toolchains, kernel configurations, and make targets.
#
# TuxMake supports the concept of runtimes.
# See https://docs.tuxmake.org/runtimes/; for that to work it requires
# that you install podman or docker on your system.
#
# To install tuxmake on your system globally:
# sudo pip3 install -U tuxmake
#
# See https://docs.tuxmake.org/ for complete documentation.

tuxmake --runtime podman --target-arch x86_64 --toolchain clang-12 \
	--kconfig defconfig --kconfig-add \
	https://builds.tuxbuild.com/1pYCPt4WlgSfSdv1BULm6ABINeJ/config

Build pipeline error link:
https://gitlab.com/Linaro/lkft/mirrors/next/linux-next/-/jobs/1085496613#L428

--
Linaro LKFT
https://lkft.linaro.org
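The premise in the commit message above is plain arithmetic. On x86_64 a memory section is 128 MiB, i.e. 32768 base pages, and a PMD maps 2 MiB, so the memmap for one section occupies whole PMDs only when 32768 * sizeof(struct page) is a multiple of 2 MiB. Here is a small standalone sketch of that check (userspace C, not kernel code; the candidate struct sizes are illustrative, 64 bytes being the typical value):

```c
#include <stdio.h>

#define SECTION_SIZE (128UL << 20) /* x86_64 memory section: 128 MiB */
#define PMD_SIZE     (2UL << 20)   /* one PMD mapping: 2 MiB */
#define PAGE_SIZE    4096UL

int main(void)
{
	/* Candidate values for sizeof(struct page). */
	unsigned long sizes[] = { 64, 56, 80 };

	for (int i = 0; i < 3; i++) {
		/* Bytes of memmap needed to describe one section. */
		unsigned long memmap = (SECTION_SIZE / PAGE_SIZE) * sizes[i];

		printf("sizeof(struct page) = %lu -> %lu KiB of memmap per section, PMD-aligned: %s\n",
		       sizes[i], memmap >> 10,
		       memmap % PMD_SIZE == 0 ? "yes" : "no");
	}
	return 0;
}
```

With a 64-byte struct page, each section's memmap is exactly one 2 MiB PMD; with 56 bytes it is 1792 KiB, so a section's memmap ends mid-PMD and the neighbouring section shares the remainder, which is exactly the case remove_pmd_table() previously got wrong.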
On Wed, Mar 10, 2021 at 11:37:53AM +0530, Naresh Kamboju wrote:
> Hi Oscar,

Hi Naresh,

> While building the linux-next 20210310 tag for the x86_64 architecture
> with clang-12 and gcc-9, the following warnings / errors were noticed:
>
> arch/x86/mm/init_64.c:1585:6: error: implicit declaration of function
> 'vmemmap_use_new_sub_pmd' [-Werror,-Wimplicit-function-declaration]
>                         vmemmap_use_new_sub_pmd(addr, next);
>                         ^
> arch/x86/mm/init_64.c:1591:4: error: implicit declaration of function
> 'vmemmap_use_sub_pmd' [-Werror,-Wimplicit-function-declaration]
>                 vmemmap_use_sub_pmd(addr, next);
>                 ^
> 2 errors generated.
> make[3]: *** [scripts/Makefile.build:271: arch/x86/mm/init_64.o] Error 1
>
> Reported-by: Naresh Kamboju <naresh.kamboju@linaro.org>

Yes, this was also reported by Zi Yan here [1].

Looking into your .config, it seems to be the same issue, as you have
CONFIG_SPARSEMEM_VMEMMAP but !CONFIG_MEMORY_HOTPLUG.

This version fixes those compilation errors.

Thanks for reporting it anyway!

[1] https://lore.kernel.org/linux-mm/YEfoH8US4YVxak7r@localhost.localdomain/T/#ma566ff437ff4bf8fcc5f80f62cd0cc8761edd12d

> Steps to reproduce:
> -------------------
> # TuxMake is a command line tool and Python library that provides
> # portable and repeatable Linux kernel builds across a variety of
> # architectures, toolchains, kernel configurations, and make targets.
> #
> # TuxMake supports the concept of runtimes.
> # See https://docs.tuxmake.org/runtimes/; for that to work it requires
> # that you install podman or docker on your system.
> #
> # To install tuxmake on your system globally:
> # sudo pip3 install -U tuxmake
> #
> # See https://docs.tuxmake.org/ for complete documentation.
>
> tuxmake --runtime podman --target-arch x86_64 --toolchain clang-12 \
> 	--kconfig defconfig --kconfig-add \
> 	https://builds.tuxbuild.com/1pYCPt4WlgSfSdv1BULm6ABINeJ/config
>
> Build pipeline error link:
> https://gitlab.com/Linaro/lkft/mirrors/next/linux-next/-/jobs/1085496613#L428
>
> --
> Linaro LKFT
> https://lkft.linaro.org
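The failure mode Oscar describes is generic C rather than anything vmemmap-specific: one Kconfig guard compiles the helper definitions out while a different guard keeps the caller in, leaving the calls with no declaration in scope. A minimal sketch that reproduces it (the CONFIG_* names mirror the kernel options; the bodies and the populate() caller are stand-ins, not the real code):

```c
/*
 * gcc -DCONFIG_SPARSEMEM_VMEMMAP -Werror=implicit-function-declaration -c sketch.c
 * fails just like the report above; defining CONFIG_MEMORY_HOTPLUG as
 * well makes it build.
 */
#ifdef CONFIG_MEMORY_HOTPLUG		/* v5 kept the helpers in here... */
static void vmemmap_use_sub_pmd(unsigned long start)
{
	(void)start;			/* stand-in body */
}
#endif

#ifdef CONFIG_SPARSEMEM_VMEMMAP		/* ...but the caller only depends on this */
void populate(unsigned long addr)
{
	vmemmap_use_sub_pmd(addr);	/* implicit declaration when HOTPLUG=n */
}
#endif
```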
On 3/9/21 1:40 PM, Oscar Salvador wrote:
> +static void __meminit vmemmap_use_new_sub_pmd(unsigned long start, unsigned long end)
> +{
> +	/*
> +	 * Could be our memmap page is filled with PAGE_UNUSED already from a
> +	 * previous remove. Make sure to reset it.
> +	 */
> +	vmemmap_use_sub_pmd(start);
> +
> +	/*
> +	 * Mark with PAGE_UNUSED the unused parts of the new memmap range
> +	 */
> +	if (!IS_ALIGNED(start, PMD_SIZE))
> +		memset((void *)start, PAGE_UNUSED,
> +		       start - ALIGN_DOWN(start, PMD_SIZE));
> +	if (!IS_ALIGNED(end, PMD_SIZE))
> +		memset((void *)end, PAGE_UNUSED,
> +		       ALIGN(end, PMD_SIZE) - end);
> +}
> +#endif

This is apparently under both CONFIG_SPARSEMEM_VMEMMAP and
CONFIG_MEMORY_HOTPLUG #ifdefs. It errors out at compile-time with this
config: https://sr71.net/~dave/intel/config-mmotm-20210311

> linux.git/arch/x86/mm/init_64.c: In function 'vmemmap_populate_hugepages':
> linux.git/arch/x86/mm/init_64.c:1585:6: error: implicit declaration of function 'vmemmap_use_new_sub_pmd' [-Werror=implicit-function-declaration]
>    vmemmap_use_new_sub_pmd(addr, next);
>    ^~~~~~~~~~~~~~~~~~~~~~~
> /home/davehans/linux.git/arch/x86/mm/init_64.c:1591:4: error: implicit declaration of function 'vmemmap_use_sub_pmd' [-Werror=implicit-function-declaration]
>    vmemmap_use_sub_pmd(addr, next);
>    ^~~~~~~~~~~~~~~~~~~

I didn't see a quick fix other than #ifdef'ing the call sites, which is
pretty ugly.
On Thu, Mar 11, 2021 at 01:29:39PM -0800, Dave Hansen wrote:
> On 3/9/21 1:40 PM, Oscar Salvador wrote:
> > +static void __meminit vmemmap_use_new_sub_pmd(unsigned long start, unsigned long end)
> > +{
> > +	/*
> > +	 * Could be our memmap page is filled with PAGE_UNUSED already from a
> > +	 * previous remove. Make sure to reset it.
> > +	 */
> > +	vmemmap_use_sub_pmd(start);
> > +
> > +	/*
> > +	 * Mark with PAGE_UNUSED the unused parts of the new memmap range
> > +	 */
> > +	if (!IS_ALIGNED(start, PMD_SIZE))
> > +		memset((void *)start, PAGE_UNUSED,
> > +		       start - ALIGN_DOWN(start, PMD_SIZE));
> > +	if (!IS_ALIGNED(end, PMD_SIZE))
> > +		memset((void *)end, PAGE_UNUSED,
> > +		       ALIGN(end, PMD_SIZE) - end);
> > +}
> > +#endif
>
> This is apparently under both CONFIG_SPARSEMEM_VMEMMAP and
> CONFIG_MEMORY_HOTPLUG #ifdefs. It errors out at compile-time with this
> config: https://sr71.net/~dave/intel/config-mmotm-20210311

It seems that mmotm still has v5.
v6 (this one) fixed that up. I basically moved the code out of
MEMORY_HOTPLUG #ifdefs.

I could not reproduce your error on v6.
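The shape of that fix follows a common kernel pattern: rather than #ifdef'ing the call sites (the option Dave called ugly), keep the helper, or a stub for it, visible wherever the caller is compiled and let a compile-time-constant check discard the dead branch. A standalone sketch of the general idea, not the literal v6 layout; VMEMMAP_ENABLED is a local stand-in for the kernel's IS_ENABLED() macro, and the helper body is fake:

```c
#include <stdbool.h>
#include <stdio.h>

/*
 * Local stand-in for IS_ENABLED(CONFIG_SPARSEMEM_VMEMMAP); the real
 * macro comes from <linux/kconfig.h>.
 */
#ifdef CONFIG_SPARSEMEM_VMEMMAP
#define VMEMMAP_ENABLED 1
static bool vmemmap_pmd_is_unused(unsigned long addr, unsigned long end)
{
	return addr == end;	/* stand-in for the real marker check */
}
#else
#define VMEMMAP_ENABLED 0
/*
 * Stub so the gated call below still parses: "0 && f()" is discarded
 * only after parsing, so f() must be declared in every configuration.
 */
static inline bool vmemmap_pmd_is_unused(unsigned long addr, unsigned long end)
{
	return false;
}
#endif

int main(void)
{
	/* Compiles with and without -DCONFIG_SPARSEMEM_VMEMMAP. */
	if (VMEMMAP_ENABLED && vmemmap_pmd_is_unused(0, 0))
		puts("PMD can be freed");
	else
		puts("PMD stays");
	return 0;
}
```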
On Thu, Mar 11, 2021 at 10:48:34PM +0100, Oscar Salvador wrote:
> On Thu, Mar 11, 2021 at 01:29:39PM -0800, Dave Hansen wrote:
> > On 3/9/21 1:40 PM, Oscar Salvador wrote:
> > > +static void __meminit vmemmap_use_new_sub_pmd(unsigned long start, unsigned long end)
> > > +{
> > > +	/*
> > > +	 * Could be our memmap page is filled with PAGE_UNUSED already from a
> > > +	 * previous remove. Make sure to reset it.
> > > +	 */
> > > +	vmemmap_use_sub_pmd(start);
> > > +
> > > +	/*
> > > +	 * Mark with PAGE_UNUSED the unused parts of the new memmap range
> > > +	 */
> > > +	if (!IS_ALIGNED(start, PMD_SIZE))
> > > +		memset((void *)start, PAGE_UNUSED,
> > > +		       start - ALIGN_DOWN(start, PMD_SIZE));
> > > +	if (!IS_ALIGNED(end, PMD_SIZE))
> > > +		memset((void *)end, PAGE_UNUSED,
> > > +		       ALIGN(end, PMD_SIZE) - end);
> > > +}
> > > +#endif
> >
> > This is apparently under both CONFIG_SPARSEMEM_VMEMMAP and
> > CONFIG_MEMORY_HOTPLUG #ifdefs. It errors out at compile-time with this
> > config: https://sr71.net/~dave/intel/config-mmotm-20210311
>
> It seems that mmotm still has v5.
> v6 (this one) fixed that up. I basically moved the code out of
> MEMORY_HOTPLUG #ifdefs.
>
> I could not reproduce your error on v6.

I can reproduce this with next-20210329.

.config attached.
On Tue, Mar 30, 2021 at 03:29:50AM +0100, Matthew Wilcox wrote:
> I can reproduce this with next-20210329.
>
> .config attached.

Hi Matthew,

Thanks for the report.

I tried to reproduce this with the attached config on next-20210401 and
I had no luck. Do you still see it on that one?

Thanks
```diff
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 9ecb3c488ac8..d93b36856ed3 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -826,6 +826,51 @@ void __init paging_init(void)
 	zone_sizes_init();
 }
 
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+#define PAGE_UNUSED 0xFD
+
+/* Returns true if the PMD is completely unused and thus it can be freed */
+static bool __meminit vmemmap_pmd_is_unused(unsigned long addr, unsigned long end)
+{
+	unsigned long start = ALIGN_DOWN(addr, PMD_SIZE);
+
+	memset((void *)addr, PAGE_UNUSED, end - addr);
+
+	return !memchr_inv((void *)start, PAGE_UNUSED, PMD_SIZE);
+}
+
+static void __meminit vmemmap_use_sub_pmd(unsigned long start)
+{
+	/*
+	 * As we expect to add in the same granularity as we remove, it's
+	 * sufficient to mark only some piece used to block the memmap page from
+	 * getting removed when removing some other adjacent memmap (just in
+	 * case the first memmap never gets initialized e.g., because the memory
+	 * block never gets onlined).
+	 */
+	memset((void *)start, 0, sizeof(struct page));
+}
+
+static void __meminit vmemmap_use_new_sub_pmd(unsigned long start, unsigned long end)
+{
+	/*
+	 * Could be our memmap page is filled with PAGE_UNUSED already from a
+	 * previous remove. Make sure to reset it.
+	 */
+	vmemmap_use_sub_pmd(start);
+
+	/*
+	 * Mark with PAGE_UNUSED the unused parts of the new memmap range
+	 */
+	if (!IS_ALIGNED(start, PMD_SIZE))
+		memset((void *)start, PAGE_UNUSED,
+		       start - ALIGN_DOWN(start, PMD_SIZE));
+	if (!IS_ALIGNED(end, PMD_SIZE))
+		memset((void *)end, PAGE_UNUSED,
+		       ALIGN(end, PMD_SIZE) - end);
+}
+#endif
+
 /*
  * Memory hotplug specific functions
  */
@@ -871,8 +916,6 @@ int arch_add_memory(int nid, u64 start, u64 size,
 	return add_pages(nid, start_pfn, nr_pages, params);
 }
 
-#define PAGE_INUSE 0xFD
-
 static void __meminit free_pagetable(struct page *page, int order)
 {
 	unsigned long magic;
@@ -1006,7 +1049,6 @@ remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end,
 	unsigned long next, pages = 0;
 	pte_t *pte_base;
 	pmd_t *pmd;
-	void *page_addr;
 
 	pmd = pmd_start + pmd_index(addr);
 	for (; addr < end; addr = next, pmd++) {
@@ -1026,20 +1068,13 @@ remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end,
 				pmd_clear(pmd);
 				spin_unlock(&init_mm.page_table_lock);
 				pages++;
-			} else {
-				/* If here, we are freeing vmemmap pages. */
-				memset((void *)addr, PAGE_INUSE, next - addr);
-
-				page_addr = page_address(pmd_page(*pmd));
-				if (!memchr_inv(page_addr, PAGE_INUSE,
-						PMD_SIZE)) {
+			} else if (IS_ENABLED(CONFIG_SPARSEMEM_VMEMMAP) &&
+				   vmemmap_pmd_is_unused(addr, next)) {
 				free_hugepage_table(pmd_page(*pmd),
 						    altmap);
-
 				spin_lock(&init_mm.page_table_lock);
 				pmd_clear(pmd);
 				spin_unlock(&init_mm.page_table_lock);
-				}
 			}
 
 			continue;
@@ -1492,11 +1527,17 @@ static int __meminit vmemmap_populate_hugepages(unsigned long start,
 
 			addr_end = addr + PMD_SIZE;
 			p_end = p + PMD_SIZE;
+
+			if (!IS_ALIGNED(addr, PMD_SIZE) ||
+			    !IS_ALIGNED(next, PMD_SIZE))
+				vmemmap_use_new_sub_pmd(addr, next);
+
 			continue;
 		} else if (altmap)
 			return -ENOMEM; /* no fallback */
 	} else if (pmd_large(*pmd)) {
 		vmemmap_verify((pte_t *)pmd, node, addr, next);
+		vmemmap_use_sub_pmd(addr);
 		continue;
 	}
 	if (vmemmap_populate_basepages(addr, next, node, NULL))
```
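The bookkeeping the diff relies on can also be exercised in isolation: depopulating a sub-PMD range fills it with the 0xFD marker, and the PMD is freed only once no byte differs from that marker, i.e. once the last user is gone. Below is a userspace sketch of the mechanism (the 2 MiB PMD size and 0xFD value match the patch; memchr_inv() is kernel-internal, so a rough equivalent is open-coded, and a zeroed buffer stands in for in-use memmap):

```c
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define PMD_SIZE	(2UL << 20)	/* 2 MiB, as on x86_64 */
#define PAGE_UNUSED	0xFD		/* marker value from the patch */

/* Rough userspace equivalent of the kernel's memchr_inv(): return a
 * pointer to the first byte that differs from c, or NULL if none does. */
static const void *memchr_inv(const void *s, int c, size_t n)
{
	const unsigned char *p = s;

	for (size_t i = 0; i < n; i++)
		if (p[i] != (unsigned char)c)
			return p + i;
	return NULL;
}

static bool pmd_is_unused(unsigned char *pmd_base, size_t used_start, size_t used_end)
{
	/* Depopulate [used_start, used_end): mark it unused... */
	memset(pmd_base + used_start, PAGE_UNUSED, used_end - used_start);
	/* ...then the PMD may go only if *every* byte carries the marker. */
	return !memchr_inv(pmd_base, PAGE_UNUSED, PMD_SIZE);
}

int main(void)
{
	unsigned char *pmd = calloc(1, PMD_SIZE);	/* zeroed: "in use" */

	if (!pmd)
		return 1;

	/* Two sub-ranges share the PMD; removing only the first keeps it. */
	printf("after removing first half : %s\n",
	       pmd_is_unused(pmd, 0, PMD_SIZE / 2) ? "freeable" : "kept");
	printf("after removing second half: %s\n",
	       pmd_is_unused(pmd, PMD_SIZE / 2, PMD_SIZE) ? "freeable" : "kept");
	free(pmd);
	return 0;
}
```

Removing only the first half leaves non-marker bytes behind, so the PMD is kept; removing the second half as well leaves nothing for memchr_inv() to find, and the PMD becomes freeable, which is the same decision remove_pmd_table() now makes via vmemmap_pmd_is_unused().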