| Message ID | 20240521114830.841660-6-bjorn@kernel.org (mailing list archive) |
|---|---|
| State | New |
| Series | riscv: Memory Hot(Un)Plug support |
On Tue, May 21, 2024 at 1:49 PM Björn Töpel <bjorn@kernel.org> wrote:
>
> From: Björn Töpel <bjorn@rivosinc.com>
>
> For an architecture to support memory hotplugging, a couple of
> callbacks need to be implemented:
>
> arch_add_memory()
> This callback is responsible for adding the physical memory into the
> direct map, and calls into the memory hotplugging generic code via
> __add_pages() that adds the corresponding struct page entries, and
> updates the vmemmap mapping.
>
> arch_remove_memory()
> This is the inverse of the callback above.
>
> vmemmap_free()
> This function tears down the vmemmap mappings (if
> CONFIG_SPARSEMEM_VMEMMAP is enabled), and also deallocates the
> backing vmemmap pages. Note that for persistent memory, an
> alternative allocator for the backing pages can be used; the
> vmem_altmap. This means that when the backing pages are cleared,
> extra care is needed so that the correct deallocation method is
> used.
>
> arch_get_mappable_range()
> This function returns the PA range that the direct map can map.
> Used by the MHP internals for sanity checks.
>
> The page table unmap/teardown functions are heavily based on code from
> the x86 tree. The same remove_pgd_mapping() function is used in both
> vmemmap_free() and arch_remove_memory(), but in the latter function
> the backing pages are not removed.
>
> Signed-off-by: Björn Töpel <bjorn@rivosinc.com>
> ---
>  arch/riscv/mm/init.c | 261 +++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 261 insertions(+)
>
> diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
> index 6f72b0b2b854..6693b742bf2f 100644
> --- a/arch/riscv/mm/init.c
> +++ b/arch/riscv/mm/init.c
> @@ -1493,3 +1493,264 @@ void __init pgtable_cache_init(void)
>  	}
>  }
>  #endif
> +
> +#ifdef CONFIG_MEMORY_HOTPLUG
> +static void __meminit free_pagetable(struct page *page, int order)
> +{
> +	unsigned int nr_pages = 1 << order;
> +
> +	/*
> +	 * vmemmap/direct page tables can be reserved, if added at
> +	 * boot.
> +	 */
> +	if (PageReserved(page)) {
> +		__ClearPageReserved(page);

What's the difference between __ClearPageReserved() and
ClearPageReserved()? Because it seems like free_reserved_page() calls
the latter already, so why would you need to call
__ClearPageReserved() on the first page?

> +		while (nr_pages--)
> +			free_reserved_page(page++);
> +		return;
> +	}
> +
> +	free_pages((unsigned long)page_address(page), order);
> +}
> +
> +static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd)
> +{
> +	pte_t *pte;
> +	int i;
> +
> +	for (i = 0; i < PTRS_PER_PTE; i++) {
> +		pte = pte_start + i;
> +		if (!pte_none(*pte))
> +			return;
> +	}
> +
> +	free_pagetable(pmd_page(*pmd), 0);
> +	pmd_clear(pmd);
> +}
> +
> +static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud)
> +{
> +	pmd_t *pmd;
> +	int i;
> +
> +	for (i = 0; i < PTRS_PER_PMD; i++) {
> +		pmd = pmd_start + i;
> +		if (!pmd_none(*pmd))
> +			return;
> +	}
> +
> +	free_pagetable(pud_page(*pud), 0);
> +	pud_clear(pud);
> +}
> +
> +static void __meminit free_pud_table(pud_t *pud_start, p4d_t *p4d)
> +{
> +	pud_t *pud;
> +	int i;
> +
> +	for (i = 0; i < PTRS_PER_PUD; i++) {
> +		pud = pud_start + i;
> +		if (!pud_none(*pud))
> +			return;
> +	}
> +
> +	free_pagetable(p4d_page(*p4d), 0);
> +	p4d_clear(p4d);
> +}
> +
> +static void __meminit free_vmemmap_storage(struct page *page, size_t size,
> +					   struct vmem_altmap *altmap)
> +{
> +	if (altmap)
> +		vmem_altmap_free(altmap, size >> PAGE_SHIFT);
> +	else
> +		free_pagetable(page, get_order(size));
> +}
> +
> +static void __meminit remove_pte_mapping(pte_t *pte_base, unsigned long addr, unsigned long end,
> +					 bool is_vmemmap, struct vmem_altmap *altmap)
> +{
> +	unsigned long next;
> +	pte_t *ptep, pte;
> +
> +	for (; addr < end; addr = next) {
> +		next = (addr + PAGE_SIZE) & PAGE_MASK;

Nit: use ALIGN() instead.

> +		if (next > end)
> +			next = end;
> +
> +		ptep = pte_base + pte_index(addr);
> +		pte = READ_ONCE(*ptep);

Nit: Use ptep_get()

> +
> +		if (!pte_present(*ptep))
> +			continue;
> +
> +		pte_clear(&init_mm, addr, ptep);
> +		if (is_vmemmap)
> +			free_vmemmap_storage(pte_page(pte), PAGE_SIZE, altmap);
> +	}
> +}
> +
> +static void __meminit remove_pmd_mapping(pmd_t *pmd_base, unsigned long addr, unsigned long end,
> +					 bool is_vmemmap, struct vmem_altmap *altmap)
> +{
> +	unsigned long next;
> +	pte_t *pte_base;
> +	pmd_t *pmdp, pmd;
> +
> +	for (; addr < end; addr = next) {
> +		next = pmd_addr_end(addr, end);
> +		pmdp = pmd_base + pmd_index(addr);
> +		pmd = READ_ONCE(*pmdp);

Nit: Use pmdp_get()

> +
> +		if (!pmd_present(pmd))
> +			continue;
> +
> +		if (pmd_leaf(pmd)) {
> +			pmd_clear(pmdp);
> +			if (is_vmemmap)
> +				free_vmemmap_storage(pmd_page(pmd), PMD_SIZE, altmap);
> +			continue;
> +		}
> +
> +		pte_base = (pte_t *)pmd_page_vaddr(*pmdp);
> +		remove_pte_mapping(pte_base, addr, next, is_vmemmap, altmap);
> +		free_pte_table(pte_base, pmdp);
> +	}
> +}
> +
> +static void __meminit remove_pud_mapping(pud_t *pud_base, unsigned long addr, unsigned long end,
> +					 bool is_vmemmap, struct vmem_altmap *altmap)
> +{
> +	unsigned long next;
> +	pud_t *pudp, pud;
> +	pmd_t *pmd_base;
> +
> +	for (; addr < end; addr = next) {
> +		next = pud_addr_end(addr, end);
> +		pudp = pud_base + pud_index(addr);
> +		pud = READ_ONCE(*pudp);

Nit: Use pudp_get()

> +
> +		if (!pud_present(pud))
> +			continue;
> +
> +		if (pud_leaf(pud)) {
> +			if (pgtable_l4_enabled) {
> +				pud_clear(pudp);
> +				if (is_vmemmap)
> +					free_vmemmap_storage(pud_page(pud), PUD_SIZE, altmap);
> +			}
> +			continue;
> +		}
> +
> +		pmd_base = pmd_offset(pudp, 0);
> +		remove_pmd_mapping(pmd_base, addr, next, is_vmemmap, altmap);
> +
> +		if (pgtable_l4_enabled)
> +			free_pmd_table(pmd_base, pudp);
> +	}
> +}
> +
> +static void __meminit remove_p4d_mapping(p4d_t *p4d_base, unsigned long addr, unsigned long end,
> +					 bool is_vmemmap, struct vmem_altmap *altmap)
> +{
> +	unsigned long next;
> +	p4d_t *p4dp, p4d;
> +	pud_t *pud_base;
> +
> +	for (; addr < end; addr = next) {
> +		next = p4d_addr_end(addr, end);
> +		p4dp = p4d_base + p4d_index(addr);
> +		p4d = READ_ONCE(*p4dp);

Nit: Use p4dp_get()

> +
> +		if (!p4d_present(p4d))
> +			continue;
> +
> +		if (p4d_leaf(p4d)) {
> +			if (pgtable_l5_enabled) {
> +				p4d_clear(p4dp);
> +				if (is_vmemmap)
> +					free_vmemmap_storage(p4d_page(p4d), P4D_SIZE, altmap);
> +			}
> +			continue;
> +		}
> +
> +		pud_base = pud_offset(p4dp, 0);
> +		remove_pud_mapping(pud_base, addr, next, is_vmemmap, altmap);
> +
> +		if (pgtable_l5_enabled)
> +			free_pud_table(pud_base, p4dp);
> +	}
> +}
> +
> +static void __meminit remove_pgd_mapping(unsigned long va, unsigned long end, bool is_vmemmap,
> +					 struct vmem_altmap *altmap)
> +{
> +	unsigned long addr, next;
> +	p4d_t *p4d_base;
> +	pgd_t *pgd;
> +
> +	for (addr = va; addr < end; addr = next) {
> +		next = pgd_addr_end(addr, end);
> +		pgd = pgd_offset_k(addr);
> +
> +		if (!pgd_present(*pgd))
> +			continue;
> +
> +		if (pgd_leaf(*pgd))
> +			continue;
> +
> +		p4d_base = p4d_offset(pgd, 0);
> +		remove_p4d_mapping(p4d_base, addr, next, is_vmemmap, altmap);
> +	}
> +
> +	flush_tlb_all();
> +}
> +
> +static void __meminit remove_linear_mapping(phys_addr_t start, u64 size)
> +{
> +	unsigned long va = (unsigned long)__va(start);
> +	unsigned long end = (unsigned long)__va(start + size);
> +
> +	remove_pgd_mapping(va, end, false, NULL);
> +}
> +
> +struct range arch_get_mappable_range(void)
> +{
> +	struct range mhp_range;
> +
> +	mhp_range.start = __pa(PAGE_OFFSET);
> +	mhp_range.end = __pa(PAGE_END - 1);
> +	return mhp_range;
> +}
> +
> +int __ref arch_add_memory(int nid, u64 start, u64 size, struct mhp_params *params)
> +{
> +	int ret = 0;
> +
> +	create_linear_mapping_range(start, start + size, 0, &params->pgprot);
> +	ret = __add_pages(nid, start >> PAGE_SHIFT, size >> PAGE_SHIFT, params);
> +	if (ret) {
> +		remove_linear_mapping(start, size);
> +		goto out;
> +	}
> +
> +	max_pfn = PFN_UP(start + size);
> +	max_low_pfn = max_pfn;
> +
> + out:
> +	flush_tlb_all();
> +	return ret;
> +}
> +
> +void __ref arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap)
> +{
> +	__remove_pages(start >> PAGE_SHIFT, size >> PAGE_SHIFT, altmap);
> +	remove_linear_mapping(start, size);
> +	flush_tlb_all();
> +}
> +
> +void __ref vmemmap_free(unsigned long start, unsigned long end, struct vmem_altmap *altmap)
> +{
> +	remove_pgd_mapping(start, end, true, altmap);
> +}
> +#endif /* CONFIG_MEMORY_HOTPLUG */
> --
> 2.40.1
>
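For orientation while reading the review, the teardown call graph introduced by the patch is summarized below; this is derived from the quoted code above, not an addition to the patch:

```c
/*
 * Teardown call graph of the patch under review:
 *
 *   arch_remove_memory(start, size, altmap)
 *       __remove_pages()                         // generic MHP core
 *       remove_linear_mapping(start, size)
 *           remove_pgd_mapping(va, end, is_vmemmap=false, NULL)
 *
 *   vmemmap_free(start, end, altmap)
 *       remove_pgd_mapping(start, end, is_vmemmap=true, altmap)
 *
 *   remove_pgd_mapping()
 *       remove_p4d_mapping()
 *           remove_pud_mapping()
 *               remove_pmd_mapping()
 *                   remove_pte_mapping()
 *                   free_pte_table()             // frees emptied tables
 *               free_pmd_table()
 *           free_pud_table()
 *       flush_tlb_all()
 *
 * Only the is_vmemmap=true path calls free_vmemmap_storage(), which
 * returns the backing pages either to a vmem_altmap or to the page
 * allocator; the direct-map path leaves the backing pages alone.
 */
```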
Alexandre Ghiti <alexghiti@rivosinc.com> writes:

> On Tue, May 21, 2024 at 1:49 PM Björn Töpel <bjorn@kernel.org> wrote:
>>
>> From: Björn Töpel <bjorn@rivosinc.com>
>>
>> For an architecture to support memory hotplugging, a couple of
>> callbacks need to be implemented:
>>
>> arch_add_memory()
>> This callback is responsible for adding the physical memory into the
>> direct map, and calls into the memory hotplugging generic code via
>> __add_pages() that adds the corresponding struct page entries, and
>> updates the vmemmap mapping.
>>
>> arch_remove_memory()
>> This is the inverse of the callback above.
>>
>> vmemmap_free()
>> This function tears down the vmemmap mappings (if
>> CONFIG_SPARSEMEM_VMEMMAP is enabled), and also deallocates the
>> backing vmemmap pages. Note that for persistent memory, an
>> alternative allocator for the backing pages can be used; the
>> vmem_altmap. This means that when the backing pages are cleared,
>> extra care is needed so that the correct deallocation method is
>> used.
>>
>> arch_get_mappable_range()
>> This function returns the PA range that the direct map can map.
>> Used by the MHP internals for sanity checks.
>>
>> The page table unmap/teardown functions are heavily based on code from
>> the x86 tree. The same remove_pgd_mapping() function is used in both
>> vmemmap_free() and arch_remove_memory(), but in the latter function
>> the backing pages are not removed.
>>
>> Signed-off-by: Björn Töpel <bjorn@rivosinc.com>
>> ---
>>  arch/riscv/mm/init.c | 261 +++++++++++++++++++++++++++++++++++++++++++
>>  1 file changed, 261 insertions(+)
>>
>> diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
>> index 6f72b0b2b854..6693b742bf2f 100644
>> --- a/arch/riscv/mm/init.c
>> +++ b/arch/riscv/mm/init.c
>> @@ -1493,3 +1493,264 @@ void __init pgtable_cache_init(void)
>>  	}
>>  }
>>  #endif
>> +
>> +#ifdef CONFIG_MEMORY_HOTPLUG
>> +static void __meminit free_pagetable(struct page *page, int order)
>> +{
>> +	unsigned int nr_pages = 1 << order;
>> +
>> +	/*
>> +	 * vmemmap/direct page tables can be reserved, if added at
>> +	 * boot.
>> +	 */
>> +	if (PageReserved(page)) {
>> +		__ClearPageReserved(page);
>
> What's the difference between __ClearPageReserved() and
> ClearPageReserved()? Because it seems like free_reserved_page() calls
> the latter already, so why would you need to call
> __ClearPageReserved() on the first page?

Indeed! x86 copy pasta (which uses bootmem info page that RV doesn't).

>> +		while (nr_pages--)
>> +			free_reserved_page(page++);
>> +		return;
>> +	}
>> +
>> +	free_pages((unsigned long)page_address(page), order);
>> +}
>> +
>> +static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd)
>> +{
>> +	pte_t *pte;
>> +	int i;
>> +
>> +	for (i = 0; i < PTRS_PER_PTE; i++) {
>> +		pte = pte_start + i;
>> +		if (!pte_none(*pte))
>> +			return;
>> +	}
>> +
>> +	free_pagetable(pmd_page(*pmd), 0);
>> +	pmd_clear(pmd);
>> +}
>> +
>> +static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud)
>> +{
>> +	pmd_t *pmd;
>> +	int i;
>> +
>> +	for (i = 0; i < PTRS_PER_PMD; i++) {
>> +		pmd = pmd_start + i;
>> +		if (!pmd_none(*pmd))
>> +			return;
>> +	}
>> +
>> +	free_pagetable(pud_page(*pud), 0);
>> +	pud_clear(pud);
>> +}
>> +
>> +static void __meminit free_pud_table(pud_t *pud_start, p4d_t *p4d)
>> +{
>> +	pud_t *pud;
>> +	int i;
>> +
>> +	for (i = 0; i < PTRS_PER_PUD; i++) {
>> +		pud = pud_start + i;
>> +		if (!pud_none(*pud))
>> +			return;
>> +	}
>> +
>> +	free_pagetable(p4d_page(*p4d), 0);
>> +	p4d_clear(p4d);
>> +}
>> +
>> +static void __meminit free_vmemmap_storage(struct page *page, size_t size,
>> +					   struct vmem_altmap *altmap)
>> +{
>> +	if (altmap)
>> +		vmem_altmap_free(altmap, size >> PAGE_SHIFT);
>> +	else
>> +		free_pagetable(page, get_order(size));
>> +}
>> +
>> +static void __meminit remove_pte_mapping(pte_t *pte_base, unsigned long addr, unsigned long end,
>> +					 bool is_vmemmap, struct vmem_altmap *altmap)
>> +{
>> +	unsigned long next;
>> +	pte_t *ptep, pte;
>> +
>> +	for (; addr < end; addr = next) {
>> +		next = (addr + PAGE_SIZE) & PAGE_MASK;
>
> Nit: use ALIGN() instead.
>
>> +		if (next > end)
>> +			next = end;
>> +
>> +		ptep = pte_base + pte_index(addr);
>> +		pte = READ_ONCE(*ptep);
>
> Nit: Use ptep_get()
>
>> +
>> +		if (!pte_present(*ptep))
>> +			continue;
>> +
>> +		pte_clear(&init_mm, addr, ptep);
>> +		if (is_vmemmap)
>> +			free_vmemmap_storage(pte_page(pte), PAGE_SIZE, altmap);
>> +	}
>> +}
>> +
>> +static void __meminit remove_pmd_mapping(pmd_t *pmd_base, unsigned long addr, unsigned long end,
>> +					 bool is_vmemmap, struct vmem_altmap *altmap)
>> +{
>> +	unsigned long next;
>> +	pte_t *pte_base;
>> +	pmd_t *pmdp, pmd;
>> +
>> +	for (; addr < end; addr = next) {
>> +		next = pmd_addr_end(addr, end);
>> +		pmdp = pmd_base + pmd_index(addr);
>> +		pmd = READ_ONCE(*pmdp);
>
> Nit: Use pmdp_get()
>
>> +
>> +		if (!pmd_present(pmd))
>> +			continue;
>> +
>> +		if (pmd_leaf(pmd)) {
>> +			pmd_clear(pmdp);
>> +			if (is_vmemmap)
>> +				free_vmemmap_storage(pmd_page(pmd), PMD_SIZE, altmap);
>> +			continue;
>> +		}
>> +
>> +		pte_base = (pte_t *)pmd_page_vaddr(*pmdp);
>> +		remove_pte_mapping(pte_base, addr, next, is_vmemmap, altmap);
>> +		free_pte_table(pte_base, pmdp);
>> +	}
>> +}
>> +
>> +static void __meminit remove_pud_mapping(pud_t *pud_base, unsigned long addr, unsigned long end,
>> +					 bool is_vmemmap, struct vmem_altmap *altmap)
>> +{
>> +	unsigned long next;
>> +	pud_t *pudp, pud;
>> +	pmd_t *pmd_base;
>> +
>> +	for (; addr < end; addr = next) {
>> +		next = pud_addr_end(addr, end);
>> +		pudp = pud_base + pud_index(addr);
>> +		pud = READ_ONCE(*pudp);
>
> Nit: Use pudp_get()
>
>> +
>> +		if (!pud_present(pud))
>> +			continue;
>> +
>> +		if (pud_leaf(pud)) {
>> +			if (pgtable_l4_enabled) {
>> +				pud_clear(pudp);
>> +				if (is_vmemmap)
>> +					free_vmemmap_storage(pud_page(pud), PUD_SIZE, altmap);
>> +			}
>> +			continue;
>> +		}
>> +
>> +		pmd_base = pmd_offset(pudp, 0);
>> +		remove_pmd_mapping(pmd_base, addr, next, is_vmemmap, altmap);
>> +
>> +		if (pgtable_l4_enabled)
>> +			free_pmd_table(pmd_base, pudp);
>> +	}
>> +}
>> +
>> +static void __meminit remove_p4d_mapping(p4d_t *p4d_base, unsigned long addr, unsigned long end,
>> +					 bool is_vmemmap, struct vmem_altmap *altmap)
>> +{
>> +	unsigned long next;
>> +	p4d_t *p4dp, p4d;
>> +	pud_t *pud_base;
>> +
>> +	for (; addr < end; addr = next) {
>> +		next = p4d_addr_end(addr, end);
>> +		p4dp = p4d_base + p4d_index(addr);
>> +		p4d = READ_ONCE(*p4dp);
>
> Nit: Use p4dp_get()

...and I'll make sure to address these nits as well.

Thanks!
Björn
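The nits are mechanical to apply; as an illustration, remove_pte_mapping() with ALIGN() and ptep_get() folded in might look like the sketch below (the actual respin may differ):

```c
/*
 * Sketch only: remove_pte_mapping() with the review nits applied.
 * ALIGN() replaces the open-coded mask arithmetic, ptep_get()
 * replaces READ_ONCE(), and the already-read pte value is used for
 * the present check instead of dereferencing ptep a second time.
 */
static void __meminit remove_pte_mapping(pte_t *pte_base, unsigned long addr, unsigned long end,
					 bool is_vmemmap, struct vmem_altmap *altmap)
{
	unsigned long next;
	pte_t *ptep, pte;

	for (; addr < end; addr = next) {
		/* Step to the next page boundary, clamped to end. */
		next = ALIGN(addr + 1, PAGE_SIZE);
		if (next > end)
			next = end;

		ptep = pte_base + pte_index(addr);
		pte = ptep_get(ptep);

		if (!pte_present(pte))
			continue;

		pte_clear(&init_mm, addr, ptep);
		if (is_vmemmap)
			free_vmemmap_storage(pte_page(pte), PAGE_SIZE, altmap);
	}
}
```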
On Tue, May 21, 2024 at 03:19:37PM +0200, Alexandre Ghiti wrote:
> On Tue, May 21, 2024 at 1:49 PM Björn Töpel <bjorn@kernel.org> wrote:
> > +	if (PageReserved(page)) {
> > +		__ClearPageReserved(page);
>
> What's the difference between __ClearPageReserved() and
> ClearPageReserved()? Because it seems like free_reserved_page() calls
> the latter already, so why would you need to call
> __ClearPageReserved() on the first page?

__{Set,Clear}Page are the non-atomic version. Usually used when you
know that no one else can fiddle with the page, which should be the
case here since we are removing the memory.

As to why we have __ClearPageReserved and then have
free_reserved_page() call ClearPageReserved, I do not really know.
Looking at the history, it has always been like this. I remember I
looked at this a few years ago, but I cannot remember the outcome.

Maybe David remembers better, but I think we could remove that
__ClearPageReserved. Looking at the powerpc implementation, it does
not do the __ClearPageReserved and relies only on
free_reserved_page(). I will have a look.
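If free_reserved_page() alone is indeed sufficient, the hunk shrinks to something like the sketch below, following the powerpc pattern mentioned above and assuming riscv has no bootmem info pages to account for:

```c
/*
 * Sketch, not the merged code: rely on free_reserved_page() to clear
 * PG_reserved (it calls ClearPageReserved() itself), dropping the
 * extra __ClearPageReserved() on the first page.
 */
static void __meminit free_pagetable(struct page *page, int order)
{
	unsigned int nr_pages = 1 << order;

	/* vmemmap/direct page tables can be reserved, if added at boot. */
	if (PageReserved(page)) {
		while (nr_pages--)
			free_reserved_page(page++);
		return;
	}

	free_pages((unsigned long)page_address(page), order);
}
```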
```diff
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index 6f72b0b2b854..6693b742bf2f 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -1493,3 +1493,264 @@ void __init pgtable_cache_init(void)
 	}
 }
 #endif
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+static void __meminit free_pagetable(struct page *page, int order)
+{
+	unsigned int nr_pages = 1 << order;
+
+	/*
+	 * vmemmap/direct page tables can be reserved, if added at
+	 * boot.
+	 */
+	if (PageReserved(page)) {
+		__ClearPageReserved(page);
+		while (nr_pages--)
+			free_reserved_page(page++);
+		return;
+	}
+
+	free_pages((unsigned long)page_address(page), order);
+}
+
+static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd)
+{
+	pte_t *pte;
+	int i;
+
+	for (i = 0; i < PTRS_PER_PTE; i++) {
+		pte = pte_start + i;
+		if (!pte_none(*pte))
+			return;
+	}
+
+	free_pagetable(pmd_page(*pmd), 0);
+	pmd_clear(pmd);
+}
+
+static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud)
+{
+	pmd_t *pmd;
+	int i;
+
+	for (i = 0; i < PTRS_PER_PMD; i++) {
+		pmd = pmd_start + i;
+		if (!pmd_none(*pmd))
+			return;
+	}
+
+	free_pagetable(pud_page(*pud), 0);
+	pud_clear(pud);
+}
+
+static void __meminit free_pud_table(pud_t *pud_start, p4d_t *p4d)
+{
+	pud_t *pud;
+	int i;
+
+	for (i = 0; i < PTRS_PER_PUD; i++) {
+		pud = pud_start + i;
+		if (!pud_none(*pud))
+			return;
+	}
+
+	free_pagetable(p4d_page(*p4d), 0);
+	p4d_clear(p4d);
+}
+
+static void __meminit free_vmemmap_storage(struct page *page, size_t size,
+					   struct vmem_altmap *altmap)
+{
+	if (altmap)
+		vmem_altmap_free(altmap, size >> PAGE_SHIFT);
+	else
+		free_pagetable(page, get_order(size));
+}
+
+static void __meminit remove_pte_mapping(pte_t *pte_base, unsigned long addr, unsigned long end,
+					 bool is_vmemmap, struct vmem_altmap *altmap)
+{
+	unsigned long next;
+	pte_t *ptep, pte;
+
+	for (; addr < end; addr = next) {
+		next = (addr + PAGE_SIZE) & PAGE_MASK;
+		if (next > end)
+			next = end;
+
+		ptep = pte_base + pte_index(addr);
+		pte = READ_ONCE(*ptep);
+
+		if (!pte_present(*ptep))
+			continue;
+
+		pte_clear(&init_mm, addr, ptep);
+		if (is_vmemmap)
+			free_vmemmap_storage(pte_page(pte), PAGE_SIZE, altmap);
+	}
+}
+
+static void __meminit remove_pmd_mapping(pmd_t *pmd_base, unsigned long addr, unsigned long end,
+					 bool is_vmemmap, struct vmem_altmap *altmap)
+{
+	unsigned long next;
+	pte_t *pte_base;
+	pmd_t *pmdp, pmd;
+
+	for (; addr < end; addr = next) {
+		next = pmd_addr_end(addr, end);
+		pmdp = pmd_base + pmd_index(addr);
+		pmd = READ_ONCE(*pmdp);
+
+		if (!pmd_present(pmd))
+			continue;
+
+		if (pmd_leaf(pmd)) {
+			pmd_clear(pmdp);
+			if (is_vmemmap)
+				free_vmemmap_storage(pmd_page(pmd), PMD_SIZE, altmap);
+			continue;
+		}
+
+		pte_base = (pte_t *)pmd_page_vaddr(*pmdp);
+		remove_pte_mapping(pte_base, addr, next, is_vmemmap, altmap);
+		free_pte_table(pte_base, pmdp);
+	}
+}
+
+static void __meminit remove_pud_mapping(pud_t *pud_base, unsigned long addr, unsigned long end,
+					 bool is_vmemmap, struct vmem_altmap *altmap)
+{
+	unsigned long next;
+	pud_t *pudp, pud;
+	pmd_t *pmd_base;
+
+	for (; addr < end; addr = next) {
+		next = pud_addr_end(addr, end);
+		pudp = pud_base + pud_index(addr);
+		pud = READ_ONCE(*pudp);
+
+		if (!pud_present(pud))
+			continue;
+
+		if (pud_leaf(pud)) {
+			if (pgtable_l4_enabled) {
+				pud_clear(pudp);
+				if (is_vmemmap)
+					free_vmemmap_storage(pud_page(pud), PUD_SIZE, altmap);
+			}
+			continue;
+		}
+
+		pmd_base = pmd_offset(pudp, 0);
+		remove_pmd_mapping(pmd_base, addr, next, is_vmemmap, altmap);
+
+		if (pgtable_l4_enabled)
+			free_pmd_table(pmd_base, pudp);
+	}
+}
+
+static void __meminit remove_p4d_mapping(p4d_t *p4d_base, unsigned long addr, unsigned long end,
+					 bool is_vmemmap, struct vmem_altmap *altmap)
+{
+	unsigned long next;
+	p4d_t *p4dp, p4d;
+	pud_t *pud_base;
+
+	for (; addr < end; addr = next) {
+		next = p4d_addr_end(addr, end);
+		p4dp = p4d_base + p4d_index(addr);
+		p4d = READ_ONCE(*p4dp);
+
+		if (!p4d_present(p4d))
+			continue;
+
+		if (p4d_leaf(p4d)) {
+			if (pgtable_l5_enabled) {
+				p4d_clear(p4dp);
+				if (is_vmemmap)
+					free_vmemmap_storage(p4d_page(p4d), P4D_SIZE, altmap);
+			}
+			continue;
+		}
+
+		pud_base = pud_offset(p4dp, 0);
+		remove_pud_mapping(pud_base, addr, next, is_vmemmap, altmap);
+
+		if (pgtable_l5_enabled)
+			free_pud_table(pud_base, p4dp);
+	}
+}
+
+static void __meminit remove_pgd_mapping(unsigned long va, unsigned long end, bool is_vmemmap,
+					 struct vmem_altmap *altmap)
+{
+	unsigned long addr, next;
+	p4d_t *p4d_base;
+	pgd_t *pgd;
+
+	for (addr = va; addr < end; addr = next) {
+		next = pgd_addr_end(addr, end);
+		pgd = pgd_offset_k(addr);
+
+		if (!pgd_present(*pgd))
+			continue;
+
+		if (pgd_leaf(*pgd))
+			continue;
+
+		p4d_base = p4d_offset(pgd, 0);
+		remove_p4d_mapping(p4d_base, addr, next, is_vmemmap, altmap);
+	}
+
+	flush_tlb_all();
+}
+
+static void __meminit remove_linear_mapping(phys_addr_t start, u64 size)
+{
+	unsigned long va = (unsigned long)__va(start);
+	unsigned long end = (unsigned long)__va(start + size);
+
+	remove_pgd_mapping(va, end, false, NULL);
+}
+
+struct range arch_get_mappable_range(void)
+{
+	struct range mhp_range;
+
+	mhp_range.start = __pa(PAGE_OFFSET);
+	mhp_range.end = __pa(PAGE_END - 1);
+	return mhp_range;
+}
+
+int __ref arch_add_memory(int nid, u64 start, u64 size, struct mhp_params *params)
+{
+	int ret = 0;
+
+	create_linear_mapping_range(start, start + size, 0, &params->pgprot);
+	ret = __add_pages(nid, start >> PAGE_SHIFT, size >> PAGE_SHIFT, params);
+	if (ret) {
+		remove_linear_mapping(start, size);
+		goto out;
+	}
+
+	max_pfn = PFN_UP(start + size);
+	max_low_pfn = max_pfn;
+
+ out:
+	flush_tlb_all();
+	return ret;
+}
+
+void __ref arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap)
+{
+	__remove_pages(start >> PAGE_SHIFT, size >> PAGE_SHIFT, altmap);
+	remove_linear_mapping(start, size);
+	flush_tlb_all();
+}
+
+void __ref vmemmap_free(unsigned long start, unsigned long end, struct vmem_altmap *altmap)
+{
+	remove_pgd_mapping(start, end, true, altmap);
+}
+#endif /* CONFIG_MEMORY_HOTPLUG */
```
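A closing note on arch_get_mappable_range(): the generic MHP core validates hot(un)plug requests against this range before arch_add_memory() is ever reached (mhp_range_allowed() in mm/memory_hotplug.c). The helper below is purely illustrative, showing the bounds check the returned range enables; it is not part of the patch:

```c
/*
 * Illustrative only: an equivalent check is performed by the generic
 * memory hotplug core. The range covers exactly the physical
 * addresses the riscv direct map can reach.
 */
static bool riscv_range_mappable(u64 start, u64 size)
{
	struct range r = arch_get_mappable_range();

	return size && start >= r.start && (start + size - 1) <= r.end;
}
```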