Message ID | 20190404125916.10215-3-osalvador@suse.de (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Preparing memhotplug for allocating memmap from hot-added range | expand |
On 04.04.19 14:59, Oscar Salvador wrote: > From: Michal Hocko <mhocko@suse.com> > > arch_add_memory, __add_pages take a want_memblock which controls whether > the newly added memory should get the sysfs memblock user API (e.g. > ZONE_DEVICE users do not want/need this interface). Some callers even > want to control where do we allocate the memmap from by configuring > altmap. > > Add a more generic hotplug context for arch_add_memory and __add_pages. > struct mhp_restrictions contains flags which contains additional > features to be enabled by the memory hotplug (MHP_MEMBLOCK_API > currently) and altmap for alternative memmap allocator. > > This patch shouldn't introduce any functional change. > > Signed-off-by: Michal Hocko <mhocko@suse.com> > Signed-off-by: Oscar Salvador <osalvador@suse.de> > --- > arch/arm64/mm/mmu.c | 6 +++--- > arch/ia64/mm/init.c | 6 +++--- > arch/powerpc/mm/mem.c | 6 +++--- > arch/s390/mm/init.c | 6 +++--- > arch/sh/mm/init.c | 6 +++--- > arch/x86/mm/init_32.c | 6 +++--- > arch/x86/mm/init_64.c | 10 +++++----- > include/linux/memory_hotplug.h | 29 +++++++++++++++++++++++------ > kernel/memremap.c | 10 +++++++--- > mm/memory_hotplug.c | 10 ++++++---- > 10 files changed, 59 insertions(+), 36 deletions(-) > > diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c > index e6acfa7be4c7..db509550329d 100644 > --- a/arch/arm64/mm/mmu.c > +++ b/arch/arm64/mm/mmu.c > @@ -1046,8 +1046,8 @@ int p4d_free_pud_page(p4d_t *p4d, unsigned long addr) > } > > #ifdef CONFIG_MEMORY_HOTPLUG > -int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, > - bool want_memblock) > +int arch_add_memory(int nid, u64 start, u64 size, > + struct mhp_restrictions *restrictions) Should the restrictions be marked const? > { > int flags = 0; > > @@ -1058,6 +1058,6 @@ int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, > size, PAGE_KERNEL, pgd_pgtable_alloc, flags); > > return __add_pages(nid, start >> PAGE_SHIFT, size >> PAGE_SHIFT, > - altmap, want_memblock); > + restrictions); Again, some strange alignment thingies going on here :) > } > #endif > diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c > index e49200e31750..379eb1f9adc9 100644 > --- a/arch/ia64/mm/init.c > +++ b/arch/ia64/mm/init.c > @@ -666,14 +666,14 @@ mem_init (void) > } > > #ifdef CONFIG_MEMORY_HOTPLUG > -int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, > - bool want_memblock) > +int arch_add_memory(int nid, u64 start, u64 size, > + struct mhp_restrictions *restrictions) > { > unsigned long start_pfn = start >> PAGE_SHIFT; > unsigned long nr_pages = size >> PAGE_SHIFT; > int ret; > > - ret = __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock); > + ret = __add_pages(nid, start_pfn, nr_pages, restrictions); > if (ret) > printk("%s: Problem encountered in __add_pages() as ret=%d\n", > __func__, ret); > diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c > index 1aa27aac73c5..76deaa8525db 100644 > --- a/arch/powerpc/mm/mem.c > +++ b/arch/powerpc/mm/mem.c > @@ -109,8 +109,8 @@ int __weak remove_section_mapping(unsigned long start, unsigned long end) > return -ENODEV; > } > > -int __meminit arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, > - bool want_memblock) > +int __meminit arch_add_memory(int nid, u64 start, u64 size, > + struct mhp_restrictions *restrictions) > { > unsigned long start_pfn = start >> PAGE_SHIFT; > unsigned long nr_pages = size >> PAGE_SHIFT; > @@ -127,7 +127,7 @@ int __meminit arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap * > } > flush_inval_dcache_range(start, start + size); > > - return __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock); > + return __add_pages(nid, start_pfn, nr_pages, restrictions); > } > > #ifdef CONFIG_MEMORY_HOTREMOVE > diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c > index 25e3113091ea..f5db961ad792 100644 > --- a/arch/s390/mm/init.c > +++ b/arch/s390/mm/init.c > @@ -216,8 +216,8 @@ device_initcall(s390_cma_mem_init); > > #endif /* CONFIG_CMA */ > > -int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, > - bool want_memblock) > +int arch_add_memory(int nid, u64 start, u64 size, > + struct mhp_restrictions *restrictions) > { > unsigned long start_pfn = PFN_DOWN(start); > unsigned long size_pages = PFN_DOWN(size); > @@ -227,7 +227,7 @@ int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, > if (rc) > return rc; > > - rc = __add_pages(nid, start_pfn, size_pages, altmap, want_memblock); > + rc = __add_pages(nid, start_pfn, size_pages, restrictions); > if (rc) > vmem_remove_mapping(start, size); > return rc; > diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c > index 8e004b2f1a6a..168d3a6b9358 100644 > --- a/arch/sh/mm/init.c > +++ b/arch/sh/mm/init.c > @@ -404,15 +404,15 @@ void __init mem_init(void) > } > > #ifdef CONFIG_MEMORY_HOTPLUG > -int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, > - bool want_memblock) > +int arch_add_memory(int nid, u64 start, u64 size, > + struct mhp_restrictions *restrictions) > { > unsigned long start_pfn = PFN_DOWN(start); > unsigned long nr_pages = size >> PAGE_SHIFT; > int ret; > > /* We only have ZONE_NORMAL, so this is easy.. */ > - ret = __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock); > + ret = __add_pages(nid, start_pfn, nr_pages, restrictions); > if (unlikely(ret)) > printk("%s: Failed, __add_pages() == %d\n", __func__, ret); > > diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c > index 85c94f9a87f8..755dbed85531 100644 > --- a/arch/x86/mm/init_32.c > +++ b/arch/x86/mm/init_32.c > @@ -850,13 +850,13 @@ void __init mem_init(void) > } > > #ifdef CONFIG_MEMORY_HOTPLUG > -int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, > - bool want_memblock) > +int arch_add_memory(int nid, u64 start, u64 size, > + struct mhp_restrictions *restrictions) > { > unsigned long start_pfn = start >> PAGE_SHIFT; > unsigned long nr_pages = size >> PAGE_SHIFT; > > - return __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock); > + return __add_pages(nid, start_pfn, nr_pages, restrictions); > } > > #ifdef CONFIG_MEMORY_HOTREMOVE > diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c > index bccff68e3267..db42c11b48fb 100644 > --- a/arch/x86/mm/init_64.c > +++ b/arch/x86/mm/init_64.c > @@ -777,11 +777,11 @@ static void update_end_of_memory_vars(u64 start, u64 size) > } > > int add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages, > - struct vmem_altmap *altmap, bool want_memblock) > + struct mhp_restrictions *restrictions) > { > int ret; > > - ret = __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock); > + ret = __add_pages(nid, start_pfn, nr_pages, restrictions); > WARN_ON_ONCE(ret); > > /* update max_pfn, max_low_pfn and high_memory */ > @@ -791,15 +791,15 @@ int add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages, > return ret; > } > > -int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, > - bool want_memblock) > +int arch_add_memory(int nid, u64 start, u64 size, > + struct mhp_restrictions *restrictions) > { > unsigned long start_pfn = start >> PAGE_SHIFT; > unsigned long nr_pages = size >> PAGE_SHIFT; > > init_memory_mapping(start, start + size); > > - return add_pages(nid, start_pfn, nr_pages, altmap, want_memblock); > + return add_pages(nid, start_pfn, nr_pages, restrictions); > } > > #define PAGE_INUSE 0xFD > diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h > index 3c8cf347804c..5bd4b56f639c 100644 > --- a/include/linux/memory_hotplug.h > +++ b/include/linux/memory_hotplug.h > @@ -118,20 +118,37 @@ extern int __remove_pages(struct zone *zone, unsigned long start_pfn, > unsigned long nr_pages, struct vmem_altmap *altmap); > #endif /* CONFIG_MEMORY_HOTREMOVE */ > > +/* > + * Do we want sysfs memblock files created. This will allow userspace to online > + * and offline memory explicitly. Lack of this bit means that the caller has to > + * call move_pfn_range_to_zone to finish the initialization. > + */ I think you can be more precise here. "Create memory block devices for added pages. This is usually the case for all system ram (and only system ram), as only this way memory can be onlined/offlined by user space and kdump to correctly detect the new memory using udev events." Maybe we should even go a step further and call this MHP_SYSTEM_RAM Because that is what it is right now. > + > +#define MHP_MEMBLOCK_API (1<<0) > + > +/* > + * Restrictions for the memory hotplug: > + * flags: MHP_ flags > + * altmap: alternative allocator for memmap array > + */ > +struct mhp_restrictions { > + unsigned long flags; > + struct vmem_altmap *altmap; > +}; > + > /* reasonably generic interface to expand the physical pages */ > extern int __add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages, > - struct vmem_altmap *altmap, bool want_memblock); > + struct mhp_restrictions *restrictions); > > #ifndef CONFIG_ARCH_HAS_ADD_PAGES > static inline int add_pages(int nid, unsigned long start_pfn, > - unsigned long nr_pages, struct vmem_altmap *altmap, > - bool want_memblock) > + unsigned long nr_pages, struct mhp_restrictions *restrictions) > { > - return __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock); > + return __add_pages(nid, start_pfn, nr_pages, restrictions); > } > #else /* ARCH_HAS_ADD_PAGES */ > int add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages, > - struct vmem_altmap *altmap, bool want_memblock); > + struct mhp_restrictions *restrictions); dito alignment. You have tabs configured to 8 characters, right? > #endif /* ARCH_HAS_ADD_PAGES */ > > #ifdef CONFIG_NUMA > @@ -333,7 +350,7 @@ extern int __add_memory(int nid, u64 start, u64 size); > extern int add_memory(int nid, u64 start, u64 size); > extern int add_memory_resource(int nid, struct resource *resource); > extern int arch_add_memory(int nid, u64 start, u64 size, > - struct vmem_altmap *altmap, bool want_memblock); > + struct mhp_restrictions *restrictions); > extern void move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn, > unsigned long nr_pages, struct vmem_altmap *altmap); > extern bool is_memblock_offlined(struct memory_block *mem); > diff --git a/kernel/memremap.c b/kernel/memremap.c > index a856cb5ff192..cc5e3e34417d 100644 > --- a/kernel/memremap.c > +++ b/kernel/memremap.c > @@ -149,6 +149,7 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap) > struct resource *res = &pgmap->res; > struct dev_pagemap *conflict_pgmap; > pgprot_t pgprot = PAGE_KERNEL; > + struct mhp_restrictions restrictions = {}; > int error, nid, is_ram; > > if (!pgmap->ref || !pgmap->kill) > @@ -199,6 +200,9 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap) > if (error) > goto err_pfn_remap; > > + /* We do not want any optional features only our own memmap */ > + restrictions.altmap = altmap; > +> mem_hotplug_begin(); > > /* > @@ -214,7 +218,7 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap) > */ > if (pgmap->type == MEMORY_DEVICE_PRIVATE) { > error = add_pages(nid, align_start >> PAGE_SHIFT, > - align_size >> PAGE_SHIFT, NULL, false); > + align_size >> PAGE_SHIFT, &restrictions); > } else { > error = kasan_add_zero_shadow(__va(align_start), align_size); > if (error) { > @@ -222,8 +226,8 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap) > goto err_kasan; > } > > - error = arch_add_memory(nid, align_start, align_size, altmap, > - false); > + error = arch_add_memory(nid, align_start, align_size, > + &restrictions); dito alignment > } > > if (!error) { > diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c > index d8a3e9554aec..50f77e059457 100644 > --- a/mm/memory_hotplug.c > +++ b/mm/memory_hotplug.c > @@ -274,12 +274,12 @@ static int __meminit __add_section(int nid, unsigned long phys_start_pfn, > * add the new pages. > */ > int __ref __add_pages(int nid, unsigned long phys_start_pfn, > - unsigned long nr_pages, struct vmem_altmap *altmap, > - bool want_memblock) > + unsigned long nr_pages, struct mhp_restrictions *restrictions) > { > unsigned long i; > int err = 0; > int start_sec, end_sec; > + struct vmem_altmap *altmap = restrictions->altmap; > > /* during initialize mem_map, align hot-added range to section */ > start_sec = pfn_to_section_nr(phys_start_pfn); > @@ -300,7 +300,7 @@ int __ref __add_pages(int nid, unsigned long phys_start_pfn, > > for (i = start_sec; i <= end_sec; i++) { > err = __add_section(nid, section_nr_to_pfn(i), altmap, > - want_memblock); > + restrictions->flags & MHP_MEMBLOCK_API); > > /* > * EEXIST is finally dealt with by ioresource collision > @@ -1102,6 +1102,7 @@ int __ref add_memory_resource(int nid, struct resource *res) > u64 start, size; > bool new_node = false; > int ret; > + struct mhp_restrictions restrictions = {}; I'd make this the very first variable. Also eventually struct mhp_restrictions restrictions = { .restrictions = MHP_MEMBLOCK_API }; > > start = res->start; > size = resource_size(res); > @@ -1126,7 +1127,8 @@ int __ref add_memory_resource(int nid, struct resource *res) > new_node = ret; > > /* call arch's memory hotadd */ > - ret = arch_add_memory(nid, start, size, NULL, true); > + restrictions.flags = MHP_MEMBLOCK_API; > + ret = arch_add_memory(nid, start, size, &restrictions); > if (ret < 0) > goto error; > >
On 04.04.19 16:57, David Hildenbrand wrote: > On 04.04.19 14:59, Oscar Salvador wrote: >> From: Michal Hocko <mhocko@suse.com> >> >> arch_add_memory, __add_pages take a want_memblock which controls whether >> the newly added memory should get the sysfs memblock user API (e.g. >> ZONE_DEVICE users do not want/need this interface). Some callers even >> want to control where do we allocate the memmap from by configuring >> altmap. >> >> Add a more generic hotplug context for arch_add_memory and __add_pages. >> struct mhp_restrictions contains flags which contains additional >> features to be enabled by the memory hotplug (MHP_MEMBLOCK_API >> currently) and altmap for alternative memmap allocator. >> >> This patch shouldn't introduce any functional change. >> >> Signed-off-by: Michal Hocko <mhocko@suse.com> >> Signed-off-by: Oscar Salvador <osalvador@suse.de> >> --- >> arch/arm64/mm/mmu.c | 6 +++--- >> arch/ia64/mm/init.c | 6 +++--- >> arch/powerpc/mm/mem.c | 6 +++--- >> arch/s390/mm/init.c | 6 +++--- >> arch/sh/mm/init.c | 6 +++--- >> arch/x86/mm/init_32.c | 6 +++--- >> arch/x86/mm/init_64.c | 10 +++++----- >> include/linux/memory_hotplug.h | 29 +++++++++++++++++++++++------ >> kernel/memremap.c | 10 +++++++--- >> mm/memory_hotplug.c | 10 ++++++---- >> 10 files changed, 59 insertions(+), 36 deletions(-) >> >> diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c >> index e6acfa7be4c7..db509550329d 100644 >> --- a/arch/arm64/mm/mmu.c >> +++ b/arch/arm64/mm/mmu.c >> @@ -1046,8 +1046,8 @@ int p4d_free_pud_page(p4d_t *p4d, unsigned long addr) >> } >> >> #ifdef CONFIG_MEMORY_HOTPLUG >> -int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, >> - bool want_memblock) >> +int arch_add_memory(int nid, u64 start, u64 size, >> + struct mhp_restrictions *restrictions) > > Should the restrictions be marked const? > >> { >> int flags = 0; >> >> @@ -1058,6 +1058,6 @@ int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, >> size, PAGE_KERNEL, pgd_pgtable_alloc, flags); >> >> return __add_pages(nid, start >> PAGE_SHIFT, size >> PAGE_SHIFT, >> - altmap, want_memblock); >> + restrictions); > > Again, some strange alignment thingies going on here :) > >> } >> #endif >> diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c >> index e49200e31750..379eb1f9adc9 100644 >> --- a/arch/ia64/mm/init.c >> +++ b/arch/ia64/mm/init.c >> @@ -666,14 +666,14 @@ mem_init (void) >> } >> >> #ifdef CONFIG_MEMORY_HOTPLUG >> -int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, >> - bool want_memblock) >> +int arch_add_memory(int nid, u64 start, u64 size, >> + struct mhp_restrictions *restrictions) >> { >> unsigned long start_pfn = start >> PAGE_SHIFT; >> unsigned long nr_pages = size >> PAGE_SHIFT; >> int ret; >> >> - ret = __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock); >> + ret = __add_pages(nid, start_pfn, nr_pages, restrictions); >> if (ret) >> printk("%s: Problem encountered in __add_pages() as ret=%d\n", >> __func__, ret); >> diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c >> index 1aa27aac73c5..76deaa8525db 100644 >> --- a/arch/powerpc/mm/mem.c >> +++ b/arch/powerpc/mm/mem.c >> @@ -109,8 +109,8 @@ int __weak remove_section_mapping(unsigned long start, unsigned long end) >> return -ENODEV; >> } >> >> -int __meminit arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, >> - bool want_memblock) >> +int __meminit arch_add_memory(int nid, u64 start, u64 size, >> + struct mhp_restrictions *restrictions) >> { >> unsigned long start_pfn = start >> PAGE_SHIFT; >> unsigned long nr_pages = size >> PAGE_SHIFT; >> @@ -127,7 +127,7 @@ int __meminit arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap * >> } >> flush_inval_dcache_range(start, start + size); >> >> - return __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock); >> + return __add_pages(nid, start_pfn, nr_pages, restrictions); >> } >> >> #ifdef CONFIG_MEMORY_HOTREMOVE >> diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c >> index 25e3113091ea..f5db961ad792 100644 >> --- a/arch/s390/mm/init.c >> +++ b/arch/s390/mm/init.c >> @@ -216,8 +216,8 @@ device_initcall(s390_cma_mem_init); >> >> #endif /* CONFIG_CMA */ >> >> -int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, >> - bool want_memblock) >> +int arch_add_memory(int nid, u64 start, u64 size, >> + struct mhp_restrictions *restrictions) >> { >> unsigned long start_pfn = PFN_DOWN(start); >> unsigned long size_pages = PFN_DOWN(size); >> @@ -227,7 +227,7 @@ int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, >> if (rc) >> return rc; >> >> - rc = __add_pages(nid, start_pfn, size_pages, altmap, want_memblock); >> + rc = __add_pages(nid, start_pfn, size_pages, restrictions); >> if (rc) >> vmem_remove_mapping(start, size); >> return rc; >> diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c >> index 8e004b2f1a6a..168d3a6b9358 100644 >> --- a/arch/sh/mm/init.c >> +++ b/arch/sh/mm/init.c >> @@ -404,15 +404,15 @@ void __init mem_init(void) >> } >> >> #ifdef CONFIG_MEMORY_HOTPLUG >> -int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, >> - bool want_memblock) >> +int arch_add_memory(int nid, u64 start, u64 size, >> + struct mhp_restrictions *restrictions) >> { >> unsigned long start_pfn = PFN_DOWN(start); >> unsigned long nr_pages = size >> PAGE_SHIFT; >> int ret; >> >> /* We only have ZONE_NORMAL, so this is easy.. */ >> - ret = __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock); >> + ret = __add_pages(nid, start_pfn, nr_pages, restrictions); >> if (unlikely(ret)) >> printk("%s: Failed, __add_pages() == %d\n", __func__, ret); >> >> diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c >> index 85c94f9a87f8..755dbed85531 100644 >> --- a/arch/x86/mm/init_32.c >> +++ b/arch/x86/mm/init_32.c >> @@ -850,13 +850,13 @@ void __init mem_init(void) >> } >> >> #ifdef CONFIG_MEMORY_HOTPLUG >> -int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, >> - bool want_memblock) >> +int arch_add_memory(int nid, u64 start, u64 size, >> + struct mhp_restrictions *restrictions) >> { >> unsigned long start_pfn = start >> PAGE_SHIFT; >> unsigned long nr_pages = size >> PAGE_SHIFT; >> >> - return __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock); >> + return __add_pages(nid, start_pfn, nr_pages, restrictions); >> } >> >> #ifdef CONFIG_MEMORY_HOTREMOVE >> diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c >> index bccff68e3267..db42c11b48fb 100644 >> --- a/arch/x86/mm/init_64.c >> +++ b/arch/x86/mm/init_64.c >> @@ -777,11 +777,11 @@ static void update_end_of_memory_vars(u64 start, u64 size) >> } >> >> int add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages, >> - struct vmem_altmap *altmap, bool want_memblock) >> + struct mhp_restrictions *restrictions) >> { >> int ret; >> >> - ret = __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock); >> + ret = __add_pages(nid, start_pfn, nr_pages, restrictions); >> WARN_ON_ONCE(ret); >> >> /* update max_pfn, max_low_pfn and high_memory */ >> @@ -791,15 +791,15 @@ int add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages, >> return ret; >> } >> >> -int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, >> - bool want_memblock) >> +int arch_add_memory(int nid, u64 start, u64 size, >> + struct mhp_restrictions *restrictions) >> { >> unsigned long start_pfn = start >> PAGE_SHIFT; >> unsigned long nr_pages = size >> PAGE_SHIFT; >> >> init_memory_mapping(start, start + size); >> >> - return add_pages(nid, start_pfn, nr_pages, altmap, want_memblock); >> + return add_pages(nid, start_pfn, nr_pages, restrictions); >> } >> >> #define PAGE_INUSE 0xFD >> diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h >> index 3c8cf347804c..5bd4b56f639c 100644 >> --- a/include/linux/memory_hotplug.h >> +++ b/include/linux/memory_hotplug.h >> @@ -118,20 +118,37 @@ extern int __remove_pages(struct zone *zone, unsigned long start_pfn, >> unsigned long nr_pages, struct vmem_altmap *altmap); >> #endif /* CONFIG_MEMORY_HOTREMOVE */ >> >> +/* >> + * Do we want sysfs memblock files created. This will allow userspace to online >> + * and offline memory explicitly. Lack of this bit means that the caller has to >> + * call move_pfn_range_to_zone to finish the initialization. >> + */ > > I think you can be more precise here. > > "Create memory block devices for added pages. This is usually the case > for all system ram (and only system ram), as only this way memory can be > onlined/offlined by user space and kdump to correctly detect the new > memory using udev events." > > Maybe we should even go a step further and call this > > MHP_SYSTEM_RAM > > Because that is what it is right now. > >> + >> +#define MHP_MEMBLOCK_API (1<<0) >> + >> +/* >> + * Restrictions for the memory hotplug: >> + * flags: MHP_ flags >> + * altmap: alternative allocator for memmap array >> + */ >> +struct mhp_restrictions { >> + unsigned long flags; >> + struct vmem_altmap *altmap; >> +}; >> + >> /* reasonably generic interface to expand the physical pages */ >> extern int __add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages, >> - struct vmem_altmap *altmap, bool want_memblock); >> + struct mhp_restrictions *restrictions); >> >> #ifndef CONFIG_ARCH_HAS_ADD_PAGES >> static inline int add_pages(int nid, unsigned long start_pfn, >> - unsigned long nr_pages, struct vmem_altmap *altmap, >> - bool want_memblock) >> + unsigned long nr_pages, struct mhp_restrictions *restrictions) >> { >> - return __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock); >> + return __add_pages(nid, start_pfn, nr_pages, restrictions); >> } >> #else /* ARCH_HAS_ADD_PAGES */ >> int add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages, >> - struct vmem_altmap *altmap, bool want_memblock); >> + struct mhp_restrictions *restrictions); > > dito alignment. You have tabs configured to 8 characters, right? > >> #endif /* ARCH_HAS_ADD_PAGES */ >> >> #ifdef CONFIG_NUMA >> @@ -333,7 +350,7 @@ extern int __add_memory(int nid, u64 start, u64 size); >> extern int add_memory(int nid, u64 start, u64 size); >> extern int add_memory_resource(int nid, struct resource *resource); >> extern int arch_add_memory(int nid, u64 start, u64 size, >> - struct vmem_altmap *altmap, bool want_memblock); >> + struct mhp_restrictions *restrictions); >> extern void move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn, >> unsigned long nr_pages, struct vmem_altmap *altmap); >> extern bool is_memblock_offlined(struct memory_block *mem); >> diff --git a/kernel/memremap.c b/kernel/memremap.c >> index a856cb5ff192..cc5e3e34417d 100644 >> --- a/kernel/memremap.c >> +++ b/kernel/memremap.c >> @@ -149,6 +149,7 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap) >> struct resource *res = &pgmap->res; >> struct dev_pagemap *conflict_pgmap; >> pgprot_t pgprot = PAGE_KERNEL; >> + struct mhp_restrictions restrictions = {}; >> int error, nid, is_ram; >> >> if (!pgmap->ref || !pgmap->kill) >> @@ -199,6 +200,9 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap) >> if (error) >> goto err_pfn_remap; >> >> + /* We do not want any optional features only our own memmap */ >> + restrictions.altmap = altmap; >> +> mem_hotplug_begin(); >> >> /* >> @@ -214,7 +218,7 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap) >> */ >> if (pgmap->type == MEMORY_DEVICE_PRIVATE) { >> error = add_pages(nid, align_start >> PAGE_SHIFT, >> - align_size >> PAGE_SHIFT, NULL, false); >> + align_size >> PAGE_SHIFT, &restrictions); >> } else { >> error = kasan_add_zero_shadow(__va(align_start), align_size); >> if (error) { >> @@ -222,8 +226,8 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap) >> goto err_kasan; >> } >> >> - error = arch_add_memory(nid, align_start, align_size, altmap, >> - false); >> + error = arch_add_memory(nid, align_start, align_size, >> + &restrictions); > > dito alignment > >> } >> >> if (!error) { >> diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c >> index d8a3e9554aec..50f77e059457 100644 >> --- a/mm/memory_hotplug.c >> +++ b/mm/memory_hotplug.c >> @@ -274,12 +274,12 @@ static int __meminit __add_section(int nid, unsigned long phys_start_pfn, >> * add the new pages. >> */ >> int __ref __add_pages(int nid, unsigned long phys_start_pfn, >> - unsigned long nr_pages, struct vmem_altmap *altmap, >> - bool want_memblock) >> + unsigned long nr_pages, struct mhp_restrictions *restrictions) >> { >> unsigned long i; >> int err = 0; >> int start_sec, end_sec; >> + struct vmem_altmap *altmap = restrictions->altmap; >> >> /* during initialize mem_map, align hot-added range to section */ >> start_sec = pfn_to_section_nr(phys_start_pfn); >> @@ -300,7 +300,7 @@ int __ref __add_pages(int nid, unsigned long phys_start_pfn, >> >> for (i = start_sec; i <= end_sec; i++) { >> err = __add_section(nid, section_nr_to_pfn(i), altmap, >> - want_memblock); >> + restrictions->flags & MHP_MEMBLOCK_API); >> >> /* >> * EEXIST is finally dealt with by ioresource collision >> @@ -1102,6 +1102,7 @@ int __ref add_memory_resource(int nid, struct resource *res) >> u64 start, size; >> bool new_node = false; >> int ret; >> + struct mhp_restrictions restrictions = {}; > > I'd make this the very first variable. > > Also eventually > > struct mhp_restrictions restrictions = { > .restrictions = MHP_MEMBLOCK_API > }; > >> >> start = res->start; >> size = resource_size(res); >> @@ -1126,7 +1127,8 @@ int __ref add_memory_resource(int nid, struct resource *res) >> new_node = ret; >> >> /* call arch's memory hotadd */ >> - ret = arch_add_memory(nid, start, size, NULL, true); >> + restrictions.flags = MHP_MEMBLOCK_API; >> + ret = arch_add_memory(nid, start, size, &restrictions); >> if (ret < 0) >> goto error; >> >> > > s/alignment/indentation/ I think I should take a nap :)
On Thu, Apr 04, 2019 at 04:57:03PM +0200, David Hildenbrand wrote: > > #ifdef CONFIG_MEMORY_HOTPLUG > > -int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, > > - bool want_memblock) > > +int arch_add_memory(int nid, u64 start, u64 size, > > + struct mhp_restrictions *restrictions) > > Should the restrictions be marked const? We could, but maybe some platforms want to override something later on depending on x or y configurations, so we could be more flexible here. > > +/* > > + * Do we want sysfs memblock files created. This will allow userspace to online > > + * and offline memory explicitly. Lack of this bit means that the caller has to > > + * call move_pfn_range_to_zone to finish the initialization. > > + */ > > I think you can be more precise here. > > "Create memory block devices for added pages. This is usually the case > for all system ram (and only system ram), as only this way memory can be > onlined/offlined by user space and kdump to correctly detect the new > memory using udev events." > > Maybe we should even go a step further and call this > > MHP_SYSTEM_RAM > > Because that is what it is right now. I agree that that is nicer explanation, and I would not mind to add it. In the end, the more information and commented code the better. But I am not really convinced by MHP_SYSTEM_RAM name, and I think we should stick with MHP_MEMBLOCK_API because it represents __what__ is that flag about and its function, e.g: create memory block devices. > > @@ -1102,6 +1102,7 @@ int __ref add_memory_resource(int nid, struct resource *res) > > u64 start, size; > > bool new_node = false; > > int ret; > > + struct mhp_restrictions restrictions = {}; > > I'd make this the very first variable. > > Also eventually > > struct mhp_restrictions restrictions = { > .restrictions = MHP_MEMBLOCK_API > }; It might be more right. Actually, that is the way we tend to pre-initialize fields in structs. About the identation, I am really puzzled, I checked my branch and I cannot see any space that should be a tab. Maybe it got screwed up when sending it. Anyway, thanks for the feedback David ;-)
On 04.04.19 20:01, Oscar Salvador wrote: > On Thu, Apr 04, 2019 at 04:57:03PM +0200, David Hildenbrand wrote: > >>> #ifdef CONFIG_MEMORY_HOTPLUG >>> -int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, >>> - bool want_memblock) >>> +int arch_add_memory(int nid, u64 start, u64 size, >>> + struct mhp_restrictions *restrictions) >> >> Should the restrictions be marked const? > > We could, but maybe some platforms want to override something later on > depending on x or y configurations, so we could be more flexible here. > >>> +/* >>> + * Do we want sysfs memblock files created. This will allow userspace to online >>> + * and offline memory explicitly. Lack of this bit means that the caller has to >>> + * call move_pfn_range_to_zone to finish the initialization. >>> + */ >> >> I think you can be more precise here. >> >> "Create memory block devices for added pages. This is usually the case >> for all system ram (and only system ram), as only this way memory can be >> onlined/offlined by user space and kdump to correctly detect the new >> memory using udev events." >> >> Maybe we should even go a step further and call this >> >> MHP_SYSTEM_RAM >> >> Because that is what it is right now. > > I agree that that is nicer explanation, and I would not mind to add it. > In the end, the more information and commented code the better. > > But I am not really convinced by MHP_SYSTEM_RAM name, and I think we should stick > with MHP_MEMBLOCK_API because it represents __what__ is that flag about and its > function, e.g: create memory block devices. This nicely aligns with the sub-section memory add support discussion. MHP_MEMBLOCK_API immediately implies that - memory is used as system ram. Memory can be onlined/offlined. Markers at sections indicate if the section is online/offline. - memory has to follow certain restrictions (alignment + size multiple of memory block size) IOW, if some ZONE_DEVICE memory would set this flag, very bad things will happen. Especially, device memory with memory block devices should also result in quite some issues (I remember it is checked somewhere). System ram added without MHP_MEMBLOCK_API? What would happen? Memory can never be onlined. I feel like mixing these two interfaces - adding system memory vs. adding device memory wasn't the best design choice. Lot of parameters to set, but the some parameters are actually mutually exclusive. Especially memory block devices are a difference. Maybe having actual function cal variants like __add_system_memory / arch_add_system_memory ... and __add_device_memory / arch_add_device_memory would make things clearer. To me, it feels like we are trying to squeeze too many different things into one function call path, allowing people do do things that shouldn't be done. Any opinions on the design/direction on these interfaces in general? I don't see us moving away from memory block devices for system ram. But I am seeing us moving towards sub-section hot-add support for anything not system ram. > >>> @@ -1102,6 +1102,7 @@ int __ref add_memory_resource(int nid, struct resource *res) >>> u64 start, size; >>> bool new_node = false; >>> int ret; >>> + struct mhp_restrictions restrictions = {}; >> >> I'd make this the very first variable. >> >> Also eventually >> >> struct mhp_restrictions restrictions = { >> .restrictions = MHP_MEMBLOCK_API >> }; > > It might be more right. > Actually, that is the way we tend to pre-initialize fields in structs. > > About the identation, I am really puzzled, I checked my branch and I > cannot see any space that should be a tab. > Maybe it got screwed up when sending it. It's not about spaces that should be tabs, rather about how many tabs to use. But this is really just nit picking, because it usually directly jumps into my eyes :) On vim with tabstop=8 it didn't look right. > > Anyway, thanks for the feedback David ;-) >
On Thu 04-04-19 20:27:41, David Hildenbrand wrote: > On 04.04.19 20:01, Oscar Salvador wrote: [...] > > But I am not really convinced by MHP_SYSTEM_RAM name, and I think we should stick > > with MHP_MEMBLOCK_API because it represents __what__ is that flag about and its > > function, e.g: create memory block devices. Exactly > This nicely aligns with the sub-section memory add support discussion. > > MHP_MEMBLOCK_API immediately implies that > > - memory is used as system ram. Memory can be onlined/offlined. Markers > at sections indicate if the section is online/offline. No there is no implication like that. It means only that the onlined memory has a sysfs interface. Nothing more, nothing less This is an internal API so we are not carving anything into the stone. So can we simply start with what we have and go from there? I am getting felling that this discussion just makes the whole thing more muddy.
On 05.04.19 09:14, Michal Hocko wrote: > On Thu 04-04-19 20:27:41, David Hildenbrand wrote: >> On 04.04.19 20:01, Oscar Salvador wrote: > [...] >>> But I am not really convinced by MHP_SYSTEM_RAM name, and I think we should stick >>> with MHP_MEMBLOCK_API because it represents __what__ is that flag about and its >>> function, e.g: create memory block devices. > > Exactly Fine with me for keeping what Oscar has. > >> This nicely aligns with the sub-section memory add support discussion. >> >> MHP_MEMBLOCK_API immediately implies that >> >> - memory is used as system ram. Memory can be onlined/offlined. Markers >> at sections indicate if the section is online/offline. > > No there is no implication like that. It means only that the onlined > memory has a sysfs interface. Nothing more, nothing less As soon as there is a online/offline interface, you *can* (and user space usually *will*) online that memory. Onlining/offlining is only defined for memory to be added to the buddy - memory to be used as "system ram". Doing it for random device memory will not work / result in undefined behavior. Not adding memory block devices for system ram will not allow user space to online/offline it and break kdump reload for hot-added memory. But memory can be onlined/offlined using internal APIs of course - if that's what you were referring to. > > This is an internal API so we are not carving anything into the stone. That is true. > So can we simply start with what we have and go from there? Sure, what Oscar does here is just a simple refactoring of the interface and I was just wondering if the interface needs a general overhaul. >I am getting > felling that this discussion just makes the whole thing more muddy. I think this discussion is helpful to understand how the whole thing is supposed to work :) At least on my side. Meanwhile, I will have a look if memory block devices cannot simply be created by the caller of arch_add_memory(). At least it feels like creating memory bock devices could be factored out - I remember that it was not that easy.
On Fri 05-04-19 10:05:09, David Hildenbrand wrote: > On 05.04.19 09:14, Michal Hocko wrote: > > On Thu 04-04-19 20:27:41, David Hildenbrand wrote: > >> On 04.04.19 20:01, Oscar Salvador wrote: > > [...] > >>> But I am not really convinced by MHP_SYSTEM_RAM name, and I think we should stick > >>> with MHP_MEMBLOCK_API because it represents __what__ is that flag about and its > >>> function, e.g: create memory block devices. > > > > Exactly > > Fine with me for keeping what Oscar has. > > > > >> This nicely aligns with the sub-section memory add support discussion. > >> > >> MHP_MEMBLOCK_API immediately implies that > >> > >> - memory is used as system ram. Memory can be onlined/offlined. Markers > >> at sections indicate if the section is online/offline. > > > > No there is no implication like that. It means only that the onlined > > memory has a sysfs interface. Nothing more, nothing less > > As soon as there is a online/offline interface, you *can* (and user > space usually *will*) online that memory. Onlining/offlining is only > defined for memory to be added to the buddy - memory to be used as > "system ram". Doing it for random device memory will not work / result > in undefined behavior. No, not really. We really do not care where the memory comes from. Is it RAM, NVDIMM, $FOO_BAR_OF_A_FUTURE_BUZZ. We only do care that the memory can be onlined - user triggered associated with a zone. The memory even doesn't have to go to the page allocator.
On 05.04.19 12:30, Michal Hocko wrote: > On Fri 05-04-19 10:05:09, David Hildenbrand wrote: >> On 05.04.19 09:14, Michal Hocko wrote: >>> On Thu 04-04-19 20:27:41, David Hildenbrand wrote: >>>> On 04.04.19 20:01, Oscar Salvador wrote: >>> [...] >>>>> But I am not really convinced by MHP_SYSTEM_RAM name, and I think we should stick >>>>> with MHP_MEMBLOCK_API because it represents __what__ is that flag about and its >>>>> function, e.g: create memory block devices. >>> >>> Exactly >> >> Fine with me for keeping what Oscar has. >> >>> >>>> This nicely aligns with the sub-section memory add support discussion. >>>> >>>> MHP_MEMBLOCK_API immediately implies that >>>> >>>> - memory is used as system ram. Memory can be onlined/offlined. Markers >>>> at sections indicate if the section is online/offline. >>> >>> No there is no implication like that. It means only that the onlined >>> memory has a sysfs interface. Nothing more, nothing less >> >> As soon as there is a online/offline interface, you *can* (and user >> space usually *will*) online that memory. Onlining/offlining is only >> defined for memory to be added to the buddy - memory to be used as >> "system ram". Doing it for random device memory will not work / result >> in undefined behavior. > > No, not really. We really do not care where the memory comes from. Is it > RAM, NVDIMM, $FOO_BAR_OF_A_FUTURE_BUZZ. We only do care that the memory > can be onlined - user triggered associated with a zone. The memory even > doesn't have to go to the page allocator. > Yes, true, I was rather meaning if the caller wants ZONE_DEVICE, memory block devices are never an option. So "how the memory is used" not "where memory comes from". add_memory() vs. devm_memremap_pages().
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index e6acfa7be4c7..db509550329d 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -1046,8 +1046,8 @@ int p4d_free_pud_page(p4d_t *p4d, unsigned long addr) } #ifdef CONFIG_MEMORY_HOTPLUG -int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, - bool want_memblock) +int arch_add_memory(int nid, u64 start, u64 size, + struct mhp_restrictions *restrictions) { int flags = 0; @@ -1058,6 +1058,6 @@ int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, size, PAGE_KERNEL, pgd_pgtable_alloc, flags); return __add_pages(nid, start >> PAGE_SHIFT, size >> PAGE_SHIFT, - altmap, want_memblock); + restrictions); } #endif diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index e49200e31750..379eb1f9adc9 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -666,14 +666,14 @@ mem_init (void) } #ifdef CONFIG_MEMORY_HOTPLUG -int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, - bool want_memblock) +int arch_add_memory(int nid, u64 start, u64 size, + struct mhp_restrictions *restrictions) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; int ret; - ret = __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock); + ret = __add_pages(nid, start_pfn, nr_pages, restrictions); if (ret) printk("%s: Problem encountered in __add_pages() as ret=%d\n", __func__, ret); diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 1aa27aac73c5..76deaa8525db 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -109,8 +109,8 @@ int __weak remove_section_mapping(unsigned long start, unsigned long end) return -ENODEV; } -int __meminit arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, - bool want_memblock) +int __meminit arch_add_memory(int nid, u64 start, u64 size, + struct mhp_restrictions *restrictions) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; @@ -127,7 +127,7 @@ int __meminit arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap * } flush_inval_dcache_range(start, start + size); - return __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock); + return __add_pages(nid, start_pfn, nr_pages, restrictions); } #ifdef CONFIG_MEMORY_HOTREMOVE diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 25e3113091ea..f5db961ad792 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -216,8 +216,8 @@ device_initcall(s390_cma_mem_init); #endif /* CONFIG_CMA */ -int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, - bool want_memblock) +int arch_add_memory(int nid, u64 start, u64 size, + struct mhp_restrictions *restrictions) { unsigned long start_pfn = PFN_DOWN(start); unsigned long size_pages = PFN_DOWN(size); @@ -227,7 +227,7 @@ int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, if (rc) return rc; - rc = __add_pages(nid, start_pfn, size_pages, altmap, want_memblock); + rc = __add_pages(nid, start_pfn, size_pages, restrictions); if (rc) vmem_remove_mapping(start, size); return rc; diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c index 8e004b2f1a6a..168d3a6b9358 100644 --- a/arch/sh/mm/init.c +++ b/arch/sh/mm/init.c @@ -404,15 +404,15 @@ void __init mem_init(void) } #ifdef CONFIG_MEMORY_HOTPLUG -int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, - bool want_memblock) +int arch_add_memory(int nid, u64 start, u64 size, + struct mhp_restrictions *restrictions) { unsigned long start_pfn = PFN_DOWN(start); unsigned long nr_pages = size >> PAGE_SHIFT; int ret; /* We only have ZONE_NORMAL, so this is easy.. */ - ret = __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock); + ret = __add_pages(nid, start_pfn, nr_pages, restrictions); if (unlikely(ret)) printk("%s: Failed, __add_pages() == %d\n", __func__, ret); diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 85c94f9a87f8..755dbed85531 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -850,13 +850,13 @@ void __init mem_init(void) } #ifdef CONFIG_MEMORY_HOTPLUG -int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, - bool want_memblock) +int arch_add_memory(int nid, u64 start, u64 size, + struct mhp_restrictions *restrictions) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; - return __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock); + return __add_pages(nid, start_pfn, nr_pages, restrictions); } #ifdef CONFIG_MEMORY_HOTREMOVE diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index bccff68e3267..db42c11b48fb 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -777,11 +777,11 @@ static void update_end_of_memory_vars(u64 start, u64 size) } int add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages, - struct vmem_altmap *altmap, bool want_memblock) + struct mhp_restrictions *restrictions) { int ret; - ret = __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock); + ret = __add_pages(nid, start_pfn, nr_pages, restrictions); WARN_ON_ONCE(ret); /* update max_pfn, max_low_pfn and high_memory */ @@ -791,15 +791,15 @@ int add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages, return ret; } -int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, - bool want_memblock) +int arch_add_memory(int nid, u64 start, u64 size, + struct mhp_restrictions *restrictions) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; init_memory_mapping(start, start + size); - return add_pages(nid, start_pfn, nr_pages, altmap, want_memblock); + return add_pages(nid, start_pfn, nr_pages, restrictions); } #define PAGE_INUSE 0xFD diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 3c8cf347804c..5bd4b56f639c 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -118,20 +118,37 @@ extern int __remove_pages(struct zone *zone, unsigned long start_pfn, unsigned long nr_pages, struct vmem_altmap *altmap); #endif /* CONFIG_MEMORY_HOTREMOVE */ +/* + * Do we want sysfs memblock files created. This will allow userspace to online + * and offline memory explicitly. Lack of this bit means that the caller has to + * call move_pfn_range_to_zone to finish the initialization. + */ + +#define MHP_MEMBLOCK_API (1<<0) + +/* + * Restrictions for the memory hotplug: + * flags: MHP_ flags + * altmap: alternative allocator for memmap array + */ +struct mhp_restrictions { + unsigned long flags; + struct vmem_altmap *altmap; +}; + /* reasonably generic interface to expand the physical pages */ extern int __add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages, - struct vmem_altmap *altmap, bool want_memblock); + struct mhp_restrictions *restrictions); #ifndef CONFIG_ARCH_HAS_ADD_PAGES static inline int add_pages(int nid, unsigned long start_pfn, - unsigned long nr_pages, struct vmem_altmap *altmap, - bool want_memblock) + unsigned long nr_pages, struct mhp_restrictions *restrictions) { - return __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock); + return __add_pages(nid, start_pfn, nr_pages, restrictions); } #else /* ARCH_HAS_ADD_PAGES */ int add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages, - struct vmem_altmap *altmap, bool want_memblock); + struct mhp_restrictions *restrictions); #endif /* ARCH_HAS_ADD_PAGES */ #ifdef CONFIG_NUMA @@ -333,7 +350,7 @@ extern int __add_memory(int nid, u64 start, u64 size); extern int add_memory(int nid, u64 start, u64 size); extern int add_memory_resource(int nid, struct resource *resource); extern int arch_add_memory(int nid, u64 start, u64 size, - struct vmem_altmap *altmap, bool want_memblock); + struct mhp_restrictions *restrictions); extern void move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn, unsigned long nr_pages, struct vmem_altmap *altmap); extern bool is_memblock_offlined(struct memory_block *mem); diff --git a/kernel/memremap.c b/kernel/memremap.c index a856cb5ff192..cc5e3e34417d 100644 --- a/kernel/memremap.c +++ b/kernel/memremap.c @@ -149,6 +149,7 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap) struct resource *res = &pgmap->res; struct dev_pagemap *conflict_pgmap; pgprot_t pgprot = PAGE_KERNEL; + struct mhp_restrictions restrictions = {}; int error, nid, is_ram; if (!pgmap->ref || !pgmap->kill) @@ -199,6 +200,9 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap) if (error) goto err_pfn_remap; + /* We do not want any optional features only our own memmap */ + restrictions.altmap = altmap; + mem_hotplug_begin(); /* @@ -214,7 +218,7 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap) */ if (pgmap->type == MEMORY_DEVICE_PRIVATE) { error = add_pages(nid, align_start >> PAGE_SHIFT, - align_size >> PAGE_SHIFT, NULL, false); + align_size >> PAGE_SHIFT, &restrictions); } else { error = kasan_add_zero_shadow(__va(align_start), align_size); if (error) { @@ -222,8 +226,8 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap) goto err_kasan; } - error = arch_add_memory(nid, align_start, align_size, altmap, - false); + error = arch_add_memory(nid, align_start, align_size, + &restrictions); } if (!error) { diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index d8a3e9554aec..50f77e059457 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -274,12 +274,12 @@ static int __meminit __add_section(int nid, unsigned long phys_start_pfn, * add the new pages. */ int __ref __add_pages(int nid, unsigned long phys_start_pfn, - unsigned long nr_pages, struct vmem_altmap *altmap, - bool want_memblock) + unsigned long nr_pages, struct mhp_restrictions *restrictions) { unsigned long i; int err = 0; int start_sec, end_sec; + struct vmem_altmap *altmap = restrictions->altmap; /* during initialize mem_map, align hot-added range to section */ start_sec = pfn_to_section_nr(phys_start_pfn); @@ -300,7 +300,7 @@ int __ref __add_pages(int nid, unsigned long phys_start_pfn, for (i = start_sec; i <= end_sec; i++) { err = __add_section(nid, section_nr_to_pfn(i), altmap, - want_memblock); + restrictions->flags & MHP_MEMBLOCK_API); /* * EEXIST is finally dealt with by ioresource collision @@ -1102,6 +1102,7 @@ int __ref add_memory_resource(int nid, struct resource *res) u64 start, size; bool new_node = false; int ret; + struct mhp_restrictions restrictions = {}; start = res->start; size = resource_size(res); @@ -1126,7 +1127,8 @@ int __ref add_memory_resource(int nid, struct resource *res) new_node = ret; /* call arch's memory hotadd */ - ret = arch_add_memory(nid, start, size, NULL, true); + restrictions.flags = MHP_MEMBLOCK_API; + ret = arch_add_memory(nid, start, size, &restrictions); if (ret < 0) goto error;