Message ID | 20240529171236.32002-3-Jonathan.Cameron@huawei.com |
---|---|
State | New, archived |
Headers | show |
Series | arm64/memblock: Handling of CXL Fixed Memory Windows. | expand |
On Wed, May 29, 2024 at 06:12:30PM +0100, Jonathan Cameron wrote: > From: Dan Williams <dan.j.williams@intel.com> > > Similar to how generic memory_add_physaddr_to_nid() interrogates > memblock data for numa information, introduce > get_reserved_pfn_range_for_nid() to enable the same operation for > reserved memory ranges. Example memory ranges that are reserved, but > still have associated numa-info are persistent memory or Soft Reserved > (EFI_MEMORY_SP) memory. > > This is Dan's patch but with the implementation of > phys_to_target_node() made arm64 specific. > > Cc: Mike Rapoport <rppt@linux.ibm.com> > Cc: Jia He <justin.he@arm.com> > Cc: Will Deacon <will@kernel.org> > Cc: David Hildenbrand <david@redhat.com> > Cc: Andrew Morton <akpm@linux-foundation.org> > Signed-off-by: Dan Williams <dan.j.williams@intel.com> > Link: https://lore.kernel.org/r/159457120893.754248.7783260004248722175.stgit@dwillia2-desk3.amr.corp.intel.com > Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> > --- > arch/arm64/include/asm/sparsemem.h | 4 ++++ > arch/arm64/mm/init.c | 22 ++++++++++++++++++++++ > include/linux/memblock.h | 8 ++++++++ > include/linux/mm.h | 14 ++++++++++++++ > mm/memblock.c | 22 +++++++++++++++++++--- > mm/mm_init.c | 29 ++++++++++++++++++++++++++++- > 6 files changed, 95 insertions(+), 4 deletions(-) > > diff --git a/arch/arm64/include/asm/sparsemem.h b/arch/arm64/include/asm/sparsemem.h > index 8dd1b6a718fa..5b483ad6d501 100644 > --- a/arch/arm64/include/asm/sparsemem.h > +++ b/arch/arm64/include/asm/sparsemem.h > @@ -27,7 +27,11 @@ > #endif /* CONFIG_ARM64_64K_PAGES */ > > #ifndef __ASSEMBLY__ > + > extern int memory_add_physaddr_to_nid(u64 addr); > #define memory_add_physaddr_to_nid memory_add_physaddr_to_nid > +extern int phys_to_target_node(phys_addr_t start); > +#define phys_to_target_node phys_to_target_node > + > #endif /* __ASSEMBLY__ */ > #endif > diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c > index 
f310cbd349ba..6a2f21b1bb58 100644 > --- a/arch/arm64/mm/init.c > +++ b/arch/arm64/mm/init.c > @@ -75,6 +75,28 @@ int memory_add_physaddr_to_nid(u64 start) > } > EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); > > +int phys_to_target_node(phys_addr_t start) > +{ > + unsigned long start_pfn, end_pfn, pfn = PHYS_PFN(start); > + int nid = __memory_add_physaddr_to_nid(start); > + > + if (nid != NUMA_NO_NODE) > + return nid; > + > + /* > + * Search reserved memory ranges since the memory address does > + * not appear to be online > + */ > + for_each_node_state(nid, N_POSSIBLE) { > + get_reserved_pfn_range_for_nid(nid, &start_pfn, &end_pfn); > + if (pfn >= start_pfn && pfn <= end_pfn) > + return nid; > + } > + > + return NUMA_NO_NODE; > +} > +EXPORT_SYMBOL(phys_to_target_node); > + > #endif /* CONFIG_NUMA */ > > /* > diff --git a/include/linux/memblock.h b/include/linux/memblock.h > index e2082240586d..c7d518a54359 100644 > --- a/include/linux/memblock.h > +++ b/include/linux/memblock.h > @@ -281,6 +281,10 @@ int memblock_search_pfn_nid(unsigned long pfn, unsigned long *start_pfn, > void __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn, > unsigned long *out_end_pfn, int *out_nid); > > +void __next_reserved_pfn_range(int *idx, int nid, > + unsigned long *out_start_pfn, > + unsigned long *out_end_pfn, int *out_nid); > + > /** > * for_each_mem_pfn_range - early memory pfn range iterator > * @i: an integer used as loop variable > @@ -295,6 +299,10 @@ void __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn, > for (i = -1, __next_mem_pfn_range(&i, nid, p_start, p_end, p_nid); \ > i >= 0; __next_mem_pfn_range(&i, nid, p_start, p_end, p_nid)) > > +#define for_each_reserved_pfn_range(i, nid, p_start, p_end, p_nid) \ > + for (i = -1, __next_reserved_pfn_range(&i, nid, p_start, p_end, p_nid); \ > + i >= 0; __next_reserved_pfn_range(&i, nid, p_start, p_end, p_nid)) > + > #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT > void 
__next_mem_pfn_range_in_zone(u64 *idx, struct zone *zone, > unsigned long *out_spfn, > diff --git a/include/linux/mm.h b/include/linux/mm.h > index 9849dfda44d4..0c829b2d44fa 100644 > --- a/include/linux/mm.h > +++ b/include/linux/mm.h > @@ -3245,9 +3245,23 @@ void free_area_init(unsigned long *max_zone_pfn); > unsigned long node_map_pfn_alignment(void); > extern unsigned long absent_pages_in_range(unsigned long start_pfn, > unsigned long end_pfn); > + > +/* > + * Allow archs to opt-in to keeping get_pfn_range_for_nid() available > + * after boot. > + */ > +#ifdef CONFIG_ARCH_KEEP_MEMBLOCK > +#define __init_or_memblock > +#else > +#define __init_or_memblock __init > +#endif > + > extern void get_pfn_range_for_nid(unsigned int nid, > unsigned long *start_pfn, unsigned long *end_pfn); > > +extern void get_reserved_pfn_range_for_nid(unsigned int nid, > + unsigned long *start_pfn, unsigned long *end_pfn); > + > #ifndef CONFIG_NUMA > static inline int early_pfn_to_nid(unsigned long pfn) > { > diff --git a/mm/memblock.c b/mm/memblock.c > index d09136e040d3..5498d5ea70b4 100644 > --- a/mm/memblock.c > +++ b/mm/memblock.c > @@ -1289,11 +1289,11 @@ void __init_memblock __next_mem_range_rev(u64 *idx, int nid, > /* > * Common iterator interface used to define for_each_mem_pfn_range(). 
> */ > -void __init_memblock __next_mem_pfn_range(int *idx, int nid, > +static void __init_memblock __next_memblock_pfn_range(int *idx, int nid, > unsigned long *out_start_pfn, > - unsigned long *out_end_pfn, int *out_nid) > + unsigned long *out_end_pfn, int *out_nid, > + struct memblock_type *type) > { > - struct memblock_type *type = &memblock.memory; > struct memblock_region *r; > int r_nid; > > @@ -1319,6 +1319,22 @@ void __init_memblock __next_mem_pfn_range(int *idx, int nid, > *out_nid = r_nid; > } > > +void __init_memblock __next_mem_pfn_range(int *idx, int nid, > + unsigned long *out_start_pfn, > + unsigned long *out_end_pfn, int *out_nid) > +{ > + __next_memblock_pfn_range(idx, nid, out_start_pfn, out_end_pfn, out_nid, > + &memblock.memory); > +} > + > +void __init_memblock __next_reserved_pfn_range(int *idx, int nid, > + unsigned long *out_start_pfn, > + unsigned long *out_end_pfn, int *out_nid) > +{ > + __next_memblock_pfn_range(idx, nid, out_start_pfn, out_end_pfn, out_nid, > + &memblock.reserved); > +} > + > /** > * memblock_set_node - set node ID on memblock regions > * @base: base of area to set node ID for > diff --git a/mm/mm_init.c b/mm/mm_init.c > index f72b852bd5b8..1f6e29e60673 100644 > --- a/mm/mm_init.c > +++ b/mm/mm_init.c > @@ -1644,7 +1644,7 @@ static inline void alloc_node_mem_map(struct pglist_data *pgdat) { } > * provided by memblock_set_node(). If called for a node > * with no available memory, the start and end PFNs will be 0. > */ > -void __init get_pfn_range_for_nid(unsigned int nid, > +void __init_or_memblock get_pfn_range_for_nid(unsigned int nid, > unsigned long *start_pfn, unsigned long *end_pfn) > { > unsigned long this_start_pfn, this_end_pfn; > @@ -1662,6 +1662,33 @@ void __init get_pfn_range_for_nid(unsigned int nid, > *start_pfn = 0; > } > > +/** > + * get_reserved_pfn_range_for_nid - Return the start and end page frames for a node > + * @nid: The nid to return the range for. 
If MAX_NUMNODES, the min and max PFN are returned. > + * @start_pfn: Passed by reference. On return, it will have the node start_pfn. > + * @end_pfn: Passed by reference. On return, it will have the node end_pfn. > + * > + * Mostly identical to get_pfn_range_for_nid() except it operates on > + * reserved ranges rather than online memory. > + */ > +void __init_or_memblock get_reserved_pfn_range_for_nid(unsigned int nid, > + unsigned long *start_pfn, unsigned long *end_pfn) > +{ > + unsigned long this_start_pfn, this_end_pfn; > + int i; > + > + *start_pfn = -1UL; > + *end_pfn = 0; > + > + for_each_reserved_pfn_range(i, nid, &this_start_pfn, &this_end_pfn, NULL) { > + *start_pfn = min(*start_pfn, this_start_pfn); > + *end_pfn = max(*end_pfn, this_end_pfn); > + } > + > + if (*start_pfn == -1UL) > + *start_pfn = 0; > +} > + > static void __init free_area_init_node(int nid) > { > pg_data_t *pgdat = NODE_DATA(nid); > -- > 2.39.2 > Tested-by: Yuquan Wang <wangyuquan1236@phytium.com.cn>
diff --git a/arch/arm64/include/asm/sparsemem.h b/arch/arm64/include/asm/sparsemem.h index 8dd1b6a718fa..5b483ad6d501 100644 --- a/arch/arm64/include/asm/sparsemem.h +++ b/arch/arm64/include/asm/sparsemem.h @@ -27,7 +27,11 @@ #endif /* CONFIG_ARM64_64K_PAGES */ #ifndef __ASSEMBLY__ + extern int memory_add_physaddr_to_nid(u64 addr); #define memory_add_physaddr_to_nid memory_add_physaddr_to_nid +extern int phys_to_target_node(phys_addr_t start); +#define phys_to_target_node phys_to_target_node + #endif /* __ASSEMBLY__ */ #endif diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index f310cbd349ba..6a2f21b1bb58 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -75,6 +75,28 @@ int memory_add_physaddr_to_nid(u64 start) } EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); +int phys_to_target_node(phys_addr_t start) +{ + unsigned long start_pfn, end_pfn, pfn = PHYS_PFN(start); + int nid = __memory_add_physaddr_to_nid(start); + + if (nid != NUMA_NO_NODE) + return nid; + + /* + * Search reserved memory ranges since the memory address does + * not appear to be online + */ + for_each_node_state(nid, N_POSSIBLE) { + get_reserved_pfn_range_for_nid(nid, &start_pfn, &end_pfn); + if (pfn >= start_pfn && pfn <= end_pfn) + return nid; + } + + return NUMA_NO_NODE; +} +EXPORT_SYMBOL(phys_to_target_node); + #endif /* CONFIG_NUMA */ /* diff --git a/include/linux/memblock.h b/include/linux/memblock.h index e2082240586d..c7d518a54359 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -281,6 +281,10 @@ int memblock_search_pfn_nid(unsigned long pfn, unsigned long *start_pfn, void __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn, unsigned long *out_end_pfn, int *out_nid); +void __next_reserved_pfn_range(int *idx, int nid, + unsigned long *out_start_pfn, + unsigned long *out_end_pfn, int *out_nid); + /** * for_each_mem_pfn_range - early memory pfn range iterator * @i: an integer used as loop variable @@ -295,6 +299,10 @@ void 
__next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn, for (i = -1, __next_mem_pfn_range(&i, nid, p_start, p_end, p_nid); \ i >= 0; __next_mem_pfn_range(&i, nid, p_start, p_end, p_nid)) +#define for_each_reserved_pfn_range(i, nid, p_start, p_end, p_nid) \ + for (i = -1, __next_reserved_pfn_range(&i, nid, p_start, p_end, p_nid); \ + i >= 0; __next_reserved_pfn_range(&i, nid, p_start, p_end, p_nid)) + #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT void __next_mem_pfn_range_in_zone(u64 *idx, struct zone *zone, unsigned long *out_spfn, diff --git a/include/linux/mm.h b/include/linux/mm.h index 9849dfda44d4..0c829b2d44fa 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3245,9 +3245,23 @@ void free_area_init(unsigned long *max_zone_pfn); unsigned long node_map_pfn_alignment(void); extern unsigned long absent_pages_in_range(unsigned long start_pfn, unsigned long end_pfn); + +/* + * Allow archs to opt-in to keeping get_pfn_range_for_nid() available + * after boot. + */ +#ifdef CONFIG_ARCH_KEEP_MEMBLOCK +#define __init_or_memblock +#else +#define __init_or_memblock __init +#endif + extern void get_pfn_range_for_nid(unsigned int nid, unsigned long *start_pfn, unsigned long *end_pfn); +extern void get_reserved_pfn_range_for_nid(unsigned int nid, + unsigned long *start_pfn, unsigned long *end_pfn); + #ifndef CONFIG_NUMA static inline int early_pfn_to_nid(unsigned long pfn) { diff --git a/mm/memblock.c b/mm/memblock.c index d09136e040d3..5498d5ea70b4 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -1289,11 +1289,11 @@ void __init_memblock __next_mem_range_rev(u64 *idx, int nid, /* * Common iterator interface used to define for_each_mem_pfn_range(). 
*/ -void __init_memblock __next_mem_pfn_range(int *idx, int nid, +static void __init_memblock __next_memblock_pfn_range(int *idx, int nid, unsigned long *out_start_pfn, - unsigned long *out_end_pfn, int *out_nid) + unsigned long *out_end_pfn, int *out_nid, + struct memblock_type *type) { - struct memblock_type *type = &memblock.memory; struct memblock_region *r; int r_nid; @@ -1319,6 +1319,22 @@ void __init_memblock __next_mem_pfn_range(int *idx, int nid, *out_nid = r_nid; } +void __init_memblock __next_mem_pfn_range(int *idx, int nid, + unsigned long *out_start_pfn, + unsigned long *out_end_pfn, int *out_nid) +{ + __next_memblock_pfn_range(idx, nid, out_start_pfn, out_end_pfn, out_nid, + &memblock.memory); +} + +void __init_memblock __next_reserved_pfn_range(int *idx, int nid, + unsigned long *out_start_pfn, + unsigned long *out_end_pfn, int *out_nid) +{ + __next_memblock_pfn_range(idx, nid, out_start_pfn, out_end_pfn, out_nid, + &memblock.reserved); +} + /** * memblock_set_node - set node ID on memblock regions * @base: base of area to set node ID for diff --git a/mm/mm_init.c b/mm/mm_init.c index f72b852bd5b8..1f6e29e60673 100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c @@ -1644,7 +1644,7 @@ static inline void alloc_node_mem_map(struct pglist_data *pgdat) { } * provided by memblock_set_node(). If called for a node * with no available memory, the start and end PFNs will be 0. */ -void __init get_pfn_range_for_nid(unsigned int nid, +void __init_or_memblock get_pfn_range_for_nid(unsigned int nid, unsigned long *start_pfn, unsigned long *end_pfn) { unsigned long this_start_pfn, this_end_pfn; @@ -1662,6 +1662,33 @@ void __init get_pfn_range_for_nid(unsigned int nid, *start_pfn = 0; } +/** + * get_reserved_pfn_range_for_nid - Return the start and end page frames for a node + * @nid: The nid to return the range for. If MAX_NUMNODES, the min and max PFN are returned. + * @start_pfn: Passed by reference. On return, it will have the node start_pfn. 
+ * @end_pfn: Passed by reference. On return, it will have the node end_pfn. + * + * Mostly identical to get_pfn_range_for_nid() except it operates on + * reserved ranges rather than online memory. + */ +void __init_or_memblock get_reserved_pfn_range_for_nid(unsigned int nid, + unsigned long *start_pfn, unsigned long *end_pfn) +{ + unsigned long this_start_pfn, this_end_pfn; + int i; + + *start_pfn = -1UL; + *end_pfn = 0; + + for_each_reserved_pfn_range(i, nid, &this_start_pfn, &this_end_pfn, NULL) { + *start_pfn = min(*start_pfn, this_start_pfn); + *end_pfn = max(*end_pfn, this_end_pfn); + } + + if (*start_pfn == -1UL) + *start_pfn = 0; +} + static void __init free_area_init_node(int nid) { pg_data_t *pgdat = NODE_DATA(nid);