Message ID | 20240716111346.3676969-3-rppt@kernel.org (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | mm: introduce numa_memblks | expand |
On 16.07.24 13:13, Mike Rapoport wrote: > From: "Mike Rapoport (Microsoft)" <rppt@kernel.org> > > sgi-ip27 is the only system that defines NODE_DATA() differently than > the rest of NUMA machines. > > Add node_data array of struct pglist pointers that will point to > __node_data[node]->pglist and redefine NODE_DATA() to use node_data > array. > > This will allow pulling declaration of node_data to the generic mm code > in the next commit. > > Signed-off-by: Mike Rapoport (Microsoft) <rppt@kernel.org> > --- > arch/mips/include/asm/mach-ip27/mmzone.h | 5 ++++- > arch/mips/sgi-ip27/ip27-memory.c | 5 ++++- > 2 files changed, 8 insertions(+), 2 deletions(-) > > diff --git a/arch/mips/include/asm/mach-ip27/mmzone.h b/arch/mips/include/asm/mach-ip27/mmzone.h > index 08c36e50a860..629c3f290203 100644 > --- a/arch/mips/include/asm/mach-ip27/mmzone.h > +++ b/arch/mips/include/asm/mach-ip27/mmzone.h > @@ -22,7 +22,10 @@ struct node_data { > > extern struct node_data *__node_data[]; > > -#define NODE_DATA(n) (&__node_data[(n)]->pglist) > #define hub_data(n) (&__node_data[(n)]->hub) > > +extern struct pglist_data *node_data[]; > + > +#define NODE_DATA(nid) (node_data[nid]) > + > #endif /* _ASM_MACH_MMZONE_H */ > diff --git a/arch/mips/sgi-ip27/ip27-memory.c b/arch/mips/sgi-ip27/ip27-memory.c > index b8ca94cfb4fe..c30ef6958b97 100644 > --- a/arch/mips/sgi-ip27/ip27-memory.c > +++ b/arch/mips/sgi-ip27/ip27-memory.c > @@ -34,8 +34,10 @@ > #define SLOT_PFNSHIFT (SLOT_SHIFT - PAGE_SHIFT) > #define PFN_NASIDSHFT (NASID_SHFT - PAGE_SHIFT) > > -struct node_data *__node_data[MAX_NUMNODES]; > +struct pglist_data *node_data[MAX_NUMNODES]; > +EXPORT_SYMBOL(node_data); > > +struct node_data *__node_data[MAX_NUMNODES]; > EXPORT_SYMBOL(__node_data); > > static u64 gen_region_mask(void) > @@ -361,6 +363,7 @@ static void __init node_mem_init(nasid_t node) > */ > __node_data[node] = __va(slot_freepfn << PAGE_SHIFT); > memset(__node_data[node], 0, PAGE_SIZE); > + node_data[node] = 
&__node_data[node]->pglist; > > NODE_DATA(node)->node_start_pfn = start_pfn; > NODE_DATA(node)->node_spanned_pages = end_pfn - start_pfn; I was assuming we could get rid of __node_data->pglist. But now I am confused where that is actually set. Anyhow Reviewed-by: David Hildenbrand <david@redhat.com>
On Wed, 17 Jul 2024 16:32:59 +0200 David Hildenbrand <david@redhat.com> wrote: > On 16.07.24 13:13, Mike Rapoport wrote: > > From: "Mike Rapoport (Microsoft)" <rppt@kernel.org> > > > > sgi-ip27 is the only system that defines NODE_DATA() differently than > > the rest of NUMA machines. > > > > Add node_data array of struct pglist pointers that will point to > > __node_data[node]->pglist and redefine NODE_DATA() to use node_data > > array. > > > > This will allow pulling declaration of node_data to the generic mm code > > in the next commit. > > > > Signed-off-by: Mike Rapoport (Microsoft) <rppt@kernel.org> > > --- > > arch/mips/include/asm/mach-ip27/mmzone.h | 5 ++++- > > arch/mips/sgi-ip27/ip27-memory.c | 5 ++++- > > 2 files changed, 8 insertions(+), 2 deletions(-) > > > > diff --git a/arch/mips/include/asm/mach-ip27/mmzone.h b/arch/mips/include/asm/mach-ip27/mmzone.h > > index 08c36e50a860..629c3f290203 100644 > > --- a/arch/mips/include/asm/mach-ip27/mmzone.h > > +++ b/arch/mips/include/asm/mach-ip27/mmzone.h > > @@ -22,7 +22,10 @@ struct node_data { > > > > extern struct node_data *__node_data[]; > > > > -#define NODE_DATA(n) (&__node_data[(n)]->pglist) > > #define hub_data(n) (&__node_data[(n)]->hub) > > > > +extern struct pglist_data *node_data[]; > > + > > +#define NODE_DATA(nid) (node_data[nid]) > > + > > #endif /* _ASM_MACH_MMZONE_H */ > > diff --git a/arch/mips/sgi-ip27/ip27-memory.c b/arch/mips/sgi-ip27/ip27-memory.c > > index b8ca94cfb4fe..c30ef6958b97 100644 > > --- a/arch/mips/sgi-ip27/ip27-memory.c > > +++ b/arch/mips/sgi-ip27/ip27-memory.c > > @@ -34,8 +34,10 @@ > > #define SLOT_PFNSHIFT (SLOT_SHIFT - PAGE_SHIFT) > > #define PFN_NASIDSHFT (NASID_SHFT - PAGE_SHIFT) > > > > -struct node_data *__node_data[MAX_NUMNODES]; > > +struct pglist_data *node_data[MAX_NUMNODES]; > > +EXPORT_SYMBOL(node_data); > > > > +struct node_data *__node_data[MAX_NUMNODES]; > > EXPORT_SYMBOL(__node_data); > > > > static u64 gen_region_mask(void) > > @@ -361,6 +363,7 @@ 
static void __init node_mem_init(nasid_t node) > > */ > > __node_data[node] = __va(slot_freepfn << PAGE_SHIFT); > > memset(__node_data[node], 0, PAGE_SIZE); > > + node_data[node] = &__node_data[node]->pglist; > > > > NODE_DATA(node)->node_start_pfn = start_pfn; > > NODE_DATA(node)->node_spanned_pages = end_pfn - start_pfn; > > I was assuming we could get rid of __node_data->pglist. > > But now I am confused where that is actually set. It looks nasty... Cast in arch_refresh_nodedata() takes incoming pg_data_t * and casts it to the local version of struct node_data * which I think is this one struct node_data { struct pglist_data pglist; (which is pg_data_t pglist) struct hub_data hub; }; https://elixir.bootlin.com/linux/v6.10/source/arch/mips/sgi-ip27/ip27-memory.c#L432 Now that pg_data_t is allocated by arch_alloc_nodedata() which might be fine (though types could be handled in a more readable fashion via some container_of() magic). https://elixir.bootlin.com/linux/v6.10/source/arch/mips/sgi-ip27/ip27-memory.c#L427 However that call is: pg_data_t * __init arch_alloc_nodedata(int nid) { return memblock_alloc(sizeof(pg_data_t), SMP_CACHE_BYTES); } So that doesn't seem to allocate enough space to me, as it should be sizeof(struct node_data). Worth cleaning up whilst here? Proper handling of types would definitely help. Jonathan > > Anyhow > > Reviewed-by: David Hildenbrand <david@redhat.com> >
On Fri, Jul 19, 2024 at 03:38:52PM +0100, Jonathan Cameron wrote: > On Wed, 17 Jul 2024 16:32:59 +0200 > David Hildenbrand <david@redhat.com> wrote: > > > On 16.07.24 13:13, Mike Rapoport wrote: > > > From: "Mike Rapoport (Microsoft)" <rppt@kernel.org> > > > > > > sgi-ip27 is the only system that defines NODE_DATA() differently than > > > the rest of NUMA machines. > > > > > > Add node_data array of struct pglist pointers that will point to > > > __node_data[node]->pglist and redefine NODE_DATA() to use node_data > > > array. > > > > > > This will allow pulling declaration of node_data to the generic mm code > > > in the next commit. > > > > > > Signed-off-by: Mike Rapoport (Microsoft) <rppt@kernel.org> > > > --- > > > arch/mips/include/asm/mach-ip27/mmzone.h | 5 ++++- > > > arch/mips/sgi-ip27/ip27-memory.c | 5 ++++- > > > 2 files changed, 8 insertions(+), 2 deletions(-) > > > > > > diff --git a/arch/mips/include/asm/mach-ip27/mmzone.h b/arch/mips/include/asm/mach-ip27/mmzone.h > > > index 08c36e50a860..629c3f290203 100644 > > > --- a/arch/mips/include/asm/mach-ip27/mmzone.h > > > +++ b/arch/mips/include/asm/mach-ip27/mmzone.h > > > @@ -22,7 +22,10 @@ struct node_data { > > > > > > extern struct node_data *__node_data[]; > > > > > > -#define NODE_DATA(n) (&__node_data[(n)]->pglist) > > > #define hub_data(n) (&__node_data[(n)]->hub) > > > > > > +extern struct pglist_data *node_data[]; > > > + > > > +#define NODE_DATA(nid) (node_data[nid]) > > > + > > > #endif /* _ASM_MACH_MMZONE_H */ > > > diff --git a/arch/mips/sgi-ip27/ip27-memory.c b/arch/mips/sgi-ip27/ip27-memory.c > > > index b8ca94cfb4fe..c30ef6958b97 100644 > > > --- a/arch/mips/sgi-ip27/ip27-memory.c > > > +++ b/arch/mips/sgi-ip27/ip27-memory.c > > > @@ -34,8 +34,10 @@ > > > #define SLOT_PFNSHIFT (SLOT_SHIFT - PAGE_SHIFT) > > > #define PFN_NASIDSHFT (NASID_SHFT - PAGE_SHIFT) > > > > > > -struct node_data *__node_data[MAX_NUMNODES]; > > > +struct pglist_data *node_data[MAX_NUMNODES]; > > > 
+EXPORT_SYMBOL(node_data); > > > > > > +struct node_data *__node_data[MAX_NUMNODES]; > > > EXPORT_SYMBOL(__node_data); > > > > > > static u64 gen_region_mask(void) > > > @@ -361,6 +363,7 @@ static void __init node_mem_init(nasid_t node) > > > */ > > > __node_data[node] = __va(slot_freepfn << PAGE_SHIFT); > > > memset(__node_data[node], 0, PAGE_SIZE); > > > + node_data[node] = &__node_data[node]->pglist; > > > > > > NODE_DATA(node)->node_start_pfn = start_pfn; > > > NODE_DATA(node)->node_spanned_pages = end_pfn - start_pfn; > > > > I was assuming we could get rid of __node_data->pglist. > > > > But now I am confused where that is actually set. > > It looks nasty... Nasty indeed :) > Cast in arch_refresh_nodedata() takes incoming pg_data_t * and casts it > to the local version of struct node_data * which I think is this one > > struct node_data { > struct pglist_data pglist; (which is pg_data_t pglist) > struct hub_data hub; > }; > > https://elixir.bootlin.com/linux/v6.10/source/arch/mips/sgi-ip27/ip27-memory.c#L432 > > Now that pg_data_t is allocated by > arch_alloc_nodedata() which might be fine (though types could be handled in a more > readable fashion via some container_of() magic. > https://elixir.bootlin.com/linux/v6.10/source/arch/mips/sgi-ip27/ip27-memory.c#L427 > > However that call is: > pg_data_t * __init arch_alloc_nodedata(int nid) > { > return memblock_alloc(sizeof(pg_data_t), SMP_CACHE_BYTES); > } > > So doesn't seem to allocate enough space to me as should be sizeof(struct node_data) Well, it's there to silence a compiler error (commit f8f9f21c7848 ("MIPS: Fix build error for loongson64 and sgi-ip27")), but this is not a proper fix :( Luckily nothing calls cpumask_of_node() for offline nodes... > Worth cleaning up whilst here? Proper handling of types would definitely > help. Worth cleaning up indeed, but I'd rather drop arch_alloc_nodedata() on MIPS altogether. > Jonathan
diff --git a/arch/mips/include/asm/mach-ip27/mmzone.h b/arch/mips/include/asm/mach-ip27/mmzone.h index 08c36e50a860..629c3f290203 100644 --- a/arch/mips/include/asm/mach-ip27/mmzone.h +++ b/arch/mips/include/asm/mach-ip27/mmzone.h @@ -22,7 +22,10 @@ struct node_data { extern struct node_data *__node_data[]; -#define NODE_DATA(n) (&__node_data[(n)]->pglist) #define hub_data(n) (&__node_data[(n)]->hub) +extern struct pglist_data *node_data[]; + +#define NODE_DATA(nid) (node_data[nid]) + #endif /* _ASM_MACH_MMZONE_H */ diff --git a/arch/mips/sgi-ip27/ip27-memory.c b/arch/mips/sgi-ip27/ip27-memory.c index b8ca94cfb4fe..c30ef6958b97 100644 --- a/arch/mips/sgi-ip27/ip27-memory.c +++ b/arch/mips/sgi-ip27/ip27-memory.c @@ -34,8 +34,10 @@ #define SLOT_PFNSHIFT (SLOT_SHIFT - PAGE_SHIFT) #define PFN_NASIDSHFT (NASID_SHFT - PAGE_SHIFT) -struct node_data *__node_data[MAX_NUMNODES]; +struct pglist_data *node_data[MAX_NUMNODES]; +EXPORT_SYMBOL(node_data); +struct node_data *__node_data[MAX_NUMNODES]; EXPORT_SYMBOL(__node_data); static u64 gen_region_mask(void) @@ -361,6 +363,7 @@ static void __init node_mem_init(nasid_t node) */ __node_data[node] = __va(slot_freepfn << PAGE_SHIFT); memset(__node_data[node], 0, PAGE_SIZE); + node_data[node] = &__node_data[node]->pglist; NODE_DATA(node)->node_start_pfn = start_pfn; NODE_DATA(node)->node_spanned_pages = end_pfn - start_pfn;