Message ID | 20230523115708.195597-1-wangkefeng.wang@huawei.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | memblock: update numa node of memblk reserved type | expand |
On 5/23/23 17:27, Kefeng Wang wrote: > The numa node of memblk reserved type is wrong, it could update > according to the numa node information from memblk memory type, > let's fix it. Indeed it's wrong at present and can be verified from sysfs file (/sys/kernel/debug/memblock/reserved) accessed in user space. > > Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com> > --- > mm/memblock.c | 25 +++++++++++++++++++++++++ > 1 file changed, 25 insertions(+) > > diff --git a/mm/memblock.c b/mm/memblock.c > index a50447d970ef..45a0781cda31 100644 > --- a/mm/memblock.c > +++ b/mm/memblock.c > @@ -1922,6 +1922,28 @@ phys_addr_t __init_memblock memblock_get_current_limit(void) > return memblock.current_limit; > } > > +static void __init_memblock memblock_reserved_update_node(void) > +{ > + struct memblock_region *rgn; > + phys_addr_t base, end, size; > + int ret; > + > + if (!IS_ENABLED(CONFIG_NUMA)) > + return; > + > + for_each_mem_region(rgn) { > + base = rgn->base; > + size = rgn->size; > + end = base + size - 1; > + > + ret = memblock_set_node(base, size, &memblock.reserved, > + memblock_get_region_node(rgn)); > + if (ret) > + pr_err("memblock: Failed to update reserved [%pa-%pa] node", > + &base, &end); > + } > +} > + > static void __init_memblock memblock_dump(struct memblock_type *type) > { > phys_addr_t base, end, size; > @@ -1955,6 +1977,7 @@ static void __init_memblock __memblock_dump_all(void) > &memblock.memory.total_size, > &memblock.reserved.total_size); > > + memblock_reserved_update_node(); __memblock_dump_all() gets called only when memblock_debug is enabled. This helper should be called directly inside memblock_dump_all() right at the beginning, regardless of memblock_debug. 
diff --git a/mm/memblock.c b/mm/memblock.c index 804fae92d56f..008c4e86d7f3 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -1954,7 +1954,6 @@ static void __init_memblock __memblock_dump_all(void) &memblock.memory.total_size, &memblock.reserved.total_size); - memblock_reserved_update_node(); memblock_dump(&memblock.memory); memblock_dump(&memblock.reserved); #ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP @@ -1964,6 +1963,8 @@ static void __init_memblock __memblock_dump_all(void) void __init_memblock memblock_dump_all(void) { + memblock_reserved_update_node(); + if (memblock_debug) __memblock_dump_all(); } > memblock_dump(&memblock.memory); > memblock_dump(&memblock.reserved); > #ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP > @@ -2196,6 +2219,8 @@ static int memblock_debug_show(struct seq_file *m, void *private) > unsigned int count = ARRAY_SIZE(flagname); > phys_addr_t end; > > + memblock_reserved_update_node(); > + This is redundant, should be dropped. Reserved memblock ranges need not be scanned, each time the sysfs file is accessed from user space. > for (i = 0; i < type->cnt; i++) { > reg = &type->regions[i]; > end = reg->base + reg->size - 1;
On 2023/5/24 12:59, Anshuman Khandual wrote: > > > On 5/23/23 17:27, Kefeng Wang wrote: >> The numa node of memblk reserved type is wrong, it could update >> according to the numa node information from memblk memory type, >> let's fix it. > > Indeed it's wrong at present and can be verified from sysfs file > (/sys/kernel/debug/memblock/reserved) accessed in user space. Yes, both memblock_dump() and sysfs show wrong value. > >> >> Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com> >> --- >> mm/memblock.c | 25 +++++++++++++++++++++++++ >> 1 file changed, 25 insertions(+) >> >> diff --git a/mm/memblock.c b/mm/memblock.c >> index a50447d970ef..45a0781cda31 100644 >> --- a/mm/memblock.c >> +++ b/mm/memblock.c >> @@ -1922,6 +1922,28 @@ phys_addr_t __init_memblock memblock_get_current_limit(void) >> return memblock.current_limit; >> } >> >> +static void __init_memblock memblock_reserved_update_node(void) >> +{ >> + struct memblock_region *rgn; >> + phys_addr_t base, end, size; >> + int ret; >> + >> + if (!IS_ENABLED(CONFIG_NUMA)) >> + return; >> + >> + for_each_mem_region(rgn) { >> + base = rgn->base; >> + size = rgn->size; >> + end = base + size - 1; >> + >> + ret = memblock_set_node(base, size, &memblock.reserved, >> + memblock_get_region_node(rgn)); >> + if (ret) >> + pr_err("memblock: Failed to update reserved [%pa-%pa] node", >> + &base, &end); >> + } >> +} >> + >> static void __init_memblock memblock_dump(struct memblock_type *type) >> { >> phys_addr_t base, end, size; >> @@ -1955,6 +1977,7 @@ static void __init_memblock __memblock_dump_all(void) >> &memblock.memory.total_size, >> &memblock.reserved.total_size); >> >> + memblock_reserved_update_node(); > > __memblock_dump_all() gets called only when memblock_debug is enabled. > This helper should be called directly inside memblock_dump_all() right > at the beginning, regardless of memblock_debug. 
This is my first thought, but I found there are still many memblock_alloc and memblock_reserve after memblock_dump_all(), so I update it twice, 1) __memblock_dump_all() 2) memblock_debug_show() and without the above two interfaces, no one cares about the reserved node info, so I put memblock_reserved_update_node into __memblock_dump_all(). >> #ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP >> @@ -2196,6 +2219,8 @@ static int memblock_debug_show(struct seq_file *m, void *private) >> unsigned int count = ARRAY_SIZE(flagname); >> phys_addr_t end; >> >> + memblock_reserved_update_node(); >> + > > This is redundant, should be dropped. Reserved memblock ranges need not > be scanned, each time the sysfs file is accessed from user space. Yes, it's better to move it into memblock_init_debugfs(), which is only called once.
On Wed, May 24, 2023 at 02:47:26PM +0800, Kefeng Wang wrote: > > On 2023/5/24 12:59, Anshuman Khandual wrote: > > > > On 5/23/23 17:27, Kefeng Wang wrote: > > > The numa node of memblk reserved type is wrong, it could update > > > according to the numa node information from memblk memory type, > > > let's fix it. > > > > Indeed it's wrong at present and can be verified from sysfs file > > (/sys/kernel/debug/memblock/reserved) accessed in user space. > > Yes, both memblock_dump() and sysfs show wrong value. > > > > > > > > Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com> > > > --- > > > mm/memblock.c | 25 +++++++++++++++++++++++++ > > > 1 file changed, 25 insertions(+) > > > > > > diff --git a/mm/memblock.c b/mm/memblock.c > > > index a50447d970ef..45a0781cda31 100644 > > > --- a/mm/memblock.c > > > +++ b/mm/memblock.c > > > @@ -1922,6 +1922,28 @@ phys_addr_t __init_memblock memblock_get_current_limit(void) > > > return memblock.current_limit; > > > } > > > +static void __init_memblock memblock_reserved_update_node(void) > > > +{ > > > + struct memblock_region *rgn; > > > + phys_addr_t base, end, size; > > > + int ret; > > > + > > > + if (!IS_ENABLED(CONFIG_NUMA)) > > > + return; > > > + > > > + for_each_mem_region(rgn) { > > > + base = rgn->base; > > > + size = rgn->size; > > > + end = base + size - 1; > > > + > > > + ret = memblock_set_node(base, size, &memblock.reserved, > > > + memblock_get_region_node(rgn)); > > > + if (ret) > > > + pr_err("memblock: Failed to update reserved [%pa-%pa] node", > > > + &base, &end); > > > + } > > > +} > > > + > > > static void __init_memblock memblock_dump(struct memblock_type *type) > > > { > > > phys_addr_t base, end, size; > > > @@ -1955,6 +1977,7 @@ static void __init_memblock __memblock_dump_all(void) > > > &memblock.memory.total_size, > > > &memblock.reserved.total_size); > > > + memblock_reserved_update_node(); > > > > __memblock_dump_all() gets called only when memblock_debug is enabled. 
> > This helper should be called directly inside memblock_dump_all() right > > at the beginning, regardless of memblock_debug. > > This is my first though, but I found there are still many memblock_alloc and > memblock_reserve after memblock_dump_all(), so I update it twice, > > 1) __memblock_dump_all() > 2) memblock_debug_show() > > and without the above two interface, no one care about the reserved node > info, so I put memblock_reserved_update_node into __memblock_dump_all(). We don't care about the reserved node info and __memblock_dump_all() actually does not print node info for reserved regions unless somebody explicitly sets the node id on a reserved memory. So instead of updating reserved memory node info I'd rather avoid printing it in debugfs. > > > #ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP > > > @@ -2196,6 +2219,8 @@ static int memblock_debug_show(struct seq_file *m, void *private) > > > unsigned int count = ARRAY_SIZE(flagname); > > > phys_addr_t end; > > > + memblock_reserved_update_node(); > > > + > > > > > This is redundant, should be dropped. Reserved memblock ranges need not > > be scanned, each time the sysfs file is accessed from user space. > > Yes, it's better to move it into memblock_init_debugfs(), > which only called once. > > >
On 2023/5/24 23:33, Mike Rapoport wrote: > On Wed, May 24, 2023 at 02:47:26PM +0800, Kefeng Wang wrote: >> >> On 2023/5/24 12:59, Anshuman Khandual wrote: >>> >>> >>> __memblock_dump_all() gets called only when memblock_debug is enabled. >>> This helper should be called directly inside memblock_dump_all() right >>> at the beginning, regardless of memblock_debug. >> >> This is my first though, but I found there are still many memblock_alloc and >> memblock_reserve after memblock_dump_all(), so I update it twice, >> >> 1) __memblock_dump_all() >> 2) memblock_debug_show() >> >> and without the above two interface, no one care about the reserved node >> info, so I put memblock_reserved_update_node into __memblock_dump_all(). > > We don't care about the reserved node info and __memblock_dump_all() > actually does not print node info for reserved regions unless somebody > explicitly sets the node id on a reserved memory. > > So instead of updating reserved memory node info I'd rather avoid printing > it in debugfs. Ok, will skip nid = MAX_NUMNODES in debug show diff --git a/mm/memblock.c b/mm/memblock.c index c5c80d9bcea3..e6033de1f76d 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -2169,17 +2169,19 @@ static int memblock_debug_show(struct seq_file *m, void *private) { struct memblock_type *type = m->private; struct memblock_region *reg; - int i, j; + int i, j, nid; unsigned int count = ARRAY_SIZE(flagname); phys_addr_t end; for (i = 0; i < type->cnt; i++) { reg = &type->regions[i]; end = reg->base + reg->size - 1; + nid = memblock_get_region_node(reg); seq_printf(m, "%4d: ", i); seq_printf(m, "%pa..%pa ", &reg->base, &end); - seq_printf(m, "%4d ", memblock_get_region_node(reg)); + if (nid != MAX_NUMNODES) + seq_printf(m, "%4d ", nid); if (reg->flags) { for (j = 0; j < count; j++) { if (reg->flags & (1U << j)) {
diff --git a/mm/memblock.c b/mm/memblock.c index a50447d970ef..45a0781cda31 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -1922,6 +1922,28 @@ phys_addr_t __init_memblock memblock_get_current_limit(void) return memblock.current_limit; } +static void __init_memblock memblock_reserved_update_node(void) +{ + struct memblock_region *rgn; + phys_addr_t base, end, size; + int ret; + + if (!IS_ENABLED(CONFIG_NUMA)) + return; + + for_each_mem_region(rgn) { + base = rgn->base; + size = rgn->size; + end = base + size - 1; + + ret = memblock_set_node(base, size, &memblock.reserved, + memblock_get_region_node(rgn)); + if (ret) + pr_err("memblock: Failed to update reserved [%pa-%pa] node", + &base, &end); + } +} + static void __init_memblock memblock_dump(struct memblock_type *type) { phys_addr_t base, end, size; @@ -1955,6 +1977,7 @@ static void __init_memblock __memblock_dump_all(void) &memblock.memory.total_size, &memblock.reserved.total_size); + memblock_reserved_update_node(); memblock_dump(&memblock.memory); memblock_dump(&memblock.reserved); #ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP @@ -2196,6 +2219,8 @@ static int memblock_debug_show(struct seq_file *m, void *private) unsigned int count = ARRAY_SIZE(flagname); phys_addr_t end; + memblock_reserved_update_node(); + for (i = 0; i < type->cnt; i++) { reg = &type->regions[i]; end = reg->base + reg->size - 1;
The numa node of memblk reserved type is wrong, it could update according to the numa node information from memblk memory type, let's fix it. Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com> --- mm/memblock.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+)