Message ID | 20200630004413.1423733-1-shakeelb@google.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | mm: memcontrol: account kernel stack per node | expand |
On Mon, Jun 29, 2020 at 05:44:13PM -0700, Shakeel Butt wrote: > Currently the kernel stack is being accounted per-zone. There is no need > to do that. In addition due to being per-zone, memcg has to keep a > separate MEMCG_KERNEL_STACK_KB. Make the stat per-node and deprecate > MEMCG_KERNEL_STACK_KB as memcg_stat_item is an extension of > node_stat_item. > > Signed-off-by: Shakeel Butt <shakeelb@google.com> > --- > drivers/base/node.c | 4 ++-- > fs/proc/meminfo.c | 4 ++-- > include/linux/memcontrol.h | 2 -- > include/linux/mmzone.h | 8 ++++---- > kernel/fork.c | 29 ++++++++++------------------- > kernel/scs.c | 2 +- > mm/memcontrol.c | 2 +- > mm/page_alloc.c | 16 ++++++++-------- > mm/vmstat.c | 8 ++++---- > 9 files changed, 32 insertions(+), 43 deletions(-) > > diff --git a/drivers/base/node.c b/drivers/base/node.c > index 0cf13e31603c..508b80f6329b 100644 > --- a/drivers/base/node.c > +++ b/drivers/base/node.c > @@ -440,9 +440,9 @@ static ssize_t node_read_meminfo(struct device *dev, > nid, K(node_page_state(pgdat, NR_FILE_MAPPED)), > nid, K(node_page_state(pgdat, NR_ANON_MAPPED)), > nid, K(i.sharedram), > - nid, sum_zone_node_page_state(nid, NR_KERNEL_STACK_KB), > + nid, node_page_state(pgdat, NR_KERNEL_STACK_KB), > #ifdef CONFIG_SHADOW_CALL_STACK > - nid, sum_zone_node_page_state(nid, NR_KERNEL_SCS_KB), > + nid, node_page_state(pgdat, NR_KERNEL_SCS_KB), > #endif > nid, K(sum_zone_node_page_state(nid, NR_PAGETABLE)), > nid, 0UL, > diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c > index f262bff3ca31..887a5532e449 100644 > --- a/fs/proc/meminfo.c > +++ b/fs/proc/meminfo.c > @@ -101,10 +101,10 @@ static int meminfo_proc_show(struct seq_file *m, void *v) > show_val_kb(m, "SReclaimable: ", sreclaimable); > show_val_kb(m, "SUnreclaim: ", sunreclaim); > seq_printf(m, "KernelStack: %8lu kB\n", > - global_zone_page_state(NR_KERNEL_STACK_KB)); > + global_node_page_state(NR_KERNEL_STACK_KB)); > #ifdef CONFIG_SHADOW_CALL_STACK > seq_printf(m, "ShadowCallStack:%8lu kB\n", > - global_zone_page_state(NR_KERNEL_SCS_KB)); > + global_node_page_state(NR_KERNEL_SCS_KB)); > #endif > show_val_kb(m, "PageTables: ", > global_zone_page_state(NR_PAGETABLE)); > diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h > index ba1e42715ecf..a3ddb236898e 100644 > --- a/include/linux/memcontrol.h > +++ b/include/linux/memcontrol.h > @@ -33,8 +33,6 @@ enum memcg_stat_item { > MEMCG_SWAP = NR_VM_NODE_STAT_ITEMS, > MEMCG_SOCK, > MEMCG_PERCPU_B, > - /* XXX: why are these zone and not node counters? */ > - MEMCG_KERNEL_STACK_KB, > MEMCG_NR_STAT, > }; > > diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h > index 8e859444927a..b79f73ce8b57 100644 > --- a/include/linux/mmzone.h > +++ b/include/linux/mmzone.h > @@ -153,10 +153,6 @@ enum zone_stat_item { > NR_ZONE_WRITE_PENDING, /* Count of dirty, writeback and unstable pages */ > NR_MLOCK, /* mlock()ed pages found and moved off LRU */ > NR_PAGETABLE, /* used for pagetables */ > - NR_KERNEL_STACK_KB, /* measured in KiB */ > -#if IS_ENABLED(CONFIG_SHADOW_CALL_STACK) > - NR_KERNEL_SCS_KB, /* measured in KiB */ > -#endif > /* Second 128 byte cacheline */ > NR_BOUNCE, > #if IS_ENABLED(CONFIG_ZSMALLOC) > @@ -201,6 +197,10 @@ enum node_stat_item { > NR_KERNEL_MISC_RECLAIMABLE, /* reclaimable non-slab kernel pages */ > NR_FOLL_PIN_ACQUIRED, /* via: pin_user_page(), gup flag: FOLL_PIN */ > NR_FOLL_PIN_RELEASED, /* pages returned via unpin_user_page() */ > + NR_KERNEL_STACK_KB, /* measured in KiB */ > +#if IS_ENABLED(CONFIG_SHADOW_CALL_STACK) > + NR_KERNEL_SCS_KB, /* measured in KiB */ > +#endif > NR_VM_NODE_STAT_ITEMS > }; > > diff --git a/kernel/fork.c b/kernel/fork.c > index 73fdfa9674b5..ee5393350ef7 100644 > --- a/kernel/fork.c > +++ b/kernel/fork.c > @@ -278,7 +278,7 @@ static inline void free_thread_stack(struct task_struct *tsk) > > for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) { > mod_memcg_page_state(vm->pages[i], > - MEMCG_KERNEL_STACK_KB, > + NR_KERNEL_STACK_KB, > -(int)(PAGE_SIZE / 1024)); Hello, Shakeel! Thank you for the cleanup, it makes total sense to me. However I have some concerns: mod_memcg_page_state() does change only memcg's counters, but not lruvec counters. So to make it per-node per-memcg (aka lruvec) we need to use mod_lruvec_state(), otherwise we won't have global per-node values. > > memcg_kmem_uncharge_page(vm->pages[i], 0); > @@ -381,32 +381,23 @@ static void account_kernel_stack(struct task_struct *tsk, int account) > { > void *stack = task_stack_page(tsk); > struct vm_struct *vm = task_stack_vm_area(tsk); > + struct page *page; > > BUILD_BUG_ON(IS_ENABLED(CONFIG_VMAP_STACK) && PAGE_SIZE % 1024 != 0); > > if (vm) { > - int i; > - > BUG_ON(vm->nr_pages != THREAD_SIZE / PAGE_SIZE); > + page = vm->pages[0]; > > - for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) { > - mod_zone_page_state(page_zone(vm->pages[i]), > - NR_KERNEL_STACK_KB, > - PAGE_SIZE / 1024 * account); > - } > } else { > - /* > - * All stack pages are in the same zone and belong to the > - * same memcg. > - */ > - struct page *first_page = virt_to_page(stack); > - > - mod_zone_page_state(page_zone(first_page), NR_KERNEL_STACK_KB, > - THREAD_SIZE / 1024 * account); > - > - mod_memcg_obj_state(stack, MEMCG_KERNEL_STACK_KB, > + page = virt_to_page(stack); > + mod_memcg_obj_state(stack, NR_KERNEL_STACK_KB, > account * (THREAD_SIZE / 1024)); > } > + > + /* All stack pages are in the same node. */ > + mod_node_page_state(page_pgdat(page), NR_KERNEL_STACK_KB, > + THREAD_SIZE / 1024 * account); > } And then we probably don't need a separate change for memcg- and per-node counters. Thanks! > > static int memcg_charge_kernel_stack(struct task_struct *tsk) > @@ -431,7 +422,7 @@ static int memcg_charge_kernel_stack(struct task_struct *tsk) > return ret; > > mod_memcg_page_state(vm->pages[i], > - MEMCG_KERNEL_STACK_KB, > + NR_KERNEL_STACK_KB, > PAGE_SIZE / 1024); > } > } > diff --git a/kernel/scs.c b/kernel/scs.c > index 5d4d9bbdec36..4ff4a7ba0094 100644 > --- a/kernel/scs.c > +++ b/kernel/scs.c > @@ -17,7 +17,7 @@ static void __scs_account(void *s, int account) > { > struct page *scs_page = virt_to_page(s); > > - mod_zone_page_state(page_zone(scs_page), NR_KERNEL_SCS_KB, > + mod_node_page_state(page_pgdat(scs_page), NR_KERNEL_SCS_KB, > account * (SCS_SIZE / SZ_1K)); > } > > diff --git a/mm/memcontrol.c b/mm/memcontrol.c > index b1a644224383..06de63901f81 100644 > --- a/mm/memcontrol.c > +++ b/mm/memcontrol.c > @@ -1485,7 +1485,7 @@ static char *memory_stat_format(struct mem_cgroup *memcg) > (u64)memcg_page_state(memcg, NR_FILE_PAGES) * > PAGE_SIZE); > seq_buf_printf(&s, "kernel_stack %llu\n", > - (u64)memcg_page_state(memcg, MEMCG_KERNEL_STACK_KB) * > + (u64)memcg_page_state(memcg, NR_KERNEL_STACK_KB) * > 1024); > seq_buf_printf(&s, "slab %llu\n", > (u64)(memcg_page_state(memcg, NR_SLAB_RECLAIMABLE_B) + > diff --git a/mm/page_alloc.c b/mm/page_alloc.c > index 174c849ba9f2..0568b126f719 100644 > --- a/mm/page_alloc.c > +++ b/mm/page_alloc.c > @@ -5402,6 +5402,10 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask) > " anon_thp: %lukB" > #endif > " writeback_tmp:%lukB" > + " kernel_stack:%lukB" > +#ifdef CONFIG_SHADOW_CALL_STACK > + " shadow_call_stack:%lukB" > +#endif > " all_unreclaimable? %s" > "\n", > pgdat->node_id, > @@ -5423,6 +5427,10 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask) > K(node_page_state(pgdat, NR_ANON_THPS) * HPAGE_PMD_NR), > #endif > K(node_page_state(pgdat, NR_WRITEBACK_TEMP)), > + node_page_state(pgdat, NR_KERNEL_STACK_KB), > +#ifdef CONFIG_SHADOW_CALL_STACK > + node_page_state(pgdat, NR_KERNEL_SCS_KB), > +#endif > pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES ? > "yes" : "no"); > } > @@ -5454,10 +5462,6 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask) > " present:%lukB" > " managed:%lukB" > " mlocked:%lukB" > - " kernel_stack:%lukB" > -#ifdef CONFIG_SHADOW_CALL_STACK > - " shadow_call_stack:%lukB" > -#endif > " pagetables:%lukB" > " bounce:%lukB" > " free_pcp:%lukB" > @@ -5479,10 +5483,6 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask) > K(zone->present_pages), > K(zone_managed_pages(zone)), > K(zone_page_state(zone, NR_MLOCK)), > - zone_page_state(zone, NR_KERNEL_STACK_KB), > -#ifdef CONFIG_SHADOW_CALL_STACK > - zone_page_state(zone, NR_KERNEL_SCS_KB), > -#endif > K(zone_page_state(zone, NR_PAGETABLE)), > K(zone_page_state(zone, NR_BOUNCE)), > K(free_pcp), > diff --git a/mm/vmstat.c b/mm/vmstat.c > index 2c5a96694490..96bf8bfffd1d 100644 > --- a/mm/vmstat.c > +++ b/mm/vmstat.c > @@ -1158,10 +1158,6 @@ const char * const vmstat_text[] = { > "nr_zone_write_pending", > "nr_mlock", > "nr_page_table_pages", > - "nr_kernel_stack", > -#if IS_ENABLED(CONFIG_SHADOW_CALL_STACK) > - "nr_shadow_call_stack", > -#endif > "nr_bounce", > #if IS_ENABLED(CONFIG_ZSMALLOC) > "nr_zspages", > @@ -1212,6 +1208,10 @@ const char * const vmstat_text[] = { > "nr_kernel_misc_reclaimable", > "nr_foll_pin_acquired", > "nr_foll_pin_released", > + "nr_kernel_stack", > +#if IS_ENABLED(CONFIG_SHADOW_CALL_STACK) > + "nr_shadow_call_stack", > +#endif > > /* enum writeback_stat_item counters */ > "nr_dirty_threshold", > -- > 2.27.0.212.ge8ba1cc988-goog >
On Mon, Jun 29, 2020 at 8:24 PM Roman Gushchin <guro@fb.com> wrote: > > On Mon, Jun 29, 2020 at 05:44:13PM -0700, Shakeel Butt wrote: > > Currently the kernel stack is being accounted per-zone. There is no need > > to do that. In addition due to being per-zone, memcg has to keep a > > separate MEMCG_KERNEL_STACK_KB. Make the stat per-node and deprecate > > MEMCG_KERNEL_STACK_KB as memcg_stat_item is an extension of > > node_stat_item. > > > > Signed-off-by: Shakeel Butt <shakeelb@google.com> > > --- > > drivers/base/node.c | 4 ++-- > > fs/proc/meminfo.c | 4 ++-- > > include/linux/memcontrol.h | 2 -- > > include/linux/mmzone.h | 8 ++++---- > > kernel/fork.c | 29 ++++++++++------------------- > > kernel/scs.c | 2 +- > > mm/memcontrol.c | 2 +- > > mm/page_alloc.c | 16 ++++++++-------- > > mm/vmstat.c | 8 ++++---- > > 9 files changed, 32 insertions(+), 43 deletions(-) > > > > diff --git a/drivers/base/node.c b/drivers/base/node.c > > index 0cf13e31603c..508b80f6329b 100644 > > --- a/drivers/base/node.c > > +++ b/drivers/base/node.c > > @@ -440,9 +440,9 @@ static ssize_t node_read_meminfo(struct device *dev, > > nid, K(node_page_state(pgdat, NR_FILE_MAPPED)), > > nid, K(node_page_state(pgdat, NR_ANON_MAPPED)), > > nid, K(i.sharedram), > > - nid, sum_zone_node_page_state(nid, NR_KERNEL_STACK_KB), > > + nid, node_page_state(pgdat, NR_KERNEL_STACK_KB), > > #ifdef CONFIG_SHADOW_CALL_STACK > > - nid, sum_zone_node_page_state(nid, NR_KERNEL_SCS_KB), > > + nid, node_page_state(pgdat, NR_KERNEL_SCS_KB), > > #endif > > nid, K(sum_zone_node_page_state(nid, NR_PAGETABLE)), > > nid, 0UL, > > diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c > > index f262bff3ca31..887a5532e449 100644 > > --- a/fs/proc/meminfo.c > > +++ b/fs/proc/meminfo.c > > @@ -101,10 +101,10 @@ static int meminfo_proc_show(struct seq_file *m, void *v) > > show_val_kb(m, "SReclaimable: ", sreclaimable); > > show_val_kb(m, "SUnreclaim: ", sunreclaim); > > seq_printf(m, "KernelStack: %8lu kB\n", > > - global_zone_page_state(NR_KERNEL_STACK_KB)); > > + global_node_page_state(NR_KERNEL_STACK_KB)); > > #ifdef CONFIG_SHADOW_CALL_STACK > > seq_printf(m, "ShadowCallStack:%8lu kB\n", > > - global_zone_page_state(NR_KERNEL_SCS_KB)); > > + global_node_page_state(NR_KERNEL_SCS_KB)); > > #endif > > show_val_kb(m, "PageTables: ", > > global_zone_page_state(NR_PAGETABLE)); > > diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h > > index ba1e42715ecf..a3ddb236898e 100644 > > --- a/include/linux/memcontrol.h > > +++ b/include/linux/memcontrol.h > > @@ -33,8 +33,6 @@ enum memcg_stat_item { > > MEMCG_SWAP = NR_VM_NODE_STAT_ITEMS, > > MEMCG_SOCK, > > MEMCG_PERCPU_B, > > - /* XXX: why are these zone and not node counters? */ > > - MEMCG_KERNEL_STACK_KB, > > MEMCG_NR_STAT, > > }; > > > > diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h > > index 8e859444927a..b79f73ce8b57 100644 > > --- a/include/linux/mmzone.h > > +++ b/include/linux/mmzone.h > > @@ -153,10 +153,6 @@ enum zone_stat_item { > > NR_ZONE_WRITE_PENDING, /* Count of dirty, writeback and unstable pages */ > > NR_MLOCK, /* mlock()ed pages found and moved off LRU */ > > NR_PAGETABLE, /* used for pagetables */ > > - NR_KERNEL_STACK_KB, /* measured in KiB */ > > -#if IS_ENABLED(CONFIG_SHADOW_CALL_STACK) > > - NR_KERNEL_SCS_KB, /* measured in KiB */ > > -#endif > > /* Second 128 byte cacheline */ > > NR_BOUNCE, > > #if IS_ENABLED(CONFIG_ZSMALLOC) > > @@ -201,6 +197,10 @@ enum node_stat_item { > > NR_KERNEL_MISC_RECLAIMABLE, /* reclaimable non-slab kernel pages */ > > NR_FOLL_PIN_ACQUIRED, /* via: pin_user_page(), gup flag: FOLL_PIN */ > > NR_FOLL_PIN_RELEASED, /* pages returned via unpin_user_page() */ > > + NR_KERNEL_STACK_KB, /* measured in KiB */ > > +#if IS_ENABLED(CONFIG_SHADOW_CALL_STACK) > > + NR_KERNEL_SCS_KB, /* measured in KiB */ > > +#endif > > NR_VM_NODE_STAT_ITEMS > > }; > > > > diff --git a/kernel/fork.c b/kernel/fork.c > > index 73fdfa9674b5..ee5393350ef7 100644 > > --- a/kernel/fork.c > > +++ b/kernel/fork.c > > @@ -278,7 +278,7 @@ static inline void free_thread_stack(struct task_struct *tsk) > > > > for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) { > > mod_memcg_page_state(vm->pages[i], > > - MEMCG_KERNEL_STACK_KB, > > + NR_KERNEL_STACK_KB, > > -(int)(PAGE_SIZE / 1024)); > > Hello, Shakeel! > > Thank you for the cleanup, it makes total sense to me. > > However I have some concerns: mod_memcg_page_state() does change only memcg's counters, > but not lruvec counters. So to make it per-node per-memcg (aka lruvec) > we need to use mod_lruvec_state(), otherwise we won't have global per-node values. > > > > > memcg_kmem_uncharge_page(vm->pages[i], 0); > > @@ -381,32 +381,23 @@ static void account_kernel_stack(struct task_struct *tsk, int account) > > { > > void *stack = task_stack_page(tsk); > > struct vm_struct *vm = task_stack_vm_area(tsk); > > + struct page *page; > > > > BUILD_BUG_ON(IS_ENABLED(CONFIG_VMAP_STACK) && PAGE_SIZE % 1024 != 0); > > > > if (vm) { > > - int i; > > - > > BUG_ON(vm->nr_pages != THREAD_SIZE / PAGE_SIZE); > > + page = vm->pages[0]; > > > > - for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) { > > - mod_zone_page_state(page_zone(vm->pages[i]), > > - NR_KERNEL_STACK_KB, > > - PAGE_SIZE / 1024 * account); > > - } > > } else { > > - /* > > - * All stack pages are in the same zone and belong to the > > - * same memcg. > > - */ > > - struct page *first_page = virt_to_page(stack); > > - > > - mod_zone_page_state(page_zone(first_page), NR_KERNEL_STACK_KB, > > - THREAD_SIZE / 1024 * account); > > - > > - mod_memcg_obj_state(stack, MEMCG_KERNEL_STACK_KB, > > + page = virt_to_page(stack); > > + mod_memcg_obj_state(stack, NR_KERNEL_STACK_KB, > > account * (THREAD_SIZE / 1024)); > > } > > + > > + /* All stack pages are in the same node. */ > > + mod_node_page_state(page_pgdat(page), NR_KERNEL_STACK_KB, > > + THREAD_SIZE / 1024 * account); > > } > > And then we probably don't need a separate change for memcg- and per-node counters. > Yes, I thought about combining memcg and per-node counters but got worried that the cached stacks for CONFIG_VMAP_STACK would not be accounted for in the per-node global counters but I see that we already don't account for them in both counters. I will further simplify these. Thanks for the suggestion. Shakeel
diff --git a/drivers/base/node.c b/drivers/base/node.c index 0cf13e31603c..508b80f6329b 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -440,9 +440,9 @@ static ssize_t node_read_meminfo(struct device *dev, nid, K(node_page_state(pgdat, NR_FILE_MAPPED)), nid, K(node_page_state(pgdat, NR_ANON_MAPPED)), nid, K(i.sharedram), - nid, sum_zone_node_page_state(nid, NR_KERNEL_STACK_KB), + nid, node_page_state(pgdat, NR_KERNEL_STACK_KB), #ifdef CONFIG_SHADOW_CALL_STACK - nid, sum_zone_node_page_state(nid, NR_KERNEL_SCS_KB), + nid, node_page_state(pgdat, NR_KERNEL_SCS_KB), #endif nid, K(sum_zone_node_page_state(nid, NR_PAGETABLE)), nid, 0UL, diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c index f262bff3ca31..887a5532e449 100644 --- a/fs/proc/meminfo.c +++ b/fs/proc/meminfo.c @@ -101,10 +101,10 @@ static int meminfo_proc_show(struct seq_file *m, void *v) show_val_kb(m, "SReclaimable: ", sreclaimable); show_val_kb(m, "SUnreclaim: ", sunreclaim); seq_printf(m, "KernelStack: %8lu kB\n", - global_zone_page_state(NR_KERNEL_STACK_KB)); + global_node_page_state(NR_KERNEL_STACK_KB)); #ifdef CONFIG_SHADOW_CALL_STACK seq_printf(m, "ShadowCallStack:%8lu kB\n", - global_zone_page_state(NR_KERNEL_SCS_KB)); + global_node_page_state(NR_KERNEL_SCS_KB)); #endif show_val_kb(m, "PageTables: ", global_zone_page_state(NR_PAGETABLE)); diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index ba1e42715ecf..a3ddb236898e 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -33,8 +33,6 @@ enum memcg_stat_item { MEMCG_SWAP = NR_VM_NODE_STAT_ITEMS, MEMCG_SOCK, MEMCG_PERCPU_B, - /* XXX: why are these zone and not node counters? */ - MEMCG_KERNEL_STACK_KB, MEMCG_NR_STAT, }; diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 8e859444927a..b79f73ce8b57 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -153,10 +153,6 @@ enum zone_stat_item { NR_ZONE_WRITE_PENDING, /* Count of dirty, writeback and unstable pages */ NR_MLOCK, /* mlock()ed pages found and moved off LRU */ NR_PAGETABLE, /* used for pagetables */ - NR_KERNEL_STACK_KB, /* measured in KiB */ -#if IS_ENABLED(CONFIG_SHADOW_CALL_STACK) - NR_KERNEL_SCS_KB, /* measured in KiB */ -#endif /* Second 128 byte cacheline */ NR_BOUNCE, #if IS_ENABLED(CONFIG_ZSMALLOC) @@ -201,6 +197,10 @@ enum node_stat_item { NR_KERNEL_MISC_RECLAIMABLE, /* reclaimable non-slab kernel pages */ NR_FOLL_PIN_ACQUIRED, /* via: pin_user_page(), gup flag: FOLL_PIN */ NR_FOLL_PIN_RELEASED, /* pages returned via unpin_user_page() */ + NR_KERNEL_STACK_KB, /* measured in KiB */ +#if IS_ENABLED(CONFIG_SHADOW_CALL_STACK) + NR_KERNEL_SCS_KB, /* measured in KiB */ +#endif NR_VM_NODE_STAT_ITEMS }; diff --git a/kernel/fork.c b/kernel/fork.c index 73fdfa9674b5..ee5393350ef7 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -278,7 +278,7 @@ static inline void free_thread_stack(struct task_struct *tsk) for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) { mod_memcg_page_state(vm->pages[i], - MEMCG_KERNEL_STACK_KB, + NR_KERNEL_STACK_KB, -(int)(PAGE_SIZE / 1024)); memcg_kmem_uncharge_page(vm->pages[i], 0); @@ -381,32 +381,23 @@ static void account_kernel_stack(struct task_struct *tsk, int account) { void *stack = task_stack_page(tsk); struct vm_struct *vm = task_stack_vm_area(tsk); + struct page *page; BUILD_BUG_ON(IS_ENABLED(CONFIG_VMAP_STACK) && PAGE_SIZE % 1024 != 0); if (vm) { - int i; - BUG_ON(vm->nr_pages != THREAD_SIZE / PAGE_SIZE); + page = vm->pages[0]; - for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) { - mod_zone_page_state(page_zone(vm->pages[i]), - NR_KERNEL_STACK_KB, - PAGE_SIZE / 1024 * account); - } } else { - /* - * All stack pages are in the same zone and belong to the - * same memcg. - */ - struct page *first_page = virt_to_page(stack); - - mod_zone_page_state(page_zone(first_page), NR_KERNEL_STACK_KB, - THREAD_SIZE / 1024 * account); - - mod_memcg_obj_state(stack, MEMCG_KERNEL_STACK_KB, + page = virt_to_page(stack); + mod_memcg_obj_state(stack, NR_KERNEL_STACK_KB, account * (THREAD_SIZE / 1024)); } + + /* All stack pages are in the same node. */ + mod_node_page_state(page_pgdat(page), NR_KERNEL_STACK_KB, + THREAD_SIZE / 1024 * account); } static int memcg_charge_kernel_stack(struct task_struct *tsk) @@ -431,7 +422,7 @@ static int memcg_charge_kernel_stack(struct task_struct *tsk) return ret; mod_memcg_page_state(vm->pages[i], - MEMCG_KERNEL_STACK_KB, + NR_KERNEL_STACK_KB, PAGE_SIZE / 1024); } } diff --git a/kernel/scs.c b/kernel/scs.c index 5d4d9bbdec36..4ff4a7ba0094 100644 --- a/kernel/scs.c +++ b/kernel/scs.c @@ -17,7 +17,7 @@ static void __scs_account(void *s, int account) { struct page *scs_page = virt_to_page(s); - mod_zone_page_state(page_zone(scs_page), NR_KERNEL_SCS_KB, + mod_node_page_state(page_pgdat(scs_page), NR_KERNEL_SCS_KB, account * (SCS_SIZE / SZ_1K)); } diff --git a/mm/memcontrol.c b/mm/memcontrol.c index b1a644224383..06de63901f81 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1485,7 +1485,7 @@ static char *memory_stat_format(struct mem_cgroup *memcg) (u64)memcg_page_state(memcg, NR_FILE_PAGES) * PAGE_SIZE); seq_buf_printf(&s, "kernel_stack %llu\n", - (u64)memcg_page_state(memcg, MEMCG_KERNEL_STACK_KB) * + (u64)memcg_page_state(memcg, NR_KERNEL_STACK_KB) * 1024); seq_buf_printf(&s, "slab %llu\n", (u64)(memcg_page_state(memcg, NR_SLAB_RECLAIMABLE_B) + diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 174c849ba9f2..0568b126f719 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -5402,6 +5402,10 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask) " anon_thp: %lukB" #endif " writeback_tmp:%lukB" + " kernel_stack:%lukB" +#ifdef CONFIG_SHADOW_CALL_STACK + " shadow_call_stack:%lukB" +#endif " all_unreclaimable? %s" "\n", pgdat->node_id, @@ -5423,6 +5427,10 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask) K(node_page_state(pgdat, NR_ANON_THPS) * HPAGE_PMD_NR), #endif K(node_page_state(pgdat, NR_WRITEBACK_TEMP)), + node_page_state(pgdat, NR_KERNEL_STACK_KB), +#ifdef CONFIG_SHADOW_CALL_STACK + node_page_state(pgdat, NR_KERNEL_SCS_KB), +#endif pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES ? "yes" : "no"); } @@ -5454,10 +5462,6 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask) " present:%lukB" " managed:%lukB" " mlocked:%lukB" - " kernel_stack:%lukB" -#ifdef CONFIG_SHADOW_CALL_STACK - " shadow_call_stack:%lukB" -#endif " pagetables:%lukB" " bounce:%lukB" " free_pcp:%lukB" @@ -5479,10 +5483,6 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask) K(zone->present_pages), K(zone_managed_pages(zone)), K(zone_page_state(zone, NR_MLOCK)), - zone_page_state(zone, NR_KERNEL_STACK_KB), -#ifdef CONFIG_SHADOW_CALL_STACK - zone_page_state(zone, NR_KERNEL_SCS_KB), -#endif K(zone_page_state(zone, NR_PAGETABLE)), K(zone_page_state(zone, NR_BOUNCE)), K(free_pcp), diff --git a/mm/vmstat.c b/mm/vmstat.c index 2c5a96694490..96bf8bfffd1d 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1158,10 +1158,6 @@ const char * const vmstat_text[] = { "nr_zone_write_pending", "nr_mlock", "nr_page_table_pages", - "nr_kernel_stack", -#if IS_ENABLED(CONFIG_SHADOW_CALL_STACK) - "nr_shadow_call_stack", -#endif "nr_bounce", #if IS_ENABLED(CONFIG_ZSMALLOC) "nr_zspages", @@ -1212,6 +1208,10 @@ const char * const vmstat_text[] = { "nr_kernel_misc_reclaimable", "nr_foll_pin_acquired", "nr_foll_pin_released", + "nr_kernel_stack", +#if IS_ENABLED(CONFIG_SHADOW_CALL_STACK) + "nr_shadow_call_stack", +#endif /* enum writeback_stat_item counters */ "nr_dirty_threshold",
Currently the kernel stack is being accounted per-zone. There is no need to do that. In addition due to being per-zone, memcg has to keep a separate MEMCG_KERNEL_STACK_KB. Make the stat per-node and deprecate MEMCG_KERNEL_STACK_KB as memcg_stat_item is an extension of node_stat_item. Signed-off-by: Shakeel Butt <shakeelb@google.com> --- drivers/base/node.c | 4 ++-- fs/proc/meminfo.c | 4 ++-- include/linux/memcontrol.h | 2 -- include/linux/mmzone.h | 8 ++++---- kernel/fork.c | 29 ++++++++++------------------- kernel/scs.c | 2 +- mm/memcontrol.c | 2 +- mm/page_alloc.c | 16 ++++++++-------- mm/vmstat.c | 8 ++++---- 9 files changed, 32 insertions(+), 43 deletions(-)