Message ID | 20240212223029.30769-4-osalvador@suse.de (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | page_owner: print stacks and their outstanding allocations | expand |
On Mon, 12 Feb 2024 at 23:29, Oscar Salvador <osalvador@suse.de> wrote: > > This patch adds a new directory called 'page_owner_stacks' under > /sys/kernel/debug/, with a file called 'show_stacks' in it. > Reading from that file will show all stacks that were added by page_owner > followed by their counting, giving us a clear overview of stack <-> count > relationship. > > E.g: > > prep_new_page+0xa9/0x120 > get_page_from_freelist+0x801/0x2210 > __alloc_pages+0x18b/0x350 > alloc_pages_mpol+0x91/0x1f0 > folio_alloc+0x14/0x50 > filemap_alloc_folio+0xb2/0x100 > __filemap_get_folio+0x14a/0x490 > ext4_write_begin+0xbd/0x4b0 [ext4] > generic_perform_write+0xc1/0x1e0 > ext4_buffered_write_iter+0x68/0xe0 [ext4] > ext4_file_write_iter+0x70/0x740 [ext4] > vfs_write+0x33d/0x420 > ksys_write+0xa5/0xe0 > do_syscall_64+0x80/0x160 > entry_SYSCALL_64_after_hwframe+0x6e/0x76 > stack_count: 4578 > > The seq stack_{start,next} functions will iterate through the list > stack_list in order to print all stacks. > > Signed-off-by: Oscar Salvador <osalvador@suse.de> Acked-by: Marco Elver <elver@google.com> Minor comments below. > --- > mm/page_owner.c | 99 ++++++++++++++++++++++++++++++++++++++++++++++++- > 1 file changed, 98 insertions(+), 1 deletion(-) > > diff --git a/mm/page_owner.c b/mm/page_owner.c > index 7d1b3f75cef3..3e4b7cd7c8f8 100644 > --- a/mm/page_owner.c > +++ b/mm/page_owner.c > @@ -84,7 +84,12 @@ static void add_stack_record_to_list(struct stack_record *stack_record) > stack_list = stack; > } else { > stack->next = stack_list; > - stack_list = stack; > + /* This pairs with smp_load_acquire() from function Comment should be /* * ... */ (Unless in networking or other special subsystems with their own comment style.) > + * stack_start(). This guarantees that stack_start() > + * will see an updated stack_list before starting to > + * traverse the list. 
> + */ > + smp_store_release(&stack_list, stack); > } > spin_unlock_irqrestore(&stack_list_lock, flags); > } > @@ -792,8 +797,97 @@ static const struct file_operations proc_page_owner_operations = { > .llseek = lseek_page_owner, > }; > > +static void *stack_start(struct seq_file *m, loff_t *ppos) > +{ > + struct stack *stack; > + > + if (*ppos == -1UL) > + return NULL; > + > + if (!*ppos) { > + /* > + * This pairs with smp_store_release() from function > + * add_stack_record_to_list(), so we get a consistent > + * value of stack_list. > + */ > + stack = smp_load_acquire(&stack_list); I'm not sure if it'd make your code simpler or not: there is <linux/llist.h> for singly-linked linked lists, although the code to manage the list is simple enough I'm indifferent here. Only consider it if it helps you make the code simpler. > + } else { > + stack = m->private; > + stack = stack->next; > + } > + > + m->private = stack; > + > + return stack; > +} > + > +static void *stack_next(struct seq_file *m, void *v, loff_t *ppos) > +{ > + struct stack *stack = v; > + > + stack = stack->next; > + *ppos = stack ? *ppos + 1 : -1UL; > + m->private = stack; > + > + return stack; > +} > + > +static int stack_print(struct seq_file *m, void *v) > +{ > + char *buf; > + int ret = 0; > + struct stack *stack = v; > + struct stack_record *stack_record = stack->stack_record; > + > + if (!stack_record->size || stack_record->size < 0 || > + refcount_read(&stack_record->count) < 2) > + return 0; > + > + buf = kzalloc(PAGE_SIZE, GFP_KERNEL); > + > + ret += stack_trace_snprint(buf, PAGE_SIZE, stack_record->entries, > + stack_record->size, 0); > + if (!ret) > + goto out; > + > + scnprintf(buf + ret, PAGE_SIZE - ret, "stack_count: %d\n\n", > + refcount_read(&stack_record->count)); > + > + seq_printf(m, buf); > + seq_puts(m, "\n\n"); > +out: > + kfree(buf); > + > + return 0; > +} > + > +static void stack_stop(struct seq_file *m, void *v) > +{ > +} Is this function even needed if it's empty? 
I recall there were some boilerplate "nop" functions that could be used. > +static const struct seq_operations page_owner_stack_op = { > + .start = stack_start, > + .next = stack_next, > + .stop = stack_stop, > + .show = stack_print > +}; > + > +static int page_owner_stack_open(struct inode *inode, struct file *file) > +{ > + return seq_open_private(file, &page_owner_stack_op, 0); > +} > + > +static const struct file_operations page_owner_stack_operations = { > + .open = page_owner_stack_open, > + .read = seq_read, > + .llseek = seq_lseek, > + .release = seq_release, > +}; > + > static int __init pageowner_init(void) > { > + struct dentry *dir; > + > if (!static_branch_unlikely(&page_owner_inited)) { > pr_info("page_owner is disabled\n"); > return 0; > @@ -801,6 +895,9 @@ static int __init pageowner_init(void) > > debugfs_create_file("page_owner", 0400, NULL, NULL, > &proc_page_owner_operations); > + dir = debugfs_create_dir("page_owner_stacks", NULL); > + debugfs_create_file("show_stacks", 0400, dir, NULL, > + &page_owner_stack_operations); > > return 0; > } > -- > 2.43.0 >
On Tue, Feb 13, 2024 at 09:38:43AM +0100, Marco Elver wrote: > On Mon, 12 Feb 2024 at 23:29, Oscar Salvador <osalvador@suse.de> wrote: > > Signed-off-by: Oscar Salvador <osalvador@suse.de> > > Acked-by: Marco Elver <elver@google.com> Thanks! > > + /* This pairs with smp_load_acquire() from function > > Comment should be > > /* > * > ... > */ Yap, fat fingers here. > > + if (!*ppos) { > > + /* > > + * This pairs with smp_store_release() from function > > + * add_stack_record_to_list(), so we get a consistent > > + * value of stack_list. > > + */ > > + stack = smp_load_acquire(&stack_list); > > I'm not sure if it'd make your code simpler or not: there is > <linux/llist.h> for singly-linked linked lists, although the code to > manage the list is simple enough I'm indifferent here. Only consider > it if it helps you make the code simpler. I will check if it eases the code somehow. > > +static void stack_stop(struct seq_file *m, void *v) > > +{ > > +} > > Is this function even needed if it's empty? I recall there were some > boilerplate "nop" functions that could be used. I will check if seq already provides a dummy function for these cases.
On 2/12/24 23:30, Oscar Salvador wrote: > This patch adds a new directory called 'page_owner_stacks' under > /sys/kernel/debug/, with a file called 'show_stacks' in it. > Reading from that file will show all stacks that were added by page_owner > followed by their counting, giving us a clear overview of stack <-> count > relationship. > > E.g: > > prep_new_page+0xa9/0x120 > get_page_from_freelist+0x801/0x2210 > __alloc_pages+0x18b/0x350 > alloc_pages_mpol+0x91/0x1f0 > folio_alloc+0x14/0x50 > filemap_alloc_folio+0xb2/0x100 > __filemap_get_folio+0x14a/0x490 > ext4_write_begin+0xbd/0x4b0 [ext4] > generic_perform_write+0xc1/0x1e0 > ext4_buffered_write_iter+0x68/0xe0 [ext4] > ext4_file_write_iter+0x70/0x740 [ext4] > vfs_write+0x33d/0x420 > ksys_write+0xa5/0xe0 > do_syscall_64+0x80/0x160 > entry_SYSCALL_64_after_hwframe+0x6e/0x76 > stack_count: 4578 > > The seq stack_{start,next} functions will iterate through the list > stack_list in order to print all stacks. > > Signed-off-by: Oscar Salvador <osalvador@suse.de> ... > +static int stack_print(struct seq_file *m, void *v) > +{ > + char *buf; > + int ret = 0; > + struct stack *stack = v; > + struct stack_record *stack_record = stack->stack_record; > + > + if (!stack_record->size || stack_record->size < 0 || > + refcount_read(&stack_record->count) < 2) > + return 0; > + > + buf = kzalloc(PAGE_SIZE, GFP_KERNEL); > + > + ret += stack_trace_snprint(buf, PAGE_SIZE, stack_record->entries, > + stack_record->size, 0); > + if (!ret) > + goto out; > + > + scnprintf(buf + ret, PAGE_SIZE - ret, "stack_count: %d\n\n", > + refcount_read(&stack_record->count)); > + > + seq_printf(m, buf); > + seq_puts(m, "\n\n"); > +out: > + kfree(buf); Seems rather wasteful to do kzalloc/kfree so you can print into that buffer first and then print/copy it again using seq_printf. If you give up on using stack_trace_snprintf() it's not much harder to print the stack directly with a loop of seq_printf. See e.g. slab_debugfs_show(). 
> + > + return 0; > +} > +
On Tue, Feb 13, 2024 at 03:25:26PM +0100, Vlastimil Babka wrote: > On 2/12/24 23:30, Oscar Salvador wrote: > > +static int stack_print(struct seq_file *m, void *v) > > +{ > > + char *buf; > > + int ret = 0; > > + struct stack *stack = v; > > + struct stack_record *stack_record = stack->stack_record; > > + > > + if (!stack_record->size || stack_record->size < 0 || > > + refcount_read(&stack_record->count) < 2) > > + return 0; > > + > > + buf = kzalloc(PAGE_SIZE, GFP_KERNEL); > > + > > + ret += stack_trace_snprint(buf, PAGE_SIZE, stack_record->entries, > > + stack_record->size, 0); > > + if (!ret) > > + goto out; > > + > > + scnprintf(buf + ret, PAGE_SIZE - ret, "stack_count: %d\n\n", > > + refcount_read(&stack_record->count)); > > + > > + seq_printf(m, buf); > > + seq_puts(m, "\n\n"); > > +out: > > + kfree(buf); > > Seems rather wasteful to do kzalloc/kfree so you can print into that buffer > first and then print/copy it again using seq_printf. If you give up on using > stack_trace_snprintf() it's not much harder to print the stack directly with > a loop of seq_printf. See e.g. slab_debugfs_show(). Well, I thought about not reinventing the wheel there, but fair enough that performing a kmalloc/free op on every print might be suboptimal. I will try to do it with seq_printf alone. Thanks
On 2/13/24 16:33, Oscar Salvador wrote: > On Tue, Feb 13, 2024 at 03:25:26PM +0100, Vlastimil Babka wrote: >> On 2/12/24 23:30, Oscar Salvador wrote: >> > +static int stack_print(struct seq_file *m, void *v) >> > +{ >> > + char *buf; >> > + int ret = 0; >> > + struct stack *stack = v; >> > + struct stack_record *stack_record = stack->stack_record; >> > + >> > + if (!stack_record->size || stack_record->size < 0 || >> > + refcount_read(&stack_record->count) < 2) >> > + return 0; >> > + >> > + buf = kzalloc(PAGE_SIZE, GFP_KERNEL); >> > + >> > + ret += stack_trace_snprint(buf, PAGE_SIZE, stack_record->entries, >> > + stack_record->size, 0); >> > + if (!ret) >> > + goto out; >> > + >> > + scnprintf(buf + ret, PAGE_SIZE - ret, "stack_count: %d\n\n", >> > + refcount_read(&stack_record->count)); >> > + >> > + seq_printf(m, buf); >> > + seq_puts(m, "\n\n"); >> > +out: >> > + kfree(buf); >> >> Seems rather wasteful to do kzalloc/kfree so you can print into that buffer >> first and then print/copy it again using seq_printf. If you give up on using >> stack_trace_snprintf() it's not much harder to print the stack directly with >> a loop of seq_printf. See e.g. slab_debugfs_show(). > > Well, I thought about not reinventing the wheel there, but fair enough > than performing a kmalloc/free op on every print might be suboptimal. > I will try to do ir with seq_printf alone. Of course, once there's more than one stackdepot user printing into a seq_file, creating a common seq_file helper analogous to stack_trace_snprint() and using it from all places would also be an option :) > Thanks > >
diff --git a/mm/page_owner.c b/mm/page_owner.c index 7d1b3f75cef3..3e4b7cd7c8f8 100644 --- a/mm/page_owner.c +++ b/mm/page_owner.c @@ -84,7 +84,12 @@ static void add_stack_record_to_list(struct stack_record *stack_record) stack_list = stack; } else { stack->next = stack_list; - stack_list = stack; + /* This pairs with smp_load_acquire() from function + * stack_start(). This guarantees that stack_start() + * will see an updated stack_list before starting to + * traverse the list. + */ + smp_store_release(&stack_list, stack); } spin_unlock_irqrestore(&stack_list_lock, flags); } @@ -792,8 +797,97 @@ static const struct file_operations proc_page_owner_operations = { .llseek = lseek_page_owner, }; +static void *stack_start(struct seq_file *m, loff_t *ppos) +{ + struct stack *stack; + + if (*ppos == -1UL) + return NULL; + + if (!*ppos) { + /* + * This pairs with smp_store_release() from function + * add_stack_record_to_list(), so we get a consistent + * value of stack_list. + */ + stack = smp_load_acquire(&stack_list); + } else { + stack = m->private; + stack = stack->next; + } + + m->private = stack; + + return stack; +} + +static void *stack_next(struct seq_file *m, void *v, loff_t *ppos) +{ + struct stack *stack = v; + + stack = stack->next; + *ppos = stack ? 
*ppos + 1 : -1UL; + m->private = stack; + + return stack; +} + +static int stack_print(struct seq_file *m, void *v) +{ + char *buf; + int ret = 0; + struct stack *stack = v; + struct stack_record *stack_record = stack->stack_record; + + if (!stack_record->size || stack_record->size < 0 || + refcount_read(&stack_record->count) < 2) + return 0; + + buf = kzalloc(PAGE_SIZE, GFP_KERNEL); + + ret += stack_trace_snprint(buf, PAGE_SIZE, stack_record->entries, + stack_record->size, 0); + if (!ret) + goto out; + + scnprintf(buf + ret, PAGE_SIZE - ret, "stack_count: %d\n\n", + refcount_read(&stack_record->count)); + + seq_printf(m, buf); + seq_puts(m, "\n\n"); +out: + kfree(buf); + + return 0; +} + +static void stack_stop(struct seq_file *m, void *v) +{ +} + +static const struct seq_operations page_owner_stack_op = { + .start = stack_start, + .next = stack_next, + .stop = stack_stop, + .show = stack_print +}; + +static int page_owner_stack_open(struct inode *inode, struct file *file) +{ + return seq_open_private(file, &page_owner_stack_op, 0); +} + +static const struct file_operations page_owner_stack_operations = { + .open = page_owner_stack_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + static int __init pageowner_init(void) { + struct dentry *dir; + if (!static_branch_unlikely(&page_owner_inited)) { pr_info("page_owner is disabled\n"); return 0; @@ -801,6 +895,9 @@ static int __init pageowner_init(void) debugfs_create_file("page_owner", 0400, NULL, NULL, &proc_page_owner_operations); + dir = debugfs_create_dir("page_owner_stacks", NULL); + debugfs_create_file("show_stacks", 0400, dir, NULL, + &page_owner_stack_operations); return 0; }
This patch adds a new directory called 'page_owner_stacks' under /sys/kernel/debug/, with a file called 'show_stacks' in it. Reading from that file will show all stacks that were added by page_owner followed by their counting, giving us a clear overview of stack <-> count relationship. E.g: prep_new_page+0xa9/0x120 get_page_from_freelist+0x801/0x2210 __alloc_pages+0x18b/0x350 alloc_pages_mpol+0x91/0x1f0 folio_alloc+0x14/0x50 filemap_alloc_folio+0xb2/0x100 __filemap_get_folio+0x14a/0x490 ext4_write_begin+0xbd/0x4b0 [ext4] generic_perform_write+0xc1/0x1e0 ext4_buffered_write_iter+0x68/0xe0 [ext4] ext4_file_write_iter+0x70/0x740 [ext4] vfs_write+0x33d/0x420 ksys_write+0xa5/0xe0 do_syscall_64+0x80/0x160 entry_SYSCALL_64_after_hwframe+0x6e/0x76 stack_count: 4578 The seq stack_{start,next} functions will iterate through the list stack_list in order to print all stacks. Signed-off-by: Oscar Salvador <osalvador@suse.de> --- mm/page_owner.c | 99 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 98 insertions(+), 1 deletion(-)