Message ID | 20220905031012.4450-4-osalvador@suse.de (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | page_owner: print stacks and their counter | expand |
On Mon, 5 Sep 2022 05:10:12 +0200, Oscar Salvador wrote: > +static int page_owner_threshold_show(struct seq_file *p, void *v) > +{ > + seq_printf(p, "%lu\n", threshold); Remove a stray leading space (0x20) here (before seq_printf()). > + return 0; > +} > + > +static ssize_t write_page_owner_threshold(struct file *file, const char __user *buf, > + size_t count, loff_t *pos) > +{ > + char *kbuf; > + int ret = 0; > + > + count = min_t(size_t, count, PAGE_SIZE); > + kbuf = kmalloc(count, GFP_KERNEL); > + if (!kbuf) > + return -ENOMEM; > + > + if (copy_from_user(kbuf, buf, count)) { > + ret = -EFAULT; > + goto out; > + } > + > + kbuf[count - 1] = '\0'; > + > + ret = kstrtoul(kbuf, 10, &threshold); > + > +out: > + kfree(kbuf); > + return ret ? ret : count; > +} Still the same comment on this: kmalloc() is not really needed here. Capping the size to PAGE_SIZE (usually 4K) is too big. `unsigned long` is 64-bit at most, which means the max value is 18446744073709551615 (20 chars). The lifetime of @kbuf is very short as well, so wouldn't a stack-allocated array of chars be fine? Untested: static ssize_t write_page_owner_threshold(struct file *file, const char __user *buf, size_t count, loff_t *pos) { char kbuf[21]; int ret; count = min_t(size_t, count, sizeof(kbuf)); if (copy_from_user(kbuf, buf, count)) return -EFAULT; kbuf[count - 1] = '\0'; ret = kstrtoul(kbuf, 10, &threshold); return ret ? ret : count; }
On Mon 05-09-22 17:51:37, Ammar Faizi wrote: > On Mon, 5 Sep 2022 05:10:12 +0200, Oscar Salvador wrote: > > +static int page_owner_threshold_show(struct seq_file *p, void *v) > > +{ > > + seq_printf(p, "%lu\n", threshold); > > Remove a slipped leading 0x20 space here (before seq_printf()). > > > + return 0; > > +} > > + > > +static ssize_t write_page_owner_threshold(struct file *file, const char __user *buf, > > + size_t count, loff_t *pos) > > +{ > > + char *kbuf; > > + int ret = 0; > > + > > + count = min_t(size_t, count, PAGE_SIZE); > > + kbuf = kmalloc(count, GFP_KERNEL); > > + if (!kbuf) > > + return -ENOMEM; > > + > > + if (copy_from_user(kbuf, buf, count)) { > > + ret = -EFAULT; > > + goto out; > > + } > > + > > + kbuf[count - 1] = '\0'; > > + > > + ret = kstrtoul(kbuf, 10, &threshold); > > + > > +out: > > + kfree(kbuf); > > + return ret ? ret : count; > > +} > > Still the same comment on this, kmalloc() is not really needed here. > Capping the size to PAGE_SIZE (usually 4K) is too big. `unsinged long` > is 64-bit at most, this means the max val is 18446744073709551615 > (20 chars). The lifetime of @kbuf is very short as well, using a stack > allocated array of chars is fine? > > Untested: > > static ssize_t write_page_owner_threshold(struct file *file, const char __user *buf, > size_t count, loff_t *pos) > { > char kbuf[21]; > int ret; > > count = min_t(size_t, count, sizeof(kbuf)); > if (copy_from_user(kbuf, buf, count)) > return -EFAULT; > > kbuf[count - 1] = '\0'; > ret = kstrtoul(kbuf, 10, &threshold); > return ret ? ret : count; > } Isn't there a proc_dointvec counterpart for debugfs?
On Mon, 5 Sep 2022 13:31:02 +0200, Michal Hocko wrote: > On Mon 05-09-22 17:51:37, Ammar Faizi wrote: > > On Mon, 5 Sep 2022 05:10:12 +0200, Oscar Salvador wrote: > > > +static int page_owner_threshold_show(struct seq_file *p, void *v) > > > +{ > > > + seq_printf(p, "%lu\n", threshold); > > > > Remove a slipped leading 0x20 space here (before seq_printf()). > > > > > + return 0; > > > +} > > > + > > > +static ssize_t write_page_owner_threshold(struct file *file, const char __user *buf, > > > + size_t count, loff_t *pos) > > > +{ > > > + char *kbuf; > > > + int ret = 0; > > > + > > > + count = min_t(size_t, count, PAGE_SIZE); > > > + kbuf = kmalloc(count, GFP_KERNEL); > > > + if (!kbuf) > > > + return -ENOMEM; > > > + > > > + if (copy_from_user(kbuf, buf, count)) { > > > + ret = -EFAULT; > > > + goto out; > > > + } > > > + > > > + kbuf[count - 1] = '\0'; > > > + > > > + ret = kstrtoul(kbuf, 10, &threshold); > > > + > > > +out: > > > + kfree(kbuf); > > > + return ret ? ret : count; > > > +} > > > > Still the same comment on this, kmalloc() is not really needed here. > > Capping the size to PAGE_SIZE (usually 4K) is too big. `unsinged long` > > is 64-bit at most, this means the max val is 18446744073709551615 > > (20 chars). The lifetime of @kbuf is very short as well, using a stack > > allocated array of chars is fine? > > > > Untested: > > > > static ssize_t write_page_owner_threshold(struct file *file, const char __user *buf, > > size_t count, loff_t *pos) > > { > > char kbuf[21]; > > int ret; > > > > count = min_t(size_t, count, sizeof(kbuf)); > > if (copy_from_user(kbuf, buf, count)) > > return -EFAULT; > > > > kbuf[count - 1] = '\0'; > > ret = kstrtoul(kbuf, 10, &threshold); > > return ret ? ret : count; > > } > > Isn't there a proc_dointvec counterpart for debugfs? Ah, well. If that's much simpler, we should go with that. I am not familiar with the proc_dointvec() interface, so I can't comment on it. Thanks for the comment. TIL.
On Mon 05-09-22 18:54:59, Ammar Faizi wrote: > On Mon, 5 Sep 2022 13:31:02 +0200, Michal Hocko wrote: [...] > > > static ssize_t write_page_owner_threshold(struct file *file, const char __user *buf, > > > size_t count, loff_t *pos) > > > { > > > char kbuf[21]; > > > int ret; > > > > > > count = min_t(size_t, count, sizeof(kbuf)); > > > if (copy_from_user(kbuf, buf, count)) > > > return -EFAULT; > > > > > > kbuf[count - 1] = '\0'; > > > ret = kstrtoul(kbuf, 10, &threshold); > > > return ret ? ret : count; > > > } > > > > Isn't there a proc_dointvec counterpart for debugfs? > > Ah, well. If that's much simpler, we should go with that. I am not > familiar proc_dointvec() interface, so I couldn't say about it. Just to clarify: proc_dointvec is rather specific to the proc/sysctl interface. I was too lazy to check whether debugfs has something similar available. Maybe writing to debugfs is not all that common, but I would expect shared code for writing a simple value to be there.
On Mon, 5 Sep 2022 14:02:09 +0200, Michal Hocko wrote: > On Mon 05-09-22 18:54:59, Ammar Faizi wrote: > > On Mon, 5 Sep 2022 13:31:02 +0200, Michal Hocko wrote: > [...] > > > > static ssize_t write_page_owner_threshold(struct file *file, const char __user *buf, > > > > size_t count, loff_t *pos) > > > > { > > > > char kbuf[21]; > > > > int ret; > > > > > > > > count = min_t(size_t, count, sizeof(kbuf)); > > > > if (copy_from_user(kbuf, buf, count)) > > > > return -EFAULT; > > > > > > > > kbuf[count - 1] = '\0'; > > > > ret = kstrtoul(kbuf, 10, &threshold); > > > > return ret ? ret : count; > > > > } > > > > > > Isn't there a proc_dointvec counterpart for debugfs? > > > > Ah, well. If that's much simpler, we should go with that. I am not > > familiar proc_dointvec() interface, so I couldn't say about it. > > Just to clarify. proc_dointvec is rather specific to proc/sysctl > interface. I was too lazy to look whether debugfs has something similar > available. Maybe writing to debugfs is not all that common but I would > expect a shared code to write a simple value would be there. I took a look, there is DEFINE_SIMPLE_ATTRIBUTE(). Ref: https://github.com/torvalds/linux/blob/v6.0-rc4/include/linux/fs.h#L3458-L3487 It looks much simpler to me. Untested, but it is something like this: ----------------- static int page_owner_threshold_get(void *data, u64 *val) { *val = threshold; return 0; } static int page_owner_threshold_set(void *data, u64 val) { threshold = val; return 0; } DEFINE_SIMPLE_ATTRIBUTE(proc_page_owner_threshold, &page_owner_threshold_get, &page_owner_threshold_set, "%lu"); ----------------- And then the init should be the same: debugfs_create_file("page_owner_threshold", 0600, NULL, NULL, &proc_page_owner_threshold);
On 9/5/22 05:10, Oscar Salvador wrote: > We want to be able to filter out the output on a threshold basis, > in this way we can get rid of a lot of noise and focus only on those > stacks which have an allegedly high counter. > > We can control the threshold value by a new file called > 'page_owner_threshold', which is 0 by default. The name could suggest it has to do something with "page_owner" but in fact it only affects "page_owner_stacks". So maybe "page_owner_stacks_threshold" ? But now it's rather long. Or maybe "page_owner_stacks_min_count" ? Also long but maybe the most self-evident? > Signed-off-by: Oscar Salvador <osalvador@suse.de> > --- > include/linux/stackdepot.h | 3 ++- > lib/stackdepot.c | 6 +++-- > mm/page_owner.c | 51 +++++++++++++++++++++++++++++++++++++- > 3 files changed, 56 insertions(+), 4 deletions(-) > > diff --git a/include/linux/stackdepot.h b/include/linux/stackdepot.h > index 19d3f8295df8..742038216cd0 100644 > --- a/include/linux/stackdepot.h > +++ b/include/linux/stackdepot.h > @@ -25,7 +25,8 @@ depot_stack_handle_t __stack_depot_save(unsigned long *entries, > gfp_t gfp_flags, bool can_alloc, > enum stack_depot_action action); > void stack_depot_dec_count(depot_stack_handle_t handle); > -int stack_depot_print_stacks_threshold(char *buf, size_t size, loff_t *pos); > +int stack_depot_print_stacks_threshold(char *buf, size_t size, loff_t *pos, > + unsigned long threshold); > > /* > * Every user of stack depot has to call stack_depot_init() during its own init > diff --git a/lib/stackdepot.c b/lib/stackdepot.c > index a198b2dbe3fb..a31e882853ab 100644 > --- a/lib/stackdepot.c > +++ b/lib/stackdepot.c > @@ -566,7 +566,8 @@ depot_stack_handle_t stack_depot_save_action(unsigned long *entries, > } > EXPORT_SYMBOL_GPL(stack_depot_save_action); > > -int stack_depot_print_stacks_threshold(char *buf, size_t size, loff_t *pos) > +int stack_depot_print_stacks_threshold(char *buf, size_t size, loff_t *pos, > + unsigned long threshold) > { > int i = 
*pos, ret = 0; > struct stack_record **stacks, *stack; > @@ -585,7 +586,8 @@ int stack_depot_print_stacks_threshold(char *buf, size_t size, loff_t *pos) > for (; stack; stack = stack->next) { > if (!stack->size || stack->size < 0 || > stack->size > size || stack->handle.valid != 1 || > - refcount_read(&stack->count) < 1) > + refcount_read(&stack->count) < 1 || > + refcount_read(&stack->count) < threshold) > continue; > > ret += stack_trace_snprint(buf, size, stack->entries, stack->size, 0); > diff --git a/mm/page_owner.c b/mm/page_owner.c > index d88e6b4aefa0..5b895d347c5f 100644 > --- a/mm/page_owner.c > +++ b/mm/page_owner.c > @@ -43,6 +43,8 @@ static depot_stack_handle_t early_handle; > > static void init_early_allocated_pages(void); > > +static unsigned long threshold; > + > static int __init early_page_owner_param(char *buf) > { > int ret = kstrtobool(buf, &page_owner_enabled); > @@ -675,7 +677,7 @@ static ssize_t read_page_owner_stacks(struct file *file, char __user *buf, > if (!kbuf) > return -ENOMEM; > > - ret += stack_depot_print_stacks_threshold(kbuf, count, pos); > + ret += stack_depot_print_stacks_threshold(kbuf, count, pos, threshold); > if (copy_to_user(buf, kbuf, ret)) > ret = -EFAULT; > > @@ -683,6 +685,51 @@ static ssize_t read_page_owner_stacks(struct file *file, char __user *buf, > return ret; > } > > +static int page_owner_threshold_show(struct seq_file *p, void *v) > +{ > + seq_printf(p, "%lu\n", threshold); > + return 0; > +} > + > +static ssize_t write_page_owner_threshold(struct file *file, const char __user *buf, > + size_t count, loff_t *pos) > +{ > + char *kbuf; > + int ret = 0; > + > + count = min_t(size_t, count, PAGE_SIZE); > + kbuf = kmalloc(count, GFP_KERNEL); > + if (!kbuf) > + return -ENOMEM; > + > + if (copy_from_user(kbuf, buf, count)) { > + ret = -EFAULT; > + goto out; > + } > + > + kbuf[count - 1] = '\0'; > + > + ret = kstrtoul(kbuf, 10, &threshold); > + > +out: > + kfree(kbuf); > + return ret ? 
ret : count; > +} > + > +static int open_page_owner_threshold(struct inode *inode, struct file *file) > +{ > + return single_open(file, page_owner_threshold_show, NULL); > +} > + > + > +static const struct file_operations proc_page_owner_threshold = { > + .open = open_page_owner_threshold, > + .read = seq_read, > + .llseek = seq_lseek, > + .write = write_page_owner_threshold, > + .release = single_release, > +}; > + > static const struct file_operations proc_page_owner_stacks = { > .read = read_page_owner_stacks, > }; > @@ -702,6 +749,8 @@ static int __init pageowner_init(void) > &proc_page_owner_operations); > debugfs_create_file("page_owner_stacks", 0400, NULL, NULL, > &proc_page_owner_stacks); > + debugfs_create_file("page_owner_threshold", 0600, NULL, NULL, > + &proc_page_owner_threshold); > > return 0; > }
diff --git a/include/linux/stackdepot.h b/include/linux/stackdepot.h index 19d3f8295df8..742038216cd0 100644 --- a/include/linux/stackdepot.h +++ b/include/linux/stackdepot.h @@ -25,7 +25,8 @@ depot_stack_handle_t __stack_depot_save(unsigned long *entries, gfp_t gfp_flags, bool can_alloc, enum stack_depot_action action); void stack_depot_dec_count(depot_stack_handle_t handle); -int stack_depot_print_stacks_threshold(char *buf, size_t size, loff_t *pos); +int stack_depot_print_stacks_threshold(char *buf, size_t size, loff_t *pos, + unsigned long threshold); /* * Every user of stack depot has to call stack_depot_init() during its own init diff --git a/lib/stackdepot.c b/lib/stackdepot.c index a198b2dbe3fb..a31e882853ab 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -566,7 +566,8 @@ depot_stack_handle_t stack_depot_save_action(unsigned long *entries, } EXPORT_SYMBOL_GPL(stack_depot_save_action); -int stack_depot_print_stacks_threshold(char *buf, size_t size, loff_t *pos) +int stack_depot_print_stacks_threshold(char *buf, size_t size, loff_t *pos, + unsigned long threshold) { int i = *pos, ret = 0; struct stack_record **stacks, *stack; @@ -585,7 +586,8 @@ int stack_depot_print_stacks_threshold(char *buf, size_t size, loff_t *pos) for (; stack; stack = stack->next) { if (!stack->size || stack->size < 0 || stack->size > size || stack->handle.valid != 1 || - refcount_read(&stack->count) < 1) + refcount_read(&stack->count) < 1 || + refcount_read(&stack->count) < threshold) continue; ret += stack_trace_snprint(buf, size, stack->entries, stack->size, 0); diff --git a/mm/page_owner.c b/mm/page_owner.c index d88e6b4aefa0..5b895d347c5f 100644 --- a/mm/page_owner.c +++ b/mm/page_owner.c @@ -43,6 +43,8 @@ static depot_stack_handle_t early_handle; static void init_early_allocated_pages(void); +static unsigned long threshold; + static int __init early_page_owner_param(char *buf) { int ret = kstrtobool(buf, &page_owner_enabled); @@ -675,7 +677,7 @@ static ssize_t 
read_page_owner_stacks(struct file *file, char __user *buf, if (!kbuf) return -ENOMEM; - ret += stack_depot_print_stacks_threshold(kbuf, count, pos); + ret += stack_depot_print_stacks_threshold(kbuf, count, pos, threshold); if (copy_to_user(buf, kbuf, ret)) ret = -EFAULT; @@ -683,6 +685,51 @@ static ssize_t read_page_owner_stacks(struct file *file, char __user *buf, return ret; } +static int page_owner_threshold_show(struct seq_file *p, void *v) +{ + seq_printf(p, "%lu\n", threshold); + return 0; +} + +static ssize_t write_page_owner_threshold(struct file *file, const char __user *buf, + size_t count, loff_t *pos) +{ + char *kbuf; + int ret = 0; + + count = min_t(size_t, count, PAGE_SIZE); + kbuf = kmalloc(count, GFP_KERNEL); + if (!kbuf) + return -ENOMEM; + + if (copy_from_user(kbuf, buf, count)) { + ret = -EFAULT; + goto out; + } + + kbuf[count - 1] = '\0'; + + ret = kstrtoul(kbuf, 10, &threshold); + +out: + kfree(kbuf); + return ret ? ret : count; +} + +static int open_page_owner_threshold(struct inode *inode, struct file *file) +{ + return single_open(file, page_owner_threshold_show, NULL); +} + + +static const struct file_operations proc_page_owner_threshold = { + .open = open_page_owner_threshold, + .read = seq_read, + .llseek = seq_lseek, + .write = write_page_owner_threshold, + .release = single_release, +}; + static const struct file_operations proc_page_owner_stacks = { .read = read_page_owner_stacks, }; @@ -702,6 +749,8 @@ static int __init pageowner_init(void) &proc_page_owner_operations); debugfs_create_file("page_owner_stacks", 0400, NULL, NULL, &proc_page_owner_stacks); + debugfs_create_file("page_owner_threshold", 0600, NULL, NULL, + &proc_page_owner_threshold); return 0; }
We want to be able to filter the output on a threshold basis, so that we can get rid of a lot of noise and focus only on those stacks which have an allegedly high counter. We can control the threshold value by a new file called 'page_owner_threshold', which is 0 by default. Signed-off-by: Oscar Salvador <osalvador@suse.de> --- include/linux/stackdepot.h | 3 ++- lib/stackdepot.c | 6 +++-- mm/page_owner.c | 51 +++++++++++++++++++++++++++++++++++++- 3 files changed, 56 insertions(+), 4 deletions(-)