diff mbox series

[RFC,1/3] proc/meminfo: introduce extra meminfo

Message ID 20200311034441.23243-2-jaewon31.kim@samsung.com (mailing list archive)
State New, archived
Headers show
Series meminfo: introduce extra meminfo | expand

Commit Message

Jaewon Kim March 11, 2020, 3:44 a.m. UTC
Provide APIs to drivers so that they can show its memory usage on
/proc/meminfo.

int register_extra_meminfo(atomic_long_t *val, int shift,
			   const char *name);
int unregister_extra_meminfo(atomic_long_t *val);

Signed-off-by: Jaewon Kim <jaewon31.kim@samsung.com>
---
 fs/proc/meminfo.c  | 103 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/mm.h |   4 +++
 lib/show_mem.c     |   1 +
 3 files changed, 108 insertions(+)

Comments

Sergey Senozhatsky March 11, 2020, 6:18 a.m. UTC | #1
On (20/03/11 12:44), Jaewon Kim wrote:
[..]
> +#define NAME_SIZE      15
> +#define NAME_BUF_SIZE  (NAME_SIZE + 2) /* ':' and '\0' */
> +
> +struct extra_meminfo {
> +	struct list_head list;
> +	atomic_long_t *val;
> +	int shift_for_page;
> +	char name[NAME_BUF_SIZE];
> +	char name_pad[NAME_BUF_SIZE];
> +};
> +
> +int register_extra_meminfo(atomic_long_t *val, int shift, const char *name)
> +{
> +	struct extra_meminfo *meminfo, *memtemp;
> +	int len;
> +	int error = 0;
> +
> +	meminfo = kzalloc(sizeof(*meminfo), GFP_KERNEL);
> +	if (!meminfo) {
> +		error = -ENOMEM;
> +		goto out;
> +	}
> +
> +	meminfo->val = val;
> +	meminfo->shift_for_page = shift;
> +	strncpy(meminfo->name, name, NAME_SIZE);
> +	len = strlen(meminfo->name);
> +	meminfo->name[len] = ':';
> +	strncpy(meminfo->name_pad, meminfo->name, NAME_BUF_SIZE);

What happens if there is no NULL byte among the first NAME_SIZE bytes
of passed `name'?

[..]

> +	spin_lock(&meminfo_lock);

Does this need to be a spinlock?

	-ss
Sergey Senozhatsky March 11, 2020, 6:25 a.m. UTC | #2
On (20/03/11 15:18), Sergey Senozhatsky wrote:
> On (20/03/11 12:44), Jaewon Kim wrote:
> [..]
> > +#define NAME_SIZE      15
> > +#define NAME_BUF_SIZE  (NAME_SIZE + 2) /* ':' and '\0' */
> > +
> > +struct extra_meminfo {
> > +	struct list_head list;
> > +	atomic_long_t *val;
> > +	int shift_for_page;
> > +	char name[NAME_BUF_SIZE];
> > +	char name_pad[NAME_BUF_SIZE];
> > +};
> > +
> > +int register_extra_meminfo(atomic_long_t *val, int shift, const char *name)
> > +{
> > +	struct extra_meminfo *meminfo, *memtemp;
> > +	int len;
> > +	int error = 0;
> > +
> > +	meminfo = kzalloc(sizeof(*meminfo), GFP_KERNEL);
> > +	if (!meminfo) {
> > +		error = -ENOMEM;
> > +		goto out;
> > +	}
> > +
> > +	meminfo->val = val;
> > +	meminfo->shift_for_page = shift;
> > +	strncpy(meminfo->name, name, NAME_SIZE);
> > +	len = strlen(meminfo->name);
> > +	meminfo->name[len] = ':';
> > +	strncpy(meminfo->name_pad, meminfo->name, NAME_BUF_SIZE);
> 
> What happens if there is no NULL byte among the first NAME_SIZE bytes
> of passed `name'?

Ah. The buffer size is NAME_BUF_SIZE, so should be fine.

	-ss
Jaewon Kim March 11, 2020, 6:30 a.m. UTC | #3
On 2020년 03월 11일 15:25, Sergey Senozhatsky wrote:
> On (20/03/11 15:18), Sergey Senozhatsky wrote:
>> On (20/03/11 12:44), Jaewon Kim wrote:
>> [..]
>>> +#define NAME_SIZE      15
>>> +#define NAME_BUF_SIZE  (NAME_SIZE + 2) /* ':' and '\0' */
>>> +
>>> +struct extra_meminfo {
>>> +	struct list_head list;
>>> +	atomic_long_t *val;
>>> +	int shift_for_page;
>>> +	char name[NAME_BUF_SIZE];
>>> +	char name_pad[NAME_BUF_SIZE];
>>> +};
>>> +
>>> +int register_extra_meminfo(atomic_long_t *val, int shift, const char *name)
>>> +{
>>> +	struct extra_meminfo *meminfo, *memtemp;
>>> +	int len;
>>> +	int error = 0;
>>> +
>>> +	meminfo = kzalloc(sizeof(*meminfo), GFP_KERNEL);
>>> +	if (!meminfo) {
>>> +		error = -ENOMEM;
>>> +		goto out;
>>> +	}
>>> +
>>> +	meminfo->val = val;
>>> +	meminfo->shift_for_page = shift;
>>> +	strncpy(meminfo->name, name, NAME_SIZE);
>>> +	len = strlen(meminfo->name);
>>> +	meminfo->name[len] = ':';
>>> +	strncpy(meminfo->name_pad, meminfo->name, NAME_BUF_SIZE);
>> What happens if there is no NULL byte among the first NAME_SIZE bytes
>> of passed `name'?
> Ah. The buffer size is NAME_BUF_SIZE, so should be fine.
>
> 	-ss
Hello yes correct.

For your comment of 'spinlock', it may be changed to other lock like rw semaphore.
I think there are just couple of writers compared to many readers.
Thank you for your comment.
>
>
Alexey Dobriyan March 11, 2020, 5:35 p.m. UTC | #4
On Wed, Mar 11, 2020 at 12:44:39PM +0900, Jaewon Kim wrote:
> Provide APIs to drivers so that they can show its memory usage on
> /proc/meminfo.
> 
> int register_extra_meminfo(atomic_long_t *val, int shift,
> 			   const char *name);
> int unregister_extra_meminfo(atomic_long_t *val);

> +			show_val_kb(m, memtemp->name_pad, nr_page);

I have 3 issues.

Can this be printed without "KB" piece and without useless whitespace,
like /proc/vmstat does?

I don't know how do you parse /proc/meminfo.
Do you search for specific string or do you use some kind of map[k] = v
interface?

2) zsmalloc can create top-level symlink and resolve it to necessary value.
It will be only 1 readlink(2) system call to fetch it.

3) android can do the same

For simple values there is no need to register stuff and create
mini subsystems.

	/proc/alexey
Jaewon Kim March 13, 2020, 4:53 a.m. UTC | #5
On 2020년 03월 12일 02:35, Alexey Dobriyan wrote:
> On Wed, Mar 11, 2020 at 12:44:39PM +0900, Jaewon Kim wrote:
>> Provide APIs to drivers so that they can show its memory usage on
>> /proc/meminfo.
>>
>> int register_extra_meminfo(atomic_long_t *val, int shift,
>> 			   const char *name);
>> int unregister_extra_meminfo(atomic_long_t *val);
>> +			show_val_kb(m, memtemp->name_pad, nr_page);
> I have 3 issues.
>
> Can this be printed without "KB" piece and without useless whitespace,
> like /proc/vmstat does?
>
> I don't know how do you parse /proc/meminfo.
> Do you search for specific string or do you use some kind of map[k] = v
> interface?
If need, I can remove KB. but show_free_areas also seems to print KB for some stats.
And I intentionally added : and space to be same format like others in /proc/meminfo.
show_val_kb(m, "MemTotal:       ", i.totalram);
>
> 2) zsmalloc can create top-level symlink and resolve it to necessary value.
> It will be only 1 readlink(2) system call to fetch it.
Yes it could be done by userspace readlink systemcall. But I wanted to see
all huge memory stats on one node of /proc/meminfo.
>
> 3) android can do the same
>
> For simple values there is no need to register stuff and create
> mini subsystems.
>
> 	/proc/alexey
OK as Leon Romanovsky said, I may be able to move other node of /proc/meminfo_extra.
Please let me know if it still have a point to be reviewed.

Thank you
>
>
diff mbox series

Patch

diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index 8c1f1bb1a5ce..12b1f77b091b 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -13,6 +13,7 @@ 
 #include <linux/vmstat.h>
 #include <linux/atomic.h>
 #include <linux/vmalloc.h>
+#include <linux/slab.h>
 #ifdef CONFIG_CMA
 #include <linux/cma.h>
 #endif
@@ -30,6 +31,106 @@  static void show_val_kb(struct seq_file *m, const char *s, unsigned long num)
 	seq_write(m, " kB\n", 4);
 }
 
+static LIST_HEAD(meminfo_head);
+static DEFINE_SPINLOCK(meminfo_lock);
+
+#define NAME_SIZE      15
+#define NAME_BUF_SIZE  (NAME_SIZE + 2) /* ':' and '\0' */
+
+struct extra_meminfo {
+	struct list_head list;
+	atomic_long_t *val;
+	int shift_for_page;
+	char name[NAME_BUF_SIZE];
+	char name_pad[NAME_BUF_SIZE];
+};
+
+int register_extra_meminfo(atomic_long_t *val, int shift, const char *name)
+{
+	struct extra_meminfo *meminfo, *memtemp;
+	int len;
+	int error = 0;
+
+	meminfo = kzalloc(sizeof(*meminfo), GFP_KERNEL);
+	if (!meminfo) {
+		error = -ENOMEM;
+		goto out;
+	}
+
+	meminfo->val = val;
+	meminfo->shift_for_page = shift;
+	strncpy(meminfo->name, name, NAME_SIZE);
+	len = strlen(meminfo->name);
+	meminfo->name[len] = ':';
+	strncpy(meminfo->name_pad, meminfo->name, NAME_BUF_SIZE);
+	while (++len < NAME_BUF_SIZE - 1)
+		meminfo->name_pad[len] = ' ';
+
+	spin_lock(&meminfo_lock);
+	list_for_each_entry(memtemp, &meminfo_head, list) {
+		if (memtemp->val == val) {
+			error = -EINVAL;
+			break;
+		}
+	}
+	if (!error)
+		list_add_tail(&meminfo->list, &meminfo_head);
+	spin_unlock(&meminfo_lock);
+	if (error)
+		kfree(meminfo);
+out:
+
+	return error;
+}
+EXPORT_SYMBOL(register_extra_meminfo);
+
+int unregister_extra_meminfo(atomic_long_t *val)
+{
+	struct extra_meminfo *memtemp, *memtemp2;
+	int error = -EINVAL;
+
+	spin_lock(&meminfo_lock);
+	list_for_each_entry_safe(memtemp, memtemp2, &meminfo_head, list) {
+		if (memtemp->val == val) {
+			list_del(&memtemp->list);
+			error = 0;
+			break;
+		}
+	}
+	spin_unlock(&meminfo_lock);
+	kfree(memtemp);
+
+	return error;
+}
+EXPORT_SYMBOL(unregister_extra_meminfo);
+
+static void __extra_meminfo(struct seq_file *m)
+{
+	struct extra_meminfo *memtemp;
+	unsigned long nr_page;
+
+	spin_lock(&meminfo_lock);
+	list_for_each_entry(memtemp, &meminfo_head, list) {
+		nr_page = (unsigned long)atomic_long_read(memtemp->val);
+		nr_page = nr_page >> memtemp->shift_for_page;
+		if (m)
+			show_val_kb(m, memtemp->name_pad, nr_page);
+		else
+			pr_cont("%s%lukB ", memtemp->name, nr_page);
+	}
+	spin_unlock(&meminfo_lock);
+}
+
+void extra_meminfo_log(void)
+{
+	__extra_meminfo(NULL);
+}
+
+static void extra_meminfo_proc(struct seq_file *m)
+{
+	__extra_meminfo(m);
+}
+
 static int meminfo_proc_show(struct seq_file *m, void *v)
 {
 	struct sysinfo i;
@@ -148,6 +249,8 @@  static int meminfo_proc_show(struct seq_file *m, void *v)
 
 	arch_report_meminfo(m);
 
+	extra_meminfo_proc(m);
+
 	return 0;
 }
 
diff --git a/include/linux/mm.h b/include/linux/mm.h
index c54fb96cb1e6..457570ddd17c 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2902,6 +2902,10 @@  void __init setup_nr_node_ids(void);
 static inline void setup_nr_node_ids(void) {}
 #endif
 
+void extra_meminfo_log(void);
+int register_extra_meminfo(atomic_long_t *val, int shift, const char *name);
+int unregister_extra_meminfo(atomic_long_t *val);
+
 extern int memcmp_pages(struct page *page1, struct page *page2);
 
 static inline int pages_identical(struct page *page1, struct page *page2)
diff --git a/lib/show_mem.c b/lib/show_mem.c
index 1c26c14ffbb9..48be5afaca0a 100644
--- a/lib/show_mem.c
+++ b/lib/show_mem.c
@@ -14,6 +14,7 @@  void show_mem(unsigned int filter, nodemask_t *nodemask)
 	unsigned long total = 0, reserved = 0, highmem = 0;
 
 	printk("Mem-Info:\n");
+	extra_meminfo_log();
 	show_free_areas(filter, nodemask);
 
 	for_each_online_pgdat(pgdat) {