diff mbox series

[v4,3/4] mm/page_owner: Print memcg information

Message ID 20220202203036.744010-4-longman@redhat.com (mailing list archive)
State New
Headers show
Series [v4,1/4] lib/vsprintf: Avoid redundant work with 0 size | expand

Commit Message

Waiman Long Feb. 2, 2022, 8:30 p.m. UTC
It was found that a number of offline memcgs were not freed because
they were pinned by some charged pages that were present. Even "echo 1 >
/proc/sys/vm/drop_caches" wasn't able to free those pages. These offline
but not freed memcgs tend to increase in number over time with the side
effect that percpu memory consumption as shown in /proc/meminfo also
increases over time.

In order to find out more information about those pages that pin
offline memcgs, the page_owner feature is extended to print memory
cgroup information, especially whether the cgroup is offline or not.
RCU read lock is taken when memcg is being accessed to make sure
that it won't be freed.

Signed-off-by: Waiman Long <longman@redhat.com>
Acked-by: David Rientjes <rientjes@google.com>
Acked-by: Roman Gushchin <guro@fb.com>
---
 mm/page_owner.c | 42 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 42 insertions(+)

Comments

Mike Rapoport Feb. 3, 2022, 6:53 a.m. UTC | #1
On Wed, Feb 02, 2022 at 03:30:35PM -0500, Waiman Long wrote:
> It was found that a number of offline memcgs were not freed because
> they were pinned by some charged pages that were present. Even "echo 1 >
> /proc/sys/vm/drop_caches" wasn't able to free those pages. These offline
> but not freed memcgs tend to increase in number over time with the side
> effect that percpu memory consumption as shown in /proc/meminfo also
> increases over time.
> 
> In order to find out more information about those pages that pin
> offline memcgs, the page_owner feature is extended to print memory
> cgroup information especially whether the cgroup is offline or not.
> RCU read lock is taken when memcg is being accessed to make sure
> that it won't be freed.
> 
> Signed-off-by: Waiman Long <longman@redhat.com>
> Acked-by: David Rientjes <rientjes@google.com>
> Acked-by: Roman Gushchin <guro@fb.com>

Acked-by: Mike Rapoport <rppt@linux.ibm.com>

And my acks for the first two patches are missing somehow in v4...

> ---
>  mm/page_owner.c | 42 ++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 42 insertions(+)
> 
> diff --git a/mm/page_owner.c b/mm/page_owner.c
> index 28dac73e0542..f7820357e4d4 100644
> --- a/mm/page_owner.c
> +++ b/mm/page_owner.c
> @@ -10,6 +10,7 @@
>  #include <linux/migrate.h>
>  #include <linux/stackdepot.h>
>  #include <linux/seq_file.h>
> +#include <linux/memcontrol.h>
>  #include <linux/sched/clock.h>
>  
>  #include "internal.h"
> @@ -325,6 +326,45 @@ void pagetypeinfo_showmixedcount_print(struct seq_file *m,
>  	seq_putc(m, '\n');
>  }
>  
> +/*
> + * Looking for memcg information and print it out
> + */
> +static inline int print_page_owner_memcg(char *kbuf, size_t count, int ret,
> +					 struct page *page)
> +{
> +#ifdef CONFIG_MEMCG
> +	unsigned long memcg_data;
> +	struct mem_cgroup *memcg;
> +	bool online;
> +	char name[80];
> +
> +	rcu_read_lock();
> +	memcg_data = READ_ONCE(page->memcg_data);
> +	if (!memcg_data)
> +		goto out_unlock;
> +
> +	if (memcg_data & MEMCG_DATA_OBJCGS)
> +		ret += scnprintf(kbuf + ret, count - ret,
> +				"Slab cache page\n");
> +
> +	memcg = page_memcg_check(page);
> +	if (!memcg)
> +		goto out_unlock;
> +
> +	online = (memcg->css.flags & CSS_ONLINE);
> +	cgroup_name(memcg->css.cgroup, name, sizeof(name));
> +	ret += scnprintf(kbuf + ret, count - ret,
> +			"Charged %sto %smemcg %s\n",
> +			PageMemcgKmem(page) ? "(via objcg) " : "",
> +			online ? "" : "offline ",
> +			name);
> +out_unlock:
> +	rcu_read_unlock();
> +#endif /* CONFIG_MEMCG */
> +
> +	return ret;
> +}
> +
>  static ssize_t
>  print_page_owner(char __user *buf, size_t count, unsigned long pfn,
>  		struct page *page, struct page_owner *page_owner,
> @@ -365,6 +405,8 @@ print_page_owner(char __user *buf, size_t count, unsigned long pfn,
>  			migrate_reason_names[page_owner->last_migrate_reason]);
>  	}
>  
> +	ret = print_page_owner_memcg(kbuf, count, ret, page);
> +
>  	ret += snprintf(kbuf + ret, count - ret, "\n");
>  	if (ret >= count)
>  		goto err;
> -- 
> 2.27.0
> 
>
Michal Hocko Feb. 3, 2022, 12:46 p.m. UTC | #2
On Wed 02-02-22 15:30:35, Waiman Long wrote:
[...]
> +#ifdef CONFIG_MEMCG
> +	unsigned long memcg_data;
> +	struct mem_cgroup *memcg;
> +	bool online;
> +	char name[80];
> +
> +	rcu_read_lock();
> +	memcg_data = READ_ONCE(page->memcg_data);
> +	if (!memcg_data)
> +		goto out_unlock;
> +
> +	if (memcg_data & MEMCG_DATA_OBJCGS)
> +		ret += scnprintf(kbuf + ret, count - ret,
> +				"Slab cache page\n");
> +
> +	memcg = page_memcg_check(page);
> +	if (!memcg)
> +		goto out_unlock;
> +
> +	online = (memcg->css.flags & CSS_ONLINE);
> +	cgroup_name(memcg->css.cgroup, name, sizeof(name));

Is there any specific reason to use another buffer allocated on the
stack? Also 80B seems too short to cover NAME_MAX.

Nothing else jumped at me.
Waiman Long Feb. 3, 2022, 7:03 p.m. UTC | #3
On 2/3/22 07:46, Michal Hocko wrote:
> On Wed 02-02-22 15:30:35, Waiman Long wrote:
> [...]
>> +#ifdef CONFIG_MEMCG
>> +	unsigned long memcg_data;
>> +	struct mem_cgroup *memcg;
>> +	bool online;
>> +	char name[80];
>> +
>> +	rcu_read_lock();
>> +	memcg_data = READ_ONCE(page->memcg_data);
>> +	if (!memcg_data)
>> +		goto out_unlock;
>> +
>> +	if (memcg_data & MEMCG_DATA_OBJCGS)
>> +		ret += scnprintf(kbuf + ret, count - ret,
>> +				"Slab cache page\n");
>> +
>> +	memcg = page_memcg_check(page);
>> +	if (!memcg)
>> +		goto out_unlock;
>> +
>> +	online = (memcg->css.flags & CSS_ONLINE);
>> +	cgroup_name(memcg->css.cgroup, name, sizeof(name));
> Is there any specific reason to use another buffer allocated on the
> stack? Also 80B seems too short to cover NAME_MAX.
>
> Nothing else jumped at me.

I suppose we can print directly into kbuf with cgroup_name(), but using 
a separate buffer is easier to read and understand. 79 characters should 
be enough for most cgroup names. Some auto-generated names with some 
kind of embedded uuids may be longer than that, but the random sequence 
of hex digits that may be missing do not convey much information for 
identification purpose. We can always increase the buffer length later 
if it turns out to be an issue.

Cheers,
Longman
Michal Hocko Feb. 7, 2022, 5:20 p.m. UTC | #4
On Thu 03-02-22 14:03:58, Waiman Long wrote:
> On 2/3/22 07:46, Michal Hocko wrote:
> > On Wed 02-02-22 15:30:35, Waiman Long wrote:
> > [...]
> > > +#ifdef CONFIG_MEMCG
> > > +	unsigned long memcg_data;
> > > +	struct mem_cgroup *memcg;
> > > +	bool online;
> > > +	char name[80];
> > > +
> > > +	rcu_read_lock();
> > > +	memcg_data = READ_ONCE(page->memcg_data);
> > > +	if (!memcg_data)
> > > +		goto out_unlock;
> > > +
> > > +	if (memcg_data & MEMCG_DATA_OBJCGS)
> > > +		ret += scnprintf(kbuf + ret, count - ret,
> > > +				"Slab cache page\n");
> > > +
> > > +	memcg = page_memcg_check(page);
> > > +	if (!memcg)
> > > +		goto out_unlock;
> > > +
> > > +	online = (memcg->css.flags & CSS_ONLINE);
> > > +	cgroup_name(memcg->css.cgroup, name, sizeof(name));
> > Is there any specific reason to use another buffer allocated on the
> > stack? Also 80B seems too short to cover NAME_MAX.
> > 
> > Nothing else jumped at me.
> 
> I suppose we can print directly into kbuf with cgroup_name(), but using a
> separate buffer is easier to read and understand. 79 characters should be
> enough for most cgroup names. Some auto-generated names with some kind of
> embedded uuids may be longer than that, but the random sequence of hex
> digits that may be missing do not convey much information for identification
> purpose. We can always increase the buffer length later if it turns out to
> be an issue.

Cutting a name short sounds like a source of confusion and there doesn't
seem to be any good reason for that.
Andrew Morton Feb. 7, 2022, 7:09 p.m. UTC | #5
On Mon, 7 Feb 2022 18:20:04 +0100 Michal Hocko <mhocko@suse.com> wrote:

> On Thu 03-02-22 14:03:58, Waiman Long wrote:
> > On 2/3/22 07:46, Michal Hocko wrote:
> > > On Wed 02-02-22 15:30:35, Waiman Long wrote:
> > > [...]
> ...
> > > > +	online = (memcg->css.flags & CSS_ONLINE);
> > > > +	cgroup_name(memcg->css.cgroup, name, sizeof(name));
> > > Is there any specific reason to use another buffer allocated on the
> > > stack? Also 80B seems too short to cover NAME_MAX.
> > > 
> > > Nothing else jumped at me.
> > 
> > I suppose we can print directly into kbuf with cgroup_name(), but using a
> > separate buffer is easier to read and understand. 79 characters should be
> > enough for most cgroup names. Some auto-generated names with some kind of
> > embedded uuids may be longer than that, but the random sequence of hex
> > digits that may be missing do not convey much information for identification
> > purpose. We can always increase the buffer length later if it turns out to
> > be an issue.
> 
> Cutting a name short sounds like a source of confusion and there doesn't
> seem to be any good reason for that.

Yes.  If we give them 79 characters, someone will go and want 94.  If
we can prevent this once and for ever, let's please do so.
Waiman Long Feb. 7, 2022, 7:33 p.m. UTC | #6
On 2/7/22 14:09, Andrew Morton wrote:
> On Mon, 7 Feb 2022 18:20:04 +0100 Michal Hocko <mhocko@suse.com> wrote:
>
>> On Thu 03-02-22 14:03:58, Waiman Long wrote:
>>> On 2/3/22 07:46, Michal Hocko wrote:
>>>> On Wed 02-02-22 15:30:35, Waiman Long wrote:
>>>> [...]
>> ...
>>>>> +	online = (memcg->css.flags & CSS_ONLINE);
>>>>> +	cgroup_name(memcg->css.cgroup, name, sizeof(name));
>>>> Is there any specific reason to use another buffer allocated on the
>>>> stack? Also 80B seems too short to cover NAME_MAX.
>>>>
>>>> Nothing else jumped at me.
>>> I suppose we can print directly into kbuf with cgroup_name(), but using a
>>> separate buffer is easier to read and understand. 79 characters should be
>>> enough for most cgroup names. Some auto-generated names with some kind of
>>> embedded uuids may be longer than that, but the random sequence of hex
>>> digits that may be missing do not convey much information for identification
>>> purpose. We can always increase the buffer length later if it turns out to
>>> be an issue.
>> Cutting a name short sounds like a source of confusion and there doesn't
>> seem to be any good reason for that.
> Yes.  If we give them 79 characters, someone will go and want 94.  If
> we can prevent this once and for ever, let's please do so.

Sure. Will send a version with that change.

Cheers,
Longman

>
diff mbox series

Patch

diff --git a/mm/page_owner.c b/mm/page_owner.c
index 28dac73e0542..f7820357e4d4 100644
--- a/mm/page_owner.c
+++ b/mm/page_owner.c
@@ -10,6 +10,7 @@ 
 #include <linux/migrate.h>
 #include <linux/stackdepot.h>
 #include <linux/seq_file.h>
+#include <linux/memcontrol.h>
 #include <linux/sched/clock.h>
 
 #include "internal.h"
@@ -325,6 +326,45 @@  void pagetypeinfo_showmixedcount_print(struct seq_file *m,
 	seq_putc(m, '\n');
 }
 
+/*
+ * Looking for memcg information and print it out
+ */
+static inline int print_page_owner_memcg(char *kbuf, size_t count, int ret,
+					 struct page *page)
+{
+#ifdef CONFIG_MEMCG
+	unsigned long memcg_data;
+	struct mem_cgroup *memcg;
+	bool online;
+	char name[80];
+
+	rcu_read_lock();
+	memcg_data = READ_ONCE(page->memcg_data);
+	if (!memcg_data)
+		goto out_unlock;
+
+	if (memcg_data & MEMCG_DATA_OBJCGS)
+		ret += scnprintf(kbuf + ret, count - ret,
+				"Slab cache page\n");
+
+	memcg = page_memcg_check(page);
+	if (!memcg)
+		goto out_unlock;
+
+	online = (memcg->css.flags & CSS_ONLINE);
+	cgroup_name(memcg->css.cgroup, name, sizeof(name));
+	ret += scnprintf(kbuf + ret, count - ret,
+			"Charged %sto %smemcg %s\n",
+			PageMemcgKmem(page) ? "(via objcg) " : "",
+			online ? "" : "offline ",
+			name);
+out_unlock:
+	rcu_read_unlock();
+#endif /* CONFIG_MEMCG */
+
+	return ret;
+}
+
 static ssize_t
 print_page_owner(char __user *buf, size_t count, unsigned long pfn,
 		struct page *page, struct page_owner *page_owner,
@@ -365,6 +405,8 @@  print_page_owner(char __user *buf, size_t count, unsigned long pfn,
 			migrate_reason_names[page_owner->last_migrate_reason]);
 	}
 
+	ret = print_page_owner_memcg(kbuf, count, ret, page);
+
 	ret += snprintf(kbuf + ret, count - ret, "\n");
 	if (ret >= count)
 		goto err;