diff mbox series

memcg: add per-memcg vmalloc stat

Message ID 20211221215336.1922823-1-shakeelb@google.com (mailing list archive)
State New
Headers show
Series memcg: add per-memcg vmalloc stat | expand

Commit Message

Shakeel Butt Dec. 21, 2021, 9:53 p.m. UTC
The kvmalloc* allocation functions can fall back to vmalloc allocations,
and do so more often on long-running machines. In addition, the kernel
does have __GFP_ACCOUNT kvmalloc* calls. So, often on long-running
machines, memory.stat does not give a complete picture of which type of
memory is charged to the memcg. So add a per-memcg vmalloc stat.

Signed-off-by: Shakeel Butt <shakeelb@google.com>
---
 Documentation/admin-guide/cgroup-v2.rst |  3 +++
 include/linux/memcontrol.h              | 15 +++++++++++++++
 mm/memcontrol.c                         |  1 +
 mm/vmalloc.c                            |  5 +++++
 4 files changed, 24 insertions(+)

Comments

Muchun Song Dec. 22, 2021, 4:15 a.m. UTC | #1
On Wed, Dec 22, 2021 at 5:53 AM Shakeel Butt <shakeelb@google.com> wrote:
>
> The kvmalloc* allocation functions can fallback to vmalloc allocations
> and more often on long running machines. In addition the kernel does
> have __GFP_ACCOUNT kvmalloc* calls. So, often on long running machines,
> the memory.stat does not tell the complete picture which type of memory
> is charged to the memcg. So add a per-memcg vmalloc stat.
>
> Signed-off-by: Shakeel Butt <shakeelb@google.com>
> ---
>  Documentation/admin-guide/cgroup-v2.rst |  3 +++
>  include/linux/memcontrol.h              | 15 +++++++++++++++
>  mm/memcontrol.c                         |  1 +
>  mm/vmalloc.c                            |  5 +++++
>  4 files changed, 24 insertions(+)
>
> diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst
> index 82c8dc91b2be..5aa368d165da 100644
> --- a/Documentation/admin-guide/cgroup-v2.rst
> +++ b/Documentation/admin-guide/cgroup-v2.rst
> @@ -1314,6 +1314,9 @@ PAGE_SIZE multiple when read back.
>           sock (npn)
>                 Amount of memory used in network transmission buffers
>
> +         vmalloc (npn)
> +               Amount of memory used for vmap backed memory.
> +
>           shmem
>                 Amount of cached filesystem data that is swap-backed,
>                 such as tmpfs, shm segments, shared anonymous mmap()s
> diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
> index d76dad703580..000bfad6ff69 100644
> --- a/include/linux/memcontrol.h
> +++ b/include/linux/memcontrol.h
> @@ -33,6 +33,7 @@ enum memcg_stat_item {
>         MEMCG_SWAP = NR_VM_NODE_STAT_ITEMS,
>         MEMCG_SOCK,
>         MEMCG_PERCPU_B,
> +       MEMCG_VMALLOC,
>         MEMCG_NR_STAT,
>  };
>
> @@ -944,6 +945,15 @@ static inline void mod_memcg_state(struct mem_cgroup *memcg,
>         local_irq_restore(flags);
>  }
>
> +static inline void mod_memcg_page_state(struct page *page,
> +                                       int idx, int val)
> +{
> +       struct mem_cgroup *memcg = page_memcg(page);
> +
> +       if (!mem_cgroup_disabled() && memcg)
> +               mod_memcg_state(memcg, idx, val);

It's not safe to access @memcg throughout mod_memcg_state() for kmem
charged through the objcg infrastructure. It's supposed to be safe to
access @memcg only under the RCU read lock. Otherwise, it looks good to
me.

Thanks.
Shakeel Butt Dec. 22, 2021, 5:22 a.m. UTC | #2
On Tue, Dec 21, 2021 at 8:15 PM Muchun Song <songmuchun@bytedance.com> wrote:
>
[...]
> > +static inline void mod_memcg_page_state(struct page *page,
> > +                                       int idx, int val)
> > +{
> > +       struct mem_cgroup *memcg = page_memcg(page);
> > +
> > +       if (!mem_cgroup_disabled() && memcg)
> > +               mod_memcg_state(memcg, idx, val);
>
> It's not safe to access @memcg throughout mod_memcg_state() for the
> kmem charged through objcg infrastructure. It's supposed to be safe
> to access @memcg under rcu read lock. Otherwise, it looks good to
> me.

Indeed you are right. v2 coming.
diff mbox series

Patch

diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst
index 82c8dc91b2be..5aa368d165da 100644
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -1314,6 +1314,9 @@  PAGE_SIZE multiple when read back.
 	  sock (npn)
 		Amount of memory used in network transmission buffers
 
+	  vmalloc (npn)
+		Amount of memory used for vmap backed memory.
+
 	  shmem
 		Amount of cached filesystem data that is swap-backed,
 		such as tmpfs, shm segments, shared anonymous mmap()s
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index d76dad703580..000bfad6ff69 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -33,6 +33,7 @@  enum memcg_stat_item {
 	MEMCG_SWAP = NR_VM_NODE_STAT_ITEMS,
 	MEMCG_SOCK,
 	MEMCG_PERCPU_B,
+	MEMCG_VMALLOC,
 	MEMCG_NR_STAT,
 };
 
@@ -944,6 +945,15 @@  static inline void mod_memcg_state(struct mem_cgroup *memcg,
 	local_irq_restore(flags);
 }
 
+static inline void mod_memcg_page_state(struct page *page,
+					int idx, int val)
+{
+	struct mem_cgroup *memcg = page_memcg(page);
+
+	if (!mem_cgroup_disabled() && memcg)
+		mod_memcg_state(memcg, idx, val);
+}
+
 static inline unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx)
 {
 	return READ_ONCE(memcg->vmstats.state[idx]);
@@ -1399,6 +1409,11 @@  static inline void mod_memcg_state(struct mem_cgroup *memcg,
 {
 }
 
+static inline void mod_memcg_page_state(struct page *page,
+					int idx, int val)
+{
+}
+
 static inline unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx)
 {
 	return 0;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 7ae77608847e..7027a3cc416f 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1375,6 +1375,7 @@  static const struct memory_stat memory_stats[] = {
 	{ "pagetables",			NR_PAGETABLE			},
 	{ "percpu",			MEMCG_PERCPU_B			},
 	{ "sock",			MEMCG_SOCK			},
+	{ "vmalloc",			MEMCG_VMALLOC			},
 	{ "shmem",			NR_SHMEM			},
 	{ "file_mapped",		NR_FILE_MAPPED			},
 	{ "file_dirty",			NR_FILE_DIRTY			},
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index eb6e527a6b77..af67ce4fd402 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -39,6 +39,7 @@ 
 #include <linux/uaccess.h>
 #include <linux/hugetlb.h>
 #include <linux/sched/mm.h>
+#include <linux/memcontrol.h>
 #include <asm/tlbflush.h>
 #include <asm/shmparam.h>
 
@@ -2626,6 +2627,9 @@  static void __vunmap(const void *addr, int deallocate_pages)
 		unsigned int page_order = vm_area_page_order(area);
 		int i;
 
+		mod_memcg_page_state(area->pages[0], MEMCG_VMALLOC,
+				     -(int)area->nr_pages);
+
 		for (i = 0; i < area->nr_pages; i += 1U << page_order) {
 			struct page *page = area->pages[i];
 
@@ -2964,6 +2968,7 @@  static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
 		page_order, nr_small_pages, area->pages);
 
 	atomic_long_add(area->nr_pages, &nr_vmalloc_pages);
+	mod_memcg_page_state(area->pages[0], MEMCG_VMALLOC, area->nr_pages);
 
 	/*
 	 * If not enough pages were obtained to accomplish an