diff mbox series

[v2,10/28] mm: memcg: introduce mod_lruvec_memcg_state()

Message ID 20200127173453.2089565-11-guro@fb.com (mailing list archive)
State New, archived
Headers show
Series The new cgroup slab memory controller | expand

Commit Message

Roman Gushchin Jan. 27, 2020, 5:34 p.m. UTC
To prepare for per-object accounting of slab objects, let's introduce
__mod_lruvec_memcg_state() and mod_lruvec_memcg_state() helpers,
which are similar to mod_lruvec_state(), but do not update global
node counters, only lruvec and per-cgroup.

It's necessary because soon node slab counters will be used for
accounting of all memory used by slab pages, however on memcg level
only the actually used memory will be counted. The free space will be
shared between all cgroups, so it can't be accounted to any
specific cgroup.

Signed-off-by: Roman Gushchin <guro@fb.com>
---
 include/linux/memcontrol.h | 22 ++++++++++++++++++++++
 mm/memcontrol.c            | 37 +++++++++++++++++++++++++++----------
 2 files changed, 49 insertions(+), 10 deletions(-)

Comments

Johannes Weiner Feb. 3, 2020, 5:39 p.m. UTC | #1
On Mon, Jan 27, 2020 at 09:34:35AM -0800, Roman Gushchin wrote:
> To prepare for per-object accounting of slab objects, let's introduce
> __mod_lruvec_memcg_state() and mod_lruvec_memcg_state() helpers,
> which are similar to mod_lruvec_state(), but do not update global
> node counters, only lruvec and per-cgroup.
> 
> It's necessary because soon node slab counters will be used for
> accounting of all memory used by slab pages, however on memcg level
> only the actually used memory will be counted. The free space will be
> shared between all cgroups, so it can't be accounted to any
> specific cgroup.

Makes perfect sense. However, I think the existing mod_lruvec_state()
has a bad and misleading name, and adding to it in the same style
makes things worse.

Can we instead rename lruvec_state to node_memcg_state, to capture that
it changes all levels, and then have the following clean API?

- node_state for node only

- memcg_state for memcg only

- lruvec_state for lruvec only

- node_memcg_state convenience wrapper to change node, memcg, lruvec counters

You can then open-code the disjoint node and memcg+lruvec counters.

[ Granted, lruvec counters are never modified on their own - always in
  conjunction with the memcg counters. And frankly, the only memcg
  counters that are modified *without* the lruvec counterpart are the
  special-case MEMCG_ counters.

  It would be nice to have 1) a completely separate API for the MEMCG_
  counters; and then 2) the node API for node and 3) a cgroup API for
  memcg+lruvec VM stat counters that allows you to easily do the
  disjoint accounting for slab memory.

  But I can't think of concise names for these. At least nothing that
  would be better than separate memcg_state and lruvec_state calls. ]
diff mbox series

Patch

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 37d4f418e336..73c2a7d32862 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -694,6 +694,8 @@  static inline unsigned long lruvec_page_state_local(struct lruvec *lruvec,
 
 void __mod_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx,
 			int val);
+void __mod_lruvec_memcg_state(struct lruvec *lruvec, enum node_stat_item idx,
+			      int val);
 void __mod_lruvec_obj_state(void *p, enum node_stat_item idx, int val);
 void mod_memcg_obj_state(void *p, int idx, int val);
 
@@ -707,6 +709,16 @@  static inline void mod_lruvec_state(struct lruvec *lruvec,
 	local_irq_restore(flags);
 }
 
+static inline void mod_lruvec_memcg_state(struct lruvec *lruvec,
+					  enum node_stat_item idx, int val)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	__mod_lruvec_memcg_state(lruvec, idx, val);
+	local_irq_restore(flags);
+}
+
 static inline void __mod_lruvec_page_state(struct page *page,
 					   enum node_stat_item idx, int val)
 {
@@ -1104,6 +1116,16 @@  static inline void mod_lruvec_state(struct lruvec *lruvec,
 	mod_node_page_state(lruvec_pgdat(lruvec), idx, val);
 }
 
+static inline void __mod_lruvec_memcg_state(struct lruvec *lruvec,
+					    enum node_stat_item idx, int val)
+{
+}
+
+static inline void mod_lruvec_memcg_state(struct lruvec *lruvec,
+					  enum node_stat_item idx, int val)
+{
+}
+
 static inline void __mod_lruvec_page_state(struct page *page,
 					   enum node_stat_item idx, int val)
 {
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index cbf01cc0cbac..730f230cee6a 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -712,16 +712,16 @@  parent_nodeinfo(struct mem_cgroup_per_node *pn, int nid)
 }
 
 /**
- * __mod_lruvec_state - update lruvec memory statistics
+ * __mod_lruvec_memcg_state - update lruvec memory statistics
  * @lruvec: the lruvec
  * @idx: the stat item
  * @val: delta to add to the counter, can be negative
  *
  * The lruvec is the intersection of the NUMA node and a cgroup. This
- * function updates the all three counters that are affected by a
- * change of state at this level: per-node, per-cgroup, per-lruvec.
+ * function updates two of the three counters that are affected by a
+ * change of state at this level: per-cgroup and per-lruvec.
  */
-void __mod_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx,
+void __mod_lruvec_memcg_state(struct lruvec *lruvec, enum node_stat_item idx,
 			int val)
 {
 	pg_data_t *pgdat = lruvec_pgdat(lruvec);
@@ -729,12 +729,6 @@  void __mod_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx,
 	struct mem_cgroup *memcg;
 	long x;
 
-	/* Update node */
-	__mod_node_page_state(pgdat, idx, val);
-
-	if (mem_cgroup_disabled())
-		return;
-
 	pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
 	memcg = pn->memcg;
 
@@ -755,6 +749,29 @@  void __mod_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx,
 	__this_cpu_write(pn->lruvec_stat_cpu->count[idx], x);
 }
 
+/**
+ * __mod_lruvec_state - update lruvec memory statistics
+ * @lruvec: the lruvec
+ * @idx: the stat item
+ * @val: delta to add to the counter, can be negative
+ *
+ * The lruvec is the intersection of the NUMA node and a cgroup. This
+ * function updates all three counters that are affected by a
+ * change of state at this level: per-node, per-cgroup, per-lruvec.
+ */
+void __mod_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx,
+			int val)
+{
+	pg_data_t *pgdat = lruvec_pgdat(lruvec);
+
+	/* Update node */
+	__mod_node_page_state(pgdat, idx, val);
+
+	/* Update per-cgroup and per-lruvec stats */
+	if (!mem_cgroup_disabled())
+		__mod_lruvec_memcg_state(lruvec, idx, val);
+}
+
 void __mod_lruvec_obj_state(void *p, enum node_stat_item idx, int val)
 {
 	pg_data_t *pgdat = page_pgdat(virt_to_page(p));