
[RFC,v1,07/11] mm: Account the total top tier memory in use

Message ID 9170c90b0f58dee05a2b2c1d3789d674df42ed65.1617642417.git.tim.c.chen@linux.intel.com
Series Manage the top tier memory in a tiered memory

Commit Message

Tim Chen April 5, 2021, 5:08 p.m. UTC
Track the global top tier memory usage statistics. They are used as the
basis for deciding when to start demoting pages from memory cgroups that
have exceeded their soft limit. We start reclaiming top tier memory when
the total top tier memory runs low.

Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
---
 include/linux/vmstat.h | 18 ++++++++++++++++++
 mm/vmstat.c            | 20 +++++++++++++++++---
 2 files changed, 35 insertions(+), 3 deletions(-)
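
For reference, a minimal sketch of how a consumer might gate demotion on the
new counter. The actual soft-limit demotion hook arrives later in this
series; toptier_low_wmark_pages() below is an assumed threshold helper, not
an existing interface.

#include <linux/vmstat.h>

/*
 * Hypothetical sketch only, not part of this patch: decide whether
 * demotion from top tier memory should begin, based on the global
 * counter introduced here.  toptier_low_wmark_pages() is assumed.
 */
static bool toptier_memory_low(void)
{
	unsigned long free = global_toptier_zone_page_state(NR_FREE_PAGES);

	/* Start demoting once free top tier pages drop below the mark. */
	return free < toptier_low_wmark_pages();
}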

Patch

diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index e1a4fa9abb3a..a3ad5a937fd8 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -139,6 +139,7 @@  static inline void vm_events_fold_cpu(int cpu)
  * Zone and node-based page accounting with per cpu differentials.
  */
 extern atomic_long_t vm_zone_stat[NR_VM_ZONE_STAT_ITEMS];
+extern atomic_long_t vm_toptier_zone_stat[NR_VM_ZONE_STAT_ITEMS];
 extern atomic_long_t vm_numa_stat[NR_VM_NUMA_STAT_ITEMS];
 extern atomic_long_t vm_node_stat[NR_VM_NODE_STAT_ITEMS];
 
@@ -175,6 +176,8 @@  static inline void zone_page_state_add(long x, struct zone *zone,
 {
 	atomic_long_add(x, &zone->vm_stat[item]);
 	atomic_long_add(x, &vm_zone_stat[item]);
+	if (node_state(zone->zone_pgdat->node_id, N_TOPTIER))
+		atomic_long_add(x, &vm_toptier_zone_stat[item]);
 }
 
 static inline void node_page_state_add(long x, struct pglist_data *pgdat,
@@ -212,6 +215,17 @@  static inline unsigned long global_node_page_state(enum node_stat_item item)
 	return global_node_page_state_pages(item);
 }
 
+static inline unsigned long global_toptier_zone_page_state(enum zone_stat_item item)
+{
+	long x = atomic_long_read(&vm_toptier_zone_stat[item]);
+
+#ifdef CONFIG_SMP
+	if (x < 0)
+		x = 0;
+#endif
+	return x;
+}
+
 static inline unsigned long zone_page_state(struct zone *zone,
 					enum zone_stat_item item)
 {
@@ -325,6 +339,8 @@  static inline void __inc_zone_state(struct zone *zone, enum zone_stat_item item)
 {
 	atomic_long_inc(&zone->vm_stat[item]);
 	atomic_long_inc(&vm_zone_stat[item]);
+	if (node_state(zone->zone_pgdat->node_id, N_TOPTIER))
+		atomic_long_inc(&vm_toptier_zone_stat[item]);
 }
 
 static inline void __inc_node_state(struct pglist_data *pgdat, enum node_stat_item item)
@@ -337,6 +353,8 @@  static inline void __dec_zone_state(struct zone *zone, enum zone_stat_item item)
 {
 	atomic_long_dec(&zone->vm_stat[item]);
 	atomic_long_dec(&vm_zone_stat[item]);
+	if (node_state(zone->zone_pgdat->node_id, N_TOPTIER))
+		atomic_long_dec(&vm_toptier_zone_stat[item]);
 }
 
 static inline void __dec_node_state(struct pglist_data *pgdat, enum node_stat_item item)
diff --git a/mm/vmstat.c b/mm/vmstat.c
index f299d2e89acb..b59efbcaef4e 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -161,9 +161,11 @@  void vm_events_fold_cpu(int cpu)
  * vm_stat contains the global counters
  */
 atomic_long_t vm_zone_stat[NR_VM_ZONE_STAT_ITEMS] __cacheline_aligned_in_smp;
+atomic_long_t vm_toptier_zone_stat[NR_VM_ZONE_STAT_ITEMS] __cacheline_aligned_in_smp;
 atomic_long_t vm_numa_stat[NR_VM_NUMA_STAT_ITEMS] __cacheline_aligned_in_smp;
 atomic_long_t vm_node_stat[NR_VM_NODE_STAT_ITEMS] __cacheline_aligned_in_smp;
 EXPORT_SYMBOL(vm_zone_stat);
+EXPORT_SYMBOL(vm_toptier_zone_stat);
 EXPORT_SYMBOL(vm_numa_stat);
 EXPORT_SYMBOL(vm_node_stat);
 
@@ -695,7 +697,7 @@  EXPORT_SYMBOL(dec_node_page_state);
  * Returns the number of counters updated.
  */
 #ifdef CONFIG_NUMA
-static int fold_diff(int *zone_diff, int *numa_diff, int *node_diff)
+static int fold_diff(int *zone_diff, int *numa_diff, int *node_diff, int *toptier_diff)
 {
 	int i;
 	int changes = 0;
@@ -717,6 +719,11 @@  static int fold_diff(int *zone_diff, int *numa_diff, int *node_diff)
 			atomic_long_add(node_diff[i], &vm_node_stat[i]);
 			changes++;
 	}
+
+	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) {
+		if (toptier_diff[i])
+			atomic_long_add(toptier_diff[i], &vm_toptier_zone_stat[i]);
+	}
 	return changes;
 }
 #else
@@ -762,6 +769,7 @@  static int refresh_cpu_vm_stats(bool do_pagesets)
 	struct zone *zone;
 	int i;
 	int global_zone_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, };
+	int global_toptier_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, };
 #ifdef CONFIG_NUMA
 	int global_numa_diff[NR_VM_NUMA_STAT_ITEMS] = { 0, };
 #endif
@@ -779,6 +787,9 @@  static int refresh_cpu_vm_stats(bool do_pagesets)
 
 				atomic_long_add(v, &zone->vm_stat[i]);
 				global_zone_diff[i] += v;
+				if (node_state(zone->zone_pgdat->node_id, N_TOPTIER)) {
+					global_toptier_diff[i] += v;
+				}
 #ifdef CONFIG_NUMA
 				/* 3 seconds idle till flush */
 				__this_cpu_write(p->expire, 3);
@@ -846,7 +857,7 @@  static int refresh_cpu_vm_stats(bool do_pagesets)
 
 #ifdef CONFIG_NUMA
 	changes += fold_diff(global_zone_diff, global_numa_diff,
-			     global_node_diff);
+			     global_node_diff, global_toptier_diff);
 #else
 	changes += fold_diff(global_zone_diff, global_node_diff);
 #endif
@@ -868,6 +879,7 @@  void cpu_vm_stats_fold(int cpu)
 	int global_numa_diff[NR_VM_NUMA_STAT_ITEMS] = { 0, };
 #endif
 	int global_node_diff[NR_VM_NODE_STAT_ITEMS] = { 0, };
+	int global_toptier_diff[NR_VM_NODE_STAT_ITEMS] = { 0, };
 
 	for_each_populated_zone(zone) {
 		struct per_cpu_pageset *p;
@@ -910,11 +922,13 @@  void cpu_vm_stats_fold(int cpu)
 				p->vm_node_stat_diff[i] = 0;
 				atomic_long_add(v, &pgdat->vm_stat[i]);
 				global_node_diff[i] += v;
+				if (node_state(pgdat->node_id, N_TOPTIER))
+					global_toptier_diff[i] += v;
 			}
 	}
 
 #ifdef CONFIG_NUMA
-	fold_diff(global_zone_diff, global_numa_diff, global_node_diff);
+	fold_diff(global_zone_diff, global_numa_diff, global_node_diff, global_toptier_diff);
 #else
 	fold_diff(global_zone_diff, global_node_diff);
 #endif
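
The net effect of the patch is that vm_toptier_zone_stat[item] tracks the
sum of zone->vm_stat[item] over all zones on N_TOPTIER nodes, up to per-CPU
diffs that have not been folded yet. A debugging sketch, not part of the
patch, that recomputes one item the slow way so it can be compared against
the new global_toptier_zone_page_state() accessor:

#include <linux/mmzone.h>
#include <linux/vmstat.h>

/*
 * Recompute the top tier total for @item from per-zone counters.
 * The result can differ transiently from
 * global_toptier_zone_page_state(item) by unfolded per-CPU diffs.
 */
static unsigned long toptier_zone_state_slow(enum zone_stat_item item)
{
	struct zone *zone;
	unsigned long total = 0;

	for_each_populated_zone(zone) {
		if (node_state(zone->zone_pgdat->node_id, N_TOPTIER))
			total += zone_page_state(zone, item);
	}

	return total;
}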