diff mbox series

[RFC,net-next,2/3] net-memcg: Record pressure level when under pressure

Message ID 20230901062141.51972-3-wuyun.abel@bytedance.com (mailing list archive)
State RFC
Delegated to: Netdev Maintainers
Headers show
Series sock: Be aware of memcg pressure on alloc | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net-next, async
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 8017 this patch: 8017
netdev/cc_maintainers success CCed 11 of 11 maintainers
netdev/build_clang success Errors and warnings before: 2323 this patch: 2323
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 8714 this patch: 8714
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 99 lines checked
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Abel Wu Sept. 1, 2023, 6:21 a.m. UTC
Currently memcg->socket_pressure is only used to judge whether
there is memory reclaim pressure in this memcg. As different
reclaim efficiencies require different strategies, recording the
level of pressure would enable fine-grained control inside
networking, where performance matters a lot.

The vmpressure infrastructure classifies pressure into 3 levels:
low, medium and critical. It would be too conservative to
constrain socket memory usage at the "low" level, so only the
other two are taken into consideration, and the least significant
bit of socket_pressure is enough to record this information.

Signed-off-by: Abel Wu <wuyun.abel@bytedance.com>
---
 include/linux/memcontrol.h | 39 +++++++++++++++++++++++++++++++++-----
 include/net/sock.h         |  2 +-
 include/net/tcp.h          |  2 +-
 mm/vmpressure.c            |  9 ++++++++-
 4 files changed, 44 insertions(+), 8 deletions(-)
diff mbox series

Patch

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index dbf26bc89dd4..a24047bf7722 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -288,6 +288,9 @@  struct mem_cgroup {
 	 * Hint of reclaim pressure for socket memroy management. Note
 	 * that this indicator should NOT be used in legacy cgroup mode
 	 * where socket memory is accounted/charged separately.
+	 *
+	 * The least significant bit is used for indicating the level of
+	 * pressure, 1 for 'critical' and 0 otherwise.
 	 */
 	unsigned long		socket_pressure;
 
@@ -1730,15 +1733,40 @@  extern struct static_key_false memcg_sockets_enabled_key;
 #define mem_cgroup_sockets_enabled static_branch_unlikely(&memcg_sockets_enabled_key)
 void mem_cgroup_sk_alloc(struct sock *sk);
 void mem_cgroup_sk_free(struct sock *sk);
-static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg)
+
+static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg,
+						    bool *critical)
 {
+	bool under_pressure = false;
+
+	/*
+	 * When cgroup is in legacy mode where tcpmem is separately
+	 * charged, we have no idea about memcg reclaim pressure from
+	 * here and actually no need to, so just ignore the pressure
+	 * level info.
+	 */
 	if (!cgroup_subsys_on_dfl(memory_cgrp_subsys))
 		return !!memcg->tcpmem_pressure;
+
+	if (critical)
+		*critical = false;
+
 	do {
-		if (time_before(jiffies, READ_ONCE(memcg->socket_pressure)))
-			return true;
+		unsigned long expire = READ_ONCE(memcg->socket_pressure);
+
+		if (time_before(jiffies, expire)) {
+			if (!under_pressure)
+				under_pressure = true;
+			if (!critical)
+				break;
+			if (expire & 1) {
+				*critical = true;
+				break;
+			}
+		}
 	} while ((memcg = parent_mem_cgroup(memcg)));
-	return false;
+
+	return under_pressure;
 }
 
 int alloc_shrinker_info(struct mem_cgroup *memcg);
@@ -1749,7 +1777,8 @@  void reparent_shrinker_deferred(struct mem_cgroup *memcg);
 #define mem_cgroup_sockets_enabled 0
 static inline void mem_cgroup_sk_alloc(struct sock *sk) { };
 static inline void mem_cgroup_sk_free(struct sock *sk) { };
-static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg)
+static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg,
+						    bool *critical)
 {
 	return false;
 }
diff --git a/include/net/sock.h b/include/net/sock.h
index 11d503417591..079bbee5c400 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1434,7 +1434,7 @@  static inline bool sk_under_memory_pressure(const struct sock *sk)
 		return false;
 
 	if (mem_cgroup_sockets_enabled && sk->sk_memcg &&
-	    mem_cgroup_under_socket_pressure(sk->sk_memcg))
+	    mem_cgroup_under_socket_pressure(sk->sk_memcg, NULL))
 		return true;
 
 	return !!READ_ONCE(*sk->sk_prot->memory_pressure);
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 07b21d9a9620..81e1f9c90a94 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -261,7 +261,7 @@  extern unsigned long tcp_memory_pressure;
 static inline bool tcp_under_memory_pressure(const struct sock *sk)
 {
 	if (mem_cgroup_sockets_enabled && sk->sk_memcg &&
-	    mem_cgroup_under_socket_pressure(sk->sk_memcg))
+	    mem_cgroup_under_socket_pressure(sk->sk_memcg, NULL))
 		return true;
 
 	return READ_ONCE(tcp_memory_pressure);
diff --git a/mm/vmpressure.c b/mm/vmpressure.c
index 22c6689d9302..5a3ac3768c0f 100644
--- a/mm/vmpressure.c
+++ b/mm/vmpressure.c
@@ -308,6 +308,13 @@  void vmpressure(gfp_t gfp, struct mem_cgroup *memcg, bool tree,
 		level = vmpressure_calc_level(scanned, reclaimed);
 
 		if (level > VMPRESSURE_LOW) {
+			unsigned long expire = jiffies + HZ;
+
+			if (level == VMPRESSURE_CRITICAL)
+				expire |=  1UL;
+			else
+				expire &= ~1UL;
+
 			/*
 			 * Let the socket buffer allocator know that
 			 * we are having trouble reclaiming LRU pages.
@@ -316,7 +323,7 @@  void vmpressure(gfp_t gfp, struct mem_cgroup *memcg, bool tree,
 			 * asserted for a second in which subsequent
 			 * pressure events can occur.
 			 */
-			WRITE_ONCE(memcg->socket_pressure, jiffies + HZ);
+			WRITE_ONCE(memcg->socket_pressure, expire);
 		}
 	}
 }