@@ -5029,6 +5029,7 @@ L: linux-mm@kvack.org
S: Maintained
F: mm/memcontrol.c
F: mm/swap_cgroup.c
+F: tools/testing/selftests/cgroup/memcg_protection.m
F: tools/testing/selftests/cgroup/test_kmem.c
F: tools/testing/selftests/cgroup/test_memcontrol.c
new file mode 100644
@@ -0,0 +1,89 @@
+% SPDX-License-Identifier: GPL-2.0
+%
+% run as: octave-cli memcg_protection.m
+%
+% This script simulates reclaim protection behavior on a single level of memcg
+% hierarchy to illustrate how overcommitted protection spreads among siblings
+% (as it depends also on their current consumption).
+%
+% Simulation assumes siblings consumed the initial amount of memory (w/out
+% reclaim) and then the reclaim starts, all memory is reclaimable, i.e. treated
+% same. It simulates only non-low reclaim and assumes all memory.min = 0.
+%
+% Input configurations
+% --------------------
+% E number parent effective protection
+% n vector nominal protection of siblings set at the given level (memory.low)
+% c vector current consumption -,,- (memory.current)
+
+% example from testcase (values in GB)
+E = 50 / 1024;
+n = [75 25 0 500 ] / 1024;
+c = [50 50 50 0] / 1024;
+
+% Reclaim parameters
+% ------------------
+
+% Minimal reclaim amount (GB)
+cluster = 32*4 / 2**20;
+
+% Reclaim coefficient (think as 0.5^sc->priority)
+alpha = .1
+
+% Simulation parameters
+% ---------------------
+epsilon = 1e-7;
+timeout = 1000;
+
+% Simulation loop
+% ---------------
+
+ch = [];
+eh = [];
+rh = [];
+
+for t = 1:timeout
+ % low_usage
+ u = min(c, n);
+ siblings = sum(u);
+
+ % effective_protection()
+ protected = min(n, c); % start with nominal
+ e = protected * min(1, E / siblings); % normalize overcommit
+
+ % recursive protection
+ unclaimed = max(0, E - siblings);
+ parent_overuse = sum(c) - siblings;
+ if (unclaimed > 0 && parent_overuse > 0)
+ overuse = max(0, c - protected);
+ e += unclaimed * (overuse / parent_overuse);
+ endif
+
+ % get_scan_count()
+ r = alpha * c; % assume all memory is in a single LRU list
+
+ % commit 1bc63fb1272b ("mm, memcg: make scan aggression always exclude protection")
+ sz = max(e, c);
+ r .*= (1 - (e+epsilon) ./ (sz+epsilon));
+
+ % uncomment to debug prints
+ % e, c, r
+
+ % nothing to reclaim, reached equilibrium
+ if max(r) < epsilon
+ break;
+ endif
+
+ % SWAP_CLUSTER_MAX roundup
+ r = max(r, (r > epsilon) .* cluster);
+ % XXX here I do parallel reclaim of all siblings
+ % in reality reclaim is serialized and each sibling recalculates own residual
+ c = max(c - r, 0);
+
+ ch = [ch ; c];
+ eh = [eh ; e];
+ rh = [rh ; r];
+endfor
+
+t
+c, e
@@ -248,7 +248,7 @@ static int cg_test_proc_killed(const char *cgroup)
/*
* First, this test creates the following hierarchy:
* A memory.min = 50M, memory.max = 200M
- * A/B memory.min = 50M, memory.current = 50M
+ * A/B memory.min = 50M
* A/B/C memory.min = 75M, memory.current = 50M
* A/B/D memory.min = 25M, memory.current = 50M
* A/B/E memory.min = 0, memory.current = 50M
@@ -259,10 +259,13 @@ static int cg_test_proc_killed(const char *cgroup)
* Then it creates A/G and creates a significant
* memory pressure in it.
*
+ * Then it checks actual memory usages and expects that:
* A/B memory.current ~= 50M
- * A/B/C memory.current ~= 33M
- * A/B/D memory.current ~= 17M
- * A/B/F memory.current ~= 0
+ * A/B/C memory.current ~= 29M
+ * A/B/D memory.current ~= 21M
+ * A/B/E memory.current ~= 0
+ * A/B/F memory.current = 0
+ * (for origin of the numbers, see model in memcg_protection.m.)
*
* After that it tries to allocate more than there is
* unprotected memory in A available, and checks
@@ -365,10 +368,10 @@ static int test_memcg_min(const char *root)
for (i = 0; i < ARRAY_SIZE(children); i++)
c[i] = cg_read_long(children[i], "memory.current");
- if (!values_close(c[0], MB(33), 10))
+ if (!values_close(c[0], MB(29), 10))
goto cleanup;
- if (!values_close(c[1], MB(17), 10))
+ if (!values_close(c[1], MB(21), 10))
goto cleanup;
if (c[3] != 0)
@@ -405,7 +408,7 @@ static int test_memcg_min(const char *root)
/*
* First, this test creates the following hierarchy:
* A memory.low = 50M, memory.max = 200M
- * A/B memory.low = 50M, memory.current = 50M
+ * A/B memory.low = 50M
* A/B/C memory.low = 75M, memory.current = 50M
* A/B/D memory.low = 25M, memory.current = 50M
* A/B/E memory.low = 0, memory.current = 50M
@@ -417,9 +420,11 @@ static int test_memcg_min(const char *root)
*
* Then it checks actual memory usages and expects that:
* A/B memory.current ~= 50M
- * A/B/ memory.current ~= 33M
- * A/B/D memory.current ~= 17M
- * A/B/F memory.current ~= 0
+ * A/B/C memory.current ~= 29M
+ * A/B/D memory.current ~= 21M
+ * A/B/E memory.current ~= 0
+ * A/B/F memory.current = 0
+ * (for origin of the numbers, see model in memcg_protection.m.)
*
* After that it tries to allocate more than there is
* unprotected memory in A available,
@@ -512,10 +517,10 @@ static int test_memcg_low(const char *root)
for (i = 0; i < ARRAY_SIZE(children); i++)
c[i] = cg_read_long(children[i], "memory.current");
- if (!values_close(c[0], MB(33), 10))
+ if (!values_close(c[0], MB(29), 10))
goto cleanup;
- if (!values_close(c[1], MB(17), 10))
+ if (!values_close(c[1], MB(21), 10))
goto cleanup;
if (c[3] != 0)
The numbers are not easy to derive in a closed form (certainly mere protections ratios do not apply), therefore use a simulation to obtain expected numbers. Signed-off-by: Michal Koutný <mkoutny@suse.com> --- MAINTAINERS | 1 + .../selftests/cgroup/memcg_protection.m | 89 +++++++++++++++++++ .../selftests/cgroup/test_memcontrol.c | 29 +++--- 3 files changed, 107 insertions(+), 12 deletions(-) create mode 100644 tools/testing/selftests/cgroup/memcg_protection.m