@@ -70,6 +70,15 @@ Brief summary of control files.
memory.memsw.usage_in_bytes show current usage for memory+Swap
(See 5.5 for details)
memory.limit_in_bytes set/show limit of memory usage
+ memory.limit_in_bytes.min show current memory min setting; not present
+ on the root control group.
+ (See sysctl's vm.memcg_v1_min_default)
+ memory.limit_in_bytes.low show current memory low setting; not present
+ on the root control group.
+ (See sysctl's vm.memcg_v1_low_default)
+ memory.limit_in_bytes.high show current memory high setting; not present
+ on the root control group.
+ (See sysctl's vm.memcg_v1_high_default)
memory.memsw.limit_in_bytes set/show limit of memory+Swap usage
memory.failcnt show the number of memory usage hits limits
memory.memsw.failcnt show the number of memory+Swap hits limits
@@ -648,6 +657,30 @@ The output format of memory.numa_stat is::
The "total" count is sum of file + anon + unevictable.
+5.6 limit_in_bytes.low, min, and high
+-------------------------------------
+
+These read-only files show, in pages, the current min, low and high
+values that are derived from memory.limit_in_bytes for a newly created
+(non-root) cgroup when the sysctl vm parameters vm.memcg_v1_min_default,
+vm.memcg_v1_low_default and vm.memcg_v1_high_default are non-zero.
+
+Example usage:
+ sudo sysctl -w vm.memcg_v1_min_default=10
+ sudo sysctl -w vm.memcg_v1_low_default=30
+ sudo sysctl -w vm.memcg_v1_high_default=80
+
+ sudo mkdir /sys/fs/cgroup/memory/restrict
+ echo 100M | sudo tee /sys/fs/cgroup/memory/restrict/memory.limit_in_bytes
+ cat /sys/fs/cgroup/memory/restrict/memory.limit_in_bytes.min
+ 2560
+ cat /sys/fs/cgroup/memory/restrict/memory.limit_in_bytes.low
+ 7680
+ cat /sys/fs/cgroup/memory/restrict/memory.limit_in_bytes.high
+ 20480
+ echo $$ | sudo tee /sys/fs/cgroup/memory/restrict/tasks
+ dd if=/dev/zero of=~/file.bin bs=10M status=progress
+
6. Hierarchy support
====================
@@ -43,6 +43,9 @@ Currently, these files are in /proc/sys/vm:
- legacy_va_layout
- lowmem_reserve_ratio
- max_map_count
+- memcg_v1_high_default
+- memcg_v1_low_default
+- memcg_v1_min_default
- memory_failure_early_kill
- memory_failure_recovery
- min_free_kbytes
@@ -425,6 +428,36 @@ e.g., up to one or two maps per allocation.
The default value is 65530.
+memcg_v1_min_default:
+=====================
+
+This file contains a percentage of the cgroup memory limit used to
+set the min value of a newly created memory cgroup. This value is only
+used with the memory cgroup v1 interface.
+
+The default is 0 (disabled). Range is [0, 100].
+
+
+memcg_v1_low_default:
+=====================
+
+This file contains a percentage of the cgroup memory limit used to
+set the low value of a newly created memory cgroup. This value is only
+used with the memory cgroup v1 interface.
+
+The default is 0 (disabled). Range is [0, 100].
+
+
+memcg_v1_high_default:
+======================
+
+This file contains a percentage of the cgroup memory limit used to
+set the high value of a newly created memory cgroup. This value is only
+used with the memory cgroup v1 interface.
+
+The default is 0 (disabled). Range is [0, 100].
+
+
memory_failure_early_kill:
==========================
@@ -68,6 +68,11 @@ struct mem_cgroup_id {
refcount_t ref;
};
+/* vm.memcg_v1_{min,low,high}_default: percent (0-100) of the v1 memory limit */
+extern int sysctl_memcg_min_default;
+extern int sysctl_memcg_low_default;
+extern int sysctl_memcg_high_default;
+
/*
* Per memcg event counter is incremented at every pagein/pageout. With THP,
* it will be incremented by the number of pages. This counter is used
@@ -2449,6 +2449,35 @@ static struct ctl_table vm_table[] = {
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE,
},
+#endif
+#ifdef CONFIG_MEMCG
+ {
+ .procname = "memcg_v1_min_default",
+ .data = &sysctl_memcg_min_default,
+ .maxlen = sizeof(sysctl_memcg_min_default),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE_HUNDRED,
+ },
+ {
+ .procname = "memcg_v1_low_default",
+ .data = &sysctl_memcg_low_default,
+ .maxlen = sizeof(sysctl_memcg_low_default),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE_HUNDRED,
+ },
+ {
+ .procname = "memcg_v1_high_default",
+ .data = &sysctl_memcg_high_default,
+ .maxlen = sizeof(sysctl_memcg_high_default),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE_HUNDRED,
+ },
#endif
{ }
};
@@ -82,6 +82,11 @@ struct mem_cgroup *root_mem_cgroup __read_mostly;
DEFINE_PER_CPU(struct mem_cgroup *, int_active_memcg);
EXPORT_PER_CPU_SYMBOL_GPL(int_active_memcg);
+/* vm.memcg_v1_{min,low,high}_default: percent of the v1 limit, 0 = disabled */
+int sysctl_memcg_min_default __read_mostly;
+int sysctl_memcg_low_default __read_mostly;
+int sysctl_memcg_high_default __read_mostly;
+
/* Socket memory accounting disabled? */
static bool cgroup_memory_nosocket __ro_after_init;
@@ -205,6 +210,7 @@ enum res_type {
_MEMSWAP,
_KMEM,
_TCP,
+ _MEM_V1,
};
#define MEMFILE_PRIVATE(x, val) ((x) << 16 | (val))
@@ -3676,6 +3682,9 @@ enum {
RES_MAX_USAGE,
RES_FAILCNT,
RES_SOFT_LIMIT,
+ RES_LIMIT_MIN,
+ RES_LIMIT_LOW,
+ RES_LIMIT_HIGH,
};
static u64 mem_cgroup_read_u64(struct cgroup_subsys_state *css,
@@ -3686,6 +3695,7 @@ static u64 mem_cgroup_read_u64(struct cgroup_subsys_state *css,
switch (MEMFILE_TYPE(cft->private)) {
case _MEM:
+ case _MEM_V1:
counter = &memcg->memory;
break;
case _MEMSWAP:
@@ -3716,6 +3726,12 @@ static u64 mem_cgroup_read_u64(struct cgroup_subsys_state *css,
return counter->failcnt;
case RES_SOFT_LIMIT:
return (u64)memcg->soft_limit * PAGE_SIZE;
+ case RES_LIMIT_MIN:
+ return (u64)READ_ONCE(memcg->memory.min);
+ case RES_LIMIT_LOW:
+ return (u64)READ_ONCE(memcg->memory.low);
+ case RES_LIMIT_HIGH:
+ return (u64)READ_ONCE(memcg->memory.high);
default:
BUG();
}
@@ -3815,6 +3831,34 @@ static int memcg_update_tcp_max(struct mem_cgroup *memcg, unsigned long max)
return ret;
}
+/*
+ * Seed min/low/high of a non-root v1 memcg from the vm.memcg_v1_*_default
+ * percentages of its new limit @nr_pages.  Nothing is done for an unlimited
+ * (PAGE_COUNTER_MAX) limit or once min or low is already non-zero; high is
+ * only seeded while it is still PAGE_COUNTER_MAX.
+ */
+static inline void mem_cgroup_v1_set_defaults(struct mem_cgroup *memcg,
+					      unsigned long nr_pages)
+{
+	/* Read each sysctl once so the test and the scaling use one value. */
+	int min_pct = READ_ONCE(sysctl_memcg_min_default);
+	int low_pct = READ_ONCE(sysctl_memcg_low_default);
+	int high_pct = READ_ONCE(sysctl_memcg_high_default);
+	/* Split the scaling so nr_pages * pct cannot wrap a 32-bit long. */
+	unsigned long whole = nr_pages / 100, rest = nr_pages % 100;
+
+	if (mem_cgroup_is_root(memcg) || nr_pages == PAGE_COUNTER_MAX)
+		return;
+	if (READ_ONCE(memcg->memory.min) || READ_ONCE(memcg->memory.low))
+		return;
+	if (min_pct > 0)
+		page_counter_set_min(&memcg->memory, whole * min_pct + rest * min_pct / 100);
+	if (low_pct > 0)
+		page_counter_set_low(&memcg->memory, whole * low_pct + rest * low_pct / 100);
+	if (high_pct > 0 && READ_ONCE(memcg->memory.high) == PAGE_COUNTER_MAX)
+		page_counter_set_high(&memcg->memory, whole * high_pct + rest * high_pct / 100);
+}
+
/*
* The user of this function is...
* RES_LIMIT.
@@ -3838,6 +3882,11 @@ static ssize_t mem_cgroup_write(struct kernfs_open_file *of,
break;
}
switch (MEMFILE_TYPE(of_cft(of)->private)) {
+ case _MEM_V1:
+ ret = mem_cgroup_resize_max(memcg, nr_pages, false);
+ if (!ret)
+ mem_cgroup_v1_set_defaults(memcg, nr_pages);
+ break;
case _MEM:
ret = mem_cgroup_resize_max(memcg, nr_pages, false);
break;
@@ -5000,10 +5049,28 @@ static struct cftype mem_cgroup_legacy_files[] = {
},
{
.name = "limit_in_bytes",
- .private = MEMFILE_PRIVATE(_MEM, RES_LIMIT),
+ .private = MEMFILE_PRIVATE(_MEM_V1, RES_LIMIT),
.write = mem_cgroup_write,
.read_u64 = mem_cgroup_read_u64,
},
+ {
+ .name = "limit_in_bytes.min",
+ .private = MEMFILE_PRIVATE(_MEM_V1, RES_LIMIT_MIN),
+ .read_u64 = mem_cgroup_read_u64,
+ .flags = CFTYPE_NOT_ON_ROOT,
+ },
+ {
+ .name = "limit_in_bytes.low",
+ .private = MEMFILE_PRIVATE(_MEM_V1, RES_LIMIT_LOW),
+ .read_u64 = mem_cgroup_read_u64,
+ .flags = CFTYPE_NOT_ON_ROOT,
+ },
+ {
+ .name = "limit_in_bytes.high",
+ .private = MEMFILE_PRIVATE(_MEM_V1, RES_LIMIT_HIGH),
+ .read_u64 = mem_cgroup_read_u64,
+ .flags = CFTYPE_NOT_ON_ROOT,
+ },
{
.name = "soft_limit_in_bytes",
.private = MEMFILE_PRIVATE(_MEM, RES_SOFT_LIMIT),