diff mbox series

memcg-v1: Enable setting memory min, low, high

Message ID 20230405110107.127156-1-shaun.tancheff@gmail.com (mailing list archive)
State New
Headers show
Series memcg-v1: Enable setting memory min, low, high | expand

Commit Message

Shaun Tancheff April 5, 2023, 11:01 a.m. UTC
From: Shaun Tancheff <shaun.tancheff@hpe.com>

For users that are unable to update to memcg-v2 this
provides a method where memcg-v1 can apply enough
memory pressure to effectively throttle filesystem I/O
or otherwise reduce the chance of being memcg oom
killed, at the expense of reduced performance.

This patch extends the memcg-v1 legacy sysfs entries
with:
    limit_in_bytes.min, limit_in_bytes.low and
    limit_in_bytes.high
Since old software will need to be updated to take
advantage of the new files, a secondary method
of setting min, low and high based on a percentage
of the limit is also provided. The percentages
are determined by module parameters.

The available module parameters can be set at
kernel boot time, for example:
   memcontrol.memcg_min=10
   memcontrol.memcg_low=30
   memcontrol.memcg_high=80

This would set min to 10%, low to 30% and high to 80% of
the value written to:
  /sys/fs/cgroup/memory/<grp>/memory.limit_in_bytes

Signed-off-by: Shaun Tancheff <shaun.tancheff@hpe.com>
---
v0: Initial hard coded limits by percent.
v1: Added sysfs access and module parameters for percent values to enable
v2: Fix 32-bit, remove need for missing __udivdi3
 mm/memcontrol.c | 83 ++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 82 insertions(+), 1 deletion(-)

Comments

Greg KH April 5, 2023, 11:23 a.m. UTC | #1
On Wed, Apr 05, 2023 at 06:01:07PM +0700, Shaun Tancheff wrote:
> From: Shaun Tancheff <shaun.tancheff@hpe.com>
> 
> For users that are unable to update to memcg-v2 this
> provides a method where memcg-v1 can more effectively
> apply enough memory pressure to effectively throttle
> filesystem I/O or otherwise minimize being memcg oom
> killed at the expense of reduced performance.
> 
> This patch extends the memcg-v1 legacy sysfs entries
> with:
>     limit_in_bytes.min, limit_in_bytes.low and
>     limit_in_bytes.high
> Since old software will need to be updated to take
> advantage of the new files a secondary method
> of setting min, low and high based on a percentage
> of the limit is also provided. The percentages
> are determined by module parameters.
> 
> The available module parameters can be set at
> kernel boot time, for example:
>    memcontrol.memcg_min=10
>    memcontrol.memcg_low=30
>    memcontrol.memcg_high=80
> 
> Would set min to 10%, low to 30% and high to 80% of
> the value written to:
>   /sys/fs/cgroup/memory/<grp>/memory.limit_in_bytes
> 
> Signed-off-by: Shaun Tancheff <shaun.tancheff@hpe.com>
> ---
> v0: Initial hard coded limits by percent.
> v1: Added sysfs access and module parameters for percent values to enable
> v2: Fix 32-bit, remove need for missing __udivdi3
>  mm/memcontrol.c | 83 ++++++++++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 82 insertions(+), 1 deletion(-)
> 
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 2eee092f8f11..3cf8386f4f45 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -73,6 +73,18 @@
>  
>  #include <trace/events/vmscan.h>
>  
> +static unsigned int memcg_v1_min_default_percent;
> +module_param_named(memcg_min, memcg_v1_min_default_percent, uint, 0600);
> +MODULE_PARM_DESC(memcg_min, "memcg v1 min default percent");
> +
> +static unsigned int memcg_v1_low_default_percent;
> +module_param_named(memcg_low, memcg_v1_low_default_percent, uint, 0600);
> +MODULE_PARM_DESC(memcg_low, "memcg v1 low default percent");
> +
> +static unsigned int memcg_v1_high_default_percent;
> +module_param_named(memcg_high, memcg_v1_high_default_percent, uint, 0600);
> +MODULE_PARM_DESC(memcg_high, "memcg v1 high default percent");

This is not the 1990's, why are you using module parameters for this?
And this isn't a module, so why use module options, how are you supposed
to set them?

And you didn't document them anywhere?

Also, why is this cc: stable?

thanks,

greg k-h
Shaun Tancheff April 5, 2023, 1:54 p.m. UTC | #2
On Wed, Apr 5, 2023 at 6:23 PM Greg KH <gregkh@linuxfoundation.org> wrote:

> On Wed, Apr 05, 2023 at 06:01:07PM +0700, Shaun Tancheff wrote:
> > From: Shaun Tancheff <shaun.tancheff@hpe.com>
> >
> > For users that are unable to update to memcg-v2 this
> > provides a method where memcg-v1 can more effectively
> > apply enough memory pressure to effectively throttle
> > filesystem I/O or otherwise minimize being memcg oom
> > killed at the expense of reduced performance.
> >
> > This patch extends the memcg-v1 legacy sysfs entries
> > with:
> >     limit_in_bytes.min, limit_in_bytes.low and
> >     limit_in_bytes.high
> > Since old software will need to be updated to take
> > advantage of the new files a secondary method
> > of setting min, low and high based on a percentage
> > of the limit is also provided. The percentages
> > are determined by module parameters.
> >
> > The available module parameters can be set at
> > kernel boot time, for example:
> >    memcontrol.memcg_min=10
> >    memcontrol.memcg_low=30
> >    memcontrol.memcg_high=80
> >
> > Would set min to 10%, low to 30% and high to 80% of
> > the value written to:
> >   /sys/fs/cgroup/memory/<grp>/memory.limit_in_bytes
> >
> > Signed-off-by: Shaun Tancheff <shaun.tancheff@hpe.com>
> > ---
> > v0: Initial hard coded limits by percent.
> > v1: Added sysfs access and module parameters for percent values to enable
> > v2: Fix 32-bit, remove need for missing __udivdi3
> >  mm/memcontrol.c | 83 ++++++++++++++++++++++++++++++++++++++++++++++++-
> >  1 file changed, 82 insertions(+), 1 deletion(-)
> >
> > diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> > index 2eee092f8f11..3cf8386f4f45 100644
> > --- a/mm/memcontrol.c
> > +++ b/mm/memcontrol.c
> > @@ -73,6 +73,18 @@
> >
> >  #include <trace/events/vmscan.h>
> >
> > +static unsigned int memcg_v1_min_default_percent;
> > +module_param_named(memcg_min, memcg_v1_min_default_percent, uint, 0600);
> > +MODULE_PARM_DESC(memcg_min, "memcg v1 min default percent");
> > +
> > +static unsigned int memcg_v1_low_default_percent;
> > +module_param_named(memcg_low, memcg_v1_low_default_percent, uint, 0600);
> > +MODULE_PARM_DESC(memcg_low, "memcg v1 low default percent");
> > +
> > +static unsigned int memcg_v1_high_default_percent;
> > +module_param_named(memcg_high, memcg_v1_high_default_percent, uint, 0600);
> > +MODULE_PARM_DESC(memcg_high, "memcg v1 high default percent");
>
> This is not the 1990's, why are you using module parameters for this?
>
> And this isn't a module, so why use module options, how are you supposed
> to set them?
>
Ah .. guess I'm a bit out of date.
They can be set either on the kernel command line or through sysfs,
nominally
  /sys/module/memcontrol/parameters/memcg_high
  /sys/module/memcontrol/parameters/memcg_low
  /sys/module/memcontrol/parameters/memcg_min

I will look at making these sysctl values instead.

> And you didn't document them anywhere?
>
Next update will include documentation, as appropriate, Thanks!


> Also, why is this cc: stable?
>
Apologies, I understand this is not suitable for stable and will remove the
cc.

> thanks,
>
> greg k-h
>
Greg KH April 5, 2023, 2:21 p.m. UTC | #3
On Wed, Apr 05, 2023 at 08:54:07PM +0700, Shaun Tancheff wrote:
> On Wed, Apr 5, 2023 at 6:23 PM Greg KH <gregkh@linuxfoundation.org> wrote:
> 
> > On Wed, Apr 05, 2023 at 06:01:07PM +0700, Shaun Tancheff wrote:
> > > From: Shaun Tancheff <shaun.tancheff@hpe.com>
> > >
> > > For users that are unable to update to memcg-v2 this
> > > provides a method where memcg-v1 can more effectively
> > > apply enough memory pressure to effectively throttle
> > > filesystem I/O or otherwise minimize being memcg oom
> > > killed at the expense of reduced performance.
> > >
> > > This patch extends the memcg-v1 legacy sysfs entries
> > > with:
> > >     limit_in_bytes.min, limit_in_bytes.low and
> > >     limit_in_bytes.high
> > > Since old software will need to be updated to take
> > > advantage of the new files a secondary method
> > > of setting min, low and high based on a percentage
> > > of the limit is also provided. The percentages
> > > are determined by module parameters.
> > >
> > > The available module parameters can be set at
> > > kernel boot time, for example:
> > >    memcontrol.memcg_min=10
> > >    memcontrol.memcg_low=30
> > >    memcontrol.memcg_high=80
> > >
> > > Would set min to 10%, low to 30% and high to 80% of
> > > the value written to:
> > >   /sys/fs/cgroup/memory/<grp>/memory.limit_in_bytes
> > >
> > > Signed-off-by: Shaun Tancheff <shaun.tancheff@hpe.com>
> > > ---
> > > v0: Initial hard coded limits by percent.
> > > v1: Added sysfs access and module parameters for percent values to enable
> > > v2: Fix 32-bit, remove need for missing __udivdi3
> > >  mm/memcontrol.c | 83 ++++++++++++++++++++++++++++++++++++++++++++++++-
> > >  1 file changed, 82 insertions(+), 1 deletion(-)
> > >
> > > diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> > > index 2eee092f8f11..3cf8386f4f45 100644
> > > --- a/mm/memcontrol.c
> > > +++ b/mm/memcontrol.c
> > > @@ -73,6 +73,18 @@
> > >
> > >  #include <trace/events/vmscan.h>
> > >
> > > +static unsigned int memcg_v1_min_default_percent;
> > > +module_param_named(memcg_min, memcg_v1_min_default_percent, uint, 0600);
> > > +MODULE_PARM_DESC(memcg_min, "memcg v1 min default percent");
> > > +
> > > +static unsigned int memcg_v1_low_default_percent;
> > > +module_param_named(memcg_low, memcg_v1_low_default_percent, uint, 0600);
> > > +MODULE_PARM_DESC(memcg_low, "memcg v1 low default percent");
> > > +
> > > +static unsigned int memcg_v1_high_default_percent;
> > > +module_param_named(memcg_high, memcg_v1_high_default_percent, uint, 0600);
> > > +MODULE_PARM_DESC(memcg_high, "memcg v1 high default percent");
> >
> > This is not the 1990's, why are you using module parameters for this?
> >
> > And this isn't a module, so why use module options, how are you supposed
> > to set them?
> >
> Ah .. guess I'm a bit out of date.
> They can be set either on the kernel command line or through sysfs,
> nominally
>   /sys/module/memcontrol/parameters/memcg_high
>   /sys/module/memcontrol/parameters/memcg_low
>   /sys/module/memcontrol/parameters/memcg_min

But again, memcontrol is not a module, right?

> I will look at making these sysctl values instead.

Yeah, just stick with that please.

thanks,

greg k-h
diff mbox series

Patch

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 2eee092f8f11..3cf8386f4f45 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -73,6 +73,18 @@ 
 
 #include <trace/events/vmscan.h>
 
+static unsigned int memcg_v1_min_default_percent;
+module_param_named(memcg_min, memcg_v1_min_default_percent, uint, 0600);
+MODULE_PARM_DESC(memcg_min, "memcg v1 min default percent");
+
+static unsigned int memcg_v1_low_default_percent;
+module_param_named(memcg_low, memcg_v1_low_default_percent, uint, 0600);
+MODULE_PARM_DESC(memcg_low, "memcg v1 low default percent");
+
+static unsigned int memcg_v1_high_default_percent;
+module_param_named(memcg_high, memcg_v1_high_default_percent, uint, 0600);
+MODULE_PARM_DESC(memcg_high, "memcg v1 high default percent");
+
 struct cgroup_subsys memory_cgrp_subsys __read_mostly;
 EXPORT_SYMBOL(memory_cgrp_subsys);
 
@@ -205,6 +217,7 @@  enum res_type {
 	_MEMSWAP,
 	_KMEM,
 	_TCP,
+	_MEM_V1,
 };
 
 #define MEMFILE_PRIVATE(x, val)	((x) << 16 | (val))
@@ -3676,6 +3689,9 @@  enum {
 	RES_MAX_USAGE,
 	RES_FAILCNT,
 	RES_SOFT_LIMIT,
+	RES_LIMIT_MIN,
+	RES_LIMIT_LOW,
+	RES_LIMIT_HIGH,
 };
 
 static u64 mem_cgroup_read_u64(struct cgroup_subsys_state *css,
@@ -3686,6 +3702,7 @@  static u64 mem_cgroup_read_u64(struct cgroup_subsys_state *css,
 
 	switch (MEMFILE_TYPE(cft->private)) {
 	case _MEM:
+	case _MEM_V1:
 		counter = &memcg->memory;
 		break;
 	case _MEMSWAP:
@@ -3716,6 +3733,12 @@  static u64 mem_cgroup_read_u64(struct cgroup_subsys_state *css,
 		return counter->failcnt;
 	case RES_SOFT_LIMIT:
 		return (u64)memcg->soft_limit * PAGE_SIZE;
+	case RES_LIMIT_MIN:
+		return (u64)READ_ONCE(memcg->memory.min);
+	case RES_LIMIT_LOW:
+		return (u64)READ_ONCE(memcg->memory.low);
+	case RES_LIMIT_HIGH:
+		return (u64)READ_ONCE(memcg->memory.high);
 	default:
 		BUG();
 	}
@@ -3815,6 +3838,34 @@  static int memcg_update_tcp_max(struct mem_cgroup *memcg, unsigned long max)
 	return ret;
 }
 
+static inline void mem_cgroup_v1_set_defaults(struct mem_cgroup *memcg,
+					      unsigned long nr_pages)
+{
+	unsigned long min, low, high;
+
+	if (mem_cgroup_is_root(memcg) || PAGE_COUNTER_MAX == nr_pages)
+		return;
+
+	min = READ_ONCE(memcg->memory.min);
+	low = READ_ONCE(memcg->memory.low);
+	if (min || low)
+		return;
+
+	if (!min && memcg_v1_min_default_percent) {
+		min = (nr_pages * memcg_v1_min_default_percent) / 100;
+		page_counter_set_min(&memcg->memory, min);
+	}
+	if (!low && memcg_v1_low_default_percent) {
+		low = (nr_pages * memcg_v1_low_default_percent) / 100;
+		page_counter_set_low(&memcg->memory, low);
+	}
+	high = READ_ONCE(memcg->memory.high);
+	if (high == PAGE_COUNTER_MAX && memcg_v1_high_default_percent) {
+		high = (nr_pages * memcg_v1_high_default_percent) / 100;
+		page_counter_set_high(&memcg->memory, high);
+	}
+}
+
 /*
  * The user of this function is...
  * RES_LIMIT.
@@ -3838,6 +3889,11 @@  static ssize_t mem_cgroup_write(struct kernfs_open_file *of,
 			break;
 		}
 		switch (MEMFILE_TYPE(of_cft(of)->private)) {
+		case _MEM_V1:
+			ret = mem_cgroup_resize_max(memcg, nr_pages, false);
+			if (!ret)
+				mem_cgroup_v1_set_defaults(memcg, nr_pages);
+			break;
 		case _MEM:
 			ret = mem_cgroup_resize_max(memcg, nr_pages, false);
 			break;
@@ -4986,6 +5042,13 @@  static int mem_cgroup_slab_show(struct seq_file *m, void *p)
 }
 #endif
 
+static ssize_t memory_min_write(struct kernfs_open_file *of,
+				char *buf, size_t nbytes, loff_t off);
+static ssize_t memory_low_write(struct kernfs_open_file *of,
+				char *buf, size_t nbytes, loff_t off);
+static ssize_t memory_high_write(struct kernfs_open_file *of,
+				 char *buf, size_t nbytes, loff_t off);
+
 static struct cftype mem_cgroup_legacy_files[] = {
 	{
 		.name = "usage_in_bytes",
@@ -5000,10 +5063,28 @@  static struct cftype mem_cgroup_legacy_files[] = {
 	},
 	{
 		.name = "limit_in_bytes",
-		.private = MEMFILE_PRIVATE(_MEM, RES_LIMIT),
+		.private = MEMFILE_PRIVATE(_MEM_V1, RES_LIMIT),
 		.write = mem_cgroup_write,
 		.read_u64 = mem_cgroup_read_u64,
 	},
+	{
+		.name = "limit_in_bytes.min",
+		.private = MEMFILE_PRIVATE(_MEM_V1, RES_LIMIT_MIN),
+		.write = memory_min_write,
+		.read_u64 = mem_cgroup_read_u64,
+	},
+	{
+		.name = "limit_in_bytes.low",
+		.private = MEMFILE_PRIVATE(_MEM_V1, RES_LIMIT_LOW),
+		.write = memory_low_write,
+		.read_u64 = mem_cgroup_read_u64,
+	},
+	{
+		.name = "limit_in_bytes.high",
+		.private = MEMFILE_PRIVATE(_MEM_V1, RES_LIMIT_HIGH),
+		.write = memory_high_write,
+		.read_u64 = mem_cgroup_read_u64,
+	},
 	{
 		.name = "soft_limit_in_bytes",
 		.private = MEMFILE_PRIVATE(_MEM, RES_SOFT_LIMIT),