diff mbox series

[RFC,7/8] memcg: add sysctl and config option to control memory recharging

Message ID 20230720070825.992023-8-yosryahmed@google.com (mailing list archive)
State New
Headers show
Series memory recharging for offline memcgs | expand

Commit Message

Yosry Ahmed July 20, 2023, 7:08 a.m. UTC
Add a sysctl to enable/disable memory recharging for offline memcgs. Add
a config option to control whether or not it is enabled by default.

Signed-off-by: Yosry Ahmed <yosryahmed@google.com>
---
 include/linux/memcontrol.h |  2 ++
 kernel/sysctl.c            | 11 +++++++++++
 mm/Kconfig                 | 12 ++++++++++++
 mm/memcontrol.c            |  9 ++++++++-
 4 files changed, 33 insertions(+), 1 deletion(-)

Comments

Luis R. Rodriguez July 20, 2023, 6:13 p.m. UTC | #1
On Thu, Jul 20, 2023 at 07:08:24AM +0000, Yosry Ahmed wrote:
> Add a sysctl to enable/disable memory recharging for offline memcgs. Add
> a config option to control whether or not it is enabled by default.
> 
> Signed-off-by: Yosry Ahmed <yosryahmed@google.com>
> ---
>  include/linux/memcontrol.h |  2 ++
>  kernel/sysctl.c            | 11 +++++++++++
>  mm/Kconfig                 | 12 ++++++++++++
>  mm/memcontrol.c            |  9 ++++++++-
>  4 files changed, 33 insertions(+), 1 deletion(-)
> 
> diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
> index 59b653d4a76e..ae9f09ee90cb 100644
> --- a/include/linux/memcontrol.h
> +++ b/include/linux/memcontrol.h
> @@ -60,6 +60,8 @@ struct mem_cgroup_reclaim_cookie {
>  
>  #ifdef CONFIG_MEMCG
>  
> +extern int sysctl_recharge_offline_memcgs;
> +
>  #define MEM_CGROUP_ID_SHIFT	16
>  #define MEM_CGROUP_ID_MAX	USHRT_MAX
>  
> diff --git a/kernel/sysctl.c b/kernel/sysctl.c
> index 354a2d294f52..1735d1d95652 100644
> --- a/kernel/sysctl.c
> +++ b/kernel/sysctl.c
> @@ -2249,6 +2249,17 @@ static struct ctl_table vm_table[] = {
>  		.extra2		= (void *)&mmap_rnd_compat_bits_max,
>  	},
>  #endif
> +#ifdef CONFIG_MEMCG
> +	{
> +		.procname	= "recharge_offline_memcgs",
> +		.data		= &sysctl_recharge_offline_memcgs,
> +		.maxlen		= sizeof(sysctl_recharge_offline_memcgs),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dointvec_minmax,
> +		.extra1		= SYSCTL_ZERO,
> +		.extra2		= SYSCTL_ONE,
> +	},
> +#endif /* CONFIG_MEMCG */
>  	{ }
>  };

Please don't add any more sysctls to kernel/sysctl.c, git log that file
for a series of cleanups which show how to use your own and why we have
been doing that cleanup.

  Luis
Yosry Ahmed July 20, 2023, 6:24 p.m. UTC | #2
On Thu, Jul 20, 2023 at 11:13 AM Luis Chamberlain <mcgrof@kernel.org> wrote:
>
> On Thu, Jul 20, 2023 at 07:08:24AM +0000, Yosry Ahmed wrote:
> > Add a sysctl to enable/disable memory recharging for offline memcgs. Add
> > a config option to control whether or not it is enabled by default.
> >
> > Signed-off-by: Yosry Ahmed <yosryahmed@google.com>
> > ---
> >  include/linux/memcontrol.h |  2 ++
> >  kernel/sysctl.c            | 11 +++++++++++
> >  mm/Kconfig                 | 12 ++++++++++++
> >  mm/memcontrol.c            |  9 ++++++++-
> >  4 files changed, 33 insertions(+), 1 deletion(-)
> >
> > diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
> > index 59b653d4a76e..ae9f09ee90cb 100644
> > --- a/include/linux/memcontrol.h
> > +++ b/include/linux/memcontrol.h
> > @@ -60,6 +60,8 @@ struct mem_cgroup_reclaim_cookie {
> >
> >  #ifdef CONFIG_MEMCG
> >
> > +extern int sysctl_recharge_offline_memcgs;
> > +
> >  #define MEM_CGROUP_ID_SHIFT  16
> >  #define MEM_CGROUP_ID_MAX    USHRT_MAX
> >
> > diff --git a/kernel/sysctl.c b/kernel/sysctl.c
> > index 354a2d294f52..1735d1d95652 100644
> > --- a/kernel/sysctl.c
> > +++ b/kernel/sysctl.c
> > @@ -2249,6 +2249,17 @@ static struct ctl_table vm_table[] = {
> >               .extra2         = (void *)&mmap_rnd_compat_bits_max,
> >       },
> >  #endif
> > +#ifdef CONFIG_MEMCG
> > +     {
> > +             .procname       = "recharge_offline_memcgs",
> > +             .data           = &sysctl_recharge_offline_memcgs,
> > +             .maxlen         = sizeof(sysctl_recharge_offline_memcgs),
> > +             .mode           = 0644,
> > +             .proc_handler   = proc_dointvec_minmax,
> > +             .extra1         = SYSCTL_ZERO,
> > +             .extra2         = SYSCTL_ONE,
> > +     },
> > +#endif /* CONFIG_MEMCG */
> >       { }
> >  };
>
> Please don't add any more sysctls to kernel/sysctl.c, git log that file
> for a series of cleanups which show how to use your own and why we have
> been doing that cleanup.

Thanks for pointing this out, I definitely missed it. Will do that in
the next version. I guess this will also reduce the reviewer churn if
I won't be touching kernel/sysctl.c?

>
>   Luis
Luis R. Rodriguez July 20, 2023, 6:30 p.m. UTC | #3
On Thu, Jul 20, 2023 at 11:24:20AM -0700, Yosry Ahmed wrote:
> On Thu, Jul 20, 2023 at 11:13 AM Luis Chamberlain <mcgrof@kernel.org> wrote:
> >
> > On Thu, Jul 20, 2023 at 07:08:24AM +0000, Yosry Ahmed wrote:
> > > Add a sysctl to enable/disable memory recharging for offline memcgs. Add
> > > a config option to control whether or not it is enabled by default.
> > >
> > > Signed-off-by: Yosry Ahmed <yosryahmed@google.com>
> > > ---
> > >  include/linux/memcontrol.h |  2 ++
> > >  kernel/sysctl.c            | 11 +++++++++++
> > >  mm/Kconfig                 | 12 ++++++++++++
> > >  mm/memcontrol.c            |  9 ++++++++-
> > >  4 files changed, 33 insertions(+), 1 deletion(-)
> > >
> > > diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
> > > index 59b653d4a76e..ae9f09ee90cb 100644
> > > --- a/include/linux/memcontrol.h
> > > +++ b/include/linux/memcontrol.h
> > > @@ -60,6 +60,8 @@ struct mem_cgroup_reclaim_cookie {
> > >
> > >  #ifdef CONFIG_MEMCG
> > >
> > > +extern int sysctl_recharge_offline_memcgs;
> > > +
> > >  #define MEM_CGROUP_ID_SHIFT  16
> > >  #define MEM_CGROUP_ID_MAX    USHRT_MAX
> > >
> > > diff --git a/kernel/sysctl.c b/kernel/sysctl.c
> > > index 354a2d294f52..1735d1d95652 100644
> > > --- a/kernel/sysctl.c
> > > +++ b/kernel/sysctl.c
> > > @@ -2249,6 +2249,17 @@ static struct ctl_table vm_table[] = {
> > >               .extra2         = (void *)&mmap_rnd_compat_bits_max,
> > >       },
> > >  #endif
> > > +#ifdef CONFIG_MEMCG
> > > +     {
> > > +             .procname       = "recharge_offline_memcgs",
> > > +             .data           = &sysctl_recharge_offline_memcgs,
> > > +             .maxlen         = sizeof(sysctl_recharge_offline_memcgs),
> > > +             .mode           = 0644,
> > > +             .proc_handler   = proc_dointvec_minmax,
> > > +             .extra1         = SYSCTL_ZERO,
> > > +             .extra2         = SYSCTL_ONE,
> > > +     },
> > > +#endif /* CONFIG_MEMCG */
> > >       { }
> > >  };
> >
> > Please don't add any more sysctls to kernel/sysctl.c, git log that file
> > for a series of cleanups which show how to use your own and why we have
> > been doing that cleanup.
> 
> Thanks for pointing this out, I definitely missed it. Will do that in
> the next version. I guess this will also reduce the reviewer churn if
> I won't be touching kernel/sysctl.c?

Right, it means I don't have to care anymore about random sysctl knobs.
Let people knob it all up.

  Luis
diff mbox series

Patch

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 59b653d4a76e..ae9f09ee90cb 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -60,6 +60,8 @@  struct mem_cgroup_reclaim_cookie {
 
 #ifdef CONFIG_MEMCG
 
+extern int sysctl_recharge_offline_memcgs;
+
 #define MEM_CGROUP_ID_SHIFT	16
 #define MEM_CGROUP_ID_MAX	USHRT_MAX
 
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 354a2d294f52..1735d1d95652 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -2249,6 +2249,17 @@  static struct ctl_table vm_table[] = {
 		.extra2		= (void *)&mmap_rnd_compat_bits_max,
 	},
 #endif
+#ifdef CONFIG_MEMCG
+	{
+		.procname	= "recharge_offline_memcgs",
+		.data		= &sysctl_recharge_offline_memcgs,
+		.maxlen		= sizeof(sysctl_recharge_offline_memcgs),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
+	},
+#endif /* CONFIG_MEMCG */
 	{ }
 };
 
diff --git a/mm/Kconfig b/mm/Kconfig
index 09130434e30d..9462c4b598d9 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -1236,6 +1236,18 @@  config LOCK_MM_AND_FIND_VMA
 	bool
 	depends on !STACK_GROWSUP
 
+config MEMCG_RECHARGE_OFFLINE_ENABLED
+	bool "Recharge memory charged to offline memcgs"
+	depends on MEMCG
+	help
+	  When a memory cgroup is removed by userspace, try to recharge any
+	  memory still charged to it to avoid having it live on as an offline
+	  memcg. Offline memcgs potentially consume memory and limit scalability
+	  of some operations.
+
+	  This option enables the above behavior by default. It can be override
+	  at runtime through /proc/sys/vm/recharge_offline_memcgs.
+
 source "mm/damon/Kconfig"
 
 endmenu
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 2fe9c6f1be80..25cdb17eaaa3 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -96,6 +96,9 @@  static bool cgroup_memory_nobpf __ro_after_init;
 static DECLARE_WAIT_QUEUE_HEAD(memcg_cgwb_frn_waitq);
 #endif
 
+int sysctl_recharge_offline_memcgs __read_mostly = IS_ENABLED(
+		CONFIG_MEMCG_RECHARGE_OFFLINE_ENABLED);
+
 static struct workqueue_struct *memcg_recharge_wq;
 
 /* Whether legacy memory+swap accounting is active */
@@ -6592,7 +6595,8 @@  static void memcg_recharge_mapped_folios(struct mem_cgroup *memcg)
 	INIT_DELAYED_WORK(&memcg->recharge_mapped_work.dwork,
 			  memcg_do_recharge_mapped_folios);
 
-	if (memcg_recharge_wq && memcg_nr_local_mapped_pages(memcg)) {
+	if (sysctl_recharge_offline_memcgs &&
+	    memcg_recharge_wq && memcg_nr_local_mapped_pages(memcg)) {
 		memcg->recharge_mapped_work.retries = 0;
 		queue_delayed_work(memcg_recharge_wq,
 				   &memcg->recharge_mapped_work.dwork, 0);
@@ -6605,6 +6609,9 @@  static bool should_do_deferred_recharge(struct folio *folio)
 	struct mem_cgroup *memcg;
 	bool ret;
 
+	if (!sysctl_recharge_offline_memcgs)
+		return false;
+
 	rcu_read_lock();
 	memcg = folio_memcg_rcu(folio);
 	ret = memcg && !!(memcg->css.flags & CSS_DYING);