diff mbox series

[v2,5/7] memcg: pr_warn_once for unexpected events and stats

Message ID 20240427003733.3898961-6-shakeel.butt@linux.dev (mailing list archive)
State New
Headers show
Series memcg: reduce memory consumption by memcg stats | expand

Commit Message

Shakeel Butt April 27, 2024, 12:37 a.m. UTC
To reduce memory usage by the memcg events and stats, the kernel uses an
indirection table and only allocates the stats and events which are
actually used by the memcg code. To make this more robust, let's add
warnings where unexpected stat and event indices are used.

Signed-off-by: Shakeel Butt <shakeel.butt@linux.dev>
---
 mm/memcontrol.c | 43 ++++++++++++++++++++++++++++++++++---------
 1 file changed, 34 insertions(+), 9 deletions(-)

Comments

Yosry Ahmed April 27, 2024, 12:58 a.m. UTC | #1
On Fri, Apr 26, 2024 at 5:38 PM Shakeel Butt <shakeel.butt@linux.dev> wrote:
>
> To reduce memory usage by the memcg events and stats, the kernel uses
> indirection table and only allocate stats and events which are being
> used by the memcg code. To make this more robust, let's add warnings
> where unexpected stats and events indexes are used.
>
> Signed-off-by: Shakeel Butt <shakeel.butt@linux.dev>
> ---
>  mm/memcontrol.c | 43 ++++++++++++++++++++++++++++++++++---------
>  1 file changed, 34 insertions(+), 9 deletions(-)
>
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 103e0e53e20a..36145089dcf5 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -671,9 +671,11 @@ unsigned long lruvec_page_state(struct lruvec *lruvec, enum node_stat_item idx)
>                 return node_page_state(lruvec_pgdat(lruvec), idx);
>
>         i = memcg_stats_index(idx);
> -       if (i >= 0) {
> +       if (likely(i >= 0)) {
>                 pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
>                 x = READ_ONCE(pn->lruvec_stats->state[i]);
> +       } else {
> +               pr_warn_once("%s: stat item index: %d\n", __func__, idx);
>         }

Can we make these more compact by using WARN_ON_ONCE() instead:

if (WARN_ON_ONCE(i < 0))
         return 0;

I guess the advantage of using pr_warn_once() is that we get to print
the exact stat index, but the stack trace from WARN_ON_ONCE() should
make it obvious in most cases AFAICT.

No strong opinions either way.
Shakeel Butt April 27, 2024, 1:18 a.m. UTC | #2
On Fri, Apr 26, 2024 at 05:58:16PM -0700, Yosry Ahmed wrote:
> On Fri, Apr 26, 2024 at 5:38 PM Shakeel Butt <shakeel.butt@linux.dev> wrote:
> >
> > To reduce memory usage by the memcg events and stats, the kernel uses
> > indirection table and only allocate stats and events which are being
> > used by the memcg code. To make this more robust, let's add warnings
> > where unexpected stats and events indexes are used.
> >
> > Signed-off-by: Shakeel Butt <shakeel.butt@linux.dev>
> > ---
> >  mm/memcontrol.c | 43 ++++++++++++++++++++++++++++++++++---------
> >  1 file changed, 34 insertions(+), 9 deletions(-)
> >
> > diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> > index 103e0e53e20a..36145089dcf5 100644
> > --- a/mm/memcontrol.c
> > +++ b/mm/memcontrol.c
> > @@ -671,9 +671,11 @@ unsigned long lruvec_page_state(struct lruvec *lruvec, enum node_stat_item idx)
> >                 return node_page_state(lruvec_pgdat(lruvec), idx);
> >
> >         i = memcg_stats_index(idx);
> > -       if (i >= 0) {
> > +       if (likely(i >= 0)) {
> >                 pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
> >                 x = READ_ONCE(pn->lruvec_stats->state[i]);
> > +       } else {
> > +               pr_warn_once("%s: stat item index: %d\n", __func__, idx);
> >         }
> 
> Can we make these more compact by using WARN_ON_ONCE() instead:
> 
> if (WARN_ON_ONCE(i < 0))
>          return 0;
> 
> I guess the advantage of using pr_warn_once() is that we get to print
> the exact stat index, but the stack trace from WARN_ON_ONCE() should
> make it obvious in most cases AFAICT.
> 
> No strong opinions either way.

One reason I used pr_warn_once() over WARN_ON_ONCE() is the syzbot
trigger. No need to trip the bot over this error condition.
Johannes Weiner April 27, 2024, 2:22 p.m. UTC | #3
On Fri, Apr 26, 2024 at 06:18:13PM -0700, Shakeel Butt wrote:
> On Fri, Apr 26, 2024 at 05:58:16PM -0700, Yosry Ahmed wrote:
> > On Fri, Apr 26, 2024 at 5:38 PM Shakeel Butt <shakeel.butt@linux.dev> wrote:
> > >
> > > To reduce memory usage by the memcg events and stats, the kernel uses
> > > indirection table and only allocate stats and events which are being
> > > used by the memcg code. To make this more robust, let's add warnings
> > > where unexpected stats and events indexes are used.
> > >
> > > Signed-off-by: Shakeel Butt <shakeel.butt@linux.dev>
> > > ---
> > >  mm/memcontrol.c | 43 ++++++++++++++++++++++++++++++++++---------
> > >  1 file changed, 34 insertions(+), 9 deletions(-)
> > >
> > > diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> > > index 103e0e53e20a..36145089dcf5 100644
> > > --- a/mm/memcontrol.c
> > > +++ b/mm/memcontrol.c
> > > @@ -671,9 +671,11 @@ unsigned long lruvec_page_state(struct lruvec *lruvec, enum node_stat_item idx)
> > >                 return node_page_state(lruvec_pgdat(lruvec), idx);
> > >
> > >         i = memcg_stats_index(idx);
> > > -       if (i >= 0) {
> > > +       if (likely(i >= 0)) {
> > >                 pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
> > >                 x = READ_ONCE(pn->lruvec_stats->state[i]);
> > > +       } else {
> > > +               pr_warn_once("%s: stat item index: %d\n", __func__, idx);
> > >         }
> > 
> > Can we make these more compact by using WARN_ON_ONCE() instead:
> > 
> > if (WARN_ON_ONCE(i < 0))
> >          return 0;
> > 
> > I guess the advantage of using pr_warn_once() is that we get to print
> > the exact stat index, but the stack trace from WARN_ON_ONCE() should
> > make it obvious in most cases AFAICT.

if (WARN_ONCE(i < 0, "stat item %d not in memcg_node_stat_items\n", i))
	return 0;

should work?

> > No strong opinions either way.
> 
> One reason I used pr_warn_once() over WARN_ON_ONCE() is the syzbot
> trigger. No need to trip the bot over this error condition.

The warn splat is definitely quite verbose. But I think that would
only be annoying initially, in case a site was missed. Down the line,
it seems helpful to have this stand out to somebody who is trying to
add a new cgroup stat and forgets to update the right enums.
Roman Gushchin April 29, 2024, 4:06 p.m. UTC | #4
On Fri, Apr 26, 2024 at 05:37:31PM -0700, Shakeel Butt wrote:
> To reduce memory usage by the memcg events and stats, the kernel uses
> indirection table and only allocate stats and events which are being
> used by the memcg code. To make this more robust, let's add warnings
> where unexpected stats and events indexes are used.
> 
> Signed-off-by: Shakeel Butt <shakeel.butt@linux.dev>
> ---
>  mm/memcontrol.c | 43 ++++++++++++++++++++++++++++++++++---------
>  1 file changed, 34 insertions(+), 9 deletions(-)
> 
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 103e0e53e20a..36145089dcf5 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -671,9 +671,11 @@ unsigned long lruvec_page_state(struct lruvec *lruvec, enum node_stat_item idx)
>  		return node_page_state(lruvec_pgdat(lruvec), idx);
>  
>  	i = memcg_stats_index(idx);
> -	if (i >= 0) {
> +	if (likely(i >= 0)) {
>  		pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
>  		x = READ_ONCE(pn->lruvec_stats->state[i]);
> +	} else {
> +		pr_warn_once("%s: stat item index: %d\n", __func__, idx);
>  	}

I think it's generally a CONFIG_DEBUG_VM material. Do we have some extra
concerns here?

Having pr_warn_on_once() would be nice here.
Shakeel Butt April 29, 2024, 7:54 p.m. UTC | #5
On Sat, Apr 27, 2024 at 10:22:34AM -0400, Johannes Weiner wrote:
> On Fri, Apr 26, 2024 at 06:18:13PM -0700, Shakeel Butt wrote:
> > On Fri, Apr 26, 2024 at 05:58:16PM -0700, Yosry Ahmed wrote:
> > > On Fri, Apr 26, 2024 at 5:38 PM Shakeel Butt <shakeel.butt@linux.dev> wrote:
[...]
> > > 
> > > Can we make these more compact by using WARN_ON_ONCE() instead:
> > > 
> > > if (WARN_ON_ONCE(i < 0))
> > >          return 0;
> > > 
> > > I guess the advantage of using pr_warn_once() is that we get to print
> > > the exact stat index, but the stack trace from WARN_ON_ONCE() should
> > > make it obvious in most cases AFAICT.
> 
> if (WARN_ONCE(i < 0, "stat item %d not in memcg_node_stat_items\n", i))
> 	return 0;
> 
> should work?
> 
> > > No strong opinions either way.
> > 
> > One reason I used pr_warn_once() over WARN_ON_ONCE() is the syzbot
> > trigger. No need to trip the bot over this error condition.
> 
> The warn splat is definitely quite verbose. But I think that would
> only be annoying initially, in case a site was missed. Down the line,
> it seems helpful to have this stand out to somebody who is trying to
> add a new cgroup stat and forgets to update the right enums.

Sounds good to me. I will change it to WARN_ONCE().
Shakeel Butt April 29, 2024, 7:56 p.m. UTC | #6
On Mon, Apr 29, 2024 at 09:06:23AM -0700, Roman Gushchin wrote:
> On Fri, Apr 26, 2024 at 05:37:31PM -0700, Shakeel Butt wrote:
> > To reduce memory usage by the memcg events and stats, the kernel uses
> > indirection table and only allocate stats and events which are being
> > used by the memcg code. To make this more robust, let's add warnings
> > where unexpected stats and events indexes are used.
> > 
> > Signed-off-by: Shakeel Butt <shakeel.butt@linux.dev>
> > ---
> >  mm/memcontrol.c | 43 ++++++++++++++++++++++++++++++++++---------
> >  1 file changed, 34 insertions(+), 9 deletions(-)
> > 
> > diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> > index 103e0e53e20a..36145089dcf5 100644
> > --- a/mm/memcontrol.c
> > +++ b/mm/memcontrol.c
> > @@ -671,9 +671,11 @@ unsigned long lruvec_page_state(struct lruvec *lruvec, enum node_stat_item idx)
> >  		return node_page_state(lruvec_pgdat(lruvec), idx);
> >  
> >  	i = memcg_stats_index(idx);
> > -	if (i >= 0) {
> > +	if (likely(i >= 0)) {
> >  		pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
> >  		x = READ_ONCE(pn->lruvec_stats->state[i]);
> > +	} else {
> > +		pr_warn_once("%s: stat item index: %d\n", __func__, idx);
> >  	}
> 
> I think it's generally a CONFIG_DEBUG_VM material. Do we have some extra
> concerns here?
> 
> Having pr_warn_on_once() would be nice here.

No extra concern, just want this indirection table to be up to date in
future.
diff mbox series

Patch

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 103e0e53e20a..36145089dcf5 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -671,9 +671,11 @@  unsigned long lruvec_page_state(struct lruvec *lruvec, enum node_stat_item idx)
 		return node_page_state(lruvec_pgdat(lruvec), idx);
 
 	i = memcg_stats_index(idx);
-	if (i >= 0) {
+	if (likely(i >= 0)) {
 		pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
 		x = READ_ONCE(pn->lruvec_stats->state[i]);
+	} else {
+		pr_warn_once("%s: stat item index: %d\n", __func__, idx);
 	}
 #ifdef CONFIG_SMP
 	if (x < 0)
@@ -693,9 +695,11 @@  unsigned long lruvec_page_state_local(struct lruvec *lruvec,
 		return node_page_state(lruvec_pgdat(lruvec), idx);
 
 	i = memcg_stats_index(idx);
-	if (i >= 0) {
+	if (likely(i >= 0)) {
 		pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
 		x = READ_ONCE(pn->lruvec_stats->state_local[i]);
+	} else {
+		pr_warn_once("%s: stat item index: %d\n", __func__, idx);
 	}
 #ifdef CONFIG_SMP
 	if (x < 0)
@@ -922,8 +926,10 @@  unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx)
 	long x;
 	int i = memcg_stats_index(idx);
 
-	if (i < 0)
+	if (unlikely(i < 0)) {
+		pr_warn_once("%s: stat item index: %d\n", __func__, idx);
 		return 0;
+	}
 
 	x = READ_ONCE(memcg->vmstats->state[i]);
 #ifdef CONFIG_SMP
@@ -959,8 +965,13 @@  void __mod_memcg_state(struct mem_cgroup *memcg, int idx, int val)
 {
 	int i = memcg_stats_index(idx);
 
-	if (mem_cgroup_disabled() || i < 0)
+	if (mem_cgroup_disabled())
+		return;
+
+	if (unlikely(i < 0)) {
+		pr_warn_once("%s: stat item index: %d\n", __func__, idx);
 		return;
+	}
 
 	__this_cpu_add(memcg->vmstats_percpu->state[i], val);
 	memcg_rstat_updated(memcg, memcg_state_val_in_pages(idx, val));
@@ -972,8 +983,10 @@  static unsigned long memcg_page_state_local(struct mem_cgroup *memcg, int idx)
 	long x;
 	int i = memcg_stats_index(idx);
 
-	if (i < 0)
+	if (unlikely(i < 0)) {
+		pr_warn_once("%s: stat item index: %d\n", __func__, idx);
 		return 0;
+	}
 
 	x = READ_ONCE(memcg->vmstats->state_local[i]);
 #ifdef CONFIG_SMP
@@ -991,8 +1004,10 @@  static void __mod_memcg_lruvec_state(struct lruvec *lruvec,
 	struct mem_cgroup *memcg;
 	int i = memcg_stats_index(idx);
 
-	if (i < 0)
+	if (unlikely(i < 0)) {
+		pr_warn_once("%s: stat item index: %d\n", __func__, idx);
 		return;
+	}
 
 	pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
 	memcg = pn->memcg;
@@ -1107,8 +1122,13 @@  void __count_memcg_events(struct mem_cgroup *memcg, enum vm_event_item idx,
 {
 	int index = memcg_events_index(idx);
 
-	if (mem_cgroup_disabled() || index < 0)
+	if (mem_cgroup_disabled())
+		return;
+
+	if (unlikely(index < 0)) {
+		pr_warn_once("%s: event item index: %d\n", __func__, idx);
 		return;
+	}
 
 	memcg_stats_lock();
 	__this_cpu_add(memcg->vmstats_percpu->events[index], count);
@@ -1120,8 +1140,11 @@  static unsigned long memcg_events(struct mem_cgroup *memcg, int event)
 {
 	int index = memcg_events_index(event);
 
-	if (index < 0)
+	if (unlikely(index < 0)) {
+		pr_warn_once("%s: event item index: %d\n", __func__, event);
 		return 0;
+	}
+
 	return READ_ONCE(memcg->vmstats->events[index]);
 }
 
@@ -1129,8 +1152,10 @@  static unsigned long memcg_events_local(struct mem_cgroup *memcg, int event)
 {
 	int index = memcg_events_index(event);
 
-	if (index < 0)
+	if (unlikely(index < 0)) {
+		pr_warn_once("%s: event item index: %d\n", __func__, event);
 		return 0;
+	}
 
 	return READ_ONCE(memcg->vmstats->events_local[index]);
 }