diff mbox series

[v5.4.y,v4.19.y] mm: memcg/slab: fix memory leak at non-root kmem_cache destroy

Message ID 20200707062754.8383-1-songmuchun@bytedance.com (mailing list archive)
State New, archived
Headers show
Series [v5.4.y,v4.19.y] mm: memcg/slab: fix memory leak at non-root kmem_cache destroy | expand

Commit Message

Muchun Song July 7, 2020, 6:27 a.m. UTC
If the kmem_cache refcount is greater than one, we should not
mark the root kmem_cache as dying. If we mark the root kmem_cache
dying incorrectly, the non-root kmem_cache can never be destroyed.
It resulted in memory leak when memcg was destroyed. We can use the
following steps to reproduce.

  1) Use kmem_cache_create() to create a new kmem_cache named A.
  2) Coincidentally, the kmem_cache A is an alias for kmem_cache B,
     so the refcount of B is just increased.
  3) Use kmem_cache_destroy() to destroy the kmem_cache A, just
     decrease the B's refcount but mark the B as dying.
  4) Create a new memory cgroup and alloc memory from the kmem_cache
     A. It leads to create a non-root kmem_cache for allocating.
  5) When destroy the memory cgroup created in the step 4), the
     non-root kmem_cache can never be destroyed.

If we repeat steps 4) and 5), this will cause a lot of memory leak.
So only when refcount reach zero, we mark the root kmem_cache as dying.

Fixes: 92ee383f6daa ("mm: fix race between kmem_cache destroy, create and deactivate")
Signed-off-by: Muchun Song <songmuchun@bytedance.com>
---
 mm/slab_common.c | 43 +++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 41 insertions(+), 2 deletions(-)

Comments

Vlastimil Babka July 15, 2020, 11:32 a.m. UTC | #1
On 7/7/20 8:27 AM, Muchun Song wrote:
> If the kmem_cache refcount is greater than one, we should not
> mark the root kmem_cache as dying. If we mark the root kmem_cache
> dying incorrectly, the non-root kmem_cache can never be destroyed.
> It resulted in memory leak when memcg was destroyed. We can use the
> following steps to reproduce.
> 
>   1) Use kmem_cache_create() to create a new kmem_cache named A.
>   2) Coincidentally, the kmem_cache A is an alias for kmem_cache B,
>      so the refcount of B is just increased.
>   3) Use kmem_cache_destroy() to destroy the kmem_cache A, just
>      decrease the B's refcount but mark the B as dying.
>   4) Create a new memory cgroup and alloc memory from the kmem_cache
>      A. It leads to create a non-root kmem_cache for allocating.
>   5) When destroy the memory cgroup created in the step 4), the
>      non-root kmem_cache can never be destroyed.
> 
> If we repeat steps 4) and 5), this will cause a lot of memory leak.
> So only when refcount reach zero, we mark the root kmem_cache as dying.
> 
> Fixes: 92ee383f6daa ("mm: fix race between kmem_cache destroy, create and deactivate")
> Signed-off-by: Muchun Song <songmuchun@bytedance.com>

CC Roman, who worked in this area recently.

Also why is this marked "[PATCH v5.4.y, v4.19.y]"? Has it been fixed otherwise
in 5.5+ ?

> ---
>  mm/slab_common.c | 43 +++++++++++++++++++++++++++++++++++++++++--
>  1 file changed, 41 insertions(+), 2 deletions(-)
> 
> diff --git a/mm/slab_common.c b/mm/slab_common.c
> index 8c1ffbf7de45..83ee6211aec7 100644
> --- a/mm/slab_common.c
> +++ b/mm/slab_common.c
> @@ -258,6 +258,11 @@ static void memcg_unlink_cache(struct kmem_cache *s)
>  		list_del(&s->memcg_params.kmem_caches_node);
>  	}
>  }
> +
> +static inline bool memcg_kmem_cache_dying(struct kmem_cache *s)
> +{
> +	return is_root_cache(s) && s->memcg_params.dying;
> +}
>  #else
>  static inline int init_memcg_params(struct kmem_cache *s,
>  				    struct kmem_cache *root_cache)
> @@ -272,6 +277,11 @@ static inline void destroy_memcg_params(struct kmem_cache *s)
>  static inline void memcg_unlink_cache(struct kmem_cache *s)
>  {
>  }
> +
> +static inline bool memcg_kmem_cache_dying(struct kmem_cache *s)
> +{
> +	return false;
> +}
>  #endif /* CONFIG_MEMCG_KMEM */
>  
>  /*
> @@ -326,6 +336,13 @@ int slab_unmergeable(struct kmem_cache *s)
>  	if (s->refcount < 0)
>  		return 1;
>  
> +	/*
> +	 * If the kmem_cache is dying. We should also skip this
> +	 * kmem_cache.
> +	 */
> +	if (memcg_kmem_cache_dying(s))
> +		return 1;
> +
>  	return 0;
>  }
>  
> @@ -944,8 +961,6 @@ void kmem_cache_destroy(struct kmem_cache *s)
>  	if (unlikely(!s))
>  		return;
>  
> -	flush_memcg_workqueue(s);
> -
>  	get_online_cpus();
>  	get_online_mems();
>  
> @@ -955,6 +970,30 @@ void kmem_cache_destroy(struct kmem_cache *s)
>  	if (s->refcount)
>  		goto out_unlock;
>  
> +#ifdef CONFIG_MEMCG_KMEM
> +	mutex_unlock(&slab_mutex);
> +
> +	put_online_mems();
> +	put_online_cpus();
> +
> +	flush_memcg_workqueue(s);
> +
> +	get_online_cpus();
> +	get_online_mems();
> +
> +	mutex_lock(&slab_mutex);
> +
> +	if (WARN(s->refcount,
> +		 "kmem_cache_destroy %s: Slab cache is still referenced\n",
> +		 s->name)) {
> +		/*
> +		 * Reset the dying flag setted by flush_memcg_workqueue().
> +		 */
> +		s->memcg_params.dying = false;
> +		goto out_unlock;
> +	}
> +#endif
> +
>  	err = shutdown_memcg_caches(s);
>  	if (!err)
>  		err = shutdown_cache(s);
>
Muchun Song July 15, 2020, 3:13 p.m. UTC | #2
On Wed, Jul 15, 2020 at 7:32 PM Vlastimil Babka <vbabka@suse.cz> wrote:
>
> On 7/7/20 8:27 AM, Muchun Song wrote:
> > If the kmem_cache refcount is greater than one, we should not
> > mark the root kmem_cache as dying. If we mark the root kmem_cache
> > dying incorrectly, the non-root kmem_cache can never be destroyed.
> > It resulted in memory leak when memcg was destroyed. We can use the
> > following steps to reproduce.
> >
> >   1) Use kmem_cache_create() to create a new kmem_cache named A.
> >   2) Coincidentally, the kmem_cache A is an alias for kmem_cache B,
> >      so the refcount of B is just increased.
> >   3) Use kmem_cache_destroy() to destroy the kmem_cache A, just
> >      decrease the B's refcount but mark the B as dying.
> >   4) Create a new memory cgroup and alloc memory from the kmem_cache
> >      A. It leads to create a non-root kmem_cache for allocating.
> >   5) When destroy the memory cgroup created in the step 4), the
> >      non-root kmem_cache can never be destroyed.
> >
> > If we repeat steps 4) and 5), this will cause a lot of memory leak.
> > So only when refcount reach zero, we mark the root kmem_cache as dying.
> >
> > Fixes: 92ee383f6daa ("mm: fix race between kmem_cache destroy, create and deactivate")
> > Signed-off-by: Muchun Song <songmuchun@bytedance.com>
>
> CC Roman, who worked in this area recently.
>
> Also why is this marked "[PATCH v5.4.y, v4.19.y]"? Has it been fixed otherwise
> in 5.5+ ?

Because the memcg slab/slub is reworked by Roman since v5.8.
Therefore, this problem exists in v5.7 and below.

>
> > ---
> >  mm/slab_common.c | 43 +++++++++++++++++++++++++++++++++++++++++--
> >  1 file changed, 41 insertions(+), 2 deletions(-)
> >
> > diff --git a/mm/slab_common.c b/mm/slab_common.c
> > index 8c1ffbf7de45..83ee6211aec7 100644
> > --- a/mm/slab_common.c
> > +++ b/mm/slab_common.c
> > @@ -258,6 +258,11 @@ static void memcg_unlink_cache(struct kmem_cache *s)
> >               list_del(&s->memcg_params.kmem_caches_node);
> >       }
> >  }
> > +
> > +static inline bool memcg_kmem_cache_dying(struct kmem_cache *s)
> > +{
> > +     return is_root_cache(s) && s->memcg_params.dying;
> > +}
> >  #else
> >  static inline int init_memcg_params(struct kmem_cache *s,
> >                                   struct kmem_cache *root_cache)
> > @@ -272,6 +277,11 @@ static inline void destroy_memcg_params(struct kmem_cache *s)
> >  static inline void memcg_unlink_cache(struct kmem_cache *s)
> >  {
> >  }
> > +
> > +static inline bool memcg_kmem_cache_dying(struct kmem_cache *s)
> > +{
> > +     return false;
> > +}
> >  #endif /* CONFIG_MEMCG_KMEM */
> >
> >  /*
> > @@ -326,6 +336,13 @@ int slab_unmergeable(struct kmem_cache *s)
> >       if (s->refcount < 0)
> >               return 1;
> >
> > +     /*
> > +      * If the kmem_cache is dying. We should also skip this
> > +      * kmem_cache.
> > +      */
> > +     if (memcg_kmem_cache_dying(s))
> > +             return 1;
> > +
> >       return 0;
> >  }
> >
> > @@ -944,8 +961,6 @@ void kmem_cache_destroy(struct kmem_cache *s)
> >       if (unlikely(!s))
> >               return;
> >
> > -     flush_memcg_workqueue(s);
> > -
> >       get_online_cpus();
> >       get_online_mems();
> >
> > @@ -955,6 +970,30 @@ void kmem_cache_destroy(struct kmem_cache *s)
> >       if (s->refcount)
> >               goto out_unlock;
> >
> > +#ifdef CONFIG_MEMCG_KMEM
> > +     mutex_unlock(&slab_mutex);
> > +
> > +     put_online_mems();
> > +     put_online_cpus();
> > +
> > +     flush_memcg_workqueue(s);
> > +
> > +     get_online_cpus();
> > +     get_online_mems();
> > +
> > +     mutex_lock(&slab_mutex);
> > +
> > +     if (WARN(s->refcount,
> > +              "kmem_cache_destroy %s: Slab cache is still referenced\n",
> > +              s->name)) {
> > +             /*
> > +              * Reset the dying flag setted by flush_memcg_workqueue().
> > +              */
> > +             s->memcg_params.dying = false;
> > +             goto out_unlock;
> > +     }
> > +#endif
> > +
> >       err = shutdown_memcg_caches(s);
> >       if (!err)
> >               err = shutdown_cache(s);
> >
>


--
Yours,
Muchun
Shakeel Butt July 15, 2020, 3:20 p.m. UTC | #3
Sorry I missed this email.


On Mon, Jul 6, 2020 at 11:28 PM Muchun Song <songmuchun@bytedance.com> wrote:
>
> If the kmem_cache refcount is greater than one, we should not
> mark the root kmem_cache as dying. If we mark the root kmem_cache
> dying incorrectly, the non-root kmem_cache can never be destroyed.
> It resulted in memory leak when memcg was destroyed. We can use the
> following steps to reproduce.
>
>   1) Use kmem_cache_create() to create a new kmem_cache named A.
>   2) Coincidentally, the kmem_cache A is an alias for kmem_cache B,
>      so the refcount of B is just increased.

I definitely missed the alias kmem cache case.

>   3) Use kmem_cache_destroy() to destroy the kmem_cache A, just
>      decrease the B's refcount but mark the B as dying.
>   4) Create a new memory cgroup and alloc memory from the kmem_cache
>      A. It leads to create a non-root kmem_cache for allocating.

I think in (4) you meant alloc memory from kmem_cache B instead of A.
There should not be any allocation from A after kmem_cache_destroy()
in (3).

>   5) When destroy the memory cgroup created in the step 4), the
>      non-root kmem_cache can never be destroyed.
>
> If we repeat steps 4) and 5), this will cause a lot of memory leak.
> So only when refcount reach zero, we mark the root kmem_cache as dying.
>
> Fixes: 92ee383f6daa ("mm: fix race between kmem_cache destroy, create and deactivate")
> Signed-off-by: Muchun Song <songmuchun@bytedance.com>

The patch looks fine.

Reviewed-by: Shakeel Butt <shakeelb@google.com>

> ---
>  mm/slab_common.c | 43 +++++++++++++++++++++++++++++++++++++++++--
>  1 file changed, 41 insertions(+), 2 deletions(-)
>
> diff --git a/mm/slab_common.c b/mm/slab_common.c
> index 8c1ffbf7de45..83ee6211aec7 100644
> --- a/mm/slab_common.c
> +++ b/mm/slab_common.c
> @@ -258,6 +258,11 @@ static void memcg_unlink_cache(struct kmem_cache *s)
>                 list_del(&s->memcg_params.kmem_caches_node);
>         }
>  }
> +
> +static inline bool memcg_kmem_cache_dying(struct kmem_cache *s)
> +{
> +       return is_root_cache(s) && s->memcg_params.dying;
> +}
>  #else
>  static inline int init_memcg_params(struct kmem_cache *s,
>                                     struct kmem_cache *root_cache)
> @@ -272,6 +277,11 @@ static inline void destroy_memcg_params(struct kmem_cache *s)
>  static inline void memcg_unlink_cache(struct kmem_cache *s)
>  {
>  }
> +
> +static inline bool memcg_kmem_cache_dying(struct kmem_cache *s)
> +{
> +       return false;
> +}
>  #endif /* CONFIG_MEMCG_KMEM */
>
>  /*
> @@ -326,6 +336,13 @@ int slab_unmergeable(struct kmem_cache *s)
>         if (s->refcount < 0)
>                 return 1;
>
> +       /*
> +        * If the kmem_cache is dying. We should also skip this
> +        * kmem_cache.
> +        */
> +       if (memcg_kmem_cache_dying(s))
> +               return 1;
> +
>         return 0;
>  }
>
> @@ -944,8 +961,6 @@ void kmem_cache_destroy(struct kmem_cache *s)
>         if (unlikely(!s))
>                 return;
>
> -       flush_memcg_workqueue(s);
> -
>         get_online_cpus();
>         get_online_mems();
>
> @@ -955,6 +970,30 @@ void kmem_cache_destroy(struct kmem_cache *s)
>         if (s->refcount)
>                 goto out_unlock;
>
> +#ifdef CONFIG_MEMCG_KMEM
> +       mutex_unlock(&slab_mutex);
> +
> +       put_online_mems();
> +       put_online_cpus();
> +
> +       flush_memcg_workqueue(s);
> +
> +       get_online_cpus();
> +       get_online_mems();
> +
> +       mutex_lock(&slab_mutex);
> +
> +       if (WARN(s->refcount,
> +                "kmem_cache_destroy %s: Slab cache is still referenced\n",
> +                s->name)) {
> +               /*
> +                * Reset the dying flag setted by flush_memcg_workqueue().
> +                */
> +               s->memcg_params.dying = false;
> +               goto out_unlock;
> +       }
> +#endif
> +
>         err = shutdown_memcg_caches(s);
>         if (!err)
>                 err = shutdown_cache(s);
> --
> 2.11.0
>
Muchun Song July 15, 2020, 3:28 p.m. UTC | #4
On Wed, Jul 15, 2020 at 11:21 PM Shakeel Butt <shakeelb@google.com> wrote:
>
> Sorry I missed this email.
>
>
> On Mon, Jul 6, 2020 at 11:28 PM Muchun Song <songmuchun@bytedance.com> wrote:
> >
> > If the kmem_cache refcount is greater than one, we should not
> > mark the root kmem_cache as dying. If we mark the root kmem_cache
> > dying incorrectly, the non-root kmem_cache can never be destroyed.
> > It resulted in memory leak when memcg was destroyed. We can use the
> > following steps to reproduce.
> >
> >   1) Use kmem_cache_create() to create a new kmem_cache named A.
> >   2) Coincidentally, the kmem_cache A is an alias for kmem_cache B,
> >      so the refcount of B is just increased.
>
> I definitely missed the alias kmem cache case.
>
> >   3) Use kmem_cache_destroy() to destroy the kmem_cache A, just
> >      decrease the B's refcount but mark the B as dying.
> >   4) Create a new memory cgroup and alloc memory from the kmem_cache
> >      A. It leads to create a non-root kmem_cache for allocating.
>
> I think in (4) you meant alloc memory from kmem_cache B instead of A.
> There should not be any allocation from A after kmem_cache_destroy()
> in (3).

Yeah, here is a typo. I will fix it in the next version patch.

>
> >   5) When destroy the memory cgroup created in the step 4), the
> >      non-root kmem_cache can never be destroyed.
> >
> > If we repeat steps 4) and 5), this will cause a lot of memory leak.
> > So only when refcount reach zero, we mark the root kmem_cache as dying.
> >
> > Fixes: 92ee383f6daa ("mm: fix race between kmem_cache destroy, create and deactivate")
> > Signed-off-by: Muchun Song <songmuchun@bytedance.com>
>
> The patch looks fine.
>
> Reviewed-by: Shakeel Butt <shakeelb@google.com>
>
> > ---
> >  mm/slab_common.c | 43 +++++++++++++++++++++++++++++++++++++++++--
> >  1 file changed, 41 insertions(+), 2 deletions(-)
> >
> > diff --git a/mm/slab_common.c b/mm/slab_common.c
> > index 8c1ffbf7de45..83ee6211aec7 100644
> > --- a/mm/slab_common.c
> > +++ b/mm/slab_common.c
> > @@ -258,6 +258,11 @@ static void memcg_unlink_cache(struct kmem_cache *s)
> >                 list_del(&s->memcg_params.kmem_caches_node);
> >         }
> >  }
> > +
> > +static inline bool memcg_kmem_cache_dying(struct kmem_cache *s)
> > +{
> > +       return is_root_cache(s) && s->memcg_params.dying;
> > +}
> >  #else
> >  static inline int init_memcg_params(struct kmem_cache *s,
> >                                     struct kmem_cache *root_cache)
> > @@ -272,6 +277,11 @@ static inline void destroy_memcg_params(struct kmem_cache *s)
> >  static inline void memcg_unlink_cache(struct kmem_cache *s)
> >  {
> >  }
> > +
> > +static inline bool memcg_kmem_cache_dying(struct kmem_cache *s)
> > +{
> > +       return false;
> > +}
> >  #endif /* CONFIG_MEMCG_KMEM */
> >
> >  /*
> > @@ -326,6 +336,13 @@ int slab_unmergeable(struct kmem_cache *s)
> >         if (s->refcount < 0)
> >                 return 1;
> >
> > +       /*
> > +        * If the kmem_cache is dying. We should also skip this
> > +        * kmem_cache.
> > +        */
> > +       if (memcg_kmem_cache_dying(s))
> > +               return 1;
> > +
> >         return 0;
> >  }
> >
> > @@ -944,8 +961,6 @@ void kmem_cache_destroy(struct kmem_cache *s)
> >         if (unlikely(!s))
> >                 return;
> >
> > -       flush_memcg_workqueue(s);
> > -
> >         get_online_cpus();
> >         get_online_mems();
> >
> > @@ -955,6 +970,30 @@ void kmem_cache_destroy(struct kmem_cache *s)
> >         if (s->refcount)
> >                 goto out_unlock;
> >
> > +#ifdef CONFIG_MEMCG_KMEM
> > +       mutex_unlock(&slab_mutex);
> > +
> > +       put_online_mems();
> > +       put_online_cpus();
> > +
> > +       flush_memcg_workqueue(s);
> > +
> > +       get_online_cpus();
> > +       get_online_mems();
> > +
> > +       mutex_lock(&slab_mutex);
> > +
> > +       if (WARN(s->refcount,
> > +                "kmem_cache_destroy %s: Slab cache is still referenced\n",
> > +                s->name)) {
> > +               /*
> > +                * Reset the dying flag setted by flush_memcg_workqueue().
> > +                */
> > +               s->memcg_params.dying = false;
> > +               goto out_unlock;
> > +       }
> > +#endif
> > +
> >         err = shutdown_memcg_caches(s);
> >         if (!err)
> >                 err = shutdown_cache(s);
> > --
> > 2.11.0
> >
Vlastimil Babka July 15, 2020, 3:43 p.m. UTC | #5
On 7/15/20 5:13 PM, Muchun Song wrote:
> On Wed, Jul 15, 2020 at 7:32 PM Vlastimil Babka <vbabka@suse.cz> wrote:
>>
>> On 7/7/20 8:27 AM, Muchun Song wrote:
>> > If the kmem_cache refcount is greater than one, we should not
>> > mark the root kmem_cache as dying. If we mark the root kmem_cache
>> > dying incorrectly, the non-root kmem_cache can never be destroyed.
>> > It resulted in memory leak when memcg was destroyed. We can use the
>> > following steps to reproduce.
>> >
>> >   1) Use kmem_cache_create() to create a new kmem_cache named A.
>> >   2) Coincidentally, the kmem_cache A is an alias for kmem_cache B,
>> >      so the refcount of B is just increased.
>> >   3) Use kmem_cache_destroy() to destroy the kmem_cache A, just
>> >      decrease the B's refcount but mark the B as dying.
>> >   4) Create a new memory cgroup and alloc memory from the kmem_cache
>> >      A. It leads to create a non-root kmem_cache for allocating.
>> >   5) When destroy the memory cgroup created in the step 4), the
>> >      non-root kmem_cache can never be destroyed.
>> >
>> > If we repeat steps 4) and 5), this will cause a lot of memory leak.
>> > So only when refcount reach zero, we mark the root kmem_cache as dying.
>> >
>> > Fixes: 92ee383f6daa ("mm: fix race between kmem_cache destroy, create and deactivate")
>> > Signed-off-by: Muchun Song <songmuchun@bytedance.com>
>>
>> CC Roman, who worked in this area recently.
>>
>> Also why is this marked "[PATCH v5.4.y, v4.19.y]"? Has it been fixed otherwise
>> in 5.5+ ?
> 
> Because the memcg slab/slub is reworked by Roman since v5.8.

That rework is in mmotm, so scheduled for 5.9, AFAIK. If you mean "The new
cgroup slab memory controller" series.

> Therefore, this problem exists in v5.7 and below.

Even 5.7 has a stable series, so no need to list only the LTS's.
To sum up, the patch (once reviewed) should be queued for mainline as usual,
perhaps sent before 5.8 is final, if deemed safe enough, and with added

Cc: <stable@vger.kernel.org>

and the Fixes: tag you provided, the applicable stable versions will pick it.

Vlastimil
Muchun Song July 15, 2020, 3:55 p.m. UTC | #6
On Wed, Jul 15, 2020 at 11:43 PM Vlastimil Babka <vbabka@suse.cz> wrote:
>
> On 7/15/20 5:13 PM, Muchun Song wrote:
> > On Wed, Jul 15, 2020 at 7:32 PM Vlastimil Babka <vbabka@suse.cz> wrote:
> >>
> >> On 7/7/20 8:27 AM, Muchun Song wrote:
> >> > If the kmem_cache refcount is greater than one, we should not
> >> > mark the root kmem_cache as dying. If we mark the root kmem_cache
> >> > dying incorrectly, the non-root kmem_cache can never be destroyed.
> >> > It resulted in memory leak when memcg was destroyed. We can use the
> >> > following steps to reproduce.
> >> >
> >> >   1) Use kmem_cache_create() to create a new kmem_cache named A.
> >> >   2) Coincidentally, the kmem_cache A is an alias for kmem_cache B,
> >> >      so the refcount of B is just increased.
> >> >   3) Use kmem_cache_destroy() to destroy the kmem_cache A, just
> >> >      decrease the B's refcount but mark the B as dying.
> >> >   4) Create a new memory cgroup and alloc memory from the kmem_cache
> >> >      A. It leads to create a non-root kmem_cache for allocating.
> >> >   5) When destroy the memory cgroup created in the step 4), the
> >> >      non-root kmem_cache can never be destroyed.
> >> >
> >> > If we repeat steps 4) and 5), this will cause a lot of memory leak.
> >> > So only when refcount reach zero, we mark the root kmem_cache as dying.
> >> >
> >> > Fixes: 92ee383f6daa ("mm: fix race between kmem_cache destroy, create and deactivate")
> >> > Signed-off-by: Muchun Song <songmuchun@bytedance.com>
> >>
> >> CC Roman, who worked in this area recently.
> >>
> >> Also why is this marked "[PATCH v5.4.y, v4.19.y]"? Has it been fixed otherwise
> >> in 5.5+ ?
> >
> > Because the memcg slab/slub is reworked by Roman since v5.8.
>
> That rework is in mmotm, so scheduled for 5.9, AFAIK. If you mean "The new
> cgroup slab memory controller" series.

Yeah, I mean "The new cgroup slab memory controller".

>
> > Therefore, this problem exists in v5.7 and below.
>
> Even 5.7 has a stable series, so no need to list only the LTS's.
> To sum up, the patch (once reviewed) should be queued for mainline as usual,
> perhaps sent before 5.8 is final, if deemed safe enough, and with added
>
> Cc: <stable@vger.kernel.org>
>
> and the Fixes: tag you provided, the applicable stable versions will pick it.

Got it. Thanks.

>
> Vlastimil
Roman Gushchin July 15, 2020, 4:24 p.m. UTC | #7
On Wed, Jul 15, 2020 at 01:32:00PM +0200, Vlastimil Babka wrote:
> On 7/7/20 8:27 AM, Muchun Song wrote:
> > If the kmem_cache refcount is greater than one, we should not
> > mark the root kmem_cache as dying. If we mark the root kmem_cache
> > dying incorrectly, the non-root kmem_cache can never be destroyed.
> > It resulted in memory leak when memcg was destroyed. We can use the
> > following steps to reproduce.
> > 
> >   1) Use kmem_cache_create() to create a new kmem_cache named A.
> >   2) Coincidentally, the kmem_cache A is an alias for kmem_cache B,
> >      so the refcount of B is just increased.
> >   3) Use kmem_cache_destroy() to destroy the kmem_cache A, just
> >      decrease the B's refcount but mark the B as dying.
> >   4) Create a new memory cgroup and alloc memory from the kmem_cache
> >      A. It leads to create a non-root kmem_cache for allocating.
> >   5) When destroy the memory cgroup created in the step 4), the
> >      non-root kmem_cache can never be destroyed.

Hello, Muchun!

If the scenario above is accurate, it means that somebody is allocating
from the kmem_cache A (or it's memcg counterparts, doesn't matter) after
calling kmem_cache_destroy()? If so, it's an API violation, and the following
memory leak is a non-issue on the slab side. No one should allocate memory
after calling kmem_cache_destroy(). It has to be called after all outstanding
allocations are freed, and it should be literally the last operation
with the kmem_cache.

Kmem_cache aliasing/sharing, as well as memcg accounting implementation are
implementation details and should not affect the picture.

I wonder, did you see the problem in the wild? How does it look like?
Which kmem_cache is involved? Etc.

BTW, Vlastimil is absolutely right about stable backports and rework planned
for 5.9, but let's figure out the problem first.

Thank you!

> > 
> > If we repeat steps 4) and 5), this will cause a lot of memory leak.
> > So only when refcount reach zero, we mark the root kmem_cache as dying.
> > 
> > Fixes: 92ee383f6daa ("mm: fix race between kmem_cache destroy, create and deactivate")
> > Signed-off-by: Muchun Song <songmuchun@bytedance.com>
> 
> CC Roman, who worked in this area recently.
> 
> Also why is this marked "[PATCH v5.4.y, v4.19.y]"? Has it been fixed otherwise
> in 5.5+ ?
> 
> > ---
> >  mm/slab_common.c | 43 +++++++++++++++++++++++++++++++++++++++++--
> >  1 file changed, 41 insertions(+), 2 deletions(-)
> > 
> > diff --git a/mm/slab_common.c b/mm/slab_common.c
> > index 8c1ffbf7de45..83ee6211aec7 100644
> > --- a/mm/slab_common.c
> > +++ b/mm/slab_common.c
> > @@ -258,6 +258,11 @@ static void memcg_unlink_cache(struct kmem_cache *s)
> >  		list_del(&s->memcg_params.kmem_caches_node);
> >  	}
> >  }
> > +
> > +static inline bool memcg_kmem_cache_dying(struct kmem_cache *s)
> > +{
> > +	return is_root_cache(s) && s->memcg_params.dying;
> > +}
> >  #else
> >  static inline int init_memcg_params(struct kmem_cache *s,
> >  				    struct kmem_cache *root_cache)
> > @@ -272,6 +277,11 @@ static inline void destroy_memcg_params(struct kmem_cache *s)
> >  static inline void memcg_unlink_cache(struct kmem_cache *s)
> >  {
> >  }
> > +
> > +static inline bool memcg_kmem_cache_dying(struct kmem_cache *s)
> > +{
> > +	return false;
> > +}
> >  #endif /* CONFIG_MEMCG_KMEM */
> >  
> >  /*
> > @@ -326,6 +336,13 @@ int slab_unmergeable(struct kmem_cache *s)
> >  	if (s->refcount < 0)
> >  		return 1;
> >  
> > +	/*
> > +	 * If the kmem_cache is dying. We should also skip this
> > +	 * kmem_cache.
> > +	 */
> > +	if (memcg_kmem_cache_dying(s))
> > +		return 1;
> > +
> >  	return 0;
> >  }
> >  
> > @@ -944,8 +961,6 @@ void kmem_cache_destroy(struct kmem_cache *s)
> >  	if (unlikely(!s))
> >  		return;
> >  
> > -	flush_memcg_workqueue(s);
> > -
> >  	get_online_cpus();
> >  	get_online_mems();
> >  
> > @@ -955,6 +970,30 @@ void kmem_cache_destroy(struct kmem_cache *s)
> >  	if (s->refcount)
> >  		goto out_unlock;
> >  
> > +#ifdef CONFIG_MEMCG_KMEM
> > +	mutex_unlock(&slab_mutex);
> > +
> > +	put_online_mems();
> > +	put_online_cpus();
> > +
> > +	flush_memcg_workqueue(s);
> > +
> > +	get_online_cpus();
> > +	get_online_mems();
> > +
> > +	mutex_lock(&slab_mutex);
> > +
> > +	if (WARN(s->refcount,
> > +		 "kmem_cache_destroy %s: Slab cache is still referenced\n",
> > +		 s->name)) {
> > +		/*
> > +		 * Reset the dying flag setted by flush_memcg_workqueue().
> > +		 */
> > +		s->memcg_params.dying = false;
> > +		goto out_unlock;
> > +	}
> > +#endif
> > +
> >  	err = shutdown_memcg_caches(s);
> >  	if (!err)
> >  		err = shutdown_cache(s);
> > 
>
Muchun Song July 15, 2020, 4:31 p.m. UTC | #8
On Thu, Jul 16, 2020 at 12:24 AM Roman Gushchin <guro@fb.com> wrote:
>
> On Wed, Jul 15, 2020 at 01:32:00PM +0200, Vlastimil Babka wrote:
> > On 7/7/20 8:27 AM, Muchun Song wrote:
> > > If the kmem_cache refcount is greater than one, we should not
> > > mark the root kmem_cache as dying. If we mark the root kmem_cache
> > > dying incorrectly, the non-root kmem_cache can never be destroyed.
> > > It resulted in memory leak when memcg was destroyed. We can use the
> > > following steps to reproduce.
> > >
> > >   1) Use kmem_cache_create() to create a new kmem_cache named A.
> > >   2) Coincidentally, the kmem_cache A is an alias for kmem_cache B,
> > >      so the refcount of B is just increased.
> > >   3) Use kmem_cache_destroy() to destroy the kmem_cache A, just
> > >      decrease the B's refcount but mark the B as dying.
> > >   4) Create a new memory cgroup and alloc memory from the kmem_cache
> > >      A. It leads to create a non-root kmem_cache for allocating.

Hi Roman,

I am sorry, here is a typo. I mean the step 4) allocates memory from
the kmem_cache
B instead of A.

> > >   5) When destroy the memory cgroup created in the step 4), the
> > >      non-root kmem_cache can never be destroyed.
>
> Hello, Muchun!
>
> If the scenario above is accurate, it means that somebody is allocating
> from the kmem_cache A (or it's memcg counterparts, doesn't matter) after
> calling kmem_cache_destroy()? If so, it's an API violation, and the following
> memory leak is a non-issue on the slab side. No one should allocate memory
> after calling kmem_cache_destroy(). It has to be called after all outstanding
> allocations are freed, and it should be literally the last operation
> with the kmem_cache.
>
> Kmem_cache aliasing/sharing, as well as memcg accounting implementation are
> implementation details and should not affect the picture.
>
> I wonder, did you see the problem in the wild? How does it look like?
> Which kmem_cache is involved? Etc.
>
> BTW, Vlastimil is absolutely right about stable backports and rework planned
> for 5.9, but let's figure out the problem first.
>
> Thank you!
>
> > >
> > > If we repeat steps 4) and 5), this will cause a lot of memory leak.
> > > So only when refcount reach zero, we mark the root kmem_cache as dying.
> > >
> > > Fixes: 92ee383f6daa ("mm: fix race between kmem_cache destroy, create and deactivate")
> > > Signed-off-by: Muchun Song <songmuchun@bytedance.com>
> >
> > CC Roman, who worked in this area recently.
> >
> > Also why is this marked "[PATCH v5.4.y, v4.19.y]"? Has it been fixed otherwise
> > in 5.5+ ?
> >
> > > ---
> > >  mm/slab_common.c | 43 +++++++++++++++++++++++++++++++++++++++++--
> > >  1 file changed, 41 insertions(+), 2 deletions(-)
> > >
> > > diff --git a/mm/slab_common.c b/mm/slab_common.c
> > > index 8c1ffbf7de45..83ee6211aec7 100644
> > > --- a/mm/slab_common.c
> > > +++ b/mm/slab_common.c
> > > @@ -258,6 +258,11 @@ static void memcg_unlink_cache(struct kmem_cache *s)
> > >             list_del(&s->memcg_params.kmem_caches_node);
> > >     }
> > >  }
> > > +
> > > +static inline bool memcg_kmem_cache_dying(struct kmem_cache *s)
> > > +{
> > > +   return is_root_cache(s) && s->memcg_params.dying;
> > > +}
> > >  #else
> > >  static inline int init_memcg_params(struct kmem_cache *s,
> > >                                 struct kmem_cache *root_cache)
> > > @@ -272,6 +277,11 @@ static inline void destroy_memcg_params(struct kmem_cache *s)
> > >  static inline void memcg_unlink_cache(struct kmem_cache *s)
> > >  {
> > >  }
> > > +
> > > +static inline bool memcg_kmem_cache_dying(struct kmem_cache *s)
> > > +{
> > > +   return false;
> > > +}
> > >  #endif /* CONFIG_MEMCG_KMEM */
> > >
> > >  /*
> > > @@ -326,6 +336,13 @@ int slab_unmergeable(struct kmem_cache *s)
> > >     if (s->refcount < 0)
> > >             return 1;
> > >
> > > +   /*
> > > +    * If the kmem_cache is dying. We should also skip this
> > > +    * kmem_cache.
> > > +    */
> > > +   if (memcg_kmem_cache_dying(s))
> > > +           return 1;
> > > +
> > >     return 0;
> > >  }
> > >
> > > @@ -944,8 +961,6 @@ void kmem_cache_destroy(struct kmem_cache *s)
> > >     if (unlikely(!s))
> > >             return;
> > >
> > > -   flush_memcg_workqueue(s);
> > > -
> > >     get_online_cpus();
> > >     get_online_mems();
> > >
> > > @@ -955,6 +970,30 @@ void kmem_cache_destroy(struct kmem_cache *s)
> > >     if (s->refcount)
> > >             goto out_unlock;
> > >
> > > +#ifdef CONFIG_MEMCG_KMEM
> > > +   mutex_unlock(&slab_mutex);
> > > +
> > > +   put_online_mems();
> > > +   put_online_cpus();
> > > +
> > > +   flush_memcg_workqueue(s);
> > > +
> > > +   get_online_cpus();
> > > +   get_online_mems();
> > > +
> > > +   mutex_lock(&slab_mutex);
> > > +
> > > +   if (WARN(s->refcount,
> > > +            "kmem_cache_destroy %s: Slab cache is still referenced\n",
> > > +            s->name)) {
> > > +           /*
> > > +            * Reset the dying flag setted by flush_memcg_workqueue().
> > > +            */
> > > +           s->memcg_params.dying = false;
> > > +           goto out_unlock;
> > > +   }
> > > +#endif
> > > +
> > >     err = shutdown_memcg_caches(s);
> > >     if (!err)
> > >             err = shutdown_cache(s);
> > >
> >
diff mbox series

Patch

diff --git a/mm/slab_common.c b/mm/slab_common.c
index 8c1ffbf7de45..83ee6211aec7 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -258,6 +258,11 @@  static void memcg_unlink_cache(struct kmem_cache *s)
 		list_del(&s->memcg_params.kmem_caches_node);
 	}
 }
+
+static inline bool memcg_kmem_cache_dying(struct kmem_cache *s)
+{
+	return is_root_cache(s) && s->memcg_params.dying;
+}
 #else
 static inline int init_memcg_params(struct kmem_cache *s,
 				    struct kmem_cache *root_cache)
@@ -272,6 +277,11 @@  static inline void destroy_memcg_params(struct kmem_cache *s)
 static inline void memcg_unlink_cache(struct kmem_cache *s)
 {
 }
+
+static inline bool memcg_kmem_cache_dying(struct kmem_cache *s)
+{
+	return false;
+}
 #endif /* CONFIG_MEMCG_KMEM */
 
 /*
@@ -326,6 +336,13 @@  int slab_unmergeable(struct kmem_cache *s)
 	if (s->refcount < 0)
 		return 1;
 
+	/*
+	 * If the kmem_cache is dying. We should also skip this
+	 * kmem_cache.
+	 */
+	if (memcg_kmem_cache_dying(s))
+		return 1;
+
 	return 0;
 }
 
@@ -944,8 +961,6 @@  void kmem_cache_destroy(struct kmem_cache *s)
 	if (unlikely(!s))
 		return;
 
-	flush_memcg_workqueue(s);
-
 	get_online_cpus();
 	get_online_mems();
 
@@ -955,6 +970,30 @@  void kmem_cache_destroy(struct kmem_cache *s)
 	if (s->refcount)
 		goto out_unlock;
 
+#ifdef CONFIG_MEMCG_KMEM
+	mutex_unlock(&slab_mutex);
+
+	put_online_mems();
+	put_online_cpus();
+
+	flush_memcg_workqueue(s);
+
+	get_online_cpus();
+	get_online_mems();
+
+	mutex_lock(&slab_mutex);
+
+	if (WARN(s->refcount,
+		 "kmem_cache_destroy %s: Slab cache is still referenced\n",
+		 s->name)) {
+		/*
+		 * Reset the dying flag setted by flush_memcg_workqueue().
+		 */
+		s->memcg_params.dying = false;
+		goto out_unlock;
+	}
+#endif
+
 	err = shutdown_memcg_caches(s);
 	if (!err)
 		err = shutdown_cache(s);