
[v3,2/2] mm/slub: refactor deactivate_slab()

Message ID 20220307074057.902222-3-42.hyeyoo@gmail.com (mailing list archive)
State New
Series slab cleanups

Commit Message

Hyeonggon Yoo March 7, 2022, 7:40 a.m. UTC
Simplify deactivate_slab() by unlocking n->list_lock and retrying
cmpxchg_double() when cmpxchg_double() fails, and by performing
add_{partial,full} only when it succeeds.

Releasing and taking n->list_lock again here is not harmful as SLUB
avoids deactivating slabs as much as possible.

[ vbabka@suse.cz: perform add_{partial,full} when cmpxchg_double()
  succeeds.

  count deactivating full slabs even if the debugging flag is not set. ]

Signed-off-by: Hyeonggon Yoo <42.hyeyoo@gmail.com>
---
 mm/slub.c | 91 +++++++++++++++++++++++--------------------------------
 1 file changed, 38 insertions(+), 53 deletions(-)

Comments

Vlastimil Babka March 7, 2022, 4:40 p.m. UTC | #1
On 3/7/22 08:40, Hyeonggon Yoo wrote:
> Simplify deactivate_slab() by unlocking n->list_lock and retrying
> cmpxchg_double() when cmpxchg_double() fails, and perform
> add_{partial,full} only when it succeed.
> 
> Releasing and taking n->list_lock again here is not harmful as SLUB
> avoids deactivating slabs as much as possible.
> 
> [ vbabka@suse.cz: perform add_{partial,full} when cmpxchg_double()
>   succeed.
> 
>   count deactivating full slabs even if debugging flag is not set. ]
> 
> Signed-off-by: Hyeonggon Yoo <42.hyeyoo@gmail.com>

Reviewed-by: Vlastimil Babka <vbabka@suse.cz>

Adding both to slab-next. I fixed up some nits myself, see below:

>  
> @@ -2420,61 +2416,50 @@ static void deactivate_slab(struct kmem_cache *s, struct slab *slab,
>  	new.frozen = 0;
>  
>  	if (!new.inuse && n->nr_partial >= s->min_partial)
> -		m = M_FREE;
> +		mode = M_FREE;
>  	else if (new.freelist) {

This was against kernel style even before the patch - we use { } in the
'else if' branch, thus all branches should use { } even if one-line.
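For illustration only (comments elided; this is just a sketch of the brace
fixup, not necessarily the exact committed hunk), the chain would read:

	if (!new.inuse && n->nr_partial >= s->min_partial) {
		mode = M_FREE;
	} else if (new.freelist) {
		mode = M_PARTIAL;
		spin_lock_irqsave(&n->list_lock, flags);
	} else if (kmem_cache_debug_flags(s, SLAB_STORE_USER)) {
		mode = M_FULL;
		spin_lock_irqsave(&n->list_lock, flags);
	} else {
		mode = M_FULL_NOLIST;
	}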

> -		m = M_PARTIAL;
> -		if (!lock) {
> -			lock = 1;
> -			/*
> -			 * Taking the spinlock removes the possibility that
> -			 * acquire_slab() will see a slab that is frozen
> -			 */
> -			spin_lock_irqsave(&n->list_lock, flags);
> -		}
> -	} else {
> -		m = M_FULL;
> -		if (kmem_cache_debug_flags(s, SLAB_STORE_USER) && !lock) {
> -			lock = 1;
> -			/*
> -			 * This also ensures that the scanning of full
> -			 * slabs from diagnostic functions will not see
> -			 * any frozen slabs.
> -			 */
> -			spin_lock_irqsave(&n->list_lock, flags);
> -		}
> -	}
> -
> -	if (l != m) {
> -		if (l == M_PARTIAL)
> -			remove_partial(n, slab);
> -		else if (l == M_FULL)
> -			remove_full(s, n, slab);
> +		mode = M_PARTIAL;
> +		/*
> +		 * Taking the spinlock removes the possibility that
> +		 * acquire_slab() will see a slab that is frozen
> +		 */
> +		spin_lock_irqsave(&n->list_lock, flags);
> +	} else if (kmem_cache_debug_flags(s, SLAB_STORE_USER)) {
> +		mode = M_FULL;
> +		/*
> +		 * This also ensures that the scanning of full
> +		 * slabs from diagnostic functions will not see
> +		 * any frozen slabs.
> +		 */
> +		spin_lock_irqsave(&n->list_lock, flags);
> +	} else
> +		mode = M_FULL_NOLIST;

Ditto here (this is new).

> -		if (m == M_PARTIAL)
> -			add_partial(n, slab, tail);
> -		else if (m == M_FULL)
> -			add_full(s, n, slab);
> -	}
>  
> -	l = m;
>  	if (!cmpxchg_double_slab(s, slab,
>  				old.freelist, old.counters,
>  				new.freelist, new.counters,
> -				"unfreezing slab"))
> +				"unfreezing slab")) {
> +		if (mode == M_PARTIAL || mode == M_FULL)
> +			spin_unlock_irqrestore(&n->list_lock, flags);
>  		goto redo;
> +	}
>  
> -	if (lock)
> -		spin_unlock_irqrestore(&n->list_lock, flags);
>  
> -	if (m == M_PARTIAL)
> +	if (mode == M_PARTIAL) {
> +		add_partial(n, slab, tail);
> +		spin_unlock_irqrestore(&n->list_lock, flags);
>  		stat(s, tail);
> -	else if (m == M_FULL)
> -		stat(s, DEACTIVATE_FULL);
> -	else if (m == M_FREE) {
> +	} else if (mode == M_FREE) {
>  		stat(s, DEACTIVATE_EMPTY);
>  		discard_slab(s, slab);
>  		stat(s, FREE_SLAB);
> -	}
> +	} else if (mode == M_FULL) {
> +		add_full(s, n, slab);
> +		spin_unlock_irqrestore(&n->list_lock, flags);
> +		stat(s, DEACTIVATE_FULL);
> +	} else if (mode == M_FULL_NOLIST)
> +		stat(s, DEACTIVATE_FULL);

And here.

>  }
>  
>  #ifdef CONFIG_SLUB_CPU_PARTIAL
Xiongwei Song March 8, 2022, 1:40 a.m. UTC | #2
Hello,

On Mon, Mar 7, 2022 at 3:41 PM Hyeonggon Yoo <42.hyeyoo@gmail.com> wrote:
>
> Simplify deactivate_slab() by unlocking n->list_lock and retrying
> cmpxchg_double() when cmpxchg_double() fails, and perform
> add_{partial,full} only when it succeed.
>
> Releasing and taking n->list_lock again here is not harmful as SLUB
> avoids deactivating slabs as much as possible.
>
> [ vbabka@suse.cz: perform add_{partial,full} when cmpxchg_double()
>   succeed.
>
>   count deactivating full slabs even if debugging flag is not set. ]
>
> Signed-off-by: Hyeonggon Yoo <42.hyeyoo@gmail.com>
> ---
>  mm/slub.c | 91 +++++++++++++++++++++++--------------------------------
>  1 file changed, 38 insertions(+), 53 deletions(-)
>
> diff --git a/mm/slub.c b/mm/slub.c
> index 1ce09b0347ad..f0cb9d0443ac 100644
> --- a/mm/slub.c
> +++ b/mm/slub.c
> @@ -2348,10 +2348,10 @@ static void init_kmem_cache_cpus(struct kmem_cache *s)
>  static void deactivate_slab(struct kmem_cache *s, struct slab *slab,
>                             void *freelist)
>  {
> -       enum slab_modes { M_NONE, M_PARTIAL, M_FULL, M_FREE };
> +       enum slab_modes { M_NONE, M_PARTIAL, M_FULL, M_FREE, M_FULL_NOLIST };
>         struct kmem_cache_node *n = get_node(s, slab_nid(slab));
> -       int lock = 0, free_delta = 0;
> -       enum slab_modes l = M_NONE, m = M_NONE;
> +       int free_delta = 0;
> +       enum slab_modes mode = M_NONE;
>         void *nextfree, *freelist_iter, *freelist_tail;
>         int tail = DEACTIVATE_TO_HEAD;
>         unsigned long flags = 0;
> @@ -2393,14 +2393,10 @@ static void deactivate_slab(struct kmem_cache *s, struct slab *slab,
>          * Ensure that the slab is unfrozen while the list presence
>          * reflects the actual number of objects during unfreeze.
>          *
> -        * We setup the list membership and then perform a cmpxchg
> -        * with the count. If there is a mismatch then the slab
> -        * is not unfrozen but the slab is on the wrong list.
> -        *
> -        * Then we restart the process which may have to remove
> -        * the slab from the list that we just put it on again
> -        * because the number of objects in the slab may have
> -        * changed.
> +        * We first perform cmpxchg holding lock and insert to list
> +        * when it succeed. If there is mismatch then the slab is not
> +        * unfrozen and number of objects in the slab may have changed.
> +        * Then release lock and retry cmpxchg again.
>          */
>  redo:
>
> @@ -2420,61 +2416,50 @@ static void deactivate_slab(struct kmem_cache *s, struct slab *slab,
>         new.frozen = 0;
>
>         if (!new.inuse && n->nr_partial >= s->min_partial)
> -               m = M_FREE;
> +               mode = M_FREE;
>         else if (new.freelist) {
> -               m = M_PARTIAL;
> -               if (!lock) {
> -                       lock = 1;
> -                       /*
> -                        * Taking the spinlock removes the possibility that
> -                        * acquire_slab() will see a slab that is frozen
> -                        */
> -                       spin_lock_irqsave(&n->list_lock, flags);
> -               }
> -       } else {
> -               m = M_FULL;
> -               if (kmem_cache_debug_flags(s, SLAB_STORE_USER) && !lock) {
> -                       lock = 1;
> -                       /*
> -                        * This also ensures that the scanning of full
> -                        * slabs from diagnostic functions will not see
> -                        * any frozen slabs.
> -                        */
> -                       spin_lock_irqsave(&n->list_lock, flags);
> -               }
> -       }
> -
> -       if (l != m) {
> -               if (l == M_PARTIAL)
> -                       remove_partial(n, slab);
> -               else if (l == M_FULL)
> -                       remove_full(s, n, slab);
> +               mode = M_PARTIAL;
> +               /*
> +                * Taking the spinlock removes the possibility that
> +                * acquire_slab() will see a slab that is frozen
> +                */
> +               spin_lock_irqsave(&n->list_lock, flags);
> +       } else if (kmem_cache_debug_flags(s, SLAB_STORE_USER)) {
> +               mode = M_FULL;
> +               /*
> +                * This also ensures that the scanning of full
> +                * slabs from diagnostic functions will not see
> +                * any frozen slabs.
> +                */
> +               spin_lock_irqsave(&n->list_lock, flags);
> +       } else
> +               mode = M_FULL_NOLIST;
>
> -               if (m == M_PARTIAL)
> -                       add_partial(n, slab, tail);
> -               else if (m == M_FULL)
> -                       add_full(s, n, slab);
> -       }
>
> -       l = m;
>         if (!cmpxchg_double_slab(s, slab,
>                                 old.freelist, old.counters,
>                                 new.freelist, new.counters,
> -                               "unfreezing slab"))
> +                               "unfreezing slab")) {
> +               if (mode == M_PARTIAL || mode == M_FULL)
> +                       spin_unlock_irqrestore(&n->list_lock, flags);

The slab doesn't belong to any node here, so should we remove the
locking/unlocking of the spinlock around the cmpxchg_double_slab() call?
Is just calling spin_lock_irqsave() before the add_partial()/add_full()
call fine?

>                 goto redo;

How about do {...} while (!cmpxchg_double_slab())? Wouldn't the readability be better?

Regards,
Xiongwei

> +       }
>
> -       if (lock)
> -               spin_unlock_irqrestore(&n->list_lock, flags);
>
> -       if (m == M_PARTIAL)
> +       if (mode == M_PARTIAL) {
> +               add_partial(n, slab, tail);
> +               spin_unlock_irqrestore(&n->list_lock, flags);
>                 stat(s, tail);
> -       else if (m == M_FULL)
> -               stat(s, DEACTIVATE_FULL);
> -       else if (m == M_FREE) {
> +       } else if (mode == M_FREE) {
>                 stat(s, DEACTIVATE_EMPTY);
>                 discard_slab(s, slab);
>                 stat(s, FREE_SLAB);
> -       }
> +       } else if (mode == M_FULL) {
> +               add_full(s, n, slab);
> +               spin_unlock_irqrestore(&n->list_lock, flags);
> +               stat(s, DEACTIVATE_FULL);
> +       } else if (mode == M_FULL_NOLIST)
> +               stat(s, DEACTIVATE_FULL);
>  }
>
>  #ifdef CONFIG_SLUB_CPU_PARTIAL
> --
> 2.33.1
>
>
Hyeonggon Yoo March 8, 2022, 3:50 a.m. UTC | #3
On Tue, Mar 08, 2022 at 09:40:07AM +0800, Xiongwei Song wrote:
> Hello,
> 
> On Mon, Mar 7, 2022 at 3:41 PM Hyeonggon Yoo <42.hyeyoo@gmail.com> wrote:
> >
> > Simplify deactivate_slab() by unlocking n->list_lock and retrying
> > cmpxchg_double() when cmpxchg_double() fails, and perform
> > add_{partial,full} only when it succeed.
> >
> > Releasing and taking n->list_lock again here is not harmful as SLUB
> > avoids deactivating slabs as much as possible.
> >
> > [ vbabka@suse.cz: perform add_{partial,full} when cmpxchg_double()
> >   succeed.
> >
> >   count deactivating full slabs even if debugging flag is not set. ]
> >
> > Signed-off-by: Hyeonggon Yoo <42.hyeyoo@gmail.com>
> > ---
> >  mm/slub.c | 91 +++++++++++++++++++++++--------------------------------
> >  1 file changed, 38 insertions(+), 53 deletions(-)
> >
> > diff --git a/mm/slub.c b/mm/slub.c
> > index 1ce09b0347ad..f0cb9d0443ac 100644
> > --- a/mm/slub.c
> > +++ b/mm/slub.c
> > @@ -2348,10 +2348,10 @@ static void init_kmem_cache_cpus(struct kmem_cache *s)
> >  static void deactivate_slab(struct kmem_cache *s, struct slab *slab,
> >                             void *freelist)
> >  {
> > -       enum slab_modes { M_NONE, M_PARTIAL, M_FULL, M_FREE };
> > +       enum slab_modes { M_NONE, M_PARTIAL, M_FULL, M_FREE, M_FULL_NOLIST };
> >         struct kmem_cache_node *n = get_node(s, slab_nid(slab));
> > -       int lock = 0, free_delta = 0;
> > -       enum slab_modes l = M_NONE, m = M_NONE;
> > +       int free_delta = 0;
> > +       enum slab_modes mode = M_NONE;
> >         void *nextfree, *freelist_iter, *freelist_tail;
> >         int tail = DEACTIVATE_TO_HEAD;
> >         unsigned long flags = 0;
> > @@ -2393,14 +2393,10 @@ static void deactivate_slab(struct kmem_cache *s, struct slab *slab,
> >          * Ensure that the slab is unfrozen while the list presence
> >          * reflects the actual number of objects during unfreeze.
> >          *
> > -        * We setup the list membership and then perform a cmpxchg
> > -        * with the count. If there is a mismatch then the slab
> > -        * is not unfrozen but the slab is on the wrong list.
> > -        *
> > -        * Then we restart the process which may have to remove
> > -        * the slab from the list that we just put it on again
> > -        * because the number of objects in the slab may have
> > -        * changed.
> > +        * We first perform cmpxchg holding lock and insert to list
> > +        * when it succeed. If there is mismatch then the slab is not
> > +        * unfrozen and number of objects in the slab may have changed.
> > +        * Then release lock and retry cmpxchg again.
> >          */
> >  redo:
> >
> > @@ -2420,61 +2416,50 @@ static void deactivate_slab(struct kmem_cache *s, struct slab *slab,
> >         new.frozen = 0;
> >
> >         if (!new.inuse && n->nr_partial >= s->min_partial)
> > -               m = M_FREE;
> > +               mode = M_FREE;
> >         else if (new.freelist) {
> > -               m = M_PARTIAL;
> > -               if (!lock) {
> > -                       lock = 1;
> > -                       /*
> > -                        * Taking the spinlock removes the possibility that
> > -                        * acquire_slab() will see a slab that is frozen
> > -                        */
> > -                       spin_lock_irqsave(&n->list_lock, flags);
> > -               }
> > -       } else {
> > -               m = M_FULL;
> > -               if (kmem_cache_debug_flags(s, SLAB_STORE_USER) && !lock) {
> > -                       lock = 1;
> > -                       /*
> > -                        * This also ensures that the scanning of full
> > -                        * slabs from diagnostic functions will not see
> > -                        * any frozen slabs.
> > -                        */
> > -                       spin_lock_irqsave(&n->list_lock, flags);
> > -               }
> > -       }
> > -
> > -       if (l != m) {
> > -               if (l == M_PARTIAL)
> > -                       remove_partial(n, slab);
> > -               else if (l == M_FULL)
> > -                       remove_full(s, n, slab);
> > +               mode = M_PARTIAL;
> > +               /*
> > +                * Taking the spinlock removes the possibility that
> > +                * acquire_slab() will see a slab that is frozen
> > +                */
> > +               spin_lock_irqsave(&n->list_lock, flags);
> > +       } else if (kmem_cache_debug_flags(s, SLAB_STORE_USER)) {
> > +               mode = M_FULL;
> > +               /*
> > +                * This also ensures that the scanning of full
> > +                * slabs from diagnostic functions will not see
> > +                * any frozen slabs.
> > +                */
> > +               spin_lock_irqsave(&n->list_lock, flags);
> > +       } else
> > +               mode = M_FULL_NOLIST;
> >
> > -               if (m == M_PARTIAL)
> > -                       add_partial(n, slab, tail);
> > -               else if (m == M_FULL)
> > -                       add_full(s, n, slab);
> > -       }
> >
> > -       l = m;
> >         if (!cmpxchg_double_slab(s, slab,
> >                                 old.freelist, old.counters,
> >                                 new.freelist, new.counters,
> > -                               "unfreezing slab"))
> > +                               "unfreezing slab")) {
> > +               if (mode == M_PARTIAL || mode == M_FULL)
> > +                       spin_unlock_irqrestore(&n->list_lock, flags);
> 
> The slab doesn't belong to any node here, should we remove locking/unlocking
> spin for cmpxchg_double_slab() call? Just calling spin_lock_irqsave() before
> add_partial()/add_full call is fine?
>

I thought about that, and tested it, but that is not okay.

Taking the spinlock around the cmpxchg prevents a race between __slab_free()
and deactivate_slab(). Without the spinlock the lists can be corrupted:
__slab_free() may call remove_full() on a slab that is not on the full list
yet, and deactivate_slab() may then add_full() a slab that __slab_free()
already put on the partial list.


Consider the case below (when SLAB_STORE_USER is set):

__slab_free()			deactivate_slab()
=================		=================
				(deactivating full slab)
				cmpxchg_double()


spin_lock_irqsave()
cmpxchg_double()		

/* not in full list yet */
remove_full()
add_partial()
spin_unlock_irqrestore()
				spin_lock_irqsave()
				add_full()			
				spin_unlock_irqrestore()



> >                 goto redo;
> 
> How about do {...} while(!cmpxchg_double_slab())? The readability looks better?
>

This will be:

do {
	if (mode == M_PARTIAL || mode == M_FULL)
		spin_unlock_irqrestore();

	[...]

} while (!cmpxchg_double_slab());
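
Spelled out a bit more (just a sketch; the re-reading of old.freelist/old.counters
and the rebuilding of new.* at the top of the loop are elided), that alternative
would look roughly like:

do {
	if (mode == M_PARTIAL || mode == M_FULL)
		spin_unlock_irqrestore(&n->list_lock, flags);

	/* re-read old.freelist/old.counters and rebuild new.* here */

	if (!new.inuse && n->nr_partial >= s->min_partial) {
		mode = M_FREE;
	} else if (new.freelist) {
		mode = M_PARTIAL;
		spin_lock_irqsave(&n->list_lock, flags);
	} else if (kmem_cache_debug_flags(s, SLAB_STORE_USER)) {
		mode = M_FULL;
		spin_lock_irqsave(&n->list_lock, flags);
	} else {
		mode = M_FULL_NOLIST;
	}
} while (!cmpxchg_double_slab(s, slab,
			      old.freelist, old.counters,
			      new.freelist, new.counters,
			      "unfreezing slab"));

so the conditional unlock still has to sit at the top of the loop body.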

I think the goto version is easier to read?

Thanks!

> Regards,
> Xiongwei
> 
> > +       }
> >
> > -       if (lock)
> > -               spin_unlock_irqrestore(&n->list_lock, flags);
> >
> > -       if (m == M_PARTIAL)
> > +       if (mode == M_PARTIAL) {
> > +               add_partial(n, slab, tail);
> > +               spin_unlock_irqrestore(&n->list_lock, flags);
> >                 stat(s, tail);
> > -       else if (m == M_FULL)
> > -               stat(s, DEACTIVATE_FULL);
> > -       else if (m == M_FREE) {
> > +       } else if (mode == M_FREE) {
> >                 stat(s, DEACTIVATE_EMPTY);
> >                 discard_slab(s, slab);
> >                 stat(s, FREE_SLAB);
> > -       }
> > +       } else if (mode == M_FULL) {
> > +               add_full(s, n, slab);
> > +               spin_unlock_irqrestore(&n->list_lock, flags);
> > +               stat(s, DEACTIVATE_FULL);
> > +       } else if (mode == M_FULL_NOLIST)
> > +               stat(s, DEACTIVATE_FULL);
> >  }
> >
> >  #ifdef CONFIG_SLUB_CPU_PARTIAL
> > --
> > 2.33.1
> >
> >
Hyeonggon Yoo March 8, 2022, 3:58 a.m. UTC | #4
On Mon, Mar 07, 2022 at 05:40:42PM +0100, Vlastimil Babka wrote:
> On 3/7/22 08:40, Hyeonggon Yoo wrote:
> > Simplify deactivate_slab() by unlocking n->list_lock and retrying
> > cmpxchg_double() when cmpxchg_double() fails, and perform
> > add_{partial,full} only when it succeed.
> > 
> > Releasing and taking n->list_lock again here is not harmful as SLUB
> > avoids deactivating slabs as much as possible.
> > 
> > [ vbabka@suse.cz: perform add_{partial,full} when cmpxchg_double()
> >   succeed.
> > 
> >   count deactivating full slabs even if debugging flag is not set. ]
> > 
> > Signed-off-by: Hyeonggon Yoo <42.hyeyoo@gmail.com>
> 
> Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
> 
> adding both to slab-next. Fixed up some nits myself, see below:
> 
> >  
> > @@ -2420,61 +2416,50 @@ static void deactivate_slab(struct kmem_cache *s, struct slab *slab,
> >  	new.frozen = 0;
> >  
> >  	if (!new.inuse && n->nr_partial >= s->min_partial)
> > -		m = M_FREE;
> > +		mode = M_FREE;
> >  	else if (new.freelist) {
> 
> This was against kernel style even before the patch - we use { } in the
> 'else if' branch, thus all branches should use { } even if one-line.
>

Ah, you are right. I agree with this change.
The "remove unnecessary braces" rule does not apply here.

> > -		m = M_PARTIAL;
> > -		if (!lock) {
> > -			lock = 1;
> > -			/*
> > -			 * Taking the spinlock removes the possibility that
> > -			 * acquire_slab() will see a slab that is frozen
> > -			 */
> > -			spin_lock_irqsave(&n->list_lock, flags);
> > -		}
> > -	} else {
> > -		m = M_FULL;
> > -		if (kmem_cache_debug_flags(s, SLAB_STORE_USER) && !lock) {
> > -			lock = 1;
> > -			/*
> > -			 * This also ensures that the scanning of full
> > -			 * slabs from diagnostic functions will not see
> > -			 * any frozen slabs.
> > -			 */
> > -			spin_lock_irqsave(&n->list_lock, flags);
> > -		}
> > -	}
> > -
> > -	if (l != m) {
> > -		if (l == M_PARTIAL)
> > -			remove_partial(n, slab);
> > -		else if (l == M_FULL)
> > -			remove_full(s, n, slab);
> > +		mode = M_PARTIAL;
> > +		/*
> > +		 * Taking the spinlock removes the possibility that
> > +		 * acquire_slab() will see a slab that is frozen
> > +		 */
> > +		spin_lock_irqsave(&n->list_lock, flags);
> > +	} else if (kmem_cache_debug_flags(s, SLAB_STORE_USER)) {
> > +		mode = M_FULL;
> > +		/*
> > +		 * This also ensures that the scanning of full
> > +		 * slabs from diagnostic functions will not see
> > +		 * any frozen slabs.
> > +		 */
> > +		spin_lock_irqsave(&n->list_lock, flags);
> > +	} else
> > +		mode = M_FULL_NOLIST;
> 
> Ditto here (this is new).

Yes.

> 
> > -		if (m == M_PARTIAL)
> > -			add_partial(n, slab, tail);
> > -		else if (m == M_FULL)
> > -			add_full(s, n, slab);
> > -	}
> >  
> > -	l = m;
> >  	if (!cmpxchg_double_slab(s, slab,
> >  				old.freelist, old.counters,
> >  				new.freelist, new.counters,
> > -				"unfreezing slab"))
> > +				"unfreezing slab")) {
> > +		if (mode == M_PARTIAL || mode == M_FULL)
> > +			spin_unlock_irqrestore(&n->list_lock, flags);
> >  		goto redo;
> > +	}
> >  
> > -	if (lock)
> > -		spin_unlock_irqrestore(&n->list_lock, flags);
> >  
> > -	if (m == M_PARTIAL)
> > +	if (mode == M_PARTIAL) {
> > +		add_partial(n, slab, tail);
> > +		spin_unlock_irqrestore(&n->list_lock, flags);
> >  		stat(s, tail);
> > -	else if (m == M_FULL)
> > -		stat(s, DEACTIVATE_FULL);
> > -	else if (m == M_FREE) {
> > +	} else if (mode == M_FREE) {
> >  		stat(s, DEACTIVATE_EMPTY);
> >  		discard_slab(s, slab);
> >  		stat(s, FREE_SLAB);
> > -	}
> > +	} else if (mode == M_FULL) {
> > +		add_full(s, n, slab);
> > +		spin_unlock_irqrestore(&n->list_lock, flags);
> > +		stat(s, DEACTIVATE_FULL);
> > +	} else if (mode == M_FULL_NOLIST)
> > +		stat(s, DEACTIVATE_FULL);
> 
> And here.
>

Yes.

> >  }
> >  
> >  #ifdef CONFIG_SLUB_CPU_PARTIAL
> 

Thanks!
Roman Gushchin March 8, 2022, 5:01 a.m. UTC | #5
On Mon, Mar 07, 2022 at 07:40:56AM +0000, Hyeonggon Yoo wrote:
> Simplify deactivate_slab() by unlocking n->list_lock and retrying
> cmpxchg_double() when cmpxchg_double() fails, and perform
> add_{partial,full} only when it succeed.
> 
> Releasing and taking n->list_lock again here is not harmful as SLUB
> avoids deactivating slabs as much as possible.
> 
> [ vbabka@suse.cz: perform add_{partial,full} when cmpxchg_double()
>   succeed.
> 
>   count deactivating full slabs even if debugging flag is not set. ]
> 
> Signed-off-by: Hyeonggon Yoo <42.hyeyoo@gmail.com>

Reviewed-by: Roman Gushchin <roman.gushchin@linux.dev>

Thanks!
Xiongwei Song March 8, 2022, 5:29 a.m. UTC | #6
On Tue, Mar 8, 2022 at 11:50 AM Hyeonggon Yoo <42.hyeyoo@gmail.com> wrote:
>
> On Tue, Mar 08, 2022 at 09:40:07AM +0800, Xiongwei Song wrote:
> > Hello,
> >
> > On Mon, Mar 7, 2022 at 3:41 PM Hyeonggon Yoo <42.hyeyoo@gmail.com> wrote:
> > >
> > > Simplify deactivate_slab() by unlocking n->list_lock and retrying
> > > cmpxchg_double() when cmpxchg_double() fails, and perform
> > > add_{partial,full} only when it succeed.
> > >
> > > Releasing and taking n->list_lock again here is not harmful as SLUB
> > > avoids deactivating slabs as much as possible.
> > >
> > > [ vbabka@suse.cz: perform add_{partial,full} when cmpxchg_double()
> > >   succeed.
> > >
> > >   count deactivating full slabs even if debugging flag is not set. ]
> > >
> > > Signed-off-by: Hyeonggon Yoo <42.hyeyoo@gmail.com>
> > > ---
> > >  mm/slub.c | 91 +++++++++++++++++++++++--------------------------------
> > >  1 file changed, 38 insertions(+), 53 deletions(-)
> > >
> > > diff --git a/mm/slub.c b/mm/slub.c
> > > index 1ce09b0347ad..f0cb9d0443ac 100644
> > > --- a/mm/slub.c
> > > +++ b/mm/slub.c
> > > @@ -2348,10 +2348,10 @@ static void init_kmem_cache_cpus(struct kmem_cache *s)
> > >  static void deactivate_slab(struct kmem_cache *s, struct slab *slab,
> > >                             void *freelist)
> > >  {
> > > -       enum slab_modes { M_NONE, M_PARTIAL, M_FULL, M_FREE };
> > > +       enum slab_modes { M_NONE, M_PARTIAL, M_FULL, M_FREE, M_FULL_NOLIST };
> > >         struct kmem_cache_node *n = get_node(s, slab_nid(slab));
> > > -       int lock = 0, free_delta = 0;
> > > -       enum slab_modes l = M_NONE, m = M_NONE;
> > > +       int free_delta = 0;
> > > +       enum slab_modes mode = M_NONE;
> > >         void *nextfree, *freelist_iter, *freelist_tail;
> > >         int tail = DEACTIVATE_TO_HEAD;
> > >         unsigned long flags = 0;
> > > @@ -2393,14 +2393,10 @@ static void deactivate_slab(struct kmem_cache *s, struct slab *slab,
> > >          * Ensure that the slab is unfrozen while the list presence
> > >          * reflects the actual number of objects during unfreeze.
> > >          *
> > > -        * We setup the list membership and then perform a cmpxchg
> > > -        * with the count. If there is a mismatch then the slab
> > > -        * is not unfrozen but the slab is on the wrong list.
> > > -        *
> > > -        * Then we restart the process which may have to remove
> > > -        * the slab from the list that we just put it on again
> > > -        * because the number of objects in the slab may have
> > > -        * changed.
> > > +        * We first perform cmpxchg holding lock and insert to list
> > > +        * when it succeed. If there is mismatch then the slab is not
> > > +        * unfrozen and number of objects in the slab may have changed.
> > > +        * Then release lock and retry cmpxchg again.
> > >          */
> > >  redo:
> > >
> > > @@ -2420,61 +2416,50 @@ static void deactivate_slab(struct kmem_cache *s, struct slab *slab,
> > >         new.frozen = 0;
> > >
> > >         if (!new.inuse && n->nr_partial >= s->min_partial)
> > > -               m = M_FREE;
> > > +               mode = M_FREE;
> > >         else if (new.freelist) {
> > > -               m = M_PARTIAL;
> > > -               if (!lock) {
> > > -                       lock = 1;
> > > -                       /*
> > > -                        * Taking the spinlock removes the possibility that
> > > -                        * acquire_slab() will see a slab that is frozen
> > > -                        */
> > > -                       spin_lock_irqsave(&n->list_lock, flags);
> > > -               }
> > > -       } else {
> > > -               m = M_FULL;
> > > -               if (kmem_cache_debug_flags(s, SLAB_STORE_USER) && !lock) {
> > > -                       lock = 1;
> > > -                       /*
> > > -                        * This also ensures that the scanning of full
> > > -                        * slabs from diagnostic functions will not see
> > > -                        * any frozen slabs.
> > > -                        */
> > > -                       spin_lock_irqsave(&n->list_lock, flags);
> > > -               }
> > > -       }
> > > -
> > > -       if (l != m) {
> > > -               if (l == M_PARTIAL)
> > > -                       remove_partial(n, slab);
> > > -               else if (l == M_FULL)
> > > -                       remove_full(s, n, slab);
> > > +               mode = M_PARTIAL;
> > > +               /*
> > > +                * Taking the spinlock removes the possibility that
> > > +                * acquire_slab() will see a slab that is frozen
> > > +                */
> > > +               spin_lock_irqsave(&n->list_lock, flags);
> > > +       } else if (kmem_cache_debug_flags(s, SLAB_STORE_USER)) {
> > > +               mode = M_FULL;
> > > +               /*
> > > +                * This also ensures that the scanning of full
> > > +                * slabs from diagnostic functions will not see
> > > +                * any frozen slabs.
> > > +                */
> > > +               spin_lock_irqsave(&n->list_lock, flags);
> > > +       } else
> > > +               mode = M_FULL_NOLIST;
> > >
> > > -               if (m == M_PARTIAL)
> > > -                       add_partial(n, slab, tail);
> > > -               else if (m == M_FULL)
> > > -                       add_full(s, n, slab);
> > > -       }
> > >
> > > -       l = m;
> > >         if (!cmpxchg_double_slab(s, slab,
> > >                                 old.freelist, old.counters,
> > >                                 new.freelist, new.counters,
> > > -                               "unfreezing slab"))
> > > +                               "unfreezing slab")) {
> > > +               if (mode == M_PARTIAL || mode == M_FULL)
> > > +                       spin_unlock_irqrestore(&n->list_lock, flags);
> >
> > The slab doesn't belong to any node here, should we remove locking/unlocking
> > spin for cmpxchg_double_slab() call? Just calling spin_lock_irqsave() before
> > add_partial()/add_full call is fine?
> >
>
> I thought about that, and tested, but that is not okay.
>
> taking spinlock around cmpxchg prevents race between __slab_free() and
> deactivate_slab(). list can be corrupted without spinlock.
>
>
> think about case below: (when SLAB_STORE_USER is set)
>
> __slab_free()                   deactivate_slab()
> =================               =================
>                                 (deactivating full slab)
>                                 cmpxchg_double()
>
>
> spin_lock_irqsave()
> cmpxchg_double()
>
> /* not in full list yet */
> remove_full()
> add_partial()
> spin_unlock_irqrestore()
>                                 spin_lock_irqsave()
>                                 add_full()
>                                 spin_unlock_irqrestore()
>

Oh... Looks reasonable. Thanks for the detailed explanation.

>
>
> > >                 goto redo;
> >
> > How about do {...} while(!cmpxchg_double_slab())? The readability looks better?
> >
>
> This will be:
>
> do {
>         if (mode == M_PARTIAL || mode == M_FULL)
>                 spin_unlock_irqrestore();
>
>         [...]
>
> } while (!cmpxchg_double_slab());
>

I see __slab_free() already does so. Not a big deal.

Regards,
Xiongwei

> I think goto version is better for reading?
>
> Thanks!
>
> > Regards,
> > Xiongwei
> >
> > > +       }
> > >
> > > -       if (lock)
> > > -               spin_unlock_irqrestore(&n->list_lock, flags);
> > >
> > > -       if (m == M_PARTIAL)
> > > +       if (mode == M_PARTIAL) {
> > > +               add_partial(n, slab, tail);
> > > +               spin_unlock_irqrestore(&n->list_lock, flags);
> > >                 stat(s, tail);
> > > -       else if (m == M_FULL)
> > > -               stat(s, DEACTIVATE_FULL);
> > > -       else if (m == M_FREE) {
> > > +       } else if (mode == M_FREE) {
> > >                 stat(s, DEACTIVATE_EMPTY);
> > >                 discard_slab(s, slab);
> > >                 stat(s, FREE_SLAB);
> > > -       }
> > > +       } else if (mode == M_FULL) {
> > > +               add_full(s, n, slab);
> > > +               spin_unlock_irqrestore(&n->list_lock, flags);
> > > +               stat(s, DEACTIVATE_FULL);
> > > +       } else if (mode == M_FULL_NOLIST)
> > > +               stat(s, DEACTIVATE_FULL);
> > >  }
> > >
> > >  #ifdef CONFIG_SLUB_CPU_PARTIAL
> > > --
> > > 2.33.1
> > >
> > >
>
> --
> Thank you, You are awesome!
> Hyeonggon :-)

Patch

diff --git a/mm/slub.c b/mm/slub.c
index 1ce09b0347ad..f0cb9d0443ac 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2348,10 +2348,10 @@  static void init_kmem_cache_cpus(struct kmem_cache *s)
 static void deactivate_slab(struct kmem_cache *s, struct slab *slab,
 			    void *freelist)
 {
-	enum slab_modes { M_NONE, M_PARTIAL, M_FULL, M_FREE };
+	enum slab_modes { M_NONE, M_PARTIAL, M_FULL, M_FREE, M_FULL_NOLIST };
 	struct kmem_cache_node *n = get_node(s, slab_nid(slab));
-	int lock = 0, free_delta = 0;
-	enum slab_modes l = M_NONE, m = M_NONE;
+	int free_delta = 0;
+	enum slab_modes mode = M_NONE;
 	void *nextfree, *freelist_iter, *freelist_tail;
 	int tail = DEACTIVATE_TO_HEAD;
 	unsigned long flags = 0;
@@ -2393,14 +2393,10 @@  static void deactivate_slab(struct kmem_cache *s, struct slab *slab,
 	 * Ensure that the slab is unfrozen while the list presence
 	 * reflects the actual number of objects during unfreeze.
 	 *
-	 * We setup the list membership and then perform a cmpxchg
-	 * with the count. If there is a mismatch then the slab
-	 * is not unfrozen but the slab is on the wrong list.
-	 *
-	 * Then we restart the process which may have to remove
-	 * the slab from the list that we just put it on again
-	 * because the number of objects in the slab may have
-	 * changed.
+	 * We first perform cmpxchg holding lock and insert to list
+	 * when it succeed. If there is mismatch then the slab is not
+	 * unfrozen and number of objects in the slab may have changed.
+	 * Then release lock and retry cmpxchg again.
 	 */
 redo:
 
@@ -2420,61 +2416,50 @@  static void deactivate_slab(struct kmem_cache *s, struct slab *slab,
 	new.frozen = 0;
 
 	if (!new.inuse && n->nr_partial >= s->min_partial)
-		m = M_FREE;
+		mode = M_FREE;
 	else if (new.freelist) {
-		m = M_PARTIAL;
-		if (!lock) {
-			lock = 1;
-			/*
-			 * Taking the spinlock removes the possibility that
-			 * acquire_slab() will see a slab that is frozen
-			 */
-			spin_lock_irqsave(&n->list_lock, flags);
-		}
-	} else {
-		m = M_FULL;
-		if (kmem_cache_debug_flags(s, SLAB_STORE_USER) && !lock) {
-			lock = 1;
-			/*
-			 * This also ensures that the scanning of full
-			 * slabs from diagnostic functions will not see
-			 * any frozen slabs.
-			 */
-			spin_lock_irqsave(&n->list_lock, flags);
-		}
-	}
-
-	if (l != m) {
-		if (l == M_PARTIAL)
-			remove_partial(n, slab);
-		else if (l == M_FULL)
-			remove_full(s, n, slab);
+		mode = M_PARTIAL;
+		/*
+		 * Taking the spinlock removes the possibility that
+		 * acquire_slab() will see a slab that is frozen
+		 */
+		spin_lock_irqsave(&n->list_lock, flags);
+	} else if (kmem_cache_debug_flags(s, SLAB_STORE_USER)) {
+		mode = M_FULL;
+		/*
+		 * This also ensures that the scanning of full
+		 * slabs from diagnostic functions will not see
+		 * any frozen slabs.
+		 */
+		spin_lock_irqsave(&n->list_lock, flags);
+	} else
+		mode = M_FULL_NOLIST;
 
-		if (m == M_PARTIAL)
-			add_partial(n, slab, tail);
-		else if (m == M_FULL)
-			add_full(s, n, slab);
-	}
 
-	l = m;
 	if (!cmpxchg_double_slab(s, slab,
 				old.freelist, old.counters,
 				new.freelist, new.counters,
-				"unfreezing slab"))
+				"unfreezing slab")) {
+		if (mode == M_PARTIAL || mode == M_FULL)
+			spin_unlock_irqrestore(&n->list_lock, flags);
 		goto redo;
+	}
 
-	if (lock)
-		spin_unlock_irqrestore(&n->list_lock, flags);
 
-	if (m == M_PARTIAL)
+	if (mode == M_PARTIAL) {
+		add_partial(n, slab, tail);
+		spin_unlock_irqrestore(&n->list_lock, flags);
 		stat(s, tail);
-	else if (m == M_FULL)
-		stat(s, DEACTIVATE_FULL);
-	else if (m == M_FREE) {
+	} else if (mode == M_FREE) {
 		stat(s, DEACTIVATE_EMPTY);
 		discard_slab(s, slab);
 		stat(s, FREE_SLAB);
-	}
+	} else if (mode == M_FULL) {
+		add_full(s, n, slab);
+		spin_unlock_irqrestore(&n->list_lock, flags);
+		stat(s, DEACTIVATE_FULL);
+	} else if (mode == M_FULL_NOLIST)
+		stat(s, DEACTIVATE_FULL);
 }
 
 #ifdef CONFIG_SLUB_CPU_PARTIAL