[0/5] fix validation races and cleanup locking

Message ID 20220812091426.18418-1-vbabka@suse.cz

Message

Vlastimil Babka Aug. 12, 2022, 9:14 a.m. UTC
This series builds on the validation races fix posted previously [1],
which became patch 2 here; its description contains all the details.

Thanks to Hyeonggon Yoo's observation, patch 3 removes more slab_lock()
usage that became unnecessary after patch 2.

This made it possible to further simplify locking code in patches 4 and
5. Since those are related to PREEMPT_RT, I'm CCing relevant people on
this series.

[1] https://lore.kernel.org/all/20220809140043.9903-1-vbabka@suse.cz/

Vlastimil Babka (5):
  mm/slub: move free_debug_processing() further
  mm/slub: restrict sysfs validation to debug caches and make it safe
  mm/slub: remove slab_lock() usage for debug operations
  mm/slub: convert object_map_lock to non-raw spinlock
  mm/slub: simplify __cmpxchg_double_slab() and slab_[un]lock()

 mm/slub.c | 417 +++++++++++++++++++++++++++++++++---------------------
 1 file changed, 252 insertions(+), 165 deletions(-)

Comments

Vlastimil Babka Aug. 12, 2022, 9:16 a.m. UTC | #1
On 8/12/22 11:14, Vlastimil Babka wrote:
> This series builds on the validation races fix posted previously [1],
> which became patch 2 here; its description contains all the details.
> 
> Thanks to Hyeonggon Yoo's observation, patch 3 removes more slab_lock()
> usage that became unnecessary after patch 2.
> 
> This made it possible to further simplify locking code in patches 4 and
> 5. Since those are related to PREEMPT_RT, I'm CCing relevant people on
> this series.

Forgot the link to the git version of this:
https://git.kernel.org/pub/scm/linux/kernel/git/vbabka/linux.git/log/?h=slub-validate-fix-v2r1

> [1] https://lore.kernel.org/all/20220809140043.9903-1-vbabka@suse.cz/
> 
> Vlastimil Babka (5):
>   mm/slub: move free_debug_processing() further
>   mm/slub: restrict sysfs validation to debug caches and make it safe
>   mm/slub: remove slab_lock() usage for debug operations
>   mm/slub: convert object_map_lock to non-raw spinlock
>   mm/slub: simplify __cmpxchg_double_slab() and slab_[un]lock()
> 
>  mm/slub.c | 417 +++++++++++++++++++++++++++++++++---------------------
>  1 file changed, 252 insertions(+), 165 deletions(-)
>
Hyeonggon Yoo Aug. 14, 2022, 1:42 p.m. UTC | #2
On Fri, Aug 12, 2022 at 11:14:22AM +0200, Vlastimil Babka wrote:
> In the following patch, the function free_debug_processing() will be
> calling add_partial(), remove_partial() and discard_slab(), so move it
> below their definitions to avoid forward declarations. To make review
> easier, separate the move from functional changes.
> 
> Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
> ---
>  mm/slub.c | 114 +++++++++++++++++++++++++++---------------------------
>  1 file changed, 57 insertions(+), 57 deletions(-)
> 
> diff --git a/mm/slub.c b/mm/slub.c
> index 862dbd9af4f5..87e794ab101a 100644
> --- a/mm/slub.c
> +++ b/mm/slub.c
> @@ -1385,63 +1385,6 @@ static inline int free_consistency_checks(struct kmem_cache *s,
>  	return 1;
>  }
>  
> -/* Supports checking bulk free of a constructed freelist */
> -static noinline int free_debug_processing(
> -	struct kmem_cache *s, struct slab *slab,
> -	void *head, void *tail, int bulk_cnt,
> -	unsigned long addr)
> -{
> -	struct kmem_cache_node *n = get_node(s, slab_nid(slab));
> -	void *object = head;
> -	int cnt = 0;
> -	unsigned long flags, flags2;
> -	int ret = 0;
> -	depot_stack_handle_t handle = 0;
> -
> -	if (s->flags & SLAB_STORE_USER)
> -		handle = set_track_prepare();
> -
> -	spin_lock_irqsave(&n->list_lock, flags);
> -	slab_lock(slab, &flags2);
> -
> -	if (s->flags & SLAB_CONSISTENCY_CHECKS) {
> -		if (!check_slab(s, slab))
> -			goto out;
> -	}
> -
> -next_object:
> -	cnt++;
> -
> -	if (s->flags & SLAB_CONSISTENCY_CHECKS) {
> -		if (!free_consistency_checks(s, slab, object, addr))
> -			goto out;
> -	}
> -
> -	if (s->flags & SLAB_STORE_USER)
> -		set_track_update(s, object, TRACK_FREE, addr, handle);
> -	trace(s, slab, object, 0);
> -	/* Freepointer not overwritten by init_object(), SLAB_POISON moved it */
> -	init_object(s, object, SLUB_RED_INACTIVE);
> -
> -	/* Reached end of constructed freelist yet? */
> -	if (object != tail) {
> -		object = get_freepointer(s, object);
> -		goto next_object;
> -	}
> -	ret = 1;
> -
> -out:
> -	if (cnt != bulk_cnt)
> -		slab_err(s, slab, "Bulk freelist count(%d) invalid(%d)\n",
> -			 bulk_cnt, cnt);
> -
> -	slab_unlock(slab, &flags2);
> -	spin_unlock_irqrestore(&n->list_lock, flags);
> -	if (!ret)
> -		slab_fix(s, "Object at 0x%p not freed", object);
> -	return ret;
> -}
> -
>  /*
>   * Parse a block of slub_debug options. Blocks are delimited by ';'
>   *
> @@ -2788,6 +2731,63 @@ static inline unsigned long node_nr_objs(struct kmem_cache_node *n)
>  {
>  	return atomic_long_read(&n->total_objects);
>  }
> +
> +/* Supports checking bulk free of a constructed freelist */
> +static noinline int free_debug_processing(
> +	struct kmem_cache *s, struct slab *slab,
> +	void *head, void *tail, int bulk_cnt,
> +	unsigned long addr)
> +{
> +	struct kmem_cache_node *n = get_node(s, slab_nid(slab));
> +	void *object = head;
> +	int cnt = 0;
> +	unsigned long flags, flags2;
> +	int ret = 0;
> +	depot_stack_handle_t handle = 0;
> +
> +	if (s->flags & SLAB_STORE_USER)
> +		handle = set_track_prepare();
> +
> +	spin_lock_irqsave(&n->list_lock, flags);
> +	slab_lock(slab, &flags2);
> +
> +	if (s->flags & SLAB_CONSISTENCY_CHECKS) {
> +		if (!check_slab(s, slab))
> +			goto out;
> +	}
> +
> +next_object:
> +	cnt++;
> +
> +	if (s->flags & SLAB_CONSISTENCY_CHECKS) {
> +		if (!free_consistency_checks(s, slab, object, addr))
> +			goto out;
> +	}
> +
> +	if (s->flags & SLAB_STORE_USER)
> +		set_track_update(s, object, TRACK_FREE, addr, handle);
> +	trace(s, slab, object, 0);
> +	/* Freepointer not overwritten by init_object(), SLAB_POISON moved it */
> +	init_object(s, object, SLUB_RED_INACTIVE);
> +
> +	/* Reached end of constructed freelist yet? */
> +	if (object != tail) {
> +		object = get_freepointer(s, object);
> +		goto next_object;
> +	}
> +	ret = 1;
> +
> +out:
> +	if (cnt != bulk_cnt)
> +		slab_err(s, slab, "Bulk freelist count(%d) invalid(%d)\n",
> +			 bulk_cnt, cnt);
> +
> +	slab_unlock(slab, &flags2);
> +	spin_unlock_irqrestore(&n->list_lock, flags);
> +	if (!ret)
> +		slab_fix(s, "Object at 0x%p not freed", object);
> +	return ret;
> +}
>  #endif /* CONFIG_SLUB_DEBUG */
>  
>  #if defined(CONFIG_SLUB_DEBUG) || defined(CONFIG_SYSFS)
> -- 
> 2.37.1
> 

Looks good to me.

Reviewed-by: Hyeonggon Yoo <42.hyeyoo@gmail.com>

Thanks!
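
For context, the move in this patch is driven purely by C's requirement
that a static function be declared before it is called: moving
free_debug_processing() below add_partial(), remove_partial() and
discard_slab() avoids having to add prototypes for them. A minimal
standalone illustration of the same pattern (the names below are made up
for the example, not taken from mm/slub.c):

#include <stdio.h>

/*
 * helper() is defined first, so caller() can use it without a separate
 * forward declaration (prototype) earlier in the file.
 */
static void helper(int x)
{
	printf("helper: %d\n", x);
}

static void caller(void)
{
	helper(42);	/* fine: helper() is already defined above */
}

int main(void)
{
	caller();
	return 0;
}

If caller() were defined above helper() instead, the file would need a
forward declaration such as "static void helper(int x);" near the top,
which is exactly what the move avoids for free_debug_processing().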
Hyeonggon Yoo Aug. 14, 2022, 2:54 p.m. UTC | #3
On Fri, Aug 12, 2022 at 11:14:24AM +0200, Vlastimil Babka wrote:
> All alloc and free operations on debug caches are now serialized by
> n->list_lock, so we can remove slab_lock() usage in validate_slab()
> and list_slab_objects() as those also happen under n->list_lock.
> 
> Note the usage in list_slab_objects() could happen even on non-debug
> caches, but only during cache shutdown time so there should not be any
> parallel freeing activity anymore. Except for buggy slab users, but in
> that case the slab_lock() would not help against the common cmpxchg
> based fast paths (in non-debug caches) anyway.
> 
> Also adjust documentation comments accordingly.
> 
> Suggested-by: Hyeonggon Yoo <42.hyeyoo@gmail.com>
> Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
> ---
>  mm/slub.c | 19 ++++++++-----------
>  1 file changed, 8 insertions(+), 11 deletions(-)
> 
> diff --git a/mm/slub.c b/mm/slub.c
> index fa7efd2d98be..32b79bc3ae6d 100644
> --- a/mm/slub.c
> +++ b/mm/slub.c
> @@ -50,7 +50,7 @@
>   *   1. slab_mutex (Global Mutex)
>   *   2. node->list_lock (Spinlock)
>   *   3. kmem_cache->cpu_slab->lock (Local lock)
> - *   4. slab_lock(slab) (Only on some arches or for debugging)
> + *   4. slab_lock(slab) (Only on some arches)
>   *   5. object_map_lock (Only for debugging)
>   *
>   *   slab_mutex
> @@ -64,8 +64,9 @@
>   *   The slab_lock is a wrapper around the page lock, thus it is a bit
>   *   spinlock.
>   *
> - *   The slab_lock is only used for debugging and on arches that do not
> - *   have the ability to do a cmpxchg_double. It only protects:
> + *   The slab_lock is only used on arches that do not have the ability
> + *   to do a cmpxchg_double. It only protects:
> + *
>   *	A. slab->freelist	-> List of free objects in a slab
>   *	B. slab->inuse		-> Number of objects in use
>   *	C. slab->objects	-> Number of objects in slab
> @@ -94,6 +95,9 @@
>   *   allocating a long series of objects that fill up slabs does not require
>   *   the list lock.
>   *
> + *   For debug caches, all allocations are forced to go through a list_lock
> + *   protected region to serialize against concurrent validation.
> + *
>   *   cpu_slab->lock local lock
>   *
>   *   This locks protect slowpath manipulation of all kmem_cache_cpu fields
> @@ -4369,7 +4373,6 @@ static void list_slab_objects(struct kmem_cache *s, struct slab *slab,
>  	void *p;
>  
>  	slab_err(s, slab, text, s->name);
> -	slab_lock(slab, &flags);
>  
>  	map = get_map(s, slab);
>  	for_each_object(p, s, addr, slab->objects) {
> @@ -4380,7 +4383,6 @@ static void list_slab_objects(struct kmem_cache *s, struct slab *slab,
>  		}
>  	}
>  	put_map(map);
> -	slab_unlock(slab, &flags);
>  #endif
>  }
>  
> @@ -5107,12 +5109,9 @@ static void validate_slab(struct kmem_cache *s, struct slab *slab,
>  {
>  	void *p;
>  	void *addr = slab_address(slab);
> -	unsigned long flags;
> -
> -	slab_lock(slab, &flags);
>  
>  	if (!check_slab(s, slab) || !on_freelist(s, slab, NULL))
> -		goto unlock;
> +		return;
>  
>  	/* Now we know that a valid freelist exists */
>  	__fill_map(obj_map, s, slab);
> @@ -5123,8 +5122,6 @@ static void validate_slab(struct kmem_cache *s, struct slab *slab,
>  		if (!check_object(s, slab, p, val))
>  			break;
>  	}
> -unlock:
> -	slab_unlock(slab, &flags);
>  }
>  
>  static int validate_slab_node(struct kmem_cache *s,
> -- 
> 2.37.1
> 

Looks good to me.
Reviewed-by: Hyeonggon Yoo <42.hyeyoo@gmail.com>
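
For context, the per-slab lock can go away here because the validation
walk already runs with the node's list_lock held, and after patch 2 all
debug-cache alloc/free paths take that same lock. A simplified sketch of
the calling pattern (paraphrased rather than copied from mm/slub.c, so
helper and field names may differ slightly):

/*
 * Sketch of how validation iterates one node's slabs: every slab passed
 * to validate_slab() is reached under n->list_lock, which now also
 * serializes debug-cache allocation and freeing, so the per-slab
 * slab_lock() in validate_slab() no longer protects anything extra.
 */
static unsigned long validate_node_sketch(struct kmem_cache *s,
					  struct kmem_cache_node *n,
					  unsigned long *obj_map)
{
	struct slab *slab;
	unsigned long count = 0;
	unsigned long flags;

	spin_lock_irqsave(&n->list_lock, flags);

	list_for_each_entry(slab, &n->partial, slab_list) {
		validate_slab(s, slab, obj_map);
		count++;
	}

	spin_unlock_irqrestore(&n->list_lock, flags);
	return count;
}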
David Rientjes Aug. 15, 2022, 12:03 a.m. UTC | #4
On Fri, 12 Aug 2022, Vlastimil Babka wrote:

> The only remaining user of object_map_lock is list_slab_objects().
> Obtaining the lock there used to happen under slab_lock() which implied
> disabling irqs on PREEMPT_RT, thus it's a raw_spinlock. With the
> slab_lock() removed, we can convert it to a normal spinlock.
> 
> Also remove the get_map()/put_map() wrappers as list_slab_objects()
> became their only remaining user.
> 
> Signed-off-by: Vlastimil Babka <vbabka@suse.cz>

I don't see a benefit to keeping the VM_BUG_ON(!irqs_disabled()); around 
either.

Acked-by: David Rientjes <rientjes@google.com>
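
The distinction behind this conversion: on PREEMPT_RT a spinlock_t
becomes a sleeping lock, so it must not be acquired while interrupts are
disabled or a raw lock is held, whereas a raw_spinlock_t always spins
with irqs and preemption off. A before/after sketch of the declaration
in question (illustrative only; the actual diff is quoted in full in
reply #8 below):

/*
 * Before the patch: object_map_lock was taken under slab_lock(), whose
 * irq-disabling on PREEMPT_RT forbids a sleeping lock, hence:
 *
 *	static DEFINE_RAW_SPINLOCK(object_map_lock);
 *
 * After the patch: the only remaining user, list_slab_objects(), no
 * longer runs under slab_lock(), so an ordinary (on RT, sleeping)
 * spinlock is sufficient:
 */
static DEFINE_SPINLOCK(object_map_lock);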
David Rientjes Aug. 15, 2022, 12:03 a.m. UTC | #5
On Fri, 12 Aug 2022, Vlastimil Babka wrote:

> In the following patch, the function free_debug_processing() will be
> calling add_partial(), remove_partial() and discard_slab(), so move it
> below their definitions to avoid forward declarations. To make review
> easier, separate the move from functional changes.
> 
> Signed-off-by: Vlastimil Babka <vbabka@suse.cz>

Acked-by: David Rientjes <rientjes@google.com>
David Rientjes Aug. 15, 2022, 12:04 a.m. UTC | #6
On Fri, 12 Aug 2022, Vlastimil Babka wrote:

> All alloc and free operations on debug caches are now serialized by
> n->list_lock, so we can remove slab_lock() usage in validate_slab()
> and list_slab_objects() as those also happen under n->list_lock.
> 
> Note the usage in list_slab_objects() could happen even on non-debug
> caches, but only during cache shutdown time so there should not be any
> parallel freeing activity anymore. Except for buggy slab users, but in
> that case the slab_lock() would not help against the common cmpxchg
> based fast paths (in non-debug caches) anyway.
> 
> Also adjust documentation comments accordingly.
> 
> Suggested-by: Hyeonggon Yoo <42.hyeyoo@gmail.com>
> Signed-off-by: Vlastimil Babka <vbabka@suse.cz>

Acked-by: David Rientjes <rientjes@google.com>
David Rientjes Aug. 15, 2022, 12:04 a.m. UTC | #7
On Fri, 12 Aug 2022, Vlastimil Babka wrote:

> The PREEMPT_RT specific disabling of irqs in __cmpxchg_double_slab()
> (through slab_[un]lock()) makes it behave on PREEMPT_RT exactly as
> cmpxchg_double_slab() does, so it's simpler to just redirect the usage
> of the former to the latter on RT.
> 
> That means we no longer need the slab_[un]lock() wrappers, so delete
> them and rename the current __slab_[un]lock() to slab_[un]lock().
> 
> Signed-off-by: Vlastimil Babka <vbabka@suse.cz>

Acked-by: David Rientjes <rientjes@google.com>
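
A rough sketch of the simplification described above (not the literal
patch: the real function also keeps the cmpxchg_double fast path and
statistics, and the exact structure in mm/slub.c may differ):

/*
 * Illustrative only: on PREEMPT_RT the old slab_lock() wrapper disabled
 * irqs anyway, making __cmpxchg_double_slab() behave exactly like
 * cmpxchg_double_slab(), so the RT case can simply call the latter.
 */
static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct slab *slab,
		void *freelist_old, unsigned long counters_old,
		void *freelist_new, unsigned long counters_new,
		const char *n)
{
	if (IS_ENABLED(CONFIG_PREEMPT_RT))
		return cmpxchg_double_slab(s, slab, freelist_old, counters_old,
					   freelist_new, counters_new, n);

	/*
	 * !RT: the caller has already disabled irqs, so the renamed
	 * slab_lock()/slab_unlock() bit spinlock is all that is needed.
	 */
	slab_lock(slab);
	if (slab->freelist == freelist_old &&
	    slab->counters == counters_old) {
		slab->freelist = freelist_new;
		slab->counters = counters_new;
		slab_unlock(slab);
		return true;
	}
	slab_unlock(slab);
	return false;
}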
Hyeonggon Yoo Aug. 15, 2022, 12:53 p.m. UTC | #8
On Fri, Aug 12, 2022 at 11:14:25AM +0200, Vlastimil Babka wrote:
> The only remaining user of object_map_lock is list_slab_objects().
> Obtaining the lock there used to happen under slab_lock() which implied
> disabling irqs on PREEMPT_RT, thus it's a raw_spinlock. With the
> slab_lock() removed, we can convert it to a normal spinlock.
> 
> Also remove the get_map()/put_map() wrappers as list_slab_objects()
> became their only remaining user.
> 
> Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
> ---
>  mm/slub.c | 36 ++++++------------------------------
>  1 file changed, 6 insertions(+), 30 deletions(-)
> 
> diff --git a/mm/slub.c b/mm/slub.c
> index 32b79bc3ae6d..dffb5063acbf 100644
> --- a/mm/slub.c
> +++ b/mm/slub.c
> @@ -565,7 +565,7 @@ static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct slab *slab,
>  
>  #ifdef CONFIG_SLUB_DEBUG
>  static unsigned long object_map[BITS_TO_LONGS(MAX_OBJS_PER_PAGE)];
> -static DEFINE_RAW_SPINLOCK(object_map_lock);
> +static DEFINE_SPINLOCK(object_map_lock);
>  
>  static void __fill_map(unsigned long *obj_map, struct kmem_cache *s,
>  		       struct slab *slab)
> @@ -599,30 +599,6 @@ static bool slab_add_kunit_errors(void)
>  static inline bool slab_add_kunit_errors(void) { return false; }
>  #endif
>  
> -/*
> - * Determine a map of objects in use in a slab.
> - *
> - * Node listlock must be held to guarantee that the slab does
> - * not vanish from under us.
> - */
> -static unsigned long *get_map(struct kmem_cache *s, struct slab *slab)
> -	__acquires(&object_map_lock)
> -{
> -	VM_BUG_ON(!irqs_disabled());
> -
> -	raw_spin_lock(&object_map_lock);
> -
> -	__fill_map(object_map, s, slab);
> -
> -	return object_map;
> -}
> -
> -static void put_map(unsigned long *map) __releases(&object_map_lock)
> -{
> -	VM_BUG_ON(map != object_map);
> -	raw_spin_unlock(&object_map_lock);
> -}
> -
>  static inline unsigned int size_from_object(struct kmem_cache *s)
>  {
>  	if (s->flags & SLAB_RED_ZONE)
> @@ -4368,21 +4344,21 @@ static void list_slab_objects(struct kmem_cache *s, struct slab *slab,
>  {
>  #ifdef CONFIG_SLUB_DEBUG
>  	void *addr = slab_address(slab);
> -	unsigned long flags;
> -	unsigned long *map;
>  	void *p;
>  
>  	slab_err(s, slab, text, s->name);
>  
> -	map = get_map(s, slab);
> +	spin_lock(&object_map_lock);
> +	__fill_map(object_map, s, slab);
> +
>  	for_each_object(p, s, addr, slab->objects) {
>  
> -		if (!test_bit(__obj_to_index(s, addr, p), map)) {
> +		if (!test_bit(__obj_to_index(s, addr, p), object_map)) {
>  			pr_err("Object 0x%p @offset=%tu\n", p, p - addr);
>  			print_tracking(s, p);
>  		}
>  	}
> -	put_map(map);
> +	spin_unlock(&object_map_lock);
>  #endif
>  }
>  
> -- 
> 2.37.1
> 

Looks good to me.
Reviewed-by: Hyeonggon Yoo <42.hyeyoo@gmail.com>