[net] mm: slub: fix a deadlock warning in kmem_cache_destroy

Message ID 388098b2c03fbf0a732834fc01b2d875c335bc49.1642169368.git.lucien.xin@gmail.com (mailing list archive)
State Not Applicable
Delegated to: Netdev Maintainers
Series [net] mm: slub: fix a deadlock warning in kmem_cache_destroy

Checks

Context Check Description
netdev/tree_selection success Clearly marked for net
netdev/fixes_present success Fixes tag present in non-next series
netdev/subject_prefix success Link
netdev/cover_letter success Single patches do not need cover letters
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 105 this patch: 105
netdev/cc_maintainers fail 2 blamed authors not CCed: bigeasy@linutronix.de vbabka@suse.cz; 8 maintainers not CCed: linux-mm@kvack.org bigeasy@linutronix.de cl@linux.com penberg@kernel.org vbabka@suse.cz iamjoonsoo.kim@lge.com rientjes@google.com akpm@linux-foundation.org
netdev/build_clang success Errors and warnings before: 22 this patch: 22
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/verify_fixes success Fixes tag looks correct
netdev/build_allmodconfig_warn success Errors and warnings before: 110 this patch: 110
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 30 lines checked
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Xin Long Jan. 14, 2022, 2:09 p.m. UTC
cpus_read_lock() was introduced into kmem_cache_destroy() by
commit 5a836bf6b09f ("mm: slub: move flush_cpu_slab() invocations
__free_slab() invocations out of IRQ context"), and it can trigger
a deadlock warning.

As Antoine pointed out, when one thread calls kmem_cache_destroy(), it
blocks in kernfs_drain() until kn->active drops to 0, while holding
cpu_hotplug_lock. Meanwhile, another thread calling kernfs_fop_write()
may try to take cpu_hotplug_lock after having incremented kn->active
via kernfs_get_active():

        CPU0                        CPU1
        ----                        ----
  cpus_read_lock()
                                   kn->active++
                                   cpus_read_lock() [a]
  wait until kn->active == 0
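
To make the two paths concrete, below is a minimal, self-contained
userspace analogue of this pattern (plain pthreads, not kernel code;
the names destroyer/writer and the "active" counter are only stand-ins
for kmem_cache_destroy()/kernfs_fop_write() and kn->active):

  /* Compile with: cc -pthread demo.c */
  #include <pthread.h>
  #include <stdatomic.h>
  #include <stdio.h>
  #include <unistd.h>

  static pthread_rwlock_t hotplug = PTHREAD_RWLOCK_INITIALIZER;
  static atomic_int active;                /* stands in for kn->active */

  static void *destroyer(void *arg)        /* kmem_cache_destroy() side */
  {
          pthread_rwlock_rdlock(&hotplug); /* cpus_read_lock() */
          while (atomic_load(&active))     /* kernfs_drain(): wait for 0 */
                  usleep(1000);
          pthread_rwlock_unlock(&hotplug);
          return NULL;
  }

  static void *writer(void *arg)           /* kernfs_fop_write() side */
  {
          atomic_fetch_add(&active, 1);    /* kernfs_get_active() */
          pthread_rwlock_rdlock(&hotplug); /* [a]: a read lock again */
          pthread_rwlock_unlock(&hotplug);
          atomic_fetch_sub(&active, 1);    /* kernfs_put_active() */
          return NULL;
  }

  int main(void)
  {
          pthread_t t1, t2;

          pthread_create(&t1, NULL, destroyer, NULL);
          pthread_create(&t2, NULL, writer, NULL);
          pthread_join(t1, NULL);
          pthread_join(t2, NULL);
          puts("done: two read lockers never block each other");
          return 0;
  }

This program terminates, because two read lockers can hold the rwlock
concurrently. Had [a] been an exclusive lock, or had a writer been
queued between the two readers, writer would block at [a] while
destroyer waits for the counter to drop: that is the cycle lockdep
warns about.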

Since cpu_hotplug_lock is a rwsem taken for read on both sides, [a]
does not actually block there. But with the lockdep annotations added
for cpu_hotplug_lock, a deadlock warning is detected:

  ======================================================
  WARNING: possible circular locking dependency detected
  ------------------------------------------------------
  dmsetup/1832 is trying to acquire lock:
  ffff986f5a0f9f20 (kn->count#144){++++}-{0:0}, at: kernfs_remove+0x1d/0x30

  but task is already holding lock:
  ffffffffa43817c0 (slab_mutex){+.+.}-{3:3}, at: kmem_cache_destroy+0x2a/0x120

  which lock already depends on the new lock.

  the existing dependency chain (in reverse order) is:

  -> #2 (slab_mutex){+.+.}-{3:3}:
         lock_acquire+0xe8/0x470
         mutex_lock_nested+0x47/0x80
         kmem_cache_destroy+0x2a/0x120
         bioset_exit+0xb5/0x100
         cleanup_mapped_device+0x26/0xf0 [dm_mod]
         free_dev+0x43/0xb0 [dm_mod]
         __dm_destroy+0x153/0x1b0 [dm_mod]
         dev_remove+0xe4/0x1a0 [dm_mod]
         ctl_ioctl+0x1af/0x3f0 [dm_mod]
         dm_ctl_ioctl+0xa/0x10 [dm_mod]
         do_vfs_ioctl+0xa5/0x760
         ksys_ioctl+0x60/0x90
         __x64_sys_ioctl+0x16/0x20
         do_syscall_64+0x8c/0x240
         entry_SYSCALL_64_after_hwframe+0x6a/0xdf

  -> #1 (cpu_hotplug_lock){++++}-{0:0}:
         lock_acquire+0xe8/0x470
         cpus_read_lock+0x39/0x100
         cpu_partial_store+0x44/0x80
         slab_attr_store+0x20/0x30
         kernfs_fop_write+0x101/0x1b0
         vfs_write+0xd4/0x1e0
         ksys_write+0x52/0xc0
         do_syscall_64+0x8c/0x240
         entry_SYSCALL_64_after_hwframe+0x6a/0xdf

  -> #0 (kn->count#144){++++}-{0:0}:
         check_prevs_add+0x185/0xb80
         __lock_acquire+0xd8f/0xe90
         lock_acquire+0xe8/0x470
         __kernfs_remove+0x25e/0x320
         kernfs_remove+0x1d/0x30
         kobject_del+0x28/0x60
         kmem_cache_destroy+0xf1/0x120
         bioset_exit+0xb5/0x100
         cleanup_mapped_device+0x26/0xf0 [dm_mod]
         free_dev+0x43/0xb0 [dm_mod]
         __dm_destroy+0x153/0x1b0 [dm_mod]
         dev_remove+0xe4/0x1a0 [dm_mod]
         ctl_ioctl+0x1af/0x3f0 [dm_mod]
         dm_ctl_ioctl+0xa/0x10 [dm_mod]
         do_vfs_ioctl+0xa5/0x760
         ksys_ioctl+0x60/0x90
         __x64_sys_ioctl+0x16/0x20
         do_syscall_64+0x8c/0x240
         entry_SYSCALL_64_after_hwframe+0x6a/0xdf

  other info that might help us debug this:

  Chain exists of:
    kn->count#144 --> cpu_hotplug_lock --> slab_mutex

   Possible unsafe locking scenario:

         CPU0                    CPU1
         ----                    ----
    lock(slab_mutex);
                                 lock(cpu_hotplug_lock);
                                 lock(slab_mutex);
    lock(kn->count#144);

   *** DEADLOCK ***

  3 locks held by dmsetup/1832:
   #0: ffffffffa43fe5c0 (bio_slab_lock){+.+.}-{3:3}, at: bioset_exit+0x62/0x100
   #1: ffffffffa3e87c20 (cpu_hotplug_lock){++++}-{0:0}, at: kmem_cache_destroy+0x1c/0x120
   #2: ffffffffa43817c0 (slab_mutex){+.+.}-{3:3}, at: kmem_cache_destroy+0x2a/0x120

  stack backtrace:
  Call Trace:
   dump_stack+0x5c/0x80
   check_noncircular+0xff/0x120
   check_prevs_add+0x185/0xb80
   __lock_acquire+0xd8f/0xe90
   lock_acquire+0xe8/0x470
   __kernfs_remove+0x25e/0x320
   kernfs_remove+0x1d/0x30
   kobject_del+0x28/0x60
   kmem_cache_destroy+0xf1/0x120
   bioset_exit+0xb5/0x100
   cleanup_mapped_device+0x26/0xf0 [dm_mod]
   free_dev+0x43/0xb0 [dm_mod]
   __dm_destroy+0x153/0x1b0 [dm_mod]
   dev_remove+0xe4/0x1a0 [dm_mod]
   ctl_ioctl+0x1af/0x3f0 [dm_mod]
   dm_ctl_ioctl+0xa/0x10 [dm_mod]
   do_vfs_ioctl+0xa5/0x760
   ksys_ioctl+0x60/0x90
   __x64_sys_ioctl+0x16/0x20
   do_syscall_64+0x8c/0x240
   entry_SYSCALL_64_after_hwframe+0x6a/0xdf

Since cpus_read_lock() is supposed to protect CPU-related data only,
it makes sense to fix this issue by moving cpus_read_lock() from
kmem_cache_destroy() down into __kmem_cache_shutdown(). While at it,
add the missing cpus_read_lock() in slab_mem_going_offline_callback().
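
Both call sites get there via the flush_all_cpus_locked() ->
flush_all() swap: flush_all() takes the hotplug lock itself around the
locked variant. Roughly, from mm/slub.c as of commit 5a836bf6b09f
(sketch, slightly simplified):

  static void flush_all(struct kmem_cache *s)
  {
          cpus_read_lock();           /* cpu_hotplug_lock now only held */
          flush_all_cpus_locked(s);   /* around the per-cpu flush itself */
          cpus_read_unlock();
  }

With that, the kernfs/sysfs teardown done later in kmem_cache_destroy()
no longer runs under cpu_hotplug_lock, which breaks the cycle reported
above.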

Fixes: 5a836bf6b09f ("mm: slub: move flush_cpu_slab() invocations __free_slab() invocations out of IRQ context")
Signed-off-by: Xin Long <lucien.xin@gmail.com>
---
 mm/slab_common.c | 2 --
 mm/slub.c        | 4 ++--
 2 files changed, 2 insertions(+), 4 deletions(-)

Comments

Xin Long Jan. 14, 2022, 2:11 p.m. UTC | #1
sorry, please drop this, it was sent to the wrong mailing list.

On Fri, Jan 14, 2022 at 10:09 PM Xin Long <lucien.xin@gmail.com> wrote:
> [full quote of the patch snipped]

Patch

diff --git a/mm/slab_common.c b/mm/slab_common.c
index e5d080a93009..06ec3fa585e6 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -494,7 +494,6 @@ void kmem_cache_destroy(struct kmem_cache *s)
 	if (unlikely(!s))
 		return;
 
-	cpus_read_lock();
 	mutex_lock(&slab_mutex);
 
 	s->refcount--;
@@ -509,7 +508,6 @@ void kmem_cache_destroy(struct kmem_cache *s)
 	}
 out_unlock:
 	mutex_unlock(&slab_mutex);
-	cpus_read_unlock();
 }
 EXPORT_SYMBOL(kmem_cache_destroy);
 
diff --git a/mm/slub.c b/mm/slub.c
index abe7db581d68..754f020235ee 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -4311,7 +4311,7 @@ int __kmem_cache_shutdown(struct kmem_cache *s)
 	int node;
 	struct kmem_cache_node *n;
 
-	flush_all_cpus_locked(s);
+	flush_all(s);
 	/* Attempt to free all objects */
 	for_each_kmem_cache_node(s, node, n) {
 		free_partial(s, n);
@@ -4646,7 +4646,7 @@ static int slab_mem_going_offline_callback(void *arg)
 
 	mutex_lock(&slab_mutex);
 	list_for_each_entry(s, &slab_caches, list) {
-		flush_all_cpus_locked(s);
+		flush_all(s);
 		__kmem_cache_do_shrink(s);
 	}
 	mutex_unlock(&slab_mutex);