diff mbox series

inotify: Fix possible deadlock in fsnotify_destroy_mark

Message ID 20240927091231.360334-1-lizhi.xu@windriver.com (mailing list archive)
State New
Headers show
Series inotify: Fix possible deadlock in fsnotify_destroy_mark | expand

Commit Message

Lizhi Xu Sept. 27, 2024, 9:12 a.m. UTC
[Syzbot reported]
WARNING: possible circular locking dependency detected
6.11.0-rc4-syzkaller-00019-gb311c1b497e5 #0 Not tainted
------------------------------------------------------
kswapd0/78 is trying to acquire lock:
ffff88801b8d8930 (&group->mark_mutex){+.+.}-{3:3}, at: fsnotify_group_lock include/linux/fsnotify_backend.h:270 [inline]
ffff88801b8d8930 (&group->mark_mutex){+.+.}-{3:3}, at: fsnotify_destroy_mark+0x38/0x3c0 fs/notify/mark.c:578

but task is already holding lock:
ffffffff8ea2fd60 (fs_reclaim){+.+.}-{0:0}, at: balance_pgdat mm/vmscan.c:6841 [inline]
ffffffff8ea2fd60 (fs_reclaim){+.+.}-{0:0}, at: kswapd+0xbb4/0x35a0 mm/vmscan.c:7223

which lock already depends on the new lock.


the existing dependency chain (in reverse order) is:

-> #1 (fs_reclaim){+.+.}-{0:0}:
       lock_acquire+0x1ed/0x550 kernel/locking/lockdep.c:5759
       __fs_reclaim_acquire mm/page_alloc.c:3818 [inline]
       fs_reclaim_acquire+0x88/0x140 mm/page_alloc.c:3832
       might_alloc include/linux/sched/mm.h:334 [inline]
       slab_pre_alloc_hook mm/slub.c:3939 [inline]
       slab_alloc_node mm/slub.c:4017 [inline]
       kmem_cache_alloc_noprof+0x3d/0x2a0 mm/slub.c:4044
       inotify_new_watch fs/notify/inotify/inotify_user.c:599 [inline]
       inotify_update_watch fs/notify/inotify/inotify_user.c:647 [inline]
       __do_sys_inotify_add_watch fs/notify/inotify/inotify_user.c:786 [inline]
       __se_sys_inotify_add_watch+0x72e/0x1070 fs/notify/inotify/inotify_user.c:729
       do_syscall_x64 arch/x86/entry/common.c:52 [inline]
       do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83
       entry_SYSCALL_64_after_hwframe+0x77/0x7f

-> #0 (&group->mark_mutex){+.+.}-{3:3}:
       check_prev_add kernel/locking/lockdep.c:3133 [inline]
       check_prevs_add kernel/locking/lockdep.c:3252 [inline]
       validate_chain+0x18e0/0x5900 kernel/locking/lockdep.c:3868
       __lock_acquire+0x137a/0x2040 kernel/locking/lockdep.c:5142
       lock_acquire+0x1ed/0x550 kernel/locking/lockdep.c:5759
       __mutex_lock_common kernel/locking/mutex.c:608 [inline]
       __mutex_lock+0x136/0xd70 kernel/locking/mutex.c:752
       fsnotify_group_lock include/linux/fsnotify_backend.h:270 [inline]
       fsnotify_destroy_mark+0x38/0x3c0 fs/notify/mark.c:578
       fsnotify_destroy_marks+0x14a/0x660 fs/notify/mark.c:934
       fsnotify_inoderemove include/linux/fsnotify.h:264 [inline]
       dentry_unlink_inode+0x2e0/0x430 fs/dcache.c:403
       __dentry_kill+0x20d/0x630 fs/dcache.c:610
       shrink_kill+0xa9/0x2c0 fs/dcache.c:1055
       shrink_dentry_list+0x2c0/0x5b0 fs/dcache.c:1082
       prune_dcache_sb+0x10f/0x180 fs/dcache.c:1163
       super_cache_scan+0x34f/0x4b0 fs/super.c:221
       do_shrink_slab+0x701/0x1160 mm/shrinker.c:435
       shrink_slab+0x1093/0x14d0 mm/shrinker.c:662
       shrink_one+0x43b/0x850 mm/vmscan.c:4815
       shrink_many mm/vmscan.c:4876 [inline]
       lru_gen_shrink_node mm/vmscan.c:4954 [inline]
       shrink_node+0x3799/0x3de0 mm/vmscan.c:5934
       kswapd_shrink_node mm/vmscan.c:6762 [inline]
       balance_pgdat mm/vmscan.c:6954 [inline]
       kswapd+0x1bcd/0x35a0 mm/vmscan.c:7223
       kthread+0x2f0/0x390 kernel/kthread.c:389
       ret_from_fork+0x4b/0x80 arch/x86/kernel/process.c:147
       ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:244

other info that might help us debug this:

 Possible unsafe locking scenario:

       CPU0                    CPU1
       ----                    ----
  lock(fs_reclaim);
                               lock(&group->mark_mutex);
                               lock(fs_reclaim);
  lock(&group->mark_mutex);

 *** DEADLOCK ***

[Analysis] 
The inotify_new_watch() call passes through GFP_KERNEL, use memalloc_nofs_save/
memalloc_nofs_restore to make sure we don't end up with the fs reclaim dependency.

Reported-and-tested-by: syzbot+c679f13773f295d2da53@syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=c679f13773f295d2da53
Signed-off-by: Lizhi Xu <lizhi.xu@windriver.com>
---
 fs/notify/inotify/inotify_user.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

Comments

Amir Goldstein Sept. 27, 2024, 9:31 a.m. UTC | #1
On Fri, Sep 27, 2024 at 11:12 AM Lizhi Xu <lizhi.xu@windriver.com> wrote:
>
> [Syzbot reported]
> WARNING: possible circular locking dependency detected
> 6.11.0-rc4-syzkaller-00019-gb311c1b497e5 #0 Not tainted
> ------------------------------------------------------
> kswapd0/78 is trying to acquire lock:
> ffff88801b8d8930 (&group->mark_mutex){+.+.}-{3:3}, at: fsnotify_group_lock include/linux/fsnotify_backend.h:270 [inline]
> ffff88801b8d8930 (&group->mark_mutex){+.+.}-{3:3}, at: fsnotify_destroy_mark+0x38/0x3c0 fs/notify/mark.c:578
>
> but task is already holding lock:
> ffffffff8ea2fd60 (fs_reclaim){+.+.}-{0:0}, at: balance_pgdat mm/vmscan.c:6841 [inline]
> ffffffff8ea2fd60 (fs_reclaim){+.+.}-{0:0}, at: kswapd+0xbb4/0x35a0 mm/vmscan.c:7223
>
> which lock already depends on the new lock.
>
>
> the existing dependency chain (in reverse order) is:
>
> -> #1 (fs_reclaim){+.+.}-{0:0}:
>        lock_acquire+0x1ed/0x550 kernel/locking/lockdep.c:5759
>        __fs_reclaim_acquire mm/page_alloc.c:3818 [inline]
>        fs_reclaim_acquire+0x88/0x140 mm/page_alloc.c:3832
>        might_alloc include/linux/sched/mm.h:334 [inline]
>        slab_pre_alloc_hook mm/slub.c:3939 [inline]
>        slab_alloc_node mm/slub.c:4017 [inline]
>        kmem_cache_alloc_noprof+0x3d/0x2a0 mm/slub.c:4044
>        inotify_new_watch fs/notify/inotify/inotify_user.c:599 [inline]
>        inotify_update_watch fs/notify/inotify/inotify_user.c:647 [inline]
>        __do_sys_inotify_add_watch fs/notify/inotify/inotify_user.c:786 [inline]
>        __se_sys_inotify_add_watch+0x72e/0x1070 fs/notify/inotify/inotify_user.c:729
>        do_syscall_x64 arch/x86/entry/common.c:52 [inline]
>        do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83
>        entry_SYSCALL_64_after_hwframe+0x77/0x7f
>
> -> #0 (&group->mark_mutex){+.+.}-{3:3}:
>        check_prev_add kernel/locking/lockdep.c:3133 [inline]
>        check_prevs_add kernel/locking/lockdep.c:3252 [inline]
>        validate_chain+0x18e0/0x5900 kernel/locking/lockdep.c:3868
>        __lock_acquire+0x137a/0x2040 kernel/locking/lockdep.c:5142
>        lock_acquire+0x1ed/0x550 kernel/locking/lockdep.c:5759
>        __mutex_lock_common kernel/locking/mutex.c:608 [inline]
>        __mutex_lock+0x136/0xd70 kernel/locking/mutex.c:752
>        fsnotify_group_lock include/linux/fsnotify_backend.h:270 [inline]
>        fsnotify_destroy_mark+0x38/0x3c0 fs/notify/mark.c:578
>        fsnotify_destroy_marks+0x14a/0x660 fs/notify/mark.c:934
>        fsnotify_inoderemove include/linux/fsnotify.h:264 [inline]
>        dentry_unlink_inode+0x2e0/0x430 fs/dcache.c:403
>        __dentry_kill+0x20d/0x630 fs/dcache.c:610
>        shrink_kill+0xa9/0x2c0 fs/dcache.c:1055
>        shrink_dentry_list+0x2c0/0x5b0 fs/dcache.c:1082
>        prune_dcache_sb+0x10f/0x180 fs/dcache.c:1163
>        super_cache_scan+0x34f/0x4b0 fs/super.c:221
>        do_shrink_slab+0x701/0x1160 mm/shrinker.c:435
>        shrink_slab+0x1093/0x14d0 mm/shrinker.c:662
>        shrink_one+0x43b/0x850 mm/vmscan.c:4815
>        shrink_many mm/vmscan.c:4876 [inline]
>        lru_gen_shrink_node mm/vmscan.c:4954 [inline]
>        shrink_node+0x3799/0x3de0 mm/vmscan.c:5934
>        kswapd_shrink_node mm/vmscan.c:6762 [inline]
>        balance_pgdat mm/vmscan.c:6954 [inline]
>        kswapd+0x1bcd/0x35a0 mm/vmscan.c:7223
>        kthread+0x2f0/0x390 kernel/kthread.c:389
>        ret_from_fork+0x4b/0x80 arch/x86/kernel/process.c:147
>        ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:244
>
> other info that might help us debug this:
>
>  Possible unsafe locking scenario:
>
>        CPU0                    CPU1
>        ----                    ----
>   lock(fs_reclaim);
>                                lock(&group->mark_mutex);
>                                lock(fs_reclaim);
>   lock(&group->mark_mutex);
>
>  *** DEADLOCK ***
>
> [Analysis]
> The inotify_new_watch() call passes through GFP_KERNEL, use memalloc_nofs_save/
> memalloc_nofs_restore to make sure we don't end up with the fs reclaim dependency.

I don't think this can actually happen, because an inode with
an inotify mark cannot get evicted, but I cannot think of a way to annotate
this to lockdep, so if we need to silence lockdep, this is what
FSNOTIFY_GROUP_NOFS was created for.

Thanks,
Amir.

>
> Reported-and-tested-by: syzbot+c679f13773f295d2da53@syzkaller.appspotmail.com
> Closes: https://syzkaller.appspot.com/bug?extid=c679f13773f295d2da53
> Signed-off-by: Lizhi Xu <lizhi.xu@windriver.com>
> ---
>  fs/notify/inotify/inotify_user.c | 7 ++++++-
>  1 file changed, 6 insertions(+), 1 deletion(-)
>
> diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
> index c7e451d5bd51..70b77b6186a6 100644
> --- a/fs/notify/inotify/inotify_user.c
> +++ b/fs/notify/inotify/inotify_user.c
> @@ -643,8 +643,13 @@ static int inotify_update_watch(struct fsnotify_group *group, struct inode *inod
>         /* try to update and existing watch with the new arg */
>         ret = inotify_update_existing_watch(group, inode, arg);
>         /* no mark present, try to add a new one */
> -       if (ret == -ENOENT)
> +       if (ret == -ENOENT) {
> +               unsigned int nofs_flag;
> +
> +               nofs_flag = memalloc_nofs_save();
>                 ret = inotify_new_watch(group, inode, arg);
> +               memalloc_nofs_restore(nofs_flag);
> +       }
>         fsnotify_group_unlock(group);
>
>         return ret;
> --
> 2.43.0
>
Jan Kara Sept. 27, 2024, 10:20 a.m. UTC | #2
On Fri 27-09-24 11:31:50, Amir Goldstein wrote:
> On Fri, Sep 27, 2024 at 11:12 AM Lizhi Xu <lizhi.xu@windriver.com> wrote:
> >
> > [Syzbot reported]
> > WARNING: possible circular locking dependency detected
> > 6.11.0-rc4-syzkaller-00019-gb311c1b497e5 #0 Not tainted
> > ------------------------------------------------------
> > kswapd0/78 is trying to acquire lock:
> > ffff88801b8d8930 (&group->mark_mutex){+.+.}-{3:3}, at: fsnotify_group_lock include/linux/fsnotify_backend.h:270 [inline]
> > ffff88801b8d8930 (&group->mark_mutex){+.+.}-{3:3}, at: fsnotify_destroy_mark+0x38/0x3c0 fs/notify/mark.c:578
> >
> > but task is already holding lock:
> > ffffffff8ea2fd60 (fs_reclaim){+.+.}-{0:0}, at: balance_pgdat mm/vmscan.c:6841 [inline]
> > ffffffff8ea2fd60 (fs_reclaim){+.+.}-{0:0}, at: kswapd+0xbb4/0x35a0 mm/vmscan.c:7223
> >
> > which lock already depends on the new lock.
> >
> >
> > the existing dependency chain (in reverse order) is:
> >
> > -> #1 (fs_reclaim){+.+.}-{0:0}:
> >        lock_acquire+0x1ed/0x550 kernel/locking/lockdep.c:5759
> >        __fs_reclaim_acquire mm/page_alloc.c:3818 [inline]
> >        fs_reclaim_acquire+0x88/0x140 mm/page_alloc.c:3832
> >        might_alloc include/linux/sched/mm.h:334 [inline]
> >        slab_pre_alloc_hook mm/slub.c:3939 [inline]
> >        slab_alloc_node mm/slub.c:4017 [inline]
> >        kmem_cache_alloc_noprof+0x3d/0x2a0 mm/slub.c:4044
> >        inotify_new_watch fs/notify/inotify/inotify_user.c:599 [inline]
> >        inotify_update_watch fs/notify/inotify/inotify_user.c:647 [inline]
> >        __do_sys_inotify_add_watch fs/notify/inotify/inotify_user.c:786 [inline]
> >        __se_sys_inotify_add_watch+0x72e/0x1070 fs/notify/inotify/inotify_user.c:729
> >        do_syscall_x64 arch/x86/entry/common.c:52 [inline]
> >        do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83
> >        entry_SYSCALL_64_after_hwframe+0x77/0x7f
> >
> > -> #0 (&group->mark_mutex){+.+.}-{3:3}:
> >        check_prev_add kernel/locking/lockdep.c:3133 [inline]
> >        check_prevs_add kernel/locking/lockdep.c:3252 [inline]
> >        validate_chain+0x18e0/0x5900 kernel/locking/lockdep.c:3868
> >        __lock_acquire+0x137a/0x2040 kernel/locking/lockdep.c:5142
> >        lock_acquire+0x1ed/0x550 kernel/locking/lockdep.c:5759
> >        __mutex_lock_common kernel/locking/mutex.c:608 [inline]
> >        __mutex_lock+0x136/0xd70 kernel/locking/mutex.c:752
> >        fsnotify_group_lock include/linux/fsnotify_backend.h:270 [inline]
> >        fsnotify_destroy_mark+0x38/0x3c0 fs/notify/mark.c:578
> >        fsnotify_destroy_marks+0x14a/0x660 fs/notify/mark.c:934
> >        fsnotify_inoderemove include/linux/fsnotify.h:264 [inline]
> >        dentry_unlink_inode+0x2e0/0x430 fs/dcache.c:403
> >        __dentry_kill+0x20d/0x630 fs/dcache.c:610
> >        shrink_kill+0xa9/0x2c0 fs/dcache.c:1055
> >        shrink_dentry_list+0x2c0/0x5b0 fs/dcache.c:1082
> >        prune_dcache_sb+0x10f/0x180 fs/dcache.c:1163
> >        super_cache_scan+0x34f/0x4b0 fs/super.c:221
> >        do_shrink_slab+0x701/0x1160 mm/shrinker.c:435
> >        shrink_slab+0x1093/0x14d0 mm/shrinker.c:662
> >        shrink_one+0x43b/0x850 mm/vmscan.c:4815
> >        shrink_many mm/vmscan.c:4876 [inline]
> >        lru_gen_shrink_node mm/vmscan.c:4954 [inline]
> >        shrink_node+0x3799/0x3de0 mm/vmscan.c:5934
> >        kswapd_shrink_node mm/vmscan.c:6762 [inline]
> >        balance_pgdat mm/vmscan.c:6954 [inline]
> >        kswapd+0x1bcd/0x35a0 mm/vmscan.c:7223
> >        kthread+0x2f0/0x390 kernel/kthread.c:389
> >        ret_from_fork+0x4b/0x80 arch/x86/kernel/process.c:147
> >        ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:244
> >
> > other info that might help us debug this:
> >
> >  Possible unsafe locking scenario:
> >
> >        CPU0                    CPU1
> >        ----                    ----
> >   lock(fs_reclaim);
> >                                lock(&group->mark_mutex);
> >                                lock(fs_reclaim);
> >   lock(&group->mark_mutex);
> >
> >  *** DEADLOCK ***
> >
> > [Analysis]
> > The inotify_new_watch() call passes through GFP_KERNEL, use memalloc_nofs_save/
> > memalloc_nofs_restore to make sure we don't end up with the fs reclaim dependency.
> 
> I don't think this can actually happen, because an inode with
> an inotify mark cannot get evicted,

Well, in the trace above dentry reclaim apparently raced with unlink and so
ended up going to dentry_unlink_inode() -> fsnotify_inoderemove() which
does indeed end up grabbing group->mark_mutex. 

> but I cannot think of a way to annotate
> this to lockdep, so if we need to silence lockdep, this is what
> FSNOTIFY_GROUP_NOFS was created for.

So yes, inotify needs FSNOTIFY_GROUP_NOFS as well. In fact this trace shows
that any notification group needs to use NOFS allocations to be safe
against this race so we can just remove FSNOTIFY_GROUP_NOFS and
unconditionally do memalloc_nofs_save() in fsnotify_group_lock(). Lizhi,
will you send a patch please?

								Honza

> > Reported-and-tested-by: syzbot+c679f13773f295d2da53@syzkaller.appspotmail.com
> > Closes: https://syzkaller.appspot.com/bug?extid=c679f13773f295d2da53
> > Signed-off-by: Lizhi Xu <lizhi.xu@windriver.com>
> > ---
> >  fs/notify/inotify/inotify_user.c | 7 ++++++-
> >  1 file changed, 6 insertions(+), 1 deletion(-)
> >
> > diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
> > index c7e451d5bd51..70b77b6186a6 100644
> > --- a/fs/notify/inotify/inotify_user.c
> > +++ b/fs/notify/inotify/inotify_user.c
> > @@ -643,8 +643,13 @@ static int inotify_update_watch(struct fsnotify_group *group, struct inode *inod
> >         /* try to update and existing watch with the new arg */
> >         ret = inotify_update_existing_watch(group, inode, arg);
> >         /* no mark present, try to add a new one */
> > -       if (ret == -ENOENT)
> > +       if (ret == -ENOENT) {
> > +               unsigned int nofs_flag;
> > +
> > +               nofs_flag = memalloc_nofs_save();
> >                 ret = inotify_new_watch(group, inode, arg);
> > +               memalloc_nofs_restore(nofs_flag);
> > +       }
> >         fsnotify_group_unlock(group);
> >
> >         return ret;
> > --
> > 2.43.0
> >
diff mbox series

Patch

diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index c7e451d5bd51..70b77b6186a6 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -643,8 +643,13 @@  static int inotify_update_watch(struct fsnotify_group *group, struct inode *inod
 	/* try to update and existing watch with the new arg */
 	ret = inotify_update_existing_watch(group, inode, arg);
 	/* no mark present, try to add a new one */
-	if (ret == -ENOENT)
+	if (ret == -ENOENT) {
+		unsigned int nofs_flag;
+
+		nofs_flag = memalloc_nofs_save();
 		ret = inotify_new_watch(group, inode, arg);
+		memalloc_nofs_restore(nofs_flag);
+	}
 	fsnotify_group_unlock(group);
 
 	return ret;