Message ID | 818b41faf6260be972ffa3bd436dda518963384b.1718276261.git.fdmanana@suse.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | btrfs: fix a deadlock with reclaim during logging/log replay | expand |
在 2024/6/13 20:35, fdmanana@kernel.org 写道: > From: Filipe Manana <fdmanana@suse.com> > > During inode logging (and log replay too), we are holding a transaction > handle and we often need to call btrfs_iget(), which will read an inode > from its subvolume btree if it's not loaded in memory and that results in > allocating an inode with GFP_KERNEL semantics at the btrfs_alloc_inode() > callback - and this may recurse into the filesystem in case we are under > memory pressure and attempt to commit the current transaction, resulting > in a deadlock since the logging (or log replay) task is holding a > transaction handle open. > > Syzbot reported this with the following stack traces: > > ====================================================== > WARNING: possible circular locking dependency detected > 6.10.0-rc2-syzkaller-00361-g061d1af7b030 #0 Not tainted > ------------------------------------------------------ > syz-executor.1/9919 is trying to acquire lock: > ffffffff8dd3aac0 (fs_reclaim){+.+.}-{0:0}, at: might_alloc include/linux/sched/mm.h:334 [inline] > ffffffff8dd3aac0 (fs_reclaim){+.+.}-{0:0}, at: slab_pre_alloc_hook mm/slub.c:3891 [inline] > ffffffff8dd3aac0 (fs_reclaim){+.+.}-{0:0}, at: slab_alloc_node mm/slub.c:3981 [inline] > ffffffff8dd3aac0 (fs_reclaim){+.+.}-{0:0}, at: kmem_cache_alloc_lru_noprof+0x58/0x2f0 mm/slub.c:4020 > > but task is already holding lock: > ffff88804b569358 (&ei->log_mutex){+.+.}-{3:3}, at: btrfs_log_inode+0x39c/0x4660 fs/btrfs/tree-log.c:6481 > > which lock already depends on the new lock. > > the existing dependency chain (in reverse order) is: > > -> #3 (&ei->log_mutex){+.+.}-{3:3}: > __mutex_lock_common kernel/locking/mutex.c:608 [inline] > __mutex_lock+0x175/0x9c0 kernel/locking/mutex.c:752 > btrfs_log_inode+0x39c/0x4660 fs/btrfs/tree-log.c:6481 > btrfs_log_inode_parent+0x8cb/0x2a90 fs/btrfs/tree-log.c:7079 > btrfs_log_dentry_safe+0x59/0x80 fs/btrfs/tree-log.c:7180 > btrfs_sync_file+0x9c1/0xe10 fs/btrfs/file.c:1959 > vfs_fsync_range+0x141/0x230 fs/sync.c:188 > generic_write_sync include/linux/fs.h:2794 [inline] > btrfs_do_write_iter+0x584/0x10c0 fs/btrfs/file.c:1705 > new_sync_write fs/read_write.c:497 [inline] > vfs_write+0x6b6/0x1140 fs/read_write.c:590 > ksys_write+0x12f/0x260 fs/read_write.c:643 > do_syscall_32_irqs_on arch/x86/entry/common.c:165 [inline] > __do_fast_syscall_32+0x73/0x120 arch/x86/entry/common.c:386 > do_fast_syscall_32+0x32/0x80 arch/x86/entry/common.c:411 > entry_SYSENTER_compat_after_hwframe+0x84/0x8e > > -> #2 (btrfs_trans_num_extwriters){++++}-{0:0}: > join_transaction+0x164/0xf40 fs/btrfs/transaction.c:315 > start_transaction+0x427/0x1a70 fs/btrfs/transaction.c:700 > btrfs_commit_super+0xa1/0x110 fs/btrfs/disk-io.c:4170 > close_ctree+0xcb0/0xf90 fs/btrfs/disk-io.c:4324 > generic_shutdown_super+0x159/0x3d0 fs/super.c:642 > kill_anon_super+0x3a/0x60 fs/super.c:1226 > btrfs_kill_super+0x3b/0x50 fs/btrfs/super.c:2096 > deactivate_locked_super+0xbe/0x1a0 fs/super.c:473 > deactivate_super+0xde/0x100 fs/super.c:506 > cleanup_mnt+0x222/0x450 fs/namespace.c:1267 > task_work_run+0x14e/0x250 kernel/task_work.c:180 > resume_user_mode_work include/linux/resume_user_mode.h:50 [inline] > exit_to_user_mode_loop kernel/entry/common.c:114 [inline] > exit_to_user_mode_prepare include/linux/entry-common.h:328 [inline] > __syscall_exit_to_user_mode_work kernel/entry/common.c:207 [inline] > syscall_exit_to_user_mode+0x278/0x2a0 kernel/entry/common.c:218 > __do_fast_syscall_32+0x80/0x120 arch/x86/entry/common.c:389 > do_fast_syscall_32+0x32/0x80 arch/x86/entry/common.c:411 > entry_SYSENTER_compat_after_hwframe+0x84/0x8e > > -> #1 (btrfs_trans_num_writers){++++}-{0:0}: > __lock_release kernel/locking/lockdep.c:5468 [inline] > lock_release+0x33e/0x6c0 kernel/locking/lockdep.c:5774 > percpu_up_read include/linux/percpu-rwsem.h:99 [inline] > __sb_end_write include/linux/fs.h:1650 [inline] > sb_end_intwrite include/linux/fs.h:1767 [inline] > __btrfs_end_transaction+0x5ca/0x920 fs/btrfs/transaction.c:1071 > btrfs_commit_inode_delayed_inode+0x228/0x330 fs/btrfs/delayed-inode.c:1301 > btrfs_evict_inode+0x960/0xe80 fs/btrfs/inode.c:5291 > evict+0x2ed/0x6c0 fs/inode.c:667 > iput_final fs/inode.c:1741 [inline] > iput.part.0+0x5a8/0x7f0 fs/inode.c:1767 > iput+0x5c/0x80 fs/inode.c:1757 > dentry_unlink_inode+0x295/0x480 fs/dcache.c:400 > __dentry_kill+0x1d0/0x600 fs/dcache.c:603 > dput.part.0+0x4b1/0x9b0 fs/dcache.c:845 > dput+0x1f/0x30 fs/dcache.c:835 > ovl_stack_put+0x60/0x90 fs/overlayfs/util.c:132 > ovl_destroy_inode+0xc6/0x190 fs/overlayfs/super.c:182 > destroy_inode+0xc4/0x1b0 fs/inode.c:311 > iput_final fs/inode.c:1741 [inline] > iput.part.0+0x5a8/0x7f0 fs/inode.c:1767 > iput+0x5c/0x80 fs/inode.c:1757 > dentry_unlink_inode+0x295/0x480 fs/dcache.c:400 > __dentry_kill+0x1d0/0x600 fs/dcache.c:603 > shrink_kill fs/dcache.c:1048 [inline] > shrink_dentry_list+0x140/0x5d0 fs/dcache.c:1075 > prune_dcache_sb+0xeb/0x150 fs/dcache.c:1156 > super_cache_scan+0x32a/0x550 fs/super.c:221 > do_shrink_slab+0x44f/0x11c0 mm/shrinker.c:435 > shrink_slab_memcg mm/shrinker.c:548 [inline] > shrink_slab+0xa87/0x1310 mm/shrinker.c:626 > shrink_one+0x493/0x7c0 mm/vmscan.c:4790 > shrink_many mm/vmscan.c:4851 [inline] > lru_gen_shrink_node+0x89f/0x1750 mm/vmscan.c:4951 > shrink_node mm/vmscan.c:5910 [inline] > kswapd_shrink_node mm/vmscan.c:6720 [inline] > balance_pgdat+0x1105/0x1970 mm/vmscan.c:6911 > kswapd+0x5ea/0xbf0 mm/vmscan.c:7180 > kthread+0x2c1/0x3a0 kernel/kthread.c:389 > ret_from_fork+0x45/0x80 arch/x86/kernel/process.c:147 > ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:244 > > -> #0 (fs_reclaim){+.+.}-{0:0}: > check_prev_add kernel/locking/lockdep.c:3134 [inline] > check_prevs_add kernel/locking/lockdep.c:3253 [inline] > validate_chain kernel/locking/lockdep.c:3869 [inline] > __lock_acquire+0x2478/0x3b30 kernel/locking/lockdep.c:5137 > lock_acquire kernel/locking/lockdep.c:5754 [inline] > lock_acquire+0x1b1/0x560 kernel/locking/lockdep.c:5719 > __fs_reclaim_acquire mm/page_alloc.c:3801 [inline] > fs_reclaim_acquire+0x102/0x160 mm/page_alloc.c:3815 > might_alloc include/linux/sched/mm.h:334 [inline] > slab_pre_alloc_hook mm/slub.c:3891 [inline] > slab_alloc_node mm/slub.c:3981 [inline] > kmem_cache_alloc_lru_noprof+0x58/0x2f0 mm/slub.c:4020 > btrfs_alloc_inode+0x118/0xb20 fs/btrfs/inode.c:8411 > alloc_inode+0x5d/0x230 fs/inode.c:261 > iget5_locked fs/inode.c:1235 [inline] > iget5_locked+0x1c9/0x2c0 fs/inode.c:1228 > btrfs_iget_locked fs/btrfs/inode.c:5590 [inline] > btrfs_iget_path fs/btrfs/inode.c:5607 [inline] > btrfs_iget+0xfb/0x230 fs/btrfs/inode.c:5636 > add_conflicting_inode fs/btrfs/tree-log.c:5657 [inline] > copy_inode_items_to_log+0x1039/0x1e30 fs/btrfs/tree-log.c:5928 > btrfs_log_inode+0xa48/0x4660 fs/btrfs/tree-log.c:6592 > log_new_delayed_dentries fs/btrfs/tree-log.c:6363 [inline] > btrfs_log_inode+0x27dd/0x4660 fs/btrfs/tree-log.c:6718 > btrfs_log_all_parents fs/btrfs/tree-log.c:6833 [inline] > btrfs_log_inode_parent+0x22ba/0x2a90 fs/btrfs/tree-log.c:7141 > btrfs_log_dentry_safe+0x59/0x80 fs/btrfs/tree-log.c:7180 > btrfs_sync_file+0x9c1/0xe10 fs/btrfs/file.c:1959 > vfs_fsync_range+0x141/0x230 fs/sync.c:188 > generic_write_sync include/linux/fs.h:2794 [inline] > btrfs_do_write_iter+0x584/0x10c0 fs/btrfs/file.c:1705 > do_iter_readv_writev+0x504/0x780 fs/read_write.c:741 > vfs_writev+0x36f/0xde0 fs/read_write.c:971 > do_pwritev+0x1b2/0x260 fs/read_write.c:1072 > __do_compat_sys_pwritev2 fs/read_write.c:1218 [inline] > __se_compat_sys_pwritev2 fs/read_write.c:1210 [inline] > __ia32_compat_sys_pwritev2+0x121/0x1b0 fs/read_write.c:1210 > do_syscall_32_irqs_on arch/x86/entry/common.c:165 [inline] > __do_fast_syscall_32+0x73/0x120 arch/x86/entry/common.c:386 > do_fast_syscall_32+0x32/0x80 arch/x86/entry/common.c:411 > entry_SYSENTER_compat_after_hwframe+0x84/0x8e > > other info that might help us debug this: > > Chain exists of: > fs_reclaim --> btrfs_trans_num_extwriters --> &ei->log_mutex > > Possible unsafe locking scenario: > > CPU0 CPU1 > ---- ---- > lock(&ei->log_mutex); > lock(btrfs_trans_num_extwriters); > lock(&ei->log_mutex); > lock(fs_reclaim); > > *** DEADLOCK *** > > 7 locks held by syz-executor.1/9919: > #0: ffff88802be20420 (sb_writers#23){.+.+}-{0:0}, at: do_pwritev+0x1b2/0x260 fs/read_write.c:1072 > #1: ffff888065c0f8f0 (&sb->s_type->i_mutex_key#33){++++}-{3:3}, at: inode_lock include/linux/fs.h:791 [inline] > #1: ffff888065c0f8f0 (&sb->s_type->i_mutex_key#33){++++}-{3:3}, at: btrfs_inode_lock+0xc8/0x110 fs/btrfs/inode.c:385 > #2: ffff888065c0f778 (&ei->i_mmap_lock){++++}-{3:3}, at: btrfs_inode_lock+0xee/0x110 fs/btrfs/inode.c:388 > #3: ffff88802be20610 (sb_internal#4){.+.+}-{0:0}, at: btrfs_sync_file+0x95b/0xe10 fs/btrfs/file.c:1952 > #4: ffff8880546323f0 (btrfs_trans_num_writers){++++}-{0:0}, at: join_transaction+0x430/0xf40 fs/btrfs/transaction.c:290 > #5: ffff888054632418 (btrfs_trans_num_extwriters){++++}-{0:0}, at: join_transaction+0x430/0xf40 fs/btrfs/transaction.c:290 > #6: ffff88804b569358 (&ei->log_mutex){+.+.}-{3:3}, at: btrfs_log_inode+0x39c/0x4660 fs/btrfs/tree-log.c:6481 > > stack backtrace: > CPU: 2 PID: 9919 Comm: syz-executor.1 Not tainted 6.10.0-rc2-syzkaller-00361-g061d1af7b030 #0 > Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.2-debian-1.16.2-1 04/01/2014 > Call Trace: > <TASK> > __dump_stack lib/dump_stack.c:88 [inline] > dump_stack_lvl+0x116/0x1f0 lib/dump_stack.c:114 > check_noncircular+0x31a/0x400 kernel/locking/lockdep.c:2187 > check_prev_add kernel/locking/lockdep.c:3134 [inline] > check_prevs_add kernel/locking/lockdep.c:3253 [inline] > validate_chain kernel/locking/lockdep.c:3869 [inline] > __lock_acquire+0x2478/0x3b30 kernel/locking/lockdep.c:5137 > lock_acquire kernel/locking/lockdep.c:5754 [inline] > lock_acquire+0x1b1/0x560 kernel/locking/lockdep.c:5719 > __fs_reclaim_acquire mm/page_alloc.c:3801 [inline] > fs_reclaim_acquire+0x102/0x160 mm/page_alloc.c:3815 > might_alloc include/linux/sched/mm.h:334 [inline] > slab_pre_alloc_hook mm/slub.c:3891 [inline] > slab_alloc_node mm/slub.c:3981 [inline] > kmem_cache_alloc_lru_noprof+0x58/0x2f0 mm/slub.c:4020 > btrfs_alloc_inode+0x118/0xb20 fs/btrfs/inode.c:8411 > alloc_inode+0x5d/0x230 fs/inode.c:261 > iget5_locked fs/inode.c:1235 [inline] > iget5_locked+0x1c9/0x2c0 fs/inode.c:1228 > btrfs_iget_locked fs/btrfs/inode.c:5590 [inline] > btrfs_iget_path fs/btrfs/inode.c:5607 [inline] > btrfs_iget+0xfb/0x230 fs/btrfs/inode.c:5636 > add_conflicting_inode fs/btrfs/tree-log.c:5657 [inline] > copy_inode_items_to_log+0x1039/0x1e30 fs/btrfs/tree-log.c:5928 > btrfs_log_inode+0xa48/0x4660 fs/btrfs/tree-log.c:6592 > log_new_delayed_dentries fs/btrfs/tree-log.c:6363 [inline] > btrfs_log_inode+0x27dd/0x4660 fs/btrfs/tree-log.c:6718 > btrfs_log_all_parents fs/btrfs/tree-log.c:6833 [inline] > btrfs_log_inode_parent+0x22ba/0x2a90 fs/btrfs/tree-log.c:7141 > btrfs_log_dentry_safe+0x59/0x80 fs/btrfs/tree-log.c:7180 > btrfs_sync_file+0x9c1/0xe10 fs/btrfs/file.c:1959 > vfs_fsync_range+0x141/0x230 fs/sync.c:188 > generic_write_sync include/linux/fs.h:2794 [inline] > btrfs_do_write_iter+0x584/0x10c0 fs/btrfs/file.c:1705 > do_iter_readv_writev+0x504/0x780 fs/read_write.c:741 > vfs_writev+0x36f/0xde0 fs/read_write.c:971 > do_pwritev+0x1b2/0x260 fs/read_write.c:1072 > __do_compat_sys_pwritev2 fs/read_write.c:1218 [inline] > __se_compat_sys_pwritev2 fs/read_write.c:1210 [inline] > __ia32_compat_sys_pwritev2+0x121/0x1b0 fs/read_write.c:1210 > do_syscall_32_irqs_on arch/x86/entry/common.c:165 [inline] > __do_fast_syscall_32+0x73/0x120 arch/x86/entry/common.c:386 > do_fast_syscall_32+0x32/0x80 arch/x86/entry/common.c:411 > entry_SYSENTER_compat_after_hwframe+0x84/0x8e > RIP: 0023:0xf7334579 > Code: b8 01 10 06 03 (...) > RSP: 002b:00000000f5f265ac EFLAGS: 00000292 ORIG_RAX: 000000000000017b > RAX: ffffffffffffffda RBX: 0000000000000004 RCX: 00000000200002c0 > RDX: 0000000000000001 RSI: 0000000000000000 RDI: 0000000000000000 > RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000000 > R10: 0000000000000000 R11: 0000000000000292 R12: 0000000000000000 > R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 > </TASK> > > Fix this by ensuring we are under a NOFS scope whenever we call > btrfs_iget() during inode logging and log replay. > > Reported-by: syzbot+8576cfa84070dce4d59b@syzkaller.appspotmail.com > Link: https://lore.kernel.org/linux-btrfs/000000000000274a3a061abbd928@google.com/ > Fixes: 712e36c5f2a7 ("btrfs: use GFP_KERNEL in btrfs_alloc_inode") I'm wondering if logging is the only location where we can trigger the deadlock. Would regular inode_get() causing such deadlock? Otherwise the patch looks good to me. Thanks, Qu > Signed-off-by: Filipe Manana <fdmanana@suse.com> > --- > fs/btrfs/tree-log.c | 43 ++++++++++++++++++++++++++++--------------- > 1 file changed, 28 insertions(+), 15 deletions(-) > > diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c > index 6d892d76d4fb..4c9cc8eecb30 100644 > --- a/fs/btrfs/tree-log.c > +++ b/fs/btrfs/tree-log.c > @@ -138,6 +138,25 @@ static void wait_log_commit(struct btrfs_root *root, int transid); > * and once to do all the other items. > */ > > +static struct inode *btrfs_iget_logging(u64 objectid, struct btrfs_root *root) > +{ > + unsigned int nofs_flag; > + struct inode *inode; > + > + /* > + * We're holding a transaction handle whether we are logging or > + * replaying a log tree, so we must make sure NOFS semantics apply > + * because btrfs_alloc_inode() may be triggered and it uses GFP_KERNEL > + * to allocate an inode, which can recurse back into the filesystem and > + * attempt a transaction commit, resulting in a deadlock. > + */ > + nofs_flag = memalloc_nofs_save(); > + inode = btrfs_iget(root->fs_info->sb, objectid, root); > + memalloc_nofs_restore(nofs_flag); > + > + return inode; > +} > + > /* > * start a sub transaction and setup the log tree > * this increments the log tree writer count to make the people > @@ -600,7 +619,7 @@ static noinline struct inode *read_one_inode(struct btrfs_root *root, > { > struct inode *inode; > > - inode = btrfs_iget(root->fs_info->sb, objectid, root); > + inode = btrfs_iget_logging(objectid, root); > if (IS_ERR(inode)) > inode = NULL; > return inode; > @@ -5443,7 +5462,6 @@ static int log_new_dir_dentries(struct btrfs_trans_handle *trans, > struct btrfs_log_ctx *ctx) > { > struct btrfs_root *root = start_inode->root; > - struct btrfs_fs_info *fs_info = root->fs_info; > struct btrfs_path *path; > LIST_HEAD(dir_list); > struct btrfs_dir_list *dir_elem; > @@ -5504,7 +5522,7 @@ static int log_new_dir_dentries(struct btrfs_trans_handle *trans, > continue; > > btrfs_release_path(path); > - di_inode = btrfs_iget(fs_info->sb, di_key.objectid, root); > + di_inode = btrfs_iget_logging(di_key.objectid, root); > if (IS_ERR(di_inode)) { > ret = PTR_ERR(di_inode); > goto out; > @@ -5564,7 +5582,7 @@ static int log_new_dir_dentries(struct btrfs_trans_handle *trans, > btrfs_add_delayed_iput(curr_inode); > curr_inode = NULL; > > - vfs_inode = btrfs_iget(fs_info->sb, ino, root); > + vfs_inode = btrfs_iget_logging(ino, root); > if (IS_ERR(vfs_inode)) { > ret = PTR_ERR(vfs_inode); > break; > @@ -5659,7 +5677,7 @@ static int add_conflicting_inode(struct btrfs_trans_handle *trans, > if (ctx->num_conflict_inodes >= MAX_CONFLICT_INODES) > return BTRFS_LOG_FORCE_COMMIT; > > - inode = btrfs_iget(root->fs_info->sb, ino, root); > + inode = btrfs_iget_logging(ino, root); > /* > * If the other inode that had a conflicting dir entry was deleted in > * the current transaction then we either: > @@ -5760,7 +5778,6 @@ static int log_conflicting_inodes(struct btrfs_trans_handle *trans, > struct btrfs_root *root, > struct btrfs_log_ctx *ctx) > { > - struct btrfs_fs_info *fs_info = root->fs_info; > int ret = 0; > > /* > @@ -5791,7 +5808,7 @@ static int log_conflicting_inodes(struct btrfs_trans_handle *trans, > list_del(&curr->list); > kfree(curr); > > - inode = btrfs_iget(fs_info->sb, ino, root); > + inode = btrfs_iget_logging(ino, root); > /* > * If the other inode that had a conflicting dir entry was > * deleted in the current transaction, we need to log its parent > @@ -5802,7 +5819,7 @@ static int log_conflicting_inodes(struct btrfs_trans_handle *trans, > if (ret != -ENOENT) > break; > > - inode = btrfs_iget(fs_info->sb, parent, root); > + inode = btrfs_iget_logging(parent, root); > if (IS_ERR(inode)) { > ret = PTR_ERR(inode); > break; > @@ -6324,7 +6341,6 @@ static int log_new_delayed_dentries(struct btrfs_trans_handle *trans, > struct btrfs_log_ctx *ctx) > { > const bool orig_log_new_dentries = ctx->log_new_dentries; > - struct btrfs_fs_info *fs_info = trans->fs_info; > struct btrfs_delayed_item *item; > int ret = 0; > > @@ -6350,7 +6366,7 @@ static int log_new_delayed_dentries(struct btrfs_trans_handle *trans, > if (key.type == BTRFS_ROOT_ITEM_KEY) > continue; > > - di_inode = btrfs_iget(fs_info->sb, key.objectid, inode->root); > + di_inode = btrfs_iget_logging(key.objectid, inode->root); > if (IS_ERR(di_inode)) { > ret = PTR_ERR(di_inode); > break; > @@ -6734,7 +6750,6 @@ static int btrfs_log_all_parents(struct btrfs_trans_handle *trans, > struct btrfs_inode *inode, > struct btrfs_log_ctx *ctx) > { > - struct btrfs_fs_info *fs_info = trans->fs_info; > int ret; > struct btrfs_path *path; > struct btrfs_key key; > @@ -6799,8 +6814,7 @@ static int btrfs_log_all_parents(struct btrfs_trans_handle *trans, > cur_offset = item_size; > } > > - dir_inode = btrfs_iget(fs_info->sb, inode_key.objectid, > - root); > + dir_inode = btrfs_iget_logging(inode_key.objectid, root); > /* > * If the parent inode was deleted, return an error to > * fallback to a transaction commit. This is to prevent > @@ -6862,7 +6876,6 @@ static int log_new_ancestors(struct btrfs_trans_handle *trans, > btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]); > > while (true) { > - struct btrfs_fs_info *fs_info = root->fs_info; > struct extent_buffer *leaf; > int slot; > struct btrfs_key search_key; > @@ -6877,7 +6890,7 @@ static int log_new_ancestors(struct btrfs_trans_handle *trans, > search_key.objectid = found_key.offset; > search_key.type = BTRFS_INODE_ITEM_KEY; > search_key.offset = 0; > - inode = btrfs_iget(fs_info->sb, ino, root); > + inode = btrfs_iget_logging(ino, root); > if (IS_ERR(inode)) > return PTR_ERR(inode); >
On Thu, Jun 13, 2024 at 10:37 PM Qu Wenruo <quwenruo.btrfs@gmx.com> wrote: > > > > 在 2024/6/13 20:35, fdmanana@kernel.org 写道: > > From: Filipe Manana <fdmanana@suse.com> > > > > During inode logging (and log replay too), we are holding a transaction > > handle and we often need to call btrfs_iget(), which will read an inode > > from its subvolume btree if it's not loaded in memory and that results in > > allocating an inode with GFP_KERNEL semantics at the btrfs_alloc_inode() > > callback - and this may recurse into the filesystem in case we are under > > memory pressure and attempt to commit the current transaction, resulting > > in a deadlock since the logging (or log replay) task is holding a > > transaction handle open. > > > > Syzbot reported this with the following stack traces: > > > > ====================================================== > > WARNING: possible circular locking dependency detected > > 6.10.0-rc2-syzkaller-00361-g061d1af7b030 #0 Not tainted > > ------------------------------------------------------ > > syz-executor.1/9919 is trying to acquire lock: > > ffffffff8dd3aac0 (fs_reclaim){+.+.}-{0:0}, at: might_alloc include/linux/sched/mm.h:334 [inline] > > ffffffff8dd3aac0 (fs_reclaim){+.+.}-{0:0}, at: slab_pre_alloc_hook mm/slub.c:3891 [inline] > > ffffffff8dd3aac0 (fs_reclaim){+.+.}-{0:0}, at: slab_alloc_node mm/slub.c:3981 [inline] > > ffffffff8dd3aac0 (fs_reclaim){+.+.}-{0:0}, at: kmem_cache_alloc_lru_noprof+0x58/0x2f0 mm/slub.c:4020 > > > > but task is already holding lock: > > ffff88804b569358 (&ei->log_mutex){+.+.}-{3:3}, at: btrfs_log_inode+0x39c/0x4660 fs/btrfs/tree-log.c:6481 > > > > which lock already depends on the new lock. > > > > the existing dependency chain (in reverse order) is: > > > > -> #3 (&ei->log_mutex){+.+.}-{3:3}: > > __mutex_lock_common kernel/locking/mutex.c:608 [inline] > > __mutex_lock+0x175/0x9c0 kernel/locking/mutex.c:752 > > btrfs_log_inode+0x39c/0x4660 fs/btrfs/tree-log.c:6481 > > btrfs_log_inode_parent+0x8cb/0x2a90 fs/btrfs/tree-log.c:7079 > > btrfs_log_dentry_safe+0x59/0x80 fs/btrfs/tree-log.c:7180 > > btrfs_sync_file+0x9c1/0xe10 fs/btrfs/file.c:1959 > > vfs_fsync_range+0x141/0x230 fs/sync.c:188 > > generic_write_sync include/linux/fs.h:2794 [inline] > > btrfs_do_write_iter+0x584/0x10c0 fs/btrfs/file.c:1705 > > new_sync_write fs/read_write.c:497 [inline] > > vfs_write+0x6b6/0x1140 fs/read_write.c:590 > > ksys_write+0x12f/0x260 fs/read_write.c:643 > > do_syscall_32_irqs_on arch/x86/entry/common.c:165 [inline] > > __do_fast_syscall_32+0x73/0x120 arch/x86/entry/common.c:386 > > do_fast_syscall_32+0x32/0x80 arch/x86/entry/common.c:411 > > entry_SYSENTER_compat_after_hwframe+0x84/0x8e > > > > -> #2 (btrfs_trans_num_extwriters){++++}-{0:0}: > > join_transaction+0x164/0xf40 fs/btrfs/transaction.c:315 > > start_transaction+0x427/0x1a70 fs/btrfs/transaction.c:700 > > btrfs_commit_super+0xa1/0x110 fs/btrfs/disk-io.c:4170 > > close_ctree+0xcb0/0xf90 fs/btrfs/disk-io.c:4324 > > generic_shutdown_super+0x159/0x3d0 fs/super.c:642 > > kill_anon_super+0x3a/0x60 fs/super.c:1226 > > btrfs_kill_super+0x3b/0x50 fs/btrfs/super.c:2096 > > deactivate_locked_super+0xbe/0x1a0 fs/super.c:473 > > deactivate_super+0xde/0x100 fs/super.c:506 > > cleanup_mnt+0x222/0x450 fs/namespace.c:1267 > > task_work_run+0x14e/0x250 kernel/task_work.c:180 > > resume_user_mode_work include/linux/resume_user_mode.h:50 [inline] > > exit_to_user_mode_loop kernel/entry/common.c:114 [inline] > > exit_to_user_mode_prepare include/linux/entry-common.h:328 [inline] > > __syscall_exit_to_user_mode_work kernel/entry/common.c:207 [inline] > > syscall_exit_to_user_mode+0x278/0x2a0 kernel/entry/common.c:218 > > __do_fast_syscall_32+0x80/0x120 arch/x86/entry/common.c:389 > > do_fast_syscall_32+0x32/0x80 arch/x86/entry/common.c:411 > > entry_SYSENTER_compat_after_hwframe+0x84/0x8e > > > > -> #1 (btrfs_trans_num_writers){++++}-{0:0}: > > __lock_release kernel/locking/lockdep.c:5468 [inline] > > lock_release+0x33e/0x6c0 kernel/locking/lockdep.c:5774 > > percpu_up_read include/linux/percpu-rwsem.h:99 [inline] > > __sb_end_write include/linux/fs.h:1650 [inline] > > sb_end_intwrite include/linux/fs.h:1767 [inline] > > __btrfs_end_transaction+0x5ca/0x920 fs/btrfs/transaction.c:1071 > > btrfs_commit_inode_delayed_inode+0x228/0x330 fs/btrfs/delayed-inode.c:1301 > > btrfs_evict_inode+0x960/0xe80 fs/btrfs/inode.c:5291 > > evict+0x2ed/0x6c0 fs/inode.c:667 > > iput_final fs/inode.c:1741 [inline] > > iput.part.0+0x5a8/0x7f0 fs/inode.c:1767 > > iput+0x5c/0x80 fs/inode.c:1757 > > dentry_unlink_inode+0x295/0x480 fs/dcache.c:400 > > __dentry_kill+0x1d0/0x600 fs/dcache.c:603 > > dput.part.0+0x4b1/0x9b0 fs/dcache.c:845 > > dput+0x1f/0x30 fs/dcache.c:835 > > ovl_stack_put+0x60/0x90 fs/overlayfs/util.c:132 > > ovl_destroy_inode+0xc6/0x190 fs/overlayfs/super.c:182 > > destroy_inode+0xc4/0x1b0 fs/inode.c:311 > > iput_final fs/inode.c:1741 [inline] > > iput.part.0+0x5a8/0x7f0 fs/inode.c:1767 > > iput+0x5c/0x80 fs/inode.c:1757 > > dentry_unlink_inode+0x295/0x480 fs/dcache.c:400 > > __dentry_kill+0x1d0/0x600 fs/dcache.c:603 > > shrink_kill fs/dcache.c:1048 [inline] > > shrink_dentry_list+0x140/0x5d0 fs/dcache.c:1075 > > prune_dcache_sb+0xeb/0x150 fs/dcache.c:1156 > > super_cache_scan+0x32a/0x550 fs/super.c:221 > > do_shrink_slab+0x44f/0x11c0 mm/shrinker.c:435 > > shrink_slab_memcg mm/shrinker.c:548 [inline] > > shrink_slab+0xa87/0x1310 mm/shrinker.c:626 > > shrink_one+0x493/0x7c0 mm/vmscan.c:4790 > > shrink_many mm/vmscan.c:4851 [inline] > > lru_gen_shrink_node+0x89f/0x1750 mm/vmscan.c:4951 > > shrink_node mm/vmscan.c:5910 [inline] > > kswapd_shrink_node mm/vmscan.c:6720 [inline] > > balance_pgdat+0x1105/0x1970 mm/vmscan.c:6911 > > kswapd+0x5ea/0xbf0 mm/vmscan.c:7180 > > kthread+0x2c1/0x3a0 kernel/kthread.c:389 > > ret_from_fork+0x45/0x80 arch/x86/kernel/process.c:147 > > ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:244 > > > > -> #0 (fs_reclaim){+.+.}-{0:0}: > > check_prev_add kernel/locking/lockdep.c:3134 [inline] > > check_prevs_add kernel/locking/lockdep.c:3253 [inline] > > validate_chain kernel/locking/lockdep.c:3869 [inline] > > __lock_acquire+0x2478/0x3b30 kernel/locking/lockdep.c:5137 > > lock_acquire kernel/locking/lockdep.c:5754 [inline] > > lock_acquire+0x1b1/0x560 kernel/locking/lockdep.c:5719 > > __fs_reclaim_acquire mm/page_alloc.c:3801 [inline] > > fs_reclaim_acquire+0x102/0x160 mm/page_alloc.c:3815 > > might_alloc include/linux/sched/mm.h:334 [inline] > > slab_pre_alloc_hook mm/slub.c:3891 [inline] > > slab_alloc_node mm/slub.c:3981 [inline] > > kmem_cache_alloc_lru_noprof+0x58/0x2f0 mm/slub.c:4020 > > btrfs_alloc_inode+0x118/0xb20 fs/btrfs/inode.c:8411 > > alloc_inode+0x5d/0x230 fs/inode.c:261 > > iget5_locked fs/inode.c:1235 [inline] > > iget5_locked+0x1c9/0x2c0 fs/inode.c:1228 > > btrfs_iget_locked fs/btrfs/inode.c:5590 [inline] > > btrfs_iget_path fs/btrfs/inode.c:5607 [inline] > > btrfs_iget+0xfb/0x230 fs/btrfs/inode.c:5636 > > add_conflicting_inode fs/btrfs/tree-log.c:5657 [inline] > > copy_inode_items_to_log+0x1039/0x1e30 fs/btrfs/tree-log.c:5928 > > btrfs_log_inode+0xa48/0x4660 fs/btrfs/tree-log.c:6592 > > log_new_delayed_dentries fs/btrfs/tree-log.c:6363 [inline] > > btrfs_log_inode+0x27dd/0x4660 fs/btrfs/tree-log.c:6718 > > btrfs_log_all_parents fs/btrfs/tree-log.c:6833 [inline] > > btrfs_log_inode_parent+0x22ba/0x2a90 fs/btrfs/tree-log.c:7141 > > btrfs_log_dentry_safe+0x59/0x80 fs/btrfs/tree-log.c:7180 > > btrfs_sync_file+0x9c1/0xe10 fs/btrfs/file.c:1959 > > vfs_fsync_range+0x141/0x230 fs/sync.c:188 > > generic_write_sync include/linux/fs.h:2794 [inline] > > btrfs_do_write_iter+0x584/0x10c0 fs/btrfs/file.c:1705 > > do_iter_readv_writev+0x504/0x780 fs/read_write.c:741 > > vfs_writev+0x36f/0xde0 fs/read_write.c:971 > > do_pwritev+0x1b2/0x260 fs/read_write.c:1072 > > __do_compat_sys_pwritev2 fs/read_write.c:1218 [inline] > > __se_compat_sys_pwritev2 fs/read_write.c:1210 [inline] > > __ia32_compat_sys_pwritev2+0x121/0x1b0 fs/read_write.c:1210 > > do_syscall_32_irqs_on arch/x86/entry/common.c:165 [inline] > > __do_fast_syscall_32+0x73/0x120 arch/x86/entry/common.c:386 > > do_fast_syscall_32+0x32/0x80 arch/x86/entry/common.c:411 > > entry_SYSENTER_compat_after_hwframe+0x84/0x8e > > > > other info that might help us debug this: > > > > Chain exists of: > > fs_reclaim --> btrfs_trans_num_extwriters --> &ei->log_mutex > > > > Possible unsafe locking scenario: > > > > CPU0 CPU1 > > ---- ---- > > lock(&ei->log_mutex); > > lock(btrfs_trans_num_extwriters); > > lock(&ei->log_mutex); > > lock(fs_reclaim); > > > > *** DEADLOCK *** > > > > 7 locks held by syz-executor.1/9919: > > #0: ffff88802be20420 (sb_writers#23){.+.+}-{0:0}, at: do_pwritev+0x1b2/0x260 fs/read_write.c:1072 > > #1: ffff888065c0f8f0 (&sb->s_type->i_mutex_key#33){++++}-{3:3}, at: inode_lock include/linux/fs.h:791 [inline] > > #1: ffff888065c0f8f0 (&sb->s_type->i_mutex_key#33){++++}-{3:3}, at: btrfs_inode_lock+0xc8/0x110 fs/btrfs/inode.c:385 > > #2: ffff888065c0f778 (&ei->i_mmap_lock){++++}-{3:3}, at: btrfs_inode_lock+0xee/0x110 fs/btrfs/inode.c:388 > > #3: ffff88802be20610 (sb_internal#4){.+.+}-{0:0}, at: btrfs_sync_file+0x95b/0xe10 fs/btrfs/file.c:1952 > > #4: ffff8880546323f0 (btrfs_trans_num_writers){++++}-{0:0}, at: join_transaction+0x430/0xf40 fs/btrfs/transaction.c:290 > > #5: ffff888054632418 (btrfs_trans_num_extwriters){++++}-{0:0}, at: join_transaction+0x430/0xf40 fs/btrfs/transaction.c:290 > > #6: ffff88804b569358 (&ei->log_mutex){+.+.}-{3:3}, at: btrfs_log_inode+0x39c/0x4660 fs/btrfs/tree-log.c:6481 > > > > stack backtrace: > > CPU: 2 PID: 9919 Comm: syz-executor.1 Not tainted 6.10.0-rc2-syzkaller-00361-g061d1af7b030 #0 > > Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.2-debian-1.16.2-1 04/01/2014 > > Call Trace: > > <TASK> > > __dump_stack lib/dump_stack.c:88 [inline] > > dump_stack_lvl+0x116/0x1f0 lib/dump_stack.c:114 > > check_noncircular+0x31a/0x400 kernel/locking/lockdep.c:2187 > > check_prev_add kernel/locking/lockdep.c:3134 [inline] > > check_prevs_add kernel/locking/lockdep.c:3253 [inline] > > validate_chain kernel/locking/lockdep.c:3869 [inline] > > __lock_acquire+0x2478/0x3b30 kernel/locking/lockdep.c:5137 > > lock_acquire kernel/locking/lockdep.c:5754 [inline] > > lock_acquire+0x1b1/0x560 kernel/locking/lockdep.c:5719 > > __fs_reclaim_acquire mm/page_alloc.c:3801 [inline] > > fs_reclaim_acquire+0x102/0x160 mm/page_alloc.c:3815 > > might_alloc include/linux/sched/mm.h:334 [inline] > > slab_pre_alloc_hook mm/slub.c:3891 [inline] > > slab_alloc_node mm/slub.c:3981 [inline] > > kmem_cache_alloc_lru_noprof+0x58/0x2f0 mm/slub.c:4020 > > btrfs_alloc_inode+0x118/0xb20 fs/btrfs/inode.c:8411 > > alloc_inode+0x5d/0x230 fs/inode.c:261 > > iget5_locked fs/inode.c:1235 [inline] > > iget5_locked+0x1c9/0x2c0 fs/inode.c:1228 > > btrfs_iget_locked fs/btrfs/inode.c:5590 [inline] > > btrfs_iget_path fs/btrfs/inode.c:5607 [inline] > > btrfs_iget+0xfb/0x230 fs/btrfs/inode.c:5636 > > add_conflicting_inode fs/btrfs/tree-log.c:5657 [inline] > > copy_inode_items_to_log+0x1039/0x1e30 fs/btrfs/tree-log.c:5928 > > btrfs_log_inode+0xa48/0x4660 fs/btrfs/tree-log.c:6592 > > log_new_delayed_dentries fs/btrfs/tree-log.c:6363 [inline] > > btrfs_log_inode+0x27dd/0x4660 fs/btrfs/tree-log.c:6718 > > btrfs_log_all_parents fs/btrfs/tree-log.c:6833 [inline] > > btrfs_log_inode_parent+0x22ba/0x2a90 fs/btrfs/tree-log.c:7141 > > btrfs_log_dentry_safe+0x59/0x80 fs/btrfs/tree-log.c:7180 > > btrfs_sync_file+0x9c1/0xe10 fs/btrfs/file.c:1959 > > vfs_fsync_range+0x141/0x230 fs/sync.c:188 > > generic_write_sync include/linux/fs.h:2794 [inline] > > btrfs_do_write_iter+0x584/0x10c0 fs/btrfs/file.c:1705 > > do_iter_readv_writev+0x504/0x780 fs/read_write.c:741 > > vfs_writev+0x36f/0xde0 fs/read_write.c:971 > > do_pwritev+0x1b2/0x260 fs/read_write.c:1072 > > __do_compat_sys_pwritev2 fs/read_write.c:1218 [inline] > > __se_compat_sys_pwritev2 fs/read_write.c:1210 [inline] > > __ia32_compat_sys_pwritev2+0x121/0x1b0 fs/read_write.c:1210 > > do_syscall_32_irqs_on arch/x86/entry/common.c:165 [inline] > > __do_fast_syscall_32+0x73/0x120 arch/x86/entry/common.c:386 > > do_fast_syscall_32+0x32/0x80 arch/x86/entry/common.c:411 > > entry_SYSENTER_compat_after_hwframe+0x84/0x8e > > RIP: 0023:0xf7334579 > > Code: b8 01 10 06 03 (...) > > RSP: 002b:00000000f5f265ac EFLAGS: 00000292 ORIG_RAX: 000000000000017b > > RAX: ffffffffffffffda RBX: 0000000000000004 RCX: 00000000200002c0 > > RDX: 0000000000000001 RSI: 0000000000000000 RDI: 0000000000000000 > > RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000000 > > R10: 0000000000000000 R11: 0000000000000292 R12: 0000000000000000 > > R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 > > </TASK> > > > > Fix this by ensuring we are under a NOFS scope whenever we call > > btrfs_iget() during inode logging and log replay. > > > > Reported-by: syzbot+8576cfa84070dce4d59b@syzkaller.appspotmail.com > > Link: https://lore.kernel.org/linux-btrfs/000000000000274a3a061abbd928@google.com/ > > Fixes: 712e36c5f2a7 ("btrfs: use GFP_KERNEL in btrfs_alloc_inode") > > I'm wondering if logging is the only location where we can trigger the > deadlock. > > Would regular inode_get() causing such deadlock? What is inode_get()? I can't find anything with that exact name. If it's some path with a transaction handle open that can trigger btrfs_alloc_inode() then yes, otherwise it depends on what locks are held if any. > > Otherwise the patch looks good to me. > > Thanks, > Qu > > > Signed-off-by: Filipe Manana <fdmanana@suse.com> > > --- > > fs/btrfs/tree-log.c | 43 ++++++++++++++++++++++++++++--------------- > > 1 file changed, 28 insertions(+), 15 deletions(-) > > > > diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c > > index 6d892d76d4fb..4c9cc8eecb30 100644 > > --- a/fs/btrfs/tree-log.c > > +++ b/fs/btrfs/tree-log.c > > @@ -138,6 +138,25 @@ static void wait_log_commit(struct btrfs_root *root, int transid); > > * and once to do all the other items. > > */ > > > > +static struct inode *btrfs_iget_logging(u64 objectid, struct btrfs_root *root) > > +{ > > + unsigned int nofs_flag; > > + struct inode *inode; > > + > > + /* > > + * We're holding a transaction handle whether we are logging or > > + * replaying a log tree, so we must make sure NOFS semantics apply > > + * because btrfs_alloc_inode() may be triggered and it uses GFP_KERNEL > > + * to allocate an inode, which can recurse back into the filesystem and > > + * attempt a transaction commit, resulting in a deadlock. > > + */ > > + nofs_flag = memalloc_nofs_save(); > > + inode = btrfs_iget(root->fs_info->sb, objectid, root); > > + memalloc_nofs_restore(nofs_flag); > > + > > + return inode; > > +} > > + > > /* > > * start a sub transaction and setup the log tree > > * this increments the log tree writer count to make the people > > @@ -600,7 +619,7 @@ static noinline struct inode *read_one_inode(struct btrfs_root *root, > > { > > struct inode *inode; > > > > - inode = btrfs_iget(root->fs_info->sb, objectid, root); > > + inode = btrfs_iget_logging(objectid, root); > > if (IS_ERR(inode)) > > inode = NULL; > > return inode; > > @@ -5443,7 +5462,6 @@ static int log_new_dir_dentries(struct btrfs_trans_handle *trans, > > struct btrfs_log_ctx *ctx) > > { > > struct btrfs_root *root = start_inode->root; > > - struct btrfs_fs_info *fs_info = root->fs_info; > > struct btrfs_path *path; > > LIST_HEAD(dir_list); > > struct btrfs_dir_list *dir_elem; > > @@ -5504,7 +5522,7 @@ static int log_new_dir_dentries(struct btrfs_trans_handle *trans, > > continue; > > > > btrfs_release_path(path); > > - di_inode = btrfs_iget(fs_info->sb, di_key.objectid, root); > > + di_inode = btrfs_iget_logging(di_key.objectid, root); > > if (IS_ERR(di_inode)) { > > ret = PTR_ERR(di_inode); > > goto out; > > @@ -5564,7 +5582,7 @@ static int log_new_dir_dentries(struct btrfs_trans_handle *trans, > > btrfs_add_delayed_iput(curr_inode); > > curr_inode = NULL; > > > > - vfs_inode = btrfs_iget(fs_info->sb, ino, root); > > + vfs_inode = btrfs_iget_logging(ino, root); > > if (IS_ERR(vfs_inode)) { > > ret = PTR_ERR(vfs_inode); > > break; > > @@ -5659,7 +5677,7 @@ static int add_conflicting_inode(struct btrfs_trans_handle *trans, > > if (ctx->num_conflict_inodes >= MAX_CONFLICT_INODES) > > return BTRFS_LOG_FORCE_COMMIT; > > > > - inode = btrfs_iget(root->fs_info->sb, ino, root); > > + inode = btrfs_iget_logging(ino, root); > > /* > > * If the other inode that had a conflicting dir entry was deleted in > > * the current transaction then we either: > > @@ -5760,7 +5778,6 @@ static int log_conflicting_inodes(struct btrfs_trans_handle *trans, > > struct btrfs_root *root, > > struct btrfs_log_ctx *ctx) > > { > > - struct btrfs_fs_info *fs_info = root->fs_info; > > int ret = 0; > > > > /* > > @@ -5791,7 +5808,7 @@ static int log_conflicting_inodes(struct btrfs_trans_handle *trans, > > list_del(&curr->list); > > kfree(curr); > > > > - inode = btrfs_iget(fs_info->sb, ino, root); > > + inode = btrfs_iget_logging(ino, root); > > /* > > * If the other inode that had a conflicting dir entry was > > * deleted in the current transaction, we need to log its parent > > @@ -5802,7 +5819,7 @@ static int log_conflicting_inodes(struct btrfs_trans_handle *trans, > > if (ret != -ENOENT) > > break; > > > > - inode = btrfs_iget(fs_info->sb, parent, root); > > + inode = btrfs_iget_logging(parent, root); > > if (IS_ERR(inode)) { > > ret = PTR_ERR(inode); > > break; > > @@ -6324,7 +6341,6 @@ static int log_new_delayed_dentries(struct btrfs_trans_handle *trans, > > struct btrfs_log_ctx *ctx) > > { > > const bool orig_log_new_dentries = ctx->log_new_dentries; > > - struct btrfs_fs_info *fs_info = trans->fs_info; > > struct btrfs_delayed_item *item; > > int ret = 0; > > > > @@ -6350,7 +6366,7 @@ static int log_new_delayed_dentries(struct btrfs_trans_handle *trans, > > if (key.type == BTRFS_ROOT_ITEM_KEY) > > continue; > > > > - di_inode = btrfs_iget(fs_info->sb, key.objectid, inode->root); > > + di_inode = btrfs_iget_logging(key.objectid, inode->root); > > if (IS_ERR(di_inode)) { > > ret = PTR_ERR(di_inode); > > break; > > @@ -6734,7 +6750,6 @@ static int btrfs_log_all_parents(struct btrfs_trans_handle *trans, > > struct btrfs_inode *inode, > > struct btrfs_log_ctx *ctx) > > { > > - struct btrfs_fs_info *fs_info = trans->fs_info; > > int ret; > > struct btrfs_path *path; > > struct btrfs_key key; > > @@ -6799,8 +6814,7 @@ static int btrfs_log_all_parents(struct btrfs_trans_handle *trans, > > cur_offset = item_size; > > } > > > > - dir_inode = btrfs_iget(fs_info->sb, inode_key.objectid, > > - root); > > + dir_inode = btrfs_iget_logging(inode_key.objectid, root); > > /* > > * If the parent inode was deleted, return an error to > > * fallback to a transaction commit. This is to prevent > > @@ -6862,7 +6876,6 @@ static int log_new_ancestors(struct btrfs_trans_handle *trans, > > btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]); > > > > while (true) { > > - struct btrfs_fs_info *fs_info = root->fs_info; > > struct extent_buffer *leaf; > > int slot; > > struct btrfs_key search_key; > > @@ -6877,7 +6890,7 @@ static int log_new_ancestors(struct btrfs_trans_handle *trans, > > search_key.objectid = found_key.offset; > > search_key.type = BTRFS_INODE_ITEM_KEY; > > search_key.offset = 0; > > - inode = btrfs_iget(fs_info->sb, ino, root); > > + inode = btrfs_iget_logging(ino, root); > > if (IS_ERR(inode)) > > return PTR_ERR(inode); > >
在 2024/6/14 07:24, Filipe Manana 写道: > On Thu, Jun 13, 2024 at 10:37 PM Qu Wenruo <quwenruo.btrfs@gmx.com> wrote: >> >> >> >> 在 2024/6/13 20:35, fdmanana@kernel.org 写道: >>> From: Filipe Manana <fdmanana@suse.com> >>> >>> During inode logging (and log replay too), we are holding a transaction >>> handle and we often need to call btrfs_iget(), which will read an inode >>> from its subvolume btree if it's not loaded in memory and that results in >>> allocating an inode with GFP_KERNEL semantics at the btrfs_alloc_inode() >>> callback - and this may recurse into the filesystem in case we are under >>> memory pressure and attempt to commit the current transaction, resulting >>> in a deadlock since the logging (or log replay) task is holding a >>> transaction handle open. >>> >>> Syzbot reported this with the following stack traces: >>> >>> ====================================================== >>> WARNING: possible circular locking dependency detected >>> 6.10.0-rc2-syzkaller-00361-g061d1af7b030 #0 Not tainted >>> ------------------------------------------------------ >>> syz-executor.1/9919 is trying to acquire lock: >>> ffffffff8dd3aac0 (fs_reclaim){+.+.}-{0:0}, at: might_alloc include/linux/sched/mm.h:334 [inline] >>> ffffffff8dd3aac0 (fs_reclaim){+.+.}-{0:0}, at: slab_pre_alloc_hook mm/slub.c:3891 [inline] >>> ffffffff8dd3aac0 (fs_reclaim){+.+.}-{0:0}, at: slab_alloc_node mm/slub.c:3981 [inline] >>> ffffffff8dd3aac0 (fs_reclaim){+.+.}-{0:0}, at: kmem_cache_alloc_lru_noprof+0x58/0x2f0 mm/slub.c:4020 >>> >>> but task is already holding lock: >>> ffff88804b569358 (&ei->log_mutex){+.+.}-{3:3}, at: btrfs_log_inode+0x39c/0x4660 fs/btrfs/tree-log.c:6481 >>> >>> which lock already depends on the new lock. >>> >>> the existing dependency chain (in reverse order) is: >>> >>> -> #3 (&ei->log_mutex){+.+.}-{3:3}: >>> __mutex_lock_common kernel/locking/mutex.c:608 [inline] >>> __mutex_lock+0x175/0x9c0 kernel/locking/mutex.c:752 >>> btrfs_log_inode+0x39c/0x4660 fs/btrfs/tree-log.c:6481 >>> btrfs_log_inode_parent+0x8cb/0x2a90 fs/btrfs/tree-log.c:7079 >>> btrfs_log_dentry_safe+0x59/0x80 fs/btrfs/tree-log.c:7180 >>> btrfs_sync_file+0x9c1/0xe10 fs/btrfs/file.c:1959 >>> vfs_fsync_range+0x141/0x230 fs/sync.c:188 >>> generic_write_sync include/linux/fs.h:2794 [inline] >>> btrfs_do_write_iter+0x584/0x10c0 fs/btrfs/file.c:1705 >>> new_sync_write fs/read_write.c:497 [inline] >>> vfs_write+0x6b6/0x1140 fs/read_write.c:590 >>> ksys_write+0x12f/0x260 fs/read_write.c:643 >>> do_syscall_32_irqs_on arch/x86/entry/common.c:165 [inline] >>> __do_fast_syscall_32+0x73/0x120 arch/x86/entry/common.c:386 >>> do_fast_syscall_32+0x32/0x80 arch/x86/entry/common.c:411 >>> entry_SYSENTER_compat_after_hwframe+0x84/0x8e >>> >>> -> #2 (btrfs_trans_num_extwriters){++++}-{0:0}: >>> join_transaction+0x164/0xf40 fs/btrfs/transaction.c:315 >>> start_transaction+0x427/0x1a70 fs/btrfs/transaction.c:700 >>> btrfs_commit_super+0xa1/0x110 fs/btrfs/disk-io.c:4170 >>> close_ctree+0xcb0/0xf90 fs/btrfs/disk-io.c:4324 >>> generic_shutdown_super+0x159/0x3d0 fs/super.c:642 >>> kill_anon_super+0x3a/0x60 fs/super.c:1226 >>> btrfs_kill_super+0x3b/0x50 fs/btrfs/super.c:2096 >>> deactivate_locked_super+0xbe/0x1a0 fs/super.c:473 >>> deactivate_super+0xde/0x100 fs/super.c:506 >>> cleanup_mnt+0x222/0x450 fs/namespace.c:1267 >>> task_work_run+0x14e/0x250 kernel/task_work.c:180 >>> resume_user_mode_work include/linux/resume_user_mode.h:50 [inline] >>> exit_to_user_mode_loop kernel/entry/common.c:114 [inline] >>> exit_to_user_mode_prepare include/linux/entry-common.h:328 [inline] >>> __syscall_exit_to_user_mode_work kernel/entry/common.c:207 [inline] >>> syscall_exit_to_user_mode+0x278/0x2a0 kernel/entry/common.c:218 >>> __do_fast_syscall_32+0x80/0x120 arch/x86/entry/common.c:389 >>> do_fast_syscall_32+0x32/0x80 arch/x86/entry/common.c:411 >>> entry_SYSENTER_compat_after_hwframe+0x84/0x8e >>> >>> -> #1 (btrfs_trans_num_writers){++++}-{0:0}: >>> __lock_release kernel/locking/lockdep.c:5468 [inline] >>> lock_release+0x33e/0x6c0 kernel/locking/lockdep.c:5774 >>> percpu_up_read include/linux/percpu-rwsem.h:99 [inline] >>> __sb_end_write include/linux/fs.h:1650 [inline] >>> sb_end_intwrite include/linux/fs.h:1767 [inline] >>> __btrfs_end_transaction+0x5ca/0x920 fs/btrfs/transaction.c:1071 >>> btrfs_commit_inode_delayed_inode+0x228/0x330 fs/btrfs/delayed-inode.c:1301 >>> btrfs_evict_inode+0x960/0xe80 fs/btrfs/inode.c:5291 >>> evict+0x2ed/0x6c0 fs/inode.c:667 >>> iput_final fs/inode.c:1741 [inline] >>> iput.part.0+0x5a8/0x7f0 fs/inode.c:1767 >>> iput+0x5c/0x80 fs/inode.c:1757 >>> dentry_unlink_inode+0x295/0x480 fs/dcache.c:400 >>> __dentry_kill+0x1d0/0x600 fs/dcache.c:603 >>> dput.part.0+0x4b1/0x9b0 fs/dcache.c:845 >>> dput+0x1f/0x30 fs/dcache.c:835 >>> ovl_stack_put+0x60/0x90 fs/overlayfs/util.c:132 >>> ovl_destroy_inode+0xc6/0x190 fs/overlayfs/super.c:182 >>> destroy_inode+0xc4/0x1b0 fs/inode.c:311 >>> iput_final fs/inode.c:1741 [inline] >>> iput.part.0+0x5a8/0x7f0 fs/inode.c:1767 >>> iput+0x5c/0x80 fs/inode.c:1757 >>> dentry_unlink_inode+0x295/0x480 fs/dcache.c:400 >>> __dentry_kill+0x1d0/0x600 fs/dcache.c:603 >>> shrink_kill fs/dcache.c:1048 [inline] >>> shrink_dentry_list+0x140/0x5d0 fs/dcache.c:1075 >>> prune_dcache_sb+0xeb/0x150 fs/dcache.c:1156 >>> super_cache_scan+0x32a/0x550 fs/super.c:221 >>> do_shrink_slab+0x44f/0x11c0 mm/shrinker.c:435 >>> shrink_slab_memcg mm/shrinker.c:548 [inline] >>> shrink_slab+0xa87/0x1310 mm/shrinker.c:626 >>> shrink_one+0x493/0x7c0 mm/vmscan.c:4790 >>> shrink_many mm/vmscan.c:4851 [inline] >>> lru_gen_shrink_node+0x89f/0x1750 mm/vmscan.c:4951 >>> shrink_node mm/vmscan.c:5910 [inline] >>> kswapd_shrink_node mm/vmscan.c:6720 [inline] >>> balance_pgdat+0x1105/0x1970 mm/vmscan.c:6911 >>> kswapd+0x5ea/0xbf0 mm/vmscan.c:7180 >>> kthread+0x2c1/0x3a0 kernel/kthread.c:389 >>> ret_from_fork+0x45/0x80 arch/x86/kernel/process.c:147 >>> ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:244 >>> >>> -> #0 (fs_reclaim){+.+.}-{0:0}: >>> check_prev_add kernel/locking/lockdep.c:3134 [inline] >>> check_prevs_add kernel/locking/lockdep.c:3253 [inline] >>> validate_chain kernel/locking/lockdep.c:3869 [inline] >>> __lock_acquire+0x2478/0x3b30 kernel/locking/lockdep.c:5137 >>> lock_acquire kernel/locking/lockdep.c:5754 [inline] >>> lock_acquire+0x1b1/0x560 kernel/locking/lockdep.c:5719 >>> __fs_reclaim_acquire mm/page_alloc.c:3801 [inline] >>> fs_reclaim_acquire+0x102/0x160 mm/page_alloc.c:3815 >>> might_alloc include/linux/sched/mm.h:334 [inline] >>> slab_pre_alloc_hook mm/slub.c:3891 [inline] >>> slab_alloc_node mm/slub.c:3981 [inline] >>> kmem_cache_alloc_lru_noprof+0x58/0x2f0 mm/slub.c:4020 >>> btrfs_alloc_inode+0x118/0xb20 fs/btrfs/inode.c:8411 >>> alloc_inode+0x5d/0x230 fs/inode.c:261 >>> iget5_locked fs/inode.c:1235 [inline] >>> iget5_locked+0x1c9/0x2c0 fs/inode.c:1228 >>> btrfs_iget_locked fs/btrfs/inode.c:5590 [inline] >>> btrfs_iget_path fs/btrfs/inode.c:5607 [inline] >>> btrfs_iget+0xfb/0x230 fs/btrfs/inode.c:5636 >>> add_conflicting_inode fs/btrfs/tree-log.c:5657 [inline] >>> copy_inode_items_to_log+0x1039/0x1e30 fs/btrfs/tree-log.c:5928 >>> btrfs_log_inode+0xa48/0x4660 fs/btrfs/tree-log.c:6592 >>> log_new_delayed_dentries fs/btrfs/tree-log.c:6363 [inline] >>> btrfs_log_inode+0x27dd/0x4660 fs/btrfs/tree-log.c:6718 >>> btrfs_log_all_parents fs/btrfs/tree-log.c:6833 [inline] >>> btrfs_log_inode_parent+0x22ba/0x2a90 fs/btrfs/tree-log.c:7141 >>> btrfs_log_dentry_safe+0x59/0x80 fs/btrfs/tree-log.c:7180 >>> btrfs_sync_file+0x9c1/0xe10 fs/btrfs/file.c:1959 >>> vfs_fsync_range+0x141/0x230 fs/sync.c:188 >>> generic_write_sync include/linux/fs.h:2794 [inline] >>> btrfs_do_write_iter+0x584/0x10c0 fs/btrfs/file.c:1705 >>> do_iter_readv_writev+0x504/0x780 fs/read_write.c:741 >>> vfs_writev+0x36f/0xde0 fs/read_write.c:971 >>> do_pwritev+0x1b2/0x260 fs/read_write.c:1072 >>> __do_compat_sys_pwritev2 fs/read_write.c:1218 [inline] >>> __se_compat_sys_pwritev2 fs/read_write.c:1210 [inline] >>> __ia32_compat_sys_pwritev2+0x121/0x1b0 fs/read_write.c:1210 >>> do_syscall_32_irqs_on arch/x86/entry/common.c:165 [inline] >>> __do_fast_syscall_32+0x73/0x120 arch/x86/entry/common.c:386 >>> do_fast_syscall_32+0x32/0x80 arch/x86/entry/common.c:411 >>> entry_SYSENTER_compat_after_hwframe+0x84/0x8e >>> >>> other info that might help us debug this: >>> >>> Chain exists of: >>> fs_reclaim --> btrfs_trans_num_extwriters --> &ei->log_mutex >>> >>> Possible unsafe locking scenario: >>> >>> CPU0 CPU1 >>> ---- ---- >>> lock(&ei->log_mutex); >>> lock(btrfs_trans_num_extwriters); >>> lock(&ei->log_mutex); >>> lock(fs_reclaim); >>> >>> *** DEADLOCK *** >>> >>> 7 locks held by syz-executor.1/9919: >>> #0: ffff88802be20420 (sb_writers#23){.+.+}-{0:0}, at: do_pwritev+0x1b2/0x260 fs/read_write.c:1072 >>> #1: ffff888065c0f8f0 (&sb->s_type->i_mutex_key#33){++++}-{3:3}, at: inode_lock include/linux/fs.h:791 [inline] >>> #1: ffff888065c0f8f0 (&sb->s_type->i_mutex_key#33){++++}-{3:3}, at: btrfs_inode_lock+0xc8/0x110 fs/btrfs/inode.c:385 >>> #2: ffff888065c0f778 (&ei->i_mmap_lock){++++}-{3:3}, at: btrfs_inode_lock+0xee/0x110 fs/btrfs/inode.c:388 >>> #3: ffff88802be20610 (sb_internal#4){.+.+}-{0:0}, at: btrfs_sync_file+0x95b/0xe10 fs/btrfs/file.c:1952 >>> #4: ffff8880546323f0 (btrfs_trans_num_writers){++++}-{0:0}, at: join_transaction+0x430/0xf40 fs/btrfs/transaction.c:290 >>> #5: ffff888054632418 (btrfs_trans_num_extwriters){++++}-{0:0}, at: join_transaction+0x430/0xf40 fs/btrfs/transaction.c:290 >>> #6: ffff88804b569358 (&ei->log_mutex){+.+.}-{3:3}, at: btrfs_log_inode+0x39c/0x4660 fs/btrfs/tree-log.c:6481 >>> >>> stack backtrace: >>> CPU: 2 PID: 9919 Comm: syz-executor.1 Not tainted 6.10.0-rc2-syzkaller-00361-g061d1af7b030 #0 >>> Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.2-debian-1.16.2-1 04/01/2014 >>> Call Trace: >>> <TASK> >>> __dump_stack lib/dump_stack.c:88 [inline] >>> dump_stack_lvl+0x116/0x1f0 lib/dump_stack.c:114 >>> check_noncircular+0x31a/0x400 kernel/locking/lockdep.c:2187 >>> check_prev_add kernel/locking/lockdep.c:3134 [inline] >>> check_prevs_add kernel/locking/lockdep.c:3253 [inline] >>> validate_chain kernel/locking/lockdep.c:3869 [inline] >>> __lock_acquire+0x2478/0x3b30 kernel/locking/lockdep.c:5137 >>> lock_acquire kernel/locking/lockdep.c:5754 [inline] >>> lock_acquire+0x1b1/0x560 kernel/locking/lockdep.c:5719 >>> __fs_reclaim_acquire mm/page_alloc.c:3801 [inline] >>> fs_reclaim_acquire+0x102/0x160 mm/page_alloc.c:3815 >>> might_alloc include/linux/sched/mm.h:334 [inline] >>> slab_pre_alloc_hook mm/slub.c:3891 [inline] >>> slab_alloc_node mm/slub.c:3981 [inline] >>> kmem_cache_alloc_lru_noprof+0x58/0x2f0 mm/slub.c:4020 >>> btrfs_alloc_inode+0x118/0xb20 fs/btrfs/inode.c:8411 >>> alloc_inode+0x5d/0x230 fs/inode.c:261 >>> iget5_locked fs/inode.c:1235 [inline] >>> iget5_locked+0x1c9/0x2c0 fs/inode.c:1228 >>> btrfs_iget_locked fs/btrfs/inode.c:5590 [inline] >>> btrfs_iget_path fs/btrfs/inode.c:5607 [inline] >>> btrfs_iget+0xfb/0x230 fs/btrfs/inode.c:5636 >>> add_conflicting_inode fs/btrfs/tree-log.c:5657 [inline] >>> copy_inode_items_to_log+0x1039/0x1e30 fs/btrfs/tree-log.c:5928 >>> btrfs_log_inode+0xa48/0x4660 fs/btrfs/tree-log.c:6592 >>> log_new_delayed_dentries fs/btrfs/tree-log.c:6363 [inline] >>> btrfs_log_inode+0x27dd/0x4660 fs/btrfs/tree-log.c:6718 >>> btrfs_log_all_parents fs/btrfs/tree-log.c:6833 [inline] >>> btrfs_log_inode_parent+0x22ba/0x2a90 fs/btrfs/tree-log.c:7141 >>> btrfs_log_dentry_safe+0x59/0x80 fs/btrfs/tree-log.c:7180 >>> btrfs_sync_file+0x9c1/0xe10 fs/btrfs/file.c:1959 >>> vfs_fsync_range+0x141/0x230 fs/sync.c:188 >>> generic_write_sync include/linux/fs.h:2794 [inline] >>> btrfs_do_write_iter+0x584/0x10c0 fs/btrfs/file.c:1705 >>> do_iter_readv_writev+0x504/0x780 fs/read_write.c:741 >>> vfs_writev+0x36f/0xde0 fs/read_write.c:971 >>> do_pwritev+0x1b2/0x260 fs/read_write.c:1072 >>> __do_compat_sys_pwritev2 fs/read_write.c:1218 [inline] >>> __se_compat_sys_pwritev2 fs/read_write.c:1210 [inline] >>> __ia32_compat_sys_pwritev2+0x121/0x1b0 fs/read_write.c:1210 >>> do_syscall_32_irqs_on arch/x86/entry/common.c:165 [inline] >>> __do_fast_syscall_32+0x73/0x120 arch/x86/entry/common.c:386 >>> do_fast_syscall_32+0x32/0x80 arch/x86/entry/common.c:411 >>> entry_SYSENTER_compat_after_hwframe+0x84/0x8e >>> RIP: 0023:0xf7334579 >>> Code: b8 01 10 06 03 (...) >>> RSP: 002b:00000000f5f265ac EFLAGS: 00000292 ORIG_RAX: 000000000000017b >>> RAX: ffffffffffffffda RBX: 0000000000000004 RCX: 00000000200002c0 >>> RDX: 0000000000000001 RSI: 0000000000000000 RDI: 0000000000000000 >>> RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000000 >>> R10: 0000000000000000 R11: 0000000000000292 R12: 0000000000000000 >>> R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 >>> </TASK> >>> >>> Fix this by ensuring we are under a NOFS scope whenever we call >>> btrfs_iget() during inode logging and log replay. >>> >>> Reported-by: syzbot+8576cfa84070dce4d59b@syzkaller.appspotmail.com >>> Link: https://lore.kernel.org/linux-btrfs/000000000000274a3a061abbd928@google.com/ >>> Fixes: 712e36c5f2a7 ("btrfs: use GFP_KERNEL in btrfs_alloc_inode") >> >> I'm wondering if logging is the only location where we can trigger the >> deadlock. >> >> Would regular inode_get() causing such deadlock? > > What is inode_get()? I can't find anything with that exact name. My bad, I mean iget(). > > If it's some path with a transaction handle open that can trigger > btrfs_alloc_inode() then yes, otherwise it depends on what locks are > held if any. > Then would it be safer to revert the offending commit, aka make btrfs_alloc_inode() to use the old GFP_NOFS? I just checked ext4_alloc_inode() and f2fs_alloc_inode(), both are still using NOFS instead. Thus I'm wondering if it's really a good idea to go GFP_KERNEL in the first place. Thanks, Qu
On Thu, Jun 13, 2024 at 11:21 PM Qu Wenruo <quwenruo.btrfs@gmx.com> wrote: > > > > 在 2024/6/14 07:24, Filipe Manana 写道: > > On Thu, Jun 13, 2024 at 10:37 PM Qu Wenruo <quwenruo.btrfs@gmx.com> wrote: > >> > >> > >> > >> 在 2024/6/13 20:35, fdmanana@kernel.org 写道: > >>> From: Filipe Manana <fdmanana@suse.com> > >>> > >>> During inode logging (and log replay too), we are holding a transaction > >>> handle and we often need to call btrfs_iget(), which will read an inode > >>> from its subvolume btree if it's not loaded in memory and that results in > >>> allocating an inode with GFP_KERNEL semantics at the btrfs_alloc_inode() > >>> callback - and this may recurse into the filesystem in case we are under > >>> memory pressure and attempt to commit the current transaction, resulting > >>> in a deadlock since the logging (or log replay) task is holding a > >>> transaction handle open. > >>> > >>> Syzbot reported this with the following stack traces: > >>> > >>> ====================================================== > >>> WARNING: possible circular locking dependency detected > >>> 6.10.0-rc2-syzkaller-00361-g061d1af7b030 #0 Not tainted > >>> ------------------------------------------------------ > >>> syz-executor.1/9919 is trying to acquire lock: > >>> ffffffff8dd3aac0 (fs_reclaim){+.+.}-{0:0}, at: might_alloc include/linux/sched/mm.h:334 [inline] > >>> ffffffff8dd3aac0 (fs_reclaim){+.+.}-{0:0}, at: slab_pre_alloc_hook mm/slub.c:3891 [inline] > >>> ffffffff8dd3aac0 (fs_reclaim){+.+.}-{0:0}, at: slab_alloc_node mm/slub.c:3981 [inline] > >>> ffffffff8dd3aac0 (fs_reclaim){+.+.}-{0:0}, at: kmem_cache_alloc_lru_noprof+0x58/0x2f0 mm/slub.c:4020 > >>> > >>> but task is already holding lock: > >>> ffff88804b569358 (&ei->log_mutex){+.+.}-{3:3}, at: btrfs_log_inode+0x39c/0x4660 fs/btrfs/tree-log.c:6481 > >>> > >>> which lock already depends on the new lock. > >>> > >>> the existing dependency chain (in reverse order) is: > >>> > >>> -> #3 (&ei->log_mutex){+.+.}-{3:3}: > >>> __mutex_lock_common kernel/locking/mutex.c:608 [inline] > >>> __mutex_lock+0x175/0x9c0 kernel/locking/mutex.c:752 > >>> btrfs_log_inode+0x39c/0x4660 fs/btrfs/tree-log.c:6481 > >>> btrfs_log_inode_parent+0x8cb/0x2a90 fs/btrfs/tree-log.c:7079 > >>> btrfs_log_dentry_safe+0x59/0x80 fs/btrfs/tree-log.c:7180 > >>> btrfs_sync_file+0x9c1/0xe10 fs/btrfs/file.c:1959 > >>> vfs_fsync_range+0x141/0x230 fs/sync.c:188 > >>> generic_write_sync include/linux/fs.h:2794 [inline] > >>> btrfs_do_write_iter+0x584/0x10c0 fs/btrfs/file.c:1705 > >>> new_sync_write fs/read_write.c:497 [inline] > >>> vfs_write+0x6b6/0x1140 fs/read_write.c:590 > >>> ksys_write+0x12f/0x260 fs/read_write.c:643 > >>> do_syscall_32_irqs_on arch/x86/entry/common.c:165 [inline] > >>> __do_fast_syscall_32+0x73/0x120 arch/x86/entry/common.c:386 > >>> do_fast_syscall_32+0x32/0x80 arch/x86/entry/common.c:411 > >>> entry_SYSENTER_compat_after_hwframe+0x84/0x8e > >>> > >>> -> #2 (btrfs_trans_num_extwriters){++++}-{0:0}: > >>> join_transaction+0x164/0xf40 fs/btrfs/transaction.c:315 > >>> start_transaction+0x427/0x1a70 fs/btrfs/transaction.c:700 > >>> btrfs_commit_super+0xa1/0x110 fs/btrfs/disk-io.c:4170 > >>> close_ctree+0xcb0/0xf90 fs/btrfs/disk-io.c:4324 > >>> generic_shutdown_super+0x159/0x3d0 fs/super.c:642 > >>> kill_anon_super+0x3a/0x60 fs/super.c:1226 > >>> btrfs_kill_super+0x3b/0x50 fs/btrfs/super.c:2096 > >>> deactivate_locked_super+0xbe/0x1a0 fs/super.c:473 > >>> deactivate_super+0xde/0x100 fs/super.c:506 > >>> cleanup_mnt+0x222/0x450 fs/namespace.c:1267 > >>> task_work_run+0x14e/0x250 kernel/task_work.c:180 > >>> resume_user_mode_work include/linux/resume_user_mode.h:50 [inline] > >>> exit_to_user_mode_loop kernel/entry/common.c:114 [inline] > >>> exit_to_user_mode_prepare include/linux/entry-common.h:328 [inline] > >>> __syscall_exit_to_user_mode_work kernel/entry/common.c:207 [inline] > >>> syscall_exit_to_user_mode+0x278/0x2a0 kernel/entry/common.c:218 > >>> __do_fast_syscall_32+0x80/0x120 arch/x86/entry/common.c:389 > >>> do_fast_syscall_32+0x32/0x80 arch/x86/entry/common.c:411 > >>> entry_SYSENTER_compat_after_hwframe+0x84/0x8e > >>> > >>> -> #1 (btrfs_trans_num_writers){++++}-{0:0}: > >>> __lock_release kernel/locking/lockdep.c:5468 [inline] > >>> lock_release+0x33e/0x6c0 kernel/locking/lockdep.c:5774 > >>> percpu_up_read include/linux/percpu-rwsem.h:99 [inline] > >>> __sb_end_write include/linux/fs.h:1650 [inline] > >>> sb_end_intwrite include/linux/fs.h:1767 [inline] > >>> __btrfs_end_transaction+0x5ca/0x920 fs/btrfs/transaction.c:1071 > >>> btrfs_commit_inode_delayed_inode+0x228/0x330 fs/btrfs/delayed-inode.c:1301 > >>> btrfs_evict_inode+0x960/0xe80 fs/btrfs/inode.c:5291 > >>> evict+0x2ed/0x6c0 fs/inode.c:667 > >>> iput_final fs/inode.c:1741 [inline] > >>> iput.part.0+0x5a8/0x7f0 fs/inode.c:1767 > >>> iput+0x5c/0x80 fs/inode.c:1757 > >>> dentry_unlink_inode+0x295/0x480 fs/dcache.c:400 > >>> __dentry_kill+0x1d0/0x600 fs/dcache.c:603 > >>> dput.part.0+0x4b1/0x9b0 fs/dcache.c:845 > >>> dput+0x1f/0x30 fs/dcache.c:835 > >>> ovl_stack_put+0x60/0x90 fs/overlayfs/util.c:132 > >>> ovl_destroy_inode+0xc6/0x190 fs/overlayfs/super.c:182 > >>> destroy_inode+0xc4/0x1b0 fs/inode.c:311 > >>> iput_final fs/inode.c:1741 [inline] > >>> iput.part.0+0x5a8/0x7f0 fs/inode.c:1767 > >>> iput+0x5c/0x80 fs/inode.c:1757 > >>> dentry_unlink_inode+0x295/0x480 fs/dcache.c:400 > >>> __dentry_kill+0x1d0/0x600 fs/dcache.c:603 > >>> shrink_kill fs/dcache.c:1048 [inline] > >>> shrink_dentry_list+0x140/0x5d0 fs/dcache.c:1075 > >>> prune_dcache_sb+0xeb/0x150 fs/dcache.c:1156 > >>> super_cache_scan+0x32a/0x550 fs/super.c:221 > >>> do_shrink_slab+0x44f/0x11c0 mm/shrinker.c:435 > >>> shrink_slab_memcg mm/shrinker.c:548 [inline] > >>> shrink_slab+0xa87/0x1310 mm/shrinker.c:626 > >>> shrink_one+0x493/0x7c0 mm/vmscan.c:4790 > >>> shrink_many mm/vmscan.c:4851 [inline] > >>> lru_gen_shrink_node+0x89f/0x1750 mm/vmscan.c:4951 > >>> shrink_node mm/vmscan.c:5910 [inline] > >>> kswapd_shrink_node mm/vmscan.c:6720 [inline] > >>> balance_pgdat+0x1105/0x1970 mm/vmscan.c:6911 > >>> kswapd+0x5ea/0xbf0 mm/vmscan.c:7180 > >>> kthread+0x2c1/0x3a0 kernel/kthread.c:389 > >>> ret_from_fork+0x45/0x80 arch/x86/kernel/process.c:147 > >>> ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:244 > >>> > >>> -> #0 (fs_reclaim){+.+.}-{0:0}: > >>> check_prev_add kernel/locking/lockdep.c:3134 [inline] > >>> check_prevs_add kernel/locking/lockdep.c:3253 [inline] > >>> validate_chain kernel/locking/lockdep.c:3869 [inline] > >>> __lock_acquire+0x2478/0x3b30 kernel/locking/lockdep.c:5137 > >>> lock_acquire kernel/locking/lockdep.c:5754 [inline] > >>> lock_acquire+0x1b1/0x560 kernel/locking/lockdep.c:5719 > >>> __fs_reclaim_acquire mm/page_alloc.c:3801 [inline] > >>> fs_reclaim_acquire+0x102/0x160 mm/page_alloc.c:3815 > >>> might_alloc include/linux/sched/mm.h:334 [inline] > >>> slab_pre_alloc_hook mm/slub.c:3891 [inline] > >>> slab_alloc_node mm/slub.c:3981 [inline] > >>> kmem_cache_alloc_lru_noprof+0x58/0x2f0 mm/slub.c:4020 > >>> btrfs_alloc_inode+0x118/0xb20 fs/btrfs/inode.c:8411 > >>> alloc_inode+0x5d/0x230 fs/inode.c:261 > >>> iget5_locked fs/inode.c:1235 [inline] > >>> iget5_locked+0x1c9/0x2c0 fs/inode.c:1228 > >>> btrfs_iget_locked fs/btrfs/inode.c:5590 [inline] > >>> btrfs_iget_path fs/btrfs/inode.c:5607 [inline] > >>> btrfs_iget+0xfb/0x230 fs/btrfs/inode.c:5636 > >>> add_conflicting_inode fs/btrfs/tree-log.c:5657 [inline] > >>> copy_inode_items_to_log+0x1039/0x1e30 fs/btrfs/tree-log.c:5928 > >>> btrfs_log_inode+0xa48/0x4660 fs/btrfs/tree-log.c:6592 > >>> log_new_delayed_dentries fs/btrfs/tree-log.c:6363 [inline] > >>> btrfs_log_inode+0x27dd/0x4660 fs/btrfs/tree-log.c:6718 > >>> btrfs_log_all_parents fs/btrfs/tree-log.c:6833 [inline] > >>> btrfs_log_inode_parent+0x22ba/0x2a90 fs/btrfs/tree-log.c:7141 > >>> btrfs_log_dentry_safe+0x59/0x80 fs/btrfs/tree-log.c:7180 > >>> btrfs_sync_file+0x9c1/0xe10 fs/btrfs/file.c:1959 > >>> vfs_fsync_range+0x141/0x230 fs/sync.c:188 > >>> generic_write_sync include/linux/fs.h:2794 [inline] > >>> btrfs_do_write_iter+0x584/0x10c0 fs/btrfs/file.c:1705 > >>> do_iter_readv_writev+0x504/0x780 fs/read_write.c:741 > >>> vfs_writev+0x36f/0xde0 fs/read_write.c:971 > >>> do_pwritev+0x1b2/0x260 fs/read_write.c:1072 > >>> __do_compat_sys_pwritev2 fs/read_write.c:1218 [inline] > >>> __se_compat_sys_pwritev2 fs/read_write.c:1210 [inline] > >>> __ia32_compat_sys_pwritev2+0x121/0x1b0 fs/read_write.c:1210 > >>> do_syscall_32_irqs_on arch/x86/entry/common.c:165 [inline] > >>> __do_fast_syscall_32+0x73/0x120 arch/x86/entry/common.c:386 > >>> do_fast_syscall_32+0x32/0x80 arch/x86/entry/common.c:411 > >>> entry_SYSENTER_compat_after_hwframe+0x84/0x8e > >>> > >>> other info that might help us debug this: > >>> > >>> Chain exists of: > >>> fs_reclaim --> btrfs_trans_num_extwriters --> &ei->log_mutex > >>> > >>> Possible unsafe locking scenario: > >>> > >>> CPU0 CPU1 > >>> ---- ---- > >>> lock(&ei->log_mutex); > >>> lock(btrfs_trans_num_extwriters); > >>> lock(&ei->log_mutex); > >>> lock(fs_reclaim); > >>> > >>> *** DEADLOCK *** > >>> > >>> 7 locks held by syz-executor.1/9919: > >>> #0: ffff88802be20420 (sb_writers#23){.+.+}-{0:0}, at: do_pwritev+0x1b2/0x260 fs/read_write.c:1072 > >>> #1: ffff888065c0f8f0 (&sb->s_type->i_mutex_key#33){++++}-{3:3}, at: inode_lock include/linux/fs.h:791 [inline] > >>> #1: ffff888065c0f8f0 (&sb->s_type->i_mutex_key#33){++++}-{3:3}, at: btrfs_inode_lock+0xc8/0x110 fs/btrfs/inode.c:385 > >>> #2: ffff888065c0f778 (&ei->i_mmap_lock){++++}-{3:3}, at: btrfs_inode_lock+0xee/0x110 fs/btrfs/inode.c:388 > >>> #3: ffff88802be20610 (sb_internal#4){.+.+}-{0:0}, at: btrfs_sync_file+0x95b/0xe10 fs/btrfs/file.c:1952 > >>> #4: ffff8880546323f0 (btrfs_trans_num_writers){++++}-{0:0}, at: join_transaction+0x430/0xf40 fs/btrfs/transaction.c:290 > >>> #5: ffff888054632418 (btrfs_trans_num_extwriters){++++}-{0:0}, at: join_transaction+0x430/0xf40 fs/btrfs/transaction.c:290 > >>> #6: ffff88804b569358 (&ei->log_mutex){+.+.}-{3:3}, at: btrfs_log_inode+0x39c/0x4660 fs/btrfs/tree-log.c:6481 > >>> > >>> stack backtrace: > >>> CPU: 2 PID: 9919 Comm: syz-executor.1 Not tainted 6.10.0-rc2-syzkaller-00361-g061d1af7b030 #0 > >>> Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.2-debian-1.16.2-1 04/01/2014 > >>> Call Trace: > >>> <TASK> > >>> __dump_stack lib/dump_stack.c:88 [inline] > >>> dump_stack_lvl+0x116/0x1f0 lib/dump_stack.c:114 > >>> check_noncircular+0x31a/0x400 kernel/locking/lockdep.c:2187 > >>> check_prev_add kernel/locking/lockdep.c:3134 [inline] > >>> check_prevs_add kernel/locking/lockdep.c:3253 [inline] > >>> validate_chain kernel/locking/lockdep.c:3869 [inline] > >>> __lock_acquire+0x2478/0x3b30 kernel/locking/lockdep.c:5137 > >>> lock_acquire kernel/locking/lockdep.c:5754 [inline] > >>> lock_acquire+0x1b1/0x560 kernel/locking/lockdep.c:5719 > >>> __fs_reclaim_acquire mm/page_alloc.c:3801 [inline] > >>> fs_reclaim_acquire+0x102/0x160 mm/page_alloc.c:3815 > >>> might_alloc include/linux/sched/mm.h:334 [inline] > >>> slab_pre_alloc_hook mm/slub.c:3891 [inline] > >>> slab_alloc_node mm/slub.c:3981 [inline] > >>> kmem_cache_alloc_lru_noprof+0x58/0x2f0 mm/slub.c:4020 > >>> btrfs_alloc_inode+0x118/0xb20 fs/btrfs/inode.c:8411 > >>> alloc_inode+0x5d/0x230 fs/inode.c:261 > >>> iget5_locked fs/inode.c:1235 [inline] > >>> iget5_locked+0x1c9/0x2c0 fs/inode.c:1228 > >>> btrfs_iget_locked fs/btrfs/inode.c:5590 [inline] > >>> btrfs_iget_path fs/btrfs/inode.c:5607 [inline] > >>> btrfs_iget+0xfb/0x230 fs/btrfs/inode.c:5636 > >>> add_conflicting_inode fs/btrfs/tree-log.c:5657 [inline] > >>> copy_inode_items_to_log+0x1039/0x1e30 fs/btrfs/tree-log.c:5928 > >>> btrfs_log_inode+0xa48/0x4660 fs/btrfs/tree-log.c:6592 > >>> log_new_delayed_dentries fs/btrfs/tree-log.c:6363 [inline] > >>> btrfs_log_inode+0x27dd/0x4660 fs/btrfs/tree-log.c:6718 > >>> btrfs_log_all_parents fs/btrfs/tree-log.c:6833 [inline] > >>> btrfs_log_inode_parent+0x22ba/0x2a90 fs/btrfs/tree-log.c:7141 > >>> btrfs_log_dentry_safe+0x59/0x80 fs/btrfs/tree-log.c:7180 > >>> btrfs_sync_file+0x9c1/0xe10 fs/btrfs/file.c:1959 > >>> vfs_fsync_range+0x141/0x230 fs/sync.c:188 > >>> generic_write_sync include/linux/fs.h:2794 [inline] > >>> btrfs_do_write_iter+0x584/0x10c0 fs/btrfs/file.c:1705 > >>> do_iter_readv_writev+0x504/0x780 fs/read_write.c:741 > >>> vfs_writev+0x36f/0xde0 fs/read_write.c:971 > >>> do_pwritev+0x1b2/0x260 fs/read_write.c:1072 > >>> __do_compat_sys_pwritev2 fs/read_write.c:1218 [inline] > >>> __se_compat_sys_pwritev2 fs/read_write.c:1210 [inline] > >>> __ia32_compat_sys_pwritev2+0x121/0x1b0 fs/read_write.c:1210 > >>> do_syscall_32_irqs_on arch/x86/entry/common.c:165 [inline] > >>> __do_fast_syscall_32+0x73/0x120 arch/x86/entry/common.c:386 > >>> do_fast_syscall_32+0x32/0x80 arch/x86/entry/common.c:411 > >>> entry_SYSENTER_compat_after_hwframe+0x84/0x8e > >>> RIP: 0023:0xf7334579 > >>> Code: b8 01 10 06 03 (...) > >>> RSP: 002b:00000000f5f265ac EFLAGS: 00000292 ORIG_RAX: 000000000000017b > >>> RAX: ffffffffffffffda RBX: 0000000000000004 RCX: 00000000200002c0 > >>> RDX: 0000000000000001 RSI: 0000000000000000 RDI: 0000000000000000 > >>> RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000000 > >>> R10: 0000000000000000 R11: 0000000000000292 R12: 0000000000000000 > >>> R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 > >>> </TASK> > >>> > >>> Fix this by ensuring we are under a NOFS scope whenever we call > >>> btrfs_iget() during inode logging and log replay. > >>> > >>> Reported-by: syzbot+8576cfa84070dce4d59b@syzkaller.appspotmail.com > >>> Link: https://lore.kernel.org/linux-btrfs/000000000000274a3a061abbd928@google.com/ > >>> Fixes: 712e36c5f2a7 ("btrfs: use GFP_KERNEL in btrfs_alloc_inode") > >> > >> I'm wondering if logging is the only location where we can trigger the > >> deadlock. > >> > >> Would regular inode_get() causing such deadlock? > > > > What is inode_get()? I can't find anything with that exact name. > > My bad, I mean iget(). We don't have anything called exactly iget() either... :) You mean btrfs_iget()? > > > > > If it's some path with a transaction handle open that can trigger > > btrfs_alloc_inode() then yes, otherwise it depends on what locks are > > held if any. > > > > Then would it be safer to revert the offending commit, aka make > btrfs_alloc_inode() to use the old GFP_NOFS? > > I just checked ext4_alloc_inode() and f2fs_alloc_inode(), both are still > using NOFS instead. > > Thus I'm wondering if it's really a good idea to go GFP_KERNEL in the > first place. The GFP_KERNEL is fine in some paths, like during dentry lookup (btrfs_lookup_dentry() callback), where we don't hold a transaction handle or any lock that would cause a deadlock in case reclaim is triggered. > > Thanks, > Qu
On Fri, Jun 14, 2024 at 07:51:44AM +0930, Qu Wenruo wrote: > >>> do_iter_readv_writev+0x504/0x780 fs/read_write.c:741 > >>> vfs_writev+0x36f/0xde0 fs/read_write.c:971 > >>> do_pwritev+0x1b2/0x260 fs/read_write.c:1072 > >>> __do_compat_sys_pwritev2 fs/read_write.c:1218 [inline] > >>> __se_compat_sys_pwritev2 fs/read_write.c:1210 [inline] > >>> __ia32_compat_sys_pwritev2+0x121/0x1b0 fs/read_write.c:1210 > >>> do_syscall_32_irqs_on arch/x86/entry/common.c:165 [inline] > >>> __do_fast_syscall_32+0x73/0x120 arch/x86/entry/common.c:386 > >>> do_fast_syscall_32+0x32/0x80 arch/x86/entry/common.c:411 > >>> entry_SYSENTER_compat_after_hwframe+0x84/0x8e > >>> RIP: 0023:0xf7334579 > >>> Code: b8 01 10 06 03 (...) > >>> RSP: 002b:00000000f5f265ac EFLAGS: 00000292 ORIG_RAX: 000000000000017b > >>> RAX: ffffffffffffffda RBX: 0000000000000004 RCX: 00000000200002c0 > >>> RDX: 0000000000000001 RSI: 0000000000000000 RDI: 0000000000000000 > >>> RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000000 > >>> R10: 0000000000000000 R11: 0000000000000292 R12: 0000000000000000 > >>> R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 > >>> </TASK> > >>> > >>> Fix this by ensuring we are under a NOFS scope whenever we call > >>> btrfs_iget() during inode logging and log replay. > >>> > >>> Reported-by: syzbot+8576cfa84070dce4d59b@syzkaller.appspotmail.com > >>> Link: https://lore.kernel.org/linux-btrfs/000000000000274a3a061abbd928@google.com/ > >>> Fixes: 712e36c5f2a7 ("btrfs: use GFP_KERNEL in btrfs_alloc_inode") > >> > >> I'm wondering if logging is the only location where we can trigger the > >> deadlock. > >> > >> Would regular inode_get() causing such deadlock? > > > > What is inode_get()? I can't find anything with that exact name. > > My bad, I mean iget(). > > > > > If it's some path with a transaction handle open that can trigger > > btrfs_alloc_inode() then yes, otherwise it depends on what locks are > > held if any. > > > > Then would it be safer to revert the offending commit, aka make > btrfs_alloc_inode() to use the old GFP_NOFS? > > I just checked ext4_alloc_inode() and f2fs_alloc_inode(), both are still > using NOFS instead. > > Thus I'm wondering if it's really a good idea to go GFP_KERNEL in the > first place. In the long run we want to use the scoped NOFS and GFP_KERNEL. All easy cases have been done (with occasional bugs like this patch fixes).
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 6d892d76d4fb..4c9cc8eecb30 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -138,6 +138,25 @@ static void wait_log_commit(struct btrfs_root *root, int transid); * and once to do all the other items. */ +static struct inode *btrfs_iget_logging(u64 objectid, struct btrfs_root *root) +{ + unsigned int nofs_flag; + struct inode *inode; + + /* + * We're holding a transaction handle whether we are logging or + * replaying a log tree, so we must make sure NOFS semantics apply + * because btrfs_alloc_inode() may be triggered and it uses GFP_KERNEL + * to allocate an inode, which can recurse back into the filesystem and + * attempt a transaction commit, resulting in a deadlock. + */ + nofs_flag = memalloc_nofs_save(); + inode = btrfs_iget(root->fs_info->sb, objectid, root); + memalloc_nofs_restore(nofs_flag); + + return inode; +} + /* * start a sub transaction and setup the log tree * this increments the log tree writer count to make the people @@ -600,7 +619,7 @@ static noinline struct inode *read_one_inode(struct btrfs_root *root, { struct inode *inode; - inode = btrfs_iget(root->fs_info->sb, objectid, root); + inode = btrfs_iget_logging(objectid, root); if (IS_ERR(inode)) inode = NULL; return inode; @@ -5443,7 +5462,6 @@ static int log_new_dir_dentries(struct btrfs_trans_handle *trans, struct btrfs_log_ctx *ctx) { struct btrfs_root *root = start_inode->root; - struct btrfs_fs_info *fs_info = root->fs_info; struct btrfs_path *path; LIST_HEAD(dir_list); struct btrfs_dir_list *dir_elem; @@ -5504,7 +5522,7 @@ static int log_new_dir_dentries(struct btrfs_trans_handle *trans, continue; btrfs_release_path(path); - di_inode = btrfs_iget(fs_info->sb, di_key.objectid, root); + di_inode = btrfs_iget_logging(di_key.objectid, root); if (IS_ERR(di_inode)) { ret = PTR_ERR(di_inode); goto out; @@ -5564,7 +5582,7 @@ static int log_new_dir_dentries(struct btrfs_trans_handle *trans, btrfs_add_delayed_iput(curr_inode); curr_inode = NULL; - vfs_inode = btrfs_iget(fs_info->sb, ino, root); + vfs_inode = btrfs_iget_logging(ino, root); if (IS_ERR(vfs_inode)) { ret = PTR_ERR(vfs_inode); break; @@ -5659,7 +5677,7 @@ static int add_conflicting_inode(struct btrfs_trans_handle *trans, if (ctx->num_conflict_inodes >= MAX_CONFLICT_INODES) return BTRFS_LOG_FORCE_COMMIT; - inode = btrfs_iget(root->fs_info->sb, ino, root); + inode = btrfs_iget_logging(ino, root); /* * If the other inode that had a conflicting dir entry was deleted in * the current transaction then we either: @@ -5760,7 +5778,6 @@ static int log_conflicting_inodes(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_log_ctx *ctx) { - struct btrfs_fs_info *fs_info = root->fs_info; int ret = 0; /* @@ -5791,7 +5808,7 @@ static int log_conflicting_inodes(struct btrfs_trans_handle *trans, list_del(&curr->list); kfree(curr); - inode = btrfs_iget(fs_info->sb, ino, root); + inode = btrfs_iget_logging(ino, root); /* * If the other inode that had a conflicting dir entry was * deleted in the current transaction, we need to log its parent @@ -5802,7 +5819,7 @@ static int log_conflicting_inodes(struct btrfs_trans_handle *trans, if (ret != -ENOENT) break; - inode = btrfs_iget(fs_info->sb, parent, root); + inode = btrfs_iget_logging(parent, root); if (IS_ERR(inode)) { ret = PTR_ERR(inode); break; @@ -6324,7 +6341,6 @@ static int log_new_delayed_dentries(struct btrfs_trans_handle *trans, struct btrfs_log_ctx *ctx) { const bool orig_log_new_dentries = ctx->log_new_dentries; - struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_delayed_item *item; int ret = 0; @@ -6350,7 +6366,7 @@ static int log_new_delayed_dentries(struct btrfs_trans_handle *trans, if (key.type == BTRFS_ROOT_ITEM_KEY) continue; - di_inode = btrfs_iget(fs_info->sb, key.objectid, inode->root); + di_inode = btrfs_iget_logging(key.objectid, inode->root); if (IS_ERR(di_inode)) { ret = PTR_ERR(di_inode); break; @@ -6734,7 +6750,6 @@ static int btrfs_log_all_parents(struct btrfs_trans_handle *trans, struct btrfs_inode *inode, struct btrfs_log_ctx *ctx) { - struct btrfs_fs_info *fs_info = trans->fs_info; int ret; struct btrfs_path *path; struct btrfs_key key; @@ -6799,8 +6814,7 @@ static int btrfs_log_all_parents(struct btrfs_trans_handle *trans, cur_offset = item_size; } - dir_inode = btrfs_iget(fs_info->sb, inode_key.objectid, - root); + dir_inode = btrfs_iget_logging(inode_key.objectid, root); /* * If the parent inode was deleted, return an error to * fallback to a transaction commit. This is to prevent @@ -6862,7 +6876,6 @@ static int log_new_ancestors(struct btrfs_trans_handle *trans, btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]); while (true) { - struct btrfs_fs_info *fs_info = root->fs_info; struct extent_buffer *leaf; int slot; struct btrfs_key search_key; @@ -6877,7 +6890,7 @@ static int log_new_ancestors(struct btrfs_trans_handle *trans, search_key.objectid = found_key.offset; search_key.type = BTRFS_INODE_ITEM_KEY; search_key.offset = 0; - inode = btrfs_iget(fs_info->sb, ino, root); + inode = btrfs_iget_logging(ino, root); if (IS_ERR(inode)) return PTR_ERR(inode);