From patchwork Wed Dec 14 02:11:23 2022 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: robbieko X-Patchwork-Id: 13072698 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 4CED2C4167B for ; Wed, 14 Dec 2022 02:11:38 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S237047AbiLNCLh (ORCPT ); Tue, 13 Dec 2022 21:11:37 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:34062 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S235771AbiLNCLf (ORCPT ); Tue, 13 Dec 2022 21:11:35 -0500 Received: from synology.com (mail.synology.com [211.23.38.101]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 2C97621E34 for ; Tue, 13 Dec 2022 18:11:34 -0800 (PST) From: robbieko DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=synology.com; s=123; t=1670983892; bh=01R+R6z/j4ZRdQ3Pr8zxf077SScevdzaAkNBwRZ3xkA=; h=From:To:Cc:Subject:Date:In-Reply-To:References; b=DRX/nMu7ztieRc5CZztqIBQgdxWXQc8AWdcVGd9z1R0vcsNd0t2ETMc9NUjVwE76B hcuxKgm+tBJ+hWLCPrx0Q7yCy3/0LyciIhhEQT/Z5Snd28Bj8FzS1cjX5FwrK0dMnZ uO/4PoAPDeNSq+5wKXOpKznXxkyKhuX704IkmlfQ= To: linux-btrfs@vger.kernel.org Cc: Robbie Ko Subject: [PATCH v2 1/3] btrfs: refactor anon_dev with new_fs_root_args for create subvolume/snapshot Date: Wed, 14 Dec 2022 10:11:23 +0800 Message-Id: <20221214021125.28289-2-robbieko@synology.com> In-Reply-To: <20221214021125.28289-1-robbieko@synology.com> References: <20221214021125.28289-1-robbieko@synology.com> X-Synology-MCP-Status: no X-Synology-Spam-Flag: no X-Synology-Spam-Status: score=0, required 6, WHITELIST_FROM_ADDRESS 0 X-Synology-Virus-Status: no Precedence: bulk List-ID: X-Mailing-List: linux-btrfs@vger.kernel.org From: Robbie Ko Create a new structure called btrfs_new_fs_root_args that refactor the type of anon_dev. With the new structure, btrfs_get_new_fs_root can input btrfs_new_fs_root_args into btrfs_init_fs_root by order. Signed-off-by: Robbie Ko --- fs/btrfs/disk-io.c | 61 +++++++++++++++++++++++++++++------------- fs/btrfs/disk-io.h | 10 ++++++- fs/btrfs/ioctl.c | 34 +++++++++++------------ fs/btrfs/transaction.c | 2 +- fs/btrfs/transaction.h | 5 ++-- 5 files changed, 72 insertions(+), 40 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index bd6cd8956ec8..e45fd1ef5d11 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1506,12 +1506,42 @@ struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root, return root; } +struct btrfs_new_fs_root_args *btrfs_alloc_new_fs_root_args(void) +{ + int err; + struct btrfs_new_fs_root_args *args; + + args = kzalloc(sizeof(*args), GFP_KERNEL); + if (!args) + return ERR_PTR(-ENOMEM); + + err = get_anon_bdev(&args->anon_dev); + if (err) + goto error; + + return args; + +error: + btrfs_free_new_fs_root_args(args); + return ERR_PTR(err); +} + +void btrfs_free_new_fs_root_args(struct btrfs_new_fs_root_args *args) +{ + if (!args) + return; + if (args->anon_dev) + free_anon_bdev(args->anon_dev); + kfree(args); +} + /* * Initialize subvolume root in-memory structure * * @anon_dev: anonymous device to attach to the root, if zero, allocate new */ -static int btrfs_init_fs_root(struct btrfs_root *root, dev_t anon_dev) +static int btrfs_init_fs_root(struct btrfs_root *root, + struct btrfs_new_fs_root_args *new_fs_root_args) { int ret; unsigned int nofs_flag; @@ -1538,12 +1568,13 @@ static int btrfs_init_fs_root(struct btrfs_root *root, dev_t anon_dev) */ if (is_fstree(root->root_key.objectid) && btrfs_root_refs(&root->root_item) > 0) { - if (!anon_dev) { + if (!new_fs_root_args || !new_fs_root_args->anon_dev) { ret = get_anon_bdev(&root->anon_dev); if (ret) goto fail; } else { - root->anon_dev = anon_dev; + root->anon_dev = new_fs_root_args->anon_dev; + new_fs_root_args->anon_dev = 0; } } @@ -1717,7 +1748,8 @@ void btrfs_free_fs_info(struct btrfs_fs_info *fs_info) * for orphan roots */ static struct btrfs_root *btrfs_get_root_ref(struct btrfs_fs_info *fs_info, - u64 objectid, dev_t anon_dev, + u64 objectid, + struct btrfs_new_fs_root_args *new_fs_root_args, bool check_ref) { struct btrfs_root *root; @@ -1731,8 +1763,8 @@ static struct btrfs_root *btrfs_get_root_ref(struct btrfs_fs_info *fs_info, again: root = btrfs_lookup_fs_root(fs_info, objectid); if (root) { - /* Shouldn't get preallocated anon_dev for cached roots */ - ASSERT(!anon_dev); + /* Shouldn't get new_fs_root_args for cached roots */ + ASSERT(!new_fs_root_args); if (check_ref && btrfs_root_refs(&root->root_item) == 0) { btrfs_put_root(root); return ERR_PTR(-ENOENT); @@ -1752,7 +1784,7 @@ static struct btrfs_root *btrfs_get_root_ref(struct btrfs_fs_info *fs_info, goto fail; } - ret = btrfs_init_fs_root(root, anon_dev); + ret = btrfs_init_fs_root(root, new_fs_root_args); if (ret) goto fail; @@ -1782,14 +1814,6 @@ static struct btrfs_root *btrfs_get_root_ref(struct btrfs_fs_info *fs_info, } return root; fail: - /* - * If our caller provided us an anonymous device, then it's his - * responsibility to free it in case we fail. So we have to set our - * root's anon_dev to 0 to avoid a double free, once by btrfs_put_root() - * and once again by our caller. - */ - if (anon_dev) - root->anon_dev = 0; btrfs_put_root(root); return ERR_PTR(ret); } @@ -1804,7 +1828,7 @@ static struct btrfs_root *btrfs_get_root_ref(struct btrfs_fs_info *fs_info, struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info, u64 objectid, bool check_ref) { - return btrfs_get_root_ref(fs_info, objectid, 0, check_ref); + return btrfs_get_root_ref(fs_info, objectid, NULL, check_ref); } /* @@ -1816,9 +1840,10 @@ struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info, * parameter value */ struct btrfs_root *btrfs_get_new_fs_root(struct btrfs_fs_info *fs_info, - u64 objectid, dev_t anon_dev) + u64 objectid, + struct btrfs_new_fs_root_args *new_fs_root_args) { - return btrfs_get_root_ref(fs_info, objectid, anon_dev, true); + return btrfs_get_root_ref(fs_info, objectid, new_fs_root_args, true); } /* diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 363935cfc084..67c6625797ca 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -25,6 +25,11 @@ static inline u64 btrfs_sb_offset(int mirror) return BTRFS_SUPER_INFO_OFFSET; } +struct btrfs_new_fs_root_args { + /* Preallocated anonymous block device number */ + dev_t anon_dev; +}; + struct btrfs_device; struct btrfs_fs_devices; struct btrfs_tree_parent_check; @@ -58,6 +63,8 @@ struct btrfs_super_block *btrfs_read_dev_one_super(struct block_device *bdev, int btrfs_commit_super(struct btrfs_fs_info *fs_info); struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root, struct btrfs_key *key); +struct btrfs_new_fs_root_args *btrfs_alloc_new_fs_root_args(void); +void btrfs_free_new_fs_root_args(struct btrfs_new_fs_root_args *args); int btrfs_insert_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root); void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info); @@ -65,7 +72,8 @@ void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info); struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info, u64 objectid, bool check_ref); struct btrfs_root *btrfs_get_new_fs_root(struct btrfs_fs_info *fs_info, - u64 objectid, dev_t anon_dev); + u64 objectid, + struct btrfs_new_fs_root_args *new_fs_root_args); struct btrfs_root *btrfs_get_fs_root_commit_root(struct btrfs_fs_info *fs_info, struct btrfs_path *path, u64 objectid); diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 7e348bd2ccde..de0cafe9b62b 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -599,8 +599,8 @@ static noinline int create_subvol(struct user_namespace *mnt_userns, }; unsigned int trans_num_items; int ret; - dev_t anon_dev; u64 objectid; + struct btrfs_new_fs_root_args *new_fs_root_args = NULL; root_item = kzalloc(sizeof(*root_item), GFP_KERNEL); if (!root_item) @@ -619,14 +619,17 @@ static noinline int create_subvol(struct user_namespace *mnt_userns, goto out_root_item; } - ret = get_anon_bdev(&anon_dev); - if (ret < 0) + new_fs_root_args = btrfs_alloc_new_fs_root_args(); + if (IS_ERR(new_fs_root_args)) { + ret = PTR_ERR(new_fs_root_args); + new_fs_root_args = NULL; goto out_root_item; + } new_inode_args.inode = btrfs_new_subvol_inode(mnt_userns, dir); if (!new_inode_args.inode) { ret = -ENOMEM; - goto out_anon_dev; + goto out_root_item; } ret = btrfs_new_inode_prepare(&new_inode_args, &trans_num_items); if (ret) @@ -717,14 +720,12 @@ static noinline int create_subvol(struct user_namespace *mnt_userns, free_extent_buffer(leaf); leaf = NULL; - new_root = btrfs_get_new_fs_root(fs_info, objectid, anon_dev); + new_root = btrfs_get_new_fs_root(fs_info, objectid, new_fs_root_args); if (IS_ERR(new_root)) { ret = PTR_ERR(new_root); btrfs_abort_transaction(trans, ret); goto out; } - /* anon_dev is owned by new_root now. */ - anon_dev = 0; BTRFS_I(new_inode_args.inode)->root = new_root; /* ... and new_root is owned by new_inode_args.inode now. */ @@ -763,10 +764,8 @@ static noinline int create_subvol(struct user_namespace *mnt_userns, btrfs_new_inode_args_destroy(&new_inode_args); out_inode: iput(new_inode_args.inode); -out_anon_dev: - if (anon_dev) - free_anon_bdev(anon_dev); out_root_item: + btrfs_free_new_fs_root_args(new_fs_root_args); kfree(root_item); return ret; } @@ -802,9 +801,13 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir, if (!pending_snapshot) return -ENOMEM; - ret = get_anon_bdev(&pending_snapshot->anon_dev); - if (ret < 0) + pending_snapshot->new_fs_root_args = btrfs_alloc_new_fs_root_args(); + if (IS_ERR(pending_snapshot->new_fs_root_args)) { + ret = PTR_ERR(pending_snapshot->new_fs_root_args); + pending_snapshot->new_fs_root_args = NULL; goto free_pending; + } + pending_snapshot->root_item = kzalloc(sizeof(struct btrfs_root_item), GFP_KERNEL); pending_snapshot->path = btrfs_alloc_path(); @@ -861,16 +864,11 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir, d_instantiate(dentry, inode); ret = 0; - pending_snapshot->anon_dev = 0; fail: - /* Prevent double freeing of anon_dev */ - if (ret && pending_snapshot->snap) - pending_snapshot->snap->anon_dev = 0; btrfs_put_root(pending_snapshot->snap); btrfs_subvolume_release_metadata(root, &pending_snapshot->block_rsv); free_pending: - if (pending_snapshot->anon_dev) - free_anon_bdev(pending_snapshot->anon_dev); + btrfs_free_new_fs_root_args(pending_snapshot->new_fs_root_args); kfree(pending_snapshot->root_item); btrfs_free_path(pending_snapshot->path); kfree(pending_snapshot); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index b8c52e89688c..042626fbd3b8 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1801,7 +1801,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, } key.offset = (u64)-1; - pending->snap = btrfs_get_new_fs_root(fs_info, objectid, pending->anon_dev); + pending->snap = btrfs_get_new_fs_root(fs_info, objectid, pending->new_fs_root_args); if (IS_ERR(pending->snap)) { ret = PTR_ERR(pending->snap); pending->snap = NULL; diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 97f6c39f59c8..60022240c1ea 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -11,6 +11,7 @@ #include "delayed-ref.h" #include "ctree.h" #include "misc.h" +#include "disk-io.h" enum btrfs_trans_state { TRANS_STATE_RUNNING, @@ -163,8 +164,8 @@ struct btrfs_pending_snapshot { struct btrfs_block_rsv block_rsv; /* extra metadata reservation for relocation */ int error; - /* Preallocated anonymous block device number */ - dev_t anon_dev; + /* Preallocated new_fs_root_args */ + struct btrfs_new_fs_root_args *new_fs_root_args; bool readonly; struct list_head list; }; From patchwork Wed Dec 14 02:11:24 2022 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: robbieko X-Patchwork-Id: 13072699 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 33CA5C10F1B for ; Wed, 14 Dec 2022 02:11:39 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S237053AbiLNCLh (ORCPT ); Tue, 13 Dec 2022 21:11:37 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:34072 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S236871AbiLNCLg (ORCPT ); Tue, 13 Dec 2022 21:11:36 -0500 Received: from synology.com (mail.synology.com [211.23.38.101]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 28A25220E1 for ; Tue, 13 Dec 2022 18:11:35 -0800 (PST) From: robbieko DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=synology.com; s=123; t=1670983893; bh=kf0ITIjM8WQ7X4OdjEACXrniQHZKopXQI62U23e6Itw=; h=From:To:Cc:Subject:Date:In-Reply-To:References; b=VlQi7qfkR+qWT+bLMGJFb902ZyqIp9mdRf+35mJDzINRarWaavFUTF9mbL2Pz9uZI 8PFP1OtOKRNJ5lJN+07IBzHvkI7UJEkrtb7iHXdhggRG0NXpu5k4oB7M/A1cjijFBB 0Kvh3Q9HNW5xVe0kbNQf0X7ff1tZHA3rOYT1pkUk= To: linux-btrfs@vger.kernel.org Cc: Robbie Ko Subject: [PATCH v2 2/3] btrfs: change snapshot_lock to dynamic pointer Date: Wed, 14 Dec 2022 10:11:24 +0800 Message-Id: <20221214021125.28289-3-robbieko@synology.com> In-Reply-To: <20221214021125.28289-1-robbieko@synology.com> References: <20221214021125.28289-1-robbieko@synology.com> X-Synology-MCP-Status: no X-Synology-Spam-Flag: no X-Synology-Spam-Status: score=0, required 6, WHITELIST_FROM_ADDRESS 0 X-Synology-Virus-Status: no Precedence: bulk List-ID: X-Mailing-List: linux-btrfs@vger.kernel.org From: Robbie Ko Change snapshot_lock to dynamic pointer to allocate memory at the beginning of creating a subvolume/snapshot. Signed-off-by: Robbie Ko --- fs/btrfs/ctree.h | 2 +- fs/btrfs/disk-io.c | 12 ++++++++++-- fs/btrfs/file.c | 8 ++++---- fs/btrfs/inode.c | 12 ++++++------ fs/btrfs/ioctl.c | 4 ++-- 5 files changed, 23 insertions(+), 15 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 6965703a81b6..d0e703ad865f 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -307,7 +307,7 @@ struct btrfs_root { */ int dedupe_in_progress; /* For exclusion of snapshot creation and nocow writes */ - struct btrfs_drew_lock snapshot_lock; + struct btrfs_drew_lock *snapshot_lock; atomic_t snapshot_force_cow; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index e45fd1ef5d11..ed2ce2dfbfcd 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1546,12 +1546,17 @@ static int btrfs_init_fs_root(struct btrfs_root *root, int ret; unsigned int nofs_flag; + root->snapshot_lock = kzalloc(sizeof(*root->snapshot_lock), GFP_NOFS); + if (!root->snapshot_lock) { + ret = -ENOMEM; + goto fail; + } /* * We might be called under a transaction (e.g. indirect backref * resolution) which could deadlock if it triggers memory reclaim */ nofs_flag = memalloc_nofs_save(); - ret = btrfs_drew_lock_init(&root->snapshot_lock); + ret = btrfs_drew_lock_init(root->snapshot_lock); memalloc_nofs_restore(nofs_flag); if (ret) goto fail; @@ -2260,7 +2265,10 @@ void btrfs_put_root(struct btrfs_root *root) WARN_ON(test_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state)); if (root->anon_dev) free_anon_bdev(root->anon_dev); - btrfs_drew_lock_destroy(&root->snapshot_lock); + if (root->snapshot_lock) { + btrfs_drew_lock_destroy(root->snapshot_lock); + kfree(root->snapshot_lock); + } free_root_extent_buffers(root); #ifdef CONFIG_BTRFS_DEBUG spin_lock(&root->fs_info->fs_roots_radix_lock); diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 91b00eb2440e..377ed246792c 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1071,7 +1071,7 @@ int btrfs_check_nocow_lock(struct btrfs_inode *inode, loff_t pos, if (!(inode->flags & (BTRFS_INODE_NODATACOW | BTRFS_INODE_PREALLOC))) return 0; - if (!btrfs_drew_try_write_lock(&root->snapshot_lock)) + if (!btrfs_drew_try_write_lock(root->snapshot_lock)) return -EAGAIN; lockstart = round_down(pos, fs_info->sectorsize); @@ -1082,7 +1082,7 @@ int btrfs_check_nocow_lock(struct btrfs_inode *inode, loff_t pos, if (nowait) { if (!btrfs_try_lock_ordered_range(inode, lockstart, lockend, &cached_state)) { - btrfs_drew_write_unlock(&root->snapshot_lock); + btrfs_drew_write_unlock(root->snapshot_lock); return -EAGAIN; } } else { @@ -1092,7 +1092,7 @@ int btrfs_check_nocow_lock(struct btrfs_inode *inode, loff_t pos, ret = can_nocow_extent(&inode->vfs_inode, lockstart, &num_bytes, NULL, NULL, NULL, nowait, false); if (ret <= 0) - btrfs_drew_write_unlock(&root->snapshot_lock); + btrfs_drew_write_unlock(root->snapshot_lock); else *write_bytes = min_t(size_t, *write_bytes , num_bytes - pos + lockstart); @@ -1103,7 +1103,7 @@ int btrfs_check_nocow_lock(struct btrfs_inode *inode, loff_t pos, void btrfs_check_nocow_unlock(struct btrfs_inode *inode) { - btrfs_drew_write_unlock(&inode->root->snapshot_lock); + btrfs_drew_write_unlock(inode->root->snapshot_lock); } static void update_time_for_write(struct inode *inode) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 8bcad9940154..a0a1cd70f5ee 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -5218,16 +5218,16 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr) * truncation, it must capture all writes that happened before * this truncation. */ - btrfs_drew_write_lock(&root->snapshot_lock); + btrfs_drew_write_lock(root->snapshot_lock); ret = btrfs_cont_expand(BTRFS_I(inode), oldsize, newsize); if (ret) { - btrfs_drew_write_unlock(&root->snapshot_lock); + btrfs_drew_write_unlock(root->snapshot_lock); return ret; } trans = btrfs_start_transaction(root, 1); if (IS_ERR(trans)) { - btrfs_drew_write_unlock(&root->snapshot_lock); + btrfs_drew_write_unlock(root->snapshot_lock); return PTR_ERR(trans); } @@ -5235,7 +5235,7 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr) btrfs_inode_safe_disk_i_size_write(BTRFS_I(inode), 0); pagecache_isize_extended(inode, oldsize, newsize); ret = btrfs_update_inode(trans, root, BTRFS_I(inode)); - btrfs_drew_write_unlock(&root->snapshot_lock); + btrfs_drew_write_unlock(root->snapshot_lock); btrfs_end_transaction(trans); } else { struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); @@ -11090,7 +11090,7 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file, * completes before the first write into the swap file after it is * activated, than that write would fallback to COW. */ - if (!btrfs_drew_try_write_lock(&root->snapshot_lock)) { + if (!btrfs_drew_try_write_lock(root->snapshot_lock)) { btrfs_exclop_finish(fs_info); btrfs_warn(fs_info, "cannot activate swapfile because snapshot creation is in progress"); @@ -11263,7 +11263,7 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file, if (ret) btrfs_swap_deactivate(file); - btrfs_drew_write_unlock(&root->snapshot_lock); + btrfs_drew_write_unlock(root->snapshot_lock); btrfs_exclop_finish(fs_info); diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index de0cafe9b62b..9adaf3384f58 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1019,7 +1019,7 @@ static noinline int btrfs_mksnapshot(const struct path *parent, * possible. This is to avoid later writeback (running dealloc) to * fallback to COW mode and unexpectedly fail with ENOSPC. */ - btrfs_drew_read_lock(&root->snapshot_lock); + btrfs_drew_read_lock(root->snapshot_lock); ret = btrfs_start_delalloc_snapshot(root, false); if (ret) @@ -1040,7 +1040,7 @@ static noinline int btrfs_mksnapshot(const struct path *parent, out: if (snapshot_force_cow) atomic_dec(&root->snapshot_force_cow); - btrfs_drew_read_unlock(&root->snapshot_lock); + btrfs_drew_read_unlock(root->snapshot_lock); return ret; } From patchwork Wed Dec 14 02:11:25 2022 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: robbieko X-Patchwork-Id: 13072700 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 1B5B7C4332F for ; Wed, 14 Dec 2022 02:11:40 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S237054AbiLNCLi (ORCPT ); Tue, 13 Dec 2022 21:11:38 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:34088 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S236997AbiLNCLh (ORCPT ); Tue, 13 Dec 2022 21:11:37 -0500 Received: from synology.com (mail.synology.com [211.23.38.101]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id BF412220EB for ; Tue, 13 Dec 2022 18:11:35 -0800 (PST) From: robbieko DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=synology.com; s=123; t=1670983893; bh=zn4dNxCU5vMMPp4VIoYINzrbGuPlkT9psjxltxwI4eQ=; h=From:To:Cc:Subject:Date:In-Reply-To:References; b=VLFCCEOPHY7uHBsGQozLl8nRUyEeom37H6c3RDBEeXeCdgEKMn5nYUsNvnm/fmYT7 22kUH7KHheBWVB1axneH8Kdf60jtMgPqFxnk3txR4fXur4QOESZIy73ws5BTc2pbcb xU0PVdQPor5I886vpwlGrfzWUosomxGGDvx5UqGo= To: linux-btrfs@vger.kernel.org Cc: Robbie Ko Subject: [PATCH v2 3/3] btrfs: add snapshot_lock to new_fs_root_args Date: Wed, 14 Dec 2022 10:11:25 +0800 Message-Id: <20221214021125.28289-4-robbieko@synology.com> In-Reply-To: <20221214021125.28289-1-robbieko@synology.com> References: <20221214021125.28289-1-robbieko@synology.com> X-Synology-MCP-Status: no X-Synology-Spam-Flag: no X-Synology-Spam-Status: score=0, required 6, WHITELIST_FROM_ADDRESS 0 X-Synology-Virus-Status: no Precedence: bulk List-ID: X-Mailing-List: linux-btrfs@vger.kernel.org From: Robbie Ko [Issue] When creating subvolume/snapshot, the transaction may be abort due to -ENOMEM. [Cause] During creating a subvolume/snapshot, it is necessary to allocate memory for Initializing fs root. Therefore, it can only use GFP_NOFS to allocate memory to avoid deadlock issues. However, atomic allocation is required when processing percpu_counter_init without GFP_KERNEL due to the unique structure of percpu_counter. In this situation, allocating memory for initializing fs root may cause unexpected -ENOMEM when free memory is low and causes btrfs transaction to abort. [Fix] So we add snapshot_lock into new_fs_root_args to allocate the memory before holding a transaction handle. This way, we can reduce the chances of -ENOMEM when calling btrfs_init_fs_root. Furthermore, it can avoid aborting a transaction and turn the fs to RO mode. Signed-off-by: Robbie Ko --- fs/btrfs/disk-io.c | 44 +++++++++++++++++++++++++++++++------------- fs/btrfs/disk-io.h | 2 ++ 2 files changed, 33 insertions(+), 13 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index ed2ce2dfbfcd..7ba1a019f5b0 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1519,6 +1519,15 @@ struct btrfs_new_fs_root_args *btrfs_alloc_new_fs_root_args(void) if (err) goto error; + args->snapshot_lock = kzalloc(sizeof(*args->snapshot_lock), GFP_KERNEL); + if (!args->snapshot_lock) { + err = -ENOMEM; + goto error; + } + err = btrfs_drew_lock_init(args->snapshot_lock); + if (err) + goto error; + return args; error: @@ -1530,6 +1539,10 @@ void btrfs_free_new_fs_root_args(struct btrfs_new_fs_root_args *args) { if (!args) return; + if (args->snapshot_lock) { + btrfs_drew_lock_destroy(args->snapshot_lock); + kfree(args->snapshot_lock); + } if (args->anon_dev) free_anon_bdev(args->anon_dev); kfree(args); @@ -1546,20 +1559,25 @@ static int btrfs_init_fs_root(struct btrfs_root *root, int ret; unsigned int nofs_flag; - root->snapshot_lock = kzalloc(sizeof(*root->snapshot_lock), GFP_NOFS); - if (!root->snapshot_lock) { - ret = -ENOMEM; - goto fail; + if (new_fs_root_args && new_fs_root_args->snapshot_lock) { + root->snapshot_lock = new_fs_root_args->snapshot_lock; + new_fs_root_args->snapshot_lock = NULL; + } else { + root->snapshot_lock = kzalloc(sizeof(*root->snapshot_lock), GFP_NOFS); + if (!root->snapshot_lock) { + ret = -ENOMEM; + goto fail; + } + /* + * We might be called under a transaction (e.g. indirect backref + * resolution) which could deadlock if it triggers memory reclaim. + */ + nofs_flag = memalloc_nofs_save(); + ret = btrfs_drew_lock_init(root->snapshot_lock); + memalloc_nofs_restore(nofs_flag); + if (ret) + goto fail; } - /* - * We might be called under a transaction (e.g. indirect backref - * resolution) which could deadlock if it triggers memory reclaim - */ - nofs_flag = memalloc_nofs_save(); - ret = btrfs_drew_lock_init(root->snapshot_lock); - memalloc_nofs_restore(nofs_flag); - if (ret) - goto fail; if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID && !btrfs_is_data_reloc_root(root)) { diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 67c6625797ca..21c41cf8d115 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -28,6 +28,8 @@ static inline u64 btrfs_sb_offset(int mirror) struct btrfs_new_fs_root_args { /* Preallocated anonymous block device number */ dev_t anon_dev; + /* Preallocated snapshot lock */ + struct btrfs_drew_lock *snapshot_lock; }; struct btrfs_device;