@@ -5227,6 +5227,42 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
iput(inode);
inode = ERR_PTR(ret);
}
+ /*
+ * If orphan cleanup did remove any orphans, it means the tree
+ * was modified and therefore the commit root is not the same as
+ * the current root anymore. This is a problem, because send
+ * uses the commit root and therefore can see inode items that
+ * don't exist in the current root anymore, and for example make
+ * calls to btrfs_iget, which will do tree lookups based on the
+ * current root and not on the commit root. Those lookups will
+ * fail, returning a -ESTALE error, and making send fail with
+ * that error. So make sure a send does not see any orphans we
+ * have just removed, and that it will see the same inodes
+ * regardless of whether a transaction commit happened before
+ * it started (meaning that the commit root will be the same as
+ * the current root) or not.
+ */
+ if (sub_root->node != sub_root->commit_root) {
+ u64 sub_flags = btrfs_root_flags(&sub_root->root_item);
+
+ if (sub_flags & BTRFS_ROOT_SUBVOL_RDONLY) {
+ struct extent_buffer *eb;
+
+ /*
+ * Assert we can't have races between dentry
+ * lookup called through the snapshot creation
+ * ioctl and the VFS.
+ */
+ ASSERT(mutex_is_locked(&dir->i_mutex));
+
+ down_write(&root->fs_info->commit_root_sem);
+ eb = sub_root->commit_root;
+ sub_root->commit_root =
+ btrfs_root_node(sub_root);
+ up_write(&root->fs_info->commit_root_sem);
+ free_extent_buffer(eb);
+ }
+ }
}
return inode;
@@ -711,39 +711,6 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
if (ret)
goto fail;
- ret = btrfs_orphan_cleanup(pending_snapshot->snap);
- if (ret)
- goto fail;
-
- /*
- * If orphan cleanup did remove any orphans, it means the tree was
- * modified and therefore the commit root is not the same as the
- * current root anymore. This is a problem, because send uses the
- * commit root and therefore can see inode items that don't exist
- * in the current root anymore, and for example make calls to
- * btrfs_iget, which will do tree lookups based on the current root
- * and not on the commit root. Those lookups will fail, returning a
- * -ESTALE error, and making send fail with that error. So make sure
- * a send does not see any orphans we have just removed, and that it
- * will see the same inodes regardless of whether a transaction
- * commit happened before it started (meaning that the commit root
- * will be the same as the current root) or not.
- */
- if (readonly && pending_snapshot->snap->node !=
- pending_snapshot->snap->commit_root) {
- trans = btrfs_join_transaction(pending_snapshot->snap);
- if (IS_ERR(trans) && PTR_ERR(trans) != -ENOENT) {
- ret = PTR_ERR(trans);
- goto fail;
- }
- if (!IS_ERR(trans)) {
- ret = btrfs_commit_transaction(trans,
- pending_snapshot->snap);
- if (ret)
- goto fail;
- }
- }
-
inode = btrfs_lookup_dentry(dentry->d_parent->d_inode, dentry);
if (IS_ERR(inode)) {
ret = PTR_ERR(inode);
This is a better solution for the problem addressed in the following commit: Btrfs: update commit root on snapshot creation after orphan cleanup (3821f348889e506efbd268cc8149e0ebfa47c4e5) The previous solution wasn't the best because of 2 reasons: 1) It added another full transaction commit, which is more expensive than just swapping the commit root with the root; 2) If a reboot happened after the first transaction commit (the one that creates the snapshot) and before the second transaction commit, then we would end up with the same problem if a send using that snapshot was requested before the first transaction commit after the reboot. This change addresses those 2 issues. The second issue is addressed by switching the commit root in the dentry lookup VFS callback, which is also called by the snapshot/subvol creation ioctl and performs orphan cleanup if needed. Like the vfs, the ioctl locks the parent inode too, preventing race issues between a dentry lookup and snapshot creation. Cc: Alex Lyakas <alex.btrfs@zadarastorage.com> Signed-off-by: Filipe Manana <fdmanana@suse.com> --- V2: Updated commit message, as original second issue was not correct. Removed redundant btrfs_orphan_cleanup() call in the snapshot creation ioctl, as it's performed by btrfs_lookup_dentry() which is called by the ioctl. fs/btrfs/inode.c | 36 ++++++++++++++++++++++++++++++++++++ fs/btrfs/ioctl.c | 33 --------------------------------- 2 files changed, 36 insertions(+), 33 deletions(-)