diff mbox series

btrfs: stop locking the source extent range during reflink

Message ID 09a3da957a5b7f60a1dba5f4609971a62b3f7c23.1711134182.git.fdmanana@suse.com (mailing list archive)
State New, archived
Headers show
Series btrfs: stop locking the source extent range during reflink | expand

Commit Message

Filipe Manana March 22, 2024, 7:07 p.m. UTC
From: Filipe Manana <fdmanana@suse.com>

Nowadays before starting a reflink operation we do this:

1) Take the VFS lock of the inodes in exclusive mode (a rw semaphore);

2) Take the mmap lock of the inodes (struct btrfs_inode::i_mmap_lock);

3) Flush all delalloc in the source and target ranges;

4) Wait for all ordered extents in the source and target ranges to
   complete;

5) Lock the source and destination ranges in the inodes' io trees.

In step 5 we don't need anymore to lock the source range because nowadays
we have the mmap lock (step 2) and locking the source range was just to
avoid races with mmap writes before we had that mmap lock, which was added
in commit 8c99516a8cdd ("btrfs: exclude mmaps while doing remap").

So stop locking the source file range, allowing concurrent reads to happen
in parallel and making test case generic/733 pass.

Signed-off-by: Filipe Manana <fdmanana@suse.com>
---
 fs/btrfs/reflink.c | 42 +++++++++---------------------------------
 1 file changed, 9 insertions(+), 33 deletions(-)

Comments

Boris Burkov March 22, 2024, 10:58 p.m. UTC | #1
On Fri, Mar 22, 2024 at 07:07:18PM +0000, fdmanana@kernel.org wrote:
> From: Filipe Manana <fdmanana@suse.com>
> 
> Nowadays before starting a reflink operation we do this:
> 
> 1) Take the VFS lock of the inodes in exlcusive mode (a rw semaphore);
> 
> 2) Take the  mmap lock of the inodes (struct btrfs_inode::i_mmap_lock);
> 
> 3) Flush all dealloc in the source and target ranges;
> 
> 4) Wait for all ordered extents in the source and target ranges to
>    complete;
> 
> 5) Lock the source and destination ranges in the inodes' io trees.
> 
> In step 5 we don't need anymore to lock the source range because nowadays
> we have the mmap lock (step 2) and locking the source range was just to
> avoid races with mmap writes before we had that mmap lock, which was added
> in commit 8c99516a8cdd ("btrfs: exclude mmaps while doing remap").
> 
> So stop locking the source file range, allowing concurrent reads to happen
> in parallel and making test case generic/733 pass.
> 
> Signed-off-by: Filipe Manana <fdmanana@suse.com>

Reviewed-by: Boris Burkov <boris@bur.io>

> ---
>  fs/btrfs/reflink.c | 42 +++++++++---------------------------------
>  1 file changed, 9 insertions(+), 33 deletions(-)
> 
> diff --git a/fs/btrfs/reflink.c b/fs/btrfs/reflink.c
> index 08d0fb46ceec..6e7a3970394b 100644
> --- a/fs/btrfs/reflink.c
> +++ b/fs/btrfs/reflink.c
> @@ -616,35 +616,6 @@ static int btrfs_clone(struct inode *src, struct inode *inode,
>  	return ret;
>  }
>  
> -static void btrfs_double_extent_unlock(struct inode *inode1, u64 loff1,
> -				       struct inode *inode2, u64 loff2, u64 len)
> -{
> -	unlock_extent(&BTRFS_I(inode1)->io_tree, loff1, loff1 + len - 1, NULL);
> -	unlock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1, NULL);
> -}
> -
> -static void btrfs_double_extent_lock(struct inode *inode1, u64 loff1,
> -				     struct inode *inode2, u64 loff2, u64 len)
> -{
> -	u64 range1_end = loff1 + len - 1;
> -	u64 range2_end = loff2 + len - 1;
> -
> -	if (inode1 < inode2) {
> -		swap(inode1, inode2);
> -		swap(loff1, loff2);
> -		swap(range1_end, range2_end);
> -	} else if (inode1 == inode2 && loff2 < loff1) {
> -		swap(loff1, loff2);
> -		swap(range1_end, range2_end);
> -	}
> -
> -	lock_extent(&BTRFS_I(inode1)->io_tree, loff1, range1_end, NULL);
> -	lock_extent(&BTRFS_I(inode2)->io_tree, loff2, range2_end, NULL);
> -
> -	btrfs_assert_inode_range_clean(BTRFS_I(inode1), loff1, range1_end);
> -	btrfs_assert_inode_range_clean(BTRFS_I(inode2), loff2, range2_end);
> -}
> -
>  static void btrfs_double_mmap_lock(struct inode *inode1, struct inode *inode2)
>  {
>  	if (inode1 < inode2)
> @@ -662,6 +633,8 @@ static void btrfs_double_mmap_unlock(struct inode *inode1, struct inode *inode2)
>  static int btrfs_extent_same_range(struct inode *src, u64 loff, u64 len,
>  				   struct inode *dst, u64 dst_loff)
>  {
> +	const u64 end = dst_loff + len - 1;
> +	struct extent_state *cached_state = NULL;
>  	struct btrfs_fs_info *fs_info = BTRFS_I(src)->root->fs_info;
>  	const u64 bs = fs_info->sectorsize;
>  	int ret;
> @@ -670,9 +643,9 @@ static int btrfs_extent_same_range(struct inode *src, u64 loff, u64 len,
>  	 * Lock destination range to serialize with concurrent readahead() and
>  	 * source range to serialize with relocation.
>  	 */
> -	btrfs_double_extent_lock(src, loff, dst, dst_loff, len);
> +	lock_extent(&BTRFS_I(dst)->io_tree, dst_loff, end, &cached_state);
>  	ret = btrfs_clone(src, dst, loff, len, ALIGN(len, bs), dst_loff, 1);
> -	btrfs_double_extent_unlock(src, loff, dst, dst_loff, len);
> +	unlock_extent(&BTRFS_I(dst)->io_tree, dst_loff, end, &cached_state);
>  
>  	btrfs_btree_balance_dirty(fs_info);
>  
> @@ -724,6 +697,7 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen,
>  static noinline int btrfs_clone_files(struct file *file, struct file *file_src,
>  					u64 off, u64 olen, u64 destoff)
>  {
> +	struct extent_state *cached_state = NULL;
>  	struct inode *inode = file_inode(file);
>  	struct inode *src = file_inode(file_src);
>  	struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
> @@ -731,6 +705,7 @@ static noinline int btrfs_clone_files(struct file *file, struct file *file_src,
>  	int wb_ret;
>  	u64 len = olen;
>  	u64 bs = fs_info->sectorsize;
> +	u64 end;
>  
>  	/*
>  	 * VFS's generic_remap_file_range_prep() protects us from cloning the
> @@ -766,9 +741,10 @@ static noinline int btrfs_clone_files(struct file *file, struct file *file_src,
>  	 * Lock destination range to serialize with concurrent readahead() and
>  	 * source range to serialize with relocation.
>  	 */
> -	btrfs_double_extent_lock(src, off, inode, destoff, len);
> +	end = destoff + len - 1;
> +	lock_extent(&BTRFS_I(inode)->io_tree, destoff, end, &cached_state);
>  	ret = btrfs_clone(src, inode, off, olen, len, destoff, 0);
> -	btrfs_double_extent_unlock(src, off, inode, destoff, len);
> +	unlock_extent(&BTRFS_I(inode)->io_tree, destoff, end, &cached_state);
>  
>  	/*
>  	 * We may have copied an inline extent into a page of the destination
> -- 
> 2.43.0
>
diff mbox series

Patch

diff --git a/fs/btrfs/reflink.c b/fs/btrfs/reflink.c
index 08d0fb46ceec..6e7a3970394b 100644
--- a/fs/btrfs/reflink.c
+++ b/fs/btrfs/reflink.c
@@ -616,35 +616,6 @@  static int btrfs_clone(struct inode *src, struct inode *inode,
 	return ret;
 }
 
-static void btrfs_double_extent_unlock(struct inode *inode1, u64 loff1,
-				       struct inode *inode2, u64 loff2, u64 len)
-{
-	unlock_extent(&BTRFS_I(inode1)->io_tree, loff1, loff1 + len - 1, NULL);
-	unlock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1, NULL);
-}
-
-static void btrfs_double_extent_lock(struct inode *inode1, u64 loff1,
-				     struct inode *inode2, u64 loff2, u64 len)
-{
-	u64 range1_end = loff1 + len - 1;
-	u64 range2_end = loff2 + len - 1;
-
-	if (inode1 < inode2) {
-		swap(inode1, inode2);
-		swap(loff1, loff2);
-		swap(range1_end, range2_end);
-	} else if (inode1 == inode2 && loff2 < loff1) {
-		swap(loff1, loff2);
-		swap(range1_end, range2_end);
-	}
-
-	lock_extent(&BTRFS_I(inode1)->io_tree, loff1, range1_end, NULL);
-	lock_extent(&BTRFS_I(inode2)->io_tree, loff2, range2_end, NULL);
-
-	btrfs_assert_inode_range_clean(BTRFS_I(inode1), loff1, range1_end);
-	btrfs_assert_inode_range_clean(BTRFS_I(inode2), loff2, range2_end);
-}
-
 static void btrfs_double_mmap_lock(struct inode *inode1, struct inode *inode2)
 {
 	if (inode1 < inode2)
@@ -662,6 +633,8 @@  static void btrfs_double_mmap_unlock(struct inode *inode1, struct inode *inode2)
 static int btrfs_extent_same_range(struct inode *src, u64 loff, u64 len,
 				   struct inode *dst, u64 dst_loff)
 {
+	const u64 end = dst_loff + len - 1;
+	struct extent_state *cached_state = NULL;
 	struct btrfs_fs_info *fs_info = BTRFS_I(src)->root->fs_info;
 	const u64 bs = fs_info->sectorsize;
 	int ret;
@@ -670,9 +643,9 @@  static int btrfs_extent_same_range(struct inode *src, u64 loff, u64 len,
 	 * Lock destination range to serialize with concurrent readahead() and
 	 * source range to serialize with relocation.
 	 */
-	btrfs_double_extent_lock(src, loff, dst, dst_loff, len);
+	lock_extent(&BTRFS_I(dst)->io_tree, dst_loff, end, &cached_state);
 	ret = btrfs_clone(src, dst, loff, len, ALIGN(len, bs), dst_loff, 1);
-	btrfs_double_extent_unlock(src, loff, dst, dst_loff, len);
+	unlock_extent(&BTRFS_I(dst)->io_tree, dst_loff, end, &cached_state);
 
 	btrfs_btree_balance_dirty(fs_info);
 
@@ -724,6 +697,7 @@  static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen,
 static noinline int btrfs_clone_files(struct file *file, struct file *file_src,
 					u64 off, u64 olen, u64 destoff)
 {
+	struct extent_state *cached_state = NULL;
 	struct inode *inode = file_inode(file);
 	struct inode *src = file_inode(file_src);
 	struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
@@ -731,6 +705,7 @@  static noinline int btrfs_clone_files(struct file *file, struct file *file_src,
 	int wb_ret;
 	u64 len = olen;
 	u64 bs = fs_info->sectorsize;
+	u64 end;
 
 	/*
 	 * VFS's generic_remap_file_range_prep() protects us from cloning the
@@ -766,9 +741,10 @@  static noinline int btrfs_clone_files(struct file *file, struct file *file_src,
 	 * Lock destination range to serialize with concurrent readahead() and
 	 * source range to serialize with relocation.
 	 */
-	btrfs_double_extent_lock(src, off, inode, destoff, len);
+	end = destoff + len - 1;
+	lock_extent(&BTRFS_I(inode)->io_tree, destoff, end, &cached_state);
 	ret = btrfs_clone(src, inode, off, olen, len, destoff, 0);
-	btrfs_double_extent_unlock(src, off, inode, destoff, len);
+	unlock_extent(&BTRFS_I(inode)->io_tree, destoff, end, &cached_state);
 
 	/*
 	 * We may have copied an inline extent into a page of the destination