Message ID | 09a3da957a5b7f60a1dba5f4609971a62b3f7c23.1711134182.git.fdmanana@suse.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | btrfs: stop locking the source extent range during reflink | expand |
On Fri, Mar 22, 2024 at 07:07:18PM +0000, fdmanana@kernel.org wrote: > From: Filipe Manana <fdmanana@suse.com> > > Nowadays before starting a reflink operation we do this: > > 1) Take the VFS lock of the inodes in exlcusive mode (a rw semaphore); > > 2) Take the mmap lock of the inodes (struct btrfs_inode::i_mmap_lock); > > 3) Flush all dealloc in the source and target ranges; > > 4) Wait for all ordered extents in the source and target ranges to > complete; > > 5) Lock the source and destination ranges in the inodes' io trees. > > In step 5 we don't need anymore to lock the source range because nowadays > we have the mmap lock (step 2) and locking the source range was just to > avoid races with mmap writes before we had that mmap lock, which was added > in commit 8c99516a8cdd ("btrfs: exclude mmaps while doing remap"). > > So stop locking the source file range, allowing concurrent reads to happen > in parallel and making test case generic/733 pass. > > Signed-off-by: Filipe Manana <fdmanana@suse.com> Reviewed-by: Boris Burkov <boris@bur.io> > --- > fs/btrfs/reflink.c | 42 +++++++++--------------------------------- > 1 file changed, 9 insertions(+), 33 deletions(-) > > diff --git a/fs/btrfs/reflink.c b/fs/btrfs/reflink.c > index 08d0fb46ceec..6e7a3970394b 100644 > --- a/fs/btrfs/reflink.c > +++ b/fs/btrfs/reflink.c > @@ -616,35 +616,6 @@ static int btrfs_clone(struct inode *src, struct inode *inode, > return ret; > } > > -static void btrfs_double_extent_unlock(struct inode *inode1, u64 loff1, > - struct inode *inode2, u64 loff2, u64 len) > -{ > - unlock_extent(&BTRFS_I(inode1)->io_tree, loff1, loff1 + len - 1, NULL); > - unlock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1, NULL); > -} > - > -static void btrfs_double_extent_lock(struct inode *inode1, u64 loff1, > - struct inode *inode2, u64 loff2, u64 len) > -{ > - u64 range1_end = loff1 + len - 1; > - u64 range2_end = loff2 + len - 1; > - > - if (inode1 < inode2) { > - swap(inode1, inode2); > - swap(loff1, loff2); > - swap(range1_end, range2_end); > - } else if (inode1 == inode2 && loff2 < loff1) { > - swap(loff1, loff2); > - swap(range1_end, range2_end); > - } > - > - lock_extent(&BTRFS_I(inode1)->io_tree, loff1, range1_end, NULL); > - lock_extent(&BTRFS_I(inode2)->io_tree, loff2, range2_end, NULL); > - > - btrfs_assert_inode_range_clean(BTRFS_I(inode1), loff1, range1_end); > - btrfs_assert_inode_range_clean(BTRFS_I(inode2), loff2, range2_end); > -} > - > static void btrfs_double_mmap_lock(struct inode *inode1, struct inode *inode2) > { > if (inode1 < inode2) > @@ -662,6 +633,8 @@ static void btrfs_double_mmap_unlock(struct inode *inode1, struct inode *inode2) > static int btrfs_extent_same_range(struct inode *src, u64 loff, u64 len, > struct inode *dst, u64 dst_loff) > { > + const u64 end = dst_loff + len - 1; > + struct extent_state *cached_state = NULL; > struct btrfs_fs_info *fs_info = BTRFS_I(src)->root->fs_info; > const u64 bs = fs_info->sectorsize; > int ret; > @@ -670,9 +643,9 @@ static int btrfs_extent_same_range(struct inode *src, u64 loff, u64 len, > * Lock destination range to serialize with concurrent readahead() and > * source range to serialize with relocation. > */ > - btrfs_double_extent_lock(src, loff, dst, dst_loff, len); > + lock_extent(&BTRFS_I(dst)->io_tree, dst_loff, end, &cached_state); > ret = btrfs_clone(src, dst, loff, len, ALIGN(len, bs), dst_loff, 1); > - btrfs_double_extent_unlock(src, loff, dst, dst_loff, len); > + unlock_extent(&BTRFS_I(dst)->io_tree, dst_loff, end, &cached_state); > > btrfs_btree_balance_dirty(fs_info); > > @@ -724,6 +697,7 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen, > static noinline int btrfs_clone_files(struct file *file, struct file *file_src, > u64 off, u64 olen, u64 destoff) > { > + struct extent_state *cached_state = NULL; > struct inode *inode = file_inode(file); > struct inode *src = file_inode(file_src); > struct btrfs_fs_info *fs_info = inode_to_fs_info(inode); > @@ -731,6 +705,7 @@ static noinline int btrfs_clone_files(struct file *file, struct file *file_src, > int wb_ret; > u64 len = olen; > u64 bs = fs_info->sectorsize; > + u64 end; > > /* > * VFS's generic_remap_file_range_prep() protects us from cloning the > @@ -766,9 +741,10 @@ static noinline int btrfs_clone_files(struct file *file, struct file *file_src, > * Lock destination range to serialize with concurrent readahead() and > * source range to serialize with relocation. > */ > - btrfs_double_extent_lock(src, off, inode, destoff, len); > + end = destoff + len - 1; > + lock_extent(&BTRFS_I(inode)->io_tree, destoff, end, &cached_state); > ret = btrfs_clone(src, inode, off, olen, len, destoff, 0); > - btrfs_double_extent_unlock(src, off, inode, destoff, len); > + unlock_extent(&BTRFS_I(inode)->io_tree, destoff, end, &cached_state); > > /* > * We may have copied an inline extent into a page of the destination > -- > 2.43.0 >
diff --git a/fs/btrfs/reflink.c b/fs/btrfs/reflink.c index 08d0fb46ceec..6e7a3970394b 100644 --- a/fs/btrfs/reflink.c +++ b/fs/btrfs/reflink.c @@ -616,35 +616,6 @@ static int btrfs_clone(struct inode *src, struct inode *inode, return ret; } -static void btrfs_double_extent_unlock(struct inode *inode1, u64 loff1, - struct inode *inode2, u64 loff2, u64 len) -{ - unlock_extent(&BTRFS_I(inode1)->io_tree, loff1, loff1 + len - 1, NULL); - unlock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1, NULL); -} - -static void btrfs_double_extent_lock(struct inode *inode1, u64 loff1, - struct inode *inode2, u64 loff2, u64 len) -{ - u64 range1_end = loff1 + len - 1; - u64 range2_end = loff2 + len - 1; - - if (inode1 < inode2) { - swap(inode1, inode2); - swap(loff1, loff2); - swap(range1_end, range2_end); - } else if (inode1 == inode2 && loff2 < loff1) { - swap(loff1, loff2); - swap(range1_end, range2_end); - } - - lock_extent(&BTRFS_I(inode1)->io_tree, loff1, range1_end, NULL); - lock_extent(&BTRFS_I(inode2)->io_tree, loff2, range2_end, NULL); - - btrfs_assert_inode_range_clean(BTRFS_I(inode1), loff1, range1_end); - btrfs_assert_inode_range_clean(BTRFS_I(inode2), loff2, range2_end); -} - static void btrfs_double_mmap_lock(struct inode *inode1, struct inode *inode2) { if (inode1 < inode2) @@ -662,6 +633,8 @@ static void btrfs_double_mmap_unlock(struct inode *inode1, struct inode *inode2) static int btrfs_extent_same_range(struct inode *src, u64 loff, u64 len, struct inode *dst, u64 dst_loff) { + const u64 end = dst_loff + len - 1; + struct extent_state *cached_state = NULL; struct btrfs_fs_info *fs_info = BTRFS_I(src)->root->fs_info; const u64 bs = fs_info->sectorsize; int ret; @@ -670,9 +643,9 @@ static int btrfs_extent_same_range(struct inode *src, u64 loff, u64 len, * Lock destination range to serialize with concurrent readahead() and * source range to serialize with relocation. */ - btrfs_double_extent_lock(src, loff, dst, dst_loff, len); + lock_extent(&BTRFS_I(dst)->io_tree, dst_loff, end, &cached_state); ret = btrfs_clone(src, dst, loff, len, ALIGN(len, bs), dst_loff, 1); - btrfs_double_extent_unlock(src, loff, dst, dst_loff, len); + unlock_extent(&BTRFS_I(dst)->io_tree, dst_loff, end, &cached_state); btrfs_btree_balance_dirty(fs_info); @@ -724,6 +697,7 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen, static noinline int btrfs_clone_files(struct file *file, struct file *file_src, u64 off, u64 olen, u64 destoff) { + struct extent_state *cached_state = NULL; struct inode *inode = file_inode(file); struct inode *src = file_inode(file_src); struct btrfs_fs_info *fs_info = inode_to_fs_info(inode); @@ -731,6 +705,7 @@ static noinline int btrfs_clone_files(struct file *file, struct file *file_src, int wb_ret; u64 len = olen; u64 bs = fs_info->sectorsize; + u64 end; /* * VFS's generic_remap_file_range_prep() protects us from cloning the @@ -766,9 +741,10 @@ static noinline int btrfs_clone_files(struct file *file, struct file *file_src, * Lock destination range to serialize with concurrent readahead() and * source range to serialize with relocation. */ - btrfs_double_extent_lock(src, off, inode, destoff, len); + end = destoff + len - 1; + lock_extent(&BTRFS_I(inode)->io_tree, destoff, end, &cached_state); ret = btrfs_clone(src, inode, off, olen, len, destoff, 0); - btrfs_double_extent_unlock(src, off, inode, destoff, len); + unlock_extent(&BTRFS_I(inode)->io_tree, destoff, end, &cached_state); /* * We may have copied an inline extent into a page of the destination