diff mbox series

[RFC,06/31] btrfs: lock extents while truncating

Message ID 6a65ad8036c65f0d484dc98ee30ba21d4c4812fc.1623567940.git.rgoldwyn@suse.com (mailing list archive)
State New, archived
Headers show
Series btrfs buffered iomap support | expand

Commit Message

Goldwyn Rodrigues June 13, 2021, 1:39 p.m. UTC
From: Goldwyn Rodrigues <rgoldwyn@suse.com>

Lock order change: Lock extents before pages.

This removes extent locking in invalidatepage().
invalidatepage() is either called during truncation or while evicting
an inode. Inode eviction does not require extent locking.

Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
---
 fs/btrfs/inode.c | 29 +++++++++++++++++++++++------
 1 file changed, 23 insertions(+), 6 deletions(-)

Comments

Nikolay Borisov June 16, 2021, 6:42 a.m. UTC | #1
On 13.06.21 г. 16:39, Goldwyn Rodrigues wrote:
> From: Goldwyn Rodrigues <rgoldwyn@suse.com>
> 
> Lock order change: Lock extents before pages.
> 
> This removes extent locking in invalidatepages().
> invalidatepage() is either called during truncating or while evicting
> inode. Inode eviction does not require locking.

Imo the changelog of that patch warrants an explicit mention of why it's
correct, namely that it's lifting the locking from btrfs_invalidatepage
and putting it in btrfs_setsize, wrapping truncate_setsize which
eventually boils down to calling invalidatepage.

> 
> Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
> ---
>  fs/btrfs/inode.c | 29 +++++++++++++++++++++++------
>  1 file changed, 23 insertions(+), 6 deletions(-)
> 
> diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
> index 794d906cba6c..7761a60788d0 100644
> --- a/fs/btrfs/inode.c
> +++ b/fs/btrfs/inode.c
> @@ -5201,6 +5201,9 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr)
>  		btrfs_end_transaction(trans);
>  	} else {
>  		struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
> +		u64 start = round_down(oldsize, fs_info->sectorsize);
> +		u64 end = round_up(newsize, fs_info->sectorsize) - 1;
> +		struct extent_state **cached = NULL;
>  
>  		if (btrfs_is_zoned(fs_info)) {
>  			ret = btrfs_wait_ordered_range(inode,
> @@ -5219,7 +5222,10 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr)
>  			set_bit(BTRFS_INODE_FLUSH_ON_CLOSE,
>  				&BTRFS_I(inode)->runtime_flags);
>  
> +		lock_extent_bits(&BTRFS_I(inode)->io_tree, start, end, cached);
>  		truncate_setsize(inode, newsize);
> +		unlock_extent_cached(&BTRFS_I(inode)->io_tree, start, end,
> +				cached);

This effectively lifts the granular locking, which is performed on a
per-page basis and turns it into one coarse lock, which covers the whole
region. This might have some performance repercussions which can go
either way:

1. On the one hand you are keeping the whole range locked from the
start until the page cache is truncated.

2. On the other you reduce the overall number of extent lock/unlocks.

In either case we should think about quantifying the impact, if any.

>  
>  		inode_dio_wait(inode);
>  
> @@ -8322,9 +8328,23 @@ static int __btrfs_releasepage(struct page *page, gfp_t gfp_flags)
>  
>  static int btrfs_releasepage(struct page *page, gfp_t gfp_flags)
>  {
> +	struct btrfs_inode *inode = BTRFS_I(page->mapping->host);
> +	struct extent_map *em;
> +	int ret;
> +
>  	if (PageWriteback(page) || PageDirty(page))
>  		return 0;
> -	return __btrfs_releasepage(page, gfp_flags);
> +
> +	em = lookup_extent_mapping(&inode->extent_tree, page_offset(page),
> +			PAGE_SIZE - 1);
> +	if (!em)
> +		return 0;
> +	if (!try_lock_extent(&inode->io_tree, em->start, extent_map_end(em) - 1))
> +		return 0;
> +	ret = __btrfs_releasepage(page, gfp_flags);
> +	unlock_extent(&inode->io_tree, em->start, extent_map_end(em));
> +	free_extent_map(em);
> +	return ret;
>  }
>  
>  #ifdef CONFIG_MIGRATION
> @@ -8398,9 +8418,6 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset,
>  		return;
>  	}
>  
> -	if (!inode_evicting)
> -		lock_extent_bits(tree, page_start, page_end, &cached_state);
> -
>  	cur = page_start;
>  	while (cur < page_end) {
>  		struct btrfs_ordered_extent *ordered;
> @@ -8458,7 +8475,7 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset,
>  		if (!inode_evicting)
>  			clear_extent_bit(tree, cur, range_end,
>  					 EXTENT_DELALLOC |
> -					 EXTENT_LOCKED | EXTENT_DO_ACCOUNTING |
> +					 EXTENT_DO_ACCOUNTING |
>  					 EXTENT_DEFRAG, 1, 0, &cached_state);
>  
>  		spin_lock_irq(&inode->ordered_tree.lock);
> @@ -8503,7 +8520,7 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset,
>  		 */
>  		btrfs_qgroup_free_data(inode, NULL, cur, range_end + 1 - cur);
>  		if (!inode_evicting) {
> -			clear_extent_bit(tree, cur, range_end, EXTENT_LOCKED |
> +			clear_extent_bit(tree, cur, range_end,
>  				 EXTENT_DELALLOC | EXTENT_UPTODATE |
>  				 EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 1,
>  				 delete_states, &cached_state);
>
Nikolay Borisov June 16, 2021, 1:13 p.m. UTC | #2
On 13.06.21 г. 16:39, Goldwyn Rodrigues wrote:
> From: Goldwyn Rodrigues <rgoldwyn@suse.com>
> 
> Lock order change: Lock extents before pages.
> 
> This removes extent locking in invalidatepages().
> invalidatepage() is either called during truncating or while evicting
> inode. Inode eviction does not require locking.
> 
> Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
> ---
>  fs/btrfs/inode.c | 29 +++++++++++++++++++++++------
>  1 file changed, 23 insertions(+), 6 deletions(-)
> 
> diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
> index 794d906cba6c..7761a60788d0 100644
> --- a/fs/btrfs/inode.c
> +++ b/fs/btrfs/inode.c
> @@ -5201,6 +5201,9 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr)
>  		btrfs_end_transaction(trans);
>  	} else {
>  		struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
> +		u64 start = round_down(oldsize, fs_info->sectorsize);
> +		u64 end = round_up(newsize, fs_info->sectorsize) - 1;
> +		struct extent_state **cached = NULL;
>  
>  		if (btrfs_is_zoned(fs_info)) {
>  			ret = btrfs_wait_ordered_range(inode,
> @@ -5219,7 +5222,10 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr)
>  			set_bit(BTRFS_INODE_FLUSH_ON_CLOSE,
>  				&BTRFS_I(inode)->runtime_flags);
>  
> +		lock_extent_bits(&BTRFS_I(inode)->io_tree, start, end, cached);
>  		truncate_setsize(inode, newsize);
> +		unlock_extent_cached(&BTRFS_I(inode)->io_tree, start, end,
> +				cached);
>  
>  		inode_dio_wait(inode);
>  
> @@ -8322,9 +8328,23 @@ static int __btrfs_releasepage(struct page *page, gfp_t gfp_flags)
>  
>  static int btrfs_releasepage(struct page *page, gfp_t gfp_flags)
>  {
> +	struct btrfs_inode *inode = BTRFS_I(page->mapping->host);
> +	struct extent_map *em;
> +	int ret;
> +
>  	if (PageWriteback(page) || PageDirty(page))
>  		return 0;
> -	return __btrfs_releasepage(page, gfp_flags);
> +
> +	em = lookup_extent_mapping(&inode->extent_tree, page_offset(page),
> +			PAGE_SIZE - 1);
> +	if (!em)
> +		return 0;
> +	if (!try_lock_extent(&inode->io_tree, em->start, extent_map_end(em) - 1))
> +		return 0;
> +	ret = __btrfs_releasepage(page, gfp_flags);
> +	unlock_extent(&inode->io_tree, em->start, extent_map_end(em));
> +	free_extent_map(em);
> +	return ret;
>  }

Care to elaborate on why this is needed? Looking at the code I couldn't
find the answer.

>  
>  #ifdef CONFIG_MIGRATION
> @@ -8398,9 +8418,6 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset,
>  		return;
>  	}
>  
> -	if (!inode_evicting)
> -		lock_extent_bits(tree, page_start, page_end, &cached_state);
> -
>  	cur = page_start;
>  	while (cur < page_end) {
>  		struct btrfs_ordered_extent *ordered;
> @@ -8458,7 +8475,7 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset,
>  		if (!inode_evicting)
>  			clear_extent_bit(tree, cur, range_end,
>  					 EXTENT_DELALLOC |
> -					 EXTENT_LOCKED | EXTENT_DO_ACCOUNTING |
> +					 EXTENT_DO_ACCOUNTING |
>  					 EXTENT_DEFRAG, 1, 0, &cached_state);
>  
>  		spin_lock_irq(&inode->ordered_tree.lock);
> @@ -8503,7 +8520,7 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset,
>  		 */
>  		btrfs_qgroup_free_data(inode, NULL, cur, range_end + 1 - cur);
>  		if (!inode_evicting) {
> -			clear_extent_bit(tree, cur, range_end, EXTENT_LOCKED |
> +			clear_extent_bit(tree, cur, range_end,
>  				 EXTENT_DELALLOC | EXTENT_UPTODATE |
>  				 EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 1,
>  				 delete_states, &cached_state);
>
diff mbox series

Patch

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 794d906cba6c..7761a60788d0 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -5201,6 +5201,9 @@  static int btrfs_setsize(struct inode *inode, struct iattr *attr)
 		btrfs_end_transaction(trans);
 	} else {
 		struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+		u64 start = round_down(oldsize, fs_info->sectorsize);
+		u64 end = round_up(newsize, fs_info->sectorsize) - 1;
+		struct extent_state **cached = NULL;
 
 		if (btrfs_is_zoned(fs_info)) {
 			ret = btrfs_wait_ordered_range(inode,
@@ -5219,7 +5222,10 @@  static int btrfs_setsize(struct inode *inode, struct iattr *attr)
 			set_bit(BTRFS_INODE_FLUSH_ON_CLOSE,
 				&BTRFS_I(inode)->runtime_flags);
 
+		lock_extent_bits(&BTRFS_I(inode)->io_tree, start, end, cached);
 		truncate_setsize(inode, newsize);
+		unlock_extent_cached(&BTRFS_I(inode)->io_tree, start, end,
+				cached);
 
 		inode_dio_wait(inode);
 
@@ -8322,9 +8328,23 @@  static int __btrfs_releasepage(struct page *page, gfp_t gfp_flags)
 
 static int btrfs_releasepage(struct page *page, gfp_t gfp_flags)
 {
+	struct btrfs_inode *inode = BTRFS_I(page->mapping->host);
+	struct extent_map *em;
+	int ret;
+
 	if (PageWriteback(page) || PageDirty(page))
 		return 0;
-	return __btrfs_releasepage(page, gfp_flags);
+
+	em = lookup_extent_mapping(&inode->extent_tree, page_offset(page),
+			PAGE_SIZE - 1);
+	if (!em)
+		return 0;
+	if (!try_lock_extent(&inode->io_tree, em->start, extent_map_end(em) - 1))
+		return 0;
+	ret = __btrfs_releasepage(page, gfp_flags);
+	unlock_extent(&inode->io_tree, em->start, extent_map_end(em));
+	free_extent_map(em);
+	return ret;
 }
 
 #ifdef CONFIG_MIGRATION
@@ -8398,9 +8418,6 @@  static void btrfs_invalidatepage(struct page *page, unsigned int offset,
 		return;
 	}
 
-	if (!inode_evicting)
-		lock_extent_bits(tree, page_start, page_end, &cached_state);
-
 	cur = page_start;
 	while (cur < page_end) {
 		struct btrfs_ordered_extent *ordered;
@@ -8458,7 +8475,7 @@  static void btrfs_invalidatepage(struct page *page, unsigned int offset,
 		if (!inode_evicting)
 			clear_extent_bit(tree, cur, range_end,
 					 EXTENT_DELALLOC |
-					 EXTENT_LOCKED | EXTENT_DO_ACCOUNTING |
+					 EXTENT_DO_ACCOUNTING |
 					 EXTENT_DEFRAG, 1, 0, &cached_state);
 
 		spin_lock_irq(&inode->ordered_tree.lock);
@@ -8503,7 +8520,7 @@  static void btrfs_invalidatepage(struct page *page, unsigned int offset,
 		 */
 		btrfs_qgroup_free_data(inode, NULL, cur, range_end + 1 - cur);
 		if (!inode_evicting) {
-			clear_extent_bit(tree, cur, range_end, EXTENT_LOCKED |
+			clear_extent_bit(tree, cur, range_end,
 				 EXTENT_DELALLOC | EXTENT_UPTODATE |
 				 EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 1,
 				 delete_states, &cached_state);