diff mbox series

[v2] btrfs: export per-inode stable writes flag

Message ID 31652cd455fd5814440672e8bed8f1361d53f459.1737692275.git.wqu@suse.com (mailing list archive)
State New
Headers show
Series [v2] btrfs: export per-inode stable writes flag | expand

Commit Message

Qu Wenruo Jan. 24, 2025, 4:19 a.m. UTC
The address space flag AS_STABLE_WRITES determine if FGP_STABLE for will
wait for the folio to finish its writeback.

For btrfs, due to the default data checksum behavior, if we modify the
folio while it's still under writeback, it will cause data checksum
mismatch.
Thus for quite some call sites we manually call folio_wait_writeback()
to prevent such problem from happneing.

Currently there is only one call site inside btrfs really utilize
FGP_STABLE, and in that case we also manually call folio_wait_writeback()
to do the wait.

But it's better to properly export the stable writes flag to a per-inode
basis.

This involves:

- Update the mapping's stable write flag when setting/clearing NODATASUM
  inode flag using ioctl
  This only works for empty files, so it should be fine.

- Update the mapping's stable write flag when reading an inode from disk

- Remove the explicitly folio_wait_writeback() for FGP_BEGINWRITE call
  site

Signed-off-by: Qu Wenruo <wqu@suse.com>
---
Changelog:
v2:
- Remove the superblock SB_I_STABLE_WRITES flag
  Since we're setting the proper flag for each inode, there is no need
  to bother the superblock flag anymore.
---
 fs/btrfs/btrfs_inode.h | 8 ++++++++
 fs/btrfs/file.c        | 1 -
 fs/btrfs/inode.c       | 2 ++
 fs/btrfs/ioctl.c       | 8 ++++++--
 4 files changed, 16 insertions(+), 3 deletions(-)

Comments

David Sterba Jan. 27, 2025, 9:57 p.m. UTC | #1
On Fri, Jan 24, 2025 at 02:49:28PM +1030, Qu Wenruo wrote:
> The address space flag AS_STABLE_WRITES determine if FGP_STABLE for will
> wait for the folio to finish its writeback.
> 
> For btrfs, due to the default data checksum behavior, if we modify the
> folio while it's still under writeback, it will cause data checksum
> mismatch.
> Thus for quite some call sites we manually call folio_wait_writeback()
> to prevent such problem from happneing.
> 
> Currently there is only one call site inside btrfs really utilize
> FGP_STABLE, and in that case we also manually call folio_wait_writeback()
> to do the wait.
> 
> But it's better to properly export the stable writes flag to a per-inode
> basis.

I think 'export' is a bit confusing here, exporing for who?

> 
> This involves:
> 
> - Update the mapping's stable write flag when setting/clearing NODATASUM
>   inode flag using ioctl
>   This only works for empty files, so it should be fine.
> 
> - Update the mapping's stable write flag when reading an inode from disk
> 
> - Remove the explicitly folio_wait_writeback() for FGP_BEGINWRITE call
>   site
> 
> Signed-off-by: Qu Wenruo <wqu@suse.com>
> ---
> Changelog:
> v2:
> - Remove the superblock SB_I_STABLE_WRITES flag
>   Since we're setting the proper flag for each inode, there is no need
>   to bother the superblock flag anymore.
> ---
>  fs/btrfs/btrfs_inode.h | 8 ++++++++
>  fs/btrfs/file.c        | 1 -
>  fs/btrfs/inode.c       | 2 ++
>  fs/btrfs/ioctl.c       | 8 ++++++--
>  4 files changed, 16 insertions(+), 3 deletions(-)
> 
> diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
> index b2fa33911c28..b090a435675a 100644
> --- a/fs/btrfs/btrfs_inode.h
> +++ b/fs/btrfs/btrfs_inode.h
> @@ -516,6 +516,14 @@ static inline void btrfs_assert_inode_locked(struct btrfs_inode *inode)
>  	lockdep_assert_held(&inode->vfs_inode.i_rwsem);
>  }
>  
> +static inline void btrfs_update_address_space_flags(struct btrfs_inode *inode)

I suggest to rename it to btrfs_update_inode_mapping_flags(), namely to
mention 'mapping' in the name because this is more common.

> +{
> +	if (inode->flags & BTRFS_INODE_NODATASUM)
> +		mapping_clear_stable_writes(inode->vfs_inode.i_mapping);
> +	else
> +		mapping_set_stable_writes(inode->vfs_inode.i_mapping);
> +}
> +
>  /* Array of bytes with variable length, hexadecimal format 0x1234 */
>  #define CSUM_FMT				"0x%*phN"
>  #define CSUM_FMT_VALUE(size, bytes)		size, bytes
> diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
> index bb821fb89fc1..68b14ee1f85c 100644
> --- a/fs/btrfs/file.c
> +++ b/fs/btrfs/file.c
> @@ -875,7 +875,6 @@ static noinline int prepare_one_folio(struct inode *inode, struct folio **folio_
>  			ret = PTR_ERR(folio);
>  		return ret;
>  	}
> -	folio_wait_writeback(folio);
>  	/* Only support page sized folio yet. */
>  	ASSERT(folio_order(folio) == 0);
>  	ret = set_folio_extent_mapped(folio);
> diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
> index 5b3fdba10245..c4769c438859 100644
> --- a/fs/btrfs/inode.c
> +++ b/fs/btrfs/inode.c
> @@ -3948,6 +3948,7 @@ static int btrfs_read_locked_inode(struct inode *inode, struct btrfs_path *path)
>  
>  	btrfs_inode_split_flags(btrfs_inode_flags(leaf, inode_item),
>  				&BTRFS_I(inode)->flags, &BTRFS_I(inode)->ro_flags);
> +	btrfs_update_address_space_flags(BTRFS_I(inode));
>  
>  cache_index:
>  	/*
> @@ -6363,6 +6364,7 @@ int btrfs_create_new_inode(struct btrfs_trans_handle *trans,
>  		if (btrfs_test_opt(fs_info, NODATACOW))
>  			BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW |
>  				BTRFS_INODE_NODATASUM;
> +		btrfs_update_address_space_flags(BTRFS_I(inode));
>  	}
>  
>  	ret = btrfs_insert_inode_locked(inode);
> diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
> index 69c0444369b7..c70e4d2d9b27 100644
> --- a/fs/btrfs/ioctl.c
> +++ b/fs/btrfs/ioctl.c
> @@ -325,9 +325,11 @@ int btrfs_fileattr_set(struct mnt_idmap *idmap,
>  			 * Otherwise we want the flag to reflect the real COW
>  			 * status of the file and will not set it.
>  			 */
> -			if (inode->i_size == 0)
> +			if (inode->i_size == 0) {
>  				binode_flags |= BTRFS_INODE_NODATACOW |
>  						BTRFS_INODE_NODATASUM;
> +				btrfs_update_address_space_flags(binode);

Does this really work? Here the NODATASUM bit is set on a local variable
that is not synced to 'binode' yet.

> +			}
>  		} else {
>  			binode_flags |= BTRFS_INODE_NODATACOW;
>  		}
> @@ -336,9 +338,11 @@ int btrfs_fileattr_set(struct mnt_idmap *idmap,
>  		 * Revert back under same assumptions as above
>  		 */
>  		if (S_ISREG(inode->i_mode)) {
> -			if (inode->i_size == 0)
> +			if (inode->i_size == 0) {
>  				binode_flags &= ~(BTRFS_INODE_NODATACOW |
>  						  BTRFS_INODE_NODATASUM);
> +				btrfs_update_address_space_flags(binode);

Same here.

The mapping flag update should be done somewhere in the update_flags:
label.

> +			}
>  		} else {
>  			binode_flags &= ~BTRFS_INODE_NODATACOW;
>  		}
> -- 
> 2.48.1
>
diff mbox series

Patch

diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index b2fa33911c28..b090a435675a 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -516,6 +516,14 @@  static inline void btrfs_assert_inode_locked(struct btrfs_inode *inode)
 	lockdep_assert_held(&inode->vfs_inode.i_rwsem);
 }
 
+static inline void btrfs_update_address_space_flags(struct btrfs_inode *inode)
+{
+	if (inode->flags & BTRFS_INODE_NODATASUM)
+		mapping_clear_stable_writes(inode->vfs_inode.i_mapping);
+	else
+		mapping_set_stable_writes(inode->vfs_inode.i_mapping);
+}
+
 /* Array of bytes with variable length, hexadecimal format 0x1234 */
 #define CSUM_FMT				"0x%*phN"
 #define CSUM_FMT_VALUE(size, bytes)		size, bytes
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index bb821fb89fc1..68b14ee1f85c 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -875,7 +875,6 @@  static noinline int prepare_one_folio(struct inode *inode, struct folio **folio_
 			ret = PTR_ERR(folio);
 		return ret;
 	}
-	folio_wait_writeback(folio);
 	/* Only support page sized folio yet. */
 	ASSERT(folio_order(folio) == 0);
 	ret = set_folio_extent_mapped(folio);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 5b3fdba10245..c4769c438859 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3948,6 +3948,7 @@  static int btrfs_read_locked_inode(struct inode *inode, struct btrfs_path *path)
 
 	btrfs_inode_split_flags(btrfs_inode_flags(leaf, inode_item),
 				&BTRFS_I(inode)->flags, &BTRFS_I(inode)->ro_flags);
+	btrfs_update_address_space_flags(BTRFS_I(inode));
 
 cache_index:
 	/*
@@ -6363,6 +6364,7 @@  int btrfs_create_new_inode(struct btrfs_trans_handle *trans,
 		if (btrfs_test_opt(fs_info, NODATACOW))
 			BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW |
 				BTRFS_INODE_NODATASUM;
+		btrfs_update_address_space_flags(BTRFS_I(inode));
 	}
 
 	ret = btrfs_insert_inode_locked(inode);
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 69c0444369b7..c70e4d2d9b27 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -325,9 +325,11 @@  int btrfs_fileattr_set(struct mnt_idmap *idmap,
 			 * Otherwise we want the flag to reflect the real COW
 			 * status of the file and will not set it.
 			 */
-			if (inode->i_size == 0)
+			if (inode->i_size == 0) {
 				binode_flags |= BTRFS_INODE_NODATACOW |
 						BTRFS_INODE_NODATASUM;
+				btrfs_update_address_space_flags(binode);
+			}
 		} else {
 			binode_flags |= BTRFS_INODE_NODATACOW;
 		}
@@ -336,9 +338,11 @@  int btrfs_fileattr_set(struct mnt_idmap *idmap,
 		 * Revert back under same assumptions as above
 		 */
 		if (S_ISREG(inode->i_mode)) {
-			if (inode->i_size == 0)
+			if (inode->i_size == 0) {
 				binode_flags &= ~(BTRFS_INODE_NODATACOW |
 						  BTRFS_INODE_NODATASUM);
+				btrfs_update_address_space_flags(binode);
+			}
 		} else {
 			binode_flags &= ~BTRFS_INODE_NODATACOW;
 		}