[v14,29/42] btrfs: introduce dedicated data write path for ZONED mode

Message ID 698bfc6446634e06a9399fa819d0f19aba3b4196.1611627788.git.naohiro.aota@wdc.com (mailing list archive)
State New, archived
Series btrfs: zoned block device support

Commit Message

Naohiro Aota Jan. 26, 2021, 2:25 a.m. UTC
If more than one IO is issued for one file extent, these IOs can be written
to separate regions on a device. Since we cannot map one file extent to
such separate areas, we need to follow the "one IO == one ordered extent"
rule.

The normal buffered, uncompressed and not pre-allocated write path (used
by cow_file_range()) sometimes does not follow this rule. It can write
only part of an ordered extent when a region to write is specified, e.g.,
when it is called from fdatasync().

Introduce a dedicated (uncompressed, buffered) data write path for ZONED
mode. This write path CoWs the region and writes it out at once.

Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
---
 fs/btrfs/inode.c | 34 ++++++++++++++++++++++++++++++++--
 1 file changed, 32 insertions(+), 2 deletions(-)
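
For illustration only (not part of the patch): a hypothetical userspace
sketch of the kind of ranged writeback that breaks the rule. It uses
sync_file_range() to explicitly request writeback of a subrange; the mount
point, file name and sizes are made up.

#define _GNU_SOURCE
#include <fcntl.h>
#include <unistd.h>

int main(void)
{
	static char buf[128 * 1024];	/* one large buffered write */
	int fd = open("/mnt/zoned/file", O_CREAT | O_WRONLY | O_TRUNC, 0644);

	if (fd < 0)
		return 1;
	write(fd, buf, sizeof(buf));	/* becomes a single delalloc region */
	/*
	 * Ask for writeback of the first 4 KiB only.  Without the
	 * dedicated zoned path, the resulting IO can cover just a part
	 * of the ordered extent backing this region.
	 */
	sync_file_range(fd, 0, 4096, SYNC_FILE_RANGE_WRITE);
	close(fd);
	return 0;
}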

Comments

David Sterba Feb. 2, 2021, 3 p.m. UTC | #1
On Tue, Jan 26, 2021 at 11:25:07AM +0900, Naohiro Aota wrote:
> [...]
> @@ -1879,17 +1902,24 @@ int btrfs_run_delalloc_range(struct btrfs_inode *inode, struct page *locked_page
>  {
>  	int ret;
>  	int force_cow = need_force_cow(inode, start, end);
> +	const bool do_compress = inode_can_compress(inode) &&
> +		inode_need_compress(inode, start, end);

Caching the values would make sense, but inode_need_compress() is quite
heavy as it runs the compression heuristic, so evaluating it
unconditionally up front would affect all cases and cost some
performance.

> +	const bool zoned = btrfs_is_zoned(inode->root->fs_info);
>  
>  	if (inode->flags & BTRFS_INODE_NODATACOW && !force_cow) {
> +		ASSERT(!zoned);
>  		ret = run_delalloc_nocow(inode, locked_page, start, end,
>  					 page_started, 1, nr_written);
>  	} else if (inode->flags & BTRFS_INODE_PREALLOC && !force_cow) {
> +		ASSERT(!zoned);
>  		ret = run_delalloc_nocow(inode, locked_page, start, end,
>  					 page_started, 0, nr_written);
> -	} else if (!inode_can_compress(inode) ||
> -		   !inode_need_compress(inode, start, end)) {
> +	} else if (!do_compress && !zoned) {
>  		ret = cow_file_range(inode, locked_page, start, end,
>  				     page_started, nr_written, 1);
> +	} else if (!do_compress && zoned) {
> +		ret = run_delalloc_zoned(inode, locked_page, start, end,
> +					 page_started, nr_written);

Part of the condition is shared, so it should be structured like:

	} else if (!<the compression checks>) {
		if (zoned)
			run_delalloc_zoned
		else
			cow_file_range
	} ...
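
Spelled out as a compilable sketch (one possible shape for v15, not
necessarily the final code), which also keeps the compression checks
inline so that the comment above is addressed too: thanks to
short-circuit evaluation, the heuristic only runs when this branch is
actually reached:

	} else if (!inode_can_compress(inode) ||
		   !inode_need_compress(inode, start, end)) {
		/*
		 * Both checks short-circuit, so inode_need_compress()
		 * and its heuristic are skipped entirely when one of
		 * the nocow/prealloc branches above was taken.
		 */
		if (zoned)
			ret = run_delalloc_zoned(inode, locked_page, start,
						 end, page_started,
						 nr_written);
		else
			ret = cow_file_range(inode, locked_page, start, end,
					     page_started, nr_written, 1);
	} else {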

>  	} else {
>  		set_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, &inode->runtime_flags);
>  		ret = cow_file_range_async(inode, wbc, locked_page, start, end,
> -- 
> 2.27.0
Naohiro Aota Feb. 4, 2021, 8:25 a.m. UTC | #2
On Tue, Feb 02, 2021 at 04:00:45PM +0100, David Sterba wrote:
>On Tue, Jan 26, 2021 at 11:25:07AM +0900, Naohiro Aota wrote:
>> [...]
>> @@ -1879,17 +1902,24 @@ int btrfs_run_delalloc_range(struct btrfs_inode *inode, struct page *locked_page
>>  {
>>  	int ret;
>>  	int force_cow = need_force_cow(inode, start, end);
>> +	const bool do_compress = inode_can_compress(inode) &&
>> +		inode_need_compress(inode, start, end);
>
>Caching the values would make sense, but inode_need_compress() is quite
>heavy as it runs the compression heuristic, so evaluating it
>unconditionally up front would affect all cases and cost some
>performance.
>
>> +	const bool zoned = btrfs_is_zoned(inode->root->fs_info);
>>
>>  	if (inode->flags & BTRFS_INODE_NODATACOW && !force_cow) {
>> +		ASSERT(!zoned);
>>  		ret = run_delalloc_nocow(inode, locked_page, start, end,
>>  					 page_started, 1, nr_written);
>>  	} else if (inode->flags & BTRFS_INODE_PREALLOC && !force_cow) {
>> +		ASSERT(!zoned);
>>  		ret = run_delalloc_nocow(inode, locked_page, start, end,
>>  					 page_started, 0, nr_written);
>> -	} else if (!inode_can_compress(inode) ||
>> -		   !inode_need_compress(inode, start, end)) {
>> +	} else if (!do_compress && !zoned) {
>>  		ret = cow_file_range(inode, locked_page, start, end,
>>  				     page_started, nr_written, 1);
>> +	} else if (!do_compress && zoned) {
>> +		ret = run_delalloc_zoned(inode, locked_page, start, end,
>> +					 page_started, nr_written);
>
>Part of the condition is shared, so it should be structured like:
>
>	} else if (!<the compression checks>) {
>		if (zoned)
>			run_delalloc_zoned
>		else
>			cow_file_range
>	} ...
>

Sure. I'll rewrite the code like this in v15.

Thanks,


Patch

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index a9bf78eaed42..6d43aaa1f537 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1400,6 +1400,29 @@  static int cow_file_range_async(struct btrfs_inode *inode,
 	return 0;
 }
 
+static noinline int run_delalloc_zoned(struct btrfs_inode *inode,
+				       struct page *locked_page, u64 start,
+				       u64 end, int *page_started,
+				       unsigned long *nr_written)
+{
+	int ret;
+
+	ret = cow_file_range(inode, locked_page, start, end,
+			     page_started, nr_written, 0);
+	if (ret)
+		return ret;
+
+	if (*page_started)
+		return 0;
+
+	__set_page_dirty_nobuffers(locked_page);
+	account_page_redirty(locked_page);
+	extent_write_locked_range(&inode->vfs_inode, start, end, WB_SYNC_ALL);
+	*page_started = 1;
+
+	return 0;
+}
+
 static noinline int csum_exist_in_range(struct btrfs_fs_info *fs_info,
 					u64 bytenr, u64 num_bytes)
 {
@@ -1879,17 +1902,24 @@  int btrfs_run_delalloc_range(struct btrfs_inode *inode, struct page *locked_page
 {
 	int ret;
 	int force_cow = need_force_cow(inode, start, end);
+	const bool do_compress = inode_can_compress(inode) &&
+		inode_need_compress(inode, start, end);
+	const bool zoned = btrfs_is_zoned(inode->root->fs_info);
 
 	if (inode->flags & BTRFS_INODE_NODATACOW && !force_cow) {
+		ASSERT(!zoned);
 		ret = run_delalloc_nocow(inode, locked_page, start, end,
 					 page_started, 1, nr_written);
 	} else if (inode->flags & BTRFS_INODE_PREALLOC && !force_cow) {
+		ASSERT(!zoned);
 		ret = run_delalloc_nocow(inode, locked_page, start, end,
 					 page_started, 0, nr_written);
-	} else if (!inode_can_compress(inode) ||
-		   !inode_need_compress(inode, start, end)) {
+	} else if (!do_compress && !zoned) {
 		ret = cow_file_range(inode, locked_page, start, end,
 				     page_started, nr_written, 1);
+	} else if (!do_compress && zoned) {
+		ret = run_delalloc_zoned(inode, locked_page, start, end,
+					 page_started, nr_written);
 	} else {
 		set_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, &inode->runtime_flags);
 		ret = cow_file_range_async(inode, wbc, locked_page, start, end,