diff mbox series

[v2,5/8] btrfs: make btrfs_do_readpage() to do block-by-block read

Message ID bc941d1a5ad95c25de4108bd261ce9da96145689.1740635497.git.wqu@suse.com (mailing list archive)
State New
Headers show
Series btrfs: make subpage handling be feature full | expand

Commit Message

Qu Wenruo Feb. 27, 2025, 5:54 a.m. UTC
Currently if a btrfs has its block size (historically called the sector
size) smaller than the page size, btrfs_do_readpage() will handle the
range extent by extent, which is good for performance as it doesn't need
to look up the same extent map again and again.
(Although get_extent_map() already does extra cached em check, thus
the optimization is not that obvious)

This is totally fine and is a valid optimization, but it relies on the
assumption that there is no partially uptodate range in the page.

Meanwhile there is an incoming feature that requires btrfs to skip the
full page read if a buffered write range covers a full block but not a
full page.

In that case, we can have a page that is partially uptodate, and the
current per-extent lookup cannot handle such a case.

So here we change btrfs_do_readpage() to do block-by-block read, which
simplifies the following things:

- Remove the need for @iosize variable
  Because we just use sectorsize as our increment.

- Remove @pg_offset, and calculate it inside the loop when needed
  It's just offset_in_folio().

- Use a for() loop instead of a while() loop

This will slightly reduce the read performance for subpage cases, but
for the future where we need to skip already uptodate blocks, it should
still be worthwhile.

For block size == page size, this brings no performance change.

Signed-off-by: Qu Wenruo <wqu@suse.com>
---
 fs/btrfs/extent_io.c | 38 ++++++++++++--------------------------
 1 file changed, 12 insertions(+), 26 deletions(-)

Comments

Filipe Manana Feb. 27, 2025, 12:48 p.m. UTC | #1
On Thu, Feb 27, 2025 at 5:56 AM Qu Wenruo <wqu@suse.com> wrote:
>
> Currently if a btrfs has its block size (the older sector size) smaller
> than the page size, btrfs_do_readpage() will handle the range extent by
> extent, this is good for performance as it doesn't need to re-lookup the
> same extent map again and again.
> (Although get_extent_map() already does extra cached em check, thus
> the optimization is not that obvious)
>
> This is totally fine and is a valid optimization, but it has an
> assumption that, there is no partial uptodate range in the page.
>
> Meanwhile there is an incoming feature, requiring btrfs to skip the full
> page read if a buffered write range covers a full block but not a full
> page.
>
> In that case, we can have a page that is partially uptodate, and the
> current per-extent lookup can not handle such case.
>
> So here we change btrfs_do_readpage() to do block-by-block read, this
> simplifies the following things:
>
> - Remove the need for @iosize variable
>   Because we just use sectorsize as our increment.
>
> - Remove @pg_offset, and calculate it inside the loop when needed
>   It's just offset_in_folio().
>
> - Use a for() loop instead of a while() loop
>
> This will slightly reduce the read performance for subpage cases, but for
> the future where we need to skip already uptodate blocks, it should still
> be worthy.
>
> For block size == page size, this brings no performance change.
>
> Signed-off-by: Qu Wenruo <wqu@suse.com>

Reviewed-by: Filipe Manana <fdmanana@suse.com>

Looks good, thanks.

> ---
>  fs/btrfs/extent_io.c | 38 ++++++++++++--------------------------
>  1 file changed, 12 insertions(+), 26 deletions(-)
>
> diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
> index 3968ecbb727d..2abf489e1a9b 100644
> --- a/fs/btrfs/extent_io.c
> +++ b/fs/btrfs/extent_io.c
> @@ -942,14 +942,11 @@ static int btrfs_do_readpage(struct folio *folio, struct extent_map **em_cached,
>         struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
>         u64 start = folio_pos(folio);
>         const u64 end = start + PAGE_SIZE - 1;
> -       u64 cur = start;
>         u64 extent_offset;
>         u64 last_byte = i_size_read(inode);
>         struct extent_map *em;
>         int ret = 0;
> -       size_t pg_offset = 0;
> -       size_t iosize;
> -       size_t blocksize = fs_info->sectorsize;
> +       const size_t blocksize = fs_info->sectorsize;
>
>         ret = set_folio_extent_mapped(folio);
>         if (ret < 0) {
> @@ -960,24 +957,23 @@ static int btrfs_do_readpage(struct folio *folio, struct extent_map **em_cached,
>         if (folio_contains(folio, last_byte >> PAGE_SHIFT)) {
>                 size_t zero_offset = offset_in_folio(folio, last_byte);
>
> -               if (zero_offset) {
> -                       iosize = folio_size(folio) - zero_offset;
> -                       folio_zero_range(folio, zero_offset, iosize);
> -               }
> +               if (zero_offset)
> +                       folio_zero_range(folio, zero_offset,
> +                                        folio_size(folio) - zero_offset);
>         }
>         bio_ctrl->end_io_func = end_bbio_data_read;
>         begin_folio_read(fs_info, folio);
> -       while (cur <= end) {
> +       for (u64 cur = start; cur <= end; cur += blocksize) {
>                 enum btrfs_compression_type compress_type = BTRFS_COMPRESS_NONE;
> +               unsigned long pg_offset = offset_in_folio(folio, cur);
>                 bool force_bio_submit = false;
>                 u64 disk_bytenr;
>                 u64 block_start;
>
>                 ASSERT(IS_ALIGNED(cur, fs_info->sectorsize));
>                 if (cur >= last_byte) {
> -                       iosize = folio_size(folio) - pg_offset;
> -                       folio_zero_range(folio, pg_offset, iosize);
> -                       end_folio_read(folio, true, cur, iosize);
> +                       folio_zero_range(folio, pg_offset, end - cur + 1);
> +                       end_folio_read(folio, true, cur, end - cur + 1);
>                         break;
>                 }
>                 em = get_extent_map(BTRFS_I(inode), folio, cur, end - cur + 1, em_cached);
> @@ -991,8 +987,6 @@ static int btrfs_do_readpage(struct folio *folio, struct extent_map **em_cached,
>
>                 compress_type = extent_map_compression(em);
>
> -               iosize = min(extent_map_end(em) - cur, end - cur + 1);
> -               iosize = ALIGN(iosize, blocksize);
>                 if (compress_type != BTRFS_COMPRESS_NONE)
>                         disk_bytenr = em->disk_bytenr;
>                 else
> @@ -1050,18 +1044,13 @@ static int btrfs_do_readpage(struct folio *folio, struct extent_map **em_cached,
>
>                 /* we've found a hole, just zero and go on */
>                 if (block_start == EXTENT_MAP_HOLE) {
> -                       folio_zero_range(folio, pg_offset, iosize);
> -
> -                       end_folio_read(folio, true, cur, iosize);
> -                       cur = cur + iosize;
> -                       pg_offset += iosize;
> +                       folio_zero_range(folio, pg_offset, blocksize);
> +                       end_folio_read(folio, true, cur, blocksize);
>                         continue;
>                 }
>                 /* the get_extent function already copied into the folio */
>                 if (block_start == EXTENT_MAP_INLINE) {
> -                       end_folio_read(folio, true, cur, iosize);
> -                       cur = cur + iosize;
> -                       pg_offset += iosize;
> +                       end_folio_read(folio, true, cur, blocksize);
>                         continue;
>                 }
>
> @@ -1072,12 +1061,9 @@ static int btrfs_do_readpage(struct folio *folio, struct extent_map **em_cached,
>
>                 if (force_bio_submit)
>                         submit_one_bio(bio_ctrl);
> -               submit_extent_folio(bio_ctrl, disk_bytenr, folio, iosize,
> +               submit_extent_folio(bio_ctrl, disk_bytenr, folio, blocksize,
>                                     pg_offset);
> -               cur = cur + iosize;
> -               pg_offset += iosize;
>         }
> -
>         return 0;
>  }
>
> --
> 2.48.1
>
>
diff mbox series

Patch

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 3968ecbb727d..2abf489e1a9b 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -942,14 +942,11 @@  static int btrfs_do_readpage(struct folio *folio, struct extent_map **em_cached,
 	struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
 	u64 start = folio_pos(folio);
 	const u64 end = start + PAGE_SIZE - 1;
-	u64 cur = start;
 	u64 extent_offset;
 	u64 last_byte = i_size_read(inode);
 	struct extent_map *em;
 	int ret = 0;
-	size_t pg_offset = 0;
-	size_t iosize;
-	size_t blocksize = fs_info->sectorsize;
+	const size_t blocksize = fs_info->sectorsize;
 
 	ret = set_folio_extent_mapped(folio);
 	if (ret < 0) {
@@ -960,24 +957,23 @@  static int btrfs_do_readpage(struct folio *folio, struct extent_map **em_cached,
 	if (folio_contains(folio, last_byte >> PAGE_SHIFT)) {
 		size_t zero_offset = offset_in_folio(folio, last_byte);
 
-		if (zero_offset) {
-			iosize = folio_size(folio) - zero_offset;
-			folio_zero_range(folio, zero_offset, iosize);
-		}
+		if (zero_offset)
+			folio_zero_range(folio, zero_offset,
+					 folio_size(folio) - zero_offset);
 	}
 	bio_ctrl->end_io_func = end_bbio_data_read;
 	begin_folio_read(fs_info, folio);
-	while (cur <= end) {
+	for (u64 cur = start; cur <= end; cur += blocksize) {
 		enum btrfs_compression_type compress_type = BTRFS_COMPRESS_NONE;
+		unsigned long pg_offset = offset_in_folio(folio, cur);
 		bool force_bio_submit = false;
 		u64 disk_bytenr;
 		u64 block_start;
 
 		ASSERT(IS_ALIGNED(cur, fs_info->sectorsize));
 		if (cur >= last_byte) {
-			iosize = folio_size(folio) - pg_offset;
-			folio_zero_range(folio, pg_offset, iosize);
-			end_folio_read(folio, true, cur, iosize);
+			folio_zero_range(folio, pg_offset, end - cur + 1);
+			end_folio_read(folio, true, cur, end - cur + 1);
 			break;
 		}
 		em = get_extent_map(BTRFS_I(inode), folio, cur, end - cur + 1, em_cached);
@@ -991,8 +987,6 @@  static int btrfs_do_readpage(struct folio *folio, struct extent_map **em_cached,
 
 		compress_type = extent_map_compression(em);
 
-		iosize = min(extent_map_end(em) - cur, end - cur + 1);
-		iosize = ALIGN(iosize, blocksize);
 		if (compress_type != BTRFS_COMPRESS_NONE)
 			disk_bytenr = em->disk_bytenr;
 		else
@@ -1050,18 +1044,13 @@  static int btrfs_do_readpage(struct folio *folio, struct extent_map **em_cached,
 
 		/* we've found a hole, just zero and go on */
 		if (block_start == EXTENT_MAP_HOLE) {
-			folio_zero_range(folio, pg_offset, iosize);
-
-			end_folio_read(folio, true, cur, iosize);
-			cur = cur + iosize;
-			pg_offset += iosize;
+			folio_zero_range(folio, pg_offset, blocksize);
+			end_folio_read(folio, true, cur, blocksize);
 			continue;
 		}
 		/* the get_extent function already copied into the folio */
 		if (block_start == EXTENT_MAP_INLINE) {
-			end_folio_read(folio, true, cur, iosize);
-			cur = cur + iosize;
-			pg_offset += iosize;
+			end_folio_read(folio, true, cur, blocksize);
 			continue;
 		}
 
@@ -1072,12 +1061,9 @@  static int btrfs_do_readpage(struct folio *folio, struct extent_map **em_cached,
 
 		if (force_bio_submit)
 			submit_one_bio(bio_ctrl);
-		submit_extent_folio(bio_ctrl, disk_bytenr, folio, iosize,
+		submit_extent_folio(bio_ctrl, disk_bytenr, folio, blocksize,
 				    pg_offset);
-		cur = cur + iosize;
-		pg_offset += iosize;
 	}
-
 	return 0;
 }