| Message ID | 3843c1c37c19f547236a8ec2690d9b258a1c4489.1723096922.git.wqu@suse.com (mailing list archive) |
|---|---|
| State | New, archived |
| Series | btrfs: reduce extent map lookup overhead for data write |
On Thu, Aug 8, 2024 at 7:06 AM Qu Wenruo <wqu@suse.com> wrote:
>
> For data read, we always pass an extent_map pointer as @em_cached for
> btrfs_readpage().
> And that @em_cached pointer has the same lifespan as the @bio_ctrl we
> passed in, so it's a perfect match to move @em_cached into @bio_ctrl.
>
> Signed-off-by: Qu Wenruo <wqu@suse.com>
> ---
>  fs/btrfs/extent_io.c | 19 +++++++++----------
>  1 file changed, 9 insertions(+), 10 deletions(-)
>
> diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
> index 822e2bf8bc99..d4ad98488c03 100644
> --- a/fs/btrfs/extent_io.c
> +++ b/fs/btrfs/extent_io.c
> @@ -101,6 +101,8 @@ struct btrfs_bio_ctrl {
>         blk_opf_t opf;
>         btrfs_bio_end_io_t end_io_func;
>         struct writeback_control *wbc;
> +       /* For read/write extent map cache. */
> +       struct extent_map *em_cached;

As mentioned before, this can be named just "em", it's clear enough
given the comment and the fact the structure is contextual.
The naming "em_cached" is odd, and would be more logical as
"cached_em" if it were outside the structure.

>  };
>
>  static void submit_one_bio(struct btrfs_bio_ctrl *bio_ctrl)
> @@ -1003,8 +1005,8 @@ static struct extent_map *__get_extent_map(struct inode *inode,
>   * XXX JDM: This needs looking at to ensure proper page locking
>   * return 0 on success, otherwise return error
>   */
> -static int btrfs_do_readpage(struct folio *folio, struct extent_map **em_cached,
> -                             struct btrfs_bio_ctrl *bio_ctrl, u64 *prev_em_start)
> +static int btrfs_do_readpage(struct folio *folio, struct btrfs_bio_ctrl *bio_ctrl,
> +                             u64 *prev_em_start)
>  {
>         struct inode *inode = folio->mapping->host;
>         struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
> @@ -1052,7 +1054,7 @@ static int btrfs_do_readpage(struct folio *folio, struct extent_map **em_cached,
>                         break;
>                 }
>                 em = __get_extent_map(inode, folio, cur, end - cur + 1,
> -                                     em_cached);
> +                                     &bio_ctrl->em_cached);
>                 if (IS_ERR(em)) {
>                         unlock_extent(tree, cur, end, NULL);
>                         end_folio_read(folio, false, cur, end + 1 - cur);
> @@ -1160,13 +1162,12 @@ int btrfs_read_folio(struct file *file, struct folio *folio)
>         u64 start = folio_pos(folio);
>         u64 end = start + folio_size(folio) - 1;
>         struct btrfs_bio_ctrl bio_ctrl = { .opf = REQ_OP_READ };
> -       struct extent_map *em_cached = NULL;
>         int ret;
>
>         btrfs_lock_and_flush_ordered_range(inode, start, end, NULL);
>
> -       ret = btrfs_do_readpage(folio, &em_cached, &bio_ctrl, NULL);
> -       free_extent_map(em_cached);
> +       ret = btrfs_do_readpage(folio, &bio_ctrl, NULL);
> +       free_extent_map(bio_ctrl.em_cached);
>
>         /*
>          * If btrfs_do_readpage() failed we will want to submit the assembled
> @@ -2349,16 +2350,14 @@ void btrfs_readahead(struct readahead_control *rac)
>         struct folio *folio;
>         u64 start = readahead_pos(rac);
>         u64 end = start + readahead_length(rac) - 1;
> -       struct extent_map *em_cached = NULL;
>         u64 prev_em_start = (u64)-1;
>
>         btrfs_lock_and_flush_ordered_range(inode, start, end, NULL);
>
>         while ((folio = readahead_folio(rac)) != NULL)
> -               btrfs_do_readpage(folio, &em_cached, &bio_ctrl, &prev_em_start);
> +               btrfs_do_readpage(folio, &bio_ctrl, &prev_em_start);
>
> -       if (em_cached)
> -               free_extent_map(em_cached);
> +       free_extent_map(bio_ctrl.em_cached);

So instead of calling free_extent_map() before each submit_one_bio()
call, it would be better to do the call inside submit_one_bio() and
then set bio_ctrl->em_cached to NULL there, to avoid any
use-after-free surprises and leaks in case someone later forgets to
call free_extent_map(). Also removes duplicated code.

Also, as mentioned before, the subject of the patch is incorrect, it
should mention btrfs_bio_ctrl:: instead of extent_map::

Thanks.

>         submit_one_bio(&bio_ctrl);
> }
>
> --
> 2.45.2
>
>
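To make the suggested change concrete, a minimal sketch of centralizing the cleanup in submit_one_bio() follows. This is a hypothetical illustration, not the actual kernel function: the real submit_one_bio() carries the bio submission logic elided below, and the exact placement of the cleanup is an assumption.

```c
/* Hypothetical sketch of the suggestion above, not actual kernel code. */
static void submit_one_bio(struct btrfs_bio_ctrl *bio_ctrl)
{
	struct btrfs_bio *bbio = bio_ctrl->bbio;

	/*
	 * Centralize the cleanup: drop the cached extent map here so no
	 * caller can forget free_extent_map(), and reset the pointer so a
	 * stale em is never reused after the bio is submitted.
	 */
	free_extent_map(bio_ctrl->em_cached);
	bio_ctrl->em_cached = NULL;

	/* Nothing was assembled, so there is no bio to submit. */
	if (!bbio)
		return;

	/* ... existing bio submission logic, unchanged ... */
}
```

Qu's reply below explains why tying the cache's lifetime to bio submission like this costs an extra lookup.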
On 2024/8/8 21:06, Filipe Manana wrote:
> On Thu, Aug 8, 2024 at 7:06 AM Qu Wenruo <wqu@suse.com> wrote:
>>
>> For data read, we always pass an extent_map pointer as @em_cached for
>> btrfs_readpage().
>> And that @em_cached pointer has the same lifespan as the @bio_ctrl we
>> passed in, so it's a perfect match to move @em_cached into @bio_ctrl.
>>
>> Signed-off-by: Qu Wenruo <wqu@suse.com>
>> ---
>>  fs/btrfs/extent_io.c | 19 +++++++++----------
>>  1 file changed, 9 insertions(+), 10 deletions(-)
>>
>> diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
>> index 822e2bf8bc99..d4ad98488c03 100644
>> --- a/fs/btrfs/extent_io.c
>> +++ b/fs/btrfs/extent_io.c
>> @@ -101,6 +101,8 @@ struct btrfs_bio_ctrl {
>>         blk_opf_t opf;
>>         btrfs_bio_end_io_t end_io_func;
>>         struct writeback_control *wbc;
>> +       /* For read/write extent map cache. */
>> +       struct extent_map *em_cached;
>
> As mentioned before, this can be named just "em", it's clear enough
> given the comment and the fact the structure is contextual.
> The naming "em_cached" is odd, and would be more logical as
> "cached_em" if it were outside the structure.

Definitely will fix the naming and the subject line.

[...]

> So instead of calling free_extent_map() before each submit_one_bio()
> call, it would be better to do the call inside submit_one_bio() and
> then set bio_ctrl->em_cached to NULL there,
> to avoid any use-after-free surprises and leaks in case someone later
> forgets to call free_extent_map(). Also removes duplicated code.

This is not that straightforward. E.g.:

  |<--- em 1 -->|<--- em 2 -->|
  |   1  |   2  |   3  |   4  |  <- Pages

For page 1, we get em1, so bio_ctrl->em = em1, and we add the page into
bio_ctrl->bio.

For page 2, the same: we just reuse bio_ctrl->em and add the page into
bio_ctrl->bio.

For page 3, we find bio_ctrl->em no longer matches our range, so we get
the new em2 and store it into bio_ctrl->em. So far so good.

But at submit_extent_folio(), we find page 3 is not contiguous with
bio_ctrl->bio, thus we should submit the bio. Remember that bio_ctrl->em
is now em2; if we clear it right there, the next page, page 4, will
still need to do an em lookup.

So unfortunately we cannot free the cached em at submit_one_bio(),
because at that point our cached em may already have been switched to
the next range.

This is not a huge deal, as we're only doing at most 2 em lookups per
dirty range, but it's still not the ideal 1 em lookup of the existing
code.

And considering there are only 4 call sites allocating an on-stack
bio_ctrl, the explicit free_extent_map() call is not that big a deal.

Thanks,
Qu

>
> Also, as mentioned before, the subject of the patch is incorrect, it
> should mention btrfs_bio_ctrl:: instead of extent_map::
>
> Thanks.
>
>>         submit_one_bio(&bio_ctrl);
>> }
>>
>> --
>> 2.45.2
>>
>>
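Qu's argument hinges on how the per-bio_ctrl cache is consulted. Below is a simplified paraphrase of that lookup logic; the helper name and signature are assumptions for illustration, not the verbatim __get_extent_map(). The point it shows: the cached em keeps serving folios until a requested offset falls outside it, so clearing it at submit time while em2 is cached would force page 4 to repeat the lookup.

```c
/*
 * Paraphrased sketch of the cache check (the real code is
 * __get_extent_map(); names and signatures here are simplified
 * assumptions for illustration).
 */
static struct extent_map *get_cached_or_lookup(struct btrfs_inode *inode,
					       u64 start, u64 len,
					       struct extent_map **cached)
{
	struct extent_map *em = *cached;

	/* Cache hit: @start still falls inside the cached extent map. */
	if (em && start >= em->start && start < extent_map_end(em)) {
		refcount_inc(&em->refs);
		return em;
	}

	/* Cache miss: drop the old em (free_extent_map() is NULL-safe). */
	free_extent_map(em);
	*cached = NULL;

	/* Hypothetical stand-in for the real btrfs_get_extent() lookup. */
	em = btrfs_lookup_extent_for_read(inode, start, len);
	if (IS_ERR(em))
		return em;

	/* Keep one reference in the cache and hand one to the caller. */
	refcount_inc(&em->refs);
	*cached = em;
	return em;
}
```

In the four-page example above, page 4 hits the cached em2 only if em2 survived the bio submission between pages 3 and 4, which is why the explicit free_extent_map() stays at the call sites.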
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 822e2bf8bc99..d4ad98488c03 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -101,6 +101,8 @@ struct btrfs_bio_ctrl {
         blk_opf_t opf;
         btrfs_bio_end_io_t end_io_func;
         struct writeback_control *wbc;
+        /* For read/write extent map cache. */
+        struct extent_map *em_cached;
 };
 
 static void submit_one_bio(struct btrfs_bio_ctrl *bio_ctrl)
@@ -1003,8 +1005,8 @@ static struct extent_map *__get_extent_map(struct inode *inode,
  * XXX JDM: This needs looking at to ensure proper page locking
  * return 0 on success, otherwise return error
  */
-static int btrfs_do_readpage(struct folio *folio, struct extent_map **em_cached,
-                             struct btrfs_bio_ctrl *bio_ctrl, u64 *prev_em_start)
+static int btrfs_do_readpage(struct folio *folio, struct btrfs_bio_ctrl *bio_ctrl,
+                             u64 *prev_em_start)
 {
         struct inode *inode = folio->mapping->host;
         struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
@@ -1052,7 +1054,7 @@ static int btrfs_do_readpage(struct folio *folio, struct extent_map **em_cached,
                         break;
                 }
                 em = __get_extent_map(inode, folio, cur, end - cur + 1,
-                                      em_cached);
+                                      &bio_ctrl->em_cached);
                 if (IS_ERR(em)) {
                         unlock_extent(tree, cur, end, NULL);
                         end_folio_read(folio, false, cur, end + 1 - cur);
@@ -1160,13 +1162,12 @@ int btrfs_read_folio(struct file *file, struct folio *folio)
         u64 start = folio_pos(folio);
         u64 end = start + folio_size(folio) - 1;
         struct btrfs_bio_ctrl bio_ctrl = { .opf = REQ_OP_READ };
-        struct extent_map *em_cached = NULL;
         int ret;
 
         btrfs_lock_and_flush_ordered_range(inode, start, end, NULL);
 
-        ret = btrfs_do_readpage(folio, &em_cached, &bio_ctrl, NULL);
-        free_extent_map(em_cached);
+        ret = btrfs_do_readpage(folio, &bio_ctrl, NULL);
+        free_extent_map(bio_ctrl.em_cached);
 
         /*
          * If btrfs_do_readpage() failed we will want to submit the assembled
@@ -2349,16 +2350,14 @@ void btrfs_readahead(struct readahead_control *rac)
         struct folio *folio;
         u64 start = readahead_pos(rac);
         u64 end = start + readahead_length(rac) - 1;
-        struct extent_map *em_cached = NULL;
         u64 prev_em_start = (u64)-1;
 
         btrfs_lock_and_flush_ordered_range(inode, start, end, NULL);
 
         while ((folio = readahead_folio(rac)) != NULL)
-                btrfs_do_readpage(folio, &em_cached, &bio_ctrl, &prev_em_start);
+                btrfs_do_readpage(folio, &bio_ctrl, &prev_em_start);
 
-        if (em_cached)
-                free_extent_map(em_cached);
+        free_extent_map(bio_ctrl.em_cached);
         submit_one_bio(&bio_ctrl);
 }
For data read, we always pass an extent_map pointer as @em_cached for
btrfs_readpage().
And that @em_cached pointer has the same lifespan as the @bio_ctrl we
passed in, so it's a perfect match to move @em_cached into @bio_ctrl.

Signed-off-by: Qu Wenruo <wqu@suse.com>
---
 fs/btrfs/extent_io.c | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)