
[13/13] iomap: map multiple blocks at a time

Message ID 20231126124720.1249310-14-hch@lst.de (mailing list archive)
State New, archived
Series [01/13] iomap: clear the per-folio dirty bits on all writeback failures

Commit Message

Christoph Hellwig Nov. 26, 2023, 12:47 p.m. UTC
The ->map_blocks interface returns a valid range for writeback, but we
still call back into it for every block, which is a bit inefficient.

Change xfs_writepage_map to use the valid range in the map until the end
of the folio or the dirty range inside the folio instead of calling back
into every block.

Note that the range is not used over folio boundaries as we need to be
able to check the mapping sequence count under the folio lock.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/iomap/buffered-io.c | 113 ++++++++++++++++++++++++++++-------------
 include/linux/iomap.h  |   7 +++
 2 files changed, 86 insertions(+), 34 deletions(-)

Comments

Darrick J. Wong Nov. 29, 2023, 5:25 a.m. UTC | #1
On Sun, Nov 26, 2023 at 01:47:20PM +0100, Christoph Hellwig wrote:
> The ->map_blocks interface returns a valid range for writeback, but we
> still call back into it for every block, which is a bit inefficient.
> 
> Change xfs_writepage_map to use the valid range in the map until the end

iomap_writepage_map?

> of the folio or the dirty range inside the folio instead of calling back
> into every block.
> 
> Note that the range is not used over folio boundaries as we need to be
> able to check the mapping sequence count under the folio lock.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  fs/iomap/buffered-io.c | 113 ++++++++++++++++++++++++++++-------------
>  include/linux/iomap.h  |   7 +++
>  2 files changed, 86 insertions(+), 34 deletions(-)
> 
> diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
> index a01b0686e7c8a0..a98cb38a71ebc8 100644
> --- a/fs/iomap/buffered-io.c
> +++ b/fs/iomap/buffered-io.c
> @@ -1,7 +1,7 @@
>  // SPDX-License-Identifier: GPL-2.0
>  /*
>   * Copyright (C) 2010 Red Hat, Inc.
> - * Copyright (C) 2016-2019 Christoph Hellwig.
> + * Copyright (C) 2016-2023 Christoph Hellwig.
>   */
>  #include <linux/module.h>
>  #include <linux/compiler.h>
> @@ -95,6 +95,42 @@ static inline bool ifs_block_is_dirty(struct folio *folio,
>  	return test_bit(block + blks_per_folio, ifs->state);
>  }
>  
> +static unsigned ifs_find_dirty_range(struct folio *folio,
> +		struct iomap_folio_state *ifs, u64 *range_start, u64 range_end)
> +{
> +	struct inode *inode = folio->mapping->host;
> +	unsigned start_blk =
> +		offset_in_folio(folio, *range_start) >> inode->i_blkbits;
> +	unsigned end_blk = min_not_zero(
> +		offset_in_folio(folio, range_end) >> inode->i_blkbits,
> +		i_blocks_per_folio(inode, folio));
> +	unsigned nblks = 1;
> +
> +	while (!ifs_block_is_dirty(folio, ifs, start_blk))
> +		if (++start_blk == end_blk)
> +			return 0;
> +
> +	while (start_blk + nblks < end_blk &&
> +	       ifs_block_is_dirty(folio, ifs, start_blk + nblks))
> +		nblks++;

I don't like ^^^ the single-space indentation of the loop body over the
loop test expression because I almost missed that.  Extra braces for
clarity, please?
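
For reference, the braced form of that loop would read:

	while (start_blk + nblks < end_blk &&
	       ifs_block_is_dirty(folio, ifs, start_blk + nblks)) {
		nblks++;
	}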

> +
> +	*range_start = folio_pos(folio) + (start_blk << inode->i_blkbits);
> +	return nblks << inode->i_blkbits;
> +}
> +
> +static unsigned iomap_find_dirty_range(struct folio *folio, u64 *range_start,
> +		u64 range_end)
> +{
> +	struct iomap_folio_state *ifs = folio->private;
> +	

   ^^^^^ extra whitespace?

> +	if (*range_start >= range_end)
> +		return 0;
> +
> +	if (ifs)
> +		return ifs_find_dirty_range(folio, ifs, range_start, range_end);
> +	return range_end - *range_start;
> +}
> +
>  static void ifs_clear_range_dirty(struct folio *folio,
>  		struct iomap_folio_state *ifs, size_t off, size_t len)
>  {
> @@ -1712,10 +1748,9 @@ iomap_can_add_to_ioend(struct iomap_writepage_ctx *wpc, loff_t offset)
>   */
>  static int iomap_add_to_ioend(struct iomap_writepage_ctx *wpc,
>  		struct writeback_control *wbc, struct folio *folio,
> -		struct inode *inode, loff_t pos)
> +		struct inode *inode, loff_t pos, unsigned len)
>  {
>  	struct iomap_folio_state *ifs = folio->private;
> -	unsigned len = i_blocksize(inode);
>  	size_t poff = offset_in_folio(folio, pos);
>  	int error;
>  
> @@ -1739,28 +1774,41 @@ static int iomap_add_to_ioend(struct iomap_writepage_ctx *wpc,
>  
>  static int iomap_writepage_map_blocks(struct iomap_writepage_ctx *wpc,
>  		struct writeback_control *wbc, struct folio *folio,
> -		struct inode *inode, u64 pos, unsigned *count)
> +		struct inode *inode, u64 pos, unsigned dirty_len,
> +		unsigned *count)
>  {
>  	int error;
>  
> -	error = wpc->ops->map_blocks(wpc, inode, pos);
> -	if (error)
> -		goto fail;
> -	trace_iomap_writepage_map(inode, &wpc->iomap);
> -
> -	switch (wpc->iomap.type) {
> -	case IOMAP_INLINE:
> -		WARN_ON_ONCE(1);
> -		error = -EIO;
> -		break;
> -	case IOMAP_HOLE:
> -		break;
> -	default:
> -		iomap_add_to_ioend(wpc, wbc, folio, inode, pos);

Hey wait, the previous patch missed the error return here!

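(i.e. the call presumably wants its return value checked, roughly:

		error = iomap_add_to_ioend(wpc, wbc, folio, inode, pos);
		if (!error)
			(*count)++;

which is what the reworked loop in this patch now does.)
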
> -		(*count)++;
> -	}
> +	do {
> +		unsigned map_len;
> +
> +		error = wpc->ops->map_blocks(wpc, inode, pos);
> +		if (error)
> +			break;
> +		trace_iomap_writepage_map(inode, &wpc->iomap);
> +
> +		map_len = min_t(u64, dirty_len,
> +			wpc->iomap.offset + wpc->iomap.length - pos);
> +		WARN_ON_ONCE(!folio->private && map_len < dirty_len);
> +
> +		switch (wpc->iomap.type) {
> +		case IOMAP_INLINE:
> +			WARN_ON_ONCE(1);
> +			error = -EIO;
> +			break;
> +		case IOMAP_HOLE:
> +			break;
> +		default:
> +			error = iomap_add_to_ioend(wpc, wbc, folio, inode, pos,
> +					map_len);
> +			if (!error)
> +				(*count)++;
> +			break;
> +		}
> +		dirty_len -= map_len;
> +		pos += map_len;
> +	} while (dirty_len && !error);
>  
> -fail:
>  	/*
>  	 * We cannot cancel the ioend directly here on error.  We may have
>  	 * already set other pages under writeback and hence we have to run I/O
> @@ -1827,7 +1875,7 @@ static bool iomap_writepage_handle_eof(struct folio *folio, struct inode *inode,
>  		 * beyond i_size.
>  		 */
>  		folio_zero_segment(folio, poff, folio_size(folio));
> -		*end_pos = isize;
> +		*end_pos = round_up(isize, i_blocksize(inode));
>  	}
>  
>  	return true;
> @@ -1838,12 +1886,11 @@ static int iomap_writepage_map(struct iomap_writepage_ctx *wpc,
>  {
>  	struct iomap_folio_state *ifs = folio->private;
>  	struct inode *inode = folio->mapping->host;
> -	unsigned len = i_blocksize(inode);
> -	unsigned nblocks = i_blocks_per_folio(inode, folio);
>  	u64 pos = folio_pos(folio);
>  	u64 end_pos = pos + folio_size(folio);
>  	unsigned count = 0;
> -	int error = 0, i;
> +	int error = 0;
> +	u32 rlen;
>  
>  	WARN_ON_ONCE(!folio_test_locked(folio));
>  	WARN_ON_ONCE(folio_test_dirty(folio));
> @@ -1857,7 +1904,7 @@ static int iomap_writepage_map(struct iomap_writepage_ctx *wpc,
>  	}
>  	WARN_ON_ONCE(end_pos <= pos);
>  
> -	if (nblocks > 1) {
> +	if (i_blocks_per_folio(inode, folio) > 1) {
>  		if (!ifs) {
>  			ifs = ifs_alloc(inode, folio, 0);
>  			iomap_set_range_dirty(folio, 0, end_pos - pos);
> @@ -1880,18 +1927,16 @@ static int iomap_writepage_map(struct iomap_writepage_ctx *wpc,
>  	folio_start_writeback(folio);
>  
>  	/*
> -	 * Walk through the folio to find areas to write back. If we
> -	 * run off the end of the current map or find the current map
> -	 * invalid, grab a new one.
> +	 * Walk through the folio to find dirty areas to write back.
>  	 */
> -	for (i = 0; i < nblocks && pos < end_pos; i++, pos += len) {
> -		if (ifs && !ifs_block_is_dirty(folio, ifs, i))
> -			continue;
> -		error = iomap_writepage_map_blocks(wpc, wbc, folio, inode, pos,
> -				&count);
> +	while ((rlen = iomap_find_dirty_range(folio, &pos, end_pos))) {
> +		error = iomap_writepage_map_blocks(wpc, wbc, folio, inode,
> +				pos, rlen, &count);
>  		if (error)
>  			break;
> +		pos += rlen;
>  	}
> +
>  	if (count)
>  		wpc->nr_folios++;
>  
> diff --git a/include/linux/iomap.h b/include/linux/iomap.h
> index b8d3b658ad2b03..49d93f53878565 100644
> --- a/include/linux/iomap.h
> +++ b/include/linux/iomap.h
> @@ -309,6 +309,13 @@ struct iomap_writeback_ops {
>  	/*
>  	 * Required, maps the blocks so that writeback can be performed on
>  	 * the range starting at offset.
> +	 *
> +	 * Can return arbitrarily large regions, but we need to call into it at
> +	 * least once per folio to allow the file systems to synchronize with
> +	 * the write path that could be invalidating mappings.

Does xfs_map_blocks already return arbitrarily large regions?  I think
it already does since I see it setting imap.br_blockcount in places.

--D

> +	 *
> +	 * An existing mapping from a previous call to this method can be reused
> +	 * by the file system if it is still valid.
>  	 */
>  	int (*map_blocks)(struct iomap_writepage_ctx *wpc, struct inode *inode,
>  				loff_t offset);
> -- 
> 2.39.2
> 
>
Christoph Hellwig Nov. 29, 2023, 5:44 a.m. UTC | #2
On Tue, Nov 28, 2023 at 09:25:35PM -0800, Darrick J. Wong wrote:
> > -	case IOMAP_HOLE:
> > -		break;
> > -	default:
> > -		iomap_add_to_ioend(wpc, wbc, folio, inode, pos);
> 
> Hey wait, the previous patch missed the error return here!

Oh.  I guess that's what you get for rebasing and reordering a little
too much..

> > index b8d3b658ad2b03..49d93f53878565 100644
> > --- a/include/linux/iomap.h
> > +++ b/include/linux/iomap.h
> > @@ -309,6 +309,13 @@ struct iomap_writeback_ops {
> >  	/*
> >  	 * Required, maps the blocks so that writeback can be performed on
> >  	 * the range starting at offset.
> > +	 *
> > +	 * Can return arbitrarily large regions, but we need to call into it at
> > +	 * least once per folio to allow the file systems to synchronize with
> > +	 * the write path that could be invalidating mappings.
> 
> Does xfs_map_blocks already return arbitrarily large regions?  I think
> it already does since I see it setting imap.br_blockcount in places.

Yes, it does try to convert the entire underlying delalloc extent.
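
As an illustration of that contract (a sketch, not the actual xfs_map_blocks), a ->map_blocks implementation that hands back large regions and reuses a still-valid mapping could look roughly like this; example_mapping_still_valid() and example_get_extent() are made-up placeholders for a file system's own revalidation check and extent lookup:

static int example_map_blocks(struct iomap_writepage_ctx *wpc,
		struct inode *inode, loff_t offset)
{
	/*
	 * If the mapping from a previous call still covers this offset and
	 * has not been invalidated since it was looked up, reuse it.
	 */
	if (offset >= wpc->iomap.offset &&
	    offset < wpc->iomap.offset + wpc->iomap.length &&
	    example_mapping_still_valid(wpc, inode))
		return 0;

	/*
	 * Otherwise look up (or allocate) the extent covering @offset and
	 * fill in wpc->iomap; the returned region may extend well beyond
	 * the current folio, e.g. a whole delalloc extent.
	 */
	return example_get_extent(wpc, inode, offset);
}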

Patch

diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index a01b0686e7c8a0..a98cb38a71ebc8 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -1,7 +1,7 @@ 
 // SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2010 Red Hat, Inc.
- * Copyright (C) 2016-2019 Christoph Hellwig.
+ * Copyright (C) 2016-2023 Christoph Hellwig.
  */
 #include <linux/module.h>
 #include <linux/compiler.h>
@@ -95,6 +95,42 @@  static inline bool ifs_block_is_dirty(struct folio *folio,
 	return test_bit(block + blks_per_folio, ifs->state);
 }
 
+static unsigned ifs_find_dirty_range(struct folio *folio,
+		struct iomap_folio_state *ifs, u64 *range_start, u64 range_end)
+{
+	struct inode *inode = folio->mapping->host;
+	unsigned start_blk =
+		offset_in_folio(folio, *range_start) >> inode->i_blkbits;
+	unsigned end_blk = min_not_zero(
+		offset_in_folio(folio, range_end) >> inode->i_blkbits,
+		i_blocks_per_folio(inode, folio));
+	unsigned nblks = 1;
+
+	while (!ifs_block_is_dirty(folio, ifs, start_blk))
+		if (++start_blk == end_blk)
+			return 0;
+
+	while (start_blk + nblks < end_blk &&
+	       ifs_block_is_dirty(folio, ifs, start_blk + nblks))
+		nblks++;
+
+	*range_start = folio_pos(folio) + (start_blk << inode->i_blkbits);
+	return nblks << inode->i_blkbits;
+}
+
+static unsigned iomap_find_dirty_range(struct folio *folio, u64 *range_start,
+		u64 range_end)
+{
+	struct iomap_folio_state *ifs = folio->private;
+	
+	if (*range_start >= range_end)
+		return 0;
+
+	if (ifs)
+		return ifs_find_dirty_range(folio, ifs, range_start, range_end);
+	return range_end - *range_start;
+}
+
 static void ifs_clear_range_dirty(struct folio *folio,
 		struct iomap_folio_state *ifs, size_t off, size_t len)
 {
@@ -1712,10 +1748,9 @@  iomap_can_add_to_ioend(struct iomap_writepage_ctx *wpc, loff_t offset)
  */
 static int iomap_add_to_ioend(struct iomap_writepage_ctx *wpc,
 		struct writeback_control *wbc, struct folio *folio,
-		struct inode *inode, loff_t pos)
+		struct inode *inode, loff_t pos, unsigned len)
 {
 	struct iomap_folio_state *ifs = folio->private;
-	unsigned len = i_blocksize(inode);
 	size_t poff = offset_in_folio(folio, pos);
 	int error;
 
@@ -1739,28 +1774,41 @@  static int iomap_add_to_ioend(struct iomap_writepage_ctx *wpc,
 
 static int iomap_writepage_map_blocks(struct iomap_writepage_ctx *wpc,
 		struct writeback_control *wbc, struct folio *folio,
-		struct inode *inode, u64 pos, unsigned *count)
+		struct inode *inode, u64 pos, unsigned dirty_len,
+		unsigned *count)
 {
 	int error;
 
-	error = wpc->ops->map_blocks(wpc, inode, pos);
-	if (error)
-		goto fail;
-	trace_iomap_writepage_map(inode, &wpc->iomap);
-
-	switch (wpc->iomap.type) {
-	case IOMAP_INLINE:
-		WARN_ON_ONCE(1);
-		error = -EIO;
-		break;
-	case IOMAP_HOLE:
-		break;
-	default:
-		iomap_add_to_ioend(wpc, wbc, folio, inode, pos);
-		(*count)++;
-	}
+	do {
+		unsigned map_len;
+
+		error = wpc->ops->map_blocks(wpc, inode, pos);
+		if (error)
+			break;
+		trace_iomap_writepage_map(inode, &wpc->iomap);
+
+		map_len = min_t(u64, dirty_len,
+			wpc->iomap.offset + wpc->iomap.length - pos);
+		WARN_ON_ONCE(!folio->private && map_len < dirty_len);
+
+		switch (wpc->iomap.type) {
+		case IOMAP_INLINE:
+			WARN_ON_ONCE(1);
+			error = -EIO;
+			break;
+		case IOMAP_HOLE:
+			break;
+		default:
+			error = iomap_add_to_ioend(wpc, wbc, folio, inode, pos,
+					map_len);
+			if (!error)
+				(*count)++;
+			break;
+		}
+		dirty_len -= map_len;
+		pos += map_len;
+	} while (dirty_len && !error);
 
-fail:
 	/*
 	 * We cannot cancel the ioend directly here on error.  We may have
 	 * already set other pages under writeback and hence we have to run I/O
@@ -1827,7 +1875,7 @@  static bool iomap_writepage_handle_eof(struct folio *folio, struct inode *inode,
 		 * beyond i_size.
 		 */
 		folio_zero_segment(folio, poff, folio_size(folio));
-		*end_pos = isize;
+		*end_pos = round_up(isize, i_blocksize(inode));
 	}
 
 	return true;
@@ -1838,12 +1886,11 @@  static int iomap_writepage_map(struct iomap_writepage_ctx *wpc,
 {
 	struct iomap_folio_state *ifs = folio->private;
 	struct inode *inode = folio->mapping->host;
-	unsigned len = i_blocksize(inode);
-	unsigned nblocks = i_blocks_per_folio(inode, folio);
 	u64 pos = folio_pos(folio);
 	u64 end_pos = pos + folio_size(folio);
 	unsigned count = 0;
-	int error = 0, i;
+	int error = 0;
+	u32 rlen;
 
 	WARN_ON_ONCE(!folio_test_locked(folio));
 	WARN_ON_ONCE(folio_test_dirty(folio));
@@ -1857,7 +1904,7 @@  static int iomap_writepage_map(struct iomap_writepage_ctx *wpc,
 	}
 	WARN_ON_ONCE(end_pos <= pos);
 
-	if (nblocks > 1) {
+	if (i_blocks_per_folio(inode, folio) > 1) {
 		if (!ifs) {
 			ifs = ifs_alloc(inode, folio, 0);
 			iomap_set_range_dirty(folio, 0, end_pos - pos);
@@ -1880,18 +1927,16 @@  static int iomap_writepage_map(struct iomap_writepage_ctx *wpc,
 	folio_start_writeback(folio);
 
 	/*
-	 * Walk through the folio to find areas to write back. If we
-	 * run off the end of the current map or find the current map
-	 * invalid, grab a new one.
+	 * Walk through the folio to find dirty areas to write back.
 	 */
-	for (i = 0; i < nblocks && pos < end_pos; i++, pos += len) {
-		if (ifs && !ifs_block_is_dirty(folio, ifs, i))
-			continue;
-		error = iomap_writepage_map_blocks(wpc, wbc, folio, inode, pos,
-				&count);
+	while ((rlen = iomap_find_dirty_range(folio, &pos, end_pos))) {
+		error = iomap_writepage_map_blocks(wpc, wbc, folio, inode,
+				pos, rlen, &count);
 		if (error)
 			break;
+		pos += rlen;
 	}
+
 	if (count)
 		wpc->nr_folios++;
 
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index b8d3b658ad2b03..49d93f53878565 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -309,6 +309,13 @@  struct iomap_writeback_ops {
 	/*
 	 * Required, maps the blocks so that writeback can be performed on
 	 * the range starting at offset.
+	 *
+	 * Can return arbitrarily large regions, but we need to call into it at
+	 * least once per folio to allow the file systems to synchronize with
+	 * the write path that could be invalidating mappings.
+	 *
+	 * An existing mapping from a previous call to this method can be reused
+	 * by the file system if it is still valid.
 	 */
 	int (*map_blocks)(struct iomap_writepage_ctx *wpc, struct inode *inode,
 				loff_t offset);
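
To make the new walk concrete, here is a minimal userspace model of the dirty-range scan added in buffered-io.c above — a simplified sketch, not the kernel code: blocks are tracked in a plain bool array instead of ifs->state, and each contiguous dirty run is consumed in one step, which is what lets a single ->map_blocks result cover many blocks.

/* Simplified model of the per-folio dirty-range walk (userspace sketch). */
#include <stdbool.h>
#include <stdio.h>

#define BLOCKS_PER_FOLIO 16	/* e.g. a 64k folio with 4k blocks */

/* Return the length of the dirty run starting at or after *start_blk. */
static unsigned find_dirty_range(const bool *dirty, unsigned *start_blk,
		unsigned end_blk)
{
	unsigned nblks = 1;

	if (*start_blk >= end_blk)
		return 0;

	/* Skip clean blocks until the first dirty one (or the end). */
	while (!dirty[*start_blk])
		if (++(*start_blk) == end_blk)
			return 0;

	/* Extend the run over all following dirty blocks. */
	while (*start_blk + nblks < end_blk && dirty[*start_blk + nblks]) {
		nblks++;
	}

	return nblks;
}

int main(void)
{
	bool dirty[BLOCKS_PER_FOLIO] = { false };
	unsigned blk = 0, len;

	/* Two separate dirty runs: blocks 2-5 and 9-10. */
	for (unsigned i = 2; i <= 5; i++)
		dirty[i] = true;
	for (unsigned i = 9; i <= 10; i++)
		dirty[i] = true;

	/* Each run is handed to the mapping/ioend code as one range. */
	while ((len = find_dirty_range(dirty, &blk, BLOCKS_PER_FOLIO))) {
		printf("write back blocks %u-%u\n", blk, blk + len - 1);
		blk += len;
	}
	return 0;
}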