diff mbox series

[RFC] iomap: more granular iteration

Message ID 20241125140623.20633-1-bfoster@redhat.com (mailing list archive)
State New
Headers show
Series [RFC] iomap: more granular iteration | expand

Commit Message

Brian Foster Nov. 25, 2024, 2:06 p.m. UTC
Not-Signed-off-by: Brian Foster <bfoster@redhat.com>
---

Hi all,

I just wanted to throw this against the wall for early
thoughts/feedback. This relates to to the iomap folio batch support
series, in particular pushing the batch handling code further down into
the normal get folio path.

Firstly for reference, this RFC is a squash of several logical changes
that should be separate patches:

1. Split off state clearing from iomap_advance_iter() into a separate
helper.
2. Add an iter_len field to iomap_iter to track per-iteration length.
3. Update iomap_zero_iter() to use granular iteration instead of local
variables for pos/len.

The objective is I'm looking for a cleaner way to support a sparse range
(based on a set of folios in a batch) beyond just turning pos/len into
pointers. Most of the iomap iterators duplicate a lot of boilerplate
around copying pos/len out of the iter, processing it locally in a loop
based on length, and then passing state back into the iter via
iter->processed.

The idea here is to replace some of that with more granular iter state
so the iteratora function can advance the iter on its own. In turn, this
facilitates the ability of the write_begin() path to advance the
iterator based on batched folio lookup if the folio set happens to be
logically discontiguous.

I still need to push some of the length/bytes chunking stuff further
down closer to get_folio before this is possible, but the hope is we can
end up with something like the following at the iterator level:

	do {
		/* iter->pos/iter_len advanced based on folio pos */
		status = iomap_write_begin(iter, &folio);

		... calc/process folio_[offset|bytes] ...

		iomap_advance_iter(iter, folio_bytes);
	} while (iter->iter_len > 0);
	...
	return 0;

The one warty thing I have so far is marking the iomap stale on iter_len
consumption to allow these types of iterators to return error/success
(i.e. return 0 instead of a written count) without the higher level
advance causing a loop termination. This could probably be done
differently, but this should be a no-op for non-granular iterators so
I'm not that worried about it as of yet.

Thoughts/comments on any of this appreciated. Thanks.

Brian

 fs/iomap/buffered-io.c | 22 ++++++++-------------
 fs/iomap/iter.c        | 45 +++++++++++++++++++++++++++++++-----------
 include/linux/iomap.h  |  2 ++
 3 files changed, 43 insertions(+), 26 deletions(-)
diff mbox series

Patch

diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 0708be776740..c479ec73dedd 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -1352,18 +1352,14 @@  static inline int iomap_zero_iter_flush_and_stale(struct iomap_iter *i)
 
 static loff_t iomap_zero_iter(struct iomap_iter *iter, bool *did_zero)
 {
-	loff_t pos = iter->pos;
-	loff_t length = iomap_length(iter);
-	loff_t written = 0;
-
 	do {
 		struct folio *folio;
 		int status;
 		size_t offset;
-		size_t bytes = min_t(u64, SIZE_MAX, length);
+		size_t bytes = min_t(u64, SIZE_MAX, iter->iter_len);
 		bool ret;
 
-		status = iomap_write_begin(iter, pos, bytes, &folio);
+		status = iomap_write_begin(iter, iter->pos, bytes, &folio);
 		if (status)
 			return status;
 		if (iter->iomap.flags & IOMAP_F_STALE)
@@ -1371,26 +1367,24 @@  static loff_t iomap_zero_iter(struct iomap_iter *iter, bool *did_zero)
 
 		/* warn about zeroing folios beyond eof that won't write back */
 		WARN_ON_ONCE(folio_pos(folio) > iter->inode->i_size);
-		offset = offset_in_folio(folio, pos);
+		offset = offset_in_folio(folio, iter->pos);
 		if (bytes > folio_size(folio) - offset)
 			bytes = folio_size(folio) - offset;
 
 		folio_zero_range(folio, offset, bytes);
 		folio_mark_accessed(folio);
 
-		ret = iomap_write_end(iter, pos, bytes, bytes, folio);
-		__iomap_put_folio(iter, pos, bytes, folio);
+		ret = iomap_write_end(iter, iter->pos, bytes, bytes, folio);
+		__iomap_put_folio(iter, iter->pos, bytes, folio);
 		if (WARN_ON_ONCE(!ret))
 			return -EIO;
 
-		pos += bytes;
-		length -= bytes;
-		written += bytes;
-	} while (length > 0);
+		iomap_iter_advance(iter, bytes);
+	} while (iter->iter_len > 0);
 
 	if (did_zero)
 		*did_zero = true;
-	return written;
+	return 0;
 }
 
 int
diff --git a/fs/iomap/iter.c b/fs/iomap/iter.c
index 3790918646af..962f4b35d856 100644
--- a/fs/iomap/iter.c
+++ b/fs/iomap/iter.c
@@ -7,6 +7,14 @@ 
 #include <linux/iomap.h>
 #include "trace.h"
 
+/* clear out iomaps from the fs */
+static inline void iomap_iter_reset_iomap(struct iomap_iter *iter)
+{
+	iter->processed = iter->iter_len = 0;
+	memset(&iter->iomap, 0, sizeof(iter->iomap));
+	memset(&iter->srcmap, 0, sizeof(iter->srcmap));
+}
+
 /*
  * Advance to the next range we need to map.
  *
@@ -19,30 +27,40 @@ 
  * (processed = 0) meaning we are done and (processed = 0 && stale) meaning we
  * need to remap the entire remaining range.
  */
-static inline int iomap_iter_advance(struct iomap_iter *iter)
+int iomap_iter_advance(struct iomap_iter *iter, s64 count)
 {
 	bool stale = iter->iomap.flags & IOMAP_F_STALE;
 	int ret = 1;
 
 	/* handle the previous iteration (if any) */
 	if (iter->iomap.length) {
-		if (iter->processed < 0)
-			return iter->processed;
-		if (WARN_ON_ONCE(iter->processed > iomap_length(iter)))
+		if (count < 0)
+			return count;
+		if (WARN_ON_ONCE(count > iter->iter_len))
 			return -EIO;
-		iter->pos += iter->processed;
-		iter->len -= iter->processed;
-		if (!iter->len || (!iter->processed && !stale))
+		iter->pos += count;
+		iter->len -= count;
+		if (!iter->len || (!count && !stale))
 			ret = 0;
+		iter->iter_len -= count;
+		/*
+		 * XXX: Stale the mapping on consuming iter_len to prevent
+		 * interference from subsequent 0 byte (i.e. return success
+		 * advance). Perhaps use a separate flag here for "incremental"
+		 * iterators..?
+		 */
+		if (!iter->iter_len)
+			iter->iomap.flags |= IOMAP_F_STALE;
 	}
 
-	/* clear the per iteration state */
-	iter->processed = 0;
-	memset(&iter->iomap, 0, sizeof(iter->iomap));
-	memset(&iter->srcmap, 0, sizeof(iter->srcmap));
 	return ret;
 }
 
+static inline int iomap_iter_advance_processed(struct iomap_iter *iter)
+{
+	return iomap_iter_advance(iter, iter->processed);
+}
+
 static inline void iomap_iter_done(struct iomap_iter *iter)
 {
 	WARN_ON_ONCE(iter->iomap.offset > iter->pos);
@@ -53,6 +71,8 @@  static inline void iomap_iter_done(struct iomap_iter *iter)
 	trace_iomap_iter_dstmap(iter->inode, &iter->iomap);
 	if (iter->srcmap.type != IOMAP_HOLE)
 		trace_iomap_iter_srcmap(iter->inode, &iter->srcmap);
+
+	iter->iter_len = iomap_length(iter);
 }
 
 /**
@@ -83,7 +103,8 @@  int iomap_iter(struct iomap_iter *iter, const struct iomap_ops *ops)
 	}
 
 	trace_iomap_iter(iter, ops, _RET_IP_);
-	ret = iomap_iter_advance(iter);
+	ret = iomap_iter_advance_processed(iter);
+	iomap_iter_reset_iomap(iter);
 	if (ret <= 0)
 		return ret;
 
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 27048ec10e1c..6fb7adc1f2c6 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -217,6 +217,7 @@  struct iomap_iter {
 	struct inode *inode;
 	loff_t pos;
 	u64 len;
+	u64 iter_len;
 	s64 processed;
 	unsigned flags;
 	struct iomap iomap;
@@ -225,6 +226,7 @@  struct iomap_iter {
 };
 
 int iomap_iter(struct iomap_iter *iter, const struct iomap_ops *ops);
+int iomap_iter_advance(struct iomap_iter *iter, s64 count);
 
 /**
  * iomap_length - length of the current iomap iteration