diff mbox series

[V2] lib/iov_iter.c: extract virt-contiguous pages in iov_iter_extract_bvec_pages

Message ID 20241011035247.2444033-1-ming.lei@redhat.com (mailing list archive)
State New, archived
Headers show
Series [V2] lib/iov_iter.c: extract virt-contiguous pages in iov_iter_extract_bvec_pages | expand

Commit Message

Ming Lei Oct. 11, 2024, 3:52 a.m. UTC
Actually iov_iter_extract_pages() requires that there isn't gap in the
extracted pages, so 'offset' only exists in the 1st page, then these
pages can be mapped to one virtual(contiguous) address.

All iov_iter_bvec() users only want to extract virt-contiguous pages from
iov_iter_extract_pages() instead physical-contiguous pages.

Change iov_iter_extract_bvec_pages() to extract virt-contiguous pages via
bvec helper.

This way can fill much more pages one time, instead of (often)one page from
iov_iter_extract_pages() each time.

The change is reasonable & safe since oher kind of iterators(UBUF, KVEC, ...)
do return non physically-contiguous pages.

Fixes: a7e689dd1c06 ("block: Convert bio_iov_iter_get_pages to use iov_iter_extract_pages")
Cc: David Howells <dhowells@redhat.com>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
---
V2:
	- add fixes
	- improve commit log

 include/linux/bvec.h |  6 ++++++
 lib/iov_iter.c       | 47 +++++++++++++++++++++++---------------------
 2 files changed, 31 insertions(+), 22 deletions(-)

Comments

Christoph Hellwig Oct. 11, 2024, 8:44 a.m. UTC | #1
On Fri, Oct 11, 2024 at 11:52:47AM +0800, Ming Lei wrote:
> Actually iov_iter_extract_pages() requires that there isn't gap in the
> extracted pages, so 'offset' only exists in the 1st page, then these
> pages can be mapped to one virtual(contiguous) address.
> 
> All iov_iter_bvec() users only want to extract virt-contiguous pages from
> iov_iter_extract_pages() instead physical-contiguous pages.
> 
> Change iov_iter_extract_bvec_pages() to extract virt-contiguous pages via
> bvec helper.
> 
> This way can fill much more pages one time, instead of (often)one page from
> iov_iter_extract_pages() each time.
> 
> The change is reasonable & safe since oher kind of iterators(UBUF, KVEC, ...)
> do return non physically-contiguous pages.

I had to read through the code to understand what this means.  Here is
what I have written based on my understanding:

The iov_iter_extract_pages interface allows to return physically
discontiguous pages, as long as all but the first and last page
in the array are page aligned and page size.  Rewrite
iov_iter_extract_bvec_pages to take advantage of that instead of only
returning ranges of physically contiguous pages.

> Fixes: a7e689dd1c06 ("block: Convert bio_iov_iter_get_pages to use iov_iter_extract_pages")

As far as I can tell the current behavior is highly suboptimal, but not
actually buggy, does this really warrant a fixes tag?

> +#define for_each_bvec_max(bvl, bio_vec, iter, start, nr_bvecs)		\
> +	for (iter = (start);						\
> +	     (iter).bi_size && iter.bi_idx < nr_bvecs &&		\
> +		((bvl = bvec_iter_bvec((bio_vec), (iter))), 1);	\
> +	     bvec_iter_advance_single((bio_vec), &(iter), (bvl).bv_len))
> +

And this helper just makes the code harder to read, and makes people
wonder what this undocumented _max interface does.  What about the
below variant of your patch that open codes it, and throws in a few
comments to explain the logic?

diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 1abb32c0da50bc..7768f1b2006d81 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -1677,8 +1677,8 @@ static ssize_t iov_iter_extract_xarray_pages(struct iov_iter *i,
 }
 
 /*
- * Extract a list of contiguous pages from an ITER_BVEC iterator.  This does
- * not get references on the pages, nor does it get a pin on them.
+ * Extract a list of virtually contiguous pages from an ITER_BVEC iterator.
+ * This does not get references on the pages, nor does it get a pin on them.
  */
 static ssize_t iov_iter_extract_bvec_pages(struct iov_iter *i,
 					   struct page ***pages, size_t maxsize,
@@ -1686,35 +1686,57 @@ static ssize_t iov_iter_extract_bvec_pages(struct iov_iter *i,
 					   iov_iter_extraction_t extraction_flags,
 					   size_t *offset0)
 {
-	struct page **p, *page;
-	size_t skip = i->iov_offset, offset, size;
-	int k;
+	size_t skip = i->iov_offset, size = 0;
+	struct bvec_iter bi;
+	int k = 0;
 
-	for (;;) {
-		if (i->nr_segs == 0)
-			return 0;
-		size = min(maxsize, i->bvec->bv_len - skip);
-		if (size)
-			break;
+	if (i->nr_segs == 0)
+		return 0;
+
+	if (i->iov_offset == i->bvec->bv_len) {
 		i->iov_offset = 0;
 		i->nr_segs--;
 		i->bvec++;
 		skip = 0;
 	}
+	bi.bi_size = maxsize + skip;
+	bi.bi_bvec_done = skip;
+
+	maxpages = want_pages_array(pages, maxsize, skip, maxpages);
+
+	while (bi.bi_size && bi.bi_idx < i->nr_segs) {
+		struct bio_vec bv = bvec_iter_bvec(i->bvec, bi);
+
+		/*
+		 * The iov_iter_extract_pages interface only allows an offset
+		 * into the first page.  Break out of the loop if we see an
+		 * offset into subsequent pages, the caller will have to call
+		 * iov_iter_extract_pages again for the reminder.
+		 */
+		if (k) {
+			if (bv.bv_offset)
+				break;
+		} else {
+			*offset0 = bv.bv_offset;
+		}
 
-	skip += i->bvec->bv_offset;
-	page = i->bvec->bv_page + skip / PAGE_SIZE;
-	offset = skip % PAGE_SIZE;
-	*offset0 = offset;
+		(*pages)[k++] = bv.bv_page;
+		size += bv.bv_len;
 
-	maxpages = want_pages_array(pages, size, offset, maxpages);
-	if (!maxpages)
-		return -ENOMEM;
-	p = *pages;
-	for (k = 0; k < maxpages; k++)
-		p[k] = page + k;
+		if (k >= maxpages)
+			break;
+
+		/*
+		 * Similarly, we are done when the end of the bvec doesn't align
+		 * to a page boundary as that would create a hole in the
+		 * returned space.  The caller will handle this with another call
+		 * to iov_iter_extract_pages.
+		 */
+		if (bv.bv_offset + bv.bv_len != PAGE_SIZE)
+			break;
+	     	bvec_iter_advance_single(i->bvec, &bi, bv.bv_len);
+	}
 
-	size = min_t(size_t, size, maxpages * PAGE_SIZE - offset);
 	iov_iter_advance(i, size);
 	return size;
 }
Ming Lei Oct. 11, 2024, 12:03 p.m. UTC | #2
On Fri, Oct 11, 2024 at 01:44:14AM -0700, Christoph Hellwig wrote:
> On Fri, Oct 11, 2024 at 11:52:47AM +0800, Ming Lei wrote:
> > Actually iov_iter_extract_pages() requires that there isn't gap in the
> > extracted pages, so 'offset' only exists in the 1st page, then these
> > pages can be mapped to one virtual(contiguous) address.
> > 
> > All iov_iter_bvec() users only want to extract virt-contiguous pages from
> > iov_iter_extract_pages() instead physical-contiguous pages.
> > 
> > Change iov_iter_extract_bvec_pages() to extract virt-contiguous pages via
> > bvec helper.
> > 
> > This way can fill much more pages one time, instead of (often)one page from
> > iov_iter_extract_pages() each time.
> > 
> > The change is reasonable & safe since oher kind of iterators(UBUF, KVEC, ...)
> > do return non physically-contiguous pages.
> 
> I had to read through the code to understand what this means.  Here is
> what I have written based on my understanding:
> 
> The iov_iter_extract_pages interface allows to return physically
> discontiguous pages, as long as all but the first and last page
> in the array are page aligned and page size.  Rewrite
> iov_iter_extract_bvec_pages to take advantage of that instead of only
> returning ranges of physically contiguous pages.
> 
> > Fixes: a7e689dd1c06 ("block: Convert bio_iov_iter_get_pages to use iov_iter_extract_pages")
> 
> As far as I can tell the current behavior is highly suboptimal, but not
> actually buggy, does this really warrant a fixes tag?

This bad behavior is really introduced from this commit, I'd suggest to
add 'Fixes:'

> 
> > +#define for_each_bvec_max(bvl, bio_vec, iter, start, nr_bvecs)		\
> > +	for (iter = (start);						\
> > +	     (iter).bi_size && iter.bi_idx < nr_bvecs &&		\
> > +		((bvl = bvec_iter_bvec((bio_vec), (iter))), 1);	\
> > +	     bvec_iter_advance_single((bio_vec), &(iter), (bvl).bv_len))
> > +
> 
> And this helper just makes the code harder to read, and makes people
> wonder what this undocumented _max interface does.  What about the
> below variant of your patch that open codes it, and throws in a few
> comments to explain the logic?

Looks open-code iterator is more readable, the patch looks fine, and
I have verified it works as expected.

> 
> diff --git a/lib/iov_iter.c b/lib/iov_iter.c
> index 1abb32c0da50bc..7768f1b2006d81 100644
> --- a/lib/iov_iter.c
> +++ b/lib/iov_iter.c
> @@ -1677,8 +1677,8 @@ static ssize_t iov_iter_extract_xarray_pages(struct iov_iter *i,
>  }
>  
>  /*
> - * Extract a list of contiguous pages from an ITER_BVEC iterator.  This does
> - * not get references on the pages, nor does it get a pin on them.
> + * Extract a list of virtually contiguous pages from an ITER_BVEC iterator.
> + * This does not get references on the pages, nor does it get a pin on them.
>   */
>  static ssize_t iov_iter_extract_bvec_pages(struct iov_iter *i,
>  					   struct page ***pages, size_t maxsize,
> @@ -1686,35 +1686,57 @@ static ssize_t iov_iter_extract_bvec_pages(struct iov_iter *i,
>  					   iov_iter_extraction_t extraction_flags,
>  					   size_t *offset0)
>  {
> -	struct page **p, *page;
> -	size_t skip = i->iov_offset, offset, size;
> -	int k;
> +	size_t skip = i->iov_offset, size = 0;
> +	struct bvec_iter bi;
> +	int k = 0;
>  
> -	for (;;) {
> -		if (i->nr_segs == 0)
> -			return 0;
> -		size = min(maxsize, i->bvec->bv_len - skip);
> -		if (size)
> -			break;
> +	if (i->nr_segs == 0)
> +		return 0;
> +
> +	if (i->iov_offset == i->bvec->bv_len) {
>  		i->iov_offset = 0;
>  		i->nr_segs--;
>  		i->bvec++;
>  		skip = 0;
>  	}
> +	bi.bi_size = maxsize + skip;
> +	bi.bi_bvec_done = skip;
> +
> +	maxpages = want_pages_array(pages, maxsize, skip, maxpages);
> +
> +	while (bi.bi_size && bi.bi_idx < i->nr_segs) {
> +		struct bio_vec bv = bvec_iter_bvec(i->bvec, bi);
> +
> +		/*
> +		 * The iov_iter_extract_pages interface only allows an offset
> +		 * into the first page.  Break out of the loop if we see an
> +		 * offset into subsequent pages, the caller will have to call
> +		 * iov_iter_extract_pages again for the reminder.
> +		 */
> +		if (k) {
> +			if (bv.bv_offset)
> +				break;
> +		} else {
> +			*offset0 = bv.bv_offset;
> +		}
>  
> -	skip += i->bvec->bv_offset;
> -	page = i->bvec->bv_page + skip / PAGE_SIZE;
> -	offset = skip % PAGE_SIZE;
> -	*offset0 = offset;
> +		(*pages)[k++] = bv.bv_page;
> +		size += bv.bv_len;
>  
> -	maxpages = want_pages_array(pages, size, offset, maxpages);
> -	if (!maxpages)
> -		return -ENOMEM;
> -	p = *pages;
> -	for (k = 0; k < maxpages; k++)
> -		p[k] = page + k;
> +		if (k >= maxpages)
> +			break;
> +
> +		/*
> +		 * Similarly, we are done when the end of the bvec doesn't align
> +		 * to a page boundary as that would create a hole in the
> +		 * returned space.  The caller will handle this with another call
> +		 * to iov_iter_extract_pages.
> +		 */
> +		if (bv.bv_offset + bv.bv_len != PAGE_SIZE)
> +			break;
> +	     	bvec_iter_advance_single(i->bvec, &bi, bv.bv_len);

Indent.


Thanks,
Ming
Christoph Hellwig Oct. 11, 2024, 12:08 p.m. UTC | #3
On Fri, Oct 11, 2024 at 08:03:49PM +0800, Ming Lei wrote:
> Looks open-code iterator is more readable, the patch looks fine, and
> I have verified it works as expected.

Do you want me to resend this formally or pick it up from here?
(either way you should be credited as my take was just a trivial
cleanup)
Ming Lei Oct. 11, 2024, 12:17 p.m. UTC | #4
On Fri, Oct 11, 2024 at 05:08:37AM -0700, Christoph Hellwig wrote:
> On Fri, Oct 11, 2024 at 08:03:49PM +0800, Ming Lei wrote:
> > Looks open-code iterator is more readable, the patch looks fine, and
> > I have verified it works as expected.
> 
> Do you want me to resend this formally or pick it up from here?
> (either way you should be credited as my take was just a trivial
> cleanup)

Yeah, please re-send the patch formally.


Thanks,
Ming
diff mbox series

Patch

diff --git a/include/linux/bvec.h b/include/linux/bvec.h
index f41c7f0ef91e..98e1a4ad09e0 100644
--- a/include/linux/bvec.h
+++ b/include/linux/bvec.h
@@ -184,6 +184,12 @@  static inline void bvec_iter_advance_single(const struct bio_vec *bv,
 		((bvl = bvec_iter_bvec((bio_vec), (iter))), 1);	\
 	     bvec_iter_advance_single((bio_vec), &(iter), (bvl).bv_len))
 
+#define for_each_bvec_max(bvl, bio_vec, iter, start, nr_bvecs)		\
+	for (iter = (start);						\
+	     (iter).bi_size && iter.bi_idx < nr_bvecs &&		\
+		((bvl = bvec_iter_bvec((bio_vec), (iter))), 1);	\
+	     bvec_iter_advance_single((bio_vec), &(iter), (bvl).bv_len))
+
 /* for iterating one bio from start to end */
 #define BVEC_ITER_ALL_INIT (struct bvec_iter)				\
 {									\
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 97003155bfac..6e00f6da5259 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -1677,8 +1677,8 @@  static ssize_t iov_iter_extract_xarray_pages(struct iov_iter *i,
 }
 
 /*
- * Extract a list of contiguous pages from an ITER_BVEC iterator.  This does
- * not get references on the pages, nor does it get a pin on them.
+ * Extract a list of virtually contiguous pages from an ITER_BVEC iterator.
+ * This does not get references on the pages, nor does it get a pin on them.
  */
 static ssize_t iov_iter_extract_bvec_pages(struct iov_iter *i,
 					   struct page ***pages, size_t maxsize,
@@ -1686,35 +1686,38 @@  static ssize_t iov_iter_extract_bvec_pages(struct iov_iter *i,
 					   iov_iter_extraction_t extraction_flags,
 					   size_t *offset0)
 {
-	struct page **p, *page;
-	size_t skip = i->iov_offset, offset, size;
-	int k;
+	size_t skip = i->iov_offset, size = 0;
+	struct bvec_iter bi;
+	struct bio_vec bv;
+	int k = 0;
 
-	for (;;) {
-		if (i->nr_segs == 0)
-			return 0;
-		size = min(maxsize, i->bvec->bv_len - skip);
-		if (size)
-			break;
+	if (i->nr_segs == 0)
+		return 0;
+
+	if (i->iov_offset == i->bvec->bv_len) {
 		i->iov_offset = 0;
 		i->nr_segs--;
 		i->bvec++;
 		skip = 0;
 	}
+	bi.bi_size = maxsize + skip;
+	bi.bi_bvec_done = skip;
 
-	skip += i->bvec->bv_offset;
-	page = i->bvec->bv_page + skip / PAGE_SIZE;
-	offset = skip % PAGE_SIZE;
-	*offset0 = offset;
+	maxpages = want_pages_array(pages, maxsize, skip, maxpages);
 
-	maxpages = want_pages_array(pages, size, offset, maxpages);
-	if (!maxpages)
-		return -ENOMEM;
-	p = *pages;
-	for (k = 0; k < maxpages; k++)
-		p[k] = page + k;
+	for_each_bvec_max(bv, i->bvec, bi, bi, i->nr_segs) {
+		if (k >= maxpages)
+			break;
+		if (!k)
+			*offset0 = bv.bv_offset;
+		else if (bv.bv_offset)
+			break;
+		(*pages)[k++] = bv.bv_page;
+		size += bv.bv_len;
+		if (bv.bv_offset + bv.bv_len != PAGE_SIZE)
+			break;
+	}
 
-	size = min_t(size_t, size, maxpages * PAGE_SIZE - offset);
 	iov_iter_advance(i, size);
 	return size;
 }